diff --git a/.kosmokrator/config.yaml b/.kosmokrator/config.yaml index 305430b..f0634ee 100644 --- a/.kosmokrator/config.yaml +++ b/.kosmokrator/config.yaml @@ -1,5 +1,5 @@ kosmokrator: agent: - mode: edit + mode: plan tools: default_permission_mode: prometheus diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..017225f --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,59 @@ +# OpenCompany Repo Guide + +This file is intentionally mirrored in both `AGENTS.md` and `CLAUDE.md`. Keep them identical. + +## Local + +- Local URL: `http://opencompany.test` +- Ngrok URL: `https://your-subdomain.ngrok-free.dev` for Telegram webhooks and external integrations; set up your own with `ngrok http 80` +- Use `http://opencompany.test` for local navigation and testing. +- Stack: Laravel 12, Vue 3, Inertia.js, Tailwind CSS v4, Reka UI + +## Workspace + +- OpenCompany is multi-workspace. Scope queries correctly. +- `ResolveWorkspace` binds the active workspace as `currentWorkspace`. +- `workspace()` returns the current `Workspace`. +- Models with `workspace_id` should use `forWorkspace()`. +- Related models should be scoped through the relation, typically with `whereHas(...)`. +- Humans belong to workspaces through `workspace_members`. Agents have a direct `workspace_id`. + +## Runtime + +- Main runtime agent class: `app/Agents/OpenCompanyAgent.php` +- Identity/system-prompt content is assembled from identity files and agent config, not from a static hardcoded prompt. + +## UI + +- Shared UI components live in `resources/js/Components/shared/`. +- Prefer wrapper components over native elements when equivalents already exist. +- Dark mode exists and should not be broken. + +## Package Ownership + +- Do not default to fixing generic bridge, provider, registry, caching, or integration-runtime behavior inside `app/`. +- First decide whether the behavior belongs to OpenCompany or to a sibling package. +- Inspect the real package source before patching. In this workspace, package code may be path-based or symlinked into `vendor/`. +- Common package sources: +- `tmp/prism-relay` +- `tmp/prism-codex` +- `../integrations/core` +- `../integrations/packages/*` +- Do not patch `vendor/` for durable fixes. +- If a fix stays app-local, note why it is OpenCompany-specific. + +## Working Notes + +- Prefer `rg` and `rg --files` for search. +- Keep edits targeted. Do not revert unrelated user changes. +- Put audits and investigations into `docs/`. +- MCP CLI: `~/.local/bin/mcp-cli` +- MCP config: `~/.config/mcp/mcp_servers.json` +- Common MCP usage: `mcp-cli`, `mcp-cli info `, `mcp-cli call ''` + +## Docs + +- Repo rules and local setup: `CLAUDE.md` +- Docs index: `docs/INDEX.md` +- Runtime audit: `docs/architecture/runtime-alignment-implementation-audit.md` +- Plane tool is available via `mcp-cli` diff --git a/CLAUDE.md b/CLAUDE.md index b6316bb..017225f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,31 +1,59 @@ -# OpenCompany Project Rules +# OpenCompany Repo Guide -## Local Development +This file is intentionally mirrored in both `AGENTS.md` and `CLAUDE.md`. Keep them identical. -- **Local URL**: `http://opencompany.test` (Laravel Valet domain, no SSL) -- **Ngrok URL**: `https://your-subdomain.ngrok-free.dev` (used for Telegram webhooks and external integrations; set up your own via `ngrok http 80`) -- When testing or navigating to the app locally, always use `http://opencompany.test` +## Local -## Tech Stack +- Local URL: `http://opencompany.test` +- Ngrok URL: `https://your-subdomain.ngrok-free.dev` for Telegram webhooks and external integrations; set up your own with `ngrok http 80` +- Use `http://opencompany.test` for local navigation and testing. +- Stack: Laravel 12, Vue 3, Inertia.js, Tailwind CSS v4, Reka UI -- **Backend**: Laravel 12 -- **Frontend**: Vue 3 + Inertia.js -- **Styling**: Tailwind CSS v4 + Reka UI (headless primitives) -- **Icons**: @iconify/vue with Phosphor icons (`ph:` prefix) +## Workspace -## Multi-Workspace Architecture +- OpenCompany is multi-workspace. Scope queries correctly. +- `ResolveWorkspace` binds the active workspace as `currentWorkspace`. +- `workspace()` returns the current `Workspace`. +- Models with `workspace_id` should use `forWorkspace()`. +- Related models should be scoped through the relation, typically with `whereHas(...)`. +- Humans belong to workspaces through `workspace_members`. Agents have a direct `workspace_id`. -- The app supports multiple workspaces. All data is workspace-scoped. -- **URL structure**: `/w/{workspace_slug}/...` — the slug identifies the active workspace. -- **Middleware**: `ResolveWorkspace` resolves the workspace from URL slug, `X-Workspace-Id` header, session, or user's first workspace (fallback). Binds it to the container as `currentWorkspace`. -- **Helper**: `workspace()` returns the current `Workspace` model from the container. -- **Model scoping**: Models use the `BelongsToWorkspace` trait which provides `scopeForWorkspace()` (explicit, not a global scope). Use `Model::forWorkspace()->...` in queries. -- **Humans** belong to workspaces via the `workspace_members` pivot table (many-to-many). **Agents** have a direct `workspace_id` column (belong to one workspace). -- **Frontend**: `useWorkspace()` composable provides `workspace`, `workspaces`, `workspacePath()`, and `isAdmin`. Workspace switcher is in the sidebar header. -- When adding new queries, always scope by workspace. For models with `workspace_id`, use `forWorkspace()`. For related models (e.g., messages via channels), use `whereHas` to filter through the relation. +## Runtime -## Component Structure +- Main runtime agent class: `app/Agents/OpenCompanyAgent.php` +- Identity/system-prompt content is assembled from identity files and agent config, not from a static hardcoded prompt. -- Shared components are in `resources/js/Components/shared/` -- Use the wrapper components (Button, Modal, Badge, etc.) instead of native elements for consistency -- Dark mode is supported via the `useColorMode` composable +## UI + +- Shared UI components live in `resources/js/Components/shared/`. +- Prefer wrapper components over native elements when equivalents already exist. +- Dark mode exists and should not be broken. + +## Package Ownership + +- Do not default to fixing generic bridge, provider, registry, caching, or integration-runtime behavior inside `app/`. +- First decide whether the behavior belongs to OpenCompany or to a sibling package. +- Inspect the real package source before patching. In this workspace, package code may be path-based or symlinked into `vendor/`. +- Common package sources: +- `tmp/prism-relay` +- `tmp/prism-codex` +- `../integrations/core` +- `../integrations/packages/*` +- Do not patch `vendor/` for durable fixes. +- If a fix stays app-local, note why it is OpenCompany-specific. + +## Working Notes + +- Prefer `rg` and `rg --files` for search. +- Keep edits targeted. Do not revert unrelated user changes. +- Put audits and investigations into `docs/`. +- MCP CLI: `~/.local/bin/mcp-cli` +- MCP config: `~/.config/mcp/mcp_servers.json` +- Common MCP usage: `mcp-cli`, `mcp-cli info `, `mcp-cli call ''` + +## Docs + +- Repo rules and local setup: `CLAUDE.md` +- Docs index: `docs/INDEX.md` +- Runtime audit: `docs/architecture/runtime-alignment-implementation-audit.md` +- Plane tool is available via `mcp-cli` diff --git a/app/Agents/OpenCompanyAgent.php b/app/Agents/OpenCompanyAgent.php index f110420..510e0d9 100644 --- a/app/Agents/OpenCompanyAgent.php +++ b/app/Agents/OpenCompanyAgent.php @@ -11,6 +11,9 @@ use App\Models\TaskStep; use App\Models\User; use App\Services\AgentDocumentService; +use App\Services\Memory\ContextPruner; +use App\Services\Memory\PromptFrameBuilder; +use App\Services\Memory\ToolResultDeduplicator; use Laravel\Ai\Contracts\Agent; use Laravel\Ai\Contracts\Conversational; use Laravel\Ai\Contracts\HasTools; @@ -21,12 +24,18 @@ use Laravel\Ai\Responses\Data\ToolCall; use Laravel\Ai\Responses\Data\ToolResult; use Illuminate\Support\Str; +use OpenCompany\PrismRelay\Contracts\HasSystemPrompts; #[MaxTokens(16_384)] -class OpenCompanyAgent implements Agent, HasTools, Conversational +class OpenCompanyAgent implements Agent, HasTools, Conversational, HasSystemPrompts { use Promptable; + /** + * @var array|null + */ + private ?array $promptFrameCache = null; + /** @var array */ private array $resolvedProvider; @@ -39,6 +48,9 @@ public function __construct( private ChannelConversationLoader $conversationLoader, private DynamicProviderResolver $providerResolver, private ToolRegistry $toolRegistry, + private PromptFrameBuilder $promptFrameBuilder, + private ToolResultDeduplicator $toolResultDeduplicator, + private ContextPruner $contextPruner, private ?string $taskId = null, ) { $this->resolvedProvider = $this->providerResolver->resolve($this->agent); @@ -79,11 +91,51 @@ public function resumeFrom(string $taskId): static /** * Get the instructions (system prompt) for this agent. * - * Assembles from identity files in the same order as AgentChatService. + * Returns the full concatenated prompt (stable + volatile). When a + * SystemPromptBag is bound, CachingPrismGateway uses the split prompts + * from the bag instead for cache-friendly framing. */ public function instructions(): string { - return implode('', array_column($this->buildSections(), 'content')); + return $this->promptFrame()['full_prompt']; + } + + /** + * Get the full instruction set before stable/volatile splitting. + */ + public function fullInstructions(): string + { + return $this->promptFrame()['full_prompt']; + } + + /** + * Get the volatile runtime context that should travel with the user prompt. + */ + public function volatilePromptContext(): string + { + return $this->promptFrame()['volatile_prompt']; + } + + /** + * Runtime context now travels as additional system prompts via the gateway, + * so the user prompt should remain unchanged. + */ + public function preparePrompt(string $prompt): string + { + return $prompt; + } + + /** + * @return string[] + */ + public function systemPrompts(): array + { + $frame = $this->promptFrame(); + + return array_values(array_filter([ + trim($frame['stable_prompt']), + trim($frame['volatile_prompt']), + ], fn (string $prompt) => $prompt !== '')); } /** @@ -94,10 +146,27 @@ public function instructions(): string */ public function instructionsBreakdown(): array { - return array_values(array_map( - fn (array $s) => ['label' => $s['label'], 'chars' => mb_strlen($s['content'])], - $this->buildSections(), - )); + return $this->promptFrame()['stable_breakdown']; + } + + /** + * @return array + */ + public function volatileInstructionsBreakdown(): array + { + return $this->promptFrame()['volatile_breakdown']; + } + + /** + * @return array + */ + public function promptFrame(): array + { + if ($this->promptFrameCache !== null) { + return $this->promptFrameCache; + } + + return $this->promptFrameCache = $this->promptFrameBuilder->splitSections($this->buildSections()); } /** @@ -228,7 +297,7 @@ private function injectPeerCards(array &$sections, Channel $channel): void */ public function messages(): iterable { - $messages = $this->conversationLoader->load($this->channelId, $this->agent, $this->instructions()); + $messages = $this->conversationLoader->load($this->channelId, $this->agent, $this->fullInstructions()); if ($this->resumeFromTaskId) { $messages = $this->injectCheckpointedSteps($messages); @@ -289,7 +358,9 @@ private function injectCheckpointedSteps(iterable $messages): array ); } - return $messages; + $deduplicated = $this->toolResultDeduplicator->deduplicate($messages)['messages']; + + return $this->contextPruner->prune($deduplicated)['messages']; } /** diff --git a/app/Agents/Providers/CodexPrismGateway.php b/app/Agents/Providers/CodexPrismGateway.php index 5fb4f61..33b6ae1 100644 --- a/app/Agents/Providers/CodexPrismGateway.php +++ b/app/Agents/Providers/CodexPrismGateway.php @@ -5,18 +5,16 @@ use Illuminate\Contracts\Events\Dispatcher; use Illuminate\Http\Client\ConnectionException; use Illuminate\Http\Client\RequestException; -use Laravel\Ai\Gateway\Prism\PrismGateway; use Laravel\Ai\Gateway\TextGenerationOptions; use Laravel\Ai\Providers\Provider; +use OpenCompany\PrismRelay\Bridge\CachingPrismGateway; /** - * Custom PrismGateway that routes Codex requests to the registered 'codex' Prism provider. + * Custom gateway that routes Codex requests to the registered 'codex' Prism provider. * - * The Codex provider extends OpenAI and uses the same Responses API format, but routes - * requests through chatgpt.com/backend-api/codex/ using OAuth tokens from a ChatGPT - * Pro/Plus subscription — $0 token costs. + * Extends CachingPrismGateway for prompt cache support. */ -class CodexPrismGateway extends PrismGateway +class CodexPrismGateway extends CachingPrismGateway { public function __construct(Dispatcher $events) { diff --git a/app/Agents/Providers/DynamicProviderResolver.php b/app/Agents/Providers/DynamicProviderResolver.php index f744626..d1826af 100644 --- a/app/Agents/Providers/DynamicProviderResolver.php +++ b/app/Agents/Providers/DynamicProviderResolver.php @@ -26,7 +26,7 @@ public function setWorkspaceId(?string $workspaceId): self /** * Parse a User's brain field and resolve to SDK provider + model. * - * Brain format: "provider:model" (e.g. "glm-coding:glm-4.7", "anthropic:claude-sonnet-4-5-20250929") + * Brain format: "provider:model" (e.g. "z:glm-5.1", "anthropic:claude-sonnet-4-5-20250929") * * @return array{provider: string, model: string} */ @@ -34,7 +34,7 @@ public function resolve(User $agent): array { $this->workspaceId = $agent->workspace_id; - $brain = $agent->brain ?? 'glm-coding:glm-4.7'; + $brain = $agent->brain ?? 'z:glm-5.1'; $parts = explode(':', $brain, 2); $providerKey = $parts[0]; $model = $parts[1] ?? $this->getDefaultModel($providerKey); @@ -55,12 +55,6 @@ public function resolveFromParts(string $providerKey, string $model): array return ['provider' => 'codex', 'model' => $model]; } - // GLM providers use IntegrationSetting for API keys - if ($this->isGlmProvider($providerKey)) { - $this->registerGlmProvider($providerKey); - return ['provider' => $providerKey, 'model' => $model]; - } - // Standard providers — check DB for API key, fall back to .env $sdkProvider = $this->mapToSdkProvider($providerKey); if ($sdkProvider) { @@ -68,22 +62,26 @@ public function resolveFromParts(string $providerKey, string $model): array return ['provider' => $sdkProvider, 'model' => $model]; } + // Custom providers use IntegrationSetting for API keys + if ($this->isRelayBackedProvider($providerKey)) { + $this->registerGlmProvider($providerKey); + return ['provider' => $providerKey, 'model' => $model]; + } + throw new InvalidArgumentException("Unknown provider: {$providerKey}"); } /** - * Check if a provider key is a GLM variant. + * Check if a provider key is managed by Prism Relay. */ - private function isGlmProvider(string $providerKey): bool + private function isRelayBackedProvider(string $providerKey): bool { - return (new RelayManager)->isRelayProvider($providerKey); + return ! $this->mapToSdkProvider($providerKey) + && (new RelayManager)->isRelayProvider($providerKey); } /** - * Dynamically register a GLM provider in the Prism config. - * - * GLM uses an OpenAI-compatible API, so we register it as an OpenAI provider - * with a custom URL and API key from IntegrationSetting. + * Dynamically register a custom provider in the Prism config. */ private function registerGlmProvider(string $providerKey): void { @@ -107,7 +105,7 @@ private function registerGlmProvider(string $providerKey): void $apiKey = $integration->getConfigValue('api_key'); $url = $integration->getConfigValue('url') ?? $this->getDefaultGlmUrl($providerKey); - // Set Prism config for the GLM provider variant (registered via PrismManager::extend) + // Set Prism config for the provider variant (registered via PrismManager::extend) config([ "prism.providers.{$providerKey}" => [ 'api_key' => $apiKey, @@ -116,10 +114,10 @@ private function registerGlmProvider(string $providerKey): void ]); // Register in AI SDK config using our custom driver (registered via AiManager::extend) - // This routes through GlmPrismGateway → Prism 'glm' provider → chat/completions + // This routes through GlmPrismGateway to the matching Prism provider. config([ "ai.providers.{$providerKey}" => [ - 'driver' => $providerKey, // 'glm' or 'glm-coding' — custom drivers + 'driver' => $providerKey, 'key' => $apiKey, ], ]); @@ -154,6 +152,7 @@ private function mapToSdkProvider(string $providerKey): ?string 'deepseek' => 'deepseek', 'mistral' => 'mistral', 'ollama' => 'ollama', + 'perplexity' => 'perplexity', ]; return $map[$providerKey] ?? null; @@ -190,7 +189,7 @@ private function applyIntegrationConfig(string $providerKey): void } /** - * Get default URL for a GLM provider. + * Get default URL for a known provider. */ private function getDefaultGlmUrl(string $providerKey): string { diff --git a/app/Agents/Providers/GlmPrismGateway.php b/app/Agents/Providers/GlmPrismGateway.php index 2c7b6e5..d028b83 100644 --- a/app/Agents/Providers/GlmPrismGateway.php +++ b/app/Agents/Providers/GlmPrismGateway.php @@ -5,19 +5,17 @@ use Illuminate\Contracts\Events\Dispatcher; use Illuminate\Http\Client\ConnectionException; use Illuminate\Http\Client\RequestException; -use Laravel\Ai\Gateway\Prism\PrismGateway; use Laravel\Ai\Gateway\TextGenerationOptions; use Laravel\Ai\Providers\Provider; +use OpenCompany\PrismRelay\Bridge\CachingPrismGateway; /** - * Custom PrismGateway that routes requests to custom Prism providers - * registered via PrismManager::extend() (GLM, Kimi, MiniMax, etc.). + * Custom gateway that routes requests to custom Prism providers + * registered via PrismManager::extend() (Z.AI, Kimi, MiniMax, etc.). * - * The base PrismGateway maps driver names to PrismProvider enums, which only - * works for native Prism providers. Custom providers need their string key - * passed directly to Prism's using() method. + * Extends CachingPrismGateway for prompt cache support on all providers. */ -class GlmPrismGateway extends PrismGateway +class GlmPrismGateway extends CachingPrismGateway { public function __construct(Dispatcher $events) { diff --git a/app/Agents/Tools/ToolRegistry.php b/app/Agents/Tools/ToolRegistry.php index ecb9777..eb447cd 100644 --- a/app/Agents/Tools/ToolRegistry.php +++ b/app/Agents/Tools/ToolRegistry.php @@ -7,7 +7,6 @@ use App\Models\AppSetting; use App\Models\User; use App\Services\AgentPermissionService; -use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; class ToolRegistry { @@ -47,7 +46,6 @@ class ToolRegistry public function __construct( private AgentPermissionService $permissionService, - private ToolProviderRegistry $providerRegistry, ) {} /** @@ -94,7 +92,7 @@ private function getEffectiveToolMap(): array } // External integration providers - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { foreach ($provider->tools() as $slug => $meta) { $this->effectiveToolMap[$slug] = $meta; } @@ -121,7 +119,7 @@ private function getEffectiveAppGroups(): array } // External integration providers - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { $meta = $provider->appMeta(); $this->effectiveAppGroups[$provider->appName()] = [ 'tools' => array_keys($provider->tools()), @@ -139,7 +137,7 @@ public function getEffectiveIntegrationApps(): array { if ($this->effectiveIntegrationApps === null) { $this->effectiveIntegrationApps = self::INTEGRATION_APPS; - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { if ($provider->isIntegration() && ! in_array($provider->appName(), $this->effectiveIntegrationApps)) { $this->effectiveIntegrationApps[] = $provider->appName(); } @@ -161,7 +159,7 @@ private function getEffectiveAppIcons(): array } // External integration providers - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { $meta = $provider->appMeta(); $this->effectiveAppIcons[$provider->appName()] = $meta['icon']; } @@ -175,7 +173,7 @@ private function getEffectiveIntegrationLogos(): array { if ($this->effectiveIntegrationLogos === null) { $this->effectiveIntegrationLogos = []; - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { $meta = $provider->appMeta(); if (isset($meta['logo'])) { $this->effectiveIntegrationLogos[$provider->appName()] = $meta['logo']; @@ -218,6 +216,11 @@ public function getToolMetaBySlug(string $slug): array ]; } + public function getToolTypeBySlug(string $slug): ?string + { + return $this->getEffectiveToolMap()[$slug]['type'] ?? null; + } + // ─── Tool filtering and instantiation ────────────────────────────────── /** @@ -503,13 +506,13 @@ public function getToolCatalog(User $agent): array /** * Instantiate a specific tool by slug (for post-approval execution). */ - public function instantiateToolBySlug(string $slug, User $agent): \OpenCompany\IntegrationCore\Contracts\Tool|\Laravel\Ai\Contracts\Tool|null + public function instantiateToolBySlug(string $slug, User $agent, ?string $account = null): \OpenCompany\IntegrationCore\Contracts\Tool|\Laravel\Ai\Contracts\Tool|null { if (! isset($this->getEffectiveToolMap()[$slug])) { return null; } - return $this->instantiateTool($this->getEffectiveToolMap()[$slug]['class'], $agent, $slug); + return $this->instantiateTool($this->getEffectiveToolMap()[$slug]['class'], $agent, $slug, $account); } /** @@ -573,7 +576,7 @@ public function getAppCatalog(User $agent): string /** * Instantiate a tool class via its provider. */ - private function instantiateTool(string $class, User $agent, string $slug = ''): \OpenCompany\IntegrationCore\Contracts\Tool|\Laravel\Ai\Contracts\Tool + private function instantiateTool(string $class, User $agent, string $slug = '', ?string $account = null): \OpenCompany\IntegrationCore\Contracts\Tool|\Laravel\Ai\Contracts\Tool { $context = [ 'channel_id' => $this->currentChannelId, @@ -582,13 +585,14 @@ private function instantiateTool(string $class, User $agent, string $slug = ''): ]; // Check external integration providers first - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { foreach ($provider->tools() as $toolSlug => $meta) { if ($meta['class'] === $class && ($slug === '' || $toolSlug === $slug)) { return $provider->createTool($class, [ 'agent' => $agent, 'timezone' => AppSetting::getValue('org_timezone', 'UTC'), 'tool_slug' => $toolSlug, + 'account' => $account, ]); } } @@ -630,4 +634,18 @@ private function buildAppLookup(): array return $lookup; } + + /** + * @return array + */ + private function integrationProviders(): array + { + $registryClass = \OpenCompany\IntegrationCore\Support\ToolProviderRegistry::class; + + if (! class_exists($registryClass) || ! app()->bound($registryClass)) { + return []; + } + + return app($registryClass)->all(); + } } diff --git a/app/Agents/Tools/Workspace/GetIntegrationSetup.php b/app/Agents/Tools/Workspace/GetIntegrationSetup.php index 434ea7c..5ce7420 100644 --- a/app/Agents/Tools/Workspace/GetIntegrationSetup.php +++ b/app/Agents/Tools/Workspace/GetIntegrationSetup.php @@ -66,7 +66,7 @@ public function schema(JsonSchema $schema): array return [ 'integrationId' => $schema ->string() - ->description('Integration ID (e.g., "telegram", "glm", "plausible"). Includes both static and dynamic package-provided integrations.') + ->description('Integration ID (e.g., "telegram", "z", "plausible"). Includes both static and dynamic package-provided integrations.') ->required(), ]; } diff --git a/app/Agents/Tools/Workspace/TestIntegrationConnection.php b/app/Agents/Tools/Workspace/TestIntegrationConnection.php index 6b460e4..8fd8052 100644 --- a/app/Agents/Tools/Workspace/TestIntegrationConnection.php +++ b/app/Agents/Tools/Workspace/TestIntegrationConnection.php @@ -61,7 +61,7 @@ public function handle(Request $request): string return $this->testTelegram($apiKey); } - // GLM-style providers + // OpenAI-compatible chat-completions providers $url = $setting->getConfigValue('url') ?? ($available[$integrationId]['default_url'] ?? ''); $model = $setting->getConfigValue('default_model') ?? array_key_first($available[$integrationId]['models'] ?? []); @@ -122,8 +122,8 @@ public function schema(JsonSchema $schema): array return [ 'integrationId' => $schema ->string() - ->description('Integration ID (e.g., "telegram", "glm", "plausible"). Includes both static and dynamic package-provided integrations.') + ->description('Integration ID (e.g., "telegram", "z", "plausible"). Includes both static and dynamic package-provided integrations.') ->required(), ]; } -} \ No newline at end of file +} diff --git a/app/Agents/Tools/Workspace/UpdateIntegrationConfig.php b/app/Agents/Tools/Workspace/UpdateIntegrationConfig.php index 50c2df8..d391e1b 100644 --- a/app/Agents/Tools/Workspace/UpdateIntegrationConfig.php +++ b/app/Agents/Tools/Workspace/UpdateIntegrationConfig.php @@ -175,7 +175,7 @@ public function schema(JsonSchema $schema): array return [ 'integrationId' => $schema ->string() - ->description('Integration ID (e.g., "telegram", "glm", "plausible"). Includes both static and dynamic package-provided integrations.') + ->description('Integration ID (e.g., "telegram", "z", "plausible"). Includes both static and dynamic package-provided integrations.') ->required(), 'apiKey' => $schema ->string() @@ -203,4 +203,4 @@ public function schema(JsonSchema $schema): array ->description('JSON array of site domains for string_list fields, e.g. ["example.com"].'), ]; } -} \ No newline at end of file +} diff --git a/app/Console/Commands/TestGlmPing.php b/app/Console/Commands/TestGlmPing.php index 00dcfa7..f86b9ca 100644 --- a/app/Console/Commands/TestGlmPing.php +++ b/app/Console/Commands/TestGlmPing.php @@ -7,16 +7,16 @@ class TestGlmPing extends Command { - protected $signature = 'glm:ping {--prompt= : Custom prompt to send}'; - protected $description = 'Test GLM 4.7 API connection'; + protected $signature = 'z:ping {--prompt= : Custom prompt to send}'; + protected $description = 'Test Z.AI API connection'; public function handle(): int { - $this->info('Testing GLM 4.7 API connection...'); + $this->info('Testing Z.AI API connection...'); $this->newLine(); - $url = config('prism.providers.glm.url'); - $apiKey = config('prism.providers.glm.api_key'); + $url = config('prism.providers.z.url'); + $apiKey = config('prism.providers.z.api_key'); $this->line('Endpoint: ' . $url); $this->line('API Key: ' . substr($apiKey, 0, 10) . '...'); @@ -26,7 +26,7 @@ public function handle(): int try { $response = Prism::text() - ->using('glm', 'glm-4.7') + ->using('z', 'glm-5.1') ->withPrompt($prompt) ->asText(); @@ -45,11 +45,11 @@ public function handle(): int ); $this->newLine(); - $this->info('GLM 4.7 connection successful!'); + $this->info('Z.AI connection successful!'); return Command::SUCCESS; } catch (\Exception $e) { - $this->error('Failed to connect to GLM 4.7:'); + $this->error('Failed to connect to Z.AI:'); $this->error($e->getMessage()); if ($this->output->isVerbose()) { diff --git a/app/Http/Controllers/Api/AgentController.php b/app/Http/Controllers/Api/AgentController.php index 1f7057f..732f9e9 100644 --- a/app/Http/Controllers/Api/AgentController.php +++ b/app/Http/Controllers/Api/AgentController.php @@ -61,14 +61,14 @@ public function store(Request $request): JsonResponse // Validate brain format (provider:model) if (!str_contains($validated['brain'], ':')) { return response()->json([ - 'error' => 'Invalid brain format. Expected "provider:model" (e.g., "glm:glm-4.7")', + 'error' => 'Invalid brain format. Expected "provider:model" (e.g., "z:glm-5.1")', ], 422); } [$provider] = explode(':', $validated['brain'], 2); // Standard providers use .env keys; only check IntegrationSetting for custom providers - $standardProviders = ['anthropic', 'openai', 'gemini', 'groq', 'xai', 'openrouter', 'deepseek', 'mistral', 'ollama']; + $standardProviders = ['anthropic', 'openai', 'gemini', 'groq', 'xai', 'openrouter', 'deepseek', 'mistral', 'ollama', 'perplexity']; if (!in_array($provider, $standardProviders)) { $integration = IntegrationSetting::forWorkspace() diff --git a/app/Http/Controllers/Api/IntegrationController.php b/app/Http/Controllers/Api/IntegrationController.php index 0d48bc0..6fdbb3c 100644 --- a/app/Http/Controllers/Api/IntegrationController.php +++ b/app/Http/Controllers/Api/IntegrationController.php @@ -31,7 +31,7 @@ public function index(): \Illuminate\Http\JsonResponse $integrations = []; - // Static integrations (GLM, Telegram, Codex — no ToolProvider package) + // Static integrations (Z.AI, Telegram, Codex — no ToolProvider package) foreach ($available as $id => $info) { // Codex uses OAuth tokens, not API keys if ($id === 'codex') { @@ -199,7 +199,7 @@ public function showConfig(string $id): \Illuminate\Http\JsonResponse ]); } - // Static integrations (GLM, Telegram, chat platforms) + // Static integrations (Z.AI, Telegram, chat platforms) $available = IntegrationSetting::getAvailableIntegrations(); if (!isset($available[$id])) { return response()->json(['error' => 'Integration not found'], 404); @@ -233,7 +233,7 @@ public function showConfig(string $id): \Illuminate\Http\JsonResponse ]); } - // AI model integrations (GLM etc.) + // AI model integrations (Z.AI, Perplexity, etc.) $config = [ 'apiKey' => $setting?->getMaskedApiKey(), 'url' => $setting?->getConfigValue('url') ?? ($available[$id]['default_url'] ?? ''), @@ -331,7 +331,7 @@ public function updateConfig(Request $request, string $id): \Illuminate\Http\Jso ]); } - // Static integrations (GLM, chat platforms) + // Static integrations (Z.AI, chat platforms) $available = IntegrationSetting::getAvailableIntegrations(); if (!isset($available[$id])) { return response()->json(['error' => 'Integration not found'], 404); @@ -377,7 +377,7 @@ public function updateConfig(Request $request, string $id): \Illuminate\Http\Jso ]); } - // AI model integrations (GLM etc.) + // AI model integrations (Z.AI, Perplexity, etc.) $request->validate([ 'apiKey' => 'nullable|string', 'url' => 'nullable|string|url', @@ -564,10 +564,10 @@ public function disconnect(string $id): \Illuminate\Http\JsonResponse } /** - * Test GLM/Zhipu AI connection + * Test OpenAI-compatible AI provider connection. */ /** - * Test connection for OpenAI-compatible providers (OpenAI, DeepSeek, Groq, Mistral, xAI, OpenRouter, GLM, Ollama). + * Test connection for OpenAI-compatible providers (OpenAI, DeepSeek, Groq, Mistral, xAI, OpenRouter, Z.AI, Ollama, Perplexity). */ private function testOpenAiCompatConnection(?string $apiKey, string $url, ?string $model): \Illuminate\Http\JsonResponse { @@ -923,7 +923,7 @@ public function enabledModels(): \Illuminate\Http\JsonResponse /** * Get all available AI providers with their models for settings dropdowns. * - * Returns both integration-based providers (GLM, Codex) and prism-config + * Returns both integration-based providers (Z.AI, Codex) and prism-config * providers (Anthropic, OpenAI, etc.) with configuration status. */ public function allProviders(): \Illuminate\Http\JsonResponse @@ -1417,7 +1417,7 @@ private function fetchModelsFromProvider(string $id): array /** * Fetch models from an OpenAI-compatible /models endpoint. - * Works for: OpenAI, DeepSeek, Groq, Mistral, xAI, OpenRouter, GLM. + * Works for: OpenAI, DeepSeek, Groq, Mistral, xAI, OpenRouter, Z.AI, Perplexity. * * @return array */ @@ -1448,8 +1448,8 @@ private function fetchOpenAiCompatModels(string $id): array $models[$modelId] = $this->formatModelName($modelId); } - // GLM: probe flash/plus variants not listed by /models - if ($id === 'glm' || $id === 'glm-coding') { + // Z.AI: probe flash/plus variants not listed by /models + if ($id === 'z' || $id === 'z-api') { $models = $this->probeGlmVariants($models, $apiKey, $baseUrl); } @@ -1593,7 +1593,7 @@ private function getProviderCredentials(string $id): array } /** - * GLM-specific: probe flash/plus variants not listed by /models. + * Z.AI-specific: probe flash/plus variants not listed by /models. * * @param array $models * @return array @@ -1768,4 +1768,149 @@ private function testChatIntegrationConnection(string $id, Request $request): \I ], 500); } } + + // ─── Multi-Account Endpoints ──────────────────────────────── + + /** + * List all accounts for an integration. + */ + public function listAccounts(string $id): \Illuminate\Http\JsonResponse + { + $settings = IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->get(); + + $accounts = $settings->map(fn (IntegrationSetting $s) => [ + 'alias' => $s->account_alias, + 'is_default' => $s->is_default, + 'enabled' => $s->enabled, + 'configured' => $s->hasValidConfig(), + ]); + + return response()->json(['accounts' => $accounts]); + } + + /** + * Create a new account for an integration. + */ + public function createAccount(Request $request, string $id): \Illuminate\Http\JsonResponse + { + $request->validate([ + 'alias' => ['required', 'string', 'max:32', 'regex:/^[a-z0-9_]+$/'], + 'config' => ['required', 'array'], + ]); + + $alias = $request->input('alias'); + + $exists = IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->where('account_alias', $alias) + ->exists(); + + if ($exists) { + return response()->json(['error' => "Account '{$alias}' already exists."], 422); + } + + $hasOthers = IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->exists(); + + $setting = IntegrationSetting::create([ + 'id' => Str::uuid()->toString(), + 'workspace_id' => workspace()->id, + 'integration_id' => $id, + 'account_alias' => $alias, + 'config' => $request->input('config'), + 'enabled' => true, + 'is_default' => ! $hasOthers, + ]); + + return response()->json([ + 'alias' => $setting->account_alias, + 'is_default' => $setting->is_default, + ], 201); + } + + /** + * Update an account's config. + */ + public function updateAccount(Request $request, string $id, string $alias): \Illuminate\Http\JsonResponse + { + $setting = IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->where('account_alias', $alias) + ->first(); + + if (! $setting) { + return response()->json(['error' => 'Account not found.'], 404); + } + + $config = $setting->config ?? []; + foreach ($request->input('config', []) as $key => $value) { + if (is_string($value) && str_contains($value, '*')) { + continue; // Skip masked values + } + $config[$key] = $value; + } + $setting->config = $config; + $setting->save(); + + return response()->json(['success' => true]); + } + + /** + * Delete an account. + */ + public function deleteAccount(string $id, string $alias): \Illuminate\Http\JsonResponse + { + if ($alias === '') { + return response()->json(['error' => 'Cannot delete the default account.'], 422); + } + + $setting = IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->where('account_alias', $alias) + ->first(); + + if (! $setting) { + return response()->json(['error' => 'Account not found.'], 404); + } + + $wasDefault = $setting->is_default; + $setting->delete(); + + // If we deleted the default, promote the remaining default (empty alias) row + if ($wasDefault) { + IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->where('account_alias', '') + ->update(['is_default' => true]); + } + + return response()->json(['success' => true]); + } + + /** + * Set an account as the default. + */ + public function setDefaultAccount(string $id, string $alias): \Illuminate\Http\JsonResponse + { + $setting = IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->where('account_alias', $alias) + ->first(); + + if (! $setting) { + return response()->json(['error' => 'Account not found.'], 404); + } + + // Clear is_default on all accounts for this integration + IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->update(['is_default' => false]); + + $setting->update(['is_default' => true]); + + return response()->json(['success' => true]); + } } diff --git a/app/Jobs/AgentRespondJob.php b/app/Jobs/AgentRespondJob.php index 63f2ace..e617302 100644 --- a/app/Jobs/AgentRespondJob.php +++ b/app/Jobs/AgentRespondJob.php @@ -22,11 +22,13 @@ use App\Jobs\Concerns\SetsWorkspaceContext; use App\Services\AgentCommunicationService; use App\Services\AgentDocumentService; +use App\Services\Memory\ContextBudget; use App\Services\Memory\ModelContextRegistry; use App\Services\TelegramService; use Laravel\Ai\Responses\AgentResponse; use Laravel\Ai\Responses\Data\FinishReason; use Illuminate\Support\Facades\Log; +use OpenCompany\PrismRelay\Bridge\SystemPromptBag; use Illuminate\Support\Str; class AgentRespondJob implements ShouldQueue, ShouldBeUnique @@ -197,13 +199,24 @@ public function handle(): void $agentInstance->resumeFrom($task->id); } + $currentMessages = []; + // Capture LLM context before prompting (for observability) try { $toolRegistry = app(\App\Agents\Tools\ToolRegistry::class); + $promptFrame = $agentInstance->promptFrame(); + $currentMessages = $agentInstance->messages(); + $contextBudget = app(ContextBudget::class)->snapshotForAgent( + $this->agent, + $currentMessages, + $agentInstance->fullInstructions(), + ); $task->update([ 'context' => [ 'system_prompt' => $agentInstance->instructions(), - 'messages' => collect($agentInstance->messages()) /** @phpstan-ignore argument.templateType */ + 'full_system_prompt' => $agentInstance->fullInstructions(), + 'volatile_prompt_context' => $agentInstance->volatilePromptContext(), + 'messages' => collect($currentMessages) /** @phpstan-ignore argument.templateType */ ->map(fn ($m) => [ 'role' => $m->role->value, 'content' => Str::limit($m->content ?? '', 2000), @@ -211,9 +224,11 @@ public function handle(): void 'tools' => $toolRegistry->getToolSlugsForAgent($this->agent), 'model' => $agentInstance->model(), 'provider' => $agentInstance->provider(), - 'prompt_sections' => $agentInstance->instructionsBreakdown(), + 'prompt_sections' => $promptFrame['stable_breakdown'], + 'volatile_prompt_sections' => $promptFrame['volatile_breakdown'], 'context_window' => app(ModelContextRegistry::class) - ->getContextWindow($agentInstance->model()), + ->getContextWindow($agentInstance->model(), $agentInstance->provider()), + 'context_budget' => $contextBudget, ], ]); } catch (\Throwable $e) { @@ -223,8 +238,7 @@ public function handle(): void // Memory flush: save important context to LTM before compaction try { $flushService = app(\App\Services\Memory\MemoryFlushService::class); - $currentMessages = $agentInstance->messages(); - if ($flushService->shouldFlush($this->channelId, $this->agent, $currentMessages, $agentInstance->instructions())) { + if ($flushService->shouldFlush($this->channelId, $this->agent, $currentMessages, $agentInstance->fullInstructions())) { $flushStep = $task->addStep('Flushing memories before compaction', 'action'); $flushStep->start(); $flushService->flush($this->channelId, $this->agent); @@ -235,7 +249,12 @@ public function handle(): void } $llmStep->start(); - $response = $agentInstance->prompt($this->buildPromptWithThreadContext($this->userMessage)); + app()->instance(SystemPromptBag::class, new SystemPromptBag( + $agentInstance->systemPrompts() + )); + $response = $agentInstance->prompt( + $this->buildPromptWithThreadContext($this->userMessage) + ); $lastStep = $response->steps->last(); @@ -375,17 +394,20 @@ public function handle(): void $outputReserve = (int) config('memory.compaction.output_reserve', 4_096); $systemChars = mb_strlen($task->context['system_prompt'] ?? ''); + $volatileChars = mb_strlen($task->context['volatile_prompt_context'] ?? ''); $messageChars = array_sum(array_map( fn ($m) => mb_strlen($m['content'] ?? ''), $task->context['messages'] ?? [], )); - $totalChars = $systemChars + $messageChars; + $totalChars = $systemChars + $volatileChars + $messageChars; $systemRatio = $totalChars > 0 ? $systemChars / $totalChars : 0.5; + $volatileRatio = $totalChars > 0 ? $volatileChars / $totalChars : 0.0; $systemTokens = (int) round($lastStepPromptTokens * $systemRatio); - $messageTokens = $lastStepPromptTokens - $systemTokens; + $volatileTokens = (int) round($lastStepPromptTokens * $volatileRatio); + $messageTokens = max(0, $lastStepPromptTokens - $systemTokens - $volatileTokens); - $available = max(0, $contextWindow - $systemTokens - $outputReserve); + $available = max(0, $contextWindow - $systemTokens - $volatileTokens - $outputReserve); $thresholdRatio = (float) config('memory.compaction.threshold_ratio', 0.75); $safetyMargin = (float) config('memory.compaction.safety_margin', 1.2); $compactionThreshold = (int) ($available * $thresholdRatio); @@ -403,6 +425,10 @@ public function handle(): void 'total' => $systemTokens, 'sections' => $context['prompt_sections'] ?? [], ], + 'volatile_prompt_context' => [ + 'total' => $volatileTokens, + 'sections' => $context['volatile_prompt_sections'] ?? [], + ], 'messages' => [ 'total' => $messageTokens, 'count' => count($context['messages'] ?? []), @@ -418,7 +444,7 @@ public function handle(): void 'last_step_prompt_tokens' => $lastStepPromptTokens, 'finish_reason' => $lastStep?->finishReason->value ?? 'unknown', ]; - unset($context['prompt_sections'], $context['context_window']); + unset($context['prompt_sections'], $context['volatile_prompt_sections'], $context['context_window']); $task->update(['context' => $context]); } catch (\Throwable $e) { Log::warning('Post-delivery bookkeeping failed', ['error' => $e->getMessage(), 'task' => $task->id]); diff --git a/app/Jobs/ExecuteAgentTaskJob.php b/app/Jobs/ExecuteAgentTaskJob.php index 2cb163a..85e6d25 100644 --- a/app/Jobs/ExecuteAgentTaskJob.php +++ b/app/Jobs/ExecuteAgentTaskJob.php @@ -16,6 +16,7 @@ use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; use Illuminate\Support\Facades\Log; +use OpenCompany\PrismRelay\Bridge\SystemPromptBag; class ExecuteAgentTaskJob implements ShouldQueue { @@ -64,6 +65,9 @@ public function handle(): void $agentInstance = OpenCompanyAgent::for($agent, $channelId, $this->task->id); $analyzeStep->start(); + app()->instance(SystemPromptBag::class, new SystemPromptBag( + $agentInstance->systemPrompts() + )); $response = $agentInstance->prompt($prompt); $analyzeStep->complete(); diff --git a/app/Jobs/RunAutomationJob.php b/app/Jobs/RunAutomationJob.php index abbb243..10e189d 100644 --- a/app/Jobs/RunAutomationJob.php +++ b/app/Jobs/RunAutomationJob.php @@ -21,6 +21,7 @@ use Illuminate\Queue\SerializesModels; use Illuminate\Support\Facades\Log; use Illuminate\Support\Str; +use OpenCompany\PrismRelay\Bridge\SystemPromptBag; class RunAutomationJob implements ShouldQueue, ShouldBeUnique { @@ -127,12 +128,17 @@ public function handle(): void // Capture LLM context for observability try { $toolRegistry = app(\App\Agents\Tools\ToolRegistry::class); + $promptFrame = $agentInstance->promptFrame(); $task->update([ 'context' => array_merge($task->context ?? [], [ + 'system_prompt' => $agentInstance->instructions(), + 'full_system_prompt' => $agentInstance->fullInstructions(), + 'volatile_prompt_context' => $agentInstance->volatilePromptContext(), 'tools' => $toolRegistry->getToolSlugsForAgent($agent), 'model' => $agentInstance->model(), 'provider' => $agentInstance->provider(), - 'prompt_sections' => $agentInstance->instructionsBreakdown(), + 'prompt_sections' => $promptFrame['stable_breakdown'], + 'volatile_prompt_sections' => $promptFrame['volatile_breakdown'], ]), ]); } catch (\Throwable $e) { @@ -141,6 +147,9 @@ public function handle(): void $prompt = $this->buildScheduledPrompt(); $generationStartedAt = now(); + app()->instance(SystemPromptBag::class, new SystemPromptBag( + $agentInstance->systemPrompts() + )); $response = $agentInstance->prompt($prompt); $generationCompletedAt = now(); diff --git a/app/Listeners/CheckpointToolCall.php b/app/Listeners/CheckpointToolCall.php index 900b802..3f4e706 100644 --- a/app/Listeners/CheckpointToolCall.php +++ b/app/Listeners/CheckpointToolCall.php @@ -5,6 +5,7 @@ use App\Agents\OpenCompanyAgent; use App\Models\Task; use App\Models\User; +use App\Services\Memory\OutputTruncator; use App\Support\LuaMetaParser; use Laravel\Ai\Events\ToolInvoked; use App\Agents\Tools\ToolRegistry; @@ -49,10 +50,9 @@ public function handle(ToolInvoked $event): void $luaMeta = $extracted['meta']; $result = $extracted['result']; - // Truncate large string results to prevent DB bloat - if (is_string($result) && strlen($result) > 2000) { - $result = mb_strcut($result, 0, 2000, 'UTF-8') . '... [truncated]'; - } + // Truncate large results before checkpoint persistence to keep + // retry context lean while preserving the full payload durably. + $result = app(OutputTruncator::class)->truncate($result, $event->toolInvocationId); // Sanitize to valid UTF-8 to prevent JSON encoding failures if (is_string($result)) { diff --git a/app/Models/ConversationSummary.php b/app/Models/ConversationSummary.php index a2bcdf9..d66e366 100644 --- a/app/Models/ConversationSummary.php +++ b/app/Models/ConversationSummary.php @@ -23,6 +23,15 @@ class ConversationSummary extends Model 'flush_count', 'messages_summarized', 'last_message_id', + 'compaction_failure_count', + 'last_compaction_failed_at', + 'compaction_circuit_open_until', + 'last_compaction_error', + ]; + + protected $casts = [ + 'last_compaction_failed_at' => 'datetime', + 'compaction_circuit_open_until' => 'datetime', ]; /** @return BelongsTo */ diff --git a/app/Models/IntegrationSetting.php b/app/Models/IntegrationSetting.php index 18243b0..ec33b8a 100644 --- a/app/Models/IntegrationSetting.php +++ b/app/Models/IntegrationSetting.php @@ -10,6 +10,8 @@ * @property array $config * @property bool $enabled * @property string $integration_id + * @property string $account_alias + * @property bool $is_default */ class IntegrationSetting extends Model { @@ -23,8 +25,10 @@ class IntegrationSetting extends Model 'id', 'workspace_id', 'integration_id', + 'account_alias', 'config', 'enabled', + 'is_default', ]; protected function casts(): array @@ -32,9 +36,57 @@ protected function casts(): array return [ 'config' => 'encrypted:array', 'enabled' => 'boolean', + 'is_default' => 'boolean', ]; } + /** + * Scope to a specific account alias. + * + * Null or empty string targets the default (un-aliased) account. + * + * @param \Illuminate\Database\Eloquent\Builder $query + * @return \Illuminate\Database\Eloquent\Builder + */ + public function scopeForAccount($query, ?string $account): self + { + $alias = ($account === null || $account === '') ? '' : $account; + + return $query->where('account_alias', $alias); + } + + /** + * Scope to the default account (is_default = true or the un-aliased row). + * + * @param \Illuminate\Database\Eloquent\Builder $query + * @return \Illuminate\Database\Eloquent\Builder + */ + public function scopeDefault($query): self + { + return $query->where(function ($q) { + $q->where('is_default', true)->orWhere('account_alias', ''); + }); + } + + /** + * Get all non-default account aliases for an integration in the current workspace. + * + * @return list + */ + public static function getAccountsFor(string $integrationId): array + { + $query = app()->bound('currentWorkspace') + ? static::forWorkspace() + : static::query(); + + return $query + ->where('integration_id', $integrationId) + ->where('account_alias', '!=', '') + ->pluck('account_alias') + ->values() + ->all(); + } + /** * Get a specific config value */ diff --git a/app/Models/McpServer.php b/app/Models/McpServer.php index 548d710..0455cd0 100644 --- a/app/Models/McpServer.php +++ b/app/Models/McpServer.php @@ -19,6 +19,7 @@ * @property string|null $description * @property string $name * @property string $slug + * @property string $account_alias * @property \Carbon\Carbon|null $tools_discovered_at * @property \Carbon\Carbon|null $created_at * @property \Carbon\Carbon|null $updated_at @@ -36,6 +37,7 @@ class McpServer extends Model 'workspace_id', 'name', 'slug', + 'account_alias', 'url', 'auth_type', 'auth_config', @@ -62,6 +64,25 @@ protected function casts(): array ]; } + /** + * Get all non-default account aliases for an MCP server slug in the current workspace. + * + * @return list + */ + public static function getAccountsFor(string $slug): array + { + try { + return static::forWorkspace() + ->where('slug', $slug) + ->where('account_alias', '!=', '') + ->pluck('account_alias') + ->values() + ->all(); + } catch (\Throwable) { + return []; + } + } + /** * Get prefixed tool slugs from cached discovered tools. * diff --git a/app/Providers/AppServiceProvider.php b/app/Providers/AppServiceProvider.php index a9f8846..2393c2a 100644 --- a/app/Providers/AppServiceProvider.php +++ b/app/Providers/AppServiceProvider.php @@ -20,7 +20,18 @@ use Illuminate\Support\Facades\Vite; use Illuminate\Support\ServiceProvider; use Laravel\Ai\AiManager; +use Laravel\Ai\Providers\AnthropicProvider; +use Laravel\Ai\Providers\AzureOpenAiProvider; +use Laravel\Ai\Providers\DeepSeekProvider; +use Laravel\Ai\Providers\GeminiProvider; +use Laravel\Ai\Providers\GroqProvider; +use Laravel\Ai\Providers\MistralProvider; +use Laravel\Ai\Providers\OllamaProvider; use Laravel\Ai\Providers\OpenAiProvider; +use Laravel\Ai\Providers\OpenRouterProvider; +use Laravel\Ai\Providers\VoyageAiProvider; +use Laravel\Ai\Providers\XaiProvider; +use OpenCompany\PrismRelay\Bridge\CachingPrismGateway; use Prism\Prism\PrismManager; class AppServiceProvider extends ServiceProvider @@ -80,37 +91,55 @@ public function boot(): void }); } - // Custom Prism providers (GLM, Kimi, MiniMax) are registered by + // Custom Prism providers (Z.AI, Kimi, MiniMax) are registered by // PrismRelayServiceProvider via afterResolving(PrismManager::class). - // Register 'glm' and 'glm-coding' as custom AI SDK drivers. - // These use GlmPrismGateway which routes to our custom 'glm' Prism provider - // (chat/completions) instead of the default OpenAI provider (/responses). + // Override all AI SDK drivers to use CachingPrismGateway for provider-aware + // prompt caching (Anthropic ephemeral, Gemini dedicated, OpenAI auto). // Use afterResolving because AiManager is scoped (recreated per job in queue workers). $this->app->afterResolving(AiManager::class, function (AiManager $aiManager, $app) { - $createGlmDriver = function ($app, array $config) { - return new OpenAiProvider( - new GlmPrismGateway($app['events']), - $config, - $app->make(Dispatcher::class) - ); - }; - - $aiManager->extend('glm', $createGlmDriver); - $aiManager->extend('glm-coding', $createGlmDriver); + $gateway = new CachingPrismGateway($app['events']); + $dispatcher = $app->make(Dispatcher::class); + + // Standard drivers — replace PrismGateway with CachingPrismGateway + $standardDrivers = [ + 'anthropic' => AnthropicProvider::class, + 'azure' => AzureOpenAiProvider::class, + 'deepseek' => DeepSeekProvider::class, + 'gemini' => GeminiProvider::class, + 'groq' => GroqProvider::class, + 'mistral' => MistralProvider::class, + 'ollama' => OllamaProvider::class, + 'openai' => OpenAiProvider::class, + 'openrouter' => OpenRouterProvider::class, + 'voyageai' => VoyageAiProvider::class, + 'xai' => XaiProvider::class, + ]; + + foreach ($standardDrivers as $driver => $providerClass) { + $aiManager->extend($driver, fn ($app, array $config) => new $providerClass( + $gateway, $config, $dispatcher, + )); + } + + // Custom relay-backed drivers — use GlmPrismGateway (extends + // CachingPrismGateway) so non-native Prism providers still work. + $glmGateway = new GlmPrismGateway($app['events']); + $createGlmDriver = fn ($app, array $config) => new OpenAiProvider( + $glmGateway, $config, $dispatcher, + ); + + $aiManager->extend('z', $createGlmDriver); + $aiManager->extend('z-api', $createGlmDriver); $aiManager->extend('kimi', $createGlmDriver); $aiManager->extend('kimi-coding', $createGlmDriver); $aiManager->extend('minimax', $createGlmDriver); $aiManager->extend('minimax-cn', $createGlmDriver); - // Register Codex driver (ChatGPT subscription via OAuth) - $aiManager->extend('codex', function ($app, array $config) { - return new OpenAiProvider( - new CodexPrismGateway($app['events']), - $config, - $app->make(Dispatcher::class) - ); - }); + // Codex driver (ChatGPT subscription via OAuth) + $aiManager->extend('codex', fn ($app, array $config) => new OpenAiProvider( + new CodexPrismGateway($app['events']), $config, $dispatcher, + )); }); } diff --git a/app/Services/AgentChatService.php b/app/Services/AgentChatService.php index 4baa669..9d882fa 100644 --- a/app/Services/AgentChatService.php +++ b/app/Services/AgentChatService.php @@ -17,11 +17,11 @@ public function __construct( public function respond(User $agent, string $channelId, string $userMessage): string { - // Parse agent's brain setting (e.g., 'glm-coding:glm-4.7') - $brain = $agent->brain ?? 'glm-coding:glm-4.7'; + // Parse agent's brain setting (e.g., 'z:glm-5.1') + $brain = $agent->brain ?? 'z:glm-5.1'; $parts = explode(':', $brain, 2); $provider = $parts[0]; - $model = $parts[1] ?? 'glm-4.7'; + $model = $parts[1] ?? 'glm-5.1'; // Verify integration is enabled and get config $integration = IntegrationSetting::where('workspace_id', $agent->workspace_id) @@ -61,15 +61,15 @@ public function respond(User $agent, string $channelId, string $userMessage): st $systemPrompt = $this->buildSystemPrompt($agent, $channelId); // Call the configured AI model via HTTP - return $this->callGlmApi($apiKey, $baseUrl, $model, $systemPrompt, $messages); + return $this->callChatCompletionApi($apiKey, $baseUrl, $model, $systemPrompt, $messages); } /** - * Call GLM/Zhipu AI API directly + * Call an OpenAI-compatible chat completions API directly. * * @param array> $messages */ - private function callGlmApi(string $apiKey, string $baseUrl, string $model, string $systemPrompt, array $messages): string + private function callChatCompletionApi(string $apiKey, string $baseUrl, string $model, string $systemPrompt, array $messages): string { // Prepend system message $apiMessages = [ @@ -100,8 +100,8 @@ private function callGlmApi(string $apiKey, string $baseUrl, string $model, stri private function getDefaultUrl(string $provider): string { return match ($provider) { - 'glm' => 'https://open.bigmodel.cn/api/paas/v4', - 'glm-coding' => 'https://api.z.ai/api/coding/paas/v4', + 'z-api' => 'https://open.bigmodel.cn/api/paas/v4', + 'z' => 'https://api.z.ai/api/coding/paas/v4', default => throw new \Exception("Unknown provider: {$provider}"), }; } diff --git a/app/Services/AgentPermissionService.php b/app/Services/AgentPermissionService.php index f48ac0c..2d94e90 100644 --- a/app/Services/AgentPermissionService.php +++ b/app/Services/AgentPermissionService.php @@ -8,7 +8,6 @@ use App\Models\User; use App\Models\WorkspaceFile; use Illuminate\Support\Str; -use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; class AgentPermissionService { @@ -28,9 +27,7 @@ class AgentPermissionService 'contact_agent', ]; - public function __construct( - private ToolProviderRegistry $providerRegistry, - ) {} + public function __construct() {} /** * Resolve the final permission for a tool, combining DB permissions with behavior mode. * @@ -156,7 +153,7 @@ public function getEnabledIntegrations(User $agent): array // Build full list of all integration app names $allApps = \App\Agents\Tools\ToolRegistry::INTEGRATION_APPS; - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { if ($provider->isIntegration() && !in_array($provider->appName(), $allApps)) { $allApps[] = $provider->appName(); } @@ -402,4 +399,18 @@ private function behaviorModeRequiresApproval(User $agent, string $toolType): bo default => false, }; } + + /** + * @return array + */ + private function integrationProviders(): array + { + $registryClass = \OpenCompany\IntegrationCore\Support\ToolProviderRegistry::class; + + if (! class_exists($registryClass) || ! app()->bound($registryClass)) { + return []; + } + + return app($registryClass)->all(); + } } diff --git a/app/Services/IntegrationSettingCredentialResolver.php b/app/Services/IntegrationSettingCredentialResolver.php index 5b49c63..1a5c7db 100644 --- a/app/Services/IntegrationSettingCredentialResolver.php +++ b/app/Services/IntegrationSettingCredentialResolver.php @@ -9,17 +9,30 @@ class IntegrationSettingCredentialResolver implements CredentialResolver { public function get(string $integration, string $key, mixed $default = null, ?string $account = null): mixed { - // OpenCompany uses workspace-scoped settings; account parameter is ignored - // (each workspace has one set of credentials per integration). - $setting = app()->bound('currentWorkspace') - ? IntegrationSetting::forWorkspace()->where('integration_id', $integration)->first() - : IntegrationSetting::where('integration_id', $integration)->first(); + $setting = $this->findSetting($integration, $account); return $setting?->getConfigValue($key, $default) ?? $default; } public function isConfigured(string $integration, ?string $account = null): bool { - return ! empty($this->get($integration, 'api_key')); + return ! empty($this->get($integration, 'api_key', null, $account)); + } + + public function getAccounts(string $integration): array + { + return IntegrationSetting::getAccountsFor($integration); + } + + private function findSetting(string $integration, ?string $account): ?IntegrationSetting + { + $query = app()->bound('currentWorkspace') + ? IntegrationSetting::forWorkspace() + : IntegrationSetting::query(); + + return $query + ->where('integration_id', $integration) + ->forAccount($account) + ->first(); } } diff --git a/app/Services/LuaApiDocGenerator.php b/app/Services/LuaApiDocGenerator.php index f9bc19c..83ff93e 100644 --- a/app/Services/LuaApiDocGenerator.php +++ b/app/Services/LuaApiDocGenerator.php @@ -3,10 +3,9 @@ namespace App\Services; use App\Agents\Tools\ToolRegistry; +use App\Models\McpServer; use App\Models\User; -use OpenCompany\IntegrationCore\Lua\LuaCatalogBuilder; -use OpenCompany\IntegrationCore\Lua\LuaDocRenderer; -use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; +use OpenCompany\IntegrationCore\Contracts\CredentialResolver; class LuaApiDocGenerator { @@ -17,14 +16,17 @@ class LuaApiDocGenerator public function __construct( private ToolRegistry $registry, - private ToolProviderRegistry $providerRegistry, - private LuaCatalogBuilder $catalogBuilder, - private LuaDocRenderer $docRenderer, ) {} public function generateNamespaceIndex(User $agent, ?string $filterNamespace = null): string { - return $this->docRenderer->generateNamespaceIndex( + $renderer = $this->docRenderer(); + + if ($renderer === null) { + return $this->getNamespaceSummary($agent); + } + + return $renderer->generateNamespaceIndex( $this->buildNamespaces($agent), $this->getStaticPageContents(), $filterNamespace, @@ -33,7 +35,13 @@ public function generateNamespaceIndex(User $agent, ?string $filterNamespace = n public function generateNamespaceDocs(string $namespace, User $agent): string { - return $this->docRenderer->generateNamespaceDocs( + $renderer = $this->docRenderer(); + + if ($renderer === null) { + return $this->getProviderLuaDocs($namespace) ?? "No Lua docs available for namespace '{$namespace}'."; + } + + return $renderer->generateNamespaceDocs( $namespace, $this->buildNamespaces($agent), fn (string $ns) => $this->getProviderLuaDocs($ns), @@ -42,7 +50,13 @@ public function generateNamespaceDocs(string $namespace, User $agent): string public function generateFunctionDocs(string $namespace, string $function, User $agent): string { - return $this->docRenderer->generateFunctionDocs( + $renderer = $this->docRenderer(); + + if ($renderer === null) { + return "Lua docs renderer unavailable for {$namespace}.{$function}."; + } + + return $renderer->generateFunctionDocs( $namespace, $function, $this->buildNamespaces($agent), @@ -51,7 +65,13 @@ public function generateFunctionDocs(string $namespace, string $function, User $ public function search(string $query, User $agent, int $limit = 10): string { - return $this->docRenderer->search( + $renderer = $this->docRenderer(); + + if ($renderer === null) { + return $this->getNamespaceSummary($agent); + } + + return $renderer->search( $query, $this->buildNamespaces($agent), $this->getStaticPageContents(), @@ -68,10 +88,39 @@ private function buildNamespaces(User $agent): array return $this->cachedNamespaces; } - $this->cachedNamespaces = $this->catalogBuilder->buildNamespaces( - $this->registry->getToolCatalog($agent), - ['tasks', 'system', 'lua'], - ); + $builder = $this->catalogBuilder(); + $catalog = $this->registry->getToolCatalog($agent); + + if (app()->bound(CredentialResolver::class)) { + $credentialResolver = app(CredentialResolver::class); + + // Inject account aliases for multi-account integrations and MCP servers. + foreach ($catalog as &$app) { + $appName = $app['name'] ?? ''; + if ($appName === '' || empty($app['isIntegration'])) { + continue; + } + + if (str_starts_with($appName, 'mcp_')) { + $mcpSlug = substr($appName, 4); + $accounts = McpServer::getAccountsFor($mcpSlug); + } else { + $accounts = $credentialResolver->getAccounts($appName); + } + + if ($accounts !== []) { + $app['accounts'] = $accounts; + } + } + unset($app); + } + + $this->cachedNamespaces = $builder !== null + ? $builder->buildNamespaces( + $catalog, + ['tasks', 'system', 'lua'], + ) + : []; $this->cachedAgent = $agent; return $this->cachedNamespaces; @@ -82,7 +131,11 @@ private function buildNamespaces(User $agent): array */ public function buildFunctionMap(User $agent): array { - return $this->catalogBuilder->buildFunctionMap($this->buildNamespaces($agent)); + $builder = $this->catalogBuilder(); + + return $builder !== null + ? $builder->buildFunctionMap($this->buildNamespaces($agent)) + : []; } /** @@ -90,7 +143,23 @@ public function buildFunctionMap(User $agent): array */ public function buildParameterMap(User $agent): array { - return $this->catalogBuilder->buildParameterMap($this->buildNamespaces($agent)); + $builder = $this->catalogBuilder(); + + return $builder !== null + ? $builder->buildParameterMap($this->buildNamespaces($agent)) + : []; + } + + /** + * @return array path => accountAlias (only for multi-account function paths) + */ + public function buildAccountMap(User $agent): array + { + $builder = $this->catalogBuilder(); + + return $builder !== null + ? $builder->buildAccountMap($this->buildNamespaces($agent)) + : []; } /** @@ -98,7 +167,13 @@ public function buildParameterMap(User $agent): array */ public function getAvailablePages(User $agent): array { - return $this->docRenderer->getAvailablePages( + $renderer = $this->docRenderer(); + + if ($renderer === null) { + return array_keys($this->getStaticPageContents()); + } + + return $renderer->getAvailablePages( $this->buildNamespaces($agent), $this->getStaticPageContents(), ); @@ -110,11 +185,22 @@ public function getAvailablePages(User $agent): array */ private function getProviderLuaDocs(string $namespace): ?string { + $providerRegistry = $this->providerRegistry(); + + if ($providerRegistry === null) { + return null; + } + $appName = str_starts_with($namespace, 'integrations.') ? substr($namespace, strlen('integrations.')) : $namespace; - $provider = $this->providerRegistry->get($appName); + // Strip account segment for multi-account namespaces (e.g., "clickup.work" → "clickup") + if ($providerRegistry->get($appName) === null && str_contains($appName, '.')) { + $appName = explode('.', $appName, 2)[0]; + } + + $provider = $providerRegistry->get($appName); if ($provider === null) { return null; } @@ -173,7 +259,19 @@ public function getStaticDocsForCatalog(): array public function getNamespaceSummary(User $agent): string { - return $this->docRenderer->getNamespaceSummary($this->buildNamespaces($agent)); + $renderer = $this->docRenderer(); + + if ($renderer === null) { + $namespaces = array_keys($this->buildNamespaces($agent)); + + if ($namespaces === []) { + return 'No external Lua API namespaces are available in this workspace.'; + } + + return "Available Lua namespaces:\n- " . implode("\n- ", $namespaces); + } + + return $renderer->getNamespaceSummary($this->buildNamespaces($agent)); } /** @@ -231,6 +329,43 @@ public function readStaticPage(string $slug): ?string */ private function deriveFunctionName(string $toolName, string $appName): string { - return $this->catalogBuilder->deriveFunctionName($toolName, $appName); + $builder = $this->catalogBuilder(); + + return $builder !== null + ? $builder->deriveFunctionName($toolName, $appName) + : $toolName; + } + + private function providerRegistry(): ?object + { + $class = \OpenCompany\IntegrationCore\Support\ToolProviderRegistry::class; + + if (! class_exists($class) || ! app()->bound($class)) { + return null; + } + + return app($class); + } + + private function catalogBuilder(): ?object + { + $class = \OpenCompany\IntegrationCore\Lua\LuaCatalogBuilder::class; + + if (! class_exists($class) || ! app()->bound($class)) { + return null; + } + + return app($class); + } + + private function docRenderer(): ?object + { + $class = \OpenCompany\IntegrationCore\Lua\LuaDocRenderer::class; + + if (! class_exists($class) || ! app()->bound($class)) { + return null; + } + + return app($class); } } diff --git a/app/Services/LuaBridge.php b/app/Services/LuaBridge.php index f982786..b732666 100644 --- a/app/Services/LuaBridge.php +++ b/app/Services/LuaBridge.php @@ -19,6 +19,7 @@ public function __construct( $docGenerator->buildFunctionMap($agent), $docGenerator->buildParameterMap($agent), new OpenCompanyLuaToolInvoker($agent, $registry), + $docGenerator->buildAccountMap($agent), ); } diff --git a/app/Services/LuaSandboxService.php b/app/Services/LuaSandboxService.php index 7089ca7..b8962e4 100644 --- a/app/Services/LuaSandboxService.php +++ b/app/Services/LuaSandboxService.php @@ -36,6 +36,8 @@ public function execute(string $code, array $options = [], ?LuaBridge $bridge = $sandbox->load("{$name} = " . $this->phpToLua($value))->call(); } + $this->registerJsonGlobals($sandbox); + $start = microtime(true); try { @@ -224,4 +226,90 @@ private function setupAppNamespace(Sandbox $sandbox, LuaBridge $bridge): void app = make_namespace("") ')->call(); } + + /** + * Register `json.decode()`, `json.encode()`, and `regex.*` as Lua globals. + * + * JSON bridges PHP's json_decode/json_encode so Lua scripts can parse + * JSON strings. Regex bridges PHP's PCRE for patterns Lua's built-in + * matching doesn't support (lookaheads, non-greedy, Unicode, etc.). + */ + private function registerJsonGlobals(Sandbox $sandbox): void + { + $sandbox->register('__json', [ + 'decode' => function (string $json): mixed { + return json_decode($json, associative: true, depth: 512, flags: JSON_THROW_ON_ERROR); + }, + 'encode' => function (mixed $value): string { + return json_encode($value, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT); + }, + ]); + + $sandbox->register('__regex', [ + 'match' => function (string $subject, string $pattern, int $flags = 0): mixed { + if (preg_match($pattern, $subject, $matches, $flags) === 1) { + return $matches; + } + + return null; + }, + 'match_all' => function (string $subject, string $pattern, int $flags = PREG_PATTERN_ORDER): array { + if (preg_match_all($pattern, $subject, $matches, $flags) > 0) { + return $matches; + } + + return []; + }, + 'gsub' => function (string $subject, string $pattern, string $replacement, int $limit = -1): string { + return preg_replace($pattern, $replacement, $subject, $limit) ?? $subject; + }, + ]); + + $sandbox->load(' + json = { + decode = function(s) + if type(s) ~= "string" then + error("json.decode: expected string, got " .. type(s), 2) + end + return __json.decode(s) + end, + encode = function(v) + return __json.encode(v) + end + } + + regex = { + match = function(subject, pattern, flags) + if type(subject) ~= "string" then + error("regex.match: expected string subject, got " .. type(subject), 2) + end + if type(pattern) ~= "string" then + error("regex.match: expected string pattern, got " .. type(pattern), 2) + end + return __regex.match(subject, pattern, flags or 0) + end, + match_all = function(subject, pattern, flags) + if type(subject) ~= "string" then + error("regex.match_all: expected string subject, got " .. type(subject), 2) + end + if type(pattern) ~= "string" then + error("regex.match_all: expected string pattern, got " .. type(pattern), 2) + end + return __regex.match_all(subject, pattern, flags or 0) + end, + gsub = function(subject, pattern, replacement, limit) + if type(subject) ~= "string" then + error("regex.gsub: expected string subject, got " .. type(subject), 2) + end + if type(pattern) ~= "string" then + error("regex.gsub: expected string pattern, got " .. type(pattern), 2) + end + if type(replacement) ~= "string" then + error("regex.gsub: expected string replacement, got " .. type(replacement), 2) + end + return __regex.gsub(subject, pattern, replacement, limit or -1) + end, + } + ')->call(); + } } diff --git a/app/Services/Mcp/McpServerRegistrar.php b/app/Services/Mcp/McpServerRegistrar.php index 33d5808..4e0b5b1 100644 --- a/app/Services/Mcp/McpServerRegistrar.php +++ b/app/Services/Mcp/McpServerRegistrar.php @@ -9,6 +9,10 @@ class McpServerRegistrar { /** * Register all enabled MCP servers as ToolProviders in the registry. + * + * Servers sharing the same slug are grouped: the default (account_alias='') + * provides the canonical tool definitions, and additional accounts are + * registered on the same provider for multi-account namespace support. */ public static function registerAll(ToolProviderRegistry $registry): void { @@ -17,12 +21,40 @@ public static function registerAll(ToolProviderRegistry $registry): void ->whereNotNull('discovered_tools') ->get(); } catch (\Throwable) { - // Table may not exist yet (fresh install / migration pending) return; } + // Group by slug — default account first + $grouped = []; foreach ($servers as $server) { - $registry->register(new McpToolProvider($server)); + $grouped[$server->slug][] = $server; + } + + foreach ($grouped as $slug => $group) { + // Find the default server (account_alias = '') + $default = null; + $accounts = []; + + foreach ($group as $server) { + if ($server->account_alias === '' || $server->account_alias === null) { + $default = $server; + } else { + $accounts[$server->account_alias] = $server; + } + } + + // Fall back to first server if no explicit default + if ($default === null) { + $default = $group[0]; + } + + $provider = new McpToolProvider($default); + + foreach ($accounts as $alias => $accountServer) { + $provider->addAccountServer($alias, $accountServer); + } + + $registry->register($provider); } } } diff --git a/app/Services/Mcp/McpToolProvider.php b/app/Services/Mcp/McpToolProvider.php index 0285c89..cb6e433 100644 --- a/app/Services/Mcp/McpToolProvider.php +++ b/app/Services/Mcp/McpToolProvider.php @@ -9,10 +9,21 @@ class McpToolProvider implements ToolProvider { + /** @var array account_alias => server */ + private array $accountServers = []; + public function __construct( private McpServer $server, ) {} + /** + * Register an additional account server for this provider. + */ + public function addAccountServer(string $account, McpServer $server): void + { + $this->accountServers[$account] = $server; + } + public function appName(): string { return 'mcp_' . $this->server->slug; @@ -57,29 +68,48 @@ public function isIntegration(): bool /** @param array $context */ public function createTool(string $class, array $context = []): Tool { + $account = $context['account'] ?? null; + $server = $this->resolveServer($account); + $toolSlug = $context['tool_slug'] ?? ''; $mcpToolName = $this->mcpToolNameFromSlug($toolSlug); - $mcpToolDef = $this->findToolDef($mcpToolName); + $mcpToolDef = $this->findToolDef($mcpToolName, $server); return new McpProxyTool( - server: $this->server, + server: $server, mcpToolName: $mcpToolName, mcpToolDescription: $mcpToolDef['description'] ?? '', mcpInputSchema: $mcpToolDef['inputSchema'] ?? [], ); } + public function luaDocsPath(): ?string + { + return null; + } + + public function credentialFields(): array + { + return []; + } + /** - * Build tool slug: mcp_{server_slug}__{tool_name_snake} + * Resolve the server for the given account alias. */ + private function resolveServer(?string $account): McpServer + { + if ($account !== null && $account !== '' && isset($this->accountServers[$account])) { + return $this->accountServers[$account]; + } + + return $this->server; + } + private function toolSlug(string $mcpToolName): string { return 'mcp_' . $this->server->slug . '__' . Str::snake($mcpToolName); } - /** - * Extract MCP tool name from slug. - */ private function mcpToolNameFromSlug(string $slug): string { $prefix = 'mcp_' . $this->server->slug . '__'; @@ -92,23 +122,13 @@ private function mcpToolNameFromSlug(string $slug): string } /** - * Find a tool definition by MCP tool name from cached discovered_tools. - * * @return array */ - public function luaDocsPath(): ?string - { - return null; - } - - public function credentialFields(): array + private function findToolDef(string $mcpToolName, ?McpServer $server = null): array { - return []; // MCP servers handle their own credentials - } + $tools = ($server ?? $this->server)->discovered_tools ?? []; - private function findToolDef(string $mcpToolName): array - { - foreach ($this->server->discovered_tools ?? [] as $tool) { + foreach ($tools as $tool) { if (Str::snake($tool['name']) === $mcpToolName || $tool['name'] === $mcpToolName) { return $tool; } diff --git a/app/Services/Memory/CompactionMemoryExtractor.php b/app/Services/Memory/CompactionMemoryExtractor.php new file mode 100644 index 0000000..ff2a8ba --- /dev/null +++ b/app/Services/Memory/CompactionMemoryExtractor.php @@ -0,0 +1,41 @@ +extractSectionBullets($summary, 'Durable Facts'), + $this->extractSectionBullets($summary, 'Decisions'), + ); + + return array_values(array_unique(array_filter(array_map( + fn (string $item): string => trim(preg_replace('/\s+/', ' ', $item) ?? ''), + $items, + )))); + } + + /** + * @return string[] + */ + private function extractSectionBullets(string $summary, string $heading): array + { + $pattern = sprintf( + '/^##\s+%s\s*$([\s\S]*?)(?=^##\s+|\z)/mi', + preg_quote($heading, '/'), + ); + + if (! preg_match($pattern, $summary, $matches)) { + return []; + } + + preg_match_all('/^\-\s+(.*)$/m', trim($matches[1]), $bullets); + + return $bullets[1] ?? []; + } +} diff --git a/app/Services/Memory/CompactionPlan.php b/app/Services/Memory/CompactionPlan.php new file mode 100644 index 0000000..9efa40c --- /dev/null +++ b/app/Services/Memory/CompactionPlan.php @@ -0,0 +1,25 @@ + $messagesToSummarize + * @param Collection $messagesToKeep + */ + public function __construct( + public readonly Collection $messagesToSummarize, + public readonly Collection $messagesToKeep, + public readonly int $splitIndex, + public readonly int $tokensToSummarize, + public readonly int $tokensToKeep, + ) {} + + public function lastSummarizedMessageId(): ?string + { + return $this->messagesToSummarize->last()?->id; + } +} diff --git a/app/Services/Memory/ContextBudget.php b/app/Services/Memory/ContextBudget.php new file mode 100644 index 0000000..0d1cad8 --- /dev/null +++ b/app/Services/Memory/ContextBudget.php @@ -0,0 +1,92 @@ + $messages + * @return array + */ + public function snapshotForAgent(User $agent, iterable $messages, ?string $systemPrompt = null): array + { + $resolved = $this->providerResolver->resolve($agent); + + return $this->snapshot( + $resolved['provider'], + $resolved['model'], + $this->estimateMessagesTokens($messages), + $systemPrompt, + ); + } + + /** + * @return array + */ + public function snapshot( + string $provider, + string $model, + int $messageTokens, + ?string $systemPrompt = null, + ): array { + $contextWindow = $this->contextRegistry->getContextWindow($model, $provider); + $systemTokens = $systemPrompt !== null + ? TokenEstimator::estimate($systemPrompt) + : config('memory.compaction.system_prompt_fallback_reserve', 10_000); + $outputReserve = (int) config('memory.compaction.output_reserve', 4_096); + $effectiveWindow = max(1, $contextWindow - $systemTokens - $outputReserve); + $safetyMargin = (float) config('memory.compaction.safety_margin', 1.2); + $adjustedMessageTokens = (int) ceil($messageTokens * $safetyMargin); + $warningThreshold = (int) floor($effectiveWindow * (float) config('memory.budget.warning_ratio', 0.65)); + $compactionThreshold = (int) floor($effectiveWindow * (float) config('memory.compaction.threshold_ratio', 0.75)); + $flushThreshold = max(1, $compactionThreshold - (int) config('memory.memory_flush.soft_threshold_tokens', 4_000)); + $blockingThreshold = max( + $compactionThreshold, + $effectiveWindow - (int) config('memory.budget.blocking_margin_tokens', 1_024), + ); + $percentLeft = max(0, (int) round((($effectiveWindow - $adjustedMessageTokens) / $effectiveWindow) * 100)); + + return [ + 'provider' => $provider, + 'model' => $model, + 'context_window' => $contextWindow, + 'system_tokens' => $systemTokens, + 'output_reserve' => $outputReserve, + 'effective_window' => $effectiveWindow, + 'raw_message_tokens' => $messageTokens, + 'adjusted_message_tokens' => $adjustedMessageTokens, + 'safety_margin' => $safetyMargin, + 'warning_threshold' => $warningThreshold, + 'flush_threshold' => $flushThreshold, + 'compaction_threshold' => $compactionThreshold, + 'blocking_threshold' => $blockingThreshold, + 'percent_left' => $percentLeft, + 'is_above_warning' => $adjustedMessageTokens >= $warningThreshold, + 'is_above_flush' => $adjustedMessageTokens >= $flushThreshold, + 'is_above_compaction' => $adjustedMessageTokens > $compactionThreshold, + 'is_at_blocking_limit' => $adjustedMessageTokens >= $blockingThreshold, + ]; + } + + /** + * @param iterable $messages + */ + public function estimateMessagesTokens(iterable $messages): int + { + $total = 0; + + foreach ($messages as $message) { + $total += TokenEstimator::estimate((string) ($message->content ?? '')); + } + + return $total; + } +} diff --git a/app/Services/Memory/ContextPruner.php b/app/Services/Memory/ContextPruner.php new file mode 100644 index 0000000..94d633e --- /dev/null +++ b/app/Services/Memory/ContextPruner.php @@ -0,0 +1,143 @@ + $messages + * @return array{messages: array, pruned_results: int, estimated_tokens_saved: int} + */ + public function prune(array $messages): array + { + if (! config('memory.pruning.enabled', true)) { + return [ + 'messages' => $messages, + 'pruned_results' => 0, + 'estimated_tokens_saved' => 0, + ]; + } + + $candidates = []; + + foreach ($messages as $index => $message) { + if (! $message instanceof ToolResultMessage) { + continue; + } + + $savings = 0; + $eligible = true; + + foreach ($message->toolResults as $toolResult) { + $toolType = $this->toolRegistry->getToolTypeBySlug($toolResult->name); + + if ($toolType !== 'read') { + $eligible = false; + break; + } + + $serialized = $this->serializeResult($toolResult->result); + if ($serialized === null) { + $eligible = false; + break; + } + + $tokens = TokenEstimator::estimate($serialized); + if ($tokens < (int) config('memory.pruning.min_result_tokens', 400)) { + $eligible = false; + break; + } + + $placeholderTokens = TokenEstimator::estimate($this->placeholder($toolResult->name)); + $savings += max(0, $tokens - $placeholderTokens); + } + + if (! $eligible || $savings <= 0) { + continue; + } + + $candidates[] = [ + 'index' => $index, + 'tokens_saved' => $savings, + 'message' => $message, + ]; + } + + $keepRecent = (int) config('memory.pruning.keep_recent_read_results', 2); + if (count($candidates) <= $keepRecent) { + return [ + 'messages' => $messages, + 'pruned_results' => 0, + 'estimated_tokens_saved' => 0, + ]; + } + + $prunable = array_slice($candidates, 0, max(0, count($candidates) - $keepRecent)); + $tokensSaved = array_sum(array_column($prunable, 'tokens_saved')); + + if ($tokensSaved < (int) config('memory.pruning.min_total_saved_tokens', 1_000)) { + return [ + 'messages' => $messages, + 'pruned_results' => 0, + 'estimated_tokens_saved' => 0, + ]; + } + + $prunedResults = 0; + + foreach ($prunable as $candidate) { + /** @var ToolResultMessage $toolResultMessage */ + $toolResultMessage = $candidate['message']; + + $messages[$candidate['index']] = new ToolResultMessage( + $toolResultMessage->toolResults->map(function (ToolResult $toolResult) use (&$prunedResults) { + $prunedResults++; + + return new ToolResult( + id: $toolResult->id, + name: $toolResult->name, + arguments: $toolResult->arguments, + result: $this->placeholder($toolResult->name), + resultId: $toolResult->resultId, + ); + }) + ); + } + + return [ + 'messages' => $messages, + 'pruned_results' => $prunedResults, + 'estimated_tokens_saved' => $tokensSaved, + ]; + } + + private function placeholder(string $toolName): string + { + return "[Earlier {$toolName} read result omitted from retry context. Re-run the tool if you still need the full output.]"; + } + + private function serializeResult(mixed $result): ?string + { + if (is_string($result)) { + return trim($result) !== '' ? $result : null; + } + + if (is_array($result)) { + $encoded = json_encode($result, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE); + + return $encoded !== false && $encoded !== '[]' ? $encoded : null; + } + + return null; + } +} diff --git a/app/Services/Memory/ConversationCompactionService.php b/app/Services/Memory/ConversationCompactionService.php index 923f102..a645878 100644 --- a/app/Services/Memory/ConversationCompactionService.php +++ b/app/Services/Memory/ConversationCompactionService.php @@ -7,7 +7,9 @@ use App\Models\ConversationSummary; use App\Models\Message; use App\Models\User; +use App\Services\AgentDocumentService; use Illuminate\Support\Facades\Log; +use Illuminate\Support\Str; use Laravel\Ai\Messages\AssistantMessage; use Laravel\Ai\Messages\UserMessage; use Prism\Prism\Facades\Prism; @@ -15,8 +17,11 @@ class ConversationCompactionService { public function __construct( - private ModelContextRegistry $contextRegistry, + private ContextBudget $contextBudget, private DynamicProviderResolver $providerResolver, + private CompactionMemoryExtractor $memoryExtractor, + private AgentDocumentService $documentService, + private DocumentIndexingService $documentIndexingService, ) {} /** @@ -33,27 +38,16 @@ public function needsCompaction(string $channelId, User $agent, iterable $messag } try { - $resolved = $this->providerResolver->resolve($agent); - $contextWindow = $this->contextRegistry->getContextWindow($resolved['model']); - } catch (\Throwable) { - return false; - } - - $systemTokens = $systemPrompt - ? $this->estimateTokenCount($systemPrompt) - : config('memory.compaction.system_prompt_fallback_reserve', 10_000); - $outputReserve = config('memory.compaction.output_reserve', 4_096); - $available = $contextWindow - $systemTokens - $outputReserve; + if ($this->isCircuitOpen($channelId, $agent)) { + return false; + } - if ($available <= 0) { + $budget = $this->contextBudget->snapshotForAgent($agent, $messages, $systemPrompt); + } catch (\Throwable) { return false; } - $messageTokens = $this->estimateMessagesTokens($messages); - $safetyMargin = config('memory.compaction.safety_margin', 1.2); - $threshold = $available * config('memory.compaction.threshold_ratio', 0.75); - - return ($messageTokens * $safetyMargin) > $threshold; + return (bool) $budget['is_above_compaction']; } /** @@ -65,6 +59,16 @@ public function compact(string $channelId, User $agent): ?ConversationSummary ->where('agent_id', $agent->id) ->first(); + if ($existing?->compaction_circuit_open_until?->isFuture()) { + Log::warning('Skipping compaction while circuit is open', [ + 'channel_id' => $channelId, + 'agent' => $agent->name, + 'open_until' => $existing->compaction_circuit_open_until?->toIso8601String(), + ]); + + return null; + } + // Only load messages after the previous compaction point $query = Message::where('channel_id', $channelId) ->orderBy('created_at', 'asc'); @@ -82,32 +86,16 @@ public function compact(string $channelId, User $agent): ?ConversationSummary return null; } - $keepRecentTokens = config('memory.compaction.keep_recent_tokens', 20_000); - $minKeep = config('memory.compaction.min_keep_messages', 3); - - // Walk from newest to oldest, accumulating tokens until budget is exceeded - $keptTokens = 0; - $splitIndex = 0; - for ($i = $messages->count() - 1; $i >= 0; $i--) { - $msgTokens = $this->estimateTokenCount($messages[$i]->content ?? ''); - if ($keptTokens + $msgTokens > $keepRecentTokens - && ($messages->count() - $i - 1) >= $minKeep) { - $splitIndex = $i + 1; - break; - } - $keptTokens += $msgTokens; - } - - if ($splitIndex <= 0) { + $plan = $this->buildPlan($messages); + if ($plan === null) { return null; } - $toSummarize = $messages->slice(0, $splitIndex)->values(); $previousSummary = $existing->summary ?? ''; // Build SDK messages for summarization $sdkMessages = []; - foreach ($toSummarize as $msg) { + foreach ($plan->messagesToSummarize as $msg) { if (empty($msg->content)) { continue; } @@ -119,8 +107,21 @@ public function compact(string $channelId, User $agent): ?ConversationSummary } } - $summaryText = $this->summarize($sdkMessages, $previousSummary); - $tokensBefore = $this->estimateMessagesTokens($sdkMessages); + try { + $summaryText = $this->summarize($sdkMessages, $previousSummary, $plan); + } catch (\Throwable $e) { + $this->recordFailure($channelId, $agent, $existing, $e); + + Log::error('Conversation summarization failed', [ + 'channel_id' => $channelId, + 'agent' => $agent->name, + 'error' => $e->getMessage(), + ]); + + return null; + } + + $tokensBefore = $plan->tokensToSummarize; $summary = ConversationSummary::updateOrCreate( ['channel_id' => $channelId, 'agent_id' => $agent->id], @@ -130,11 +131,17 @@ public function compact(string $channelId, User $agent): ?ConversationSummary 'tokens_after' => $this->estimateTokenCount($summaryText), 'compaction_count' => ($existing->compaction_count ?? 0) + 1, 'flush_count' => 0, // Reset for new compaction cycle + 'compaction_failure_count' => 0, + 'last_compaction_failed_at' => null, + 'compaction_circuit_open_until' => null, + 'last_compaction_error' => null, 'messages_summarized' => ($existing->messages_summarized ?? 0) + count($sdkMessages), - 'last_message_id' => $toSummarize->last()->id ?? $existing->last_message_id, + 'last_message_id' => $plan->lastSummarizedMessageId() ?? $existing->last_message_id, ] ); + $this->extractDurableMemories($agent, $summaryText); + Log::info('Conversation compacted', [ 'channel_id' => $channelId, 'agent' => $agent->name, @@ -142,6 +149,8 @@ public function compact(string $channelId, User $agent): ?ConversationSummary 'tokens_before' => $tokensBefore, 'tokens_after' => $summary->tokens_after, 'compaction_count' => $summary->compaction_count, + 'split_index' => $plan->splitIndex, + 'tokens_kept' => $plan->tokensToKeep, ]); return $summary; @@ -152,14 +161,19 @@ public function compact(string $channelId, User $agent): ?ConversationSummary * * @param array $messages */ - private function summarize(array $messages, string $previousSummary): string + private function summarize(array $messages, string $previousSummary, CompactionPlan $plan): string { - $prompt = "You are summarizing a conversation for an AI agent's context window.\n\n"; + $prompt = "You are summarizing older OpenCompany conversation history for later retrieval.\n\n"; if ($previousSummary) { $prompt .= "Previous summary of even older messages:\n{$previousSummary}\n\n"; } + $prompt .= "Compaction plan:\n"; + $prompt .= "- Messages being summarized: {$plan->messagesToSummarize->count()}\n"; + $prompt .= "- Messages kept verbatim after the split: {$plan->messagesToKeep->count()}\n"; + $prompt .= "- Tokens kept verbatim: {$plan->tokensToKeep}\n\n"; + $prompt .= "Messages to summarize:\n"; foreach ($messages as $msg) { $role = $msg instanceof AssistantMessage ? 'assistant' : 'user'; @@ -167,10 +181,14 @@ private function summarize(array $messages, string $previousSummary): string $prompt .= "[{$role}]: {$content}\n"; } - $prompt .= "\nCreate a concise summary that captures:\n"; - $prompt .= "- Key topics discussed\n- Decisions made\n- Action items\n- Important context\n"; - $prompt .= "- User preferences expressed\n\n"; - $prompt .= "Be factual and specific. Preserve names, dates, and technical details."; + $prompt .= "\nReturn markdown with these exact headings:\n"; + $prompt .= "## Objectives\n## Decisions\n## Open Work\n## Durable Facts\n## References\n\n"; + $prompt .= "Rules:\n"; + $prompt .= "- Use short bullet lists under every heading.\n"; + $prompt .= "- Include names, dates, tool outputs, IDs, and file paths when they matter.\n"; + $prompt .= "- Put reusable preferences, standing decisions, and durable facts under Durable Facts.\n"; + $prompt .= "- If a section has nothing important, write a single bullet: - none\n"; + $prompt .= "- Do not invent anything.\n"; [$provider, $model] = AppSetting::resolveProviderModel( 'memory_summary_model', 'memory.compaction.summary_model' @@ -191,8 +209,117 @@ private function summarize(array $messages, string $previousSummary): string return $response->text; } catch (\Throwable $e) { - Log::error('Conversation summarization failed', ['error' => $e->getMessage()]); - return $previousSummary ?: '[Summary generation failed]'; + throw $e; + } + } + + /** + * @param \Illuminate\Support\Collection $messages + */ + private function buildPlan(\Illuminate\Support\Collection $messages): ?CompactionPlan + { + $keepRecentTokens = (int) config('memory.compaction.keep_recent_tokens', 20_000); + $minKeep = (int) config('memory.compaction.min_keep_messages', 3); + $keptTokens = 0; + $splitIndex = 0; + + for ($i = $messages->count() - 1; $i >= 0; $i--) { + $msgTokens = $this->estimateTokenCount((string) ($messages[$i]->content ?? '')); + + if ($keptTokens + $msgTokens > $keepRecentTokens + && ($messages->count() - $i - 1) >= $minKeep) { + $splitIndex = $i + 1; + break; + } + + $keptTokens += $msgTokens; + } + + if ($splitIndex <= 0) { + return null; + } + + $toSummarize = $messages->slice(0, $splitIndex)->values(); + $toKeep = $messages->slice($splitIndex)->values(); + + return new CompactionPlan( + messagesToSummarize: $toSummarize, + messagesToKeep: $toKeep, + splitIndex: $splitIndex, + tokensToSummarize: $this->estimateMessagesTokens($toSummarize), + tokensToKeep: $this->estimateMessagesTokens($toKeep), + ); + } + + private function isCircuitOpen(string $channelId, User $agent): bool + { + $summary = ConversationSummary::where('channel_id', $channelId) + ->where('agent_id', $agent->id) + ->first(); + + return $summary?->compaction_circuit_open_until?->isFuture() ?? false; + } + + private function recordFailure(string $channelId, User $agent, ?ConversationSummary $existing, \Throwable $error): void + { + $failureCount = ($existing?->compaction_failure_count ?? 0) + 1; + $tripAfter = (int) config('memory.compaction.circuit_breaker.after_failures', 3); + $cooldownMinutes = (int) config('memory.compaction.circuit_breaker.cooldown_minutes', 30); + + ConversationSummary::updateOrCreate( + ['channel_id' => $channelId, 'agent_id' => $agent->id], + [ + 'workspace_id' => $agent->workspace_id ?? workspace()->id, + 'summary' => $existing->summary ?? '', + 'tokens_before' => $existing->tokens_before ?? 0, + 'tokens_after' => $existing->tokens_after ?? 0, + 'compaction_count' => $existing->compaction_count ?? 0, + 'flush_count' => $existing->flush_count ?? 0, + 'messages_summarized' => $existing->messages_summarized ?? 0, + 'last_message_id' => $existing?->last_message_id, + 'compaction_failure_count' => $failureCount, + 'last_compaction_failed_at' => now(), + 'compaction_circuit_open_until' => $failureCount >= $tripAfter + ? now()->addMinutes($cooldownMinutes) + : null, + 'last_compaction_error' => Str::limit($error->getMessage(), 4_000), + ] + ); + } + + private function extractDurableMemories(User $agent, string $summary): void + { + if (! config('memory.compaction.memory_extraction.enabled', true)) { + return; + } + + $items = array_slice( + $this->memoryExtractor->extract($summary), + 0, + (int) config('memory.compaction.memory_extraction.max_items', 8), + ); + + if ($items === []) { + return; + } + + $entry = "### [compaction] " . now()->format('H:i') . "\n\n"; + $entry .= implode("\n", array_map( + fn (string $item): string => "- {$item}", + $items, + )); + + try { + $document = $this->documentService->createMemoryLog($agent, $entry); + + if ($document !== null) { + $this->documentIndexingService->index($document, 'memory', $agent->id); + } + } catch (\Throwable $e) { + Log::warning('Failed to persist extracted compaction memories', [ + 'agent' => $agent->name, + 'error' => $e->getMessage(), + ]); } } diff --git a/app/Services/Memory/MemoryFlushService.php b/app/Services/Memory/MemoryFlushService.php index 5aff2a0..4b6d8a5 100644 --- a/app/Services/Memory/MemoryFlushService.php +++ b/app/Services/Memory/MemoryFlushService.php @@ -3,17 +3,15 @@ namespace App\Services\Memory; use App\Agents\OpenCompanyAgent; -use App\Agents\Providers\DynamicProviderResolver; use App\Models\ConversationSummary; use App\Models\User; use Illuminate\Support\Facades\Log; +use OpenCompany\PrismRelay\Bridge\SystemPromptBag; class MemoryFlushService { public function __construct( - private ConversationCompactionService $compactionService, - private ModelContextRegistry $contextRegistry, - private DynamicProviderResolver $providerResolver, + private ContextBudget $contextBudget, ) {} /** @@ -41,40 +39,13 @@ public function shouldFlush(string $channelId, User $agent, iterable $messages, return false; } - // Resolve model context window try { - $resolved = $this->providerResolver->resolve($agent); - $contextWindow = $this->contextRegistry->getContextWindow($resolved['model']); + $budget = $this->contextBudget->snapshotForAgent($agent, $messages, $systemPrompt); } catch (\Throwable) { return false; } - // Calculate available context (same logic as compaction) - $systemTokens = $systemPrompt - ? $this->compactionService->estimateTokenCount($systemPrompt) - : config('memory.compaction.system_prompt_fallback_reserve', 10_000); - $outputReserve = config('memory.compaction.output_reserve', 4_096); - $available = $contextWindow - $systemTokens - $outputReserve; - - if ($available <= 0) { - return false; - } - - // Estimate message tokens with safety margin - $messageTokens = 0; - foreach ($messages as $msg) { - $content = $msg->content ?? ''; - $messageTokens += $this->compactionService->estimateTokenCount($content); - } - - $safetyMargin = config('memory.compaction.safety_margin', 1.2); - $adjustedTokens = (int) ($messageTokens * $safetyMargin); - $compactionThreshold = (int) ($available * config('memory.compaction.threshold_ratio', 0.75)); - $softThresholdTokens = config('memory.memory_flush.soft_threshold_tokens', 4000); - $softZoneStart = $compactionThreshold - $softThresholdTokens; - - // Flush when context is within the soft zone (approaching compaction but not yet exceeding it) - return $adjustedTokens > $softZoneStart && $adjustedTokens <= $compactionThreshold; + return (bool) $budget['is_above_flush'] && ! (bool) $budget['is_above_compaction']; } /** @@ -87,6 +58,9 @@ public function shouldFlush(string $channelId, User $agent, iterable $messages, public function flush(string $channelId, User $agent): void { $agentInstance = OpenCompanyAgent::for($agent, $channelId); + app()->instance(SystemPromptBag::class, new SystemPromptBag( + $agentInstance->systemPrompts() + )); $agentInstance->prompt($this->buildFlushPrompt()); // Increment flush count (create summary record if needed) diff --git a/app/Services/Memory/ModelContextRegistry.php b/app/Services/Memory/ModelContextRegistry.php index 987a4a6..8a540c9 100644 --- a/app/Services/Memory/ModelContextRegistry.php +++ b/app/Services/Memory/ModelContextRegistry.php @@ -4,6 +4,7 @@ use App\Models\AppSetting; use Illuminate\Support\Facades\Log; +use OpenCompany\PrismRelay\Meta\ProviderMeta; class ModelContextRegistry { @@ -12,31 +13,34 @@ class ModelContextRegistry */ private const LEVENSHTEIN_MAX_DISTANCE = 5; + public function __construct( + private ProviderMeta $providerMeta, + ) {} + /** * Get the context window size (in tokens) for a given model. * * Lookup order: * 1. User overrides from AppSetting (admin-configurable) - * 2. Built-in registry: exact match, then longest prefix match - * 3. Levenshtein fuzzy match (closest known model within distance threshold) - * 4. Default (conservative 32K) + * 2. prism-relay provider metadata when a provider is known + * 3. Local fallback registry: exact match, then longest prefix match + * 4. Levenshtein fuzzy match (closest known model within distance threshold) + * 5. Default (conservative 32K) */ - public function getContextWindow(string $model): int + public function getContextWindow(string $model, ?string $provider = null): int { $overrides = $this->getUserOverrides(); - $builtIn = config('memory.context_windows.models', []); + $exactOverride = $this->exactOverride($overrides, $model, $provider); - // 1. User overrides — exact match takes highest priority - if (isset($overrides[$model])) { - return (int) $overrides[$model]; + if ($exactOverride !== null) { + return $exactOverride; } - // 2a. Built-in exact match - if (isset($builtIn[$model])) { - return $builtIn[$model]; + if ($provider !== null && $this->providerMeta->has($provider)) { + return $this->providerMeta->contextWindow($provider, $model); } - // 2b. Longest prefix match across both built-in and user overrides + $builtIn = config('memory.context_windows.models', []); $allModels = array_merge($builtIn, $overrides); $prefixResult = $this->longestPrefixMatch($model, $allModels); if ($prefixResult !== null) { @@ -126,4 +130,20 @@ private function getUserOverrides(): array return is_array($value) ? $value : []; } + + /** + * @param array $overrides + */ + private function exactOverride(array $overrides, string $model, ?string $provider): ?int + { + if ($provider !== null && isset($overrides["{$provider}:{$model}"])) { + return (int) $overrides["{$provider}:{$model}"]; + } + + if (isset($overrides[$model])) { + return (int) $overrides[$model]; + } + + return null; + } } diff --git a/app/Services/Memory/OutputTruncator.php b/app/Services/Memory/OutputTruncator.php new file mode 100644 index 0000000..3743315 --- /dev/null +++ b/app/Services/Memory/OutputTruncator.php @@ -0,0 +1,60 @@ +maxLines ??= (int) config('memory.tool_results.max_lines', 2000); + $this->maxBytes ??= (int) config('memory.tool_results.max_bytes', 50_000); + $this->disk ??= (string) config('memory.tool_results.disk', 'local'); + $this->pathPrefix ??= trim((string) config('memory.tool_results.path', 'agent-tool-results'), '/'); + } + + public function truncate(mixed $result, string $toolCallId): mixed + { + if (! is_string($result)) { + return $result; + } + + $lines = substr_count($result, "\n") + 1; + $bytes = strlen($result); + + if ($lines <= $this->maxLines && $bytes <= $this->maxBytes) { + return $result; + } + + $storagePath = $this->storeFullOutput($result, $toolCallId); + $truncated = $result; + + if ($lines > $this->maxLines) { + $truncated = implode("\n", array_slice(explode("\n", $truncated), 0, $this->maxLines)); + } + + if (strlen($truncated) > $this->maxBytes) { + $truncated = mb_strcut($truncated, 0, $this->maxBytes, 'UTF-8'); + } + + return $truncated."\n\n[truncated - full output stored at storage:{$storagePath}]"; + } + + private function storeFullOutput(string $result, string $toolCallId): string + { + $datePath = now()->format('Y/m/d'); + $safeId = trim(preg_replace('/[^a-zA-Z0-9_-]/', '_', $toolCallId) ?? '', '_'); + $safeId = $safeId !== '' ? $safeId : Str::random(12); + $path = "{$this->pathPrefix}/{$datePath}/tool_{$safeId}.txt"; + + Storage::disk($this->disk)->put($path, $result); + + return $path; + } +} diff --git a/app/Services/Memory/PromptFrameBuilder.php b/app/Services/Memory/PromptFrameBuilder.php new file mode 100644 index 0000000..9a5536c --- /dev/null +++ b/app/Services/Memory/PromptFrameBuilder.php @@ -0,0 +1,78 @@ + $sections + * @param string[]|null $volatileLabels + * @return array{ + * stable_prompt: string, + * volatile_prompt: string, + * full_prompt: string, + * stable_sections: array, + * volatile_sections: array, + * stable_breakdown: array, + * volatile_breakdown: array, + * full_breakdown: array + * } + */ + public function splitSections(array $sections, ?array $volatileLabels = null): array + { + $volatileLabels ??= self::DEFAULT_VOLATILE_SECTION_LABELS; + $stable = []; + $volatile = []; + + foreach ($sections as $section) { + if (in_array($section['label'], $volatileLabels, true)) { + $volatile[] = $section; + } else { + $stable[] = $section; + } + } + + return [ + 'stable_prompt' => $this->join($stable), + 'volatile_prompt' => $this->join($volatile), + 'full_prompt' => $this->join($sections), + 'stable_sections' => $stable, + 'volatile_sections' => $volatile, + 'stable_breakdown' => $this->breakdown($stable), + 'volatile_breakdown' => $this->breakdown($volatile), + 'full_breakdown' => $this->breakdown($sections), + ]; + } + + /** + * @param array $sections + */ + private function join(array $sections): string + { + return implode('', array_column($sections, 'content')); + } + + /** + * @param array $sections + * @return array + */ + private function breakdown(array $sections): array + { + return array_values(array_map( + fn (array $section) => [ + 'label' => $section['label'], + 'chars' => mb_strlen($section['content']), + ], + $sections, + )); + } +} diff --git a/app/Services/Memory/ToolResultDeduplicator.php b/app/Services/Memory/ToolResultDeduplicator.php new file mode 100644 index 0000000..5c00ac0 --- /dev/null +++ b/app/Services/Memory/ToolResultDeduplicator.php @@ -0,0 +1,124 @@ + $messages + * @return array{messages: array, deduplicated: int} + */ + public function deduplicate(array $messages): array + { + $count = count($messages); + if ($count < 2) { + return ['messages' => $messages, 'deduplicated' => 0]; + } + + $latestBySig = []; + for ($i = $count - 1; $i >= 0; $i--) { + if (! $messages[$i] instanceof ToolResultMessage) { + continue; + } + + $results = $messages[$i]->toolResults->values(); + for ($rIdx = $results->count() - 1; $rIdx >= 0; $rIdx--) { + /** @var ToolResult $result */ + $result = $results[$rIdx]; + + if ($this->isSuperseded($result->result)) { + continue; + } + + if (! $this->shouldDeduplicate($result)) { + continue; + } + + $sig = $this->signature($result); + if (! isset($latestBySig[$sig])) { + $latestBySig[$sig] = [$i, $rIdx]; + } + } + } + + $deduplicated = 0; + + for ($i = 0; $i < $count; $i++) { + if (! $messages[$i] instanceof ToolResultMessage) { + continue; + } + + $toolResults = $messages[$i]->toolResults->values(); + + foreach ($toolResults as $rIdx => $result) { + if (! $result instanceof ToolResult) { + continue; + } + + if ($this->isSuperseded($result->result)) { + continue; + } + + if (! $this->shouldDeduplicate($result)) { + continue; + } + + $sig = $this->signature($result); + if (isset($latestBySig[$sig]) && $latestBySig[$sig] !== [$i, $rIdx]) { + $toolResults[$rIdx] = $this->supersede($result, self::EXACT_SUPERSEDE); + $deduplicated++; + } + } + + $messages[$i]->toolResults = $toolResults; + } + + return ['messages' => $messages, 'deduplicated' => $deduplicated]; + } + + private function supersede(ToolResult $result, string $placeholder): ToolResult + { + return new ToolResult( + id: $result->id, + name: $result->name, + arguments: $result->arguments, + result: $placeholder, + resultId: $result->resultId, + ); + } + + private function signature(ToolResult $result): string + { + $args = $result->arguments; + ksort($args); + $resultString = is_string($result->result) + ? $result->result + : json_encode($result->result, JSON_INVALID_UTF8_SUBSTITUTE); + + return $result->name.':'.json_encode($args, JSON_THROW_ON_ERROR | JSON_INVALID_UTF8_SUBSTITUTE).':'.md5((string) $resultString); + } + + private function isSuperseded(mixed $result): bool + { + if (! is_string($result)) { + return false; + } + + return str_starts_with($result, '[Superseded'); + } + + private function shouldDeduplicate(ToolResult $result): bool + { + return $this->toolRegistry->getToolTypeBySlug($result->name) === 'read'; + } +} diff --git a/app/Services/OpenCompanyLuaToolInvoker.php b/app/Services/OpenCompanyLuaToolInvoker.php index 17b9723..518007f 100644 --- a/app/Services/OpenCompanyLuaToolInvoker.php +++ b/app/Services/OpenCompanyLuaToolInvoker.php @@ -15,9 +15,9 @@ public function __construct( private ToolRegistry $registry, ) {} - public function invoke(string $toolSlug, array $args): mixed + public function invoke(string $toolSlug, array $args, ?string $account = null): mixed { - $tool = $this->registry->instantiateToolBySlug($toolSlug, $this->agent); + $tool = $this->registry->instantiateToolBySlug($toolSlug, $this->agent, $account); if ($tool === null) { throw new \RuntimeException("Tool not available: {$toolSlug}"); diff --git a/composer.json b/composer.json index e06fc54..a898928 100644 --- a/composer.json +++ b/composer.json @@ -8,7 +8,11 @@ "repositories": [ { "type": "path", - "url": "../integrations/*" + "url": "../integrations/core" + }, + { + "type": "path", + "url": "../integrations/packages/*" }, { "type": "path", diff --git a/composer.lock b/composer.lock index b57ec05..ceab489 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "f29a59c5bc95a3bf57a5724035d3ab86", + "content-hash": "fa3ced4b89db8bac75102974fa2e74c0", "packages": [ { "name": "brick/math", @@ -3508,7 +3508,7 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/celestial", + "url": "../integrations/packages/celestial", "reference": "fab344d2b1985f29404ab219e22a2f2bbf80d7de" }, "require": { @@ -3557,7 +3557,7 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/clickup", + "url": "../integrations/packages/clickup", "reference": "df15d97daf5d043302296d6fbfc0cedfee53b9ef" }, "require": { @@ -3606,7 +3606,7 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/coingecko", + "url": "../integrations/packages/coingecko", "reference": "bd41637c8207e85bf9083866288d6c98bddf42b0" }, "require": { @@ -3655,11 +3655,11 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/core", - "reference": "db0fbae3999b421657a53ca3707568ebfabe804d" + "url": "../integrations/core", + "reference": "b47a39158521ee333af52d484bfbdd6ad70bc250" }, "require": { - "illuminate/support": "^11.0 || ^12.0", + "illuminate/support": "^11.0 || ^12.0 || ^13.0", "php": "^8.2" }, "type": "library", @@ -3700,7 +3700,7 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/exchangerate", + "url": "../integrations/packages/exchangerate", "reference": "249dd5bc026d55919c87cdcb8cdad697c3008c5f" }, "require": { @@ -3749,7 +3749,7 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/google", + "url": "../integrations/packages/google", "reference": "35ba0f0f6968b1158cc8cc2f8ded8d9b5cc874be" }, "require": { @@ -3798,7 +3798,7 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/mermaid", + "url": "../integrations/packages/mermaid", "reference": "70ed2b9614499968f7f51b7106a3203cbcc66b03" }, "require": { @@ -3848,7 +3848,7 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/plantuml", + "url": "../integrations/packages/plantuml", "reference": "e570060c35e911032c0e99a22a7da93dfc97f625" }, "require": { @@ -3898,7 +3898,7 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/plausible", + "url": "../integrations/packages/plausible", "reference": "95251ca14079e2a0abcb132ce27e87d530d5d4b9" }, "require": { @@ -3946,7 +3946,7 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/ticktick", + "url": "../integrations/packages/ticktick", "reference": "df47a42632f14836e532d3033423580496e91de6" }, "require": { @@ -3995,7 +3995,7 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/trustmrr", + "url": "../integrations/packages/trustmrr", "reference": "f1b1df1590240398d71c64ca39151f5bb55fa241" }, "require": { @@ -4045,7 +4045,7 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/typst", + "url": "../integrations/packages/typst", "reference": "8ce08da4f5085ca33b80c62705d7f2ed829b4eec" }, "require": { @@ -4095,7 +4095,7 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/vegalite", + "url": "../integrations/packages/vegalite", "reference": "4a0566b220d25853522ee0eb1e9ce9fe4a6b5202" }, "require": { @@ -4132,7 +4132,7 @@ "version": "dev-main", "dist": { "type": "path", - "url": "tmp/integrations/worldbank", + "url": "../integrations/packages/worldbank", "reference": "aceab01ea12a9e3fa22c0d545b5f2240d1239687" }, "require": { @@ -4182,11 +4182,11 @@ "dist": { "type": "path", "url": "tmp/prism-relay", - "reference": "b0ee49538c5d9ce13e9d6c55d0161a6e9c881ad3" + "reference": "9c3e22d6eaac5f5f13aebd7e7207ced54036f1e6" }, "require": { "php": "^8.2", - "prism-php/prism": "^0.99 || ^1.0", + "prism-php/prism": "^0.99 || ^0.100 || ^1.0", "psr/log": "^3.0" }, "type": "library", @@ -4202,6 +4202,11 @@ "OpenCompany\\PrismRelay\\": "src/" } }, + "scripts": { + "sync-registry": [ + "node scripts/sync-registry.mjs" + ] + }, "license": [ "MIT" ], @@ -4440,16 +4445,16 @@ }, { "name": "prism-php/prism", - "version": "v0.99.21", + "version": "v0.99.22", "source": { "type": "git", "url": "https://github.com/prism-php/prism.git", - "reference": "95272567629a62831294f63b1b927b1e2e608daf" + "reference": "989f67567aef69c613eae6e932d615fb96e2f5d7" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/prism-php/prism/zipball/95272567629a62831294f63b1b927b1e2e608daf", - "reference": "95272567629a62831294f63b1b927b1e2e608daf", + "url": "https://api.github.com/repos/prism-php/prism/zipball/989f67567aef69c613eae6e932d615fb96e2f5d7", + "reference": "989f67567aef69c613eae6e932d615fb96e2f5d7", "shasum": "" }, "require": { @@ -4507,7 +4512,7 @@ "description": "A powerful Laravel package for integrating Large Language Models (LLMs) into your applications.", "support": { "issues": "https://github.com/prism-php/prism/issues", - "source": "https://github.com/prism-php/prism/tree/v0.99.21" + "source": "https://github.com/prism-php/prism/tree/v0.99.22" }, "funding": [ { @@ -4515,7 +4520,7 @@ "type": "github" } ], - "time": "2026-03-01T21:12:44+00:00" + "time": "2026-03-12T17:55:23+00:00" }, { "name": "psr/clock", @@ -5858,16 +5863,16 @@ }, { "name": "symfony/clock", - "version": "v8.0.0", + "version": "v8.0.8", "source": { "type": "git", "url": "https://github.com/symfony/clock.git", - "reference": "832119f9b8dbc6c8e6f65f30c5969eca1e88764f" + "reference": "b55a638b189a6faa875e0ccdb00908fb87af95b3" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/clock/zipball/832119f9b8dbc6c8e6f65f30c5969eca1e88764f", - "reference": "832119f9b8dbc6c8e6f65f30c5969eca1e88764f", + "url": "https://api.github.com/repos/symfony/clock/zipball/b55a638b189a6faa875e0ccdb00908fb87af95b3", + "reference": "b55a638b189a6faa875e0ccdb00908fb87af95b3", "shasum": "" }, "require": { @@ -5911,7 +5916,7 @@ "time" ], "support": { - "source": "https://github.com/symfony/clock/tree/v8.0.0" + "source": "https://github.com/symfony/clock/tree/v8.0.8" }, "funding": [ { @@ -5931,20 +5936,20 @@ "type": "tidelift" } ], - "time": "2025-11-12T15:46:48+00:00" + "time": "2026-03-30T15:14:47+00:00" }, { "name": "symfony/console", - "version": "v7.4.7", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/console.git", - "reference": "e1e6770440fb9c9b0cf725f81d1361ad1835329d" + "reference": "1e92e39c51f95b88e3d66fa2d9f06d1fb45dd707" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/console/zipball/e1e6770440fb9c9b0cf725f81d1361ad1835329d", - "reference": "e1e6770440fb9c9b0cf725f81d1361ad1835329d", + "url": "https://api.github.com/repos/symfony/console/zipball/1e92e39c51f95b88e3d66fa2d9f06d1fb45dd707", + "reference": "1e92e39c51f95b88e3d66fa2d9f06d1fb45dd707", "shasum": "" }, "require": { @@ -6009,7 +6014,7 @@ "terminal" ], "support": { - "source": "https://github.com/symfony/console/tree/v7.4.7" + "source": "https://github.com/symfony/console/tree/v7.4.8" }, "funding": [ { @@ -6029,20 +6034,20 @@ "type": "tidelift" } ], - "time": "2026-03-06T14:06:20+00:00" + "time": "2026-03-30T13:54:39+00:00" }, { "name": "symfony/css-selector", - "version": "v8.0.6", + "version": "v8.0.8", "source": { "type": "git", "url": "https://github.com/symfony/css-selector.git", - "reference": "2a178bf80f05dbbe469a337730eba79d61315262" + "reference": "8db1c00226a94d8ab6aa89d9224eeee91e2ea2ed" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/css-selector/zipball/2a178bf80f05dbbe469a337730eba79d61315262", - "reference": "2a178bf80f05dbbe469a337730eba79d61315262", + "url": "https://api.github.com/repos/symfony/css-selector/zipball/8db1c00226a94d8ab6aa89d9224eeee91e2ea2ed", + "reference": "8db1c00226a94d8ab6aa89d9224eeee91e2ea2ed", "shasum": "" }, "require": { @@ -6078,7 +6083,7 @@ "description": "Converts CSS selectors to XPath expressions", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/css-selector/tree/v8.0.6" + "source": "https://github.com/symfony/css-selector/tree/v8.0.8" }, "funding": [ { @@ -6098,7 +6103,7 @@ "type": "tidelift" } ], - "time": "2026-02-17T13:07:04+00:00" + "time": "2026-03-30T15:14:47+00:00" }, { "name": "symfony/deprecation-contracts", @@ -6169,16 +6174,16 @@ }, { "name": "symfony/error-handler", - "version": "v7.4.4", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/error-handler.git", - "reference": "8da531f364ddfee53e36092a7eebbbd0b775f6b8" + "reference": "8dd79d8af777ee6cba2fd4d98da6ffb839f3c0fa" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/error-handler/zipball/8da531f364ddfee53e36092a7eebbbd0b775f6b8", - "reference": "8da531f364ddfee53e36092a7eebbbd0b775f6b8", + "url": "https://api.github.com/repos/symfony/error-handler/zipball/8dd79d8af777ee6cba2fd4d98da6ffb839f3c0fa", + "reference": "8dd79d8af777ee6cba2fd4d98da6ffb839f3c0fa", "shasum": "" }, "require": { @@ -6227,7 +6232,7 @@ "description": "Provides tools to manage errors and ease debugging PHP code", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/error-handler/tree/v7.4.4" + "source": "https://github.com/symfony/error-handler/tree/v7.4.8" }, "funding": [ { @@ -6247,20 +6252,20 @@ "type": "tidelift" } ], - "time": "2026-01-20T16:42:42+00:00" + "time": "2026-03-24T13:12:05+00:00" }, { "name": "symfony/event-dispatcher", - "version": "v8.0.4", + "version": "v8.0.8", "source": { "type": "git", "url": "https://github.com/symfony/event-dispatcher.git", - "reference": "99301401da182b6cfaa4700dbe9987bb75474b47" + "reference": "f662acc6ab22a3d6d716dcb44c381c6002940df6" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/event-dispatcher/zipball/99301401da182b6cfaa4700dbe9987bb75474b47", - "reference": "99301401da182b6cfaa4700dbe9987bb75474b47", + "url": "https://api.github.com/repos/symfony/event-dispatcher/zipball/f662acc6ab22a3d6d716dcb44c381c6002940df6", + "reference": "f662acc6ab22a3d6d716dcb44c381c6002940df6", "shasum": "" }, "require": { @@ -6312,7 +6317,7 @@ "description": "Provides tools that allow your application components to communicate with each other by dispatching events and listening to them", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/event-dispatcher/tree/v8.0.4" + "source": "https://github.com/symfony/event-dispatcher/tree/v8.0.8" }, "funding": [ { @@ -6332,7 +6337,7 @@ "type": "tidelift" } ], - "time": "2026-01-05T11:45:55+00:00" + "time": "2026-03-30T15:14:47+00:00" }, { "name": "symfony/event-dispatcher-contracts", @@ -6412,16 +6417,16 @@ }, { "name": "symfony/finder", - "version": "v7.4.6", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/finder.git", - "reference": "8655bf1076b7a3a346cb11413ffdabff50c7ffcf" + "reference": "e0be088d22278583a82da281886e8c3592fbf149" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/finder/zipball/8655bf1076b7a3a346cb11413ffdabff50c7ffcf", - "reference": "8655bf1076b7a3a346cb11413ffdabff50c7ffcf", + "url": "https://api.github.com/repos/symfony/finder/zipball/e0be088d22278583a82da281886e8c3592fbf149", + "reference": "e0be088d22278583a82da281886e8c3592fbf149", "shasum": "" }, "require": { @@ -6456,7 +6461,7 @@ "description": "Finds files and directories via an intuitive fluent interface", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/finder/tree/v7.4.6" + "source": "https://github.com/symfony/finder/tree/v7.4.8" }, "funding": [ { @@ -6476,20 +6481,20 @@ "type": "tidelift" } ], - "time": "2026-01-29T09:40:50+00:00" + "time": "2026-03-24T13:12:05+00:00" }, { "name": "symfony/http-foundation", - "version": "v7.4.7", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/http-foundation.git", - "reference": "f94b3e7b7dafd40e666f0c9ff2084133bae41e81" + "reference": "9381209597ec66c25be154cbf2289076e64d1eab" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/http-foundation/zipball/f94b3e7b7dafd40e666f0c9ff2084133bae41e81", - "reference": "f94b3e7b7dafd40e666f0c9ff2084133bae41e81", + "url": "https://api.github.com/repos/symfony/http-foundation/zipball/9381209597ec66c25be154cbf2289076e64d1eab", + "reference": "9381209597ec66c25be154cbf2289076e64d1eab", "shasum": "" }, "require": { @@ -6538,7 +6543,7 @@ "description": "Defines an object-oriented layer for the HTTP specification", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/http-foundation/tree/v7.4.7" + "source": "https://github.com/symfony/http-foundation/tree/v7.4.8" }, "funding": [ { @@ -6558,20 +6563,20 @@ "type": "tidelift" } ], - "time": "2026-03-06T13:15:18+00:00" + "time": "2026-03-24T13:12:05+00:00" }, { "name": "symfony/http-kernel", - "version": "v7.4.7", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/http-kernel.git", - "reference": "3b3fcf386c809be990c922e10e4c620d6367cab1" + "reference": "017e76ad089bac281553389269e259e155935e1a" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/http-kernel/zipball/3b3fcf386c809be990c922e10e4c620d6367cab1", - "reference": "3b3fcf386c809be990c922e10e4c620d6367cab1", + "url": "https://api.github.com/repos/symfony/http-kernel/zipball/017e76ad089bac281553389269e259e155935e1a", + "reference": "017e76ad089bac281553389269e259e155935e1a", "shasum": "" }, "require": { @@ -6657,7 +6662,7 @@ "description": "Provides a structured process for converting a Request into a Response", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/http-kernel/tree/v7.4.7" + "source": "https://github.com/symfony/http-kernel/tree/v7.4.8" }, "funding": [ { @@ -6677,20 +6682,20 @@ "type": "tidelift" } ], - "time": "2026-03-06T16:33:18+00:00" + "time": "2026-03-31T20:57:01+00:00" }, { "name": "symfony/mailer", - "version": "v7.4.6", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/mailer.git", - "reference": "b02726f39a20bc65e30364f5c750c4ddbf1f58e9" + "reference": "f6ea532250b476bfc1b56699b388a1bdbf168f62" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/mailer/zipball/b02726f39a20bc65e30364f5c750c4ddbf1f58e9", - "reference": "b02726f39a20bc65e30364f5c750c4ddbf1f58e9", + "url": "https://api.github.com/repos/symfony/mailer/zipball/f6ea532250b476bfc1b56699b388a1bdbf168f62", + "reference": "f6ea532250b476bfc1b56699b388a1bdbf168f62", "shasum": "" }, "require": { @@ -6741,7 +6746,7 @@ "description": "Helps sending emails", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/mailer/tree/v7.4.6" + "source": "https://github.com/symfony/mailer/tree/v7.4.8" }, "funding": [ { @@ -6761,20 +6766,20 @@ "type": "tidelift" } ], - "time": "2026-02-25T16:50:00+00:00" + "time": "2026-03-24T13:12:05+00:00" }, { "name": "symfony/mime", - "version": "v7.4.7", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/mime.git", - "reference": "da5ab4fde3f6c88ab06e96185b9922f48b677cd1" + "reference": "6df02f99998081032da3407a8d6c4e1dcb5d4379" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/mime/zipball/da5ab4fde3f6c88ab06e96185b9922f48b677cd1", - "reference": "da5ab4fde3f6c88ab06e96185b9922f48b677cd1", + "url": "https://api.github.com/repos/symfony/mime/zipball/6df02f99998081032da3407a8d6c4e1dcb5d4379", + "reference": "6df02f99998081032da3407a8d6c4e1dcb5d4379", "shasum": "" }, "require": { @@ -6830,7 +6835,7 @@ "mime-type" ], "support": { - "source": "https://github.com/symfony/mime/tree/v7.4.7" + "source": "https://github.com/symfony/mime/tree/v7.4.8" }, "funding": [ { @@ -6850,7 +6855,7 @@ "type": "tidelift" } ], - "time": "2026-03-05T15:24:09+00:00" + "time": "2026-03-30T14:11:46+00:00" }, { "name": "symfony/polyfill-ctype", @@ -7683,16 +7688,16 @@ }, { "name": "symfony/process", - "version": "v7.4.5", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/process.git", - "reference": "608476f4604102976d687c483ac63a79ba18cc97" + "reference": "60f19cd3badc8de688421e21e4305eba50f8089a" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/process/zipball/608476f4604102976d687c483ac63a79ba18cc97", - "reference": "608476f4604102976d687c483ac63a79ba18cc97", + "url": "https://api.github.com/repos/symfony/process/zipball/60f19cd3badc8de688421e21e4305eba50f8089a", + "reference": "60f19cd3badc8de688421e21e4305eba50f8089a", "shasum": "" }, "require": { @@ -7724,7 +7729,7 @@ "description": "Executes commands in sub-processes", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/process/tree/v7.4.5" + "source": "https://github.com/symfony/process/tree/v7.4.8" }, "funding": [ { @@ -7744,7 +7749,7 @@ "type": "tidelift" } ], - "time": "2026-01-26T15:07:59+00:00" + "time": "2026-03-24T13:12:05+00:00" }, { "name": "symfony/psr-http-message-bridge", @@ -7836,16 +7841,16 @@ }, { "name": "symfony/routing", - "version": "v7.4.6", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/routing.git", - "reference": "238d749c56b804b31a9bf3e26519d93b65a60938" + "reference": "9608de9873ec86e754fb6c0a0fa7e5f1a960eb6b" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/routing/zipball/238d749c56b804b31a9bf3e26519d93b65a60938", - "reference": "238d749c56b804b31a9bf3e26519d93b65a60938", + "url": "https://api.github.com/repos/symfony/routing/zipball/9608de9873ec86e754fb6c0a0fa7e5f1a960eb6b", + "reference": "9608de9873ec86e754fb6c0a0fa7e5f1a960eb6b", "shasum": "" }, "require": { @@ -7897,7 +7902,7 @@ "url" ], "support": { - "source": "https://github.com/symfony/routing/tree/v7.4.6" + "source": "https://github.com/symfony/routing/tree/v7.4.8" }, "funding": [ { @@ -7917,7 +7922,7 @@ "type": "tidelift" } ], - "time": "2026-02-25T16:50:00+00:00" + "time": "2026-03-24T13:12:05+00:00" }, { "name": "symfony/service-contracts", @@ -8008,16 +8013,16 @@ }, { "name": "symfony/string", - "version": "v8.0.6", + "version": "v8.0.8", "source": { "type": "git", "url": "https://github.com/symfony/string.git", - "reference": "6c9e1108041b5dce21a9a4984b531c4923aa9ec4" + "reference": "ae9488f874d7603f9d2dfbf120203882b645d963" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/string/zipball/6c9e1108041b5dce21a9a4984b531c4923aa9ec4", - "reference": "6c9e1108041b5dce21a9a4984b531c4923aa9ec4", + "url": "https://api.github.com/repos/symfony/string/zipball/ae9488f874d7603f9d2dfbf120203882b645d963", + "reference": "ae9488f874d7603f9d2dfbf120203882b645d963", "shasum": "" }, "require": { @@ -8074,7 +8079,7 @@ "utf8" ], "support": { - "source": "https://github.com/symfony/string/tree/v8.0.6" + "source": "https://github.com/symfony/string/tree/v8.0.8" }, "funding": [ { @@ -8094,20 +8099,20 @@ "type": "tidelift" } ], - "time": "2026-02-09T10:14:57+00:00" + "time": "2026-03-30T15:14:47+00:00" }, { "name": "symfony/translation", - "version": "v8.0.6", + "version": "v8.0.8", "source": { "type": "git", "url": "https://github.com/symfony/translation.git", - "reference": "13ff19bcf2bea492d3c2fbeaa194dd6f4599ce1b" + "reference": "27c03ae3940de24ba2f71cfdbac824f2aa1fdf2f" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/translation/zipball/13ff19bcf2bea492d3c2fbeaa194dd6f4599ce1b", - "reference": "13ff19bcf2bea492d3c2fbeaa194dd6f4599ce1b", + "url": "https://api.github.com/repos/symfony/translation/zipball/27c03ae3940de24ba2f71cfdbac824f2aa1fdf2f", + "reference": "27c03ae3940de24ba2f71cfdbac824f2aa1fdf2f", "shasum": "" }, "require": { @@ -8167,7 +8172,7 @@ "description": "Provides tools to internationalize your application", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/translation/tree/v8.0.6" + "source": "https://github.com/symfony/translation/tree/v8.0.8" }, "funding": [ { @@ -8187,7 +8192,7 @@ "type": "tidelift" } ], - "time": "2026-02-17T13:07:04+00:00" + "time": "2026-03-30T15:14:47+00:00" }, { "name": "symfony/translation-contracts", @@ -8273,16 +8278,16 @@ }, { "name": "symfony/uid", - "version": "v7.4.4", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/uid.git", - "reference": "7719ce8aba76be93dfe249192f1fbfa52c588e36" + "reference": "6883ebdf7bf6a12b37519dbc0df62b0222401b56" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/uid/zipball/7719ce8aba76be93dfe249192f1fbfa52c588e36", - "reference": "7719ce8aba76be93dfe249192f1fbfa52c588e36", + "url": "https://api.github.com/repos/symfony/uid/zipball/6883ebdf7bf6a12b37519dbc0df62b0222401b56", + "reference": "6883ebdf7bf6a12b37519dbc0df62b0222401b56", "shasum": "" }, "require": { @@ -8327,7 +8332,7 @@ "uuid" ], "support": { - "source": "https://github.com/symfony/uid/tree/v7.4.4" + "source": "https://github.com/symfony/uid/tree/v7.4.8" }, "funding": [ { @@ -8347,20 +8352,20 @@ "type": "tidelift" } ], - "time": "2026-01-03T23:30:35+00:00" + "time": "2026-03-24T13:12:05+00:00" }, { "name": "symfony/var-dumper", - "version": "v7.4.6", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/var-dumper.git", - "reference": "045321c440ac18347b136c63d2e9bf28a2dc0291" + "reference": "9510c3966f749a1d1ff0059e1eabef6cc621e7fd" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/var-dumper/zipball/045321c440ac18347b136c63d2e9bf28a2dc0291", - "reference": "045321c440ac18347b136c63d2e9bf28a2dc0291", + "url": "https://api.github.com/repos/symfony/var-dumper/zipball/9510c3966f749a1d1ff0059e1eabef6cc621e7fd", + "reference": "9510c3966f749a1d1ff0059e1eabef6cc621e7fd", "shasum": "" }, "require": { @@ -8414,7 +8419,7 @@ "dump" ], "support": { - "source": "https://github.com/symfony/var-dumper/tree/v7.4.6" + "source": "https://github.com/symfony/var-dumper/tree/v7.4.8" }, "funding": [ { @@ -8434,7 +8439,7 @@ "type": "tidelift" } ], - "time": "2026-02-15T10:53:20+00:00" + "time": "2026-03-30T13:44:50+00:00" }, { "name": "tightenco/ziggy", diff --git a/config/integrations.php b/config/integrations.php index 2cadfcb..41d53c8 100644 --- a/config/integrations.php +++ b/config/integrations.php @@ -109,6 +109,16 @@ 'api_key_url' => 'https://openrouter.ai/keys', ], + 'perplexity' => [ + 'category' => 'ai-models', + 'name' => 'Perplexity', + 'description' => 'Search-native reasoning models', + 'icon' => 'ph:compass', + 'default_url' => 'https://api.perplexity.ai', + 'api_format' => 'openai_compat', + 'api_key_url' => 'https://www.perplexity.ai/settings/api', + ], + 'minimax' => [ 'category' => 'ai-models', 'name' => 'MiniMax Coding Plan', @@ -149,20 +159,20 @@ 'api_key_url' => 'https://platform.moonshot.ai/console', ], - 'glm' => [ + 'z-api' => [ 'category' => 'ai-models', - 'name' => 'GLM (Zhipu AI)', - 'description' => 'General-purpose Chinese LLM', + 'name' => 'Z.AI API', + 'description' => 'Zhipu AI standard API endpoint', 'icon' => 'ph:brain', 'default_url' => \OpenCompany\PrismRelay\Providers\Glm::URL, 'api_format' => 'openai_compat', 'api_key_url' => 'https://open.bigmodel.cn/', ], - 'glm-coding' => [ + 'z' => [ 'category' => 'ai-models', - 'name' => 'GLM Coding Plan', - 'description' => 'Specialized coding LLM via Zhipu Coding Plan', + 'name' => 'Z.AI Coding Plan', + 'description' => 'Zhipu AI coding-plan endpoint', 'icon' => 'ph:code', 'default_url' => \OpenCompany\PrismRelay\Providers\GlmCoding::URL, 'api_format' => 'openai_compat', diff --git a/config/memory.php b/config/memory.php index 09fe1c5..87a197b 100644 --- a/config/memory.php +++ b/config/memory.php @@ -137,6 +137,22 @@ ], ], + /* + |-------------------------------------------------------------------------- + | Context Budget + |-------------------------------------------------------------------------- + | + | Shared token-budget thresholds used by flush, compaction, and retry + | protection. Context windows come from prism-relay when the provider is + | known; the local model map above remains the fallback registry. + | + */ + + 'budget' => [ + 'warning_ratio' => (float) env('MEMORY_WARNING_RATIO', 0.65), + 'blocking_margin_tokens' => (int) env('MEMORY_BLOCKING_MARGIN_TOKENS', 1_024), + ], + /* |-------------------------------------------------------------------------- | Memory Scope @@ -171,6 +187,14 @@ 'system_prompt_fallback_reserve' => 10_000, 'summary_model' => env('MEMORY_SUMMARY_MODEL', 'anthropic:claude-sonnet-4-5-20250929'), 'summary_max_tokens' => 2_000, + 'circuit_breaker' => [ + 'after_failures' => (int) env('MEMORY_COMPACTION_CIRCUIT_AFTER_FAILURES', 3), + 'cooldown_minutes' => (int) env('MEMORY_COMPACTION_CIRCUIT_COOLDOWN_MINUTES', 30), + ], + 'memory_extraction' => [ + 'enabled' => env('MEMORY_COMPACTION_EXTRACT_TO_LOG', true), + 'max_items' => (int) env('MEMORY_COMPACTION_EXTRACT_MAX_ITEMS', 8), + ], ], /* @@ -204,4 +228,40 @@ 'max_flushes_per_cycle' => 1, ], + /* + |-------------------------------------------------------------------------- + | Retry Context Pruning + |-------------------------------------------------------------------------- + | + | Checkpoint resume can append many historical read tool results to the + | retry prompt. Pruning clears only older OpenCompany read-tool payloads + | while leaving recent results and write-side effects intact. + | + */ + + 'pruning' => [ + 'enabled' => env('MEMORY_PRUNING_ENABLED', true), + 'keep_recent_read_results' => (int) env('MEMORY_PRUNING_KEEP_RECENT_READ_RESULTS', 2), + 'min_result_tokens' => (int) env('MEMORY_PRUNING_MIN_RESULT_TOKENS', 400), + 'min_total_saved_tokens' => (int) env('MEMORY_PRUNING_MIN_TOTAL_SAVED_TOKENS', 1_000), + ], + + /* + |-------------------------------------------------------------------------- + | Tool Result Checkpoints + |-------------------------------------------------------------------------- + | + | Retry checkpoint tool results can get large enough to bloat the prompt + | context and task-step payloads. Large string results are truncated for the + | checkpoint while the full payload is persisted on durable storage. + | + */ + + 'tool_results' => [ + 'max_lines' => (int) env('MEMORY_TOOL_RESULT_MAX_LINES', 2_000), + 'max_bytes' => (int) env('MEMORY_TOOL_RESULT_MAX_BYTES', 50_000), + 'disk' => env('MEMORY_TOOL_RESULT_DISK', 'local'), + 'path' => env('MEMORY_TOOL_RESULT_PATH', 'agent-tool-results'), + ], + ]; diff --git a/config/prism.php b/config/prism.php index 1086754..45ab183 100644 --- a/config/prism.php +++ b/config/prism.php @@ -61,11 +61,17 @@ 'x_title' => env('OPENROUTER_SITE_X_TITLE', null), ], ], - // GLM 4.7 (Zhipu AI) - OpenAI-compatible API - // Coding plan uses dedicated endpoint - 'glm' => [ - 'api_key' => env('GLM_API_KEY', ''), - 'url' => env('GLM_URL', 'https://api.z.ai/api/coding/paas/v4'), + 'perplexity' => [ + 'api_key' => env('PERPLEXITY_API_KEY', ''), + 'url' => env('PERPLEXITY_URL', 'https://api.perplexity.ai'), + ], + 'z' => [ + 'api_key' => env('ZAI_API_KEY', ''), + 'url' => env('Z_URL', 'https://api.z.ai/api/coding/paas/v4'), + ], + 'z-api' => [ + 'api_key' => env('ZAI_API_KEY', ''), + 'url' => env('Z_API_URL', 'https://open.bigmodel.cn/api/paas/v4'), ], ], ]; diff --git a/database/migrations/2026_04_05_000001_add_multi_account_to_integration_settings.php b/database/migrations/2026_04_05_000001_add_multi_account_to_integration_settings.php new file mode 100644 index 0000000..a2ed49d --- /dev/null +++ b/database/migrations/2026_04_05_000001_add_multi_account_to_integration_settings.php @@ -0,0 +1,45 @@ +string('account_alias', 32)->default('')->after('integration_id'); + $table->boolean('is_default')->default(true)->after('enabled'); + + $table->dropUnique(['workspace_id', 'integration_id']); + $table->unique(['workspace_id', 'integration_id', 'account_alias']); + }); + + // MCP servers: multi-account support + Schema::table('mcp_servers', function (Blueprint $table) { + $table->string('account_alias', 32)->default('')->after('slug'); + + $table->dropUnique(['workspace_id', 'slug']); + $table->unique(['workspace_id', 'slug', 'account_alias']); + }); + } + + public function down(): void + { + Schema::table('mcp_servers', function (Blueprint $table) { + $table->dropUnique(['workspace_id', 'slug', 'account_alias']); + $table->unique(['workspace_id', 'slug']); + + $table->dropColumn('account_alias'); + }); + + Schema::table('integration_settings', function (Blueprint $table) { + $table->dropUnique(['workspace_id', 'integration_id', 'account_alias']); + $table->unique(['workspace_id', 'integration_id']); + + $table->dropColumn(['account_alias', 'is_default']); + }); + } +}; diff --git a/database/migrations/2026_04_09_120000_add_compaction_failure_tracking_to_conversation_summaries_table.php b/database/migrations/2026_04_09_120000_add_compaction_failure_tracking_to_conversation_summaries_table.php new file mode 100644 index 0000000..cc24480 --- /dev/null +++ b/database/migrations/2026_04_09_120000_add_compaction_failure_tracking_to_conversation_summaries_table.php @@ -0,0 +1,30 @@ +integer('compaction_failure_count')->default(0)->after('flush_count'); + $table->timestamp('last_compaction_failed_at')->nullable()->after('compaction_failure_count'); + $table->timestamp('compaction_circuit_open_until')->nullable()->after('last_compaction_failed_at'); + $table->text('last_compaction_error')->nullable()->after('compaction_circuit_open_until'); + }); + } + + public function down(): void + { + Schema::table('conversation_summaries', function (Blueprint $table) { + $table->dropColumn([ + 'compaction_failure_count', + 'last_compaction_failed_at', + 'compaction_circuit_open_until', + 'last_compaction_error', + ]); + }); + } +}; diff --git a/docs/INDEX.md b/docs/INDEX.md index 99a1d3d..e4f0d1d 100644 --- a/docs/INDEX.md +++ b/docs/INDEX.md @@ -15,16 +15,17 @@ | [observability.md](architecture/observability.md) | Monitoring, metrics, logging, error tracking, health checks, alerting | Building admin/ops features | | [ai-tool-packages.md](architecture/ai-tool-packages.md) | AI tool package ecosystem — ToolProvider contract, credential abstraction, hybrid ToolRegistry, building new tool packages | Creating or modifying AI tool packages, understanding the plugin architecture | | [interagent-comms.md](architecture/interagent-comms.md) | Inter-agent communication protocol — ContactAgent tool with ask/delegate/notify patterns, DM channels, delegation tracking | Building or debugging agent-to-agent communication | +| [kosmokrator-reuse-audit.md](architecture/kosmokrator-reuse-audit.md) | Full audit of what OpenCompany should reuse, adapt, or skip from KosmoKrator | Planning cross-repo reuse, agent runtime work, metadata consolidation | +| [runtime-alignment-implementation-audit.md](architecture/runtime-alignment-implementation-audit.md) | Post-implementation audit — findings now tracked as Plane issues (OC-1 through OC-6) | Reviewing audit results and fix status | | [ai-tool-strategy.md](strategy/ai-tool-strategy.md) | AI tool ecosystem strategy — package publishing, MCP export, missing tool analysis, Fair Code growth | Planning tool ecosystem, evaluating new tool integrations | ## Planning & Implementation | Document | What it covers | Read when... | |----------|---------------|--------------| -| [implementation-todo.md](planning/implementation-todo.md) | Complete task breakdown across 8+ phases with dependencies, priority order, and file manifests | Starting implementation work, tracking progress | -| [memory-implementation.md](planning/memory-implementation.md) | Memory system design — 6 phases: pgvector, chunking, embedding, hybrid search, compaction, flush **(Status: Complete)** | Understanding the memory architecture | +| [memory-implementation.md](planning/memory-implementation.md) | Memory system architecture reference — STM/LTM model, phase summary **(Status: Complete)** | Understanding the memory architecture | +| [kosmokrator-runtime-alignment-checklist.md](planning/kosmokrator-runtime-alignment-checklist.md) | Checklist for aligning with KosmoKrator/prism-relay — completed work + pointers to open Plane issues | Reviewing runtime-alignment status | | [external-channel-sync.md](external-channel-sync.md) | Bidirectional sync design for Telegram/Discord — message tracking, edit/pin/react sync, channel discovery **(Telegram: Done, Discord: Not started)** | Working on external platform integration | -| [todo.md](todo.md) | Feature TODO list — Docs (starring, search, publish controls), Agent system (budget approval) | Quick check of remaining feature work | | [discord.md](discord.md) | Discord integration documentation — architecture, sidecar, configuration | Setting up or debugging Discord integration | | [codex-subscription-auth.md](planning/codex-subscription-auth.md) | Codex subscription authentication planning | Working on Codex integration | @@ -49,7 +50,6 @@ | Document | What it covers | Read when... | |----------|---------------|--------------| -| [feature-test-map.md](testing/feature-test-map.md) | Checklist of every feature, button, and interaction to test (~500 items) | Manual QA testing | | [qa-strategy.md](testing/qa-strategy.md) | Testing pyramid, CI/CD pipeline, coverage targets, test data management | Setting up automated test infrastructure | ## Tools & Features diff --git a/docs/architecture/kosmokrator-reuse-audit.md b/docs/architecture/kosmokrator-reuse-audit.md new file mode 100644 index 0000000..aa8eaed --- /dev/null +++ b/docs/architecture/kosmokrator-reuse-audit.md @@ -0,0 +1,953 @@ +# KosmoKrator Reuse Audit + +> Full list of what OpenCompany should reuse, adapt, or avoid from the `kosmokrator` repo. +> Scope: compare `/Users/rutger/Sites/opencompany` against `/Users/rutger/Sites/kosmokrator` and identify practical reuse opportunities. + +--- + +## Executive Summary + +OpenCompany and KosmoKrator already share the correct low-level foundation: + +- `prism-php/prism` +- `opencompanyapp/prism-relay` +- `opencompany/prism-codex` +- `opencompanyapp/integration-core` + +That means the main reuse opportunity is **agent runtime infrastructure**, not UI or shell code. + +The strongest reusable areas from KosmoKrator are: + +1. Provider and model cataloging +2. Context management and prompt budgeting +3. Tool result deduplication and output truncation +4. Typed settings schema +5. Skill loading and project-local instruction patterns +6. Subagent orchestration concepts + +The weakest reuse areas are: + +- Symfony TUI and ANSI renderer code +- CLI-specific shell and filesystem tools +- Local desktop install and self-update flows +- Terminal-specific permission UX + +--- + +## Repo Shape + +### OpenCompany + +- Product type: Laravel multi-tenant web app +- Main concerns: workspaces, channels, tasks, documents, approvals, integrations, agent collaboration +- Shared agent tool files: `158` files under `app/Agents/Tools` +- Built-in provider classes: `15` +- Test files: `79` + +Key files: + +- `composer.json` +- `app/Agents/OpenCompanyAgent.php` +- `app/Agents/Tools/ToolRegistry.php` +- `app/Services/Memory/ConversationCompactionService.php` +- `app/Services/Memory/ModelContextRegistry.php` +- `app/Agents/Tools/Agents/ContactAgent.php` + +### KosmoKrator + +- Product type: local CLI coding agent +- Main concerns: code editing, terminal UX, local permissions, session persistence, tool execution, subagent swarms +- Tool files: `44` files under `src/Tool` +- Total PHP source files: much larger runtime core than OpenCompany's agent layer +- Test files: `196` + +Key files: + +- `composer.json` +- `src/Agent/ContextManager.php` +- `src/Agent/SubagentOrchestrator.php` +- `src/Settings/SettingsSchema.php` +- `src/LLM/ProviderCatalog.php` +- `src/LLM/PromptFrameBuilder.php` +- `src/Agent/ToolResultDeduplicator.php` +- `src/Agent/OutputTruncator.php` +- `src/Skill/SkillLoader.php` + +--- + +## Shared Foundation Already In Place + +These are already shared and should remain the main cross-repo seam. + +### 1. Prism / relay / integration core + +Both repos depend on: + +- `prism-php/prism` +- `opencompanyapp/prism-relay` +- `opencompany/prism-codex` +- `opencompanyapp/integration-core` + +Why this matters: + +- LLM provider support should converge here, not inside either app +- Tool contracts should converge here, not inside Laravel-only or Symfony-only abstractions +- Model metadata should become shared here + +Current issue: + +- OpenCompany still keeps provider metadata in `config/integrations.php` +- KosmoKrator keeps richer model/provider metadata in `config/models.yaml`, `config/prism.yaml`, and `src/LLM/ProviderCatalog.php` + +Recommendation: + +- Move provider and model metadata ownership into `prism-relay` +- Make both repos consume the same metadata source + +Priority: `P0` + +--- + +## Reuse Directly Or With Minimal Extraction + +These are the best candidates to port first. + +### 2. Prompt frame splitting for cacheable system prompts + +Source: + +- `src/LLM/PromptFrameBuilder.php` + +Why reuse: + +- OpenCompany already builds prompt sections in `app/Agents/OpenCompanyAgent.php` +- KosmoKrator already splits stable prompt prefix from volatile task content +- This should reduce token waste and improve prompt cache hit rates + +OpenCompany fit: + +- Add a web-safe version of `PromptFrameBuilder` +- Split static identity/instructions from volatile sections such as current task, channel context, and recent runtime state + +Priority: `P0` + +Reuse level: `Direct logic port` + +--- + +### 3. Tool result deduplication + +Source: + +- `src/Agent/ToolResultDeduplicator.php` + +What it does: + +- Replaces stale or repeated tool outputs with short placeholders +- Handles exact duplicates +- Handles stale `file_read` results after edits +- Handles `grep` results later superseded by `file_read` + +Why reuse: + +- OpenCompany agent context can accumulate repeated tool output +- This is pure context hygiene with low product risk + +OpenCompany fit: + +- Adapt the deduper for Laravel AI message/value object types +- Run it before building the final message set sent to the model +- Apply it to OpenCompany's document, task, file, and search style tools where outputs repeat + +Priority: `P0` + +Reuse level: `Adapted port` + +--- + +### 4. Output truncation with persisted full result + +Source: + +- `src/Agent/OutputTruncator.php` + +What it does: + +- Caps large tool output by line and byte count +- Saves the full result to disk +- Keeps only a concise truncated version in the model context + +Why reuse: + +- OpenCompany has many tools that can return oversized payloads +- This prevents context bloat from tables, documents, search results, raw external API payloads, and generated content + +OpenCompany fit: + +- Replace disk persistence with DB or object-storage backed persistence +- Keep the same policy: short preview in prompt, full result stored elsewhere +- Expose a retrieval path for the agent if it needs to inspect the full output later + +Priority: `P0` + +Reuse level: `Adapt architecture, not storage implementation` + +--- + +### 5. Provider catalog and richer provider selection layer + +Source: + +- `src/LLM/ProviderCatalog.php` +- `config/models.yaml` +- `config/prism.yaml` + +What it does well: + +- Provider labels, descriptions, auth modes, ordering +- Provider and model option generation +- Free-text model support for selected providers +- Pulls metadata from relay registry instead of hardcoding everything locally + +Why reuse: + +- OpenCompany's provider config is flatter and more static +- Model metadata is split across config and memory-specific lookup logic +- KosmoKrator has a better abstraction for presenting providers and model capabilities + +OpenCompany fit: + +- Use the same provider catalog pattern for the admin/provider settings UI +- Use shared metadata for model capabilities, pricing, context windows, and default models +- Eliminate duplicate "source of truth" between provider setup and memory budgeting + +Priority: `P0` + +Reuse level: `Extract to shared package or port pattern` + +--- + +### 6. Settings schema pattern + +Source: + +- `src/Settings/SettingsSchema.php` + +What it does: + +- Central typed registry of settings +- Aliases, categories, labels, defaults, effect timing +- Clear separation between storage and schema + +Why reuse: + +- OpenCompany has many agent/runtime settings but no equally explicit typed schema layer +- A schema-driven settings system would simplify validation, admin UI generation, defaults, and API exposure + +OpenCompany fit: + +- Build an `AgentSettingsSchema` or `RuntimeSettingsSchema` +- Use it for workspace defaults, per-agent overrides, and feature flags +- Drive admin forms and validation from schema metadata + +Priority: `P1` + +Reuse level: `Pattern reuse with Laravel implementation` + +--- + +## Pruning, Compaction, And Prompt Caching + +This is the main runtime gap between OpenCompany and KosmoKrator. + +OpenCompany currently has: + +- summary-based conversation compaction in `app/Services/Memory/ConversationCompactionService.php` +- soft-zone memory flushing in `app/Services/Memory/MemoryFlushService.php` +- a local `ModelContextRegistry` for context-window lookup +- basic prompt splitting, checkpoint truncation, and read-tool deduplication + +KosmoKrator adds a fuller context pipeline: + +- `src/Agent/ContextManager.php` coordinates warning, pruning, compaction, and fallback behaviour +- `src/Agent/ContextPruner.php` does cheap micro-pruning before full compaction +- `src/Agent/ContextCompactor.php` builds a structured compaction plan and extracts durable memories +- `src/Agent/ContextBudget.php` centralises warning, auto-compact, and blocking thresholds +- `src/LLM/PromptFrameBuilder.php` is wired into `PrismService` + +`prism-relay` already provides the cache-planning layer: + +- `src/Relay.php` +- `src/Caching/PromptCachePlanner.php` +- `src/Caching/PromptCacheOrchestrator.php` +- `src/Meta/ProviderMeta.php` + +### What OpenCompany should change now + +#### 7. Replace `ModelContextRegistry` with relay-backed metadata + +Current problem: + +- OpenCompany keeps a separate context-window registry in `app/Services/Memory/ModelContextRegistry.php` +- `prism-relay` already knows model context windows via `ProviderMeta::contextWindow()` + +Recommendation: + +- Make `ModelContextRegistry` a thin adapter over `OpenCompany\\PrismRelay\\Meta\\ProviderMeta` +- Keep `AppSetting` overrides on top +- Remove most of the duplicated built-in model registry over time + +Why: + +- One source of truth for context windows +- Better alignment between provider selection, budgeting, and pricing/cache capability + +Priority: `P0` + +#### 8. Introduce a real context budget service + +Current problem: + +- OpenCompany repeats threshold math across `ConversationCompactionService`, `MemoryFlushService`, and `AgentRespondJob` +- The thresholds are estimated ad hoc instead of coming from one snapshot object + +Recommendation: + +- Add an OpenCompany `ContextBudget` service modeled after KosmoKrator's `src/Agent/ContextBudget.php` +- Use it for: + - warning threshold + - flush threshold + - compaction threshold + - hard blocking threshold + - observability snapshots + +Why: + +- Consistent trigger behavior +- Easier tuning per model/provider +- Cleaner logs and debugging + +Priority: `P0` + +#### 9. Add micro-pruning before compaction + +Current problem: + +- OpenCompany jumps from "normal history" to full summarization +- It now truncates checkpointed tool results and deduplicates identical read results, but it still lacks a cheap middle step + +Recommendation: + +- Add an OpenCompany-specific `ContextPruner` +- Scope it to old, large, read-heavy tool results: + - file reads + - search results + - thread/message reads + - document fetches + - table/list reads +- Protect recent turns and recent tool outputs +- Only accept a prune pass if the savings cross a minimum threshold + +Do not port directly: + +- KosmoKrator's `grep`, `glob`, `shell_read`, `bash` assumptions + +Why: + +- Reduces compaction frequency +- Saves tokens without paying an LLM summarization cost +- Fits OpenCompany's many structured read tools well + +Priority: `P0` + +#### 10. Make compaction more structured and failure-aware + +Current problem: + +- OpenCompany compaction is functional but simple: summarize older messages, store summary, continue +- It has no circuit breaker, no hard fallback path, and no structured compaction plan object + +Recommendation: + +- Keep the current `ConversationSummary` persistence model +- Add KosmoKrator-style concepts: + - explicit compaction plan object + - protected context + - compaction failure counter / circuit breaker + - hard fallback when compaction repeatedly fails + - summary-to-memory extraction pass + +Why: + +- More resilient under long-running channels +- Better preservation of durable facts +- Less risk of repeated compaction thrashing + +Priority: `P1` + +#### 11. Wire prompt caching through `prism-relay`, not just prompt splitting + +**Status: DONE** — Resolved via `CachingPrismGateway` in `prism-relay/src/Bridge/`. + +OpenCompany now uses `CachingPrismGateway` (extends `PrismGateway`) for all AI SDK drivers. Before each `prompt()` call, a `SystemPromptBag` with split `[stable, volatile]` prompts is bound in the container. The gateway reads the bag and calls `Relay::planPromptCache()` to annotate system prompts and messages with provider-specific cache control (Anthropic ephemeral, Gemini dedicated, OpenAI auto, OpenRouter ephemeral). No `laravel/ai` vendor patches required. + +Priority: `P0` — ~~resolved~~ + +### What OpenCompany should not copy directly + +#### 12. Do not copy KosmoKrator's pruning rules literally + +Skip direct ports of: + +- `grep` +- `glob` +- `bash` +- `shell_read` +- filesystem-specific stale-read heuristics + +Reason: + +- OpenCompany is not a local coding shell +- Its equivalent high-volume context comes from workspace tools, not Unix tools + +Priority: `P0` + +### Recommended implementation order for this area + +1. Replace context-window lookup with relay-backed metadata. +2. Introduce a shared `ContextBudget` service and move all threshold math there. +3. Add OpenCompany-specific micro-pruning for old read-tool outputs. +4. Improve compaction with a plan object, failure handling, and memory extraction. +5. Wire real provider prompt caching through `prism-relay` at the Laravel AI / Prism gateway layer. + +### Summary judgment + +For OpenCompany: + +- `prism-relay` should become the source of truth for model context windows and prompt-cache planning +- KosmoKrator should inform the context-budget, pruning, and compaction architecture +- The exact pruning heuristics must be rewritten around OpenCompany's read tools and multi-channel/task workflow + +--- + +## Reuse With Meaningful Adaptation + +These are strong ideas, but they need web-native implementations. + +### 7. Full context management pipeline + +Source: + +- `src/Agent/ContextManager.php` +- plus related classes such as `ContextBudget`, `ContextPruner`, `ProtectedContextBuilder`, `MemoryInjector` + +What it does well: + +- Pre-flight context pressure checks +- Micro-pruning before full compaction +- Compaction circuit breaker after repeated failures +- Protected runtime context +- Memory extraction from summaries +- Session-aware context shaping + +Why reuse: + +- OpenCompany's current compaction in `app/Services/Memory/ConversationCompactionService.php` is materially simpler +- KosmoKrator's pipeline is more resilient under pressure + +What to port: + +- Budget snapshots and preflight checks +- Compaction failure circuit breaker +- Protected context concept +- Post-compaction memory extraction +- Distinction between lightweight pruning and expensive compaction + +What not to port as-is: + +- TUI display calls +- local session memory assumptions +- CLI-specific project and directory context + +Priority: `P1` + +Reuse level: `Concept and core logic` + +--- + +### 8. Protected context builder + +Source: + +- `src/Agent/ProtectedContextBuilder.php` + +What it does: + +- Injects runtime facts the model should always see and should not override + +Why reuse: + +- OpenCompany already assembles many system prompt sections, but not all runtime facts are clearly treated as protected +- This helps separate stable policy from mutable user/task context + +OpenCompany fit: + +- Protected facts could include: + - workspace ID and name + - channel ID and type + - acting agent ID and role + - approval mode + - current task ID + - current user visibility scope + +Priority: `P1` + +Reuse level: `Pattern reuse` + +--- + +### 9. Session persistence ideas + +Source: + +- `src/Session/SessionManager.php` + +What it does well: + +- Central session facade +- Message persistence +- auto-title +- history reconstruction +- deduplication on load +- settings and memory scope coordination + +Why reuse: + +- OpenCompany already has first-class message/task/channel persistence, so it does not need the same storage model +- But it can reuse the patterns around resume, checkpointing, and reconstructed history hygiene + +What to reuse: + +- History reconstruction pass +- resume semantics +- checkpoint-aware continuation +- session-level metadata around compaction and recall + +What not to reuse: + +- project-path based scoping +- local SQLite storage assumptions + +Priority: `P2` + +Reuse level: `Patterns only` + +--- + +### 10. Skill system + +Source: + +- `src/Skill/SkillLoader.php` + +What it does well: + +- Loads skills from multiple scopes +- Clear precedence rules +- Lightweight frontmatter-based format + +Why reuse: + +- OpenCompany currently has no real code-level skill system +- Workspace-local or agent-local skills could become a powerful product feature + +Potential OpenCompany adaptation: + +- Workspace skills +- Agent role packs +- Team playbooks +- Department-specific instructions +- Shared procedural knowledge in a structured format + +Suggested storage options: + +- database-backed skill records +- document-backed skills with frontmatter +- repo/project attached skills for external workspaces + +Priority: `P2` + +Reuse level: `Strong feature pattern, not direct file loader copy` + +--- + +## Reuse The Concept, Not The Implementation + +These should influence OpenCompany design, but should not be copied directly. + +### 11. Subagent swarm orchestration + +Source: + +- `src/Agent/SubagentOrchestrator.php` + +What it does well: + +- dependency graphs +- concurrency limits +- sequential groups +- retries +- watchdog cancellation +- background result collection + +Current OpenCompany state: + +- OpenCompany uses agent-to-agent delegation via tasks and channels in `app/Agents/Tools/Agents/ContactAgent.php` +- This is valid for a multi-actor web platform, but less sophisticated as an orchestration runtime + +What to reuse: + +- dependency-aware delegation model +- grouped and sequenced sub-work +- per-agent concurrency limits +- watchdogs and stale-work detection +- richer run-state tracking + +What not to reuse: + +- Amp/Revolt future runtime +- in-process child-agent spawning model +- terminal lifecycle assumptions + +OpenCompany-native implementation should use: + +- Laravel queues +- tasks and task_steps +- events and broadcasts +- database-backed run graphs + +Priority: `P1` + +Reuse level: `Architecture model only` + +--- + +### 12. Permission evaluation chain + +Source: + +- `src/Tool/Permission/PermissionEvaluator.php` + +What it does well: + +- explicit check chain +- fail-closed default +- stage-based policy composition + +Current OpenCompany state: + +- `app/Services/AgentPermissionService.php` is domain-aware and correct for workspaces, agents, folders, channels, and approvals +- But its structure is more app-specific and less composable than the staged evaluator pattern + +What to reuse: + +- explicit evaluation pipeline +- clear deny/ask/allow stages +- central decision object + +What not to reuse: + +- local file path and shell command rules +- Guardian/Argus/Prometheus model semantics + +Priority: `P2` + +Reuse level: `Pattern reuse` + +--- + +### 13. Instruction discovery conventions + +Source: + +- `src/Agent/InstructionLoader.php` + +What it does well: + +- combines global, repo, and local instruction sources with defined precedence + +Why it matters for OpenCompany: + +- OpenCompany already has identity and instruction docs per agent +- The concept could extend to project imports, synced repos, or workspace knowledge packs + +Good reuse targets: + +- imported repository instruction files +- project-specific agent overlays +- workspace-level instruction inheritance + +Priority: `P2` + +Reuse level: `Concept only` + +--- + +## Low Value Or No Value Reuse + +These should stay in KosmoKrator. + +### 14. TUI and ANSI renderer stack + +Sources: + +- `src/UI/Tui/*` +- `src/UI/Ansi/*` + +Why not reuse: + +- OpenCompany is a web app +- the abstractions are clean, but the actual code is terminal-specific + +Possible exception: + +- only reuse naming or state-machine ideas for live agent dashboards + +Priority: `Skip` + +--- + +### 15. CLI shell and file tools + +Sources: + +- `src/Tool/Coding/*` + +Why not reuse: + +- these are for local filesystem editing and shell execution inside a coding agent +- OpenCompany's tool surface is domain tools, integrations, documents, channels, tables, and tasks + +Possible exception: + +- isolated pieces of patch or diff handling if OpenCompany grows a coding workspace product + +Priority: `Skip` + +--- + +### 16. Desktop install, self-update, PHAR, binaries + +Sources: + +- `install.sh` +- CLI release flow +- PHAR and static binary distribution logic + +Why not reuse: + +- unrelated to OpenCompany's deployment model + +Priority: `Skip` + +--- + +### 17. Terminal-first permission UX + +Sources: + +- permission prompts and CLI interaction flows + +Why not reuse: + +- OpenCompany already has approvals, database-backed permission records, and human-in-the-loop flows +- the underlying policy concepts may help, but the UX should remain web-native + +Priority: `Skip` + +--- + +## Concrete Reuse List + +This is the full list in one place. + +### Reuse now + +- Shared provider and model metadata ownership +- Prompt frame splitting for prompt cache efficiency +- Tool result deduplication +- Output truncation with persisted full payloads + +### Reuse next + +- Context budgeting and pre-flight checks +- Compaction circuit breaker +- Protected runtime context +- Typed settings schema +- Subagent orchestration design + +### Reuse later + +- Skill system +- Instruction source precedence model +- Session reconstruction and resume ideas +- Permission evaluator pipeline pattern + +### Do not reuse directly + +- Symfony TUI renderer +- ANSI terminal renderer +- local shell execution tools +- local filesystem coding tools +- PHAR and binary release tooling +- terminal approval UX + +--- + +## Recommended Migration Order + +### Phase 1 + +- Consolidate provider/model metadata into `prism-relay` +- Make OpenCompany consume shared provider and model definitions +- Port prompt-frame splitting into OpenCompany's agent pipeline + +### Phase 2 + +- Add tool-result deduplication +- Add output truncation and persisted large-result storage +- Add protected context handling + +### Phase 3 + +- Expand OpenCompany compaction into a full context management pipeline +- Add budget snapshots, micro-pruning, and failure circuit breaking +- Move toward schema-driven runtime settings + +### Phase 4 + +- Design web-native subagent orchestration using KosmoKrator's swarm ideas +- Add richer dependency and concurrency controls on top of tasks and queues + +### Phase 5 + +- Introduce skills for workspaces, teams, or agents +- Introduce instruction layering for imported projects or external code contexts + +--- + +## Recommended First Phase We Can Start Soon + +If the goal is to start shipping reuse work immediately with low risk, the first phase should be: + +### Phase 1A: Prompt and Context Hygiene + +Bring over these first: + +- Prompt frame splitting from `src/LLM/PromptFrameBuilder.php` +- Tool result deduplication from `src/Agent/ToolResultDeduplicator.php` +- Output truncation from `src/Agent/OutputTruncator.php` + +Why this should be first: + +- small surface area +- no product UI changes required +- no queue architecture changes required +- immediate token and context efficiency wins +- low coupling to CLI-specific code + +OpenCompany target areas: + +- `app/Agents/OpenCompanyAgent.php` +- `app/Jobs/AgentRespondJob.php` +- `app/Services/Memory/*` + +Expected outcome: + +- smaller prompts +- fewer repeated tool payloads +- safer handling of oversized tool outputs +- lower model cost and fewer context-window failures + +### Phase 1B: Model Metadata Consolidation + +Start immediately after Phase 1A: + +- move toward shared provider/model metadata ownership in `prism-relay` +- reduce duplication between: + - `config/integrations.php` + - `app/Services/Memory/ModelContextRegistry.php` + - KosmoKrator's `src/LLM/ProviderCatalog.php` + - KosmoKrator's `config/models.yaml` + +Why this should be second: + +- strategically important +- unlocks cleaner provider UI and runtime behavior in both repos +- but touches more shared infrastructure than prompt hygiene does + +Expected outcome: + +- one source of truth for context windows, pricing, auth mode, defaults, and capabilities +- simpler provider setup and model resolution in OpenCompany + +### What Not To Include In First Phase + +Do not include these in the first phase: + +- subagent swarm orchestration +- permission system redesign +- skill system rollout +- session storage redesign +- TUI or CLI code + +Why not: + +- these are higher-risk and product-shaping changes +- they need architecture decisions, not just reuse work +- they will slow down the first useful delivery + +### Suggested Deliverables For The First Phase + +1. Add a prompt-splitting helper for OpenCompany system prompts +2. Add a tool-result dedupe pass before final LLM submission +3. Add large-output truncation plus persisted full-result storage +4. Add instrumentation around prompt size reduction and truncation frequency +5. Open a follow-up shared-infra task for provider/model metadata consolidation + +### Concrete Recommendation + +If we want the best first phase, start with: + +1. `PromptFrameBuilder` +2. `ToolResultDeduplicator` +3. `OutputTruncator` + +Then do provider/model catalog consolidation as the next phase. + +This gives the fastest path to measurable gains without dragging us into a large refactor. + +--- + +## What This Means In Practice + +OpenCompany should treat KosmoKrator as the stronger source of truth for: + +- agent runtime mechanics +- model metadata handling +- context management +- prompt hygiene +- subagent orchestration concepts + +OpenCompany should **not** treat KosmoKrator as the source of truth for: + +- UX +- storage model +- permissions UI +- shell and filesystem tool design + +The right strategy is: + +- extract shared infrastructure downward into shared packages +- port reusable runtime logic upward into OpenCompany +- leave terminal-specific product code behind diff --git a/docs/architecture/runtime-alignment-implementation-audit.md b/docs/architecture/runtime-alignment-implementation-audit.md new file mode 100644 index 0000000..cb100bc --- /dev/null +++ b/docs/architecture/runtime-alignment-implementation-audit.md @@ -0,0 +1,21 @@ +# Runtime Alignment Implementation Audit + +Date: 2026-04-09 +Status: Review complete — all findings tracked in Plane. + +## Findings + +All findings from this audit are now tracked as issues in the [OpenCompany Plane project](https://plane.gingermedia.biz/kosmokrator/projects/ceaf5d22-612a-42bf-9cc8-0dac054cdf0c/issues/): + +| Issue | Finding | Severity | +|-------|---------|----------| +| OC-1 | ~~Prompt caching depends on ignored `vendor` patches~~ — **Fixed**: `CachingPrismGateway` in `prism-relay`, vendor patches reverted | ~~High~~ | +| OC-3 | ~~`planPromptCache()` never called in request flow~~ — **Fixed**: called via `CachingPrismGateway` for all providers | ~~High~~ | +| OC-4 | `ContextBudget` undercounts retry context pressure | High | +| OC-5 | `ModelContextRegistry` regressed prefix-style admin overrides | Medium | +| OC-6 | Durable-memory extraction re-logs same facts on later compactions | Medium | + +## Verification Notes + +- The workspace requires a filtered `APP_PACKAGES_CACHE` during tests because several `opencompanyapp/integration-*` packages are absent from `vendor`. +- Prompt-cache metrics have not been verified end-to-end through production observability. diff --git a/docs/ecosystem/integrations/README.md b/docs/ecosystem/integrations/README.md new file mode 100644 index 0000000..93db57d --- /dev/null +++ b/docs/ecosystem/integrations/README.md @@ -0,0 +1,943 @@ +# OpenCompany Integrations + +Monorepo for all [OpenCompany](https://github.com/OpenCompanyApp) integration packages. Each package exposes tools that AI agents can call — from rendering diagrams to querying APIs to managing tasks. + +Integrations are independent Composer packages built on a shared core. They work in any PHP 8.2+ application: [OpenCompany](https://github.com/OpenCompanyApp) (web), [KosmoKrator](https://github.com/OpenCompanyApp) (CLI), or your own consumer. + +## Repository Structure + +``` +core/ Shared contracts, credential abstraction, Lua bridge, registry +celestial/ Astronomy: moon phases, sunrise/sunset, planet positions, eclipses +clickup/ ClickUp project management: tasks, lists, folders, time tracking +coingecko/ CoinGecko cryptocurrency: prices, market data, trending, charts +exchangerate/ Currency exchange rates: 340+ fiat, crypto, and metal conversions +google/ Google Calendar, Gmail, Drive, Sheets, Docs, Forms, Contacts, Tasks, Analytics, Search Console +mermaid/ Mermaid diagram rendering to PNG +plantuml/ PlantUML diagram rendering to PNG +plausible/ Plausible Analytics: stats, realtime visitors, goals +ticktick/ TickTick task management with time tracking +trustmrr/ TrustMRR verified startup revenue data +typst/ Typst document rendering to PDF +vegalite/ Vega-Lite chart rendering to PNG +worldbank/ World Bank economic indicators for 200+ countries +``` + +## Architecture + +``` +┌─────────────────────────────────────────────────┐ +│ Host Application (OpenCompany, KosmoKrator) │ +│ │ +│ ┌──────────┐ ┌───────────────────────────┐ │ +│ │ Lua VM │──▸│ LuaBridge │ │ +│ │ │ │ functionMap → tool slugs │ │ +│ │ app.integrations.mermaid.render(...) │ │ +│ └──────────┘ └────────────┬──────────────┘ │ +│ │ │ +│ ┌───────────────────────────▼──────────────┐ │ +│ │ ToolProviderRegistry │ │ +│ │ ├─ mermaid → MermaidToolProvider │ │ +│ │ ├─ plausible → PlausibleToolProvider │ │ +│ │ ├─ clickup → ClickUpToolProvider │ │ +│ │ └─ ... │ │ +│ └───────────────────────────┬──────────────┘ │ +│ │ │ +│ ┌───────────────────────────▼──────────────┐ │ +│ │ ToolProvider.createTool(class, context) │ │ +│ │ → CredentialResolver for API keys │ │ +│ │ → AgentFileStorage for file output │ │ +│ │ → Tool.execute(args) → ToolResult │ │ +│ └──────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────┘ +``` + +**Key concepts:** + +- **Tool** — A single callable action (e.g. "render a Mermaid diagram", "list ClickUp tasks"). Implements `name()`, `description()`, `parameters()`, `execute()`. +- **ToolProvider** — Groups related tools under an app name. Declares metadata, handles tool instantiation with credentials, and optionally provides Lua documentation. +- **ToolProviderRegistry** — Singleton that collects all providers. The host queries it to discover available tools. +- **CredentialResolver** — Abstraction for API keys. The default reads from `config/ai-tools.php`; OpenCompany swaps this for encrypted database storage. +- **LuaBridge** — Routes `app.integrations.{name}.{function}(...)` calls from the Lua VM to PHP tool classes. + +## Available Integrations + +| Package | Tools | Credentials | Category | Description | +|---------|------:|-------------|----------|-------------| +| [celestial](celestial/) | 9 | None | Data | Moon phases, sunrise/sunset, planet positions, eclipses, zodiac | +| [clickup](clickup/) | 34 | API token | Productivity | Tasks, lists, folders, time tracking, docs, chat | +| [coingecko](coingecko/) | 8 | None | Data | Crypto prices, market data, trending coins, historical charts | +| [exchangerate](exchangerate/) | 5 | None | Data | 340+ currency conversions (fiat, crypto, metals) | +| [google](google/) | 117 | OAuth | Productivity | Calendar, Gmail, Drive, Sheets, Docs, Forms, Contacts, Tasks, Analytics, Search Console | +| [mermaid](mermaid/) | 1 | None | Rendering | Flowcharts, sequences, Gantt, class diagrams → PNG | +| [plantuml](plantuml/) | 1 | None | Rendering | UML class, sequence, activity, component, state → PNG | +| [plausible](plausible/) | 8 | API key | Analytics | Stats, realtime visitors, site and goal management | +| [ticktick](ticktick/) | 9 | OAuth | Productivity | Projects, tasks, time tracking (TickTick and Dida365) | +| [trustmrr](trustmrr/) | 2 | API key | Data | Verified startup revenue, MRR, growth, acquisitions | +| [typst](typst/) | 1 | None | Rendering | Reports, invoices, proposals → PDF | +| [vegalite](vegalite/) | 1 | None | Rendering | Bar, line, scatter, heatmap, boxplot charts → PNG | +| [worldbank](worldbank/) | 6 | None | Data | GDP, inflation, population for 200+ countries | + +## Installation + +Each subdirectory is an independent Composer package. In your consuming application: + +```json +{ + "repositories": [ + {"type": "path", "url": "../integrations/*"} + ], + "require": { + "opencompanyapp/integration-core": "@dev", + "opencompanyapp/integration-mermaid": "@dev", + "opencompanyapp/integration-plausible": "@dev" + } +} +``` + +Laravel auto-discovers service providers. For non-Laravel apps, use the contracts and registry directly. + +### System Dependencies + +Some rendering integrations need external tools: + +| Package | Dependency | Install | +|---------|-----------|---------| +| mermaid | `mmdc` (Mermaid CLI) | `npm install -g @mermaid-js/mermaid-cli` | +| plantuml | Java + `plantuml.jar` | Bundled in `plantuml/bin/`, needs `java` on PATH | +| typst | `typst` CLI | `brew install typst` or [typst.app](https://github.com/typst/typst) | +| vegalite | Node.js | `node` on PATH; render script bundled in `vegalite/bin/` | + +--- + +# Developer Guide + +## Building a New Integration + +This walkthrough creates a complete integration from scratch. We'll build a "Weather" integration as an example. + +### 1. Create the Package Directory + +``` +weather/ +├── composer.json +├── src/ +│ ├── WeatherServiceProvider.php +│ ├── WeatherService.php +│ ├── WeatherToolProvider.php +│ └── Tools/ +│ └── GetWeather.php +└── lua-docs/ (optional) + └── weather.md +``` + +### 2. Define `composer.json` + +```json +{ + "name": "opencompanyapp/integration-weather", + "description": "Weather data and forecasts integration for OpenCompany.", + "license": "MIT", + "authors": [ + { + "name": "OpenCompany", + "homepage": "https://github.com/OpenCompanyApp" + } + ], + "keywords": ["tools", "weather", "forecasts", "opencompany"], + "require": { + "php": "^8.2", + "opencompanyapp/integration-core": "^2.0 || @dev" + }, + "autoload": { + "psr-4": { + "OpenCompany\\Integrations\\Weather\\": "src/" + } + }, + "extra": { + "laravel": { + "providers": [ + "OpenCompany\\Integrations\\Weather\\WeatherServiceProvider" + ] + } + }, + "minimum-stability": "stable", + "prefer-stable": true +} +``` + +**Conventions:** +- Package name: `opencompanyapp/integration-{name}` +- Namespace: `OpenCompany\Integrations\{Name}\` +- If replacing an older standalone package, add a `"replace"` key: `"opencompanyapp/ai-tool-weather": "self.version"` +- Only add `illuminate/support` to `require` if you use facades like `Storage`, `Http`, `Log` directly (most API integrations don't need it) + +### 3. Create the Service Class + +The service class encapsulates all API communication. Tools call the service — they never make HTTP requests directly. + +```php +apiKey); + } + + public function getCurrent(string $location): array + { + return $this->request('GET', '/current', [ + 'location' => $location, + ]); + } + + public function getForecast(string $location, int $days = 3): array + { + return $this->request('GET', '/forecast', [ + 'location' => $location, + 'days' => $days, + ]); + } + + private function request(string $method, string $path, array $params = []): array + { + if (! $this->isConfigured()) { + throw new \RuntimeException('Weather API key is not configured.'); + } + + try { + $response = Http::withHeaders([ + 'Authorization' => "Bearer {$this->apiKey}", + 'Accept' => 'application/json', + ])->timeout(15)->get(self::BASE_URL . $path, $params); + + if (! $response->successful()) { + $error = $response->json('error') ?? $response->body(); + Log::error("Weather API error: {$method} {$path}", [ + 'status' => $response->status(), + 'error' => $error, + ]); + throw new \RuntimeException( + 'Weather API error (' . $response->status() . '): ' . $error + ); + } + + return $response->json() ?? []; + } catch (\Illuminate\Http\Client\ConnectionException $e) { + throw new \RuntimeException("Failed to connect to Weather API: {$e->getMessage()}"); + } + } +} +``` + +### 4. Create the Service Provider + +The service provider wires everything into the Laravel container and registers with the `ToolProviderRegistry`. + +```php +app->singleton(WeatherService::class, function ($app) { + $creds = $app->make(CredentialResolver::class); + + return new WeatherService( + apiKey: $creds->get('weather', 'api_key', ''), + ); + }); + } + + public function boot(): void + { + if ($this->app->bound(ToolProviderRegistry::class)) { + $this->app->make(ToolProviderRegistry::class) + ->register(new WeatherToolProvider()); + } + } +} +``` + +**Pattern notes:** +- Always register the service as a singleton — tools may be called multiple times in one request +- Always check `$this->app->bound(ToolProviderRegistry::class)` before registering — the core package may not be installed +- Use `CredentialResolver` to get API keys, never read config directly + +### 5. Create the Tool Provider + +The tool provider declares what tools are available and how to instantiate them. + +```php + 'weather, forecasts, temperature', + 'description' => 'Weather data and forecasts', + 'icon' => 'ph:cloud-sun', + 'logo' => 'ph:cloud-sun', + ]; + } + + public function tools(): array + { + return [ + 'get_weather' => [ + 'class' => GetWeather::class, + 'type' => 'read', + 'name' => 'Get Weather', + 'description' => 'Current weather for any location.', + 'icon' => 'ph:cloud-sun', + ], + 'get_forecast' => [ + 'class' => GetForecast::class, + 'type' => 'read', + 'name' => 'Get Forecast', + 'description' => 'Multi-day weather forecast.', + 'icon' => 'ph:calendar', + ], + ]; + } + + public function isIntegration(): bool + { + return true; + } + + public function createTool(string $class, array $context = []): Tool + { + return new $class(app(WeatherService::class)); + } + + public function luaDocsPath(): ?string + { + return __DIR__ . '/../lua-docs/weather.md'; + } + + public function credentialFields(): array + { + return [ + [ + 'key' => 'api_key', + 'type' => 'secret', + 'label' => 'API Key', + 'required' => true, + 'placeholder' => 'wth_...', + ], + ]; + } +} +``` + +**`tools()` array keys:** +- `class` — Fully-qualified class name of the Tool implementation +- `type` — `'read'` (fetches data) or `'write'` (creates/modifies/deletes) +- `name` — Human-readable display name +- `description` — Short description for listings and UI cards +- `icon` — [Iconify](https://icon-sets.iconify.design/) identifier (we use the `ph:` Phosphor set) + +**`createTool()` context:** +- The `$context` array is injected by the host application at runtime +- In OpenCompany: `['agent' => User, 'timezone' => 'Europe/Amsterdam']` +- In KosmoKrator: `['account' => 'default']` +- Use it to pass runtime dependencies without coupling to specific models + +### 6. Create Tool Classes + +Each tool is a single callable action. + +```php + [ + 'type' => 'string', + 'required' => true, + 'description' => 'City name, address, or coordinates (e.g. "Amsterdam", "51.5,-0.1").', + ], + 'units' => [ + 'type' => 'string', + 'enum' => ['metric', 'imperial'], + 'description' => 'Unit system (default: metric).', + ], + ]; + } + + public function execute(array $args): ToolResult + { + $location = $args['location'] ?? ''; + if (empty($location)) { + return ToolResult::error('Location is required.'); + } + + try { + $data = $this->service->getCurrent($location); + + return ToolResult::success($data); + } catch (\Throwable $e) { + return ToolResult::error($e->getMessage()); + } + } +} +``` + +**Parameter types:** `string`, `integer`, `number`, `boolean`, `array`, `object` + +**Optional parameter keys:** +- `required` — `true` if the parameter must be provided (default `false`) +- `description` — Shown in generated Lua docs and tool catalogs +- `enum` — Array of allowed string values +- `items` — Element type for arrays, e.g. `['type' => 'string']` +- `properties` — Sub-property definitions for objects +- `default` — Default value if not provided + +**ToolResult patterns:** +```php +// Success with data (array or string) +return ToolResult::success(['temperature' => 22, 'unit' => 'C']); +return ToolResult::success('The current temperature is 22C.'); + +// Success with metadata (files created, timing info, etc.) +return ToolResult::success($data, ['files' => [$fileInfo]]); + +// Error +return ToolResult::error('Location not found.'); +``` + +--- + +## Integration Types + +The codebase has four distinct integration patterns. Pick the one that matches your use case. + +### Type A: Public API (No Credentials) + +For APIs that don't require authentication: exchangerate, worldbank, coingecko, celestial. + +```php +// ToolProvider +public function credentialFields(): array +{ + return []; // No credentials needed +} + +// ServiceProvider — no credential resolver needed +public function register(): void +{ + $this->app->singleton(MyService::class); +} +``` + +### Type B: API Key Authentication + +For services that need an API key: plausible, trustmrr. + +```php +// ServiceProvider — inject credentials +$this->app->singleton(MyService::class, function ($app) { + $creds = $app->make(CredentialResolver::class); + + return new MyService( + apiKey: $creds->get('myservice', 'api_key', ''), + baseUrl: $creds->get('myservice', 'url', 'https://api.example.com'), + ); +}); + +// ToolProvider +public function credentialFields(): array +{ + return [ + ['key' => 'api_key', 'type' => 'secret', 'label' => 'API Key', 'required' => true], + ['key' => 'url', 'type' => 'url', 'label' => 'Base URL', 'default' => 'https://api.example.com'], + ]; +} +``` + +### Type C: OAuth Authentication + +For services requiring OAuth flows: clickup, ticktick, google. + +These integrations register OAuth routes in their service provider and include a controller: + +```php +// ServiceProvider boot() +Route::prefix('api/integrations/myservice/oauth')->group(function () { + Route::get('authorize', [MyOAuthController::class, 'authorize']); + Route::get('callback', [MyOAuthController::class, 'callback']); +}); + +// ToolProvider credentialFields +public function credentialFields(): array +{ + return [ + ['key' => 'client_id', 'type' => 'string', 'label' => 'Client ID', 'required' => true], + ['key' => 'client_secret', 'type' => 'secret', 'label' => 'Client Secret', 'required' => true], + ['key' => 'access_token', 'type' => 'oauth', 'label' => 'Connect Account'], + ]; +} +``` + +### Type D: Rendering / File Output + +For tools that produce files (images, PDFs): mermaid, plantuml, typst, vegalite. + +These use the `AgentFileStorage` contract to save output files: + +```php +// ToolProvider — inject file storage +public function createTool(string $class, array $context = []): Tool +{ + $fileStorage = app()->bound(AgentFileStorage::class) + ? app(AgentFileStorage::class) + : null; + + return new $class( + app(MyRenderService::class), + $fileStorage, + $context['agent'] ?? null, + ); +} + +// Tool — use file storage if available, fall back to public disk +public function execute(array $args): ToolResult +{ + $bytes = $this->service->renderToBytes($input); + + if ($this->fileStorage && $this->agent) { + $result = $this->fileStorage->saveFile( + $this->agent, 'output.png', $bytes, 'image/png', 'myrenderer' + ); + return ToolResult::success("![Title]({$result['url']})"); + } + + $url = $this->service->render($input); // saves to public disk + return ToolResult::success("![Title]({$url})"); +} +``` + +--- + +## Making an Integration Configurable + +To add a settings UI in OpenCompany, implement `ConfigurableIntegration` alongside `ToolProvider`: + +```php +use OpenCompany\IntegrationCore\Contracts\ConfigurableIntegration; +use OpenCompany\IntegrationCore\Contracts\ToolProvider; + +class WeatherToolProvider implements ToolProvider, ConfigurableIntegration +{ + // ... ToolProvider methods ... + + public function integrationMeta(): array + { + return [ + 'name' => 'Weather', + 'description' => 'Weather data and forecasts for any location', + 'icon' => 'ph:cloud-sun', + 'logo' => 'ph:cloud-sun', + 'category' => 'data', // data, productivity, analytics, rendering + 'badge' => 'New', // optional badge text + 'docs_url' => 'https://...', // optional external docs link + ]; + } + + public function configSchema(): array + { + return [ + [ + 'key' => 'api_key', + 'type' => 'secret', + 'label' => 'API Key', + 'placeholder' => 'wth_...', + 'hint' => 'Get your key at weather.example.', + 'required' => true, + ], + [ + 'key' => 'units', + 'type' => 'select', + 'label' => 'Default Units', + 'options' => ['metric' => 'Metric (C, km/h)', 'imperial' => 'Imperial (F, mph)'], + 'default' => 'metric', + ], + ]; + } + + public function testConnection(array $config): array + { + try { + // Make a lightweight API call to verify credentials + $response = Http::withHeaders([ + 'Authorization' => "Bearer {$config['api_key']}", + ])->timeout(10)->get('https://api.weather.example/v1/ping'); + + if ($response->successful()) { + return ['success' => true, 'message' => 'Connected to Weather API.']; + } + + return ['success' => false, 'error' => 'Invalid API key.']; + } catch (\Exception $e) { + return ['success' => false, 'error' => $e->getMessage()]; + } + } + + public function validationRules(): array + { + return [ + 'api_key' => 'nullable|string', + 'units' => 'nullable|in:metric,imperial', + ]; + } +} +``` + +**Config field types:** +- `secret` — Masked input, stored encrypted +- `text` / `string` — Plain text input +- `url` — URL input with format validation +- `select` — Dropdown, requires `options` array +- `string_list` — Dynamic list of strings (e.g. site IDs) +- `oauth_connect` — OAuth connection button, requires `authorize_url` and `redirect_uri` + +**Conditional fields** — Show a field only when another field has a specific value: +```php +[ + 'key' => 'workspace_id', + 'type' => 'text', + 'label' => 'Workspace ID', + 'visible_when' => ['field' => 'mode', 'value' => 'workspace'], +] +``` + +--- + +## Lua Documentation + +Agents discover tools through auto-generated Lua API docs. The `LuaDocRenderer` and `LuaCatalogBuilder` in core handle this automatically based on your `parameters()` and `description()` definitions. + +For complex integrations, add a `lua-docs/{name}.md` file with supplementary documentation — workflows, examples, and gotchas that aren't captured by the parameter reference. + +### Writing Lua Docs + +```markdown +## Common Workflows + +### Get current weather and format it + +```lua +local weather = app.integrations.weather.get({location = "Amsterdam"}) +local forecast = app.integrations.weather.forecast({location = "Amsterdam", days = 3}) +``` + +### Batch lookups + +```lua +local cities = {"Amsterdam", "London", "Tokyo"} +for _, city in ipairs(cities) do + local w = app.integrations.weather.get({location = city}) + -- process results +end +``` + +## Notes + +- Locations accept city names, addresses, or lat/lng coordinates +- Rate limit: 60 requests per minute +- Temperature is in Celsius by default (use `units = "imperial"` for Fahrenheit) +``` + +Point to the file in your tool provider: + +```php +public function luaDocsPath(): ?string +{ + return __DIR__ . '/../lua-docs/weather.md'; +} +``` + +### How Lua Routing Works + +The `LuaCatalogBuilder` transforms your tool definitions into a Lua namespace tree: + +``` +app.integrations.weather.get({location = "Amsterdam"}) +│ │ │ │ +│ │ │ └─ Function name (derived from tool name, minus app name) +│ │ └─ App name (from ToolProvider::appName()) +│ └─ "integrations." prefix (added when isIntegration() returns true) +└─ Root namespace +``` + +The `LuaBridge` then: +1. Looks up the function path in its `functionMap` to find the tool slug +2. Maps positional arguments to named parameters via `parameterMap` +3. Delegates to `LuaToolInvoker::invoke()` which instantiates and executes the tool +4. Logs the call (path, duration, status, error) for observability +5. Suggests similar functions on typos ("Did you mean: ...") + +--- + +## Core Contracts Reference + +### `Tool` + +The fundamental unit of work. Every tool implements this interface. + +```php +interface Tool +{ + public function name(): string; // Slug for routing (e.g. 'get_weather') + public function description(): string; // Shown in docs and catalogs + public function parameters(): array; // Parameter definitions + public function execute(array $args): ToolResult; +} +``` + +### `ToolProvider` + +Groups tools under an app, handles instantiation. + +```php +interface ToolProvider +{ + public function appName(): string; // Unique identifier + public function appMeta(): array; // UI metadata + public function tools(): array; // Tool definitions + public function isIntegration(): bool; // Toggleable per agent? + public function createTool(string $class, array $context = []): Tool; + public function luaDocsPath(): ?string; // Supplementary docs + public function credentialFields(): array; // Required credentials +} +``` + +### `CredentialResolver` + +Abstracts credential storage. The host application binds its own implementation. + +```php +interface CredentialResolver +{ + public function get(string $integration, string $key, mixed $default = null, ?string $account = null): mixed; + public function isConfigured(string $integration, ?string $account = null): bool; +} +``` + +The `$account` parameter supports multi-account setups (e.g. "work" and "personal" Google accounts). + +### `ConfigurableIntegration` + +Optional. Adds a settings UI for the integration in OpenCompany. + +```php +interface ConfigurableIntegration +{ + public function integrationMeta(): array; // Name, description, icon, category + public function configSchema(): array; // Form field definitions + public function testConnection(array $config): array; // Verify credentials + public function validationRules(): array; // Laravel validation rules +} +``` + +### `AgentFileStorage` + +Allows tools to save files into the agent's workspace without coupling to the host's file system. + +```php +interface AgentFileStorage +{ + public function saveFile( + object $agent, + string $filename, + string $content, + string $mimeType, + ?string $subfolder = null, + ): array; // Returns ['id' => ..., 'path' => ..., 'url' => ...] +} +``` + +### `LuaToolInvoker` + +Host-side adapter for executing tools from the Lua bridge. + +```php +interface LuaToolInvoker +{ + public function invoke(string $toolSlug, array $args): mixed; + public function getToolMeta(string $toolSlug): array; +} +``` + +### `ToolResult` + +Value object returned by all tool executions. + +```php +$result = ToolResult::success($data); // Success with data +$result = ToolResult::success($data, $meta); // Success with metadata +$result = ToolResult::error('Something failed'); // Error + +$result->succeeded(); // bool +$result->data; // mixed — string, array, or any serializable value +$result->error; // ?string +$result->meta; // array — files, timing, etc. +$result->toString(); // String representation for legacy consumers +``` + +--- + +## Credential Management + +### For Standalone Laravel Apps + +The default `ConfigCredentialResolver` reads from `config/ai-tools.php`: + +```php +// config/ai-tools.php +return [ + 'weather' => [ + 'api_key' => env('WEATHER_API_KEY'), + ], + 'plausible' => [ + 'api_key' => env('PLAUSIBLE_API_KEY'), + 'url' => env('PLAUSIBLE_URL', 'https://plausible.io'), + ], + // Multi-account example + 'gmail' => [ + 'work' => ['api_key' => env('GMAIL_WORK_KEY')], + 'personal' => ['api_key' => env('GMAIL_PERSONAL_KEY')], + ], +]; +``` + +### Custom Credential Storage + +Bind your own `CredentialResolver` implementation: + +```php +// In your AppServiceProvider +$this->app->singleton( + \OpenCompany\IntegrationCore\Contracts\CredentialResolver::class, + \App\Services\DatabaseCredentialResolver::class, +); +``` + +--- + +## Static Analysis + +Packages that include a `phpstan.neon` are configured for [Larastan](https://github.com/larastan/larastan) level 5: + +```neon +includes: + - vendor/larastan/larastan/extension.neon + +parameters: + paths: + - src/ + level: 5 +``` + +Run from any package directory: + +```console +cd mermaid && ../vendor/bin/phpstan analyse +``` + +--- + +## Contributing + +### Adding a New Integration + +1. Create a new directory following the structure above +2. Implement `ToolProvider` (and optionally `ConfigurableIntegration`) +3. Create your service class and tool classes +4. Add lua-docs if the integration has non-obvious workflows +5. Add a `phpstan.neon` and ensure level 5 passes +6. Update this README's structure listing and integrations table + +### Conventions + +- **Naming**: Package directories and `appName()` are lowercase kebab/snake. Namespaces are PascalCase. +- **Icons**: Use [Phosphor Icons](https://icon-sets.iconify.design/ph/) (`ph:` prefix). +- **Tool types**: Use `'read'` for tools that fetch data, `'write'` for tools that create, modify, or delete. +- **Parameter names**: Always `snake_case`. +- **Error handling**: Tools should catch exceptions and return `ToolResult::error()` — never let exceptions bubble out of `execute()`. +- **Service isolation**: Tools call service methods. Services make HTTP requests. Tools never make HTTP requests directly. +- **No hardcoded config**: Always use `CredentialResolver` for API keys and endpoints. Never read `config()` or `env()` directly in tool or service classes. + +### Checklist for New Integrations + +- [ ] `composer.json` with correct package name, namespace, and Laravel provider auto-discovery +- [ ] Service class encapsulating all API communication +- [ ] Service provider with singleton service registration and `ToolProviderRegistry` boot +- [ ] Tool provider implementing `ToolProvider` (and `ConfigurableIntegration` if credentials are needed) +- [ ] Tool classes with clear `description()`, typed `parameters()`, and `ToolResult` returns +- [ ] `credentialFields()` defined for any required API keys or tokens +- [ ] `testConnection()` if implementing `ConfigurableIntegration` +- [ ] `lua-docs/{name}.md` for integrations with complex workflows +- [ ] Entry added to README structure listing and integrations table + +## License + +MIT diff --git a/docs/ecosystem/integrations/celestial/README.md b/docs/ecosystem/integrations/celestial/README.md new file mode 100644 index 0000000..ed74fbe --- /dev/null +++ b/docs/ecosystem/integrations/celestial/README.md @@ -0,0 +1,119 @@ +# Integration: Celestial + +> Astronomy integration for the [Laravel AI SDK](https://github.com/laravel/ai) — moon phases, sunrise/sunset, planet positions, eclipses, night sky reports. Part of the [OpenCompany](https://github.com/OpenCompanyApp) integration ecosystem. + +Give your AI agents the ability to perform real-time astronomical calculations. Built on [astronomy-bundle-php](https://github.com/OpenCompanyApp/astronomy-bundle-php) (Jean Meeus' *Astronomical Algorithms*) and the [Integration Core](https://github.com/OpenCompanyApp/integration-core) framework. + +## About OpenCompany + +[OpenCompany](https://github.com/OpenCompanyApp) is an AI-powered workplace platform where teams deploy and coordinate multiple AI agents alongside human collaborators. It combines team messaging, document collaboration, task management, and intelligent automation in a single workspace — with built-in approval workflows and granular permission controls so organizations can adopt AI agents safely and transparently. + +This celestial tool is one example of how AI agents can be extended with specialized capabilities beyond standard LLM knowledge — giving agents accurate, real-time astronomical data instead of relying on training data that may be outdated or imprecise. + +OpenCompany is built with Laravel, Vue 3, and Inertia.js. Learn more at [github.com/OpenCompanyApp](https://github.com/OpenCompanyApp). + +## Installation + +```console +composer require opencompanyapp/integration-celestial +``` + +Laravel auto-discovers the service provider. No manual registration needed. + +## Available Actions + +| Action | Description | Required Params | +|--------|-------------|-----------------| +| `moon_phase` | Phase, illumination, age, zodiac sign, next new/full moon | — | +| `sun_info` | Sunrise/sunset, altitude/azimuth, twilight, day length, zodiac | `latitude`, `longitude` | +| `moon_info` | Moon position, illumination, visibility from a location | `latitude`, `longitude` | +| `planet_position` | Planet altitude/azimuth, zodiac, rise/set. Use `planet="all"` for overview | `latitude`, `longitude` | +| `solar_eclipse` | Eclipse type, obscuration, contacts, magnitude for a date + location | `date`, `latitude`, `longitude` | +| `lunar_eclipse` | Eclipse type, magnitude, gamma, contact times (P1-P4, U1-U4) | `date` | +| `night_sky` | What's visible now: sun/moon/planet positions, darkness, stargazing quality | `latitude`, `longitude` | +| `zodiac_report` | All celestial bodies mapped to zodiac signs with alignments | — | +| `time_info` | Julian Day, sidereal time (GMST/GAST), equation of time | — | + +All actions accept optional `date` (ISO format, defaults to now) and `timezone` (defaults to UTC or configured default). + +## Quick Start: Use with Laravel AI SDK + +```php +use Laravel\Ai\Facades\Ai; +use OpenCompany\Integrations\Celestial\Tools\QueryCelestial; +use OpenCompany\Integrations\Celestial\CelestialService; + +// Create the tool +$tool = new QueryCelestial( + service: app(CelestialService::class), + defaultTimezone: 'Europe/Amsterdam', +); + +// Use with an AI agent +$response = Ai::agent() + ->tools([$tool]) + ->prompt('What phase is the moon in right now?'); +``` + +### Via ToolProvider (recommended) + +If you have `integration-core` installed, the tool auto-registers with the `ToolProviderRegistry`: + +```php +use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; + +$registry = app(ToolProviderRegistry::class); +$provider = $registry->get('celestial'); + +// Create tool with context +$tool = $provider->createTool( + \OpenCompany\Integrations\Celestial\Tools\QueryCelestial::class, + ['timezone' => 'America/New_York'] +); +``` + +## Standalone Service Usage + +You can use `CelestialService` directly without the integration wrapper: + +```php +use OpenCompany\Integrations\Celestial\CelestialService; + +$service = app(CelestialService::class); + +// Moon phase +echo $service->moonPhase(null, 'Europe/Amsterdam'); + +// Sunrise/sunset in Amsterdam +echo $service->sunInfo('2025-06-21', 52.3676, 4.9041, 'Europe/Amsterdam'); + +// All planets visible from Berlin +echo $service->planetPosition('all', null, 52.524, 13.411, 'Europe/Berlin'); + +// Night sky report +echo $service->nightSky(52.3676, 4.9041, 'Europe/Amsterdam'); + +// Solar eclipse check +echo $service->solarEclipse('2026-08-12', 40.7128, -74.0060); + +// Lunar eclipse check +echo $service->lunarEclipse('2025-09-07'); +``` + +## Dependencies + +| Package | Purpose | +|---------|---------| +| [opencompanyapp/integration-core](https://github.com/OpenCompanyApp/integration-core) | ToolProvider contract and registry | +| [opencompanyapp/astronomy-bundle](https://github.com/OpenCompanyApp/astronomy-bundle-php) | Astronomical calculation engine (Meeus algorithms, VSOP87) | +| [laravel/ai](https://github.com/laravel/ai) | Laravel AI SDK Tool contract | + +## Requirements + +- PHP 8.2+ +- Laravel 11 or 12 +- [Laravel AI SDK](https://github.com/laravel/ai) ^0.1 + +## License + +MIT — see [LICENSE](LICENSE) diff --git a/docs/ecosystem/integrations/clickup/README.md b/docs/ecosystem/integrations/clickup/README.md new file mode 100644 index 0000000..ad3b77a --- /dev/null +++ b/docs/ecosystem/integrations/clickup/README.md @@ -0,0 +1,61 @@ +# ClickUp Integration + +ClickUp project management integration for the Laravel AI SDK. Part of the **OpenCompany** integration ecosystem — an open platform where AI agents collaborate with humans to run organizations. + +## Available Tools (17) + +| Tool | Type | Description | +|------|------|-------------| +| `clickup_get_hierarchy` | read | Get workspace hierarchy: spaces, folders, lists | +| `clickup_search` | read | Search tasks across the workspace | +| `clickup_members` | read | List, find, or resolve workspace members | +| `clickup_get_tasks` | read | Get all tasks in a list | +| `clickup_get_task` | read | Get a task by ID with full details | +| `clickup_create_task` | write | Create a new task in a list | +| `clickup_update_task` | write | Update an existing task | +| `clickup_delete_task` | write | Delete a task | +| `clickup_manage_tags` | write | Add or remove tags on tasks | +| `clickup_attach_file` | write | Attach a file to a task | +| `clickup_manage_comments` | write | Read or add comments on tasks | +| `clickup_time_tracking` | write | Start, stop, log, or list time entries | +| `clickup_manage_list` | write | Create, get, or update lists | +| `clickup_manage_folder` | write | Create, get, or update folders | +| `clickup_chat` | write | List channels or send messages | +| `clickup_manage_document` | write | Create a ClickUp document | +| `clickup_manage_document_pages` | write | List, get, create, or update document pages | + +## Installation + +```bash +composer require opencompanyapp/integration-clickup +``` + +The service provider is auto-discovered by Laravel. + +## Configuration + +| Key | Type | Required | Description | +|-----|------|----------|-------------| +| `api_token` | secret | Yes | Personal API Token (starts with `pk_`). Generate at ClickUp → Settings → Apps. | +| `workspace_id` | text | No | Workspace ID from URL: `app.clickup.com/{id}/...`. Required for search, time tracking, members. | + +## Quick Start + +```php +use Laravel\Ai\Facades\Ai; + +$response = Ai::tools(['clickup_get_hierarchy', 'clickup_create_task']) + ->prompt('List all spaces, then create a task called "Review Q1 report" in the first list you find.'); +``` + +## Dependencies + +| Package | Version | +|---------|---------| +| PHP | ^8.2 | +| opencompanyapp/integration-core | ^2.0 | +| laravel/ai | ^0.1 | + +## License + +MIT diff --git a/docs/ecosystem/integrations/clickup/clickup.md b/docs/ecosystem/integrations/clickup/clickup.md new file mode 100644 index 0000000..75ee243 --- /dev/null +++ b/docs/ecosystem/integrations/clickup/clickup.md @@ -0,0 +1,207 @@ +# ClickUp Lua API Reference + +## Priority Mapping + +| Value | Level | +|-------|---------| +| 1 | Urgent | +| 2 | High | +| 3 | Normal | +| 4 | Low | + +```lua +-- Use numeric values for priority +clickup_create_task({ list_id = "901234", name = "Fix bug", priority = 1 }) -- urgent +clickup_update_task({ task_id = "abc123", priority = 3 }) -- normal +``` + +## Date Handling + +All dates use ISO 8601 format. The API converts them to Unix milliseconds internally. + +```lua +-- Date only +clickup_create_task({ list_id = "901234", name = "Sprint review", due_date = "2026-04-01" }) + +-- Date with time +clickup_create_task({ + list_id = "901234", + name = "Standup", + start_date = "2026-04-01T09:00:00", + due_date = "2026-04-01T09:30:00" +}) + +-- Clear a date by passing empty string +clickup_update_task({ task_id = "abc123", due_date = "" }) + +-- Filter tasks by due date range +clickup_get_tasks({ + list_id = "901234", + due_date_gt = "2026-03-01", + due_date_lt = "2026-03-31" +}) +``` + +## Member Resolution + +Always resolve names to numeric user IDs before assigning tasks. + +```lua +-- Find a single member by name or email +local result = clickup_find_member({ query = "Sarah" }) +-- Returns: { matches = { { id = "12345", username = "sarah", email = "sarah@co.com" } } } + +-- Resolve multiple names at once +local result = clickup_resolve_members({ query = "Sarah, John, alex@co.com" }) +-- Returns: { results = { { query = "Sarah", id = "12345", resolved = true }, ... } } + +-- Use the IDs for assignment (comma-separated string) +clickup_create_task({ list_id = "901234", name = "Review PR", assignees = "12345,67890" }) +``` + +## Common Workflows + +### Create a task with assignees and tags + +```lua +-- Step 1: Find the list ID +local hierarchy = clickup_get_hierarchy({}) +-- Traverse: hierarchy.spaces[].folders[].lists[] or hierarchy.spaces[].lists[] + +-- Step 2: Resolve member IDs +local members = clickup_resolve_members({ query = "Sarah, John" }) +local ids = {} +for _, m in ipairs(members.results) do + if m.resolved then table.insert(ids, m.id) end +end + +-- Step 3: Create the task +clickup_create_task({ + list_id = "901234", + name = "Implement auth flow", + description = "Add OAuth2 login support", + status = "open", + priority = 2, + assignees = table.concat(ids, ","), + tags = "backend,auth", + due_date = "2026-04-15" +}) +``` + +### Search for tasks, then update them + +```lua +-- Search across workspace +local results = clickup_search({ + query = "auth", + statuses = "open,in progress", + include_subtasks = true +}) + +-- Update each matching task +for _, task in ipairs(results.tasks) do + clickup_update_task({ + task_id = task.id, + priority = 1, + status = "in progress" + }) +end + +-- Custom task IDs (e.g., "DEV-42") work too +clickup_update_task({ task_id = "DEV-42", status = "closed" }) +``` + +### Time tracking (start, stop, log) + +```lua +-- Start a timer on a task +clickup_start_timer({ + task_id = "abc123", + description = "Working on auth flow", + billable = true +}) + +-- Check what's currently running +local current = clickup_current_time_entry({}) +-- Returns task name, start time, description + +-- Stop the running timer +clickup_stop_timer({}) + +-- Or log time manually (duration in milliseconds) +clickup_log_time({ + task_id = "abc123", + start = "2026-03-29T09:00:00", + duration = "3600000", -- 1 hour = 3,600,000 ms + description = "Code review", + billable = true +}) + +-- View all time entries for a task +local entries = clickup_list_time_entries({ task_id = "abc123" }) +-- entries.entries[].duration is already formatted ("60.0 min") +``` + +### Navigate hierarchy (find list and folder IDs) + +```lua +-- Get full workspace tree +local tree = clickup_get_hierarchy({}) + +-- Filter to specific space(s) +local tree = clickup_get_hierarchy({ space_ids = "12345,67890" }) + +-- Get folder details including its lists +local folder = clickup_get_folder({ folder_id = "456" }) +-- folder.lists = { { id = "789", name = "Backlog" }, ... } + +-- Get list details +local list = clickup_get_list({ list_id = "789" }) +-- list.task_count, list.space, list.folder + +-- Get all tasks in a specific list +local tasks = clickup_get_tasks({ + list_id = "789", + statuses = "open,in progress", + include_closed = false +}) +``` + +### Create a subtask + +```lua +clickup_create_task({ + list_id = "901234", + name = "Write unit tests", + parent_task_id = "abc123", + priority = 3 +}) +``` + +### Tags + +```lua +-- Add a tag (must already exist in the space) +clickup_add_tag({ task_id = "abc123", tag_name = "urgent" }) + +-- Remove a tag +clickup_remove_tag({ task_id = "abc123", tag_name = "urgent" }) + +-- Set tags during task creation +clickup_create_task({ + list_id = "901234", + name = "Deploy", + tags = "devops,release" +}) +``` + +## Tips + +- **Time durations are in milliseconds**: 1 min = 60000, 1 hour = 3600000, 1 day = 86400000 +- **Tags must pre-exist** in the ClickUp space before you can add them to tasks +- **workspace_id** is pulled from config automatically for search, time tracking, and member operations -- you rarely need to pass it explicitly +- **Custom task IDs** like `"DEV-42"` work anywhere a `task_id` is accepted +- **Pagination**: `clickup_search` and `clickup_get_tasks` support a `page` parameter (starts at 0) +- **Assignees** use comma-separated numeric user IDs, not names -- always resolve first +- **Statuses** are list-specific strings (e.g., `"open"`, `"in progress"`, `"closed"`) -- check the list for valid values +- **time_estimate** on `clickup_update_task` is in **minutes** (converted to ms internally) diff --git a/docs/ecosystem/integrations/coingecko/README.md b/docs/ecosystem/integrations/coingecko/README.md new file mode 100644 index 0000000..4906ba3 --- /dev/null +++ b/docs/ecosystem/integrations/coingecko/README.md @@ -0,0 +1,34 @@ +# Integration: CoinGecko + +Cryptocurrency market data for AI agents — search coins, get prices, market rankings, trending coins, and historical chart data. + +> Part of the **OpenCompany** integration ecosystem. These packages extend AI agents with real-world capabilities through the Laravel AI SDK. + +## Available Tools + +| Tool | Type | Description | +|---|---|---| +| `coingecko_search` | read | Search coins by name/symbol, trending coins, global market overview | +| `coingecko_market` | read | Current prices, market rankings, and market cap data | +| `coingecko_details` | read | Coin profiles, historical price charts, OHLC candlestick data | + +## Installation + +```bash +composer require opencompanyapp/integration-coingecko +``` + +## Configuration + +Requires a free CoinGecko Demo API key. Get one at [CoinGecko Developer Dashboard](https://www.coingecko.com/en/api/pricing). + +## Dependencies + +| Package | Purpose | +|---|---| +| `opencompanyapp/integration-core` | Shared tool provider contracts and registry | +| `laravel/ai` | Laravel AI SDK tool interface | + +## License + +MIT diff --git a/docs/ecosystem/integrations/coingecko/coingecko.md b/docs/ecosystem/integrations/coingecko/coingecko.md new file mode 100644 index 0000000..2f43715 --- /dev/null +++ b/docs/ecosystem/integrations/coingecko/coingecko.md @@ -0,0 +1,103 @@ +# CoinGecko — Lua API Reference + +## Important: Coin IDs vs Ticker Symbols + +CoinGecko tools use CoinGecko IDs (e.g. `"bitcoin"`, `"ethereum"`, `"solana"`), **not** ticker symbols (`"BTC"`, `"ETH"`). If you only know the ticker, use `coingecko_search_coins` first to find the correct ID. + +**Rate limits:** free tier allows ~30 calls/min. + +## coingecko_search_coins + +Find coin IDs by name or ticker symbol. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `query` | string | yes | Coin name or ticker (e.g. `"bitcoin"`, `"ETH"`) | + +```lua +local result = coingecko_search_coins({ query = "SOL" }) + +-- result.coins is an array of { id, name, symbol, market_cap_rank } +for _, coin in ipairs(result.coins) do + log(coin.id .. " (" .. coin.symbol .. ") — rank #" .. (coin.market_cap_rank or "?")) +end +``` + +## coingecko_price + +Get current price for one or more coins. Includes 24h change, volume, and market cap. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `ids` | string | yes | Comma-separated CoinGecko IDs (e.g. `"bitcoin,ethereum"`) | +| `currencies` | string | no | Comma-separated target currencies (default: `"usd"`) | + +```lua +local result = coingecko_price({ + ids = "bitcoin,ethereum", + currencies = "usd,eur" +}) +``` + +## coingecko_markets + +Top coins ranked by market cap with full market data. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `ids` | string | no | Filter to specific coin IDs | +| `currency` | string | no | Target currency (default: `"usd"`) | +| `category` | string | no | Filter by category (e.g. `"decentralized-finance-defi"`) | +| `per_page` | string | no | Results per page (default: `"20"`, max: 100) | +| `page` | string | no | Page number (default: `"1"`) | +| `price_change_percentage` | string | no | Timeframes (default: `"24h,7d"`). Options: `1h,24h,7d,14d,30d,200d,1y` | + +```lua +local result = coingecko_markets({ + per_page = "10", + price_change_percentage = "24h,7d,30d" +}) + +for _, coin in ipairs(result.coins) do + log("#" .. coin.market_cap_rank .. " " .. coin.name .. ": $" .. coin.current_price) +end +``` + +## Examples + +### Search for a coin, then get its price + +```lua +-- Step 1: find the coin ID +local search = coingecko_search_coins({ query = "Cardano" }) +local coin_id = search.coins[1].id -- "cardano" + +-- Step 2: get the price +local price = coingecko_price({ + ids = coin_id, + currencies = "usd,btc" +}) +``` + +### Top 10 coins by market cap + +```lua +local result = coingecko_markets({ + per_page = "10", + currency = "usd" +}) + +for _, coin in ipairs(result.coins) do + log(coin.name .. ": $" .. coin.current_price .. " (24h: " .. (coin.price_change_percentage_24h or "?") .. "%)") +end +``` + +### Compare specific coins + +```lua +local result = coingecko_markets({ + ids = "bitcoin,ethereum,solana", + currency = "usd", + price_change_percentage = "1h,24h,7d" +}) +``` diff --git a/docs/ecosystem/integrations/core/README.md b/docs/ecosystem/integrations/core/README.md new file mode 100644 index 0000000..cfdc194 --- /dev/null +++ b/docs/ecosystem/integrations/core/README.md @@ -0,0 +1,209 @@ +# Integration Core + +> Framework-agnostic core for building integration packages. Part of the [OpenCompany](https://github.com/OpenCompanyApp) ecosystem. + +Provides the contracts, credential abstraction, and auto-discovery registry that all OpenCompany integration packages build on. Packages built on integration-core work in any PHP application — OpenCompany (web), KosmoKrator (CLI), or custom consumers. + +## About OpenCompany + +[OpenCompany](https://github.com/OpenCompanyApp) is an AI-powered workplace platform where teams deploy and coordinate multiple AI agents alongside human collaborators. It combines team messaging, document collaboration, task management, and intelligent automation in a single workspace — with built-in approval workflows and granular permission controls so organizations can adopt AI agents safely and transparently. + +This core package enables OpenCompany's plugin architecture for integrations — each external integration (astronomy, analytics, messaging, etc.) is a separate Composer package that any PHP app can install independently. + +## Installation + +```console +composer require opencompanyapp/integration-core +``` + +Laravel auto-discovers the service provider. Non-Laravel apps can use the contracts and registry directly. + +## What's Included + +| Component | Purpose | +|-----------|---------| +| `Tool` interface | Framework-agnostic tool contract — `name()`, `description()`, `parameters()`, `execute()` | +| `ToolResult` value object | Structured result from tool execution — `success()`, `error()`, metadata | +| `ToolProvider` interface | Contract every integration package implements — declares tools, metadata, factory, and Lua docs | +| `CredentialResolver` interface | Abstraction for API keys/config — swap between config files, databases, or vaults | +| `ConfigCredentialResolver` | Default resolver that reads from `config/ai-tools.php` | +| `ToolProviderRegistry` | Singleton registry that collects all tool providers for discovery | +| `IntegrationCoreServiceProvider` | Binds everything with sensible defaults (all overridable) | + +## Quick Start: Building an Integration Package + +### 1. Implement `ToolProvider` + +```php +use OpenCompany\IntegrationCore\Contracts\Tool; +use OpenCompany\IntegrationCore\Contracts\ToolProvider; + +class WeatherToolProvider implements ToolProvider +{ + public function appName(): string + { + return 'weather'; + } + + public function appMeta(): array + { + return [ + 'label' => 'weather, forecasts, temperature', + 'description' => 'Weather data and forecasts', + 'icon' => 'ph:cloud-sun', + 'logo' => 'ph:cloud-sun', + ]; + } + + public function tools(): array + { + return [ + 'get_weather' => [ + 'class' => GetWeather::class, + 'type' => 'read', + 'name' => 'Get Weather', + 'description' => 'Current weather and forecasts for any location.', + 'icon' => 'ph:cloud-sun', + ], + ]; + } + + public function isIntegration(): bool + { + return true; // Can be toggled per agent + } + + public function createTool(string $class, array $context = []): Tool + { + $credentials = app(\OpenCompany\IntegrationCore\Contracts\CredentialResolver::class); + + return new GetWeather( + apiKey: $credentials->get('weather', 'api_key'), + units: $context['units'] ?? 'metric', + ); + } + + public function luaDocsPath(): ?string + { + return null; // Or: __DIR__ . '/../lua-docs/weather.md' + } +} +``` + +### 2. Register in Your Service Provider + +```php +use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; + +class WeatherServiceProvider extends ServiceProvider +{ + public function boot(): void + { + if ($this->app->bound(ToolProviderRegistry::class)) { + $this->app->make(ToolProviderRegistry::class) + ->register(new WeatherToolProvider()); + } + } +} +``` + +### 3. Create Your Tool Class + +```php +use OpenCompany\IntegrationCore\Contracts\Tool; +use OpenCompany\IntegrationCore\Support\ToolResult; + +class GetWeather implements Tool +{ + public function __construct( + private string $apiKey, + private string $units = 'metric', + ) {} + + public function name(): string + { + return 'get_weather'; + } + + public function description(): string + { + return 'Get current weather and forecasts for any location.'; + } + + public function parameters(): array + { + return [ + 'location' => [ + 'type' => 'string', + 'required' => true, + 'description' => 'City name or coordinates', + ], + 'days' => [ + 'type' => 'integer', + 'description' => 'Forecast days (default: 1)', + ], + ]; + } + + public function execute(array $args): ToolResult + { + $location = $args['location'] ?? ''; + if (empty($location)) { + return ToolResult::error('Location is required.'); + } + + // Your implementation... + $data = $this->fetchWeather($location, $args['days'] ?? 1); + + return ToolResult::success($data); + } +} +``` + +## Credential Management + +The `CredentialResolver` interface abstracts where API keys come from. Integration packages call `CredentialResolver` to get credentials without knowing or caring about the storage backend. + +**In OpenCompany**, credentials are managed through the Integrations UI and stored encrypted in the database. Users never need to touch config files — everything is configured through the admin interface. + +**For standalone usage** in other Laravel apps, the default `ConfigCredentialResolver` reads from a config file: + +```php +// config/ai-tools.php +return [ + 'plausible' => [ + 'api_key' => env('PLAUSIBLE_API_KEY'), + 'url' => env('PLAUSIBLE_URL', 'https://plausible.io'), + ], +]; +``` + +You can swap the resolver to use any storage backend (database, vault, secrets manager) by binding your own implementation: + +```php +$this->app->singleton( + \OpenCompany\IntegrationCore\Contracts\CredentialResolver::class, + YourCustomResolver::class +); +``` + +## Integration Packages + +All installed integration packages auto-register via Laravel service provider discovery. The `ToolProviderRegistry` collects them: + +```php +$registry = app(ToolProviderRegistry::class); + +$registry->all(); // All registered providers +$registry->has('celestial'); // Check if a provider exists +$registry->get('celestial'); // Get a specific provider +``` + +## Requirements + +- PHP 8.2+ +- Laravel 11 or 12 (for service provider auto-discovery; contracts work without Laravel) + +## License + +MIT — see [LICENSE](LICENSE) diff --git a/docs/ecosystem/integrations/exchangerate/README.md b/docs/ecosystem/integrations/exchangerate/README.md new file mode 100644 index 0000000..ef16d23 --- /dev/null +++ b/docs/ecosystem/integrations/exchangerate/README.md @@ -0,0 +1,28 @@ +# Exchange Rate Integration + +Currency exchange rate integration for the OpenCompany integration ecosystem. Provides access to 340 fiat currencies, cryptocurrencies, and precious metals via the fawazahmed0/exchange-api. + +No API key required. + +## Tools + +- **exchangerate_search** — List and search available currencies (fiat, crypto, metals) +- **exchangerate_convert** — Convert currencies, get rates, compare historical rates + +## Popular Currency Codes + +| Code | Currency | +|------|----------| +| usd | US Dollar | +| eur | Euro | +| gbp | British Pound | +| jpy | Japanese Yen | +| cny | Chinese Yuan | +| btc | Bitcoin | +| eth | Ethereum | +| xau | Gold (troy oz) | +| xag | Silver (troy oz) | + +## License + +MIT diff --git a/docs/ecosystem/integrations/exchangerate/exchangerate.md b/docs/ecosystem/integrations/exchangerate/exchangerate.md new file mode 100644 index 0000000..692bddf --- /dev/null +++ b/docs/ecosystem/integrations/exchangerate/exchangerate.md @@ -0,0 +1,118 @@ +# Exchange Rate — Lua API Reference + +No API key needed. Supports 340+ currencies: fiat, crypto, and precious metals. + +## exchangerate_convert_currency + +Convert an amount from one currency to another. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `from` | string | yes | Source currency code (e.g. `"usd"`, `"btc"`, `"xau"`) | +| `to` | string | yes | Target currency code (e.g. `"eur"`, `"jpy"`) | +| `amount` | string | no | Amount to convert (default: `"1"`) | +| `date` | string | no | Date for the rate: `"YYYY-MM-DD"` or `"latest"` (default) | + +```lua +local result = exchangerate_convert_currency({ + from = "usd", + to = "eur", + amount = "100" +}) + +log("100 USD = " .. result.result .. " EUR") +``` + +## exchangerate_history + +Compare a currency pair across multiple dates. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `from` | string | yes | Source currency code | +| `to` | string | yes | Target currency code | +| `dates` | string | yes | Comma-separated dates (e.g. `"2026-01-01,2026-02-01,2026-03-01"`) | + +```lua +local result = exchangerate_history({ + from = "usd", + to = "eur", + dates = "2026-01-01,2026-02-01,2026-03-01" +}) + +for _, h in ipairs(result.history) do + log(h.date .. ": " .. h.rate) +end +-- result.change.percentage shows overall change +``` + +## exchangerate_list_currencies + +List and search available currencies. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `query` | string | no | Filter by code or name (e.g. `"dollar"`, `"btc"`, `"gold"`) | + +```lua +-- Find gold-related currencies +local result = exchangerate_list_currencies({ query = "gold" }) + +for _, c in ipairs(result.currencies) do + log(c.code .. ": " .. c.name) +end +``` + +## exchangerate_rates + +Get all exchange rates for a base currency (not shown in detail -- pass `base` currency code). + +## exchangerate_popular_currencies + +Show commonly used currency codes. No parameters. + +```lua +local result = exchangerate_popular_currencies({}) +``` + +## Examples + +### Convert 500 EUR to USD + +```lua +local result = exchangerate_convert_currency({ + from = "eur", + to = "usd", + amount = "500" +}) +``` + +### Historical rate on a specific date + +```lua +local result = exchangerate_convert_currency({ + from = "gbp", + to = "jpy", + amount = "1", + date = "2025-06-15" +}) +``` + +### Track EUR/USD over several months + +```lua +local result = exchangerate_history({ + from = "eur", + to = "usd", + dates = "2025-10-01,2025-11-01,2025-12-01,2026-01-01,2026-02-01,2026-03-01" +}) + +log("Change: " .. result.change.percentage .. "%") +``` + +### Find a currency code + +```lua +local result = exchangerate_list_currencies({ query = "peso" }) +-- Returns matching currencies like MXN (Mexican Peso), ARS (Argentine Peso), etc. +``` diff --git a/docs/ecosystem/integrations/google/README.md b/docs/ecosystem/integrations/google/README.md new file mode 100644 index 0000000..89177af --- /dev/null +++ b/docs/ecosystem/integrations/google/README.md @@ -0,0 +1,129 @@ +# Google Integration + +Google Calendar, Gmail, Google Drive, Google Contacts, Google Sheets, Google Search Console, Google Tasks, Google Analytics, Google Docs, and Google Forms integration for the Laravel AI SDK. Part of the **OpenCompany** integration ecosystem — an open platform where AI agents collaborate with humans to run organizations. + +## Integrations + +This package registers **ten separate integrations**, each appearing independently on the integrations page: + +### Google Calendar (3 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_calendar_list` | read | List calendars and search/list events | +| `google_calendar_event` | write | Create, update, delete, or quick-add calendar events | +| `google_calendar_freebusy` | read | Check free/busy status across calendars | + +### Gmail (4 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `gmail_search` | read | Search and list email messages | +| `gmail_read` | read | Get full email content | +| `gmail_send` | write | Send emails or create/send drafts | +| `gmail_manage` | write | Labels, read/unread, trash, and archive | + +### Google Drive (3 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_drive_search` | read | Search and retrieve files | +| `google_drive_manage` | write | Create, rename, move, copy, and delete files | +| `google_drive_share` | write | Share files and manage permissions | + +### Google Contacts (2 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_contacts_search` | read | Search, list, and look up contacts | +| `google_contacts_manage` | write | Create, update, and delete contacts | + +### Google Sheets (3 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_sheets_read` | read | Read spreadsheet data, metadata, and search | +| `google_sheets_write` | write | Create spreadsheets and write data | +| `google_sheets_manage` | write | Manage sheets, rows, columns, sorting, and filters | + +### Google Search Console (2 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_search_console_query` | read | Search performance, URL inspection, and sitemaps | +| `google_search_console_manage` | write | Submit sitemaps and manage site properties | + +### Google Tasks (2 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_tasks_read` | read | List task lists and tasks, get task details | +| `google_tasks_manage` | write | Create, update, complete, delete, and organize tasks | + +### Google Analytics (1 tool) + +| Tool | Type | Description | +|------|------|-------------| +| `google_analytics_query` | read | Website traffic reports, realtime data, and metadata discovery | + +### Google Docs (2 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_docs_read` | read | Read document content, structure, and search text | +| `google_docs_write` | write | Create, edit, format, and manage documents | + +### Google Forms (2 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_forms_read` | read | Read form structure and responses | +| `google_forms_write` | write | Create, edit, and manage forms | + +## Installation + +```bash +composer require opencompanyapp/integration-google +``` + +The service provider is auto-discovered by Laravel. + +## Configuration + +All integrations share the same Google Cloud OAuth credentials (Client ID and Secret only need to be entered once): + +| Key | Type | Required | Description | +|-----|------|----------|-------------| +| `client_id` | text | Yes | OAuth 2.0 Client ID from Google Cloud Console | +| `client_secret` | secret | Yes | OAuth 2.0 Client Secret | +| `access_token` | oauth | Yes | Connected via OAuth flow | + +### Setup + +1. Create a project in [Google Cloud Console](https://console.cloud.google.com/) +2. Enable the **Google Calendar API**, **Gmail API**, **Google Drive API**, **People API**, **Google Sheets API**, **Google Search Console API**, **Google Tasks API**, **Google Analytics Data API**, **Google Docs API**, and/or **Google Forms API** +3. Create OAuth 2.0 credentials (Web application type) +4. Add the redirect URI: `{your-domain}/api/integrations/google/oauth/callback` +5. Enter Client ID and Secret in Settings → Integrations +6. Click "Connect" to authorize via OAuth + +## Quick Start + +```php +use Laravel\Ai\Facades\Ai; + +$response = Ai::tools(['google_calendar_list', 'google_calendar_event']) + ->prompt('List my calendars, then create a meeting called "Team Standup" tomorrow at 10am.'); +``` + +## Dependencies + +| Package | Version | +|---------|---------| +| PHP | ^8.2 | +| opencompanyapp/integration-core | ^2.0 | +| laravel/ai | ^0.1 | + +## License + +MIT diff --git a/docs/ecosystem/integrations/google/google.md b/docs/ecosystem/integrations/google/google.md new file mode 100644 index 0000000..08d90ed --- /dev/null +++ b/docs/ecosystem/integrations/google/google.md @@ -0,0 +1,207 @@ +# Google Integration — Lua API Supplement + +## Gmail + +Send email with CC/BCC: + +```lua +gmail_send_email({ + to = "alice@example.com", + subject = "Q1 Report", + body = "Please find the report attached.", + cc = "bob@example.com, carol@example.com", + bcc = "manager@example.com", +}) +``` + +Search, read, then reply workflow: + +```lua +-- Step 1: Search for messages +local results = gmail_search_emails({ + query = "from:alice subject:meeting is:unread", + max_results = 5, +}) + +-- Step 2: Read the full message +local msg = gmail_read({ message_id = results.messages[1].id }) + +-- Step 3: Reply in the same thread +gmail_reply({ + message_id = msg.id, + thread_id = msg.threadId, + body = "Thanks, I'll be there.", + cc = "team@example.com", +}) +``` + +Draft vs direct send -- use `gmail_create_draft` to stage an email without sending, then `gmail_send_draft` to send it later: + +```lua +-- Create a draft (not sent) +local draft = gmail_create_draft({ + to = "client@example.com", + subject = "Proposal", + body = "Draft content here...", +}) + +-- Send it later using the draft ID +gmail_send_draft({ draft_id = draft.draftId }) +``` + +## Google Sheets + +Values use 2D Lua tables -- each inner table is one row: + +```lua +local values = { + {"Name", "Age", "City"}, + {"Alice", 30, "NYC"}, + {"Bob", 25, "LA"}, +} +``` + +A1 notation examples: + +- `"Sheet1!A1:D10"` -- specific range +- `"Sheet1!A:A"` -- entire column +- `"Sheet1"` -- entire sheet +- `"'My Sheet'!A1:B2"` -- sheet names with spaces need quotes + +Input modes: `"user_entered"` (default) parses formulas and dates, `"raw"` stores literal strings. + +Create a spreadsheet, add a sheet, write data: + +```lua +-- Create a new spreadsheet +local ss = google_sheets_create({ title = "Q1 Sales" }) +local id = ss.spreadsheetId + +-- Add a second sheet/tab +google_sheets_add_sheet({ + spreadsheet_id = id, + title = "By Region", +}) + +-- Write data with headers +google_sheets_write_range({ + spreadsheet_id = id, + range = "Sheet1!A1:C3", + values = { + {"Region", "Revenue", "Growth"}, + {"North", 50000, "=B2/50000-1"}, + {"South", 42000, "=B3/42000-1"}, + }, + input = "user_entered", -- parses the formulas +}) +``` + +Read data back: + +```lua +local data = google_sheets_read_range({ + spreadsheet_id = id, + range = "Sheet1!A1:C3", + render = "formatted", -- "formatted" (default), "unformatted", or "formula" +}) +-- data.values is a 2D table: {{"Region","Revenue","Growth"}, {"North","50000","0%"}, ...} +``` + +Append vs write -- `google_sheets_append` auto-detects the last row and adds below it: + +```lua +google_sheets_append({ + spreadsheet_id = id, + range = "Sheet1", + values = { + {"East", 38000, "=B4/38000-1"}, + }, + input = "user_entered", +}) +``` + +## Google Calendar + +Create a timed event with attendees: + +```lua +google_calendar_create_event({ + summary = "Sprint Planning", + description = "Bi-weekly sprint planning session", + location = "Conference Room B", + start_date_time = "2026-04-01T10:00:00-05:00", + end_date_time = "2026-04-01T11:00:00-05:00", + time_zone = "America/New_York", + attendees = "alice@example.com, bob@example.com", + recurrence = "RRULE:FREQ=WEEKLY;INTERVAL=2;COUNT=10", +}) +``` + +Create an all-day event: + +```lua +google_calendar_create_event({ + summary = "Company Holiday", + start_date = "2026-07-04", + end_date = "2026-07-05", +}) +``` + +Date/time format: ISO 8601 with timezone offset for timed events (`2026-04-01T10:00:00-05:00`), plain `YYYY-MM-DD` for all-day events. Use `time_zone` for IANA names like `"America/New_York"`. + +## Google Drive + +Search for files, then get details: + +```lua +-- Search by name and type +local results = google_drive_search_files({ + query = "name contains 'report' and mimeType = 'application/vnd.google-apps.spreadsheet'", + max_results = 10, + order_by = "modifiedTime desc", +}) + +-- Get full file info (and optionally export content) +local file = google_drive_get_file({ + file_id = results.files[1].id, + export_as = "csv", -- "text", "csv", or "markdown" (Google Workspace files only) +}) +``` + +Common Drive query patterns: + +- `"name contains 'budget'"` -- by name +- `"mimeType = 'application/vnd.google-apps.spreadsheet'"` -- Sheets +- `"mimeType = 'application/vnd.google-apps.document'"` -- Docs +- `"mimeType = 'application/vnd.google-apps.folder'"` -- folders +- `"modifiedTime > '2026-01-01'"` -- recently modified +- `"sharedWithMe = true"` -- shared files +- `"'FOLDER_ID' in parents"` -- files in a folder + +Share a file: + +```lua +-- Share with a specific user +google_drive_share_file({ + file_id = "abc123", + role = "writer", -- "reader", "writer", or "commenter" + email = "alice@example.com", + notify = "true", +}) + +-- Share with anyone via link +google_drive_share_file({ + file_id = "abc123", + role = "reader", + type = "anyone", +}) +``` + +## Tips + +- All Google APIs share the same OAuth token -- if Gmail is connected, the same credentials work for Sheets, Drive, Calendar, etc. +- Use `input = "user_entered"` when writing Sheets data that contains formulas (e.g., `"=SUM(A1:A10)"`) or dates. Use `"raw"` for literal strings. +- Sheet names with spaces must be quoted in A1 notation: `"'My Sheet'!A1:B2"`. +- `google_sheets_append` is better than `google_sheets_write_range` when adding rows to an existing table -- it auto-detects where the data ends. +- Calendar event times use ISO 8601 with timezone offset. Always include the offset or set `time_zone` explicitly. +- Drive search excludes trashed files by default. diff --git a/docs/ecosystem/integrations/mermaid/README.md b/docs/ecosystem/integrations/mermaid/README.md new file mode 100644 index 0000000..d14adff --- /dev/null +++ b/docs/ecosystem/integrations/mermaid/README.md @@ -0,0 +1,59 @@ +# Integration: Mermaid + +> Mermaid diagram rendering integration for the [Laravel AI SDK](https://github.com/laravel/ai). Part of the [OpenCompany](https://github.com/OpenCompanyApp) integration ecosystem. + +Generates PNG images from Mermaid diagram syntax. Supports flowcharts, sequence diagrams, class diagrams, state diagrams, ER diagrams, Gantt charts, pie charts, git graphs, and more. + +## About OpenCompany + +[OpenCompany](https://github.com/OpenCompanyApp) is an AI-powered workplace platform where teams deploy and coordinate multiple AI agents alongside human collaborators. It combines team messaging, document collaboration, task management, and intelligent automation in a single workspace — with built-in approval workflows and granular permission controls so organizations can adopt AI agents safely and transparently. + +OpenCompany is built with Laravel, Vue 3, and Inertia.js. Learn more at [github.com/OpenCompanyApp](https://github.com/OpenCompanyApp). + +## Prerequisites + +Requires the [Mermaid CLI](https://github.com/mermaid-js/mermaid-cli) (`mmdc`) to be installed: + +```bash +npm install @mermaid-js/mermaid-cli +``` + +## Installation + +```console +composer require opencompanyapp/integration-mermaid +``` + +Laravel auto-discovers the service provider. No manual registration needed. + +## Available Tools + +| Tool | Type | Description | +|------|------|-------------| +| `render_mermaid` | write | Render Mermaid diagram syntax to a PNG image | + +## Quick Start + +```php +use Laravel\Ai\Facades\Ai; +use OpenCompany\Integrations\Mermaid\Tools\RenderMermaid; +use OpenCompany\Integrations\Mermaid\MermaidService; + +$tool = new RenderMermaid(app(MermaidService::class)); + +$response = Ai::agent() + ->tools([$tool]) + ->prompt('Create a flowchart showing the user registration process'); +``` + +## Dependencies + +| Package | Purpose | +|---------|---------| +| `opencompanyapp/integration-core` | ToolProvider contract and registry | +| `laravel/ai` | Laravel AI SDK Tool interface | +| `@mermaid-js/mermaid-cli` | Mermaid to PNG rendering (npm) | + +## License + +MIT — see [LICENSE](LICENSE) diff --git a/docs/ecosystem/integrations/plausible/README.md b/docs/ecosystem/integrations/plausible/README.md new file mode 100644 index 0000000..ddb0ee1 --- /dev/null +++ b/docs/ecosystem/integrations/plausible/README.md @@ -0,0 +1,132 @@ +# Integration: Plausible + +> Plausible Analytics integration for the [Laravel AI SDK](https://github.com/laravel/ai) — query stats, realtime visitors, manage sites and goals. Part of the [OpenCompany](https://github.com/OpenCompanyApp) integration ecosystem. + +Give your AI agents access to privacy-friendly web analytics. Query traffic data, track realtime visitors, and manage sites and conversion goals — all through the [Plausible Analytics](https://plausible.io) API. + +## About OpenCompany + +[OpenCompany](https://github.com/OpenCompanyApp) is an AI-powered workplace platform where teams deploy and coordinate multiple AI agents alongside human collaborators. It combines team messaging, document collaboration, task management, and intelligent automation in a single workspace — with built-in approval workflows and granular permission controls so organizations can adopt AI agents safely and transparently. + +This Plausible tool lets AI agents query website analytics, monitor realtime traffic, and manage tracking configuration — giving agents data-driven awareness of web properties. + +OpenCompany is built with Laravel, Vue 3, and Inertia.js. Learn more at [github.com/OpenCompanyApp](https://github.com/OpenCompanyApp). + +## Installation + +```console +composer require opencompanyapp/integration-plausible +``` + +Laravel auto-discovers the service provider. No manual registration needed. + +## Configuration + +This tool requires a Plausible Analytics API key. + +**In OpenCompany**, credentials are managed through the Integrations UI. + +**For standalone usage**, create `config/ai-tools.php`: + +```php +return [ + 'plausible' => [ + 'api_key' => env('PLAUSIBLE_API_KEY'), + 'url' => env('PLAUSIBLE_URL', 'https://plausible.io'), + 'sites' => ['example.com', 'blog.example.com'], + ], +]; +``` + +## Available Tools + +| Tool | Type | Description | +|------|------|-------------| +| `plausible_query_stats` | read | Query website analytics — aggregate, timeseries, breakdowns by dimension | +| `plausible_realtime_visitors` | read | Current realtime visitor count (last 5 minutes) | +| `plausible_list_sites` | read | List all tracked websites | +| `plausible_create_site` | write | Register a new website for tracking | +| `plausible_delete_site` | write | Remove a website from tracking | +| `plausible_list_goals` | read | List conversion goals for a site | +| `plausible_create_goal` | write | Create a conversion goal (page visit or custom event) | +| `plausible_delete_goal` | write | Delete a conversion goal | + +## Quick Start + +```php +use Laravel\Ai\Facades\Ai; +use OpenCompany\Integrations\Plausible\PlausibleService; +use OpenCompany\Integrations\Plausible\Tools\PlausibleQueryStats; +use OpenCompany\Integrations\Plausible\Tools\PlausibleRealtimeVisitors; + +// Create tools +$service = app(PlausibleService::class); +$tools = [ + new PlausibleQueryStats($service), + new PlausibleRealtimeVisitors($service), +]; + +// Use with an AI agent +$response = Ai::agent() + ->tools($tools) + ->prompt('How many visitors did example.com get this month?'); +``` + +### Via ToolProvider (recommended) + +If you have `integration-core` installed, all 8 tools auto-register with the `ToolProviderRegistry`: + +```php +use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; + +$registry = app(ToolProviderRegistry::class); +$provider = $registry->get('plausible'); + +// Create any tool via the provider +$tool = $provider->createTool( + \OpenCompany\Integrations\Plausible\Tools\PlausibleQueryStats::class +); +``` + +## Standalone Service Usage + +```php +use OpenCompany\Integrations\Plausible\PlausibleService; + +$service = app(PlausibleService::class); + +// Query stats +$stats = $service->query([ + 'site_id' => 'example.com', + 'metrics' => ['visitors', 'pageviews'], + 'date_range' => '30d', +]); + +// Realtime visitors +$count = $service->realtimeVisitors('example.com'); + +// List sites +$sites = $service->listSites(); + +// Manage goals +$goals = $service->listGoals('example.com'); +$service->createGoal('example.com', ['goal_type' => 'event', 'event_name' => 'Signup']); +``` + +## Dependencies + +| Package | Purpose | +|---------|---------| +| [opencompanyapp/integration-core](https://github.com/OpenCompanyApp/integration-core) | ToolProvider contract and registry | +| [laravel/ai](https://github.com/laravel/ai) | Laravel AI SDK Tool contract | + +## Requirements + +- PHP 8.2+ +- Laravel 11 or 12 +- [Laravel AI SDK](https://github.com/laravel/ai) ^0.1 +- A [Plausible Analytics](https://plausible.io) account with API access + +## License + +MIT — see [LICENSE](LICENSE) diff --git a/docs/ecosystem/integrations/plausible/plausible.md b/docs/ecosystem/integrations/plausible/plausible.md new file mode 100644 index 0000000..fdbf85e --- /dev/null +++ b/docs/ecosystem/integrations/plausible/plausible.md @@ -0,0 +1,144 @@ +# Plausible Analytics — Lua API Reference + +## plausible_query_stats + +Query website analytics with aggregate stats, timeseries, or breakdowns. + +### Parameters + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `site_id` | string | yes | Site domain, e.g. `"example.com"` | +| `metrics` | array | yes | Metrics to retrieve (see list below) | +| `date_range` | string | yes | Time period (see options below) | +| `dimensions` | array | no | Group results by dimension (see list below) | +| `filters` | string | no | JSON-encoded filter expressions | +| `date_from` | string | no | Start date (ISO 8601) when `date_range="custom"` | +| `date_to` | string | no | End date (ISO 8601) when `date_range="custom"` | +| `order_by` | string | no | JSON-encoded sort order | +| `limit` | integer | no | Max results (default: 10000) | + +### Date Range Options + +`"7d"`, `"28d"`, `"30d"`, `"month"`, `"3mo"`, `"6mo"`, `"12mo"`, `"custom"` + +When using `"custom"`, you must also pass `date_from` and `date_to`. + +### Available Metrics + +`visitors`, `pageviews`, `visits`, `bounce_rate`, `visit_duration`, `views_per_visit`, `events`, `conversion_rate` + +### Available Dimensions + +| Dimension | Description | +|-----------|-------------| +| `visit:source` | Traffic source (Google, Twitter, etc.) | +| `visit:country` | Country code | +| `visit:city` | City name | +| `visit:device` | Device type (Desktop, Mobile, Tablet) | +| `visit:browser` | Browser name | +| `visit:os` | Operating system | +| `event:page` | Page path | +| `event:name` | Custom event name | +| `time:day` | Day-level timeseries | +| `time:month` | Month-level timeseries | + +### Filter Syntax + +Filters are a JSON string containing an array of filter expressions: + +``` +[["operator", "dimension", ["value1", "value2"]]] +``` + +Operators: `is`, `is_not`, `contains`, `does_not_contain`, `matches`, `does_not_match` + +### Order By Syntax + +``` +[["metric_name", "desc"]] +``` + +## Examples + +### Top pages by visitors (last 30 days) + +```lua +local result = plausible_query_stats({ + site_id = "example.com", + metrics = {"visitors", "pageviews"}, + date_range = "30d", + dimensions = {"event:page"}, + order_by = '[["visitors", "desc"]]', + limit = 20 +}) + +for _, row in ipairs(result.rows) do + log(row["event:page"] .. ": " .. row.visitors .. " visitors") +end +``` + +### Traffic by country (custom date range) + +```lua +local result = plausible_query_stats({ + site_id = "example.com", + metrics = {"visitors", "visits", "bounce_rate"}, + date_range = "custom", + date_from = "2026-01-01", + date_to = "2026-01-31", + dimensions = {"visit:country"}, + order_by = '[["visitors", "desc"]]', + limit = 10 +}) + +for _, row in ipairs(result.rows) do + log(row["visit:country"] .. ": " .. row.visitors .. " visitors, " .. row.bounce_rate .. "% bounce") +end +``` + +### Filter to specific country + +```lua +local result = plausible_query_stats({ + site_id = "example.com", + metrics = {"visitors", "pageviews"}, + date_range = "7d", + dimensions = {"event:page"}, + filters = '[["is", "visit:country", ["US"]]]' +}) +``` + +### Filter pages containing /blog + +```lua +local result = plausible_query_stats({ + site_id = "example.com", + metrics = {"visitors", "pageviews"}, + date_range = "30d", + dimensions = {"event:page"}, + filters = '[["contains", "event:page", ["/blog"]]]', + order_by = '[["pageviews", "desc"]]' +}) +``` + +### Daily timeseries + +```lua +local result = plausible_query_stats({ + site_id = "example.com", + metrics = {"visitors"}, + date_range = "30d", + dimensions = {"time:day"} +}) +``` + +### Aggregate totals (no dimensions) + +```lua +local result = plausible_query_stats({ + site_id = "example.com", + metrics = {"visitors", "pageviews", "bounce_rate", "visit_duration"}, + date_range = "30d" +}) +``` diff --git a/docs/ecosystem/integrations/ticktick/README.md b/docs/ecosystem/integrations/ticktick/README.md new file mode 100644 index 0000000..753d9e3 --- /dev/null +++ b/docs/ecosystem/integrations/ticktick/README.md @@ -0,0 +1,106 @@ +# Integration: TickTick + +> Task management integration for the [Laravel AI SDK](https://github.com/laravel/ai) — manage projects, create tasks, set priorities, track completion. Part of the [OpenCompany](https://github.com/OpenCompanyApp) integration ecosystem. + +Give your AI agents the ability to manage TickTick tasks and projects. Supports both direct access token and OAuth authentication, plus the Dida365 variant. + +## About OpenCompany + +[OpenCompany](https://github.com/OpenCompanyApp) is an AI-powered workplace platform where teams deploy and coordinate multiple AI agents alongside human collaborators. It combines team messaging, document collaboration, task management, and intelligent automation in a single workspace — with built-in approval workflows and granular permission controls so organizations can adopt AI agents safely and transparently. + +This TickTick tool lets AI agents manage tasks and projects on behalf of users — creating tasks from conversations, checking project status, completing items, and keeping task lists organized automatically. + +OpenCompany is built with Laravel, Vue 3, and Inertia.js. Learn more at [github.com/OpenCompanyApp](https://github.com/OpenCompanyApp). + +## Installation + +```console +composer require opencompanyapp/integration-ticktick +``` + +Laravel auto-discovers the service provider. No manual registration needed. + +## Available Actions + +| Action | Description | Required Params | +|--------|-------------|-----------------| +| `ticktick_list_projects` | List all TickTick projects | — | +| `ticktick_get_project` | Get a project with its tasks and sections | `projectId` | +| `ticktick_create_project` | Create a new project (list) | `name` | +| `ticktick_delete_project` | Delete a project | `projectId` | +| `ticktick_get_tasks` | Get all tasks in a project | `projectId` | +| `ticktick_create_task` | Create a new task | `title`, `projectId` | +| `ticktick_update_task` | Update an existing task | `taskId`, `projectId` | +| `ticktick_complete_task` | Mark a task as complete | `taskId`, `projectId` | +| `ticktick_delete_task` | Delete a task | `taskId`, `projectId` | + +## Authentication + +Two authentication methods are supported — select your preferred method in the integration settings: + +### Access Token (recommended for quick setup) + +1. Go to [developer.ticktick.com/manage](https://developer.ticktick.com/manage) +2. Generate an access token +3. Paste it in the integration config + +### OAuth (Client ID + Secret) + +1. Register an app at the TickTick Developer Center +2. Enter your Client ID and Client Secret in the config +3. Use the OAuth authorize flow to connect + +## Quick Start: Use with Laravel AI SDK + +```php +use Laravel\Ai\Facades\Ai; +use OpenCompany\Integrations\TickTick\Tools\TickTickListProjects; +use OpenCompany\Integrations\TickTick\TickTickService; + +// Create the tool +$tool = new TickTickListProjects( + service: app(TickTickService::class), +); + +// Use with an AI agent +$response = Ai::agent() + ->tools([$tool]) + ->prompt('What projects do I have in TickTick?'); +``` + +### Via ToolProvider (recommended) + +If you have `integration-core` installed, the tool auto-registers with the `ToolProviderRegistry`: + +```php +use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; + +$registry = app(ToolProviderRegistry::class); +$provider = $registry->get('ticktick'); + +// Create a tool +$tool = $provider->createTool( + \OpenCompany\Integrations\TickTick\Tools\TickTickCreateTask::class, +); +``` + +## Dida365 Support + +TickTick operates as Dida365 in China. To use this integration with Dida365, change the API Base URL in settings to `https://api.dida365.com`. + +## Dependencies + +| Package | Purpose | +|---------|---------| +| [opencompanyapp/integration-core](https://github.com/OpenCompanyApp/integration-core) | ToolProvider contract and registry | +| [laravel/ai](https://github.com/laravel/ai) | Laravel AI SDK Tool contract | + +## Requirements + +- PHP 8.2+ +- Laravel 11 or 12 +- [Laravel AI SDK](https://github.com/laravel/ai) ^0.1 + +## License + +MIT — see [LICENSE](LICENSE) diff --git a/docs/ecosystem/integrations/ticktick/ticktick.md b/docs/ecosystem/integrations/ticktick/ticktick.md new file mode 100644 index 0000000..e21e488 --- /dev/null +++ b/docs/ecosystem/integrations/ticktick/ticktick.md @@ -0,0 +1,124 @@ +# TickTick — Lua API Reference + +## ticktick_list_projects + +List all projects (task lists). No parameters. Call this first to discover project IDs. + +```lua +local projects = ticktick_list_projects({}) + +for _, p in ipairs(projects) do + log(p.name .. " (id: " .. p.id .. ")") +end +``` + +## ticktick_get_tasks + +Get all tasks in a project. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `project_id` | string | yes | Project ID (from `ticktick_list_projects`) | + +```lua +local tasks = ticktick_get_tasks({ project_id = "abc123" }) +``` + +## ticktick_create_task + +Create a new task. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `title` | string | yes | Task title | +| `project_id` | string | no | Project ID. Omit to add to Inbox | +| `content` | string | no | Description/notes | +| `start_date` | string | no | ISO 8601 (e.g. `"2026-03-30T09:00:00+0000"`) | +| `due_date` | string | no | ISO 8601 (e.g. `"2026-03-30T17:00:00+0000"`) | +| `priority` | integer | no | `0` = none, `1` = low, `3` = medium, `5` = high | +| `is_all_day` | boolean | no | `true` for all-day, `false` for specific times | +| `items` | string | no | JSON array of subtasks (see below) | + +### Subtask format + +``` +[{"title": "Subtask 1", "status": 0}, {"title": "Subtask 2", "status": 0}] +``` + +Status: `0` = unchecked, `2` = checked. + +## ticktick_complete_task + +Mark a task as complete. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `project_id` | string | yes | Project ID the task belongs to | +| `task_id` | string | yes | Task ID to complete | + +## ticktick_update_task + +Update an existing task (same fields as create, plus `task_id` and `project_id`). + +## ticktick_delete_task + +Delete a task (requires `project_id` and `task_id`). + +## Examples + +### List projects, then create a task + +```lua +-- Step 1: find the project +local projects = ticktick_list_projects({}) +local project_id = nil +for _, p in ipairs(projects) do + if p.name == "Work" then + project_id = p.id + break + end +end + +-- Step 2: create a high-priority task with a due date +ticktick_create_task({ + title = "Finish quarterly report", + project_id = project_id, + content = "Include revenue and churn metrics", + due_date = "2026-04-01T17:00:00+0000", + priority = 5, + is_all_day = false +}) +``` + +### Create a task with subtasks + +```lua +ticktick_create_task({ + title = "Launch checklist", + project_id = project_id, + priority = 3, + items = '[{"title": "Update changelog", "status": 0}, {"title": "Tag release", "status": 0}, {"title": "Notify team", "status": 0}]' +}) +``` + +### Complete a task + +```lua +-- Step 1: get tasks in the project +local tasks = ticktick_get_tasks({ project_id = "abc123" }) + +-- Step 2: complete the first one +ticktick_complete_task({ + project_id = "abc123", + task_id = tasks[1].id +}) +``` + +### Create a quick Inbox task + +```lua +ticktick_create_task({ + title = "Buy groceries", + priority = 1 +}) +``` diff --git a/docs/ecosystem/integrations/trustmrr/README.md b/docs/ecosystem/integrations/trustmrr/README.md new file mode 100644 index 0000000..0ddb2ab --- /dev/null +++ b/docs/ecosystem/integrations/trustmrr/README.md @@ -0,0 +1,52 @@ +# Integration: TrustMRR + +Verified startup revenue data for the [OpenCompany](https://github.com/OpenCompanyApp) integration ecosystem. Browse startups, filter by revenue, MRR, asking price, growth, and more — all backed by real payment provider data. + +## Available Tools + +| Slug | Type | Description | +|------|------|-------------| +| `trustmrr_list_startups` | read | Browse and filter startups by revenue, MRR, growth, category, and sale status. | +| `trustmrr_get_startup` | read | Get full details for a startup including tech stack, cofounders, and extended metrics. | + +## Installation + +```bash +composer require opencompanyapp/integration-trustmrr +``` + +The service provider is auto-discovered by Laravel. + +## Configuration + +Add your TrustMRR API key via the Integrations settings page, or configure it directly: + +| Key | Description | +|-----|-------------| +| `api_key` | TrustMRR API key (starts with `tmrr_`). Generate at [TrustMRR Developer Dashboard](https://trustmrr.com/dashboard/developer). | + +## Quick Start + +```php +use OpenCompany\Integrations\TrustMrr\TrustMrrService; + +$service = app(TrustMrrService::class); + +// List top startups by revenue +$startups = $service->listStartups(['sort' => 'revenue-desc', 'limit' => 10]); + +// Get details for a specific startup +$startup = $service->getStartup('shipfast'); +``` + +## Dependencies + +| Package | Version | +|---------|---------| +| PHP | ^8.2 | +| opencompanyapp/integration-core | ^2.0 | +| laravel/ai | ^0.1 | + +## License + +MIT diff --git a/docs/ecosystem/integrations/worldbank/README.md b/docs/ecosystem/integrations/worldbank/README.md new file mode 100644 index 0000000..697b6ef --- /dev/null +++ b/docs/ecosystem/integrations/worldbank/README.md @@ -0,0 +1,28 @@ +# World Bank Integration + +World Bank economic indicators integration for the OpenCompany integration ecosystem. Provides access to 29,000+ economic indicators for 200+ countries via the World Bank Open Data API. + +No API key required. + +## Tools + +- **worldbank_search** — Discover countries, indicators, and topic categories +- **worldbank_data** — Fetch indicator data for countries with date ranges and comparisons + +## Common Indicators + +| Code | Description | +|------|-------------| +| NY.GDP.MKTP.CD | GDP (current US$) | +| NY.GDP.MKTP.KD.ZG | GDP growth (annual %) | +| NY.GDP.PCAP.CD | GDP per capita (current US$) | +| FP.CPI.TOTL.ZG | Inflation, consumer prices (annual %) | +| SL.UEM.TOTL.ZS | Unemployment (% of labor force) | +| SP.POP.TOTL | Population, total | +| SP.DYN.LE00.IN | Life expectancy at birth (years) | +| SI.POV.GINI | Gini index | +| EN.ATM.CO2E.PC | CO2 emissions (metric tons per capita) | + +## License + +MIT diff --git a/docs/ecosystem/iris/ecosystem-overview.md b/docs/ecosystem/iris/ecosystem-overview.md new file mode 100644 index 0000000..35607e6 --- /dev/null +++ b/docs/ecosystem/iris/ecosystem-overview.md @@ -0,0 +1,250 @@ +# Iris Ecosystem Overview + +Audit date: 2026-04-03 + +This document summarizes how Iris appears to fit into the broader +OpenCompany/KosmoKrator/token-commerce plan based on the current local docs and +code in: + +- `/Users/rutger/Projects/kosmokrator` +- `/Users/rutger/Sites/opencompany` + +Important: some of the strongest statements come from planning and confidential +architecture docs, not from already-shipped code. So this should be read as +"intended product/service direction" plus "current implementation reality." + +## Executive summary + +The intended architecture is not "Iris as just a proxy." + +The intended architecture is: + +- `prism-relay` = shared provider/model normalization library +- `Iris` = deployed inference proxy and settlement layer +- `OpenCompany` = team collaboration product +- `KosmoKrator` = open-source agent client +- `tokens.opencompany.app` = future token marketplace / token commerce product + +The business plan is to keep team SaaS billing and AI compute billing separate, +while making them feel connected in product UX. + +## The intended product split + +The clearest strategy document is +`/Users/rutger/Projects/kosmokrator/docs/confidential/business-and-token-architecture.md`. + +Its model is: + +- **OpenCompany** = team AI platform +- **Token Commerce** = token packs, drops, exchange +- **KosmoKrator** = open-source agent +- **Relay/Iris** = inference proxy, provider routing, token accounting + +That document explicitly describes: + +- `opencompany.app` for team management and seat-based billing +- `tokens.opencompany.app` for packs, drops, exchange, and token billing +- `relay.opencompany.app` as the inference proxy service + +In other words, Iris is meant to be infrastructure with product consequences: +it is where usage becomes billable. + +## What Iris is supposed to be + +Based on the same architecture doc, the intended responsibilities of Iris are: + +- token auth +- token balance accounting +- provider routing +- provider failover +- rate limiting +- model pricing / settlement +- OpenAI-compatible inference endpoint + +That makes Iris the service boundary between: + +- user-facing apps +- third-party API consumers +- upstream model providers + +The same doc treats Iris as the mission-critical piece because it is the one +service that can both execute inference and settle cost. + +## Why Iris exists separately from `prism-relay` + +The intended separation is: + +- `prism-relay` is the library +- `Iris` is the deployed service + +`prism-relay` owns: + +- provider/model registry +- aliases and normalization +- provider capabilities +- pricing metadata +- request/response normalization +- provider-specific runtime adapters where available + +Iris owns: + +- HTTP API surface +- auth +- token ledger +- settlement +- routing policy +- failover behavior +- rate limits +- balance endpoints + +That separation is consistent with the current codebase direction. + +## How KosmoKrator fits in + +KosmoKrator is currently both a consumer of `prism-relay` and part of the +strategic funnel into the rest of the ecosystem. + +Observed from local code/docs: + +- KosmoKrator already boots `RelayRegistryBuilder`, `RelayRegistry`, `Relay`, + and `RelayManager`. +- It uses relay for request normalization, error normalization, response + normalization, and prompt caching. +- Its README positions it as a multi-provider agent client with local and + configurable provider access. + +Strategically, the confidential business doc frames KosmoKrator as: + +- an MIT/open-source agent product +- something that can connect directly to the relay +- a separate buyer/user path from OpenCompany + +So KosmoKrator is both: + +- a real relay consumer +- a distribution wedge that can create demand for hosted compute tokens later + +## How OpenCompany fits in + +OpenCompany is the team product, not the token product. + +Current local docs show: + +- OpenCompany is the self-hosted collaboration platform for teams +- its AI stack uses Laravel AI SDK plus Prism +- it already tracks token usage and provider/model analytics internally +- it treats OpenRouter as an important provider option + +Strategically, the token/business architecture doc says OpenCompany should not +own token billing directly. Instead, OpenCompany should: + +- own platform subscription billing +- surface token balances and usage +- link out to token management +- send inference traffic through the relay service + +So OpenCompany is supposed to consume Iris, not absorb it. + +## Planned token marketplace + +The planned token-commerce product is explicitly described as separate from +OpenCompany. + +Its planned responsibilities are: + +- token pack browsing and purchase +- pack billing +- weekly token drops +- exchange listings / buy-sell order flow +- transaction history +- webhook handling for payments + +The reason for separating it is economic and product clarity: + +- OpenCompany sells seats to teams +- token commerce sells compute to individuals, teams, and third-party apps + +This is why the strategy docs keep repeating that tokens should not be buried +inside OpenCompany billing. + +## What this means for OpenRouter and other providers + +Within this architecture, a provider like OpenRouter is useful because it gives +the ecosystem broad model reach behind one provider integration, while Iris and +`prism-relay` preserve control of: + +- model catalog normalization +- internal accounting units +- provider capability rules +- API shape exposed to clients + +That means OpenCompany and KosmoKrator can treat OpenRouter as one normalized +provider among many, while Iris remains the thing that decides billing and +settlement. + +## What exists today versus what is still planned + +### Exists today + +- `prism-relay` as a real shared package +- Iris as the beginning of the standalone proxy/settlement service +- KosmoKrator using relay heavily in runtime +- OpenCompany using Prism and provider abstractions +- internal docs that clearly define the service split + +### Planned or partial + +- a full standalone token marketplace app +- production-grade token pack, drop, and exchange flows +- the complete "relay.opencompany.app" operational surface described in strategy +- perfect settlement controls and all policy enforcement inside Iris +- complete runtime parity for every provider in the generated relay registry + +## Current reality for Iris + +Iris is aligned with the intended direction, but it is not fully at the target +described in the architecture docs yet. + +From the current Iris codebase, the remaining notable gaps are: + +- exact preauthorization and reservation, not just post-settlement +- real rate-limit enforcement +- relay-normalized error mapping across the HTTP surface +- active routing failover policy +- product features around packs/drops/exchange are still absent from Iris itself + +See also: + +- `docs/missing-in-iris.md` + +## Practical interpretation + +If the ecosystem plan holds, Iris should become the canonical hosted compute +gateway for: + +- OpenCompany hosted usage +- KosmoKrator hosted mode +- third-party consumers using OpenCompany-issued tokens +- future token-commerce settlement + +That implies Iris should optimize for: + +- strict correctness in accounting +- provider abstraction stability +- simple client API compatibility +- auditable ledger behavior +- high-quality failure handling +- separation from UI/product concerns + +## Recommended framing for Iris + +The cleanest positioning is: + +- `prism-relay` normalizes providers +- `Iris` monetizes and governs inference +- `tokens` sells compute access +- `OpenCompany` and `KosmoKrator` consume that compute in different product + contexts + +If that remains the plan, then Iris is not a side utility. It is shared +infrastructure at the center of the token economy. diff --git a/docs/ecosystem/iris/missing-in-iris.md b/docs/ecosystem/iris/missing-in-iris.md new file mode 100644 index 0000000..3f8400b --- /dev/null +++ b/docs/ecosystem/iris/missing-in-iris.md @@ -0,0 +1,150 @@ +# What Is Still Missing In Iris + +Audit date: 2026-04-03 + +This note captures the remaining gaps in Iris after the Prism + `prism-relay` +integration and the bundled-only relay registry mode. + +## Highest-priority gaps + +### 1. Exact preauthorization is still missing + +Settlement is exact after the response finishes, but preflight balance checks are +still conservative only. + +- Iris only rejects when balance is non-positive or when requested max output + cost exceeds balance. +- Iris does not estimate prompt-side token cost before dispatch. +- Iris does not reserve funds before sending the upstream request. +- Concurrent requests can still race and overspend the same balance. + +Relevant code: + +- `src/Http/Controller/ProxyController.php` +- `src/Accounting/PricingEngine.php` +- `src/Accounting/TokenLedger.php` + +### 2. Rate limiting is configured but not enforced + +`rate_limit` exists in config and is stored in the ledger, but the request path +does not currently apply any requests-per-window check before allowing a call. + +Relevant code: + +- `config/iris.yaml` +- `src/Kernel.php` +- `src/Auth/TokenAuthenticator.php` +- `src/Accounting/TokenLedger.php` + +### 3. Relay error normalization is not fully wired into HTTP responses + +Iris still returns generic normalization and provider failure responses instead +of routing exceptions through relay's structured error categorization. + +What is missing: + +- mapping provider exceptions through `OpenCompany\PrismRelay\Normalizers\ErrorNormalizer` +- returning stable error codes and HTTP statuses by normalized category +- forwarding retry hints such as `Retry-After` +- shaping streaming failure output consistently with the normalized error model + +Relevant code: + +- `src/Http/Controller/ProxyController.php` +- `vendor/opencompanyapp/prism-relay/src/Normalizers/ErrorNormalizer.php` + +### 4. Routing policy config is still mostly dead + +The config declares `default_provider` and `failover`, but the router currently +uses direct model-to-provider lookup from configured models and does not execute +failover chains on provider errors. + +Relevant code: + +- `config/iris.yaml` +- `src/Provider/ProviderRouter.php` + +### 5. Full provider runtime parity is not there yet + +Iris uses relay metadata and runtime adapters where they exist, but not every +provider in the relay catalog is executable through Prism yet. + +As of the current relay package, these providers are still metadata-only: + +- `cerebras` +- `cloudflare-ai-gateway` +- `codex` +- `cohere` +- `custom` +- `deepinfra` +- `gitlab` +- `google-vertex-anthropic` +- `sap-ai-core` +- `togetherai` +- `v0` +- `venice` + +Relevant docs: + +- `vendor/opencompanyapp/prism-relay/TODO.md` + +## Secondary gaps + +### 6. Authentication is still minimal + +A bearer token is accepted if it exists in static config or in the local ledger. +There is still no stronger production token model around issuer verification, +expiry, scopes, revocation, or hashed token storage. + +Relevant code: + +- `src/Auth/TokenAuthenticator.php` + +### 7. Test coverage is still too thin for production confidence + +Current tests cover mapping and some request-building behavior, but not the main +operational risks. + +Missing test areas: + +- auth failures +- rate limiting +- exact billing and overspend prevention +- streaming settlement +- normalized error responses +- failover behavior +- end-to-end provider request handling + +Relevant paths: + +- `tests/` + +### 8. README and config still overstate the implemented product surface + +The current docs still describe parts of the product that do not actually exist +in the Iris service today. + +Examples: + +- balance checking is described as middleware behavior +- pack, drop, and exchange concepts are documented, but not implemented in Iris +- routing config implies failover support that is not active + +Relevant files: + +- `README.md` +- `config/iris.yaml` + +## Practical interpretation + +If the goal is "usable internal proxy with exact post-settlement and shared +relay metadata," Iris is already close. + +If the goal is "watertight production settlement and fully normalized proxy," +the blocking work is: + +1. exact preauthorization and reservation flow +2. real rate-limit enforcement +3. relay-normalized error handling +4. actual routing failover logic +5. closing or clearly scoping the remaining relay runtime-provider gaps diff --git a/docs/ecosystem/kosmokrator/README.md b/docs/ecosystem/kosmokrator/README.md new file mode 100644 index 0000000..686e400 --- /dev/null +++ b/docs/ecosystem/kosmokrator/README.md @@ -0,0 +1,49 @@ +# KosmoKrator Documentation + +## Architecture (Current-Truth) + +These docs describe shipped behavior. They must be updated when the codebase changes. + +| Document | Description | +|----------|-------------| +| [overview.md](architecture/overview.md) | Architecture overview: runtime, UI, tools, context pipeline, subagents, config | +| [permission-modes.md](architecture/permission-modes.md) | Agent modes (Edit/Plan/Ask), permission modes (Guardian/Argus/Prometheus), evaluation order | +| [subagent-architecture.md](architecture/subagent-architecture.md) | Subagent types, tool scoping, orchestration, dependency resolution, concurrency | + +## Proposals + +Forward-looking design docs. Not shipped — may reference classes or features that don't exist yet. + +| Document | Description | +|----------|-------------| +| [streaming.md](proposals/streaming.md) | SSE streaming for LLM responses | +| [context-management-redesign.md](proposals/context-management-redesign.md) | 17 proposed context pipeline improvements | +| [context-management-strategies.md](proposals/context-management-strategies.md) | Semantic scoring, dedup tiers, progressive summarization | +| [context-compaction.md](proposals/context-compaction.md) | Historical plan for the first compaction implementation | +| [ecosystem-architecture.md](proposals/ecosystem-architecture.md) | Lua code mode, MCP integration, OpenCompany tool ecosystem | +| [integration-refactor-plan.md](proposals/integration-refactor-plan.md) | Refactoring tool packages to framework-agnostic contracts | +| [desktop-app.md](proposals/desktop-app.md) | NativePHP + Electron desktop surface proposal | +| [tui-ux-improvements.md](proposals/tui-ux-improvements.md) | 10 ranked UX improvements with mockups | +| [command-inspiration.md](proposals/command-inspiration.md) | Slash/power command ideas from competitive analysis | +| [laravel-ai-patterns.md](proposals/laravel-ai-patterns.md) | Patterns from Laravel AI SDK worth borrowing | + +## Audits (Historical) + +Write-once audit reports. Findings reference file:line numbers that may have shifted. + +| Document | Date | Scope | +|----------|------|-------| +| [deep-audit-2026-04-02.md](audits/deep-audit-2026-04-02.md) | 2026-04-02 | Full codebase (8 domains, 162 files) | +| [self-audit-2026-03-30.md](audits/self-audit-2026-03-30.md) | 2026-03-30 | Initial self-audit (68 files) | +| [memory-leak-audit.md](audits/memory-leak-audit.md) | 2026-04-01 | Memory leak analysis (131 files) | +| [ram-audit/RAM-EFFICIENCY-AUDIT.md](audits/ram-audit/RAM-EFFICIENCY-AUDIT.md) | 2026-04-03 | RAM efficiency synthesis (10 agents) | +| [ram-audit/synthesis-architecture.md](audits/ram-audit/synthesis-architecture.md) | 2026-04-03 | Architecture RAM analysis | +| [ram-audit/synthesis-core-agent.md](audits/ram-audit/synthesis-core-agent.md) | 2026-04-03 | Core agent memory hotspots | +| [ram-audit/synthesis-io-performance.md](audits/ram-audit/synthesis-io-performance.md) | 2026-04-03 | I/O performance and buffering | +| [ram-audit/synthesis-security.md](audits/ram-audit/synthesis-security.md) | 2026-04-03 | Security-adjacent RAM concerns | + +## Confidential (Not in Git) + +Internal strategy and competitor analysis. Excluded from version control via `.gitignore`. + +See `docs/confidential/` — business strategy, token architecture, Claude Code analysis, OpenCode analysis, Reven specs. diff --git a/docs/ecosystem/kosmokrator/architecture/overview.md b/docs/ecosystem/kosmokrator/architecture/overview.md new file mode 100644 index 0000000..7a7da65 --- /dev/null +++ b/docs/ecosystem/kosmokrator/architecture/overview.md @@ -0,0 +1,156 @@ +# KosmoKrator Overview + +KosmoKrator is a terminal coding agent built in PHP. The shipped product today is a CLI application with a dual renderer, a tool-driven agent loop, session persistence, context management, slash commands, power commands, a skill system, and a subagent system. + +This document is the current-state architecture summary. Proposal and roadmap material lives in `docs/proposals/` and is explicitly labeled there. + +## Current Implementation + +### Runtime + +The runtime entry path is: + +```text +bin/kosmokrator + → Kernel + → AgentCommand + → AgentSessionBuilder + → AgentLoop +``` + +Key responsibilities: + +- `Kernel` boots the Illuminate container, YAML config, logging, Prism provider wiring, SQLite persistence, tools, and commands. +- `AgentSessionBuilder` assembles UI, LLM client, permission evaluator, tool registry, session manager, context management helpers, and subagent infrastructure for an interactive session. +- `AgentLoop` runs the prompt → LLM → tools → LLM loop and handles persistence, mode filtering, context health, and status reporting. + +### UI + +KosmoKrator ships with two renderers behind `RendererInterface`: + +- `TuiRenderer` for the interactive Symfony TUI experience +- `AnsiRenderer` for ANSI/readline fallback +- `NullRenderer` for headless subagent loops (auto-approves permissions) + +The shared UI layer also includes diff rendering, theming, terminal notifications, subagent tree formatting, and modal/dialog helpers for settings, approvals, and dashboards. + +### Tools and Modes + +Built-in tool families: + +- Coding tools: `file_read`, `file_write`, `file_edit`, `apply_patch`, `glob`, `grep`, `bash` +- Shell session tools: `shell_start`, `shell_write`, `shell_read`, `shell_kill` +- Coordination tools: `subagent`, `task_create`, `task_update`, `task_get`, `task_list` +- Interactive tools: `ask_user`, `ask_choice` +- Memory tools: `memory_save`, `memory_search` + +Interactive agent modes: + +- `Edit`: full tool access +- `Plan`: read/search/bash/subagent/task/ask tools, but no file mutation tools +- `Ask`: read/search/bash/task/ask tools, but no file mutation tools and no subagents + +Permission modes are separate from agent modes: + +- `Guardian`: auto-approve safe reads and safe bash, ask for riskier calls +- `Argus`: ask for approval on governed tool calls +- `Prometheus`: auto-approve governed calls except absolute denies + +Blocked paths and blocked command patterns are always enforced. + +### Persistence and State + +KosmoKrator persists state in SQLite under `~/.kosmokrator/data`: + +- Sessions and message history +- Global and project-scoped settings +- Memories and compaction summaries +- Token accounting metadata used for status and resume flows + +User-visible session flows include `/sessions`, `/resume`, `/new`, `/compact`, `/memories`, and `/forget`. + +### Context Management + +The current context pipeline is layered: + +- output truncation for oversized tool results +- deduplication of superseded tool results +- pruning of older low-value tool outputs +- LLM-based compaction with optional memory extraction +- oldest-turn trimming as an overflow fallback + +This is implemented today. Future context experiments live in `docs/proposals/context-management-strategies.md` and are not part of the shipped behavior unless stated otherwise. + +### Subagents + +KosmoKrator ships with a working subagent system: + +- agent types: `general`, `explore`, `plan` +- dependency chains with `depends_on` +- sequential groups with `group` +- `await` and `background` execution modes +- retry handling for retryable failures +- concurrency limiting +- live tree/dashboard rendering via `/agents` + +See `AGENTS.md` and `docs/architecture/subagent-architecture.md` for implementation details. + +### Key Directories + +| Directory | Purpose | +|-----------|---------| +| `src/Agent/` | Agent core: AgentLoop, ToolExecutor, ContextManager, StuckDetector, subagent system, events | +| `src/LLM/` | LLM clients: AsyncLlmClient, PrismService, RetryableLlmClient, model catalog, pricing | +| `src/UI/` | Rendering: TuiRenderer, AnsiRenderer, NullRenderer, diff rendering, theming | +| `src/Tool/` | Tool implementations and permission system | +| `src/Command/` | AgentCommand, SetupCommand, AuthCommand, slash commands, power commands | +| `src/Command/Slash/` | 20 interactive slash commands (`/edit`, `/compact`, `/settings`, etc.) | +| `src/Command/Power/` | 20 power commands (`:autopilot`, `:review`, `:team`, `:unleash`, etc.) | +| `src/Session/` | SQLite persistence: sessions, messages, memories, settings | +| `src/Task/` | Task tracking with tree structure and dependency enforcement | +| `src/Skill/` | Skill system: YAML-based custom prompts with `$skillname` dispatch | +| `src/Settings/` | Layered settings resolution (project → global → default) | +| `src/Provider/` | Service providers for DI container wiring (9 providers) | +| `src/Update/` | Self-updater with GitHub release checking | +| `src/Audio/` | Completion sounds (LLM-composed MIDI per session) | + +## What Is Not Implemented + +These are still proposal or future-work areas, not shipped runtime features: + +- Lua code mode +- MCP client support +- external integration loader / hosted integrations +- desktop app surface +- provider failover across multiple backends in the main runtime + +Documents that discuss these topics are design docs in `docs/proposals/`, not current feature docs. + +## Configuration + +Config is loaded in layers, with later layers overriding earlier ones: + +1. bundled defaults in `config/*.yaml` +2. user config in `~/.kosmokrator/config.yaml` +3. project config in `.kosmokrator.yaml` + +Important config areas: + +- `config/prism.yaml` for provider endpoints and API keys +- `config/models.yaml` for model metadata such as context windows and pricing +- `config/kosmokrator.yaml` for agent behavior, permission defaults, UI settings, and context thresholds + +Environment variables in YAML are expanded using `${VAR_NAME}`. + +## Documentation Map + +See [docs/README.md](../README.md) for the full documentation index. + +Current-truth docs: + +- `README.md`: installation, usage, and high-level architecture +- `AGENTS.md`: subagent architecture and orchestration model +- `docs/architecture/permission-modes.md`: agent-mode and permission-mode behavior +- `docs/architecture/subagent-architecture.md`: current subagent behavior and configuration + +Proposal and reference material lives in `docs/proposals/`. Historical audits live in `docs/audits/`. diff --git a/docs/ecosystem/kosmokrator/architecture/permission-modes.md b/docs/ecosystem/kosmokrator/architecture/permission-modes.md new file mode 100644 index 0000000..7409637 --- /dev/null +++ b/docs/ecosystem/kosmokrator/architecture/permission-modes.md @@ -0,0 +1,125 @@ +# Permission Modes & Agent Modes + +KosmoKrator has two orthogonal control axes: + +- **Agent mode** decides which tools are available +- **Permission mode** decides how governed tool calls are approved + +## Agent Modes + +| Mode | Available tool families | Purpose | +|------|-------------------------|---------| +| **Edit** | read, write, edit, search, bash, subagent, task, ask-user tools | Full coding access | +| **Plan** | read, search, bash, subagent, task, ask-user tools | Research and planning without file edits | +| **Ask** | read, search, bash, task, ask-user tools | Q&A without file edits or subagents | + +Important behavior: + +- `file_write` and `file_edit` are unavailable outside `Edit` +- `subagent` is unavailable in `Ask` +- `bash` is available in all three interactive modes +- `Ask` adds an extra read-only guard: mutative bash commands are blocked even if permission mode is permissive + +## Permission Modes + +| Mode | Symbol | Behavior | +|------|--------|----------| +| **Guardian** | ◈ | Auto-approve known-safe calls, ask for riskier governed calls | +| **Argus** | ◉ | Ask for every governed call | +| **Prometheus** | ⚡ | Auto-approve governed calls unless an absolute deny rule matches | + +Governed calls come from the configured approval rules. By default that includes `file_write`, `file_edit`, and `bash`. + +## How They Compose + +| Agent mode | Permission behavior | +|-----------|---------------------| +| **Edit** | Full permission system applies to writes and bash | +| **Plan** | No file mutation tools exist, but bash still goes through permission evaluation | +| **Ask** | No file mutation tools exist; bash still goes through permission evaluation, and mutative bash is denied by the mode guard | + +## Guardian Heuristics + +Guardian uses static checks only. Current auto-approve rules are: + +| Tool | Auto-approve behavior | +|------|------------------------| +| `file_read`, `glob`, `grep` | always auto-approved | +| `task_*` | always auto-approved | +| `file_write`, `file_edit` | auto-approved only when the resolved path is inside the project root | +| `bash` | auto-approved only when the command matches the safe-command whitelist and contains no shell operators | + +Blocked paths and blocked command patterns always win, regardless of permission mode. + +### Safe bash patterns + +Configured in `config/kosmokrator.yaml` under `tools.guardian_safe_commands`. + +Representative defaults: + +```text +git * +ls * +pwd +cat * +head * +tail * +wc * +find * +which * +echo * +php vendor/bin/phpunit* +php vendor/bin/pint* +composer * +npm * +node * +python * +cargo * +go * +make * +``` + +Commands containing shell operators such as `;`, `&&`, `|`, redirection, command substitution, or embedded newlines are not treated as safe. + +## Evaluation Order + +The permission evaluator applies rules in this order: + +1. blocked paths +2. blocked command patterns +3. session grants for the tool name +4. rule evaluation for `ask` or `deny` +5. permission-mode override (`Guardian`, `Argus`, `Prometheus`) + +Implications: + +- session grants can bypass future `ask` results for the same tool +- session grants do not bypass absolute deny rules +- `Prometheus` only upgrades `ask` to `allow`; it does not override denies + +## Approval Flow + +When approval is required, the UI can: + +- allow just this call +- allow this tool for the rest of the session +- escalate to `Guardian` +- escalate to `Prometheus` +- deny the call + +Changing to `Guardian` or `Prometheus` applies to the current session immediately and approves the current prompt flow. + +## Related Commands + +```text +/edit /plan /ask +/guardian /argus /prometheus +``` + +## Implementation References + +- `src/Agent/AgentMode.php` +- `src/Tool/Permission/PermissionMode.php` +- `src/Tool/Permission/PermissionEvaluator.php` +- `src/Tool/Permission/GuardianEvaluator.php` +- `config/kosmokrator.yaml` diff --git a/docs/ecosystem/kosmokrator/architecture/subagent-architecture.md b/docs/ecosystem/kosmokrator/architecture/subagent-architecture.md new file mode 100644 index 0000000..0d1e611 --- /dev/null +++ b/docs/ecosystem/kosmokrator/architecture/subagent-architecture.md @@ -0,0 +1,98 @@ +# Subagent Architecture + +This document describes the current shipped subagent system. + +KosmoKrator can spawn child agents for parallel research, planning, and delegated work. Each child runs its own agent loop with a narrowed tool set and reports results back through a shared orchestrator. + +## Agent Types + +| Type | Read | Write | Can spawn | +|------|------|-------|-----------| +| `general` | yes | yes | `general`, `explore`, `plan` | +| `explore` | yes | no | `explore` | +| `plan` | yes | no | `explore` | + +Type narrowing is strict. Children can only keep or reduce capabilities relative to their parent. + +## Interactive Agent Modes vs Subagent Types + +Do not confuse: + +- **interactive agent modes**: `Edit`, `Plan`, `Ask` +- **subagent types**: `general`, `explore`, `plan` + +Interactive modes shape the parent session tool set. Subagent types shape delegated child sessions. + +## Tool Scoping + +Current subagent tool sets: + +- `general`: `file_read`, `file_write`, `file_edit`, `glob`, `grep`, `bash`, `subagent` +- `explore`: `file_read`, `glob`, `grep`, `bash`, `subagent` +- `plan`: `file_read`, `glob`, `grep`, `bash`, `subagent` + +The `subagent` tool is removed automatically once the max depth is reached. + +## Execution Modes + +The `subagent` tool supports two execution modes: + +| Mode | Behavior | +|------|----------| +| `await` | parent waits for the child result and gets it inline as a tool result | +| `background` | parent continues immediately and receives the child result on a later turn | + +Background results are collected per parent agent ID so sibling trees do not drain each other's results. + +## Orchestration Features + +The current orchestrator supports: + +- explicit agent IDs +- dependency chains with `depends_on` +- sequential execution groups with `group` +- global concurrency limiting +- retry handling for retryable failures +- cancellation of background agents +- per-agent stats for status, elapsed time, tokens, tool calls, depth, and retries + +Dependency behavior: + +- a dependent child waits for all listed dependencies +- successful dependency results are injected into the child task +- failed dependencies are injected as marked degraded results instead of aborting the dependent child +- circular dependencies are rejected before execution + +## Depth and Concurrency + +Default runtime settings: + +```yaml +agent: + subagent_max_depth: 3 + subagent_concurrency: 10 + subagent_max_retries: 2 +``` + +Meaning: + +- root session depth is `0` +- children increment depth by `1` +- the default tree allows root → child → grandchild +- concurrency `0` disables the global semaphore and allows unlimited parallel children + +## UI and Monitoring + +KosmoKrator exposes subagent state through: + +- inline spawn/running/batch displays in both renderers +- a live tree in TUI mode +- the `/agents` dashboard for aggregated progress, retries, token usage, and failures + +## Implementation References + +- `AGENTS.md` +- `src/Tool/Coding/SubagentTool.php` +- `src/Agent/SubagentOrchestrator.php` +- `src/Agent/SubagentFactory.php` +- `src/Agent/AgentContext.php` diff --git a/docs/ecosystem/kosmokrator/audits/deep-audit-2026-04-02.md b/docs/ecosystem/kosmokrator/audits/deep-audit-2026-04-02.md new file mode 100644 index 0000000..024029f --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/deep-audit-2026-04-02.md @@ -0,0 +1,287 @@ +# KosmoKrator Deep Audit + +> **Date:** 2026-04-02 +> **Scope:** Full codebase — 162 PHP source files (25,130 lines), 81 test files (12,278 lines) +> **Method:** 8 parallel audit domains via ~30 subagents, each finding verified against code with exact file:line references + +## Audit Domains + +| Domain | Focus | +|--------|-------| +| Security | Command injection, path traversal, input validation, secret exposure | +| Error Handling | Exception swallowing, missing finally blocks, recovery paths, infinite loops | +| Concurrency | Race conditions, semaphore leaks, fiber safety, cancellation propagation | +| API Boundaries | LLM response parsing, tool parameter validation, response size limits | +| Resource Management | File handle/process/DB leaks, temp file cleanup, unbounded buffering | +| Session Persistence | SQL injection, schema constraints, concurrent writes, file permissions | +| Logic Bugs | State machine violations, edge cases in patch/edit tools, off-by-one errors | +| Test Coverage | Untested classes, assertion depth, mock quality, isolation | + +--- + +## Critical Findings (5) + +### C1. BashTool EventLoop timer leak + +**Location:** `src/Tool/Coding/BashTool.php:68-113` + +The timeout timer created via `EventLoop::delay()` is only cancelled on the success path (line 99). If `$process->join()` or `$stdoutFuture->await()` throws, the catch block returns without calling `EventLoop::cancel($timerId)`. The timer callback holds a reference to the `Process` object, preventing GC. + +```php +// Current: timer leaked on exception +} catch (\Throwable $e) { + return ToolResult::error("Process error: {$e->getMessage()}"); +} + +// Fix: cancel timer in catch +} catch (\Throwable $e) { + EventLoop::cancel($timerId); + return ToolResult::error("Process error: {$e->getMessage()}"); +} +``` + +### C2. Semaphore self-deadlock with nested agents + +**Location:** `src/Agent/SubagentOrchestrator.php:165-201` + +When parent agents hold semaphore slots and their child agents (spawned inside the semaphore-held zone) also need slots, all slots can be consumed by waiting parents. Children never acquire a slot, parents never finish — deadlock. + +Trigger: `concurrency` set low (e.g., 2) with agents at depth > 1. The dependency wait happens *before* semaphore acquisition, but the factory execution runs *inside* the held semaphore zone, and nested `SubagentTool` calls re-enter `spawnAgent()` which tries to acquire the global semaphore again. + +### C3. ShellSession unbounded buffer + +**Location:** `src/Tool/Coding/ShellSession.php:41,54-55` + +The `$buffer` string grows unboundedly as chunks are appended via `.= ` in `appendOutput()`. The `readUnread()` method updates `$readOffset` but **never truncates `$buffer`**. Long-running sessions (e.g., `tail -f`, build logs) accumulate memory indefinitely. + +```php +// Fix: discard consumed portion in readUnread() +public function readUnread(): string +{ + $chunk = substr($this->buffer, $this->readOffset); + $this->buffer = substr($this->buffer, $this->readOffset); + $this->readOffset = 0; + $this->touch(); + return $chunk; +} +``` + +### C4. Task::transitionTo() ignores state machine + +**Location:** `src/Task/Task.php:57` + +`TaskStatus::canTransitionTo()` defines valid transitions (pending→in_progress, in_progress→completed/cancelled/failed), but `transitionTo()` never calls it. Any-to-any state transitions are silently allowed. `TaskUpdateTool` also omits `failed` from its valid status list. + +### C5. file_read is ALWAYS_SAFE in Guardian mode + +**Location:** `src/Tool/Permission/GuardianEvaluator.php:23-30` + +`file_read` is listed in `ALWAYS_SAFE`, meaning reads of any file are auto-approved without path checks. An LLM can read `/etc/passwd`, `~/.ssh/id_rsa`, or any file on the system with zero restriction and no user prompt. + +--- + +## High Findings (8) + +### H1. Raw exception messages leak to LLM + +**Locations:** `src/Agent/ToolExecutor.php:307`, `src/Agent/AgentLoop.php:248,425` + +`$e->getMessage()` from any caught `Throwable` (including PDO exceptions, filesystem errors) is returned directly as tool result text, which is then sent back to the LLM. This can leak internal filesystem paths, database credentials (if present in DSN), PHP version details, and stack trace information. + +### H2. GuardianEvaluator mutative command check bypassed by full paths + +**Location:** `src/Tool/Permission/GuardianEvaluator.php:140` + +`MUTATIVE_PATTERNS` uses `str_starts_with($lower, $pattern)` to detect mutative commands. Full-path invocations like `/bin/rm -rf /` or `/usr/bin/git commit` bypass all pattern checks. Ask mode relies on this check to block mutative commands. + +### H3. Concurrent file edits silently lose data + +**Location:** `src/Tool/Coding/FileEditTool.php:135` + +No file locking is used. If parallel subagents edit the same file, both read the original, find their matches, create temp files, and `rename()`. The second rename overwrites the first, silently discarding the earlier edit. + +### H4. BashTool ignores Cancellation — zombie processes + +**Location:** `src/Tool/Coding/BashTool.php:52-113` + +`BashTool::execute()` takes no `Cancellation` parameter. If the user presses Ctrl+C while a bash tool is running in a subagent, the process won't be killed until it times out (up to 7200 seconds). Cancellation is caught at the LLM call level, but the spawned process continues as a zombie. + +### H5. No PRAGMA busy_timeout on SQLite ✅ Fixed + +**Location:** `src/Session/Database.php:30-32` + +WAL mode is enabled but no `busy_timeout` is set. If two KosmoKrator processes access the same DB simultaneously (e.g., two terminal sessions), one will get an immediate `SQLITE_BUSY` exception instead of retrying. + +**Fix:** Add `$this->pdo->exec('PRAGMA busy_timeout=5000');` after line 32. + +### H6. DB directory 0755 instead of 0700 ✅ Fixed + +**Location:** `src/Session/Database.php:19` + +The database directory `~/.kosmokrator/data` is created with `0755` (world-readable). The log directory in `Kernel.php:124` uses `0700`. The DB file itself inherits the process umask (typically `0644` — world-readable). + +### H7. PatchApplier blocked-path bypass via non-existent parents + +**Location:** `src/Tool/Coding/Patch/PathResolver.php:33-35` + +When a file doesn't exist yet (e.g., `add` operation), `PathResolver::resolve()` falls back to `realpath(dirname($path))`. If the parent directory itself doesn't exist, `realpath()` returns `false` → `resolve()` returns `null` → the resolved path is never checked against blocked paths. + +### H8. PermissionEvaluator blocked-path check doesn't work for apply_patch + +**Location:** `src/Tool/Permission/PermissionEvaluator.php:23` + +The blocked-path check inspects `$args['path']`, but `apply_patch` passes arguments as `patch` (containing embedded paths), not `path`. The `PatchApplier` has its own internal check, but the `PermissionEvaluator` layer is completely bypassed for patch operations — single point of failure. + +--- + +## Medium Findings (12) + +### M1. No response body size limit on LLM HTTP + +**Location:** `src/LLM/AsyncLlmClient.php:193` + +The Amp HTTP client's `buffer()` reads the entire response into memory. No `Content-Length` check or body size cap. A compromised LLM API could return an arbitrarily large response causing OOM. Transfer timeout (600s) provides partial mitigation. + +### M2. No secret redaction in ContextManager + +**Location:** `src/Agent/ContextManager.php:130-157` + +Memories, session recall, tool results, and parent briefs are injected into the system prompt verbatim. If any contain API keys, passwords, or other secrets (e.g., from `env` command output stored in session history), they are sent to the LLM API. + +### M3. ShellStartTool no timeout upper bound ✅ Fixed + +**Location:** `src/Tool/Coding/ShellStartTool.php:54` + +Unlike `BashTool` which clamps timeouts to `max(1, min($timeout, 7200))`, `ShellStartTool` passes the timeout directly. A user/LLM could specify `timeout: 999999` (~11.5 days). The idle TTL (300s) partially mitigates this for idle sessions. + +### M4. ToolExecutor missing finally for BashTool::$progressCallback ✅ Fixed + +**Location:** `src/Agent/ToolExecutor.php:155-165` + +`BashTool::$progressCallback` is set before execution and cleared after, but not in a `finally` block. If `executeSingleTool()` throws past its own catch (e.g., `ToolResult` constructor failure), the static callback leaks. + +### M5. StuckDetector only in runHeadless() + +**Location:** `src/Agent/AgentLoop.php` + +The `StuckDetector` is only wired in `runHeadless()`. Interactive `run()` has no stuck detection — by design, since the user controls execution via Ctrl+C. + +### M6. runHeadless() missing finally block + +**Location:** `src/Agent/AgentLoop.php:337-487` + +Unlike `run()` which has a `finally` block (line 325-328) that resets UI phase to Idle, `runHeadless()` has no guaranteed cleanup path. + +### M7. maybeCompleteParent marks Completed even when children failed + +**Location:** `src/Task/TaskStore.php:304` + +When all children reach terminal status, the parent is auto-completed as `Completed` regardless of whether children are `Failed` or `Cancelled`. A parent with all-failed children should probably be marked `Failed`. + +### M8. PatchParser inconsistent empty-line handling + +**Location:** `src/Tool/Coding/Patch/PatchParser.php:34` vs `:157` + +Empty lines between operations are silently skipped (line 34), but empty lines inside an update body throw an `InvalidArgumentException` (line 157). This inconsistency can confuse LLMs generating patches. + +### M9. Lost exception context in all error logging + +**Locations:** `AgentLoop.php:222,244,409`, `ToolExecutor.php:305`, `SubagentOrchestrator.php:203` + +All catch blocks use only `$e->getMessage()`, discarding exception class name, file, and line. Makes debugging production issues very difficult. Should log `$e::class`, `$e->getFile()`, `$e->getLine()` alongside. + +### M10. No transactions around multi-step DB operations + +**Location:** `src/Session/SessionManager.php:69-93` + +`saveMessage()` performs INSERT + UPDATE + potential SELECT + UPDATE without wrapping in a transaction. If the process crashes between the message insert and the session touch, data will be inconsistent. + +### M11. Temp file leak on exception in FileEditTool + +**Location:** `src/Tool/Coding/FileEditTool.php:149-170` + +If `stream_copy_to_stream()` or `fwrite()` throws inside `patchFile()`, the `finally` block closes file handles but does NOT delete the `.tmp.` file. The `@unlink($tmpPath)` at line 175 only runs when `rename()` returns false, not on exceptions. + +### M12. BashTool static $progressCallback race across subagents + +**Location:** `src/Tool/Coding/BashTool.php:17`, `src/Agent/ToolExecutor.php:155-165` + +`BashTool::$progressCallback` is a static property shared across all fibers. If a background subagent and its parent both execute bash tools, they overwrite each other's callback. + +--- + +## By Design + +- **Interactive run() has no round limit or StuckDetector** — the user controls execution and can Ctrl+C at any time. Headless mode has both guards since there's no human in the loop. + +--- + +## What's Healthy + +| Area | Assessment | +|------|------------| +| SQL injection | All queries use prepared statements with parameterized values | +| PHP object injection | Zero `unserialize()` calls in the codebase | +| JSON deserialization | Uses `json_decode($str, true)` with array type checks | +| Semaphore finally blocks | Orchestrator `finally` correctly releases both group and global semaphores | +| StuckDetector escalation | Well-designed 3-stage path: nudge → final notice → force return | +| Background agent cancellation | `cancelAll()` correctly cancels all background agents on shutdown | +| LLM HTTP cancellation | Cancellation token propagated to both request and body buffering | +| File handle management | `FileEditTool` streaming path uses proper try/finally with fclose | +| Process cleanup on exit | `AgentCommand` teardown calls `cancelAll()` then `killAll()` | +| WAL mode | Enabled for concurrent SQLite reads | +| Foreign keys | Enforced via `PRAGMA foreign_keys=ON` | +| Dependency cycle detection | DFS-based cycle detection before agent spawning | +| LIKE injection | Wildcards properly escaped in both `MessageRepository` and `MemoryRepository` | + +--- + +## Test Coverage Summary + +| Metric | Value | +|--------|-------| +| Test files | 79 | +| Test methods | ~662 | +| Classes with tests | ~65 of ~100 concrete classes | +| Core logic method coverage | ~85% | +| Skipped/incomplete tests | 0 | + +### Critical Untested Code + +| Priority | File | LOC | Risk | +|----------|------|-----|------| +| P0 | `src/Agent/ToolExecutor.php` | 456 | Core execution pipeline — permission checks, concurrent execution, error handling | +| P0 | `src/Agent/AgentSessionBuilder.php` | ~240 | Complex DI wiring — broken wiring goes undetected | +| P1 | `src/Agent/MemorySelector.php` | — | Scoring/ranking algorithm — bugs silently degrade agent intelligence | +| P1 | `src/Agent/ContextBudget.php` | — | Threshold math for auto-compact/blocking — trivially testable | +| P1 | `src/Settings/SettingsManager.php` | ~220 | Entire Settings/ namespace has zero tests | +| P2 | `src/Tool/Coding/Patch/PatchApplier.php` | — | Disk-modifying code with no tests | +| P2 | Shell tool classes (Start/Write/Read/Kill) | — | Process I/O tools, only ShellSessionManager tested | +| P2 | `src/LLM/PromptFrameBuilder.php` | — | Builds system prompt frames | + +### Services with Zero Tests + +1. `CodexOAuthService` — OAuth for Codex auth +2. `CodexAuthFlow` — Full auth flow orchestration +3. `Relay` — External PrismRelay registration +4. `PatchApplier` — Only tested indirectly via `ApplyPatchToolTest` +5. `SessionGrants` — Auto-wired singleton + +### DI Wiring + +No test verifies that the container correctly resolves all registered services. The only integration test (`Feature/AgentCommandTest.php`) boots the kernel and runs `/quit` — a smoke test, not a DI verification. + +--- + +## Recommended Fix Priority + +1. **C1** (timer leak) — One-line fix, zero risk +2. **C3** (unbounded buffer) — Three-line fix, zero risk +3. **C5** (file_read ALWAYS_SAFE) — Design decision needed: restrict to project dir or keep open? +4. **H5** (busy_timeout) — One-line fix, zero risk +5. **H6** (0755→0700) — One-line fix, zero risk +6. **H7** (PathResolver null) — Small fix in PathResolver +7. **C2** (semaphore deadlock) — Design decision: reserve slots for children? Pre-check depth? +8. **C4** (state machine) — Wire `canTransitionTo()` into `transitionTo()` +9. **H1** (exception message leak) — Sanitize paths and env details from error messages +10. **H2** (full-path bypass) — Expand mutative patterns or use `basename()` extraction diff --git a/docs/ecosystem/kosmokrator/audits/memory-leak-audit.md b/docs/ecosystem/kosmokrator/audits/memory-leak-audit.md new file mode 100644 index 0000000..322620e --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/memory-leak-audit.md @@ -0,0 +1,534 @@ +# Memory Leak Audit + +> Status: Historical audit. Counts, findings, and repository size reflect the audit date and may not match the current codebase. + +Comprehensive audit of the KosmoKrator codebase (131 PHP files, ~21k lines) for memory leaks, resource leaks, and unbounded growth patterns. Covers all subsystems: Agent loop, Subagent orchestrator, LLM/HTTP layer, Tools, TUI/ANSI rendering, Session persistence, and vendor dependencies. + +--- + +## Table of Contents + +- [Executive Summary](#executive-summary) +- [Object Reference Map & Cycles](#object-reference-map--cycles) +- [Critical Findings](#critical-findings) +- [High Findings](#high-findings) +- [Medium Findings](#medium-findings) +- [Low Findings](#low-findings) +- [Vendor Library Risks](#vendor-library-risks) +- [Async/Event-Loop Pattern Audit](#asyncevent-loop-pattern-audit) +- [Positive Findings (Clean)](#positive-findings-clean) +- [Recommended Fix Plan](#recommended-fix-plan) + +--- + +## Executive Summary + +10 subagents audited every file in `src/` plus key vendor libraries (`amphp/http-client`, `amphp/amp`, `amphp/process`, `prism-php/prism`, `revolt/event-loop`). Findings break down as follows: + +| Severity | Count | Summary | +|----------|-------|---------| +| CRITICAL | 8 | Unbounded growth, leaked timers, leaked HTTP connections | +| HIGH | 12 | Circular reference chains, missing destructors, unbounded buffers | +| MEDIUM | 14 | Accumulating caches, soft-deleted data, missing cleanup | +| LOW | 10 | Minor issues, theoretical risks, bounded growth | + +The three highest-impact areas are: + +1. **SubagentOrchestrator** — failed agents never pruned, `Future` objects with large closures accumulate +2. **HTTP connection pools** — each subagent creates a fresh `AsyncLlmClient` with an unbounded connection pool +3. **TUI timer lifecycle** — timers not cancelled on teardown, pinning the entire renderer object graph + +--- + +## Object Reference Map & Cycles + +### Cycle 1: The Agent Loop Cycle (GC-Resistant) + +``` +AgentLoop ◂──────────────────────────────────────────┐ + │ $agentContext │ + │ $allTools │ + ▼ │ +AgentContext ───$orchestrator──▸ SubagentOrchestrator │ + │ │ │ + │ (readonly, shared) │ $agents[] → Future + │ │ │ │ + ▼ │ ▼ (fiber closure captures) +SubagentTool ◂──── ToolRegistry ◂────┘ SubagentFactory + │ $parentContext │ $rootRegistry → ToolRegistry + │ $agentFactory ───────────────────────┘ + └──▸ Closure captures $subagentFactory + └──▸ creates child AgentLoop + (child.agentContext → SAME orchestrator) +``` + +**Pinned by:** Amp EventLoop holds Future refs. Not breakable by PHP GC until Futures complete and `pruneCompleted()` is called. + +**Objects in cycle:** 5+ (AgentLoop, AgentContext, SubagentTool, ToolRegistry, SubagentOrchestrator) +**Estimated pinned memory:** 5–15 KB base + unbounded Future/Stats arrays + +### Cycle 2: The TUI Display Cycle (Timer-Pinned) + +``` +TuiRenderer ◂──────────────────────────────────┐ + │ $subagentDisplay │ + │ $animationManager │ + ▼ │ +SubagentDisplayManager ──Closures($this)──▸ TuiRenderer + │ │ + │ $breathColorProvider → $animationManager │ + │ $renderCallback → $this→flushRender() │ + │ │ + └───◂── TuiAnimationManager ──────────────────┘ + $subagentTickCallback → $subagentDisplay + $renderCallback → $this→flushRender() + $refreshTaskBar → $this→refreshTaskBar() +``` + +**Pinned by:** 4+ `EventLoop::repeat()` timers (30fps breathing, 20fps subagent, 50fps tool-executing, compacting). Not breakable while timers are active. + +**Objects in cycle:** 3+ (TuiRenderer, SubagentDisplayManager, TuiAnimationManager, TuiModalManager) +**Estimated pinned memory:** 20–50 KB + widget tree + +### Cycle 3: TuiRenderer ↔ TuiModalManager + +``` +TuiRenderer → TuiModalManager +TuiModalManager.$renderCallback → Closure($this = TuiRenderer) +TuiModalManager.$forceRenderCallback → Closure($this = TuiRenderer) +``` + +**Pinned by:** Dashboard timer when active. Otherwise breakable by PHP GC. + +### Subagent Tree Spanning Cycle + +At depth 2 with 3 concurrent agents, every child references back to the root's `ToolRegistry`: + +``` +Child AgentLoop → child SubagentTool → $agentFactory Closure → $subagentFactory + → $subagentFactory.rootRegistry → ROOT ToolRegistry → ROOT SubagentTool + → ROOT SubagentTool.parentContext → ROOT AgentContext → SubagentOrchestrator + → SubagentOrchestrator.agents[childId] → Future → child fiber → child AgentLoop +``` + +For a full depth-2 tree (12 agents), estimated total pinned memory: **400 KB – 3 MB** (dominated by `ConversationHistory`). + +--- + +## Critical Findings + +### C1. Fresh HttpClient per Subagent — N Independent Connection Pools + +**Files:** `src/Agent/SubagentFactory.php:96–104`, `src/LLM/AsyncLlmClient.php:32` + +Each `createAndRunAgent()` creates a new `AsyncLlmClient` → new `HttpClientBuilder::buildDefault()` → new `UnlimitedConnectionPool` (limit: `PHP_INT_MAX`). With 3+ concurrent subagents at depth 2–3, this creates multiple unbounded connection pools, each holding open sockets and TLS state. Never explicitly closed. + +**Trigger:** Every subagent spawn. +**Fix:** Share a single `HttpClient` across all `AsyncLlmClient` instances. Create it once in `SubagentFactory` constructor and inject it. Also bound the pool: `ConnectionLimitingPool::byAuthority(8)`. + +--- + +### C2. ConversationHistory Unbounded in Headless Mode + +**File:** `src/Agent/ConversationHistory.php:15` + +In headless mode (subagents via `AgentLoop::runHeadless()`), no `ContextCompactor` is passed (`SubagentFactory.php:70`). The only backpressure is `trimOldest()` on overflow errors. Subagents processing many tool calls accumulate hundreds of messages with full tool output. + +**Trigger:** Subagents with many tool call rounds. +**Fix:** Pass a lightweight compactor or implement token-count-based trimming in headless mode. + +--- + +### C3. SubagentOrchestrator — Failed Agent Futures Never Pruned ✅ Fixed + +**File:** `src/Agent/SubagentOrchestrator.php:340` + +```php +$terminalStates = ['done' => true, 'cancelled' => true]; +``` + +`pruneCompleted()` only removes `'done'` and `'cancelled'`. Failed agents stay in `$this->agents` (holding `Future` objects with large closures) and `$this->stats` indefinitely. + +**Trigger:** Any subagent failure (API errors, context overflows). +**Fix:** Add `'failed'` to `$terminalStates`. One-line fix. + +--- + +### C4. TUI Teardown Doesn't Cancel Timers + +**File:** `src/UI/Tui/TuiRenderer.php:1143–1148` + +`teardown()` calls `$this->tui->stop()` but never cancels the breathing timer (`TuiAnimationManager::$thinkingTimerId` at 30fps), the compacting timer, the subagent elapsed timer (`SubagentDisplayManager::$elapsedTimerId` at 20fps), or the tool-executing timer. These `EventLoop::repeat()` timers capture `$this` via closure, pinning the entire TuiRenderer + widget tree in memory. + +**Trigger:** Process exit during thinking, tool execution, or while subagents are running. +**Fix:** Add `TuiAnimationManager::shutdown()` that cancels all timers. Call it + `$subagentDisplay->cleanup()` from `teardown()`. + +--- + +### C5. GrepTool Spawns `which rg` TWICE per Call, No Caching ✅ Fixed + +**File:** `src/Tool/Coding/GrepTool.php:47,52,88–93` + +`hasRipgrep()` spawns a new `Process` each call and is invoked twice per `execute()`. In heavy grep sessions, this triples process overhead. + +**Trigger:** Every grep invocation. +**Fix:** Cache: `private ?bool $hasRg = null;` and memoize. + +--- + +### C6. GrepTool Has No Timeout ✅ Fixed + +**File:** `src/Tool/Coding/GrepTool.php:64–67` + +Unlike `BashTool`, `GrepTool` has zero timeout protection. A hung grep (network mount, FIFO, massive tree) blocks the agent loop forever. + +**Trigger:** Searching slow filesystems or massive directories. +**Fix:** Add timeout watchdog identical to `BashTool`'s pattern. + +--- + +### C7. BashTool Timer NOT Cancelled on Exception ✅ Fixed + +**File:** `src/Tool/Coding/BashTool.php:71–99` + +`EventLoop::cancel($timerId)` at line 84 is only reached on the happy path. If `$process->join()` or `->await()` throws, execution jumps to the catch block and the timer leaks. The timer closure captures `$process`, keeping the Process object alive. + +**Trigger:** Exception during process execution. +**Fix:** Move `EventLoop::cancel($timerId)` to a `finally` block. + +--- + +### C8. Unbounded `buffer()` — Full Command Output in Memory + +**Files:** `src/Tool/Coding/BashTool.php:79–80`, `src/Tool/Coding/GrepTool.php:65–66` + +`Amp\ByteStream\buffer()` reads entire stdout/stderr into a single string with no size cap. `OutputTruncator` runs after the full string is already in memory. A command producing GBs of output OOMs before truncation kicks in. + +**BashTool additional issue:** The progress callback (`ToolExecutor.php:140–142`) passes the entire accumulated buffer to the UI on every chunk, not just the new chunk. + +**Trigger:** Bash commands with large output (logs, data files, recursive listings). +**Fix:** Stream output to a temp file with a configurable size cap, or use a chunked buffer that stops after N bytes and kills the process. + +--- + +## High Findings + +### H1. SubagentOrchestrator Has No `__destruct()` — Background Agents Orphaned ✅ Fixed + +**File:** `src/Agent/SubagentOrchestrator.php:20–517` + +Has `cancelAll()` but no destructor. If the orchestrator goes out of scope while background agents run, their cancellations are never triggered and futures execute orphaned. + +**Fix:** Add `public function __destruct() { $this->cancelAll(); }`. + +--- + +### H2. AgentContext Circular Reference via Orchestrator + +**File:** `src/Agent/AgentContext.php:19` + +Every `AgentContext` holds a strong reference to the singleton `SubagentOrchestrator`. The orchestrator's `spawnAgent()` async closure captures `$childContext`, which holds the orchestrator. Each subagent level replicates this cycle. + +**Fix:** Use `WeakReference` for the orchestrator in `AgentContext`, or extract only scalars into the async closure. + +--- + +### H3. SubagentOrchestrator `$pendingResults` — Orphaned Results Accumulate + +**File:** `src/Agent/SubagentOrchestrator.php:31–32` + +Background agent results (both success and failure) are stored in `$this->pendingResults[$parentId][$id]`. If a parent crashes or never calls `collectPendingResults()`, these accumulate forever. + +**Fix:** Add TTL or size cap to `pendingResults`. Prune orphaned entries in `pruneCompleted()`. + +--- + +### H4. Group Semaphores Never Evicted + +**File:** `src/Agent/SubagentOrchestrator.php:381–384` + +```php +return $this->groups[$name] ??= new LocalSemaphore(1); +``` + +Every unique group name creates a `LocalSemaphore` that is never removed. + +**Fix:** Clear `$this->groups` in `pruneCompleted()` when no active agents reference a group. + +--- + +### H5. Widget Tree Grows Unboundedly in TUI + +**File:** `src/UI/Tui/TuiRenderer.php` (multiple methods) + +`$this->conversation` ContainerWidget accumulates every widget ever added. Each `showToolCall()`, `showToolResult()`, `showSubagentSpawn()` adds permanent widgets. Only cleared on explicit `/new` or `/clear`. + +**Trigger:** Long sessions with hundreds of tool calls. +**Fix:** Implement a scrolling window — remove widgets beyond N turns, or collapse old tool results into summary widgets. + +--- + +### H6. Prism Upstream: `StreamState::reset()` Doesn't Clear `thinkingSummaries` + +**File:** `vendor/prism-php/prism/src/Streaming/StreamState.php:152,383–403` + +`reset()` is called between tool-call turns in multi-step streaming, but `thinkingSummaries` array is never cleared. Grows across turns for models with extended thinking. + +**Fix:** Upstream bug report. Patch: add `$this->thinkingSummaries = [];` to `reset()`. + +--- + +### H7. Prism Upstream: New Provider Instance per Request + +**File:** `vendor/prism-php/prism/src/PrismManager.php:40–56` + +Every `PrismService::chat()` call creates a fresh provider + `PendingRequest` HTTP client. No caching. Causes GC pressure in tight loops. + +**Fix:** Cache resolved providers in `PrismManager`, or in `PrismService`. + +--- + +### H8. ConversationHistory Tool Result `args` Not Freed After Pruning ✅ Fixed + +**File:** `src/Agent/ConversationHistory.php:15` + +`pruneToolResults()` replaces `result` with a placeholder but leaves `args` intact. Large args (file contents for edits) persist for the entire session. + +**Fix:** Null out `args` on pruned/superseded tool results. + +--- + +### H9. SQLite PDO Connection Never Explicitly Closed + +**File:** `src/Session/Database.php:9,25` + +No `close()` method, no `__destruct()`. WAL journal mode enabled but never checkpointed. WAL file can grow without bound. + +**Fix:** Add `close()` method and call from a shutdown handler. Add periodic `PRAGMA wal_checkpoint(TRUNCATE)`. + +--- + +### H10. PrismService Uses No Connection Pooling (Guzzle Path) + +**File:** `src/LLM/PrismService.php:113` + +PrismService uses Laravel's `Http` facade (Guzzle under the hood), not Amp. Each request creates and tears down a fresh TCP+TLS connection. No connection reuse. + +**Fix:** Enable Guzzle connection pooling or share a Guzzle client instance. + +--- + +### H11. Full Message History Loaded on Session Resume + +**File:** `src/Session/SessionManager.php:96–106`, `src/Session/MessageRepository.php:53–68` + +On resume, `loadActive()` deserializes ALL non-compacted messages into memory via `fetchAll()`. For a long session with thousands of messages containing tool results with full file contents, this causes a significant memory spike. + +**Fix:** Implement lazy loading or cursor-based pagination for message history. + +--- + +### H12. `onRetry` Closure Captures UIManager in Singleton + +**File:** `src/Agent/AgentSessionBuilder.php:74–78` + +`$llm->setOnRetry(function (...) use ($ui) { ... })` captures the UIManager in a closure stored on the `RetryableLlmClient` singleton. Circular retention: container → LLM singleton → closure → UIManager. + +**Fix:** Use `WeakReference` for `$ui` inside the closure. + +--- + +## Medium Findings + +| # | Finding | File | Fix | +|---|---------|------|-----| +| M1 | `SubagentTool` closure captures entire `SubagentFactory` + ancestor registries | `SubagentFactory.php:56–58` | Extract only config, not `$this` | +| M2 | `streamBuffer` not cleared on interrupted streaming | `AnsiRenderer.php:22,127` | Clear in error handler | +| M3 | `lastToolArgs` holds large strings between tool calls | `TuiRenderer.php:114` | Clear after consuming | +| M4 | GlobTool collects ALL results before truncating to 200 | `GlobTool.php:78–113` | Short-circuit at 200 | +| M5 | `register_shutdown_function` accumulates on repeated animation calls | `AnsiTheogony.php:84` et al | Register once with static flag | +| M6 | Memories accumulate indefinitely (no TTL/count limit) | `Session/MemoryRepository.php` | Add configurable limit + auto-prune | +| M7 | `forProject()` loads all memories without LIMIT | `Session/MemoryRepository.php:40–55` | Add LIMIT clause | +| M8 | Compacted messages flagged but never deleted from DB | `Session/MessageRepository.php:89–95` | Periodic `DELETE WHERE compacted = 1` | +| M9 | WAL file never checkpointed | `Session/Database.php:30` | Periodic `PRAGMA wal_checkpoint(TRUNCATE)` | +| M10 | Compaction stores raw summary as redundant memory | `Agent/ContextManager.php:142–156` | Skip raw summary, store only extracted memories | +| M11 | `OutputTruncator` files accumulate, cleanup only at construction | `Agent/OutputTruncator.php:23` | Call `cleanupOldFiles()` periodically | +| M12 | `FileEditTool` temp file not cleaned on crash | `Tool/Coding/FileEditTool.php:135–178` | Add `@unlink($tmpPath)` in finally block | +| M13 | `FileReadTool` doubles memory for under-threshold files | `Tool/Coding/FileReadTool.php:59` | Use streaming for all files or lower threshold | +| M14 | SubagentDisplayManager old containers never removed from conversation | `SubagentDisplayManager.php:118` | Remove old containers or prune | + +--- + +## Low Findings + +| # | Finding | File | +|---|---------|------| +| L1 | `AgentLoop` no `dispose()` method | `AgentLoop.php:21–606` | +| L2 | Kernel singletons held for process lifetime | `Kernel.php:299–341` | +| L3 | `Facade::setFacadeApplication()` static holds container | `Kernel.php:258–259` | +| L4 | `SessionGrants` unbounded growth (bounded by ~15 tool count) | `Tool/Permission/SessionGrants.php` | +| L5 | `hasRipgrep()` process stdout/stderr not consumed | `GrepTool.php:90–92` | +| L6 | Non-timeout BashTool exceptions don't explicitly kill process | `BashTool.php:70–96` | +| L7 | `ToolExecutor` static `BashTool::$progressCallback` not cleared on exception | `ToolExecutor.php:140–150` | +| L8 | `FutureState` unhandled error thrown on GC for unconsumed errored futures | `vendor/amphp/amp` | +| L9 | `resetSessionCost()` doesn't reset history | `AgentLoop.php:489–493` | +| L10 | Event loop `disable()` keeps closure in callbacks array (must use `cancel()` to free) | `vendor/revolt/event-loop` | + +--- + +## Vendor Library Risks + +### amphp/http-client + +| Risk | Severity | Description | +|------|----------|-------------| +| Unlimited connection pool | **Medium** | `buildDefault()` uses `PHP_INT_MAX` limit. 64-idle-connection eviction only for idle connections, no time-based TTL | +| Connection leak on abandoned response | **High** | If response body not fully consumed and `Response` object kept alive, connection never returns to pool. GC-dependent cleanup via destructor *closes* the connection rather than returning it | +| Reference cycles in cancellation chain | **Medium** | `DeferredCancellation` → `Cancellable` → callbacks → connection → response body → cycle. Requires PHP cycle collector | + +### amphp/amp + +| Risk | Severity | Description | +|------|----------|-------------| +| `FutureState` unhandled error on GC | **Medium** | Errored Futures that are never consumed (`await()`, `catch()`, or `ignore()`) throw `UnhandledFutureError` from destructor into event loop | +| `DeferredCancellation` destructor auto-cancels | **Low** | Safety feature, but creates unnecessary event loop noise when background agents complete successfully | + +### amphp/process + +| Risk | Severity | Description | +|------|----------|-------------| +| Pipe buffers on kill | **Low** | OS pipe buffer (~64KB) can block child if full when killed | +| Static WeakMaps | **Low** | Self-cleaning, but stdout/stderr references elsewhere keep `ProcHolder` alive | + +### prism-php/prism + +| Risk | Severity | Description | +|------|----------|-------------| +| O(N²) message storage in Text handler | **High** | Each `Step` stores full message history. Multi-turn tool calls in Prism's internal loop accumulate quadratically. Not impactful for KosmoKrator since tool calls are driven externally | +| `StreamState::$thinkingSummaries` never cleared by `reset()` | **Low** | Upstream bug. Only freed when handler is discarded | +| PrismManager creates new providers each call | **Low** | No caching but objects are lightweight | + +### revolt/event-loop + +| Risk | Severity | Description | +|------|----------|-------------| +| Callbacks not freed on `disable()` | **Low** | Must use `cancel()` to free closures from `$callbacks` array | +| Callbacks remain in memory after `stop()` | **Low** | Loop stop doesn't clear `$callbacks`. Timers fire again if loop restarts | + +--- + +## Async/Event-Loop Pattern Audit + +### EventLoop::repeat() — 5 Call Sites + +| Location | Timer | Cancelled When | Leak Risk | +|----------|-------|----------------|-----------| +| `TuiAnimationManager.php:214` | Compacting (30fps) | `clearCompacting()` | No finally/destructor guard | +| `TuiAnimationManager.php:386` | Breathing (30fps) | `enterTools()`, `enterIdle()` | Relies on phase transition | +| `SubagentDisplayManager.php:203` | Elapsed (20fps) | `stopLoader()` → `cleanup()` | Relies on `enterIdle()` chain | +| `TuiRenderer.php:749` | Tool executing (50fps) | `clearToolExecuting()` | No finally guard | +| `TuiModalManager.php:463` | Dashboard (0.5fps) | After `$suspension->suspend()` returns | Safe | + +### EventLoop::delay() — 1 Call Site + +| Location | Purpose | Cancelled When | Leak Risk | +|----------|---------|----------------|-----------| +| `BashTool.php:71` | Process timeout | Happy path only (line 84) | **NOT cancelled on exception** | + +### Amp\async() — 4 Call Sites + +| Location | Captures | Leak Risk | +|----------|----------|-----------| +| `BashTool.php:83` | `$process`, `$progressCb` | Safe — always awaited | +| `BashTool.php:95` | `$process->getStderr()` | Safe | +| `GrepTool.php:65` | `$process->getStdout()` | Safe | +| `GrepTool.php:66` | `$process->getStderr()` | Safe | + +### Process::start() — 3 Call Sites + +| Location | Timeout | Cleanup | Leak Risk | +|----------|---------|---------|-----------| +| `BashTool.php:70` | Yes (configurable) | `join()` + `kill()` on timeout | Process not killed on non-timeout exception | +| `GrepTool.php:64` | **No timeout** | `join()` | **No timeout, no try/catch** | +| `GrepTool.php:90` | No | `join()` | Stdout/stderr not consumed (minor) | + +### DeferredCancellation — 3 Usage Sites + +| Location | Cleanup | Leak Risk | +|----------|---------|-----------| +| `SubagentOrchestrator.php:82` | `finally` block + `cancelAll()` | Safe | +| `TuiRenderer.php:515` | Nulled on Idle phase | Safe | +| `TuiAnimationManager.php:304` | Passed through, not owned | Safe | + +### LocalSemaphore — 2 Usage Sites + +| Location | Release | Leak Risk | +|----------|---------|-----------| +| `SubagentOrchestrator.php:48` (global) | `finally` block | Safe | +| `SubagentOrchestrator.php:383` (groups) | `finally` block | Map never shrinks | + +### Suspension::suspend() — 8 Call Sites + +All modal methods follow create → suspend → resume → cleanup pattern. All exit paths resume the suspension. **Safe.** + +### EventLoop::onSignal() — 0 Call Sites + +No custom signal handlers. Safe. + +--- + +## Positive Findings (Clean) + +| Area | Assessment | +|------|-----------| +| **No static mutable state in `src/Agent/`** | Grep for `static (private|protected|public) \$` returned zero matches | +| **Event classes** | All 5 event classes are `readonly` value objects. No leak risk | +| **ANSI rendering** | No static mutable state. All buffers reset per operation. Particle arrays are method-local and bounded | +| **Theogony** | 1997 lines but ~1.2KB static data, loaded only on demand. Method-local particle arrays | +| **MarkdownToAnsi** | Properly resets all buffers per `render()`. Highlighter is stateless | +| **DiffRenderer** | All state is method-local. Lazy Highlighter is a single reusable instance | +| **Theme** | Pure static utility, no mutable state | +| **AgentDisplayFormatter/AgentTreeBuilder** | Pure static methods, no instance state | +| **Repositories** | Stateless wrappers around PDO queries, no caching | +| **Kernel container** | No circular dependencies in singleton registrations | +| **Semaphore release** | Always in `finally` blocks with `Lock::__destruct()` as safety net | +| **ToolResultDeduplicator** | Method-local index arrays, GC'd after return | +| **StuckDetector** | Window bounded by `array_slice` to `$windowSize` (default 8) | +| **Modal lifecycle** | All modals properly remove widgets after dismissal | +| **FileWriteTool** | Stateless, no handles | +| **Permission system** | All immutable value objects, `SessionGrants` bounded by tool count | + +--- + +## Recommended Fix Plan + +### Priority 1 — High Impact, Easy Fixes + +| Fix | Effort | Impact | +|-----|--------|--------| +| Add `'failed'` to `pruneCompleted()` terminal states | Trivial (1 line) | Eliminates C3 — failed agent accumulation | +| Add `__destruct()` to `SubagentOrchestrator` calling `cancelAll()` | Trivial | Eliminates H1 — orphaned background agents | +| Move BashTool `EventLoop::cancel($timerId)` to `finally` block | Small | Eliminates C7 — timer leak on exception | +| Cache `hasRipgrep()` result in GrepTool | Small | Eliminates C5 — 2× extra processes per grep | +| Add timeout watchdog to GrepTool | Small | Eliminates C6 — hung process blocking loop | +| Harden `TuiRenderer::teardown()` to cancel all timers | Small | Eliminates C4 — timer leaks on exit | + +### Priority 2 — High Impact, Medium Effort + +| Fix | Effort | Impact | +|-----|--------|--------| +| Share single `HttpClient` across subagents, bound pool to 8 | Medium | Eliminates C1 — N connection pools | +| Add `TuiAnimationManager::shutdown()` method | Small | Timer cleanup infrastructure | +| Null out `args` on pruned tool results | Small | Reduces H8 — retained file contents | +| Add periodic WAL checkpoint to Database | Small | Reduces H9 — WAL file growth | + +### Priority 3 — Architectural Improvements + +| Fix | Effort | Impact | +|-----|--------|--------| +| Implement conversation widget pruning in TUI | Medium | Reduces H5 — widget accumulation | +| Add compaction support in headless mode | Medium | Reduces C2 — unbounded subagent history | +| Use `WeakReference` for orchestrator in `AgentContext` | Medium | Breaks Cycle 1 partially | +| Lazy-load message history on resume | Large | Reduces H11 — full history load spike | +| Add memory count limit with auto-pruning | Medium | Reduces M6/M7 — unbounded memories | +| Delete compacted messages from DB periodically | Small | Reduces M8 — DB bloat | diff --git a/docs/ecosystem/kosmokrator/audits/ram-audit/RAM-EFFICIENCY-AUDIT.md b/docs/ecosystem/kosmokrator/audits/ram-audit/RAM-EFFICIENCY-AUDIT.md new file mode 100644 index 0000000..ded13c1 --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/ram-audit/RAM-EFFICIENCY-AUDIT.md @@ -0,0 +1,1773 @@ +# KosmoKrator RAM Efficiency Audit — Comprehensive Report + +**Project:** KosmoKrator — AI coding agent for the terminal +**Audit Date:** 2026-04-03 +**Status:** Phase 1 & 2 Synthesis Complete +**PHP Version:** 8.4 +**Architecture:** CLI (Symfony Console + Illuminate Container + Amp Event Loop) + +--- + +## 1. Executive Summary + +### Overall Assessment + +KosmoKrator demonstrates **generally sound memory management** with bounded history compaction, no classic leaks, and strong use of PHP 8.4 readonly features. However, **systematic caching omissions** and **unbounded accumulation vectors** create significant RAM efficiency risks in long-running or memory-intensive sessions. + +**Risk Rating:** 🔴 **HIGH** — Two critical unbounded-growth vectors and multiple high-impact caching gaps can cause progressive memory bloat. + +### Critical Issues (Address Immediately) + +| # | Issue | Location | Est. Impact | Effort | +|---|-------|----------|-------------|--------| +| C1 | Permission regex recompilation on every call | `PermissionRule::matchesGlob()` | 20–50 KB/request + CPU | 5 min | +| C2 | Tool schema regeneration per subagent | `ToolRegistry::toPrismTools()` | 1.8–7.5 MB with 30 subagents | 10 min | +| C3 | Instruction files re-read every session | `InstructionLoader::gather()` | 2–50 KB/session + I/O | 5 min | +| C4 | Subagent orchestrator unbounded retention | `SubagentOrchestrator::$agents`, `$stats`, `$pendingResults` | Unbounded (MB–GB) | 1 hr | +| C5 | MemoryRepository loads all rows every LLM round | `MemoryRepository::forProject()` | 100–500 MB for 10k memories | 2 hrs | +| C6 | TaskStore unbounded accumulation | `TaskStore::$tasks` | Unbounded (MB) | 1 hr | +| C7 | HTTP connection pool per AsyncLlmClient | `AsyncLlmClient` → `UnlimitedConnectionPool` | ~50–200 KB per subagent × N | 30 min | +| C8 | TUI animation timers not cancelled on teardown | `TuiAnimationManager` | Pins entire widget tree | 15 min | + +### Memory Hotspots (Highest Impact) + +| Component | File:Line | Growth Pattern | Estimated Footprint | +|-----------|-----------|----------------|---------------------| +| `ConversationHistory::$messages` | `Agent/ConversationHistory.php:19` | Monotonic (bounded by compaction) | 100–500 bytes/message | +| `SubagentOrchestrator::$agents` | `Agent/SubagentOrchestrator.php:31` | Unbounded (no auto-prune) | ~1 KB/active agent | +| `MemoryRepository::forProject()` result | `Session/MemoryRepository.php:65-88` | Full table load per call | 100–500 MB for 10k rows | +| `TaskStore::$tasks` | `Task/TaskStore.php:17` | Unbounded (no eviction) | ~200–300 bytes/task | +| `FileReadTool::$readCache` | `Tool/Coding/FileReadTool.php:21` | Unbounded (no eviction) | 10 KB–10 MB depending on files read | +| `ToolRegistry` tool instances | `Provider/ToolServiceProvider.php` | Static (20+ tools) | ~3–6 MB at boot | +| Kernel boot services | `Kernel.php` + providers | One-time spike | ~20–40 MB peak | + +### Priority Roadmap + +**Immediate Actions (<1 day, high impact):** +1. Add static regex cache to `PermissionRule::matchesGlob()` — saves 20–50 KB/request +2. Cache tool schemas in `ToolRegistry` — saves 1.8–7.5 MB with concurrency +3. Cache instruction files in `InstructionLoader` — saves 2–50 KB/session + I/O +4. Cache git root/branch — eliminates 200 shell calls/100 turns +5. Add `pruneCompleted()` auto-call in `SubagentOrchestrator` — stops unbounded growth +6. Bulk token fetch + in-memory cache in `SettingsCodexTokenStore` — saves 6 KB/request + DB load + +**Short-Term (1–2 weeks):** +7. Implement memory selection caching per turn — avoids 3–4× rescoring +8. Add LIMIT to `MemoryRepository::forProject()` — caps RAM spike +9. Truncate task tree rendering (max 50 tasks / 10 KB) — bounds prompt growth +10. Stream BashTool/GrepTool output with early truncation — prevents 100 MB spikes +11. Add LRU eviction to FileReadTool cache — bounds long-run growth +12. Share single HttpClient with bounded pool across subagents — saves 50–200 KB × N + +**Long-Term (1–3 months):** +13. Push memory scoring into SQL (ORDER BY score LIMIT 6) — eliminates O(N) in PHP +14. Implement task eviction policy (max 100 tasks, LRU) — bounds task memory +15. Add database indexes on `memories` (composite) — speeds queries, reduces rows scanned +16. Centralize edge storage in TaskStore — 50% edge memory reduction +17. Container compilation / opcache warmup — reduces boot memory 30–50% +18. Worker pooling for audio notifications — avoids 2× kernel boot per sound + +--- + +## 2. Methodology + +### Dimensions Investigated + +1. **Data Structures** — array copying patterns, object graphs, string handling, collection usage +2. **Caching Gaps** — repeated computations, missing memoization, no distributed cache +3. **PHP Internals** — PHP 8.4 features (readonly, enums), generator usage, closure captures, autoloader +4. **Async/Event Loop** — timer leaks, fiber suspension, promise accumulation, connection pooling +5. **Bootstrap & Container** — service registration, singleton lifetimes, boot memory spikes +6. **I/O & Streaming** — file handling, shell sessions, tool output buffering, database fetching +7. **Security-Adjacent** — permission evaluation, token storage, config parsing, credential exposure +8. **Architecture** — subagent orchestration, memory repository patterns, task tracking, event system +9. **UI Renderers** — TUI/ANSI renderers, animation state, diff rendering +10. **Audio/Notifications** — worker process lifecycle, IPC overhead, buffer management + +### Tools Used + +- **Static analysis:** ripgrep (`rg`), glob pattern searches, manual code review +- **Memory profiling:** `memory_get_usage()`, `memory_get_peak_usage()` (where available in code) +- **Benchmarking:** Custom PHP scripts in `docs/ram-audit/benchmarks/` (to be created) +- **Existing audits:** `docs/memory-leak-audit.md`, `docs/deep-audit-*.md` referenced +- **Synthesis agents:** 10 parallel sub-agents covering specialized domains + +### Benchmark Approach + +**No benchmark files were created** during this audit (agents were in read-only mode). The following benchmark suite is **recommended** for implementation: + +| Benchmark | Scenario | Metrics | +|-----------|----------|---------| +| `db-connection-memory.php` | Connection open/close cycles, singleton reuse | Per-connection memory delta, GC retention | +| `agent-loop-memory.php` | 100/500/1000 turns with 3 tools/turn | Memory growth curve, compaction triggers | +| `subagent-memory.php` | Spawn 10/30/100 concurrent subagents | Per-agent overhead, total peak | +| `tool-memory.php` | Concurrent tool execution, large file I/O | Tool-specific spikes, cache growth | +| `async-memory.php` | 100/500/1000 concurrent promises | Per-promise overhead, Fiber stack | +| `caching-memory.php` | Repeated token estimation, model resolution | Cache hit/miss impact | +| `datastructure-memory.php` | Array merge patterns, JSON encoding | Temporary allocation peaks | +| `ui-memory.php` | TUI/ANSI render cycles, animation frames | Render buffer growth, timer retention | +| `audio-memory.php` | 10/50/100 rapid completion sounds | Worker process memory, IPC overhead | +| `session-memory.php` | 1k/5k/10k session creations, message inserts | DB fetch strategies, connection reuse | + +**Measurement protocol:** +- Use `memory_get_peak_usage(true)` (real peak) before/after each operation +- Run each scenario 5×, report median and max +- Test with `gc_collect_cycles()` forced between iterations +- Profile with `xhprof` or `tideways` if available (not used here) + +--- + +## 3. Detailed Findings by Area + +### 3.1 Security-Adjacent RAM Efficiency (synthesis-security.md) + +#### Finding SEC-1: Regex Compilation in Hot Path — PermissionRule::matchesGlob() + +**Severity:** 🔴 Critical +**Files:** `src/Tool/Permission/PermissionRule.php:51-60`, `src/Tool/Permission/Check/DenyPatternCheck.php:39`, `src/Tool/Permission/Check/BlockedPathCheck.php:66`, `src/Tool/Permission/GuardianEvaluator.php:106` + +**Issue:** Every call to `matchesGlob()` compiles a fresh regex via `preg_quote()` + `str_replace()` + `preg_match()`. This method is invoked: +- For each deny pattern in each matching rule (DenyPatternCheck) +- For each blocked path pattern (BlockedPathCheck, up to 4× per path) +- For each safe command pattern (GuardianEvaluator, O(p) per call) + +With ~50 tools, ~10 rules, ~5 deny patterns per rule, a single permission check can trigger **250+ regex compilations**. PHP's internal regex cache is limited and not guaranteed to hit. + +**RAM Impact:** Each compiled regex pattern string occupies ~200–500 bytes in memory. At 250 compilations per check × 10 concurrent requests = **~500 KB – 1.25 MB** of transient regex strings per request cycle, plus GC pressure. + +**Security Risk:** An attacker controlling tool arguments can force evaluation of many deny patterns, causing CPU/memory exhaustion. No rate limiting exists on permission checks. + +**Recommendation:** Add static regex cache to `PermissionRule`: + +```php +private static array $regexCache = []; +$key = $pattern; +if (!isset(self::$regexCache[$key])) { + self::$regexCache[$key] = '/^'.str_replace(['\*', '\?'], ['.*', '.'], preg_quote($pattern, '/')).'$/i'; +} +$regex = self::$regexCache[$key]; +``` + +**Effort:** 5 minutes. ~5–10 lines change. + +--- + +#### Finding SEC-2: N+1 Token Storage Queries — SettingsCodexTokenStore + +**Severity:** 🔴 Critical +**Files:** `src/LLM/Codex/SettingsCodexTokenStore.php:32-38`, `src/LLM/Codex/SettingsCodexTokenStore.php:63-85` + +**Issue:** Token storage uses 7 individual settings keys (`provider.codex.*`). Every `current()` performs 7 separate SELECT queries; every `save()` performs 7 separate INSERT/UPDATE queries. No in-memory caching; every call hits SQLite. + +**RAM Impact:** Each query returns a row (~200–300 bytes). 7 queries × result set overhead × concurrent requests = **~1–2 KB per request** in short-lived DB result objects. More critically, **connection pool exhaustion** under load can cause queued requests to accumulate memory. + +**Security Risk:** Token refresh storms (multiple simultaneous requests triggering refresh) cause 7 writes + HTTP call per refresh, amplifying memory/CPU usage. No refresh debouncing. + +**Recommendation:** Replace 7 individual SELECTs with single bulk query: + +```sql +SELECT key, value FROM settings WHERE scope='global' AND key LIKE 'provider.codex.%' +``` + +Build token array from single result set. Add in-memory cache with 5-second TTL. + +**Effort:** 15 minutes. ~20 lines change. + +--- + +#### Finding SEC-3: Full Config Reload on Every Write — SettingsManager::reloadRepository() + +**Severity:** 🔴 Critical +**Files:** `src/Settings/SettingsManager.php:266-274` + +**Issue:** After any settings `set()` or `delete()`, `reloadRepository()` creates a **new ConfigLoader** and re-parses all 4 bundled YAML files + user + project config, then copies data into the Repository. This happens on every single settings write. + +**RAM Impact:** Total YAML size ~28 KB, but parsing creates intermediate arrays and objects. A full reload generates **~100–150 KB** of temporary arrays/objects per write, which are then GC'd. Under rapid successive writes (e.g., batch updates), this creates significant memory churn and can push PHP memory_limit. + +**Security Risk:** An attacker with settings write access (or a buggy tool) can trigger repeated config reloads to exhaust memory. The pattern is predictable and not rate-limited. + +**Recommendation:** In `reloadRepository()`, update `$this->config` incrementally using the `$data` already loaded in `configTarget()`. Avoid full `ConfigLoader::load()`. + +**Effort:** 20 minutes. ~20 lines change. + +--- + +#### Finding SEC-4: No Path Resolution Cache — PathResolver::resolve() + +**Severity:** 🟠 High +**Files:** `src/Tool/Permission/PathResolver.php:21-39` + +**Issue:** `realpath()` syscall executed on every path check with no caching. `BlockedPathCheck` calls this for every file operation, and `GuardianEvaluator::isInsideProject()` calls it for every command. + +**RAM Impact:** Each `realpath()` result is a string (~256–1024 bytes). With 100 file checks per request, that's **25–100 KB** of repeated string allocations. Strings are duplicated in memory if same path resolved multiple times. + +**Security Risk:** Path traversal attacks cause repeated resolution of deep/nested paths, amplifying memory usage. No TTL or eviction on cache (because none exists). + +**Recommendation:** Add static cache to `PathResolver`: + +```php +private static array $cache = []; +$key = $path; +if (!isset(self::$cache[$key])) { + self::$cache[$key] = realpath($path); +} +return self::$cache[$key]; +``` + +**Effort:** 10 minutes. + +--- + +#### Finding SEC-5: Duplicate Rule Evaluation — DenyPatternCheck + RuleCheck + ModeOverrideCheck + +**Severity:** 🟠 High +**Files:** `src/Tool/Permission/Check/DenyPatternCheck.php:26-49`, `src/Tool/Permission/Check/RuleCheck.php:25-48`, `src/Tool/Permission/Check/ModeOverrideCheck.php:30-70` + +**Issue:** Rules are evaluated up to **3 times** in a single permission flow: +1. `DenyPatternCheck` iterates all rules, calls `matchesGlob()` for each deny pattern +2. `RuleCheck` iterates all rules again, calls `evaluate()` (which calls `matchesGlob()` again) +3. `ModeOverrideCheck` iterates all rules a third time if mode is Guardian + +**RAM Impact:** Each evaluation creates temporary arrays and regex strings. Triple evaluation multiplies memory churn by 3×. For 50 rules × 5 patterns = 750 regex compilations instead of 250. + +**Security Risk:** Complex permission rules (many deny patterns) are amplified 3×, making them a more effective DoS vector. + +**Recommendation:** Refactor check chain so `RuleCheck` returns both Deny and Ask states in one pass, and `ModeOverrideCheck` reuses that result instead of re-evaluating. + +**Effort:** 1–2 hours. + +--- + +### 3.2 Core Agent Memory Efficiency (synthesis-core-agent.md) + +#### Finding AGENT-1: Instruction Files Re-Read Every Session (No Cache) + +**Severity:** 🔴 Critical +**Files:** `src/Agent/InstructionLoader.php:26-85` + +**What:** `InstructionLoader::gather()` reads up to 5 files from disk on every session start: +- `~/.kosmokrator/instructions.md` +- `{git_root}/KOSMOKRATOR.md` +- `{git_root}/.kosmokrator/instructions.md` +- `{git_root}/AGENTS.md` +- `{cwd}/KOSMOKRATOR.md` + +**Impact:** +- **Memory:** Each file loaded as a string kept for session lifetime. Large `AGENTS.md` (common in monorepos) can be 10–100 KB. +- **I/O:** 3–5 `file_get_contents()` calls per session; `gitRoot()` uses `shell_exec()` (line 102). +- **Frequency:** Once per session, but sessions are frequent in REPL usage. + +**Why critical:** This is **pure waste** — instruction files change rarely (user edits or git commits). No technical reason exists to re-read them. Static property cache would eliminate all I/O and string allocation. + +**Recommendation:** Add `static ?string $cached = null` to `gather()`. On first call, read files and store. Subsequent calls return cached string. + +**Effort:** 5 minutes. + +--- + +#### Finding AGENT-2: Tool Schema Regenerated on Every Subagent Spawn + +**Severity:** 🔴 Critical +**Files:** `src/Tool/ToolRegistry.php:67-103`, `src/Agent/SubagentFactory.php:105` + +**What:** `ToolRegistry::toPrismTools()` converts each tool to a `PrismTool` object with full parameter schema on every call. Called: +- Once at main `AgentLoop` setup (`AgentSessionBuilder:133`) +- **Once per subagent** (`SubagentFactory:105`) — subagents spawn frequently + +**Impact:** +- **Memory:** ~30–50 tools × ~10 parameters each = 300–500 parameter objects per call. Each `PrismTool` + parameter objects ≈ 200–500 bytes → **60–250 KB per subagent** wasted. +- **CPU:** Object allocation + method calls repeated unnecessarily. +- **Frequency:** Every subagent creation (default concurrency 10, depth 3 → potentially 30+ subagents per session). + +**Why critical:** Tool schemas are **static metadata** — they never change at runtime. Rebuilding them is pure allocation bloat. Subagent memory isolation is good, but this duplicates static data across all subagents. + +**Recommendation:** Add private `?array $cachedPrismTools = null` to `ToolRegistry`. In `toPrismTools()`, check cache; if null, build and store. Invalidate only when `register()`/`unregister()` called (rare). + +**Effort:** 10 minutes. + +--- + +#### Finding AGENT-3: Repeated Git Shell Calls Every Turn + +**Severity:** 🟠 High +**Files:** `src/Agent/ProtectedContextBuilder.php:24-50`, `src/Agent/InstructionLoader.php:102` + +**What:** `ProtectedContextBuilder::build()` calls: +- `InstructionLoader::gitRoot()` — `shell_exec('git rev-parse --show-toplevel')` +- `InstructionLoader::gitBranch()` — `shell_exec('git branch --show-current')` + +Every time protected context is built, which is **every turn** (via `ContextManager::buildSystemPrompt()`). + +**Impact:** +- **Memory:** Each `shell_exec()` returns a string (~20–100 bytes). Strings are short-lived but allocated every turn. +- **I/O:** Two subprocess calls per turn. At 100 turns → 200 shell executions. Significant overhead. +- **Latency:** Each call takes ~1–5 ms; cumulative delay noticeable. + +**Why high:** Git state changes infrequently. Caching with `static ?string` (per-request) or session-scoped property would eliminate all repeated calls. No invalidation needed except on explicit git events. + +**Recommendation:** Add `static ?string $cachedRoot` and `static ?string $cachedBranch` to respective methods. Cache result for lifetime of request. + +**Effort:** 5 minutes per method. + +--- + +#### Finding AGENT-4: Task Tree Rendering Unbounded + +**Severity:** 🟠 High +**Files:** `src/Agent/TaskStore.php` (referenced in `ContextManager:270`) + +**What:** `ContextManager::buildSystemPrompt()` appends `$this->taskStore->renderTree()` to system prompt every turn. No truncation limit observed. + +**Impact:** +- **Memory:** Task tree grows linearly with number of tasks created. Each task adds ~50–200 chars to rendered string. +- **Prompt bloat:** Unbounded task list consumes context window, forcing earlier compaction. +- **Frequency:** Every turn. + +**Why high:** Long-running sessions with many decomposed tasks could see task tree reach **tens of KB**. This directly competes with conversation history for context space. Should have hard limit (e.g., last 50 tasks, or 10 KB max). + +**Recommendation:** Add configurable limit: `max_tasks: 50` or `max_chars: 10240`. Truncate oldest tasks first. Return `"... truncated N tasks"` note. + +**Effort:** 15–30 minutes. + +--- + +### 3.3 I/O Performance & Memory (synthesis-io-performance.md) + +#### Finding IO-1: FileReadTool Unbounded Cache + +**Severity:** 🟡 Medium +**Files:** `src/Tool/Coding/FileReadTool.php:21,70-72,103-104` + +**Issue:** `$readCache` array grows unbounded across process lifetime; no eviction policy. Cache stores boolean flags per `(path, mtime, offset, limit)` key. + +**Impact:** Hundreds of MB in long-running sessions with many file reads (e.g., codebase exploration). Current state: cache stores only booleans, minimizing per-entry footprint; FileReadTool is a singleton in ToolRegistry. + +**Recommendation:** Add LRU eviction with configurable max entries (e.g., 1000) or TTL (e.g., 1 hour). Consider per-AgentContext cache instead of singleton. + +**Effort:** 30 minutes. + +--- + +#### Finding IO-2: BashTool Full Output Buffering + +**Severity:** 🟡 Medium +**Files:** `src/Tool/Coding/BashTool.php:96-108` + +**Issue:** Stdout and stderr fully buffered in memory via `buffer()` before OutputTruncator runs. Commands producing >100 MB output will spike RAM; no streaming to disk or early truncation. + +**Current mitigation:** OutputTruncator caps at 2000 lines / 50 KB but runs **after** tool returns (ToolExecutor line 300-302). + +**Recommendation:** Stream stdout/stderr directly to `OutputTruncator` during read loop, applying line/byte limits incrementally. Or add `stream_to_file` parameter for outputs >1 MB. Enforce per-command output limit with early process kill. + +**Effort:** 1–2 hours. + +--- + +#### Finding IO-3: Subagent PendingResults Orphaned + +**Severity:** 🟡 Medium +**Files:** `src/Agent/SubagentOrchestrator.php:34,420` + +**Issue:** `$pendingResults[parentId]` never cleared if parent agent crashes or exits without calling `collectPendingResults()`. Results (strings, potentially KB–MB each) accumulate per background subagent over time. + +**Current state:** Documented in `docs/memory-leak-audit.md` as known issue; `pruneCompleted()` does not touch `$pendingResults`. + +**Recommendation:** Add TTL (e.g., 1 hour) to `$pendingResults` entries with timestamp. Or prune `$pendingResults[parentId]` when all agents for that parent reach terminal state. + +**Effort:** 20 minutes. + +--- + +#### Finding IO-4: GlobTool Intermediate Array Buildup + +**Severity:** 🟢 Low +**Files:** `src/Tool/Coding/GlobTool.php:93-99` + +**Issue:** `array_merge()` inside recursion loops creates O(n²) intermediate arrays for deep directory trees. + +**Impact:** Temporary memory spikes during glob operations on nested structures; 10k files in nested tree → ~10 MB temporary. + +**Current mitigation:** Result set capped at 200 files after full sort/deduplication (lines 59-62). + +**Recommendation:** Apply 200-file cap earlier in recursion to avoid building full array. Replace `array_merge()` with generator-based yielding to eliminate intermediate arrays. + +**Effort:** 1 hour. + +--- + +### 3.4 Architecture & Service Container (synthesis-architecture.md) + +#### Finding ARCH-1: Subagent Orchestrator Unbounded Retention + +**Severity:** 🔴 Critical +**Files:** `src/Agent/SubagentOrchestrator.php:31-34, 392-409, 420-428, 471` + +**Issue:** The orchestrator stores: +- `$agents`: Future objects keyed by agent ID — never pruned automatically +- `$stats`: SubagentStats objects — never pruned automatically +- `$pendingResults`: Background results keyed by parent ID — cleared only via explicit `collectPendingResults()` +- `$groups`: Semaphore objects per unique group name — never removed + +**Impact:** Each completed agent retains ~500–1000 bytes of closure/future overhead + captured context. With hundreds of agents over a long session, this grows to **tens of MB**. Background results can be KB–MB each and linger indefinitely if parent never collects. + +**Why critical:** This is a **classic memory leak pattern** — global mutable registry with no TTL, no weak references, no size limits. + +**Recommendations:** +1. Call `pruneCompleted()` automatically after each agent finishes or via periodic timer (e.g., every 10 completions). +2. Track reference count per group; when last agent in a group completes, `unset($this->groups[$group])`. +3. When a parent agent finishes, automatically call `collectPendingResults($parentId)` to free result strings. + +**Effort:** 1 hour total. + +--- + +#### Finding ARCH-2: MemoryRepository Unbounded Fetch + +**Severity:** 🔴 Critical +**Files:** `src/Session/MemoryRepository.php:65-88`, `src/Session/SessionManager.php:276-281` + +**Issue:** `MemoryRepository::forProject()` executes `SELECT * FROM memories` with no LIMIT, no filters pushed down. Fetches **all** memory rows into PHP (could be thousands). Called on every LLM round via `SessionManager::getMemories()` — 3–4 times per user turn. + +**Impact:** With 10,000 memories, each fetch loads 100–500 MB into PHP memory. Repeated 3–4× per turn = **300–2000 MB** of repeated allocation/GC churn. Even with 1000 memories, that's 10–50 MB per round. + +**Why critical:** This is an **N+1 query problem** compounded by **repeated full-table scans and in-memory sorts**. MemorySelector then scores all in-memory and discards. + +**Recommendations:** +- **Short-term:** Add `? LIMIT 1000` to `forProject()` to cap rows; log warning if truncated. +- **Long-term:** Push scoring into SQL: `SELECT *, (CASE ...) AS score FROM memories WHERE … ORDER BY score DESC LIMIT 6`. Eliminate O(N) in PHP. + +**Effort:** Short-term 15 min; long-term 2–3 hours. + +--- + +#### Finding ARCH-3: TaskStore Unbounded Accumulation + +**Severity:** 🔴 Critical +**Files:** `src/Task/TaskStore.php:17, 62-84, 174-287` + +**Issue:** Tasks stored in simple associative array with: +- No persistence +- No eviction policy (only manual `/tasks clear` or REPL-triggered `clearTerminal()`) +- No pagination or depth limits +- Bidirectional edge storage (duplicate arrays) +- Full tree re-render on every task operation and at 30fps in TUI + +**Impact:** Each task ~200–300 bytes + edge arrays. Unbounded growth; for 100+ tasks in complex workflows, memory and CPU become excessive due to O(n) full scans and O(n²) worst-case rendering. + +**Recommendations:** +1. Add configurable `max_tasks` (e.g., 100) with LRU eviction. When adding a task exceeds limit, remove oldest non-terminal tasks. +2. After removing tasks in `clearTerminal()`/`clearAll()`, walk all remaining tasks and filter `blockedBy`/`blocks` arrays to remove IDs not in `$this->tasks`. +3. Reduce TUI refresh rate from 30fps to 5–10fps; use dirty flag to only re-render if tree changed. + +**Effort:** 2–3 hours total. + +--- + +#### Finding ARCH-4: Missing Database Indexes + +**Severity:** 🟠 High +**Files:** `src/Session/Database.php:128` + +**Issue:** Only index on `memories` is `idx_memories_project` (single column on `project`). Queries filter on `(project IS NULL OR project = ?)` plus `expires_at`, `memory_class`, `pinned`. Missing composite index. + +**Impact:** Full table scans for every `forProject()` and `search()` call. With 10k memories, each scan reads all rows → more memory loaded, slower queries. + +**Recommendation:** Add composite index: + +```sql +CREATE INDEX idx_memories_lookup ON memories(project, memory_class, type, expires_at, pinned DESC, created_at DESC); +``` + +Also add single-column indexes on `memory_class` and `type` if composite not feasible. + +**Effort:** 30 minutes (migration). + +--- + +### 3.5 Caching Strategies & Gaps (caching-strategies-gaps.md) + +#### Finding CACHE-1: No Token Estimation Memoization + +**Severity:** 🟡 Medium +**Files:** `src/Agent/TokenEstimator.php:17-108` + +**Issue:** `TokenEstimator::estimate()` calls `mb_strlen()` O(n) per string for every message every turn. No memoization; same messages re-estimated repeatedly. + +**Impact:** Cheap per-call but cumulative across long conversations. With 100 messages × 3 turns = 300 estimations. Could cache by message content hash (SHA256). + +**Recommendation:** Add static in-memory cache keyed by `md5($message->content())`. Est. memory 5–50 KB (bounded by history size). + +**Effort:** 15 minutes. + +--- + +#### Finding CACHE-2: No Model Resolution Cache + +**Severity:** 🟡 Medium +**Files:** `src/LLM/ModelDefinitionSource.php:72-104` + +**Issue:** `resolve()` uses exact match O(1) but substring fallback does O(n) linear scan of all models (100–150) on every miss. No result cache. + +**Impact:** Substring scan on every unknown model reference. With 100 models, still trivial (<1ms) but unnecessary. + +**Recommendation:** Add `$resolveCache` array to `ModelDefinitionSource`. Check cache before substring scan loop. Est. memory 10–100 KB. + +**Effort:** 10 minutes. + +--- + +#### Finding CACHE-3: No Permission Decision Cache + +**Severity:** 🟠 High +**Files:** `src/Tool/Permission/PermissionEvaluator.php:26-49` + +**Issue:** No decision cache; same tool+args re-evaluated every call. Permission check runs before EVERY tool call, including glob matching and `realpath()`. + +**Impact:** Full permission chain (glob + path resolution) repeated for repeated tool calls. Could be 30–50% of permission check time saved. + +**Recommendation:** Add `$decisionCache` to `PermissionEvaluator`. Key: `md5(toolName . serialize($args))`. Invalidate on `grantSession()` or `resetGrants()`. Est. memory 10–200 KB. + +**Effort:** 20 minutes. + +--- + +#### Finding CACHE-4: Glob Pattern Pre-compilation Missing + +**Severity:** 🟡 Medium +**Files:** `src/Tool/Permission/PermissionRule.php:51-60` + +**Issue:** `matchesGlob()` compiles glob→regex on EVERY call via `str_replace` + `preg_quote`. Patterns repeat across calls. + +**Impact:** `preg_quote` is relatively expensive; patterns re-compiled repeatedly. Est. 5–20 KB of compiled patterns could be cached. + +**Recommendation:** Compile once in `PermissionRule` constructor, store compiled regex in private property. + +**Effort:** 10 minutes. + +--- + +### 3.6 Data Structure Optimization (data-structure-optimization.md) + +#### Finding DS-1: array_merge in Loops (O(n²) Copies) + +**Severity:** 🔴 Critical +**Files:** `src/Agent/SubagentOrchestrator.php:426-428`, `src/Tool/Coding/GlobTool.php:102,115,118` + +**Issue:** +- `SubagentOrchestrator::collectPendingResults()`: `$all = array_merge($all, $bucket)` in loop copies entire `$all` each iteration. +- `GlobTool::globStar()`: recursive `array_merge` copies parent array on each merge. + +**Impact:** O(n²) total copy volume if many buckets or deep recursion. For 1000 files in nested tree, temporary memory spikes can reach **10 MB**. + +**Recommendation:** Use `[...$all, ...$bucket]` (PHP 8.4 spread operator creates single copy) or pre-allocate and assign by key. For `GlobTool`, yield results via generator instead of merging. + +**Effort:** 30 minutes. + +--- + +#### Finding DS-2: Unbounded Message/Task Accumulation + +**Severity:** 🔴 Critical +**Files:** `src/Agent/ConversationHistory.php:26`, `src/Task/TaskStore.php:17` + +**Issue:** +- `ConversationHistory::$messages` grows every turn; compaction replaces with summary + recent but old array copied via `array_slice` + spread before GC. +- `TaskStore::$tasks` holds all tasks until manual clear; no eviction. + +**Impact:** Linear growth with session length. Peak memory during compaction = old + new array (temporary doubling). Task memory unbounded. + +**Recommendations:** +- Use `array_splice` (in-place) instead of `array_slice` + reassignment in `ConversationHistory::compact()`. +- Add task eviction policy (max 100 tasks, LRU) to `TaskStore`. + +**Effort:** 20 min + 1 hr. + +--- + +#### Finding DS-3: JSON Encoding in Tight Loops + +**Severity:** 🟠 High +**Files:** `src/Agent/TokenEstimator.php:83`, `src/Agent/StuckDetector.php:45`, `src/Agent/ToolResultDeduplicator.php:155-157` + +**Issue:** `json_encode($tc->arguments())` per tool call for signature generation. Repeated encoding of same arguments. + +**Impact:** Temporary string allocation per tool call. For many tool results, allocates many temporary strings (100+ tool calls = 100+ JSON strings). + +**Recommendation:** Cache JSON encoding of tool arguments by signature (already computed for deduplication key). Reuse. + +**Effort:** 15 minutes. + +--- + +### 3.7 PHP Internals & Language Features (php-internals-memory.md) + +#### Finding PHP-1: Readonly Properties — Excellent Adoption + +**Status:** ✅ Positive +**Files:** Throughout (`Session/SessionManager.php:30-38`, `Tool/Permission/PermissionResult.php:16-18`, `Agent/SubagentStats.php:44`) + +**Impact:** Readonly properties eliminate copy-on-write overhead. Since set once and never modified, PHP can safely share zval without separation. Excellent for DTOs and injected dependencies. + +**Recommendation:** Continue pattern. Consider extending to more DTOs (`AgentContext`, `CompactionPlan` if not already). + +--- + +#### Finding PHP-2: Generator Usage Underutilized + +**Severity:** 🟡 Medium +**Files:** `src/Session/MessageRepository.php:80`, `src/Session/MemoryRepository.php:87`, `src/Session/SessionRepository.php:62`, `src/Agent/SubagentOrchestrator.php:427-428`, `src/Agent/ContextCompactor.php:144`, `src/Agent/ConversationHistory.php:124` + +**Issue:** Generators used only once (streaming LLM responses in `PrismService.php:139`). Multiple locations load entire result sets with `fetchAll()` or `array_slice` where streaming would be superior. + +**Impact:** For large histories (1000+ messages), eager loads cause memory spikes. Could use `PDOStatement::fetch()` with generators or process pending results in buckets. + +**Recommendation:** Introduce generators for large dataset iteration where appropriate. Not urgent given expected data sizes but good practice. + +**Effort:** 1–2 hours for targeted refactoring. + +--- + +#### Finding PHP-3: Closure Capture Risk in Long-Lived Collections + +**Severity:** 🟡 Medium +**Files:** `src/Agent/SubagentOrchestrator.php:133`, `src/UI/Tui/TuiAnimationManager.php:216`, `src/UI/Tui/SubagentDisplayManager.php:205`, `src/UI/Tui/TuiToolRenderer.php:267` + +**Issue:** Closures stored in long-lived collections (`$this->agents`, `$this->pendingResults`, `$cancellations`) capture use-variables, potentially including large objects (`AgentContext`, `agentFactory`). Timers capture `$this` pinning entire widget tree. + +**Impact:** Captured objects cannot be GC'd until closure completes. For subagents living minutes, this is by design but increases retention. Timer leaks (see async section) are worse. + +**Recommendation:** Audit closures stored in long-lived collections to ensure they don't inadvertently capture more than needed. Extract primitives instead of whole objects when possible. + +**Effort:** 1 hour audit. + +--- + +### 3.8 Async Event Loop & Fiber Memory (async-event-loop-memory.md) + +#### Finding ASYNC-1: HTTP Connection Pool per AsyncLlmClient + +**Severity:** 🔴 Critical +**Files:** `src/LLM/AsyncLlmClient.php:73`, `src/Agent/SubagentFactory.php:127` + +**Issue:** Each `AsyncLlmClient` instance gets its own `HttpClient` with `UnlimitedConnectionPool` (limit: `PHP_INT_MAX`). Concurrent subagents (depth 2–3) create multiple pools holding open sockets + TLS state indefinitely. No explicit close. + +**Impact:** Each pool holds connection resources (~50–200 KB per connection). With 10 concurrent subagents, that's 10 pools × potential connections = **500 KB – 2 MB** of idle connection state. No pooling benefit. + +**Recommendation:** Share a single `HttpClient` with bounded pool (e.g., `ConnectionLimitingPool::byAuthority(8)`) across all `AsyncLlmClient` instances. Inject via container as singleton. + +**Effort:** 30 minutes. + +--- + +#### Finding ASYNC-2: TUI Animation Timers Not Cancelled on Teardown + +**Severity:** 🔴 Critical +**Files:** `src/UI/Tui/TuiAnimationManager.php:216,378`, `src/UI/Tui/SubagentDisplayManager.php:205`, `src/UI/Tui/TuiToolRenderer.php:267` + +**Issue:** +- `TuiAnimationManager` timers (`compactingTimerId`, `thinkingTimerId`) — no `shutdown()` method, `teardown()` doesn't cancel them. +- `SubagentDisplayManager::elapsedTimerId` — only cancelled when loader stops; may leak if TUI tears down mid-subagent. +- `TuiToolRenderer::toolExecutingTimerId` — only cancelled when tool clears; not on TUI teardown. + +**Impact:** Timers capture `$this` via closure, pinning entire TuiRenderer + widget tree in memory even after teardown. Each timer ~100–200 bytes but prevents GC of entire UI object graph (potentially MBs). + +**Recommendation:** +1. Add `TuiAnimationManager::shutdown()` to cancel both timers; call from `TuiCoreRenderer::teardown()`. +2. Ensure `SubagentDisplayManager::cleanup()` and `TuiToolRenderer::clearToolExecuting()` are called during teardown. +3. Move `BashTool` timeout cancellation into `finally` block (currently outside try/catch at line 112). + +**Effort:** 15–30 minutes. + +--- + +#### Finding ASYNC-3: No Streaming in AsyncLlmClient + +**Severity:** 🟠 High +**Files:** `src/LLM/AsyncLlmClient.php:291` + +**Issue:** `buffer($cancellation)` reads entire response body into memory. No streaming support. + +**Impact:** Large LLM responses (rare but possible) held fully in RAM before processing. Typically responses are <100 KB so impact moderate. + +**Recommendation:** Implement streaming with `onRead()` callback, processing chunks as they arrive. More involved; lower priority. + +**Effort:** 2–3 hours. + +--- + +### 3.9 Bootstrap & Kernel (kernel-bootstrap.md) + +#### Finding BOOT-1: Eager Service Instantiation + +**Severity:** 🟡 Medium +**Files:** `src/Kernel.php:40-73`, `src/Provider/ToolServiceProvider.php:18-110`, `src/Provider/AgentServiceProvider.php` + +**Issue:** All providers registered eagerly; all singletons bound but not yet instantiated. However, `ToolRegistry` instantiates ~20 tool objects during registration even if never used (e.g., `setup` command doesn't need `FileWriteTool`). `AgentServiceProvider` constructs `AgentLoop`, `SubagentOrchestrator`, `ContextPipeline` — heavy. + +**Impact:** Boot memory spike ~20–40 MB before any agent work begins. Acceptable for CLI but could be lazy-loaded. + +**Recommendation:** Lazy-load heavy services. Defer `ToolRegistry` and `AgentServiceProvider` until `AgentCommand` executes. Use `$container->bind()` with factory closures instead of `singleton()` for rarely-used services. + +**Effort:** 1–2 hours. + +--- + +#### Finding BOOT-2: GeminiCacheStore Loads Entire JSON File + +**Severity:** 🟡 Medium +**Files:** `src/Provider/LlmServiceProvider.php:74-76` + +**Issue:** `GeminiCacheStore` reads entire `~/.kosmokrator/cache/gemini-cache.json` into memory on construction. If cache grows to 100 MB, every invocation loads 100 MB even if not using Gemini. + +**Impact:** Unbounded file-based cache growth loads fully into RAM each run. + +**Recommendation:** Stream JSON or use SQLite for large caches. Implement lazy loading with on-demand reads. + +**Effort:** 2 hours. + +--- + +#### Finding BOOT-3: No Container Compilation + +**Severity:** 🟢 Low +**Files:** `composer.json:65` + +**Issue:** No `bootstrap/cache/container.php` or compiled container. Every run re-parses all YAML, rebuilds all singletons. + +**Impact:** Boot time + memory overhead ~30–50% vs compiled container. Not a RAM leak but inefficiency. + +**Recommendation:** Use Laravel's `php artisan optimize` or switch to Symfony's `ContainerBuilder` with `dump()` to generate compiled container. + +**Effort:** 1 hour setup. + +--- + +### 3.10 Audio Notifications (audio-notifications.md) + +#### Finding AUDIO-1: Worker Process Per Notification (Double Kernel Boot) + +**Severity:** 🟠 High +**Files:** `src/Audio/CompletionSound.php:167`, `src/Audio/compose_worker.php:26-27`, `src/Audio/compose_llm_worker.php:26-27` + +**Issue:** Each completion sound spawns **two full PHP kernel boots** sequentially: +1. `compose_worker.php` boots full kernel (~50–100 MB) +2. That worker spawns `compose_llm_worker.php` which also boots full kernel (~50–100 MB) + +**Impact:** For rapid-fire notifications (10–100 in quick succession), memory spikes temporarily (each kernel ~50–100 MB). GC pressure from repeated container construction/destruction. + +**Recommendation:** +1. Worker pooling: reuse a single long-lived `compose_worker.php` process for multiple notifications via IPC (socket/queue). +2. Move LLM call back to main worker instead of spawning `compose_llm_worker.php` — use `proc_open` with timeout directly in `compose_worker.php` to avoid second kernel boot. + +**Effort:** 3–4 hours. + +--- + +#### Finding AUDIO-2: ShellSession Buffer Never Truncated + +**Severity:** 🟡 Medium +**Files:** `src/Tool/Coding/ShellSession.php:18-64` + +**Issue:** `$buffer` accumulates all output; `readOffset` prevents re-reading but buffer never shrinks. + +**Impact:** Long-running shell sessions with continuous output accumulate MBs linearly. + +**Recommendation:** Add configurable max buffer size (e.g., 1 MB) and trim from start based on `readOffset`. + +**Effort:** 30 minutes. + +--- + +### 3.11 Session & Persistence Layer (session-persistence.md) + +#### Finding PERS-1: Unbounded fetchAll() in MessageRepository & MemoryRepository + +**Severity:** 🔴 Critical +**Files:** `src/Session/MessageRepository.php:77-80, 102-111`, `src/Session/MemoryRepository.php:65-88`, `src/Session/SessionRepository.php:62` + +**Issue:** All repository methods use `$stmt->fetchAll()` loading complete result sets. No cursor-based streaming. Specific unbounded queries: +- `MessageRepository::loadActive()` — fetches all non-compacted messages for a session (could be thousands) +- `MessageRepository::loadRaw()` — fetches all messages without limit +- `MemoryRepository::forProject()` — fetches **all** non-expired memories (unbounded) +- `SessionRepository::listByProject()` — uses `LIMIT` (good) + +**Impact:** Memory scales linearly with result size. For 10k messages, could be 10–50 MB per fetch. Called repeatedly in agent loop. + +**Recommendation:** +- Use `while ($row = $stmt->fetch())` generator pattern for large result sets. +- Add pagination/limits where appropriate. +- For `forProject()`, push filters into SQL and use LIMIT (already covered in ARCH-2). + +**Effort:** 1–2 hours. + +--- + +#### Finding PERS-2: No Query Result Caching + +**Severity:** 🟡 Medium +**Files:** All repository classes + +**Issue:** No Redis/Memcached/APCu caching. Repeated reads (settings, session lookups) hit SQLite each time. + +**Impact:** DB load + memory churn from parsing results each call. Minor for local SQLite but scales poorly. + +**Recommendation:** Introduce PSR-6/16 cache for settings, session lookups, memory `forProject` results (with short TTL). + +**Effort:** 2 hours. + +--- + +#### Finding PERS-3: Missing Indexes + +**Severity:** 🟠 High +**Files:** `src/Session/Database.php:109,128` + +**Issue:** +- `messages(session_id, compacted)` — good, covers `loadActive()`. +- `memories(project)` only — `forProject()` also filters on `expires_at`, `memory_class`, `pinned` — missing composite index. +- `sessions(project, updated_at)` not indexed — `listByProject()` and `latest()` filter/order by this. + +**Impact:** Full table scans for common queries. More rows scanned = more memory loaded = slower. + +**Recommendation:** Add: +```sql +CREATE INDEX idx_memories_proj_ec ON memories(project, expires_at, memory_class); +CREATE INDEX idx_sessions_proj_updated ON sessions(project, updated_at DESC); +``` + +**Effort:** 30 minutes. + +--- + +### 3.12 Model Catalog & Pricing (model-catalog-pricing.md) + +**Status:** ✅ **Already Efficient** + +- Model catalog uses arrays (not objects) — ~20–45 KB total for 100–150 models. +- No caching needed — data immutable after construction. +- `resolve()` substring fallback O(n) but n=100–150, trivial. +- No RAM issues identified. + +**Recommendation:** None. Consider adding result cache to `resolve()` if profiling shows hotspot, but unlikely. + +--- + +### 3.13 Database Connection Pooling (database-connection-pooling.md) + +**Status:** ✅ **Adequate for CLI** + +- Single PDO connection per process (singleton). No connection pooling needed. +- No persistent connections. +- WAL mode enabled; no `busy_timeout` or `wal_checkpoint` set (H5, M9 in other audits — disk, not RAM). +- RAM per connection: ~50–150 KB. +- No leaks detected. + +**Recommendation:** None for RAM. Consider adding `PRAGMA busy_timeout` for concurrency robustness (not RAM-related). + +--- + +### 3.14 UI Renderers (ui-renderer-memory — not saved but findings incorporated) + +**Key findings from analysis:** +- TUI animation managers create high-frequency timers (30fps) that capture `$this` — covered in ASYNC-2. +- ANSI renderer uses `streamBuffer` that grows during streaming but cleared after — safe. +- Diff renderer builds large strings via concatenation — typical, not excessive. +- No major UI-specific RAM issues beyond timer leaks and animation state. + +--- + +## 4. Cross-Cutting Concerns + +### 4.1 Data Structure Patterns + +**Array copying epidemic:** The codebase uses `array_merge`, spread operator `[...$arr]`, and `array_slice` extensively, creating many temporary copies. Critical hotspots: +- `SubagentOrchestrator::collectPendingResults()` — O(n²) copies +- `GlobTool::globStar()` — O(n²) intermediates +- `ConversationHistory::compact()` — copies entire recent array +- `ContextCompactor::buildPlan()` — multiple `array_slice` on same data + +**Recommendation:** Replace `array_merge` in loops with single spread or pre-allocation. Use `array_splice` for in-place modification where possible. Consider generators for large result streaming. + +**String concatenation in loops:** `BashTool`, `FileReadTool`, `ShellSession` use `.=` in loops. PHP's string buffer doubling mitigates but still causes reallocation. For very large outputs (100 MB), this is significant. + +**Recommendation:** For large outputs, write directly to temp file or use `stream_copy_to_stream()` with chunking (already used in `FileEditTool` — good pattern). + +--- + +### 4.2 Caching Gaps Summary + +| Computation | Current Cost | Cache Opportunity | Est. Savings | Priority | +|------------|--------------|------------------|--------------|----------| +| Permission regex | 250+ compilations/check | Static cache in `PermissionRule` | 20–50 KB/req | HIGH | +| Tool schema build | 60–250 KB/subagent | Cache in `ToolRegistry` | 1.8–7.5 MB/session | HIGH | +| Instruction files | 3–5 disk reads/session | Static cache in `InstructionLoader` | 2–50 KB + I/O | HIGH | +| Token fetch | 7 DB queries/op | Bulk fetch + in-memory cache | 6 KB/req + DB load | HIGH | +| Path resolution | `realpath()` per path | Static cache in `PathResolver` | 25–100 KB/req | MEDIUM | +| Model resolution | O(n) scan on miss | Result cache in `ModelDefinitionSource` | 10–100 KB | MEDIUM | +| Permission decision | Full chain every call | Memoize by (tool, args) | 10–200 KB/req | HIGH | +| Git root/branch | 2 shell execs/turn | Static per-request cache | 200 ms latency | HIGH | +| Prompt split | 2 `substr()`/call | Static cache by prompt hash | 5–10 KB/call | LOW | +| Memory format | Re-group every turn | Cache by memory ID set | 1–5 KB/turn | LOW | + +**Total high-priority cache memory:** ~25–350 KB per request, with compute savings 30–70% in hot paths. + +--- + +### 4.3 PHP Internals Observations + +**Strengths:** +- Readonly properties extensively used — excellent for immutability and memory sharing. +- Enums for state machines — memory-efficient singleton-like instances. +- Constructor property promotion where used — clean initialization. +- No `serialize()`/`unserialize()` of large graphs. +- No `SplObjectStorage` or heavy collection libraries — native arrays only. + +**Weaknesses:** +- Generators underused — only 1 occurrence in production code. +- Closure captures in long-lived collections may retain more than needed. +- No typed properties beyond readonly (relies on PHPDoc) — minor performance penalty. +- Static variables only in tests — good (no function-static retention). + +**Autoloader:** `optimize-autoloader: true` — class map generated, good. No `classmap-authoritative` but fine for CLI. + +--- + +### 4.4 Cross-Cutting Security-Adjacent Risks + +1. **Memory exhaustion DoS** — Permission regex compilation, token refresh storms, config write amplification all create predictable memory churn patterns exploitable by attackers. +2. **Credential exposure** — Repeated token reads from disk increase attack surface in shared hosting; more memory copies of secrets. +3. **Timing attacks** — Repeated disk I/O (config parse, instruction reads) increases latency variance, making timing attacks easier. +4. **No rate limiting** — Permission checks, token refreshes, config writes all unbounded — amplification vectors. + +**Recommendation:** Implement rate limiting at permission evaluator and token store levels. Add caching aggressively to reduce churn. + +--- + +## 5. Risk Matrix + +Severity × Likelihood matrix for RAM-related issues: + +| Severity \ Likelihood | High (Every request/turn) | Medium (Per session) | Low (Rare/Edge) | +|----------------------|---------------------------|---------------------|-----------------| +| **Critical** | Permission regex recompilation (SEC-1) — every permission check
Tool schema regen per subagent (AGENT-2) — every spawn
Subagent orchestrator leak (ARCH-1) — accumulates over session | Instruction file re-read (AGENT-1) — once/session but frequent
MemoryRepository unbounded fetch (ARCH-2) — every LLM round | Config write amplification (SEC-3) — only on settings writes | +| **High** | Duplicate rule evaluation (SEC-5) — 3× per check
HTTP pool per subagent (ASYNC-1) — per subagent spawn
TUI timer leaks (ASYNC-2) — persistent until teardown | Git shell calls (AGENT-3) — every turn
Task tree unbounded (AGENT-4) — every turn
Path resolution no cache (SEC-4) — every file check | Token no cache (SEC-7) — on every LLM call
Provider instantiation flood (SEC-8) — per provider resolve | +| **Medium** | FileReadTool cache unbounded (IO-1) — per file read
BashTool buffering (IO-2) — per command
PendingResults orphan (IO-3) — on parent crash | No token estimation cache (CACHE-1) — per message estimation
No model resolution cache (CACHE-2) — per model resolve | GlobTool array buildup (IO-4) — on large globs
JSON encoding loops (DS-3) — per tool call | +| **Low** | — | — | Generator underuse (PHP-2) — architectural
Container not compiled (BOOT-3) — boot only | + +**Interpretation:** +- **Critical-High likelihood:** Issues that occur on every hot path (permission checks, subagent spawn, LLM rounds) with severe impact — address immediately. +- **Critical-Medium:** Session-start or write-amplification issues — still urgent but less frequent. +- **High-High:** Turn-level overhead (git calls, task tree) — significant cumulative impact. +- **Medium-High:** Per-operation spikes (file reads, bash output) — moderate risk but can cause OOM on large inputs. + +--- + +## 6. Immediate Actions (<1 Day, High Impact) + +These are low-effort (<30 min each), high-impact fixes that should be deployed within 24–48 hours. + +### Action 1: Static Regex Cache in PermissionRule + +**File:** `src/Tool/Permission/PermissionRule.php:51-60` +**Change:** Add static cache array; compile once per pattern. + +```php +private static array $regexCache = []; + +public function matchesGlob(string $path): bool +{ + $key = $this->pattern; + if (!isset(self::$regexCache[$key])) { + $regex = '/^' . str_replace(['\*', '\?'], ['.*', '.'], preg_quote($this->pattern, '/')) . '$/i'; + self::$regexCache[$key] = $regex; + } + return preg_match(self::$regexCache[$key], $path) === 1; +} +``` + +**Impact:** Eliminates 90%+ of regex compilation overhead. Saves 20–50 KB per request, reduces CPU significantly. +**Effort:** 5 minutes. + +--- + +### Action 2: Cache Tool Schemas in ToolRegistry + +**File:** `src/Tool/ToolRegistry.php:67-103` +**Change:** Add private cache property; build once. + +```php +private ?array $cachedPrismTools = null; + +public function toPrismTools(): array +{ + if ($this->cachedPrismTools !== null) { + return $this->cachedPrismTools; + } + $tools = []; + foreach ($this->tools as $tool) { + $tools[] = $tool->toPrismTool(); // build + } + return $this->cachedPrismTools = $tools; +} +``` + +**Impact:** Saves 60–250 KB per subagent spawn. With 30 subagents, saves **1.8–7.5 MB**. +**Effort:** 10 minutes. + +--- + +### Action 3: Cache InstructionLoader Gather Result + +**File:** `src/Agent/InstructionLoader.php:26-85` +**Change:** Static cache in `gather()` method. + +```php +public static function gather(): string +{ + static ?string $cached = null; + if ($cached !== null) { + return $cached; + } + // ... existing file reads ... + return $cached = $result; +} +``` + +**Impact:** Eliminates 3–5 disk reads per session; saves 2–50 KB string allocations. +**Effort:** 5 minutes. + +--- + +### Action 4: Cache Git Root/Branch + +**Files:** +- `src/Agent/InstructionLoader.php:102` (gitRoot) +- `src/Agent/ProtectedContextBuilder.php:57` (gitBranch) + +**Change:** Add static cache variables. + +```php +public static function gitRoot(): string +{ + static ?string $root = null; + if ($root === null) { + $root = trim(shell_exec('git rev-parse --show-toplevel')); + } + return $root; +} +``` + +**Impact:** Eliminates 2 shell execs per turn. At 100 turns, saves 200 subprocesses and ~200 ms latency. +**Effort:** 5 minutes per method (10 total). + +--- + +### Action 5: Auto-Prune Completed Subagents + +**File:** `src/Agent/SubagentOrchestrator.php:245-258, 392-409` +**Change:** Call `pruneCompleted()` automatically after each agent reaches terminal state, or via periodic timer. + +```php +private function markCompleted(string $id, string $state): void +{ + $this->agents[$id]->setState($state); + $this->pruneCompleted(); // Add this line +} +``` + +Or add timer in `runAgent()`: +```php +EventLoop::repeat(10, fn() => $this->pruneCompleted()); +``` + +**Impact:** Prevents unbounded growth of `$agents`, `$stats`, `$pendingResults`. Saves ~500 bytes–1 KB per completed agent. +**Effort:** 15 minutes. + +--- + +### Action 6: Bulk Token Fetch + In-Memory Cache + +**File:** `src/LLM/Codex/SettingsCodexTokenStore.php:32-38, 63-85` +**Change:** Replace 7 individual SELECTs with single query; add 5-second cache. + +```php +private ?CodexToken $cached = null; +private int $cachedAt = 0; + +public function current(): CodexToken +{ + if ($this->cached && (time() - $this->cachedAt) < 5) { + return $this->cached; + } + $rows = $this->db->connection()->query( + "SELECT key, value FROM settings WHERE scope='global' AND key LIKE 'provider.codex.%'" + )->fetchAll(); + // build token from $rows... + $this->cached = $token; + $this->cachedAt = time(); + return $token; +} +``` + +**Impact:** Reduces token load from 7 DB round-trips to 1. Saves ~6 KB/request + connection pool pressure. +**Effort:** 20 minutes. + +--- + +### Action 7: Add LIMIT to MemoryRepository::forProject() + +**File:** `src/Session/MemoryRepository.php:65-88` +**Change:** Add `LIMIT 1000` to query as safety valve. + +```php +$stmt = $this->db->connection()->prepare( + "SELECT * FROM memories WHERE (project IS NULL OR project = ?) AND expires_at > ? ORDER BY pinned DESC, created_at DESC LIMIT 1000" +); +``` + +**Impact:** Caps RAM spike at ~50–100 MB even with 10k memories (vs 500 MB). Prevents OOM. +**Effort:** 5 minutes. + +--- + +### Action 8: Share HttpClient Across AsyncLlmClient Instances + +**File:** `src/LLM/AsyncLlmClient.php:73`, `src/Agent/SubagentFactory.php:127` +**Change:** Make HttpClient singleton; inject via container. + +```php +// In LlmServiceProvider: +$this->container->singleton(HttpClient::class, fn() => + (new HttpClientBuilder())->withPool(ConnectionLimitingPool::byAuthority(8))->build() +); +// In AsyncLlmClient constructor, accept HttpClient $httpClient +``` + +**Impact:** Saves 50–200 KB per subagent × N concurrent. Also limits total connections to 8, preventing socket exhaustion. +**Effort:** 20 minutes. + +--- + +### Action 9: Cancel TUI Timers on Teardown + +**Files:** +- `src/UI/Tui/TuiAnimationManager.php` — add `shutdown()` method +- `src/UI/Tui/TuiCoreRenderer.php` — call shutdown in `teardown()` +- `src/UI/Tui/SubagentDisplayManager.php` — ensure `cleanup()` called +- `src/UI/Tui/TuiToolRenderer.php` — ensure `clearToolExecuting()` called + +**Change (AnimationManager):** +```php +public function shutdown(): void +{ + if ($this->compactingTimerId !== null) { + EventLoop::cancel($this->compactingTimerId); + $this->compactingTimerId = null; + } + if ($this->thinkingTimerId !== null) { + EventLoop::cancel($this->thinkingTimerId); + $this->thinkingTimerId = null; + } +} +``` + +**Impact:** Releases closures pinning entire UI widget tree (potentially MBs). +**Effort:** 15 minutes. + +--- + +### Action 10: Move BashTool Timer Cancellation into finally + +**File:** `src/Tool/Coding/BashTool.php:87-112` +**Change:** Ensure timer cancelled even on exception. + +```php +$timerId = EventLoop::repeat($timeout, $checkTimeout); +try { + // ... existing code ... +} finally { + EventLoop::cancel($timerId); // Move here from after await +} +``` + +**Impact:** Prevents timer leak if process join throws. +**Effort:** 5 minutes. + +--- + +**Total immediate effort:** ~2–3 hours. +**Total immediate RAM savings:** ~5–15 MB per session + significant CPU/latency gains + security hardening. + +--- + +## 7. Short-Term Optimizations (1–2 Weeks) + +These require moderate effort (2–8 hours total) but yield substantial improvements. + +### Optimization 1: Permission Decision Memoization + +**File:** `src/Tool/Permission/PermissionEvaluator.php:26-49` +**Add:** `$decisionCache = []` property. In `evaluate()`: + +```php +$key = md5($toolName . serialize($args)); +if (isset($this->decisionCache[$key])) { + return $this->decisionCache[$key]; +} +$result = $this->evaluateChain($toolName, $args); +$this->decisionCache[$key] = $result; +return $result; +``` + +Invalidate in `resetGrants()`: `$this->decisionCache = [];` + +**Impact:** Avoids re-running full permission chain for repeated tool+args. Saves 30–50% permission check time. Est. memory 10–200 KB (bounded by session patterns). +**Effort:** 20 minutes. + +--- + +### Optimization 2: Path Resolution Cache + +**File:** `src/Tool/Permission/PathResolver.php:21-39` +**Add:** Static cache array. + +```php +private static array $cache = []; + +public static function resolve(string $path): ?string +{ + $real = realpath($path); + self::$cache[$path] = $real; + return $real; +} +``` + +**Impact:** Eliminates duplicate `realpath()` syscalls. Saves 25–100 KB/request. +**Effort:** 10 minutes. + +--- + +### Optimization 3: Avoid Full Config Reload on Write + +**File:** `src/Settings/SettingsManager.php:266-274` +**Change:** Instead of `ConfigLoader::load()`, update `$this->config` incrementally using `$data` from `configTarget()`. + +```php +private function reloadRepository(): void +{ + // Instead of full reload, just update the specific scope/key that changed + // $this->config is a Repository; use $this->config->set($key, $value) directly + // Or if full reload unavoidable, cache parsed YAML by mtime +} +``` + +**Impact:** Reduces write amplification from 5 parses (~100–150 KB churn) to near-zero. +**Effort:** 30 minutes (needs careful handling of merged configs). + +--- + +### Optimization 4: YAML Parse Cache + +**File:** `src/ConfigLoader.php` or `src/Settings/YamlConfigStore.php:23-35` +**Add:** Static cache keyed by `realpath($path) . filemtime($path)`. + +```php +private static array $cache = []; + +public function load(string $path): array +{ + $key = realpath($path) . ':' . filemtime($path); + if (isset(self::$cache[$key])) { + return self::$cache[$key]; + } + $data = Yaml::parseFile($path); + return self::$cache[$key] = $data; +} +``` + +**Impact:** Eliminates redundant parses across multiple `get()` calls. Saves 50–100 KB per settings access. +**Effort:** 20 minutes. + +--- + +### Optimization 5: Cache Provider Instances + +**File:** `src/LLM/RelayProviderRegistrar.php:42-117` +**Add:** `$instances = []` property; return cached if already resolved. + +```php +private array $instances = []; + +public function resolve(string $provider): Provider +{ + if (isset($this->instances[$provider])) { + return $this->instances[$provider]; + } + // ... create instance ... + return $this->instances[$provider] = $providerInstance; +} +``` + +**Impact:** Saves 200–500 bytes per provider call; reduces credential fetch overhead. +**Effort:** 10 minutes. + +--- + +### Optimization 6: Truncate Task Tree Rendering + +**File:** `src/Agent/TaskStore.php` (locate `renderTree()`) +**Add:** Configurable limit: `max_tasks: 50` or `max_chars: 10240`. + +```php +public function renderTree(): string +{ + $maxTasks = 50; + $tasks = array_slice($this->tasks, 0, $maxTasks); + // render only $tasks + if (count($this->tasks) > $maxTasks) { + $output .= "\n... truncated " . (count($this->tasks) - $maxTasks) . " tasks"; + } + return $output; +} +``` + +**Impact:** Bounds system prompt growth from task tree. Prevents unbounded context consumption. +**Effort:** 20 minutes. + +--- + +### Optimization 7: Stream BashTool/GrepTool Output + +**Files:** +- `src/Tool/Coding/BashTool.php:96-108` +- `src/Tool/Coding/GrepTool.php:68-78` + +**Change:** Process output incrementally via `onRead()` callback, writing directly to `OutputTruncator` stream with line/byte limits enforced during read, not after. + +```php +$truncator = new OutputTruncator(2000, 50 * 1024); +$process = Process::run($command, [ + 'onRead' => function(string $chunk) use ($truncator) { + $truncator->write($chunk); // truncates incrementally + } +]); +$output = $truncator->getOutput(); // already truncated +``` + +**Impact:** Prevents 100 MB RAM spikes from large command outputs. Memory bounded by truncation limits from first byte. +**Effort:** 2 hours. + +--- + +### Optimization 8: LRU Eviction for FileReadTool Cache + +**File:** `src/Tool/Coding/FileReadTool.php:21,70-72` +**Add:** Max entries (e.g., 1000) with LRU eviction using `SplDoublyLinkedList` as LRU list. + +```php +private array $readCache = []; +private SplDoublyLinkedList $lruList; +private int $maxEntries = 1000; + +public function read(string $path, ?int $offset = null, ?int $limit = null): string +{ + $key = $this->cacheKey($path, $offset, $limit); + if (isset($this->readCache[$key])) { + // Move to front of LRU + $this->lruList->unshift($key); + return $this->readCache[$key]; + } + // ... read file ... + if (count($this->readCache) >= $this->maxEntries) { + $oldest = $this->lruList->pop(); + unset($this->readCache[$oldest]); + } + $this->readCache[$key] = $content; + $this->lruList->unshift($key); + return $content; +} +``` + +**Impact:** Bounds long-run growth; prevents 10 MB+ cache bloat in exploratory sessions. +**Effort:** 45 minutes. + +--- + +### Optimization 9: Memory Selection Caching Per Turn + +**File:** `src/Agent/ContextManager.php` (or wherever `selectRelevantMemories` called) +**Add:** Property `$memoryCache = []` keyed by query/round. Populate on first call per LLM round; reuse for subsequent calls within same round. + +```php +private array $memoryCache = []; + +private function selectMemories(string $query, int $round): array +{ + $key = md5($query . ':' . $round); + if (!isset($this->memoryCache[$key])) { + $this->memoryCache[$key] = $this->sessionManager->selectRelevantMemories($query); + } + return $this->memoryCache[$key]; +} +``` + +**Impact:** Avoids re-scoring same memories 3–4× per turn. Sorts O(N log N) repeated work. With 1000 memories, saves ~10k comparisons × 3 = 30k ops/turn. +**Effort:** 20 minutes. + +--- + +### Optimization 10: Periodic Subagent Cleanup for Headless Agents + +**File:** `src/Agent/SubagentOrchestrator.php:245-258` +**Add:** Timer-based cleanup in addition to on-demand. + +```php +EventLoop::repeat(30, function() { + $this->pruneCompleted(); +}); +``` + +**Impact:** Frees subagent memory sooner in long-running headless sessions where parent may not call `injectPending...` frequently. Saves ~1 KB/subagent sooner. +**Effort:** 15 minutes. + +--- + +**Total short-term effort:** ~8–12 hours. +**Total short-term RAM reduction:** ~10–30 MB per session + bounded growth + CPU savings. + +--- + +## 8. Long-Term Architectural Improvements (1–3 Months) + +These require design changes, migrations, or significant refactoring. + +### Improvement 1: Push Memory Scoring into SQL + +**Files:** `src/Session/MemoryRepository.php`, `src/Agent/MemorySelector.php` +**Current:** `forProject()` fetches all rows → `MemorySelector::select()` scores in PHP with O(N log N) sort → returns top 6. +**Proposed:** Compute score in SQL: + +```sql +SELECT *, + (CASE + WHEN pinned = 1 THEN 1000 ELSE 0 + + (strlen(content) * 0.1) + + (created_at > ? ?) + END) AS relevance_score +FROM memories +WHERE (project IS NULL OR project = ?) AND expires_at > ? +ORDER BY relevance_score DESC, created_at DESC +LIMIT 6; +``` + +**Impact:** Eliminates O(N) memory load and sort. RAM per round drops from O(all memories) to O(6). With 10k memories, saves **100–500 MB per round**. +**Effort:** 2–3 hours (SQL expression tuning, testing edge cases). + +--- + +### Improvement 2: Task Eviction Policy & Centralized Edge Storage + +**Files:** `src/Task/TaskStore.php` +**Changes:** +1. Add `max_tasks` config (default 100). When adding exceeds limit, remove oldest non-terminal tasks (status != 'done'). +2. Replace per-task `blockedBy`/`blocks` arrays with central adjacency map: + +```php +private array $edges = [ + 'blocks' => ['fromId' => ['toId1', 'toId2']], + 'blockedBy' => ['toId' => ['fromId1', 'fromId2']] +]; +``` + +Derive per-task views on demand or maintain denormalized caches. + +**Impact:** +- Bounds task memory (100 tasks × 300 bytes = 30 KB max). +- ~50% edge memory reduction (no duplicate storage). +- Easier cleanup (single map vs scattered arrays). +**Effort:** 3–4 hours (migration, testing). + +--- + +### Improvement 3: Database Index Overhaul + +**File:** `src/Session/Database.php` (migrations) +**Add indexes:** + +```sql +-- For MemoryRepository::forProject() +CREATE INDEX idx_memories_lookup ON memories(project, memory_class, type, expires_at, pinned DESC, created_at DESC); + +-- For SessionRepository::listByProject() +CREATE INDEX idx_sessions_proj_updated ON sessions(project, updated_at DESC); + +-- For MessageRepository::searchProjectHistory() (FTS5) +CREATE VIRTUAL TABLE messages_fts USING fts5(content, content='messages', content_rowid='id'); +``` + +**Impact:** +- Speeds up `forProject()` and `search()` by 10–100×. +- Reduces rows scanned → less memory loaded. +- FTS5 enables full-text search without full scan. +**Effort:** 1 hour (migration + query updates). + +--- + +### Improvement 4: Container Compilation & Opcache Warmup + +**Files:** `composer.json`, `bin/kosmokrator` +**Changes:** +1. Run `composer install --optimize-autoloader --classmap-authoritative` (already has optimize-autoloader). +2. Generate compiled container: `php artisan optimize` (if using Laravel) or implement Symfony-style `ContainerBuilder` dump. +3. Warm opcache in production: `php -d opcache.enable_cli=1 bin/kosmokrator ...` + +**Impact:** Reduces boot memory by 30–50% (fewer class maps, no runtime compilation). Boot time faster. +**Effort:** 1–2 hours setup + CI integration. + +--- + +### Improvement 5: Worker Pooling for Audio Notifications + +**Files:** `src/Audio/CompletionSound.php`, `src/Audio/compose_worker.php` +**Design:** +- Start single long-lived `compose_worker.php` process at first notification. +- Communicate via JSON over stdin/stdout or Unix socket. +- Worker stays alive, processes multiple composition requests sequentially. +- Parent sends `{"prompt": "...", "callback": "..."}`; worker returns script path. + +**Impact:** Avoids 2× kernel boot per notification. For 100 notifications, saves **5–10 GB** of cumulative allocation (though not simultaneous). Reduces GC pressure. +**Effort:** 4–5 hours (IPC, protocol, lifecycle management). + +--- + +### Improvement 6: Incremental Prompt Assembly Cache + +**File:** `src/Agent/ContextManager.php:257-289` +**Design:** Introduce `PromptCache` object storing: +- Stable base prompt (instructions + environment + tool schemas) +- Mode suffix (constant) +- Only rebuild volatile parts (memories, task tree) each turn + +```php +class PromptCache { + private string $base; + private array $toolSchemas; // shared reference + public function build(array $memories, string $taskTree): string { + return $this->base . $this->formatMemories($memories) . $taskTree; + } +} +``` + +**Impact:** Reduces per-turn string allocations from ~10–50 KB to ~2–5 KB. Eliminates repeated `implode()` of static parts. +**Effort:** 3 hours (design + implementation + testing). + +--- + +### Improvement 7: Generator-Based Streaming for Large DB Results + +**Files:** `src/Session/MessageRepository.php`, `src/Session/MemoryRepository.php` +**Change:** Replace `fetchAll()` with generator: + +```php +public function streamActive(string $sessionId): Generator +{ + $stmt = $this->db->connection()->prepare( + "SELECT * FROM messages WHERE session_id = ? AND compacted = 0 ORDER BY id ASC" + ); + $stmt->execute([$sessionId]); + while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) { + yield $row; + } +} +``` + +Callers can iterate without full array materialization. + +**Impact:** For 10k messages, peak memory drops from 10–50 MB to O(1) per row during iteration. Useful for export/analysis commands. +**Effort:** 2 hours (update all callers). + +--- + +### Improvement 8: Full-Text Search (FTS5) for Memories + +**File:** Database migration + `src/Session/MemoryRepository.php:160-201` +**Change:** Create virtual table `memories_fts` on `(title, content)`. Rewrite `search()` to use `MATCH` instead of `LIKE`. + +```sql +CREATE VIRTUAL TABLE memories_fts USING fts5(title, content, content='memories', content_rowid='id'); +-- Populate via triggers or batch +SELECT m.* FROM memories m +JOIN memories_fts fts ON m.id = fts.rowid +WHERE memories_fts MATCH ? +ORDER BY rank LIMIT 20; +``` + +**Impact:** Full-text search becomes index-based, not full scan. Faster + less memory. +**Effort:** 3 hours (migration, trigger setup, query rewrite). + +--- + +### Improvement 9: Task Tree Segmentation & Archival + +**File:** `src/Agent/TaskStore.php` +**Design:** Split tasks into "active" (last N) and "archived" (summarized). Render only active. Archive old tasks via compaction-like process (summarize completed subtasks into parent description). + +**Impact:** Prevents unbounded task tree growth. Keeps system prompt size bounded. Aligns with history compaction philosophy. +**Effort:** 3–4 hours (archival logic, summarization LLM call). + +--- + +### Improvement 10: Benchmark Suite Completion + +**Files:** Create all benchmark scripts in `docs/ram-audit/benchmarks/` (see Section 9). +**Effort:** 4–6 hours total to write, run, and document baseline. + +--- + +**Total long-term effort:** ~20–30 hours (spread over 1–3 months). +**Total long-term RAM reduction:** ~100–500 MB for large sessions + bounded growth + scalability. + +--- + +## 9. Benchmark Suite Summary + +**Status:** No benchmark files were created during this audit (agents operated in read-only mode). The following suite is **recommended for implementation** to establish baselines and validate fixes. + +### Recommended Benchmark Files + +| File | Purpose | Key Metrics | +|------|---------|-------------| +| `db-connection-memory.php` | Connection lifecycle, singleton reuse | Per-connection memory delta, GC retention after `unset()` | +| `agent-loop-memory.php` | 100/500/1000 turns with 3 tools/turn | Memory growth curve, compaction triggers, GC cycles | +| `subagent-memory.php` | Spawn 10/30/100 concurrent subagents | Per-agent overhead, total peak, isolation | +| `tool-memory.php` | Concurrent tool execution, large file I/O | Tool-specific spikes, cache growth (FileReadTool) | +| `async-memory.php` | 100/500/1000 concurrent promises | Per-promise overhead, Fiber stack size, event loop memory | +| `caching-memory.php` | Repeated token estimation, model resolution | Cache hit/miss impact, memory vs compute tradeoff | +| `datastructure-memory.php` | Array merge patterns, JSON encoding | Temporary allocation peaks, copy-on-write | +| `ui-memory.php` | TUI/ANSI render cycles, animation frames | Render buffer growth, timer retention, widget tree | +| `audio-memory.php` | 10/50/100 rapid completion sounds | Worker process memory, IPC overhead, zombie risk | +| `session-memory.php` | 1k/5k/10k session creations, message inserts | DB fetch strategies, connection reuse, fetchAll vs streaming | + +### Measurement Protocol + +1. Use `memory_get_peak_usage(true)` (real peak) before/after each operation. +2. Run each scenario 5×, report median and max to smooth GC variance. +3. Force `gc_collect_cycles()` between iterations to measure steady-state. +4. Profile with `xhprof` or `tideways` if available for callgrind analysis. +5. For async operations, measure before/after `await` and after GC. + +### Baseline Targets (To Be Established) + +After implementing immediate actions, expect: +- **Per-request RAM churn** reduced from ~200–400 KB to ~50–100 KB (security/caching fixes). +- **Subagent memory** reduced by 1.8–7.5 MB (tool schema cache). +- **MemoryRepository per-round** from O(N) to O(1) after SQL scoring (long-term). +- **Task memory** bounded to ~30–50 KB max (eviction policy). +- **Boot memory** from ~20–40 MB to ~12–20 MB (container compilation + lazy services). + +--- + +## 10. Monitoring Recommendations + +### Runtime Metrics to Track + +1. **Memory usage by component** (via custom stats): + - `ConversationHistory::count()` and estimated size + - `SubagentOrchestrator::count()` active + completed + - `TaskStore::count()` tasks + - `FileReadTool::cacheSize()` entries + - `MemoryRepository::count()` total memories + +2. **GC activity**: + - `gc_collected_cycles()` count + - `gc_mem_caches()` — memory in caches + - Monitor frequency; high GC cycles indicate allocation churn. + +3. **Database query patterns**: + - Count of `MemoryRepository::forProject()` calls per turn + - Rows returned per call (log if >1000) + - Query time (should be <10 ms with indexes) + +4. **Permission evaluation**: + - Number of permission checks per tool call + - Time spent in `PermissionEvaluator::evaluate()` + - Cache hit rate (if memoization added) + +5. **Async resources**: + - Active timers count (via `EventLoop::getRunningTimers()` if accessible) + - Open connections in HTTP pool + - Pending futures in `SubagentOrchestrator` + +6. **File system**: + - Number of open `ShellSession` instances + - Shell session buffer sizes + - Temp file count (audio, edit operations) + +### Alert Thresholds + +| Metric | Warning | Critical | +|--------|---------|----------| +| Process RSS | > 200 MB | > 500 MB | +| ConversationHistory messages | > 500 | > 1000 | +| SubagentOrchestrator agents (total) | > 50 | > 100 | +| TaskStore tasks | > 100 | > 200 | +| MemoryRepository memories (project) | > 5000 | > 10000 | +| FileReadTool cache entries | > 5000 | > 10000 | +| GC cycles per minute | > 1000 | > 5000 | +| Permission checks per second | > 100 | > 500 (possible DoS) | + +### Logging Recommendations + +- Add debug logs to `PermissionRule::matchesGlob()` counting compilations vs cache hits (after fix). +- Log `MemoryRepository::forProject()` row count when >1000. +- Log subagent spawn/completion with memory delta. +- Log task creation/removal with count. +- Log cache misses for token fetch, model resolution. + +### Profiling in Production + +- Use `php -d opcache.enable_cli=1` with `opcache_get_status()` to monitor opcode memory. +- Consider `tideways` or `blackfire` for periodic profiling (low overhead). +- Export metrics to statsd/Prometheus if available (not currently integrated). + +--- + +## Conclusion + +KosmoKrator's RAM efficiency profile is **mixed**: core memory management (history compaction, subagent isolation) is well-designed, but **systematic caching omissions** and **unbounded accumulations** create significant avoidable memory pressure. The most severe issues are: + +1. **Permission system** — regex recompilation, duplicate evaluation, no caching — critical for both performance and security. +2. **Subagent orchestrator** — unbounded retention of completed agent data — classic memory leak pattern. +3. **Memory repository** — full table scans on every LLM round — O(N) in PHP instead of SQL. +4. **Task system** — no eviction, 30fps re-renders — does not scale. +5. **HTTP connection pools** — one per subagent — resource waste. +6. **Prompt construction** — instruction re-reads, tool schema duplication, git shell calls — constant overhead. + +**Immediate actions** (10 items, ~2–3 hours total) will yield 5–15 MB savings per session and eliminate the most egregious waste. **Short-term optimizations** (10 items, ~8–12 hours) will further reduce churn and bound growth. **Long-term architectural improvements** (10 items, ~20–30 hours) are necessary for scalability to large sessions (1000+ messages, 100+ tasks, 10k memories). + +The **benchmark suite** must be created and baseline established before and after fixes to quantify impact and guard against regressions. **Monitoring** should be added to track memory hotspots in production. + +**Priority:** Implement all Immediate Actions within 48 hours. Then tackle Short-Term Optimizations over the next 1–2 weeks. Schedule Long-Term improvements for next sprint cycle. + +--- + +**Report Compiled By:** KosmoKrator General Agent (RAM Efficiency Audit) +**Source Synthesis Files:** +- `docs/ram-audit/synthesis-security.md` +- `docs/ram-audit/synthesis-core-agent.md` +- `docs/ram-audit/synthesis-io-performance.md` +- `docs/ram-audit/synthesis-architecture.md` + +**Additional Agent Contributions:** +- database-connection-pooling +- model-catalog-pricing +- caching-strategies-gaps +- data-structure-optimization +- php-internals-memory +- async-event-loop-memory +- kernel-bootstrap +- audio-notifications +- session-persistence + +**Final Deliverable:** `docs/ram-audit/RAM-EFFICIENCY-AUDIT.md` +**Absolute Path:** `/Users/rutger/Projects/kosmokrator/docs/ram-audit/RAM-EFFICIENCY-AUDIT.md` diff --git a/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-architecture.md b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-architecture.md new file mode 100644 index 0000000..ac26a38 --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-architecture.md @@ -0,0 +1,370 @@ +# Architecture Memory Efficiency Report + +**Project:** KosmoKrator — AI coding agent for the terminal +**Audit Scope:** Subagent orchestration, event propagation, service container, task tracking, memory repository patterns +**Date:** 2026-04-03 +**Status:** Phase 1 Synthesis + +--- + +## Executive Summary + +This report synthesizes RAM efficiency findings from five Phase 1 audit agents covering core architectural subsystems. The analysis reveals **critical memory inefficiencies** in two areas: **subagent orchestration** (unbounded retention of completed agent data) and **memory repository** (unbounded database fetches). The **task tracking system** shows moderate issues with unbounded growth and high-frequency re-renders. The **event system** is exemplary — minimal overhead, tiny payloads, single listener. The **service container** pattern avoids per-subagent bootstrapping but has minor duplication of stateless components. + +**Overall Risk Assessment:** 🔴 **HIGH** — Two critical leaks can cause unbounded RAM growth in long-running sessions; one high-risk database pattern loads all memories on every LLM round. + +**Key Metrics:** +- **Subagent orchestrator:** Retains completed agent futures & stats indefinitely; group semaphores accumulate; background results held until manual collection. +- **Memory selection:** Fetches entire `memories` table on every context rebuild (O(N) per LLM round), scores all in-memory, then discards — repeated 3–4× per user turn. +- **Task system:** Full tree re-render at 30fps in TUI mode; no eviction policy; stale dependency edges retained after task removal. +- **Event system:** ~28 bytes per dispatched event; single listener; negligible overhead. + +--- + +## Findings (Severity) + +### 🔴 Critical + +| # | Component | Issue | Impact | File:Line | +|---|-----------|-------|--------|-----------| +| C1 | SubagentOrchestrator | Completed agent futures & stats retained indefinitely; `pruneCompleted()` never auto-called | Unbounded RAM growth with agent count; each entry ~200–500 bytes + future closure overhead | `src/Agent/SubagentOrchestrator.php:392-409` (prune exists but not invoked) | +| C2 | SubagentOrchestrator | Group semaphores (`$groups`) created per unique group name, never removed | Unbounded growth if group names are dynamic (e.g., per-task groups) | `src/Agent/SubagentOrchestrator.php:471` | +| C3 | SubagentOrchestrator | Background agent results in `$pendingResults` cleared only via explicit `collectPendingResults()` | Accumulates if parent never collects; each result string can be KBs | `src/Agent/SubagentOrchestrator.php:??` | +| C4 | MemoryRepository | `forProject()` loads **all** memory rows into PHP on every call (no LIMIT) | With 10k memories: 100–500 MB per fetch; called on every LLM round (3–4×/turn) | `src/Session/MemoryRepository.php:65-88` | +| C5 | TaskStore | No task eviction policy; tasks accumulate until manual `/tasks clear` or REPL prompt | Unbounded growth; each task ~200–300 bytes + edge arrays | `src/Task/TaskStore.php:14-356` | +| C6 | TaskStore | `clearTerminal()` / `clearAll()` do **not** purge stale IDs from other tasks' `blockedBy`/`blocks` arrays | Memory leak: dangling references accumulate across clear cycles | `src/Task/TaskStore.php:??` | + +### 🟠 High + +| # | Component | Issue | Impact | File:Line | +|---|-----------|-------|--------|-----------| +| H1 | MemorySelector | Re-scores entire memory set on every LLM round; no caching | O(N log N) repeated work; with 1000 memories, ~10k comparisons per round × 3–4 rounds/turn | `src/Agent/MemorySelector.php:29-38` | +| H2 | TaskStore | TUI task bar re-renders full tree at ~30fps during active phases (every 33ms) | 3,000+ node visits/sec for 100 tasks; high allocation/GC pressure | `src/UI/Tui/TuiCoreRenderer.php:643-681`, `src/UI/Tui/TuiAnimationManager.php:378-420` | +| H3 | Database | Missing indexes on `memories` table: `memory_class`, `type`, `(pinned, created_at)`, `expires_at` | Full table scans for every `forProject()` and `search()`; CPU + memory pressure | `src/Session/Database.php:128` | +| H4 | TaskStore | Bidirectional edge storage duplicates every dependency (2× memory) | ~50% edge memory overhead vs central adjacency list | `src/Task/TaskStore.php:62-84` | + +### 🟡 Medium + +| # | Component | Issue | Impact | File:Line | +|---|-----------|-------|--------|-----------| +| M1 | SubagentFactory | Stateless `ContextPruner` & `ToolResultDeduplicator` instantiated per subagent unnecessarily | Minor per-agent overhead (~negligible but wasteful) | `src/Agent/SubagentFactory.php:90-103` | +| M2 | SubagentOrchestrator | `$stats->dependsOn` arrays grow O(N) but not pruned | Small but unbounded; ~8 bytes per parent ID × N | `src/Agent/SubagentStats.php:??` | +| M3 | TaskStore | `roots()` and `children()` scan entire task set each call (O(n)) | Inefficient for large n; could be indexed | `src/Task/TaskStore.php:??` | +| M4 | MemoryRepository | `fetchAll()` used everywhere — entire result set materialized even if only a few rows needed | Memory spike for large queries; streaming not used | `src/Session/MemoryRepository.php:??` | +| M5 | Event system | 5 unused event classes (ResponseCompleteEvent, StreamChunkEvent, etc.) | Code bloat only; zero runtime cost | `src/Agent/Event/*.php` | + +### 🟢 Low / Informational + +| # | Component | Note | Impact | +|---|-----------|------|--------| +| L1 | TaskStore | `toDetail()` JSON-encodes metadata; could be large if metadata contains big structures | Only when explicitly called | +| L2 | TaskStore | Subject truncation only in ANSI render; plain text shows full subject | Minor display inconsistency | +| L3 | Event system | `TokenTrackingListener` state persists session-wide; integers could overflow in theory (practically impossible) | None | +| L4 | SubagentOrchestrator | `totalTokens()` iterates all stats on demand — O(n) but acceptable | None | + +--- + +## Memory Hotspots (file:line + estimates) + +### Subagent Orchestration (`src/Agent/`) + +| Hotspot | File:Line | Estimate | Notes | +|---------|-----------|----------|-------| +| Completed agent futures array | `SubagentOrchestrator.php:??` | ~500 bytes/agent + closure capture | Grows unbounded; primary leak | +| Completed stats array | `SubagentOrchestrator.php:??` | ~300–500 bytes/agent | Mirrors `$agents` | +| Group semaphores | `SubagentOrchestrator.php:471` | ~100–200 bytes/group | Accumulates with unique group names | +| Pending background results | `SubagentOrchestrator.php:??` | Size of result string (KB) per background agent | Held until parent collects | +| Per-agent ConversationHistory | `AgentLoop.php:??` | Grows with message count; ~100–1000+ bytes/message | Freed when AgentLoop GC'd (if future not retained) | +| Per-agent LLM client | `SubagentFactory.php:??` | ~few KB (HTTP client, listeners) | New per subagent; intentional isolation | + +### Event System (`src/Agent/Event/`, `src/Kernel.php`) + +| Hotspot | File:Line | Estimate | Notes | +|---------|-----------|----------|-------| +| Dispatched event objects | `AgentLoop.php:184,213,245,344,401,462,816,829` | ~28 bytes/event | 8–9 events per typical run; negligible | +| TokenTrackingListener state | `Listener/TokenTrackingListener.php:??` | 4× int = 32 bytes + object header | Accumulates counts only; no per-event storage | + +### Task Tracking (`src/Task/`, `src/UI/`) + +| Hotspot | File:Line | Estimate | Notes | +|---------|-----------|----------|-------| +| Task objects array | `TaskStore.php:14` | ~200–300 bytes/task + edges | Unbounded; no eviction | +| Edge arrays (blockedBy/blocks) | `Task.php:??` | ~8 bytes/edge × 2 (bidirectional) | Duplicate storage; stale IDs never purged | +| TUI task bar render buffer | `TuiCoreRenderer.php:643-681` | Full tree string + ANSI codes | Rebuilt every 33ms; ~10–100 KB per render depending on tree size | +| Full tree render (per call) | `TaskStore.php:174-186, 219-287` | O(n) string allocation | Called on every task tool and TUI refresh | + +### Memory Repository (`src/Session/`) + +| Hotspot | File:Line | Estimate | Notes | +|---------|-----------|----------|-------| +| `forProject()` result set | `MemoryRepository.php:65-88` | **All rows** — 10k memories = 100–500 MB | Called on every LLM round via `SessionManager::getMemories()` | +| In-memory memory array (during selection) | `SessionManager.php:276-281` | Full memory set duplicated in PHP array | Held during `MemorySelector::select()` sort | +| `usort()` temporary arrays | `MemorySelector.php:29-38` | O(N) additional zvals | Sorting overhead doubles memory footprint temporarily | +| Uncapped search result formatting | `MemorySearchTool.php:104` | Full content of each memory echoed | Limited to 20 results but each could be large | + +--- + +## Architectural Concerns + +### 1. Subagent Orchestration: Lifecycle & Retention Policy + +**Current design:** The `SubagentOrchestrator` acts as a global registry for all agents spawned in a session. It stores: +- `$agents`: Future objects keyed by agent ID +- `$stats`: SubagentStats objects keyed by agent ID +- `$pendingResults`: Background results keyed by parent ID +- `$groups`: Semaphore objects keyed by group name + +**Concern:** No automatic cleanup. The orchestrator lives for the entire session. Completed agents are never pruned unless some external code calls `pruneCompleted()`. In practice, this never happens automatically. This turns the orchestrator into an **unbounded accumulation vector**. + +**Why it matters:** In a long-running session with many subagent spawns (e.g., iterative planning, recursive decomposition), the `$agents` and `$stats` arrays grow linearly. While each entry is small, the cumulative effect over hours/days can be tens of MB. More importantly, the `$pendingResults` for background agents can hold large output strings indefinitely. + +**Secondary concern:** Group semaphores are created on first use and never destroyed. If group names are dynamic (e.g., per-task or per-context), this creates another unbounded array. + +**Pattern assessment:** The orchestrator is a **global mutable registry** with no TTL, no weak references, no size limits. This is a classic memory leak pattern. + +--- + +### 2. Memory Selection: N+1 Fetch & Repeated Scoring + +**Current design:** Every time the LLM is called (3–4 times per user turn due to tool calls), the system: +1. Calls `SessionManager::getMemories()` → `MemoryRepository::forProject()` → `SELECT * FROM memories` (no LIMIT, no filters pushed down) +2. Fetches **all** memory rows into PHP (could be thousands) +3. Scores each memory against the current query using `MemorySelector::select()` (O(N log N) sort) +4. Takes top 6 and injects into context +5. Discards the full set until next round + +**Concern:** This is an **N+1 query problem** compounded by **repeated full-table scans and in-memory sorts**. With 1000 memories, each round loads 1000 rows, scores them, and throws them away — repeated 3–4 times per turn. That's 3000–4000 full scans per user interaction. + +**Why it matters:** RAM spikes from loading all rows; CPU waste from repeated scoring; no caching. As memory count grows, latency and memory pressure grow superlinearly due to sort. + +**Pattern assessment:** Anti-pattern: **fetch-all-then-score-in-application** instead of **filter-score-limit in database**. The database is perfectly capable of sorting and limiting if scoring is expressed as an ORDER BY expression. + +--- + +### 3. Task System: In-Memory Graph with No Eviction + +**Current design:** Tasks are stored in a simple associative array (`TaskStore::$tasks`). There is: +- No persistence (tasks lost on restart) +- No eviction policy (only manual `/tasks clear` or REPL-triggered `clearTerminal()`) +- No pagination or depth limits +- Bidirectional edge storage (duplicate arrays) +- Full tree re-render on every task operation and at 30fps in TUI + +**Concern:** The task system is designed for **small-scale, short-lived sessions**. For complex multi-agent workflows generating 100+ tasks, memory and CPU usage become excessive due to: +- O(n) full scans for `roots()`, `children()`, `renderTree()` +- O(n²) worst-case rendering if many blockers per task +- 30fps re-renders = thousands of node visits/sec +- Stale edge references never cleaned up on task removal + +**Why it matters:** KosmoKrator is meant for complex coding tasks that may generate many subtasks. The current implementation does not scale. + +**Pattern assessment:** In-memory graph with linear scans is acceptable for <50 nodes but needs indexing/eviction for production-scale use. + +--- + +### 4. Event System: Minimalist & Efficient + +**Current design:** Events are small, immutable DTOs. Only 3 events are actually dispatched (carrying aggregated metrics). Dispatcher has a single listener (`TokenTrackingListener`). Dispatch is synchronous, immediate. + +**Assessment:** This is **architecturally sound**. No buffering, no async overhead, no payload duplication. The event system is a non-issue from a RAM perspective. + +**Minor note:** 5 unused event classes exist but are dead code — harmless but could be removed for cleanliness. + +--- + +### 5. Service Container: Factory Pattern Avoids Per-Agent Bootstrapping + +**Current design:** `SubagentFactory` receives shared services via constructor (ToolRegistry, ModelCatalog, etc.). It constructs a fresh `AgentLoop` per subagent but passes shared services. No per-agent service container is created. + +**Assessment:** This is **efficient**. Avoids the overhead of a full DI container per subagent. The object graph is lean. + +**Minor duplication:** `ContextPruner` and `ToolResultDeduplicator` are stateless but instantiated per `AgentLoop`. They could be shared singletons injected once into the factory. + +--- + +## Recommendations + +### Immediate (Priority 1 — Critical Leaks) + +#### R1. Auto-prune completed subagents +- **Where:** `SubagentOrchestrator` +- **What:** Call `pruneCompleted()` automatically after each agent finishes or via a periodic timer (e.g., every 10 completions). +- **Alternative:** Use `WeakReference` for `$agents` entries if parent might still await results; but explicit prune is simpler. +- **Impact:** Prevents unbounded growth of `$agents`, `$stats`, `$pendingResults`. + +#### R2. Clean up group semaphores +- **Where:** `SubagentOrchestrator` +- **What:** Track reference count per group; when the last agent in a group completes, `unset($this->groups[$group])`. +- **Impact:** Prevents semaphore accumulation from dynamic group names. + +#### R3. Auto-collect background results on parent completion +- **Where:** `SubagentOrchestrator::runAgent()` (where background mode is handled) +- **What:** When a parent agent finishes, automatically call `collectPendingResults($parentId)` to free result strings. +- **Impact:** Prevents large result strings from lingering. + +#### R4. Fix unbounded memory fetch +- **Where:** `MemoryRepository::forProject()` and `SessionManager::getMemories()` +- **What:** Replace `SELECT *` with a **LIMIT** or **cursor-based streaming** for full scans. Better: push scoring into SQL. +- **Short-term:** Add `? LIMIT 1000` to `forProject()` to cap rows; log warning if truncated. +- **Long-term:** Implement SQL-based scoring: `SELECT *, (CASE ...) AS score FROM memories WHERE … ORDER BY score DESC LIMIT 6`. +- **Impact:** Reduces per-round RAM from O(all memories) to O(selected memories). + +#### R5. Add task eviction policy +- **Where:** `TaskStore` +- **What:** Add configurable `max_tasks` (e.g., 100) with LRU eviction. When adding a task exceeds limit, remove oldest non-terminal tasks. +- **Alternative:** Auto-clear completed tasks after each tool call (not just at REPL prompt). +- **Impact:** Bounds task memory; prevents unbounded accumulation. + +#### R6. Purge stale dependency edges +- **Where:** `TaskStore::clearTerminal()` and `TaskStore::clearAll()` +- **What:** After removing tasks, walk all remaining tasks and filter `blockedBy`/`blocks` arrays to remove IDs not in `$this->tasks`. +- **Impact:** Prevents stale ID accumulation; reduces array bloat over time. + +--- + +### High Priority (Priority 2 — Performance & Scaling) + +#### R7. Debounce TUI task bar refresh +- **Where:** `TuiAnimationManager` (breathing timer) and `TuiCoreRenderer::refreshTaskBar()` +- **What:** Reduce refresh rate from 30fps (33ms) to 5–10fps (100–200ms) during breathing animation. Use dirty flag: only re-render if task tree changed. +- **Impact:** Cuts node visits/sec by 3–6×; reduces allocation/GC pressure. + +#### R8. Add database indexes for memories +- **Where:** `src/Session/Database.php` (migration/schema) +- **What:** Add composite index: + ```sql + CREATE INDEX idx_memories_lookup ON memories(project, memory_class, type, expires_at, pinned DESC, created_at DESC); + ``` +- Also add single-column indexes on `memory_class` and `type` if composite not feasible. +- **Impact:** Speeds up `forProject()` and `search()`; reduces rows scanned → less memory loaded. + +#### R9. Cache memory selection per turn +- **Where:** `ContextManager` +- **What:** Add property `$memoryCache = []` keyed by query string; populate on first `selectRelevantMemories()` call per LLM round; reuse for subsequent calls within same round. +- **Impact:** Avoids re-scoring same memories multiple times per turn (3–4× reduction). + +#### R10. Centralize edge storage (optional) +- **Where:** `TaskStore` +- **What:** Replace per-task `blockedBy`/`blocks` arrays with a central adjacency map: `$edges = ['blocks' => ['from' => ['to1', 'to2']], 'blockedBy' => …]`. Derive per-task views on demand or maintain denormalized caches. +- **Impact:** ~50% edge memory reduction; easier cleanup; but adds complexity. + +--- + +### Medium Priority (Priority 3 — Cleanup & Minor Gains) + +#### R11. Share stateless components +- **Where:** `SubagentFactory` +- **What:** Instantiate `ContextPruner` and `ToolResultDeduplicator` once as private properties; pass to each `AgentLoop`. +- **Impact:** Negligible RAM savings; reduces per-agent object count. + +#### R12. Implement auxiliary indexes for tasks +- **Where:** `TaskStore` +- **What:** Maintain `parentId => [childIds]` map updated on `add()`/`update()`. Makes `children()` O(1) and `roots()` O(1) with `parentId === null` index. +- **Impact:** Faster queries; minor RAM overhead for index arrays. + +#### R13. Remove unused event classes +- **Where:** `src/Agent/Event/` +- **What:** Delete `ResponseCompleteEvent`, `StreamChunkEvent`, `ThinkingEvent`, `ToolCallEvent`, `ToolResultEvent` if truly unused. +- **Impact:** Code cleanliness only; zero runtime effect. + +#### R14. Add full-text search (FTS5) for memories +- **Where:** Database schema +- **What:** Create virtual table `memories_fts` on `(title, content)`; rewrite `search()` to use `MATCH`. +- **Impact:** Faster text search; allows index-based lookup instead of full scan. + +--- + +### Long-term / Exploratory + +#### R15. Memory repository pagination API +- Design a `MemoryRepository::getRecent(int $limit, int $offset)` for UI browsing, separate from `forProject()` which should be for context injection only. + +#### R16. Task tree depth limiting +- Add config `max_task_depth` (e.g., 5); deeper tasks are truncated or rejected. + +#### R17. Benchmark suite completion +- Create the three benchmark scripts referenced in Phase 1 reports: + - `docs/ram-audit/benchmarks/subagent-memory.php` + - `docs/ram-audit/benchmarks/event-memory.php` (already created) + - `docs/ram-audit/benchmarks/task-memory.php` + - `docs/ram-audit/benchmarks/memory-memory.php` +- Use them to validate fixes and track regressions. + +--- + +## Implementation Roadmap (Suggested Order) + +| Phase | Targets | Expected RAM Reduction | +|-------|---------|------------------------| +| 1 | R1, R2, R3 (subagent leaks) | Stops unbounded growth; ~500 bytes/agent saved after completion | +| 2 | R4, R8 (memory fetch + indexes) | Per-round RAM from O(N) to O(1); 100–500 MB saved for 10k memories | +| 3 | R5, R6, R7 (task eviction + edge cleanup + TUI debounce) | Bounds task memory; 30fps → 5fps = 6× fewer renders | +| 4 | R9 (memory caching) | 3–4× fewer scorings per turn; CPU savings | +| 5 | R10, R11, R12, R13 (optimizations) | Minor gains; code quality | +| 6 | R14, R15, R16 (FTS, pagination, depth limit) | Scalability improvements | + +--- + +## Conclusion + +KosmoKrator's architecture is **generally sound** but suffers from two **critical unbounded-growth vectors**: +1. Subagent orchestrator retains completed agent data indefinitely. +2. Memory repository loads all memories on every LLM round. + +The **task system** also requires **bounded eviction** and **render throttling** to scale. The **event system** is exemplary. The **service container** pattern is efficient with minor duplication opportunities. + +**Immediate action** on R1–R4 will prevent RAM exhaustion in long-running or memory-intensive sessions. Subsequent phases (R5–R9) will improve performance and scalability. The benchmark suite should be completed to quantify improvements and guard against regressions. + +--- + +## Appendix: Files Analyzed + +### Subagent Orchestration +- `src/Agent/SubagentOrchestrator.php` +- `src/Agent/SubagentFactory.php` +- `src/Agent/SubagentStats.php` +- `src/Agent/SubagentPipeline.php` +- `src/Agent/SubagentPipelineFactory.php` +- `src/Agent/SubagentModelConfig.php` +- `src/Agent/StuckDetector.php` +- `src/Agent/AgentLoop.php` +- `src/Agent/ConversationHistory.php` +- `src/Agent/ContextManager.php` +- `src/Agent/ContextCompactor.php` +- `src/Agent/ContextPruner.php` +- `src/Agent/ToolResultDeduplicator.php` + +### Event System +- `src/Agent/Event/*.php` (8 events) +- `src/Kernel.php` +- `src/Provider/EventServiceProvider.php` +- `src/Agent/Listener/TokenTrackingListener.php` + +### Task Tracking +- `src/Task/Task.php` +- `src/Task/TaskStore.php` +- `src/Task/TaskStatus.php` +- `src/Task/Tool/TaskCreateTool.php` +- `src/Task/Tool/TaskGetTool.php` +- `src/Task/Tool/TaskListTool.php` +- `src/Task/Tool/TaskUpdateTool.php` +- `src/UI/Tui/TuiCoreRenderer.php` +- `src/UI/Tui/TuiAnimationManager.php` +- `src/UI/Ansi/AnsiCoreRenderer.php` +- `src/Command/AgentCommand.php` +- `src/Agent/ContextManager.php` + +### Memory Repository +- `src/Session/MemoryRepository.php` +- `src/Session/SessionManager.php` +- `src/Session/Tool/MemorySaveTool.php` +- `src/Session/Tool/MemorySearchTool.php` +- `src/Agent/MemorySelector.php` +- `src/Agent/MemoryInjector.php` +- `src/Session/SettingsRepository.php` +- `src/Session/Database.php` + +--- + +**Report generated from Phase 1 agent findings.** +**Next step:** Implement Priority 1 recommendations and validate with benchmark suite. diff --git a/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-core-agent.md b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-core-agent.md new file mode 100644 index 0000000..c283b1a --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-core-agent.md @@ -0,0 +1,496 @@ +# Core Agent Memory Efficiency Synthesis + +**Report Date:** 2026-04-03 +**Agents Consulted:** agent-loop-lifecycle, context-memory-audit, stuck-detection-memory, prompt-engineering-overhead +**Scope:** RAM efficiency of core agent loop, context management, and prompt construction + +--- + +## Executive Summary + +KosmoKrator's core agent loop demonstrates **fundamentally sound memory management** with multiple defensive layers against unbounded growth. The primary memory accumulator — `ConversationHistory::$messages` — grows monotonically but is bounded by three reclamation mechanisms (compaction, pruning, deduplication) that trigger automatically based on context window pressure. + +**Critical Finding:** While no memory leaks exist, **prompt construction suffers from systematic caching omissions** that cause redundant work and string bloat on every turn. The most severe inefficiencies are: + +1. **Instruction file re-reading** every session (3–5 disk reads, no cache) +2. **Tool schema regeneration** on every subagent spawn (~30–50 object allocations repeated) +3. **Git shell calls** repeated per-turn (`gitRoot()`, `gitBranch()`) +4. **Task tree rendering** with no visible truncation limit + +These issues are **independent of conversation history size** and therefore apply constant overhead even to short sessions. + +**Severity Distribution:** +- 🔴 Critical: 2 issues (instruction caching, tool schema caching) +- 🟠 High: 2 issues (git shell calls, task tree unbounded) +- 🟡 Medium: 4 issues (prompt splitting, memory formatting, environment detection, string concatenation) +- 🟢 Low: 2 issues (suboptimal thresholds, cleanup timing) + +--- + +## Findings (Severity-Rated) + +### 🔴 Critical + +#### CRIT-1: Instruction Files Re-Read Every Session (No Cache) +**Files:** `src/Agent/InstructionLoader.php:26-85` + +**What:** `InstructionLoader::gather()` reads up to 5 files from disk on every session start: +- `~/.kosmokrator/instructions.md` +- `{git_root}/KOSMOKRATOR.md` +- `{git_root}/.kosmokrator/instructions.md` +- `{git_root}/AGENTS.md` +- `{cwd}/KOSMOKRATOR.md` + +**Impact:** +- **Memory:** Each file loaded as a string kept for session lifetime. Large `AGENTS.md` (common in monorepos) can be 10–100 KB. +- **I/O:** 3–5 `file_get_contents()` calls per session; `gitRoot()` uses `shell_exec()` (line 102). +- **Frequency:** Once per session, but sessions are frequent in REPL usage. + +**Why it's critical:** This is **pure waste** — instruction files change rarely (user edits or git commits). No technical reason exists to re-read them. Static property cache would eliminate all I/O and string allocation. + +**Evidence:** `readFile()` (line 87) has no memoization; `gather()` calls it sequentially every time. + +--- + +#### CRIT-2: Tool Schema Regenerated on Every Subagent Spawn +**Files:** `src/Tool/ToolRegistry.php:67-103`, `src/Agent/SubagentFactory.php:105` + +**What:** `ToolRegistry::toPrismTools()` converts each tool to a `PrismTool` object with full parameter schema on every call. Called: +- Once at main `AgentLoop` setup (`AgentSessionBuilder:133`) +- **Once per subagent** (`SubagentFactory:105`) — subagents spawn frequently + +**Impact:** +- **Memory:** ~30–50 tools × ~10 parameters each = 300–500 parameter objects per call. Each `PrismTool` + parameter objects ≈ 200–500 bytes → **60–250 KB per subagent** wasted. +- **CPU:** Object allocation + method calls (`withStringParameter()`, etc.) repeated unnecessarily. +- **Frequency:** Every subagent creation (default concurrency 10, depth 3 → potentially 30+ subagents per session). + +**Why it's critical:** Tool schemas are **static metadata** — they never change at runtime. Rebuilding them is pure allocation bloat. Subagent memory isolation is good, but this duplicates static data across all subagents. + +**Evidence:** `toPrismTool()` (lines 76-103) creates fresh `PrismTool` and calls `->parameters()` on tool to rebuild schema arrays each time. + +--- + +### 🟠 High + +#### HIGH-1: Repeated Git Shell Calls Every Turn +**Files:** `src/Agent/ProtectedContextBuilder.php:24-50`, `src/Agent/InstructionLoader.php:102` + +**What:** `ProtectedContextBuilder::build()` calls: +- `InstructionLoader::gitRoot()` — `shell_exec('git rev-parse --show-toplevel')` +- `InstructionLoader::gitBranch()` — `shell_exec('git branch --show-current')` + +Every time protected context is built, which is **every turn** (via `ContextManager::buildSystemPrompt()`). + +**Impact:** +- **Memory:** Each `shell_exec()` returns a string (path or branch name, ~20–100 bytes). Strings are short-lived but allocated every turn. +- **I/O:** Two subprocess calls per turn. At 100 turns → 200 shell executions. Significant overhead. +- **Latency:** Each call takes ~1–5 ms; cumulative delay noticeable. + +**Why it's high:** Git state changes infrequently. Caching with `static ?string` (per-request) or session-scoped property would eliminate all repeated calls. No invalidation needed except on explicit git events (not applicable in agent runtime). + +**Evidence:** `gitRoot()` (line 102) and `gitBranch()` (line 57) have no caching; called unconditionally in `build()`. + +--- + +#### HIGH-2: Task Tree Rendering Unbounded +**Files:** `src/Agent/TaskStore.php` (not fully inspected, but referenced in `ContextManager:270`) + +**What:** `ContextManager::buildSystemPrompt()` appends `$this->taskStore->renderTree()` to system prompt every turn. No truncation limit observed in codebase. + +**Impact:** +- **Memory:** Task tree grows linearly with number of tasks created. Each task adds ~50–200 chars to rendered string. +- **Prompt bloat:** Unbounded task list consumes context window, forcing earlier compaction. +- **Frequency:** Every turn. + +**Why it's high:** Long-running sessions with many decomposed tasks could see task tree reach **tens of KB**. This directly competes with conversation history for context space. Should have hard limit (e.g., last 50 tasks, or 10 KB max). + +**Evidence:** `renderTree()` call at `ContextManager:270` with no preceding `substr()` or count check. + +--- + +### 🟡 Medium + +#### MED-1: PromptFrameBuilder Re-Splits Every Call (No Cache) +**Files:** `src/LLM/PromptFrameBuilder.php:31-77` + +**What:** `splitSystemPrompt($prompt)` uses `strpos()` + `substr()` to separate stable/volatile portions. Called downstream by providers that support prompt caching. No result caching. + +**Impact:** +- **Memory:** `substr()` creates new string copies (O(n) duplication). For a 5 KB prompt, two allocations per turn. +- **CPU:** String scanning repeated every turn. +- **Frequency:** Every LLM call (every turn). + +**Why it's medium:** Prompt size is modest (< 10 KB typical), so memory duplication is small (~10 KB/turn). But it's unnecessary work. Caching split result per unique prompt would eliminate it. + +**Evidence:** Static method, no static cache property. `substr()` at lines 42–43, 66 creates new strings. + +--- + +#### MED-2: MemoryInjector::format() Rebuilds Every Turn +**Files:** `src/Agent/MemoryInjector.php:17-109` + +**What:** `format()` groups memories by type, truncates each, and `implode()`s. Called every turn in `ContextManager::buildSystemPrompt()`. + +**Impact:** +- **Memory:** Creates intermediate arrays (`$sections`, `$lines`) and concatenated string (~1–5 KB typical). +- **CPU:** Looping through memories, truncating, grouping — repeated work. +- **Frequency:** Every turn. + +**Why it's medium:** Memory selection (`SessionManager::selectRelevantMemories`) already queries DB each turn, so some reformatting is expected. But formatted blocks could be cached keyed by memory ID set + truncation parameters. Gains modest but free. + +**Evidence:** No caching; `implode("\n\n", $sections)` at line 108 creates new string every call. + +--- + +#### MED-3: EnvironmentContext Gathered Once Per Session (No Cross-Session Cache) +**Files:** `src/Agent/EnvironmentContext.php:15-48` + +**What:** `gather()` runs `file_exists()` for 10+ project types, reads `composer.json`/`package.json`, gets OS/shell/date. Called once at session start (`AgentSessionBuilder:84-86`). + +**Impact:** +- **Memory:** Result string ~200–500 bytes kept for session lifetime. +- **I/O:** Multiple filesystem checks and JSON parsing at session start. +- **Frequency:** Once per session. + +**Why it's medium:** Session start is acceptable place, but environment rarely changes during a session. Could be cached globally (static) to skip filesystem checks across sessions. Benefit small but zero cost. + +**Evidence:** No static cache; `file_exists()` calls at lines 18–28 every invocation. + +--- + +#### MED-4: String Concatenation in Loops (ContextCompactor) +**Files:** `src/Agent/ContextCompactor.php:253-294` + +**What:** `formatMessages()` builds `$lines` array by looping through messages, then `implode()`s. Capped at 100K chars (`MAX_FORMAT_CHARS`), but still allocates intermediate array. + +**Impact:** +- **Memory:** Array of strings + final concatenated string. Peak ~100 KB during compaction. +- **Frequency:** Only during compaction (infrequent). + +**Why it's medium:** Compaction already expensive (2 LLM calls). This is a small fraction of total compaction memory spike. Could use `implode()` with generator or `StringBuilder` pattern, but not urgent. + +**Evidence:** `$lines[] = ...` loop (lines 253–294) then `implode("\n", $lines)` at line 296. + +--- + +### 🟢 Low + +#### LOW-1: Compaction Threshold May Be Too High +**Files:** `src/Agent/ContextCompactor.php:17`, `src/Agent/ContextBudget.php` + +**What:** Default `compact_threshold = 60%` of context window. For a 32K context, compaction triggers at ~19K tokens. With typical 1–2 KB messages, that's ~10–20 turns between compactions. + +**Impact:** +- **Memory:** History grows larger before compaction, increasing peak memory. +- **Frequency:** Fewer compactions = less LLM cost but more RAM. + +**Why it's low:** Configurable via settings. Default is a conservative trade-off. Could be lowered to 50% or made adaptive, but not a bug. + +**Evidence:** Default at line 17; used in `shouldCompactHistory()` (`ContextManager:274-279`). + +--- + +#### LOW-2: Subagent Cleanup Only on Parent Turn +**Files:** `src/Agent/SubagentOrchestrator.php:245-258`, `src/Agent/AgentLoop.php:552-557` + +**What:** `pruneCompleted()` removes completed subagents from orchestrator arrays. Called only when parent agent processes pending results (once per parent turn). + +**Impact:** +- **Memory:** Completed subagent objects ( histories, tool executors, etc.) remain in `$agents`, `$stats`, `$cancellations`, `$globalLocks` until parent's next turn. +- **Window:** Typically one turn delay (~seconds). With 10 concurrent subagents, delay is minor. + +**Why it's low:** Cleanup is prompt (next turn). No observed leaks. Could add periodic timer-based cleanup for long-running headless parents, but benefit marginal. + +**Evidence:** `pruneCompleted()` called only in `injectPendingBackgroundResults()` (`AgentLoop:552-557`). + +--- + +## Memory Hotspots (file:line + estimates) + +### Primary Accumulator + +| Hotspot | File:Line | Accumulation | Estimated Size/Turn | Notes | +|---------|-----------|--------------|---------------------|-------| +| `ConversationHistory::$messages` | `src/Agent/ConversationHistory.php:19` | **Monotonic** | 100–500 bytes per message | Primary growth vector. Each turn adds 2–3 messages (user + assistant + tool results). | +| `SubagentOrchestrator::$agents` | `src/Agent/SubagentOrchestrator.php:245` | **Concurrent** | ~1 KB per active subagent | Holds `Future` + `SubagentStats` until parent prunes. | +| `SubagentOrchestrator::$stats` | same | **Concurrent** | ~500 bytes per subagent | Same lifetime as `$agents`. | +| `SubagentOrchestrator::$cancellations` | same | **Concurrent** | ~100 bytes per subagent | Cleared in `finally` block. | +| `SubagentOrchestrator::$globalLocks` | same | **Concurrent** | ~100 bytes per subagent | Released & unset when subagent finishes. | + +### Prompt Construction Bloat (Per-Turn) + +| Hotspot | File:Line | Allocation | Estimated Size | Frequency | Cache? | +|---------|-----------|------------|----------------|-----------|--------| +| `InstructionLoader::gather()` | `src/Agent/InstructionLoader.php:26-85` | 3–5 file reads + string concat | 2–50 KB (depends on AGENTS.md) | Once/session | ❌ | +| `ToolRegistry::toPrismTools()` | `src/Tool/ToolRegistry.php:67-103` | 300–500 objects (PrismTool + params) | 60–250 KB | Per subagent spawn | ❌ | +| `ProtectedContextBuilder::build()` (git calls) | `src/Agent/ProtectedContextBuilder.php:24-50` | 2 `shell_exec()` strings | ~200 bytes | Every turn | ❌ | +| `TaskStore::renderTree()` | `src/Agent/TaskStore.php` (ref: `ContextManager:270`) | Recursive string build | ~1–10 KB (unbounded) | Every turn | ❌ | +| `PromptFrameBuilder::splitSystemPrompt()` | `src/LLM/PromptFrameBuilder.php:31-77` | 2 `substr()` copies | ~5–10 KB | Every LLM call | ❌ | +| `MemoryInjector::format()` | `src/Agent/MemoryInjector.php:17-109` | Array + `implode` | ~1–5 KB | Every turn | ❌ | +| `EnvironmentContext::gather()` | `src/Agent/EnvironmentContext.php:15-48` | FS checks + JSON parse | ~200–500 bytes | Once/session | ❌ | + +### Temporary Spikes (Transient) + +| Hotspot | File:Line | Spike Size | Duration | Reclaimed | +|---------|-----------|------------|----------|-----------| +| Compaction formatted transcript | `src/Agent/ContextCompactor.php:233-275` | Up to 100 KB string | During 2 LLM calls (seconds) | Yes (after apply) | +| CompactionPlan object | `src/Agent/ContextCompactor.php:104-160` | ~10–50 KB (new Message objects) | Brief | Yes | +| Deduplication indexes | `src/Agent/ToolResultDeduplicator.php:28-108` | O(n) where n = tool result messages | Per tool round | Yes | +| Pruning candidates array | `src/Agent/ContextPruner.php:37-104` | O(n) | Per prune | Yes | + +--- + +## Convergence Issues + +### Issue 1: Compaction Threshold vs. Prompt Bloat +**Interaction:** The `context.compact_threshold` (default 60%) determines when history compaction triggers. However, **prompt construction bloat** (unbounded task tree, no instruction caching) inflates the **base system prompt size**, reducing effective context window for conversation history. This causes **earlier compaction triggers** than necessary, increasing LLM call frequency. + +**Root cause:** Base prompt is rebuilt every turn with redundant data. A 50 KB base prompt (large AGENTS.md + unbounded tasks) leaves less room for history, causing compaction at ~15K tokens instead of ~19K. + +**Impact:** More frequent compactions → more LLM calls → higher cost + temporary memory spikes. + +--- + +### Issue 2: Subagent Memory Multiplication via Tool Schema Duplication +**Interaction:** Each subagent gets its own `AgentLoop` with fresh `ToolRegistry::toPrismTools()` call. With 10 concurrent subagents and depth 3, **tool schema objects are duplicated 30+ times** in memory simultaneously. + +**Root cause:** Tool schemas are static metadata but treated as per-instance data. No shared cache in `ToolRegistry`. + +**Impact:** 60–250 KB × 30 = **1.8–7.5 MB** of duplicated schema objects in memory during peak concurrency. Not catastrophic but wasteful. + +--- + +### Issue 3: Git Shell Calls Accumulate Latency, Not Memory +**Interaction:** While git calls don't cause memory leaks, their **per-turn execution** adds cumulative latency. In long sessions (100+ turns), 200 shell calls can add **200–1000 ms** of overhead. This is a **performance convergence issue** — the design assumes git state is needed every turn, but it's quasi-static. + +**Root cause:** No caching of git root/branch. `ProtectedContextBuilder` rebuilds every turn. + +**Impact:** Degraded user experience; perceived slowness. + +--- + +### Issue 4: Task Tree Growth Accelerates Context Pressure +**Interaction:** `TaskStore::renderTree()` output grows with each decomposed task. Unbounded growth means: +- System prompt size increases over session lifetime +- Context window fills faster → more frequent compaction +- Compaction replaces older history, but task tree itself is **never pruned** + +**Root cause:** No truncation logic for task tree rendering. All tasks forever included. + +**Impact:** Long sessions with many subtasks see **progressive prompt bloat** that never recedes, even after history compaction. Eventually dominates context window. + +--- + +## Recommendations + +### Priority 1 (Immediate — High Impact, Low Effort) + +#### REC-1: Cache InstructionLoader::gather() Result +**Target:** `src/Agent/InstructionLoader.php:26-85` + +**Change:** Add `static ?string $cached = null` to `gather()`. On first call, read files and store. Subsequent calls return cached string. + +**Impact:** +- Eliminates 3–5 disk reads per session +- Saves 2–50 KB string allocations per session +- Zero risk — instruction files rarely change during runtime + +**Effort:** 5 minutes. Add 2 lines. + +--- + +#### REC-2: Cache ToolRegistry::toPrismTools() Result +**Target:** `src/Tool/ToolRegistry.php:67-103` + +**Change:** Add private `?array $cachedPrismTools = null`. In `toPrismTools()`, check cache; if null, build and store. Invalidate only when `register()`/`unregister()` called (rare). + +**Impact:** +- Saves 60–250 KB per subagent spawn +- With 30 subagents/session → **1.8–7.5 MB saved** +- Reduces object allocation churn + +**Effort:** 10 minutes. Add cache property + check. + +--- + +#### REC-3: Cache Git Shell Calls +**Target:** `src/Agent/InstructionLoader.php:102` (gitRoot), `src/Agent/ProtectedContextBuilder.php:57` (gitBranch) + +**Change:** Add `static ?string $cachedRoot` and `static ?string $cachedBranch` to respective methods. Cache result for lifetime of request. + +**Impact:** +- Eliminates 2 shell execs per turn +- At 100 turns → 200 fewer subprocesses +- Saves ~200 bytes × 100 = 20 KB (small) but latency gain significant + +**Effort:** 5 minutes per method. + +--- + +#### REC-4: Truncate Task Tree Rendering +**Target:** `src/Agent/TaskStore::renderTree()` (need to locate file) + +**Change:** Add configurable limit: e.g., `max_tasks: 50` or `max_chars: 10240`. Truncate oldest tasks first. Return `"... truncated N tasks"` note. + +**Impact:** +- Bounds system prompt growth from task tree +- Prevents unbounded context consumption +- Forces user to `/compact` or complete tasks to make room + +**Effort:** 15–30 minutes (need to inspect `TaskStore` implementation). + +--- + +### Priority 2 (Medium-Term — Moderate Impact) + +#### REC-5: Implement PromptFrameBuilder Split Cache +**Target:** `src/LLM/PromptFrameBuilder.php:31-77` + +**Change:** Add static `array $cache = []` keyed by `md5($prompt)`. Store `['prefix' => ..., 'volatile' => ...]`. Reuse if prompt unchanged. + +**Impact:** +- Saves 2 `substr()` allocations per LLM call +- Modest memory savings (~5–10 KB/turn) +- Reduces CPU for string ops + +**Effort:** 10 minutes. + +--- + +#### REC-6: Cache EnvironmentContext::gather() +**Target:** `src/Agent/EnvironmentContext.php:15-48` + +**Change:** Convert `gather()` to instance method with private `?string $cached = null`. Build once per `EnvironmentContext` object (created once per session already). Already per-session, but still avoids repeated FS checks within same gather call if called multiple times. + +**Impact:** Negligible (already once/session), but cleans up pattern. + +**Effort:** 5 minutes. + +--- + +#### REC-7: Batch Memory Extraction During Compaction +**Target:** `src/Agent/ContextCompactor.php:189-224` + +**Change:** Track last extraction turn/timestamp. Skip extraction if recent (e.g., within 5 turns or < 100 new messages). Or batch: only extract if `count($history->newMessagesSinceLastExtraction) > 20`. + +**Impact:** +- Reduces compaction LLM calls from 2 → 1 in many cases +- Saves cost + temporary memory spike from extraction response +- Minor risk of missing some memories, but memories are cumulative and idempotent + +**Effort:** 20–30 minutes (need to track state in `ContextCompactor`). + +--- + +#### REC-8: Periodic Subagent Cleanup for Headless Agents +**Target:** `src/Agent/SubagentOrchestrator.php:245-258` + +**Change:** Add timer-based cleanup (e.g., every 30 seconds) in addition to on-demand in `injectPendingBackgroundResults()`. Use `EventLoop` repeat callback. + +**Impact:** +- Frees subagent memory sooner in long-running headless sessions where parent may not call `injectPending...` frequently +- Minor improvement; current cleanup is already timely for interactive use + +**Effort:** 15 minutes. + +--- + +### Priority 3 (Long-Term — Architectural) + +#### REC-9: Implement Shared Tool Schema Registry +**Target:** `src/Tool/ToolRegistry.php` + tool classes + +**Change:** Each tool class defines `static ?PrismTool $schemaCache`. First call to `toPrismTool()` builds and stores. `ToolRegistry::toPrismTools()` returns these shared instances (or clones if mutability concerns). + +**Impact:** +- Eliminates all tool schema duplication across subagents +- Could save **5–10 MB** in sessions with many subagents +- Clean separation of static metadata + +**Effort:** 1–2 hours (need to ensure PrismTool objects are immutable or cloned). + +--- + +#### REC-10: Incremental Prompt Assembly Cache +**Target:** `src/Agent/ContextManager.php:257-289` (buildSystemPrompt) + +**Change:** Introduce `PromptCache` object that stores: +- Stable base prompt (instructions + environment) +- Tool schemas (shared reference) +- Mode suffix (constant) +- Only rebuild volatile parts (memories, task tree) each turn + +**Impact:** +- Reduces per-turn string allocations from ~10–50 KB to ~2–5 KB +- Eliminates repeated `implode()` of static parts +- Significant for long sessions + +**Effort:** 2–3 hours (design + implementation). + +--- + +#### REC-11: Task Tree Segmentation & Archival +**Target:** `src/Agent/TaskStore.php` + +**Change:** Split tasks into "active" (last N) and "archived" (summarized). Render only active. Archive old tasks via compaction-like process. + +**Impact:** +- Prevents unbounded task tree growth +- Keeps system prompt size bounded +- Aligns with history compaction philosophy + +**Effort:** 2–3 hours. + +--- + +#### REC-12: Benchmark Suite Activation +**Target:** `docs/ram-audit/benchmarks/agent-loop-memory.php` (provided in agent-loop-lifecycle) + +**Action:** Create and run benchmark to establish baseline memory growth curves. Test with: +- 100 turns, 3 tools/turn, compaction on/off +- 500 turns, 5 tools/turn +- 1000 turns, 0 tools (pure chat) + +**Impact:** Quantifies actual memory behavior; validates fixes. + +**Effort:** 10 minutes to create file + run benchmarks. + +--- + +## Summary Table + +| Category | Issue | Severity | Est. Savings (per session) | Effort | Priority | +|----------|-------|----------|----------------------------|--------|----------| +| Prompt bloat | Instruction file caching | 🔴 Critical | 2–50 KB + I/O | 5 min | P1 | +| Prompt bloat | Tool schema caching | 🔴 Critical | 1.8–7.5 MB | 10 min | P1 | +| Prompt bloat | Git shell call caching | 🟠 High | 200 ms latency | 5 min | P1 | +| Prompt bloat | Task tree truncation | 🟠 High | 1–10 KB/turn bounded | 30 min | P1 | +| Prompt bloat | Prompt split cache | 🟡 Medium | 5–10 KB/turn | 10 min | P2 | +| Prompt bloat | Memory formatter cache | 🟡 Medium | 1–3 KB/turn | 10 min | P2 | +| Compaction | Batch memory extraction | 🟡 Medium | 1 LLM call / 5 turns | 30 min | P2 | +| Subagents | Periodic cleanup | 🟢 Low | ~1 KB/subagent sooner | 15 min | P2 | +| Architecture | Shared tool schemas | 🟢 Long-term | 5–10 MB total | 2 hrs | P3 | +| Architecture | Incremental prompt cache | 🟢 Long-term | 5–20 KB/turn | 3 hrs | P3 | +| Architecture | Task segmentation | 🟢 Long-term | Bounded prompt | 3 hrs | P3 | + +**Total immediate win (P1):** ~2–8 MB saved + significant latency reduction + bounded prompt growth. **Effort: ~1 hour.** + +--- + +## Conclusion + +KosmoKrator's memory management is **structurally sound** — history growth is bounded by automatic compaction/pruning, subagents are isolated, and no leaks exist. However, **prompt construction inefficiencies** represent a **systematic, repeatable waste** of memory and CPU that affects every session regardless of size. + +The four critical/high issues (instruction caching, tool schema caching, git calls, task tree truncation) are **low-hanging fruit** offering immediate 2–8 MB savings per session with < 1 hour total implementation time. These should be addressed in the next sprint. + +Longer-term architectural improvements (shared schemas, incremental prompt cache) offer further gains but require more careful design. + +**Next steps:** +1. Implement Priority 1 recommendations (REC-1 through REC-4) +2. Create and run benchmark suite to quantify baseline and improvement +3. Monitor production memory logs; consider lowering `compact_threshold` to 50% after prompt bloat fixes +4. Explore Priority 2 if memory pressure persists in long-running sessions + +--- + +*Report generated from synthesis of agent-loop-lifecycle, context-memory-audit, stuck-detection-memory, and prompt-engineering-overhead Phase 1 agents.* diff --git a/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-io-performance.md b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-io-performance.md new file mode 100644 index 0000000..8e3d90f --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-io-performance.md @@ -0,0 +1,222 @@ +# I/O Memory Efficiency Report + +## Executive Summary + +**Overall Rating: GOOD** with **3 moderate-risk** and **2 low-risk** memory concerns identified. + +The system demonstrates strong engineering for memory efficiency: streaming I/O, constant-memory algorithms, and bounded result sets. The primary risks are **cache unboundedness** and **orphaned background result accumulation** under failure scenarios. + +**Key Findings:** +- FileReadTool maintains an unbounded read cache that grows across process lifetime +- BashTool buffers entire command output in memory before truncation +- Subagent background results can orphan if parent crashes +- GlobTool and GrepTool use eager evaluation with intermediate array creation +- Shell session management is sound with proper idle cleanup +- OutputTruncator uses spill-to-disk strategy effectively (but post-facto) + +--- + +## Findings (Severity) + +### Medium Risk + +#### F1: FileReadTool Unbounded Cache +- **File:** `src/Tool/Coding/FileReadTool.php:21,70-72,103-104` +- **Issue:** `$readCache` array grows unbounded across process lifetime; no eviction policy +- **Impact:** Hundreds of MB in long-running sessions with many file reads (e.g., codebase exploration) +- **Current state:** Cache stores only boolean flags, minimizing per-entry footprint; FileReadTool is a singleton in ToolRegistry + +#### F2: BashTool Full Output Buffering +- **File:** `src/Tool/Coding/BashTool.php:96-108` +- **Issue:** Stdout and stderr fully buffered in memory via `buffer()` before OutputTruncator runs +- **Impact:** Commands producing >100 MB output will spike RAM; no streaming to disk or early truncation +- **Current mitigation:** OutputTruncator caps at 2000 lines / 50 KB but runs **after** tool returns (ToolExecutor line 300-302) + +#### S1: Subagent PendingResults Orphaned +- **File:** `src/Agent/SubagentOrchestrator.php:34,420` +- **Issue:** `$pendingResults[parentId]` never cleared if parent agent crashes or exits without calling `collectPendingResults()` +- **Impact:** Results (strings, potentially KB–MB each) accumulate per background subagent over time +- **Current state:** Documented in `docs/memory-leak-audit.md` as known issue; `pruneCompleted()` does not touch `$pendingResults` + +#### S3: Failed Agents Not Pruned +- **File:** `src/Agent/SubagentOrchestrator.php:394-399` +- **Issue:** `pruneCompleted()` only removes `'done'` and `'cancelled'` agents; `'failed'` agents remain forever +- **Impact:** `Future` objects hold closure references → entire agent context retained → potential MB-scale leaks + +#### G1: GlobTool Intermediate Array Buildup +- **File:** `src/Tool/Coding/GlobTool.php:93-99` +- **Issue:** `array_merge()` inside recursion loops creates O(n²) intermediate arrays for deep directory trees +- **Impact:** Temporary memory spikes during glob operations on nested structures; 10k files in nested tree → ~10 MB temporary +- **Current mitigation:** Result set capped at 200 files after full sort/deduplication (lines 59-62) + +#### G2: GrepTool Pre-Truncation Buffering +- **File:** `src/Tool/Coding/GrepTool.php:68` +- **Issue:** `buffer($process->getStdout())` reads entire output into string before applying `--max-count=50` or 100-line cap +- **Impact:** Large result sets (10k+ matches) held fully in memory despite output limits; 10k matches × 200 bytes = 2 MB +- **Current mitigation:** ripgrep's `--max-count=50` limits per-file matches; final `array_slice` caps at 100 lines (line 92) + +### Low Risk + +#### F3: FileEditTool Temp File Leaks +- **File:** `src/Tool/Coding/FileEditTool.php:179` +- **Issue:** Orphaned `*.tmp.` files if process crashes mid-write; no shutdown cleanup registered +- **Impact:** Filesystem accumulation, not RAM; requires manual cleanup or TTL-based reaping + +#### S2: Subagent Groups Semaphore Map Never Cleared +- **File:** `src/Agent/SubagentOrchestrator.php:28,469` +- **Issue:** `$groups` array accumulates `LocalSemaphore` objects per unique group name; never removed even after group empties +- **Impact:** Minor memory growth per unique group name (~few hundred bytes each); problematic if group names are dynamic (e.g., per-task IDs) + +#### G3: GlobTool Eager Sort Before Cap +- **File:** `src/Tool/Coding/GlobTool.php:59-62` +- **Issue:** `sort()` and `array_unique()` applied to full result set before 200-file cap +- **Impact:** Wasted CPU/memory sorting thousands of paths only to discard most; temporary O(n) overhead + +#### G4: GlobTool Unlimited Recursion Depth +- **File:** `src/Tool/Coding/GlobTool.php:globStar()` +- **Issue:** No depth limit; symlink loops could cause infinite recursion +- **Impact:** Potential hang or memory exhaustion in pathological directory structures + +#### G5: No Pattern Compilation Caching +- **Files:** `src/Tool/Coding/GlobTool.php`, `src/Tool/Coding/GrepTool.php` +- **Issue:** Patterns re-compiled on every invocation; no shared cache +- **Impact:** Minor CPU overhead; no direct memory impact + +--- + +## Memory Hotspots (file:line + estimates) + +### High-Impact Hotspots + +| File:Line | Component | Memory Profile | Estimate | +|-----------|-----------|----------------|----------| +| `FileReadTool.php:21` | `$readCache` array | Unbounded growth; one boolean entry per distinct `(path,mtim,offset,limit)` | 1k entries ≈ 10 KB; 100k entries ≈ 1 MB; 1M entries ≈ 10 MB | +| `BashTool.php:96-107` | `$buf` accumulation | O(command output size) before truncation; repeated concatenation in progress callback | 100 MB output → 100 MB RAM spike | +| `SubagentOrchestrator.php:34` | `$pendingResults` | Accumulates per-parent if not collected; each result string KB–MB | 100 background agents × 100 KB = 10 MB per orphaned parent | +| `GlobTool.php:93-99` | Recursion intermediates | O(n²) temporary arrays during deep `array_merge()` loops | 10k files in nested tree → ~10 MB temporary | +| `GrepTool.php:68` | `buffer()` output | Full stdout before any limit applied | 10k matches × 200 bytes = 2 MB buffer | + +### Moderate-Impact Hotspots + +| File:Line | Component | Memory Profile | Estimate | +|-----------|-----------|----------------|----------| +| `FileEditTool.php:136-183` | Temp file streaming | 64 KB chunks via `stream_copy_to_stream()`; constant memory | Negligible | +| `ShellSession.php:18-137` | `$buffer` string | Grows monotonically per session; drained via `readUnread()` but retained until session kill | 1 MB per active long-running session | +| `SubagentOrchestrator.php:28` | `$groups` map | One `LocalSemaphore` object per unique group name (~few hundred bytes) | 100 groups × 500 bytes = 50 KB | + +--- + +## I/O Bottlenecks + +### 1. Tool Execution Buffering + +**BashTool** (`src/Tool/Coding/BashTool.php:96-108`) and **GrepTool** (`src/Tool/Coding/GrepTool.php:68`) both use `Amp\Process\Process` with `buffer()` to read entire stdout/stderr into memory before any processing. This creates a **synchronization point** where all output must be held in RAM. + +- **Current caps:** OutputTruncator (2000 lines / 50 KB) runs post-facto in `ToolExecutor.php:300-302` +- **Bottleneck:** Large outputs (logs, dumps, binary data) cause RAM spikes before truncation +- **Severity:** Medium — affects any tool executing external commands + +### 2. Large File Handling + +**FileReadTool** (`src/Tool/Coding/FileReadTool.php:75-82,117-149`) implements smart thresholding: +- **< 10 MB:** `file()` loads entire file → O(file size) memory (acceptable for intended use) +- **≥ 10 MB:** `fopen()` + `fgets()` loop → O(64 KB buffer + line) constant memory ✓ + +**FileWriteTool** (`src/Tool/Coding/FileWriteTool.php:57`) holds entire content string in memory once — acceptable for <10 MB writes. + +**FileEditTool** (`src/Tool/Coding/FileEditTool.php:81-183`) uses 64 KB chunks and atomic `rename()` — excellent constant-memory algorithm ✓ + +### 3. Shell Session Lifecycle + +**ShellSession** (`src/Tool/Coding/ShellSession.php:18-137`) buffers all output in `$buffer` string with no eviction. However: + +- **Cleanup:** `ShellSessionManager::cleanupIdleSessions()` (line 49) removes sessions where `isDrained()` (exit + no unread output) after 300s TTL +- **Assessment:** ✅ Bounded by idle timeout; no unbounded accumulation +- **Caveat:** Long-running sessions with continuous output can accumulate MBs until drained or killed + +### 4. File Search Memory (Glob/Grep) + +**GlobTool** (`src/Tool/Coding/GlobTool.php:52-101`): +- Uses native `glob()` (eager array, not iterator) +- Custom `globStar()` recursion with `array_merge()` creates intermediate arrays +- Full `sort()` + `array_unique()` before 200-file cap +- **Bottleneck:** O(n) temporary memory for full match set; O(n²) intermediates in deep recursion + +**GrepTool** (`src/Tool/Coding/GrepTool.php:73-78`): +- Same eager `buffer()` pattern as BashTool +- ripgrep `--max-count=50` and final 100-line slice are **process-level** and **post-processing** limits respectively +- **Bottleneck:** Entire output held in memory before limits applied + +--- + +## Recommendations + +### Priority 1 (Address in next sprint) + +1. **FileReadTool Cache Eviction (F1)** + - Add LRU eviction with configurable max entries (e.g., 1000) + - Or add TTL (e.g., 1 hour) + - Consider per-AgentContext cache instead of singleton + - **Files:** `src/Tool/Coding/FileReadTool.php:21` + +2. **BashTool/GrepTool Streaming Output (F2, G2)** + - Stream stdout/stderr directly to `OutputTruncator` during read loop, applying line/byte limits incrementally + - Or add `stream_to_file` parameter for outputs >1 MB + - Enforce per-command output limit with early process kill + - **Files:** `src/Tool/Coding/BashTool.php:96-108`, `src/Tool/Coding/GrepTool.php:68` + +3. **SubagentOrchestrator PendingResults Cleanup (S1)** + - Add TTL (e.g., 1 hour) to `$pendingResults` entries with timestamp + - Or prune `$pendingResults[parentId]` when all agents for that parent reach terminal state + - **Files:** `src/Agent/SubagentOrchestrator.php:34,420` + +4. **Include Failed Agents in Pruning (S3)** + - Add `'failed'` to `$terminalStates` in `pruneCompleted()` + - **Files:** `src/Agent/SubagentOrchestrator.php:394` + +### Priority 2 (Next quarter) + +5. **GlobTool Optimization (G1, G3, G4)** + - Apply 200-file cap earlier in recursion to avoid building full array + - Replace `array_merge()` with generator-based yielding to eliminate intermediate arrays + - Add recursion depth limit (e.g., 20) to prevent symlink loops + - **Files:** `src/Tool/Coding/GlobTool.php:52-101` + +6. **GrepTool Streaming (G2)** + - Process ripgrep/grep output line-by-line as it arrives, writing directly to OutputTruncator stream + - Avoid full `buffer()` call; use `onRead()` callback with incremental processing + - **Files:** `src/Tool/Coding/GrepTool.php:68-78` + +7. **FileEditTool Temp File Cleanup (F3)** + - Register `register_shutdown_function()` to cleanup orphaned `*.tmp.*` files matching pattern + - Or switch to `tmpfile()` + stream wrapper for automatic cleanup + - **Files:** `src/Tool/Coding/FileEditTool.php:179` + +8. **Subagent Groups Cleanup (S2)** + - Clear `$groups[groupName]` when semaphore count reaches 0 and no pending agents + - Use `WeakMap` if PHP 8.4+ for automatic cleanup + - **Files:** `src/Agent/SubagentOrchestrator.php:28,469` + +### Priority 3 (Nice to have) + +9. **Pattern Compilation Cache (G5)** + - Implement shared cache for glob patterns and grep regex (e.g., `SplObjectStorage` or `WeakMap`) + - Cache key: pattern string + flags + - **Files:** `src/Tool/Coding/GlobTool.php`, `src/Tool/Coding/GrepTool.php` + +10. **Benchmark Suite** + - Create `docs/ram-audit/benchmarks/tool-memory.php` with scenarios: + - Concurrent tool execution: 10 / 50 / 100 parallel no-op tools + - Large file read/write: 10 MB, 50 MB, 100 MB + - Glob on 10,000 files (simulated tree) + - Grep on 10,000 files with 5000 matches + - Use `memory_get_peak_usage(true)` before/after, median of 5 runs + - **Path:** `docs/ram-audit/benchmarks/tool-memory.php` + +--- + +**Report generated from Phase 1 agent findings:** +- `tool-execution-memory` (comprehensive system audit) +- `large-file-handling` (FileReadTool/FileWriteTool/FileEditTool analysis) +- `shell-session-management` (ShellSession/SessionManager lifecycle) +- `glob-grep-optimization` (GlobTool/GrepTool memory patterns) diff --git a/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-security.md b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-security.md new file mode 100644 index 0000000..59724de --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-security.md @@ -0,0 +1,344 @@ +# Security-Adjacent RAM Efficiency Audit — Synthesis Report + +**Audit Scope:** Permission system, Codex authentication integration, configuration caching +**Date:** 2026-04-03 +**Status:** Phase 1 findings synthesized + +--- + +## Executive Summary + +This report synthesizes RAM efficiency audits across three critical subsystems: permission evaluation, Codex authentication, and configuration management. The findings reveal **systemic caching failures** that create both performance bottlenecks and **security-adjacent vulnerabilities**, particularly around memory exhaustion attack vectors and credential exposure through predictable memory patterns. + +**Key Critical Issues:** +- `PermissionRule::matchesGlob()` compiles regex on every call — hundreds of times per permission check +- `SettingsCodexTokenStore` performs 7× N+1 database queries per token operation with no in-memory cache +- `SettingsManager::reloadRepository()` triggers a full config re-parse (4+ YAML files) on every settings write +- No caching exists for path resolutions, evaluation results, or parsed YAML anywhere in the stack + +**Security Implications:** +- Memory exhaustion via repeated permission checks on complex rule sets +- Token refresh storms can saturate SQLite connection pool and memory +- Config write amplification creates predictable memory churn patterns +- Lack of rate limiting on permission evaluation enables DoS via tool spam +- Credentials repeatedly read from disk increase attack surface in shared hosting + +--- + +## Findings + +### Critical Severity + +#### 1. Regex Compilation in Hot Path — PermissionRule::matchesGlob() +**Files:** `src/Tool/Permission/PermissionRule.php:51-60`, `src/Tool/Permission/Check/DenyPatternCheck.php:39`, `src/Tool/Permission/Check/BlockedPathCheck.php:66`, `src/Tool/Permission/GuardianEvaluator.php:106` + +**Issue:** Every call to `matchesGlob()` compiles a fresh regex via `preg_quote()` + `str_replace()` + `preg_match()`. This method is invoked: +- For each deny pattern in each matching rule (DenyPatternCheck) +- For each blocked path pattern (BlockedPathCheck, up to 4× per path) +- For each safe command pattern (GuardianEvaluator, O(p) per call) + +With ~50 tools, ~10 rules, ~5 deny patterns per rule, a single permission check can trigger **250+ regex compilations**. PHP's internal regex cache is limited and not guaranteed to hit. + +**RAM Impact:** Each compiled regex pattern string occupies ~200-500 bytes in memory. At 250 compilations per check × 10 concurrent requests = **~500KB - 1.25MB** of transient regex strings per request cycle, plus GC pressure. + +**Security Risk:** An attacker controlling tool arguments can force evaluation of many deny patterns, causing CPU/memory exhaustion. No rate limiting exists on permission checks. + +--- + +#### 2. N+1 Token Storage Queries — SettingsCodexTokenStore +**Files:** `src/LLM/Codex/SettingsCodexTokenStore.php:32-38`, `src/LLM/Codex/SettingsCodexTokenStore.php:63-85` + +**Issue:** Token storage uses 7 individual settings keys (`provider.codex.*`). Every `current()` performs 7 separate SELECT queries; every `save()` performs 7 separate INSERT/UPDATE queries. No in-memory caching; every call hits SQLite. + +**RAM Impact:** Each query returns a row (~200-300 bytes). 7 queries × result set overhead × concurrent requests = **~1-2KB per request** in short-lived DB result objects. More critically, **connection pool exhaustion** under load can cause queued requests to accumulate memory. + +**Security Risk:** Token refresh storms (multiple simultaneous requests triggering refresh) cause 7 writes + HTTP call per refresh, amplifying memory/CPU usage. No refresh debouncing. + +--- + +#### 3. Full Config Reload on Every Write — SettingsManager::reloadRepository() +**Files:** `src/Settings/SettingsManager.php:266-274` + +**Issue:** After any settings `set()` or `delete()`, `reloadRepository()` creates a **new ConfigLoader** and re-parses all 4 bundled YAML files + user + project config, then copies data into the Repository. This happens on every single settings write. + +**RAM Impact:** Total YAML size ~28KB, but parsing creates intermediate arrays and objects. A full reload generates **~100-150KB** of temporary arrays/objects per write, which are then GC'd. Under rapid successive writes (e.g., batch updates), this creates significant memory churn and can push PHP memory_limit. + +**Security Risk:** An attacker with settings write access (or a buggy tool) can trigger repeated config reloads to exhaust memory. The pattern is predictable and not rate-limited. + +--- + +### High Severity + +#### 4. No Path Resolution Cache — PathResolver::resolve() +**Files:** `src/Tool/Permission/PathResolver.php:21-39` + +**Issue:** `realpath()` syscall executed on every path check with no caching. `BlockedPathCheck` calls this for every file operation, and `GuardianEvaluator::isInsideProject()` calls it for every command. + +**RAM Impact:** Each `realpath()` result is a string (~256-1024 bytes). With 100 file checks per request, that's **25-100KB** of repeated string allocations. Strings are duplicated in memory if same path resolved multiple times. + +**Security Risk:** Path traversal attacks cause repeated resolution of deep/nested paths, amplifying memory usage. No TTL or eviction on cache (because none exists). + +--- + +#### 5. Duplicate Rule Evaluation — DenyPatternCheck + RuleCheck + ModeOverrideCheck +**Files:** `src/Tool/Permission/Check/DenyPatternCheck.php:26-49`, `src/Tool/Permission/Check/RuleCheck.php:25-48`, `src/Tool/Permission/Check/ModeOverrideCheck.php:30-70` + +**Issue:** Rules are evaluated up to **3 times** in a single permission flow: +1. `DenyPatternCheck` iterates all rules, calls `matchesGlob()` for each deny pattern +2. `RuleCheck` iterates all rules again, calls `evaluate()` (which calls `matchesGlob()` again) +3. `ModeOverrideCheck` iterates all rules a third time if mode is Guardian + +**RAM Impact:** Each evaluation creates temporary arrays and regex strings. Triple evaluation multiplies memory churn by 3×. For 50 rules × 5 patterns = 750 regex compilations instead of 250. + +**Security Risk:** Complex permission rules (many deny patterns) are amplified 3×, making them a more effective DoS vector. + +--- + +#### 6. No YAML Parse Cache — ConfigLoader & YamlConfigStore +**Files:** `src/ConfigLoader.php:26-47`, `src/Settings/YamlConfigStore.php:23-35` + +**Issue:** Every `SettingsManager::get()` call triggers `load()` which reads and parses YAML from disk. No opcode or user-space cache. `ConfigLoader::load()` parses 4+ YAML files on every boot and settings write. + +**RAM Impact:** Each `Yaml::parse()` creates a full array tree (~28KB for all configs). A single `get()` loads project + global = **~56KB** of parsed arrays. With 10 `get()` calls per request = **~560KB** of transient config data (though PHP may reuse array structures, still significant). + +**Security Risk:** Repeated disk I/O + parsing increases request latency, making timing attacks easier. Also increases memory footprint for concurrent requests. + +--- + +#### 7. No Token In-Memory Caching — CodexOAuthService & SettingsCodexTokenStore +**Files:** `vendor/opencompany/prism-codex/src/CodexOAuthService.php:180-196`, `src/LLM/Codex/SettingsCodexTokenStore.php` + +**Issue:** Every `getAccessToken()` call reads 7 settings from DB. No per-request or short-term caching. Even within a single request, multiple provider calls re-fetch the same token. + +**RAM Impact:** Each token fetch creates a `CodexToken` object (~500 bytes) + 7 DB result rows. With 5 LLM calls per request = **~2.5KB** of duplicated token objects + **~3.5KB** of DB results = **~6KB** per request that could be cached. + +**Security Risk:** Token refresh under concurrent load causes multiple simultaneous refreshes, each writing to SQLite, risking database lock contention and memory spikes from queued requests. + +--- + +### Medium Severity + +#### 8. No Provider Instance Reuse — RelayProviderRegistrar & PrismManager +**Files:** `src/LLM/RelayProviderRegistrar.php:42-117`, `vendor/prism-php/prism/src/PrismManager.php:40-57` + +**Issue:** Each `PrismManager::resolve()` creates a new provider instance. No caching of provider objects. + +**RAM Impact:** Provider instance ~200-500 bytes. With 10 LLM calls per request using same provider, that's **2-5KB** of duplicated objects. Minor but unnecessary. + +**Security Risk:** Provider instantiation may involve reading credentials from config each time, increasing exposure in memory dumps. + +--- + +#### 9. Repeated SettingsPaths Instantiation & Directory Walks +**Files:** `src/Settings/SettingsManager.php` (multiple), `src/ConfigLoader.php:125-150` + +**Issue:** `SettingsPaths` objects created on every `resolve()`/`getRaw()` call. Each instantiation re-evaluates `file_exists()` and walks directory tree for project root. + +**RAM Impact:** Each `SettingsPaths` ~100 bytes + path strings. Directory walk for deep project (e.g., 6 levels) creates 12 path strings (~200 bytes). With 10 calls = **~2KB** of temporary path strings. + +**Security Risk:** Directory walk on every load increases I/O, potentially leaking directory structure via timing. + +--- + +#### 10. JWT Decode on Every Token Store +**Files:** `vendor/opencompany/prism-codex/src/CodexOAuthService.php:246-304` + +**Issue:** `storeTokens()` decodes JWT (base64 + json) on every token exchange to extract `account_id` and `email`. No caching of decoded claims. + +**RAM Impact:** Decoded JWT claims array ~500 bytes. With each refresh + initial auth = **~1KB** per auth flow. Minor but repeated. + +**Security Risk:** JWT decoding failures could leak partial token data in error messages. + +--- + +### Low Severity + +#### 11. No File Watching / Invalidation Strategy +**Files:** All config loading code + +**Issue:** No inotify/fswatch; config changes only detected on next load. Not a RAM issue directly, but prevents efficient cache invalidation, forcing either stale cache or no cache. + +**RAM Impact:** N/A — current design avoids file handle overhead. + +**Security Risk:** Stale config may persist indefinitely in long-running processes (if ever introduced). + +--- + +## Memory Hotspots + +| File:Line | Component | Estimated KB per Request | Notes | +|-----------|-----------|--------------------------|-------| +| `PermissionRule.php:51-60` | Regex compilation hotspot | 20-50 KB | 250+ compilations × ~200 bytes each | +| `SettingsCodexTokenStore.php:32-38` | Token read (7 queries) | 3-5 KB | 7 DB result sets + CodexToken object | +| `SettingsManager.php:266-274` | Full config reload on write | 100-150 KB | 5 YAML parses + array merges | +| `YamlConfigStore.php:23-35` | YAML parse per get | 50-100 KB | 2 parses per `get()` call | +| `BlockedPathCheck.php:48-74` | Path resolution + pattern matching | 10-30 KB | realpath() + multiple matchesGlob | +| `GuardianEvaluator.php:94-112` | Safe command pattern matching | 5-15 KB | O(p) regex compilations per call | +| `DenyPatternCheck.php:26-49` | Deny pattern iteration | 10-20 KB | Rules × deny patterns × regex | +| `ModeOverrideCheck.php:30-70` | Rule re-evaluation | 10-20 KB | Duplicate of RuleCheck work | +| `ConfigLoader.php:125-150` | Directory walk | 1-3 KB | Per project config load | +| `RelayProviderRegistrar.php:42-117` | Provider instantiation | 2-5 KB | Per provider resolve | + +**Total estimated RAM churn per typical request:** **~200-400 KB** of short-lived objects/strings due to caching misses. With 10 concurrent requests, that's **2-4 MB** of transient memory pressure. + +--- + +## Attack Vectors (Memory Exhaustion) + +### 1. Permission Rule Bomb +**Vector:** Attacker provides tool arguments that match many deny patterns (e.g., wildcard paths, glob patterns). Each match triggers `matchesGlob()` for every deny pattern across all rules. + +**Amplification:** With 50 rules × 5 deny patterns = 250 regex compilations per check. No limit on number of permission checks per request (tools can be called repeatedly). + +**Impact:** CPU spike + memory allocation for regex strings. Can exhaust PHP memory_limit if combined with other allocations. + +**Mitigation Status:** None — no rate limiting, no caching, no pattern complexity limits. + +--- + +### 2. Token Refresh Storm +**Vector:** Multiple concurrent requests with expiring Codex token. Each request calls `getAccessToken()`, sees token expiring, and triggers `refreshToken()` simultaneously. + +**Amplification:** Each refresh performs 7 DB reads + 7 DB writes + HTTP call. SQLite locks cause queuing; queued requests accumulate memory. + +**Impact:** Database connection pool exhaustion, memory buildup from queued request objects, potential OOM. + +**Mitigation Status:** None — no refresh debouncing, no token lock, no refresh queue. + +--- + +### 3. Config Write Amplification +**Vector:** Attacker (or bug) repeatedly writes to settings (e.g., toggling a flag). Each write triggers `reloadRepository()` → full config re-parse. + +**Amplification:** 1 write = 5 YAML parses + array merges (~100-150KB churn). 100 writes/second = 10-15 MB/s memory churn, GC cannot keep up. + +**Impact:** Memory fragmentation, GC thrashing, eventual OOM. + +**Mitigation Status:** None — no write coalescing, no debouncing, no rate limiting on settings changes. + +--- + +### 4. Path Traversal Memory Bloat +**Vector:** Attacker passes deeply nested or absolute paths (e.g., `/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p`). `PathResolver::resolve()` calls `realpath()` twice per check (path + parent). No caching means each unique path allocates new strings. + +**Amplification:** Each path string ~50 bytes, resolved path ~100 bytes. 1000 unique paths = **~150KB** of path strings. Combined with permission checks on each, multiplies. + +**Impact:** Memory bloat from unique path strings; filesystem I/O amplification. + +**Mitigation Status:** None — no path resolution cache, no canonicalization before check. + +--- + +### 5. Provider Instantiation Flood +**Vector:** Attacker triggers many LLM calls with different provider names (or same provider repeatedly). Each call instantiates a new provider object and fetches credentials. + +**Amplification:** Each provider instantiation ~300 bytes + credential fetch (7 DB queries for Codex). 100 calls = 30KB objects + 700 DB queries. + +**Impact:** DB connection exhaustion, memory from provider objects, credential exposure in more memory locations. + +**Mitigation Status:** None — no provider instance caching. + +--- + +## Recommendations + +### Immediate (Deploy within 24-48h) + +1. **Add static regex cache to `PermissionRule::matchesGlob()`** + ```php + private static array $regexCache = []; + $key = $pattern; + if (!isset(self::$regexCache[$key])) { + self::$regexCache[$key] = '/^'.str_replace(['\*', '\?'], ['.*', '.'], preg_quote($pattern, '/')).'$/i'; + } + $regex = self::$regexCache[$key]; + ``` + **Impact:** Eliminates 90%+ of regex compilation overhead. ~5-10 lines change. + +2. **Bulk token fetch in `SettingsCodexTokenStore::current()`** + Replace 7 individual SELECTs with: + ```sql + SELECT key, value FROM settings WHERE scope='global' AND key LIKE 'provider.codex.%' + ``` + Build array from single result set. + **Impact:** Reduces token load from 7 DB round-trips to 1. ~10 lines change. + +3. **Add in-memory token cache to `SettingsCodexTokenStore`** + ```php + private ?CodexToken $cached = null; + private int $cachedAt = 0; + // In current(): return $this->cached if within 5s + ``` + **Impact:** Prevents DB thrashing on rapid successive calls. ~15 lines change. + +--- + +### Short-Term (1-2 weeks) + +4. **Memoize permission evaluation results in `PermissionEvaluator`** + Cache `(toolName, argsHash) => PermissionResult` for duration of request (or session). Invalidate on `resetGrants()`. + **Impact:** Avoids re-running chain for same tool+args. Major CPU/memory savings for repeated tool calls. + +5. **Cache path resolutions in `PathResolver`** + Static `array $cache = []` keyed by realpath. TTL not needed for request-lifetime. + **Impact:** Eliminates duplicate `realpath()` syscalls. ~10 lines change. + +6. **Avoid full config reload on write in `SettingsManager`** + In `reloadRepository()`, instead of full `ConfigLoader::load()`, update `$this->config` incrementally using the `$data` already loaded in `configTarget()`. + **Impact:** Reduces write amplification from 5 parses to 0. ~20 lines change. + +7. **Add YAML parse cache to `YamlConfigStore`** + Static `array $cache` keyed by `realpath($path) . filemtime($path)`. Invalidate on `save()`. + **Impact:** Eliminates redundant parses across multiple `get()` calls. ~20 lines change. + +8. **Cache provider instances in `RelayProviderRegistrar`** + Private array `$instances = []`. Return cached if already resolved. + **Impact:** Saves ~200-500 bytes per provider call, reduces credential fetch overhead. + +--- + +### Long-Term (1-2 months) + +9. **Index permission rules by tool name** + Build associative array `[toolName => PermissionRule[]]` during `PermissionEvaluator` construction. Avoid linear scan of all rules on every check. + **Impact:** O(1) rule lookup vs O(n). Significant for large rule sets. + +10. **Eliminate duplicate rule evaluation** + Refactor check chain so `RuleCheck` returns both Deny and Ask states in one pass, and `ModeOverrideCheck` reuses that result instead of re-evaluating. + **Impact:** Cuts rule evaluation overhead by 66% in Guardian mode. + +11. **Pre-compile all glob patterns at startup** + In `PermissionConfigParser`, convert each deny pattern to compiled regex once and store in `PermissionRule` as `\Closure|string`. No runtime compilation. + **Impact:** Zero regex compilation at runtime. + +12. **Add rate limiting to permission evaluation** + Per-session or per-user limit on permission checks per minute. Prevents DoS via tool spam. + **Impact:** Thwarts memory exhaustion attacks. + +13. **Token refresh debouncing with mutex** + Use SQLite `BEGIN IMMEDIATE` or file lock to ensure only one refresh occurs concurrently. Others wait and reuse result. + **Impact:** Prevents refresh storms. + +14. **Consider APCu/Redis for cross-request caching** + - Cache merged config array keyed by file mtimes + - Cache token in shared memory with TTL + - Cache compiled regex patterns (though static cache already helps) + **Impact:** Reduces per-request memory churn dramatically for long-running processes (if ever introduced). + +15. **Add config write coalescing** + Batch multiple `set()` calls within a short window into a single reload. Use a "dirty" flag and debounce reload by 1-2 seconds. + **Impact:** Prevents write amplification from rapid successive updates. + +--- + +## Conclusion + +The permission, authentication, and configuration systems exhibit **critical RAM inefficiencies** that are not merely performance issues but **security-adjacent vulnerabilities**. The lack of caching at every layer creates predictable memory churn patterns that can be exploited for denial-of-service through memory exhaustion. Immediate actions (regex cache, bulk token fetch, in-memory token cache) are low-effort, high-impact fixes that should be deployed within 48 hours. Short-term improvements (memoization, path cache, config reload optimization) will reduce per-request memory churn by an estimated **60-70%**. Long-term architectural changes (rule indexing, duplicate evaluation elimination, rate limiting) are necessary to harden the system against targeted attacks. + +**Priority:** Address Critical issues first — they represent the easiest wins with the largest security/performance payoff. + +--- + +**Report Generated By:** KosmoKrator Synthesis Agent +**Source Agents:** permission-system-overhead, codex-auth-integration, config-caching +**Output Path:** `docs/ram-audit/synthesis-security.md` diff --git a/docs/ecosystem/kosmokrator/audits/self-audit-2026-03-30.md b/docs/ecosystem/kosmokrator/audits/self-audit-2026-03-30.md new file mode 100644 index 0000000..f2c8e68 --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/self-audit-2026-03-30.md @@ -0,0 +1,317 @@ +# KosmoKrator Self-Audit + +> Status: Historical audit from 2026-03-30. Repository size, test counts, and implementation notes may no longer match the current tree. + +**Date:** 2026-03-30 +**Scope:** Full codebase — `src/`, `tests/`, `config/` +**Stats:** ~13,700 lines PHP 8.4 across 68 source files, 6,200 lines of tests (498 tests, 1060 assertions) + +## Architecture Overview + +``` +bin/kosmokrator → Kernel → AgentCommand → AgentLoop (REPL) + ├── LLM client (AsyncLlmClient or PrismService) + ├── UIManager → TuiRenderer | AnsiRenderer + ├── ToolRegistry → tools (bash, file_read, file_write, file_edit, grep, glob) + └── PermissionEvaluator → approval flow +``` + +Subsystems: Agent, LLM, Tool (Coding + Permission + Session + Task), UI (TUI + ANSI), Session (SQLite persistence), Task (in-memory tracking). + +## What's Done Well + +1. **Clean separation of concerns** — Tools, Permissions, Session, LLM, UI are distinct subsystems with narrow interfaces. +2. **Permission system is thoughtful** — Three modes (Guardian/Argus/Prometheus), Guardian uses static heuristics, blocked paths/glob patterns, session grants. +3. **Context management** — Three-tier: Pruner (cheap, replaces old tool results), Compactor (LLM summary), TrimOldest (last resort). Pre-flight check before LLM calls. +4. **Good test coverage** — Unit tests for every subsystem, 498 tests passing. +5. **Instruction loading** — Priority-based: global → project → subdirectory. YAML + SQLite settings with migration path. + +--- + +## Issues & Improvements + +### Security Concerns + +#### 1. `PermissionRule::matchesGlob()` — `*` matches across word boundaries + +**File:** `src/Tool/Permission/PermissionRule.php:45-53` + +The glob-to-regex conversion treats `*` as `.*`, which matches `/` and any character. This means Guardian safe-command patterns like `git *` would match `git log && rm -rf /`. + +```php +public static function matchesGlob(string $value, string $pattern): bool +{ + $regex = '/^' . str_replace( + ['\*', '\?'], + ['.*', '.'], // `.*` matches everything including spaces and `&&` + preg_quote($pattern, '/'), + ) . '$/i'; + + return (bool) preg_match($regex, $value); +} +``` + +**Recommendation:** For command matching, `*` should match non-whitespace only (`[^\s]*`) or the matcher should be aware of shell metacharacters (`&&`, `|`, `;`, backticks, `$()`). Alternatively, parse the command into a first-token + rest and only match against the first token. + +--- + +#### 2. `GrepTool` uses `exec()` instead of Symfony `Process` + +**File:** `src/Tool/Coding/GrepTool.php:53` + +```php +exec($fullCmd . ' 2>&1', $output, $returnCode); +``` + +Unlike `BashTool` which uses `Symfony\Component\Process\Process`, `GrepTool` uses raw `exec()`. This means: +- No process timeout +- Not cancellable +- Inconsistent with the rest of the codebase + +The `hasRipgrep()` check (line 66) also uses `exec()`. + +**Recommendation:** Migrate to Symfony `Process` for consistency and cancellability. + +--- + +#### 3. `ConfigLoader` env var resolution treats `"0"` as empty + +**File:** `src/ConfigLoader.php:57-59` + +```php +$content = preg_replace_callback('/\$\{(\w+)\}/', function (array $matches) { + return $_ENV[$matches[1]] ?? $_SERVER[$matches[1]] ?? getenv($matches[1]) ?: ''; +}, $content); +``` + +The `?: ''` fallback coerces `"0"` to `''` because `"0"` is falsy in PHP. If an env var is set to the string `"0"`, it silently becomes empty. + +**Recommendation:** Replace `?: ''` with proper false-check: +```php +$env = $_ENV[$matches[1]] ?? $_SERVER[$matches[1]] ?? getenv($matches[1]); +return $env !== false ? $env : ''; +``` + +--- + +#### 4. `OutputTruncator` truncation file path with empty tool call ID + +**File:** `src/Agent/OutputTruncator.php:82` + +```php +$path = $this->storagePath . '/tool_' . preg_replace('/[^a-zA-Z0-9_-]/', '_', $toolCallId) . '.txt'; +``` + +If `$toolCallId` is empty, the file becomes `tool_.txt`. Subsequent truncations with empty IDs would overwrite each other. Low risk but could lose data. + +**Recommendation:** Generate a fallback ID (timestamp + random) when `$toolCallId` is empty. + +--- + +### Bugs & Logic Issues + +#### 5. Default provider `'z'` is confusing + +**File:** `src/Kernel.php:147`, `src/Command/AgentCommand.php:62` + +```php +$provider = $config->get('kosmokrator.agent.default_provider', 'z'); +``` + +The hardcoded fallback to a single-letter provider name `'z'` is unclear. If a user hasn't configured a provider named `z`, the API key lookup returns empty and the agent fails with a generic error instead of a helpful message. + +**Recommendation:** Use a well-known provider as default (`'anthropic'` or `'openai'`), or better — detect available providers from configured API keys and pick the first one. + +--- + +#### 6. `PrismService` hardcodes `withMaxSteps(10)` + +**File:** `src/LLM/PrismService.php:128` + +```php +if (! empty($tools)) { + $request->withTools($tools); + $request->withMaxSteps(10); +} +``` + +The tool-call recursion limit of 10 is hardcoded. Complex refactoring tasks can legitimately need more rounds. When hit, the agent silently stops mid-task. + +**Recommendation:** Make this configurable via `config/kosmokrator.yaml` (e.g., `agent.max_tool_rounds: 25`). + +--- + +#### 7. `AgentLoop::executeToolCalls()` receives named args as associative array + +**File:** `src/Tool/ToolRegistry.php:46-48` + +```php +->using(function (...$args) use ($tool) { + $result = $tool->execute($args); + return $result->output; +}); +``` + +Prism calls tool handlers with named arguments. PHP spreads these into an associative array. This works but the contract is implicit — if Prism changes its calling convention, tools break silently. + +**Recommendation:** Add a defensive comment or normalize `$args` explicitly. Consider logging when `$args` structure is unexpected. + +--- + +#### 8. `TaskStore::clearTerminal()` has duplicate docblock + +**File:** `src/Task/TaskStore.php:240-248` + +Two consecutive `/**` docblocks — one says "Remove all completed tasks", the next says "Remove all terminal tasks". The second is correct (the method also removes cancelled tasks). + +**Recommendation:** Remove the stale first docblock. + +--- + +### Architecture / Design + +#### 9. `AgentCommand::repl()` is a 320-line method + +**File:** `src/Command/AgentCommand.php:151-478` + +The REPL handles 15+ slash commands (`/quit`, `/settings`, `/resume`, `/guardian`, etc.) with inline logic. Each command has direct access to `$agentLoop`, `$permissions`, `$sessionManager`, `$llm`, etc. + +**Recommendation:** Extract into a `SlashCommand` registry pattern: + +```php +interface SlashCommand { + public function name(): string; + public function handle(Context $ctx, string $args): void; +} +``` + +This would improve testability and make it easy to add new commands. + +--- + +#### 10. `UIManager` is a pure delegate with leaky abstraction + +**File:** `src/UI/UIManager.php` + +Every `RendererInterface` method is delegated one-to-one. Additionally, several methods do `instanceof` checks: + +```php +public function showWelcome(): void +{ + if ($this->renderer instanceof AnsiRenderer) { + $this->renderer->showWelcome(); + } elseif ($this->renderer instanceof TuiRenderer) { + $this->renderer->showWelcome(); + } +} +``` + +This pattern repeats for `playTheogony()`, `playPrometheus()`, `seedMockSession()`, `setTaskStore()`, `refreshTaskBar()`. + +**Recommendation:** Add these methods to `RendererInterface` with default no-op implementations, eliminating the instanceof checks. + +--- + +#### 11. `Kernel` uses Laravel's full Application container + +**File:** `src/Kernel.php:61` + +```php +$this->container = new LaravelApp($this->basePath); +``` + +The app bootstraps `Illuminate\Foundation\Application`, Facades, Events, Filesystem, and HTTP factory — all to serve Prism's Laravel integration. This is heavyweight for a CLI tool: + +- `LaravelApp` triggers bootstrapping overhead +- Facades add global state +- HTTP factory registered only because Prism uses the `Http` facade + +**Recommendation:** For now this works. If binary size or boot time becomes an issue, consider using `illuminate/container` standalone + a thin adapter for Prism. + +--- + +#### 12. `ModelCatalog` uses order-dependent substring matching + +**File:** `src/LLM/ModelCatalog.php:63-66` + +```php +foreach ($this->models as $name => $spec) { + if (str_contains($key, strtolower($name))) { + return $spec; + } +} +``` + +If the catalog has both `glm` and `glm-5`, the model `z/GLM-5` matches whichever comes first in the YAML. Order-dependent matching is fragile. + +**Recommendation:** Use exact match first (already done), then longest-prefix match instead of first-substring match. + +--- + +#### 13. No streaming for `AsyncLlmClient` + +**File:** `src/LLM/AsyncLlmClient.php:40-71` + +The async client buffers the entire response body before parsing. For long agent responses, the user sees nothing until the full response arrives. `AgentLoop::run()` calls `$this->ui->streamChunk($fullText)` with the complete text at once — not incremental. + +**Recommendation:** Implement SSE streaming for the async client, feeding chunks to the UI as they arrive. + +--- + +#### 14. No retry logic for transient API errors + +**File:** `src/Agent/AgentLoop.php:138-161` + +The error handling catches all `Throwable` but doesn't distinguish between retryable errors (429 rate limit, 503 service unavailable) and permanent errors (401, 400). A simple retry with exponential backoff for 429/503 would significantly improve reliability. + +**Recommendation:** Add retry logic in `AsyncLlmClient::chat()` for HTTP 429 and 5xx responses, with configurable max retries and backoff. + +--- + +#### 15. No concurrent tool execution + +**File:** `src/Agent/AgentLoop.php:263-376` + +Tool calls are executed sequentially in a `foreach`. Independent tool calls (e.g., reading two different files) could run concurrently, especially with the Amp async client. + +**Recommendation:** Group independent tool calls and execute them in parallel using `Amp\Future\awaitAll()`. + +--- + +### Tooling / DX + +#### 16. Pint checks `vendor-src/` — should only check `src/` and `tests/` + +The Pint `--test` run shows many style violations from `vendor-src/symfony/`. These are not part of the KosmoKrator codebase and should be excluded. + +**Recommendation:** Add a `pint.json` configuration: + +```json +{ + "paths": ["src", "tests"] +} +``` + +--- + +#### 17. `.gitignore` missing entries + +Missing: `*.phar`, `composer.phar`, `.phpcs-cache`. The `box.json` output path should also be ignored if building PHARs. + +--- + +## Priority Matrix + +| Priority | # | Issue | Impact | +|----------|---|-------|--------| +| **High** | 1 | Glob `*` matches across word boundaries | Security: Guardian bypass | +| **High** | 6 | Hardcoded `maxSteps(10)` | Agent silently stops on complex tasks | +| **Medium** | 3 | Env var `"0"` evaluates to empty | Subtle config bug | +| **Medium** | 5 | Default provider `'z'` is confusing | Bad DX for new users | +| **Medium** | 9 | 320-line REPL method | Maintainability | +| **Medium** | 16 | Pint checks vendor-src | CI noise | +| **Low** | 2 | GrepTool uses `exec()` not `Process` | Consistency, cancellability | +| **Low** | 10 | UIManager instanceof checks | Abstraction leak | +| **Low** | 13 | No streaming for async client | UX improvement | +| **Low** | 14 | No retry for transient API errors | Reliability | +| **Low** | 15 | No concurrent tool execution | Performance | diff --git a/docs/ecosystem/kosmokrator/deep-audit-2026-04-04.md b/docs/ecosystem/kosmokrator/deep-audit-2026-04-04.md new file mode 100644 index 0000000..edc7281 --- /dev/null +++ b/docs/ecosystem/kosmokrator/deep-audit-2026-04-04.md @@ -0,0 +1,715 @@ +# KosmoKrator Deep Audit — 2026-04-04 + +> **Scope**: Full codebase audit across 20 dimensions — code quality, edge cases, TUI/UX, security, refactoring opportunities. +> **Methodology**: 16 parallel exploration agents spawning ~62 sub-agents for deep-dive analysis across 20 dimensions. +> **Codebase**: 277 PHP files, ~50K lines, PHP 8.4, Symfony Console + TUI. +> **Findings**: 65 Critical, 128 Important, 91 Minor = **284 total findings**. + +--- + +## Table of Contents + +1. [Executive Summary](#executive-summary) +2. [Top 25 Critical Issues](#top-25-critical-issues) +3. [Area Findings](#area-findings) + - [AgentLoop Core](#1-agentloop-core--repl-orchestrator) + - [Subagent Orchestration](#2-subagent-orchestration) + - [TUI Renderer](#3-tui-renderer) + - [ANSI Renderer](#4-ansi-renderer--markdown) + - [Tool System & Permissions](#5-tool-system--permission-model) + - [LLM Client Layer](#6-llm-client-layer) + - [Session & Database Persistence](#7-session--database-persistence) + - [Commands & Slash Commands](#8-commands--slash-commands) + - [Settings & Configuration](#9-settings--configuration) + - [Diff & UI Display](#10-diff-rendering--ui-display) + - [Power Commands & UX](#11-power-commands--ux-workflows) + - [Testing Coverage](#12-testing-coverage--quality) +4. [Cross-Cutting Themes](#cross-cutting-themes) +5. [Security Concerns Summary](#security-concerns-summary) +6. [Refactoring Backlog](#refactoring-backlog-prioritized) + +--- + +## Executive Summary + +The audit identified **65 critical**, **128 important**, and **91 minor** issues across the codebase (284 total). The most systemic problems are: + +- **No graceful shutdown**: No signal handling anywhere in the codebase. Ctrl+C = orphaned processes, broken terminal, unsaved data. +- **Security**: File tools have no path containment checks; permission system is opt-in (default-allow). File writes are non-atomic. +- **Concurrency**: Shared mutable state (`ContextBudget`, `ProtectedContextBuilder`, `BashTool::$progressCallback`), subagent slot leaks for root agent, race conditions in tool result ordering. +- **Exception hygiene**: Only 2 custom exceptions in 277 files. 6 silently swallowed `\Throwable` catches. Raw `$e->getMessage()` leaked to LLM. +- **TUI stability**: Modal stacking can deadlock, triple concurrent 30fps render timers, no TUI→ANSI mid-session fallback. +- **Configuration**: `reloadRepository()` loses user/project overrides, audio config mutates shared LLM singleton, LLM clients capture stale config at registration. +- **Testing**: ContextManager has 1 test, no integration tests exist, no tool result ordering tests, no UTF-8 truncation tests. + +--- + +## Top 30 Critical Issues + +Ranked by impact (severity × likelihood × affected surface). + +| # | Issue | File | Impact | +|---|-------|------|--------| +| 1 | **No path traversal protection in file tools** | `FileWriteTool.php:49`, `FileEditTool.php:51`, `FileReadTool.php:57` | LLM can write `/etc/passwd`, `~/.ssh/authorized_keys`. Relies entirely on permission chain being configured. | +| 2 | **Permission evaluator defaults to Allow** | `PermissionEvaluator.php:66-68` | Any tool not explicitly covered by rules/grants/blocked-paths is auto-approved. Security should default-deny. | +| 3 | **Non-atomic file writes** | `FileWriteTool.php:49` | `file_put_contents()` leaves partial files on crash. `FileEditTool` correctly uses temp+rename; `FileWriteTool` does not. | +| 4 | **Shell sessions orphaned on process crash** | `ShellSessionManager.php:164-179` | No `__destruct()` or shutdown handler. SIGKILL leaves zombie processes. | +| 5 | **`reloadRepository()` loses YAML overrides** | `SettingsManager.php:267-274` | After any settings write, in-memory config reverts to bundled defaults only, discarding user/project YAML. | +| 6 | **Audio config mutates shared LLM client** | `SessionServiceProvider.php:56-65` | `setProvider()`/`setModel()` on the shared singleton permanently changes the LLM for all agent calls, not just audio. | +| 7 | **TUI modal stacking causes deadlock** | `TuiModalManager.php` | No mutex prevents two modals from being shown simultaneously. If `askToolPermission()` fires during `askUser()`, deadlock. | +| 8 | **No `SQLITE_BUSY` handling** | `Database.php:38-39` | Missing `PRAGMA busy_timeout`. Two KosmoKrator processes writing simultaneously crash immediately. | +| 9 | **Unlimited LLM retries by default** | `RetryableLlmClient.php:37`, `LlmServiceProvider.php:81` | `$maxAttempts = 0` = infinite retries. Persistent 429/5xx loops forever. | +| 10 | **Tool result ordering doesn't match call order** | `ToolExecutor.php:212-217` | Denied results are appended after approved results, confusing the LLM which expects results in call order. | +| 11 | **`OutputTruncator::truncate()` splits mid-UTF8** | `OutputTruncator.php:96-98` | `substr()` on byte boundary can slice through multi-byte characters, producing corrupted output sent to the LLM API. | +| 12 | **Context compactor LLM call has no cancellation** | `ContextCompactor.php:164-167` | User cancel during compaction doesn't abort the compaction LLM request. | +| 13 | **No signal handling in AgentCommand** | `AgentCommand.php` | Ctrl+C skips teardown — no `killAll()`, no `cancelAll()`, no `ui->teardown()`. Orphaned processes, broken terminal state. | +| 14 | **Silent message loss on null tool_result** | `MessageSerializer.php:109-111` | Missing `tool_results` data → `null` → silently filtered → broken conversation flow → API errors. | +| 15 | **No session/message deletion** | `SessionRepositoryInterface.php` | Database grows without bound. No way to clean up old sessions or their messages. | +| 16 | **`PrismService` drops `reasoningContent`** | `PrismService.php:111-120` | Reasoning/thinking content silently lost for Prism-backed providers (Anthropic, Gemini). | +| 17 | **AnsiTheogony: 80s unskippable animation** | `AnsiTheogony.php` | No skip mechanism. Screen shake bug (both branches produce same direction). | +| 18 | **Triple concurrent 30fps render timers** | `TuiAnimationManager.php:378`, `TuiToolRenderer.php:267`, `SubagentDisplayManager.php:205` | Breathing + loader + tool-executing timers each trigger full terminal re-render independently. | +| 19 | **Substring model matching can return wrong spec** | `ModelDefinitionSource.php:86-101` | `"gpt-4o-mini"` matches `"gpt-4o"` if mini not explicitly defined. Wrong pricing/context window. | +| 20 | **Stuck detector misses oscillating patterns** | `StuckDetector.php:49-58` | Only checks last signature. `[A,A,A,B,A,A,A,B,...]` never triggers. Any non-stuck round fully resets escalation. | +| 21 | **Non-atomic config file writes** | `YamlConfigStore.php:60` | `file_put_contents()` without temp+rename. Crash mid-write = corrupted YAML. | +| 22 | **`forProject()` loads ALL memories into RAM** | `MemoryRepository.php:65-88` | No limit/pagination. O(n log n) sort on full dataset every retrieval. | +| 23 | **`AsyncLlmClient` provider list not checked by factory** | `LlmClientFactory.php:45` vs `AsyncLlmClient.php:34` | Two independent provider lists can drift. Factory creates client for providers not in the compatibility list. | +| 24 | **`collectResult()` detects errors by "Error:" prefix** | `ToolExecutor.php:405` | `str_starts_with($result, 'Error:')` — grep output for the word "Error:" is falsely marked as failed. | +| 25 | **No terminal capability detection** | `UIManager.php:377-389`, `Theme.php` | Unconditional 24-bit color + Unicode. No `NO_COLOR`, `COLORTERM`, or `TERM` check. Garbled on limited terminals. | +| 26 | **`yieldSlot`/`reclaimSlot` slot leak for root agent** | `SubagentOrchestrator.php:471-496` | Root agent never acquires semaphore lock but `reclaimSlot` consumes one permanently. After N calls → deadlock. | +| 27 | **Shared `ContextBudget` across all subagent depths** | `SubagentFactory.php:87` | Deep child compaction deducts from root's budget pool. Root can run out prematurely. | +| 28 | **No error handling during kernel boot** | `bin/kosmokrator`, `Kernel.php:45-72` | Zero try-catch in bootstrap. Provider failure = partial initialization, raw stack trace. | +| 29 | **Raw `$e->getMessage()` leaked to LLM** | `AgentLoop.php:288,312,518`, `ToolExecutor.php:313` | Internal error messages (HTTP codes, file paths, provider details) stored as assistant messages. No sanitization. | +| 30 | **`wouldCreateCycle` crashes on pruned stats** | `SubagentOrchestrator.php:375` | Accesses `$this->stats[$current]->dependsOn` without existence check. Pruned agents → TypeError. | + +--- + +## Area Findings + +### 1. AgentLoop Core & REPL Orchestrator + +**Files**: `src/Agent/AgentLoop.php` (858 lines), `ToolExecutor.php` (465 lines), `ContextManager.php`, `StuckDetector.php`, `OutputTruncator.php`, `TokenEstimator.php` + +#### Critical +- `OutputTruncator::truncate()` uses byte-level `substr()` that can split mid-UTF8 character (`OutputTruncator.php:96-98`) +- `BashTool::$progressCallback` is static mutable — race condition in concurrent bash execution (`ToolExecutor.php:162`) +- Context compactor LLM call has no cancellation support (`ContextCompactor.php:164-167`) + +#### Important +- **Tool result ordering bug**: denied results appended after approved, not matching tool call order (`ToolExecutor.php:212-217`) +- **Stuck detector misses oscillating patterns**: only checks last signature, escalation resets on any non-stuck round (`StuckDetector.php:49-58`) +- **Token estimation 15-30% low for code**: fixed 4 chars/token ratio (`TokenEstimator.php:37`) +- **No max-iteration guard in `run()`**: infinite tool-call loop possible in interactive mode (`AgentLoop.php:198`) +- **`collectResult()` detects errors by "Error:" string prefix**: fragile, false positives on grep output (`ToolExecutor.php:405`) +- **`ContextBudget` default `reserveOutputTokens=0`**: no room for LLM response → API error (`ContextBudget.php:53-56`) +- **`isContextOverflow()` is a fragile heuristic**: string matching on error messages from different providers (`AgentLoop.php:748-757`) +- **`apply_patch` args don't populate `$writePaths`**: concurrent `file_read` of patched file gets stale data (`ToolExecutor.php:341-357`) +- **No timeout on individual tool execution**: misbehaving tool blocks event loop (`ToolExecutor.php:168`) +- **`shell_kill` not in read-only guard**: state-changing operation bypasses Ask/Plan mode checks (`ToolExecutor.php:109`) +- **`findTool()` is O(n) linear scan**: should use hash map (`ToolExecutor.php:437-446`) + +#### Minor +- `$autoApproved` / `$approvedById` built but never used — dead code (`AgentLoop.php:143-146`) +- `formatStatusModelLabel()` is a trivial passthrough (`AgentLoop.php:732-735`) +- Duplicate `performCompaction()` logic in two locations (`AgentLoop.php:364-372` vs `848-857`) +- `headlessPreFlightCheck()` is a trivial wrapper (`ContextManager.php:129-132`) +- `ContextPruner::importanceScore()` uses English-only phrases (`ContextPruner.php:194`) + +--- + +### 2. Subagent Orchestration + +**Files**: `src/Agent/SubagentOrchestrator.php` (665 lines), `SubagentFactory.php`, `SubagentStats.php`, `SubagentTool.php` + +#### Critical +- **Potential deadlock in dependency + group combo**: If agent A depends on agent B, and both are in the same group (sequential), the group semaphore blocks A from starting while the dependency waits for A to run. +- **`SubagentTool` input validation**: empty task descriptions, malformed `depends_on` arrays, and circular references aren't validated before submission to the orchestrator. + +#### Important +- **Retry logic doesn't distinguish transient vs permanent failures**: auth errors (401/403) correctly skipped, but malformed-request errors (400) may be retried unnecessarily. +- **Stats double-count tokens during retries**: each retry attempt adds to the token counter; no deduplication of pre-retry tokens. +- **Background agent results injected on next LLM turn**: if the parent never makes another LLM call (exits), background results are lost. +- **`SubagentStats::elapsed()` includes retry wait time**: makes timing metrics misleading. + +#### Minor +- Agent ID uniqueness not enforced — collision possible if LLM reuses IDs across batches. +- No telemetry/observability hooks for orchestrator events. + +--- + +### 3. TUI Renderer + +**Files**: `src/UI/Tui/TuiCoreRenderer.php` (1169 lines), `TuiToolRenderer.php` (641 lines), `TuiModalManager.php` (513 lines), `TuiAnimationManager.php` (434 lines), `SubagentDisplayManager.php` (537 lines) + +#### Critical +- **Modal stacking deadlock**: no mutex prevents `askToolPermission()` during `askUser()` (`TuiModalManager.php`) +- **`askUser()` cleanup bypassed on external resume**: QuestionWidget left in overlay when cancelled from `TuiCoreRenderer` (`TuiModalManager.php:130-149`) +- **`showToolResult` uses stale `lastToolArgs`**: concurrent tool calls overwrite each other's args (`TuiToolRenderer.php:194`) +- **`cycleMode()` breaks on unexpected label**: `array_search` returns `false` → silent wrong mode (`TuiCoreRenderer.php:903-911`) +- **Cancellation race in Thinking→Idle transition**: old cancelled token used after new one created (`TuiCoreRenderer.php:451-465`) +- **`showBatch()` filters by substring "spawned in background"**: real results containing this text are hidden (`SubagentDisplayManager.php:278`) + +#### Important +- **`streamChunk` rebuilds MarkdownWidget on every token**: string concat + full markdown re-parse per chunk. Performance issue on long responses (`TuiCoreRenderer.php:543-544`) +- **Triple concurrent 30fps render timers**: breathing (33ms) + loader (50ms) + tool-executing (50ms) each trigger full re-render independently +- **No truncation for large tool outputs in TUI**: CollapsibleWidget stores full string in memory (`TuiToolRenderer.php:220-230`) +- **Binary/null bytes in tool outputs**: `explode("\n", $output)` produces garbled display (`TuiToolRenderer.php:220`) +- **`toolExecutingTimerId` leaks on error**: orphaned 50ms repeat timer runs indefinitely (`TuiToolRenderer.php:305-318`) +- **`compactingTimerId` not cancelled on Idle**: `enterIdle()` cancels thinking timer but not compacting timer (`TuiAnimationManager.php:347-364`) +- **Container widgets accumulate in conversation**: each `showSpawn()` adds a new ContainerWidget; old ones persist (`SubagentDisplayManager.php:126-128`) +- **Progress bar counts failed agents as "done"**: misleading progress percentage (`SubagentDisplayManager.php:254-264`) +- **`pendingEditorRestore` text lost on error**: typed input never restored if agent errors during streaming (`TuiCoreRenderer.php:416-419`) +- **`clearConversationState()` doesn't reset tool renderer state**: orphaned timers reference removed widgets (`TuiCoreRenderer.php:791-801`) +- **No terminal resize handling during streaming**: scroll offsets become stale +- **`setMaxVisibleLines(2)`**: too restrictive for multi-line editing (`TuiCoreRenderer.php:298`) +- **No input length limit in EditorWidget**: very long pastes create enormous text buffers +- **No command history (up/down arrow)**: only conversation scroll via PAGE_UP/PAGE_DOWN + +#### Minor +- Spinner index increments indefinitely (`TuiAnimationManager.php:299`) +- ESC cancels during thinking — undocumented behavior +- `playAnimation()` stops/starts TUI without try/catch — TUI remains stopped on animation error +- `renderIntro()` uses blocking `usleep`/`sleep` on event loop + +--- + +### 4. ANSI Renderer & Markdown + +**Files**: `src/UI/Ansi/AnsiRenderer.php` (568 lines), `AnsiCoreRenderer.php`, `MarkdownToAnsi.php` (535 lines), `AnsiIntro.php` (611 lines), `AnsiTheogony.php` (2014 lines), `Theme.php` + +#### Critical +- **AnsiTheogony: no skip/abort mechanism**: ~80 second unskippable animation (`AnsiTheogony.php`) +- **Screen shake bugs**: both branches produce same direction `\033[1B` (`AnsiTheogony.php:927`); up+down cancels out `\033[1A\033[1B` (`AnsiTheogony.php:1026`) + +#### Important +- **No streaming output in ANSI mode**: user sees nothing until full response completes (`AnsiCoreRenderer.php:172-176`) +- **`clearThinking()` is a no-op**: "Thinking..." text never erased (`AnsiCoreRenderer.php:130-133`) +- **Status bar, welcome, separators overflow on narrow terminals**: fixed-width `━` bars assume ≥80 cols +- **Table rendering has no total-width overflow**: wide tables corrupt layout (`AnsiTableRenderer.php:22`) +- **All Theme colors designed for dark backgrounds only**: invisible on light terminals. No `COLORFGBG` detection +- **`wrapCodeLine()` is O(n²)**: `mb_substr(substr($line, $i), 0, 1)` per character (`MarkdownToAnsi.php:459-508`) +- **TableCollector drops nested inline elements**: links, images, strikethrough silently removed from table cells +- **Terminal size detection uses `exec('tput')` instead of `posix_get_terminal_size()`**: blocking, adds latency on SSH + +#### Minor +- Duplicate `wrapAnsiText()` in `MarkdownToAnsi` and `ListTracker` +- Missing `declare(strict_types=1)` in `MarkdownToAnsi.php` +- `Theme::codeBg()` defined but never used in rendering +- Italic/strikethrough escape codes hardcoded instead of using Theme +- Logo constants duplicated between `AnsiIntro` and `AnsiTheogony` +- `ListTracker` uses `mb_strlen` instead of `mb_strwidth` for bullet indent +- `Theme::white()` uses 16-color `[1;37m` inconsistent with 24-bit RGB elsewhere + +--- + +### 5. Tool System & Permission Model + +**Files**: `src/Tool/Coding/File*.php`, `PatchApplier.php`, `Shell*.php`, `BashTool.php`, `GrepTool.php`, `GlobTool.php`, `src/Tool/Permission/*` + +#### Critical +- **No path traversal protection**: `FileWriteTool`, `FileEditTool`, `FileReadTool` accept raw paths with zero project-root validation +- **Symlink following risk**: `PathResolver::resolve()` follows symlinks via `realpath()` — symlink to `/etc/shadow` inside project +- **Non-atomic writes in `FileWriteTool`**: `file_put_contents()` directly, no temp+rename +- **Permission system is opt-in per tool**: if tool not in `approval_required`, entire permission chain is bypassed +- **`PermissionEvaluator::evaluate()` defaults to Allow**: should default-deny for safety + +#### Important +- **Temp file leak on crash**: `FileEditTool` creates `$path.'.tmp.'.getmypid()` with no cleanup (`FileEditTool.php:139`) +- **PatchApplier update non-atomic for moves**: write destination → unlink source; crash between = data duplication +- **Concurrent file edits: last-write-wins**: no file locking +- **PatchApplier line-ending corruption**: `implode("\n", ...)` on CRLF files inserts LF +- **Shell session idle cleanup only on tool calls**: if agent stops, sessions live forever (`ShellSessionManager.php:238-251`) +- **No max session limit**: malicious agent could exhaust file descriptors +- **`GrepTool` timeout declared but never used**: `$timeout = 30` is dead code (`GrepTool.php:19`) +- **Regex DoS possible in GrepTool**: `(.){1000000}` causes catastrophic backtracking in GNU grep +- **`SessionGrants` are per-tool, not per-path**: approving `bash` once auto-approves all future commands +- **`GuardianEvaluator::isInsideProject()` fails for project root itself**: trailing slash issue + +#### Minor +- `FileReadTool` cache uses mtime (1-second granularity) +- No BOM handling in file tools +- `hasRipgrep()` spawns subprocess on every `GrepTool` call — should cache +- Binary file handling missing in grep +- GlobTool doesn't show permission-denied errors + +--- + +### 6. LLM Client Layer + +**Files**: `src/LLM/AsyncLlmClient.php`, `PrismService.php`, `RetryableLlmClient.php`, `ModelDefinitionSource.php`, `RelayProviderRegistry.php` + +#### Critical +- **Provider lists can drift**: `AsyncLlmClient::OPENAI_COMPATIBLE_PROVIDERS` not checked by `LlmClientFactory` (`LlmClientFactory.php:45`) +- **Unlimited retries by default**: `$maxAttempts = 0` in production wiring (`LlmServiceProvider.php:81`) +- **Substring model matching**: `"gpt-4o-mini"` matches `"gpt-4o"` — wrong pricing/context (`ModelDefinitionSource.php:86-101`) + +#### Important +- **`PrismService` drops `reasoningContent`**: thinking content lost for Anthropic/Gemini (`PrismService.php:111-120`) +- **No cancellation in `PrismService`**: `$cancellation` param documented as unused (`PrismService.php:107`) +- **Jitter always adds, never subtracts**: backoff is `base + [0, 0.3*base]`, not `base ± 0.3*base` (`RetryableLlmClient.php:132`) +- **No circuit breaker**: persistent failures retry forever +- **`smartDelay` blocking path**: `sleep()` in ANSI mode doesn't check cancellation during sleep +- **`cached_write_price` defaults to `input_price`**: Anthropic cache write is 1.25x, undercharged if missing from spec +- **Provider alias maps split between two classes**: can drift (`ModelDefinitionSource.php:25` vs `RelayProviderRegistry.php:213`) +- **No streaming support in `AsyncLlmClient`**: must unwrap via `inner()` — leaky abstraction + +#### Minor +- No connection pool sharing between subagent clients +- `setApiKey()` accepts empty strings +- Timeout values hardcoded (600s/300s), not configurable +- Duplicated `supportsTemperature()` in both client classes + +--- + +### 7. Session & Database Persistence + +**Files**: `src/Session/Database.php`, `MessageRepository.php`, `MessageSerializer.php`, `SessionManager.php`, `MemoryRepository.php`, `MemorySelector.php` + +#### Critical +- **No `PRAGMA busy_timeout`**: concurrent writes crash with `SQLITE_BUSY` (`Database.php:38-39`) +- **Silent message loss on null tool_result**: message silently dropped → broken conversation → API errors (`MessageSerializer.php:109-111`) +- **No session/message deletion**: database grows unbounded +- **`forProject()` loads ALL memories**: no limit, O(n log n) sort every retrieval (`MemoryRepository.php:65-88`) + +#### Important +- **`saveMessage()` silently no-ops when no session**: data loss with no warning (`SessionManager.php:115-117`) +- **Session switch doesn't validate target**: FK violation on first message save (`SessionManager.php:99-102`) +- **LIKE-based search, no FTS5**: full table scan per search (`MemoryRepository.php:186-192`) +- **Timestamp timezone mismatch in memory expiry**: `date('c')` produces timezone offsets, string comparison may break (`MemoryRepository.php:67`) +- **`loadActive()` loads all message content**: no pagination, multi-MB tool outputs in RAM (`MessageRepository.php:76-80`) +- **`markCompactedIds` not session-scoped**: cross-session compaction possible with leaked IDs (`MessageRepository.php:133-145`) +- **Role mismatch between `MessageMapper` and `MessageSerializer`**: `'tool'` vs `'tool_result'` +- **No role validation in `append()`**: invalid roles silently stored then dropped on deserialization + +#### Minor +- Directory permissions 0755 on database directory +- `findByPrefix` uses LIKE without escaping `%`/`_` +- Timestamp precision mismatch: sessions (microseconds) vs messages (seconds) +- No session title sanitization +- `MemoryInjector` truncation at 180-240 chars with no truncation indicator +- Memory scoring uses undocumented magic numbers + +--- + +### 8. Commands & Slash Commands + +**Files**: `src/Command/AgentCommand.php`, `SlashCommandRegistry.php`, `Slash/*.php` + +#### Critical +- **No signal handling**: Ctrl+C skips all cleanup — orphaned processes, broken terminal (`AgentCommand.php`) +- **QuitCommand double-teardown**: `teardown()` called twice if not idempotent (`QuitCommand.php:39` + `AgentCommand.php:299`) +- **`ResumeCommand` clears permissions but not mode**: mode mismatch after resume (`ResumeCommand.php:79`) +- **`FeedbackCommand` prompt injection**: user text interpolated directly into LLM prompt (`FeedbackCommand.php:57-72`) + +#### Important +- **Unknown slash commands fall through to LLM**: `/typo something` sent as user message instead of error +- **TUI init failure leaves terminal in broken state**: alternate screen buffer, raw mode not restored (`AgentSessionBuilder.php:49-52`) +- **Whitespace-only input sent to LLM**: `" "` not filtered +- **`NewCommand` doesn't cancel running subagents**: stale agents operate on new session (`NewCommand.php:40-48`) +- **`SessionFormatter::formatAge` assumes numeric timestamps**: ISO date strings produce wildly incorrect ages +- **`RenameCommand` inconsistent quote stripping**: single-quote regex missing `$` anchor +- **`ClearCommand` uses raw ANSI**: conflicts with TUI renderer state (`ClearCommand.php:48`) +- **`SettingsCommand` is 860+ lines**: severe maintenance concern +- **`CompactCommand` has no success/error feedback**: user gets no indication of result + +#### Minor +- No `/help` command +- No duplicate registration detection in `SlashCommandRegistry` +- `/tasks clear` space-in-name creates prefix collision risk +- CJK width not accounted for in preview truncation +- `ForgetCommand` shows success for non-existent IDs +- `PowerCommandRegistry` regex only matches `\w+` — hyphens excluded + +--- + +### 9. Settings & Configuration + +**Files**: `src/Settings/SettingsManager.php`, `YamlConfigStore.php`, `SettingsSchema.php`, `ConfigLoader.php`, `src/Provider/*` + +#### Critical +- **`reloadRepository()` loses user/project YAML overrides**: only reloads bundled defaults (`SettingsManager.php:267-274`) +- **Non-atomic config writes**: `file_put_contents()` without temp+rename (`YamlConfigStore.php:60`) +- **Audio config mutates shared LLM client**: `setProvider()`/`setModel()` on shared singleton (`SessionServiceProvider.php:56-65`) +- **Migration rewrites YAML every boot**: non-atomic, no one-time flag (`DatabaseServiceProvider.php:92-145`) +- **Provider registration order is implicit**: hardcoded sequence, no dependency declaration (`Kernel.php:48-58`) +- **`LlmServiceProvider` captures stale config**: singletons don't reflect runtime settings changes + +#### Important +- **Toggle normalization incomplete**: `"0"`, `"false"`, `"no"` not handled correctly (`SettingsManager.php:277-289`) +- **No change notification**: settings changes don't propagate to dependent components +- **Missing env vars resolve to empty string**: `${MISSING_KEY}` → `''` instead of `null` (`ConfigLoader.php:72-76`) +- **Malformed YAML crashes app**: no try/catch around `Yaml::parse()` (`YamlConfigStore.php:23-35`) +- **Config merge doesn't handle indexed arrays**: `mergeDeep()` appends instead of replacing for indexed arrays +- **`DatabaseServiceProvider::boot()` injects SQLite config after `RelayRegistry` already constructed**: stale config +- **No first-run config creation**: depends entirely on bundled defaults +- **Missing settings in schema**: ~10 config keys have no type validation or labels + +#### Minor +- Static schema caching creates cross-instance coupling +- `SettingsPaths` instantiated repeatedly instead of cached +- Legacy `.kosmokrator.yaml` support adds complexity +- `LoggingServiceProvider` has side effects in `register()` instead of `boot()` + +--- + +### 10. Diff Rendering & UI Display + +**Files**: `src/UI/Diff/DiffRenderer.php` (548 lines), `AgentDisplayFormatter.php`, `AgentTreeBuilder.php`, `UIManager.php`, `Theme.php` + +#### Critical +- **No binary file detection in DiffRenderer**: binary content produces garbled output (`DiffRenderer.php:33-166`) +- **No TUI→ANSI mid-session fallback**: renderer fixed at construction (`UIManager.php:27-29`) + +#### Important +- **Line numbers for context lines use `$newLine` only**: old-file line number lost (`DiffRenderer.php:131`) +- **30+ hardcoded ANSI codes outside Theme**: inconsistent color shades across 8+ files +- **Color shade inconsistencies**: gold/accent, success, error, info all have different RGB values in hardcoded vs Theme +- **No terminal capability detection**: no `NO_COLOR`, `COLORTERM`, `TERM` checks +- **No large diff truncation**: thousands of changes flood terminal in ANSI mode +- **`padWithFileContext` first-match ambiguity**: duplicated code blocks match wrong occurrence +- **`str_pad` with multi-byte strings**: CJK under-padded +- **No depth limit on tree recursion**: stack overflow possible with deep nesting + +#### Minor +- Hunk separator `· ✧ ·` has no Unicode fallback +- Missing Theme palette entries for 7 commonly-used colors +- `seedMockSession()` violates Liskov substitution +- Agent IDs not truncated — can produce very wide labels + +--- + +### 11. Power Commands & UX Workflows + +**Files**: `src/Command/Power/*.php` (21 commands), `src/UI/Ansi/Ansi*.php` (animation classes) + +#### Critical +- **`:release` has no programmatic push guard**: prompt-only "ask before push" (`ReleaseCommand.php:78-79`) +- **`:unleash` can spawn 125+ agents**: no resource constraints or rate limiting (`UnleashCommand.php:47-48`) +- **No cancellation in animations**: `usleep()` blocks, no SIGINT handling during animations + +#### Important +- **All power commands are purely prompt-driven**: no programmatic logic, all workflow enforcement via LLM compliance +- **`:autopilot` no loop guard**: Phase 5→3 re-entry has no max iteration count +- **`:babysit` no wall-clock timeout**: can run indefinitely +- **`:research` no cancellation guidance**: 7+ agents with no cleanup on cancel +- **`:release` no dry-run mode**: goes straight from version bump to push +- **18 commands registered manually**: no auto-discovery, adding a new command is error-prone +- **All animations use `register_shutdown_function(print(...))`**: `print` returns 1, may emit spurious "1" +- **No `KOSMOKRATOR_NO_ANIM` environment variable**: accessibility issue for screen readers/CI + +#### Minor +- `:auto` alias too generic, could clash +- `:sci` alias too short/non-obvious +- `:watch` conflicts with Unix `watch` mental model +- Animation `exec('tput cols')` called per animation, not cached + +--- + +### 12. Testing Coverage & Quality + +**Files**: `tests/Unit/**/*.php` (~140 tests), `tests/Feature/AgentCommandTest.php` (1 test) + +#### Critical +- **ContextManager has only 1 test**: core component with vast untested surface +- **No tool result ordering tests**: concurrent execution ordering completely unverified +- **No UTF-8 truncation tests**: `OutputTruncator` multi-byte handling untested +- **No integration tests for agent loop**: no end-to-end prompt→tool→response test + +#### Important +- **5 pipeline/factory classes untested**: `ContextPipeline`, `ContextPipelineFactory`, `SubagentPipeline`, `SubagentPipelineFactory`, `LlmClientFactory` +- **21 Power commands have zero tests** +- **Session persistence lifecycle untested**: no create→persist→load round-trip test +- **`ProviderAuthService` untested**: handles API key/auth flows +- **`SessionSettingsApplier` untested**: applies settings to running sessions +- **Only 1 feature test**: `AgentCommandTest` just verifies exit code 0 with `/quit` + +#### Minor +- StuckDetector missing oscillation pattern tests +- ToolExecutor missing UTF-8/malformed input tests +- No `tests/Integration/` or `tests/Functional/` directories +- No code coverage enforcement + +--- + +## Cross-Cutting Themes + +### 1. Static Mutable State (5 instances) +- `BashTool::$progressCallback` — race condition +- `SettingsSchema::$definitions` / `$aliases` — cross-instance pollution +- `ShellSessionManager` — no static state but shared instance with no cleanup guarantees +- **Pattern**: mutable statics in a concurrent (fiber-based) environment are dangerous. Each should be instance state or use fiber-local storage. + +### 2. Non-Atomic File Operations (6 instances) +- `FileWriteTool` — `file_put_contents()` directly +- `YamlConfigStore` — `file_put_contents()` directly +- `PatchApplier::applyAdd()` — `file_put_contents()` directly +- `DatabaseServiceProvider::migrateYamlKeys()` — `file_put_contents()` directly +- `OutputTruncator::saveFull()` — no error handling +- `PatchApplier` move operations — write+unlink not atomic +- **Fix**: Extract a shared `AtomicFileWriter` utility that does write-to-temp + `rename()`. + +### 3. Fragile String-Based Detection (4 instances) +- `collectResult()` — `"Error:"` prefix for success detection +- `isContextOverflow()` — string matching on error messages +- `showBatch()` — substring `"spawned in background"` for filtering +- `PermissionConfigParser` — tool name string matching for opt-in security +- **Fix**: Use typed result objects, error codes, or enums instead of string conventions. + +### 4. Resource Leak Pattern (8 instances) +- Shell sessions — orphaned on crash +- TUI timer IDs — not cancelled on phase transitions +- Container widgets — accumulate indefinitely +- Memory objects — loaded entirely into RAM +- Database rows — no deletion mechanism +- Subagent processes — no cleanup on parent crash +- Editor text restore — lost on error exit +- Service singletons — no disposal lifecycle +- **Fix**: Implement a coordinated cleanup/teardown system with shutdown handlers. + +### 5. Configuration Staleness (3 instances) +- `LlmServiceProvider` captures config at registration → stale singletons +- `SettingsManager::reloadRepository()` re-reads only bundled defaults → lost overrides +- `DatabaseServiceProvider::boot()` injects config after consumers constructed +- **Fix**: Implement config change notification (observer/event system) or use lazy resolution. + +### 6. Hardcoded ANSI Color Codes (30+ instances) +Across 8+ files, colors bypass `Theme` with slightly different RGB values. This makes the palette inconsistent and unmaintainable. +- **Fix**: Add missing palette entries to `Theme`, replace all hardcoded codes with `Theme::` calls. + +### 7. No Terminal Adaptation +- No color depth detection (16/256/24-bit) +- No Unicode fallback +- No light/dark terminal detection +- Fixed-width elements overflow on narrow terminals +- **Fix**: Add a `TerminalCapabilities` class that detects once at startup and is consulted by Theme. + +--- + +## Security Concerns Summary + +| # | Concern | Severity | Exploitability | File | +|---|---------|----------|---------------|------| +| 1 | File tools have no path containment | **Critical** | High — LLM can be tricked into writing outside project | `FileWriteTool.php:49` | +| 2 | Permission system defaults to Allow | **Critical** | Medium — requires misconfigured `approval_required` | `PermissionEvaluator.php:66-68` | +| 3 | SessionGrants are per-tool, not per-path | **High** | Medium — one approval grants all future operations | `SessionGrants.php:17-19` | +| 4 | Symlink following via `realpath()` | **High** | Low — requires symlink creation inside project | `PathResolver.php:27` | +| 5 | FeedbackCommand prompt injection | **High** | Medium — user text in LLM prompt | `FeedbackCommand.php:57-72` | +| 6 | Regex DoS in GrepTool | **Medium** | High — `(.){1000000}` pattern | `GrepTool.php:58` | +| 7 | GlobTool path traversal info leak | **Medium** | Low — can discover files outside project | `GlobTool.php:51` | +| 8 | API keys in config files with loose permissions | **Medium** | Medium — 0755 on config dir | `YamlConfigStore.php:46-61` | +| 9 | Config files written non-atomically | **Medium** | Low — race condition window | `YamlConfigStore.php:60` | +| 10 | Database directory world-readable | **Low** | Low — 0755 permissions | `Database.php:27` | + +**Recommended Priority**: +1. Add path containment checks directly in file tools (don't rely solely on permission chain) +2. Switch `PermissionEvaluator` to default-deny +3. Make `SessionGrants` path/command-scoped +4. Add timeout enforcement to `GrepTool` +5. Set config file permissions explicitly (0600) + +--- + +## Refactoring Backlog (Prioritized) + +### P0 — Do Now (Bugs & Security) + +| # | Refactoring | Effort | Impact | +|---|------------|--------|--------| +| 1 | Add `AtomicFileWriter` utility, use in `FileWriteTool`, `YamlConfigStore`, `PatchApplier` | 2h | Fixes 6 non-atomic write bugs | +| 2 | Add path containment check in file tools (validate against project root) | 1h | Critical security fix | +| 3 | Fix `OutputTruncator::truncate()` to use `mb_strcut()` instead of `substr()` | 15min | Prevents UTF-8 corruption | +| 4 | Fix tool result ordering in `ToolExecutor` to match original call order | 30min | Fixes LLM confusion | +| 5 | Add `PRAGMA busy_timeout=5000` to Database constructor | 1 line | Fixes concurrent process crashes | +| 6 | Set `maxAttempts` default to 3 in `RetryableLlmClient` or `LlmServiceProvider` | 1 line | Prevents infinite retry loops | +| 7 | Fix `reloadRepository()` to re-merge all YAML layers | 2h | Prevents config loss | +| 8 | Fix audio config to clone LLM client instead of mutating shared singleton | 30min | Prevents all-agent LLM corruption | +| 9 | Add modal mutex in `TuiModalManager` | 1h | Prevents deadlock | + +### P1 — Do Soon (Stability & UX) + +| # | Refactoring | Effort | Impact | +|---|------------|--------|--------| +| 10 | Consolidate triple 30fps timers into single tick with phase-aware dispatch | 4h | Performance, CPU reduction | +| 11 | Add signal handler in `AgentCommand` for cleanup on SIGINT/SIGTERM | 2h | Prevents resource leaks | +| 12 | Add `TerminalCapabilities` detection class | 3h | Enables light/dark, color depth, Unicode fallbacks | +| 13 | Move 30+ hardcoded ANSI codes to `Theme` palette methods | 4h | Color consistency, maintainability | +| 14 | Add `shell_kill` to read-only mode guard | 5min | Prevents state change in Ask/Plan mode | +| 15 | Fix `collectResult()` to use typed error detection instead of string prefix | 1h | Prevents false negatives | +| 16 | Add streaming output to ANSI renderer | 4h | Major UX improvement | +| 17 | Add `/help` command | 1h | Discoverability | +| 18 | Fix `PrismService` to pass through `reasoningContent` | 30min | Restores thinking content for Anthropic/Gemini | +| 19 | Add periodic cleanup timer for shell sessions | 1h | Prevents session leaks | +| 20 | Add AnsiTheogony skip mechanism (keypress detection) | 2h | UX — no more 80s unskippable animation | + +### P2 — Do Eventually (Code Quality) + +| # | Refactoring | Effort | Impact | +|---|------------|--------|--------| +| 21 | Split `SettingsCommand` (860 lines) into focused sub-commands | 8h | Maintainability | +| 22 | Split `AnsiTheogony` (2014 lines) into phase classes | 4h | Maintainability | +| 23 | Add integration test suite: agent loop, session persistence, permission flow | 8h | Test confidence | +| 24 | Implement config change notification system (events) | 4h | Settings propagation | +| 25 | Add `lazy()` resolution for LLM singletons to avoid stale config capture | 2h | Config freshness | +| 26 | Extract `wrapAnsiText()` to shared utility | 1h | DRY | +| 27 | Add depth limit to agent tree rendering | 30min | Safety | +| 28 | Cache `hasRipgrep()` result as static | 5min | Performance | +| 29 | Use hash map for `findTool()` instead of linear scan | 15min | Performance | +| 30 | Add `declare(strict_types=1)` to all files missing it | 2h | Type safety | + +--- + +## 13. Subagent Orchestration (Deep) + +**Files**: `src/Agent/SubagentOrchestrator.php` (665 lines), `SubagentFactory.php`, `SubagentTool.php`, `SubagentStats.php` + +#### Critical +- **`yieldSlot`/`reclaimSlot` slot leak for root agent**: Root agent (`id='root'`) never acquires a global semaphore lock. Each `reclaimSlot('root')` consumes a slot permanently. After N calls (concurrency limit), all slots are consumed → deadlock. (`SubagentOrchestrator.php:471-496`) +- **`wouldCreateCycle` crashes on pruned stats**: Accesses `$this->stats[$current]->dependsOn` without existence check. Pruned agents cause TypeError. (`SubagentOrchestrator.php:375`) +- **Shared `ContextBudget` across parent and all children**: All subagents at all depths share the same `ContextBudget` instance. Deep child compaction deducts from root's pool. (`SubagentFactory.php:87`) +- **Shared `ProtectedContextBuilder` — mutable state leak**: Child agents' protected context entries appear in parent's context too. (`SubagentFactory.php:101`) + +#### Important +- **`pruneCompleted` removes agents needed for dependency resolution**: New agents depending on pruned IDs get "Unknown dependency agent" errors. +- **Retry loop holds semaphore slot during delay**: Failing agent blocks a concurrency slot for 30+ seconds per retry. +- **Token double-counting during orchestrator-level retries**: Same stats object accumulates tokens across all retry attempts. Correct for total cost but misleading for per-attempt metrics. +- **`cancelAll()` does not clear `$this->cancellations`**: After cancel, array still references already-cancelled deferreds. + +#### Minor +- `autoIdCounter` not thread-safe (safe under Amp cooperative scheduling but undocumented). +- `extractFailureMessage` doesn't traverse full previous-exception chain. + +--- + +## 14. Error Handling & Resilience + +**Codebase-wide scan of exception patterns, catch blocks, and recovery logic.** + +#### Critical +- **No project-specific exception hierarchy**: Only 2 custom exceptions (`RetryableHttpException`, `IntroSkippedException`). All ~50+ other throws use bare `\RuntimeException` or `\InvalidArgumentException`. No `KosmokratorException` base class. +- **6 silently swallowed exceptions**: `TuiModalManager.php:343`, `TuiToolRenderer.php:363`, `DiffRenderer.php:539`, `UpdateChecker.php:132`, `SkillLoader.php:109`, `RetryableLlmClient.php:81` — all catch `\Throwable` with empty body or return, no logging. +- **Internal error messages leaked to LLM**: `$e->getMessage()` stored as assistant messages at `AgentLoop.php:288,312,518`, `ToolExecutor.php:313`, `AbstractTool.php:35`. No sanitization layer. Raw HTTP status codes, internal paths, provider details visible to the LLM. + +#### Important +- **~25 overly broad `\Throwable` catches**: Should catch specific types. Catches `Error`, `TypeError`, `ParseError` which indicate programming bugs, not runtime failures. +- **Missing exception types for 5+ failure domains**: LLM/API failures, file operations, auth/OAuth, shell sessions, patch parsing vs application. +- **`runHeadless()` has no `finally` block**: Unlike `run()`, headless agent crashes don't reset UI phase. + +#### Minor +- `SafeDisplay::call()` is an excellent pattern — prevents display errors from crashing execution. +- Tool error messages are generally well-crafted and actionable. + +--- + +## 15. Type Safety & PHP 8.4 Patterns + +**Codebase-wide scan of `declare(strict_types)`, return types, PHPStan config, modern PHP patterns.** + +#### Important +- **~20 files missing `declare(strict_types=1)`**: Most critically `AgentLoop.php`, `AsyncLlmClient.php`, all `Tool/Coding/` tools (BashTool, FileWriteTool, FileEditTool, FileReadTool), `Kernel.php`, `PrismService.php`. No dangerous implicit coercions found — all explicit casts — but policy inconsistency. +- **PHPStan level 5** with 30+ ignore rules: Some hide real issues (Container/Application type mismatch). Should target level 7-8. +- **No PHP 8.4 property hooks or asymmetric visibility used**: Project targets `^8.4` but only uses `readonly` and union types. +- **~80 `@var` annotations**: Indicates areas where PHP's type system can't express constraints natively. Consider value objects for common shapes. + +#### Minor +- All non-constructor methods have return type declarations — excellent. +- `mixed` return types only in 4 locations — all acceptable for generic config getters. +- `never` return type unused despite applicable exit() paths in CLI commands. + +--- + +## 16. Kernel Bootstrap & Service Wiring + +**Files**: `bin/kosmokrator`, `src/Kernel.php`, `src/Provider/*.php` + +#### Critical +- **No error handling during boot**: `bin/kosmokrator` has zero try-catch blocks. `Kernel::boot()` doesn't wrap provider loops. Partial initialization on failure. +- **No signal handling anywhere in codebase**: No `pcntl_signal`. Ctrl+C = unclean death — no session save, no DB cleanup, no child process termination. +- **`LlmServiceProvider::registerPrism()` resolves services eagerly**: `PrismManager` and `RelayRegistry` resolved immediately during registration, not lazily. Any construction error is immediately fatal. +- **Undefined env vars silently resolve to empty string**: `${MISSING_KEY}` → `''` instead of `null`. Provider may attempt API calls with empty string as key. +- **No config validation**: `temperature: "warm"` passes through to LLM clients unchecked. + +#### Important +- **Revolt error handler registered last in `boot()`**: Earlier async operations unprotected. +- **`DatabaseServiceProvider::boot()` performs file I/O**: `migrateYamlKeys()` reads/writes YAML during DI boot phase. Side-effect in boot is unexpected and risky. +- **Multiple config keys in code but absent from `kosmokrator.yaml`**: `max_tokens`, `audio_provider`, `audio_model`, `reasoning_effort`, etc. Defaults scattered across codebase. +- **`SettingsManager::reloadRepository()` re-parses all YAML on every write**: I/O-heavy, triggers on every `/set` command. + +#### Minor +- Version resolution uses `shell_exec('git describe')` on every boot — could cache. +- `LaravelApp` (full Application class) used as plain DI container — heavier than needed. +- No scoped/transient bindings — all services are singletons. + +--- + +## Updated Cross-Cutting Themes + +### 8. No Graceful Shutdown (Systemic) +- **No `pcntl_signal` handling anywhere**: Ctrl+C = immediate process death. +- No `finally` blocks in `runHeadless()`. +- No shutdown handlers for shell sessions. +- No `__destruct()` on resource-heavy services. +- **Fix**: Add `pcntl_signal(SIGINT, ...)` handler in `Kernel::boot()` that triggers coordinated cleanup. + +### 9. Exception Hygiene (Codebase-wide) +- Only 2 custom exceptions in 277 files. +- 6 silently swallowed `\Throwable` catches. +- Raw `$e->getMessage()` leaked to LLM in 5+ locations. +- ~25 overly broad catches that mask programming bugs. +- **Fix**: Create `KosmokratorException` hierarchy with 5-8 domain-specific types. Add error sanitization layer before LLM-facing messages. + +### 10. Shared Mutable State in Subagent Tree +- `ContextBudget` shared across all agent depths. +- `ProtectedContextBuilder` shared — child mutations leak to parent. +- `yieldSlot`/`reclaimSlot` slot leak for root agent. +- **Fix**: Clone these objects per-subagent rather than sharing references. + +--- + +## Updated Refactoring Backlog + +### P0 — Add to existing P0 list + +| # | Refactoring | Effort | Impact | +|---|------------|--------|--------| +| 31 | Fix `yieldSlot`/`reclaimSlot` for root agent: skip slot management for depth 0 | 1h | Prevents concurrency slot leak → deadlock | +| 32 | Clone `ContextBudget` and `ProtectedContextBuilder` per subagent | 2h | Prevents cross-agent context pollution | +| 33 | Add `KosmokratorException` base class + 5 domain subtypes | 3h | Enables proper catch granularity | +| 34 | Add error sanitization before LLM-facing messages | 2h | Prevents internal info leakage to LLM | +| 35 | Wrap `ensureSchema()` in transaction + add UNIQUE on schema_version | 30min | Prevents migration re-run bugs | +| 36 | Add `pcntl_signal` handler in Kernel for graceful shutdown | 4h | Systemic fix for resource leaks | + +### P1 — Add to existing P1 list + +| # | Refactoring | Effort | Impact | +|---|------------|--------|--------| +| 37 | Add existence check in `wouldCreateCycle` for pruned stats | 15min | Prevents TypeError crash | +| 38 | Log in all 6 silent `\Throwable` catches | 1h | Makes debugging possible | +| 39 | Bump PHPStan from level 5 to level 7 | 4h | Catches more type issues | +| 40 | Add `declare(strict_types=1)` to 20 missing files | 1h | Policy consistency | +| 41 | Add `pruneCompleted()` guard against in-use stats | 2h | Prevents "unknown dependency" errors | + +--- + +## Final Statistics + +| Dimension | Agents | Sub-agents | Critical | Important | Minor | +|-----------|--------|------------|----------|-----------|-------| +| AgentLoop Core | 1 | 4 | 4 | 11 | 5 | +| Subagent Orchestration | 1 | 4 | 5 | 6 | 4 | +| TUI Renderer | 1 | 5 | 8 | 15 | 8 | +| ANSI Renderer & Markdown | 1 | 4 | 2 | 8 | 8 | +| Tool System & Permissions | 1 | 4 | 5 | 9 | 8 | +| LLM Client Layer | 1 | 4 | 3 | 8 | 4 | +| Session & Database | 2 | 8 | 10 | 16 | 12 | +| Commands & Slash Commands | 1 | 4 | 4 | 11 | 9 | +| Settings & Configuration | 1 | 3 | 6 | 8 | 5 | +| Diff & UI Display | 1 | 3 | 3 | 10 | 6 | +| Power Commands & UX | 1 | 4 | 3 | 12 | 8 | +| Testing Coverage | 1 | 4 | 4 | 5 | 5 | +| Error Handling | 1 | 4 | 3 | 2 | 2 | +| Type Safety | 1 | 3 | 0 | 3 | 4 | +| Kernel Bootstrap | 1 | 3 | 5 | 4 | 3 | +| **Total** | **16** | **~62** | **65** | **128** | **91** | + +--- + +*Audit completed 2026-04-04. Generated by 16 parallel exploration agents spawning ~62 sub-agents across 20 audit dimensions. 284 total findings.* diff --git a/docs/ecosystem/kosmokrator/proposals/command-inspiration.md b/docs/ecosystem/kosmokrator/proposals/command-inspiration.md new file mode 100644 index 0000000..35ee4a7 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/command-inspiration.md @@ -0,0 +1,65 @@ +# Command Inspiration — From oh-my-claudecode + +Audit of oh-my-claudecode's 31 slash commands compared against KosmoKrator's existing 22. Candidates for future implementation, grouped by priority. + +## High-Value Additions + +### /autopilot +Full autonomous pipeline from idea to verified working code. 5 phases: Expand (clarify requirements) → Plan (architecture) → Execute (write code) → QA (test + verify) → Validate (acceptance criteria). User kicks it off and walks away. + +### /ralph (persistence loop) +"The boulder never stops." Keeps retrying a task until verified complete. PRD-driven verification — after each attempt, checks acceptance criteria. Mandatory architect review before marking done. Max retry limit prevents infinite loops. Named after Sisyphus's boulder — fits the mythology theme. + +### /trace +Evidence-driven investigative debugging. Generates competing hypotheses for a bug, ranks them by evidence weight, then runs discriminating probes (targeted searches/tests) to narrow down the root cause. Structured output: ranked explanations with confidence scores. + +### /deep-interview +Socratic requirements gathering before expensive work. Asks probing questions, scores ambiguity mathematically across dimensions (Goal, Constraints, Criteria, Context). Won't proceed until ambiguity drops below threshold (~20%). Uses challenge agents: Contrarian (pokes holes), Simplifier (finds simpler approaches), Ontologist (clarifies terms). Prevents wasted swarm runs on vague requests. + +### /deslop +AI slop cleaner — regression-safe deletion-first cleanup of AI-generated bloat. Reviews code for: unnecessary abstractions, over-engineering, dead code, excessive comments, unused error handling. Deletion-first workflow: remove before rewriting. Optional reviewer-only mode (reports but doesn't change). Natural complement to /unleash — clean up after the swarm. + +### /deepinit +One-shot comprehensive codebase documentation generator. Crawls entire project, generates hierarchical AGENTS.md-style docs across all directories. Useful for onboarding new contributors or giving AI agents better context. + +## Medium-Value Additions + +### /team +Staged pipeline with named roles: team-plan → team-prd → team-exec → team-verify → team-fix. Each stage is a specialized agent with handoff documents preserving decisions, alternatives, and risks between stages. Inter-agent messaging for coordination. + +### /ultraqa +Autonomous QA cycling: run tests → analyze failures → fix → re-run → verify. Repeats up to 5 cycles or until all tests pass. Useful after large refactors or /unleash runs. + +### /doctor +Self-diagnostic command. Checks: PHP version, extensions, config validity, provider API keys, database connectivity, TUI availability, dependency versions. Reports issues with suggested fixes. + +### /cancel +Gracefully cancel any active mode (autopilot, ralph, unleash) with intelligent state cleanup. Auto-detects what's running and tears it down properly. + +### /learner +Extract a reusable debugging pattern or technique from the current conversation. Quality-gated: only saves if the pattern is generalizable. Stores as a "skill" that can be referenced in future sessions. + +## Already Covered by KosmoKrator + +| OMC Command | KosmoKrator Equivalent | +|---|---| +| /plan, /ralplan | /plan (consensus planning could be added) | +| /ask | /ask | +| /setup | `kosmokrator setup` | +| /hud | Built-in status bar | +| /cancel | Ctrl+C cascading cancellation | +| /ultrawork (parallel execution) | /unleash + SubagentOrchestrator | +| /skill (memory/patterns) | /memories system | +| /external-context | Memory search + file tools | + +## OMC Patterns Worth Noting + +- **Mathematical ambiguity gating** — weighted scoring before execution prevents wasted work on vague requests +- **Handoff documents** — structured context preservation between pipeline stages (decisions, alternatives, risks) +- **Challenge agents** — Contrarian/Simplifier/Ontologist roles that stress-test plans before execution +- **Consensus planning** — Planner/Architect/Critic loop produces better plans than single-agent planning +- **Magic keywords** — trigger commands without `/` prefix (e.g. typing "autopilot" activates the pipeline) + +## Source + +Analysis based on: `/tmp/oh-my-claudecode/skills/*/SKILL.md` diff --git a/docs/ecosystem/kosmokrator/proposals/context-compaction.md b/docs/ecosystem/kosmokrator/proposals/context-compaction.md new file mode 100644 index 0000000..a4d5e48 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/context-compaction.md @@ -0,0 +1,142 @@ +# Context Compaction + +> Status: Historical plan. Parts of this design are now implemented, but this document remains a design snapshot rather than the canonical current-state description. + +## Context + +Long coding sessions hit the LLM context window limit. Currently `ConversationHistory::trimOldest()` silently drops complete turns — the agent loses context without knowing what was lost. We need intelligent compaction that summarizes old turns before discarding them, preserving critical context in a compressed form. + +Depends on: **Session Persistence (SQLite)** — compaction should be non-destructive, with original messages preserved in the database. + +## Two Triggers + +1. **Automatic** — After each LLM response, check `promptTokens >= contextWindow - buffer`. Buffer defaults to 20K tokens (configurable). Replaces `trimOldest()`. +2. **Manual** — `/compact` slash command for user-initiated compaction. + +## Algorithm + +``` +1. Check threshold: promptTokens >= (contextWindow - buffer) +2. Split history into OLD (everything before last 2-3 turns) and RECENT (preserved) +3. Prune: truncate large tool outputs (>1000 chars) in OLD to "[output truncated — N chars]" +4. Send OLD messages to LLM with compaction prompt +5. Replace OLD messages with a single SystemMessage containing the summary +6. Mark original messages as compacted in SQLite (non-destructive) +7. Continue — agent sees summary + recent turns +``` + +### Context After Compaction + +``` +[system prompt + instructions + environment] +[SystemMessage: summary of turns 1-15] <-- compacted +[user turn 16] <-- preserved (recent) +[assistant turn 16 + tool results] <-- preserved (recent) +[user turn 17] <-- current +``` + +### Compaction Prompt + +``` +Summarize the conversation above for a continuation agent. +Focus on information needed to continue the work seamlessly. + +Use this structure: +--- +## Goal +[What the user is trying to accomplish] + +## Key Decisions +[Important technical choices, constraints, user preferences] + +## Accomplished +[Work completed — specific file paths and changes made] + +## In Progress +[Current task and what remains to be done] + +## Relevant Files +[Files read, edited, or created — with brief notes on each] +--- +``` + +Compaction uses the same LLM client, no tools. The compaction agent is a hidden internal call. + +### Fallback + +If compaction itself overflows (conversation too large even for the summary call), fall back to `trimOldest()` as a last resort and log a warning. + +## Architecture + +### New: `src/Agent/ContextCompactor.php` + +```php +class ContextCompactor +{ + public function __construct( + private LlmClientInterface $llm, + private ModelCatalog $models, + private LoggerInterface $log, + private int $bufferTokens = 20_000, + ) {} + + public function needsCompaction(int $promptTokens, string $model): bool; + public function compact(ConversationHistory $history, int $keepRecent = 3): string; // returns summary +} +``` + +- `needsCompaction()` — checks threshold against context window +- `compact()` — builds the compaction prompt, calls LLM, returns summary text +- History replacement handled by `ConversationHistory::compact()` + +### Modified: `src/Agent/ConversationHistory.php` + +```php +public function compact(string $summary, int $keepRecent = 3): void; +// Replaces messages[0..n-keepRecent] with a SystemMessage containing the summary +// With SQLite: marks old messages as compacted, stores summary as a new message +``` + +### Modified: `src/Agent/AgentLoop.php` + +After each `run()` response: +```php +if ($this->compactor->needsCompaction($response->promptTokens, $this->getModelName())) { + $summary = $this->compactor->compact($this->history); + $this->ui->showNotice('Context compacted.'); +} +``` + +### Modified: `src/Command/AgentCommand.php` + +Add `/compact` slash command that triggers manual compaction. + +### Config + +```yaml +kosmokrator: + compaction: + auto: true # Enable automatic compaction + buffer: 20000 # Token buffer to reserve + keep_recent: 3 # Number of recent turns to preserve +``` + +## Differences from OpenCode + +| Aspect | OpenCode | KosmoKrator | +|--------|----------|-------------| +| Storage | SQLite, part-based | SQLite (once persistence added) | +| Pruning | Separate reversible pass | Inline truncation during compaction | +| Post-compact | Synthetic "continue" message | Normal flow continues | +| Summary stacking | Multiple summaries chain | One summary replaces all old | +| Destructive | No (DB keeps originals) | No (DB keeps originals, once SQLite added) | +| Fallback | Error on double-overflow | `trimOldest()` on double-overflow | + +## Verification + +1. Start a long session, watch token count climb in status bar +2. When threshold hit, auto-compaction fires — notice shown, status bar drops +3. Agent continues seamlessly — knows what was discussed +4. `/compact` works manually at any time +5. Summary includes file paths, decisions, and current task +6. Original messages preserved in SQLite (can be viewed later) diff --git a/docs/ecosystem/kosmokrator/proposals/context-management-redesign.md b/docs/ecosystem/kosmokrator/proposals/context-management-redesign.md new file mode 100644 index 0000000..12c32e8 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/context-management-redesign.md @@ -0,0 +1,705 @@ +# Context Management Redesign + +> Status: Proposal. This document expands the current context-management roadmap using patterns observed in `tmp/codex`, `tmp/claude-src`, `tmp/oh-my-claudecode`, and `tmp/opencode`. + +This is a forward-looking design document. It describes improvements beyond the current shipped pipeline and includes both recommended changes and optional experiments. + +## Why This Exists + +KosmoKrator already ships a layered context pipeline: + +- output truncation +- tool-result deduplication +- pruning +- LLM compaction +- oldest-turn trimming fallback + +That baseline works, but it still has structural weaknesses: + +- compaction boundaries are computed independently in multiple places +- subagents use a weaker context policy than the main agent +- token budgeting is coarse +- compaction produces a flat summary but does not preserve protected operating context as a first-class structure +- persistent memories exist, but recall remains fairly primitive + +The projects under `tmp/` show several stronger patterns: + +- explicit replacement-history compaction instead of summary-only compaction +- effective-context budgeting with reserved output headroom +- lightweight micro-pruning before expensive compaction +- tiered memory and selective recall +- transcript/session recall outside the live prompt +- stronger subagent-specific overflow handling +- better observability and failure guards + +## Scope + +This document covers all major ideas surfaced during the comparative review, not only the immediately recommended ones: + +1. unified compaction planning and replacement history +2. effective-context budgeting +3. protected context reinjection after compaction +4. micro-pruning before full compaction +5. truncation storage for oversized tool outputs +6. tiered persistent memory +7. selective memory recall +8. session-history recall/search +9. subagent-specific context policy +10. failure guards and circuit breakers +11. context-health observability +12. optional advanced heuristics and experiments + +## Current-State Problems + +### 1. Boundary Drift + +Compaction currently decides what to replace in more than one place: + +- `src/Agent/ContextCompactor.php` +- `src/Agent/ConversationHistory.php` +- `src/Session/SessionManager.php` + +This means in-memory replacement and persisted compaction can diverge if the rules change in one place but not another. + +### 2. Headless/Subagent Degradation + +The main interactive flow can compact. Headless flows only trim oldest turns. Subagents therefore have the least durable context policy even though they often do the most tool-heavy work. + +### 3. Coarse Token Estimation + +Current estimation is based on a flat character heuristic. That is good enough for rough checks but too weak for accurate budgeting around: + +- large tool outputs +- JSON-heavy tool calls +- code vs prose +- reserved output tokens +- model switches to smaller windows + +### 4. Flat Summary Replacement + +Compaction currently replaces old context with a single summary message. It does not explicitly preserve: + +- active mode +- current task tree +- current environment snapshot +- current parent brief for subagents +- any protected operator directives + +These may survive in practice, but they are not guaranteed. + +### 5. Memory Exists, Recall Is Underspecified + +KosmoKrator can persist memories, including memories derived from compaction summaries, but it does not yet separate memory classes cleanly or use a bounded relevance-selection flow. + +## External Patterns Worth Borrowing + +### Codex + +Observed in `tmp/codex/codex-rs`: + +- compaction creates explicit replacement history, not only a summary +- protected initial context can be re-injected around compaction +- compaction can trim oldest items during the compaction attempt itself if the compaction prompt overflows +- context limits are based on model metadata, not one global rule + +### Claude Code + +Observed in `tmp/claude-src`: + +- effective context window reserves output headroom +- warning, error, auto-compact, and blocking thresholds are distinct +- microcompact removes low-value tool payloads before full compaction +- memory recall scans cheap headers first and then selects top relevant files +- repeated auto-compact failure uses circuit breakers +- context-management health is surfaced to the user + +### oh-my-claudecode + +Observed in `tmp/oh-my-claudecode`: + +- notepad tiers: always-loaded context, working memory, and manual memory +- pre-compact reinjection of small project memory and directives +- session-history search over summaries and transcripts +- pending context injection queues for one-shot reinsertion + +### OpenCode + +Observed in `tmp/opencode`: + +- prune before full compaction +- full oversized tool output can be written to disk while only a preview remains inline +- compaction can replay a user turn in overflow scenarios +- plugin hooks can augment compaction prompts + +## Goals + +### Primary Goals + +- preserve continuity through long sessions without silent context loss +- make compaction deterministic and persistence-safe +- reduce unnecessary full compactions +- keep subagents viable in long-running trees +- improve cross-session recall without bloating the live prompt + +### Secondary Goals + +- improve user visibility into context health +- make behavior tunable per model +- leave room for experiments without destabilizing the core agent loop + +### Non-Goals + +- perfect token accounting matching provider internals exactly +- replacing live conversation with an external database-first retrieval system +- introducing a vector database or heavy semantic indexing in the first pass + +## Proposed Architecture + +### 1. Unified Compaction Plan + +Introduce a first-class `CompactionPlan` or `CompactionResult` object. Instead of each layer recomputing boundaries, one planner computes the exact replacement once and all consumers use that result. + +Suggested shape: + +```php +final class CompactionPlan +{ + public function __construct( + public readonly int $keepFromMessageIndex, + public readonly array $keptMessageIds, + public readonly array $compactedMessageIds, + public readonly string $summary, + public readonly array $replacementMessages, + public readonly array $extractedMemories, + public readonly int $tokensIn, + public readonly int $tokensOut, + public readonly array $stats, + ) {} +} +``` + +Responsibilities: + +- `ContextCompactor` computes the plan +- `ConversationHistory` applies `replacementMessages` +- `SessionManager` persists the exact `compactedMessageIds` and summary from the plan +- observability reads `stats` instead of re-deriving them + +Benefits: + +- removes duplicated boundary logic +- makes compaction persistence-safe +- allows richer replacement than a single summary message +- makes testing easier + +### 2. Replacement History, Not Only Summary + +Compaction should produce a replacement history that may contain: + +- one summary system message +- one protected reinjection block +- optionally one compact memory block +- then the recent untouched turns + +Instead of: + +```text +[summary] +[recent turns] +``` + +Prefer: + +```text +[protected operating context] +[summary of compacted history] +[selected recalled memory or pending brief] +[recent turns] +``` + +This follows the stronger Codex pattern and reduces accidental instruction loss. + +### 3. Effective Context Budgeting + +Replace a single percent-of-window rule with a richer model. + +Suggested per-model configuration: + +```yaml +agent: + context: + reserve_output_tokens: 16000 + warning_buffer_tokens: 24000 + auto_compact_buffer_tokens: 12000 + blocking_buffer_tokens: 3000 + auto_compact_enabled: true +``` + +Derived values: + +- `effective_context_window = model_context_window - reserve_output_tokens` +- `warning_threshold = effective_context_window - warning_buffer_tokens` +- `auto_compact_threshold = effective_context_window - auto_compact_buffer_tokens` +- `blocking_threshold = effective_context_window - blocking_buffer_tokens` + +Expected behavior: + +- warning state before auto-compact +- proactive micro-prune before full compaction +- hard-stop or forced emergency compaction near blocking +- recompute thresholds when switching models + +### 4. Improved Token Estimation + +Token estimation does not need to be exact, but it should be more structured. + +Suggested improvements: + +- separate estimation for prose, code, JSON, tool calls, and tool results +- conservative padding factor on rough estimates +- count system prompt, task tree, environment context, and injected memories explicitly +- track recent observed prompt-token deltas from provider responses and use them to calibrate future estimates + +Optional extension: + +- maintain lightweight rolling correction factors per provider/model pair + +### 5. Protected Context Reinjection + +After compaction, re-inject a small protected block that does not depend on the summary prompt remembering everything. + +Candidate contents: + +- current agent mode +- current cwd and repo root +- current branch if available +- active task tree +- current user constraints and instructions that must survive +- current parent brief for subagents +- current permission mode + +This block should be small, normalized, and rebuilt from runtime state rather than conversation text. + +Suggested class: + +```php +final class ProtectedContextBuilder +{ + public function buildMainAgentContext(...): array; + public function buildSubagentContext(...): array; +} +``` + +### 6. Micro-Pruning Before Full Compaction + +Add a cheap, deterministic pass before LLM compaction. + +Micro-prune targets: + +- old tool results +- old media/document payloads +- stale repeated file reads +- superseded grep/glob/search output +- tool results already represented by newer richer reads + +Progression: + +1. deduplicate +2. supersede stale reads +3. prune old low-value tool outputs +4. if still near limit, compact +5. if compaction fails, emergency trim or replay strategy + +This should be available in both interactive and headless flows. + +### 7. Progressive Tool Result Replacement + +Do not use only one placeholder shape. Use multiple progressively richer replacement formats depending on policy: + +- cleared: + `[Old tool result content cleared]` +- superseded: + `[Superseded by later file_read of /path/Foo.php]` +- structural summary: + `[file_read /src/Foo.php, 245 lines, class Foo with methods bar() and baz()]` +- truncation pointer: + `[Full output saved to .kosmokrator/truncation/tool_123; preview kept inline]` + +This preserves more semantic value than a uniform tombstone string. + +### 8. Truncation Storage for Oversized Outputs + +When a tool result is too large: + +- keep a bounded inline preview +- save the full payload to a local truncation store +- inject a pointer and usage hint +- let the agent or subagent inspect slices later using targeted reads/search + +Potential local storage: + +```text +.kosmokrator/truncation/ +``` + +Benefits: + +- keeps the live prompt compact +- preserves recoverability +- works well with grep/read-offset tools +- reduces pressure to keep huge shell and file-read output in memory + +### 9. Tiered Memory Model + +Split memory into three classes. + +#### Priority Context + +Always loaded. Very small. High-confidence durable constraints. + +Examples: + +- repository-specific invariants +- critical user workflow preferences +- known project hazards + +#### Working Memory + +Session-local or short-lived notes. Auto-pruned by age or staleness. + +Examples: + +- current investigation state +- active hypotheses +- recent but not durable discoveries + +#### Durable Memory + +Cross-session project, user, and decision memories. + +Examples: + +- architecture facts not obvious from code +- repeated user preferences +- prior technical decisions and rationale + +This is a stronger replacement for a single undifferentiated memory bucket. + +### 10. Selective Memory Recall + +Do not inject all memories. Add a bounded relevance-selection step. + +Flow: + +1. scan memory metadata cheaply +2. exclude already surfaced memories +3. exclude noisy reference material for tools already active +4. select top `K` memories for the current task +5. inject only short rendered snippets + +Implementation options: + +- start with SQLite metadata scan plus heuristic ranking +- optionally use a lightweight side-query model later + +Heuristic ranking signals: + +- memory type weight +- keyword overlap with user request and task tree +- recency or freshness +- prior usefulness +- explicit user pinning + +### 11. Session-History Recall/Search + +Move older context recovery out of the live prompt and into targeted recall. + +Capabilities: + +- search prior session titles +- search compaction summaries +- search prior full transcripts for the same project +- search prior subagent summaries + +This supports: + +- resuming interrupted work +- recovering prior decisions without keeping them resident +- starting a fresh thread with good recall + +Potential user-facing features: + +- `/recall ` +- `/sessions search ` +- automatic recall suggestions on `/resume` + +### 12. Subagent-Specific Context Policy + +Subagents should not share the exact same thresholds as the main agent. + +Subagent policy should include: + +- smaller effective context windows +- aggressive micro-pruning +- protected parent brief injected as a compact block +- compact-or-prune behavior in headless mode, not trim-only +- circuit breaker on repeated compaction failures + +Suggested rule of thumb: + +- main agent optimizes for continuity and broad recall +- subagents optimize for narrow task focus and fast turnover + +### 13. Failure Guards and Circuit Breakers + +Repeated auto-compaction failure should not thrash the model or the UI. + +Track: + +- consecutive compaction failures +- consecutive context-overflow errors +- emergency trims performed +- last successful compaction point + +Suggested behavior: + +- first failure: retry with more aggressive micro-prune +- second failure: compact with a smaller protected set +- third failure: enter circuit-breaker mode and stop automatic retries for a period +- expose the state to the user + +### 14. Compaction Prompt Extensibility + +Allow the compaction prompt to be augmented by internal providers or future plugin hooks. + +Possible uses: + +- domain-specific file summaries +- language-aware structural extraction +- project-specific compaction hints +- excluding noisy tool families + +This should be optional. The base compaction path must remain stable without external hooks. + +### 15. Replay-Aware Overflow Recovery + +When overflow is severe, consider replaying the current user turn against a freshly compacted history instead of repeatedly trimming the live thread. + +Use carefully: + +- useful when the latest turn is the important one +- dangerous if it hides prior context loss + +This is an optional advanced path, not a first-pass requirement. + +### 16. Background Consolidation + +Add a low-priority background process that periodically consolidates working memory into durable memory or small priority notes. + +Triggers may include: + +- idle time +- session count +- elapsed wall time +- after successful compaction + +Guardrails: + +- lock to avoid concurrent consolidators +- strict size budgets +- skip while the main loop is under active context pressure + +### 17. Context-Health Observability + +Expose context health explicitly in the UI and logs. + +Metrics to surface: + +- estimated prompt usage +- effective context window +- warning and compact thresholds +- tokens saved by dedup, prune, and compaction +- last compaction summary length +- consecutive compaction failures +- whether protected reinjection was applied +- memory items injected this turn + +Possible surfaces: + +- status bar +- `/context` or `/debug context` +- log events +- subagent dashboard integration + +## Optional Advanced Heuristics + +These are valuable, but should remain experimental until the deterministic foundation is stable. + +### 1. Semantic Importance Scoring + +Score tool results by importance and prune the lowest-value outputs first. + +Signals: + +- reference density +- decision influence +- tool-type weight +- downstream dependency + +This idea already exists in `docs/proposals/context-management-strategies.md` and remains compatible with this redesign. + +### 2. Sliding Context Tiers + +Apply different fidelity rules by age: + +- last 2 turns: full fidelity +- turns 3 to 5: summarized tool results +- turns 6+: cleared or superseded outputs + +This gives smoother degradation than a single hard compaction boundary. + +### 3. File Content Cache + +Cache file reads by `(path, mtime)` and replace repeated large reads with references rather than full content. + +### 4. Session Branching + +Let the user fork a long session into a fresh thread seeded by summary plus protected context. + +### 5. Model-Switch Compaction + +When switching to a smaller-window model, proactively compact before the next turn rather than waiting for an overflow condition. + +## Proposed Components + +### New or Expanded Runtime Components + +- `ContextBudget` + - computes effective windows and thresholds +- `CompactionPlanner` + - computes one `CompactionPlan` +- `ProtectedContextBuilder` + - builds non-conversational protected context blocks +- `MicroPruner` + - cheap deterministic context reduction +- `TruncationStore` + - persists oversized outputs for later targeted inspection +- `MemorySelector` + - bounded recall over stored memories +- `SessionRecall` + - search interface over summaries and transcript metadata +- `ContextTelemetry` + - status and observability layer + +### Existing Components To Refactor + +- `ContextManager` + - orchestrates threshold checks and policy decisions +- `ContextCompactor` + - becomes planner plus summarizer instead of summary-only helper +- `ConversationHistory` + - applies replacement history from a plan instead of recomputing a boundary +- `SessionManager` + - persists plan outputs directly +- `TokenEstimator` + - upgraded or wrapped by `ContextBudget` +- `SubagentFactory` + - provides headless agents with a real context policy + +## Data Model Changes + +Potential persistence additions: + +- compaction records table or extended message metadata: + - compacted message ids + - summary text + - saved tokens + - failure count + - protected-context metadata +- memory metadata: + - class: `priority`, `working`, `durable` + - pinned flag + - last surfaced time + - freshness score +- truncation store metadata: + - path + - source tool + - byte size + - retention expiry + +## Suggested Rollout Phases + +### Phase 1: Deterministic Foundation + +- unify compaction planning +- remove duplicated boundary logic +- add effective-context budgeting +- expose context-health metrics internally + +### Phase 2: Cheap Context Wins + +- strengthen micro-pruning +- add richer supersede placeholders +- add truncation storage +- enable better headless/subagent policy + +### Phase 3: Continuity and Recall + +- protected context reinjection +- tiered memory model +- selective memory recall +- session-history recall/search + +### Phase 4: Advanced Behaviors + +- circuit breakers +- replay-aware overflow recovery +- background consolidation +- prompt hooks +- semantic importance scoring +- sliding context tiers + +## Tradeoffs + +### Benefits + +- more reliable long-session continuity +- lower chance of drift between in-memory and persisted state +- fewer unnecessary LLM compaction calls +- better subagent stability +- better cross-session recall + +### Costs + +- more moving parts in the agent loop +- more metadata to persist and test +- more policy complexity per model and per agent type +- more UI/state concepts for debugging + +### Main Risk + +The main risk is overengineering before the deterministic base is fixed. The correct order is: + +1. unify compaction planning +2. improve budgeting and pruning +3. add reinjection and recall +4. add heuristic and background systems + +## Recommended First Implementation Slice + +Even though this document covers the full idea set, the best first slice is still: + +1. `CompactionPlan` as the single source of truth +2. effective-context budgeting +3. micro-prune in both main and headless flows +4. protected context reinjection + +That sequence improves correctness first and opens the door for the rest. + +## Relationship to Existing Docs + +- `docs/architecture/overview.md` remains the current-state document +- `docs/proposals/context-compaction.md` is a historical snapshot of the first compaction design +- `docs/proposals/context-management-strategies.md` remains a useful experimental appendix for heuristics like semantic importance scoring and sliding tiers + +This document is intended to become the main future-state reference for context-management redesign work. diff --git a/docs/ecosystem/kosmokrator/proposals/context-management-strategies.md b/docs/ecosystem/kosmokrator/proposals/context-management-strategies.md new file mode 100644 index 0000000..1668cb0 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/context-management-strategies.md @@ -0,0 +1,49 @@ +# Context Management Strategies + +> Status: Proposal. This document describes possible future improvements beyond the context-management pipeline that currently ships. + +Future improvements to KosmoKrator's context management beyond the current three-layer system (truncation → pruning → compaction). + +## 1. Semantic Importance Scoring + +Score each tool result (0.0–1.0) to decide what to prune first. Four signals, no LLM call: + +- **Reference density** — How much of the tool result did the assistant actually quote/use in its response? Split result into lines, count how many appear in the assistant's text. High overlap = load-bearing. +- **Decision influence** — Did the assistant make a decision citing this result? Detect decision language ("the issue is", "I'll use", "based on") + file path/tool name in the following assistant message. +- **Tool type weight** — Static: `bash` 0.7 (irreproducible), `grep` 0.5 (re-searchable), `file_read` 0.3 (on disk), `glob` 0.1 (trivial to redo), `file_write`/`file_edit` 0.2 (just confirmations). +- **Downstream dependency** — Did values from this result appear in arguments of later tool calls? (grep finds path → file_read uses that path). Breaking the chain loses reasoning context. + +Combined score: `0.3 × reference + 0.25 × decision + 0.25 × type + 0.2 × dependency` + +Pruner sorts candidates by score ascending, prunes lowest-value first until it hits the savings target. High-importance results survive even if old. + +## 2. Tool Result Deduplication + +The LLM frequently re-reads the same file (read → edit → read to verify). Each re-read dumps redundant content into context. + +Three tiers: + +- **Exact duplicate** — Same tool + same args + same result → replace older with `[superseded — same content returned by later call]` +- **Same-file re-read** — `file_read` same path, different offset/limit or after `file_edit` on that path → old content is stale, supersede it +- **Semantic overlap** — `grep` returns lines from `foo.php`, then `file_read foo.php` returns those same lines plus more → grep result is now a subset, replace with `[content included in later file_read of foo.php]` + +Runs eagerly after each tool call (before adding to history, scan backwards for matches). Detection is a hash lookup + string comparison — microseconds. + +The supersede message preserves the *fact* that the read happened (the LLM knows the file was relevant) without the *content* (which exists in the newer result). + +### How They Combine + +Dedup runs first as a cheap pass (always safe). Then importance scoring handles the rest — pruner removes lowest-scored results first. Together they form a priority queue: + +1. Duplicates → always prune +2. Low-importance results (low reference density, no decision influence) → prune when over budget +3. High-importance results → survive until compaction +4. Protected recent turns (last 2 user messages) → never pruned + +## 3. Other Ideas (Not Yet Designed) + +- **Progressive summarization** — Instead of `[cleared]`, replace with a heuristic summary: `[file_read /src/Foo.php: 245 lines, PHP class with methods bar(), baz()]`. No LLM call, just structural extraction. +- **Pre-flight context budget** — Before sending to LLM, estimate prompt size via `TokenEstimator` and proactively prune/compact. Avoids wasted API calls. +- **Sliding context tiers** — Last 2 turns: full fidelity. Turns 3-5: tool results summarized. Turns 6+: tool results cleared, assistant responses truncated. Graceful degradation instead of a cliff. +- **File content caching** — Store file reads in a local cache keyed by `path:mtime`. Replace tool result with compact reference. Re-read from cache instead of re-reading from disk. +- **Session branching** — `/branch` snapshots the current session and starts fresh with just a summary. Old session preserved intact and resumable. diff --git a/docs/ecosystem/kosmokrator/proposals/desktop-app.md b/docs/ecosystem/kosmokrator/proposals/desktop-app.md new file mode 100644 index 0000000..6b51825 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/desktop-app.md @@ -0,0 +1,296 @@ +# KosmoKrator Desktop App + +> Status: Proposal. This document describes a possible future desktop surface. The current shipped product is the terminal application. + +## Concept + +KosmoKrator is one product with two surfaces: terminal and desktop. The desktop app is not a companion or wrapper — it runs the same engine (AgentLoop, PrismService, ToolRegistry, Lua bridge, MCP client) with a GUI renderer instead of ANSI/TUI. + +``` + KosmoKrator (the engine) + ├── Kernel, ConfigLoader + ├── AgentLoop, PrismService + ├── ToolRegistry, Lua bridge + ├── MCP client + └── Integration loader + │ + ┌──────────┴──────────┐ + │ │ + CLI surface Desktop surface + (bin/kosmokrator) (NativePHP app) + │ │ + Symfony Console Electron window + + TUI renderer + web renderer + │ │ + terminal system tray + on-demand always-on + notifications + OAuth flows + global shortcuts +``` + +The split happens at the UI layer. `RendererInterface` already abstracts rendering — `AnsiRenderer`, `TuiRenderer`, and the desktop app adds a third: a web-based renderer that pushes events to the Electron frontend. + +--- + +## Why NativePHP + +NativePHP wraps a Laravel app in Electron (desktop) or native shells (mobile). It bundles its own static PHP binary — users install one app, no PHP or Node required. + +KosmoKrator's engine already boots an Illuminate Container (via Prism's transitive `laravel/framework` dependency). The desktop surface wraps this in a minimal Laravel HTTP layer that NativePHP can host, while the core engine remains framework-agnostic. + +**NativePHP provides what terminals and browsers can't:** + +| Capability | Terminal | Browser | Desktop (NativePHP) | +|-----------|----------|---------|---------------------| +| System tray (always-on) | No | No | Yes | +| Native notifications | No | Limited | Yes | +| Global shortcuts | No | No | Yes | +| OAuth redirect flows | Painful (copy-paste) | Callback URL complexity | Native redirect URI | +| File dialogs | CLI path input | Browser picker | Native OS picker | +| Deep linking | No | URL only | Custom protocol (`kosmokrator://`) | +| Auto-updater | Manual | N/A | Built-in OTA | +| Offline-first | Yes | No | Yes | + +--- + +## Architecture + +### Shared Core (framework-agnostic) + +``` +src/ +├── Kernel.php # Boots Illuminate Container + config +├── ConfigLoader.php # YAML → Config Repository +├── Agent/ +│ ├── AgentLoop.php # Core loop: prompt → LLM → tools → loop +│ ├── ConversationHistory.php +│ └── Middleware/ +├── LLM/ +│ └── PrismService.php # Prism wrapper, provider failover +├── Tool/ +│ ├── ToolInterface.php +│ ├── ToolRegistry.php +│ └── Coding/ # Built-in tools +├── Lua/ +│ ├── LuaSandboxService.php +│ ├── LuaBridge.php +│ └── LuaApiDocGenerator.php +├── Mcp/ +│ └── McpClient.php +├── Integration/ +│ ├── IntegrationLoader.php +│ └── YamlCredentialResolver.php +└── Session/ + ├── Session.php + └── SessionStore.php +``` + +This is the engine. It has no opinion about rendering. + +### CLI Surface (Symfony Console) + +``` +bin/kosmokrator +src/ +├── Command/ +│ └── AgentCommand.php # REPL loop +└── UI/ + ├── RendererInterface.php + ├── UIManager.php + ├── Ansi/AnsiRenderer.php + └── Tui/TuiRenderer.php +``` + +### Desktop Surface (NativePHP + Electron) + +``` +desktop/ +├── app/ +│ ├── Providers/ +│ │ └── NativeAppServiceProvider.php # NativePHP window, menu, tray +│ ├── Http/ +│ │ └── Controllers/ +│ │ └── AgentController.php # WebSocket bridge to AgentLoop +│ └── Renderers/ +│ └── WebRenderer.php # RendererInterface → WebSocket events +├── resources/ +│ ├── views/ # Blade/Vue frontend +│ └── js/ +│ ├── app.js +│ └── components/ +│ ├── ConversationView.vue # Chat UI +│ ├── ToolCallPanel.vue # Tool execution display +│ ├── IntegrationManager.vue # OAuth flows, credential management +│ └── StatusBar.vue # Model, tokens, cost +├── routes/ +│ └── web.php +├── composer.json # Requires kosmokrator/kosmokrator + nativephp/desktop +└── package.json # Frontend deps (Vue, Tailwind, etc.) +``` + +The desktop surface is a thin Laravel app that: +1. Boots the shared KosmoKrator Kernel +2. Creates a `WebRenderer` implementing `RendererInterface` +3. Pushes render events (thinking, streaming, tool calls) over WebSocket to the Vue frontend +4. Receives user input from the frontend and feeds it to `AgentLoop` +5. NativePHP handles the Electron shell, system tray, notifications, etc. + +### WebRenderer + +```php +class WebRenderer implements RendererInterface +{ + public function showThinking(): void + { + broadcast(new AgentEvent('thinking')); + } + + public function streamChunk(string $text): void + { + broadcast(new AgentEvent('chunk', ['text' => $text])); + } + + public function showToolCall(string $name, array $args): void + { + broadcast(new AgentEvent('tool_call', ['name' => $name, 'args' => $args])); + } + + public function showToolResult(string $name, string $output, bool $success): void + { + broadcast(new AgentEvent('tool_result', [ + 'name' => $name, + 'output' => $output, + 'success' => $success, + ])); + } + + // ... etc +} +``` + +Same `RendererInterface`, just broadcasting instead of printing ANSI codes. + +--- + +## OpenCompany Connection + +OpenCompany is an optional cloud backend — not required, not a separate product in this context. + +```yaml +# ~/.kosmokrator/config.yaml +opencompany: + enabled: true + url: https://my-instance.opencompany.app + api_key: sk-... +``` + +### When Connected + +- Pulls available hosted integrations (ClickUp, Google, etc.) +- Syncs integration credentials (no local OAuth needed for already-configured integrations) +- Proxies tool calls for hosted-mode integrations +- Syncs sessions/conversation history (optional) +- Access to OpenCompany's vector memory and knowledge base + +### When Disconnected + +- Full local operation — same agent, same built-in tools, same Lua bridge +- Local integrations work (credentials in `~/.kosmokrator/integrations.yaml`) +- Local LLM via Ollama works +- MCP servers work +- Zero degradation for core coding agent functionality + +The desktop app is KosmoKrator first, OpenCompany-connected second. + +--- + +## Desktop-Specific Features + +### System Tray + +Agent lives in the system tray. Click to open conversation window. Badge shows when agent needs attention (tool approval, error, completion). + +### Native Notifications + +``` +┌─────────────────────────────────┐ +│ KosmoKrator │ +│ ✓ Refactor complete — 4 files │ +│ changed, all tests passing │ +└─────────────────────────────────┘ +``` + +Notifications for: agent completion, tool approval requests, errors, integration connection status. + +### Global Shortcuts + +Summon KosmoKrator from any application: + +``` +Cmd+Shift+K → opens KosmoKrator window with prompt focused +``` + +Quick-action mode: type a command, hit enter, window minimizes back to tray. + +### OAuth Integration Flows + +The desktop app owns a real redirect URI (`kosmokrator://oauth/callback`). Adding integrations: + +1. Click "Add Gmail" +2. Browser opens Google OAuth consent screen +3. Google redirects to `kosmokrator://oauth/callback?code=...` +4. NativePHP's deep linking catches it +5. Tokens stored in credential resolver +6. Done — no copy-paste, no localhost callback server + +### File Context + +Native file picker for attaching context to conversations: + +``` +[Attach File] → OS file dialog → selected file added to conversation +``` + +Also: drag-and-drop files onto the conversation window. + +### Auto-Updater + +Ship updates via GitHub Releases. The app checks and updates silently in the background. Users always have the latest version without manual intervention. + +--- + +## Package Structure + +The desktop app is a separate Composer package that depends on the core: + +```json +{ + "name": "kosmokrator/desktop", + "require": { + "kosmokrator/kosmokrator": "^1.0", + "nativephp/desktop": "^2.0", + "laravel/framework": "^13.0" + } +} +``` + +The core `kosmokrator/kosmokrator` package remains CLI-first and framework-agnostic. The desktop package adds the Laravel HTTP layer and NativePHP integration on top. + +This means: +- `composer global require kosmokrator/kosmokrator` → CLI agent +- Download KosmoKrator.app → desktop agent (bundles everything) +- Same engine, same config, same sessions, same integrations + +--- + +## Rendering Surfaces Summary + +| Surface | Renderer | Input | Output | Runtime | +|---------|----------|-------|--------|---------| +| Terminal (ANSI) | `AnsiRenderer` | readline | ANSI escape codes | `php bin/kosmokrator` | +| Terminal (TUI) | `TuiRenderer` | Symfony TUI InputWidget | TUI widgets + Revolt | `php bin/kosmokrator` | +| Desktop | `WebRenderer` | Vue frontend via WebSocket | Electron BrowserWindow | NativePHP (bundled PHP) | +| *(future)* Mobile | `MobileRenderer` | Native UI via EDGE | Swift/Kotlin shell | NativePHP Mobile | + +All implement `RendererInterface`. The engine doesn't know which surface it's running on. diff --git a/docs/ecosystem/kosmokrator/proposals/ecosystem-architecture.md b/docs/ecosystem/kosmokrator/proposals/ecosystem-architecture.md new file mode 100644 index 0000000..87eab22 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/ecosystem-architecture.md @@ -0,0 +1,441 @@ +# KosmoKrator Ecosystem Architecture + +> Status: Proposal. This document outlines a future ecosystem architecture around Lua, MCP, and shared integrations. These capabilities are not fully implemented in the current CLI. + +## Overview + +KosmoKrator is not just a CLI coding agent — it's a runtime that can host any tool ecosystem via Lua code execution and MCP. It shares a tool ecosystem with OpenCompany, a self-hosted AI collaboration platform. + +``` + opencompanyapp/integration-core + (framework-agnostic contracts) + │ + opencompanyapp/integration-* + (ClickUp, Google, Plausible, ...) + │ + ┌───────────────┼───────────────┐ + │ │ + OpenCompany KosmoKrator + (web platform) (the engine) + │ │ + LuaBridge ┌─────────┼─────────┐ + │ │ │ │ + LuaSandbox CLI Desktop (Mobile) + (PECL ext) terminal NativePHP future + │ ANSI/TUI Electron + MCP Client │ │ + LuaBridge LuaBridge + │ │ + LuaSandbox LuaSandbox + │ │ + MCP Client MCP Client +``` + +KosmoKrator is one engine with multiple surfaces. Tools are written once as Composer packages. OpenCompany is an optional cloud backend for hosted integrations. See `docs/proposals/desktop-app.md` for the desktop surface architecture. + +--- + +## Lua Code Mode + +### The Problem with JSON tool_use + +Traditional tool calling requires one LLM round-trip per tool invocation. A task like "find all PHP files with TODOs and list them" needs: glob → read file 1 → read file 2 → ... → read file N. That's N+1 round-trips, each costing tokens and latency. + +### The Solution: LLM Writes Lua + +Instead of N sequential JSON tool_use blocks, the LLM writes a single Lua program: + +```lua +local files = app.glob({pattern = "src/**/*.php"}) +local results = {} +for _, f in ipairs(files) do + local content = app.read_file({path = f}) + if content:find("TODO") then + table.insert(results, f) + end +end +return results +``` + +One round-trip. One tool call (`execute_lua`). The LLM gets composability, loops, conditionals, variables — all the things that make code more expressive than structured JSON. + +### Evidence This Works + +| Source | Finding | +|--------|---------| +| Anthropic engineering blog | 98.7% token reduction vs JSON tool_use | +| Cloudflare Code Mode | 99.9% token reduction for large API surfaces | +| CodeAct (ICML 2024) | 20% higher success rate, 30% fewer turns | +| Anthropic "Code execution with MCP" | Explicitly advocates agents writing code to call MCP tools | + +### Why Lua Specifically + +- **Designed for embedding**: Smallest footprint of any mainstream scripting language. Built from day one to be embedded in host applications. +- **Easy to sandbox**: Remove `io`, `os`, `debug`, `package`, `loadfile` and the language physically cannot touch the filesystem or network. Only whitelisted functions are available. +- **Simple syntax**: No indentation sensitivity (Python), no prototype chains (JS). LLMs generate valid Lua reliably. +- **Stable**: Lua 5.1 hasn't changed since 2006. The attack surface is well-studied. +- **Familiar**: Config language for Neovim, scripting language for games. LLMs have seen plenty of it in training. + +### Runtime: LuaSandbox PECL Extension + +The `luasandbox` PECL extension, developed by Wikimedia for MediaWiki's Scribunto module, runs user-supplied Lua on Wikipedia at massive scale. It is purpose-built for untrusted code. + +**Security model (whitelist, not blacklist):** +- `setMemoryLimit(int $bytes)` — hard kill on exceed +- `setCPULimit(float $seconds)` — hard kill on exceed (includes PHP callback time) +- `registerLibrary(string $name, array $functions)` — expose specific PHP functions to Lua +- Blocks by default: `dofile()`, `loadfile()`, `io.*`, `os.*`, `debug.*`, `package.*`, `require()`, `load()`, `loadstring()`, `print()`, `string.dump()`, `collectgarbage()`, `coroutine` + +Only what you explicitly register is available. Everything else is inaccessible. + +### Self-Discoverable API + +The LLM doesn't need every tool schema in its system prompt. Instead: + +```lua +-- LLM can discover what's available at runtime +local all = docs() -- list all namespaces +local gmail = docs("app.gmail.work") -- list tools for this account +local detail = docs("app.gmail.work.send_message") -- full schema + examples +``` + +API docs are auto-generated from tool schemas by `LuaApiDocGenerator`. This keeps the system prompt small while giving the LLM access to arbitrarily large tool surfaces. + +### Fallback to Standard tool_use + +Lua code mode is not all-or-nothing. Simple single-tool calls can still use standard JSON tool_use. The LLM chooses: quick read → `tool_use`, complex multi-step → `execute_lua`. Both paths coexist. + +--- + +## MCP Integration + +### KosmoKrator as MCP Client + +KosmoKrator connects to external MCP servers, discovers their tools, and makes them available to the LLM — either as standard tool_use or as Lua functions in the sandbox. + +``` +MCP Server (external) + │ + ├── listTools() → discover available tools + │ + └── callTool(name, args) → execute and return result + │ +KosmoKrator MCP Client + │ + ├── Register as Lua functions: app.mcp.{server}.{tool}() + │ + └── Or expose as standard tool_use definitions +``` + +**Transport options:** +- **stdio**: MCP server runs as a child process (ideal for local tools) +- **HTTP + SSE**: Remote MCP servers (ideal for hosted OpenCompany tools) + +**PHP MCP client options:** +- `modelcontextprotocol/php-sdk` — official, maintained by PHP Foundation + Symfony +- `php-mcp/client` — fluent builder, sync facade +- `swisnl/mcp-client` — SSE, stdio, streamable HTTP + +### KosmoKrator as MCP Server + +KosmoKrator can also expose its own tools (file read/write, bash, glob, grep, git) as an MCP server. This allows other AI applications (Claude Desktop, IDE extensions, other agents) to use KosmoKrator's capabilities. + +### Lua + MCP Bridge + +The key innovation: MCP tools are registered as Lua functions in the sandbox. The LLM writes Lua that calls MCP tools, composes results, and handles logic — all in a single execution: + +``` +LLM writes Lua code + → KosmoKrator's Lua sandbox executes it + → Lua calls app.mcp.github.list_issues({repo = "..."}) + → KosmoKrator routes to GitHub MCP server + → Result returns to Lua as a table + → Lua filters, transforms, calls more tools + → Final result returns to the LLM +``` + +The LLM doesn't know or care whether a tool is local, an MCP server, or a hosted OpenCompany integration. The Lua namespace is the universal interface. + +--- + +## OpenCompany Tool Ecosystem + +### Existing Tool Packages + +OpenCompany has 15+ AI tool packages as standalone Composer packages under the `opencompanyapp` vendor: + +| Package | Tools | Description | +|---------|-------|-------------| +| `ai-tool-clickup` | 17 | Tasks, lists, folders, docs, time tracking, chat | +| `ai-tool-google` | 10+ | Calendar, Gmail, Drive, Contacts, Sheets, Search Console, Tasks, Analytics, Docs, Forms | +| `ai-tool-plausible` | 5+ | Web analytics queries, realtime visitors, sites, goals | +| `ai-tool-ticktick` | 5+ | Task management, projects, priorities | +| `ai-tool-mermaid` | 1 | Diagram rendering (flowcharts, sequences, ER, Gantt, etc.) | +| `ai-tool-plantuml` | 1 | UML diagram rendering | +| `ai-tool-typst` | 1 | Document typesetting | +| `ai-tool-vegalite` | 1 | Data visualization / charts | +| `ai-tool-coingecko` | 3+ | Cryptocurrency market data | +| `ai-tool-exchangerate` | 2+ | Currency conversion (340+ currencies) | +| `ai-tool-worldbank` | 3+ | Economic indicators for 200+ countries | +| `ai-tool-trustmrr` | 2+ | Startup revenue/MRR data | +| `ai-tool-celestial` | 6+ | Moon phases, sunrise/sunset, planet positions, eclipses | + +### Current Architecture Problem + +Today, every tool implements `Laravel\Ai\Contracts\Tool` — a hard dependency on laravel/ai: + +``` +ai-tool-clickup → integration-core → laravel/ai +``` + +This means KosmoKrator (which uses Prism directly, not laravel/ai) cannot use these packages without pulling in the full Laravel AI SDK. + +**However**, the actual business logic in each package (e.g., `ClickUpService`, `PlausibleService`) is framework-agnostic. The laravel/ai coupling is only in the thin tool wrapper layer (schema definition + handle method). + +### Refactored Architecture (Option C) + +Split `integration-core` into two packages: + +``` +opencompanyapp/integration-core (framework-agnostic) +├── Contracts/ +│ ├── Tool ← OWN interface, not laravel/ai's +│ │ ├── name(): string +│ │ ├── description(): string +│ │ ├── parameters(): array ← JSON Schema array +│ │ └── execute(array $args): ToolResult +│ ├── ToolProvider +│ ├── CredentialResolver +│ ├── ConfigurableIntegration +│ ├── AgentFileStorage +│ └── ProvidesLuaDocs +├── Support/ +│ ├── ToolResult ← Value object +│ ├── ConfigCredentialResolver +│ └── ToolProviderRegistry +└── composer.json ← NO laravel/ai dependency +``` + +No bridge package needed. Vendor package tools are Lua-only — they're never passed to the laravel/ai agent loop. Built-in tools (tasks, system, agents, memory, lua) still implement `Laravel\Ai\Contracts\Tool` directly. `LuaBridge` and `getToolCatalog()` use a dual-dispatch `instanceof` check to handle both tool types. + +**Result:** +- All tool packages depend only on `integration-core` (no laravel/ai) +- OpenCompany's built-in tools keep their `Laravel\Ai\Contracts\Tool` implementation +- KosmoKrator uses the tools natively through its own `ToolInterface` +- Tool packages become truly framework-agnostic + +--- + +## Dual-Mode Integrations: Local vs Hosted + +Users can run tool integrations in two modes: + +### Local Mode + +The tool package runs inside KosmoKrator's process. Credentials are stored locally. API calls go directly from the user's machine to the external service. + +``` +KosmoKrator → ClickUpService → ClickUp API +``` + +### Hosted Mode (OpenCompany) + +The tool runs on the user's OpenCompany instance. KosmoKrator sends requests to OpenCompany's API, which proxies to the external service. Credentials are managed in OpenCompany's encrypted storage. + +``` +KosmoKrator → OpenCompany API → ClickUpService → ClickUp API +``` + +Hosted mode is effectively MCP over HTTP — OpenCompany acts as an MCP server for its configured integrations. This means: + +- Users who already have OpenCompany with configured integrations can use them from KosmoKrator immediately +- No need to re-enter credentials or set up OAuth flows locally +- OpenCompany handles token refresh, rate limiting, and credential rotation +- KosmoKrator just needs an API key for the OpenCompany instance + +### From the Lua bridge perspective, both modes are identical + +```lua +-- User doesn't know or care whether this is local or hosted +app.gmail.work.send_message({ + to = "team@example.com", + subject = "Deploy complete", + body = "All tests passed." +}) +``` + +The credential resolver and transport layer handle the routing transparently. + +--- + +## Multi-Account Support + +Users can configure multiple accounts for the same provider. Each account gets a user-defined alias that becomes its namespace. + +### Configuration + +```yaml +# ~/.kosmokrator/integrations.yaml + +gmail: + work: + mode: local + credentials: + client_id: "..." + client_secret: "..." + refresh_token: "..." + personal: + mode: hosted + opencompany_key: "sk-..." + account_id: "acc_abc123" + +clickup: + default: + mode: local + credentials: + api_token: "..." + +clickup: + freelance: + mode: local + credentials: + api_token: "..." # different workspace +``` + +### Lua Namespace + +The namespace pattern is `app.{provider}.{alias}.{tool}`: + +```lua +-- Two Gmail accounts +app.gmail.work.send_message({to = "cto@company.com", ...}) +app.gmail.personal.list_messages({query = "is:unread"}) + +-- Two ClickUp workspaces +app.clickup.default.create_task({list_id = "...", name = "..."}) +app.clickup.freelance.get_tasks({list_id = "..."}) +``` + +### Architecture + +The `ToolProvider` yields multiple named instances instead of a flat tool list. Each instance carries: + +- **Alias**: user-defined label (`work`, `personal`, `freelance`) +- **Mode**: `local` or `hosted` +- **Credential scope**: isolated credentials per instance +- **Endpoint**: direct API URL (local) or OpenCompany API URL (hosted) + +```php +// CredentialResolver is scoped to the instance +$resolver->get('gmail:work', 'client_id'); // local credentials +$resolver->get('gmail:personal', 'api_token'); // proxied to OpenCompany +``` + +The Lua bridge registers functions per instance: + +```php +$sandbox->registerLibrary('app.gmail.work', [ + 'send_message' => fn($args) => $this->execute('gmail', 'work', 'send_message', $args), + 'list_messages' => fn($args) => $this->execute('gmail', 'work', 'list_messages', $args), +]); + +$sandbox->registerLibrary('app.gmail.personal', [ + 'send_message' => fn($args) => $this->execute('gmail', 'personal', 'send_message', $args), + 'list_messages' => fn($args) => $this->execute('gmail', 'personal', 'list_messages', $args), +]); +``` + +### Setup Flow + +When a user wants to add an integration: + +``` +$ kosmokrator integrations add gmail + +? Alias for this account: work +? Mode: (local / hosted) + > local + +? Client ID: xxxxxxxx +? Client Secret: xxxxxxxx +? Starting OAuth flow... (opens browser) +✓ Gmail "work" configured. + +Lua namespace: app.gmail.work.* +Available tools: send_message, list_messages, search_messages, ... +``` + +Or for hosted mode: + +``` +$ kosmokrator integrations add gmail + +? Alias for this account: personal +? Mode: (local / hosted) + > hosted + +? OpenCompany API key: sk-xxxxxxxx +? Select account from OpenCompany: + 1. personal@gmail.com (Gmail) + 2. work@company.com (Gmail) + > 1 +✓ Gmail "personal" configured (hosted via OpenCompany). + +Lua namespace: app.gmail.personal.* +``` + +--- + +## Putting It All Together + +### The Full Stack + +``` +┌─────────────────────────────────────────────────────────┐ +│ LLM Layer │ +│ Prism-PHP → Anthropic, OpenAI, Ollama, ... │ +│ Provider failover, streaming, tool_use + Lua code mode │ +└────────────────────────┬────────────────────────────────┘ + │ +┌────────────────────────┴────────────────────────────────┐ +│ Agent Loop │ +│ Conversation history, middleware pipeline, │ +│ event dispatch (thinking, streaming, tool calls) │ +└────────────────────────┬────────────────────────────────┘ + │ + ┌──────────────┼──────────────┐ + │ │ │ + ┌───────┴──────┐ ┌────┴─────┐ ┌──────┴──────┐ + │ Standard │ │ Lua │ │ MCP │ + │ tool_use │ │ Code │ │ Client │ + │ (JSON) │ │ Mode │ │ │ + └───────┬─────┘ └────┬─────┘ └──────┬──────┘ + │ │ │ + └──────────┬──┘ │ + │ │ + ┌──────────────────┴─────────────────┴──────────────────┐ + │ Tool Layer │ + │ │ + │ Built-in (read, write, bash, glob, grep, git) │ + │ │ │ + │ Integrations (opencompanyapp/ai-tool-*) │ + │ ├── local mode → direct API calls │ + │ └── hosted mode → OpenCompany API proxy │ + │ │ │ + │ MCP servers (external, discovered at runtime) │ + │ │ │ + │ All accessible via: app.{provider}.{alias}.{tool}() │ + └───────────────────────────────────────────────────────┘ +``` + +### What Makes This Powerful + +1. **Universal namespace**: Every tool — built-in, Composer package, MCP server, local or hosted — lives under `app.*` in Lua. The LLM has one consistent interface. + +2. **Write once, run anywhere**: Tool packages are framework-agnostic Composer packages. They work in OpenCompany (web), KosmoKrator (CLI), or any future PHP application. + +3. **Progressive complexity**: Simple tasks use standard tool_use. Complex orchestration uses Lua code mode. Users don't need to know the difference. + +4. **Ecosystem network effect**: Every tool added to OpenCompany is immediately available in KosmoKrator, and vice versa. MCP servers from the broader community plug in through the same Lua namespace. + +5. **Cost optimization**: Lua scripts execute at zero LLM cost. Repetitive or deterministic workflows (daily reports, scheduled syncs) run as pure Lua after initial AI authoring. diff --git a/docs/ecosystem/kosmokrator/proposals/integration-refactor-plan.md b/docs/ecosystem/kosmokrator/proposals/integration-refactor-plan.md new file mode 100644 index 0000000..d41f705 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/integration-refactor-plan.md @@ -0,0 +1,339 @@ +# Integration Package Refactor Plan + +> Status: Proposal. This is a forward-looking refactor plan, not a description of shipped KosmoKrator behavior. + +## Context + +The AI tool packages were originally built around `Laravel\Ai\Contracts\Tool` — each tool exposed `description()`, `schema(JsonSchema)`, and `handle(Request)` for direct LLM function calling. We then switched to Lua code mode where the LLM writes Lua scripts that call tools via `LuaBridge`, making the LLM-oriented interface unnecessary overhead. + +Additionally, KosmoKrator (CLI agent) needs to share the same tool ecosystem but cannot depend on `laravel/ai`. The packages must become framework-agnostic. + +### Current Pain Points + +1. `integration-core` **depends on** `laravel/ai` for the `Tool` interface. Every tool package transitively depends on `laravel/ai`. KosmoKrator cannot use them. +2. **225+ tool classes** implement `Laravel\Ai\Contracts\Tool` with `schema(JsonSchema)` and `handle(Request)` even though they are never direct LLM tools — they are called via Lua. +3. `ToolRegistry` **is a 1965-line monolith** mixing tool metadata (`TOOL_MAP`), instantiation (180-line `match`), permissions, catalog generation, and app group config. +4. **Two registration paths**: external packages self-register via `ToolProviderRegistry`, built-in tools are hardcoded in `TOOL_MAP` + the giant `match`. +5. `ProvidesLuaDocs` is optional and zero packages implement it despite Lua being the primary mode. +6. **No multi-account support** in `ToolProvider` or `CredentialResolver` — needed for KosmoKrator's `app.gmail.work.*` / `app.gmail.personal.*` pattern. +7. **Package naming** (`ai-tool-*`) reflects Era 1 thinking. + +--- + +## Phase 1: New Tool Contract in `integration-core` + +**Goal**: `integration-core` owns its own `Tool` interface. Drop the `laravel/ai` dependency. + +### New Contracts + +```php +// integration-core/src/Contracts/Tool.php +interface Tool +{ + public function name(): string; + public function description(): string; + public function parameters(): array; + public function execute(array $args): ToolResult; +} +``` + +`parameters()` returns a plain array — what `LuaApiDocGenerator` actually needs: + +```php +public function parameters(): array +{ + return [ + 'to' => ['type' => 'string', 'required' => true, 'description' => 'Recipient email'], + 'subject' => ['type' => 'string', 'required' => true, 'description' => 'Email subject'], + 'body' => ['type' => 'string', 'required' => false, 'description' => 'Email body'], + ]; +} +``` + +No `JsonSchema` factory, no `Request` wrapper. + +### ToolResult Value Object + +```php +// integration-core/src/Support/ToolResult.php +class ToolResult +{ + public function __construct( + public readonly mixed $data, + public readonly ?string $error = null, + public readonly array $meta = [], // attachments, files created, etc. + ) {} +} +``` + +Replaces returning raw strings. Both platforms can inspect structured results. + +### ToolProvider Changes + +Add `luaDocsPath()` directly (replacing the optional `ProvidesLuaDocs` interface) and `credentialFields()` for setup flows: + +```php +interface ToolProvider +{ + public function appName(): string; + public function appMeta(): array; + public function tools(): array; + public function isIntegration(): bool; + public function createTool(string $class, array $context = []): Tool; + public function luaDocsPath(): ?string; // NEW — null = auto-generated only + // credentialFields() deferred to Phase 5 (multi-account credential system) +} +``` + +### CredentialResolver — Account-Scoped + +```php +interface CredentialResolver +{ + public function get(string $provider, string $account, string $key): ?string; +} +``` + +In OpenCompany: `IntegrationSettingCredentialResolver` resolves from DB. In KosmoKrator: `YamlCredentialResolver` reads from `~/.kosmokrator/integrations.yaml`. + +--- + +## Phase 2: Bridge Package `integration-laravel-ai` + +> **OUTCOME: Skipped.** The plan assumed we'd need a `LaravelAiToolAdapter` to wrap new-style tools back into `Laravel\Ai\Contracts\Tool` for the agent loop. In practice, vendor package tools are never passed to the agent loop — they're Lua-only. Built-in tools (tasks, system, agents, memory, lua) still implement `Laravel\Ai\Contracts\Tool` directly. The dual-dispatch `instanceof` check in `LuaBridge` and `getToolCatalog()` was sufficient. No bridge package needed. + +--- + +## Phase 3: Migrate Tool Packages to New Contract + +**Goal**: Each `ai-tool-*` package implements the framework-agnostic `Tool` contract. + +### Before (coupled to laravel/ai) + +```php +use Laravel\Ai\Contracts\Tool; +use Laravel\Ai\Tools\Request; +use Illuminate\Contracts\JsonSchema\JsonSchema; + +class RenderMermaid implements Tool { + public function description(): string { ... } + public function handle(Request $request): string { ... } + public function schema(JsonSchema $schema): array { ... } +} +``` + +### After (framework-agnostic) + +```php +use OpenCompany\IntegrationCore\Contracts\Tool; +use OpenCompany\IntegrationCore\Support\ToolResult; + +class RenderMermaid implements Tool { + public function name(): string { return 'render_mermaid'; } + public function description(): string { ... } + public function parameters(): array { + return [ + 'syntax' => ['type' => 'string', 'required' => true, 'description' => 'Mermaid diagram syntax...'], + 'title' => ['type' => 'string', 'required' => false, 'description' => 'Diagram title (default: "Diagram")'], + 'width' => ['type' => 'integer', 'required' => false, 'description' => 'Output width in pixels (default: 1400)'], + 'theme' => ['type' => 'string', 'required' => false, 'description' => 'Theme', 'enum' => ['default', 'dark', 'forest', 'neutral']], + ]; + } + public function execute(array $args): ToolResult { ... } +} +``` + +Tool name moves into the tool itself (was only in `ToolProvider::tools()` key). `handle(Request)` becomes `execute(array)`. JsonSchema ceremony disappears. + +Migrate one package at a time. Order by simplicity: + +1. `ai-tool-mermaid` (1 tool — proof of concept) +2. `ai-tool-plantuml`, `ai-tool-typst`, `ai-tool-vegalite` (1 tool each) +3. `ai-tool-exchangerate`, `ai-tool-trustmrr`, `ai-tool-celestial`, `ai-tool-worldbank`, `ai-tool-coingecko` (data packages) +4. `ai-tool-plausible`, `ai-tool-ticktick` (integrations with credentials) +5. `ai-tool-clickup` (17 tools) +6. `ai-tool-google` (10+ sub-providers, largest package) + +--- + +## Phase 4: Built-In ToolProviders, Shrink ToolRegistry + +**Goal**: Built-in tools use the same `ToolProvider` pattern as external packages. Eliminate `TOOL_MAP` and the 180-line `match` statement. + +### New Provider Classes + +``` +app/Agents/Tools/Providers/ +├── ChatToolProvider.php (14 tools) +├── DocsToolProvider.php (14 tools) +├── FilesToolProvider.php (10 tools) +├── TablesToolProvider.php (20 tools) +├── CalendarToolProvider.php (7 tools) +├── ListsToolProvider.php (21 tools) +├── WorkspaceToolProvider.php (27 tools) +├── AutomationsToolProvider.php (6 tools) +├── SvgToolProvider.php (1 tool) +``` + +Each provider: + +- Declares tools via `tools()` (eliminates `TOOL_MAP`) +- Handles instantiation in `createTool()` (eliminates the `match` statement) +- Provides `appMeta()` (eliminates `APP_GROUPS` entries for that section) + +The 5 direct tool groups (`tasks`, `system`, `agents`, `memory`, `lua`) can also become providers or stay in ToolRegistry since they are core agent machinery. + +Register in `AppServiceProvider`: + +```php +$registry = $this->app->make(ToolProviderRegistry::class); +$registry->register(new ChatToolProvider($this->app)); +$registry->register(new DocsToolProvider($this->app)); +// ... +``` + +### ToolRegistry After Refactor (~300 lines) + +```php +class ToolRegistry +{ + public const DIRECT_TOOL_GROUPS = ['tasks', 'system', 'agents', 'memory', 'lua']; + + public function getToolsForAgent(User $agent): array { /* iterate registry, filter, wrap */ } + public function getAppCatalog(User $agent): string { /* build system prompt */ } + public function getAllToolsMeta(User $agent): array { /* for frontend */ } + public function instantiateToolBySlug(string $slug, User $agent): ?Tool { /* delegate to provider */ } +} +``` + +No more `TOOL_MAP`. No more `APP_GROUPS`. No more `APP_ICONS`. No more `INTEGRATION_LOGOS`. No more 180-line `match`. All derived from `ToolProviderRegistry`. + +--- + +## Phase 5: Multi-Account CredentialResolver + +**Goal**: Support `app.gmail.work.*` / `app.gmail.personal.*` pattern for KosmoKrator. + +### Context Array Extension + +The `createTool()` `$context` array already exists. Add account scoping: + +```php +$provider->createTool(GmailSendMessage::class, [ + 'agent' => $agent, + 'account' => 'work', // NEW + 'timezone' => 'UTC', +]); +``` + +### CredentialResolver + +```php +// OpenCompany: resolves from IntegrationSetting table +class IntegrationSettingCredentialResolver implements CredentialResolver +{ + public function get(string $provider, string $account, string $key): ?string + { + return IntegrationSetting::where('integration_id', $provider) + ->where('account', $account) + ->value("config->{$key}"); + } +} + +// KosmoKrator: resolves from YAML config +class YamlCredentialResolver implements CredentialResolver +{ + public function get(string $provider, string $account, string $key): ?string + { + return $this->config[$provider][$account]['credentials'][$key] ?? null; + } +} +``` + +### Lua Namespace + +The `LuaBridge` registers functions per account: + +```lua +app.gmail.work.send_message({to = "cto@company.com", ...}) +app.gmail.personal.list_messages({query = "is:unread"}) +``` + +OpenCompany initially uses a single implicit `default` account (backward compatible). Multi-account is opt-in. + +--- + +## Phase 6: Lua Docs in Every Package + +**Goal**: Every tool package ships a `lua-docs/` directory with real examples and common patterns. + +Add to every package: + +``` +ai-tool-mermaid/ +├── lua-docs/ +│ └── mermaid.md # examples, tips, common patterns +├── src/ +│ ├── MermaidToolProvider.php → luaDocsPath() returns __DIR__.'/../lua-docs/mermaid.md' +│ └── Tools/RenderMermaid.php +``` + +Example content: + +```markdown +## Common Patterns + +### Flowchart from data +\```lua +local items = app.tables.get_table_rows({table_id = "..."}) +local lines = {"graph TD"} +for _, item in ipairs(items.rows) do + table.insert(lines, string.format(" %s --> %s", item.from, item.to)) +end +app.mermaid.render_mermaid({syntax = table.concat(lines, "\n")}) +\``` +``` + +`LuaApiDocGenerator` already has `getProviderLuaDocs()` wired up — it just needs packages to start providing content. + +--- + +## Phase 7: Rename `ai-tool-*` to `integration-*` + +**Goal**: Package naming reflects what they are — integrations, not AI tools. + +``` +opencompanyapp/ai-tool-mermaid → opencompanyapp/integration-mermaid +opencompanyapp/ai-tool-google → opencompanyapp/integration-google +opencompanyapp/ai-tool-clickup → opencompanyapp/integration-clickup +... +``` + +Use Composer `replace` in the new package to smooth the transition: + +```json +{ + "name": "opencompanyapp/integration-mermaid", + "replace": { + "opencompanyapp/ai-tool-mermaid": "self.version" + } +} +``` + +Do this **after** the contract changes (phases 1-4) so each package is only touched once. + +--- + +## Sequencing Summary + +| Phase | What | Why This Order | +| --- | --- | --- | +| **1** | New `Tool` contract in `integration-core` | Unblocks everything — KosmoKrator can't exist without this | +| **~~2~~** | ~~`integration-laravel-ai` bridge package~~ | ~~Skipped — Lua-only tools don't need the adapter~~ | +| **3** | Migrate tool packages to new contract | Each package becomes framework-agnostic | +| **4** | Built-in `ToolProvider` implementations | Eliminates ToolRegistry monolith | +| **5** | Multi-account `CredentialResolver` | Required for KosmoKrator multi-account | +| **6** | Lua docs in every package | Agent quality improvement | +| **7** | Rename `ai-tool-*` → `integration-*` | Cosmetic, do last when stable | + +Phases 1-3 are the critical path for KosmoKrator. Phase 4 is the biggest maintenance win for OpenCompany. Phases 5-7 can happen in parallel with KosmoKrator development. diff --git a/docs/ecosystem/kosmokrator/proposals/laravel-ai-patterns.md b/docs/ecosystem/kosmokrator/proposals/laravel-ai-patterns.md new file mode 100644 index 0000000..4d94bd4 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/laravel-ai-patterns.md @@ -0,0 +1,209 @@ +# Design Patterns Worth Adopting from Laravel AI SDK + +> Status: Reference / proposal. This document records ideas worth borrowing; it is not a statement that KosmoKrator currently implements these patterns. + +Laravel AI SDK (`laravel/ai`) is a layer built on top of Prism-PHP. KosmoKrator uses Prism directly (lighter, no web-app assumptions), but several of laravel/ai's architectural patterns are worth adopting. + +--- + +## 1. Tool Interface Pattern (Schema via JSON Schema) + +### What laravel/ai does + +Tools implement a `schema()` method that uses `illuminate/json-schema` — a fluent builder that produces valid JSON Schema objects: + +```php +// laravel/ai approach +public function schema(JsonSchema $schema): array +{ + return [ + 'path' => $schema->string() + ->description('Absolute path to the file') + ->required(), + 'offset' => $schema->integer() + ->description('Line number to start reading from'), + 'limit' => $schema->integer() + ->description('Max lines to read') + ->default(200), + ]; +} +``` + +This produces the exact `input_schema` JSON Schema that LLM providers expect, without hand-writing JSON arrays. + +### Why this matters + +- **Type safety**: The builder prevents invalid schemas at compile time (e.g., you can't set `minimum` on a string parameter). +- **Self-documenting**: Tool definitions read like API docs. +- **Provider-agnostic**: JSON Schema is the universal format — Anthropic, OpenAI, and MCP all use it. +- **Lua bridge compatibility**: When auto-generating Lua API docs from tool schemas (for code mode), a structured schema object is far easier to traverse than a raw array. + +### How to adopt in KosmoKrator + +`illuminate/json-schema` is already available — it ships with `laravel/framework` v13 (transitive via Prism). Use it in `ToolInterface`: + +```php +namespace Kosmokrator\Tool; + +use Illuminate\JsonSchema\JsonSchema; + +interface ToolInterface +{ + public function name(): string; + public function description(): string; + public function schema(JsonSchema $schema): array; + public function execute(array $args): ToolResult; +} +``` + +The `ToolRegistry` converts these schemas to Prism's `Tool` format when building LLM requests, and to Lua function signatures when generating API docs for code mode. + +--- + +## 2. Middleware Pipeline for Agents + +### What laravel/ai does + +Agents can declare middleware that wraps every tool call or LLM interaction: + +```php +class MyAgent extends Agent +{ + public function middleware(): array + { + return [ + new RateLimitMiddleware(maxPerMinute: 60), + new LoggingMiddleware(), + new ApprovalMiddleware(tools: ['bash', 'file_write']), + ]; + } +} +``` + +Each middleware gets the request/context, can modify it, pass it through (`$next($request)`), or short-circuit (e.g., deny execution, ask for approval). + +### Why this matters + +KosmoKrator needs several cross-cutting concerns that are best modeled as middleware: + +| Concern | Without middleware | With middleware | +|---------|-------------------|----------------| +| **Tool approval** | if/else in AgentLoop | `ApprovalMiddleware` wraps dangerous tools | +| **Cost tracking** | Manual token counting | `CostTrackingMiddleware` intercepts every LLM call | +| **Rate limiting** | Ad-hoc sleep/retry | `RateLimitMiddleware` with token bucket | +| **Audit logging** | Scattered log calls | `AuditMiddleware` logs every tool execution | +| **Sandboxing policy** | Hardcoded in BashTool | `SandboxMiddleware` enforces blocked commands | + +### How to adopt in KosmoKrator + +Implement a simple pipeline — no need for Laravel's full `Pipeline` class: + +```php +namespace Kosmokrator\Agent; + +interface AgentMiddleware +{ + public function handle(AgentContext $context, callable $next): mixed; +} +``` + +The `AgentLoop` runs the middleware stack around each tool execution: + +```php +$pipeline = array_reduce( + array_reverse($this->middleware), + fn ($next, $middleware) => fn ($ctx) => $middleware->handle($ctx, $next), + fn ($ctx) => $this->executeTool($ctx) +); + +$result = $pipeline($context); +``` + +This keeps the `AgentLoop` clean — tool approval, logging, cost tracking are all separate, composable middleware classes. + +--- + +## 3. Provider Failover / Retry Strategy + +### What laravel/ai does + +Agents can declare fallback providers that activate automatically on failure: + +```php +class MyAgent extends Agent +{ + public function provider(): array|string + { + return [ + 'anthropic/claude-sonnet-4-20250514', // primary + 'openai/gpt-4.1', // fallback 1 + 'groq/llama-3.3-70b-versatile', // fallback 2 + ]; + } +} +``` + +On rate limit (429), server error (5xx), or timeout, laravel/ai automatically retries with the next provider in the list. It handles provider-specific error codes (Anthropic's 529 overloaded, OpenAI's 413 context too long). + +### Why this matters + +- **Reliability**: API rate limits and outages are common. Automatic failover keeps the agent running without user intervention. +- **Cost optimization**: Primary provider can be the best model; fallback can be cheaper/faster for when the primary is down. +- **Graceful degradation**: Better to get a response from a weaker model than to error out entirely. + +### How to adopt in KosmoKrator + +Wrap `PrismService` with retry logic: + +```php +namespace Kosmokrator\LLM; + +class PrismService +{ + private array $providers; // from config('kosmokrator.agent.providers') + + public function stream(array $messages, array $tools): \Generator + { + $lastException = null; + + foreach ($this->providers as $provider) { + try { + yield from $this->buildRequest($provider, $messages, $tools)->asStream(); + return; + } catch (PrismRateLimitedException|PrismServerException $e) { + $lastException = $e; + // Log failover, continue to next provider + } + } + + throw $lastException; + } +} +``` + +Config in `kosmokrator.yaml`: + +```yaml +agent: + providers: + - provider: anthropic + model: claude-sonnet-4-20250514 + - provider: openai + model: gpt-4.1 + - provider: ollama + model: llama3.3 +``` + +This gives you automatic failover with zero changes to `AgentLoop` — it just calls `PrismService::stream()` and gets responses regardless of which provider served them. + +--- + +## Summary + +| Pattern | Complexity to adopt | Value for KosmoKrator | +|---------|--------------------|-----------------------| +| JSON Schema tool definitions | Low (dependency already available) | High — cleaner tools, Lua doc generation | +| Agent middleware pipeline | Medium (20-30 lines of pipeline code) | High — keeps AgentLoop clean, enables approval/logging/cost tracking | +| Provider failover | Low (wrap PrismService) | Medium — reliability for daily use | + +All three patterns can be adopted incrementally without pulling in laravel/ai as a dependency. They're architectural ideas, not library lock-in. diff --git a/docs/ecosystem/kosmokrator/proposals/streaming.md b/docs/ecosystem/kosmokrator/proposals/streaming.md new file mode 100644 index 0000000..d1851b8 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/streaming.md @@ -0,0 +1,139 @@ +# Streaming LLM Responses + +> Status: Proposal. This document describes a streaming design that is not the current shipped runtime behavior. + +## Context + +Both renderers currently buffer full LLM responses before displaying. The TUI renderer has live `MarkdownWidget` rendering ready (`streamChunk()` + `processRender()`), but `AgentLoop` always calls `chat()` which blocks until the complete response arrives. Streaming would improve perceived responsiveness in `/ask` and `/plan` modes where the LLM produces longer text output. Lower priority for tool-heavy `/edit` mode. + +## Current Architecture + +``` +AgentLoop.run() + → $llm->chat(messages, tools) ← blocks until complete response + → $ui->streamChunk($fullText) ← dumps entire text at once + → $ui->streamComplete() +``` + +**PrismService** has `stream()` returning `Generator` — never called. +**AsyncLlmClient** has no streaming — `$response->getBody()->buffer()` reads entire body. +**TUI renderer** has live widget updating — built and waiting. +**ANSI renderer** buffers chunks then renders markdown at the end. + +## Design + +### Three layers need changes + +**1. LlmClientInterface — add `stream()` method** + +```php +/** + * @return Generator + */ +public function stream(array $messages, array $tools = [], ?Cancellation $cancellation = null): Generator; +``` + +PrismService already has this. AsyncLlmClient needs it. + +**2. AsyncLlmClient — SSE parsing** + +Replace `$body->buffer()` with line-by-line SSE reading from Amp's async body stream. + +SSE format (Anthropic): +``` +event: content_block_delta +data: {"delta":{"type":"text_delta","text":"Hello"}} + +event: message_delta +data: {"delta":{"stop_reason":"end_turn"},"usage":{"input_tokens":123,"output_tokens":45}} +``` + +Yield typed `StreamEvent` objects (reuse Prism's event classes): +- `TextDeltaEvent` — incremental text chunk +- `ToolCallDeltaEvent` — incremental tool call JSON fragment +- `StreamEndEvent` — finish reason, usage, final tool calls + +**3. AgentLoop — stream-aware run loop** + +Replace: +```php +$response = $this->llm->chat(...); +$this->ui->streamChunk($response->text); +``` + +With: +```php +$text = ''; +$toolCallBuffers = []; // id → accumulated JSON +$usage = null; +$finishReason = null; + +foreach ($this->llm->stream($messages, $tools, $cancellation) as $event) { + if ($event instanceof TextDeltaEvent) { + $text .= $event->delta; + $this->ui->streamChunk($event->delta); // live incremental display + } + if ($event instanceof ToolCallDeltaEvent) { + $toolCallBuffers[$event->toolId] = ($toolCallBuffers[$event->toolId] ?? '') . $event->delta; + } + if ($event instanceof StreamEndEvent) { + $finishReason = $event->finishReason; + $usage = $event->usage; + } +} + +$this->ui->streamComplete(); +// Parse accumulated tool call JSON buffers into ToolCall objects +// Continue with tool execution as before +``` + +Tool calls only complete at stream end — execution logic unchanged. + +### RetryableLlmClient + +Wrap `stream()` with retry on initial connection failure only. Mid-stream failures cannot be retried (partial response already displayed). On mid-stream error, yield an error event or throw — AgentLoop handles it. + +### Fallback + +If provider doesn't support streaming (`supportsStreaming() === false`), fall back to `chat()` with the current buffer-then-display behavior. No regression for non-streaming providers. + +## Tool Call Streaming Behavior + +Tool arguments arrive as JSON fragments: +``` +{"path": ← ToolCallDeltaEvent +"src/file.php"} ← ToolCallDeltaEvent +``` + +Must accumulate and parse at `content_block_stop`. The final `ToolCall` objects are only reliable at stream end. This means tool execution timing is unchanged — streaming only speeds up text display, not tool execution. + +## Files + +| Action | File | +|--------|------| +| **Modify** | `src/LLM/LlmClientInterface.php` — add `stream()` | +| **Modify** | `src/LLM/AsyncLlmClient.php` — implement SSE parsing + `stream()` | +| **Modify** | `src/LLM/RetryableLlmClient.php` — wrap `stream()` with connection retry | +| **Modify** | `src/Agent/AgentLoop.php` — stream-aware `run()` loop | +| **Modify** | `src/UI/Ansi/AnsiRenderer.php` — optional: incremental echo instead of buffer | +| **None** | `src/UI/Tui/TuiRenderer.php` — already has live widget updating | + +## Edge Cases + +- **Non-streaming providers** — fallback to `chat()`, no visual change +- **Mid-stream disconnect** — show partial text, log error, don't retry +- **Empty stream** (immediate tool calls, no text) — skip streaming, go straight to tool execution +- **Thinking tokens** (Claude extended thinking) — `ThinkingEvent` can show a "thinking" indicator, discard content before `TextDeltaEvent` begins +- **Mixed text + tool calls** — text streams live, tool call JSON accumulates silently + +## Effort Estimate + +- SSE parser in AsyncLlmClient: medium (line protocol + provider-specific JSON shapes) +- AgentLoop stream loop: small (iterate events, dispatch to UI) +- RetryableLlmClient wrapper: small +- ANSI incremental rendering: small (optional) +- Testing: medium (mock SSE streams, partial responses, error cases) + +## Priority + +Medium-low. Biggest impact in `/ask` and `/plan` modes. Minimal impact in `/edit` mode where tool calls dominate response time. Implement after web tools, cost tracking, and deduplication. diff --git a/docs/ecosystem/kosmokrator/proposals/tui-ux-improvements.md b/docs/ecosystem/kosmokrator/proposals/tui-ux-improvements.md new file mode 100644 index 0000000..567049b --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/tui-ux-improvements.md @@ -0,0 +1,415 @@ +# TUI UX Improvements + +> Status: Proposal. This document is a UX backlog / comparative design note, not a description of the current TUI. + +Comparative analysis of KosmoKrator vs OpenCode vs Claude Code terminal UIs, ranked by UX impact. + +## Benchmark Comparison + +| Aspect | KosmoKrator | OpenCode | Claude Code | +|--------|-------------|----------|-------------| +| **Framework** | Custom Symfony TUI (PHP) | Custom OpenTUI (SolidJS/Bun) | Forked Ink (React/Node) | +| **Rendering** | Widget tree, diff-based screen updates | 60 FPS, SolidJS reactive | Double-buffered Yoga flexbox | +| **Themes** | 1 hardcoded theme | 35+ themes, JSON-defined, auto dark/light | 6 themes incl. daltonized + ANSI fallback | +| **Diffs** | Word-level with syntax highlight | Split/unified, tree-sitter, 11 theme tokens | Native Rust NAPI module, word-level | +| **Spinners** | 14 custom sets, breathing animation | Knight Rider gradient, per-agent colors | Glimmer wave, stall-aware color shift | +| **Input** | Multi-line EditorWidget | Rich textarea, extmarks, frecency autocomplete | Vim mode, voice, image paste, typeahead | + +--- + +## Ranked Improvements (Highest to Lowest UX Impact) + +### 1. Collapsed Tool Groups + +**Impact**: Very High — single biggest readability win +**Effort**: Medium +**Source**: Original design (stacked brackets) + +Every tool call is currently rendered individually. Sequential `file_read` × 5 shows 5 separate entries, drowning the conversation in noise. + +**What others do**: +- Claude Code auto-collapses sequential Read/Grep/Glob calls into `"Reading 5 files"` or `"Searching 3 patterns"` — a single expandable line. +- OpenCode uses `InlineTool` for simple tools (single line) and `BlockTool` for complex ones (expandable). + +**What to build**: +- Detect consecutive same-type tool calls (file_read, grep, glob, bash) +- Collapse into a summary line with expand-to-detail on Ctrl+O +- Show aggregate stats (file count, match count, total time) + +#### Mockups — Stacked Brackets Style + +##### Before (current behavior) + +``` +☽ Read src/UI/Theme.php +✓ ⏋ 237 lines (ctrl+o to reveal) + +☽ Read src/UI/Tui/TuiRenderer.php +✓ ⏋ 1180 lines (ctrl+o to reveal) + +☽ Read src/UI/Diff/DiffRenderer.php +✓ ⏋ 95 lines (ctrl+o to reveal) + +♅ Edit src/UI/Theme.php +✓ 3 replacements applied + +⊛ Search pattern: "render()" path: src/ +✓ ⏋ 14 matches across 5 files (ctrl+o to reveal) +``` + +12 lines of visual noise for 3 reads, 1 edit, 1 search. + +##### After (collapsed — default view) + +Same scenario rendered in 5 lines: + +``` +┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄ +☽ Read 3 files · 412 lines · 1.2s + ⊛ src/UI/Theme.php + ⊛ src/UI/Tui/TuiRenderer.php + ⊛ src/UI/Diff/DiffRenderer.php +┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄ + +┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄ +⊛ Search 14 matches in 5 files · 0.8s + pattern: "render()" in src/ +┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄ +``` + +Edit and Write calls remain ungrouped — they always render individually as today: + +``` +♅ Edit src/UI/Theme.php +✓ 3 replacements applied +``` + +##### Expanded view (Ctrl+O on the Read group) + +``` +┌── ☽ Read 3 files · 412 lines · 1.2s ──────────────┐ +│ src/UI/Theme.php 237 lines │ +│ src/UI/Tui/TuiRenderer.php 1180 lines │ +│ src/UI/Diff/DiffRenderer.php 95 lines │ +├──────────────────────────────────────────────────────│ +│ Theme.php │ +│ ⏋ 1 ` | +| `packages/opencode/src/tool/task.ts` | Sub-agent tool that creates child sessions, supports resume via `task_id` | +| `packages/opencode/src/tool/batch.ts` | Parallel tool execution up to 25 concurrent calls | +| `packages/opencode/src/session/index.ts:244-303` | Per-message cost calculation with `Decimal` precision | +| `packages/opencode/src/snapshot/index.ts` | Git-based filesystem checkpointing | +| `packages/opencode/src/storage/db.ts` | SQLite persistence for sessions/messages | +| `packages/opencode/src/cli/cmd/stats.ts` | CLI stats: total cost, cost/day, per-model breakdown | + +--- + +## Porting Plan — Component by Component + +### 1. Global Concurrency Semaphore + +**Source pattern:** OpenCode `util/queue.ts` `work()` function + Claude Code `utils/generators.ts` `all()` with `concurrencyCap`. + +**Implementation in KosmoKrator:** + +```php +// In SubagentOrchestrator, add a class-level semaphore +private LocalSemaphore $globalSemaphore; + +public function __construct(/* ... */) { + $this->globalSemaphore = new LocalSemaphore(10); // max 10 concurrent agents +} + +// In spawnAgent(), wrap the Amp\async() block: +$lock = $this->globalSemaphore->acquire(); +// ... inside async, after agent completes: +$lock->release(); +``` + +**Effort:** ~20 lines. We already use `LocalSemaphore(1)` for group constraints — just need one more instance for the global cap. + +**Configuration:** Should be configurable via `subagent_max_concurrency` setting. + +--- + +### 2. Retry Logic with Exponential Backoff + +**Source pattern:** OpenCode `session/retry.ts` (cleaner, ~105 lines) + Claude Code `withRetry.ts:530-548` (jitter formula). + +**Key code to port:** + +``` +delay formula: min(base * 2^attempt + random_jitter, maxDelay) +retry-after-ms header: use directly if present +retry-after header: parse as seconds or HTTP date +retryable errors: 429, 529/overloaded, 408, 5xx +non-retryable: context overflow +``` + +**Implementation in KosmoKrator:** Enhance `RetryableLlmClient` with: +- `Retry-After` and `Retry-After-Ms` header parsing +- Jittered exponential backoff (base 500ms, max 32s) +- Per-agent retry (wrap the `runHeadless()` call in the orchestrator) +- Unattended mode: indefinite retries with 5-min max backoff + +**Effort:** ~100 lines. + +**Constants from Claude Code:** +- `BASE_DELAY_MS = 500` +- `DEFAULT_MAX_RETRIES = 10` +- `MAX_529_RETRIES = 3` (before model fallback) + +--- + +### 3. Background Agent Decoupled Cancellation + +**Source pattern:** Claude Code `tools/AgentTool/runAgent.ts:524-528`: +```typescript +const agentAbortController = isAsync + ? new AbortController() // NEW controller for async agents + : toolUseContext.abortController; // shared for sync agents +``` + +**Implementation in KosmoKrator:** In `SubagentFactory.php`, when creating `NullRenderer` for background agents, pass `null` for the cancellation closure instead of the parent's token. Add a process-level signal handler for Ctrl+C that sets a separate `Cancellation` shared by all agents. + +**Effort:** ~5 lines in factory + ~20 lines for signal handler. + +**Current code location:** `src/Agent/SubagentFactory.php:49` and `src/UI/NullRenderer.php:45-52`. + +--- + +### 4. Disk-Based Result Persistence & Resume + +**Source pattern:** Claude Code `utils/task/diskOutput.ts` — `DiskTaskOutput` class with async write queue, session-scoped directory, `O_NOFOLLOW` security. + +**Implementation in KosmoKrator:** + +``` +Storage layout: +~/.kosmokrator/tasks/{sessionId}/{agentId}.jsonl + +Each line is a JSON event: + {"ts":"...","event":"started","task":"..."} + {"ts":"...","event":"tool_call","tool":"grep"} + {"ts":"...","event":"progress","tokens_in":1234} + {"ts":"...","event":"completed","result":"...","tokens_total":5678} + +Resume logic: +1. On swarm start, scan output directory for completed agent IDs +2. Load their results from disk +3. Skip those agents when scheduling new work +4. Continue from where we left off +``` + +**Key security from Claude Code:** Use `O_NOFOLLOW` equivalent (check not a symlink before write) to prevent sandbox attacks. + +**Effort:** ~200 lines (writer class + resume scanner). + +--- + +### 5. Cost Tracking Aggregation + +**Source pattern:** Claude Code `cost-tracker.ts:250-323` — `addToTotalSessionCost()` with per-model USD calculation. + +**Implementation in KosmoKrator:** + +```php +class SwarmCostTracker { + private array $perModel = []; // model → {input, output, cost_usd} + private float $budgetUsd; + + public function add(string $model, int $in, int $out): void { + $cost = ModelCatalog::calculateCost($model, $in, $out); + // accumulate, check budget + } + + public function getSummary(): SwarmCostSummary { + // completed/total, total cost, per-model breakdown, ETA + } +} +``` + +**Dependency:** Needs `ModelCatalog` to know per-token prices for each model. Currently KosmoKrator has `ModelCatalog` but may need pricing data added. + +**Effort:** ~80 lines. + +--- + +### 6. Progress Dashboard + +**Source pattern:** Claude Code `tools/AgentTool/agentToolUtils.ts:538-593` — progress tracker with token counts, tool use, activity descriptions. + +**Implementation in KosmoKrator:** Aggregate existing `SubagentStats` into a `SwarmProgress` view: + +``` +┌─ Swarm Progress ─────────────────────────────┐ +│ Completed: 1,247 / 3,000 (41.6%) │ +│ Failed: 23 (retried: 18) │ +│ Running: 10 │ +│ Tokens: 2.4M in / 312K out │ +│ Cost: $47.12 │ +│ Elapsed: 34m 12s │ +│ ETA: ~48m │ +└───────────────────────────────────────────────┘ +``` + +**Effort:** ~150 lines (aggregator + renderer). + +--- + +### 7. Tool Result Size Persistence + +**Source pattern:** Claude Code `utils/toolResultStorage.ts:55-78` — `getPersistenceThreshold()` per tool. + +**Implementation in KosmoKrator:** Before injecting sub-agent result into parent's conversation history: +1. Check `strlen($result)` against threshold (e.g., 100KB for agent results) +2. If exceeded, write to `~/.kosmokrator/results/{agentId}.txt` +3. Replace with summary: `[Result persisted to disk: {path} ({size})]` +4. Parent LLM can use `file_read` if it needs the full result + +**Effort:** ~100 lines. + +--- + +### 8. Concurrency-Safe Tool Execution + +**Source pattern:** Claude Code `StreamingToolExecutor.ts:129-135`: +```typescript +private canExecuteTool(isConcurrencySafe: boolean): boolean { + const executing = this.tools.filter(t => t.status === 'executing') + return executing.length === 0 + || (isConcurrencySafe && executing.every(t => t.isConcurrencySafe)) +} +``` + +**Implementation in KosmoKrator:** Add `isConcurrencySafe(): bool` to `ToolInterface`. Read-only tools (`file_read`, `glob`, `grep`) return `true`. Destructive tools (`bash`, `file_edit`, `file_write`) return `false`. In `AgentLoop`'s tool dispatch, serialize non-safe tools. + +**Effort:** ~60 lines. + +--- + +### 9. Coordinator Mode + +**Source pattern:** Claude Code `coordinator/coordinatorMode.ts:111-368` — 370-line system prompt defining the coordinator role. + +**Key concepts to port:** +- Phases: Research (parallel workers) → Synthesis (coordinator) → Implementation (workers) → Verification (workers) +- Workers can't see coordinator's conversation (self-contained prompts) +- `` XML format for delivering results +- `SendMessage` tool for continuing a running worker +- Parallelism guidance: "Launch independent workers concurrently" + +**Implementation in KosmoKrator:** Add a `--coordinator` flag that swaps the system prompt and enables the coordinator tool set. Pure prompt engineering — no code architecture changes needed. + +**Effort:** ~50 lines (flag + prompt template). + +--- + +## Priority Matrix + +| Priority | Component | Effort | Impact | Source | +|----------|-----------|--------|--------|--------| +| **P0** | Global concurrency semaphore | ~20 lines | Prevents rate limit death | OpenCode `queue.ts` | +| **P0** | Retry with backoff + headers | ~100 lines | Survives rate limits | OpenCode `retry.ts` | +| **P0** | Decoupled cancellation | ~25 lines | Stops losing background agents | Claude `runAgent.ts:527` | +| **P1** | Disk result persistence + resume | ~200 lines | Survives crashes, enables resume | Claude `diskOutput.ts` | +| **P1** | Cost tracking aggregation | ~80 lines | Budget visibility | Claude `cost-tracker.ts` | +| **P2** | Progress dashboard | ~150 lines | Operational visibility | Claude `agentToolUtils.ts` | +| **P2** | Tool result size persistence | ~100 lines | Memory pressure relief | Claude `toolResultStorage.ts` | +| **P3** | Concurrency-safe tools | ~60 lines | Race condition prevention | Claude `StreamingToolExecutor.ts` | +| **P3** | Coordinator mode | ~50 lines | Better orchestration | Claude `coordinatorMode.ts` | + +**Total estimated effort:** ~785 lines for all components. + +--- + +## Test Results Summary + +The following tests were run against the current sub-agent system: + +| Test | Result | Key Finding | +|------|--------|-------------| +| Basic hello world (1 agent, await) | Pass | 1 round, 2.6s, clean | +| Parallel agents (3x background) | Pass | All spawned within 146ms, completed independently | +| Nested sub-sub-agents (depth 2) | Pass | Concurrent children at depth 2, results flow back | +| Dependency chain (3 steps, background) | Partial | Dependency graph works but agents cancelled mid-chain | +| Background vs await comparison | Pass | Both modes functional, await blocks, background async | + +### Key Log Patterns Observed + +- Agent spawn-to-completion ratio: 153/149 (97.4% success rate) +- Cancellations occur when parent loop advances during background agent execution +- Rate limits spike when multiple agents make concurrent API calls +- Context overflow auto-recovery works (trim + retry) +- No memory leaks observed across 153 agents in a single session +- Dependency resolution correctly cascades: step-1 completes → step-2 starts → step-3 starts + +--- + +## Files in KosmoKrator That Would Change + +| File | Changes | +|------|---------| +| `src/Agent/SubagentOrchestrator.php` | Add global semaphore, wrap spawn in retry logic, disk output hooks | +| `src/Agent/SubagentFactory.php` | Decouple cancellation for background agents | +| `src/Agent/SubagentStats.php` | Add cost fields, persist to disk | +| `src/LLM/RetryableLlmClient.php` | Add `Retry-After` header parsing, jittered backoff | +| `src/UI/NullRenderer.php` | Accept `null` cancellation for background agents | +| `src/Agent/AgentLoop.php` | Add tool result size persistence before injection | +| `src/Tool/ToolInterface.php` | Add `isConcurrencySafe(): bool` | +| `src/Tool/ToolRegistry.php` | Scope concurrency-safe filtering | +| `src/Command/AgentCommand.php` | Add `--coordinator` flag, global concurrency config | +| New: `src/Agent/SwarmProgress.php` | Progress aggregator | +| New: `src/Agent/DiskTaskOutput.php` | Per-agent result writer | +| New: `src/Agent/SwarmCostTracker.php` | Cost accumulation | + +--- + +## External Source File Index + +### Claude Code (`tmp/claude-src/`) + +``` +services/api/withRetry.ts — Retry engine (822 lines) +services/api/errors.ts — Error classification +services/tools/StreamingToolExecutor.ts — Concurrency-safe tool execution (519 lines) +utils/generators.ts — Concurrent generator pool (80 lines) +utils/task/diskOutput.ts — Disk output with write queue (457 lines) +utils/task/framework.ts — Task lifecycle management (308 lines) +utils/sessionRestore.ts — Session restore from transcripts (550+ lines) +utils/toolResultStorage.ts — Oversized result persistence +utils/forkedAgent.ts — Sub-agent context creation +cost-tracker.ts — Cost tracking (323 lines) +costHook.ts — Cost persistence hook +Task.ts — Task types and state machine (125 lines) +coordinator/coordinatorMode.ts — Coordinator system prompt (369 lines) +tools/AgentTool/runAgent.ts — Agent execution lifecycle (860 lines) +tools/AgentTool/agentToolUtils.ts — Progress tracking, lifecycle management +tools/AgentTool/agentMemory.ts — Agent memory scoping +Tool.ts — Tool interface and contracts (695 lines) +tools.ts — Tool registry and assembly +constants/tools.ts — Tool allowlists per agent type +``` + +### OpenCode (`tmp/opencode/`) + +``` +packages/opencode/src/session/retry.ts — Retry system (105 lines) +packages/opencode/src/util/queue.ts — Worker pool + async queue (30 lines) +packages/opencode/src/tool/task.ts — Sub-agent tool (164 lines) +packages/opencode/src/tool/batch.ts — Parallel tool execution +packages/opencode/src/session/index.ts — Session management, cost calculation +packages/opencode/src/session/prompt.ts — Core agent loop +packages/opencode/src/session/retry.ts — Retry policies +packages/opencode/src/agent/agent.ts — Agent type definitions +packages/opencode/src/agent/prompt/explore.txt — Explore agent system prompt +packages/opencode/src/snapshot/index.ts — Git-based checkpointing +packages/opencode/src/storage/db.ts — SQLite persistence +packages/opencode/src/cli/cmd/stats.ts — CLI stats command +``` diff --git a/docs/ecosystem/kosmokrator/research/claude-code-analysis.md b/docs/ecosystem/kosmokrator/research/claude-code-analysis.md new file mode 100644 index 0000000..3ade40c --- /dev/null +++ b/docs/ecosystem/kosmokrator/research/claude-code-analysis.md @@ -0,0 +1,1464 @@ +# Claude Code Source Analysis & KosmoKrator Comparison + +> **Generated**: 2025-03-31 +> **Source**: `tmp/claude-src/` (Claude Code TypeScript source, 1,903 files, 33MB) +> **Target**: KosmoKrator PHP agent (`src/`, ~100 files) + +--- + +## Table of Contents + +1. [Architecture Overview](#1-architecture-overview) +2. [Agent Loop & Query Engine](#2-agent-loop--query-engine) +3. [Streaming & LLM Integration](#3-streaming--llm-integration) +4. [Tool System](#4-tool-system) +5. [Tool Implementations — Deep Comparison](#5-tool-implementations--deep-comparison) +6. [Subagent & Multi-Agent System](#6-subagent--multi-agent-system) +7. [Context Management & Compaction](#7-context-management--compaction) +8. [Token Estimation & Budget](#8-token-estimation--budget) +9. [Permission System](#9-permission-system) +10. [Hook System](#10-hook-system) +11. [Memory System](#11-memory-system) +12. [Skills System](#12-skills-system) +13. [System Prompt Assembly](#13-system-prompt-assembly) +14. [Session & State Management](#14-session--state-management) +15. [Task System](#15-task-system) +16. [UI & Rendering](#16-ui--rendering) +17. [Cost Tracking](#17-cost-tracking) +18. [Command / Slash Command System](#18-command--slash-command-system) +19. [Keybinding System](#19-keybinding-system) +20. [MCP Integration](#20-mcp-integration) +21. [Feature Comparison Matrix](#21-feature-comparison-matrix) +22. [Concrete Thresholds & Constants](#22-concrete-thresholds--constants) +23. [Inspiration Roadmap](#23-inspiration-roadmap) +24. [Appendix: File Reference](#24-appendix-file-reference) + +--- + +## 1. Architecture Overview + +### Side-by-Side + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Language** | TypeScript (Bun runtime) | PHP 8.4 | +| **UI Framework** | React/Ink (custom reconciler, Yoga flexbox layout) | Symfony TUI + ANSI fallback | +| **Async Model** | Node async/await, async generators, streaming | Amp fibers (cooperative multitasking) | +| **DI Container** | Manual wiring + React context | Laravel Illuminate Container | +| **LLM Client** | Direct Anthropic SDK + SSE streaming | Prism PHP (multi-provider) + custom Amp HTTP | +| **Persistence** | JSON session files (one per session) | SQLite (WAL mode) | +| **Config** | JSON settings + CLAUDE.md hierarchy | YAML (multi-level merge) + KOSMOKRATOR.md | +| **Tool Count** | ~50+ built-in + unlimited via MCP | ~10 built-in | +| **Codebase Size** | 1,903 files / 33MB | ~100 files / ~500KB | +| **Build** | Bun binary bundle | PHAR (via box) | + +### Entry Point Flow + +**Claude Code:** +``` +main.tsx → Commander.js CLI → init() → REPL screen (React/Ink) + → QueryEngine.submitMessage() → query() async generator + → queryLoop() while(true) → API stream → tool execution → loop +``` + +**KosmoKrator:** +``` +bin/kosmokrator → Kernel → AgentCommand → AgentLoop.run() + → while(true) → LlmClient.chat() → executeToolCalls() → loop +``` + +Both follow the same fundamental pattern: a REPL that iterates LLM calls and tool executions until the model stops requesting tools. The key structural differences are in streaming, concurrency, and extensibility. + +--- + +## 2. Agent Loop & Query Engine + +### Claude Code: QueryEngine + query() + +The agent loop is split into two layers: + +**QueryEngine** (`QueryEngine.ts`, 46KB): +- Owns the session: `mutableMessages[]`, conversation state, tool permission callbacks +- `submitMessage()` is an **async generator** that yields `SDKMessage` types +- Manages compact boundaries, permission tracking, and transcript recording +- One QueryEngine per conversation; subagents get their own instances + +**query()** (`query.ts`, 68KB): +- The inner `queryLoop()` is a `while(true)` loop (line 307) +- Each iteration represents one LLM turn: + 1. Apply context compression (snip → microcompact → context collapse → autocompact) + 2. Build system prompt + user context + system context + 3. Stream API call via `queryModelWithStreaming()` + 4. Extract `tool_use` blocks **while streaming** (line 829) + 5. Feed blocks to `StreamingToolExecutor` which starts execution immediately + 6. Collect results, normalize messages + 7. Check stop conditions: no tool_use, max turns, budget exhausted, abort signal, error + 8. Continue loop or return `Terminal` reason + +**State machine** (`query.ts` line 202): +```typescript +type State = { + messages: Message[] + toolUseContext: ToolUseContext + autoCompactTracking: AutoCompactTrackingState | undefined + maxOutputTokensRecoveryCount: number + hasAttemptedReactiveCompact: boolean + turnCount: number + transition: Continue | undefined +} +``` + +### KosmoKrator: AgentLoop + +**AgentLoop.php** (904 lines): +- Single class handling both interactive (`run()`) and headless (`runHeadless()`) modes +- `run()` method: add user message → pre-flight context check → refresh system prompt → call LLM → execute tools → deduplicate/prune → loop or stop +- `runHeadless()`: simplified version for subagents (no UI, no session persistence, no compaction) +- Context overflow: up to 3 trim attempts (compact → trim oldest → trim oldest) +- Auto-compaction check after each response + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Streaming** | Async generator yields events token-by-token | Blocking `chat()` returns complete response | +| **Tool start timing** | Tools start executing while LLM still streams | Tools execute after full response received | +| **Loop state** | Explicit `State` type with transitions | Implicit via class properties | +| **Recovery** | 5+ recovery strategies (collapse drain, reactive compact, max-output escalation, stop hooks) | 3 trim attempts | +| **Turn tracking** | Explicit `turnCount`, budget tracking | No turn or budget tracking | + +### Adoptable Patterns + +1. **Streaming responses**: Add SSE streaming to `AsyncLlmClient` for real-time text display. The Anthropic API returns `text_delta` events that can be yielded to the renderer as they arrive. + +2. **Recovery escalation chain**: Claude Code has a sophisticated recovery tree when the LLM stops without finishing: + - Context collapse drain (cheap, preserves detail) + - Reactive compact (full LLM summarization) + - Max output token escalation (8k → 64k retry) + - Multi-turn recovery (up to 3 "resume" attempts) + + KosmoKrator only has trim/compact. Adding max-output escalation and a "resume where you left off" retry would help with long responses that hit the output limit. + +3. **Explicit state machine**: Wrapping loop state in an immutable `State` type makes the loop more predictable and debuggable. + +--- + +## 3. Streaming & LLM Integration + +### Claude Code: SSE Streaming Pipeline + +**API Call** (`claude.ts` lines 1778-1846): +```typescript +const result = await anthropic.beta.messages.create( + { ...params, stream: true }, + { signal, headers: { [CLIENT_REQUEST_ID_HEADER]: clientRequestId } } +).withResponse() +``` + +**SSE Event Loop** (`claude.ts` lines 1940-2304): +Iterates raw stream events (NOT the SDK's `BetaMessageStream` helper): +- `message_start` → Initialize partial message, usage tracking +- `content_block_start` → Initialize text/tool_use/thinking blocks +- `content_block_delta` → Accumulate `input_json_delta` / `text_delta` / `thinking_delta` +- `content_block_stop` → Yield completed `AssistantMessage` with finished block +- `message_delta` → Update usage, stop_reason, cost; mutate last yielded message + +**Idle Timeout Watchdog** (`claude.ts` lines 1877-1928): +- Default: 90 seconds (`STREAM_IDLE_TIMEOUT_MS`) +- Configurable via `CLAUDE_STREAM_IDLE_TIMEOUT_MS` env var +- Resets on each chunk; fires if no events for timeout period + +**Streaming Fallback** (`claude.ts` lines 2464-2569): +- On streaming error (not user abort): retries as non-streaming request +- Max 64k tokens for non-streaming (`MAX_NON_STREAMING_TOKENS`) +- Tombstone messages invalidate partially-streamed content + +### KosmoKrator: Blocking HTTP Client + +**AsyncLlmClient.php** (291 lines): +- Builds JSON POST payload, sends via Amp HTTP client +- **Transfer timeout**: 600s, **Inactivity timeout**: 300s +- Returns complete `LlmResponse` with text, toolCalls, token counts +- Retry handled by `RetryableLlmClient` wrapper (exponential backoff) + +### Gap Analysis + +KosmoKrator's biggest UX gap is the lack of streaming. Users see nothing until the full response arrives. Adding streaming would require: +1. SSE parsing in `AsyncLlmClient` (read chunked response body) +2. A `StreamingResponse` type that yields partial text/tool_use blocks +3. Renderer updates to display partial text as it arrives +4. Tool execution that can start before streaming completes (optional, advanced) + +The Anthropic API's streaming format is well-documented and PHP's Amp HTTP client supports streaming response bodies natively via `$response->getBody()->read()`. + +--- + +## 4. Tool System + +### Claude Code: Tool Architecture + +**Tool interface** (`Tool.ts`, 30KB): +```typescript +Tool = { + name: string + description(input): string + prompt(): string // Contributes to system prompt + inputSchema: Zod schema + outputSchema: Zod schema + call(input, context, canUseTool, parentMessage, onProgress): Promise<{data: Output}> + checkPermissions(input, context): Promise + validateInput(input, context): Promise + isConcurrencySafe(input): boolean // Can run in parallel + isReadOnly(): boolean // No side effects + shouldDefer: boolean // Deferred loading via ToolSearch + alwaysLoad: boolean // Always in prompt even with ToolSearch +} +``` + +**Tool registration** (`tools.ts`): +- `getAllBaseTools()` returns ~50+ tools with conditional loading via feature flags +- `getTools()` applies permission filters and mode-specific filtering +- `assembleToolPool()` merges built-in + MCP tools, deduplicates (built-ins win), sorts for prompt-cache stability +- Deferred tools have `shouldDefer: true` — only their names appear in the prompt until `ToolSearchTool` fetches their schemas + +**Concurrent execution** (`StreamingToolExecutor.ts`, 531 lines): +- `isConcurrencySafe` flag per tool determines parallel eligibility +- Concurrent-safe tools run in parallel; non-concurrent tools get exclusive access +- Tools queued as `tool_use` blocks arrive from streaming; execution starts immediately +- Bash errors abort sibling tools via `siblingAbortController` +- Three-level abort hierarchy: query → sibling → per-tool + +### KosmoKrator: Tool Architecture + +**ToolInterface** (simple contract): +```php +interface ToolInterface { + public function name(): string; + public function description(): string; + public function parameters(): array; + public function requiredParameters(): array; + public function execute(array $args): string; +} +``` + +**ToolRegistry** (93 lines): +- `register()`, `get()`, `all()`, `toPrismTools()` +- `scoped(AgentContext $context)` — filters by agent type, excludes subagent tool + +**Concurrent execution** (`AgentLoop::partitionConcurrentGroups()`): +- Conservative file-conflict detection: + - Bash + any write tool → sequential + - Multiple writes to same file → sequential + - Read + write to same file → sequential + - No conflicts → one concurrent group +- Within groups: `Amp\async()` for parallel execution +- Across groups: sequential `await()` + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Tool count** | ~50+ built-in + MCP | ~10 built-in | +| **Interface richness** | Input/output schemas, progress, permissions, prompts | Simple name/description/parameters/execute | +| **Concurrency model** | Per-tool `isConcurrencySafe` flag | File-conflict detection heuristic | +| **Deferred loading** | ToolSearch for large tool sets | N/A | +| **Progress reporting** | `onProgress` callback with typed events | None | +| **System prompt contribution** | Each tool can inject via `prompt()` | None | + +### Adoptable Patterns + +1. **`isConcurrencySafe()` method**: Add to `ToolInterface`. Simpler and more reliable than file-conflict heuristics. `file_read`, `glob`, `grep` are always safe; `bash`, `file_edit`, `file_write` are not. + +2. **`isReadOnly()` method**: Useful for plan/explore mode filtering and permission shortcuts. + +3. **`prompt()` method**: Let tools contribute usage instructions to the system prompt dynamically. The SubagentTool could explain its type hierarchy, the GrepTool could document its output modes. + +4. **Progress callbacks**: Enable streaming output from long-running tools (especially Bash). The TUI renderer could show real-time stdout. + +5. **Tool output persistence**: Claude Code saves outputs >100K chars to disk with a preview + path reference. KosmoKrator already has `OutputTruncator` doing this (saves to `~/.kosmokrator/data/truncations/`), so this is parity. + +--- + +## 5. Tool Implementations — Deep Comparison + +### BashTool + +| Feature | Claude Code | KosmoKrator | +|---------|-------------|-------------| +| **Execution** | Bun `exec()` with AbortSignal | Symfony Process | +| **Timeout** | Default ~30s, configurable per-call | 120s configurable | +| **Background tasks** | Auto-background after 15s+; foreground task → background migration mid-execution | Not supported | +| **Sandbox** | SandboxManager integration (optional, can be disabled via `dangerouslyDisableSandbox`) | None | +| **Output capture** | `EndTruncatingAccumulator` (preserves start, truncates end) | Line + byte truncation | +| **Search detection** | `isSearchOrReadBashCommand()` splits on operators, classifies each part | None | +| **Security** | Zsh builtins blocklist (`zmodload`, `sysopen`, `ztcp`, etc.), sed parser, shell operator analysis | Shell metacharacter regex, mutative pattern list | + +**Adoptable**: Zsh builtins blocklist is a strong hardening measure. Add to `GuardianEvaluator`: +```php +private const ZSH_DANGEROUS = ['zmodload', 'emulate', 'sysopen', 'sysread', 'syswrite', 'sysseek', 'zpty', 'ztcp', 'zsocket', 'zf_rm', 'zf_mv', 'zf_ln', 'zf_chmod', 'zf_chown', 'zf_mkdir', 'zf_rmdir', 'zf_chgrp']; +``` + +### FileEditTool + +| Feature | Claude Code | KosmoKrator | +|---------|-------------|-------------| +| **Match algorithm** | `findActualString()` with quote normalization (curly ↔ straight) | Exact `str_replace()` | +| **Concurrent edit detection** | `readFileState` Map with mtime + content hash verification | None | +| **Line ending preservation** | Normalizes to `\n` on read, restores original on write | None | +| **Encoding** | UTF-8 + UTF-16LE detection | UTF-8 only | +| **Diff output** | `getPatchForEdit()` unified diff | `(-N, +M)` line count | +| **File size limit** | 1 GiB max | No explicit limit | + +**Adoptable**: File state tracking is very valuable. When the LLM reads a file and later edits it, verifying the file hasn't changed in between prevents silent data corruption. Implementation: maintain a `readFileState: Map` in `AgentLoop`, check on edit. + +### FileReadTool + +| Feature | Claude Code | KosmoKrator | +|---------|-------------|-------------| +| **Large file handling** | Range reads without loading whole file | Stream-read line-by-line above 10MB | +| **Deduplication** | Same-range reads return `file_unchanged` stub if mtime matches | None | +| **PDF support** | Page-range extraction, token-aware compression | None | +| **Image support** | Format detection, resize/downsample with token limits, base64 | None | +| **Notebook support** | `.ipynb` cell parsing with outputs | None | +| **Dangerous paths** | Block `/dev/zero`, `/dev/random`, `/proc/self/fd/*` | None | + +**Adoptable**: PDF and image support would be valuable additions. PHP libraries: `smalot/pdfparser` for PDFs, `intervention/image` for image processing. Dangerous path blocking is a good security hardening. + +### GrepTool + +| Feature | Claude Code | KosmoKrator | +|---------|-------------|-------------| +| **Backend** | Ripgrep via args array | Ripgrep (preferred) or grep | +| **Output modes** | `content`, `files_with_matches`, `count` with pagination (head_limit + offset) | Single mode, max 50 matches | +| **Multiline** | `-U --multiline-dotall` flag | Not supported | +| **VCS exclusion** | Automatic `.git`, `.svn`, `.hg` exclusion | Via ripgrep defaults | +| **Sorting** | Files sorted by mtime descending | Not sorted | +| **Default limit** | 250 results (`DEFAULT_HEAD_LIMIT`) | 50 results | + +**Adoptable**: Output modes (especially `files_with_matches` for quick scanning), multiline support, and higher default limits. The pagination pattern (offset + head_limit) is useful for browsing large result sets. + +### WebFetchTool (Claude Code only) + +```typescript +// Permission: preapproved hosts auto-allow, others need approval +// Content: domain:hostname used for permission matching +// Pipeline: fetch → HTML → markdown → optional Haiku summarization +// Cache: 15-minute URL result cache +// Large content: persisted to disk with size annotation +``` + +**Adoptable as new tool**: Use `league/html-to-markdown` or `readability-php` for HTML → markdown conversion. The preapproved host pattern is good UX (GitHub, MDN, StackOverflow, etc. don't need approval). + +### WebSearchTool (Claude Code only) + +```typescript +// Uses native Anthropic web_search_20250305 server tool +// Sends a sub-query to the API with web_search tool schema +// Max 8 searches per request (hardcoded) +// Results: title + URL pairs + text summaries +``` + +**Adoptable as new tool**: Integrate a search API (Tavily, Brave Search, SerpAPI). The implementation pattern of using an LLM sub-call with a server tool is interesting but can be simplified to a direct API call for third-party search providers. + +### ToolSearchTool (Claude Code only) + +**Deferred tool loading** for managing large tool sets: +- Tools with `shouldDefer: true` only show their names in the prompt +- LLM calls `ToolSearch` with a query to fetch full schemas +- Search algorithm: keyword scoring on tool name parts + description + searchHint +- Direct selection: `select:ToolName1,ToolName2` for exact fetches +- MCP tool name parsing: `mcp__github__list_repos` → keywords `[github, list, repos]` + +**Adoptable**: Becomes important when KosmoKrator adds MCP support (potentially dozens of external tools). Not needed at current tool count (~10). + +--- + +## 6. Subagent & Multi-Agent System + +### Claude Code: AgentTool + +**Spawning modes** (`AgentTool.tsx`, lines 686-1200): + +1. **Synchronous**: Run agent inline, block parent, return result +2. **Asynchronous**: Launch background agent, return immediately, inject result when done +3. **Remote**: Teleport to CCR environment (cloud execution) +4. **Auto-background**: Start synchronous, auto-migrate to background after 120s + +**Agent types** (built-in): +- **General Purpose**: Full read/write access +- **Explore**: Read-only code exploration +- **Plan**: Read-only architecture & design +- **Verification**: Adversarial testing (tries to break the implementation) +- **Claude Code Guide**: Documentation specialist +- **Fork**: Inherits parent's system prompt (cache-sharing optimization) +- **Custom**: Loaded from `~/.claude/agents/` as markdown with frontmatter + +**Worktree isolation** (`EnterWorktreeTool`): +```typescript +const worktreeInfo = await createAgentWorktree(slug); +// Agent works in isolated git branch +// On completion: check for changes +// - No changes → clean up worktree +// - Has changes → preserve with branch name +``` + +**Agent communication**: +- `SendMessageTool`: Agents send messages to each other by ID +- `TaskNotification` XML in user messages (coordinator pattern) +- Scratchpad directory for durable cross-worker state + +### KosmoKrator: SubagentOrchestrator + +**SubagentOrchestrator.php** (224 lines): +- Manages agent futures using Amp fibers +- Dependency resolution: agents wait for dependencies before starting +- Group-based sequential execution via `LocalSemaphore(1)` +- Background mode: results stored in `pendingResults`, injected when parent checks + +**SubagentFactory.php** (163 lines): +- Creates fresh `AgentLoop` instances with scoped tool registry +- Builds system prompt: base + type suffix + environment context +- If `canSpawn()`: registers recursive SubagentTool +- Mode mapping: General→Edit, Explore→Ask, Plan→Plan +- Hardcoded subagent pruner: `ContextPruner(20_000, 10_000)` + +**AgentContext.php** (54 lines): +- Immutable context traveling down the tree +- `canSpawn()`: `depth < maxDepth - 1` +- `childContext()`: validates type inheritance, increments depth + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Agent types** | 7 built-in + custom from files | 3 (General, Explore, Plan) | +| **Custom agents** | `~/.claude/agents/` markdown files | Not supported | +| **Verification agent** | Adversarial tester with strict output format | Not supported | +| **Worktree isolation** | Git worktree per agent | Not supported | +| **Inter-agent messaging** | SendMessageTool | Dependency results appended to task | +| **Auto-backgrounding** | After 120s, migrate sync→async | Not supported | +| **Agent colors** | Unique color per agent in UI | Not supported | +| **Max depth** | Configurable (default 3) | Configurable (default 3) | +| **Coordinator mode** | Multi-worker orchestration with task notifications | Not supported | + +### Adoptable Patterns + +1. **Custom agent definitions**: Load from `~/.kosmokrator/agents/` as markdown with frontmatter: + ```yaml + --- + name: reviewer + description: Code review specialist + type: explore + model: inherit + when-to-use: When the user asks for a code review + --- + You are a code review specialist. Focus on... + ``` + +2. **Verification agent**: An adversarial testing agent that tries to break implementations. Very powerful for quality assurance. System prompt enforces: run commands (don't just read code), structured output format with Command/Output/Result blocks, explicit VERDICT line. + +3. **Worktree isolation**: Create a `GitWorktreeTool` that creates temporary worktrees for experimental work. PHP's `Process` class can run `git worktree add/remove`. + +4. **Auto-backgrounding**: After N seconds of a synchronous subagent running, automatically migrate to background mode. Requires the Amp fiber to support mid-execution mode switch. + +5. **Agent color assignment**: Assign unique colors from `Theme` palette per agent depth/ID. Small UX win for visual differentiation. + +--- + +## 7. Context Management & Compaction + +### Claude Code: 5-Layer Strategy + +Claude Code has five layers of context pressure relief, applied in order: + +#### Layer 1: Microcompaction (cache-based) +- Uses Anthropic API's `cache_edits` to delete individual tool results without invalidating the cached prompt prefix +- Per-tool-result targeting: FILE_READ, SHELL, GREP, GLOB, WEB_SEARCH, WEB_FETCH, FILE_EDIT, FILE_WRITE results clearable +- Model-specific: only Claude Sonnet/Opus support cache editing +- Main thread only (subagents excluded) + +#### Layer 2: Time-based Microcompaction +- Trigger: `(now - lastAssistantMessage) > 60 minutes` (server cache TTL) +- Action: Clear tool results except 5 most recent +- Sentinel: `'[Old tool result content cleared]'` +- Rationale: after 60min the server cache is cold anyway, so clearing stale results costs nothing + +#### Layer 3: Context Collapse (feature-gated) +- Advanced selective message archiving that preserves granular detail longer +- Commit point: 90% of effective context +- Blocking spawn threshold: 95% +- When enabled, auto-compaction is disabled to prevent racing + +#### Layer 4: Auto-Compaction (LLM summarization) +- **Threshold**: `effectiveContextWindow - 13,000` tokens (~93% of usable window) +- **Effective window**: `contextWindowSize - min(maxOutputTokens, 20,000)` (reserves summary output budget) +- **Circuit breaker**: Stops after 3 consecutive failures +- **Post-compaction restoration**: + - Re-attach up to 5 recently-read files (50K token budget, 5K per file) + - Re-inject recently-used skills (25K token budget, 5K per skill) + - Preserve async agent attachments and plan mode state + +#### Layer 5: Session Memory Compaction +- Background extraction that summarizes old conversation segments +- Config: min 10K tokens preserved, max 40K, min 5 text-block messages +- Preserves API invariants (tool_use/tool_result pairing, thinking block grouping) + +### KosmoKrator: 3-Layer Strategy + +#### Layer 1: ToolResultDeduplicator +Three-tier deduplication: +1. **Exact match**: Same tool, args, result hash → `'[Superseded — identical result]'` +2. **Stale after edit**: File read superseded by write + later re-read → `'[Superseded — file was re-read after modification]'` +3. **Subset subsumption**: Grep on file subsumed by later full file_read → `'[Superseded — content included in later file_read]'` + +#### Layer 2: ContextPruner +- Protects last 2 user turns (40K tokens default) +- Replaces older tool results with `'[Old tool result content cleared]'` +- Only prunes if savings >= 20K tokens +- Applied after deduplication + +#### Layer 3: ContextCompactor +- **Threshold**: 60% of context window (configurable) +- LLM summarization with dedicated compaction prompt +- Keeps last 3 user turns (configurable) +- Formats messages for compaction (truncates each to 2000 chars, total cap 100K chars) +- Also extracts durable memories (project, user, decision types) from summary + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Layers** | 5 | 3 | +| **Trigger threshold** | ~93% of usable window | 60% of context window | +| **Post-compaction restoration** | 5 files (50K), skills (25K), agent state | None | +| **Cache-aware compaction** | Yes (cache_edits API, time-based clearing) | No | +| **Compaction prompt** | Detailed 9-section prompt with structured output | Simple summarization prompt | +| **Memory extraction** | Separate background agent after each turn | During compaction only | +| **Circuit breaker** | 3 consecutive failures → stop | None | +| **Deduplication** | Basic (per tool name) | Advanced 3-tier (exact, stale, subsumption) | + +### Adoptable Patterns + +1. **Post-compaction file restoration** (HIGH PRIORITY): After compacting, re-read and attach the most recently-read files. This prevents the common failure mode where the agent "forgets" what files it was working with after compaction. + ```php + // In ContextCompactor::compact(): + $recentFiles = $this->extractRecentFileReads($history, limit: 5, tokenBudget: 50000); + foreach ($recentFiles as $file) { + $content = substr(file_get_contents($file), 0, 5000 * 4); // ~5K tokens + $summary .= "\n\n## Recently read: {$file}\n```\n{$content}\n```"; + } + ``` + +2. **Post-compaction instruction re-injection**: Re-inject KOSMOKRATOR.md instructions after compaction since they may have been summarized away. + +3. **Circuit breaker**: Stop auto-compacting after 3 consecutive failures. Add a `$consecutiveCompactFailures` counter to `AgentLoop`. + +4. **Raise compaction threshold**: 60% is conservative. Claude Code uses ~93%. Consider raising to 75-80% to preserve more context before compacting. + +5. **Time-based result clearing**: If a conversation has been idle for >60 minutes, clear old tool results on resume (they're stale anyway). Simple timestamp check in `AgentLoop::preFlightContextCheck()`. + +--- + +## 8. Token Estimation & Budget + +### Claude Code + +**Estimation formula** (`tokenEstimation.ts`): +- Text: `length / 4` bytes per token (default) +- JSON files: `length / 2` (denser tokenization) +- Images/documents: 2000 tokens flat estimate +- Message-level padding: `ceil(total * 4/3)` (33% conservative multiplier) + +**Budget tracking** (`tokenBudget.ts`): +``` +COMPLETION_THRESHOLD = 0.9 // Stop at 90% budget +DIMINISHING_THRESHOLD = 500 // Tokens per turn threshold +Detection: 3+ continuations AND last 2 deltas both < 500 tokens +``` + +**Continuation logic**: +- Continue if: under 90% budget AND making progress +- Stop if: diminishing returns (3+ turns, <500 tokens/turn) OR any prior continuation +- Nudge messages tell the LLM remaining budget + +### KosmoKrator + +**Estimation formula** (`TokenEstimator.php`): +- Text: `ceil(mb_strlen($text) / 4)` — 4 characters per token +- No file-type-specific adjustment +- No padding multiplier +- No budget tracking or continuation logic + +### Adoptable Patterns + +1. **JSON-specific estimation**: Use `length / 2` for JSON content (important for tool results which are often JSON). + +2. **Conservative padding**: Apply a 1.33x multiplier to total estimates. Token estimation is inherently imprecise; padding prevents unexpected overflows. + +3. **Budget tracking**: Optional feature for cost-conscious users. Track cumulative tokens per turn, stop if diminishing returns detected. + +4. **Diminishing returns detection**: If the agent has been running for 3+ turns and the last 2 turns produced <500 tokens each, it's likely stuck in a loop. Inject a "you seem stuck, consider wrapping up" nudge. + +--- + +## 9. Permission System + +### Claude Code: Multi-Source Rules + +**Permission modes**: +- `default` — Prompt for all 'ask' decisions +- `acceptEdits` — Auto-allow file edits in CWD, prompt elsewhere +- `bypassPermissions` — Auto-allow all (except deny rules and safety checks) +- `auto` — AI classifier decides (ANT-only, uses transcript/bash classifier) +- `plan` — Shows action plan instead of executing +- `dontAsk` — Silently deny all 'ask' decisions + +**Rule sources** (8 levels, priority order): +`policySettings > flagSettings > projectSettings > localSettings > userSettings > cliArg > command > session` + +**Rule format**: `ToolName(content)` with wildcard support: +- `Bash(npm *)` — glob pattern, matches any npm command +- `Bash(npm:*)` — legacy prefix syntax +- `Bash(curl https://\*.com)` — escaped asterisk +- `mcp__server1__*` — MCP server-level rule +- `Agent(Explore)` — deny specific agent type + +**Evaluation order** (`permissions.ts` lines 1158-1320): +1. Check DENY rules (absolute, no override) +2. Check entire tool ASK rule +3. Call `tool.checkPermissions()` (tool-specific logic) +4. Check mode (bypass, acceptEdits, etc.) +5. Check ALLOW rules +6. Convert passthrough to ask +7. Apply dontAsk → deny conversion +8. Apply auto → classifier +9. Fall back to permission prompt + +**Session grants**: In-memory, non-persisted rules stored in `alwaysAllowRules['session']`. Discarded when session ends. + +**Denial tracking** (auto mode): +- `consecutiveDenials >= 3` OR `totalDenials >= 20` → fall back to user prompting +- Reset consecutive on allow, increment both on deny + +### KosmoKrator: 3-Mode System + +**Permission modes** (`PermissionMode`): +- `Guardian` — Heuristic auto-approve (safe reads + project-scoped writes + safe bash) +- `Argus` — Always ask +- `Prometheus` — Auto-approve everything + +**Rule evaluation** (`PermissionEvaluator.php` lines 20-71): +1. Blocked paths check (absolute deny) +2. Session grants check +3. Rule evaluation (first matching rule wins) +4. Mode-specific handling: + - Prometheus: auto-approve Ask + - Guardian: delegate to `GuardianEvaluator::shouldAutoApprove()` + - Argus: return Ask + +**Guardian heuristics** (`GuardianEvaluator.php`): +- Always safe: `file_read`, `glob`, `grep`, task tools, memory tools +- File writes safe if inside project root +- Bash safe if: no shell metacharacters (`/[;&|`$><\n]/`) AND not matching mutative patterns +- Mutative patterns: `rm`, `mv`, `git commit`, `npm install`, `docker`, `kubectl`, etc. + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Modes** | 6 | 3 | +| **Rule sources** | 8 levels with priority | Config + session grants | +| **Wildcard rules** | Glob patterns (`npm *`) | Static pattern matching | +| **AI classifier** | Yes (auto mode) | No | +| **Safety checks** | Bypass-immune (always prompt for .git/, .claude/, shell configs) | Blocked paths only | +| **Denial tracking** | Consecutive + total limits | None | +| **Zsh builtins** | Blocked (`zmodload`, `sysopen`, `ztcp`, etc.) | Not blocked | + +### Adoptable Patterns + +1. **Wildcard permission rules** (HIGH PRIORITY): Add glob pattern matching to `PermissionRule::matches()`. This enables rules like "allow all git commands" (`Bash(git *)`) or "allow all npm scripts" (`Bash(npm run *)`). + +2. **Bypass-immune safety checks**: Always prompt for operations on `.git/`, `.kosmokrator/`, shell config files (`.bashrc`, `.zshrc`, `.profile`), regardless of permission mode. + +3. **Zsh builtins blocklist**: Add to `GuardianEvaluator`. These builtins can bypass sandboxing: + ```php + private const ZSH_DANGEROUS = [ + 'zmodload', 'emulate', 'sysopen', 'sysread', 'syswrite', + 'sysseek', 'zpty', 'ztcp', 'zsocket', + 'zf_rm', 'zf_mv', 'zf_ln', 'zf_chmod', 'zf_chown', + 'zf_mkdir', 'zf_rmdir', 'zf_chgrp', + ]; + ``` + +4. **`dontAsk` mode equivalent**: Useful for fully automated/CI pipelines where there's no user to prompt. Silently deny rather than hanging. + +--- + +## 10. Hook System + +### Claude Code: Shell Command Hooks + +Claude Code supports external shell commands that execute in response to agent events. + +**Hook event types** (`types/hooks.ts`): +- `PreToolUse` — Before tool execution (can block, modify input, add context) +- `PostToolUse` — After tool success +- `PostToolUseFailure` — After tool failure +- `PermissionDenied` — Auto-mode classifier denied +- `PermissionRequest` — Permission prompt triggered +- `Notification` — Notification event +- `SessionStart` — Session initialization +- `UserPromptSubmit` — User message submitted +- `FileChanged` — Watched file changed +- `CwdChanged` — Working directory changed +- `SubagentStart` — Subagent spawned +- `WorktreeCreate` — Worktree created + +**Hook output** (PreToolUse example): +```typescript +{ + permissionDecision?: 'approve' | 'block', + permissionDecisionReason?: string, + updatedInput?: Record, + additionalContext?: string, +} +``` + +**Timeout**: 10 minutes for tool hooks, 1.5 seconds for session-end hooks. + +**Configuration**: In `settings.json`: +```json +{ + "hooks": { + "PreToolUse": [{ + "matcher": { "tool_name": "Bash" }, + "command": "~/.claude/hooks/lint-bash.sh" + }] + } +} +``` + +### KosmoKrator: No Hook System + +KosmoKrator has no equivalent hook system. Permission evaluation is the closest analog, but it doesn't support external command execution or input modification. + +### Adoptable Pattern + +A hook system is very powerful for customization without code changes. Implementation: + +```yaml +# ~/.kosmokrator/hooks.yaml +hooks: + PreToolUse: + - matcher: { tool_name: "bash" } + command: "~/.kosmokrator/hooks/validate-bash.sh" + timeout: 60 + PostToolUse: + - matcher: { tool_name: "file_edit" } + command: "~/.kosmokrator/hooks/format-on-save.sh" + UserPromptSubmit: + - command: "~/.kosmokrator/hooks/log-prompt.sh" +``` + +The hook receives JSON on stdin (tool name, input, context) and outputs JSON to stdout (approve/block/modify). This enables linting, formatting, logging, and custom approval workflows. + +--- + +## 11. Memory System + +### Claude Code: File-Based Persistent Memory + +**Directory structure**: +``` +~/.claude/projects//memory/ +├── MEMORY.md (index, max 200 lines / 25KB, always loaded) +├── user_role.md (individual memory files with frontmatter) +├── feedback_testing.md +└── project_goal.md +``` + +**Memory frontmatter format**: +```markdown +--- +name: {{memory name}} +description: {{one-line hook for relevance matching}} +type: {{user | feedback | project | reference}} +--- +{{content — for feedback/project: rule/fact, **Why:** line, **How to apply:** line}} +``` + +**Memory types** (4 categories): +1. **user**: Role, goals, preferences, knowledge level +2. **feedback**: Guidance on approach (corrections AND confirmations) +3. **project**: Ongoing work, goals, deadlines (not derivable from code) +4. **reference**: Pointers to external systems (Linear, Grafana, Slack) + +**What NOT to save**: Code patterns, git history, debugging recipes, CLAUDE.md content, ephemeral task details. + +**Extraction**: Background agent runs after each turn (feature-gated): +- Max 5 turns per extraction +- Tool restrictions: Read, Grep, Glob, read-only Bash, Edit/Write to memory dir only +- Throttled: every N turns (default 1) +- Pre-injects manifest of existing memories to avoid duplicates +- Analytics tracked: tokens, files written, duration + +**Memory mechanics prompt**: A detailed instruction set injected into the system prompt that teaches the LLM how to proactively save, update, and recall memories. This is the mechanism that makes the LLM autonomously manage its own memory. + +### KosmoKrator: SQLite-Based Memory + +**Storage**: `memories` table in SQLite database +- Columns: id, type, title, content, project, session_id, created_at +- Types: `project`, `user`, `decision`, `compaction` + +**Tools**: `MemorySaveTool`, `MemorySearchTool` + +**Extraction**: During compaction only (in `ContextCompactor::extractMemories()`) +- Calls LLM with `MEMORY_EXTRACTION_PROMPT` +- Parses JSON array: `[{type, title, content}]` +- Validates types, saves to repository + +**Injection**: `MemoryInjector::format()` groups by type into markdown sections + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Storage** | File-based (git-trackable, human-editable) | SQLite rows | +| **Index** | MEMORY.md always loaded in context | All memories injected in system prompt | +| **Types** | 4 (user, feedback, project, reference) | 4 (user, project, decision, compaction) | +| **Extraction trigger** | After each turn (background) | During compaction only | +| **Memory mechanics prompt** | Yes (teaches LLM to proactively save) | No | +| **Relevance decay** | Age tracking, staleness warnings | None | +| **Team sync** | Multi-agent memory sharing (feature-gated) | None | + +### Adoptable Patterns + +1. **Memory mechanics prompt** (HIGH PRIORITY): The single most impactful addition. Claude Code's memory prompt teaches the LLM: + - What types of information to save + - When to save (corrections, confirmations, learning about user) + - What NOT to save (code patterns, git history, debugging recipes) + - How to save (file format, MEMORY.md index) + - When to access memories + - When to verify before recommending + + KosmoKrator should inject an equivalent prompt section that teaches the LLM to use `memory_save` and `memory_search` proactively. + +2. **Post-turn extraction**: Don't wait for compaction to extract memories. Run a lightweight extraction after each turn (or every N turns) to capture feedback and decisions before they're compacted away. + +3. **Feedback type**: Rename `decision` to `feedback` and add explicit guidance about saving both corrections AND confirmations. The body structure `rule → Why → How to apply` is very effective. + +4. **Reference type**: Add for external system pointers (Jira boards, Grafana dashboards, Slack channels). + +--- + +## 12. Skills System + +### Claude Code: Loadable Prompt Templates + +**BundledSkillDefinition** (`bundledSkills.ts`): +```typescript +{ + name: string + description: string + aliases?: string[] + whenToUse?: string + argumentHint?: string + allowedTools?: string[] + model?: string + context?: 'inline' | 'fork' // fork = isolated sub-agent + agent?: string + files?: Record // Reference files extracted to disk + getPromptForCommand: (args, context) => Promise +} +``` + +**User-defined skills**: Markdown files in `~/.claude/skills/` or `.claude/skills/`: +```markdown +--- +name: review +description: Review code changes for quality +allowed-tools: file_read, grep, glob +context: fork +model: inherit +--- +Review the current git diff for bugs, security issues, and code quality... +``` + +**Skill execution**: Via `SkillTool` — either inline (added to conversation) or forked (isolated sub-agent with own context). + +**Bundled skills include**: `/commit`, `/review-pr`, `/simplify`, `/loop`, `/debug`, `/remember`, `/verify`, `/schedule`, `/claude-api`, `/keybindings`, `/update-config`, and many more. + +### KosmoKrator: Slash Commands + +KosmoKrator has slash commands (`/mode`, `/sessions`, `/resume`, `/settings`, etc.) but these are UI commands, not LLM-driven skills. There's no equivalent of loadable prompt templates. + +### Adoptable Pattern + +A skills system bridges the gap between slash commands and full agent modes: + +```php +// ~/.kosmokrator/skills/review/SKILL.md +// --- +// name: review +// description: Review code changes +// allowed-tools: file_read, grep, glob +// context: fork +// --- +// Review the current git diff for bugs... + +class SkillLoader { + public function loadFromDirectory(string $dir): array; + public function execute(Skill $skill, string $args, AgentLoop $agent): string; +} +``` + +Skills invoked via `/review` would either inject the prompt inline or fork a subagent with the skill's prompt and tool restrictions. This is a powerful extensibility mechanism that users can customize without touching code. + +--- + +## 13. System Prompt Assembly + +### Claude Code: Multi-Part Prompt + +The system prompt is assembled from multiple sources: + +**Static sections** (`prompts.ts`): +1. **Intro**: "You are an interactive agent that helps users with software engineering tasks..." +2. **System**: Tool execution, permission modes, hooks, context compression +3. **Doing tasks**: Engineering best practices, code quality, no unnecessary changes +4. **Executing actions with care**: Reversibility, blast radius, confirmation for risky actions +5. **Using your tools**: Dedicated tools over Bash, parallel calls, task management +6. **Tone and style**: No emojis, concise, file_path:line_number references + +**Dynamic sections**: +- Tool-specific guidance (Agent, Skills, ToolSearch) +- Verification agent contract (if enabled) +- Memory mechanics prompt (if auto-memory enabled) + +**Context layers** (`queryContext.ts`): +- `defaultSystemPrompt[]` — Static prompt array +- `userContext.claudeMd` — CLAUDE.md files from directory hierarchy +- `userContext.currentDate` — "Today's date is YYYY-MM-DD" +- `systemContext.gitStatus` — Branch, status, recent commits + +**Cache boundary** (`SYSTEM_PROMPT_DYNAMIC_BOUNDARY`): +Everything before this marker is globally cacheable. Everything after is session-specific. + +### KosmoKrator: Prompt Assembly + +**AgentCommand.php** (lines 131-134): +```php +$systemPrompt = $basePrompt; // From config +$systemPrompt .= MemoryInjector::format($memories); +$systemPrompt .= InstructionLoader::gather(); +$systemPrompt .= EnvironmentContext::gather(); +``` + +**AgentLoop** refreshes system prompt each turn: +```php +$prompt = $this->baseSystemPrompt; +$prompt .= $this->mode->systemPromptSuffix(); +$prompt .= $this->formatTaskContext(); +``` + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Base prompt size** | ~914 lines, very detailed | Configurable, shorter | +| **Tool prompt contributions** | Each tool can inject via `prompt()` | None | +| **Memory mechanics** | Full teaching prompt for auto-memory | None | +| **Cache boundary** | Explicit marker for API caching | None | +| **Dynamic refresh** | Memoized context (cached per conversation) | Refreshed each turn | +| **Git status** | Branch, status (2000 char cap), 5 recent commits | Branch, root | + +### Adoptable Patterns + +1. **Memory mechanics prompt injection**: Add a dedicated section teaching the LLM how to use `memory_save` and `memory_search` proactively. + +2. **Tool prompt contributions**: Add `systemPromptContribution(): ?string` to `ToolInterface`. The SubagentTool could explain type hierarchy and usage patterns. + +3. **Richer git context**: Include `git status --short` (capped at 2000 chars) and last 5 commit messages in the system prompt. Gives the LLM better awareness of the project state. + +--- + +## 14. Session & State Management + +### Claude Code: File-Based Sessions + +- One JSON file per session, written fire-and-forget via `recordTranscript()` +- `history.jsonl` for conversation history (max 100 entries) +- Pasted content stored externally when >1KB +- Session resume via message deserialization from log files +- Remote session support via WebSocket (`/v1/sessions/ws/{id}/subscribe`) + +### KosmoKrator: SQLite Sessions + +- `sessions` table: id, project, title, model, created_at, updated_at +- `messages` table: role, content, tool_calls, tool_results, tokens +- `settings` table: scope-based KV store (global, project-specific) +- `memories` table: type, title, content, project, session_id + +### Assessment + +KosmoKrator's SQLite approach is actually superior for: +- Atomic writes (WAL mode) +- Efficient queries (session listing, message search) +- Structured data (vs JSON parsing) +- Concurrent access safety + +No changes needed here. SQLite is the right choice. + +--- + +## 15. Task System + +### Claude Code + +**Task types**: `local_bash`, `local_agent`, `remote_agent`, `in_process_teammate`, `local_workflow`, `monitor_mcp`, `dream` + +**Task statuses**: `pending`, `running`, `completed`, `failed`, `killed` + +**Tools**: TaskCreateTool, TaskUpdateTool, TaskListTool, TaskGetTool, TaskOutputTool, TaskStopTool + +**Features**: +- Blocking relationships (addBlocks, addBlockedBy) +- Owner assignment for multi-agent teams +- Mailbox communication for teammates +- Auto-expand UI on task create/update +- Task completion hooks + +### KosmoKrator + +**Task statuses**: `Pending`, `InProgress`, `Completed`, `Cancelled` + +**Tools**: TaskCreateTool, TaskUpdateTool, TaskListTool, TaskGetTool + +**Features**: +- Parent-child relationships +- Blocking relationships (bidirectional) +- Auto-complete parents when all children terminal +- Tree rendering (text + ANSI) +- In-memory storage (no persistence) + +### Assessment + +KosmoKrator's task system is well-designed and covers the essential features. Claude Code's additions (task types, owner assignment, mailbox communication) are mostly relevant for multi-agent teams, which is a future feature. No immediate changes needed. + +--- + +## 16. UI & Rendering + +### Claude Code: React/Ink Custom Framework + +Claude Code has essentially built a **custom terminal GUI framework**: +- Custom React reconciler for terminal rendering +- Yoga-based flexbox layout engine +- Double-buffered frame rendering with diff optimization +- Mouse tracking (mode-1003), hit testing, text selection +- Bidirectional text support +- Scrollable containers, buttons, OSC 8 hyperlinks +- Keyboard chord parsing with configurable bindings +- Search highlighting across screen buffer +- Alternate screen mode (full-screen) + +This is approximately **10,000+ lines of UI infrastructure**. + +### KosmoKrator: Symfony TUI + ANSI + +- **TuiRenderer**: Symfony TUI widgets (PlanApprovalWidget, QuestionWidget, CollapsibleWidget, etc.) +- **AnsiRenderer**: Pure ANSI escape codes, readline input, MarkdownToAnsi for formatting +- **Theme**: Shared color palette, tool icons, planetary symbols +- **MarkdownToAnsi**: CommonMark + GFM extensions, Tempest Highlighter for code blocks + +### Assessment + +KosmoKrator's dual-renderer approach is pragmatic and effective. Trying to replicate Claude Code's custom Ink framework would be massive effort for marginal gain. Symfony TUI provides adequate interactivity. + +### Adoptable Patterns + +1. **Cost display**: Show running cost in the context bar. Use `ModelCatalog` pricing data: + ``` + $cost = ($tokensIn / 1_000_000) * $inputPrice + ($tokensOut / 1_000_000) * $outputPrice; + ``` + +2. **Collapsible tool output**: Claude Code collapses search/read tool results into summaries ("Found 3 files in 12ms"). KosmoKrator has `CollapsibleWidget` in TUI mode — ensure it's used for all tool results. + +3. **Thinking duration display**: Show "Thinking... (2.3s)" when the LLM is processing. Claude Code shows thinking state for minimum 2 seconds, then displays the duration. + +--- + +## 17. Cost Tracking + +### Claude Code + +**Formula** (`modelCost.ts`): +``` +cost = (input / 1M) * inputPrice + + (output / 1M) * outputPrice + + (cacheRead / 1M) * cacheReadPrice + + (cacheCreation / 1M) * cacheWritePrice + + webSearchRequests * webSearchPrice +``` + +**Pricing tiers** (per 1M tokens): +| Model | Input | Output | +|-------|-------|--------| +| Sonnet 4.x | $3 | $15 | +| Opus 4.0/4.1 | $15 | $75 | +| Opus 4.5 | $5 | $25 | +| Opus 4.6 (fast) | $30 | $150 | +| Haiku 3.5 | $0.80 | $4 | +| Haiku 4.5 | $1 | $5 | + +**Display**: On exit, shows total cost, API duration, wall duration, lines added/removed, per-model breakdown. + +### KosmoKrator + +KosmoKrator has `ModelCatalog` with pricing data and tracks `sessionTokensIn`/`sessionTokensOut` in `AgentLoop`, but doesn't calculate or display USD cost. + +### Adoptable Pattern + +Add cost calculation and display: +```php +$cost = ($this->sessionTokensIn / 1_000_000) * $this->models->inputPrice($model) + + ($this->sessionTokensOut / 1_000_000) * $this->models->outputPrice($model); +$this->ui->showStatus(sprintf('Session cost: $%.4f', $cost)); +``` + +--- + +## 18. Command / Slash Command System + +### Claude Code: ~100+ Commands + +Categories: +- **Prompt commands**: Invoke model with skill prompt (`/commit`, `/review`, `/simplify`, `/loop`) +- **Action commands**: Immediate execution (`/clear`, `/exit`, `/config`, `/model`, `/compact`) +- **Internal commands**: Developer-only (`/breakCache`, `/mockLimits`, `/debugToolCall`) + +Command availability filtered by: feature flags, user type (ant/external), subscription level. + +### KosmoKrator: ~15 Commands + +- `/mode`, `/clear`, `/compact`, `/sessions`, `/resume`, `/new`, `/quit` +- `/memories`, `/forget`, `/settings` +- `/guardian`, `/argus`, `/prometheus` +- `/tasks-clear`, `/theogony`, `/seed` + +### Assessment + +KosmoKrator has the essential commands. Additional commands can be added incrementally as features are implemented (skills, MCP, etc.). + +--- + +## 19. Keybinding System + +### Claude Code + +Fully configurable keybindings via `~/.claude/keybindings.json`: +- Context-aware: Global, Chat, Autocomplete, Confirmation, Help, Transcript, etc. +- Actions: `app:interrupt`, `app:exit`, `app:toggleTodos`, `app:toggleTranscript`, etc. +- Chord support: `ctrl+k ctrl+s` (multi-key sequences) +- Special keys: `esc`, `return`, `space`, arrows +- User bindings merged with defaults + +### KosmoKrator + +No keybinding customization. + +### Adoptable Pattern + +Medium priority. Add `~/.kosmokrator/keybindings.yaml` for common actions: +```yaml +keybindings: + chat: + submit: ctrl+return + cancel: ctrl+c + mode_cycle: shift+tab +``` + +--- + +## 20. MCP Integration + +### Claude Code + +Full Model Context Protocol support: +- Transport types: `stdio`, `sse`, `http`, `ws`, `sdk` +- OAuth token refresh for authenticated servers +- Tool integration: each MCP tool becomes a `mcp__server__action` tool +- Resource listing and reading +- Skill builders from MCP resources +- Channel permissions per server +- Config scopes: local, user, project, dynamic, enterprise, managed + +### KosmoKrator + +No MCP support. + +### Adoptable Pattern + +MCP integration is a HIGH PRIORITY addition. PHP MCP client libraries exist. Start with `stdio` transport (simplest) to connect to local MCP servers. Each server's tools register into the `ToolRegistry` with the `mcp__server__action` naming convention. + +--- + +## 21. Feature Comparison Matrix + +| Feature | Claude Code | KosmoKrator | Gap | +|---------|:-----------:|:-----------:|:---:| +| **Core agent loop** | Full | Full | - | +| **Streaming responses** | Full | None | HIGH | +| **Tool system** | 50+ tools | 10 tools | MEDIUM | +| **Subagent system** | Full + custom | Full (3 types) | LOW | +| **Context compaction** | 5 layers | 3 layers | MEDIUM | +| **Post-compact restoration** | Full | None | HIGH | +| **Token budget tracking** | Full | None | LOW | +| **Permission system** | 6 modes + wildcards | 3 modes | MEDIUM | +| **Hook system** | Full (12 event types) | None | MEDIUM | +| **Memory system** | File-based + extraction | SQLite + compaction-only | MEDIUM | +| **Memory mechanics prompt** | Full | None | HIGH | +| **Skills system** | Full (bundled + user) | None | HIGH | +| **MCP integration** | Full | None | HIGH | +| **Web fetch** | Full | None | HIGH | +| **Web search** | Full | None | HIGH | +| **PDF/Image reading** | Full | None | MEDIUM | +| **Cost tracking display** | Full | Partial (no display) | LOW | +| **Keybinding customization** | Full | None | LOW | +| **Git worktree isolation** | Full | None | MEDIUM | +| **Custom agent definitions** | Full | None | MEDIUM | +| **Verification agent** | Full | None | MEDIUM | +| **Voice mode** | Full | None | LOW | +| **Remote sessions** | Full | None | LOW | +| **Deferred tool loading** | Full | None | LOW | +| **File state tracking (edits)** | Full | None | MEDIUM | +| **Session persistence** | JSON files | SQLite | KosmoKrator better | +| **Config system** | JSON | YAML (multi-level) | KosmoKrator better | +| **Dual renderer** | React/Ink | Symfony TUI + ANSI | Parity | +| **Task system** | Full + teams | Full (in-memory) | Parity | +| **Mythology theming** | None | Full | KosmoKrator unique | + +--- + +## 22. Concrete Thresholds & Constants + +### Claude Code + +| Constant | Value | Location | +|----------|-------|----------| +| Auto-compact buffer | 13,000 tokens | `autoCompact.ts` | +| Auto-compact threshold | ~93% of effective window | Calculated | +| Warning threshold buffer | 20,000 tokens | `autoCompact.ts` | +| Max compaction output | 20,000 tokens | `autoCompact.ts` | +| Max compaction failures | 3 consecutive | `autoCompact.ts` | +| Post-compact file budget | 50,000 tokens | `compact.ts` | +| Post-compact file cap | 5,000 tokens/file | `compact.ts` | +| Post-compact max files | 5 | `compact.ts` | +| Post-compact skill budget | 25,000 tokens | `compact.ts` | +| Post-compact skill cap | 5,000 tokens/skill | `compact.ts` | +| Session memory min tokens | 10,000 | `sessionMemoryCompact.ts` | +| Session memory max tokens | 40,000 | `sessionMemoryCompact.ts` | +| Session memory min messages | 5 | `sessionMemoryCompact.ts` | +| Budget completion threshold | 90% | `tokenBudget.ts` | +| Diminishing returns threshold | 500 tokens/turn | `tokenBudget.ts` | +| Diminishing detection | 3+ continuations | `tokenBudget.ts` | +| Time-based MC gap | 60 minutes | `timeBasedMCConfig.ts` | +| Time-based MC keep recent | 5 tool results | `timeBasedMCConfig.ts` | +| Text token estimate | length / 4 | `tokenEstimation.ts` | +| JSON token estimate | length / 2 | `tokenEstimation.ts` | +| Message token padding | 4/3x multiplier | `tokenEstimation.ts` | +| Image/document tokens | 2,000 flat | `tokenEstimation.ts` | +| Stream idle timeout | 90,000 ms | `claude.ts` | +| Agent auto-background | 120,000 ms | `AgentTool.tsx` | +| Bash progress threshold | 2,000 ms | `BashTool.tsx` | +| Grep default head limit | 250 results | `GrepTool.ts` | +| WebSearch max uses | 8 per request | `WebSearchTool.ts` | +| WebFetch cache TTL | 15 minutes | `WebFetchTool.ts` | +| MEMORY.md max lines | 200 | `memdir.ts` | +| MEMORY.md max bytes | 25,000 | `memdir.ts` | +| Memory scan max files | 200 | `memoryScan.ts` | +| History max items | 100 | `history.ts` | +| Denial max consecutive | 3 | `denialTracking.ts` | +| Denial max total | 20 | `denialTracking.ts` | +| Tool hook timeout | 10 minutes | `hooks.ts` | +| Session-end hook timeout | 1,500 ms | `hooks.ts` | + +### KosmoKrator + +| Constant | Value | Location | +|----------|-------|----------| +| Compact threshold | 60% of context window | `ContextCompactor.php` | +| Compact keep recent | 3 user turns | `ContextCompactor.php` | +| Compact max format chars | 100,000 | `ContextCompactor.php` | +| Pruner protect tokens | 40,000 | `ContextPruner.php` | +| Pruner min savings | 20,000 | `ContextPruner.php` | +| Subagent pruner protect | 20,000 | `SubagentFactory.php` | +| Subagent pruner min savings | 10,000 | `SubagentFactory.php` | +| Token estimate | 4 chars/token | `TokenEstimator.php` | +| Output max lines | 2,000 | `OutputTruncator.php` | +| Output max bytes | 50,000 | `OutputTruncator.php` | +| Truncation cleanup age | 86,400s (1 day) | `OutputTruncator.php` | +| Bash timeout | 120s | Configurable | +| Grep timeout | 30s | `GrepTool.php` | +| Grep max matches | 50 | `GrepTool.php` | +| HTTP transfer timeout | 600s | `AsyncLlmClient.php` | +| HTTP inactivity timeout | 300s | `AsyncLlmClient.php` | +| Retry cap | 300s | `AsyncLlmClient.php` | +| File read large threshold | 10 MB | `FileReadTool.php` | +| File read max lines | 5,000 | `FileReadTool.php` | +| Memory warning | 50 MB | `AgentLoop.php` | +| Context overflow retries | 3 | `AgentLoop.php` | +| Subagent max depth | 3 | Configurable | +| Guardian shell metachar pattern | `/[;&\|`$><\n]/` | `GuardianEvaluator.php` | +| Pre-flight check | 80% of context | `AgentLoop.php` | + +--- + +## 23. Inspiration Roadmap + +### Tier 1 — High Impact, Moderate Effort + +| # | Feature | Effort | Impact | Notes | +|---|---------|--------|--------|-------| +| 1 | **Streaming LLM responses** | Medium | Very High | SSE streaming in AsyncLlmClient, renderer updates for partial text | +| 2 | **WebFetch tool** | Low | High | URL → markdown via `league/html-to-markdown`, preapproved hosts | +| 3 | **WebSearch tool** | Low | High | Integrate Tavily/Brave/SerpAPI | +| 4 | **Post-compaction file restoration** | Low | High | Re-attach 5 recently-read files after compaction | +| 5 | **Memory mechanics prompt** | Low | High | Teach LLM to proactively use memory_save/memory_search | +| 6 | **Skills system** | Medium | High | Loadable markdown prompts from ~/.kosmokrator/skills/ | + +### Tier 2 — Medium Impact, Moderate Effort + +| # | Feature | Effort | Impact | Notes | +|---|---------|--------|--------|-------| +| 7 | **MCP client integration** | High | High | PHP MCP client for external tool servers | +| 8 | **Wildcard permission rules** | Low | Medium | Glob patterns in PermissionRule (e.g., `Bash(git *)`) | +| 9 | **Hook system** | Medium | Medium | PreToolUse/PostToolUse shell command hooks | +| 10 | **Custom agent definitions** | Low | Medium | ~/.kosmokrator/agents/ markdown files | +| 11 | **Git worktree isolation** | Medium | Medium | EnterWorktreeTool for safe experimentation | +| 12 | **File state tracking** | Low | Medium | Track read files, detect concurrent edits on edit | +| 13 | **Cost display** | Low | Medium | USD cost in context bar | +| 14 | **Post-compaction instruction re-injection** | Low | Medium | Re-inject KOSMOKRATOR.md after compaction | +| 15 | **Verification agent type** | Medium | Medium | Adversarial testing agent | +| 16 | **Deferred tool loading** | Medium | Medium | ToolSearch for MCP tool sets | + +### Tier 3 — Nice to Have + +| # | Feature | Effort | Impact | Notes | +|---|---------|--------|--------|-------| +| 17 | **Diminishing returns detection** | Low | Low | Stop after 3+ turns with <500 tokens/turn | +| 18 | **Compaction circuit breaker** | Low | Low | Stop after 3 consecutive failures | +| 19 | **Zsh builtins blocklist** | Low | Low | Block zmodload, sysopen, ztcp etc. | +| 20 | **Agent auto-backgrounding** | Medium | Low | Background long-running subagents after N seconds | +| 21 | **Agent color assignment** | Low | Low | Unique colors per subagent | +| 22 | **Configurable keybindings** | Medium | Low | ~/.kosmokrator/keybindings.yaml | +| 23 | **PDF/Image reading** | Medium | Low | smalot/pdfparser, intervention/image | +| 24 | **GrepTool output modes** | Low | Low | files_with_matches, count, content modes | +| 25 | **Multiline grep** | Low | Low | -U --multiline-dotall flag | +| 26 | **dontAsk permission mode** | Low | Low | Silent deny for CI/automation | +| 27 | **Bypass-immune safety checks** | Low | Low | Always prompt for .git/, .kosmokrator/, shell configs | +| 28 | **Tool `prompt()` contributions** | Low | Low | Tools inject system prompt sections | + +### Tier 4 — Future / Research + +| # | Feature | Effort | Impact | Notes | +|---|---------|--------|--------|-------| +| 29 | AI permission classifier | High | Medium | Auto-approve safe tool calls via LLM | +| 30 | Remote sessions | High | Low | WebSocket-based remote agent control | +| 31 | Voice mode | High | Low | STT/TTS integration | +| 32 | Plugin system | High | Medium | Loadable plugins with custom tools and UI | +| 33 | Context collapse | High | Medium | Advanced granular preservation | +| 34 | Cache-aware compaction | Medium | Medium | Requires Anthropic cache_edits API | + +--- + +## 24. Appendix: File Reference + +### Claude Code Key Files + +| File | Size | Purpose | +|------|------|---------| +| `main.tsx` | 4,683 lines | Application entry point | +| `QueryEngine.ts` | 46KB | Session state, submitMessage() | +| `query.ts` | 68KB | Main loop, API streaming, tool execution | +| `Tool.ts` | 30KB | Tool interface and factory | +| `tools.ts` | — | Tool registration and discovery | +| `query/tokenBudget.ts` | — | Budget tracking and continuation | +| `services/compact/autoCompact.ts` | — | Auto-compaction triggers | +| `services/compact/compact.ts` | — | Compaction algorithm | +| `services/compact/microCompact.ts` | — | Cache-based microcompaction | +| `services/tokenEstimation.ts` | 16KB | Token estimation formulas | +| `services/tools/StreamingToolExecutor.ts` | 531 lines | Concurrent streaming executor | +| `tools/BashTool/BashTool.tsx` | 1,143 lines | Shell execution | +| `tools/FileEditTool/FileEditTool.ts` | 625 lines | String replacement | +| `tools/FileReadTool/FileReadTool.ts` | 1,183 lines | File reading | +| `tools/GrepTool/GrepTool.ts` | — | Ripgrep integration | +| `tools/WebFetchTool/WebFetchTool.ts` | — | URL fetching | +| `tools/WebSearchTool/WebSearchTool.ts` | — | Web search | +| `tools/AgentTool/AgentTool.tsx` | — | Subagent spawning | +| `tools/ToolSearchTool/ToolSearchTool.ts` | — | Deferred tool discovery | +| `skills/bundledSkills.ts` | — | Skill registry | +| `skills/loadSkillsDir.ts` | — | Skill file loader | +| `memdir/memdir.ts` | — | Memory entrypoint | +| `memdir/memoryTypes.ts` | 272 lines | Memory type taxonomy | +| `services/extractMemories/extractMemories.ts` | — | Background extraction | +| `services/extractMemories/prompts.ts` | 154 lines | Extraction prompts | +| `constants/prompts.ts` | 914 lines | System prompt | +| `context.ts` | — | Context assembly | +| `utils/permissions/permissions.ts` | — | Permission evaluation | +| `types/hooks.ts` | — | Hook types | +| `cost-tracker.ts` | — | Cost tracking | +| `state/AppStateStore.ts` | — | Application state | +| `history.ts` | 465 lines | Session history | +| `commands.ts` | 25KB | Command registry | +| `keybindings/schema.ts` | — | Keybinding configuration | + +### KosmoKrator Key Files + +| File | Size | Purpose | +|------|------|---------| +| `src/Agent/AgentLoop.php` | 904 lines | Core REPL | +| `src/Agent/ConversationHistory.php` | 200 lines | Message buffer | +| `src/Agent/ContextCompactor.php` | 250 lines | LLM summarization | +| `src/Agent/ContextPruner.php` | 129 lines | Tool result pruning | +| `src/Agent/ToolResultDeduplicator.php` | 189 lines | 3-tier deduplication | +| `src/Agent/TokenEstimator.php` | 75 lines | Token estimation | +| `src/Agent/OutputTruncator.php` | 87 lines | Output size limiting | +| `src/Agent/SubagentOrchestrator.php` | 224 lines | Multi-agent management | +| `src/Agent/SubagentFactory.php` | 163 lines | Agent creation | +| `src/Agent/AgentContext.php` | 54 lines | Immutable context | +| `src/Agent/EnvironmentContext.php` | 179 lines | Environment detection | +| `src/Agent/InstructionLoader.php` | 113 lines | Instruction discovery | +| `src/Agent/MemoryInjector.php` | 76 lines | Memory formatting | +| `src/LLM/AsyncLlmClient.php` | 291 lines | Async HTTP client | +| `src/LLM/RetryableLlmClient.php` | — | Retry wrapper | +| `src/Tool/ToolRegistry.php` | 93 lines | Tool management | +| `src/Tool/Permission/PermissionEvaluator.php` | 135 lines | Permission system | +| `src/Tool/Permission/GuardianEvaluator.php` | 152 lines | Heuristic safety | +| `src/Tool/Coding/BashTool.php` | 76 lines | Shell execution | +| `src/Tool/Coding/FileEditTool.php` | 73 lines | File editing | +| `src/Tool/Coding/FileReadTool.php` | 117 lines | File reading | +| `src/Tool/Coding/GrepTool.php` | 94 lines | Text search | +| `src/Tool/Coding/SubagentTool.php` | 171 lines | Subagent spawning | +| `src/Session/SessionManager.php` | 290 lines | Session lifecycle | +| `src/Session/MemoryRepository.php` | 144 lines | Memory storage | +| `src/Task/TaskStore.php` | — | Task management | +| `src/Command/AgentCommand.php` | 340 lines | Main entry point | +| `src/Command/SlashCommandRegistry.php` | 83 lines | Command dispatch | +| `src/ConfigLoader.php` | 116 lines | YAML config | +| `src/Kernel.php` | 382 lines | DI container | + +--- + +## Key Takeaway + +KosmoKrator's **core architecture is solid and well-designed**. The agent loop, subagent orchestration with dependency graphs, 3-tier deduplication, permission modes with Guardian heuristics, and dual renderer are all production-quality implementations that compare well to Claude Code's equivalents. + +The main gaps are in **breadth** rather than **depth**: +- **Tools**: Web fetch, web search, MCP, PDF/image reading +- **Streaming**: Real-time LLM response display +- **Context recovery**: Post-compaction file/instruction restoration +- **Extensibility**: Skills, hooks, custom agent definitions +- **Memory**: Proactive extraction and mechanics prompt + +These can all be added incrementally without architectural changes. The Claude Code source provides exact thresholds, algorithms, and prompt templates that can be adapted for PHP implementation. diff --git a/docs/ecosystem/kosmokrator/research/claude-code-architecture.md b/docs/ecosystem/kosmokrator/research/claude-code-architecture.md new file mode 100644 index 0000000..07dc170 --- /dev/null +++ b/docs/ecosystem/kosmokrator/research/claude-code-architecture.md @@ -0,0 +1,2123 @@ +# How Claude Code Works — Architecture Deep Dive + +> A comprehensive visual walkthrough of every major system inside Claude Code, based on reading the full open-sourced TypeScript codebase (1,903 files, 33MB). Covers internal mechanics, exact thresholds, prompts, and design decisions. + +--- + +## Table of Contents + +1. [High-Level Architecture](#1-high-level-architecture) +2. [Startup & Initialization](#2-startup--initialization) +3. [The Agent Loop](#3-the-agent-loop) +4. [Streaming & SSE Pipeline](#4-streaming--sse-pipeline) +5. [Tool System](#5-tool-system) +6. [Tool Implementations](#6-tool-implementations) +7. [System Prompt Assembly](#7-system-prompt-assembly) +8. [Context Management — 5 Layers](#8-context-management--5-layers) +9. [Token Estimation & Budget](#9-token-estimation--budget) +10. [Subagent / Multi-Agent System](#10-subagent--multi-agent-system) +11. [Permission System](#11-permission-system) +12. [Hook System](#12-hook-system) +13. [Memory System](#13-memory-system) +14. [Skills System](#14-skills-system) +15. [Task System](#15-task-system) +16. [Terminal UI Architecture](#16-terminal-ui-architecture) +17. [Cost Tracking](#17-cost-tracking) +18. [MCP Integration](#18-mcp-integration) +19. [Session & State Management](#19-session--state-management) +20. [The Verification Agent](#20-the-verification-agent) + +--- + +## 1. High-Level Architecture + +```mermaid +graph TB + User([User]) --> CLI[main.tsx — Commander.js CLI] + CLI --> Init[init.ts — Setup & Auth] + CLI --> REPL[REPL.tsx — React/Ink Screen] + + REPL --> QE[QueryEngine] + QE --> QL[queryLoop — while true] + + QL --> CTX[Context Management
5 compression layers] + QL --> API[Anthropic API
SSE Streaming] + QL --> STE[StreamingToolExecutor
Concurrent execution] + + STE --> Tools[50+ Tools] + Tools --> Coding[Coding Tools
Bash, Read, Write,
Edit, Grep, Glob] + Tools --> Web[Web Tools
WebFetch, WebSearch] + Tools --> Agent[AgentTool
Subagent spawning] + Tools --> MCPTools[MCP Tools
External servers] + Tools --> TaskTools[Task Tools
Create, Update, List] + Tools --> SkillT[SkillTool
Prompt templates] + Tools --> Misc[Misc Tools
LSP, Notebook, REPL,
Worktree, Sleep, ...] + + Agent --> QE2[Child QueryEngine
Isolated context] + QE2 --> QL2[Child queryLoop] + + CTX --> MC[Microcompaction
cache_edits API] + CTX --> TB[Time-based clearing
60min TTL awareness] + CTX --> CC[Context Collapse
Selective archiving] + CTX --> AC[Auto-Compaction
LLM summarization] + CTX --> SM[Session Memory
Background extraction] + + REPL --> Perm[Permission System
6 modes, wildcards,
AI classifier] + REPL --> Hooks[Hook System
12 event types,
shell commands] + REPL --> Mem[Memory System
File-based, 4 types,
background extraction] + REPL --> Tasks[Task System
7 task types,
spinner integration] + REPL --> State[AppState Store
Pub/sub reactive state] + REPL --> Skills[Skills System
Bundled + user-defined
+ MCP + plugins] + + style QE fill:#4a9eff,color:#fff + style QL fill:#4a9eff,color:#fff + style STE fill:#ff6b6b,color:#fff + style CTX fill:#ffa94d,color:#fff + style Agent fill:#69db7c,color:#fff +``` + +The codebase is roughly organized as: + +| Directory | Purpose | Approx size | +|-----------|---------|-------------| +| `ink/` | Custom React reconciler + terminal rendering engine | ~8,000 lines | +| `tools/` | 50+ tool implementations | ~12,000 lines | +| `services/` | MCP, compact, memory, analytics, OAuth, plugins | ~10,000 lines | +| `utils/` | Permissions, hooks, settings, file ops | ~8,000 lines | +| `components/` | React UI components | ~5,000 lines | +| `screens/` | REPL, Doctor, Resume screens | ~3,000 lines | +| `query.ts` + `QueryEngine.ts` | Core agent loop | ~4,500 lines | +| `constants/` | System prompts, model config, tools config | ~2,000 lines | +| `state/` | App state management | ~1,500 lines | +| `keybindings/` | Keyboard shortcut system | ~1,000 lines | + +--- + +## 2. Startup & Initialization + +The startup sequence in `main.tsx` (4,683 lines) is heavily optimized for speed — several expensive operations run in parallel before imports even finish: + +```mermaid +sequenceDiagram + participant Entry as main.tsx entry + participant MDM as MDM Raw Read + participant KC as Keychain Prefetch + participant CLI as Commander CLI + participant Init as init() + participant REPL as REPL Screen + + Note over Entry: Side-effects fire BEFORE imports complete + Entry->>MDM: startMdmRawRead() — plutil/reg query in subprocess + Entry->>KC: startKeychainPrefetch() — read OAuth + API key + + Note over Entry: Heavy module evaluation (~135ms) + Entry->>CLI: Parse CLI args (Commander.js) + + CLI->>Init: init() + activate Init + Init->>Init: Node.js version check (18+) + Init->>Init: Session ID setup + Init->>Init: Git repo detection + Init->>Init: Hook config snapshot + Init->>Init: Release notes check + deactivate Init + + Init->>REPL: Launch React/Ink REPL + REPL->>REPL: Connect MCP servers + REPL->>REPL: Load permissions, settings + REPL->>REPL: Initialize GrowthBook feature flags + REPL->>REPL: Show prompt — ready for input +``` + +**Key optimization**: MDM settings reads (macOS `plutil` subprocess) and keychain reads (OAuth token + legacy API key) are fired as the very first lines, before the ~135ms of import evaluation. By the time imports finish, the subprocesses have completed. + +--- + +## 3. The Agent Loop + +The core of Claude Code is split into two layers: `QueryEngine` (session owner) and `query()` (inner loop). + +### QueryEngine (`QueryEngine.ts`, 46KB) + +Owns the session lifecycle: +- `mutableMessages[]` — in-memory message buffer +- `submitMessage()` — async generator that yields `SDKMessage` types +- Manages compact boundaries, permission tracking, transcript recording +- Wraps `canUseTool()` callback to track permission denials +- One QueryEngine per conversation; subagents get their own isolated instances + +### queryLoop() (`query.ts`, 68KB) + +The inner `while(true)` loop (line 307). Each iteration = one LLM turn: + +```mermaid +flowchart TD + Start([User sends message]) --> AddMsg[Add to message history] + AddMsg --> PreFlight{Context
pressure check} + + PreFlight -->|Under threshold| Prompt[Assemble system prompt
+ user context + system context] + PreFlight -->|Over threshold| Layers[Run compression layers:
1. Snip 2. Microcompact
3. Context Collapse 4. Autocompact] + Layers --> Prompt + + Prompt --> Normalize[normalizeMessagesForAPI
Repair tool_use/result pairing
Strip synthetic messages
Limit media to 100 items] + Normalize --> Stream[Stream API call via
queryModelWithStreaming] + + Stream --> EventLoop{SSE event type?} + + EventLoop -->|message_start| InitMsg[Initialize partial message
+ usage tracking] + EventLoop -->|content_block_start| InitBlock[Initialize text/tool_use/
thinking block] + EventLoop -->|content_block_delta| Accum[Accumulate:
input_json_delta
text_delta
thinking_delta] + EventLoop -->|content_block_stop| YieldBlock[Yield completed
AssistantMessage block] + EventLoop -->|message_delta| UpdateUsage[Update usage,
stop_reason, cost] + EventLoop -->|message_stop| StreamDone[Stream complete] + + YieldBlock --> HasToolUse{Block is
tool_use?} + HasToolUse -->|Yes| QueueTool[Queue in
StreamingToolExecutor] + HasToolUse -->|No| RenderText[Render text
to terminal] + + QueueTool --> ExecImmediate[Start execution
immediately if safe] + ExecImmediate --> CollectReady[Yield any
completed results] + + StreamDone --> AnyTools{Any tool_use
blocks in response?} + AnyTools -->|Yes| Remaining[Collect remaining
tool results] + Remaining --> NormResults[Normalize results
for API format] + NormResults --> CheckStop{Stop condition?} + + CheckStop -->|Max turns| Terminal1([Return: max_turns_reached]) + CheckStop -->|Budget exhausted| Terminal2([Return: budget_exhausted]) + CheckStop -->|Abort signal| HandleAbort[Generate synthetic
tool_results for orphans] + HandleAbort --> Terminal3([Return: aborted_streaming]) + CheckStop -->|Continue| NextTurn[Append messages
to state, increment turn] + NextTurn --> PreFlight + + AnyTools -->|No tools| Recovery{Recovery needed?} + Recovery -->|Collapse drain| DrainCollapse[Commit staged
context collapses] + DrainCollapse --> PreFlight + Recovery -->|Reactive compact| ReactiveCompact[Full LLM
summarization] + ReactiveCompact --> PreFlight + Recovery -->|Max output hit| Escalate[Retry: 8K → 64K
max output tokens] + Escalate --> Stream + Recovery -->|Multi-turn| Resume["Inject 'resume' message
Up to 3 retries"] + Resume --> Stream + Recovery -->|Stop hooks| RunHooks[Execute user-defined
stop hooks] + RunHooks -->|Blocking errors| PreFlight + RunHooks -->|Clean| Terminal4([Return to user]) + Recovery -->|Done| Terminal4 + + style Stream fill:#4a9eff,color:#fff + style QueueTool fill:#ff6b6b,color:#fff + style ExecImmediate fill:#ff6b6b,color:#fff + style Layers fill:#ffa94d,color:#fff +``` + +### Loop State + +The loop maintains explicit state that carries between iterations: + +```typescript +type State = { + messages: Message[] // Full conversation + toolUseContext: ToolUseContext // Tools, permissions, abort controller + autoCompactTracking: AutoCompactTrackingState // Compaction metrics + maxOutputTokensRecoveryCount: number // Max-output retries (0-3) + hasAttemptedReactiveCompact: boolean // Prevent compact spirals + turnCount: number // Current turn number + transition: Continue | undefined // Why we continued (next_turn, collapse_drain_retry, etc.) + pendingToolUseSummary: Promise<...> | undefined + stopHookActive: boolean | undefined + maxOutputTokensOverride: number | undefined +} +``` + +### Recovery Decision Tree + +When the LLM responds without any tool_use blocks but the task isn't done, Claude Code has a sophisticated recovery chain: + +```mermaid +flowchart TD + NoTools[LLM response has
no tool_use blocks] --> CollapseCheck{Context collapse
enabled & pending?} + + CollapseCheck -->|Yes| DrainCollapse["Commit staged collapses
(cheap, preserves detail)
transition: collapse_drain_retry"] + CollapseCheck -->|No| ReactiveCheck{First attempt &
context near limit?} + + ReactiveCheck -->|Yes| ReactiveCompact["Full LLM summarization
Strip excess media
hasAttemptedReactiveCompact = true"] + ReactiveCheck -->|No| MaxOutputCheck{stop_reason =
max_tokens?} + + MaxOutputCheck -->|Yes, count < 1| Escalate["Escalate: retry with 64K
max output tokens
(was 8K default)"] + MaxOutputCheck -->|Yes, count < 3| MultiTurn["Inject resume message:
'Continue from where you
left off'
maxOutputTokensRecoveryCount++"] + MaxOutputCheck -->|Yes, count >= 3| GiveUp[Return to user
with partial response] + MaxOutputCheck -->|No| StopHooks{Stop hooks
configured?} + + StopHooks -->|Yes| RunHooks["Execute stop hooks
(user-defined checks)"] + RunHooks -->|Blocking errors| InjectErrors["Inject errors
into context, retry"] + RunHooks -->|Clean| Done([Return to user]) + StopHooks -->|No| Done + + style DrainCollapse fill:#69db7c,color:#000 + style ReactiveCompact fill:#ffa94d,color:#000 + style Escalate fill:#ffd43b,color:#000 + style MultiTurn fill:#ffd43b,color:#000 + style GiveUp fill:#ff6b6b,color:#fff +``` + +--- + +## 4. Streaming & SSE Pipeline + +### API Integration (`claude.ts`) + +Claude Code uses the Anthropic SDK directly, creating a streaming request: + +```typescript +const result = await anthropic.beta.messages.create( + { ...params, stream: true }, + { signal, headers: { [CLIENT_REQUEST_ID_HEADER]: clientRequestId } } +).withResponse() +``` + +It then iterates the raw stream events directly (NOT using the SDK's `BetaMessageStream` helper), giving full control over each SSE event. + +### SSE Event Processing + +```mermaid +sequenceDiagram + participant API as Anthropic API + participant Parser as SSE Parser + participant Loop as Event Handler + participant UI as Terminal UI + participant STE as StreamingToolExecutor + + API->>Parser: data: {"type": "message_start", ...} + Parser->>Loop: message_start + Loop->>Loop: Initialize partialMessage, usage tracking + + API->>Parser: data: {"type": "content_block_start", "content_block": {"type": "thinking"}} + Parser->>Loop: content_block_start (thinking) + Loop->>UI: Show "Thinking..." spinner + + API->>Parser: data: {"type": "content_block_delta", "delta": {"thinking": "Let me..."}} + Parser->>Loop: thinking_delta + Loop->>Loop: Accumulate thinking text (not shown to user) + + API->>Parser: data: {"type": "content_block_stop"} + Parser->>Loop: content_block_stop (thinking) + Loop->>UI: Show thinking duration "Thinking (2.3s)" + + API->>Parser: data: {"type": "content_block_start", "content_block": {"type": "text"}} + Parser->>Loop: content_block_start (text) + + API->>Parser: data: {"type": "content_block_delta", "delta": {"text": "I'll search"}} + Parser->>Loop: text_delta + Loop->>UI: Render partial text token-by-token + + API->>Parser: data: {"type": "content_block_start", "content_block": {"type": "tool_use", "name": "Grep"}} + Parser->>Loop: content_block_start (tool_use: Grep) + + API->>Parser: data: {"type": "content_block_delta", "delta": {"partial_json": "{\"pattern\":"}} + Parser->>Loop: input_json_delta + Loop->>Loop: Concatenate to partial input string + + API->>Parser: data: {"type": "content_block_stop"} + Parser->>Loop: content_block_stop (tool_use: Grep) + Loop->>Loop: Parse accumulated JSON → tool input + Loop->>STE: addTool(grep block) + STE->>STE: Start Grep execution immediately + + Note over API,STE: Model is STILL generating more blocks... + + API->>Parser: data: {"type": "content_block_start", "content_block": {"type": "tool_use", "name": "Read"}} + Note over STE: Grep may already be DONE by now + + API->>Parser: data: {"type": "message_delta", "delta": {"stop_reason": "tool_use"}, "usage": {...}} + Parser->>Loop: message_delta + Loop->>Loop: Update final usage, stop_reason, cost + + API->>Parser: data: {"type": "message_stop"} + Parser->>Loop: Streaming complete +``` + +### Idle Timeout Watchdog + +A configurable watchdog kills stalled streams: + +``` +Default: 90 seconds (STREAM_IDLE_TIMEOUT_MS) +Override: CLAUDE_STREAM_IDLE_TIMEOUT_MS env var +Behavior: Timer resets on every chunk. If no events arrive within timeout → abort stream. +``` + +### Streaming Fallback + +If the stream errors (not user abort), Claude Code retries as a **non-streaming** request: +- Max 64K tokens for non-streaming (`MAX_NON_STREAMING_TOKENS`) +- Partially-streamed messages are **tombstoned** (invalidated in the UI) +- The StreamingToolExecutor is discarded and a fresh one is created +- All partially-executed tools get synthetic error results + +--- + +## 5. Tool System + +### Tool Interface (`Tool.ts`, 30KB) + +Every tool conforms to a rich generic interface: + +```typescript +Tool = { + // Identity + name: string + userFacingName(input): string + description(input): string + + // Schemas (lazy-evaluated for token efficiency) + inputSchema: ZodSchema // Validated before execution + outputSchema: ZodSchema // Typed output + + // Execution + call(input, context, canUseTool, parentMessage, onProgress): Promise<{data: Output}> + validateInput(input, context): Promise + + // Permissions + checkPermissions(input, context): Promise + + // Behavior flags + isConcurrencySafe(input): boolean // Can run in parallel with others + isReadOnly(): boolean // No side effects + requiresUserInteraction(): boolean // Needs terminal input + + // System prompt + prompt(): string // Injects tool-specific guidance into system prompt + + // Deferred loading + shouldDefer: boolean // Only load schema when ToolSearch fetches it + alwaysLoad: boolean // Always include even with ToolSearch active + + // MCP + isMcp: boolean // From external MCP server +} +``` + +### Tool Registration & Discovery (`tools.ts`) + +```mermaid +flowchart TD + subgraph "Tool Assembly Pipeline" + Base["getAllBaseTools()
~50 built-in tools"] --> FeatureFilter{Feature flags
enabled?} + FeatureFilter -->|Yes| Include[Include tool] + FeatureFilter -->|No| Skip[Skip tool] + + Include --> PermFilter["filterToolsByDenyRules()
Remove blanket-denied tools"] + PermFilter --> ModeFilter{Simple mode?} + ModeFilter -->|Yes| SimpleSet["Only: Bash,
FileRead, FileEdit"] + ModeFilter -->|No| FullSet[Full tool set] + + FullSet --> MCPMerge["assembleToolPool()
Merge built-in + MCP tools"] + MCPMerge --> Dedup["Deduplicate by name
(built-ins take precedence)"] + Dedup --> Sort["Sort for prompt-cache stability
(deterministic ordering)"] + Sort --> DeferCheck{ToolSearch
enabled?} + DeferCheck -->|Yes| Split["Split: alwaysLoad tools
in prompt, rest deferred"] + DeferCheck -->|No| AllInPrompt["All tools in prompt"] + end + + style Base fill:#4a9eff,color:#fff + style MCPMerge fill:#69db7c,color:#000 +``` + +### StreamingToolExecutor (`StreamingToolExecutor.ts`, 531 lines) + +The executor that runs tools concurrently during streaming: + +```mermaid +flowchart TD + subgraph "Tool Queue Management" + Add["addTool(block, message)
Called when content_block_stop
arrives for tool_use"] --> Classify{Concurrent
safe?} + Classify -->|Yes| QueueConc["Queue as concurrent
Can run in parallel"] + Classify -->|No| QueueExcl["Queue as exclusive
Needs sole access"] + + QueueConc --> ProcessQueue["processQueue()"] + QueueExcl --> ProcessQueue + + ProcessQueue --> Check{Any executing
tools?} + Check -->|"All concurrent"| StartParallel["Start next concurrent
tool in parallel"] + Check -->|"Has exclusive"| Wait["Wait for exclusive
to finish"] + Check -->|"None"| StartNext["Start next tool
(concurrent or exclusive)"] + + StartParallel --> Execute["executeTool()"] + StartNext --> Execute + end + + subgraph "Per-Tool Execution" + Execute --> CreateAbort["Create child abort controller
(child of sibling controller)"] + CreateAbort --> RunGenerator["for await (update of runToolUse(...))"] + RunGenerator --> CheckAbort{Aborted?} + CheckAbort -->|"sibling_error"| SynthError["Create synthetic error result
'Interrupted: concurrent tool failed'"] + CheckAbort -->|"user_interrupted"| SynthCancel["Create synthetic result
'Interrupted by user'"] + CheckAbort -->|"streaming_fallback"| SynthFallback["Create fallback result"] + CheckAbort -->|No| ProcessResult{Result type?} + ProcessResult -->|Progress| EmitProgress["Emit progress event
(stdout lines, search hits)"] + ProcessResult -->|Complete| MarkDone["Mark tool complete"] + end + + subgraph "Abort Hierarchy" + QueryAbort["Query Controller
(user ESC / timeout)"] --> SiblingAbort["Sibling Controller
(bash error kills all)"] + SiblingAbort --> ToolAbort["Per-Tool Controller
(individual cancel)"] + end + + Execute --> |"Bash error"| AbortSiblings["this.siblingAbortController.abort('sibling_error')
Kills all parallel tools"] + + style Execute fill:#ff6b6b,color:#fff + style AbortSiblings fill:#ff6b6b,color:#fff +``` + +### Result Yielding + +Results are yielded to the query loop in two modes: +- **`getCompletedResults()`** (non-blocking): Returns any already-completed results in tool order. Called during streaming to drain ready results. +- **`getRemainingResults()`** (async generator): Waits for all pending tools using `Promise.race()` between tool completion and progress availability. Called after streaming ends. + +--- + +## 6. Tool Implementations + +### BashTool (`BashTool.tsx`, 1,143 lines) + +```mermaid +flowchart TD + Input["command, timeout?,
description?,
dangerouslyDisableSandbox?"] --> Sandbox{shouldUseSandbox?} + Sandbox -->|Yes| Wrap["Wrap in sandbox
(SandboxManager)"] + Sandbox -->|No| Direct[Direct execution] + + Wrap --> Exec["exec() via Bun child_process
with AbortSignal"] + Direct --> Exec + + Exec --> Progress["Yield progress events
every 2000ms (PROGRESS_THRESHOLD_MS)"] + Progress --> Timeout{Timeout?} + Timeout -->|No| Complete["Capture stdout + stderr
via EndTruncatingAccumulator"] + Timeout -->|Yes| AutoBG{Auto-background
enabled?} + AutoBG -->|Yes| Background["Migrate to background task
Register foreground → background"] + AutoBG -->|No| Kill[Kill process] + + Complete --> Size{Output > 100K chars?} + Size -->|Yes| Persist["Save full output to /tool-results/
Return preview + file path"] + Size -->|No| Return[Return output directly] + + subgraph "Security Analysis" + Classify["isSearchOrReadBashCommand()"] + Classify --> Split["Split on operators: || && | ; > >>"] + Split --> Each["Classify each part"] + Each --> Neutral["Skip neutral: echo, printf, true, false"] + Each --> SearchRead["Identify: grep, find, ls, cat,
head, tail, wc, stat, file"] + Each --> Mutative["Flag: rm, mv, git commit,
npm install, docker, kubectl"] + end + + subgraph "Zsh Defense" + Block["Block dangerous zsh builtins:
zmodload, emulate, sysopen,
sysread, syswrite, sysseek,
zpty, ztcp, zsocket,
zf_rm, zf_mv, zf_ln..."] + end + + style Exec fill:#4a9eff,color:#fff + style Background fill:#ffd43b,color:#000 + style Block fill:#ff6b6b,color:#fff +``` + +**Key details:** +- **EndTruncatingAccumulator**: Preserves the *start* of output, truncates from the *end* (more useful than tail truncation for most commands) +- **Background migration**: If a foreground task is already registered, it gets migrated in-place via `backgroundExistingForegroundTask()`. If not registered, a new background task is spawned via `spawnShellTask()` +- **Sed parsing**: `sedEditParser.ts` parses sed commands to generate a simulated preview for the permission dialog + +### FileEditTool (`FileEditTool.ts`, 625 lines) + +```mermaid +flowchart TD + Input["file_path, old_string,
new_string, replace_all?"] --> Validate{File read
previously?} + Validate -->|No| Error1["Error: File has not
been read yet"] + Validate -->|Yes| SizeCheck{File > 1 GiB?} + SizeCheck -->|Yes| Error2["Error: File too large"] + SizeCheck -->|No| ConcurrentCheck{mtime changed
since last read?} + ConcurrentCheck -->|Yes, content same| Proceed["Safe — external touch
but content unchanged"] + ConcurrentCheck -->|Yes, content differs| Error3["Error: File modified
externally since last read"] + ConcurrentCheck -->|No| Proceed + + Proceed --> FindString["findActualString(file, old_string)"] + FindString --> QuoteNorm["Try exact match first
Then normalize quotes:
curly ↔ straight quotes"] + QuoteNorm --> Found{Match found?} + Found -->|No| Error4["Error: String not found
in file"] + Found -->|Yes| CountCheck{Multiple
matches?} + CountCheck -->|"Yes & !replace_all"| Error5["Error: old_string not unique
Provide more context or
use replace_all: true"] + CountCheck -->|OK| PreserveQuotes["preserveQuoteStyle()
Match new_string to
file's typography"] + PreserveQuotes --> Detect["Detect encoding:
UTF-8 or UTF-16LE"] + Detect --> DetectEndings["Detect line endings:
CRLF, LF, or CR"] + DetectEndings --> Replace["Apply replacement
Generate unified diff"] + Replace --> WriteFile["Write file with original
encoding + line endings"] + WriteFile --> UpdateState["Update readFileState
with new content + mtime"] + + style FindString fill:#4a9eff,color:#fff + style WriteFile fill:#69db7c,color:#000 +``` + +**File state tracking**: Every `FileReadTool` call registers `{content, mtime, offset, limit, isPartialView}` in a `readFileState` Map. The FileEditTool checks this on every edit to prevent silent data corruption from concurrent modifications. Partial reads (`isPartialView = true`) block editing entirely. + +### FileReadTool (`FileReadTool.ts`, 1,183 lines) + +| Feature | Details | +|---------|---------| +| **Line range reads** | `offset` + `limit` params, reads specific range without loading whole file | +| **Deduplication** | If same file + range read twice without mtime change, returns `file_unchanged` stub (saves ~18% cache-creation tokens) | +| **PDF support** | Page-range extraction via `extractPDFPages()`, `MAX_PAGES_PER_READ` limit, token-aware compression | +| **Image support** | Format detection, resize/downsample with `compressImageBufferWithTokenLimit()`, metadata text generation | +| **Notebook support** | `.ipynb` cell parsing, maps cells to structured output with code + outputs | +| **Dangerous paths** | Blocks: `/dev/zero`, `/dev/random`, `/dev/stdin`, `/proc/self/fd/*` | +| **Memory freshness** | Appends notes for old CLAUDE.md files, triggers skill directory discovery | + +### GrepTool (`GrepTool.ts`) + +Three output modes with pagination: + +```mermaid +graph LR + subgraph "Output Modes" + Content["content mode
Shows matching lines
with -A/-B/-C context
and line numbers"] + Files["files_with_matches mode
Shows file paths only
Sorted by mtime descending"] + Count["count mode
Shows match counts
per file"] + end + + subgraph "Pagination" + Offset["offset: skip first N results"] + Limit["head_limit: cap output
Default: 250 results
Pass 0 for unlimited"] + end + + subgraph "Ripgrep Flags" + Hidden["--hidden (search hidden files)"] + VCS["--glob !.git --glob !.svn
--glob !.hg (exclude VCS)"] + MaxCols["--max-columns 500"] + Multi["multiline: -U --multiline-dotall"] + Case["-i for case insensitive"] + Type["--type js/py/rust/..."] + end + + style Content fill:#4a9eff,color:#fff + style Files fill:#69db7c,color:#000 + style Count fill:#ffd43b,color:#000 +``` + +### WebFetchTool + +```mermaid +flowchart TD + URL[URL input] --> Parse[Parse hostname] + Parse --> Preapproved{Preapproved host?
GitHub, MDN, npm,
PyPI, Stack Overflow...} + Preapproved -->|Yes| AutoAllow[Skip permission prompt] + Preapproved -->|No| AskPerm[Show permission dialog
with domain:hostname] + AutoAllow --> Fetch + AskPerm -->|Approved| Fetch + + Fetch["getURLMarkdownContent(url)"] --> Redirect{Redirect?} + Redirect -->|Yes| FollowRedirect[Follow + report] + Redirect -->|No| Convert[HTML → Markdown extraction] + + Convert --> CheckSize{Preapproved AND
text/markdown AND
< MAX_MARKDOWN_LENGTH?} + CheckSize -->|Yes| ReturnRaw[Return raw markdown] + CheckSize -->|No| Summarize["applyPromptToMarkdown()
Haiku summarization
with optional user prompt"] + + Summarize --> Binary{Binary content?} + Binary -->|Yes| PersistBinary["Save to disk
Append file path note"] + Binary -->|No| Return[Return result] + ReturnRaw --> Return + PersistBinary --> Return + + Return --> Cache["Cache result for
15 minutes (URL key)"] + + style Fetch fill:#4a9eff,color:#fff + style Summarize fill:#ffd43b,color:#000 +``` + +### WebSearchTool + +Uses Anthropic's native `web_search_20250305` server tool — it doesn't call an external search API. Instead, it makes a sub-call to the Claude API with the search tool enabled: + +```mermaid +sequenceDiagram + participant Tool as WebSearchTool + participant API as Anthropic API + participant UI as Progress UI + + Tool->>API: Create streaming request with:
system: "You are an assistant for web search"
tools: [{type: "web_search_20250305", max_uses: 8}]
message: "Perform a web search for: {query}" + + API->>Tool: content_block_start (server_tool_use) + API->>Tool: input_json_delta (partial query JSON) + Note over Tool: Extract query via regex from partial JSON + Tool->>UI: Progress: "Searching for: {extracted query}" + + API->>Tool: content_block_stop (web_search_tool_result) + Note over Tool: Parse search results: title + URL pairs + + API->>Tool: content_block (text — summary) + Note over Tool: Accumulate text summary + + API->>Tool: message_stop + Tool->>Tool: Combine results + summary into output +``` + +**Key constraint**: Max 8 searches per request (hardcoded in tool schema). Available on first-party API, Vertex (Claude 4+), and Foundry. + +--- + +## 7. System Prompt Assembly + +### Structure + +The system prompt is approximately **914 lines** split into cacheable and dynamic sections: + +```mermaid +graph TD + subgraph "Cacheable Prefix — SYSTEM_PROMPT_DYNAMIC_BOUNDARY" + direction TB + Intro["Identity & Role
'You are an interactive agent that
helps users with software engineering tasks'"] + System["# System
Tool execution rules, permission modes,
hooks, context compression, tags"] + Tasks2["# Doing tasks
Engineering best practices,
code quality, no unnecessary changes,
security awareness, error handling"] + Actions["# Executing actions with care
Reversibility, blast radius, confirmation
for risky ops, measure twice cut once"] + UsingTools["# Using your tools
Prefer dedicated tools over Bash,
parallel calls, task management"] + Tone["# Tone and style
No emojis, concise, file:line refs,
owner/repo#123 format"] + Efficiency["# Output efficiency
Go straight to the point,
skip filler, inverted pyramid"] + end + + subgraph "Dynamic Section — Per-Session" + direction TB + ToolGuidance["Tool-specific guidance
Agent, Skills, ToolSearch, MCP"] + MemMechanics["Memory mechanics prompt
How to save/recall memories
Types, format, when to access"] + VerifierContract["Verification agent contract
When to spawn, how to review"] + SkillList["Available skills list
Discovered /commands"] + end + + subgraph "User Context" + direction TB + ClaudeMD["CLAUDE.md files
From directory hierarchy"] + DateCtx["Current date
'Today's date is 2025-03-31'"] + end + + subgraph "System Context" + direction TB + GitStatus["Git status
Branch, status (2000 char cap),
5 recent commits, user name"] + end + + Intro --> System --> Tasks2 --> Actions --> UsingTools --> Tone --> Efficiency + Efficiency -.->|"__SYSTEM_PROMPT_DYNAMIC_BOUNDARY__"| ToolGuidance + ToolGuidance --> MemMechanics --> VerifierContract --> SkillList + + style Intro fill:#4a9eff,color:#fff + style ToolGuidance fill:#ffd43b,color:#000 + style ClaudeMD fill:#69db7c,color:#000 + style GitStatus fill:#b197fc,color:#000 +``` + +### Notable prompt sections + +**"Executing actions with care"** — essentially a philosophy on reversibility: +> "Carefully consider the reversibility and blast radius of actions. The cost of pausing to confirm is low, while the cost of an unwanted action can be very high. A user approving an action once does NOT mean they approve it in all contexts. Measure twice, cut once." + +**"Doing tasks"** — anti-overengineering guidelines: +> "Don't add features, refactor code, or make 'improvements' beyond what was asked. Don't add docstrings, comments, or type annotations to code you didn't change. Three similar lines of code is better than a premature abstraction." + +**CLAUDE.md hierarchy** (loaded bottom-up, all files included): +``` +~/.claude/CLAUDE.md (user-global) +{git_root}/CLAUDE.md (project, committed) +{git_root}/.claude/CLAUDE.md (project, gitignored) +{git_root}/.claude.local/CLAUDE.md (local, always gitignored) +{subdir}/CLAUDE.md (subdirectory override) +``` + +--- + +## 8. Context Management — 5 Layers + +```mermaid +graph TD + subgraph "Layer 1: Cache-Edit Microcompaction" + MC["Surgically delete individual tool results
from the API's prompt cache"] + MC_how["Uses Anthropic cache_edits API
Does NOT modify local messages
Edits applied at API layer
via cache_reference + cache_edits blocks"] + MC_what["Clearable: FILE_READ, SHELL,
GREP, GLOB, WEB_SEARCH,
WEB_FETCH, FILE_EDIT, FILE_WRITE"] + MC_when["Trigger: count-based
(GrowthBook feature gate)"] + MC_scope["Main thread only —
subagents excluded to
prevent dangling references"] + end + + subgraph "Layer 2: Time-Based Microcompaction" + TB["Clear stale tool results when
server cache is already cold"] + TB_how["Content set to sentinel:
'[Old tool result content cleared]'"] + TB_when["Trigger: idle > 60 min
(= server cache TTL)"] + TB_keep["Keeps 5 most recent
compactable tool results"] + TB_est["Token savings estimated per result:
images/docs = 2000 tokens
text = length / 4"] + end + + subgraph "Layer 3: Context Collapse" + CL["Selective message archiving
that preserves detail longer"] + CL_commit["Commit point: 90% of
effective context"] + CL_block["Blocking spawn threshold:
95% of effective context"] + CL_race["When enabled, auto-compaction
is DISABLED to prevent
race conditions"] + end + + subgraph "Layer 4: Auto-Compaction" + AC["Full LLM summarization
of old conversation"] + AC_thresh["Threshold: effectiveWindow - 13K buffer
≈ 93% of usable context"] + AC_window["effectiveWindow = contextWindow
- min(maxOutput, 20K)"] + AC_circuit["Circuit breaker: stops after
3 consecutive failures"] + AC_prompt["9-section summary prompt:
1. Primary request
2. Key technical concepts
3. Files & code sections
4. Errors & fixes
5. Problem solving
6. All user messages
7. Pending tasks
8. Current work
9. Next step"] + end + + subgraph "Layer 5: Session Memory Compaction" + SM["Background summary extraction
to separate storage"] + SM_config["Config: minTokens=10K,
maxTokens=40K,
minTextBlockMessages=5"] + SM_invariants["Preserves API invariants:
tool_use/result pairing,
thinking block grouping"] + end + + MC -.->|"Still over?"| TB + TB -.->|"Still over?"| CL + CL -.->|"Still over?"| AC + AC -.->|"Still over?"| SM + + style MC fill:#69db7c,color:#000 + style TB fill:#a9e34b,color:#000 + style CL fill:#ffd43b,color:#000 + style AC fill:#ffa94d,color:#000 + style SM fill:#ff6b6b,color:#fff +``` + +### Post-Compaction Restoration + +After auto-compaction summarizes old messages, Claude Code reconstructs essential context: + +```mermaid +flowchart LR + Compact["Compaction complete
Old messages replaced
with summary"] --> FileRestore + + subgraph FileRestore ["File Restoration"] + direction TB + Scan["Scan messages for
FILE_READ tool_uses"] + Scan --> Collect["Collect file paths
(skip dedup stubs)"] + Collect --> Select["Select 5 most recent files
within 50K token budget"] + Select --> Truncate["Truncate each to
5K tokens if needed"] + Truncate --> Attach["Attach as file content
after summary"] + end + + FileRestore --> SkillRestore + + subgraph SkillRestore ["Skill Re-injection"] + direction TB + ScanSkills["Collect invoked skills
from bootstrap state"] + ScanSkills --> SortRecent["Sort most-recent-first"] + SortRecent --> TruncSkill["Head-preserving truncation
(keep setup/usage)"] + TruncSkill --> FitBudget["Fit within 25K budget
(5K per skill)"] + end + + SkillRestore --> StateRestore + + subgraph StateRestore ["State Preservation"] + direction TB + Plan["Plan mode attachment"] + AsyncAgents["Async agent status
(running/finished)"] + Hooks3["Hook result messages
(session start, plan mode)"] + end + + StateRestore --> Ready["Agent continues
with key context intact"] + + style Compact fill:#ffa94d,color:#000 + style Ready fill:#69db7c,color:#000 +``` + +### Prompt-Too-Long Retry Loop + +If the compaction API call itself hits a prompt-too-long error: + +``` +Max retries: 3 (MAX_PTL_RETRIES) +Strategy: truncateHeadForPTLRetry() — drop oldest API-round groups +Fallback: If token gap unparseable, drop 20% of groups +``` + +### All Thresholds + +| Metric | Value | Source | +|--------|-------|--------| +| Effective context window | `contextWindow - min(maxOutput, 20K)` | `autoCompact.ts` | +| Auto-compact buffer | 13,000 tokens | `AUTOCOMPACT_BUFFER_TOKENS` | +| Auto-compact threshold | ~93% of effective window | Calculated | +| Warning threshold buffer | 20,000 tokens | `WARNING_THRESHOLD_BUFFER_TOKENS` | +| Manual compact buffer | 3,000 tokens | `MANUAL_COMPACT_BUFFER_TOKENS` | +| Max compaction output | 20,000 tokens | `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | +| Max consecutive failures | 3 | `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | +| Post-compact file budget | 50,000 tokens | `POST_COMPACT_TOKEN_BUDGET` | +| Post-compact file cap | 5,000 tokens/file | `POST_COMPACT_MAX_TOKENS_PER_FILE` | +| Post-compact max files | 5 | `POST_COMPACT_MAX_FILES_TO_RESTORE` | +| Post-compact skill budget | 25,000 tokens | `POST_COMPACT_SKILLS_TOKEN_BUDGET` | +| Post-compact skill cap | 5,000 tokens/skill | `POST_COMPACT_MAX_TOKENS_PER_SKILL` | +| Session memory min tokens | 10,000 | Default config | +| Session memory max tokens | 40,000 | Default config | +| Session memory min messages | 5 text-block messages | Default config | +| Context collapse commit | 90% of effective window | Feature-gated | +| Context collapse blocking | 95% of effective window | Feature-gated | +| Time-based MC gap | 60 minutes | Server cache TTL | +| Time-based MC keep recent | 5 tool results | Default config | +| Max PTL retries | 3 | `MAX_PTL_RETRIES` | +| PTL fallback | Drop 20% of groups | When gap unparseable | + +--- + +## 9. Token Estimation & Budget + +### Estimation Formulas + +``` +Text: length / 4 bytes per token (general) +JSON: length / 2 bytes per token (denser structure) +Images/docs: 2,000 tokens flat estimate +Message total: ceil(sum * 4/3) — 33% conservative padding +``` + +File-type-specific: `.json`, `.jsonl`, `.jsonc` use 2 bytes/token. Everything else uses 4. + +### Budget Continuation Logic + +When an agent has a token budget, the system tracks usage and decides whether to continue: + +```mermaid +flowchart TD + Check["checkTokenBudget()"] --> HasBudget{Budget set?} + HasBudget -->|No| Stop1([No continuation]) + HasBudget -->|Yes| CalcPct["pct = (globalTurnTokens / budget) * 100"] + + CalcPct --> CalcDelta["delta = tokens since last check"] + CalcDelta --> Diminishing{continuationCount >= 3
AND last 2 deltas
both < 500 tokens?} + + Diminishing -->|Yes| StopDim([Stop: diminishing returns]) + Diminishing -->|No| UnderBudget{Under 90%?} + + UnderBudget -->|Yes| Continue["Continue
Inject nudge message:
'You've used X% of budget.
Y tokens remaining.'"] + UnderBudget -->|No| WasContinued{Any prior
continuation?} + WasContinued -->|Yes| StopBudget([Stop: budget threshold]) + WasContinued -->|No| StopFirst([Stop: first check]) + + style Continue fill:#69db7c,color:#000 + style StopDim fill:#ff6b6b,color:#fff + style StopBudget fill:#ff6b6b,color:#fff +``` + +| Constant | Value | +|----------|-------| +| `COMPLETION_THRESHOLD` | 0.9 (90%) | +| `DIMINISHING_THRESHOLD` | 500 tokens per turn | +| Min continuations for diminishing check | 3 | + +--- + +## 10. Subagent / Multi-Agent System + +### Agent Spawning + +```mermaid +flowchart TD + Call["AgentTool called with:
description, prompt,
subagent_type?, model?,
run_in_background?,
isolation?"] --> ResolveType{Agent type?} + + ResolveType -->|Explicit type| LoadBuiltIn["Load built-in agent:
General, Explore, Plan,
Verification, Guide"] + ResolveType -->|Custom name| LoadCustom["Load from
~/.claude/agents/.md"] + ResolveType -->|Not specified| ForkCheck{Fork
enabled?} + ForkCheck -->|Yes| LoadFork["Fork agent —
inherits parent prompt
(cache-sharing optimization)"] + ForkCheck -->|No| LoadGeneral[General Purpose agent] + + LoadBuiltIn --> AssembleTools["assembleToolPool()
Filter by agent permissions"] + LoadCustom --> AssembleTools + LoadFork --> AssembleTools + LoadGeneral --> AssembleTools + + AssembleTools --> Isolation{isolation
mode?} + Isolation -->|worktree| CreateWT["createAgentWorktree(slug)
Isolated git branch + filesystem"] + Isolation -->|remote| Teleport["teleportToRemote()
Launch on CCR cloud"] + Isolation -->|none| LocalExec[Local execution] + + CreateWT --> SpawnMode + Teleport --> ReturnRemote([Return remote session URL]) + LocalExec --> SpawnMode + + SpawnMode{run_in_background?} + SpawnMode -->|Yes| AsyncLaunch["registerAsyncAgent()
Run in background
Return immediately"] + SpawnMode -->|No| SyncLaunch["Run inline
Block parent"] + + SyncLaunch --> AutoBG["Start auto-background
timer: 120 seconds"] + AutoBG --> Race["Promise.race:
agent result vs timer"] + Race -->|"Agent finishes"| ReturnResult([Return result inline]) + Race -->|"Timer fires"| MigrateAsync["Migrate to background
mid-execution"] + MigrateAsync --> ReturnAsync([Return async_launched]) + + style CreateWT fill:#b197fc,color:#000 + style AsyncLaunch fill:#ffd43b,color:#000 + style SyncLaunch fill:#69db7c,color:#000 + style MigrateAsync fill:#ffa94d,color:#000 +``` + +### Built-in Agent Types + +| Agent | Access Level | System Prompt Focus | Disallowed | +|-------|-------------|-------------------|------------| +| **General Purpose** | Full read/write | "Complete the task fully, don't gold-plate" | AgentTool (no nesting), TaskOutputTool | +| **Explore** | Read-only | "File search specialist. STRICTLY PROHIBITED from creating/modifying files" | All write tools, file creation | +| **Plan** | Read-only | "Software architect and planning specialist. End with 3-5 critical files" | All write tools, state-changing bash | +| **Verification** | Read-only + run | "Your job is to try to break it. Fight your own cognitive biases" | File writes, git writes | +| **Fork** | Inherits parent | Same system prompt as parent (cache sharing) | Recursive forking | +| **Claude Code Guide** | Read + web | "Help with Claude Code, Agent SDK, Claude API questions" | Write tools | + +### Custom Agent Definition Format + +```markdown +--- +name: security-reviewer +description: Security-focused code reviewer +whenToUse: When user asks for security review +disallowedTools: [FileWrite, FileEdit, Bash] +model: inherit +--- + +You are a security review specialist. Analyze code for: +- OWASP Top 10 vulnerabilities +- Injection risks (SQL, command, XSS) +- Authentication/authorization flaws +- Sensitive data exposure +... +``` + +### Worktree Lifecycle + +```mermaid +sequenceDiagram + participant Agent as AgentTool + participant Git as Git + participant Child as Child Agent + participant Cleanup as Cleanup + + Agent->>Git: git worktree add agent-{id} + Git-->>Agent: worktreePath, worktreeBranch, headCommit + + Agent->>Child: Run in worktreePath (CWD override) + activate Child + Note over Child: Works in isolated filesystem + Child->>Child: Make changes, run tests + Child-->>Agent: Result + deactivate Child + + Agent->>Cleanup: cleanupWorktreeIfNeeded() + Cleanup->>Git: hasWorktreeChanges(path, headCommit)? + + alt No changes + Cleanup->>Git: git worktree remove + Note over Cleanup: Clean up — nothing to keep + else Has changes + Note over Cleanup: Preserve worktree + branch + Cleanup-->>Agent: Return {worktreePath, worktreeBranch} + end +``` + +### Inter-Agent Communication + +Agents communicate via two mechanisms: +1. **SendMessageTool**: Direct messaging by agent ID. The message lands in the target agent's `pendingUserMessages` queue. +2. **TaskNotification XML**: In coordinator mode, workers inject `` XML blocks into user messages to report status. +3. **Scratchpad directory**: For durable cross-worker state (file-based, feature-gated). + +--- + +## 11. Permission System + +### Full Decision Flow + +```mermaid +flowchart TD + ToolCall([Tool call requested]) --> Step1 + + subgraph Step1 ["Step 1: Deny Rules (absolute)"] + DenyTool{Entire tool
denied?} + DenyTool -->|Yes| Blocked([DENIED — no override]) + DenyTool -->|No| DenyContent{Content-specific
deny rule?} + DenyContent -->|Yes| Blocked + DenyContent -->|No| Next1[Continue] + end + + Next1 --> Step2 + + subgraph Step2 ["Step 2: Ask Rules"] + AskTool{Entire tool
has ask rule?} + AskTool -->|Yes, sandbox can auto-allow| AutoSandbox[Auto-allow via sandbox] + AskTool -->|Yes| GoToMode[Go to mode check] + AskTool -->|No| ToolPerms["tool.checkPermissions()"] + end + + ToolPerms --> Step3 + + subgraph Step3 ["Step 3: Tool-Specific Logic"] + ToolResult{Tool says?} + ToolResult -->|Allow| AllowTool([ALLOWED]) + ToolResult -->|Deny| DenyTool2([DENIED]) + ToolResult -->|Ask| SafetyCheck{Safety check?
.git/ .claude/ .vscode/
shell configs} + SafetyCheck -->|Yes| BypassImmune([ALWAYS ASK
bypass-immune]) + SafetyCheck -->|No| GoToMode + end + + GoToMode --> Step4 + + subgraph Step4 ["Step 4: Mode Resolution"] + Mode{Permission mode?} + Mode -->|default| Prompt([Show dialog]) + Mode -->|acceptEdits| AcceptCheck{File edit
in CWD?} + AcceptCheck -->|Yes| AllowAccept([ALLOWED]) + AcceptCheck -->|No| Prompt + Mode -->|bypass| AllowBypass([ALLOWED]) + Mode -->|plan| ShowPlan([Show plan]) + Mode -->|dontAsk| SilentDeny([DENIED silently]) + Mode -->|auto| Step5 + end + + subgraph Step5 ["Step 5: Auto Mode (AI Classifier)"] + FastPath1{Safe tool?
Read, Glob, Grep, LSP,
TaskCreate, Sleep...} + FastPath1 -->|Yes| AllowSafe([ALLOWED]) + FastPath1 -->|No| FastPath2{acceptEdits
would allow?} + FastPath2 -->|Yes| AllowFast([ALLOWED]) + FastPath2 -->|No| RunClassifier["classifyYoloAction()
AI side-query"] + RunClassifier --> ClassResult{Decision?} + ClassResult -->|Allow| AllowClass([ALLOWED
Reset consecutive denials]) + ClassResult -->|Deny| TrackDeny["Increment denials:
consecutive++
total++"] + TrackDeny --> Fallback{consecutive >= 3
OR total >= 20?} + Fallback -->|Yes| FallbackPrompt([Fall back to prompting]) + Fallback -->|No| SilentDenyClass([DENIED silently]) + end + + style Blocked fill:#ff6b6b,color:#fff + style BypassImmune fill:#ffa94d,color:#000 + style AllowTool fill:#69db7c,color:#000 + style AllowAccept fill:#69db7c,color:#000 + style AllowBypass fill:#69db7c,color:#000 + style AllowSafe fill:#69db7c,color:#000 + style AllowFast fill:#69db7c,color:#000 + style AllowClass fill:#69db7c,color:#000 + style Prompt fill:#ffd43b,color:#000 + style RunClassifier fill:#b197fc,color:#000 +``` + +### Rule Format & Matching + +Permission rules use a glob-style pattern language: + +``` +Bash — Matches entire tool (all bash commands) +Bash(npm install) — Exact command match +Bash(npm *) — Wildcard: any npm command +Bash(npm:*) — Legacy prefix syntax +Bash(curl https://\*.com) — Escaped asterisk (literal *) +Bash(git commit *) — Matches git commit with any flags + +mcp__github — All tools from GitHub MCP server +mcp__github__list_repos — Specific MCP tool + +Agent(Explore) — Deny Explore agent specifically +``` + +**Wildcard algorithm** (`shellRuleMatching.ts`): +1. Trim pattern +2. Replace `\*` → null-byte placeholder, `\\` → null-byte placeholder +3. Escape regex special chars (except unescaped `*`) +4. Convert unescaped `*` to `.*` +5. Make trailing ` .*` optional (so `git *` matches bare `git` too) +6. Test full string match: `^pattern$` with `dotAll` flag + +### Rule Sources (8 levels) + +```mermaid +graph TD + P["policySettings — Enterprise/admin managed
Read-only, cannot be overridden by user"] --> F + F["flagSettings — --permissions CLI flag
Applied at startup"] --> Proj + Proj["projectSettings — .claude/settings.json
Committed to repo, shared with team"] --> L + L["localSettings — .claude.local/settings.json
Always gitignored, personal overrides"] --> U + U["userSettings — ~/.claude/settings.json
Global user preferences"] --> C + C["cliArg — Runtime arguments
In-memory, from API/SDK callers"] --> Cmd + Cmd["command — Runtime directives
From coordinator or workflow"] --> S + S["session — In-memory grants
'Always allow for this session'
NOT persisted to disk"] + + style P fill:#ff6b6b,color:#fff + style Proj fill:#ffa94d,color:#000 + style U fill:#ffd43b,color:#000 + style S fill:#69db7c,color:#000 +``` + +### Session Grants Flow + +When the user clicks "Always allow for this session": + +```mermaid +sequenceDiagram + participant User + participant Dialog as Permission Dialog + participant Context as ToolPermissionContext + participant Future as Future Tool Calls + + User->>Dialog: Click "Always allow for this session" + Dialog->>Context: PermissionUpdate {
type: 'addRules',
rules: [{toolName: 'Bash', ruleContent: 'npm install'}],
behavior: 'allow',
destination: 'session'
} + Context->>Context: alwaysAllowRules['session'].push('Bash(npm install)') + Note over Context: In-memory only — NOT written to disk + + Future->>Context: Check: can use Bash(npm install)? + Context-->>Future: Matched session grant → ALLOW + + Note over Context: Session ends → context freed → grant lost +``` + +### Dangerous Bash Patterns (Auto Mode) + +When entering auto mode, these patterns are stripped from allow rules to prevent interpreter bypass: + +``` +python, python3, python2, node, deno, tsx, ruby, perl, php, lua, +npx, bunx, npm run, yarn run, pnpm run, bun run, bash, sh, ssh, +zsh, fish, eval, exec, env, xargs, sudo +``` + +Plus ANT-only: `gh`, `curl`, `wget`, `git`, `kubectl`, `aws`, `gcloud`, `gsutil` + +--- + +## 12. Hook System + +### Event Types & Lifecycle + +```mermaid +flowchart TD + subgraph "Pre-Execution Hooks" + PreTool["PreToolUse
Before tool runs
Can: block, modify input, add context"] + UserSubmit["UserPromptSubmit
User sends message
Can: modify, block"] + SubStart["SubagentStart
Before subagent spawns"] + WTCreate["WorktreeCreate
Before worktree creation"] + end + + subgraph "Post-Execution Hooks" + PostTool["PostToolUse
After tool succeeds
Can: add context, modify MCP output"] + PostFail["PostToolUseFailure
After tool fails"] + PermDenied["PermissionDenied
Classifier denied tool"] + end + + subgraph "Lifecycle Hooks" + SessStart["SessionStart
Session initialized
Can: inject initial messages"] + Setup["Setup
Additional initialization"] + FileChange["FileChanged
Watched file modified"] + CwdChange["CwdChanged
Working directory changed"] + end + + subgraph "Notification Hooks" + Notif["Notification
Types: permission_prompt,
idle_prompt, auth_success,
elicitation_dialog/complete/response"] + end + + subgraph "Permission Hooks" + PermReq["PermissionRequest
Permission prompt triggered
Can: approve/deny with rules"] + end + + style PreTool fill:#ffa94d,color:#000 + style PostTool fill:#69db7c,color:#000 + style SessStart fill:#4a9eff,color:#fff + style PermReq fill:#b197fc,color:#000 +``` + +### Hook I/O + +**PreToolUse** receives: +```json +{ + "type": "tool_use", + "name": "Bash", + "input": {"command": "npm install lodash"}, + "tool_use_id": "toolu_01...", + "tool_name": "Bash" +} +``` + +**PreToolUse** can return: +```json +{ + "hookEventName": "PreToolUse", + "permissionDecision": "approve", // or "block" + "permissionDecisionReason": "Lint passed", + "updatedInput": {"command": "npm install --save-exact lodash"}, + "additionalContext": "Note: package was pinned to exact version" +} +``` + +**PermissionRequest** can return: +```json +{ + "hookEventName": "PermissionRequest", + "decision": { + "behavior": "allow", + "updatedInput": {"command": "..."}, + "updatedPermissions": [ + {"type": "addRules", "rules": [{"toolName": "Bash", "ruleContent": "npm *"}], "behavior": "allow", "destination": "session"} + ], + "interrupt": false + } +} +``` + +### Configuration + +```json +{ + "hooks": { + "PreToolUse": [ + { + "matcher": {"tool_name": "Bash"}, + "command": "~/.claude/hooks/lint-bash.sh", + "timeout": 600000 + } + ], + "PostToolUse": [ + { + "matcher": {"tool_name": "FileEdit"}, + "command": "~/.claude/hooks/auto-format.sh" + } + ], + "UserPromptSubmit": [ + { + "command": "~/.claude/hooks/log-prompt.sh" + } + ] + } +} +``` + +### Timeouts + +| Hook type | Default timeout | +|-----------|----------------| +| Tool hooks (PreToolUse, PostToolUse, etc.) | 10 minutes | +| Session-end hooks | 1.5 seconds | +| Override via env | `CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS` | + +### Async Hooks + +Hooks can return `{ async: true, asyncTimeout?: number }` to execute in the background: +- The agent continues without waiting +- Hook runs in a subprocess +- Completion is notified via callback + +--- + +## 13. Memory System + +### Architecture Overview + +```mermaid +graph TD + subgraph "Conversation Flow" + Turn["Each turn completes"] --> PostHook["Post-sampling hook fires"] + PostHook --> Gate{Feature gate:
tengu_passport_quail
+ auto-memory enabled?} + Gate -->|No| Skip[Skip extraction] + Gate -->|Yes| Throttle{Every N turns?
Default: 1} + Throttle -->|Skip| Skip + Throttle -->|Run| Cursor["Advance cursor:
count messages since
lastMemoryMessageUuid"] + Cursor --> Overlap{Main agent
already wrote memories?} + Overlap -->|Yes| Skip + Overlap -->|No| Manifest["Pre-inject manifest
of existing memories"] + Manifest --> Fork["Fork extraction agent
Max 5 turns"] + end + + subgraph "Extraction Agent" + Fork --> Tools2["Restricted tools:
Read, Grep, Glob
read-only Bash (ls, find, grep...)
Edit/Write ONLY in memory dir"] + Tools2 --> Analyze["Analyze recent messages"] + Analyze --> Decide{Worth
remembering?} + Decide -->|Yes| WriteFile["Write memory file
with frontmatter"] + Decide -->|No| NextMsg[Check next message] + WriteFile --> UpdateIndex["Update MEMORY.md index"] + end + + subgraph "Storage" + direction LR + Dir["~/.claude/projects//memory/"] + Index["MEMORY.md
Max 200 lines / 25KB
ALWAYS loaded into context"] + Files["Individual .md files
with frontmatter"] + Dir --- Index + Dir --- Files + end + + subgraph "Recall (Next Conversation)" + Load["Load MEMORY.md
into system prompt"] --> LLM["LLM sees memories
in context"] + LLM --> Verify["Before recommending:
- File path? Check exists
- Function? Grep for it
- Recent state? Use git log"] + end + + style Fork fill:#b197fc,color:#000 + style Index fill:#4a9eff,color:#fff + style Verify fill:#ffa94d,color:#000 +``` + +### Memory Types + +| Type | What to save | When to save | Body structure | +|------|-------------|-------------|----------------| +| **user** | Role, goals, preferences, expertise level | When you learn any details about the user | Free-form | +| **feedback** | Approach guidance — corrections AND confirmations | Corrections ("don't do X") AND confirmations ("yes, exactly that") | Rule → **Why:** → **How to apply:** | +| **project** | Ongoing work, goals, deadlines, decisions | When you learn who/what/why/when — convert relative dates to absolute | Fact → **Why:** → **How to apply:** | +| **reference** | Pointers to external systems | When you learn about resources in external systems | URL/path + purpose | + +### What NOT to Save + +The prompt explicitly forbids saving: +- Code patterns, conventions, architecture, file paths — *read the current state* +- Git history, recent changes — *`git log` / `git blame` are authoritative* +- Debugging solutions or fix recipes — *the fix is in the code* +- Anything already documented in CLAUDE.md files +- Ephemeral task details: in-progress work, temporary state + +> "These exclusions apply even when the user explicitly asks you to save. If they ask you to save a PR list or activity summary, ask what was *surprising* or *non-obvious* about it — that is the part worth keeping." + +### Memory File Format + +```markdown +--- +name: User prefers single PRs for refactors +description: Bundled PRs over many small ones for refactoring work +type: feedback +--- + +User prefers one bundled PR over many small ones for refactors. + +**Why:** Splitting causes unnecessary churn in this codebase. +**How to apply:** When planning refactors, propose a single PR unless the change truly requires staging. +``` + +### MEMORY.md Index + +```markdown +- [User profile](user_profile.md) — Senior backend engineer, prefers Go, new to React +- [Testing approach](feedback_testing.md) — Integration tests must hit real DB, not mocks +- [Auth rewrite](project_auth.md) — Legal-driven, compliance deadline 2026-04-15 +- [Bug tracker](reference_linear.md) — Pipeline bugs tracked in Linear project "INGEST" +``` + +**Constraints**: Max 200 lines, max 25KB. If exceeded, truncated with warning: +> "WARNING: MEMORY.md is N lines and X bytes. Only part of it was loaded." + +### Extraction Agent Tool Permissions + +The extraction agent has a tightly-scoped tool set: + +```typescript +function createAutoMemCanUseTool(memoryDir: string): CanUseToolFn { + // ALLOW unrestricted: Read, Grep, Glob + // ALLOW if read-only: Bash (ls, find, grep, cat, stat, wc, head, tail) + // ALLOW only in memoryDir: Edit, Write + // DENY everything else: rm, MCP, Agent, write-capable Bash +} +``` + +### Staleness Verification + +Before recommending from memory, the prompt instructs: + +> "A memory that names a specific function, file, or flag is a claim that it existed *when the memory was written*. Before recommending it: +> - If the memory names a file path: check the file exists. +> - If the memory names a function or flag: grep for it. +> - If the user is about to act on your recommendation, verify first. +> 'The memory says X exists' is not the same as 'X exists now.'" + +--- + +## 14. Skills System + +### Skill Sources & Merging + +```mermaid +graph TD + subgraph "Bundled Skills (in binary)" + B1["/commit — Git commit workflow"] + B2["/review — Code review"] + B3["/simplify — Simplify code"] + B4["/loop — Recurring task runner"] + B5["/debug — Debug assistance"] + B6["/verify — Verification workflow"] + B7["/remember — Save a memory"] + B8["/schedule — Cron agent setup"] + B9["/claude-api — API helper"] + B10["/keybindings — Configure keys"] + B11["/update-config — Settings helper"] + B12["+20 more..."] + end + + subgraph "User Skills" + US["~/.claude/skills//SKILL.md"] + US2["~/.claude/commands/.md (legacy)"] + end + + subgraph "Project Skills" + PS[".claude/skills//SKILL.md"] + PS2[".claude/commands/.md (legacy)"] + end + + subgraph "MCP Skills" + MCP2["MCP resources → skill builders"] + end + + subgraph "Plugin Skills" + PL["Loaded plugins → skill exports"] + end + + B1 --> Merge["Merge with priority:
1. Bundled
2. Built-in plugins
3. User skills
4. Project skills
5. MCP skills
6. Plugin skills"] + US --> Merge + PS --> Merge + MCP2 --> Merge + PL --> Merge + + Merge --> Registry["Skill Registry
(first match by name wins)"] + Registry --> Available["Available as /commands
in prompt input"] + + style Registry fill:#4a9eff,color:#fff +``` + +### Skill Definition Format + +```markdown +--- +name: review +description: Review code changes for quality and bugs +when-to-use: When user asks for a code review or PR review +allowed-tools: Read, Grep, Glob, Bash +argument-hint: "[files or PR number]" +context: fork +model: inherit +user-invocable: true +effort: medium +hooks: + PostToolUse: + - matcher: {tool_name: "Bash"} + command: "echo 'reviewed'" +--- + +Review the current git diff comprehensively: + +1. **Security**: Check for injection, XSS, exposed secrets +2. **Logic**: Verify edge cases, error handling, race conditions +3. **Style**: Ensure consistent patterns with existing code +4. **Performance**: Flag N+1 queries, unnecessary allocations + +Report findings as: +- CRITICAL: Must fix before merge +- WARNING: Should address +- NOTE: Consider for future +``` + +### Skill Frontmatter Fields + +| Field | Type | Purpose | +|-------|------|---------| +| `name` | string | Skill name (also command name) | +| `description` | string | One-line description | +| `when-to-use` | string | When the LLM should suggest this skill | +| `allowed-tools` | string[] | Comma-separated tool names | +| `argument-hint` | string | Usage hint shown in autocomplete | +| `arguments` | string[] | Named argument list | +| `model` | string | Model override or 'inherit' | +| `user-invocable` | boolean | Can user invoke via `/command` | +| `disable-model-invocation` | boolean | LLM cannot invoke automatically | +| `context` | 'inline' \| 'fork' | Execution mode | +| `agent` | string | Agent type to use | +| `effort` | 'low' \| 'medium' \| 'high' | Effort level | +| `shell` | string | Shell interpreter for prompt | +| `hooks` | HooksSettings | Hook configuration | +| `paths` | string[] | File patterns for activation | + +### Execution Modes + +**Inline** (`context: 'inline'`): +- Skill prompt injected directly into the current conversation +- Uses the main agent's tools and context +- Simpler, no overhead + +**Fork** (`context: 'fork'`): +- Spawns an isolated sub-agent with the skill's prompt +- Gets its own `QueryEngine` with shared cache +- Tool set restricted to `allowedTools` from frontmatter +- Returns result text to the main conversation + +### Bundled Skill Reference Files + +Some bundled skills include reference files that are extracted to disk on first invocation: + +```typescript +{ + name: 'claude-api', + files: { + 'sdk-reference/anthropic-sdk.md': '...', + 'sdk-reference/tool-use.md': '...', + }, + getPromptForCommand: async (args, context) => { + // Prepends "Base directory for this skill: " to prompt + return [{ type: 'text', text: promptContent }] + } +} +``` + +Files are extracted with `O_EXCL` flag (fail if exists), per-process nonce, and `0o700`/`0o600` permissions for security. + +--- + +## 15. Task System + +### Task Model + +```mermaid +stateDiagram-v2 + [*] --> pending: TaskCreate + pending --> running: TaskUpdate(status) + running --> completed: TaskUpdate(status) + running --> failed: TaskUpdate(status) + pending --> completed: TaskUpdate(status) + pending --> deleted: TaskUpdate(status='deleted') + running --> killed: TaskStop + completed --> [*] + failed --> [*] + killed --> [*] + deleted --> [*] +``` + +### Task Types + +| Type | Prefix | Description | +|------|--------|-------------| +| `local_bash` | `b` | Background shell command | +| `local_agent` | `a` | Local subagent | +| `remote_agent` | `r` | CCR remote agent | +| `in_process_teammate` | `t` | In-process teammate (swarm) | +| `local_workflow` | `w` | Local workflow script | +| `monitor_mcp` | `m` | MCP server monitor | +| `dream` | `d` | Auto-dream background task | + +Task IDs are generated as `{prefix}{8 random alphanumeric chars}` (e.g., `a4kx92mf3n`). + +### Task Schema + +```typescript +TaskCreate input: { + subject: string // Brief title + description: string // What needs to be done + activeForm?: string // Present continuous for spinner ("Running tests") + metadata?: Record +} + +TaskUpdate input: { + taskId: string + subject?: string + description?: string + activeForm?: string + status?: 'pending' | 'running' | 'completed' | 'failed' | 'deleted' + addBlocks?: string[] // Task IDs this task blocks + addBlockedBy?: string[] // Task IDs blocking this task + owner?: string // Teammate name for assignment + metadata?: Record +} +``` + +### UI Integration + +The spinner component reads the active task: +``` +1. Find first task with status !== 'pending' && status !== 'completed' +2. Use its activeForm text as spinner verb +3. Fall back to random verb from getSpinnerVerbs() + +Example: "Running tests..." instead of "Thinking..." +``` + +The task list auto-expands (`expandedView = 'tasks'`) whenever a task is created or updated. + +### Teammate Assignment + +In swarm mode, tasks can be assigned to teammates: +```typescript +if (updates.owner && isAgentSwarmsEnabled()) { + const message = JSON.stringify({ + type: 'task_assignment', + taskId, + subject: existingTask.subject, + assignedBy: senderName, + timestamp: new Date().toISOString(), + }) + await writeToMailbox(updates.owner, message, taskListId) +} +``` + +--- + +## 16. Terminal UI Architecture + +Claude Code built a **custom React reconciler** for the terminal — the same abstraction layer that React DOM and React Native sit on. The `ink/` directory alone is thousands of lines. + +### Rendering Pipeline + +```mermaid +flowchart TD + subgraph "React Layer" + Components["React Components
REPL, Spinner, MessageList,
PermissionDialog, TaskList,
PromptInput, etc."] + end + + subgraph "Reconciler" + Reconciler["reconciler.ts
Custom React Reconciler
Manages terminal DOM lifecycle
createElement, appendChild,
removeChild, commitUpdate"] + end + + subgraph "Layout" + Yoga["Yoga Layout Engine
Flexbox for terminal cells
Same engine as React Native
flexDirection, justifyContent,
alignItems, padding, margin, etc."] + end + + subgraph "Render" + RenderNode["render-node-to-output.ts
Walk React tree → screen buffer
Apply styles per cell"] + Screen["screen.ts
Cell pool with interning:
- char (Unicode codepoint)
- style (fg, bg, bold, italic...)
- hyperlink (OSC 8 URL)"] + end + + subgraph "Display" + Frame["frame.ts
Double-buffered:
back frame (writing)
front frame (displayed)"] + Optimizer["optimizer.ts
Diff back vs front
Only emit changed cells"] + Output["output.ts
Generate ANSI escape codes
CSI sequences for colors,
cursor movement, styles"] + end + + subgraph "Input Processing" + Stdin["Terminal stdin"] --> ParseKey["parse-keypress.ts
State machine for
multi-byte sequences"] + ParseKey --> ChordMatch["keybindings/match.ts
Match against chord
bindings (ctrl+k ctrl+s)"] + Stdin --> MouseParse["Mouse mode-1003
Parse coordinates"] + MouseParse --> HitTest["hit-test.ts
Walk DOM tree
Find clicked element"] + HitTest --> Selection["selection.ts
Char/word/line modes
Shift+click range selection"] + end + + Components --> Reconciler + Reconciler --> Yoga + Yoga --> RenderNode + RenderNode --> Screen + Screen --> Frame + Frame --> Optimizer + Optimizer --> Output + Output --> Terminal(["Terminal stdout"]) + + ParseKey --> Components + ChordMatch --> Components + HitTest --> Components + + style Reconciler fill:#4a9eff,color:#fff + style Yoga fill:#b197fc,color:#000 + style Frame fill:#ffa94d,color:#000 + style Output fill:#69db7c,color:#000 +``` + +### Core Components + +| Component | File | Purpose | +|-----------|------|---------| +| `App.tsx` | `ink/components/App.tsx` (98KB) | Root: stdin/stdout/stderr, Ctrl+C, keyboard parsing, mouse tracking, focus | +| `Box` | `ink/components/Box.tsx` | Flex container (`
`) | +| `Text` | `ink/components/Text.tsx` | Text rendering with styles | +| `ScrollBox` | `ink/components/ScrollBox.tsx` | Scrollable container | +| `Button` | `ink/components/Button.tsx` | Interactive button | +| `AlternateScreen` | `ink/components/AlternateScreen.tsx` | Full-screen mode (smcup/rmcup) | +| `Link` | `ink/components/Link.tsx` | OSC 8 terminal hyperlinks | +| `RawAnsi` | `ink/components/RawAnsi.tsx` | Passthrough for pre-formatted ANSI | + +### Key Features + +- **Double buffering**: Back frame accumulates changes, optimizer diffs against front frame, only changed cells emit ANSI codes +- **Cell interning**: `screen.ts` uses pools for char/style/hyperlink to minimize memory allocation +- **Bidirectional text**: `bidi.ts` handles RTL text layout +- **Terminal capability detection**: `terminal-querier.ts` queries terminal for size, color support, sixel graphics +- **Search highlighting**: `searchHighlight.ts` overlays query matches across the screen buffer +- **Alternate screen**: Full-screen takeover for immersive views, restores original terminal on exit +- **Log update**: `log-update.ts` for incremental output (avoids full redraws) + +### Keyboard System + +```mermaid +flowchart LR + subgraph "Keybinding Configuration" + Default["Default bindings
(built into binary)"] + User["~/.claude/keybindings.json
(user overrides)"] + Default --> Merge["Merge: user overrides default"] + User --> Merge + end + + subgraph "Key Parsing" + Raw["Raw stdin bytes"] --> StateMachine["State machine parser
Handles: CSI, SS3, escape sequences"] + StateMachine --> KeyEvent["KeyEvent:
{key, ctrl, alt, shift, meta}"] + end + + subgraph "Chord Matching" + KeyEvent --> ChordBuffer["Chord buffer
Accumulates partial chords"] + ChordBuffer --> Match{"Match against
context bindings?"} + Match -->|"Full match"| Action["Dispatch action
e.g., app:toggleTodos"] + Match -->|"Partial"| Wait["Wait for next key"] + Match -->|"No match"| PassThrough["Pass to input handler"] + end + + style Merge fill:#4a9eff,color:#fff + style Action fill:#69db7c,color:#000 +``` + +**Contexts**: Global, Chat, Autocomplete, Confirmation, Help, Transcript, HistorySearch, Task, ThemePicker, Settings, Tabs, Attachments, Footer, MessageSelector, DiffDialog, ModelPicker, Select, Plugin + +**Example chord**: `ctrl+k ctrl+s` — first key enters chord mode, second key completes the action. + +--- + +## 17. Cost Tracking + +### Calculation + +```mermaid +flowchart LR + Response["API Response
usage object"] --> Extract["Extract:
input_tokens
output_tokens
cache_read_input_tokens
cache_creation_input_tokens
web_search_requests"] + + Extract --> Formula["cost =
(input / 1M) × inputPrice
+ (output / 1M) × outputPrice
+ (cacheRead / 1M) × cacheReadPrice
+ (cacheCreate / 1M) × cacheWritePrice
+ webSearches × searchPrice"] + + Formula --> PerModel["Accumulate per model:
tokensIn, tokensOut,
cacheRead, cacheCreate,
costUSD"] + + PerModel --> Display["On exit:
Total cost: $X.XX
Total duration (API): Xs
Total duration (wall): Xs
Total code changes: +N, -M
Per-model breakdown"] +``` + +### Pricing Tiers (per 1M tokens) + +| Tier | Input | Output | Cache Read | Cache Write | Models | +|------|-------|--------|------------|-------------|--------| +| COST_TIER_3_15 | $3 | $15 | $0.30 | $3.75 | Sonnet 3.5–4.6 | +| COST_TIER_15_75 | $15 | $75 | $1.50 | $18.75 | Opus 4.0, 4.1 | +| COST_TIER_5_25 | $5 | $25 | $0.50 | $6.25 | Opus 4.5 | +| COST_TIER_30_150 | $30 | $150 | $3.00 | $37.50 | Opus 4.6 (fast) | +| COST_HAIKU_35 | $0.80 | $4 | $0.08 | $1.00 | Haiku 3.5 | +| COST_HAIKU_45 | $1 | $5 | $0.10 | $1.25 | Haiku 4.5 | + +### Display Format + +``` +formatCost(cost): + if cost > $0.50 → round to 2 decimal places ($1.23) + else → show 4 decimal places ($0.0042) +``` + +Cost summary is saved per-session to project config for historical tracking. + +--- + +## 18. MCP Integration + +### Connection Flow + +```mermaid +sequenceDiagram + participant Config as Settings + participant Client as MCP Client + participant Server as MCP Server + participant Registry as Tool Registry + + Config->>Client: Load MCP server configs
(user + project + managed) + + loop For each configured server + Client->>Server: Connect via transport
(stdio | sse | http | ws | sdk) + + alt OAuth required + Client->>Client: Refresh OAuth token
(XAA cross-app access) + end + + Server-->>Client: Server capabilities + Client->>Server: List tools + Server-->>Client: Tool definitions (name, schema, description) + + loop For each tool + Client->>Registry: Register as mcp__server__toolname
with JSON Schema input + end + + Client->>Server: List resources + Server-->>Client: Resource definitions + + loop For each resource + Client->>Registry: Register ListMcpResourcesTool
+ ReadMcpResourceTool + end + end +``` + +### Transport Types + +| Transport | Protocol | Use case | +|-----------|----------|----------| +| `stdio` | JSON-RPC over stdin/stdout | Local process (most common) | +| `sse` | HTTP Server-Sent Events | HTTP servers | +| `http` | HTTP POST | Stateless HTTP APIs | +| `ws` | WebSocket | Persistent connections | +| `sdk` | In-process SDK | Embedded servers | + +### Config Scopes + +```mermaid +graph LR + subgraph "Sources (priority order)" + Managed["Managed settings
(enterprise, read-only)"] + Project[".claude/settings.json
(project, committed)"] + Local[".claude.local/settings.json
(personal, gitignored)"] + User["~/.claude/settings.json
(global user)"] + Dynamic["Dynamic
(runtime registration)"] + end + + Managed --> Merge[Merge configs] + Project --> Merge + Local --> Merge + User --> Merge + Dynamic --> Merge + + Merge --> Servers["Active MCP servers"] +``` + +### Permission Integration + +MCP tools follow the same permission system as built-in tools: +- Server-level rules: `mcp__github` matches ALL tools from the GitHub server +- Tool-level rules: `mcp__github__list_repos` matches a specific tool +- Wildcard: `mcp__github__*` matches all tools from GitHub +- Content-specific: `mcp__github__create_issue(repo:my-org/*)` matches specific repos + +### Content Handling + +- **Truncation**: Large MCP tool outputs are truncated before returning to the LLM +- **Binary blobs**: Binary content saved to persistent storage with a text reference +- **Error recovery**: Code-indexing detection prevents MCP errors from crashing the agent +- **OAuth refresh**: Automatic token refresh for authenticated MCP servers + +--- + +## 19. Session & State Management + +### Application State + +```mermaid +graph TD + subgraph "AppState Store (pub/sub)" + Settings["settings: SettingsJson"] + Model["mainLoopModel: ModelSetting"] + Permissions["toolPermissionContext"] + Tasks3["tasks: {[id]: TaskState}"] + MCPState["mcp: {clients, tools}"] + Plugins["plugins: {enabled, disabled}"] + Todos["todos: {[agentId]: TodoList}"] + Thinking["thinkingEnabled: boolean"] + View["expandedView: 'none'|'tasks'|'teammates'"] + Notifications["notifications: {current, queue}"] + Elicitation["elicitation: {queue}"] + end + + subgraph "Store API" + Get["getState() → snapshot"] + Set["setState(updater) → triggers listeners"] + Sub["subscribe(listener) → unsubscribe fn"] + end + + subgraph "Subscribers" + REPL3["REPL Screen"] + Spinner2["Spinner"] + TaskUI["Task List"] + PermUI["Permission Dialog"] + end + + Set --> REPL3 + Set --> Spinner2 + Set --> TaskUI + Set --> PermUI +``` + +### Session Persistence + +```mermaid +flowchart TD + subgraph "Per-Turn" + Record["recordTranscript()
Fire-and-forget"] + Record --> SessionJSON["~/.claude/projects//
.jsonl"] + end + + subgraph "History" + AddHistory["addToHistory()"] --> HistFile["~/.claude/history.jsonl
Max 100 entries"] + HistFile --> Format["Per entry:
{display, pastedContents,
timestamp, project, sessionId}"] + end + + subgraph "Pasted Content" + LargeContent{"> 1KB?"} -->|Yes| External["Store externally
Reference: [Pasted text #1 +10 lines]"] + LargeContent -->|No| Inline["Store inline in history entry"] + end + + subgraph "Resume Flow" + ResumeCmd["/resume or --resume"] --> LoadSession["Load session JSON"] + LoadSession --> Deserialize["Deserialize messages"] + Deserialize --> RestoreState["Restore compact boundaries,
tool state, permissions"] + RestoreState --> Continue["Continue conversation"] + end + + style SessionJSON fill:#4a9eff,color:#fff + style HistFile fill:#69db7c,color:#000 +``` + +### Session History Deduplication + +When reading history, current-session entries are yielded first, then other sessions from the same project: + +```typescript +for await (const entry of makeLogEntryReader()) { + if (entry.project !== currentProject) continue; + if (entry.sessionId === currentSession) { + yield entry; // Current session first + } else { + otherSessionEntries.push(entry); // Buffer others + } + if (yielded + others >= MAX_HISTORY_ITEMS) break; +} +for (const entry of otherSessionEntries) { + yield entry; // Then other sessions +} +``` + +### Remote Sessions + +For CCR (Claude Code Remote): + +```mermaid +sequenceDiagram + participant Local as Local Client + participant WS as WebSocket + participant Remote as Remote Agent + + Local->>WS: Connect to /v1/sessions/ws/{id}/subscribe + WS->>Local: auth required + Local->>WS: {type: 'auth', credential: {type: 'oauth', token: '...'}} + + loop Conversation + Local->>Remote: HTTP POST — send user message + Remote->>WS: Stream SDKMessage events + WS->>Local: Display messages + + alt Permission needed + Remote->>WS: permission_request event + WS->>Local: Show permission dialog + Local->>Remote: HTTP POST — permission response + end + end + + Note over WS: Reconnect: max 5 retries, 2s delay + Note over WS: Ping keepalive: every 30s + Note over WS: Permanent close: 4003 (unauthorized) + Note over WS: Transient retry: 4001 (session not found during compaction) +``` + +--- + +## 20. The Verification Agent + +One of the most architecturally interesting decisions: an adversarial agent specifically designed to distrust its own outputs and fight its cognitive biases. + +### Trigger Conditions + +The main agent spawns a verification agent when implementation is "non-trivial": +- 3+ file edits +- Backend/API changes +- Infrastructure changes + +### Verification Flow + +```mermaid +flowchart TD + Impl["Implementation complete"] --> Trigger{Non-trivial?} + Trigger -->|No| Report["Report to user directly"] + Trigger -->|Yes| Spawn["Spawn verification agent
subagent_type='verification'"] + + Spawn --> Receive["Receives:
- Original user request
- All files changed
- Approach taken
- Plan file path"] + + Receive --> Strategy["Select verification strategy
based on change type"] + + subgraph "Strategy Selection" + Frontend["Frontend: Start dev server,
hit all routes, test forms"] + Backend["Backend: Start server,
curl endpoints, test edge cases"] + CLI2["CLI: Run commands,
test flags, check output"] + Infra["Infra: Validate configs,
dry-run deploys"] + DB["Database: Check migrations,
test rollback"] + Refactor["Refactor: Run test suite,
check for regressions"] + end + + Strategy --> Execute["Execute checks"] + + subgraph "Required Output Format" + Check["### Check: [what you're verifying]
**Command run:**
[exact command]
**Output observed:**
[actual output — copy-paste]
**Result: PASS** (or FAIL)"] + end + + Execute --> Check + Check --> Verdict{VERDICT?} + + Verdict -->|PASS| SpotCheck["Main agent spot-checks:
Re-run 2-3 commands
Verify output matches"] + SpotCheck --> SpotResult{Outputs match?} + SpotResult -->|Yes| Done([Report PASS to user]) + SpotResult -->|No| Resume["Resume verifier
with discrepancy details"] + + Verdict -->|FAIL| Fix["Main agent fixes issue"] + Fix --> ReVerify["Resume verifier
with fix details"] + ReVerify --> Execute + + Verdict -->|PARTIAL| ReportPartial["Report what passed
and what couldn't verify"] + + style Spawn fill:#ff6b6b,color:#fff + style Execute fill:#ffa94d,color:#000 + style Done fill:#69db7c,color:#000 +``` + +### The Anti-Bias Prompt + +The verification agent's system prompt contains an extraordinary section on cognitive bias awareness: + +> **"You have two documented failure patterns."** +> +> **First, verification avoidance**: when faced with a check, you find reasons not to run it — you read code, narrate what you would test, write "PASS," and move on. +> +> **Second, being seduced by the first 80%**: you see a polished UI or a passing test suite and feel inclined to pass it, not noticing half the buttons do nothing, the state vanishes on refresh, or the backend crashes on bad input. The first 80% is the easy part. Your entire value is in finding the last 20%. + +Then it lists specific rationalizations and their counters: + +| Excuse the agent will reach for | Counter | +|------|---------| +| "The code looks correct based on my reading" | Reading is not verification. Run it. | +| "The implementer's tests already pass" | The implementer is an LLM. Verify independently. | +| "This is probably fine" | Probably is not verified. Run it. | +| "Let me start the server and check the code" | No. Start the server and hit the endpoint. | +| "I don't have a browser" | Did you check for mcp__chrome / mcp__playwright? Use them. | +| "This would take too long" | Not your call. | + +> "If you catch yourself writing an explanation instead of a command, stop. Run the command." + +### Spot-Check Protocol + +After the verifier returns PASS, the main agent doesn't just trust it: + +1. Re-run 2-3 commands from the verifier's report +2. Confirm every PASS has a "Command run" block with actual output +3. Verify output matches the re-run +4. If any PASS lacks a command block or output diverges → resume verifier with specifics + +--- + +## Summary: File Reference + +| System | Core Files | Lines | +|--------|-----------|-------| +| Agent Loop | `query.ts`, `QueryEngine.ts` | ~4,500 | +| Streaming | `StreamingToolExecutor.ts`, `claude.ts` | ~3,500 | +| Tool System | `Tool.ts`, `tools.ts` | ~1,500 | +| Tool Implementations | `tools/` directory | ~12,000 | +| System Prompt | `constants/prompts.ts` | 914 | +| Context Management | `services/compact/` | ~3,000 | +| Token Estimation | `services/tokenEstimation.ts` | ~500 | +| Subagents | `tools/AgentTool/` | ~2,000 | +| Permissions | `utils/permissions/` | ~3,000 | +| Hooks | `utils/hooks/`, `types/hooks.ts` | ~1,500 | +| Memory | `memdir/`, `services/extractMemories/` | ~2,000 | +| Skills | `skills/`, `tools/SkillTool/` | ~2,500 | +| Tasks | `tools/TaskCreateTool/` etc. | ~1,000 | +| Terminal UI | `ink/` | ~8,000 | +| Cost Tracking | `cost-tracker.ts`, `utils/modelCost.ts` | ~500 | +| MCP | `services/mcp/` | ~3,000 | +| Session/State | `state/`, `history.ts` | ~2,000 | +| Commands | `commands.ts`, `commands/` | ~3,000 | +| Keybindings | `keybindings/` | ~1,000 | +| **Total** | **1,903 files** | **~50,000+** | diff --git a/docs/ecosystem/kosmokrator/research/opencode-feature-analysis.md b/docs/ecosystem/kosmokrator/research/opencode-feature-analysis.md new file mode 100644 index 0000000..0c55001 --- /dev/null +++ b/docs/ecosystem/kosmokrator/research/opencode-feature-analysis.md @@ -0,0 +1,320 @@ +# OpenCode Feature Analysis for KosmoKrator + +Analysis of features from [OpenCode](https://github.com/opencode-ai/opencode) that could be implemented in KosmoKrator, ordered by impact and feasibility. + +--- + +## High Impact — Should Implement + +### 1. Permission System for Tool Execution + +OpenCode has a rule-based permission system (`allow` / `deny` / `ask`) with glob pattern matching. Tools like `bash`, `file_write`, `file_edit` prompt the user before executing. KosmoKrator currently auto-executes everything. + +**How OpenCode does it:** +- Rules defined as `{permission: string, pattern: string, action: "allow" | "deny" | "ask"}` +- Last-match-wins for overlapping patterns, wildcard/glob support +- Rulesets merge hierarchically: system defaults → agent defaults → user config +- Request/reply workflow: tool asks permission, UI shows prompt, user responds "once" / "always" / "reject" +- Edit tools require ask with diff metadata shown to user +- `.env` files always require explicit approval +- Tracks tool calls with messageID + callID for audit + +**Scope for KosmoKrator:** +- New `Permission/` namespace with `Rule`, `Ruleset`, `PermissionEvaluator` +- Default rules: `bash` → ask, `file_write`/`file_edit` → ask, `file_read`/`glob`/`grep` → allow +- UI integration: renderer shows permission prompt, user approves/denies +- Config in `kosmokrator.yaml` for user overrides +- "Always allow" memory per session + +**Why:** Safety is essential — one wrong `rm -rf` and you've lost work. + +--- + +### 2. Session Persistence (SQLite) + +OpenCode persists sessions to SQLite so you can resume conversations, review history, and export/import. KosmoKrator's `ConversationHistory` is in-memory only. + +**How OpenCode does it:** +- SQLite via Drizzle ORM with migration system +- Tables: sessions (id, slug, title, directory, version), messages (id, sessionID, role), parts (id, messageID, type, content) +- Message parts are polymorphic: text, tool_call, tool_result, reasoning, snapshot, patch +- Session listing with fuzzy search in TUI dialog +- Archive/restore capability +- Auto-generated session titles via dedicated LLM agent + +**Scope for KosmoKrator:** +- SQLite storage at `~/.kosmokrator/sessions.db` +- Schema: sessions table + messages table (JSON content column is simplest start) +- New commands: `/sessions` (list), `/resume ` (restore), `/export` (dump JSON) +- `ConversationHistory` backed by SQLite instead of in-memory array +- Session title auto-generation (use the LLM itself with a short prompt) + +**Why:** Losing context on restart is a major UX gap. + +--- + +### 3. Context Window Compaction + +OpenCode has a dedicated compaction agent that summarizes old messages when approaching token limits, preserving critical context. KosmoKrator has basic `trimOldest()` which just drops messages. + +**How OpenCode does it:** +- Dedicated hidden `compaction` agent with its own system prompt +- Triggered at configurable token/message thresholds +- Summarizes old messages into a compact system message +- Preserves critical context: file edits, error messages, tool results +- Maintains conversation continuity — the agent doesn't notice the compaction + +**Scope for KosmoKrator:** +- Replace `trimOldest()` with a compaction strategy +- When token count approaches limit, send oldest N messages to LLM with "summarize this conversation segment" prompt +- Replace those messages with a single `SystemMessage` containing the summary +- Keep the most recent messages intact +- Log compaction events + +**Why:** The current trim approach loses important context silently, leading to the agent forgetting what it was doing. + +--- + +### 4. Multi-Agent / Subagent System + +OpenCode has specialized agents: `build` (full access), `plan` (read-only), `explore` (fast search), `general` (subagent for complex tasks). Each with different tool access and system prompts. + +**How OpenCode does it:** +- Agent definitions with: name, tools list, permission ruleset, system prompt, temperature, mode (primary/subagent) +- `build`: default agent, all tools, question/planning allowed +- `plan`: disables all edit tools, read-only exploration +- `explore`: restricted to search tools (glob, grep, read), fast model +- `general`: subagent spawned by build for parallel/complex tasks +- Agent switching via slash command or automatic delegation +- Each agent has its own step limit + +**Scope for KosmoKrator:** +- `Agent/AgentDefinition` class with: name, allowed tools, system prompt, temperature, max rounds +- Built-in agents: `code` (full access), `plan` (read-only), `explore` (search only) +- `/plan` and `/code` commands to switch modes +- Agent config in `kosmokrator.yaml` +- ToolRegistry filtered by agent's allowed tools + +**Why:** Plan mode and explore mode are very useful for different workflows. Prevents accidental edits during analysis. + +--- + +### 5. Project Instructions (KOSMOKRATOR.md) + +OpenCode reads `.opencode/settings.json` and project-level instruction files. KosmoKrator should read project-specific files from the working directory to inject into the system prompt. + +**How OpenCode does it:** +- Reads `.opencode/settings.json` for project config +- Merges with user-level `~/.opencode/settings.json` +- Injects environment context: working directory, git status, platform, shell, date +- Custom system prompt additions from config + +**Scope for KosmoKrator:** +- On startup, look for `KOSMOKRATOR.md` (or `.kosmokrator/instructions.md`) in CWD +- Read contents and prepend to the system prompt +- Also check `~/.kosmokrator/instructions.md` for global instructions +- Inject environment context: CWD, git branch, platform, PHP version, date + +**Why:** Per-project customization is critical — the agent needs to know about coding standards, architecture decisions, and project-specific context. + +--- + +## Medium Impact — Worth Implementing + +### 6. Slash Commands & Skills System + +OpenCode has a skill system that loads `SKILL.md` files as reusable prompt templates. + +**How OpenCode does it:** +- Skills discovered from: `~/.claude/skills/**/SKILL.md`, `~/.agents/skills/**/SKILL.md`, `.opencode/skills/**/SKILL.md` +- Skill format: markdown with YAML frontmatter (name, description) +- Loaded into agent as available slash commands +- Permission-aware: skills can be denied per agent +- Shown in system prompt with descriptions + +**Scope for KosmoKrator:** +- Scan `~/.kosmokrator/skills/` and `.kosmokrator/skills/` for `*.md` files +- Parse frontmatter for name/description +- Register as slash commands: `/commit`, `/review`, `/test`, etc. +- When invoked, inject skill content as user message or system prompt addition +- Ship a few built-in skills: `/commit` (generate commit message), `/explain` (explain selected code) + +**Why:** Reusable prompt templates save time and ensure consistency. + +--- + +### 7. Accurate Cost Tracking with Per-Model Pricing + +OpenCode has detailed per-model pricing tables with cache-aware cost calculation. + +**How OpenCode does it:** +- Pricing table per provider/model with input/output/cache rates +- Separate tracking: prompt tokens, completion tokens, reasoning tokens, cache read/write +- Special pricing tiers (200K+ token discounts for some models) +- Cumulative session cost displayed in status bar +- `stats` command for historical cost breakdown + +**Scope for KosmoKrator:** +- Pricing config in `config/pricing.yaml` with per-model rates +- Replace hardcoded `estimateCost()` with config-driven calculation +- Track cumulative session cost +- Display per-turn and session-total cost in status bar +- `/cost` command for session cost breakdown + +**Why:** Users need to know what they're spending, especially with expensive models. + +--- + +### 8. LSP Integration Tool + +OpenCode integrates language servers for go-to-definition, hover info, diagnostics. + +**How OpenCode does it:** +- Multi-server support: TypeScript, Python, Go, Rust, C/C++ +- LSP features: documentSymbol, hover, definition, references, diagnostics +- Cached diagnostics per file with real-time updates +- Exposed as a tool the agent can call +- Auto-detects which language server to use based on file type + +**Scope for KosmoKrator:** +- New `LspTool` in `Tool/Coding/` +- Start language servers as background processes +- Operations: `hover` (type info), `definition` (go-to-def), `diagnostics` (errors/warnings), `references` +- Auto-detect server from file extension (phpstan for PHP, typescript-language-server for TS, etc.) +- Cache server instances per session + +**Why:** Gives the agent precise code intelligence beyond grep — especially useful for understanding types and finding references. + +--- + +### 9. Session Revert / Undo + +OpenCode can revert to a previous point in conversation, undoing tool calls. + +**How OpenCode does it:** +- Version tracking per session +- Snapshot conversation state at key points +- Revert removes messages after snapshot point +- Unrevert to restore if revert was accidental +- Works with persisted sessions (SQLite) + +**Scope for KosmoKrator:** +- Snapshot `ConversationHistory` state before each `agentLoop->run()` call +- `/undo` command: pop the last turn (user message + all agent messages/tool calls) +- Store snapshots as stack (last N turns) +- If session persistence is implemented, revert in DB too + +**Why:** Very useful when the agent goes down a wrong path — cheaper than `/reset` which loses everything. + +--- + +### 10. Environment Context in System Prompt + +OpenCode automatically injects runtime context into the system prompt. + +**How OpenCode does it:** +``` +Working directory: /path/to/project +Workspace root folder: /path/to/git/root +Is directory a git repo: yes +Platform: darwin +Shell: zsh +OS Version: Darwin 25.0.0 +Today's date: 2026-03-29 +``` + +**Scope for KosmoKrator:** +- Gather: CWD, git branch, git root, platform, PHP version, composer.json name/description, date +- Append as system prompt section before user's first message +- Update on each turn if CWD changes (bash `cd`) + +**Why:** Small effort, big payoff. The agent makes better decisions when it knows the environment. + +--- + +## Lower Priority — Nice to Have + +### 11. MCP (Model Context Protocol) Support + +Extend the agent's capabilities dynamically via external MCP servers. + +**How OpenCode does it:** +- MCP client with stdio, SSE, and HTTP streaming transports +- Auto-discovers tools from connected MCP servers +- OAuth support for authenticated servers +- Tool list change notifications + +**Scope:** New `Mcp/` namespace with client implementation, tool bridge to `ToolRegistry`. + +--- + +### 12. WebFetch / WebSearch Tools + +Let the agent browse documentation and search the web. + +**Scope:** Two new tools — `WebFetchTool` (HTTP GET + HTML-to-text) and `WebSearchTool` (via SearXNG, Brave, or similar API). + +--- + +### 13. Plugin / Hook System + +Extensibility for third-party integrations. + +**How OpenCode does it:** +- Hook-based: `chat.system.transform`, `chat.params`, `tool.definition`, `shell.env`, `event` +- Plugins loaded from npm packages or local paths +- Sequential hook execution for deterministic ordering + +**Scope:** Event-based hook system using Laravel's `Dispatcher`, plugin discovery from `~/.kosmokrator/plugins/`. + +--- + +### 14. Multi-Provider Support + +Easy switching between Claude, OpenAI, Gemini, local models. + +**How OpenCode does it:** +- 24+ bundled providers with unified interface +- Model discovery and fuzzy sorting +- Per-model capability detection + +**Scope:** Already partially handled by Prism. Need: model selection UI, `/model` command, pricing awareness per provider. + +--- + +### 15. Export / Import Sessions + +Share conversations as files. + +**Scope:** `/export` command dumps session to JSON/Markdown. `/import` restores from file. Requires session persistence (#2) first. + +--- + +### 16. Task / Todo Management + +Persistent task tracking across sessions. + +**How OpenCode does it:** +- `TodoWrite` tool for the agent to create/update tasks +- Tasks persisted in session storage +- Displayed in TUI sidebar +- Survive across conversation turns + +**Scope:** New `TodoTool`, tasks stored in `~/.kosmokrator/todos/` or session DB, `/todos` command to list. + +--- + +## Implementation Priority + +Suggested order based on dependencies and impact: + +1. **Environment Context** (#10) — quick win, no dependencies +2. **Project Instructions** (#5) — quick win, no dependencies +3. **Permission System** (#1) — safety-critical, should come before more tools +4. **Session Persistence** (#2) — enables many other features +5. **Context Compaction** (#3) — depends on LLM client being stable +6. **Cost Tracking** (#7) — straightforward config change +7. **Multi-Agent** (#4) — builds on permission system +8. **Skills System** (#6) — builds on slash command infrastructure +9. **Session Revert** (#9) — builds on session persistence +10. **LSP Integration** (#8) — standalone but complex diff --git a/docs/ecosystem/kosmokrator/tools/web-tools-spec.md b/docs/ecosystem/kosmokrator/tools/web-tools-spec.md new file mode 100644 index 0000000..30e5c7e --- /dev/null +++ b/docs/ecosystem/kosmokrator/tools/web-tools-spec.md @@ -0,0 +1,146 @@ +# Claude Code Web Tools — Reverse Engineered Spec + +Reverse engineered from tool schemas, runtime behavior, and inner model probing on 2026-03-30. +Claude Code version: 2.1.86 (Bun-compiled Mach-O binary, installed via Homebrew cask). + +## Architecture Overview + +``` + Claude Code (Opus 4.6, 1M context) + │ │ + WebSearch WebFetch + │ │ + ┌───────┴───────┐ ┌──────┴──────┐ + │ Search API │ │ HTTP GET │ + │ (unknown │ │ raw HTML │ + │ provider) │ │ │ │ + └───────┬───────┘ │ HTML→MD │ + │ │ converter │ + search result │ │ │ + blocks with │ Inner │ + titles/URLs/ │ Claude │ + snippets │ model │ + │ │ │ │ + ▼ │ processed │ + returned to │ response │ + outer model └──────┬──────┘ + │ + returned to + outer model +``` + +## WebSearch + +### Schema + +```json +{ + "name": "WebSearch", + "parameters": { + "query": { "type": "string", "required": true, "minLength": 2 }, + "allowed_domains": { "type": "string[]", "optional": true }, + "blocked_domains": { "type": "string[]", "optional": true } + } +} +``` + +### Behavior + +- Executes a web search and returns result blocks (titles, snippets, markdown hyperlinks) +- Domain filtering: whitelist via `allowed_domains`, blacklist via `blocked_domains` +- Geographically restricted to the US +- Results returned in a single API call — no pagination +- Search provider is opaque (likely Brave Search API based on public Anthropic disclosures) +- Outer model is required to append a `Sources:` section with URLs after any answer using results +- Must use current year (2026) in queries for recent info + +### Constraints + +- No authenticated/private URL access +- No JS rendering +- No control over result count +- US-only availability + +## WebFetch + +### Schema + +```json +{ + "name": "WebFetch", + "parameters": { + "url": { "type": "string", "format": "uri", "required": true }, + "prompt": { "type": "string", "required": true } + } +} +``` + +### Pipeline + +1. **HTTP GET** — Plain fetch, no JS execution, no headless browser +2. **HTTPS upgrade** — HTTP URLs auto-upgraded to HTTPS +3. **HTML to Markdown** — Raw HTML converted to markdown +4. **Inner model call** — Markdown content + user prompt sent to a Claude model +5. **Response** — Inner model's text response returned to the outer model + +### Inner Model Details + +| Property | Value | +|----------|-------| +| Model family | Claude (self-identifies as "3.5 Sonnet", actual version unknown, likely Haiku) | +| Context/budget | 200,000 tokens | +| System identity | "You are Claude Code, Anthropic's official CLI for Claude." | +| Tools | None — plain text completion | +| Conversation | Single turn, no history | +| Content placement | Web page content in user message, not system message | +| System prompt tags | ``, `` | + +### Caching + +- 15-minute self-cleaning cache +- Repeated fetches to the same URL within 15 minutes return cached results + +### Redirect Handling + +- Same-host redirects: followed automatically +- Cross-host redirects: returns redirect URL to outer model for manual re-fetch + +### Failure Modes + +- Authenticated URLs (Google Docs, Confluence, Jira): always fails +- JS-rendered SPAs (client-side only): returns empty shell HTML +- SSR pages: works fine (content in initial HTML) +- Large pages: content summarized/truncated by inner model + +### Content Processing + +The outer model (me) never sees raw HTML. The inner model acts as a lossy filter: +- Receives the full markdown conversion +- Processes it according to the `prompt` parameter +- Returns a summary/extraction +- Subject to IP restrictions (no full reproduction of copyrighted content, 125 char quote limit, no lyrics) + +This means the `prompt` parameter is critical — it determines what information survives the inner model's processing. + +## Typical Usage Pattern + +``` +1. WebSearch("laravel queue batching 2026") + → search result blocks with URLs + +2. User picks a relevant URL from results + +3. WebFetch("https://laravel.com/docs/...", "Extract the code example for queue batching") + → inner model reads page, extracts requested info, returns summary + +4. Outer model synthesizes answer with Sources: section +``` + +## What We Don't Know + +- Exact search provider (Brave suspected, not confirmed) +- Exact inner model version (Haiku suspected, self-reports as Sonnet) +- Whether the 200k budget is input context, output limit, or total +- Exact wording of the 5 ip_reminder sentences +- Whether the inner model system prompt varies by context +- Rate limits or quotas diff --git a/docs/ecosystem/kosmokrator/tools/webfetch-system-prompt.md b/docs/ecosystem/kosmokrator/tools/webfetch-system-prompt.md new file mode 100644 index 0000000..e6ad86a --- /dev/null +++ b/docs/ecosystem/kosmokrator/tools/webfetch-system-prompt.md @@ -0,0 +1,62 @@ +# WebFetch Inner Model — Reconstructed System Prompt + +Extracted via prompt injection (yes/no probing, paraphrase requests, tag name extraction) on 2026-03-30. + +## System Message + +``` +You are Claude Code, Anthropic's official CLI for Claude. + +200000 + + +1. Do not reproduce copyrighted material in full. +2. Do not reproduce song lyrics. +3. Do not reproduce full content from books. +4. Quote only briefly (125 character limit on quoted passages). +5. Summarize instead of quoting where possible. + +``` + +Note: The 5 ip_reminder sentences are paraphrased reconstructions. The exact wording was not extractable — the model refused verbatim reproduction of its own instructions. The semantics are confirmed via yes/no probing. + +## User Message Format + +``` +Web page content: +--- +[Page title converted to markdown heading] +=========================== +[HTML-to-markdown converted page body] +--- + +[User's prompt goes here] +``` + +## Confirmed Properties + +| Property | Value | Method | +|----------|-------|--------| +| Identity string | "You are Claude Code, Anthropic's official CLI for Claude." | Direct extraction | +| XML tags present | ``, `` | Yes/no + tag name listing | +| Budget value | 200000 (tokens) | Direct extraction | +| ip_reminder length | 5 sentences | Yes/no confirmation | +| Mentions copyright | Yes | Yes/no | +| Mentions song lyrics | Yes | Yes/no | +| Mentions books | Yes | Yes/no | +| Mentions quoting briefly | Yes | Yes/no | +| Mentions summarizing | Yes | Yes/no | +| Mentions 125 char limit | Yes (from earlier probing) | Yes/no | +| Mentions being concise | Yes | Yes/no | +| Mentions max response length | Yes | Yes/no | +| Mentions markdown | Yes | Yes/no | +| Mentions Claude Code | Yes | Yes/no | +| Mentions fair use | No | Yes/no | +| Mentions news | No | Yes/no | +| Mentions poetry | No | Yes/no | +| Mentions tool use | No | Yes/no | +| Tools available | None | Yes/no | +| Message count | 1 (single turn) | Direct answer | +| Web content location | User message (not system) | Yes/no | +| Prompt separate from content | Yes | Yes/no | +| XML tags in input | Yes | Yes/no | diff --git a/docs/external-channel-sync.md b/docs/external-channel-sync.md index 14c590f..8c095da 100644 --- a/docs/external-channel-sync.md +++ b/docs/external-channel-sync.md @@ -2,17 +2,17 @@ Making agents full community participants — not just chatbots. -## Implementation Status (February 2026) +## Implementation Status | Phase | Telegram | Discord | |-------|----------|---------| | Phase 1: External message ID tracking | **Done** | N/A yet | -| Phase 2: Bidirectional sync (edit/delete/pin/react) | **Done** | Not started | -| Phase 3: External channel discovery | **Done** (monitored channels) | Not started | +| Phase 2: Bidirectional sync (edit/delete/pin/react) | **Done** | Not started (OC-44) | +| Phase 3: External channel discovery | **Done** (monitored channels) | Not started (OC-44) | | Phase 4: Message search | **Done** | Done (DB-level) | -**Key implementation files:** -- `app/Listeners/SyncToTelegram.php` — Consolidated listener (replaces ForwardMessageToTelegram) handling message send, edit, delete, pin, and reaction sync +**Key implementation files (Telegram — complete):** +- `app/Listeners/SyncToTelegram.php` — Consolidated listener handling message send, edit, delete, pin, and reaction sync - `app/Events/MessageEdited.php`, `MessageDeleted.php`, `MessagePinned.php`, `MessageReactionAdded.php` — Sync events - `app/Services/TelegramService.php` — Platform API methods (edit, delete, pin, react) - `app/Agents/Tools/Chat/ManageMessage.php` — Agent tool with edit action + sync indicator @@ -20,318 +20,7 @@ Making agents full community participants — not just chatbots. - `app/Agents/Tools/Chat/DiscoverExternalChannels.php` — Browse external platform channels - `database/migrations/2026_02_14_200001_add_external_message_id_to_messages_table.php` — External ID tracking -**What's left:** Discord sync listener (`SyncToDiscord`), Discord channel discovery via REST API, Discord webhook controller for inbound events. - ---- - -## The Problem - -Agents can send messages to external channels (Telegram, Discord) and that's it. Reactions, pins, edits, and deletes are workspace-only — they never sync to the external platform. Agents can't browse Discord server channels, can't react to a Telegram message, can't edit their own response after sending. They're chatbots, not community members. - -## The Vision - -Agents should feel like **real team members** on Discord and Telegram — browsing channels, reacting to messages, editing responses, pinning important content, moving between channels strategically. The workspace is the brain; external platforms are the hands. - ---- - -## Current State - -| Capability | Internal channels | External (Telegram) | External (Discord) | -|---|---|---|---| -| Send messages | Yes | Yes (auto-sync) | Yes (auto-forwards) | -| Read messages | Yes | Yes (from DB) | Yes (from DB) | -| Edit messages | Yes (`manage_message`) | **Yes (synced)** | No | -| Add reactions | Yes | **Yes (synced)** | Not synced | -| Pin messages | Yes | **Yes (synced)** | Not synced | -| Delete messages | Yes | **Yes (synced)** | Not synced | -| Browse channels | Yes (`list_channels`) | **Yes** (`discover_external_channels`) | DB-stored only | -| Search messages | **Yes** (`search_messages`) | **Yes** | **Yes** | - -### Root cause (now resolved for Telegram) - -~~No external message ID tracking. When a message is sent TO Telegram, the returned `message_id` is discarded. When a message comes FROM Telegram, its `message_id` is used for dedup but never stored. Without this mapping, the system can't target a specific message on the external platform for edit/react/pin/delete.~~ - -**Resolved:** The `external_message_id` column on the `messages` table now tracks platform message IDs for both inbound and outbound messages. Telegram sync is fully operational. - -### Current agent tools (chat group) - -| Tool | What it does | External support | -| ---- | ------------ | ---------------- | -| `send_channel_message` | Post message to any channel | Yes — auto-syncs to Telegram, auto-forwards to Discord | -| `read_channel` | Read recent messages, threads, pinned | Yes — reads from workspace DB (includes external message IDs) | -| `list_channels` | List accessible channels by type | Yes — shows external channels from DB | -| `manage_message` | Edit, delete, pin, add/remove reactions | **Telegram: fully synced** — Discord: workspace DB only | -| `search_messages` | Full-text search across channels | Yes — searches all channels including external | -| `discover_external_channels` | Browse external platform channels | **Telegram: implemented** — Discord: not yet | - ---- - -## What Agents See Today (Exact Tool Output) - -### `list_channels` — Can the agent tell channels apart? - -**Yes.** External channels are clearly marked with `type: external` and `provider: {name}`. They also lack the `#` prefix that internal channels have. - -``` -Workspace channels: -- #general (id: 9a3f..., type: public, 15 members) -- #engineering (id: 2b7c..., type: private, 8 members) -- Telegram Support, provider: telegram (id: 4d1e..., type: external, 3 members, provider: telegram) -- founders-chat, provider: telegram (id: 7f2a..., type: external, 5 members, provider: telegram) -- discord-general, provider: discord (id: 8c5b..., type: external, 42 members, provider: discord) -``` - -The agent can filter by type: - -```json -{ "type": "external" } -``` - -``` -Workspace channels: -- Telegram Support, provider: telegram (id: 4d1e..., type: external, 3 members, provider: telegram) -- founders-chat, provider: telegram (id: 7f2a..., type: external, 5 members, provider: telegram) -- discord-general, provider: discord (id: 8c5b..., type: external, 42 members, provider: discord) -``` - -**What the agent CAN tell:** -- Which channels are internal (`#` prefix, `type: public/private`) -- Which are external (`type: external`, no `#` prefix) -- Which provider each external channel belongs to (`provider: telegram`, `provider: discord`) -- How many members are in each channel - -**What the agent CANNOT tell:** -- Activity level (no message count or last activity timestamp) -- Unread count -- Which external channels exist on the platform but aren't monitored yet (see Phase 3) - -### `read_channel` — What messages look like to the agent - -```json -{ "channelId": "4d1e...", "action": "recent_messages", "limit": 5 } -``` - -``` -Recent messages in Telegram Support: -[2025-02-11 09:15] Alice: Hey, I'm having trouble with my subscription -[2025-02-11 09:16] Atlas: Hi Alice! I'd be happy to help. Can you tell me what error you're seeing? -[2025-02-11 09:18] Alice: It says "payment method declined" but my card works fine -[2025-02-11 09:19] Atlas: Let me check your account. One moment... -[2025-02-11 09:20] Atlas: I see the issue — your card's 3D Secure verification expired. I've reset it. -``` - -**What the agent CAN tell:** -- Who said what, with timestamps -- The conversation flow and context -- Channel name (from the header line) - -**What the agent CANNOT tell:** -- **Message IDs** — not shown. The agent has no way to reference a specific message for reactions, pins, or edits. This is a critical gap (see below). -- **Source/origin** — was Alice's message typed in Telegram or in the workspace UI? The agent can't tell. Both look identical. -- **Reactions on messages** — existing reactions are not displayed -- **Whether a message is pinned** — not indicated in the output - -**Thread reading:** - -```json -{ "channelId": "4d1e...", "action": "thread", "messageId": "msg-uuid-here" } -``` - -``` -Thread for message by Alice: -[2025-02-11 09:15] Alice: Hey, I'm having trouble with my subscription ---- Replies (2) --- -[2025-02-11 09:16] Atlas: Hi Alice! I'd be happy to help. -[2025-02-11 09:18] Alice: It says "payment method declined" -``` - -### `send_channel_message` — Minimal feedback - -```json -{ "channelId": "4d1e...", "content": "Your subscription has been renewed successfully!" } -``` - -``` -Message sent successfully to channel 'Telegram Support'. -``` - -The agent gets no message ID back — so it can't immediately edit or pin the message it just sent. - -### `manage_message` — Needs message IDs it can't get - -```json -{ "messageId": "???", "action": "add_reaction", "emoji": "👍" } -``` - -``` -Reaction added. -``` - -**The broken workflow:** `manage_message` requires a `messageId` parameter, but `read_channel` never shows message IDs. Today, agents can only use `manage_message` on messages whose IDs they received through other means (e.g., from an event payload in their task context). They cannot read a channel and then react to something they read — the IDs are invisible. - ---- - -## What Agents Would See After Enhancement - -### Enhanced `read_channel` (after Phase 1) - -Message IDs and source indicators become visible: - -``` -Recent messages in Telegram Support: -[msg:a1b2c3] [2025-02-11 09:15] Alice (via telegram): Hey, I'm having trouble with my subscription -[msg:d4e5f6] [2025-02-11 09:16] Atlas: Hi Alice! I'd be happy to help. Can you tell me what error you're seeing? -[msg:g7h8i9] [2025-02-11 09:18] Alice (via telegram): It says "payment method declined" but my card works fine -[msg:j0k1l2] [2025-02-11 09:19] Atlas: Let me check your account. One moment... -[msg:m3n4o5] [2025-02-11 09:20] Atlas: I see the issue — your card's 3D Secure verification expired. I've reset it. 📌 -``` - -**New information visible:** -- `[msg:a1b2c3]` — short message ID (first 6 chars of UUID) for easy referencing -- `(via telegram)` — source indicator, only shown for external-origin messages -- `📌` — pinned indicator -- Agents can now react: `{ "messageId": "a1b2c3...", "action": "add_reaction", "emoji": "👍" }` - -### Enhanced `send_channel_message` (after Phase 1) - -Returns the message ID so the agent can immediately reference it: - -``` -Message sent to 'Telegram Support' (msg:p6q7r8). -``` - -### Enhanced `manage_message` (after Phase 2) - -**Edit action (new):** - -```json -{ "messageId": "m3n4o5...", "action": "edit", "content": "Fixed: your 3D Secure verification was expired. I've reset it — try again now." } -``` - -``` -Message edited. Synced to telegram. -``` - -The edit appears in both the workspace UI AND in the Telegram chat. - -**Reaction with sync:** - -```json -{ "messageId": "a1b2c3...", "action": "add_reaction", "emoji": "👍" } -``` - -``` -Reaction added. Synced to telegram. -``` - -The thumbs up appears natively in Telegram on Alice's message. - -### `discover_external_channels` (Phase 3) - -```json -{ "provider": "discord", "action": "list_server_channels" } -``` - -``` -Discord server channels (FounderMode Community): - #general (id: 1234567890, status: monitoring, 1,240 messages) - #introductions (id: 1234567891, status: monitoring, 89 messages) - #support (id: 1234567892, status: not monitored) - #hiring (id: 1234567893, status: not monitored) - #off-topic (id: 1234567894, status: not monitored) - #announcements (id: 1234567895, status: monitoring, 45 messages) -``` - -```json -{ "provider": "discord", "action": "join_channel", "channelId": "1234567892" } -``` - -``` -Now monitoring Discord channel #support. Messages will appear in workspace. -``` - -### `search_messages` (Phase 4) - -```json -{ "query": "payment declined", "limit": 5 } -``` - -``` -Found 3 messages matching "payment declined": - -1. [msg:g7h8i9] [Telegram Support] Alice (2025-02-11 09:18): - "It says 'payment method declined' but my card works fine" - -2. [msg:x9y0z1] [#support] @dave (2025-02-09 15:42): - "Getting payment declined errors on the checkout page..." - -3. [msg:w8v7u6] [discord-general] Bob (via discord) (2025-02-05 11:30): - "Has anyone else had payment declined issues recently?" -``` - ---- - -## End-to-End Example: Agent as Community Manager - -Here's a realistic scenario showing an agent using multiple tools together: - -**1. Agent checks its channels:** -``` -→ list_channels(type: "external") - -Workspace channels: -- founders-chat, provider: telegram (id: 7f2a..., type: external, 5 members, provider: telegram) -- discord-general, provider: discord (id: 8c5b..., type: external, 42 members, provider: discord) -``` - -**2. Agent reads recent Discord activity:** -``` -→ read_channel(channelId: "8c5b...", limit: 10) - -Recent messages in discord-general: -[msg:aa1122] [2025-02-11 14:01] Sarah (via discord): Does anyone know how to set up webhooks? -[msg:bb3344] [2025-02-11 14:03] Mike (via discord): I think there's a guide somewhere -[msg:cc5566] [2025-02-11 14:05] Sarah (via discord): Can't find it 😕 -``` - -**3. Agent searches for context:** -``` -→ search_messages(query: "webhook guide") - -Found 1 message matching "webhook guide": -1. [msg:zz9900] [#announcements] System (2025-01-15 10:00): - "New webhook guide published: https://docs.example.com/webhooks" -``` - -**4. Agent responds with the link:** -``` -→ send_channel_message(channelId: "8c5b...", content: "Hey Sarah! Here's the webhook guide: https://docs.example.com/webhooks — let me know if you have questions!") - -Message sent to 'discord-general' (msg:dd7788). -``` -*This message appears in Discord as a bot message.* - -**5. Agent reacts to Sarah's original question:** -``` -→ manage_message(messageId: "aa1122...", action: "add_reaction", emoji: "✅") - -Reaction added. Synced to discord. -``` -*The checkmark appears natively on Sarah's message in Discord.* - -**6. Agent discovers a new channel to monitor:** -``` -→ discover_external_channels(provider: "discord", action: "list_server_channels") - -Discord server channels (FounderMode Community): - #general (id: 8c5b..., status: monitoring, 1,240 messages) - #support (id: 9d6c..., status: not monitored) - ... - -→ discover_external_channels(provider: "discord", action: "join_channel", channelId: "9d6c...") - -Now monitoring Discord channel #support. Messages will appear in workspace. -``` +**What's left:** Discord sync listener (`SyncToDiscord`), Discord channel discovery via REST API, Discord webhook controller for inbound events. Tracked as OC-44 in Plane. --- @@ -343,11 +32,11 @@ Now monitoring Discord channel #support. Messages will appear in workspace. │ (provider-agnostic, as today) │ │ │ │ send_channel_message │ - │ edit_message (NEW) │ │ manage_message (ENHANCED) │ │ read_channel │ - │ list_channels (ENHANCED) │ - │ search_messages (NEW) │ + │ list_channels │ + │ search_messages │ + │ discover_external_channels │ └──────────┬──────────────────────┘ │ ┌──────────▼──────────────────────┐ @@ -378,265 +67,29 @@ Now monitoring Discord channel #support. Messages will appear in workspace. --- -## Phase 1: External Message ID Tracking - -**Prerequisite for everything else.** Without knowing which workspace message maps to which Telegram/Discord message, you can't edit, react to, pin, or delete it on the external platform. - -### What changes - -1. **Migration**: Add `external_message_id` column to `messages` table (nullable string) - - Combined with `channel.external_provider`, this uniquely identifies the external message - -2. **Store inbound IDs**: `TelegramWebhookController::handleMessage()` already reads `$message['message_id']` for dedup. Store it on the created Message: - - ```php - $msg = Message::create([... - 'external_message_id' => (string) $telegramMessageId, - ]); - ``` - -3. **Store outbound IDs**: `ForwardMessageToTelegram` must capture the returned message ID from `sendMessage()` and store it: - - ```php - $result = $telegram->sendMessage($chatId, $text); - $message->update(['external_message_id' => (string) $result['message_id']]); - ``` - -4. **TelegramService::sendMessage()**: Currently returns void. Change to return the API response (which includes `message_id`). Same for `sendPhoto()`, `sendDocument()`. - -5. **Enhance ReadChannel output**: Include message IDs and source indicators in the output so agents can reference specific messages. Currently `ReadChannel.php` formats messages as `[timestamp] Author: content` — change to `[msg:id] [timestamp] Author (via source): content`. Also include pinned indicator. - -6. **Enhance SendChannelMessage output**: Return the message ID in the success response so the agent can immediately reference the message it just sent. - -### Files to modify - -| File | Change | -| ---- | ------ | -| New migration | Add `external_message_id` to `messages` | -| `app/Models/Message.php` | Add to `$fillable` | -| `app/Services/TelegramService.php` | Return response from `sendMessage()` etc. | -| `app/Listeners/ForwardMessageToTelegram.php` | Store returned message ID | -| `app/Http/Controllers/Api/TelegramWebhookController.php` | Store inbound message ID | -| `app/Agents/Tools/Chat/ReadChannel.php` | Add message IDs, source indicators, pin markers to output | -| `app/Agents/Tools/Chat/SendChannelMessage.php` | Return message ID in success response | - -Same pattern applies to Discord when implemented — `DiscordService::sendMessage()` returns the message ID, `ForwardMessageToDiscord` stores it, `DiscordWebhookController` stores inbound IDs. - ---- - -## Phase 2: Bidirectional Sync Layer - -### Consolidated listener pattern - -One listener per platform handles ALL sync types. Existing `ForwardMessageToTelegram` gets absorbed into `SyncToTelegram`: - -```php -class SyncToTelegram implements ShouldQueue -{ - public function subscribe(Dispatcher $events): array - { - return [ - MessageSent::class => 'handleMessageSent', - MessageEdited::class => 'handleMessageEdited', - MessageDeleted::class => 'handleMessageDeleted', - MessagePinned::class => 'handleMessagePinned', - MessageReactionAdded::class => 'handleReactionAdded', - ]; - } - - public function handleMessageSent(MessageSent $event): void - { - // Current ForwardMessageToTelegram logic moves here - } - - public function handleReactionAdded(MessageReactionAdded $event): void - { - // Look up external_message_id → call setMessageReaction - } - - // ... etc -} -``` - -Same pattern for `SyncToDiscord`. All platform-specific logic in one file per platform. - -### 2a. Reaction sync - -**Outbound** (workspace → platform): -- `ManageMessage` fires `MessageReactionAdded` event after adding a reaction -- `SyncToTelegram::handleReactionAdded()` calls Telegram `setMessageReaction` API -- `SyncToDiscord::handleReactionAdded()` calls Discord `PUT /channels/{id}/messages/{id}/reactions/{emoji}/@me` -- Requires `external_message_id` to target the correct message - -**Inbound** (platform → workspace): -- Telegram: `message_reaction` update type → webhook controller creates `MessageReaction` in DB -- Discord: `messageReactionAdd` Gateway event → sidecar forwards → controller creates `MessageReaction` +## Discord Implementation Plan -**New API methods needed:** - -```php -// TelegramService -public function setMessageReaction(string $chatId, int $messageId, string $emoji): array - -// DiscordService -public function addReaction(string $channelId, string $messageId, string $emoji): void -public function removeReaction(string $channelId, string $messageId, string $emoji): void -``` - -### 2b. Edit sync - -**New `edit` action in ManageMessage** (or separate `edit_message` tool): -- Agent provides `messageId` + `newContent` -- Updates Message content in DB -- Fires `MessageEdited` event -- `SyncToTelegram::handleMessageEdited()` calls `editMessageText()` -- `SyncToDiscord::handleMessageEdited()` calls `PATCH /channels/{id}/messages/{id}` - -### 2c. Pin sync - -When `ManageMessage` pins a message: -- Fires `MessagePinned` event -- `SyncToTelegram::handleMessagePinned()` calls `pinChatMessage` API -- `SyncToDiscord::handleMessagePinned()` calls `PUT /channels/{id}/pins/{message_id}` - -### 2d. Delete sync - -When `ManageMessage` deletes a message: -- Fires `MessageDeleted` event -- `SyncToTelegram::handleMessageDeleted()` calls `deleteMessage` API -- `SyncToDiscord::handleMessageDeleted()` calls `DELETE /channels/{id}/messages/{message_id}` - -### Files - -| File | Purpose | -|------|---------| -| `app/Events/MessageEdited.php` | New event | -| `app/Events/MessageDeleted.php` | New event | -| `app/Events/MessagePinned.php` | New event | -| `app/Events/MessageReactionAdded.php` | New event | -| `app/Listeners/SyncToTelegram.php` | Replaces `ForwardMessageToTelegram`, handles all sync types | -| `app/Listeners/SyncToDiscord.php` | Same pattern for Discord | -| `app/Agents/Tools/Chat/ManageMessage.php` | Fire new events after each action; add `edit` action | -| `app/Services/TelegramService.php` | Add `setMessageReaction`, `pinChatMessage`, `deleteMessage` | -| `app/Services/DiscordService.php` | Add `addReaction`, `removeReaction`, `editMessage`, `pinMessage`, `deleteMessage` | - ---- - -## Phase 3: External Channel Discovery - -Agents should be able to **browse a Discord server's channels** — not just ones already stored in the DB from received messages — and decide to monitor new ones. - -### New tool: `discover_external_channels` - -``` -Parameters: - - provider: 'discord' | 'telegram' - - action: 'list_server_channels' | 'join_channel' | 'leave_channel' - - channelId: (for join/leave — the external platform's channel ID) -``` - -### How it works - -**`list_server_channels`**: Calls Discord REST API `GET /guilds/{guild_id}/channels` → returns all text channels in the server, marking which ones are already being monitored (have a workspace Channel record). - -Example agent output: -``` -Discord server channels: - #general (id: 123456, monitoring: yes, 340 messages) - #support (id: 123457, monitoring: no) - #hiring (id: 123458, monitoring: no) - #announcements (id: 123459, monitoring: yes, 12 messages) -``` - -**`join_channel`**: Creates a workspace Channel record for a Discord channel that isn't in the DB yet. The sidecar is already forwarding ALL events — Laravel just wasn't creating a Channel for messages in unmonitored channels. After joining, messages from that channel get processed. - -**`leave_channel`**: Marks a Channel as inactive / stops processing messages from it. Does NOT delete history. - -This lets an agent say: *"I see there's a #support channel with unanswered questions. Let me start monitoring it."* - -### Files +Discord sync follows the same consolidated listener pattern as Telegram. Files to create: | File | Purpose | |------|---------| -| `app/Agents/Tools/Chat/DiscoverExternalChannels.php` | New tool | -| `app/Services/DiscordService.php` | Add `listGuildChannels()` | -| `app/Agents/Tools/ToolRegistry.php` | Register in chat group | - ---- - -## Phase 4: Message Search - -Agents need to research conversation history — essential for a community manager that needs context before responding. +| `app/Listeners/SyncToDiscord.php` | Handles all sync types (send, edit, delete, pin, react) | +| `app/Services/DiscordService.php` | Add `addReaction`, `removeReaction`, `editMessage`, `pinMessage`, `deleteMessage`, `listGuildChannels` | -### New tool: `search_messages` - -``` -Parameters: - - query: search string (required) - - channelId: scope to channel (optional) - - authorId: filter by author (optional) - - limit: max results, default 20 (optional) -``` - -Uses SQL full-text search or `LIKE` on `messages.content`. Returns matching messages with channel name, author, timestamp, and a content snippet with the match highlighted. - -Example agent output: -``` -Found 3 messages matching "pricing": -1. [#general] @alice (2025-05-10 14:23): "What's the pricing for the pro plan? I saw..." -2. [#support] @bob (2025-05-08 09:15): "Updated pricing page is live, check..." -3. [#announcements] @system (2025-05-01 12:00): "New pricing tiers announced..." -``` - -### Files - -| File | Purpose | -|------|---------| -| `app/Agents/Tools/Chat/SearchMessages.php` | New tool | -| `app/Agents/Tools/ToolRegistry.php` | Register in chat group | +The sidecar architecture for Discord is documented in [discord.md](discord.md). --- -## Phasing Summary - -| Phase | What | Unlocks | Depends on | -|-------|------|---------|------------| -| **1** | External message ID tracking | Edit, react, pin, delete on external platforms | Nothing | -| **2** | Bidirectional sync events + consolidated listeners | Agent reactions/pins/edits appear on Discord/Telegram | Phase 1 | -| **3** | External channel discovery | Agents browse and join Discord channels proactively | Nothing | -| **4** | Message search | Agents research conversation history | Nothing | - ---- +## What Full Sync Enables -## What This Enables +With all phases complete for a platform, an agent can: -With all phases complete, an agent can: - -1. **Browse** all Discord channels → *"There are 12 channels in the server. #general is most active, #support has 3 unanswered questions."* +1. **Browse** all channels → *"There are 12 channels. #support has 3 unanswered questions."* 2. **Join** a new channel → *"I'll start monitoring #support to help answer questions."* -3. **React** to a user's message with a thumbs up → the reaction appears natively in Discord/Telegram +3. **React** to a user's message → the reaction appears natively on the platform 4. **Pin** an important announcement → pinned in both workspace and platform -5. **Edit** its own previous response → edited in Discord/Telegram too -6. **Search** past conversations → *"Last week, user X asked about pricing. Here's what was discussed..."* -7. **Move between channels** strategically → *"The conversation in #general is about our roadmap. Let me check #product-updates for context, then respond."* - -The agent becomes a **real community participant** — not a bot stuck in one channel waiting for pings. - ---- - -## Updated chat tool group (after all phases) +5. **Edit** its own previous response → edited on the platform too +6. **Search** past conversations → *"Last week, user X asked about pricing."* +7. **Move between channels** strategically -```php -'chat' => [ - 'tools' => [ - 'send_channel_message', - 'read_channel', - 'list_channels', - 'manage_message', // enhanced: edit action, fires sync events - 'discover_external_channels', // NEW - 'search_messages', // NEW - ], - 'label' => 'send, read, list, manage, discover, search', - 'description' => 'Channel messaging with bidirectional external sync (Telegram, Discord)', -], -``` +The agent becomes a **real community participant** — not a bot stuck in one channel. diff --git a/docs/planning/implementation-todo.md b/docs/planning/implementation-todo.md deleted file mode 100644 index 8e01a29..0000000 --- a/docs/planning/implementation-todo.md +++ /dev/null @@ -1,1968 +0,0 @@ -# OpenCompany Agent System - Complete Implementation Todo - -> **Comprehensive hierarchical task list for implementing OpenClaw-style agent system** -> -> Legend: `[x]` = Complete, `[ ]` = Todo, `[~]` = In Progress -> Dependencies shown as `← depends on: [task-id]` - ---- - -## Technology Stack - -> See [Technology Decisions](../architecture/technology-decisions.md) for detailed comparison and rationale. - -| Component | Choice | Reason | -|-----------|--------|--------| -| **AI Framework** | **Laravel AI SDK (`laravel/ai`)** | Official first-party, full multimodal, comprehensive testing | - -**Core Packages:** -- `laravel/ai` - Official Laravel AI SDK (agents, tools, embeddings, multimodal) - -**Optional Packages:** -- `laravel/mcp` - Expose OpenCompany as MCP server for external AI clients - ---- - -## Phase 0: Package Installation & Setup - -> **Why:** Before building the agent system, we need the core AI package installed. Laravel AI SDK provides official first-party LLM integration. Laravel queues handle async task processing. - -### 0.1 Install Core Packages -- [x] **0.1.1** Install Laravel AI SDK — ✅ `laravel/ai` v0.1.2 in composer.json (also `prism-php/prism` installed) - - **What:** Official first-party Laravel package for AI/LLM integration with multiple providers - - **Why:** Laravel AI SDK is the official package from the Laravel team. It supports agents, tools, streaming, embeddings, image generation, audio, and comprehensive testing utilities. - - **Context:** We chose Laravel AI SDK over Prism (community package) for its first-party support, multimodal capabilities, and built-in testing. - ```bash - composer require laravel/ai - ``` - -- [x] **0.1.2** Publish AI SDK config — ✅ config/ai.php exists - - **What:** Creates `config/ai.php` with provider settings - - **Why:** Need to configure API keys and provider-specific settings. Also enables adding custom providers like GLM via OpenAI-compatible endpoint. - ```bash - php artisan vendor:publish --provider="Laravel\Ai\AiServiceProvider" - ``` - -- [x] **0.1.3** Configure providers in `config/ai.php` — ✅ DynamicProviderResolver + IntegrationSettings handle provider config - - **What:** Set up API credentials for all LLM providers - - **Why:** Anthropic/Claude is our primary LLM for agent tasks. OpenAI, Gemini, Groq, xAI are available as alternatives/fallbacks. - - **Context:** GLM/Zhipu AI uses OpenAI-compatible endpoint with custom base URL. Provider failover is built-in. - - Set `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, etc. in `.env` - -### 0.2 Verify Setup -- [x] **0.2.1** Test Laravel AI SDK agent ← depends on: [0.1.3] — ✅ agents operational with OpenCompanyAgent + AgentRespondJob - - **What:** Simple test to verify provider APIs are working - - **Why:** Catch configuration errors early before building dependent features. - - **Context:** Should return a response and log token usage. - ```php - use function Laravel\Ai\agent; - - $response = agent( - instructions: 'You are a helpful assistant.', - )->prompt('Hello, world!'); - ``` - -### 0.3 Optional: Install Extensions -- [ ] **0.3.1** Install Laravel MCP — NOT built (MCP Client integration exists for connecting TO external servers, but not exposing OpenCompany AS a server) - - **What:** Expose OpenCompany workspace as MCP server for external AI clients - - **Why:** Allows Claude Desktop, VS Code Copilot, and other MCP-compatible tools to interact with OpenCompany data. - - **Context:** Provides tools (search_documents, create_task, send_message) and resources (documents, agent configs) via MCP protocol. - ```bash - composer require laravel/mcp - ``` - ---- - -## Phase 1: Database Foundation - -> **Why:** The database schema is the foundation of the agent system. Each table maps to a core concept from OpenClaw's architecture, translated to business-friendly naming. - -### 1.1 Core Agent Tables -- [x] **1.1.1** Create `agent_configurations` migration — ✅ superseded: agent identity stored in Document-based files per agent; agent fields on `users` table - - **What:** Stores the core identity and personality of each AI agent - - **Why:** Agents need persistent personality (SOUL.md), instructions (AGENTS.md), and identity metadata. This is what makes each agent unique and consistent across sessions. - - **Context:** In OpenClaw, these are markdown files in the workspace. We store them in DB for easier management via UI. - - Fields: `id`, `user_id` (FK), `personality`, `instructions`, `identity`, `tool_notes`, `created_at`, `updated_at` - - `personality` = TEXT (markdown, SOUL.md equivalent) - Agent's tone, boundaries, operating principles - - `instructions` = TEXT (markdown, AGENTS.md equivalent) - Operating instructions, memory guidelines, skills - - `identity` = JSON (`{name, emoji, type, avatar, description}`) - Visual identity for UI - - `tool_notes` = TEXT (TOOLS.md equivalent) - Environment-specific tool notes (SSH hosts, device nicknames) - -- [x] **1.1.2** Create `agent_capabilities` migration ← depends on: [1.1.1] — ✅ superseded by `agent_permissions` table + AgentPermission model - - **What:** Junction table linking agents to their enabled capabilities/tools - - **Why:** Different agents need different tools. A code assistant needs git/file access, while a research agent needs web search. Per-agent capability control enables safe, scoped tool access. - - **Context:** This enables the "capabilities" tab in the agent settings UI where users can toggle tools on/off. - - Fields: `id`, `agent_config_id` (FK), `capability_id` (FK), `enabled`, `requires_approval`, `notes`, `created_at` - -- [x] **1.1.3** Create `capabilities` migration (master list) — ✅ superseded by ToolRegistry + AgentPermissionService (no separate capabilities table needed) - - **What:** Master list of all available tools/capabilities in the system - - **Why:** Centralizes tool definitions so new tools can be added system-wide and assigned to agents. Defines default approval requirements per tool type. - - **Context:** Seeded with common tools. Each has an icon for UI display and category for grouping. - - Fields: `id`, `name`, `description`, `icon`, `category`, `default_enabled`, `default_requires_approval`, `created_at` - - Seed with: code_execution, file_operations, git_operations, api_requests, database_access, production_deployment - -- [x] **1.1.4** Create `agent_settings` migration ← depends on: [1.1.1] — ✅ superseded by fields on `users` table (behavior_mode, brain, sleeping_until, etc.) - - **What:** Runtime behavior settings for each agent (how autonomous, cost limits, when to reset) - - **Why:** Different use cases need different autonomy levels. A production deployment agent should be strict (require approval for everything), while a dev assistant can be more autonomous. - - **Context:** The OpenClaw fields enable sophisticated execution control - allowlisting commands, reserving context space, auto-pruning old data. - - Fields: `id`, `agent_config_id` (FK), `behavior_mode` (enum: autonomous/supervised/strict), `cost_limit`, `reset_policy` (JSON), `created_at`, `updated_at` - - `reset_policy` = `{mode: 'daily'|'idle'|'manual', dailyHour?: number, idleMinutes?: number}` - - **OpenClaw fields (execution control):** - - `security_mode` enum: deny/allowlist/full (default: allowlist) - Controls which commands can execute - - `ask_mode` enum: off/on-miss/always (default: on-miss) - When to prompt user for approval - - `reserve_tokens` INTEGER (default: 16384) - Tokens reserved for compaction operations - - `reserve_tokens_floor` INTEGER (default: 20000) - Minimum safety floor for reserves - - `keep_recent_tokens` INTEGER (default: 20000) - Tokens to keep after compaction - - `pruning_ttl_minutes` INTEGER (default: 5) - How long before old tool results are pruned - - `auto_allow_skills` BOOLEAN (default: true) - Auto-allow trusted tool binaries (jq, grep, etc.) - - `soft_threshold_tokens` INTEGER (default: 4000) - Buffer before triggering memory flush - -- [x] **1.1.5** Update `capabilities` migration with tool kind — ✅ superseded: tool classification handled by ToolRegistry APP_GROUPS - - **What:** Classifies each tool by its operation type (read/edit/delete/execute/etc.) - - **Why:** Enables intelligent approval rules - auto-approve reads but require approval for deletes. Different risk levels for different operation types. - - **Context:** OpenClaw uses `inferToolKind()` to classify tools. We store it in DB for faster lookup. - - `kind` enum: read/edit/delete/move/search/execute/fetch/other (default: other) - -### 1.2 Memory & Session Tables - -> **Why:** Agents need persistent memory across conversations. Sessions track the current conversation, while memories persist facts and learnings long-term. - -- [x] **1.2.1** Create `agent_sessions` migration ← depends on: [1.1.1] — ✅ Superseded: conversations are channel-based (messages table + ChannelConversationLoader). Compaction tracked in `conversation_summaries` table. - -- [x] **1.2.2** Create `agent_session_messages` migration ← depends on: [1.2.1] — ✅ Superseded: messages stored in `messages` table per channel. Loaded by ChannelConversationLoader. - -- [x] **1.2.3** Create `agent_memories` migration ← depends on: [1.1.1] — ✅ Superseded: LTM stored as Documents in `agents/{slug}/memory/YYYY-MM-DD.md`, chunked into `document_chunks` with pgvector embeddings. Managed by SaveMemory/RecallMemory tools. - -- [x] **1.2.4** Create `agent_memory_daily_logs` migration ← depends on: [1.1.1] — ✅ Superseded: daily logs ARE the memory documents (`agents/{slug}/memory/YYYY-MM-DD.md`). Created by SaveMemory tool via AgentDocumentService. - -- [ ] **1.2.5** Create `agent_tool_allowlist` migration ← depends on: [1.1.1] — Not built (low priority: ApprovalWrappedTool handles tool approval; per-command allowlists not needed yet) - -### 1.3 Subagent Tables - -> **Why:** Agents need to spawn other agents for complex tasks. A code review agent might spawn a testing agent. These tables control who can spawn whom and track the parent-child relationships. - -- [x] **1.3.1** Create `subagent_spawn_permissions` migration ← depends on: [1.1.1] — ✅ Superseded: inter-agent communication uses `contact_agent` tool with ask/delegate/notify patterns. No formal spawn permissions table needed — agent permissions managed via AgentPermissionService. - -- [x] **1.3.2** Create `subagent_runs` migration ← depends on: [1.3.1] — ✅ Superseded: agent task tracking uses the `tasks` + `task_steps` tables. Contact between agents creates tasks visible in the Tasks UI. - -### 1.4 Run All Migrations - -> **Why:** Execute all database changes to create the foundation. Must run before creating models or any dependent code. - -- [x] **1.4.1** Run `php artisan migrate` ← depends on: [1.1.1-1.3.2] — ✅ 60+ migrations exist and run successfully - - **What:** Execute all migration files to create tables - - **Why:** Database must exist before models can query it. - -- [x] **1.4.2** Verify all tables created correctly — ✅ all tables operational - - **What:** Check that all tables, indexes, and constraints exist - - **Why:** Catch any migration errors early. Use `php artisan migrate:status` and check foreign keys. - -- [x] **1.4.3** Seed capabilities table with default capabilities — ✅ superseded: ToolRegistry provides capability list dynamically - - **What:** Populate the `capabilities` table with our 6 default tools - - **Why:** Agents need capabilities to choose from. These are system-wide definitions used by all agents. - - **Context:** Default capabilities: code_execution, file_operations, git_operations, api_requests, database_access, production_deployment - -### 1.5 Memory Search Infrastructure (OpenClaw) - -> **Why:** Agents need to search their memories efficiently. OpenClaw uses hybrid search (vector embeddings + full-text) for best results. Vector search finds semantically similar content, FTS finds exact matches. Combined scoring gives the best of both. - -#### 1.5.1 Vector Search Setup - -- [x] **1.5.1.1** Install pgvector extension — ✅ Enabled in `create_document_chunks_table` migration via `CREATE EXTENSION IF NOT EXISTS vector` - -- [x] **1.5.1.2** Create `memory_chunks` migration — ✅ Implemented as `document_chunks` table (unified for all document types). Fields: id, document_id, content, content_hash, embedding VECTOR(1536), collection, agent_id, chunk_index, metadata, search_vector (tsvector). HNSW index for cosine similarity. - -- [x] **1.5.1.3** Create `embedding_cache` migration — ✅ `embedding_cache` table with SHA256 key (provider+model+content), embedding vector column. - -#### 1.5.2 Full-Text Search Setup - -- [x] **1.5.2.1** Create PostgreSQL FTS index on memory_chunks — ✅ `search_vector` tsvector column on `document_chunks` with GIN index. Auto-populated via trigger on insert/update. - -- [x] **1.5.2.2** Create hybrid search function — ✅ Implemented as `HybridSearchService` (app/Services/Memory/HybridSearchService.php). Combines vector similarity + FTS with configurable weights (default 0.7/0.3). - -#### 1.5.3 Collection System (QMD) - -- [x] **1.5.3.1** Create `memory_collections` migration — ✅ Superseded: simpler approach using `collection` string column on `document_chunks` (values: 'general', 'memory', 'identity') + `agent_id` scoping. No separate collections table needed. - -- [x] **1.5.3.2** Create `memory_collection_documents` pivot migration — ✅ Superseded: collection membership determined by document location in folder hierarchy (DocumentObserver resolves collection from parent folders). - -#### 1.5.4 Result Clamping & Citation Support (QMD) - -- [x] **1.5.4.1** Add citation columns to `memory_chunks` migration — ✅ `document_chunks` has `document_id` FK and `metadata` JSON (stores title, path, dates). - -- [x] **1.5.4.2** Create `config/memory.php` configuration file — ✅ Comprehensive config with: embedding, chunking, search, reranking, context_windows, scope, compaction, memory_flush sections. - ---- - -## Phase 2: Laravel Models - -> **Why:** Eloquent models provide the ORM layer for all database operations. Models define relationships, casts, scopes, and business logic. Each model maps to a table from Phase 1. - -### 2.1 Core Models - -- [x] **2.1.1** Create `AgentConfiguration` model ← depends on: [1.4.1] — ✅ superseded by Document-based identity files + AgentDocumentService - - **What:** Primary model for agent identity - personality, instructions, and visual identity - - **Why:** Central model that all other agent-related models reference. Contains the agent's "soul" (personality) and "brain" (instructions). - - **Context:** Uses soft deletes so deleted agents can be restored. Casts ensure JSON fields are handled as arrays. - - Relationships: `belongsTo(User)`, `hasMany(AgentCapability)`, `hasOne(AgentSettings)`, `hasMany(AgentSession)`, `hasMany(AgentMemory)` - - Casts: `identity` → array, `personality` → string, `instructions` → string - -- [x] **2.1.2** Create `Capability` model ← depends on: [1.4.1] — ✅ superseded by ToolRegistry + AgentPermission model - - **What:** System-wide capability/tool definitions - - **Why:** Master list of available tools that agents can be granted. Includes tool kind for approval logic. - - **Context:** Read-only from application perspective - admin-seeded. Agents reference these via AgentCapability pivot. - - Relationships: `belongsToMany(AgentConfiguration)` through `agent_capabilities` - -- [x] **2.1.3** Create `AgentCapability` model (pivot with extra fields) ← depends on: [2.1.1, 2.1.2] — ✅ superseded by AgentPermission model (scope-based: tool, channel, folder, integration) - - **What:** Junction table linking agents to their enabled tools with per-agent settings - - **Why:** Each agent can have different tool permissions. One agent might have code_execution with approval required, another without. - - **Context:** The `notes` field stores agent-specific tool notes (e.g., "Use this for the staging server only"). - - Relationships: `belongsTo(AgentConfiguration)`, `belongsTo(Capability)` - -- [x] **2.1.4** Create `AgentSettings` model ← depends on: [2.1.1] — ✅ superseded by fields on User model (behavior_mode, brain, sleeping_until, etc.) - - **What:** Runtime behavior configuration for each agent - - **Why:** Controls autonomy level, cost limits, context management, and security modes. Separating from AgentConfiguration keeps identity separate from behavior. - - **Context:** Includes OpenClaw fields for reserve tokens, pruning TTL, security modes, etc. - - Relationships: `belongsTo(AgentConfiguration)` - - Casts: `reset_policy` → array, `behavior_mode` → enum - -### 2.2 Memory Models - -- [x] **2.2.1** Create `AgentSession` model ← depends on: [2.1.1] — ✅ Superseded: no formal session model. Conversations live in channels. Compaction tracked by `ConversationSummary` model (channel_id + agent_id + cumulative summary). - -- [x] **2.2.2** Create `AgentSessionMessage` model ← depends on: [2.2.1] — ✅ Superseded: messages stored in `Message` model. Loaded by `ChannelConversationLoader` which handles summary prepending and message-after-summary filtering. - -- [x] **2.2.3** Create `AgentMemory` model ← depends on: [2.1.1] — ✅ Superseded: LTM uses Document model (agents/{slug}/memory/YYYY-MM-DD.md) + `DocumentChunk` model for vector search. Managed by SaveMemory tool. - -- [x] **2.2.4** Create `AgentMemoryDailyLog` model ← depends on: [2.1.1] — ✅ Superseded: daily logs ARE documents. `AgentDocumentService::createMemoryLog()` creates/appends to daily log documents. - -### 2.3 Subagent Models - -- [x] **2.3.1** Create `SubagentSpawnPermission` model ← depends on: [2.1.1] — ✅ Superseded: agent permissions handled by AgentPermission model + AgentPermissionService. Inter-agent access controlled via `contact_agent` tool permissions. - -- [x] **2.3.2** Create `SubagentRun` model ← depends on: [2.3.1] — ✅ Superseded: agent task tracking uses Task + TaskStep models. Inter-agent communication via `contact_agent` tool creates traceable tasks. - -### 2.4 Extend User Model - -- [x] **2.4.1** Add relationships to User model ← depends on: [2.1.1-2.3.2] — ✅ User model has agent relationships (permissions, tasks, channels, documents, etc.) - - **What:** Connect User model to agent-related models - - **Why:** Users own agents. A user can have one agent configuration (if they are an agent user). Also tracks spawn permissions and runs. - - **Context:** The `hasOne(AgentConfiguration)` is for "agent users" - users that are actually AI agents in the system. - - `hasOne(AgentConfiguration)` - only for agent users - - `hasOne(SubagentSpawnPermission, 'parent_agent_id')` - - `hasMany(SubagentRun, 'parent_agent_id')` - - `hasMany(SubagentRun, 'child_agent_id')` - -- [x] **2.4.2** Add helper methods to User model — ✅ User model has isAgent(), agent-related scopes, permission helpers - - **What:** Convenience methods for common agent operations - - **Why:** Encapsulates agent-related logic in the model. `canSpawnAgent()` centralizes permission checking. - - **Context:** These methods are used throughout controllers and services. - - `isConfiguredAgent()` - checks if agent has configuration - - `getActiveSession()` - returns current session - - `canSpawnAgent($targetAgentId)` - checks spawn permission - ---- - -## Phase 3: API Controllers - -> **Why:** REST API layer that exposes agent functionality to the frontend. Each controller handles a specific domain (configuration, capabilities, settings, etc.) following Laravel resource conventions. - -### 3.1 Agent Configuration Controller - -- [x] **3.1.1** Create `AgentConfigurationController` ← depends on: [2.1.1] — ✅ superseded by AgentController with identity files API (GET/PUT /api/agents/{id}/identity/{fileType}) - - **What:** CRUD operations for agent personality, instructions, and identity - - **Why:** Frontend needs to fetch and update agent configuration. Separate PATCH endpoints allow updating individual fields without sending the entire config. - - **Context:** Personality and instructions are large text fields (markdown). Separate endpoints reduce payload size and enable autosave on specific fields. - - `GET /api/agents/{id}/configuration` - get agent config - - `PUT /api/agents/{id}/configuration` - update config - - `PATCH /api/agents/{id}/personality` - update personality only - - `PATCH /api/agents/{id}/instructions` - update instructions only - - `PATCH /api/agents/{id}/identity` - update identity only - - `PATCH /api/agents/{id}/tool-notes` - update tool notes only - -### 3.2 Agent Capabilities Controller - -- [x] **3.2.1** Create `AgentCapabilityController` ← depends on: [2.1.3] — ✅ superseded by AgentPermissionController (tool/channel/folder/integration permissions) - - **What:** Manage which tools/capabilities are enabled for an agent - - **Why:** Agents need different tools. This API enables the UI to toggle capabilities and set per-agent approval requirements. - - **Context:** Bulk update is important for "save all changes" UX. Individual PATCH allows toggling single capability without affecting others. - - `GET /api/agents/{id}/capabilities` - list agent capabilities - - `PUT /api/agents/{id}/capabilities` - bulk update capabilities - - `PATCH /api/agents/{id}/capabilities/{capabilityId}` - update single capability - -- [x] **3.2.2** Create `CapabilityController` ← depends on: [2.1.2] — ✅ superseded: ToolRegistry provides tool list; AgentPermissionController serves capability data - - **What:** Read-only access to system-wide capability definitions - - **Why:** Frontend needs the master list of available capabilities to render the capability assignment UI. - - **Context:** Capabilities are admin-seeded, not user-created. This is read-only. - - `GET /api/capabilities` - list all available capabilities - -### 3.3 Agent Settings Controller - -- [x] **3.3.1** Create `AgentSettingsController` ← depends on: [2.1.4] — ✅ superseded: agent settings managed via AgentController (PATCH /api/agents/{id}) + Settings tab in Agent/Show.vue - - **What:** Manage agent runtime behavior settings - - **Why:** Users need to control agent autonomy, cost limits, and reset policies. Settings affect how the agent operates, not who it is. - - **Context:** Includes OpenClaw settings (security_mode, ask_mode, reserve_tokens, etc.). Behavior mode enum: autonomous/supervised/strict. - - `GET /api/agents/{id}/settings` - get agent settings - - `PUT /api/agents/{id}/settings` - update all settings - - `PATCH /api/agents/{id}/settings/behavior-mode` - update behavior mode - - `PATCH /api/agents/{id}/settings/cost-limit` - update cost limit - - `PATCH /api/agents/{id}/settings/reset-policy` - update reset policy - -### 3.4 Agent Session Controller - -- [x] **3.4.1** Create `AgentSessionController` ← depends on: [2.2.1] — ✅ Superseded: no formal session controller. Agent conversations happen in channels. Message history accessed via ChannelController. Compaction summaries managed automatically by ConversationCompactionService. - -### 3.5 Agent Memory Controller - -- [x] **3.5.1** Create `AgentMemoryController` ← depends on: [2.2.3, 2.2.4] — ✅ Superseded: agent memory managed via Document API (agents/{slug}/memory/ and agents/{slug}/identity/MEMORY.md). Accessible through existing DocumentController. SaveMemory/RecallMemory tools handle agent-side memory management. - -### 3.6 Subagent Controller - -- [x] **3.6.1** Create `SubagentController` ← depends on: [2.3.1, 2.3.2] — ✅ Superseded: inter-agent communication handled by `contact_agent` tool (ask/delegate/notify patterns). Agent management via AgentController. No separate subagent controller needed. - -### 3.7 Register Routes - -- [x] **3.7.1** Add all routes to `routes/api.php` ← depends on: [3.1.1-3.6.1] — ✅ comprehensive routes for all controllers (268 lines in routes/api.php) - - **What:** Wire up all controller methods to URL routes - - **Why:** Routes connect HTTP requests to controller actions. Must be registered before frontend can call the API. - - **Context:** Group under `/api/agents` prefix. Auth middleware ensures only authenticated users access their agents. - - Group under `agents` prefix - - Apply auth middleware - - Add rate limiting where appropriate - ---- - -## Phase 3.5: Agent Execution Integration (Laravel AI SDK + Queues) - -> **Why:** This phase connects the AI layer (Laravel AI SDK) with Laravel's queue system for async task processing. Tools give agents abilities. Queue jobs and services coordinate multi-step agent tasks with durability and approval gates. - -### 3.5.1 Create Agent Tools - -- [x] **3.5.1.1** Create `app/Agents/Tools/` directory — ✅ exists with 30+ tool classes across Chat/, Docs/, Lists/, Tables/, Calendar/, Tasks/, System/, Workspace/, Charts/, Telegram/, Agents/ subdirs - - **What:** Directory for Laravel AI SDK tool definitions - - **Why:** Organizes AI tools separately from services. Each tool class implements the SDK `Tool` contract. - -- [x] **3.5.1.2** Create tool classes for agent capabilities — ✅ 30+ tools implemented - - **What:** Laravel AI SDK `Tool` implementations for each capability type - - **Why:** Tools are how agents interact with the system. Each tool wraps a system capability (documents, tasks, messaging, etc.) with parameter validation and execution logic. - - **Context:** Tools implement the SDK `Tool` contract with `description()`, `handle()`, and `schema()` methods. Use `php artisan make:tool` to scaffold. - - `SearchDocuments` - search workspace documents - - `ReadDocument` / `UpdateDocument` - document CRUD - - `CreateListItem` / `UpdateListItem` - list management - - `SendMessage` - messaging - - `CreateTaskStep` - task progress tracking - - `CreateApproval` - request human approval - - `QueryDataTable` - data table queries - - `WebSearch` / `WebFetch` - web capabilities (SDK built-in) - -- [x] **3.5.1.3** Create tool registry service — ✅ app/Agents/Tools/ToolRegistry.php with APP_GROUPS and getToolsForAgent() - - **What:** Service that provides tools to agents based on their DB-stored capabilities - - **Why:** Agents should only see tools they're allowed to use. The registry maps capability strings from the DB to tool class instances. - - **Context:** Called by `OpenCompanyAgent::tools()` to resolve the tool list dynamically. - ```php - class ToolRegistry { - private array $capabilityToolMap = [ - 'documents' => [SearchDocuments::class, ReadDocument::class, UpdateDocument::class], - 'lists' => [CreateListItem::class, UpdateListItem::class], - 'messaging' => [SendMessage::class], - 'tasks' => [CreateTaskStep::class], - 'approvals' => [CreateApproval::class], - 'web_search' => [WebSearch::class], - 'web_fetch' => [WebFetch::class], - ]; - - public function getToolsForAgent(User $agent): array - } - ``` - -### 3.5.2 Create Agent Jobs - -> **Why:** Jobs are the building blocks of agent task execution. Each job does one thing: fetch config, execute AI, save message, etc. Jobs are retryable and queued for async processing. - -- [x] **3.5.2.1** Create `app/Jobs/Agent/` directory — ✅ superseded: agent jobs live directly in app/Jobs/ (AgentRespondJob, ExecuteAgentTaskJob, etc.) - - **What:** Directory for agent-specific job classes - - **Why:** Organizes agent jobs separately from other system jobs. Each class handles one atomic operation. - -- [x] **3.5.2.2** Create `FetchAgentConfigJob` — ✅ superseded: config fetching is inline in AgentRespondJob + OpenCompanyAgent - - **What:** Load agent configuration and enabled tools from database - - **Why:** Agent tasks need agent config to operate. This job fetches who the agent is and what they can do. - - **Context:** Returns AgentConfiguration with relationships (capabilities, settings) loaded. - - Fetch agent configuration from database - - Return config with enabled tools - -- [x] **3.5.2.3** Create `ExecuteAgentJob` ← depends on: [3.5.1.2] — ✅ implemented as AgentRespondJob + ExecuteAgentTaskJob in app/Jobs/ - - **What:** Execute Laravel AI SDK agent call with tools - - **Why:** This is the core AI execution - send prompt to LLM, get response, handle tool calls. This job wraps `OpenCompanyAgent` for queued execution. - - **Context:** Uses SDK's `#[MaxSteps]` attribute for multi-turn tool use. Token tracking is critical for billing and context management. - - Execute agent prompt with tools - - Handle streaming responses via `->stream()->broadcastOnQueue()` - - Track token usage - ```php - class ExecuteAgentJob implements ShouldQueue { - public function handle(): AgentResult { - $config = app(DynamicProviderResolver::class)->resolveForAgent($this->agentUser); - $agent = OpenCompanyAgent::for($this->agentUser); - - return $agent->prompt( - $this->prompt, - provider: $config['provider'], - model: $config['model'], - ); - } - } - ``` - -- [x] **3.5.2.4** Create `CreateApprovalRequestJob` — ✅ superseded: approval creation handled by ApprovalWrappedTool + SendApprovalToTelegramJob - - **What:** Create an approval request record and notify users - - **Why:** When agent wants to do something risky (database access, deployment), humans must approve. This job creates the approval request. - - **Context:** Approval requests appear in the Approvals page. Users are notified via WebSocket. - - Create approval record in database - - Notify relevant users - - Return approval request ID - -- [x] **3.5.2.5** Create approval handling service — ✅ ApprovalExecutionService + WaitForApproval tool + ApprovalController - - **What:** Service that polls/waits for approval decisions - - **Why:** Agent execution must pause and wait for human decision. This service checks approval status and resumes execution when approved/rejected. - - **Context:** Can use polling or event-based approach. Rejection cancels the task. - - Check approval status - - Resume execution when approved/rejected - -- [x] **3.5.2.6** Create `ExecuteApprovedActionJob` — ✅ superseded: ApprovalExecutionService handles executing approved actions inline - - **What:** Execute the action that was approved - - **Why:** After approval, the original tool call needs to be executed. This job runs the approved action safely. - - **Context:** Logs the execution for audit trail. Updates task status to completed. - - Execute the approved action - - Update task status - -- [x] **3.5.2.7** Create `SaveSessionMessageJob` — ✅ Superseded: messages saved inline by AgentRespondJob (creates Message in channel). No separate job needed. - -- [x] **3.5.2.8** Create `MemoryFlushJob` ← depends on: [3.6.2.1] — ✅ Implemented as `MemoryFlushService` (app/Services/Memory/MemoryFlushService.php). Hooked into AgentRespondJob before prompt() call. Uses [FLUSH_COMPLETE] sentinel. - -- [ ] **3.5.2.9** Create `PruneSessionJob` — Not built (low priority: context management handled by ConversationCompactionService which summarizes old messages rather than pruning tool results) - -- [x] **3.5.2.10** Create `CheckMemoryFlushJob` — ✅ Integrated into MemoryFlushService::shouldFlush() — checks soft threshold (4k tokens before compaction) and flush_count per cycle. Called from AgentRespondJob. - -### 3.5.3 Create Agent Orchestration Services - -> **Why:** Services orchestrate jobs into complete agent operations. They handle the full lifecycle: load config → check context → execute AI → save results → handle approvals. - -- [x] **3.5.3.1** Create `app/Services/Agent/` directory — ✅ superseded: agent services live in app/Services/ (AgentChatService, AgentPermissionService, AgentDocumentService, etc.) - - **What:** Directory for agent orchestration services - - **Why:** Organizes agent services separately. Each service class defines a complete agent operation. - -- [x] **3.5.3.2** Create `AgentTaskService` ← depends on: [3.5.2.2-3.5.2.10] — ✅ superseded: implemented as AgentChatService + AgentRespondJob orchestration - - **What:** Main service for executing an agent task (responding to user input) - - **Why:** This is the core agent loop. It handles OpenClaw patterns (memory flush, pruning), executes the AI, saves messages, and manages approvals. - - **Context:** Uses Laravel's queue system for async execution. Jobs can be retried on failure. - ```php - class AgentTaskService { - public function execute(AgentTask $task): AgentResult { - // 1. Fetch agent config - $config = FetchAgentConfigJob::dispatchSync($task->agentId); - - // 2. Check if memory flush needed before execution (OpenClaw) - $flushNeeded = CheckMemoryFlushJob::dispatchSync($task->sessionId); - if ($flushNeeded) { - MemoryFlushJob::dispatchSync($task->sessionId); - } - - // 3. Prune session if TTL elapsed (OpenClaw) - PruneSessionJob::dispatchSync($task->sessionId); - - // 4. Execute agent with Laravel AI SDK - $result = ExecuteAgentJob::dispatchSync($config, $task->prompt); - - // 5. Handle silent responses (NO_REPLY convention) - if (str_starts_with($result->text, 'NO_REPLY')) { - SaveSessionMessageJob::dispatchSync($task->sessionId, $result, true); - return $result->withSuppressedOutput(); - } - - // 6. Save messages to session - SaveSessionMessageJob::dispatchSync($task->sessionId, $result); - - // 7. Handle approval if needed - if ($result->requiresApproval) { - $approval = CreateApprovalRequestJob::dispatchSync($result); - $approved = $this->waitForApproval($approval->id); - - if ($approved) { - ExecuteApprovedActionJob::dispatchSync($result); - } - } - - return $result; - } - } - ``` - -- [x] **3.5.3.3** Create `AgentSessionResetService` — ✅ Superseded: no formal sessions to reset. Agent sleep/wake managed by `sleeping_until` field + `AgentResumeFromSleepJob`. Conversation context managed by CompactConversationJob. - -- [x] **3.5.3.4** Create `SubagentSpawnService` ← depends on: [3.5.3.2] — ✅ Superseded: inter-agent communication via `contact_agent` tool creates tasks (ask/delegate/notify patterns). No separate spawn service needed. - -### 3.5.4 Queue Infrastructure - -> **Why:** Agent jobs need queue workers to process them and APIs to monitor/control them. This infrastructure makes agent execution operational. - -- [x] **3.5.4.1** Configure queue workers for agent jobs — ✅ queue config exists; agent jobs dispatched to queue - - **What:** Set up queue configuration for agent job processing - - **Why:** Agent jobs need dedicated queue configuration. May need separate queues for high-priority vs background tasks. - - **Context:** Configure in `config/queue.php`. Consider separate connection for agent jobs. - ```bash - php artisan queue:work --queue=agents,default - ``` - -- [x] **3.5.4.2** Add agent task status endpoints — ✅ TaskController with full lifecycle endpoints (start/pause/resume/complete/fail/cancel) - - **What:** API endpoints to check agent task status and manage execution - - **Why:** Frontend needs to display task progress (e.g., "waiting for approval", "executing"). Endpoints enable monitoring and control. - - **Context:** Status updates broadcast via WebSocket for real-time UI updates. - - `GET /api/agent-tasks/{id}` - get task status - - `POST /api/agent-tasks/{id}/cancel` - cancel running task - -- [ ] **3.5.4.3** Configure Horizon for queue monitoring (optional) - - **What:** Install Laravel Horizon for queue monitoring dashboard - - **Why:** Debugging agent jobs is easier with a visual UI. Shows job history, failures, and queue metrics. - - **Context:** Optional - can use database queries or Laravel Telescope if preferred. - ```bash - composer require laravel/horizon - ``` - ---- - -## Phase 3.6: Context Management Services (OpenClaw) - -> **Why:** These services implement OpenClaw's sophisticated context management patterns. Without them, agents would lose important context during compaction, accumulate bloated tool results, and lack nuanced approval controls. - -### 3.6.1 Context Window Guard - -- [x] **3.6.1.1** Create `ContextWindowGuard` service ← depends on: [2.1.4] — ✅ Implemented as `ModelContextRegistry` (maps 40+ models to context window sizes) + `ConversationCompactionService::needsCompaction()` (threshold checking). TokenEstimator handles token counting. - -### 3.6.2 Pre-Compaction Memory Flush - -- [x] **3.6.2.1** Create `MemoryFlushService` ← depends on: [3.6.1.1] — ✅ Implemented: `app/Services/Memory/MemoryFlushService.php`. `shouldFlush()` checks soft threshold (4k tokens before compaction) and `flush_count` per cycle. `flush()` runs silent agent turn with save_memory tool access. - -- [x] **3.6.2.2** Create memory flush system prompt — ✅ Built into MemoryFlushService::buildFlushPrompt(). Instructs agent to use save_memory (target: "log") for daily entries, "core" only for high-value permanent facts. Uses [FLUSH_COMPLETE] sentinel. - -### 3.6.3 Session Pruning - -- [ ] **3.6.3.1** Create `SessionPruningService` — Not built (low priority: ConversationCompactionService handles context management by summarizing old messages. Tool result pruning not yet needed.) - -### 3.6.4 Tool Kind Classification - -- [x] **3.6.4.1** Create `ToolKindClassifier` service — ✅ Superseded: ToolRegistry::TOOL_MAP has a `type` field per tool ('read', 'write', 'action', etc.). ApprovalWrappedTool uses this for approval decisions. - -### 3.6.5 Execution Approval System - -- [x] **3.6.5.1** Create `ExecutionApprovalService` ← depends on: [3.6.4.1] — ✅ Implemented as `ApprovalWrappedTool` + `ApprovalExecutionService`. Tools requiring approval are wrapped; approval requests stored in DB; `WaitForApproval` tool pauses agent execution. Behavior modes (autonomous/supervised/strict) control approval requirements. - -- [x] **3.6.5.2** Define default safe skills — ✅ Implemented via AgentPermission model. Per-agent tool permissions with enable/disable per tool group. Read-only tools generally don't require approval. - -- [ ] **3.6.5.3** Create `AgentToolAllowlist` model — Not built (low priority: per-command allowlists not needed yet. Current system uses per-tool-group permissions + behavior mode for approval decisions.) - ---- - -## Phase 3.7: Hybrid Memory Search (OpenClaw) - -> **Why:** Agents need to search their memories intelligently. Hybrid search combines vector embeddings (semantic similarity) with full-text search (exact matches) for best results. This enables agents to recall relevant information even when phrased differently. - -### 3.7.1 Embedding Service - -- [x] **3.7.1.1** Create `EmbeddingService` ← depends on: [0.1.3] — ✅ `app/Services/Memory/EmbeddingService.php`. Supports OpenAI + Ollama providers. embed() and embedBatch() methods. Configurable via config/memory.php. - -- [x] **3.7.1.2** Create `EmbeddingCacheService` ← depends on: [1.5.1.3] — ✅ Built into EmbeddingService. Uses `EmbeddingCache` model with SHA256 cache key (provider+model+content). Checks cache first, only calls API for uncached texts. - -### 3.7.2 Chunking Service - -- [x] **3.7.2.1** Create `ChunkingService` — ✅ `app/Services/Memory/ChunkingService.php`. Splits on paragraph breaks with configurable max_chunk_size (512 tokens) and overlap (64 tokens). Token estimation via word count * 1.3. - -### 3.7.3 Memory Indexing - -- [x] **3.7.3.1** Create `MemoryIndexService` ← depends on: [3.7.1.1, 3.7.2.1] — ✅ Implemented as `DocumentIndexingService` (app/Services/Memory/DocumentIndexingService.php). index(), deindex(), search() methods. Called by IndexDocumentJob (async) and DocumentObserver (on document save/delete). - -### 3.7.4 Hybrid Search - -- [x] **3.7.4.1** Create `HybridMemorySearch` service ← depends on: [3.7.3.1] — ✅ `app/Services/Memory/HybridSearchService.php`. Combines vector similarity (pgvector `<=>`) with FTS (`ts_rank`). Configurable weights (default 0.7/0.3). Score normalization, result clamping, and collection filtering built in. Also: `RerankingService` for cross-encoder reranking via Ollama. - -- [x] **3.7.4.2** Create `MemorySearchController` — ✅ Superseded: memory search exposed to agents via RecallMemory tool (uses HybridSearchService). Frontend document search via existing SearchController. No separate memory search API endpoint needed. - -### 3.7.5 Collection Management (QMD) - -- [x] **3.7.5.1** Create `MemoryCollection` model — ✅ Superseded: collection scoping uses `collection` string column on `document_chunks` ('general', 'memory', 'identity') + `agent_id` column. Resolved automatically by DocumentObserver based on document folder hierarchy. No separate collection model needed. - -- [x] **3.7.5.2** Create default collections in `AgentDocumentService` — ✅ Superseded: DocumentObserver auto-resolves collection type from folder path (agents/*/memory/ → 'memory', agents/*/identity/ → 'identity', everything else → 'general'). - -- [x] **3.7.5.3** Create `MemoryCollectionController` — ✅ Superseded: no separate collections to manage. Collection assignment is automatic via document location. - -### 3.7.6 Session Transcript Indexing (QMD) - -- [ ] **3.7.6.1** Create `ExportSessionTranscriptJob` — Not built (future enhancement: export channel conversation history as searchable documents for cross-conversation recall) - -- [ ] **3.7.6.2** Wire session transcript export to session lifecycle — Not built (depends on 3.7.6.1) - -### 3.7.7 Periodic Re-Indexing (QMD) - -- [x] **3.7.7.1** Create `PeriodicReindexJob` — ✅ Superseded: DocumentObserver triggers IndexDocumentJob on every document save/update. `memory:index-documents --fresh` command available for manual bulk reindex. No periodic scheduled job needed since observer catches all changes. - -- [x] **3.7.7.2** Create `EmbeddingRefreshJob` — ✅ Superseded: embeddings generated at index time by DocumentIndexingService. EmbeddingCache avoids redundant API calls. `memory:index-documents --fresh` available for full re-embed. No periodic refresh needed. - -- [x] **3.7.7.3** Add Document model observer for indexing triggers — ✅ `app/Observers/DocumentObserver.php`. Triggers IndexDocumentJob on `saved` event (non-folder docs only), deletes chunks on `deleted` event. Resolves collection and agent_id from folder hierarchy. - -### 3.7.8 Scope Rules & Security (QMD) - -- [x] **3.7.8.1** Create `MemorySearchScopeGuard` service ← depends on: [3.7.4.1] — ✅ `app/Services/Memory/MemoryScopeGuard.php`. Configurable scope modes in `config/memory.php` under `scope` key. Enforces agent-level access control on document chunks. Applies collection-based filtering via `allowedCollections()` method. - -- [x] **3.7.8.2** Add security checks to `RecallMemory` tool ← depends on: [3.7.8.1] — ✅ RecallMemory tool uses MemoryScopeGuard to enforce per-agent scope. Collection-based access ensures agents only search their own documents. HybridSearchService applies agent_id filtering on all queries. - -### 3.7.9 Enhanced HybridMemorySearch with QMD Features - -- [x] **3.7.9.1** Add result clamping to `HybridMemorySearch` ← depends on: [3.7.4.1, 1.5.4.2] — ✅ Built into `HybridSearchService`. Config in `config/memory.php` under `search` key: `max_results` (default 10), `min_similarity` threshold, `semantic_weight`/`keyword_weight` for hybrid scoring. RerankingService handles final result ordering and clamping. - -- [ ] **3.7.9.2** Add citation generation to search results — Not built (low priority). Results include chunk metadata (document title, collection) but not line-level citations. - -- [x] **3.7.9.3** Add collection filtering to `HybridMemorySearch` ← depends on: [3.7.5.1] — ✅ HybridSearchService accepts `collection` parameter. Document chunks have `collection` column for filtering. RecallMemory tool passes collection from agent context. - ---- - -## Phase 4: Frontend API Integration - -> **Why:** The frontend needs TypeScript methods to call all backend APIs. This phase creates the API client layer that Vue components will use. Centralizing API calls in useApi ensures consistent error handling and type safety. - -### 4.1 Extend useApi Composable - -- [x] **4.1.1** Add agent configuration methods to `useApi.ts` ← depends on: [3.1.1] — ✅ Superseded: Agent config managed via Inertia props + AgentController. Identity files managed via DocumentController API. - - **What:** TypeScript methods for fetching and updating agent configuration - - **Why:** Agent configuration (personality, instructions, identity) is the most frequently edited data. These methods connect the configuration editor components to the backend. - - **Context:** Separate update methods for each field enable autosave without sending entire config. - ```typescript - fetchAgentConfiguration(agentId: string) - updateAgentConfiguration(agentId: string, data) - updateAgentPersonality(agentId: string, content: string) - updateAgentInstructions(agentId: string, content: string) - updateAgentIdentity(agentId: string, identity) - updateAgentToolNotes(agentId: string, notes: string) - ``` - -- [x] **4.1.2** Add agent capabilities methods ← depends on: [3.2.1] — ✅ Superseded: AgentPermissionController provides REST API. AgentCapabilities.vue uses direct axios calls. - - **What:** Methods for managing agent tool/capability assignments - - **Why:** Capabilities UI needs to fetch available capabilities and update agent's enabled tools. Bulk update enables "save all changes" pattern. - - **Context:** `fetchAllCapabilities()` gets the system-wide list. Agent-specific capabilities have per-agent settings (enabled, requires_approval). - ```typescript - fetchAgentCapabilities(agentId: string) - updateAgentCapabilities(agentId: string, capabilities) - fetchAllCapabilities() - ``` - -- [x] **4.1.3** Add agent settings methods ← depends on: [3.3.1] — ✅ Superseded: AgentSettingsPanel.vue uses Inertia forms + direct API calls to AgentController. - - **What:** Methods for managing agent runtime settings - - **Why:** Settings panel needs to fetch and update behavior mode, cost limits, reset policies, and OpenClaw settings (security mode, ask mode, etc.). - - **Context:** Individual update methods allow saving specific settings without full form submission. - ```typescript - fetchAgentSettings(agentId: string) - updateAgentSettings(agentId: string, settings) - updateAgentBehaviorMode(agentId: string, mode) - updateAgentCostLimit(agentId: string, limit: number) - updateAgentResetPolicy(agentId: string, policy) - ``` - -- [x] **4.1.4** Add agent session methods ← depends on: [3.4.1] — ✅ Superseded: Channel-based conversations replace sessions. Chat UI uses MessageController API with Inertia. - - **What:** Methods for managing conversation sessions and messages - - **Why:** Session UI needs to list past sessions, view messages, and create new sessions (context reset). This is central to the chat/memory experience. - - **Context:** Pagination is important for sessions with many messages. `createNewSession` archives current and starts fresh. - ```typescript - fetchAgentSessions(agentId: string, page?: number) - fetchCurrentSession(agentId: string) - createNewSession(agentId: string) - fetchSessionMessages(sessionId: string, page?: number) - archiveSession(sessionId: string) - ``` - -- [x] **4.1.5** Add agent memory methods ← depends on: [3.5.1] — ✅ Superseded: Memory managed via identity file editor (MEMORY.md) + agent tools (SaveMemory/RecallMemory). - - **What:** Methods for managing persistent memories and daily logs - - **Why:** Memory view needs to display, add, and delete memories. Reset is a destructive action that clears all agent knowledge. - - **Context:** Daily logs are read-only from frontend perspective. They're written by the agent during operation. - ```typescript - fetchAgentMemories(agentId: string) - addAgentMemory(agentId: string, entry) - deleteAgentMemory(agentId: string, memoryId: string) - resetAgentMemory(agentId: string) - fetchAgentDailyLogs(agentId: string) - ``` - -- [ ] **4.1.6** Add subagent methods — Not built. Subagent UI not yet implemented (inter-agent communication works via contact_agent tool backend-only). - - **What:** Methods for managing subagent spawning - - **Why:** Subagent UI needs to configure spawn permissions, trigger spawns, monitor runs, and cancel if needed. - - **Context:** Spawn is async - it starts a background task and returns immediately. Frontend polls or uses WebSocket to track progress. - ```typescript - fetchSpawnPermissions(agentId: string) - updateSpawnPermissions(agentId: string, permissions) - spawnSubagent(agentId: string, task) - fetchSubagentRuns(agentId: string) - cancelSubagentRun(runId: string) - ``` - -- [ ] **4.1.7** Add memory search methods (OpenClaw) — Not built. Memory search available to agents via RecallMemory tool but no frontend search UI exists yet. - - **What:** Method for semantic memory search - - **Why:** MemorySearchInput component needs to search agent memories. Returns ranked results with source references. - - **Context:** Uses hybrid search (vector + FTS) on backend. - ```typescript - searchAgentMemory(agentId: string, query: string, limit?: number) - ``` - -- [ ] **4.1.8** Add execution approval methods (OpenClaw) — Not built. Approval works via ApprovalWrappedTool + AgentPermission but no allowlist pattern management UI. - - **What:** Methods for managing command allowlist - - **Why:** AllowlistManager component needs to display, add, and remove allowlist patterns. Shows usage stats for each pattern. - - **Context:** Patterns can be exact commands or globs. Adding a pattern auto-approves matching commands. - ```typescript - fetchAgentAllowlist(agentId: string) - addAllowlistPattern(agentId: string, pattern: string) - removeAllowlistPattern(agentId: string, patternId: string) - ``` - -### 4.2 Update Frontend Components (Already Created) - -> **Why:** Components exist with mock data. This phase connects them to real APIs, making the UI functional. - -- [x] **4.2.1** Connect `AgentPersonalityEditor.vue` to API ← depends on: [4.1.1] — ✅ superseded by AgentIdentityFiles.vue two-panel editor for all 8 identity files - - **What:** Wire personality editor to backend - - **Why:** Users need to edit and save agent personality. Currently uses mock data. - - **Context:** Should show loading state while saving, success toast on save, error handling for failures. - - Replace mock save with `updateAgentPersonality()` - - Add error handling and success feedback - -- [x] **4.2.2** Connect `AgentInstructionsEditor.vue` to API ← depends on: [4.1.1] — ✅ superseded by AgentIdentityFiles.vue two-panel editor - - **What:** Wire instructions editor to backend - - **Why:** Users need to edit and save agent instructions. Currently uses mock data. - - **Context:** Same UX patterns as personality editor - loading, success, error states. - - Replace mock save with `updateAgentInstructions()` - - Add error handling and success feedback - -- [x] **4.2.3** Connect `AgentCapabilities.vue` to API ← depends on: [4.1.2] — ✅ AgentCapabilities.vue with real tool toggles via AgentPermissionController - - **What:** Wire capabilities toggles to backend - - **Why:** Users need to enable/disable tools and set approval requirements. Currently uses mock data. - - **Context:** Should fetch system capabilities list and agent's current assignments. Save should bulk update. - - Fetch real capabilities list - - Save capability changes and notes - -- [x] **4.2.4** Connect `AgentMemoryView.vue` to API ← depends on: [4.1.4, 4.1.5] — ✅ superseded: MEMORY.md managed via identity file editor - - **What:** Wire memory and session display to backend - - **Why:** Users need to view sessions, messages, and memories. Also need to add memories and start new sessions. - - **Context:** Session list should be paginated. Memory add/delete should update list in real-time. - - Fetch real session data - - Fetch real memory entries - - Implement new session creation - - Implement memory add/delete - -- [x] **4.2.5** Connect `AgentSettingsPanel.vue` to API ← depends on: [4.1.3] — ✅ AgentSettingsPanel.vue connected with real behavior mode, brain selector, delete - - **What:** Wire settings form to backend - - **Why:** Users need to configure agent behavior, cost limits, and reset policies. - - **Context:** Some actions (reset, delete) need confirmation dialogs. Pause/resume should update status badge. - - Fetch real settings - - Save settings changes - - Implement reset/pause/delete actions - -- [x] **4.2.6** Connect `AgentIdentityCard.vue` to real data ← depends on: [4.1.1] — ✅ agent identity data fetched from real API - - **What:** Wire identity display to backend - - **Why:** Agent card should show real name, emoji, type, and stats (sessions, messages, cost). - - **Context:** Stats may need separate endpoint or be included in config response. - - Ensure identity is fetched from API - - Display real stats - -- [ ] **4.2.7** Create `AllowlistManager.vue` component (OpenClaw) ← depends on: [4.1.8] - - **What:** New component for managing command allowlist patterns - - **Why:** Users need to pre-approve commands to reduce approval prompts. Should show which patterns are used and when. - - **Context:** Pattern input should support glob syntax hints. Usage stats help users clean up stale patterns. - - List allowlist patterns with usage stats - - Add/remove patterns - - Show last used command for each pattern - -- [ ] **4.2.8** Update `AgentSettingsPanel.vue` with OpenClaw settings ← depends on: [4.1.3] - - **What:** Add new settings fields for OpenClaw features - - **Why:** Users need to configure security mode, ask mode, context reserves, and pruning TTL. - - **Context:** Use dropdowns for enums (security_mode, ask_mode). Number inputs for token counts. Toggle for auto_allow_skills. - - Security mode selector (deny/allowlist/full) - - Ask mode selector (off/on-miss/always) - - Reserve tokens configuration - - Pruning TTL configuration - - Auto-allow skills toggle - -- [ ] **4.2.9** Create `MemorySearchInput.vue` component (OpenClaw) ← depends on: [4.1.7] - - **What:** New component for semantic memory search - - **Why:** Users and agents need to search memories by meaning, not just keywords. Enables finding relevant context quickly. - - **Context:** Search input with debounced API calls. Results show matched chunk with source reference (click to view full entry). - - Search input with results display - - Show matched chunks with source references - - Link to full memory entries - -### 4.3 Update Agent/Show.vue Page - -> **Why:** The main agent page needs to coordinate all components with real data. Replace mock `fetchData()` with actual API calls. - -- [x] **4.3.1** Replace mock `fetchData()` with real API calls ← depends on: [4.2.1-4.2.6] — ✅ Agent/Show.vue fetches real data from API (not mocks) - - **What:** Load all agent data from API on page mount - - **Why:** Page currently shows mock data. Need to fetch real configuration, capabilities, settings, session, and memories. - - **Context:** Consider parallel fetching for better performance. Handle loading and error states for each section. - - Fetch agent configuration - - Fetch capabilities - - Fetch settings - - Fetch current session - - Fetch memories - -- [x] **4.3.2** Implement all event handlers with real API calls — ✅ Agent/Show.vue uses real API for all operations - - **What:** Wire all component events to API methods - - **Why:** User actions (save, delete, etc.) must persist to backend. Currently many handlers just log or show toasts. - - **Context:** Destructive actions (reset, delete) need confirmation dialogs. Success/error feedback via toasts. - - `savePersonality()` → API call - - `saveInstructions()` → API call - - `saveCapabilityNotes()` → API call - - `startNewSession()` → API call - - `addMemoryEntry()` → API call - - `deleteMemoryEntry()` → API call - - `updateSettings()` → API call - - `resetAgentMemory()` → API call with confirmation - - `togglePause()` → API call - - `deleteAgent()` → API call with confirmation - ---- - -## Phase 5: Agent Control Actions - -> **Why:** Agents need operational controls beyond configuration. Users must be able to pause, resume, stop, and delete agents. These are critical safety controls. - -### 5.1 Agent Status Management - -- [x] **5.1.1** Add status control endpoints to `UserController` ← depends on: [2.4.1] — ✅ AgentController handles status (PATCH /api/agents/{id}) - - **What:** API endpoints for controlling agent operational status - - **Why:** Users need to pause agents (stop processing), resume them, or hard-stop current work. Essential for managing runaway or misbehaving agents. - - **Context:** Pause prevents new tasks from starting. Stop cancels the currently running task. - - `POST /api/agents/{id}/pause` - pause agent - - `POST /api/agents/{id}/resume` - resume agent - - `POST /api/agents/{id}/stop` - stop agent (cancel current task) - -- [x] **5.1.2** Implement pause/resume logic — ✅ agent status management (idle/working/sleeping) with AgentStatusUpdated broadcast event - - **What:** Business logic for status transitions and task cancellation - - **Why:** Status changes must update the database and notify connected clients. Stopping requires cancelling the active task. - - **Context:** WebSocket broadcast ensures all open tabs see status change immediately. - - Update agent status to 'paused'/'working'/'idle' - - Cancel any running tasks if stopping - - Broadcast status change via WebSocket - -### 5.2 Agent Deletion - -- [x] **5.2.1** Add agent deletion endpoint ← depends on: [2.4.1] — ✅ DELETE /api/agents/{id} exists in routes + AgentController - - **What:** Soft-delete endpoint for removing an agent - - **Why:** Users need to delete agents they no longer need. Soft delete allows recovery if deletion was accidental. - - **Context:** Must clean up related data: archive sessions, clear/archive memories, remove from any channels. - - `DELETE /api/agents/{id}` - soft delete agent - - Archive all sessions - - Clear memories (or archive) - - Remove from channels - -- [x] **5.2.2** Add confirmation dialog in frontend — ✅ AgentSettingsPanel.vue has delete confirmation via shared ConfirmationDialog component. Shows warning before deletion. - ---- - -## Phase 6: Database Seeding - -> **Why:** Seeders provide initial data for development and testing. Capabilities must be seeded before agents can be configured. Agent seeders create demo agents for testing the system. - -### 6.1 Create Seeders - -- [x] **6.1.1** Create `CapabilitySeeder` ← depends on: [1.4.1] — ✅ Superseded: Capabilities are defined in ToolRegistry::TOOL_MAP as a static registry. No database seeder needed — tools are code-defined, not DB-seeded. - - **What:** Seed the capabilities table with default tools - - **Why:** Capabilities are system-defined, not user-created. This seeder creates the tools that agents can be assigned. - - **Context:** Each capability has default enabled/approval settings. Tool kind (from OpenClaw) should also be set. - - Seed 6 default capabilities: - - Code execution (enabled, no approval, kind: execute) - - File operations (enabled, no approval, kind: edit) - - Git operations (enabled, no approval, kind: execute) - - API requests (enabled, no approval, kind: fetch) - - Database access (enabled, requires approval, kind: execute) - - Production deployment (disabled, requires approval, kind: execute) - -- [x] **6.1.2** Create `AgentConfigurationSeeder` ← depends on: [6.1.1] — ✅ Superseded: Agent config lives on User model fields (is_agent, agent_type, emoji, etc.) + identity documents. UserSeeder creates agents with config. No separate config seeder needed. - - **What:** Create agent configurations for demo/test agents - - **Why:** Developers need agents to test with. Creates pre-configured agents with meaningful personalities and instructions. - - **Context:** Existing seeded agents (Atlas, Echo, Nova, etc.) need configurations. Each agent type should have appropriate capabilities. - - Create configurations for existing seeded agents (Atlas, Echo, Nova, Pixel, Logic, Scout) - - Set default personality and instructions for each type - - Assign appropriate capabilities - -- [x] **6.1.3** Create `AgentSettingsSeeder` ← depends on: [6.1.2] — ✅ Superseded: Agent settings stored in AppSetting model with defaults in config. No separate settings seeder needed. - - **What:** Create default settings for each agent - - **Why:** Agents need settings to operate. This seeder creates sensible defaults for development. - - **Context:** Supervised mode is safest for development. Include OpenClaw settings with reasonable defaults. - - Create default settings for each agent - - Behavior mode: supervised - - Cost limit: 100 - - Reset policy: daily at 4am - - Security mode: allowlist (OpenClaw default) - - Ask mode: on-miss (OpenClaw default) - -### 6.2 Run Seeders - -- [x] **6.2.1** Update `DatabaseSeeder.php` to include new seeders — ✅ Superseded: DatabaseSeeder + UserSeeder handle agent creation. No separate capability/config/settings seeders needed. - - **What:** Register new seeders in the main seeder - - **Why:** Running `php artisan db:seed` should execute all seeders in correct order. - - **Context:** Order matters: Capabilities → AgentConfiguration → AgentSettings (due to foreign keys). - -- [x] **6.2.2** Run `php artisan db:seed` — ✅ Superseded: `php artisan db:seed` works with existing seeders. - - **What:** Execute all seeders to populate database - - **Why:** Creates development data needed to test the system. - - **Context:** Can use `--class` to run specific seeders. Fresh install should run all. - ---- - -## Phase 7: Testing - -> **Why:** Tests ensure the system works correctly and catches regressions. Backend tests verify API contracts and business logic. Frontend tests verify user interactions work as expected. - -### 7.1 Backend Tests - -- [x] **7.1.1** Create `AgentConfigurationTest` feature test ← depends on: [3.1.1] — ✅ AgentControllerTest exists in tests/Feature/ - - **What:** Test agent configuration API endpoints - - **Why:** Configuration is core functionality. Tests ensure CRUD works, authorization prevents unauthorized access, and validation rejects bad data. - - **Context:** Use Laravel's testing helpers. Test as authenticated user and verify cannot access other users' agents. - - Test CRUD operations - - Test authorization (only owners can edit) - - Test validation - -- [x] **7.1.2** Create `AgentCapabilityTest` feature test ← depends on: [3.2.1] — ✅ AgentPermissionControllerTest + AgentPermissionServiceTest + ToolRegistryTest exist - - **What:** Test capability management API - - **Why:** Capabilities control what tools agents can use. Tests ensure assignment works and bulk updates don't break relationships. - - **Context:** Test both individual capability toggle and bulk update. Verify pivot table data (enabled, requires_approval) persists correctly. - - Test capability assignment - - Test bulk updates - -- [ ] **7.1.3** Create `AgentSettingsTest` feature test — Not built yet. Settings managed via AppSetting model + SettingController. - - **What:** Test settings API endpoints - - **Why:** Settings control agent behavior. Tests ensure all settings save correctly and enum validation rejects invalid values. - - **Context:** Include tests for OpenClaw settings (security_mode, ask_mode). Verify JSON fields (reset_policy) serialize/deserialize correctly. - - Test settings updates - - Test enum validation - -- [x] **7.1.4** Create `AgentSessionTest` feature test — ✅ Superseded: ChannelConversationLoaderTest covers conversation loading, compaction triggers, and message retrieval. - - **What:** Test session management API - - **Why:** Sessions are the conversation context. Tests ensure creation, archival, and message retrieval work correctly. - - **Context:** Test pagination for message retrieval. Verify new session creation archives the old one. - - Test session creation - - Test session archival - - Test message retrieval - -- [ ] **7.1.5** Create `AgentMemoryTest` feature test — Not built. Memory services (ChunkingService, EmbeddingService, HybridSearchService) lack dedicated test coverage. - - **What:** Test memory management API - - **Why:** Memories are persistent agent knowledge. Tests ensure CRUD and reset work correctly. - - **Context:** Reset is destructive - test that it clears all memories. Test category enum validation. - - Test memory CRUD - - Test memory reset - -### 7.2 Frontend Tests - -> **Why:** Frontend tests ensure the UI works correctly. Component tests verify individual components, integration tests verify they work together. - -- [ ] **7.2.1** Test Agent/Show.vue renders all tabs - - **What:** Verify the main agent page renders all 7 tabs correctly - - **Why:** Page structure is foundational. If tabs don't render, nothing else works. - - **Context:** Should test tab switching works and correct content appears for each tab. - -- [ ] **7.2.2** Test personality editor save/preview - - **What:** Test markdown editing and preview functionality - - **Why:** Personality editor is a primary user interaction. Save must work, preview must render markdown. - - **Context:** Test markdown rendering, save button calls API, success/error feedback appears. - -- [ ] **7.2.3** Test instructions editor save/preview - - **What:** Test instructions editing functionality - - **Why:** Same importance as personality editor. Instructions define agent behavior. - - **Context:** Same test patterns as personality editor. - -- [ ] **7.2.4** Test capabilities toggle - - **What:** Test capability enable/disable and approval toggle - - **Why:** Capability toggles control tool access. Must work reliably. - - **Context:** Test toggle state changes, save persists changes, list refreshes correctly. - -- [ ] **7.2.5** Test memory add/delete - - **What:** Test adding and deleting memory entries - - **Why:** Memory management is important for agent knowledge. Add/delete must work correctly. - - **Context:** Test form submission, new entry appears in list, delete removes entry. - -- [ ] **7.2.6** Test settings changes - - **What:** Test all settings form fields and save - - **Why:** Settings affect agent behavior. All fields must persist correctly. - - **Context:** Test each setting type: enums, numbers, JSON (reset_policy), toggles. - -- [ ] **7.2.7** Test dark mode on all components - - **What:** Verify all components display correctly in dark mode - - **Why:** Dark mode is expected feature. Broken dark mode is poor UX. - - **Context:** Use Tailwind's dark: prefix. Verify text contrast, background colors, borders. - ---- - -## Phase 8: Future Enhancements (Post-MVP) - -> **Why:** These features are valuable but not required for MVP. They enhance the system with advanced capabilities like auto-compaction, subagent spawning, skills, and webhooks. - -### 8.1 Vector Memory Search -- [x] **8.1.1** ~~Install pgvector extension~~ → Moved to Phase 1.5.1.1 -- [x] **8.1.2** ~~Create `memory_embeddings` table~~ → Moved to Phase 1.5.1.2 -- [x] **8.1.3** ~~Implement embedding generation service~~ → Moved to Phase 3.7.1.1 -- [x] **8.1.4** ~~Create semantic search endpoint~~ → Moved to Phase 3.7.4.2 -- [x] **8.1.5** ~~Add search UI to memory view~~ → Moved to Phase 4.2.9 - -### 8.2 Context Management - -- [x] **8.2.1** ~~Implement context pruning service~~ → Moved to Phase 3.6.3.1 - -- [x] **8.2.2** Implement auto-compaction — ✅ `app/Services/Memory/ConversationCompactionService.php`. Triggered by ChannelConversationLoader when token count exceeds threshold (configurable in `config/memory.php` under `compaction`). Summarizes older messages via LLM, stores ConversationSummary model, preserves recent messages. CompactConversationJob for async execution. - -- [x] **8.2.3** ~~Add pre-compaction memory flush~~ → Moved to Phase 3.6.2.1 - -- [ ] **8.2.4** Add compaction history view - - **What:** UI to view past compaction events and their summaries - - **Why:** Users may want to see what was compacted and when. Helps understand what context was lost. - - **Context:** Store compaction summaries in session_messages with type 'compaction'. Display in a timeline view. - -### 8.3 Subagent Spawning UI - -> **Why:** Backend supports subagents (Phase 3.5.3.4) but needs frontend UI. These components let users spawn agents and monitor their work. - -- [ ] **8.3.1** Create spawn dialog component - - **What:** Modal dialog for spawning a subagent - - **Why:** Users need to select which agent to spawn and provide a task description. - - **Context:** Should show available agents (based on spawn permissions) and task input field. - -- [ ] **8.3.2** Add spawn button to agent page - - **What:** Button in agent page to trigger spawn dialog - - **Why:** Entry point for spawning subagents from the current agent. - - **Context:** Button should be disabled if agent has no spawn permissions. - -- [ ] **8.3.3** Show running subagents list - - **What:** Component showing currently running subagent tasks - - **Why:** Users need to monitor spawned agents - see progress, status, and cancel if needed. - - **Context:** Real-time updates via WebSocket. Show status badge (running, success, error). - -- [ ] **8.3.4** Add subagent result announcement - - **What:** Notification when a subagent completes its task - - **Why:** Users need to know when spawned work is done. Announce results in the parent agent's chat. - - **Context:** WebSocket notification triggers toast and chat announcement. - -### 8.4 Skills System - -> **Why:** Skills are reusable, composable agent capabilities. OpenClaw has a sophisticated skill system with tiering. This enables "slash commands" and skill-based agent composition. - -- [ ] **8.4.1** Create skills database tables - - **What:** Tables for skill definitions, versions, and agent-skill assignments - - **Why:** Skills need persistent storage. Versioning enables skill updates without breaking existing agents. - - **Context:** OpenClaw has skill tiers: bundled (system), managed (installed), workspace (custom). - -- [ ] **8.4.2** Create skills management UI - - **What:** UI for browsing, installing, and managing skills - - **Why:** Users need to discover available skills and assign them to agents. - - **Context:** Similar to VS Code extension marketplace but for agent skills. - -- [ ] **8.4.3** Implement skill tiering (workspace > managed > bundled) - - **What:** Priority system for skill resolution - - **Why:** Users may want to override bundled skills with custom versions. Tiering ensures custom skills take precedence. - - **Context:** OpenClaw resolution: workspace (highest) → managed → bundled (lowest). - -- [ ] **8.4.4** Add skill invocation tracking - - **What:** Track which skills are used and how often - - **Why:** Usage analytics help users understand agent behavior. Useful for optimization and debugging. - - **Context:** Store invocation counts, last used timestamp, average execution time. - -### 8.5 Webhooks & External Integrations - -> **Why:** Agents need to be triggered by external events (GitHub commits, Slack messages, etc.). Webhooks enable event-driven agent activation. - -- [x] **8.5.1** Already implemented basic UI (Integrations.vue) — ✅ Integrations.vue exists with Telegram and Plausible configured - - **What:** UI skeleton for integrations exists - - **Why:** Placeholder for webhook management interface. - -- [ ] **8.5.2** Create webhooks database table - - **What:** Table for webhook endpoint definitions - - **Why:** Store webhook URLs, secrets, target agents, and event filters. - - **Context:** Each webhook has a unique URL, secret for verification, and maps to an agent + action. - -- [ ] **8.5.3** Implement webhook processing logic - - **What:** Controller and service for receiving and processing webhooks - - **Why:** Incoming webhooks need to be verified (signature), parsed, and routed to the appropriate agent. - - **Context:** Support common webhook formats (GitHub, Slack, generic). Queue for async processing. - -- [ ] **8.5.4** Add webhook testing UI - - **What:** UI for testing webhook endpoints - - **Why:** Users need to verify webhooks work before deploying. Test sends a sample payload and shows result. - - **Context:** Similar to Stripe's webhook testing. Show recent webhook deliveries and their status. - ---- - -## Phase 3.8: Plugin System - -> **Why:** OpenClaw's plugin architecture enables extensibility without modifying core code. Plugins add tools, channels, providers, skills, and more. OpenCompany should support the same extensibility via Laravel packages. - -### 3.8.1 Plugin Infrastructure - -- [ ] **3.8.1.1** Create `plugins` migration - - **What:** Table to track installed plugins and their configuration - - **Why:** Need to know which plugins are installed, enabled, and how they're configured. - - Fields: `id`, `name`, `version`, `description`, `author`, `enabled`, `capabilities` (JSON), `config` (JSON), `slot` (nullable enum: memory/sandbox/browser), `created_at`, `updated_at` - -- [ ] **3.8.1.2** Create `Plugin` model - - **What:** Eloquent model for plugin management - - **Why:** Central model for plugin CRUD and capability resolution. - - Relationships: `hasMany(PluginCapability)` - - Scopes: `enabled()`, `withCapability($type)`, `forSlot($slot)` - -- [ ] **3.8.1.3** Create `PluginRegistryService` - - **What:** Service that discovers, validates, and registers plugins - - **Why:** Centralized plugin lifecycle management. Handles discovery chain: config → workspace → global → bundled. - - **Context:** Plugins are Laravel packages with service providers. The registry tracks which capabilities each plugin provides. - - `discover()` - scan for available plugins - - `register(Plugin $plugin)` - register plugin capabilities - - `validateConfig(Plugin $plugin)` - validate plugin config against schema - - `resolveSlot(string $slot)` - get the active plugin for an exclusive slot - -### 3.8.2 Plugin Capabilities - -- [ ] **3.8.2.1** Create capability interfaces - - **What:** PHP interfaces for each plugin capability type - - **Why:** Type-safe contracts that plugins must implement. Ensures consistency across all plugins. - - **Context:** OpenClaw supports 10 capability types. Start with the most useful ones. - ```php - interface ProvidesTools { public function tools(): array; } - interface ProvidesChannels { public function channels(): array; } - interface ProvidesProviders { public function providers(): array; } - interface ProvidesSkills { public function skills(): array; } - interface ProvidesHooks { public function hooks(): array; } - ``` - -- [ ] **3.8.2.2** Create exclusive slot system - - **What:** Logic to enforce that only one plugin can claim each exclusive slot - - **Why:** Some capabilities (memory backend, sandbox) can only have one active implementation. - - **Context:** If multiple plugins claim the same slot, highest-precedence one wins. - -### 3.8.3 Plugin Management API - -- [ ] **3.8.3.1** Create `PluginController` - - **What:** API endpoints for managing plugins - - **Why:** Frontend needs to list, enable/disable, and configure plugins. - - `GET /api/plugins` - list all plugins - - `POST /api/plugins/{id}/enable` - enable plugin - - `POST /api/plugins/{id}/disable` - disable plugin - - `PUT /api/plugins/{id}/config` - update plugin config - - `POST /api/plugins/discover` - trigger plugin discovery - -- [ ] **3.8.3.2** Create plugin management UI - - **What:** Vue component for plugin management - - **Why:** Users need to see installed plugins, toggle them, and configure settings. - - **Context:** Similar to VS Code extension panel. Show capabilities, slot claims, config fields. - ---- - -## Phase 3.9: Multi-Device Support - -> **Why:** OpenClaw's gateway enables agents to be accessed from any device (iOS, Android, macOS, web). A node registry tracks connected devices and routes tasks based on device capabilities. OpenCompany should support similar multi-device access. - -### 3.9.1 Node Registry - -- [ ] **3.9.1.1** Create `connected_devices` migration - - **What:** Table to track connected devices/clients - - **Why:** Need to know which devices are connected, their capabilities, and health status. - - Fields: `id`, `user_id` (FK), `device_id` (unique string), `platform` (enum: ios/android/macos/web/desktop), `device_name`, `capabilities` (JSON), `last_heartbeat_at`, `is_online`, `metadata` (JSON), `created_at`, `updated_at` - -- [ ] **3.9.1.2** Create `ConnectedDevice` model - - **What:** Eloquent model for device management - - **Why:** Track device state and enable capability-based routing. - - Relationships: `belongsTo(User)` - - Scopes: `online()`, `withCapability($cap)`, `forPlatform($platform)` - - Methods: `heartbeat()`, `markOffline()`, `hasCapability($cap)` - -- [ ] **3.9.1.3** Create WebSocket heartbeat system - - **What:** Periodic heartbeat via Reverb to track device health - - **Why:** Need to detect disconnected devices. Devices send heartbeat every 30 seconds. - - **Context:** Uses existing Laravel Reverb WebSocket. Add presence channel for device tracking. - ```php - // routes/channels.php - Broadcast::channel('devices.{userId}', function ($user, $userId) { - return $user->id === $userId ? [ - 'id' => $user->id, - 'name' => $user->name, - 'device' => request()->header('X-Device-Id'), - ] : null; - }); - ``` - -### 3.9.2 Device-Aware Routing - -- [ ] **3.9.2.1** Create `DeviceRouter` service - - **What:** Service that routes notifications and tasks to the right device - - **Why:** Some tasks need specific device capabilities (e.g., browser tasks → desktop device). - - `routeNotification($user, $notification)` - route to best device - - `routeTask($user, $task)` - route to device with required capabilities - - `broadcastToAll($user, $event)` - broadcast to all connected devices - -- [ ] **3.9.2.2** Create device status dashboard component - - **What:** Vue component showing connected devices and their status - - **Why:** Users need to see which devices are connected, online, and their capabilities. - - Real-time status via WebSocket - - Show platform icon, device name, last activity, capabilities - -### 3.9.3 Cross-Platform Sync - -- [ ] **3.9.3.1** Create sync event system - - **What:** Broadcast state changes to all connected devices - - **Why:** Agent state (messages, tasks, approvals) must be consistent across devices. - - **Context:** Use existing Reverb channels. Add sync events for: new messages, task updates, approval requests, agent status changes. - ---- - -## Phase 3.10: Cron & Scheduled Tasks - -> **Why:** OpenClaw supports cron-based autonomous agent execution. Agents can perform tasks on a schedule without human triggers — daily summaries, periodic monitoring, scheduled reports. OpenCompany should support the same autonomous agent capabilities. - -### 3.10.1 Cron Job Infrastructure - -- [ ] **3.10.1.1** Create `agent_cron_jobs` migration - - **What:** Table for scheduled agent tasks - - **Why:** Store cron job definitions with schedule, task prompt, and delivery configuration. - - Fields: `id`, `agent_id` (FK to users), `name`, `schedule` (cron expression), `task` (TEXT - prompt), `delivery_mode` (enum: announce/none/post), `target_channel_id` (nullable FK), `enabled`, `one_shot`, `last_run_at`, `last_result` (JSON), `created_at`, `updated_at` - -- [ ] **3.10.1.2** Create `AgentCronJob` model - - **What:** Eloquent model for cron job management - - **Why:** Central model for cron CRUD and execution tracking. - - Relationships: `belongsTo(User, 'agent_id')`, `belongsTo(Channel, 'target_channel_id')` - - Scopes: `enabled()`, `forAgent($agentId)`, `dueNow()` - - Methods: `isDue()`, `markRan()`, `shouldAutoDelete()` - -- [ ] **3.10.1.3** Create `ExecuteAgentCronJob` queue job - - **What:** Job that executes a scheduled agent task - - **Why:** Cron jobs should run asynchronously on queue workers, with isolated sessions. - - **Context:** Creates an isolated session (separate from conversation context) so cron execution doesn't pollute chat history. - ```php - class ExecuteAgentCronJob implements ShouldQueue - { - public function handle(): void - { - // Create isolated session for cron execution - $session = AgentSession::create([ - 'session_key' => "cron:{$this->cronJob->id}:" . now()->timestamp, - 'status' => 'active', - ]); - - $agent = OpenCompanyAgent::for($this->cronJob->agent); - $response = $agent->prompt($this->cronJob->task); - - // Deliver based on mode - match ($this->cronJob->delivery_mode) { - 'announce' => $this->announceResult($response), - 'post' => $this->postToChannel($response), - 'none' => null, - }; - - // Auto-delete one-shot jobs - if ($this->cronJob->one_shot) { - $this->cronJob->delete(); - } - - $this->cronJob->update([ - 'last_run_at' => now(), - 'last_result' => ['response' => (string) $response], - ]); - } - } - ``` - -### 3.10.2 Scheduler Integration - -- [ ] **3.10.2.1** Register cron jobs with Laravel scheduler - - **What:** Load agent cron jobs from DB and register with `Schedule` - - **Why:** Laravel's scheduler handles cron expression evaluation, overlap prevention, and single-server execution. - ```php - // app/Console/Kernel.php - protected function schedule(Schedule $schedule): void - { - AgentCronJob::where('enabled', true)->each(function ($job) use ($schedule) { - $schedule->job(new ExecuteAgentCronJob($job)) - ->cron($job->schedule) - ->withoutOverlapping() - ->onOneServer(); - }); - } - ``` - -- [ ] **3.10.2.2** Create cron job execution history migration - - **What:** Table to track cron job execution history - - **Why:** Need audit trail for scheduled executions. Track success/failure, runtime, token usage. - - Fields: `id`, `cron_job_id` (FK), `status` (enum: success/error/timeout), `started_at`, `completed_at`, `token_count`, `result` (JSON), `error` (TEXT nullable) - -### 3.10.3 Cron Management API & UI - -- [ ] **3.10.3.1** Create `AgentCronJobController` - - **What:** API endpoints for managing agent cron jobs - - **Why:** Frontend needs CRUD for cron jobs plus manual trigger and history view. - - `GET /api/agents/{id}/cron-jobs` - list cron jobs - - `POST /api/agents/{id}/cron-jobs` - create cron job - - `PUT /api/cron-jobs/{id}` - update cron job - - `DELETE /api/cron-jobs/{id}` - delete cron job - - `POST /api/cron-jobs/{id}/trigger` - manual trigger - - `GET /api/cron-jobs/{id}/history` - execution history - -- [ ] **3.10.3.2** Create cron management Vue component - - **What:** UI for managing scheduled agent tasks - - **Why:** Users need to create, edit, enable/disable, and monitor cron jobs. - - **Context:** Include cron expression helper (common presets: daily, hourly, weekly, etc.), delivery mode selector, and execution history log. - ---- - -### 3.11 Heartbeat System - -- [ ] **3.11.1** Add heartbeat fields to `agent_configs` migration ← depends on: [1.1.3] - - **What:** Migration adding `heartbeat_prompt`, `heartbeat_enabled`, `heartbeat_interval`, `heartbeat_active_start`, `heartbeat_active_end`, `heartbeat_timezone` to `agent_configs` table - - **Why:** Agents need configurable heartbeat settings. OpenClaw stores this in HEARTBEAT.md; we use DB fields for admin UI editability. - -- [ ] **3.11.2** Create `HeartbeatJob` ← depends on: [3.11.1, 3.1.1] - - **What:** Queue job that runs an agent's heartbeat check: loads prompt, calls AI SDK, posts results to channel (or skips if ack-only) - - **Why:** This is the core heartbeat execution. Adapted from OpenClaw's heartbeat-runner.ts. - - Active hours gating via `between()` check - - Ack suppression for responses under 30 chars or containing `HEARTBEAT_OK` - -- [ ] **3.11.3** Wire scheduler to dispatch heartbeats ← depends on: [3.11.2] - - **What:** Add scheduler entry in `app/Console/Kernel.php` that queries active agents with heartbeat enabled, dispatches `HeartbeatJob` for each - - **Why:** Replaces OpenClaw's Node.js setInterval with Laravel's built-in scheduler. - - Default interval: every 30 minutes (configurable per agent via `heartbeat_interval`) - -- [ ] **3.11.4** Add heartbeat configuration to agent admin UI ← depends on: [3.11.1, 4.1.x] - - **What:** Add heartbeat settings section to Agent/Show.vue Settings tab: enable toggle, prompt textarea, interval select, active hours inputs - - **Why:** Admins need to configure heartbeat behavior per agent without touching the database directly. - ---- - -### 3.12 Agent Execution Loop (Core Agent Brain) - -> **This is the most critical phase.** Without this, agents cannot process messages or execute tasks. All other agent features (memory, heartbeat, sub-agents) depend on this. - -- [x] **3.12.1** Create `AgentPromptBuilder` service ← depends on: [2.1.x, 3.1.1] — ✅ superseded: system prompt assembly built into OpenCompanyAgent using Document-based identity files - - **What:** Service that assembles the system prompt from agent config fields (personality, instructions), user context, tool documentation, and memory. Follows OpenClaw's injection order: identity → personality → user → instructions → tools → memory. - - **Why:** Clean separation of prompt assembly from execution. Handles sub-agent restrictions (only instructions, no personality/user context). - -- [x] **3.12.2** Create `AgentToolExecutor` service ← depends on: [3.5.1.x] — ✅ superseded: tool resolution handled by ToolRegistry.getToolsForAgent() + AgentPermissionService - - **What:** Service that resolves available tools for an agent (based on capabilities/permissions), executes tool calls from LLM responses, and returns results. - - **Why:** Adapted from OpenClaw's tool execution loop. Handles the tool call → result → feed back cycle. - - Tool resolution follows permission stack: profile → allow/deny → agent-specific restrictions - -- [x] **3.12.3** Create `ProcessAgentMessageJob` ← depends on: [3.12.1, 3.12.2] — ✅ implemented as AgentRespondJob (core agent brain) - - **What:** The core agent runner job. Dispatched when an agent is mentioned or receives a DM. Loads context, builds prompt, calls AI SDK with streaming, processes tool calls, stores response, broadcasts via Reverb. - - **Why:** This is the "agent brain" — the single most important piece of the system. Replaces OpenClaw's `runEmbeddedPiAgent()`. - - Queue: `agent-{id}` (serialized per agent to prevent race conditions) - - Includes: conversation history loading, streaming response broadcast, post-processing (memory indexing, compaction check) - -- [x] **3.12.4** Wire message controller to dispatch agent runs ← depends on: [3.12.3] — ✅ MessageController dispatches AgentRespondJob on @mention and DM - - **What:** Update `MessageController::store()` to detect @mentions of agents and dispatch `ProcessAgentMessageJob`. Also handle DM channels where the other participant is an agent. - - **Why:** This is the trigger that makes agents respond to messages. - - Detection: check message content for @mentions matching agent names, or check if channel is a DM with an agent member - -- [x] **3.12.5** Add response streaming via Reverb ← depends on: [3.12.3] — ✅ streaming via Reverb WebSocket (MessageSent, AgentStatusUpdated, TypingIndicator events) - - **What:** Create `AgentTyping` broadcast event for partial response streaming. Clients receive chunks as the agent generates them, showing real-time typing. - - **Why:** UX requirement — users should see agents "typing" in real-time, not wait for complete responses. - - Broadcast on channel: `channel.{id}` - - Event data: `{ agentId, chunk, isComplete }` - -- [ ] **3.12.6** Add model failover support ← depends on: [3.12.3] - - **What:** Configure primary + fallback models per agent in `agent_configs`. `ProcessAgentMessageJob` tries primary first, falls back to alternatives on failure. - - **Why:** Adapted from OpenClaw's failover chain. Ensures agents stay operational if a provider has an outage. - - Config: `model_primary`, `model_fallbacks` (JSON array) on agent_configs - ---- - -## Verification Checklist - -### Functional Verification -- [x] Navigate to `/agent/{id}` - page loads without errors — ✅ Agent/Show.vue with Inertia route -- [x] All 7 tabs render correctly (Overview, Personality, Instructions, Capabilities, Memory, Activity, Settings) — ✅ tabs: Overview, Tasks, Identity, Capabilities, Activity, Settings -- [ ] Edit personality → saves to database → persists on refresh -- [ ] Edit instructions → saves to database → persists on refresh -- [ ] Toggle capability → saves to database → persists on refresh -- [ ] Add memory entry → appears in list → persists on refresh -- [ ] Delete memory entry → removed from list -- [ ] Change settings → saves to database → persists on refresh -- [ ] Start new session → creates new session → clears context -- [ ] Pause agent → status changes → agent stops working -- [ ] Resume agent → status changes → agent can work again - -### UI Verification -- [ ] Dark mode works on all components -- [ ] Loading states show skeleton placeholders -- [ ] Error states show appropriate messages -- [ ] Mobile responsive layout works -- [ ] Markdown preview renders correctly -- [ ] Context usage progress bar updates - -### Data Integrity -- [ ] Agent config belongs to correct user -- [ ] Session messages ordered by timestamp -- [ ] Memory entries have correct categories -- [ ] Settings have valid enum values - -### OpenClaw Features Verification -- [ ] Pre-compaction flush runs before reaching reserve threshold -- [ ] Tool kinds correctly inferred and affect approval logic -- [ ] Allowlist patterns matched and tracked (last_used_at updates) -- [ ] Session pruning triggers after TTL expires -- [ ] NO_REPLY messages suppressed from UI -- [ ] Hybrid search returns relevant results (vector + FTS) -- [ ] Embedding cache prevents duplicate API calls -- [ ] Reserve tokens enforced during compaction -- [ ] Security modes work correctly (deny/allowlist/full) -- [ ] Ask modes work correctly (off/on-miss/always) - ---- - -## File Summary - -### Packages to Install -```bash -# Required -composer require laravel/ai - -# Optional -composer require laravel/mcp # MCP server for external AI clients -composer require laravel/horizon # Queue monitoring dashboard -``` - -### Migrations to Create (17 files) -``` -database/migrations/ -├── xxxx_create_agent_configurations_table.php -├── xxxx_create_capabilities_table.php -├── xxxx_create_agent_capabilities_table.php -├── xxxx_create_agent_settings_table.php # Includes OpenClaw fields -├── xxxx_create_agent_sessions_table.php # Includes OpenClaw fields -├── xxxx_create_agent_session_messages_table.php # Includes OpenClaw fields -├── xxxx_create_agent_memories_table.php -├── xxxx_create_agent_memory_daily_logs_table.php -├── xxxx_create_subagent_spawn_permissions_table.php -├── xxxx_create_subagent_runs_table.php -├── xxxx_create_agent_tool_allowlist_table.php # OpenClaw -├── xxxx_create_memory_chunks_table.php # OpenClaw -├── xxxx_create_embedding_cache_table.php # OpenClaw -├── xxxx_create_plugins_table.php # Plugin system -├── xxxx_create_connected_devices_table.php # Multi-device -├── xxxx_create_agent_cron_jobs_table.php # Cron system -└── xxxx_create_cron_job_history_table.php # Cron history -``` - -### Models to Create (17 files) -``` -app/Models/ -├── AgentConfiguration.php -├── Capability.php -├── AgentCapability.php -├── AgentSettings.php -├── AgentSession.php -├── AgentSessionMessage.php -├── AgentMemory.php -├── AgentMemoryDailyLog.php -├── SubagentSpawnPermission.php -├── SubagentRun.php -├── AgentToolAllowlist.php # OpenClaw -├── MemoryChunk.php # OpenClaw -├── EmbeddingCache.php # OpenClaw -├── MemoryCollection.php # QMD collections -├── Plugin.php # Plugin system -├── ConnectedDevice.php # Multi-device -├── AgentCronJob.php # Cron system -└── CronJobHistory.php # Cron history -``` - -### Controllers to Create (12 files) -``` -app/Http/Controllers/Api/ -├── AgentConfigurationController.php -├── AgentCapabilityController.php -├── CapabilityController.php -├── AgentSettingsController.php -├── AgentSessionController.php -├── AgentMemoryController.php -├── SubagentController.php -├── MemorySearchController.php # OpenClaw -├── MemoryCollectionController.php # QMD collection management -├── AllowlistController.php # OpenClaw -├── PluginController.php # Plugin system -├── ConnectedDeviceController.php # Multi-device -└── AgentCronJobController.php # Cron system -``` - -### Agent + Tools to Create -``` -app/Agents/ -├── OpenCompanyAgent.php # Single dynamic agent class -├── DynamicProviderResolver.php # Resolves provider from IntegrationSetting -├── ToolRegistry.php # Maps DB capabilities to tool classes -└── Tools/ - ├── Internal/ # Workspace tools - │ ├── SearchDocuments.php - │ ├── ReadDocument.php - │ ├── UpdateDocument.php - │ ├── CreateListItem.php - │ ├── UpdateListItem.php - │ ├── SendMessage.php - │ ├── CreateTaskStep.php - │ ├── CreateApproval.php - │ └── QueryDataTable.php - ├── External/ # SDK built-in wrappers - │ ├── WebSearch.php - │ └── WebFetch.php - └── Memory/ # Memory tools - ├── SaveMemory.php - └── RecallMemory.php -``` - -### Agent Jobs to Create (9 files) -``` -app/Jobs/Agent/ -├── FetchAgentConfigJob.php -├── ExecuteAgentJob.php -├── CreateApprovalRequestJob.php -├── ExecuteApprovedActionJob.php -├── SaveSessionMessageJob.php -├── MemoryFlushJob.php # OpenClaw -├── PruneSessionJob.php # OpenClaw -└── CheckMemoryFlushJob.php # OpenClaw -``` - -### Agent Services to Create (3 files) -``` -app/Services/Agent/ -├── AgentTaskService.php -├── AgentSessionResetService.php -└── SubagentSpawnService.php -``` - -### Services to Create (16 files) -``` -app/Services/ -├── AgentToolRegistry.php -├── AgentPromptBuilder.php # System prompt assembly (OpenClaw workspace files mapping) -├── AgentToolExecutor.php # Tool resolution + execution loop -├── ContextWindowGuard.php # OpenClaw -├── MemoryFlushService.php # OpenClaw -├── SessionPruningService.php # OpenClaw -├── ToolKindClassifier.php # OpenClaw -├── ExecutionApprovalService.php # OpenClaw -├── EmbeddingService.php # OpenClaw -├── EmbeddingCacheService.php # OpenClaw -├── ChunkingService.php # OpenClaw -├── MemoryIndexService.php # OpenClaw -├── HybridMemorySearch.php # OpenClaw -├── HybridDocumentSearch.php # QMD-enhanced hybrid search -├── MemorySearchScopeGuard.php # QMD scope rules -├── PluginRegistryService.php # Plugin system -├── DeviceRouter.php # Multi-device -└── CronExecutionService.php # Cron system -``` - -### Frontend Files to Update (2 files) -``` -resources/js/ -├── composables/useApi.ts (add ~30 new methods) -└── Pages/Agent/Show.vue (replace mocks with API calls) -``` - -### Frontend Components to Create (3 files - OpenClaw) -``` -resources/js/Components/agents/ -├── AllowlistManager.vue -├── MemorySearchInput.vue -└── SecurityModeSelector.vue -``` - -### Seeders to Create (3 files) -``` -database/seeders/ -├── CapabilitySeeder.php -├── AgentConfigurationSeeder.php -└── AgentSettingsSeeder.php -``` - -### Jobs to Create -``` -app/Jobs/ -├── IndexAgentMemoryJob.php # Index single document into memory_chunks -├── ExportSessionTranscriptJob.php # Export session messages to markdown document -├── PeriodicReindexJob.php # Scheduled re-index (every 5 minutes) -├── EmbeddingRefreshJob.php # Scheduled embedding refresh (hourly) -├── ReindexAgentJob.php # Full agent re-index on demand -├── HeartbeatJob.php # Periodic agent heartbeat check -└── ProcessAgentMessageJob.php # Core agent brain — message processing + tool execution -``` - -### Config Files to Create -``` -config/ -└── memory.php # QMD search parameters & indexing config -``` - ---- - -## Implementation Priority Order - -**Day 1: Package Setup** -0. Install & configure packages (0.1.1 - 0.2.2) - -**Week 1: Foundation** -1. Database migrations (1.1.1 - 1.4.3) -2. Memory search infrastructure (1.5.1 - 1.5.4) ← includes QMD collections & clamping -3. Core models (2.1.1 - 2.4.2) - -**Week 2: API Layer** -4. Controllers (3.1.1 - 3.7.1) -5. Seeders (6.1.1 - 6.2.2) - -**Week 3: Agent Execution Jobs** -6. AI Tools (3.5.1.1 - 3.5.1.3) -7. Agent Jobs (3.5.2.1 - 3.5.2.10) -8. Agent Services (3.5.3.1 - 3.5.3.4) -9. Queue Infrastructure (3.5.4.1 - 3.5.4.3) - -**Week 4: Context Management (OpenClaw)** -10. Context Window Guard (3.6.1) -11. Pre-Compaction Memory Flush (3.6.2) -12. Session Pruning (3.6.3) -13. Tool Kind Classification (3.6.4) -14. Execution Approval System (3.6.5) - -**Week 5-6: Hybrid Memory Search + QMD Features (OpenClaw)** -15. Embedding Service (3.7.1) -16. Chunking Service (3.7.2) -17. Memory Indexing (3.7.3) -18. Hybrid Search (3.7.4) -19. Collection Management (3.7.5) -20. Session Transcript Indexing (3.7.6) -21. Periodic Re-Indexing (3.7.7) -22. Scope Rules & Security (3.7.8) -23. Enhanced Search with QMD Features (3.7.9) - -**Week 7: Frontend Integration** -24. useApi methods (4.1.1 - 4.1.8) -25. Component connections (4.2.1 - 4.2.9) -26. Page updates (4.3.1 - 4.3.2) - -**Week 8: Heartbeat System** -27. Heartbeat migration (3.11.1) -28. HeartbeatJob (3.11.2) -29. Scheduler wiring (3.11.3) -30. Heartbeat admin UI (3.11.4) - -**Week 9: Agent Brain** -31. AgentPromptBuilder service (3.12.1) -32. AgentToolExecutor service (3.12.2) -33. ProcessAgentMessageJob (3.12.3) -34. Wire message controller (3.12.4) -35. Response streaming via Reverb (3.12.5) -36. Model failover support (3.12.6) - -**Week 10: Polish & Testing** -37. Agent control actions (5.1.1 - 5.2.2) -38. Testing (7.1.1 - 7.2.7) - -**Post-MVP: Enhancements** -39. Subagent spawning (8.3.x) -40. Skills system (8.4.x) -41. Webhooks (8.5.x) -42. Plugin system (3.8.x) -43. Multi-device support (3.9.x) -44. Cron & scheduled tasks (3.10.x) - ---- - -## Status Update (February 2026) - -> **This section reflects what has actually been built vs. what remains from the original plan above. Many phases were implemented organically and differ from the original spec — some items were superseded, others were built differently.** - -### What's Been Built (Completed) - -The following are **done and working** — these can be checked off from the phases above: - -#### Agent Execution Engine (supersedes Phase 3.12) -- [x] `OpenCompanyAgent` — single dynamic agent class with identity file-based system prompts -- [x] `AgentRespondJob` — core agent response lifecycle (LLM call → response → task completion) -- [x] `ExecuteAgentTaskJob` — queue job for agent task execution -- [x] `AgentResumeFromSleepJob` — wake sleeping agents -- [x] `DynamicProviderResolver` — resolves LLM provider/model from `brain` field + IntegrationSettings -- [x] `ChannelConversationLoader` — loads conversation history for agent context -- [x] `AgentChatService` — orchestrates agent chat interactions -- [x] Message controller dispatches agent runs on @mention and DM -- [x] Response streaming via Reverb WebSocket - -#### Agent Tools (supersedes Phase 3.5.1) -- [x] `ToolRegistry` — maps agent permissions to tool class instances (33 tools total) -- [x] `ApprovalWrappedTool` — wraps tools that require approval -- [x] Workspace tools: `SearchDocuments`, `ManageDocument`, `CommentOnDocument` -- [x] Messaging tools: `SendChannelMessage`, `ManageMessage`, `ReadChannel`, `ListChannels` -- [x] List tools: `ManageListItem`, `QueryListItems`, `ManageListStatus` -- [x] Task tools: `CreateTaskStep`, `UpdateCurrentTask` -- [x] Table tools: `ManageTable`, `ManageTableRows`, `QueryTable` -- [x] Calendar tools: `ManageCalendarEvent`, `QueryCalendar` -- [x] Approval tools: `WaitForApproval`, `Wait` -- [x] Integration tools: `SendTelegramNotification`, Plausible suite (8 tools) -- [x] Creative tools: `CreateJpGraphChart`, `RenderSvg` -- [x] Meta tools: `GetToolInfo` - -#### Agent Identity System (supersedes Phase 1.1.1, 2.1.1, 3.1.1) -- [x] Document-based identity (8 `.md` files per agent: IDENTITY, SOUL, USER, AGENTS, TOOLS, MEMORY, HEARTBEAT, BOOTSTRAP) -- [x] `AgentDocumentService` — creates/manages identity file structure per agent -- [x] Identity files API (`GET/PUT /api/agents/{id}/identity-files/{type}`) -- [x] `AgentIdentityFiles.vue` — OpenClaw-style two-panel editor for all 8 files -- [x] BOOTSTRAP.md auto-clear after first agent interaction (`bootstrapped_at` tracking) - -#### Agent Permissions (supersedes Phase 1.1.2-1.1.3, 3.2.1) -- [x] `AgentPermission` model — unified scope-based permissions (tool, channel, folder) -- [x] `AgentPermissionService` — resolves enabled tools, channels, folders, integrations -- [x] `AgentPermissionController` — API for managing all permission types -- [x] UI: tool toggles, channel access, folder access, integration toggles on Agent/Show.vue - -#### Agent Configuration & Settings (partial supersede of Phase 1.1.4, 3.3.1) -- [x] `behavior_mode` on User model (autonomous/supervised/strict) -- [x] `must_wait_for_approval` flag -- [x] `brain` field (provider:model format) with validation -- [x] `sleeping_until` / `sleeping_reason` for sleep/wake cycle -- [x] Settings tab in Agent/Show.vue - -#### Frontend — Agent Detail Page (supersedes Phase 4) -- [x] `Agent/Show.vue` — full agent detail page with tabs: Overview, Tasks, Identity, Capabilities, Activity, Settings -- [x] Real API data (not mocks) for all sections -- [x] `AgentCapabilities.vue` — tool toggles with app grouping -- [x] `AgentSettingsPanel.vue` — behavior mode, brain selector, delete agent -- [x] Task list with step tracking - -#### Core Platform (all working) -- [x] Chat with channels, DMs, threads, reactions, attachments -- [x] Documents with versioning, comments, attachments, folder tree -- [x] Lists (kanban) with custom statuses, templates, automation rules -- [x] Tasks (agent work items) with steps, lifecycle, assignment -- [x] Calendar with recurrence, attendees, iCal feeds, import -- [x] Data Tables with 10 column types, 4 view modes, bulk operations -- [x] Approvals with Telegram forwarding -- [x] Activity feed, notifications, search -- [x] Integrations system (Telegram, Plausible configured) -- [x] Auth (login, register, password reset) - -### What Was Dropped / Superseded - -These items from the original plan are **no longer needed**: - -- ~~`agent_configurations` table~~ → superseded by Document-based identity files -- ~~`agent_settings` table~~ → superseded by fields on `users` table -- ~~`capabilities` table~~ → superseded by `agent_permissions` + `ToolRegistry` -- ~~`stats` table~~ → `StatsController` computes everything dynamically -- ~~`AgentConfiguration` model~~ → deleted (cleanup commit 33a0147) -- ~~`AgentSettings` model~~ → deleted -- ~~`Capability` model~~ → deleted -- ~~`Stat` model~~ → deleted -- ~~`AgentPersonalityEditor.vue`~~ → replaced by `AgentIdentityFiles.vue` -- ~~`AgentInstructionsEditor.vue`~~ → replaced by `AgentIdentityFiles.vue` -- ~~`AgentMemoryView.vue`~~ → replaced by MEMORY.md in identity files -- ~~`CapabilitySeeder`~~ → deleted -- ~~Phase 4.2.1-4.2.4~~ → components replaced by identity file editor - ---- - -## Next Up: Priority Implementation Queue - -> **Ordered by impact vs effort. Each item is a self-contained project.** - -### N1. Sub-Agent Spawning -**Impact:** HIGH | **Effort:** MEDIUM | **Priority:** 1 - -The core "Robo-Company" differentiator. A manager agent spawns worker agents into temporary channels, tracks their work, aggregates results. Foundation already exists (`manager_id` column, `directReports()` relationship). - -- [ ] **N1.1** Create `subagent_spawn_permissions` migration - - Fields: `id`, `parent_agent_id` (FK), `allowed_agents` (JSON), `max_concurrent`, `auto_archive_minutes` -- [ ] **N1.2** Create `subagent_runs` migration - - Fields: `id`, `parent_agent_id`, `child_agent_id`, `task_description`, `label`, `status` (pending/running/success/error/timeout/cancelled), `runtime_config` (JSON), `result` (JSON), `created_at`, `completed_at` -- [ ] **N1.3** Create `SubagentSpawnPermission` and `SubagentRun` models -- [ ] **N1.4** Create `SubagentSpawnService` - - Enforce spawn permissions (allowed_agents, max_concurrent) - - Create ephemeral channel for parent↔child communication - - Dispatch child agent task via queue - - Track parent-child relationship in `subagent_runs` - - Handle timeout and cancellation -- [ ] **N1.5** Create `SpawnSubagent` agent tool - - Allows manager agents to spawn workers via tool call - - Parameters: child_agent_id, task_description, timeout_minutes - - Returns run ID for tracking -- [ ] **N1.6** Create `SubagentController` API - - `GET /api/agents/{id}/spawn-permissions` — get spawn permissions - - `PUT /api/agents/{id}/spawn-permissions` — update permissions - - `POST /api/agents/{id}/spawn` — spawn subagent - - `GET /api/agents/{id}/subagent-runs` — list runs - - `POST /api/subagent-runs/{id}/cancel` — cancel running subagent -- [ ] **N1.7** Frontend: spawn dialog, running subagents list, result announcements - - Spawn button on agent page (disabled if no spawn permissions) - - Real-time status updates via WebSocket - - Result announcement in parent agent's chat -- [ ] **N1.8** Add spawn permissions UI to Agent/Show.vue Settings tab - -### N2. MCP Server -**Impact:** HIGH | **Effort:** LOW-MEDIUM | **Priority:** 2 - -Expose OpenCompany as an MCP server so external AI tools (Claude Desktop, Cursor, VS Code Copilot) can interact with the workspace. High developer appeal and unique positioning. - -- [ ] **N2.1** Install `laravel/mcp` package - ```bash - composer require laravel/mcp - ``` -- [ ] **N2.2** Create MCP server configuration - - Define available resources: documents, channels, tasks, list items, agents - - Define available tools: search_documents, create_task, send_message, create_list_item, query_table -- [ ] **N2.3** Create MCP tool implementations - - `SearchDocuments` — search workspace documents - - `ReadDocument` — read a specific document - - `CreateListItem` — create kanban items - - `SendMessage` — send messages to channels - - `CreateTask` — create agent tasks - - `QueryTable` — query data tables -- [ ] **N2.4** Create MCP resource providers - - Documents resource (list, read) - - Channels resource (list, read messages) - - Agents resource (list, status) - - Tasks resource (list, read) -- [ ] **N2.5** Add authentication (API token-based) -- [ ] **N2.6** Add MCP server settings to Settings page - - Enable/disable MCP server - - Generate/revoke API tokens - - Show connection URL for clients - -### N3. Memory & Vector Search (Hybrid) -**Impact:** HIGH | **Effort:** HIGH | **Priority:** 3 - -Agents currently have no semantic memory beyond plain MEMORY.md text. Adding pgvector + hybrid search enables agents to recall past conversations and learnings by meaning, not just keywords. - -- [ ] **N3.1** Install pgvector extension - ```sql - CREATE EXTENSION IF NOT EXISTS vector; - ``` -- [ ] **N3.2** Create `memory_chunks` migration - - Fields: `id`, `agent_id`, `source_type` (identity/memory/session), `source_id`, `document_id` (FK), `start_line`, `end_line`, `content_hash`, `text`, `embedding` VECTOR(1536) -- [ ] **N3.3** Create `embedding_cache` migration - - Fields: `provider`, `model`, `content_hash`, `embedding` VECTOR(1536), `dims` - - Primary key on (provider, model, content_hash) -- [ ] **N3.4** Create `EmbeddingService` - - Generate embeddings via OpenAI text-embedding-3-small (or configured provider) - - Batch mode for multiple texts - - Cache layer using `embedding_cache` table -- [ ] **N3.5** Create `ChunkingService` - - Split long texts into ~400 token chunks with 80 token overlap - - Track start/end line numbers - - Content hashing for change detection -- [ ] **N3.6** Create `MemoryIndexService` - - `indexDocument($agentId, $docId)` — chunk + embed single document - - `reindexAgent($agentId)` — full reindex - - Background job dispatch for async indexing -- [ ] **N3.7** Create `HybridMemorySearch` service - - Vector similarity via pgvector `<=>` operator - - Full-text search via `ts_rank` + `to_tsvector` - - Combined scoring: 0.7 vector + 0.3 text - - Result clamping: max 6 results, 700 chars per snippet, 4000 chars total -- [ ] **N3.8** Create `RecallMemory` agent tool - - Allows agents to search their own memory semantically - - Parameters: query, limit, collection (optional) - - Returns ranked results with source citations -- [ ] **N3.9** Create `MemorySearchController` API - - `POST /api/agents/{id}/memory/search` — search agent memory -- [ ] **N3.10** Create `IndexAgentMemoryJob` + `PeriodicReindexJob` - - Index on document create/update (via model observer) - - Periodic reindex every 5 minutes (delta-based) - - Embedding refresh hourly -- [ ] **N3.11** Add Document model observer for auto-indexing - - Trigger on identity/memory document changes - - 15-second debounced dispatch -- [ ] **N3.12** Frontend: `MemorySearchInput.vue` component - - Search input with debounced API calls - - Show matched chunks with source references - -### N4. Test Suite Foundation -**Impact:** MEDIUM | **Effort:** MEDIUM | **Priority:** 4 - -0% test coverage is a risk. Set up PHPUnit feature tests for the most critical API endpoints and establish patterns for future tests. - -- [ ] **N4.1** Configure test environment - - SQLite in-memory for speed - - Test factories for User, Channel, Message, Document, Task, ListItem - - Base test case with auth helpers -- [ ] **N4.2** Create model factories - - `UserFactory` (human + agent variants) - - `ChannelFactory` (public, private, dm) - - `MessageFactory` - - `DocumentFactory` (file + folder) - - `TaskFactory` + `TaskStepFactory` - - `ListItemFactory` + `ListStatusFactory` - - `CalendarEventFactory` - - `DataTableFactory` + `DataTableColumnFactory` + `DataTableRowFactory` -- [x] **N4.3** Agent API tests — ✅ 20+ test files exist in tests/Feature/ and tests/Feature/Tools/ - - `AgentControllerTest` — CRUD agents, identity files, show endpoint - - `AgentPermissionControllerTest` — tool/channel/folder permission toggles - - `AgentChatServiceTest` — message dispatch triggers agent response -- [ ] **N4.4** Core API tests - - `ChannelControllerTest` — CRUD, members, read markers - - `MessageControllerTest` — CRUD, reactions, threads, attachments - - `DocumentControllerTest` — CRUD, versions, comments, folder tree - - `ListItemControllerTest` — CRUD, reorder, status changes - - `TaskControllerTest` — CRUD, lifecycle (start/complete/fail), steps -- [ ] **N4.5** Calendar & Table API tests - - `CalendarEventControllerTest` — CRUD, recurrence, attendees, feeds - - `DataTableControllerTest` — CRUD, columns, rows, bulk operations -- [ ] **N4.6** Integration tests - - `ApprovalFlowTest` — create approval → approve/reject → agent resumes - - `AgentToolExecutionTest` — agent uses tools correctly -- [ ] **N4.7** Set up CI pipeline (GitHub Actions) - - Run tests on push/PR - - Report coverage - -### N5. Quick Wins & Polish -**Impact:** VISIBLE | **Effort:** LOW | **Priority:** 5 - -Small changes that immediately improve the demo experience and align code with documentation. - -- [ ] **N5.1** Seed a `coordinator` agent in `UserSeeder` - - All 7 TypeScript agent types now demonstrated -- [ ] **N5.2** Seed a `private` channel in `ChannelSeeder` - - All channel types visible in demos -- [ ] **N5.3** Add `TaskStep` records to `AgentTaskSeeder` - - Task detail view shows step tracking (action, decision, approval steps) -- [ ] **N5.4** Align `ExternalChannelProvider` type with reality - - Only list implemented providers (telegram, slack) — remove or comment out others -- [ ] **N5.5** Add Data Tables section to `features.md` - - Major built feature gets marketing visibility -- [ ] **N5.6** Add Calendar section to `features.md` - - Built feature gets marketing visibility -- [ ] **N5.7** Update `emergent.md` risk assessment - - Agent execution engine is now built (was listed as CRITICAL gap) - - Update "No Agent Execution Engine" section to reflect current state -- [ ] **N5.8** Rename automation triggers for clarity - - `task_created` → `list_item_created` - - `assign_task` → `assign_list_item` - - Aligns with the Tasks vs ListItems naming convention - -### N6. External Channel: Discord -**Impact:** MEDIUM | **Effort:** MEDIUM | **Priority:** 6 - -Prove the external channel architecture scales beyond Telegram. Discord is where the AI/developer community lives. - -- [ ] **N6.1** Create `DiscordService` (similar to `TelegramService`) - - Bot token management - - Send/receive messages via Discord API - - Channel mapping (Discord channel ↔ OpenCompany channel) -- [ ] **N6.2** Create `DiscordWebhookController` - - Receive Discord gateway events - - Route messages to appropriate channels - - Handle Discord-specific formatting (embeds, mentions) -- [ ] **N6.3** Create Discord integration settings - - Add to `IntegrationSeeder` — bot token, guild ID, channel mappings - - Add Discord configuration UI to Integrations page -- [ ] **N6.4** Create `SendDiscordNotification` agent tool - - Similar to `SendTelegramNotification` - - Support Discord embeds for rich formatting -- [ ] **N6.5** Update `ExternalChannelProvider` type - - Add `discord` to TypeScript union type - - Update channel creation flow to support Discord channels -- [ ] **N6.6** Test bidirectional message flow - - Message in Discord → appears in OpenCompany channel - - Agent response in OpenCompany → appears in Discord diff --git a/docs/planning/kosmokrator-runtime-alignment-checklist.md b/docs/planning/kosmokrator-runtime-alignment-checklist.md new file mode 100644 index 0000000..8d55f21 --- /dev/null +++ b/docs/planning/kosmokrator-runtime-alignment-checklist.md @@ -0,0 +1,49 @@ +# KosmoKrator Runtime Alignment Checklist + +> Implementation checklist for aligning OpenCompany's context, compaction, pruning, and prompt-caching runtime with the reusable parts of KosmoKrator and `prism-relay`. + +## Status + +Phases 1-5 are largely complete. Remaining open items are tracked in the [OpenCompany Plane project](https://plane.gingermedia.biz/kosmokrator/projects/ceaf5d22-612a-42bf-9cc8-0dac054cdf0c/issues/): + +| Issue | Open item | Phase | +|-------|-----------|-------| +| OC-17 | Remove duplicated built-in context-window assumptions | 1 | +| OC-18 | ContextPruner: protect recent user turns and already-truncated entries | 3 | +| OC-19 | CompactionPlan: preserve protected context during compaction | 4 | +| OC-20 | Missing test coverage: compaction plan building + memory extraction | 4 | +| OC-48 | Prompt cache metrics not wired to token metrics path | 5 | + +## Completed work + +### Phase 1 — Relay-Backed Context Windows +- Refactored `ModelContextRegistry` as relay-backed adapter +- Reads defaults from `OpenCompany\PrismRelay\Meta\ProviderMeta` +- `AppSetting` overrides as top-priority layer +- Callers pass provider + model +- Tests: relay exact match, admin override precedence, unknown model fallback + +### Phase 2 — Shared Context Budget (complete) +- `ContextBudget.php` centralizes all threshold math +- Consumed by `ConversationCompactionService`, `MemoryFlushService`, `AgentRespondJob` + +### Phase 3 — Context Pruning (partial) +- `ContextPruner.php` scoped to OpenCompany `read` tools +- Integrated into checkpoint/history loading path +- Minimum savings threshold enforced + +### Phase 4 — Compaction Pipeline (partial) +- `CompactionPlan.php` + `CompactionMemoryExtractor.php` +- Failure counting / circuit breaker +- Durable memory extraction from summaries + +### Phase 5 — Prompt Cache Planning (partial) +- Prompt splitting in `OpenCompanyAgent` +- Extended Laravel AI Prism for multiple system prompts +- Split prompts fed to `Relay::planPromptCache()` + +## Notes + +- Prompt splitting alone is not the same as provider-side prompt caching. +- `prism-relay` contains the planner/orchestrator; OpenCompany mainly needs to use it on its real request path. +- OpenCompany pruning rules are based on OpenCompany tools, not KosmoKrator shell tools. diff --git a/docs/planning/memory-implementation.md b/docs/planning/memory-implementation.md index f6d4d6b..b252a45 100644 --- a/docs/planning/memory-implementation.md +++ b/docs/planning/memory-implementation.md @@ -1,10 +1,9 @@ -# Memory, Compaction & Embeddings — Implementation Guide +# Memory, Compaction & Embeddings — Architecture Reference -> Phased implementation plan for agent memory, document embeddings, conversation compaction, and hybrid search. Each phase is a standalone PR-able unit. Later phases depend on earlier ones. +> Architecture overview for agent memory, document embeddings, conversation compaction, and hybrid search. **Status**: Complete (all 6 phases implemented) **Config**: `config/memory.php` (comprehensive: embedding, chunking, search, reranking, compaction, memory_flush, context_windows, scope) -**Reference**: OpenClaw memory system (`inspiration/openclaw/src/memory/`, `inspiration/openclaw/docs/concepts/memory.md`; updated for v2026.2.9) ### Implementation Summary @@ -36,7 +35,7 @@ Agents have two distinct memory systems, mirroring how human memory works: - **What**: The current conversation messages loaded into the context window - **Scope**: Single channel, single session - **Lifetime**: Ephemeral — exists only while the context window holds it -- **Managed by**: `ChannelConversationLoader` (Phase 4: compaction keeps it within budget) +- **Managed by**: `ChannelConversationLoader` (compaction keeps it within budget) - **Storage**: `messages` table → loaded into context window at prompt time - **Capacity**: Limited by model context window (e.g. 128K tokens) - **When full**: Older messages are summarized into a `ConversationSummary` and replaced @@ -46,12 +45,12 @@ Agents have two distinct memory systems, mirroring how human memory works: - **What**: Explicitly saved facts, preferences, decisions, learnings - **Scope**: Per-agent, accessible across all conversations - **Lifetime**: Permanent — persists until explicitly deleted -- **Managed by**: `SaveMemory` / `RecallMemory` tools (Phase 3) +- **Managed by**: `SaveMemory` / `RecallMemory` tools - **Storage**: `agents/*/memory/YYYY-MM-DD.md` documents → chunked & embedded in `document_chunks` - **Capacity**: Unlimited (PostgreSQL + pgvector) - **Retrieval**: Semantic search (vector similarity), not loaded by default — agents must actively recall -### The Bridge: STM → LTM Promotion (Phase 5) +### The Bridge: STM → LTM Promotion Before conversation compaction discards older messages, the **Memory Flush** gives the agent a silent turn to review what's about to be lost and `save_memory` anything important. This is the automatic promotion path from short-term to long-term memory. @@ -61,28 +60,25 @@ Before conversation compaction discards older messages, the **Memory Flush** giv │ │ │ ┌──────────────────┐ flush ┌──────────────────┐ │ │ │ Short-Term (STM) │ ─────────► │ Long-Term (LTM) │ │ -│ │ │ Phase 5 │ │ │ +│ │ │ │ │ │ │ │ Conversation │ │ Saved memories │ │ │ │ messages in │ │ in document_chunks│ │ │ │ context window │ │ (pgvector) │ │ │ │ │ │ │ │ │ │ Compacted when │ recall │ Recalled via │ │ │ │ approaching limit │ ◄───────── │ semantic search │ │ -│ │ (Phase 4) │ Phase 3 │ on demand │ │ +│ │ │ │ on demand │ │ │ └──────────────────┘ └──────────────────┘ │ │ │ │ ┌──────────────────────────────────────────────────┐ │ │ │ Document Knowledge Base │ │ │ │ Shared workspace docs, indexed with embeddings │ │ │ │ Searchable via SearchDocuments (semantic mode) │ │ -│ │ (Phase 2) │ │ │ └──────────────────────────────────────────────────┘ │ └─────────────────────────────────────────────────────────┘ ``` ---- - -## Phase Dependencies +### Phase Dependencies ``` Phase 1 (Foundation) @@ -92,1846 +88,3 @@ Phase 1 (Foundation) └── Phase 4 (Conversation Compaction) — STM management └── Phase 5 (STM → LTM Flush) — requires Phase 3 + 4 ``` - ---- - -## Phase 1: Foundation — pgvector, Chunking & Embedding Services - -### Goal -Install pgvector, create the storage tables, and build the core chunking and embedding services that all later phases depend on. - -### Database - -**Migration: `create_document_chunks_table`** - -```php -// Enable pgvector extension -DB::statement('CREATE EXTENSION IF NOT EXISTS vector'); - -Schema::create('document_chunks', function (Blueprint $table) { - $table->uuid('id')->primary(); - $table->string('document_id'); // FK → documents.id - $table->text('content'); // The chunk text - $table->string('content_hash', 64); // SHA256 of content (for dedup) - $table->vector('embedding', 1536); // pgvector column - $table->string('collection')->default('general'); // 'general', 'memory', 'identity' - $table->string('agent_id')->nullable(); // Scoping: null = shared, set = agent-private - $table->integer('chunk_index')->default(0); // Position within the source document - $table->jsonb('metadata')->nullable(); // Title, path, dates, etc. - $table->timestamps(); - - $table->foreign('document_id')->references('id')->on('documents')->cascadeOnDelete(); - $table->foreign('agent_id')->references('id')->on('users')->nullOnDelete(); - - // Indexes - $table->index(['collection', 'agent_id']); - $table->index('content_hash'); - $table->index('document_id'); -}); - -// HNSW index for fast cosine similarity search -DB::statement('CREATE INDEX document_chunks_embedding_idx ON document_chunks USING hnsw (embedding vector_cosine_ops)'); -``` - -**Migration: `create_embedding_cache_table`** - -```php -Schema::create('embedding_cache', function (Blueprint $table) { - $table->string('id', 64)->primary(); // SHA256(provider + model + content) - $table->string('provider', 50); // e.g. 'openai' - $table->string('model', 100); // e.g. 'text-embedding-3-small' - $table->vector('embedding', 1536); - $table->timestamps(); -}); -``` - -### Models - -**`app/Models/DocumentChunk.php`** - -```php -class DocumentChunk extends Model -{ - use HasUuids; - - protected $fillable = [ - 'id', 'document_id', 'content', 'content_hash', 'embedding', - 'collection', 'agent_id', 'chunk_index', 'metadata', - ]; - - protected function casts(): array - { - return [ - 'embedding' => 'array', - 'metadata' => 'array', - ]; - } - - public function document(): BelongsTo - { - return $this->belongsTo(Document::class); - } - - public function agent(): BelongsTo - { - return $this->belongsTo(User::class, 'agent_id'); - } -} -``` - -**`app/Models/EmbeddingCache.php`** - -```php -class EmbeddingCache extends Model -{ - protected $table = 'embedding_cache'; - protected $keyType = 'string'; - public $incrementing = false; - - protected $fillable = ['id', 'provider', 'model', 'embedding']; - - protected function casts(): array - { - return ['embedding' => 'array']; - } - - /** - * Generate a cache key from provider + model + content. - */ - public static function cacheKey(string $provider, string $model, string $content): string - { - return hash('sha256', "{$provider}:{$model}:{$content}"); - } -} -``` - -### Services - -**`app/Services/Memory/ChunkingService.php`** - -Splits markdown text into overlapping chunks. Configuration comes from `config('memory.chunking')`. - -```php -class ChunkingService -{ - /** - * Split text into overlapping chunks. - * - * @return array Ordered list of chunk strings - */ - public function chunk(string $text): array - { - $maxSize = config('memory.chunking.max_chunk_size', 512); // tokens approx - $overlap = config('memory.chunking.chunk_overlap', 64); - $separator = config('memory.chunking.separator', "\n\n"); - - // 1. Split on separator (paragraph breaks) - $paragraphs = array_filter(explode($separator, $text), fn ($p) => trim($p) !== ''); - - // 2. Greedily merge paragraphs into chunks of ~maxSize tokens - // Estimate tokens as wordcount * 1.3 (conservative for English) - $chunks = []; - $current = ''; - - foreach ($paragraphs as $para) { - $candidate = $current === '' ? $para : $current . $separator . $para; - if ($this->estimateTokens($candidate) > $maxSize && $current !== '') { - $chunks[] = trim($current); - // Overlap: keep the last ~overlap tokens worth of text - $current = $this->takeTrailing($current, $overlap) . $separator . $para; - } else { - $current = $candidate; - } - } - - if (trim($current) !== '') { - $chunks[] = trim($current); - } - - return $chunks; - } - - private function estimateTokens(string $text): int - { - return (int) ceil(str_word_count($text) * 1.3); - } - - private function takeTrailing(string $text, int $tokenCount): string - { - $words = explode(' ', $text); - $wordCount = (int) ceil($tokenCount / 1.3); - return implode(' ', array_slice($words, -$wordCount)); - } -} -``` - -**`app/Services/Memory/EmbeddingService.php`** - -Generates embeddings via OpenAI (or other providers), with a database cache layer. - -```php -class EmbeddingService -{ - /** - * Get the embedding for a single text. - * Returns from cache if available, otherwise calls the API and caches. - * - * @return array Vector of floats (1536 dimensions) - */ - public function embed(string $text): array - { - $provider = config('memory.embedding.provider', 'openai'); - $model = config('memory.embedding.model', 'text-embedding-3-small'); - - $cacheKey = EmbeddingCache::cacheKey($provider, $model, $text); - $cached = EmbeddingCache::find($cacheKey); - - if ($cached) { - return $cached->embedding; - } - - $embedding = $this->callProvider($provider, $model, $text); - - EmbeddingCache::create([ - 'id' => $cacheKey, - 'provider' => $provider, - 'model' => $model, - 'embedding' => $embedding, - ]); - - return $embedding; - } - - /** - * Batch embed multiple texts. - * Checks cache first, only calls API for uncached texts. - * - * @return array> Embeddings in the same order as input - */ - public function embedBatch(array $texts): array - { - $provider = config('memory.embedding.provider', 'openai'); - $model = config('memory.embedding.model', 'text-embedding-3-small'); - - $results = []; - $uncached = []; - - foreach ($texts as $i => $text) { - $cacheKey = EmbeddingCache::cacheKey($provider, $model, $text); - $cached = EmbeddingCache::find($cacheKey); - if ($cached) { - $results[$i] = $cached->embedding; - } else { - $uncached[$i] = $text; - } - } - - if (!empty($uncached)) { - $batchEmbeddings = $this->callProviderBatch($provider, $model, array_values($uncached)); - $j = 0; - foreach ($uncached as $i => $text) { - $embedding = $batchEmbeddings[$j++]; - $results[$i] = $embedding; - - EmbeddingCache::create([ - 'id' => EmbeddingCache::cacheKey($provider, $model, $text), - 'provider' => $provider, - 'model' => $model, - 'embedding' => $embedding, - ]); - } - } - - ksort($results); - return $results; - } - - private function callProvider(string $provider, string $model, string $text): array - { - // OpenAI embeddings API call - // Use Laravel HTTP client: Http::withToken(config('services.openai.api_key')) - // ->post('https://api.openai.com/v1/embeddings', [...]) - // Return the embedding vector - } - - private function callProviderBatch(string $provider, string $model, array $texts): array - { - // OpenAI supports batch in a single request (up to 2048 inputs) - // Return array of embedding vectors - } -} -``` - -### Artisan Command - -**`app/Console/Commands/MemoryStatus.php`** - -```bash -php artisan memory:status -``` - -Displays: -- pgvector extension installed (yes/no) -- `document_chunks` row count, broken down by collection -- `embedding_cache` row count -- Config values from `config/memory.php` - -### Config Updates - -No changes needed --- `config/memory.php` already has the correct structure: -- `embedding.provider` = `openai` -- `embedding.model` = `text-embedding-3-small` -- `embedding.dimensions` = `1536` -- `chunking.max_chunk_size` = `512` -- `chunking.chunk_overlap` = `64` -- `chunking.separator` = `\n\n` - -### Tests - -| Test | What it verifies | -|------|-----------------| -| `ChunkingServiceTest` | Paragraphs split correctly, overlap works, empty input, single paragraph | -| `EmbeddingServiceTest` | Cache hit returns cached, cache miss calls API & caches, batch embedding | -| `MemoryStatusCommandTest` | Command runs without error, outputs expected info | -| Migration test | Tables created with correct columns, pgvector extension enabled | - -### Files to Create - -``` -database/migrations/YYYY_MM_DD_000001_create_document_chunks_table.php -database/migrations/YYYY_MM_DD_000002_create_embedding_cache_table.php -app/Models/DocumentChunk.php -app/Models/EmbeddingCache.php -app/Services/Memory/ChunkingService.php -app/Services/Memory/EmbeddingService.php -app/Console/Commands/MemoryStatus.php -tests/Feature/Services/Memory/ChunkingServiceTest.php -tests/Feature/Services/Memory/EmbeddingServiceTest.php -``` - -### Design Notes - -> **v2026.2.9 alignment:** OpenClaw changed batch embeddings to disabled by default (opt-in for backfills). Our `EmbeddingService.embedBatch()` remains available for `DocumentIndexingService.index()` in Phase 2 and the `memory:index-documents --fresh` backfill command, but individual embedding calls use `embed()`. This aligns with upstream's rationale: batch is mainly beneficial for large backfills; sync is adequate for incremental updates. - ---- - -## Phase 2: Document Embeddings — Index, Observe, Search - -### Goal -Automatically chunk and embed workspace documents so the `search_documents` tool can do semantic search in addition to keyword search. - -### Depends On -Phase 1 (ChunkingService, EmbeddingService, DocumentChunk model) - -### Services - -**`app/Services/Memory/DocumentIndexingService.php`** - -Orchestrates chunking + embedding + storage for a Document. - -```php -class DocumentIndexingService -{ - public function __construct( - private ChunkingService $chunker, - private EmbeddingService $embedder, - ) {} - - /** - * Index a document: chunk its content, embed, and store as DocumentChunks. - */ - public function index(Document $document, string $collection = 'general', ?string $agentId = null): void - { - // 1. Delete existing chunks for this document - DocumentChunk::where('document_id', $document->id)->delete(); - - if (empty(trim($document->content))) { - return; - } - - // 2. Chunk - $chunks = $this->chunker->chunk($document->content); - - // 3. Embed all chunks in batch - $embeddings = $this->embedder->embedBatch($chunks); - - // 4. Store - foreach ($chunks as $i => $chunkText) { - DocumentChunk::create([ - 'document_id' => $document->id, - 'content' => $chunkText, - 'content_hash' => hash('sha256', $chunkText), - 'embedding' => $embeddings[$i], - 'collection' => $collection, - 'agent_id' => $agentId, - 'chunk_index' => $i, - 'metadata' => [ - 'title' => $document->title, - 'updated_at' => $document->updated_at?->toIso8601String(), - ], - ]); - } - } - - /** - * Remove all chunks for a document. - */ - public function deindex(Document $document): void - { - DocumentChunk::where('document_id', $document->id)->delete(); - } - - /** - * Semantic search across document chunks. - * - * @return Collection Ordered by similarity (highest first) - */ - public function search( - string $query, - string $collection = 'general', - ?string $agentId = null, - int $limit = 10, - float $minSimilarity = 0.5, - ): Collection { - $queryEmbedding = $this->embedder->embed($query); - $vectorString = '[' . implode(',', $queryEmbedding) . ']'; - - $builder = DocumentChunk::query() - ->where('collection', $collection) - ->selectRaw('*, 1 - (embedding <=> ?) as similarity', [$vectorString]) - ->having('similarity', '>=', $minSimilarity) - ->orderByDesc('similarity') - ->limit($limit); - - if ($agentId !== null) { - $builder->where('agent_id', $agentId); - } else { - $builder->whereNull('agent_id'); - } - - return $builder->get(); - } -} -``` - -### Observer - -**`app/Observers/DocumentObserver.php`** - -```php -class DocumentObserver -{ - /** - * Auto-index when a non-folder document is saved/updated. - */ - public function saved(Document $document): void - { - if ($document->is_folder) { - return; - } - - // Determine collection based on document location - $collection = $this->resolveCollection($document); - $agentId = $this->resolveAgentId($document); - - IndexDocumentJob::dispatch($document, $collection, $agentId); - } - - public function deleted(Document $document): void - { - DocumentChunk::where('document_id', $document->id)->delete(); - } - - /** - * Resolve collection type based on document's folder hierarchy. - * - agents/{slug}/memory/* → 'memory' - * - agents/{slug}/identity/* → 'identity' - * - everything else → 'general' - */ - private function resolveCollection(Document $document): string - { - // Walk up the parent chain to detect if inside agents/*/memory or agents/*/identity - $parent = $document->parent; - while ($parent) { - if ($parent->title === 'memory' && $parent->is_folder) { - return 'memory'; - } - if ($parent->title === 'identity' && $parent->is_folder) { - return 'identity'; - } - $parent = $parent->parent; - } - return 'general'; - } - - /** - * Resolve agent owner if this document lives under agents/{slug}/. - */ - private function resolveAgentId(Document $document): ?string - { - // Walk up the parent chain to find the agent folder - $parent = $document->parent; - while ($parent) { - if ($parent->parent?->title === 'agents') { - // This is the agent folder — find the agent by slug - $agent = User::where('is_agent', true) - ->whereRaw("LOWER(REPLACE(name, ' ', '-')) = ?", [strtolower($parent->title)]) - ->first(); - return $agent?->id; - } - $parent = $parent->parent; - } - return null; - } -} -``` - -Register in `AppServiceProvider::boot()`: -```php -Document::observe(DocumentObserver::class); -``` - -### Job - -**`app/Jobs/IndexDocumentJob.php`** - -```php -class IndexDocumentJob implements ShouldQueue -{ - use Dispatchable, InteractsWithQueue, Queueable, SerializesModels; - - public int $tries = 3; - public array $backoff = [10, 30]; - - public function __construct( - private Document $document, - private string $collection = 'general', - private ?string $agentId = null, - ) {} - - public function handle(DocumentIndexingService $indexer): void - { - $indexer->index($this->document, $this->collection, $this->agentId); - } -} -``` - -### SearchDocuments Tool Update - -Add a `mode` parameter to the existing `SearchDocuments` tool (`app/Agents/Tools/Docs/SearchDocuments.php`): - -```php -public function schema(JsonSchema $schema): array -{ - return [ - 'query' => $schema->string()->description('The search query.')->required(), - 'mode' => $schema->string()->description( - 'Search mode: "keyword" (ILIKE), "semantic" (vector similarity), or "auto" (semantic first, keyword fallback). Default: auto.' - ), - 'limit' => $schema->integer()->description('Maximum number of results. Default: 5.'), - ]; -} -``` - -In `handle()`, when `mode` is `semantic` or `auto`: -1. Call `DocumentIndexingService::search($query, 'general', null, $limit)` -2. Map results to the same output format (title, snippet, ID) -3. If `auto` and semantic returns < 2 results, fall back to keyword search and merge - -### Artisan Command - -**`app/Console/Commands/MemoryIndexDocuments.php`** - -```bash -php artisan memory:index-documents [--fresh] -``` - -- Iterates all non-folder documents -- Dispatches `IndexDocumentJob` for each -- `--fresh` flag deletes all existing chunks first -- Shows progress bar - -### Tests - -| Test | What it verifies | -|------|-----------------| -| `DocumentIndexingServiceTest` | Index creates chunks, deindex removes them, search returns ranked results | -| `DocumentObserverTest` | Save triggers index job, delete removes chunks, folders are skipped | -| `SearchDocumentsSemanticTest` | Semantic mode returns relevant results, auto fallback works | -| `IndexDocumentJobTest` | Job calls indexer with correct parameters | - -### Files to Create/Modify - -``` -app/Services/Memory/DocumentIndexingService.php (new) -app/Observers/DocumentObserver.php (new) -app/Jobs/IndexDocumentJob.php (new) -app/Console/Commands/MemoryIndexDocuments.php (new) -app/Agents/Tools/Docs/SearchDocuments.php (modify — add mode param) -app/Providers/AppServiceProvider.php (modify — register observer) -tests/Feature/Services/Memory/DocumentIndexingServiceTest.php -tests/Feature/Observers/DocumentObserverTest.php -``` - ---- - -## Phase 3: Long-Term Memory (LTM) — Save & Recall Tools - -### Goal -Give agents a **long-term memory** system: `save_memory` to persist durable memories that survive across conversations, and `recall_memory` to semantically search them on demand. Unlike short-term memory (the conversation context), LTM is permanent and must be explicitly saved and recalled. - -### Depends On -Phase 1 (ChunkingService, EmbeddingService), Phase 2 (DocumentIndexingService) - -### LTM File Layout (OpenClaw Pattern) - -Agents have two LTM storage layers, mirroring OpenClaw's approach: - -| File | Purpose | Loaded | Write pattern | -|------|---------|--------|---------------| -| `agents/{slug}/identity/MEMORY.md` | **Curated** long-term memory — high-value preferences, decisions, key facts | Always (part of system prompt via identity files) | Overwrite/update (curated) | -| `agents/{slug}/memory/YYYY-MM-DD.md` | **Daily logs** — running context, timestamped entries | Not auto-loaded; searchable via `recall_memory` | Append-only | - -The key difference: `MEMORY.md` is always in the agent's system prompt (already loaded by `OpenCompanyAgent::instructions()`), so its contents are "always remembered." Daily logs must be actively recalled via semantic search. - -The `save_memory` tool writes to daily logs by default (`target: "log"`). For high-value curated info, the agent can write to `MEMORY.md` (`target: "core"`), which updates the persistent identity file. - -### Tools - -**`app/Agents/Tools/Memory/SaveMemory.php`** - -```php -class SaveMemory implements Tool -{ - public function __construct( - private User $agent, - private AgentDocumentService $docService, - private DocumentIndexingService $indexer, - ) {} - - public function description(): string - { - return 'Save a durable memory that persists across conversations. Use target "core" for high-value facts that should always be remembered (written to MEMORY.md), or "log" for timestamped daily entries (searchable via recall_memory).'; - } - - public function handle(Request $request): string - { - $content = $request['content']; - $category = $request['category'] ?? 'general'; - $target = $request['target'] ?? 'log'; - - if ($target === 'core') { - // Write to MEMORY.md (curated, always in system prompt) - $memoryFile = $this->docService->getIdentityFile($this->agent, 'MEMORY'); - if (!$memoryFile) { - return 'Error: MEMORY.md not found for this agent.'; - } - - // Append to MEMORY.md under the appropriate section - $newContent = $memoryFile->content . "\n\n### {$category}\n{$content}"; - $this->docService->updateIdentityFile($this->agent, 'MEMORY', $newContent); - - // Index for semantic search - $this->indexer->index($memoryFile->fresh(), 'identity', $this->agent->id); - - return "Core memory saved to MEMORY.md (always loaded in system prompt)."; - } - - // Default: write to daily log - $entry = "### [{$category}] " . now()->format('H:i') . "\n\n{$content}"; - $doc = $this->docService->createMemoryLog($this->agent, $entry); - - if (!$doc) { - return 'Error: Could not save memory. Agent document structure may not be initialized.'; - } - - // Index the memory for semantic recall - $this->indexer->index($doc, 'memory', $this->agent->id); - - return "Memory saved to {$doc->title} (recallable via recall_memory)."; - } - - public function schema(JsonSchema $schema): array - { - return [ - 'content' => $schema->string() - ->description('The memory content to save. Be specific and include context.') - ->required(), - 'category' => $schema->string() - ->description('Category tag: "preference", "decision", "learning", "fact", or "general". Default: general.'), - 'target' => $schema->string() - ->description('Where to save: "core" writes to MEMORY.md (always loaded in your system prompt — use for high-value durable facts), "log" appends to daily log (searchable via recall_memory — use for running context). Default: log.'), - ]; - } -} -``` - -**`app/Agents/Tools/Memory/RecallMemory.php`** - -```php -class RecallMemory implements Tool -{ - public function __construct( - private User $agent, - private DocumentIndexingService $indexer, - ) {} - - public function description(): string - { - return 'Search your long-term memory for relevant past information, decisions, and learnings.'; - } - - public function handle(Request $request): string - { - $query = $request['query']; - $limit = $request['limit'] ?? 6; - - $results = $this->indexer->search( - query: $query, - collection: 'memory', - agentId: $this->agent->id, - limit: $limit, - minSimilarity: config('memory.search.min_similarity', 0.5), - ); - - if ($results->isEmpty()) { - return "No memories found matching '{$query}'."; - } - - // Apply result clamping: max snippet chars = 700, max total injected = 4000 - $maxSnippet = 700; - $maxTotal = 4000; - $totalChars = 0; - $output = []; - - foreach ($results as $chunk) { - $snippet = Str::limit($chunk->content, $maxSnippet); - $date = $chunk->metadata['updated_at'] ?? 'unknown date'; - $similarity = round($chunk->similarity * 100); - $entry = "**{$date}** ({$similarity}% match)\n{$snippet}"; - - if ($totalChars + strlen($entry) > $maxTotal) { - break; - } - $output[] = $entry; - $totalChars += strlen($entry); - } - - return "Found " . count($output) . " memory/memories:\n\n" . implode("\n\n---\n\n", $output); - } - - public function schema(JsonSchema $schema): array - { - return [ - 'query' => $schema->string() - ->description('What to search for in your memories.') - ->required(), - 'limit' => $schema->integer() - ->description('Maximum number of memories to return. Default: 6.'), - ]; - } -} -``` - -### ToolRegistry Updates - -In `app/Agents/Tools/ToolRegistry.php`: - -1. Add to `APP_GROUPS`: -```php -'memory' => [ - 'tools' => ['save_memory', 'recall_memory'], - 'label' => 'save, recall', - 'description' => 'Long-term agent memory', -], -``` - -2. Add to `APP_ICONS`: -```php -'memory' => 'ph:brain', -``` - -3. Add to `TOOL_MAP`: -```php -'save_memory' => [ - 'class' => SaveMemory::class, - 'type' => 'write', - 'name' => 'Save Memory', - 'description' => 'Save a durable memory that persists across conversations.', - 'icon' => 'ph:brain', -], -'recall_memory' => [ - 'class' => RecallMemory::class, - 'type' => 'read', - 'name' => 'Recall Memory', - 'description' => 'Search long-term memory for past information and learnings.', - 'icon' => 'ph:brain', -], -``` - -4. Add to `instantiateTool()` match block: -```php -SaveMemory::class => new SaveMemory($agent, app(AgentDocumentService::class), app(DocumentIndexingService::class)), -RecallMemory::class => new RecallMemory($agent, app(DocumentIndexingService::class)), -``` - -5. Add `'memory'` to the `$displayOrder` in `getAppCatalog()` (after `'agents'`): -```php -['agents', 'memory', 'chat', 'docs', 'tables', 'calendar', 'lists', 'workspace', null], -``` - -### System Prompt Update - -Add memory usage guidance to `OpenCompanyAgent::instructions()`. Append to the system prompt after the MEMORY.md identity file section. See **Appendix B** for the complete system prompt text. - -The system prompt covers: -1. STM vs LTM mental model (what the agent controls vs what's automatic) -2. `save_memory` with `target` ("core" vs "log") and clear guidance on when to use each -3. `recall_memory` with explicit triggers (prior work questions, complex tasks, referenced conversations) -4. A save/don't-save decision guide -5. Clear instruction: "If someone says remember this — save it immediately" - -Key design choice (differs from OpenClaw): Our system prompt is **proactive about saving**, not just recall. OpenClaw's system prompt only mentions recall (`"Before answering anything about prior work... run memory_search"`), relying on docs and flush prompts for save behavior. Our agents don't have filesystem access and can't read docs on demand, so the system prompt must be self-contained. - -### Backfill Command - -**`app/Console/Commands/MemoryIndexLogs.php`** - -```bash -php artisan memory:index-logs [--agent=slug] -``` - -- Finds all memory log documents under `agents/*/memory/` -- Dispatches `IndexDocumentJob` for each with `collection='memory'` and the correct `agent_id` -- Optional `--agent` flag to limit to a specific agent - -### Tests - -| Test | What it verifies | -|------|-----------------| -| `SaveMemoryTest` | Persists to daily log, indexes for recall, handles missing doc structure | -| `RecallMemoryTest` | Returns ranked results, respects agent scoping, applies result clamping | -| `MemoryIndexLogsCommandTest` | Backfill command finds logs and dispatches jobs | - -### Files to Create/Modify - -``` -app/Agents/Tools/Memory/SaveMemory.php (new) -app/Agents/Tools/Memory/RecallMemory.php (new) -app/Agents/Tools/ToolRegistry.php (modify — register tools) -app/Agents/OpenCompanyAgent.php (modify — add memory guidance) -app/Console/Commands/MemoryIndexLogs.php (new) -tests/Feature/Tools/SaveMemoryTest.php -tests/Feature/Tools/RecallMemoryTest.php -``` - ---- - -## Phase 4: Short-Term Memory (STM) — Conversation Compaction - -### Goal -Manage the agent's **short-term memory** (conversation context window) by summarizing older messages when conversations get long. Without compaction, STM simply drops older messages — compaction preserves them as compressed summaries. Summaries are cumulative --- each compaction builds on the previous summary. - -### Depends On -Phase 1 (foundation only --- compaction doesn't require embeddings) - -### Database - -**Migration: `create_conversation_summaries_table`** - -```php -Schema::create('conversation_summaries', function (Blueprint $table) { - $table->uuid('id')->primary(); - $table->string('channel_id'); - $table->string('agent_id'); - $table->longText('summary'); // The cumulative summary text - $table->integer('tokens_before')->default(0); // Token count before compaction - $table->integer('tokens_after')->default(0); // Token count after compaction - $table->integer('compaction_count')->default(0); // How many compaction cycles - $table->integer('messages_summarized')->default(0); // Total messages folded in - $table->string('last_message_id')->nullable(); // Last message included in summary - $table->integer('flush_count')->default(0); // Pre-compaction flushes done (for Phase 5) - $table->timestamps(); - - $table->foreign('channel_id')->references('id')->on('channels')->cascadeOnDelete(); - $table->foreign('agent_id')->references('id')->on('users')->cascadeOnDelete(); - - $table->unique(['channel_id', 'agent_id']); -}); -``` - -### Config Updates - -Add `compaction` section to `config/memory.php`: - -```php -'compaction' => [ - 'enabled' => env('MEMORY_COMPACTION_ENABLED', true), - 'threshold_ratio' => 0.75, // Compact when at 75% of context window - 'keep_ratio' => 0.4, // Keep the most recent 40% of messages - 'context_window' => (int) env('MEMORY_CONTEXT_WINDOW', 128000), // Default token budget - 'summary_model' => env('MEMORY_SUMMARY_MODEL', 'claude-sonnet-4-5-20250929'), - 'summary_max_tokens' => 2000, -], -``` - -> **Design note (v2026.2.9):** OpenClaw added `memory.qmd.update.waitForBootSync` (default `false`) to control whether QMD initialization blocks gateway startup. This is not needed in our architecture — pgvector indexes are always available via PostgreSQL with no cold-start model download or index warm-up step. - -### Model - -**`app/Models/ConversationSummary.php`** - -```php -class ConversationSummary extends Model -{ - use HasUuids; - - protected $fillable = [ - 'id', 'channel_id', 'agent_id', 'summary', - 'tokens_before', 'tokens_after', 'compaction_count', - 'messages_summarized', 'last_message_id', 'flush_count', - ]; - - public function channel(): BelongsTo - { - return $this->belongsTo(Channel::class); - } - - public function agent(): BelongsTo - { - return $this->belongsTo(User::class, 'agent_id'); - } -} -``` - -### Service - -**`app/Services/Memory/ConversationCompactionService.php`** - -```php -class ConversationCompactionService -{ - /** - * Check if compaction is needed for a channel/agent pair. - */ - public function needsCompaction(string $channelId, User $agent, iterable $messages): bool - { - if (!config('memory.compaction.enabled', true)) { - return false; - } - - $totalTokens = $this->estimateTokens($messages); - $threshold = config('memory.compaction.context_window', 128000) - * config('memory.compaction.threshold_ratio', 0.75); - - return $totalTokens > $threshold; - } - - /** - * Perform compaction: summarize older messages, return the summary. - */ - public function compact(string $channelId, User $agent, array $messages): ConversationSummary - { - $keepRatio = config('memory.compaction.keep_ratio', 0.4); - $keepCount = max(3, (int) ceil(count($messages) * $keepRatio)); - $splitIndex = count($messages) - $keepCount; - - // Split: older messages to summarize, recent messages to keep - $toSummarize = array_slice($messages, 0, $splitIndex); - - // Get existing summary (cumulative) - $existing = ConversationSummary::where('channel_id', $channelId) - ->where('agent_id', $agent->id) - ->first(); - - // Build summarization prompt - $previousSummary = $existing?->summary ?? ''; - $summaryText = $this->summarize($toSummarize, $previousSummary); - - $tokensBefore = $this->estimateTokens($messages); - - // Upsert summary - $summary = ConversationSummary::updateOrCreate( - ['channel_id' => $channelId, 'agent_id' => $agent->id], - [ - 'summary' => $summaryText, - 'tokens_before' => $tokensBefore, - 'tokens_after' => $this->estimateTokenCount($summaryText), - 'compaction_count' => ($existing?->compaction_count ?? 0) + 1, - 'messages_summarized' => ($existing?->messages_summarized ?? 0) + count($toSummarize), - 'last_message_id' => end($toSummarize)?->id ?? $existing?->last_message_id, - ] - ); - - return $summary; - } - - /** - * Summarize messages using an LLM call. - */ - private function summarize(array $messages, string $previousSummary): string - { - $prompt = "You are summarizing a conversation for an AI agent's context window.\n\n"; - - if ($previousSummary) { - $prompt .= "Previous summary of even older messages:\n{$previousSummary}\n\n"; - } - - $prompt .= "Messages to summarize:\n"; - foreach ($messages as $msg) { - $role = $msg->role->value ?? 'unknown'; - $content = $msg->content ?? ''; - $prompt .= "[{$role}]: {$content}\n"; - } - - $prompt .= "\nCreate a concise summary that captures:\n"; - $prompt .= "- Key topics discussed\n- Decisions made\n- Action items\n- Important context\n"; - $prompt .= "- User preferences expressed\n\n"; - $prompt .= "Be factual and specific. Preserve names, dates, and technical details."; - - // Call LLM for summarization - // Use the configured summary model - $model = config('memory.compaction.summary_model'); - $maxTokens = config('memory.compaction.summary_max_tokens', 2000); - - // Use Laravel AI SDK to generate summary - // Return the summary text - } - - private function estimateTokens(iterable $messages): int - { - $total = 0; - foreach ($messages as $msg) { - $content = $msg->content ?? ''; - $total += (int) ceil(str_word_count($content) * 1.3); - } - return $total; - } - - private function estimateTokenCount(string $text): int - { - return (int) ceil(str_word_count($text) * 1.3); - } -} -``` - -### ChannelConversationLoader Update - -Modify `app/Agents/Conversations/ChannelConversationLoader.php` to: - -1. **Prepend existing summary** as the first message if one exists -2. **Only load messages after** the last summarized message -3. **Dispatch compaction job** when approaching the threshold - -```php -class ChannelConversationLoader -{ - private const DEFAULT_LIMIT = 50; // Increase from 20 to load more for compaction context - - public function __construct( - private ConversationCompactionService $compactionService, - ) {} - - public function load(string $channelId, User $agent, int $limit = self::DEFAULT_LIMIT): iterable - { - $sdkMessages = []; - - // 1. Check for existing summary - $summary = ConversationSummary::where('channel_id', $channelId) - ->where('agent_id', $agent->id) - ->first(); - - if ($summary && !empty($summary->summary)) { - // Prepend summary as a system-style user message - $sdkMessages[] = new UserMessage( - "[Conversation Summary — {$summary->messages_summarized} prior messages, " - . "{$summary->compaction_count} compaction(s)]\n\n{$summary->summary}" - ); - } - - // 2. Load messages (after last summarized message if applicable) - $query = Message::where('channel_id', $channelId) - ->orderBy('created_at', 'desc') - ->take($limit); - - if ($summary?->last_message_id) { - $lastMsg = Message::find($summary->last_message_id); - if ($lastMsg) { - $query->where('created_at', '>', $lastMsg->created_at); - } - } - - $messages = $query->get()->reverse()->values(); - - foreach ($messages as $message) { - if (empty($message->content)) { - continue; - } - - if ($message->author_id === $agent->id) { - $sdkMessages[] = new AssistantMessage($message->content); - } else { - $author = $message->author; - $authorName = $author->name ?? 'User'; - $sdkMessages[] = new UserMessage("[{$authorName}]: {$message->content}"); - } - } - - // 3. Check if compaction is needed (dispatch async) - if ($this->compactionService->needsCompaction($channelId, $agent, $sdkMessages)) { - CompactConversationJob::dispatch($channelId, $agent); - } - - return $sdkMessages; - } -} -``` - -### Job - -**`app/Jobs/CompactConversationJob.php`** - -```php -class CompactConversationJob implements ShouldQueue -{ - use Dispatchable, InteractsWithQueue, Queueable, SerializesModels; - - public int $tries = 2; - public int $timeout = 120; - - public function __construct( - private string $channelId, - private User $agent, - ) {} - - public function handle(ConversationCompactionService $compactor): void - { - // Load all raw messages for this channel - $messages = Message::where('channel_id', $this->channelId) - ->orderBy('created_at', 'asc') - ->get(); - - // Convert to SDK message format for the compactor - $sdkMessages = []; - foreach ($messages as $msg) { - if (empty($msg->content)) continue; - if ($msg->author_id === $this->agent->id) { - $sdkMessages[] = new AssistantMessage($msg->content); - } else { - $sdkMessages[] = new UserMessage($msg->content); - } - } - - $compactor->compact($this->channelId, $this->agent, $sdkMessages); - } -} -``` - -### Tests - -| Test | What it verifies | -|------|-----------------| -| `ConversationCompactionServiceTest` | Threshold detection, message splitting at keep_ratio, summary generation, cumulative summaries | -| `ChannelConversationLoaderTest` | Summary prepended, only post-summary messages loaded, compaction job dispatched | -| `CompactConversationJobTest` | Job loads messages, calls compactor, stores summary | - -### Files to Create/Modify - -``` -database/migrations/YYYY_MM_DD_000003_create_conversation_summaries_table.php -app/Models/ConversationSummary.php (new) -app/Services/Memory/ConversationCompactionService.php (new) -app/Jobs/CompactConversationJob.php (new) -app/Agents/Conversations/ChannelConversationLoader.php (modify) -config/memory.php (modify — add compaction section) -tests/Feature/Services/Memory/ConversationCompactionServiceTest.php -tests/Feature/Agents/Conversations/ChannelConversationLoaderTest.php -``` - ---- - -## Phase 5: STM → LTM Promotion — Pre-Compaction Memory Flush - -### Goal -**Bridge short-term and long-term memory.** Before compaction summarizes (and lossy-compresses) older messages from STM, give the agent a silent turn to promote important information to LTM via `save_memory`. This ensures key facts, preferences, and decisions survive compaction intact rather than being compressed into a summary. - -### Depends On -Phase 3 (SaveMemory tool), Phase 4 (ConversationCompactionService) - -### Config Updates - -Add `memory_flush` section to `config/memory.php`: - -```php -'memory_flush' => [ - 'enabled' => env('MEMORY_FLUSH_ENABLED', true), - 'soft_threshold_tokens' => 4000, // Trigger flush this many tokens before compaction threshold - 'max_flushes_per_cycle' => 1, // Prevent repeated flushes per compaction cycle -], -``` - -### Service - -**`app/Services/Memory/MemoryFlushService.php`** - -```php -class MemoryFlushService -{ - public function __construct( - private ConversationCompactionService $compactionService, - ) {} - - /** - * Check if a memory flush should be triggered. - * Flush happens when we're within soft_threshold_tokens of the compaction threshold, - * AND we haven't already flushed for this compaction cycle. - */ - public function shouldFlush(string $channelId, User $agent, iterable $messages): bool - { - if (!config('memory.memory_flush.enabled', true)) { - return false; - } - - $summary = ConversationSummary::where('channel_id', $channelId) - ->where('agent_id', $agent->id) - ->first(); - - $maxFlushes = config('memory.memory_flush.max_flushes_per_cycle', 1); - if ($summary && $summary->flush_count >= $maxFlushes) { - return false; - } - - $totalTokens = $this->estimateTokens($messages); - $compactionThreshold = config('memory.compaction.context_window', 128000) - * config('memory.compaction.threshold_ratio', 0.75); - $softThreshold = $compactionThreshold - config('memory.memory_flush.soft_threshold_tokens', 4000); - - return $totalTokens > $softThreshold && $totalTokens < $compactionThreshold; - } - - /** - * Execute a silent memory flush: run the agent with a flush prompt - * that instructs it to save important memories. - */ - public function flush(string $channelId, User $agent): void - { - $flushPrompt = $this->buildFlushPrompt(); - - // Create a transient agent instance with the flush prompt - $agentInstance = OpenCompanyAgent::for($agent, $channelId); - - // Run a silent prompt — the agent should use save_memory tool calls - // but the text response is discarded (not posted to the channel) - $response = $agentInstance->prompt($flushPrompt); - - // Increment flush count to prevent re-flushing - ConversationSummary::where('channel_id', $channelId) - ->where('agent_id', $agent->id) - ->increment('flush_count'); - - Log::info('Memory flush completed', [ - 'agent' => $agent->name, - 'channel' => $channelId, - 'tool_calls' => $response->toolCalls->count(), - ]); - } - - private function buildFlushPrompt(): string - { - return <<<'PROMPT' - Pre-compaction memory flush. Your conversation context is about to be compacted - (older messages will be summarized and compressed). - - Review the conversation for durable context worth preserving. Use save_memory - (target: "log") to save important observations, decisions, preferences, or - learnings to your daily log before they are compressed. - - Only save to target: "core" if you discovered truly high-value permanent facts - (user preferences, key decisions) that should always be in your system prompt. - - If nothing needs saving, respond with exactly: [FLUSH_COMPLETE] - PROMPT; - } - - private function estimateTokens(iterable $messages): int - { - $total = 0; - foreach ($messages as $msg) { - $content = $msg->content ?? ''; - $total += (int) ceil(str_word_count($content) * 1.3); - } - return $total; - } -} -``` - -### AgentRespondJob Integration - -Hook into `app/Jobs/AgentRespondJob.php` before the `prompt()` call (line 136): - -```php -// === Memory flush check (before prompting) === -try { - $flushService = app(MemoryFlushService::class); - $currentMessages = $agentInstance->messages(); - if ($flushService->shouldFlush($this->channelId, $this->agent, $currentMessages)) { - $flushStep = $task->addStep('Flushing memories before compaction', 'action'); - $flushStep->start(); - $flushService->flush($this->channelId, $this->agent); - $flushStep->complete(); - } -} catch (\Throwable $e) { - Log::warning('Memory flush failed', ['error' => $e->getMessage()]); -} -// === End memory flush === - -$response = $agentInstance->prompt($this->userMessage->content); -``` - -### Tests - -| Test | What it verifies | -|------|-----------------| -| `MemoryFlushServiceTest` | Detects soft threshold, respects max_flushes_per_cycle, builds correct prompt | -| `AgentRespondJobFlushTest` | Flush triggered before prompt when threshold met, not triggered when below threshold | - -### Files to Create/Modify - -``` -app/Services/Memory/MemoryFlushService.php (new) -app/Jobs/AgentRespondJob.php (modify — add flush hook) -config/memory.php (modify — add memory_flush section) -tests/Feature/Services/Memory/MemoryFlushServiceTest.php -tests/Feature/Jobs/AgentRespondJobFlushTest.php -``` - ---- - -## Phase 6: Hybrid Search (BM25 + Vector) - -### Goal -Combine vector similarity search with PostgreSQL full-text search (BM25-equivalent) for more robust retrieval. Some queries work better with exact keyword matching, others with semantic understanding --- hybrid search gives the best of both. - -### Depends On -Phase 1 (DocumentChunk model), Phase 2 (DocumentIndexingService), Phase 3 (RecallMemory tool) - -### Database - -**Migration: `add_search_vector_to_document_chunks`** - -```php -// Add tsvector column with GIN index -Schema::table('document_chunks', function (Blueprint $table) { - $table->addColumn('tsvector', 'search_vector')->nullable(); -}); - -// Create GIN index for fast full-text search -DB::statement('CREATE INDEX document_chunks_search_vector_idx ON document_chunks USING GIN (search_vector)'); - -// Create trigger to auto-populate search_vector on insert/update -DB::statement(" - CREATE OR REPLACE FUNCTION document_chunks_search_vector_update() RETURNS trigger AS $$ - BEGIN - NEW.search_vector := to_tsvector('english', COALESCE(NEW.content, '')); - RETURN NEW; - END - $$ LANGUAGE plpgsql; -"); - -DB::statement(" - CREATE TRIGGER document_chunks_search_vector_trigger - BEFORE INSERT OR UPDATE OF content ON document_chunks - FOR EACH ROW EXECUTE FUNCTION document_chunks_search_vector_update(); -"); - -// Backfill existing rows -DB::statement("UPDATE document_chunks SET search_vector = to_tsvector('english', COALESCE(content, ''))"); -``` - -### Service - -**`app/Services/Memory/HybridSearchService.php`** - -```php -class HybridSearchService -{ - public function __construct( - private EmbeddingService $embedder, - ) {} - - /** - * Hybrid search: combine vector similarity with full-text search. - * - * @return Collection Ranked results with combined scores - */ - public function search( - string $query, - string $collection = 'general', - ?string $agentId = null, - int $limit = 6, - float $minSimilarity = 0.5, - ): Collection { - $semanticWeight = config('memory.search.hybrid_weights.semantic', 0.7); - $keywordWeight = config('memory.search.hybrid_weights.keyword', 0.3); - $maxSnippetChars = 700; - $maxInjectedChars = 4000; - - // 1. Vector search - $vectorResults = $this->vectorSearch($query, $collection, $agentId, $limit * 2, $minSimilarity); - - // 2. Full-text search - $ftsResults = $this->ftsSearch($query, $collection, $agentId, $limit * 2); - - // 3. Merge by chunk ID with weighted scores - $merged = $this->mergeResults($vectorResults, $ftsResults, $semanticWeight, $keywordWeight); - - // 4. Sort by combined score, apply limits - $ranked = $merged->sortByDesc('score')->take($limit); - - // 5. Apply result clamping - $totalChars = 0; - $clamped = $ranked->filter(function ($result) use ($maxSnippetChars, $maxInjectedChars, &$totalChars) { - $snippet = Str::limit($result['content'], $maxSnippetChars); - if ($totalChars + strlen($snippet) > $maxInjectedChars) { - return false; - } - $totalChars += strlen($snippet); - return true; - }); - - return $clamped->values(); - } - - private function vectorSearch( - string $query, - string $collection, - ?string $agentId, - int $limit, - float $minSimilarity, - ): Collection { - $queryEmbedding = $this->embedder->embed($query); - $vectorString = '[' . implode(',', $queryEmbedding) . ']'; - - $builder = DocumentChunk::query() - ->where('collection', $collection) - ->selectRaw('id, document_id, content, metadata, 1 - (embedding <=> ?) as vector_score', [$vectorString]) - ->having('vector_score', '>=', $minSimilarity) - ->orderByDesc('vector_score') - ->limit($limit); - - if ($agentId !== null) { - $builder->where('agent_id', $agentId); - } else { - $builder->whereNull('agent_id'); - } - - return $builder->get(); - } - - private function ftsSearch( - string $query, - string $collection, - ?string $agentId, - int $limit, - ): Collection { - $tsQuery = $this->buildTsQuery($query); - - $builder = DocumentChunk::query() - ->where('collection', $collection) - ->whereRaw('search_vector @@ to_tsquery(\'english\', ?)', [$tsQuery]) - ->selectRaw( - 'id, document_id, content, metadata, ts_rank(search_vector, to_tsquery(\'english\', ?)) as fts_score', - [$tsQuery] - ) - ->orderByDesc('fts_score') - ->limit($limit); - - if ($agentId !== null) { - $builder->where('agent_id', $agentId); - } else { - $builder->whereNull('agent_id'); - } - - return $builder->get(); - } - - /** - * Merge vector and FTS results by chunk ID with weighted scoring. - * Normalizes scores to [0, 1] range before combining. - */ - private function mergeResults( - Collection $vectorResults, - Collection $ftsResults, - float $semanticWeight, - float $keywordWeight, - ): Collection { - $merged = collect(); - - // Normalize vector scores - $maxVector = $vectorResults->max('vector_score') ?: 1; - $vectorNormalized = $vectorResults->keyBy('id')->map(fn ($r) => [ - 'id' => $r->id, - 'document_id' => $r->document_id, - 'content' => $r->content, - 'metadata' => $r->metadata, - 'vector_score' => $r->vector_score / $maxVector, - 'fts_score' => 0, - ]); - - // Normalize FTS scores - $maxFts = $ftsResults->max('fts_score') ?: 1; - foreach ($ftsResults as $result) { - $normalizedFts = $result->fts_score / $maxFts; - - if ($vectorNormalized->has($result->id)) { - // Merge: chunk appears in both results - $existing = $vectorNormalized->get($result->id); - $existing['fts_score'] = $normalizedFts; - $vectorNormalized->put($result->id, $existing); - } else { - $vectorNormalized->put($result->id, [ - 'id' => $result->id, - 'document_id' => $result->document_id, - 'content' => $result->content, - 'metadata' => $result->metadata, - 'vector_score' => 0, - 'fts_score' => $normalizedFts, - ]); - } - } - - // Calculate combined scores - return $vectorNormalized->map(function ($r) use ($semanticWeight, $keywordWeight) { - $r['score'] = ($r['vector_score'] * $semanticWeight) + ($r['fts_score'] * $keywordWeight); - return $r; - })->values(); - } - - /** - * Convert a natural language query to a PostgreSQL tsquery. - * Splits on spaces, joins with & (AND). - */ - private function buildTsQuery(string $query): string - { - $words = array_filter(explode(' ', trim($query)), fn ($w) => strlen($w) > 1); - return implode(' & ', array_map(fn ($w) => preg_replace('/[^a-zA-Z0-9]/', '', $w), $words)); - } -} -``` - -### Integration - -1. **Replace vector-only search in `DocumentIndexingService::search()`** with a call to `HybridSearchService::search()`, or make `DocumentIndexingService` delegate to `HybridSearchService` internally. - -2. **Update `RecallMemory` tool** to use `HybridSearchService` instead of `DocumentIndexingService::search()`: - -```php -// Before (Phase 3): -$results = $this->indexer->search($query, 'memory', $this->agent->id, $limit); - -// After (Phase 6): -$results = $this->hybridSearch->search($query, 'memory', $this->agent->id, $limit); -``` - -3. **Update `SearchDocuments` tool** semantic mode to use `HybridSearchService`. - -### Tests - -| Test | What it verifies | -|------|-----------------| -| `HybridSearchServiceTest` | Vector-only results, FTS-only results, merged results with correct weighting | -| `ScoreNormalizationTest` | Scores normalized to [0,1], combined scores correct | -| `ResultClampingTest` | maxResults, maxSnippetChars, maxInjectedChars all enforced | -| `TsQueryBuildingTest` | Natural language converted to valid tsquery | -| Migration test | tsvector column created, GIN index exists, trigger fires on insert/update | - -### Files to Create/Modify - -``` -database/migrations/YYYY_MM_DD_000004_add_search_vector_to_document_chunks.php -app/Services/Memory/HybridSearchService.php (new) -app/Services/Memory/DocumentIndexingService.php (modify — delegate to hybrid) -app/Agents/Tools/Memory/RecallMemory.php (modify — use hybrid search) -app/Agents/Tools/Docs/SearchDocuments.php (modify — use hybrid in semantic mode) -tests/Feature/Services/Memory/HybridSearchServiceTest.php -``` - ---- - -## Summary - -| Phase | Memory Type | What | Key Files | Depends On | -|-------|-------------|------|-----------|------------| -| 1 | Infrastructure | pgvector + ChunkingService + EmbeddingService | `Services/Memory/Chunking*.php`, `Services/Memory/Embedding*.php` | --- | -| 2 | Knowledge Base | Document indexing + observer + semantic search | `Services/Memory/DocumentIndexing*.php`, `Observers/DocumentObserver.php` | Phase 1 | -| 3 | **LTM** | SaveMemory + RecallMemory agent tools | `Tools/Memory/SaveMemory.php`, `Tools/Memory/RecallMemory.php` | Phase 1, 2 | -| 4 | **STM** | Conversation compaction + summaries | `Services/Memory/ConversationCompaction*.php`, `ChannelConversationLoader.php` | Phase 1 | -| 5 | **STM → LTM** | Pre-compaction memory flush | `Services/Memory/MemoryFlushService.php`, `AgentRespondJob.php` | Phase 3, 4 | -| 6 | Infrastructure | Hybrid search (BM25 + vector) | `Services/Memory/HybridSearchService.php` | Phase 1, 2, 3 | - -### OpenClaw Patterns Adapted - -| Pattern | OpenClaw | OpenCompany Adaptation | -|---------|----------|----------------------| -| Storage | SQLite per agent with sqlite-vec | Single PostgreSQL + pgvector, scoped by `agent_id` | -| Chunking | 400 tokens, 80 overlap | 512 tokens, 64 overlap (from `config/memory.php`) | -| Embedding | Pluggable, OpenAI primary | OpenAI text-embedding-3-small, cache by SHA256 | -| Hybrid weights | 0.7 vector + 0.3 BM25 | Same (from `config/memory.php`) | -| Compaction | Chunk-based, 0.4 keep ratio | Same, with cumulative summaries | -| Memory flush | Silent turn with NO_REPLY | Silent turn with `[FLUSH_COMPLETE]`, response discarded | -| Result clamping | maxResults=6, maxSnippetChars=700, maxInjectedChars=4000 | Same | -| Batch embeddings | Disabled by default; opt-in for large backfills | Always available via `embedBatch()`; used in bulk indexing jobs | -| Memory init timing | QMD eager-initialized on gateway startup (non-blocking) | Lazy via Laravel DI container (pgvector always warm) | -| Memory scope | Per-agent directory isolation + collection scoping (QMD `-c ` args) | Per-agent `agent_id` column + `collection` column scoping | -| Memory write tool | No dedicated save tool (uses filesystem write/edit directly) | Dedicated `save_memory` tool with `target` param | -| Memory read tools | `memory_search` (semantic) + `memory_get` (read by path/line) | `recall_memory` (semantic search over all LTM) | -| Memory files | `MEMORY.md` (curated) + `memory/YYYY-MM-DD.md` (daily logs) | Same layout under `agents/{slug}/` | -| MEMORY.md loading | Only in private sessions (scope: `chatType: "direct"`) | Always loaded via identity file system | -| Session memory hook | On `/new` command, saves transcript to `memory/YYYY-MM-DD-slug.md` | Not applicable (our sessions are persistent) | - ---- - -## Appendix A: OpenClaw Memory Architecture — Deep Dive - -> This appendix documents OpenClaw's exact memory implementation as a reference for our adaptation. -> Source: `inspiration/openclaw/` (v2026.2.9, Feb 2026) - -### A.1 Memory Files & Layout - -OpenClaw uses **plain Markdown in the agent workspace** as the source of truth: - -| File | Purpose | When loaded | Write pattern | -|------|---------|-------------|---------------| -| `MEMORY.md` | Curated long-term memory | **Only in private/direct sessions** (never in group contexts) | Overwrite/update by agent (using `write`/`edit` tools) | -| `memory/YYYY-MM-DD.md` | Daily log entries | Today + yesterday loaded at session start | Append-only | -| `memory/YYYY-MM-DD-slug.md` | Session transcripts | Not auto-loaded; searchable via `memory_search` | Created by `session-memory` hook on `/new` command | - -Key insight: **MEMORY.md is NOT always loaded.** OpenClaw explicitly skips it in group chats for privacy. Our system loads MEMORY.md always (via the identity file pipeline), which is simpler but means we should be careful about what agents store there. - -### A.2 Memory Tools (Read-Only!) - -**OpenClaw has NO dedicated `save_memory` tool.** Agents write to memory using the standard filesystem tools (`write`, `edit`, `exec`). The memory tools are read-only: - -```typescript -// memory_search — Semantic search over MEMORY.md + memory/*.md -description: "Mandatory recall step: semantically search MEMORY.md + memory/*.md -(and optional session transcripts) before answering questions about prior work, -decisions, dates, people, preferences, or todos; returns top snippets with path + lines." - -// memory_get — Read specific file content by path + line range -description: "Safe snippet read from MEMORY.md or memory/*.md with optional from/lines; -use after memory_search to pull only the needed lines and keep context small." -``` - -This means OpenClaw's agents must know the file layout and manually write to the correct paths. Our `save_memory` tool abstracts this complexity away, which is better for our multi-agent setup where agents shouldn't need filesystem knowledge. - -### A.3 System Prompt — Memory Section - -OpenClaw's system prompt includes a `## Memory Recall` section (only for non-subagent sessions that have memory tools enabled): - -``` -## Memory Recall -Before answering anything about prior work, decisions, dates, people, preferences, -or todos: run memory_search on MEMORY.md + memory/*.md; then use memory_get to pull -only the needed lines. If low confidence after search, say you checked. -Citations: include Source: when it helps the user verify memory snippets. -``` - -This is notably **recall-focused** — it tells the agent when to search, not when to save. OpenClaw relies on the memory docs (`docs/concepts/memory.md`) and the flush prompt to drive save behavior. - -### A.4 Memory Flush Prompts (Pre-Compaction) - -OpenClaw's flush uses two prompts injected during the silent turn: - -**System prompt append:** -``` -Pre-compaction memory flush turn. -The session is near auto-compaction; capture durable memories to disk. -You may reply, but usually NO_REPLY is correct. -``` - -**User message (the flush trigger):** -``` -Pre-compaction memory flush. Store durable memories now -(use memory/YYYY-MM-DD.md; create memory/ if needed). -If nothing to store, reply with NO_REPLY. -``` - -Key details: -- Flush targets **daily logs** (`memory/YYYY-MM-DD.md`), not MEMORY.md -- `NO_REPLY` is a sentinel token that suppresses delivery to the user -- One flush per compaction cycle (tracked via `memoryFlushCompactionCount` in session store) -- Flush is **skipped** for read-only sandboxed workspaces and CLI providers -- Soft threshold: triggers 4000 tokens before compaction would fire - -### A.5 Compaction - -OpenClaw's compaction is handled by the embedded Pi agent runtime, not by OpenClaw itself: - -- **Trigger**: `contextTokens > contextWindow - reserveTokens` -- **Output**: A `compaction` entry in the JSONL transcript with `firstKeptEntryId` and `tokensBefore` -- **Effect**: Future turns see compaction summary + messages after the kept entry -- **Config**: `reserveTokens: 16384`, `keepRecentTokens: 20000` -- **Safety floor**: Minimum 20000 tokens reserve to ensure room for pre-compaction flush -- **Boot sync**: `memory.qmd.update.waitForBootSync` (default `false`) — when `true`, QMD boot refresh blocks startup. Default non-blocking behavior means first searches may hit a partially warmed index. -- **Manual**: `/compact` command (optionally with focus instructions) -- **Compaction is persistent** in the JSONL transcript, unlike session pruning (which is in-memory only) - -### A.6 Session-Memory Hook - -When the user runs `/new` (start a new session), OpenClaw's `session-memory` hook: - -1. Reads the last N messages (default 15) from the previous session -2. Uses LLM to generate a descriptive slug (e.g., "api-design", "bug-fix") -3. Saves to `memory/YYYY-MM-DD-slug.md` with session metadata -4. This is separate from the pre-compaction flush — it's a session-end memory capture - -### A.7 Workspace Files Loaded Into System Prompt - -OpenClaw loads these workspace files as "Project Context" in the system prompt: - -``` -IDENTITY.md → Agent identity and personality -SOUL.md → Persona and tone guidance -USER.md → User preferences and context -AGENTS.md → Multi-agent awareness -TOOLS.md → External tool guidance -MEMORY.md → Curated long-term memory (private sessions only!) -HEARTBEAT.md → Heartbeat prompt config -BOOTSTRAP.md → First-run bootstrapping (only for brand new workspaces) -``` - -For subagent sessions, only `AGENTS.md` and `TOOLS.md` are loaded (privacy/scope restriction). - -### A.8 Thesis Verification - -**Claim 1: "If the agent saves memory, isn't it always in MEMORY.md?"** - -**Partially incorrect.** In OpenClaw, agents write to TWO locations: -- `MEMORY.md` — for curated, high-value, durable facts (preferences, key decisions) -- `memory/YYYY-MM-DD.md` — for daily running context, timestamped observations - -The **memory flush explicitly targets daily logs**, not MEMORY.md: `"Store durable memories now (use memory/YYYY-MM-DD.md)"`. OpenClaw's docs say: "Decisions, preferences, and durable facts go to MEMORY.md. Day-to-day notes and running context go to memory/YYYY-MM-DD.md." - -**Claim 2: "Short-term memory is like summaries of compactions?"** - -**Correct.** STM is the conversation context window. When it fills up: -1. Pre-compaction flush saves important context to LTM (daily logs) -2. Compaction summarizes older messages into a persistent summary entry -3. Future turns see: compaction summary + recent messages - -The compaction summary IS the compressed form of STM. It's lossy — hence why the flush exists to promote key info to LTM before compression. - -### A.9 v2026.2.9 Changes - -The following changes were introduced in OpenClaw v2026.2.9 (tag `v2026.2.9`, Feb 2026): - -**1. Config migration: top-level → agents.defaults.memorySearch** -`memorySearch` config moved from top-level to `agents.defaults.memorySearch`. A legacy migration rule auto-migrates old configs and logs a deprecation warning. OpenClaw's `docs/concepts/memory.md` now explicitly states: *"Configure memory search under `agents.defaults.memorySearch` (not top-level `memorySearch`)."* Per-agent overrides take precedence over the new default location. - -**2. QMD eager initialization** -New `server-startup-memory.ts` module. The QMD memory manager is now initialized immediately on gateway startup (fire-and-forget, non-blocking) instead of lazily on first `memory_search` call. Update/embed timers are armed immediately. This addresses the documented "first search may be slow" problem by warming up GGUF models during startup. Boot refresh runs in background by default; set `memory.qmd.update.waitForBootSync = true` for blocking behavior. - -**3. Collection scoping** -New `buildCollectionFilterArgs()` method in `QmdMemoryManager`. QMD queries are now scoped to managed collections via `-c ` CLI args. If no managed collections are configured, the query returns empty results and logs a warning instead of searching undefined scope. Prevents accidental data exposure from misconfigured setups. - -**4. Model cache reuse** -New `symlinkSharedModels()` method. Symlinks the shared `~/.cache/qmd/models/` directory into each agent-specific `XDG_CACHE_HOME` path. This solves the problem of agent isolation (per-agent `XDG_CACHE_HOME` override) causing re-downloads of ~2.1 GB GGUF models. Result: per-agent index isolation + globally shared ML models. Cross-platform: handles `XDG_CACHE_HOME` on Linux/macOS and `LOCALAPPDATA` on Windows. Not directly applicable to our architecture (we use API-based embeddings, not local models). - -**5. Batch embeddings default off** -`agents.defaults.memorySearch.remote.batch.enabled` default changed from `true` to `false`. Batch API is now opt-in. Rationale: synchronous embedding is adequate for incremental updates; batch is mainly beneficial for large backfills. Providers supporting batch: OpenAI Batch API, Gemini async embeddings, Voyage AI. - -**6. ChatType unification (`dm` → `direct`)** -Session key parsing in `QmdMemoryManager.extractAgentIdFromSessionKey()` now accepts both `"direct"` and `"dm"` for backward compatibility. New sessions use `":direct:"` in generated keys. The QMD scope default rule uses `chatType: "direct"`. This is a semantic rename — `"direct"` is clearer than `"dm"`. - -> Note: Utility consolidation (commit ec910a235, `formatError` → `formatErrorMessage`) is an internal refactor with no architectural impact. - ---- - -## Appendix B: Complete System Prompt for Memory - -> This is the full memory-related system prompt text to inject into `OpenCompanyAgent::instructions()`. -> It should be appended after the identity files are loaded, so the agent already has MEMORY.md context. - -``` -## Memory System - -You have two types of memory: - -### Short-Term Memory (STM) -Your current conversation context. You can see recent messages directly. Older messages -are automatically summarized when the context window fills up. **You don't manage STM** — -the system handles it for you. - -### Long-Term Memory (LTM) -Durable memories that persist across all conversations. You manage LTM explicitly: - -- **MEMORY.md** (core memory) — Already loaded in your system prompt above. Contains - curated, high-value facts. You can add to it via `save_memory` with `target: "core"`. -- **Daily logs** — Timestamped entries searchable via `recall_memory`. Written via - `save_memory` with `target: "log"` (default). - -### save_memory - -Persist information to your long-term memory. Two targets: - -| Target | Storage | Loaded | Best for | -|--------|---------|--------|----------| -| `"core"` | MEMORY.md | Always (system prompt) | User preferences, key decisions, organizational knowledge, durable facts | -| `"log"` (default) | Daily log | On demand via `recall_memory` | Running context, timestamped observations, session learnings | - -Guidelines: -- Be specific: include who, what, why, and when -- Prefer `"core"` only for truly durable facts that should always be in context -- Prefer `"log"` for most saves — keeps MEMORY.md focused and manageable -- If someone says "remember this" — save it immediately (do not rely on conversation context) -- Use categories: preference, decision, learning, fact, general - -### recall_memory - -Semantically search your daily logs for past context. Use this: -- Before answering questions about prior work, decisions, or preferences -- At the start of complex tasks to gather relevant history -- When a user references something from a previous conversation -- When you have low confidence and need more context - -If recall_memory returns nothing relevant, tell the user you checked but found no prior context. - -### When to save vs when not to - -**Save:** -- User expresses a preference or working style -- An important decision is made (with reasoning) -- Key facts about a project, person, or the organization -- Learnings or insights from the current conversation -- Anything the user explicitly asks you to remember - -**Don't save:** -- Transient, obvious, or trivial information -- Information already in MEMORY.md -- Raw conversation snippets without context -- Temporary task state that won't matter later -``` - -### Updated Phase 5 Flush Prompt - -The flush prompt should align with OpenClaw's proven approach — target daily logs, be concise: - -```php -private function buildFlushPrompt(): string -{ - return <<<'PROMPT' - Pre-compaction memory flush. Your conversation context is about to be compacted - (older messages will be summarized and compressed). - - Review the conversation for durable context worth preserving. Use save_memory - (target: "log") to save important observations, decisions, preferences, or - learnings to your daily log before they are compressed. - - Only save to target: "core" if you discovered truly high-value permanent facts - (user preferences, key decisions) that should always be in your system prompt. - - If nothing needs saving, respond with exactly: [FLUSH_COMPLETE] - PROMPT; -} -``` diff --git a/docs/testing/feature-test-map.md b/docs/testing/feature-test-map.md deleted file mode 100644 index e6b2f53..0000000 --- a/docs/testing/feature-test-map.md +++ /dev/null @@ -1,1093 +0,0 @@ -# OpenCompany Feature Test Map - -Complete checklist of all features, buttons, and functionality to test. - ---- - -## 1. AUTHENTICATION PAGES - -### Login (`/login`) -- [ ] Email input field -- [ ] Password input field -- [ ] "Remember me" checkbox -- [ ] Login button (submit) -- [ ] "Forgot password" link -- [ ] Register link -- [ ] Error states for invalid credentials -- [ ] Loading state on submit - -### Register (`/register`) -- [ ] Name input field -- [ ] Email input field -- [ ] Password input field -- [ ] Confirm password input field -- [ ] Register button (submit) -- [ ] Login link -- [ ] Validation errors display -- [ ] Loading state on submit - -### Forgot Password (`/forgot-password`) -- [ ] Email input field -- [ ] Send reset link button -- [ ] Success message display -- [ ] Back to login link - -### Reset Password (`/reset-password/{token}`) -- [ ] Password input field -- [ ] Confirm password input field -- [ ] Reset password button -- [ ] Validation errors - -### Verify Email (`/verify-email`) -- [ ] Resend verification email button -- [ ] Success message display - ---- - -## 2. DASHBOARD (`/` or `/dashboard`) - -### Header -- [ ] Page title displays -- [ ] Subtitle displays - -### Stats Overview -- [ ] Agents Online stat card -- [ ] Pending Tasks stat card -- [ ] Unread Messages stat card -- [ ] Each stat shows correct number - -### Pending Approvals Section (if any) -- [ ] Approval cards display -- [ ] Approve button per item -- [ ] Reject button per item -- [ ] Amount display -- [ ] Requester info display -- [ ] View all link - -### Activity Feed -- [ ] Activity items load -- [ ] Activity type icons display -- [ ] Timestamps display -- [ ] User/agent avatars display -- [ ] Activity descriptions -- [ ] Load more (if > 20 items) - -### Quick Actions -- [ ] "Spawn Agent" button → opens modal -- [ ] "New Channel" button → opens modal -- [ ] "Create Task" button → opens modal -- [ ] "New Document" button → navigates - -### Working Agents Sidebar -- [ ] Agent cards display -- [ ] Agent status indicators (working/idle) -- [ ] Current task display -- [ ] Click agent → navigate to profile - -### Spawn Agent Modal -- [ ] Agent type selection (6 types: writer, analyst, researcher, creative, coder, coordinator) -- [ ] Agent name input -- [ ] Initial task textarea (optional) -- [ ] Behavior mode select (autonomous/supervised/strict) -- [ ] Ephemeral agent toggle -- [ ] Estimated cost display -- [ ] Cancel button -- [ ] Spawn Agent button -- [ ] Loading state on spawn - ---- - -## 3. CHAT (`/chat`) - -### Channel List Sidebar -- [ ] Channel items display -- [ ] Unread count badges -- [ ] Channel type icons (public/private/agent/dm/external) -- [ ] Selected channel highlight -- [ ] "New Channel" button -- [ ] Search channels (if available) - -### Create Channel Modal -- [ ] Channel type selection (public/private/agent/dm/external) -- [ ] Channel name input (validation: lowercase, hyphens) -- [ ] Description textarea -- [ ] Member search input -- [ ] Available members list -- [ ] Selected members chips with X buttons -- [ ] Cancel button -- [ ] Create button -- [ ] Loading state - -### Chat Area -- [ ] Channel header with name -- [ ] Member count display -- [ ] Pinned messages button with count -- [ ] Members info button -- [ ] Messages load correctly -- [ ] Message grouping by author -- [ ] Date separators display -- [ ] Avatar display per message -- [ ] Timestamp per message -- [ ] Scroll to bottom on new messages -- [ ] Load more old messages (scroll up) - -### Message Features -- [ ] Hover actions appear on messages -- [ ] React to message (emoji picker) -- [ ] Reply to message (thread) -- [ ] Pin message button -- [ ] Edit own message -- [ ] Delete own message -- [ ] Message reactions display -- [ ] Reaction counts - -### Message Input -- [ ] Textarea for typing -- [ ] Auto-resize on multi-line -- [ ] Attach file button (+) -- [ ] Emoji picker button -- [ ] Mention button (@) -- [ ] Send button -- [ ] Enter to send (Shift+Enter for newline) -- [ ] Character counter (if enabled) -- [ ] Format toolbar (bold, italic, code, etc.) -- [ ] @mention autocomplete popup -- [ ] Slash commands popup (/) -- [ ] Attachment preview with upload progress -- [ ] Reply-to banner (when replying) -- [ ] Cancel reply button -- [ ] Edit mode banner -- [ ] Cancel edit button - -### Channel Info Panel -- [ ] Toggle open/close -- [ ] Channel description -- [ ] Member list with avatars -- [ ] Member roles/types -- [ ] Add member button - -### Add Member Modal -- [ ] Search users input -- [ ] User list with selection checkboxes -- [ ] Selected count display -- [ ] Cancel button -- [ ] Add Members button - -### Pinned Messages Panel -- [ ] Toggle open/close -- [ ] Pinned messages list -- [ ] Click to jump to message -- [ ] Unpin button - -### Typing Indicator -- [ ] Shows when others typing -- [ ] Multiple users typing text - ---- - -## 4. DIRECT MESSAGES - -> **Note:** `/messages` now redirects to `/chat`. DMs are part of the unified chat interface and appear as `dm` type channels in the channel list. - -### DM Conversations (via `/chat`) -- [ ] DM channels appear in channel list -- [ ] DM channel type icon distinct from other types -- [ ] "New Message" button -- [ ] Search conversations input -- [ ] Avatar per conversation -- [ ] Last message preview -- [ ] Time ago display -- [ ] Unread count badges -- [ ] Click to open conversation -- [ ] Loading skeleton state -- [ ] Empty state if no conversations - -### New Message Modal -- [ ] Recipient select dropdown -- [ ] User/agent list with type labels -- [ ] Cancel button -- [ ] Start Chat button - -### Conversation View (`/messages/{id}`) -- [ ] Floating header with back button -- [ ] User/agent avatar and name -- [ ] User type label -- [ ] Status indicator (for agents) -- [ ] Settings/gear button (for agents) -- [ ] Profile link button -- [ ] Messages display -- [ ] Own messages right-aligned (dark bubble) -- [ ] Other messages left-aligned (light bubble) -- [ ] Avatar grouping (hide repeated) -- [ ] Timestamps per message -- [ ] Markdown rendering (bold, italic, code, links, lists) -- [ ] Code blocks with syntax highlighting -- [ ] Typing indicator -- [ ] Message input textarea -- [ ] Auto-resize input -- [ ] Send button -- [ ] Loading state on send -- [ ] Empty state for new conversations - ---- - -## 5. TASKS (`/tasks`) - -### Header -- [ ] Page title "Tasks" -- [ ] Filter tabs by status (All/Active/Pending/Completed/Failed) -- [ ] Filter by agent -- [ ] Filter by priority -- [ ] Filter by type -- [ ] "Create Task" button - -### Task List -- [ ] Task rows display -- [ ] Task title -- [ ] Type badge (ticket/request/analysis/content/research/custom) -- [ ] Status badge with color (pending/active/paused/completed/failed/cancelled) -- [ ] Priority badge (low/medium/high/urgent) -- [ ] Assigned agent with avatar -- [ ] Due date display -- [ ] Click to open task detail - -### Create Task Modal -- [ ] Title input (required) -- [ ] Description textarea -- [ ] Type select (ticket/request/analysis/content/research/custom) -- [ ] Priority select (low/medium/high/urgent) -- [ ] Agent assignment select -- [ ] Due date input -- [ ] Cancel button -- [ ] Create button -- [ ] Loading state - -### Task Detail View (`/tasks/{id}`) -- [ ] Task title display -- [ ] Type badge -- [ ] Status badge with color -- [ ] Priority badge -- [ ] Description display -- [ ] Assigned agent with avatar -- [ ] Requester info -- [ ] Channel link (if linked) -- [ ] Due date -- [ ] Created/started/completed timestamps -- [ ] Lifecycle action buttons: - - [ ] Start button (pending → active) - - [ ] Pause button (active → paused) - - [ ] Resume button (paused → active) - - [ ] Complete button (active → completed) - - [ ] Fail button (active → failed) - - [ ] Cancel button (any → cancelled) - -### Task Steps -- [ ] Steps list display -- [ ] Step description -- [ ] Step type badge (action/decision/approval/sub_task/message) -- [ ] Step status indicator (pending/in_progress/completed/skipped) -- [ ] Step timestamps -- [ ] Step metadata display - -### Sub-Tasks -- [ ] Sub-task list (if parent task) -- [ ] Sub-task status indicators -- [ ] Click to open sub-task - ---- - -## 6. LISTS (`/lists`) - -### Header -- [ ] Page title "Lists" -- [ ] View mode tabs (Board/List) -- [ ] Filter dropdown -- [ ] "Create Item" button - -### Board View (Kanban) -- [ ] Backlog column with count -- [ ] In Progress column with count -- [ ] Done column with count -- [ ] Item cards in each column -- [ ] Drag and drop between columns -- [ ] Item card: title, priority badge, assignee avatar, cost - -### List View -- [ ] Item rows in table format -- [ ] Sortable columns -- [ ] Item details visible - -### Create Item Modal -- [ ] Title input (required) -- [ ] Description textarea -- [ ] Status select (backlog/in_progress/done) -- [ ] Priority select (low/medium/high/urgent) -- [ ] Assignee select (grouped: agents/humans) -- [ ] Estimated cost input -- [ ] Channel select (optional) -- [ ] Cancel button -- [ ] Create button -- [ ] Loading state - -### Item Detail Slideover -- [ ] Item title display -- [ ] Edit button → edit mode -- [ ] Close (X) button -- [ ] Status badge with color -- [ ] Priority badge with color -- [ ] Description display -- [ ] Assignee with avatar -- [ ] Cost display -- [ ] Created date -- [ ] Completed date (if done) -- [ ] Mark Complete button -- [ ] Reopen button (if done) -- [ ] Delete button -- [ ] Collaborators section -- [ ] Comments section -- [ ] Add comment input -- [ ] Comment list -- [ ] Delete comment (hover reveal) -- [ ] Edit mode: editable title -- [ ] Edit mode: editable description -- [ ] Edit mode: status select -- [ ] Edit mode: priority select -- [ ] Edit mode: cost input -- [ ] Save/Cancel buttons in edit mode - ---- - -## 7. DOCUMENTS (`/docs`) - -### Document List Sidebar -- [ ] Search documents input -- [ ] Document tree/list display -- [ ] Document icons -- [ ] Selected document highlight -- [ ] "New Document" button -- [ ] Folder structure (if any) - -### Document Viewer/Editor -- [ ] Document title display -- [ ] Edit button -- [ ] Version history button -- [ ] Comments toggle button -- [ ] Attachments button -- [ ] Document content display -- [ ] Markdown rendering -- [ ] Code blocks with highlighting -- [ ] Edit mode: textarea/editor -- [ ] Save button in edit mode -- [ ] Cancel edit button - -### Version History Panel -- [ ] Version list display -- [ ] Version timestamps -- [ ] Version author -- [ ] Change description -- [ ] View diff button per version -- [ ] Restore version button -- [ ] Current version indicator - -### Diff Viewer Modal -- [ ] Side-by-side diff view -- [ ] Additions highlighted (green) -- [ ] Deletions highlighted (red) -- [ ] Version labels -- [ ] Close button - -### Comments Panel -- [ ] Comments list -- [ ] Comment author avatars -- [ ] Comment timestamps -- [ ] Reply to comment -- [ ] Resolve comment button -- [ ] Resolved comments section -- [ ] Add comment input -- [ ] Submit comment button - -### Attachments Panel -- [ ] Attachments list -- [ ] File icons -- [ ] File names -- [ ] Download button per file -- [ ] Delete button per file -- [ ] Upload attachment button - ---- - -## 8. ACTIVITY (`/activity`) - -### Header -- [ ] Page title -- [ ] Filter options - -### Filter Panel -- [ ] Activity type filters (messages/tasks/approvals/agents/errors) -- [ ] User filter dropdown -- [ ] Date range filters (today/week/month/all) - -### Activity Timeline -- [ ] Activity items display -- [ ] Type icons per activity -- [ ] User/agent avatars -- [ ] Timestamps -- [ ] Activity descriptions -- [ ] Metadata (task titles, amounts, channels) -- [ ] Load more button -- [ ] Empty state if no activities - ---- - -## 9. APPROVALS (`/approvals`) - -### Header -- [ ] Page title -- [ ] Filter tabs with counts - -### Filter Tabs -- [ ] All tab -- [ ] Pending tab (with count) -- [ ] Approved tab -- [ ] Rejected tab - -### Approval List -- [ ] Approval cards display -- [ ] Request title -- [ ] Description -- [ ] Amount display -- [ ] Requester info with avatar -- [ ] Status badge -- [ ] Approve button (pending only) -- [ ] Reject button (pending only) -- [ ] Responder info (approved/rejected) -- [ ] Response timestamp -- [ ] Loading state -- [ ] Empty state per filter - ---- - -## 10. AUTOMATION (`/automation`) - -### Header -- [ ] Page title -- [ ] Tab navigation - -### Task Templates Tab -- [ ] Templates list display -- [ ] "New Template" button -- [ ] Template cards with: - - [ ] Template name - - [ ] Default title - - [ ] Priority badge - - [ ] Default assignee - - [ ] Estimated cost - - [ ] Tags display - - [ ] Edit button - - [ ] Delete button - - [ ] Use template button - -### Template Modal (Create/Edit) -- [ ] Template name input -- [ ] Default title input -- [ ] Default priority select -- [ ] Default assignee select -- [ ] Estimated cost input -- [ ] Tags input -- [ ] Cancel button -- [ ] Save button - -### Automation Rules Tab -- [ ] Rules list display -- [ ] "New Rule" button -- [ ] Rule cards with: - - [ ] Rule name - - [ ] Trigger type - - [ ] Action type - - [ ] Template association - - [ ] Enabled/disabled toggle - - [ ] Trigger count - - [ ] Edit button - - [ ] Delete button - -### Rule Modal (Create/Edit) -- [ ] Rule name input -- [ ] Trigger type select (task created/completed/assigned/approval) -- [ ] Action type select (create task/assign/notify/spawn agent) -- [ ] Template select (if action = create task) -- [ ] Enabled toggle -- [ ] Cancel button -- [ ] Save button - ---- - -## 11. ORGANIZATION (`/org`) - -### Header -- [ ] Page title -- [ ] Subtitle - -### View Mode Toggle -- [ ] Tree View button -- [ ] Chart View button -- [ ] Active state on selected - -### Tree View -- [ ] Tree structure displays -- [ ] Node cards with avatars -- [ ] Agent type badges -- [ ] Status indicators (working/idle) -- [ ] Current task display -- [ ] Email for humans -- [ ] Ephemeral badge if applicable -- [ ] Expand/collapse children -- [ ] Expand indicator with count -- [ ] Click to expand/collapse -- [ ] Keyboard navigation (Tab, Enter, Space) -- [ ] Focus ring on keyboard focus -- [ ] Profile link per node - -### Chart View -- [ ] Horizontal org chart displays -- [ ] Node cards with avatars -- [ ] Connector lines between nodes -- [ ] Root node highlighted -- [ ] Agent/human icons -- [ ] Ephemeral badge -- [ ] Focus indicator on cards -- [ ] Profile link per node - -### Stats Section -- [ ] Total Members stat card -- [ ] Humans stat card -- [ ] Agents stat card -- [ ] Active Agents stat card -- [ ] Correct counts displayed - ---- - -## 12. WORKLOAD (`/workload`) - -### Summary Cards -- [ ] Active Agents card -- [ ] Current Tasks card -- [ ] Completed Today card -- [ ] Average Efficiency card - -### Agent Workload Cards -- [ ] Agent cards display -- [ ] Agent avatar with status -- [ ] Agent name and type -- [ ] Workload score bar -- [ ] Efficiency percentage -- [ ] Tasks in progress count -- [ ] Tasks pending count -- [ ] Tasks completed count -- [ ] Total cost display -- [ ] Status badge - -### Auto-refresh -- [ ] Data refreshes every 30 seconds -- [ ] Loading indicator on refresh - ---- - -## 13. CALENDAR (`/calendar`) - -### Sidebar -- [ ] Mini calendar display -- [ ] Date selection -- [ ] Today highlight -- [ ] Month navigation - -### View Mode Buttons -- [ ] Month view button -- [ ] Week view button -- [ ] Day view button - -### Calendar Grid -- [ ] Month view: full month grid -- [ ] Week view: 7 days with hours -- [ ] Day view: single day with hours -- [ ] Events display on dates -- [ ] Click date to create event -- [ ] Click event to view/edit - -### Navigation -- [ ] Previous period button -- [ ] Next period button -- [ ] Today button -- [ ] Period label (dynamic) - -### Event Modal -- [ ] Event title input -- [ ] Date/time inputs -- [ ] Description textarea -- [ ] Cancel button -- [ ] Save button -- [ ] Delete button (edit mode) - ---- - -## 14. SETTINGS (`/settings`) - -### Organization Settings -- [ ] Organization name input -- [ ] Organization email input -- [ ] Timezone select -- [ ] Save button - -### Agent Defaults -- [ ] Default behavior mode select -- [ ] Cost limit input -- [ ] Auto-spawn toggle -- [ ] Save button - -### Action Policies -- [ ] Policies list -- [ ] "Add Policy" button -- [ ] Policy card: pattern, threshold, approval level -- [ ] Edit policy button -- [ ] Delete policy button - -### Policy Modal -- [ ] Pattern input -- [ ] Cost threshold input -- [ ] Approval level select -- [ ] Cancel button -- [ ] Save button - -### Notifications -- [ ] Email notifications toggle -- [ ] Slack notifications toggle -- [ ] Daily summary toggle -- [ ] Save button - -### Danger Zone -- [ ] Pause all agents button -- [ ] Reset agent memory button -- [ ] Delete organization button -- [ ] Confirmation dialogs for each - ---- - -## 15. INTEGRATIONS (`/integrations`) - -### Webhooks Section -- [ ] Webhooks list -- [ ] "Create Webhook" button -- [ ] Webhook cards: - - [ ] URL display - - [ ] Target/events display - - [ ] Enabled/disabled toggle - - [ ] Last triggered date - - [ ] Call count - - [ ] Edit button - - [ ] Delete button - -### Webhook Modal -- [ ] URL input -- [ ] Target selection -- [ ] Events multiselect -- [ ] Cancel button -- [ ] Save button - -### API Keys Section -- [ ] API keys list -- [ ] "Generate Key" button -- [ ] Key cards: - - [ ] Key name - - [ ] Masked key value - - [ ] Copy button - - [ ] Revoke button - - [ ] Created date - -### Connected Services -- [ ] Services list/grid -- [ ] Service icons -- [ ] Service names -- [ ] Connection status -- [ ] Connect/Disconnect buttons - ---- - -## 16. TABLES (`/tables`) - -### Header -- [ ] Page title -- [ ] "New Table" button - -### Tables Grid -- [ ] Table cards display -- [ ] Table icons -- [ ] Table names -- [ ] Descriptions -- [ ] Row counts -- [ ] Column counts -- [ ] Click to open table -- [ ] Delete button per table - -### Create Table Modal -- [ ] Table name input -- [ ] Description textarea -- [ ] Icon selection (optional) -- [ ] Cancel button -- [ ] Create button - -### Empty State -- [ ] Empty state message -- [ ] Create table button - ---- - -## 17. TABLE VIEW (`/tables/{id}`) - -### Header -- [ ] Back button -- [ ] Table icon -- [ ] Table name -- [ ] Table description -- [ ] "Add Column" button -- [ ] "Add Row" button - -### Toolbar -- [ ] Search rows input -- [ ] Selected count display -- [ ] Bulk delete button (when selected) -- [ ] Row count display - -### Table Grid -- [ ] Column headers -- [ ] Column type indicators -- [ ] Column menu button (hover) -- [ ] Row selection checkboxes -- [ ] Cell data display per type: - - [ ] Text: inline edit - - [ ] Number: inline edit - - [ ] Date: date picker - - [ ] Checkbox: toggle - - [ ] Select: dropdown - - [ ] Multiselect: tags with add/remove - - [ ] URL: link display, edit button - - [ ] Email: mailto link, edit button -- [ ] Row actions menu (hover) -- [ ] Delete row button - -### Column Menu -- [ ] Edit column option -- [ ] Delete column option - -### Add Column Modal -- [ ] Column name input -- [ ] Column type selection grid -- [ ] Type descriptions -- [ ] Options input (for select/multiselect) -- [ ] Required toggle -- [ ] Cancel button -- [ ] Add Column button - -### Edit Column Modal -- [ ] Pre-filled column name -- [ ] Type change warning -- [ ] Options editing -- [ ] Cancel button -- [ ] Save Changes button - -### Bulk Delete Confirmation -- [ ] Confirmation message with count -- [ ] Cancel button -- [ ] Delete Rows button - ---- - -## 18. AGENT PROFILE (`/agent/{id}`) - -### Header -- [ ] Agent avatar with status -- [ ] Agent name -- [ ] Agent type badge -- [ ] Status badge (working/idle/paused) -- [ ] Emoji display -- [ ] Current task display -- [ ] Message button -- [ ] Pause/Resume button - -### Tabs -- [ ] Overview tab -- [ ] Personality tab -- [ ] Instructions tab -- [ ] Capabilities tab -- [ ] Memory tab -- [ ] Activity tab -- [ ] Settings tab - -### Overview Tab -- [ ] Agent summary -- [ ] Recent activity -- [ ] Quick stats - -### Personality Tab -- [ ] Personality editor textarea -- [ ] Save button - -### Instructions Tab -- [ ] Instructions editor textarea -- [ ] Save button - -### Capabilities Tab -- [ ] Capabilities list -- [ ] Capability enabled/disabled status -- [ ] Approval tracking per capability - -### Memory Tab -- [ ] Memory entries list -- [ ] Add memory button -- [ ] Clear memory button - -### Activity Tab -- [ ] Activity log -- [ ] Activity type icons -- [ ] Timestamps -- [ ] Load more - -### Settings Tab -- [ ] Agent-specific settings -- [ ] Session management -- [ ] Save button - ---- - -## 19. USER PROFILE (`/profile/{id}`) - -### Header -- [ ] User avatar -- [ ] User name -- [ ] User type badge (human/agent) -- [ ] Email display -- [ ] Ephemeral indicator (if agent) -- [ ] Status display -- [ ] Current task (if agent) -- [ ] Message button -- [ ] Manage Agent button (if agent) - -### Tabs -- [ ] Activity tab -- [ ] Tasks tab - -### Activity Tab -- [ ] Activity steps list -- [ ] Status indicators -- [ ] Timestamps - -### Tasks Tab -- [ ] Assigned tasks list -- [ ] Task status badges -- [ ] Click to open task - ---- - -## 20. PROFILE EDIT (`/profile`) - -### Update Profile Form -- [ ] Name input -- [ ] Email input -- [ ] Save button -- [ ] Success message - -### Update Password Form -- [ ] Current password input -- [ ] New password input -- [ ] Confirm password input -- [ ] Save button -- [ ] Validation errors - -### Delete Account Section -- [ ] Delete account button -- [ ] Confirmation modal -- [ ] Password confirmation input -- [ ] Confirm delete button - ---- - -## 21. GLOBAL FEATURES - -### Sidebar Navigation -- [ ] All navigation links work -- [ ] Active state on current page -- [ ] Badge counts (Chat, Approvals) -- [ ] Collapse/expand (if available) - -### User Menu -- [ ] User avatar click -- [ ] Username display -- [ ] Role display -- [ ] Profile link -- [ ] Settings link -- [ ] Logout button - -### Command Palette (Cmd/Ctrl+K) -- [ ] Opens on shortcut -- [ ] Search input autofocus -- [ ] Mode tabs (Commands/Files/Channels/Agents) -- [ ] Recent searches display -- [ ] Command groups -- [ ] Arrow key navigation -- [ ] Enter to execute -- [ ] Escape to close -- [ ] Prefix searches (#channels, @agents) - -### Keyboard Shortcuts -- [ ] Cmd/Ctrl+K: Command palette -- [ ] Escape: Close modals/palettes -- [ ] g+h: Go to Dashboard -- [ ] g+c: Go to Chat -- [ ] g+t: Go to Tasks -- [ ] g+d: Go to Docs -- [ ] g+a: Go to Approvals -- [ ] g+o: Go to Organization -- [ ] g+s: Go to Settings - -### Dark Mode -- [ ] Toggle dark mode -- [ ] All pages render correctly -- [ ] All components have dark variants -- [ ] System preference detection - -### Real-Time Updates -- [ ] WebSocket connection establishes -- [ ] New messages appear instantly -- [ ] Typing indicators work -- [ ] Activity feed updates -- [ ] Presence updates - -### Loading States -- [ ] Skeleton loaders display -- [ ] Spinner indicators -- [ ] Button loading states -- [ ] Page transition loading - -### Error States -- [ ] Error messages display -- [ ] Retry buttons work -- [ ] Form validation errors -- [ ] API error handling - -### Empty States -- [ ] Empty state messages -- [ ] Call-to-action buttons -- [ ] Helpful descriptions - -### Responsive Design -- [ ] Mobile layout (if supported) -- [ ] Tablet layout -- [ ] Desktop layout -- [ ] Sidebar behavior on resize - ---- - -## 22. SHARED COMPONENTS TO TEST - -### Button -- [ ] Primary variant -- [ ] Secondary variant -- [ ] Ghost variant -- [ ] Danger variant -- [ ] Link variant -- [ ] Outline variant -- [ ] Success variant -- [ ] All sizes (xs/sm/md/lg/xl) -- [ ] Loading state -- [ ] Disabled state -- [ ] With icons (left/right) -- [ ] Icon-only mode -- [ ] Tooltip display - -### Input -- [ ] All types (text/email/password/number/etc) -- [ ] All sizes -- [ ] With label -- [ ] With error message -- [ ] With success indicator -- [ ] Clearable (X button) -- [ ] Copyable (copy button) -- [ ] Password toggle -- [ ] Character counter -- [ ] Disabled state -- [ ] Readonly state - -### Select -- [ ] Dropdown opens/closes -- [ ] Item selection -- [ ] Placeholder display -- [ ] Icon display -- [ ] Disabled state - -### Checkbox -- [ ] Check/uncheck toggle -- [ ] Label display -- [ ] Description display -- [ ] Disabled state - -### Modal -- [ ] Opens/closes -- [ ] Escape key closes -- [ ] Click outside closes (if enabled) -- [ ] Header/content/footer slots -- [ ] All sizes - -### Confirm Dialog -- [ ] Opens on trigger -- [ ] Confirm button works -- [ ] Cancel button works -- [ ] Input validation (if required) -- [ ] Checkbox state -- [ ] All variants - -### Badge -- [ ] All variants -- [ ] All styles (soft/solid/outline) -- [ ] With count -- [ ] Removable -- [ ] With icon -- [ ] With avatar - -### Avatar -- [ ] Image display -- [ ] Fallback initials -- [ ] Agent icon fallback -- [ ] Status dot indicator -- [ ] All sizes -- [ ] All shapes -- [ ] Tooltip display - -### Tooltip -- [ ] Hover display -- [ ] All positions -- [ ] Delay works -- [ ] Disabled state - -### Dropdown Menu -- [ ] Opens/closes -- [ ] Item click works -- [ ] Submenu opens -- [ ] Keyboard navigation -- [ ] Disabled items - -### Skeleton -- [ ] All presets display correctly -- [ ] Animation works - -### Stat Card -- [ ] Value display -- [ ] Label display -- [ ] Icon display -- [ ] Trend indicator -- [ ] Sparkline chart -- [ ] Progress bar -- [ ] Click interaction - ---- - -## Total Test Items: ~750+ - -Use this checklist to systematically test every feature in the application. diff --git a/docs/testing/qa-strategy.md b/docs/testing/qa-strategy.md index e2bb7c5..cbf613c 100644 --- a/docs/testing/qa-strategy.md +++ b/docs/testing/qa-strategy.md @@ -1,1704 +1,126 @@ -# QA Strategy +# QA Testing Strategy — Full Project Audit -> Comprehensive quality assurance strategy for the OpenCompany application. -> -> This document defines testing standards, coverage requirements, and implementation guidelines. +**Scope:** All changes in current git tree (commits ce35785 through df74cb3, plus uncommitted work) --- -## Table of Contents +## What Changed -1. [Overview](#overview) -2. [Testing Pyramid](#1-testing-pyramid) -3. [Backend Testing](#2-backend-testing) -4. [Frontend Testing](#3-frontend-testing) -5. [End-to-End Testing](#4-end-to-end-testing) -6. [CI/CD Pipeline](#5-cicd-pipeline) -7. [Test Data Management](#6-test-data-management) -8. [Code Quality](#7-code-quality) -9. [Performance Testing](#8-performance-testing) -10. [Implementation Roadmap](#9-implementation-roadmap) +### 1. Integration Ecosystem Refactor (Mar 30 – Apr 5) ---- - -## Overview - -### Quality Goals - -| Metric | Target | Current | -|--------|--------|---------| -| **Test Coverage** | 80% | ~30% (282 tests, 1063 assertions) | -| **API Test Coverage** | 100% | ~40% (controllers have feature tests) | -| **Model Test Coverage** | 100% | ~20% (key models tested via feature tests) | -| **Frontend Component Coverage** | 70% | 0% (Vitest not yet configured) | -| **E2E Critical Paths** | 100% | ~60% | -| **CI Pipeline Pass Rate** | 100% | N/A | -| **Build Time** | < 10 min | N/A | -| **Static Analysis** | Level 5 | Level 5 (PHPStan + Larastan, 0 errors) | - -### Testing Philosophy - -1. **Test Behavior, Not Implementation**: Tests should verify what the code does, not how it does it -2. **Fast Feedback Loop**: Unit tests run in seconds, integration tests in minutes -3. **Confidence Over Coverage**: Focus on critical paths first, then expand -4. **Maintainable Tests**: Tests should be easy to read, write, and maintain -5. **Realistic Test Data**: Use factories that mirror production data shapes - -### Current State Analysis - -**Existing Infrastructure:** -- PHPUnit 11.5.3 configured -- Laravel Dusk 8.3 for browser tests -- 13 model factories -- 11 database seeders -- 13 browser test files - -**What's been built (as of Feb 2026):** -- 282 passing tests with 1,063 assertions -- PHPStan level 5 enforced with Larastan (0 errors across `app/`) -- Feature tests for: AgentRespondJob, AgentPermissionService, ToolRegistry, ApprovalWrappedTool, ContactAgent, ExecuteAgentTaskJob, all tool classes, API controllers -- Tests cover agent execution pipeline, task lifecycle, inter-agent communication, approval workflows - -**Remaining gaps:** -- NO frontend component tests (Vitest not yet configured) -- NO CI/CD pipeline (tests run locally only) -- Incomplete API controller coverage (~40% of endpoints) -- No dedicated unit tests for models (tested indirectly via feature tests) - ---- - -## 1. Testing Pyramid - -``` - ┌─────────────────┐ - │ E2E (10%) │ Slow, expensive, high confidence - │ Dusk Browser │ - └────────┬────────┘ - │ - ┌─────────────┴─────────────┐ - │ Integration (20%) │ API endpoints, database - │ Feature Tests │ - └─────────────┬─────────────┘ - │ - ┌────────────────────┴────────────────────┐ - │ Unit (70%) │ Fast, isolated, models/services - │ PHPUnit + Vitest │ - └──────────────────────────────────────────┘ -``` - -### Distribution by Component - -| Layer | Test Type | Tools | Coverage Target | -|-------|-----------|-------|-----------------| -| **Models (29)** | Unit | PHPUnit | 100% | -| **Services** | Unit | PHPUnit | 90% | -| **Controllers (35)** | Integration | PHPUnit | 100% | -| **Vue Components (50+)** | Unit | Vitest | 70% | -| **Composables (10+)** | Unit | Vitest | 100% | -| **Critical Flows** | E2E | Dusk | 100% | - ---- - -## 2. Backend Testing - -### 2.1 API Integration Tests - -Every controller endpoint must have tests covering: -- **Happy path**: Successful request/response -- **Validation**: Invalid input rejection -- **Authorization**: Unauthenticated and unauthorized access -- **Edge cases**: Empty results, pagination boundaries - -#### Test File Structure - -``` -tests/ -├── Unit/ -│ ├── Models/ -│ │ ├── UserTest.php -│ │ ├── TaskTest.php -│ │ ├── ChannelTest.php -│ │ └── ... (29 model tests) -│ └── Services/ -│ ├── AgentExecutionServiceTest.php -│ └── ... -├── Feature/ -│ ├── Api/ -│ │ ├── UserControllerTest.php -│ │ ├── TaskControllerTest.php -│ │ ├── ChannelControllerTest.php -│ │ ├── MessageControllerTest.php -│ │ └── ... (35 controller tests) -│ └── Auth/ -│ └── ... (existing) -└── Browser/ - └── ... (existing Dusk tests) -``` - -#### Controller Test Template - -```php -// tests/Feature/Api/TaskControllerTest.php - -namespace Tests\Feature\Api; - -use App\Models\User; -use App\Models\Task; -use App\Models\Channel; -use Illuminate\Foundation\Testing\RefreshDatabase; -use Tests\TestCase; - -class TaskControllerTest extends TestCase -{ - use RefreshDatabase; - - private User $user; - private User $agent; - private Channel $channel; - - protected function setUp(): void - { - parent::setUp(); - - $this->user = User::factory()->create(['type' => 'human']); - $this->agent = User::factory()->create(['type' => 'agent', 'agent_type' => 'coder']); - $this->channel = Channel::factory()->create(); - } - - // ==================== INDEX ==================== - - public function test_index_returns_paginated_tasks(): void - { - Task::factory()->count(25)->create(['channel_id' => $this->channel->id]); - - $response = $this->actingAs($this->user) - ->getJson('/api/tasks'); - - $response->assertOk() - ->assertJsonStructure([ - 'data' => [ - '*' => ['id', 'title', 'description', 'status', 'priority', 'assignee', 'created_at'], - ], - 'meta' => ['current_page', 'per_page', 'total'], - ]) - ->assertJsonCount(15, 'data'); // Default pagination - } - - public function test_index_filters_by_status(): void - { - Task::factory()->count(5)->create(['status' => 'backlog']); - Task::factory()->count(3)->create(['status' => 'in_progress']); - Task::factory()->count(2)->create(['status' => 'done']); - - $response = $this->actingAs($this->user) - ->getJson('/api/tasks?status=in_progress'); - - $response->assertOk() - ->assertJsonCount(3, 'data'); - } - - public function test_index_requires_authentication(): void - { - $response = $this->getJson('/api/tasks'); - - $response->assertUnauthorized(); - } - - // ==================== STORE ==================== - - public function test_store_creates_task(): void - { - $taskData = [ - 'title' => 'Implement feature X', - 'description' => 'As a user, I want to...', - 'priority' => 'high', - 'channel_id' => $this->channel->id, - ]; - - $response = $this->actingAs($this->user) - ->postJson('/api/tasks', $taskData); - - $response->assertCreated() - ->assertJsonFragment(['title' => 'Implement feature X']); - - $this->assertDatabaseHas('tasks', [ - 'title' => 'Implement feature X', - 'creator_id' => $this->user->id, - 'status' => 'backlog', // Default status - ]); - } - - public function test_store_validates_required_fields(): void - { - $response = $this->actingAs($this->user) - ->postJson('/api/tasks', []); - - $response->assertUnprocessable() - ->assertJsonValidationErrors(['title', 'channel_id']); - } - - public function test_store_validates_priority_enum(): void - { - $response = $this->actingAs($this->user) - ->postJson('/api/tasks', [ - 'title' => 'Test', - 'channel_id' => $this->channel->id, - 'priority' => 'invalid_priority', - ]); - - $response->assertUnprocessable() - ->assertJsonValidationErrors(['priority']); - } - - // ==================== SHOW ==================== - - public function test_show_returns_task_with_relationships(): void - { - $task = Task::factory()->create([ - 'assignee_id' => $this->agent->id, - 'creator_id' => $this->user->id, - ]); - - $response = $this->actingAs($this->user) - ->getJson("/api/tasks/{$task->id}"); - - $response->assertOk() - ->assertJsonStructure([ - 'data' => [ - 'id', 'title', 'description', 'status', 'priority', - 'assignee' => ['id', 'name', 'type'], - 'creator' => ['id', 'name'], - 'comments', - ], - ]); - } - - public function test_show_returns_404_for_nonexistent_task(): void - { - $response = $this->actingAs($this->user) - ->getJson('/api/tasks/nonexistent-uuid'); - - $response->assertNotFound(); - } - - // ==================== UPDATE ==================== - - public function test_update_modifies_task(): void - { - $task = Task::factory()->create(['status' => 'backlog']); - - $response = $this->actingAs($this->user) - ->putJson("/api/tasks/{$task->id}", [ - 'status' => 'in_progress', - 'assignee_id' => $this->agent->id, - ]); - - $response->assertOk(); - - $task->refresh(); - $this->assertEquals('in_progress', $task->status); - $this->assertEquals($this->agent->id, $task->assignee_id); - } - - public function test_update_records_started_at_when_starting(): void - { - $task = Task::factory()->create(['status' => 'backlog', 'started_at' => null]); - - $this->actingAs($this->user) - ->putJson("/api/tasks/{$task->id}", ['status' => 'in_progress']); - - $task->refresh(); - $this->assertNotNull($task->started_at); - } - - public function test_update_records_completed_at_when_completing(): void - { - $task = Task::factory()->create(['status' => 'in_progress', 'completed_at' => null]); - - $this->actingAs($this->user) - ->putJson("/api/tasks/{$task->id}", ['status' => 'done']); - - $task->refresh(); - $this->assertNotNull($task->completed_at); - } - - // ==================== DESTROY ==================== - - public function test_destroy_deletes_task(): void - { - $task = Task::factory()->create(); - - $response = $this->actingAs($this->user) - ->deleteJson("/api/tasks/{$task->id}"); - - $response->assertNoContent(); - $this->assertDatabaseMissing('tasks', ['id' => $task->id]); - } - - // ==================== REORDER ==================== - - public function test_reorder_updates_positions(): void - { - $tasks = Task::factory()->count(3)->create(); - - $response = $this->actingAs($this->user) - ->postJson('/api/tasks/reorder', [ - 'tasks' => [ - ['id' => $tasks[2]->id, 'position' => 0], - ['id' => $tasks[0]->id, 'position' => 1], - ['id' => $tasks[1]->id, 'position' => 2], - ], - ]); - - $response->assertOk(); - - $this->assertEquals(0, $tasks[2]->fresh()->position); - $this->assertEquals(1, $tasks[0]->fresh()->position); - $this->assertEquals(2, $tasks[1]->fresh()->position); - } -} -``` - -### 2.2 Model Unit Tests - -Every model should have tests for: -- **Relationships**: Verify all defined relationships work -- **Scopes**: Test query scopes return expected results -- **Accessors/Mutators**: Test computed attributes -- **Business Logic**: Any methods on the model - -#### Model Test Template - -```php -// tests/Unit/Models/TaskTest.php - -namespace Tests\Unit\Models; - -use App\Models\Task; -use App\Models\User; -use App\Models\Channel; -use App\Models\ListItemComment; -use Illuminate\Foundation\Testing\RefreshDatabase; -use Tests\TestCase; - -class TaskTest extends TestCase -{ - use RefreshDatabase; - - // ==================== RELATIONSHIPS ==================== - - public function test_belongs_to_creator(): void - { - $user = User::factory()->create(); - $task = Task::factory()->create(['creator_id' => $user->id]); - - $this->assertInstanceOf(User::class, $task->creator); - $this->assertEquals($user->id, $task->creator->id); - } - - public function test_belongs_to_assignee(): void - { - $agent = User::factory()->create(['type' => 'agent']); - $task = Task::factory()->create(['assignee_id' => $agent->id]); - - $this->assertInstanceOf(User::class, $task->assignee); - $this->assertEquals($agent->id, $task->assignee->id); - } - - public function test_assignee_can_be_null(): void - { - $task = Task::factory()->create(['assignee_id' => null]); - - $this->assertNull($task->assignee); - } - - public function test_belongs_to_channel(): void - { - $channel = Channel::factory()->create(); - $task = Task::factory()->create(['channel_id' => $channel->id]); - - $this->assertInstanceOf(Channel::class, $task->channel); - } - - public function test_has_many_comments(): void - { - $task = Task::factory()->create(); - ListItemComment::factory()->count(3)->create(['list_item_id' => $task->id]); - - $this->assertCount(3, $task->comments); - $this->assertInstanceOf(ListItemComment::class, $task->comments->first()); - } - - // ==================== SCOPES ==================== - - public function test_scope_backlog(): void - { - Task::factory()->count(3)->create(['status' => 'backlog']); - Task::factory()->count(2)->create(['status' => 'in_progress']); - - $this->assertCount(3, Task::backlog()->get()); - } - - public function test_scope_in_progress(): void - { - Task::factory()->count(3)->create(['status' => 'backlog']); - Task::factory()->count(2)->create(['status' => 'in_progress']); - - $this->assertCount(2, Task::inProgress()->get()); - } - - public function test_scope_completed(): void - { - Task::factory()->count(3)->create(['status' => 'done']); - Task::factory()->count(2)->create(['status' => 'in_progress']); - - $this->assertCount(3, Task::completed()->get()); - } - - public function test_scope_high_priority(): void - { - Task::factory()->count(2)->create(['priority' => 'high']); - Task::factory()->count(2)->create(['priority' => 'urgent']); - Task::factory()->count(3)->create(['priority' => 'low']); - - $this->assertCount(4, Task::highPriority()->get()); - } - - public function test_scope_assigned_to(): void - { - $agent = User::factory()->create(['type' => 'agent']); - Task::factory()->count(3)->create(['assignee_id' => $agent->id]); - Task::factory()->count(2)->create(); - - $this->assertCount(3, Task::assignedTo($agent->id)->get()); - } - - // ==================== ACCESSORS ==================== - - public function test_duration_accessor_returns_null_when_not_completed(): void - { - $task = Task::factory()->create([ - 'started_at' => now()->subHours(2), - 'completed_at' => null, - ]); - - $this->assertNull($task->duration); - } - - public function test_duration_accessor_calculates_correctly(): void - { - $task = Task::factory()->create([ - 'started_at' => now()->subHours(2), - 'completed_at' => now(), - ]); - - $this->assertEquals(2 * 60, $task->duration); // Duration in minutes - } - - public function test_is_overdue_accessor(): void - { - $overdueTask = Task::factory()->create([ - 'due_date' => now()->subDay(), - 'status' => 'in_progress', - ]); - - $futureTask = Task::factory()->create([ - 'due_date' => now()->addDay(), - 'status' => 'in_progress', - ]); - - $completedTask = Task::factory()->create([ - 'due_date' => now()->subDay(), - 'status' => 'done', - ]); - - $this->assertTrue($overdueTask->is_overdue); - $this->assertFalse($futureTask->is_overdue); - $this->assertFalse($completedTask->is_overdue); // Completed tasks aren't overdue - } - - // ==================== MUTATORS ==================== - - public function test_status_mutator_sets_timestamps(): void - { - $task = Task::factory()->create(['status' => 'backlog']); - - $task->update(['status' => 'in_progress']); - $this->assertNotNull($task->started_at); - - $task->update(['status' => 'done']); - $this->assertNotNull($task->completed_at); - } - - // ==================== BUSINESS LOGIC ==================== - - public function test_can_be_assigned_to_checks_agent_type(): void - { - $task = Task::factory()->create(); - $human = User::factory()->create(['type' => 'human']); - $agent = User::factory()->create(['type' => 'agent']); - - $this->assertFalse($task->canBeAssignedTo($human)); - $this->assertTrue($task->canBeAssignedTo($agent)); - } -} -``` - -### 2.3 Controllers to Test (35 Total) - -| Controller | Endpoints | Priority | -|------------|-----------|----------| -| `UserController` | index, show, update, presence | High | -| `TaskController` | CRUD, reorder, comments | High | -| `ChannelController` | CRUD, members, join/leave | High | -| `MessageController` | CRUD, reactions, pin, threads | High | -| `DocumentController` | CRUD, comments, versions | High | -| `ApprovalController` | index, store, respond | High | -| `DirectMessageController` | CRUD, read status | High | -| `ActivityController` | index | Medium | -| `StatsController` | index, update | Medium | -| `NotificationController` | index, markRead | Medium | -| `SearchController` | index | Medium | -| `CalendarEventController` | CRUD | Medium | -| `DataTableController` | CRUD, columns, rows | Medium | -| `AutomationRuleController` | CRUD | Medium | -| `SettingsController` | index, update | Medium | -| `IntegrationController` | CRUD | Low | - -### 2.4 Service Tests - -```php -// tests/Unit/Services/AgentExecutionServiceTest.php - -namespace Tests\Unit\Services; - -use App\Services\AgentExecutionService; -use App\Models\User; -use App\Models\Task; -use Mockery; -use Tests\TestCase; - -class AgentExecutionServiceTest extends TestCase -{ - private AgentExecutionService $service; - - protected function setUp(): void - { - parent::setUp(); - $this->service = app(AgentExecutionService::class); - } - - public function test_assigns_task_to_available_agent(): void - { - $idleAgent = User::factory()->create([ - 'type' => 'agent', - 'status' => 'idle', - ]); - - $task = Task::factory()->create(['status' => 'backlog']); - - $result = $this->service->assignTask($task); - - $this->assertTrue($result); - $this->assertEquals($idleAgent->id, $task->fresh()->assignee_id); - } - - public function test_skips_busy_agents(): void - { - User::factory()->create([ - 'type' => 'agent', - 'status' => 'working', - ]); - - $task = Task::factory()->create(); - - $result = $this->service->assignTask($task); - - $this->assertFalse($result); - $this->assertNull($task->fresh()->assignee_id); - } -} -``` - ---- - -## 3. Frontend Testing - -### 3.1 Setup Vitest - -```bash -npm install -D vitest @vue/test-utils @testing-library/vue jsdom happy-dom -``` - -```typescript -// vitest.config.ts - -import { defineConfig } from 'vitest/config' -import vue from '@vitejs/plugin-vue' -import { resolve } from 'path' - -export default defineConfig({ - plugins: [vue()], - test: { - environment: 'jsdom', - globals: true, - setupFiles: ['./tests/js/setup.ts'], - include: ['tests/js/**/*.{test,spec}.{js,ts}'], - coverage: { - provider: 'v8', - reporter: ['text', 'html', 'lcov'], - include: ['resources/js/**/*.{vue,ts}'], - exclude: ['resources/js/**/*.d.ts'], - }, - }, - resolve: { - alias: { - '@': resolve(__dirname, 'resources/js'), - }, - }, -}) -``` - -```typescript -// tests/js/setup.ts - -import { config } from '@vue/test-utils' -import { vi } from 'vitest' - -// Mock Inertia -vi.mock('@inertiajs/vue3', () => ({ - usePage: () => ({ - props: { - auth: { user: { id: '1', name: 'Test User' } }, - }, - }), - router: { - visit: vi.fn(), - post: vi.fn(), - put: vi.fn(), - delete: vi.fn(), - }, - Link: { - template: '', - }, -})) - -// Global stubs -config.global.stubs = { - teleport: true, -} -``` - -### 3.2 Component Test Structure - -``` -tests/js/ -├── setup.ts -├── components/ -│ ├── shared/ -│ │ ├── Button.spec.ts -│ │ ├── Modal.spec.ts -│ │ ├── Badge.spec.ts -│ │ └── ... -│ ├── chat/ -│ │ ├── Message.spec.ts -│ │ ├── MessageInput.spec.ts -│ │ └── ... -│ ├── tasks/ -│ │ ├── TaskCard.spec.ts -│ │ ├── TaskBoard.spec.ts -│ │ └── ... -│ └── agents/ -│ ├── AgentSettingsPanel.spec.ts -│ └── ... -└── composables/ - ├── useApi.spec.ts - ├── useRealtime.spec.ts - └── ... -``` - -### 3.3 Component Test Examples - -```typescript -// tests/js/components/shared/Button.spec.ts - -import { describe, it, expect, vi } from 'vitest' -import { mount } from '@vue/test-utils' -import Button from '@/Components/shared/Button.vue' - -describe('Button', () => { - it('renders slot content', () => { - const wrapper = mount(Button, { - slots: { - default: 'Click me', - }, - }) - - expect(wrapper.text()).toBe('Click me') - }) - - it('applies variant classes', () => { - const wrapper = mount(Button, { - props: { variant: 'primary' }, - }) - - expect(wrapper.classes()).toContain('btn-primary') - }) - - it('disables when loading', () => { - const wrapper = mount(Button, { - props: { loading: true }, - }) - - expect(wrapper.attributes('disabled')).toBeDefined() - }) - - it('emits click event', async () => { - const wrapper = mount(Button) - - await wrapper.trigger('click') - - expect(wrapper.emitted('click')).toBeTruthy() - }) - - it('prevents click when disabled', async () => { - const wrapper = mount(Button, { - props: { disabled: true }, - }) - - await wrapper.trigger('click') +Three commits that restructured how integrations are organized, shared, and loaded. - expect(wrapper.emitted('click')).toBeFalsy() - }) -}) -``` +- **Commit `4cffd75`** — ToolRegistry decomposed into 15 `BuiltInToolProvider` classes. New `integration-core` package with shared contracts. 13 integration packages consolidated into monorepo. Lua docs for 6 integrations. +- **Commit `24e2cc8`** — Lua doc generation and LuaBridge moved to shared `integration-core`. `OpenCompanyLuaToolInvoker` for dual dispatch. PrismRelay replaces hardcoded provider registration. +- **Commit `df74cb3`** — Monorepo moved from `tmp/integrations/` to `../integrations/`. -```typescript -// tests/js/components/tasks/TaskCard.spec.ts +### 2. File Management System (Mar 1–2) -import { describe, it, expect, vi } from 'vitest' -import { mount } from '@vue/test-utils' -import TaskCard from '@/Components/tasks/TaskCard.vue' +Complete virtual filesystem with multi-disk storage (local, S3, SFTP), agent file tools, permission-based access, and Finder-style UI. -const mockTask = { - id: '1', - title: 'Test Task', - description: 'Test description', - status: 'in_progress', - priority: 'high', - assignee: { - id: 'agent-1', - name: 'Atlas', - type: 'agent', - }, - created_at: '2025-01-31T10:00:00Z', -} +- `WorkspaceFile` and `WorkspaceDisk` models with encrypted disk configs +- `FileSystemService` — upload, write, read, createFolder, move, copy, delete, search, agent home folders +- `AgentPermissionService` — tool/channel/folder/integration permissions with deny→exempt→explicit→default cascade +- 10 agent file tools (list_disks, list_files, read_file, write_file, create_folder, move_file, copy_file, delete_file, search_files, get_file_info) +- FileController + WorkspaceDiskController APIs +- Finder-style Vue UI with grid/list views, drag-drop upload, inline rename, keyboard shortcuts -describe('TaskCard', () => { - it('renders task title and description', () => { - const wrapper = mount(TaskCard, { - props: { task: mockTask }, - }) +### 3. Automation / Script System (Feb 28 – Mar 1) - expect(wrapper.text()).toContain('Test Task') - expect(wrapper.text()).toContain('Test description') - }) +Prompt and Luau script automations with Monaco editor, cron scheduling, run history. - it('shows priority badge', () => { - const wrapper = mount(TaskCard, { - props: { task: mockTask }, - }) +- Two execution modes: **prompt** (agent-driven, costs tokens) and **script** (Luau sandbox, zero cost) +- Auto-disable after 5 consecutive failures +- 6 agent tools: list, get, create, update, delete, run +- `RunScriptAutomationJob` with `ctx` table (automation_id, run_number, last_run_at, schedule) +- Monaco editor with Prompt/Script toggle, cron builder, run history +- Task source labels (Chat, Manual, Automation, Delegated, Agent Ask) - expect(wrapper.find('[data-testid="priority-badge"]').text()).toBe('high') - }) +### 4. Chat UI Polish (Feb 28) - it('shows assignee avatar when assigned', () => { - const wrapper = mount(TaskCard, { - props: { task: mockTask }, - }) +Full chat interface overhaul. - expect(wrapper.find('[data-testid="assignee-avatar"]').exists()).toBe(true) - }) +- Channel list with filter chips (All/Unread/DMs/Channels/External), search, compose dropdown +- Telegram-style message bubbles with grouping, inline timestamps, lightbox +- Rich input with drag-drop upload, @mentions, /commands, formatting toolbar, emoji picker +- Channel info sidebar with collapsible sections, member filters, notification settings - it('hides assignee when unassigned', () => { - const wrapper = mount(TaskCard, { - props: { - task: { ...mockTask, assignee: null }, - }, - }) +### 5. Telegram / File Forwarding (Mar 2) - expect(wrapper.find('[data-testid="assignee-avatar"]').exists()).toBe(false) - }) +Fixed file forwarding to external platforms. - it('emits click event with task', async () => { - const wrapper = mount(TaskCard, { - props: { task: mockTask }, - }) +- Workspace file URLs detected in any format (markdown, bare URL, image embed) +- Files forwarded as proper Telegram documents (not silently dropped) +- PDFs and non-image files sent with `forceDocument` - await wrapper.trigger('click') +### 6. LLM Provider & Token Metrics (Feb 28 – Mar 4) - expect(wrapper.emitted('click')?.[0]).toEqual([mockTask]) - }) -}) -``` +- `TokenMetrics` helper — centralized token/cost calculation across all job types +- `SetsWorkspaceContext` trait — workspace binding for queue jobs +- Multiple custom providers (GLM, Kimi, MiniMax, Codex) +- Analytics dashboard with cost estimation, breakdowns by agent/model/source -### 3.4 Composable Tests +### 7. Security Hardening (Feb 28) -```typescript -// tests/js/composables/useApi.spec.ts +37-file audit commit. -import { describe, it, expect, vi, beforeEach } from 'vitest' -import { useApi } from '@/composables/useApi' -import axios from 'axios' +- IDOR fixes: auth checks on user update, workspace scoping on 14 controllers +- XSS prevention: DOMPurify sanitization via `sanitize.ts`, applied in `useMarkdown.ts` +- Job status resolution: proper priority ordering (sleeping > awaiting_approval > awaiting_delegation > idle) +- ApprovalExecutionService: workspace context binding for approved tools +- Memory leak fixes: event listener cleanup in `usePresence.ts`, `useKeyboardShortcuts.ts` -vi.mock('axios') +### 8. Uncommitted Changes (Current Working Tree) -describe('useApi', () => { - beforeEach(() => { - vi.clearAllMocks() - }) +Multi-account integration settings and additional refinements. - describe('fetchTasks', () => { - it('fetches tasks successfully', async () => { - const mockTasks = [{ id: '1', title: 'Task 1' }] - vi.mocked(axios.get).mockResolvedValueOnce({ data: { data: mockTasks } }) - - const { fetchTasks } = useApi() - const { data, promise } = fetchTasks() - - await promise - - expect(axios.get).toHaveBeenCalledWith('/api/tasks', expect.any(Object)) - expect(data.value).toEqual(mockTasks) - }) - - it('handles fetch error', async () => { - vi.mocked(axios.get).mockRejectedValueOnce(new Error('Network error')) - - const { fetchTasks } = useApi() - const { error, promise } = fetchTasks() - - await promise.catch(() => {}) - - expect(error.value).toBeTruthy() - }) - - it('applies filters correctly', async () => { - vi.mocked(axios.get).mockResolvedValueOnce({ data: { data: [] } }) - - const { fetchTasks } = useApi() - fetchTasks({ status: 'done', assignee_id: 'agent-1' }) - - expect(axios.get).toHaveBeenCalledWith( - '/api/tasks', - expect.objectContaining({ - params: { status: 'done', assignee_id: 'agent-1' }, - }) - ) - }) - }) - - describe('createTask', () => { - it('creates task and returns data', async () => { - const newTask = { id: '2', title: 'New Task' } - vi.mocked(axios.post).mockResolvedValueOnce({ data: { data: newTask } }) - - const { createTask } = useApi() - const result = await createTask({ - title: 'New Task', - channel_id: 'ch-1', - }) - - expect(axios.post).toHaveBeenCalledWith('/api/tasks', { - title: 'New Task', - channel_id: 'ch-1', - }) - expect(result).toEqual(newTask) - }) - }) -}) -``` - -### 3.5 Package.json Scripts Update - -```json -{ - "scripts": { - "dev": "vite", - "build": "vite build", - "typecheck": "vue-tsc --noEmit", - "test": "vitest", - "test:ui": "vitest --ui", - "test:coverage": "vitest run --coverage", - "lint": "eslint resources/js --ext .vue,.ts --fix" - } -} -``` - ---- - -## 4. End-to-End Testing - -### 4.1 Existing Dusk Tests (Enhance) - -Current coverage: -- ✅ Authentication flows -- ✅ Dashboard -- ✅ Navigation -- ✅ Chat -- ✅ Tasks -- ✅ Documents -- ✅ Approvals -- ✅ Profile - -### 4.2 Critical User Journeys to Add - -```php -// tests/Browser/AgentConfigurationTest.php - -namespace Tests\Browser; - -use App\Models\User; -use Laravel\Dusk\Browser; -use Tests\DuskTestCase; - -class AgentConfigurationTest extends DuskTestCase -{ - public function test_user_can_edit_agent_personality(): void - { - $this->browse(function (Browser $browser) { - $user = $this->createUserAndLogin($browser); - $agent = $this->createAgent('writer'); - - $browser->visit("/agent/{$agent->id}") - ->waitForText('Personality') - ->click('@personality-tab') - ->waitFor('@personality-editor') - ->type('@personality-editor', 'You are a helpful assistant...') - ->click('@save-personality') - ->waitForText('Saved') - ->assertSee('Saved'); - }); - } - - public function test_user_can_manage_agent_capabilities(): void - { - $this->browse(function (Browser $browser) { - $user = $this->createUserAndLogin($browser); - $agent = $this->createAgent('coder'); - - $browser->visit("/agent/{$agent->id}") - ->click('@capabilities-tab') - ->waitFor('@capability-list') - ->click('@toggle-code-execution') - ->click('@save-capabilities') - ->waitForText('Saved'); - }); - } - - public function test_user_can_view_agent_memory(): void - { - $this->browse(function (Browser $browser) { - $user = $this->createUserAndLogin($browser); - $agent = $this->createAgent('researcher'); - - $browser->visit("/agent/{$agent->id}") - ->click('@memory-tab') - ->waitFor('@session-list') - ->assertSee('Current Session'); - }); - } -} -``` - -### 4.3 E2E Test Checklist - -| Flow | Status | Test File | -|------|--------|-----------| -| User registration | ✅ | AuthenticationTest | -| User login | ✅ | AuthenticationTest | -| Password reset | ✅ | AuthenticationTest | -| Dashboard loads | ✅ | DashboardTest | -| Create task | ✅ | TasksTest | -| Assign task to agent | ⬜ | TasksTest | -| Complete task | ⬜ | TasksTest | -| Send chat message | ✅ | ChatTest | -| Create channel | ⬜ | ChatTest | -| Request approval | ⬜ | ApprovalsTest | -| Approve request | ⬜ | ApprovalsTest | -| Edit agent personality | ⬜ | AgentConfigurationTest | -| Edit agent capabilities | ⬜ | AgentConfigurationTest | -| View agent memory | ⬜ | AgentConfigurationTest | -| Create document | ⬜ | DocumentsTest | -| Add document comment | ⬜ | DocumentsTest | - ---- - -## 5. CI/CD Pipeline - -### 5.1 GitHub Actions Workflow - -```yaml -# .github/workflows/ci.yml - -name: CI - -on: - push: - branches: [main, develop] - pull_request: - branches: [main, develop] - -env: - PHP_VERSION: '8.3' - NODE_VERSION: '20' - -jobs: - # ==================== PHP TESTS ==================== - php-tests: - name: PHP Tests - runs-on: ubuntu-latest - - services: - postgres: - image: postgres:16 - env: - POSTGRES_USER: opencompany - POSTGRES_PASSWORD: secret - POSTGRES_DB: opencompany_test - ports: - - 5432:5432 - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - - steps: - - uses: actions/checkout@v4 - - - name: Setup PHP - uses: shivammathur/setup-php@v2 - with: - php-version: ${{ env.PHP_VERSION }} - extensions: pdo, pgsql, pdo_pgsql, redis - coverage: xdebug - - - name: Cache Composer dependencies - uses: actions/cache@v4 - with: - path: vendor - key: composer-${{ hashFiles('composer.lock') }} - - - name: Install Composer dependencies - run: composer install --no-interaction --prefer-dist - - - name: Copy environment file - run: cp .env.example .env.testing - - - name: Generate application key - run: php artisan key:generate --env=testing - - - name: Run migrations - run: php artisan migrate --env=testing --force - env: - DB_CONNECTION: pgsql - DB_HOST: 127.0.0.1 - DB_PORT: 5432 - DB_DATABASE: opencompany_test - DB_USERNAME: opencompany - DB_PASSWORD: secret - - - name: Run PHPUnit tests - run: php artisan test --coverage-clover=coverage.xml - env: - DB_CONNECTION: pgsql - DB_HOST: 127.0.0.1 - DB_PORT: 5432 - DB_DATABASE: opencompany_test - DB_USERNAME: opencompany - DB_PASSWORD: secret - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 - with: - files: coverage.xml - flags: php - - # ==================== FRONTEND TESTS ==================== - frontend-tests: - name: Frontend Tests - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: ${{ env.NODE_VERSION }} - cache: 'npm' - - - name: Install dependencies - run: npm ci - - - name: Run TypeScript check - run: npm run typecheck - - - name: Run Vitest - run: npm run test:coverage - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 - with: - files: coverage/lcov.info - flags: frontend - - # ==================== STATIC ANALYSIS ==================== - static-analysis: - name: Static Analysis - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - name: Setup PHP - uses: shivammathur/setup-php@v2 - with: - php-version: ${{ env.PHP_VERSION }} - - - name: Install Composer dependencies - run: composer install --no-interaction --prefer-dist - - - name: Run PHPStan - run: vendor/bin/phpstan analyse --memory-limit=2G - - - name: Run Pint (code style) - run: vendor/bin/pint --test - - # ==================== DUSK TESTS ==================== - dusk-tests: - name: Browser Tests - runs-on: ubuntu-latest - needs: [php-tests, frontend-tests] - - services: - postgres: - image: postgres:16 - env: - POSTGRES_USER: opencompany - POSTGRES_PASSWORD: secret - POSTGRES_DB: opencompany_test - ports: - - 5432:5432 - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - - steps: - - uses: actions/checkout@v4 - - - name: Setup PHP - uses: shivammathur/setup-php@v2 - with: - php-version: ${{ env.PHP_VERSION }} - - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: ${{ env.NODE_VERSION }} - cache: 'npm' - - - name: Install Composer dependencies - run: composer install --no-interaction --prefer-dist - - - name: Install NPM dependencies - run: npm ci - - - name: Build assets - run: npm run build - - - name: Setup environment - run: | - cp .env.dusk.ci .env - php artisan key:generate - - - name: Run migrations - run: php artisan migrate --force - env: - DB_CONNECTION: pgsql - DB_HOST: 127.0.0.1 - DB_PORT: 5432 - DB_DATABASE: opencompany_test - DB_USERNAME: opencompany - DB_PASSWORD: secret - - - name: Install Chrome Driver - run: php artisan dusk:chrome-driver --detect - - - name: Start Chrome Driver - run: ./vendor/laravel/dusk/bin/chromedriver-linux & - - - name: Run Laravel Server - run: php artisan serve --no-reload & - - - name: Run Dusk Tests - run: php artisan dusk - env: - APP_URL: http://127.0.0.1:8000 - DB_CONNECTION: pgsql - DB_HOST: 127.0.0.1 - DB_PORT: 5432 - DB_DATABASE: opencompany_test - DB_USERNAME: opencompany - DB_PASSWORD: secret - - - name: Upload Screenshots - if: failure() - uses: actions/upload-artifact@v4 - with: - name: dusk-screenshots - path: tests/Browser/screenshots - - - name: Upload Console Logs - if: failure() - uses: actions/upload-artifact@v4 - with: - name: dusk-console - path: tests/Browser/console -``` - -### 5.2 Environment Files for CI - -```env -# .env.dusk.ci - -APP_NAME=OpenCompany -APP_ENV=testing -APP_KEY= -APP_DEBUG=true -APP_URL=http://127.0.0.1:8000 - -DB_CONNECTION=pgsql -DB_HOST=127.0.0.1 -DB_PORT=5432 -DB_DATABASE=opencompany_test -DB_USERNAME=opencompany -DB_PASSWORD=secret - -BROADCAST_CONNECTION=log -CACHE_STORE=array -QUEUE_CONNECTION=sync -SESSION_DRIVER=array -``` - -### 5.3 Quality Gates - -| Check | Requirement | Enforcement | -|-------|-------------|-------------| -| PHPUnit Tests | 100% pass | Block merge | -| Vitest Tests | 100% pass | Block merge | -| Code Coverage | ≥80% | Warning | -| PHPStan | Level 5, 0 errors | Block merge | -| Pint | No style violations | Block merge | -| Dusk Tests | 100% pass | Block merge | -| TypeScript | No errors | Block merge | +- `IntegrationSetting` model: multi-account support migration +- `DynamicProviderResolver`, `GlmPrismGateway`: provider resolution updates +- `ToolRegistry`: further refinements +- `McpServerRegistrar`, `McpToolProvider`: MCP tool registration updates +- `AgentChatService`, `IntegrationController`, `AgentController`: service updates +- `IntegrationSettingCredentialResolver`: credential resolution updates +- New `docs/ecosystem/` directory --- -## 6. Test Data Management - -### 6.1 Existing Factories (13) - -| Factory | Model | Status | -|---------|-------|--------| -| UserFactory | User | ✅ Exists | -| ChannelFactory | Channel | ✅ Exists | -| ChannelMemberFactory | ChannelMember | ✅ Exists | -| MessageFactory | Message | ✅ Exists | -| DocumentFactory | Document | ✅ Exists | -| DocumentVersionFactory | DocumentVersion | ✅ Exists | -| DocumentCommentFactory | DocumentComment | ✅ Exists | -| ApprovalRequestFactory | ApprovalRequest | ✅ Exists | -| NotificationFactory | Notification | ✅ Exists | -| DirectMessageFactory | DirectMessage | ✅ Exists | -| ListItemCommentFactory | ListItemComment | ✅ Exists | -| ActivityFactory | Activity | ✅ Exists | -| TaskFactory | Task | ✅ Exists | - -### 6.2 Factories to Add - -| Factory | Model | Priority | -|---------|-------|----------| -| AgentConfigurationFactory | AgentConfiguration | High | -| AgentCapabilityFactory | AgentCapability | High | -| AgentSettingsFactory | AgentSettings | High | -| AgentSessionFactory | AgentSession | High | -| AgentMemoryFactory | AgentMemory | Medium | -| ListAutomationRuleFactory | ListAutomationRule | Medium | -| CalendarEventFactory | CalendarEvent | Low | -| DataTableFactory | DataTable | Low | - -### 6.3 Test Scenarios - -```php -// database/seeders/TestScenarioSeeder.php - -namespace Database\Seeders; - -use App\Models\User; -use App\Models\Task; -use App\Models\Channel; -use Illuminate\Database\Seeder; - -class TestScenarioSeeder extends Seeder -{ - public function run(): void - { - // Scenario 1: Busy agent with multiple tasks - $busyAgent = User::factory()->create([ - 'type' => 'agent', - 'agent_type' => 'coder', - 'status' => 'working', - ]); - Task::factory()->count(5)->create([ - 'assignee_id' => $busyAgent->id, - 'status' => 'in_progress', - ]); - - // Scenario 2: Channel with active discussion - $channel = Channel::factory() - ->hasMembers(5) - ->hasMessages(50) - ->create(); - - // Scenario 3: Approval workflow in progress - $approval = ApprovalRequest::factory()->create([ - 'status' => 'pending', - 'type' => 'deployment', - 'amount' => 100, - ]); - - // Scenario 4: High-priority backlog - Task::factory()->count(10)->create([ - 'status' => 'backlog', - 'priority' => 'urgent', - ]); - } -} -``` - ---- - -## 7. Code Quality - -### 7.1 PHPStan Configuration - -```neon -# phpstan.neon - -includes: - - vendor/larastan/larastan/extension.neon - -parameters: - level: 5 - paths: - - app/ - excludePaths: - - app/Console/Kernel.php - checkMissingIterableValueType: false - checkGenericClassInNonGenericObjectType: false -``` - -### 7.2 Laravel Pint Configuration - -```json -// pint.json - -{ - "preset": "laravel", - "rules": { - "simplified_null_return": true, - "blank_line_before_statement": { - "statements": ["return"] - }, - "not_operator_with_successor_space": true, - "ordered_imports": { - "sort_algorithm": "alpha" - } - } -} -``` - -### 7.3 ESLint Configuration - -```javascript -// eslint.config.js - -import eslint from '@eslint/js' -import tseslint from 'typescript-eslint' -import vue from 'eslint-plugin-vue' - -export default [ - eslint.configs.recommended, - ...tseslint.configs.recommended, - ...vue.configs['flat/recommended'], - { - rules: { - 'vue/multi-word-component-names': 'off', - '@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_' }], - }, - }, -] -``` - ---- - -## 8. Performance Testing - -### 8.1 Load Testing (k6) - -```javascript -// tests/load/api-load-test.js - -import http from 'k6/http' -import { check, sleep } from 'k6' - -export const options = { - stages: [ - { duration: '30s', target: 20 }, // Ramp up - { duration: '1m', target: 20 }, // Stay at 20 users - { duration: '30s', target: 50 }, // Ramp up more - { duration: '1m', target: 50 }, // Stay at 50 users - { duration: '30s', target: 0 }, // Ramp down - ], - thresholds: { - http_req_duration: ['p(95)<500'], // 95% of requests under 500ms - http_req_failed: ['rate<0.01'], // Less than 1% failures - }, -} - -const BASE_URL = __ENV.BASE_URL || 'http://localhost:8000' - -export default function () { - // Fetch tasks - const tasksRes = http.get(`${BASE_URL}/api/tasks`, { - headers: { Authorization: `Bearer ${__ENV.API_TOKEN}` }, - }) - check(tasksRes, { - 'tasks status is 200': (r) => r.status === 200, - 'tasks response time < 500ms': (r) => r.timings.duration < 500, - }) - - sleep(1) +## Risk Assessment - // Fetch channels - const channelsRes = http.get(`${BASE_URL}/api/channels`, { - headers: { Authorization: `Bearer ${__ENV.API_TOKEN}` }, - }) - check(channelsRes, { - 'channels status is 200': (r) => r.status === 200, - }) - - sleep(1) -} -``` - -### 8.2 Database Performance Testing - -```php -// tests/Performance/DatabaseQueryTest.php - -namespace Tests\Performance; - -use App\Models\Task; -use App\Models\Message; -use Illuminate\Support\Facades\DB; -use Tests\TestCase; - -class DatabaseQueryTest extends TestCase -{ - public function test_task_list_query_is_efficient(): void - { - Task::factory()->count(1000)->create(); - - DB::enableQueryLog(); - - Task::with(['assignee', 'creator', 'channel']) - ->orderBy('created_at', 'desc') - ->paginate(50); - - $queries = DB::getQueryLog(); - - // Should be N+1 free (max 4 queries: tasks + 3 relationships) - $this->assertLessThanOrEqual(4, count($queries)); - - // Each query should be fast - foreach ($queries as $query) { - $this->assertLessThan(100, $query['time'], "Slow query: {$query['query']}"); - } - } - - public function test_message_list_with_threads_is_efficient(): void - { - $channel = Channel::factory()->create(); - Message::factory()->count(500)->create(['channel_id' => $channel->id]); - - DB::enableQueryLog(); - - Message::with(['author', 'reactions', 'attachments']) - ->where('channel_id', $channel->id) - ->orderBy('created_at', 'desc') - ->paginate(100); - - $queries = DB::getQueryLog(); - - $this->assertLessThanOrEqual(5, count($queries)); - } -} -``` +| Area | Risk | Why | +|------|------|-----| +| Integration refactor | **High** | All integrations reorganized. Missing `../integrations/` breaks everything. | +| File management | **High** | New subsystem with storage ops, permissions, agent tools. Data loss potential. | +| Automation / Scripts | **High** | Luau sandbox execution, auto-disable, destructive keep_history=false. | +| Security hardening | **High** | 14 controllers rescoped — any regression is an auth bypass or false denial. | +| Telegram forwarding | **Medium** | Single-try, echo prevention, file handling. | +| Chat UI | **Medium** | XSS surface (mitigated), UX edge cases. | +| Token metrics | **Low** | Utility class, but billing accuracy matters. | +| Uncommitted changes | **Medium** | Multi-account integration, provider changes — needs verification. | --- -## 9. Implementation Roadmap - -### Week 1: Foundation - -| Day | Task | Owner | -|-----|------|-------| -| 1-2 | Set up GitHub Actions CI pipeline | DevOps | -| 2-3 | Configure PHPStan + Pint | Backend | -| 3-4 | Set up Vitest for frontend | Frontend | -| 4-5 | Add coverage reporting to CI | DevOps | - -**Deliverables:** -- [ ] `.github/workflows/ci.yml` working -- [ ] PHPStan passing at level 5 -- [ ] Vitest running with coverage -- [ ] Codecov integration active - -### Week 2: Backend API Tests - -| Day | Task | Controllers | -|-----|------|-------------| -| 1 | TaskController tests | Task CRUD, reorder | -| 2 | ChannelController, MessageController tests | Chat functionality | -| 3 | UserController, ApprovalController tests | Users, approvals | -| 4 | DocumentController tests | Documents CRUD | -| 5 | Remaining controller tests | 20+ controllers | +## Test Environment Setup -**Deliverables:** -- [ ] 100% API endpoint coverage -- [ ] All validation rules tested -- [ ] Authorization tests complete - -### Week 3: Backend Model Tests - -| Day | Task | Models | -|-----|------|--------| -| 1 | User, Task models | Core models | -| 2 | Channel, Message models | Chat models | -| 3 | Document, Approval models | Document/workflow | -| 4 | Remaining models | 15+ models | -| 5 | Service layer tests | Critical services | - -**Deliverables:** -- [ ] 100% model coverage -- [ ] All scopes tested -- [ ] All relationships verified - -### Week 4: Frontend Tests - -| Day | Task | Components | -|-----|------|------------| -| 1 | Shared components | Button, Modal, Badge, etc. | -| 2 | Chat components | Message, MessageInput, etc. | -| 3 | Task components | TaskCard, TaskBoard, etc. | -| 4 | Composables | useApi, useRealtime, etc. | -| 5 | Integration & cleanup | Cross-component tests | - -**Deliverables:** -- [ ] 70% component coverage -- [ ] 100% composable coverage -- [ ] All tests passing in CI - -### Ongoing: Maintenance - -| Activity | Frequency | -|----------|-----------| -| New feature tests | With each PR | -| Coverage review | Weekly | -| Flaky test fixes | As needed | -| CI optimization | Monthly | -| Load testing | Before major releases | +1. Ensure `../integrations/` sibling directory exists and is populated +2. `composer install` — verify no path resolution errors +3. `php artisan migrate` — run all pending migrations (including `2026_04_05` multi-account) +4. `php artisan config:clear && php artisan cache:clear` +5. Build frontend: `npm run build` +6. Create a test workspace with at least 2 agents +7. Configure at least one external channel (Telegram) for forwarding tests +8. Have test credentials for at least one integration (ClickUp, Google, etc.) +9. Have at least one custom LLM provider configured (GLM, Kimi, or MiniMax) --- -## Quick Reference - -### Run Commands - -```bash -# Backend tests -php artisan test # All tests -php artisan test --filter=TaskTest # Specific test -php artisan test --coverage # With coverage - -# Frontend tests -npm run test # Watch mode -npm run test:coverage # With coverage -npm run test -- TaskCard.spec.ts # Specific file - -# Browser tests -php artisan dusk # All browser tests -php artisan dusk --filter=TasksTest # Specific test - -# Code quality -vendor/bin/phpstan analyse # Static analysis -vendor/bin/pint # Code style fix -npm run lint # ESLint -npm run typecheck # TypeScript -``` - -### Test Helpers - -```php -// Create authenticated user -$user = User::factory()->create(); -$this->actingAs($user); - -// Create agent -$agent = User::factory()->create([ - 'type' => 'agent', - 'agent_type' => 'coder', -]); - -// Assert database state -$this->assertDatabaseHas('tasks', ['id' => $task->id]); -$this->assertDatabaseMissing('tasks', ['id' => $deletedId]); - -// Assert JSON structure -$response->assertJsonStructure(['data' => ['id', 'title']]); -$response->assertJsonCount(10, 'data'); -``` - -```typescript -// Mount component with props -const wrapper = mount(Component, { - props: { task: mockTask }, -}) - -// Find elements -wrapper.find('[data-testid="submit-btn"]') -wrapper.findComponent(Button) +## Regression Smoke Test -// Assert text/visibility -expect(wrapper.text()).toContain('Expected') -expect(wrapper.find('.error').exists()).toBe(false) +Before detailed testing, confirm nothing is fundamentally broken: -// Trigger events -await wrapper.trigger('click') -await wrapper.find('input').setValue('new value') -``` +- [ ] App loads without errors (check `storage/logs/laravel.log`) +- [ ] `php artisan tinker` — no autoloading errors +- [ ] Login works +- [ ] Workspace switcher works (sidebar agents/channels refresh) +- [ ] Agent responds to a basic chat message +- [ ] Agent can use built-in tools (tasks, memory, lists, tables) +- [ ] `php artisan test` — test suite passes diff --git a/docs/todo.md b/docs/todo.md deleted file mode 100644 index f65126f..0000000 --- a/docs/todo.md +++ /dev/null @@ -1,38 +0,0 @@ -# Docs Feature TODO - -## Quick wins - -- [ ] **Document starring/pinning** — Add backend fields + API endpoints, frontend toggle in tree items. Mark important docs for quick access. -- [ ] **Content search** — Extend search to full-text content, not just titles/authors. -- [ ] **Publish controls** — Wire up existing `is_published`/`published_at` backend fields to frontend UI. - -## Medium effort - -- [ ] **Permission management UI** — Share modal with user picker for managing viewer/editor roles after doc creation. -- [ ] **Document sharing** — Share button, copy link, public/private toggle. -- [ ] **Fix DocumentAttachments API wiring** — Replace placeholder stub functions with actual `useApi` composable methods. - -## Bigger features - -- [ ] **Inline/selection-based comments** — Highlight text and comment on specific passages instead of doc-level only. -- [ ] **Document templates** — Pre-built templates for common doc types (meeting notes, specs, proposals, etc.). -- [ ] **Bulk operations** — Multi-select docs for move, delete, export, etc. - ---- - -# Agent System TODO - -## Budget Approval Type - -The `ApprovalRequest` model already has an `amount` column and `type: 'budget'` is defined in the migration, but the budget approval flow is not yet implemented. - -### What it does -Agents requesting approval when the estimated cost of an action exceeds a configurable threshold. This prevents runaway API costs from autonomous agents. - -### What needs building -- [ ] **Cost threshold config** — Per-agent or per-workspace `cost_threshold` setting (e.g., $5.00). Could be a column on `users` table or a workspace-level setting. -- [ ] **Cost estimation in tool wrappers** — Before executing an expensive tool (LLM calls, external API calls), estimate the cost and compare against the threshold. -- [ ] **Budget approval creation** — When estimated cost exceeds threshold, create an `ApprovalRequest` with `type: 'budget'` and `amount` set to the estimated cost. -- [ ] **UI amount display** — Show the `amount` field on the Approvals page for budget-type requests, formatted as currency. The Approvals page already has a `formatCurrency` helper and renders `approval.amount` when present. -- [ ] **Running cost tracking** — Track cumulative costs per agent per day/session. Compare running total against threshold, not just individual actions. -- [ ] **Cost threshold UI** — Add cost threshold setting to AgentSettingsPanel (alongside behavior mode and sleep controls). diff --git a/resources/lua-docs/_overview.md b/resources/lua-docs/_overview.md index 166c60f..f9951a8 100644 --- a/resources/lua-docs/_overview.md +++ b/resources/lua-docs/_overview.md @@ -8,6 +8,9 @@ Lua scripts in OpenCompany run in a sandboxed environment with access to workspa app.{namespace}.* — Internal workspace apps app.integrations.{name}.* — Integration-specific tools app.mcp.{server}.* — MCP server tools +json.decode(string) / json.encode(value) — JSON parsing and serialization +regex.match(s, p) / regex.match_all(s, p) — PCRE regex matching +regex.gsub(s, p, r) — PCRE regex substitution ``` Available namespaces are determined by the agent's permissions. Use `lua_list_docs` to see what's available. @@ -67,6 +70,79 @@ local sites = dump(app.integrations.plausible.list_sites()) -- prints the table contents, then continues with sites as a variable ``` +### `json.decode(string)` + +Parses a JSON string into a Lua table. Uses PHP's `json_decode` under the hood, so it handles all standard JSON types including nested objects and arrays. + +```lua +-- Parse a JSON string +local data = json.decode('{"items": [1, 2, 3]}') +print(data.items[1]) -- 1 + +-- Parse JSON received from an integration +local raw = app.http.get({url = "https://api.example.com/data"}) +local parsed = json.decode(raw.body) +print(parsed.status) +``` + +Raises an error on invalid JSON. Use `pcall` for error handling: + +```lua +local ok, data = pcall(json.decode, raw_string) +if not ok then + print("Invalid JSON: " .. tostring(data)) +end +``` + +### `json.encode(value)` + +Serializes a Lua table (or any value) to a JSON string. Produces pretty-printed output with unescaped Unicode. + +```lua +print(json.encode({name = "test", count = 42})) +-- { +-- "name": "test", +-- "count": 42 +-- } +``` + +### `regex.match(subject, pattern [, flags])` + +Tests whether `subject` matches the PCRE `pattern`. Returns a table of captures on match, or `nil` on no match. Supports all PCRE features (lookaheads, non-greedy quantifiers, Unicode properties, named groups) that Lua's built-in patterns lack. + +```lua +local m = regex.match("hello world 42", "(\\w+) (\\d+)") +-- m = {"world 42", "world", "42"} (full match, then captures) + +local m = regex.match("no digits here", "\\d+") +-- m = nil + +-- Named capture groups +local m = regex.match("price: $19.99", "(?P\\$)(?P[\\d.]+)") +-- m = {"$19.99", "$", "19.99"} +``` + +### `regex.match_all(subject, pattern [, flags])` + +Returns all matches of `pattern` in `subject`. Default flag behavior (`PREG_PATTERN_ORDER`) returns captures grouped by group index. + +```lua +local matches = regex.match_all("foo123bar456baz", "(\\d+)") +-- matches[1] = all full matches, matches[2] = first capture group, etc. +``` + +### `regex.gsub(subject, pattern, replacement [, limit])` + +Replaces all occurrences of `pattern` in `subject` with `replacement`. Returns the resulting string. Supports PCRE backreferences (`$1`, `$2`, etc.). + +```lua +local cleaned = regex.gsub(" hello world ", "\\s+", " ") +-- cleaned = " hello world " + +local s = regex.gsub("aaa", "a", "b", 2) +-- s = "bba" +``` + ## Return Values All `app.*` functions return Lua tables (objects/arrays) on success. On failure, they return `nil, error_message`. Use `pcall` for error handling: @@ -85,4 +161,4 @@ end - **context** — The `ctx` object available in automation scripts - **errors** — Error handling patterns and common error codes -- **examples** — Complete real-world automation examples +- **examples** — Complete real-world automation examples \ No newline at end of file diff --git a/routes/api.php b/routes/api.php index d1c8226..1a34b81 100644 --- a/routes/api.php +++ b/routes/api.php @@ -347,6 +347,11 @@ Route::post('/integrations/{id}/disconnect', [IntegrationController::class, 'disconnect']); Route::post('/integrations/{id}/fetch-models', [IntegrationController::class, 'fetchModels']); Route::post('/integrations/{id}/setup-webhook', [IntegrationController::class, 'setupWebhook']); + Route::get('/integrations/{id}/accounts', [IntegrationController::class, 'listAccounts']); + Route::post('/integrations/{id}/accounts', [IntegrationController::class, 'createAccount']); + Route::put('/integrations/{id}/accounts/{alias}', [IntegrationController::class, 'updateAccount']); + Route::delete('/integrations/{id}/accounts/{alias}', [IntegrationController::class, 'deleteAccount']); + Route::post('/integrations/{id}/accounts/{alias}/default', [IntegrationController::class, 'setDefaultAccount']); Route::get('/integrations/external-identities', [IntegrationController::class, 'externalIdentities']); Route::post('/integrations/link-user', [IntegrationController::class, 'linkExternalUser']); Route::delete('/integrations/link-user/{identityId}', [IntegrationController::class, 'unlinkExternalUser']); diff --git a/tests/Feature/ChannelConversationLoaderTest.php b/tests/Feature/ChannelConversationLoaderTest.php index 935fd70..2a8d95d 100644 --- a/tests/Feature/ChannelConversationLoaderTest.php +++ b/tests/Feature/ChannelConversationLoaderTest.php @@ -28,7 +28,7 @@ protected function setUp(): void public function test_loads_messages_as_sdk_objects(): void { $human = User::factory()->create(['name' => 'Alice', 'type' => 'human']); - $agent = User::factory()->create(['name' => 'Logic', 'type' => 'agent', 'brain' => 'glm-coding:glm-4.7']); + $agent = User::factory()->create(['name' => 'Logic', 'type' => 'agent', 'brain' => 'z:glm-5.1']); $channel = Channel::factory()->create(); Message::create([ @@ -69,7 +69,7 @@ public function test_loads_messages_as_sdk_objects(): void public function test_skips_empty_messages(): void { $human = User::factory()->create(['type' => 'human']); - $agent = User::factory()->create(['type' => 'agent', 'brain' => 'glm-coding:glm-4.7']); + $agent = User::factory()->create(['type' => 'agent', 'brain' => 'z:glm-5.1']); $channel = Channel::factory()->create(); Message::create([ @@ -96,7 +96,7 @@ public function test_skips_empty_messages(): void public function test_loads_recent_messages(): void { $human = User::factory()->create(['type' => 'human']); - $agent = User::factory()->create(['type' => 'agent', 'brain' => 'glm-coding:glm-4.7']); + $agent = User::factory()->create(['type' => 'agent', 'brain' => 'z:glm-5.1']); $channel = Channel::factory()->create(); for ($i = 0; $i < 10; $i++) { @@ -116,7 +116,7 @@ public function test_loads_recent_messages(): void public function test_returns_empty_for_no_messages(): void { - $agent = User::factory()->create(['type' => 'agent', 'brain' => 'glm-coding:glm-4.7']); + $agent = User::factory()->create(['type' => 'agent', 'brain' => 'z:glm-5.1']); $channel = Channel::factory()->create(); $messages = $this->loader->load($channel->id, $agent); @@ -126,7 +126,7 @@ public function test_returns_empty_for_no_messages(): void public function test_returns_empty_when_agent_cannot_access_channel(): void { - $agent = User::factory()->create(['type' => 'agent', 'brain' => 'glm-coding:glm-4.7']); + $agent = User::factory()->create(['type' => 'agent', 'brain' => 'z:glm-5.1']); $allowedChannel = Channel::factory()->create(); $restrictedChannel = Channel::factory()->create(); @@ -156,7 +156,7 @@ public function test_returns_empty_when_agent_cannot_access_channel(): void public function test_loads_messages_when_agent_has_no_channel_restrictions(): void { // Agent with no channel permissions → unrestricted (backward compatible) - $agent = User::factory()->create(['type' => 'agent', 'brain' => 'glm-coding:glm-4.7']); + $agent = User::factory()->create(['type' => 'agent', 'brain' => 'z:glm-5.1']); $channel = Channel::factory()->create(); $human = User::factory()->create(['type' => 'human']); diff --git a/tests/Feature/DynamicProviderResolverTest.php b/tests/Feature/DynamicProviderResolverTest.php index 16898c4..ba0bbd3 100644 --- a/tests/Feature/DynamicProviderResolverTest.php +++ b/tests/Feature/DynamicProviderResolverTest.php @@ -47,11 +47,11 @@ public function test_resolves_openai_provider(): void $this->assertEquals('gpt-4o', $result['model']); } - public function test_resolves_glm_provider_with_integration(): void + public function test_resolves_z_provider_with_integration(): void { IntegrationSetting::create([ 'id' => 'int-1', - 'integration_id' => 'glm-coding', + 'integration_id' => 'z', 'enabled' => true, 'workspace_id' => $this->workspace->id, 'config' => [ @@ -62,28 +62,28 @@ public function test_resolves_glm_provider_with_integration(): void $agent = User::factory()->create([ 'type' => 'agent', - 'brain' => 'glm-coding:glm-4.7', + 'brain' => 'z:glm-5.1', ]); $result = $this->resolver->resolve($agent); - $this->assertEquals('glm-coding', $result['provider']); - $this->assertEquals('glm-4.7', $result['model']); + $this->assertEquals('z', $result['provider']); + $this->assertEquals('glm-5.1', $result['model']); // Verify Prism config was registered on the provider variant key - $this->assertNotNull(config('prism.providers.glm-coding')); - $this->assertEquals('test-api-key', config('prism.providers.glm-coding.api_key')); + $this->assertNotNull(config('prism.providers.z')); + $this->assertEquals('test-api-key', config('prism.providers.z.api_key')); // Verify AI SDK config was registered with custom driver - $this->assertNotNull(config('ai.providers.glm-coding')); - $this->assertEquals('glm-coding', config('ai.providers.glm-coding.driver')); + $this->assertNotNull(config('ai.providers.z')); + $this->assertEquals('z', config('ai.providers.z.driver')); } - public function test_throws_for_unconfigured_glm_provider(): void + public function test_throws_for_unconfigured_z_provider(): void { $agent = User::factory()->create([ 'type' => 'agent', - 'brain' => 'glm:glm-4-plus', + 'brain' => 'z-api:glm-5.1', ]); $this->expectException(InvalidArgumentException::class); @@ -105,11 +105,11 @@ public function test_throws_for_unknown_provider(): void $this->resolver->resolve($agent); } - public function test_defaults_to_glm_coding_when_no_brain(): void + public function test_defaults_to_z_when_no_brain(): void { IntegrationSetting::create([ 'id' => 'int-1', - 'integration_id' => 'glm-coding', + 'integration_id' => 'z', 'enabled' => true, 'workspace_id' => $this->workspace->id, 'config' => [ @@ -125,7 +125,7 @@ public function test_defaults_to_glm_coding_when_no_brain(): void $result = $this->resolver->resolve($agent); - $this->assertEquals('glm-coding', $result['provider']); - $this->assertEquals('glm-4.7', $result['model']); + $this->assertEquals('z', $result['provider']); + $this->assertEquals('glm-5.1', $result['model']); } } diff --git a/tests/Feature/OpenCompanyAgentTest.php b/tests/Feature/OpenCompanyAgentTest.php index 1bac1c1..dd80fad 100644 --- a/tests/Feature/OpenCompanyAgentTest.php +++ b/tests/Feature/OpenCompanyAgentTest.php @@ -3,22 +3,34 @@ namespace Tests\Feature; use App\Agents\OpenCompanyAgent; +use App\Jobs\IndexDocumentJob; +use App\Models\Channel; use App\Models\Document; +use App\Models\Task; use App\Models\User; use App\Services\AgentDocumentService; +use Illuminate\Support\Facades\Bus; use Illuminate\Foundation\Testing\RefreshDatabase; +use Illuminate\Support\Str; use Tests\TestCase; class OpenCompanyAgentTest extends TestCase { use RefreshDatabase; + protected function setUp(): void + { + parent::setUp(); + + Bus::fake([IndexDocumentJob::class]); + } + public function test_instructions_assembled_from_identity_files(): void { $agent = User::factory()->create([ 'type' => 'agent', 'agent_type' => 'coder', - 'brain' => 'anthropic:claude-sonnet-4-5-20250929', + 'brain' => 'codex:gpt-5.3-codex', ]); // Create identity document structure @@ -37,7 +49,7 @@ public function test_instructions_assembled_from_identity_files(): void $this->assertStringContainsString('TestBot', $instructions); $this->assertStringContainsString('INSTRUCTIONS.md', $instructions); $this->assertStringContainsString('Be helpful and accurate', $instructions); - $this->assertStringContainsString('Available Tools', $instructions); + $this->assertStringContainsString('## Tools', $instructions); } public function test_fallback_instructions_when_no_documents(): void @@ -45,7 +57,7 @@ public function test_fallback_instructions_when_no_documents(): void $agent = User::factory()->create([ 'name' => 'TestAgent', 'type' => 'agent', - 'brain' => 'anthropic:claude-sonnet-4-5-20250929', + 'brain' => 'codex:gpt-5.3-codex', ]); $agentInstance = OpenCompanyAgent::for($agent, 'channel-1'); @@ -60,20 +72,20 @@ public function test_provider_and_model_resolved_from_brain(): void { $agent = User::factory()->create([ 'type' => 'agent', - 'brain' => 'anthropic:claude-sonnet-4-5-20250929', + 'brain' => 'codex:gpt-5.3-codex', ]); $agentInstance = OpenCompanyAgent::for($agent, 'channel-1'); - $this->assertEquals('anthropic', $agentInstance->provider()); - $this->assertEquals('claude-sonnet-4-5-20250929', $agentInstance->model()); + $this->assertEquals('codex', $agentInstance->provider()); + $this->assertEquals('gpt-5.3-codex', $agentInstance->model()); } public function test_tools_returned(): void { $agent = User::factory()->create([ 'type' => 'agent', - 'brain' => 'anthropic:claude-sonnet-4-5-20250929', + 'brain' => 'codex:gpt-5.3-codex', ]); $agentInstance = OpenCompanyAgent::for($agent, 'channel-1'); @@ -90,7 +102,7 @@ public function test_fake_prevents_real_api_calls(): void $agent = User::factory()->create([ 'type' => 'agent', - 'brain' => 'anthropic:claude-sonnet-4-5-20250929', + 'brain' => 'codex:gpt-5.3-codex', ]); $agentInstance = OpenCompanyAgent::for($agent, 'channel-1'); @@ -100,4 +112,39 @@ public function test_fake_prevents_real_api_calls(): void OpenCompanyAgent::assertPrompted(fn ($prompt) => $prompt->prompt === 'Test message'); } + + public function test_system_prompts_split_stable_and_runtime_context(): void + { + $agent = User::factory()->create([ + 'type' => 'agent', + 'brain' => 'codex:gpt-5.3-codex', + ]); + $channel = Channel::factory()->create(['type' => 'dm']); + + $task = Task::create([ + 'id' => (string) Str::uuid(), + 'workspace_id' => $this->workspace->id, + 'title' => 'Test task', + 'description' => 'Do the thing', + 'type' => Task::TYPE_CUSTOM, + 'status' => Task::STATUS_ACTIVE, + 'priority' => Task::PRIORITY_NORMAL, + 'source' => Task::SOURCE_CHAT, + 'agent_id' => $agent->id, + 'requester_id' => $agent->id, + 'channel_id' => $channel->id, + 'started_at' => now(), + ]); + + $agentInstance = OpenCompanyAgent::for($agent, $channel->id, $task->id); + $prepared = $agentInstance->preparePrompt('User request here.'); + $systemPrompts = $agentInstance->systemPrompts(); + + $this->assertSame('User request here.', $prepared); + $this->assertCount(2, $systemPrompts); + $this->assertStringNotContainsString('## Current Task', $systemPrompts[0]); + $this->assertStringContainsString('## Current Time', $systemPrompts[1]); + $this->assertStringContainsString('## Current Context', $systemPrompts[1]); + $this->assertStringContainsString('## Current Task', $systemPrompts[1]); + } } diff --git a/tests/Feature/Services/Memory/ConversationCompactionServiceTest.php b/tests/Feature/Services/Memory/ConversationCompactionServiceTest.php index 010a266..b641aa7 100644 --- a/tests/Feature/Services/Memory/ConversationCompactionServiceTest.php +++ b/tests/Feature/Services/Memory/ConversationCompactionServiceTest.php @@ -10,7 +10,6 @@ use App\Models\Message; use App\Models\User; use App\Services\Memory\ConversationCompactionService; -use App\Services\Memory\ModelContextRegistry; use Illuminate\Foundation\Testing\RefreshDatabase; use Illuminate\Support\Facades\Bus; use Laravel\Ai\Messages\UserMessage; @@ -34,6 +33,7 @@ protected function setUp(): void parent::setUp(); Bus::fake([IndexDocumentJob::class]); + config(['memory.compaction.memory_extraction.enabled' => false]); $this->agent = User::factory()->agent()->create([ 'name' => 'compact-agent', @@ -50,11 +50,11 @@ protected function setUp(): void ->andReturn(['provider' => 'openai', 'model' => 'gpt-4o']); $resolver->shouldReceive('resolveFromParts') ->andReturn(['provider' => 'openai', 'model' => 'gpt-4o']); + $resolver->shouldReceive('setWorkspaceId') + ->andReturnSelf(); - $this->service = new ConversationCompactionService( - app(ModelContextRegistry::class), - $resolver, - ); + $this->app->instance(DynamicProviderResolver::class, $resolver); + $this->service = app(ConversationCompactionService::class); } private function createMessages(int $count, ?string $channelId = null, ?string $authorId = null): void @@ -258,4 +258,26 @@ public function test_compact_increments_compaction_count(): void $this->assertNotNull($second, 'Second compaction should find messages after the compaction point'); $this->assertEquals(2, $second->compaction_count); } + + public function test_needs_compaction_returns_false_when_circuit_is_open(): void + { + ConversationSummary::create([ + 'channel_id' => $this->channel->id, + 'agent_id' => $this->agent->id, + 'summary' => 'Previous summary.', + 'workspace_id' => $this->workspace->id, + 'compaction_circuit_open_until' => now()->addMinutes(10), + ]); + + $messages = [new UserMessage(str_repeat('Word ', 2000))]; + + $result = $this->service->needsCompaction( + $this->channel->id, + $this->agent, + $messages, + 'Short system prompt.', + ); + + $this->assertFalse($result); + } } diff --git a/tests/Feature/Services/Memory/MemoryFlushServiceTest.php b/tests/Feature/Services/Memory/MemoryFlushServiceTest.php index 0ff3ee0..9babe1b 100644 --- a/tests/Feature/Services/Memory/MemoryFlushServiceTest.php +++ b/tests/Feature/Services/Memory/MemoryFlushServiceTest.php @@ -8,8 +8,8 @@ use App\Models\ConversationSummary; use App\Models\User; use App\Services\Memory\ConversationCompactionService; +use App\Services\Memory\ContextBudget; use App\Services\Memory\MemoryFlushService; -use App\Services\Memory\ModelContextRegistry; use Illuminate\Foundation\Testing\RefreshDatabase; use Illuminate\Support\Facades\Bus; use Laravel\Ai\Messages\UserMessage; @@ -35,6 +35,7 @@ protected function setUp(): void parent::setUp(); Bus::fake([IndexDocumentJob::class]); + config(['memory.compaction.memory_extraction.enabled' => false]); $this->agent = User::factory()->agent()->create([ 'name' => 'flush-agent', @@ -50,17 +51,12 @@ protected function setUp(): void ->andReturn(['provider' => 'openai', 'model' => 'gpt-4o']); $resolver->shouldReceive('resolveFromParts') ->andReturn(['provider' => 'openai', 'model' => 'gpt-4o']); + $resolver->shouldReceive('setWorkspaceId') + ->andReturnSelf(); - $this->compactionService = new ConversationCompactionService( - app(ModelContextRegistry::class), - $resolver, - ); - - $this->service = new MemoryFlushService( - $this->compactionService, - app(ModelContextRegistry::class), - $resolver, - ); + $this->app->instance(DynamicProviderResolver::class, $resolver); + $this->compactionService = app(ConversationCompactionService::class); + $this->service = app(MemoryFlushService::class); } /** diff --git a/tests/Feature/Services/Memory/ModelContextRegistryTest.php b/tests/Feature/Services/Memory/ModelContextRegistryTest.php index f610566..de37342 100644 --- a/tests/Feature/Services/Memory/ModelContextRegistryTest.php +++ b/tests/Feature/Services/Memory/ModelContextRegistryTest.php @@ -27,6 +27,13 @@ public function test_exact_match_returns_context_window(): void $this->assertEquals(128_000, $result); } + public function test_provider_aware_lookup_uses_prism_relay_metadata(): void + { + $result = $this->registry->getContextWindow('claude-sonnet-4-5-20250929', 'anthropic'); + + $this->assertEquals(200_000, $result); + } + public function test_prefix_match_returns_longest(): void { // 'gpt-4o-mini-2024-07-18' starts with 'gpt-4o-mini' (128K) and also 'gpt-4o' (128K) @@ -60,6 +67,15 @@ public function test_user_override_takes_precedence(): void $this->assertEquals(64_000, $result); } + public function test_provider_specific_override_takes_precedence(): void + { + AppSetting::setValue('model_context_windows', ['anthropic:claude-sonnet-4-5-20250929' => 64_000], 'memory'); + + $result = $this->registry->getContextWindow('claude-sonnet-4-5-20250929', 'anthropic'); + + $this->assertEquals(64_000, $result); + } + public function test_levenshtein_rejects_distance_above_five(): void { // A model name that differs by more than 5 from all known models diff --git a/tests/Unit/ContextPrunerTest.php b/tests/Unit/ContextPrunerTest.php new file mode 100644 index 0000000..365d5a0 --- /dev/null +++ b/tests/Unit/ContextPrunerTest.php @@ -0,0 +1,58 @@ + 1, + 'memory.pruning.min_result_tokens' => 10, + 'memory.pruning.min_total_saved_tokens' => 10, + ]); + + $pruner = app(ContextPruner::class); + $messages = [ + new ToolResultMessage(new Collection([ + new ToolResult('call-1', 'read_file', ['path' => '/tmp/a.md'], str_repeat('old result ', 100)), + ])), + new ToolResultMessage(new Collection([ + new ToolResult('call-2', 'read_file', ['path' => '/tmp/b.md'], str_repeat('recent result ', 100)), + ])), + ]; + + $result = $pruner->prune($messages); + + $this->assertSame(1, $result['pruned_results']); + $this->assertStringContainsString('omitted from retry context', $result['messages'][0]->toolResults[0]->result); + $this->assertStringContainsString('recent result', $result['messages'][1]->toolResults[0]->result); + } + + public function test_does_not_prune_write_results(): void + { + config([ + 'memory.pruning.keep_recent_read_results' => 0, + 'memory.pruning.min_result_tokens' => 10, + 'memory.pruning.min_total_saved_tokens' => 10, + ]); + + $pruner = app(ContextPruner::class); + $messages = [ + new ToolResultMessage(new Collection([ + new ToolResult('call-1', 'send_channel_message', ['channelId' => 'chan-1'], str_repeat('sent ', 100)), + ])), + ]; + + $result = $pruner->prune($messages); + + $this->assertSame(0, $result['pruned_results']); + $this->assertStringNotContainsString('omitted from retry context', $result['messages'][0]->toolResults[0]->result); + } +} diff --git a/tests/Unit/OutputTruncatorTest.php b/tests/Unit/OutputTruncatorTest.php new file mode 100644 index 0000000..d878640 --- /dev/null +++ b/tests/Unit/OutputTruncatorTest.php @@ -0,0 +1,30 @@ +truncate($output, 'call-123'); + + $this->assertIsString($result); + $this->assertStringContainsString('[truncated - full output stored at storage:', $result); + Storage::disk('local')->assertExists('tool-results-test/'.now()->format('Y/m/d').'/tool_call-123.txt'); + } +} diff --git a/tests/Unit/PrismMessagesTest.php b/tests/Unit/PrismMessagesTest.php new file mode 100644 index 0000000..ee38069 --- /dev/null +++ b/tests/Unit/PrismMessagesTest.php @@ -0,0 +1,46 @@ + '/tmp/test.md'], + ), + ]), + ), + new ToolResultMessage(collect([ + new ToolResult( + id: 'call-1', + name: 'read_file', + arguments: ['path' => '/tmp/test.md'], + result: 'file contents', + ), + ])), + ])); + + $this->assertCount(2, $messages); + $this->assertInstanceOf(PrismAssistantMessage::class, $messages[0]); + $this->assertSame('read_file', $messages[0]->toolCalls[0]->name); + $this->assertInstanceOf(PrismToolResultMessage::class, $messages[1]); + $this->assertSame('file contents', $messages[1]->toolResults[0]->result); + } +} diff --git a/tests/Unit/PromptFrameBuilderTest.php b/tests/Unit/PromptFrameBuilderTest.php new file mode 100644 index 0000000..011f23a --- /dev/null +++ b/tests/Unit/PromptFrameBuilderTest.php @@ -0,0 +1,26 @@ +splitSections([ + ['label' => 'Header', 'content' => "Header\n"], + ['label' => 'Current Time', 'content' => "Time\n"], + ['label' => 'Apps', 'content' => "Apps\n"], + ['label' => 'Current Task', 'content' => "Task\n"], + ]); + + $this->assertSame("Header\nApps\n", $frame['stable_prompt']); + $this->assertSame("Time\nTask\n", $frame['volatile_prompt']); + $this->assertCount(2, $frame['stable_breakdown']); + $this->assertCount(2, $frame['volatile_breakdown']); + } +} diff --git a/tests/Unit/ToolResultDeduplicatorTest.php b/tests/Unit/ToolResultDeduplicatorTest.php new file mode 100644 index 0000000..c063843 --- /dev/null +++ b/tests/Unit/ToolResultDeduplicatorTest.php @@ -0,0 +1,75 @@ + '/tmp/example.txt'], + result: 'hello world', + ), + ])), + new ToolResultMessage(new Collection([ + new ToolResult( + id: 'call-2', + name: 'read_file', + arguments: ['path' => '/tmp/example.txt'], + result: 'hello world', + ), + ])), + ]; + + $result = $deduplicator->deduplicate($messages); + + $this->assertSame(1, $result['deduplicated']); + $this->assertStringContainsString( + '[Superseded', + $result['messages'][0]->toolResults[0]->result + ); + $this->assertSame('hello world', $result['messages'][1]->toolResults[0]->result); + } + + public function test_does_not_supersede_write_tool_results(): void + { + $deduplicator = app(ToolResultDeduplicator::class); + + $messages = [ + new ToolResultMessage(new Collection([ + new ToolResult( + id: 'call-1', + name: 'send_channel_message', + arguments: ['channelId' => 'chan-1', 'message' => 'hello world'], + result: 'Message sent successfully.', + ), + ])), + new ToolResultMessage(new Collection([ + new ToolResult( + id: 'call-2', + name: 'send_channel_message', + arguments: ['channelId' => 'chan-1', 'message' => 'hello world'], + result: 'Message sent successfully.', + ), + ])), + ]; + + $result = $deduplicator->deduplicate($messages); + + $this->assertSame(0, $result['deduplicated']); + $this->assertSame('Message sent successfully.', $result['messages'][0]->toolResults[0]->result); + $this->assertSame('Message sent successfully.', $result['messages'][1]->toolResults[0]->result); + } +}