From c4bbbfc845dcce74210ab5f6237a2d731578c539 Mon Sep 17 00:00:00 2001 From: ruttydm Date: Tue, 7 Apr 2026 12:18:40 +0200 Subject: [PATCH 1/4] feat: multi-account integrations, MCP improvements, ecosystem docs, and comprehensive QA test plans Integration & MCP: - Add multi-account support for integration settings and MCP servers (account_alias, is_default columns with composite unique keys) - Expand IntegrationController with full CRUD for multi-account setups - Update IntegrationSettingCredentialResolver to resolve by account alias - Refactor McpServerRegistrar and McpToolProvider for improved registration - Split composer path repo: ../integrations/core + ../integrations/packages/* Provider & config: - Update DynamicProviderResolver and GlmPrismGateway for latest relay - Update config/integrations.php and config/prism.php with new provider entries - Clean up AppServiceProvider registration Services: - Update LuaApiDocGenerator with improved catalog building - Update AgentChatService and OpenCompanyLuaToolInvoker - Add workspace-scoped tool meta lookup in LuaBridge Docs: - Add comprehensive docs/ecosystem/ with integration docs, iris analysis, and kosmokrator architecture/audit/research/proposal docs - Add QA test strategy and feature test map covering the full git tree: integration refactor, file management, automations, chat UI, Telegram forwarding, LLM providers, security hardening, and uncommitted changes Tests: - Update ChannelConversationLoaderTest and DynamicProviderResolverTest --- .kosmokrator/config.yaml | 3 + .../Providers/DynamicProviderResolver.php | 22 +- app/Agents/Providers/GlmPrismGateway.php | 2 +- app/Agents/Tools/ToolRegistry.php | 7 +- .../Tools/Workspace/GetIntegrationSetup.php | 2 +- .../Workspace/TestIntegrationConnection.php | 6 +- .../Workspace/UpdateIntegrationConfig.php | 4 +- app/Console/Commands/TestGlmPing.php | 16 +- app/Http/Controllers/Api/AgentController.php | 4 +- .../Controllers/Api/IntegrationController.php | 169 +- 
app/Models/IntegrationSetting.php | 52 + app/Models/McpServer.php | 21 + app/Providers/AppServiceProvider.php | 10 +- app/Services/AgentChatService.php | 16 +- .../IntegrationSettingCredentialResolver.php | 25 +- app/Services/LuaApiDocGenerator.php | 42 +- app/Services/LuaBridge.php | 1 + app/Services/Mcp/McpServerRegistrar.php | 36 +- app/Services/Mcp/McpToolProvider.php | 58 +- app/Services/OpenCompanyLuaToolInvoker.php | 4 +- composer.json | 6 +- composer.lock | 208 +- config/integrations.php | 22 +- config/prism.php | 16 +- ..._multi_account_to_integration_settings.php | 45 + docs/ecosystem/integrations/README.md | 943 ++++++++ .../integrations/celestial/README.md | 119 + docs/ecosystem/integrations/clickup/README.md | 61 + .../ecosystem/integrations/clickup/clickup.md | 207 ++ .../integrations/coingecko/README.md | 34 + .../integrations/coingecko/coingecko.md | 103 + docs/ecosystem/integrations/core/README.md | 209 ++ .../integrations/exchangerate/README.md | 28 + .../integrations/exchangerate/exchangerate.md | 118 + docs/ecosystem/integrations/google/README.md | 129 + docs/ecosystem/integrations/google/google.md | 207 ++ docs/ecosystem/integrations/mermaid/README.md | 59 + .../integrations/plausible/README.md | 132 + .../integrations/plausible/plausible.md | 144 ++ .../ecosystem/integrations/ticktick/README.md | 106 + .../integrations/ticktick/ticktick.md | 124 + .../ecosystem/integrations/trustmrr/README.md | 52 + .../integrations/worldbank/README.md | 28 + docs/ecosystem/iris/ecosystem-overview.md | 250 ++ docs/ecosystem/iris/missing-in-iris.md | 150 ++ docs/ecosystem/kosmokrator/README.md | 49 + .../kosmokrator/architecture/overview.md | 156 ++ .../architecture/permission-modes.md | 125 + .../architecture/subagent-architecture.md | 98 + .../audits/deep-audit-2026-04-02.md | 287 +++ .../kosmokrator/audits/memory-leak-audit.md | 534 +++++ .../audits/ram-audit/RAM-EFFICIENCY-AUDIT.md | 1773 ++++++++++++++ .../ram-audit/synthesis-architecture.md | 370 +++ 
.../audits/ram-audit/synthesis-core-agent.md | 496 ++++ .../ram-audit/synthesis-io-performance.md | 222 ++ .../audits/ram-audit/synthesis-security.md | 344 +++ .../audits/self-audit-2026-03-30.md | 317 +++ .../kosmokrator/deep-audit-2026-04-04.md | 715 ++++++ .../proposals/command-inspiration.md | 65 + .../proposals/context-compaction.md | 142 ++ .../proposals/context-management-redesign.md | 705 ++++++ .../context-management-strategies.md | 49 + .../kosmokrator/proposals/desktop-app.md | 296 +++ .../proposals/ecosystem-architecture.md | 441 ++++ .../proposals/integration-refactor-plan.md | 339 +++ .../proposals/laravel-ai-patterns.md | 209 ++ .../kosmokrator/proposals/streaming.md | 139 ++ .../proposals/tui-ux-improvements.md | 415 ++++ ...nt-swarm-gaps-and-frankenstein-analysis.md | 418 ++++ .../research/claude-code-analysis.md | 1464 ++++++++++++ .../research/claude-code-architecture.md | 2123 +++++++++++++++++ .../research/opencode-feature-analysis.md | 320 +++ .../kosmokrator/tools/web-tools-spec.md | 146 ++ .../tools/webfetch-system-prompt.md | 62 + docs/testing/feature-test-map.md | 1342 +++-------- docs/testing/qa-strategy.md | 1750 +------------- routes/api.php | 5 + .../Feature/ChannelConversationLoaderTest.php | 12 +- tests/Feature/DynamicProviderResolverTest.php | 30 +- 79 files changed, 17040 insertions(+), 2918 deletions(-) create mode 100644 .kosmokrator/config.yaml create mode 100644 database/migrations/2026_04_05_000001_add_multi_account_to_integration_settings.php create mode 100644 docs/ecosystem/integrations/README.md create mode 100644 docs/ecosystem/integrations/celestial/README.md create mode 100644 docs/ecosystem/integrations/clickup/README.md create mode 100644 docs/ecosystem/integrations/clickup/clickup.md create mode 100644 docs/ecosystem/integrations/coingecko/README.md create mode 100644 docs/ecosystem/integrations/coingecko/coingecko.md create mode 100644 docs/ecosystem/integrations/core/README.md create mode 100644 
docs/ecosystem/integrations/exchangerate/README.md create mode 100644 docs/ecosystem/integrations/exchangerate/exchangerate.md create mode 100644 docs/ecosystem/integrations/google/README.md create mode 100644 docs/ecosystem/integrations/google/google.md create mode 100644 docs/ecosystem/integrations/mermaid/README.md create mode 100644 docs/ecosystem/integrations/plausible/README.md create mode 100644 docs/ecosystem/integrations/plausible/plausible.md create mode 100644 docs/ecosystem/integrations/ticktick/README.md create mode 100644 docs/ecosystem/integrations/ticktick/ticktick.md create mode 100644 docs/ecosystem/integrations/trustmrr/README.md create mode 100644 docs/ecosystem/integrations/worldbank/README.md create mode 100644 docs/ecosystem/iris/ecosystem-overview.md create mode 100644 docs/ecosystem/iris/missing-in-iris.md create mode 100644 docs/ecosystem/kosmokrator/README.md create mode 100644 docs/ecosystem/kosmokrator/architecture/overview.md create mode 100644 docs/ecosystem/kosmokrator/architecture/permission-modes.md create mode 100644 docs/ecosystem/kosmokrator/architecture/subagent-architecture.md create mode 100644 docs/ecosystem/kosmokrator/audits/deep-audit-2026-04-02.md create mode 100644 docs/ecosystem/kosmokrator/audits/memory-leak-audit.md create mode 100644 docs/ecosystem/kosmokrator/audits/ram-audit/RAM-EFFICIENCY-AUDIT.md create mode 100644 docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-architecture.md create mode 100644 docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-core-agent.md create mode 100644 docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-io-performance.md create mode 100644 docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-security.md create mode 100644 docs/ecosystem/kosmokrator/audits/self-audit-2026-03-30.md create mode 100644 docs/ecosystem/kosmokrator/deep-audit-2026-04-04.md create mode 100644 docs/ecosystem/kosmokrator/proposals/command-inspiration.md create mode 100644 
docs/ecosystem/kosmokrator/proposals/context-compaction.md create mode 100644 docs/ecosystem/kosmokrator/proposals/context-management-redesign.md create mode 100644 docs/ecosystem/kosmokrator/proposals/context-management-strategies.md create mode 100644 docs/ecosystem/kosmokrator/proposals/desktop-app.md create mode 100644 docs/ecosystem/kosmokrator/proposals/ecosystem-architecture.md create mode 100644 docs/ecosystem/kosmokrator/proposals/integration-refactor-plan.md create mode 100644 docs/ecosystem/kosmokrator/proposals/laravel-ai-patterns.md create mode 100644 docs/ecosystem/kosmokrator/proposals/streaming.md create mode 100644 docs/ecosystem/kosmokrator/proposals/tui-ux-improvements.md create mode 100644 docs/ecosystem/kosmokrator/reports/sub-agent-swarm-gaps-and-frankenstein-analysis.md create mode 100644 docs/ecosystem/kosmokrator/research/claude-code-analysis.md create mode 100644 docs/ecosystem/kosmokrator/research/claude-code-architecture.md create mode 100644 docs/ecosystem/kosmokrator/research/opencode-feature-analysis.md create mode 100644 docs/ecosystem/kosmokrator/tools/web-tools-spec.md create mode 100644 docs/ecosystem/kosmokrator/tools/webfetch-system-prompt.md diff --git a/.kosmokrator/config.yaml b/.kosmokrator/config.yaml new file mode 100644 index 0000000..0eaa84f --- /dev/null +++ b/.kosmokrator/config.yaml @@ -0,0 +1,3 @@ +kosmokrator: + agent: + mode: plan diff --git a/app/Agents/Providers/DynamicProviderResolver.php b/app/Agents/Providers/DynamicProviderResolver.php index f744626..177dcd4 100644 --- a/app/Agents/Providers/DynamicProviderResolver.php +++ b/app/Agents/Providers/DynamicProviderResolver.php @@ -26,7 +26,7 @@ public function setWorkspaceId(?string $workspaceId): self /** * Parse a User's brain field and resolve to SDK provider + model. * - * Brain format: "provider:model" (e.g. "glm-coding:glm-4.7", "anthropic:claude-sonnet-4-5-20250929") + * Brain format: "provider:model" (e.g. 
"z:glm-5.1", "anthropic:claude-sonnet-4-5-20250929") * * @return array{provider: string, model: string} */ @@ -34,7 +34,7 @@ public function resolve(User $agent): array { $this->workspaceId = $agent->workspace_id; - $brain = $agent->brain ?? 'glm-coding:glm-4.7'; + $brain = $agent->brain ?? 'z:glm-5.1'; $parts = explode(':', $brain, 2); $providerKey = $parts[0]; $model = $parts[1] ?? $this->getDefaultModel($providerKey); @@ -55,7 +55,7 @@ public function resolveFromParts(string $providerKey, string $model): array return ['provider' => 'codex', 'model' => $model]; } - // GLM providers use IntegrationSetting for API keys + // Custom providers use IntegrationSetting for API keys if ($this->isGlmProvider($providerKey)) { $this->registerGlmProvider($providerKey); return ['provider' => $providerKey, 'model' => $model]; @@ -72,7 +72,7 @@ public function resolveFromParts(string $providerKey, string $model): array } /** - * Check if a provider key is a GLM variant. + * Check if a provider key is managed by Prism Relay. */ private function isGlmProvider(string $providerKey): bool { @@ -80,10 +80,7 @@ private function isGlmProvider(string $providerKey): bool } /** - * Dynamically register a GLM provider in the Prism config. - * - * GLM uses an OpenAI-compatible API, so we register it as an OpenAI provider - * with a custom URL and API key from IntegrationSetting. + * Dynamically register a custom provider in the Prism config. */ private function registerGlmProvider(string $providerKey): void { @@ -107,7 +104,7 @@ private function registerGlmProvider(string $providerKey): void $apiKey = $integration->getConfigValue('api_key'); $url = $integration->getConfigValue('url') ?? 
$this->getDefaultGlmUrl($providerKey); - // Set Prism config for the GLM provider variant (registered via PrismManager::extend) + // Set Prism config for the provider variant (registered via PrismManager::extend) config([ "prism.providers.{$providerKey}" => [ 'api_key' => $apiKey, @@ -116,10 +113,10 @@ private function registerGlmProvider(string $providerKey): void ]); // Register in AI SDK config using our custom driver (registered via AiManager::extend) - // This routes through GlmPrismGateway → Prism 'glm' provider → chat/completions + // This routes through GlmPrismGateway to the matching Prism provider. config([ "ai.providers.{$providerKey}" => [ - 'driver' => $providerKey, // 'glm' or 'glm-coding' — custom drivers + 'driver' => $providerKey, 'key' => $apiKey, ], ]); @@ -154,6 +151,7 @@ private function mapToSdkProvider(string $providerKey): ?string 'deepseek' => 'deepseek', 'mistral' => 'mistral', 'ollama' => 'ollama', + 'perplexity' => 'perplexity', ]; return $map[$providerKey] ?? null; @@ -190,7 +188,7 @@ private function applyIntegrationConfig(string $providerKey): void } /** - * Get default URL for a GLM provider. + * Get default URL for a known provider. */ private function getDefaultGlmUrl(string $providerKey): string { diff --git a/app/Agents/Providers/GlmPrismGateway.php b/app/Agents/Providers/GlmPrismGateway.php index 2c7b6e5..6710419 100644 --- a/app/Agents/Providers/GlmPrismGateway.php +++ b/app/Agents/Providers/GlmPrismGateway.php @@ -11,7 +11,7 @@ /** * Custom PrismGateway that routes requests to custom Prism providers - * registered via PrismManager::extend() (GLM, Kimi, MiniMax, etc.). + * registered via PrismManager::extend() (Z.AI, Kimi, MiniMax, etc.). * * The base PrismGateway maps driver names to PrismProvider enums, which only * works for native Prism providers. 
Custom providers need their string key diff --git a/app/Agents/Tools/ToolRegistry.php b/app/Agents/Tools/ToolRegistry.php index ecb9777..38e2e54 100644 --- a/app/Agents/Tools/ToolRegistry.php +++ b/app/Agents/Tools/ToolRegistry.php @@ -503,13 +503,13 @@ public function getToolCatalog(User $agent): array /** * Instantiate a specific tool by slug (for post-approval execution). */ - public function instantiateToolBySlug(string $slug, User $agent): \OpenCompany\IntegrationCore\Contracts\Tool|\Laravel\Ai\Contracts\Tool|null + public function instantiateToolBySlug(string $slug, User $agent, ?string $account = null): \OpenCompany\IntegrationCore\Contracts\Tool|\Laravel\Ai\Contracts\Tool|null { if (! isset($this->getEffectiveToolMap()[$slug])) { return null; } - return $this->instantiateTool($this->getEffectiveToolMap()[$slug]['class'], $agent, $slug); + return $this->instantiateTool($this->getEffectiveToolMap()[$slug]['class'], $agent, $slug, $account); } /** @@ -573,7 +573,7 @@ public function getAppCatalog(User $agent): string /** * Instantiate a tool class via its provider. 
*/ - private function instantiateTool(string $class, User $agent, string $slug = ''): \OpenCompany\IntegrationCore\Contracts\Tool|\Laravel\Ai\Contracts\Tool + private function instantiateTool(string $class, User $agent, string $slug = '', ?string $account = null): \OpenCompany\IntegrationCore\Contracts\Tool|\Laravel\Ai\Contracts\Tool { $context = [ 'channel_id' => $this->currentChannelId, @@ -589,6 +589,7 @@ private function instantiateTool(string $class, User $agent, string $slug = ''): 'agent' => $agent, 'timezone' => AppSetting::getValue('org_timezone', 'UTC'), 'tool_slug' => $toolSlug, + 'account' => $account, ]); } } diff --git a/app/Agents/Tools/Workspace/GetIntegrationSetup.php b/app/Agents/Tools/Workspace/GetIntegrationSetup.php index 434ea7c..5ce7420 100644 --- a/app/Agents/Tools/Workspace/GetIntegrationSetup.php +++ b/app/Agents/Tools/Workspace/GetIntegrationSetup.php @@ -66,7 +66,7 @@ public function schema(JsonSchema $schema): array return [ 'integrationId' => $schema ->string() - ->description('Integration ID (e.g., "telegram", "glm", "plausible"). Includes both static and dynamic package-provided integrations.') + ->description('Integration ID (e.g., "telegram", "z", "plausible"). Includes both static and dynamic package-provided integrations.') ->required(), ]; } diff --git a/app/Agents/Tools/Workspace/TestIntegrationConnection.php b/app/Agents/Tools/Workspace/TestIntegrationConnection.php index 6b460e4..8fd8052 100644 --- a/app/Agents/Tools/Workspace/TestIntegrationConnection.php +++ b/app/Agents/Tools/Workspace/TestIntegrationConnection.php @@ -61,7 +61,7 @@ public function handle(Request $request): string return $this->testTelegram($apiKey); } - // GLM-style providers + // OpenAI-compatible chat-completions providers $url = $setting->getConfigValue('url') ?? ($available[$integrationId]['default_url'] ?? ''); $model = $setting->getConfigValue('default_model') ?? array_key_first($available[$integrationId]['models'] ?? 
[]); @@ -122,8 +122,8 @@ public function schema(JsonSchema $schema): array return [ 'integrationId' => $schema ->string() - ->description('Integration ID (e.g., "telegram", "glm", "plausible"). Includes both static and dynamic package-provided integrations.') + ->description('Integration ID (e.g., "telegram", "z", "plausible"). Includes both static and dynamic package-provided integrations.') ->required(), ]; } -} \ No newline at end of file +} diff --git a/app/Agents/Tools/Workspace/UpdateIntegrationConfig.php b/app/Agents/Tools/Workspace/UpdateIntegrationConfig.php index 50c2df8..d391e1b 100644 --- a/app/Agents/Tools/Workspace/UpdateIntegrationConfig.php +++ b/app/Agents/Tools/Workspace/UpdateIntegrationConfig.php @@ -175,7 +175,7 @@ public function schema(JsonSchema $schema): array return [ 'integrationId' => $schema ->string() - ->description('Integration ID (e.g., "telegram", "glm", "plausible"). Includes both static and dynamic package-provided integrations.') + ->description('Integration ID (e.g., "telegram", "z", "plausible"). Includes both static and dynamic package-provided integrations.') ->required(), 'apiKey' => $schema ->string() @@ -203,4 +203,4 @@ public function schema(JsonSchema $schema): array ->description('JSON array of site domains for string_list fields, e.g. 
["example.com"].'), ]; } -} \ No newline at end of file +} diff --git a/app/Console/Commands/TestGlmPing.php b/app/Console/Commands/TestGlmPing.php index 00dcfa7..f86b9ca 100644 --- a/app/Console/Commands/TestGlmPing.php +++ b/app/Console/Commands/TestGlmPing.php @@ -7,16 +7,16 @@ class TestGlmPing extends Command { - protected $signature = 'glm:ping {--prompt= : Custom prompt to send}'; - protected $description = 'Test GLM 4.7 API connection'; + protected $signature = 'z:ping {--prompt= : Custom prompt to send}'; + protected $description = 'Test Z.AI API connection'; public function handle(): int { - $this->info('Testing GLM 4.7 API connection...'); + $this->info('Testing Z.AI API connection...'); $this->newLine(); - $url = config('prism.providers.glm.url'); - $apiKey = config('prism.providers.glm.api_key'); + $url = config('prism.providers.z.url'); + $apiKey = config('prism.providers.z.api_key'); $this->line('Endpoint: ' . $url); $this->line('API Key: ' . substr($apiKey, 0, 10) . '...'); @@ -26,7 +26,7 @@ public function handle(): int try { $response = Prism::text() - ->using('glm', 'glm-4.7') + ->using('z', 'glm-5.1') ->withPrompt($prompt) ->asText(); @@ -45,11 +45,11 @@ public function handle(): int ); $this->newLine(); - $this->info('GLM 4.7 connection successful!'); + $this->info('Z.AI connection successful!'); return Command::SUCCESS; } catch (\Exception $e) { - $this->error('Failed to connect to GLM 4.7:'); + $this->error('Failed to connect to Z.AI:'); $this->error($e->getMessage()); if ($this->output->isVerbose()) { diff --git a/app/Http/Controllers/Api/AgentController.php b/app/Http/Controllers/Api/AgentController.php index 5f2a0f3..f2af3fb 100644 --- a/app/Http/Controllers/Api/AgentController.php +++ b/app/Http/Controllers/Api/AgentController.php @@ -66,14 +66,14 @@ public function store(Request $request): JsonResponse // Validate brain format (provider:model) if (!str_contains($validated['brain'], ':')) { return response()->json([ - 'error' => 'Invalid 
brain format. Expected "provider:model" (e.g., "glm:glm-4.7")', + 'error' => 'Invalid brain format. Expected "provider:model" (e.g., "z:glm-5.1")', ], 422); } [$provider] = explode(':', $validated['brain'], 2); // Standard providers use .env keys; only check IntegrationSetting for custom providers - $standardProviders = ['anthropic', 'openai', 'gemini', 'groq', 'xai', 'openrouter', 'deepseek', 'mistral', 'ollama']; + $standardProviders = ['anthropic', 'openai', 'gemini', 'groq', 'xai', 'openrouter', 'deepseek', 'mistral', 'ollama', 'perplexity']; if (!in_array($provider, $standardProviders)) { $integration = IntegrationSetting::forWorkspace() diff --git a/app/Http/Controllers/Api/IntegrationController.php b/app/Http/Controllers/Api/IntegrationController.php index 0d48bc0..6fdbb3c 100644 --- a/app/Http/Controllers/Api/IntegrationController.php +++ b/app/Http/Controllers/Api/IntegrationController.php @@ -31,7 +31,7 @@ public function index(): \Illuminate\Http\JsonResponse $integrations = []; - // Static integrations (GLM, Telegram, Codex — no ToolProvider package) + // Static integrations (Z.AI, Telegram, Codex — no ToolProvider package) foreach ($available as $id => $info) { // Codex uses OAuth tokens, not API keys if ($id === 'codex') { @@ -199,7 +199,7 @@ public function showConfig(string $id): \Illuminate\Http\JsonResponse ]); } - // Static integrations (GLM, Telegram, chat platforms) + // Static integrations (Z.AI, Telegram, chat platforms) $available = IntegrationSetting::getAvailableIntegrations(); if (!isset($available[$id])) { return response()->json(['error' => 'Integration not found'], 404); @@ -233,7 +233,7 @@ public function showConfig(string $id): \Illuminate\Http\JsonResponse ]); } - // AI model integrations (GLM etc.) + // AI model integrations (Z.AI, Perplexity, etc.) $config = [ 'apiKey' => $setting?->getMaskedApiKey(), 'url' => $setting?->getConfigValue('url') ?? ($available[$id]['default_url'] ?? 
''), @@ -331,7 +331,7 @@ public function updateConfig(Request $request, string $id): \Illuminate\Http\Jso ]); } - // Static integrations (GLM, chat platforms) + // Static integrations (Z.AI, chat platforms) $available = IntegrationSetting::getAvailableIntegrations(); if (!isset($available[$id])) { return response()->json(['error' => 'Integration not found'], 404); @@ -377,7 +377,7 @@ public function updateConfig(Request $request, string $id): \Illuminate\Http\Jso ]); } - // AI model integrations (GLM etc.) + // AI model integrations (Z.AI, Perplexity, etc.) $request->validate([ 'apiKey' => 'nullable|string', 'url' => 'nullable|string|url', @@ -564,10 +564,10 @@ public function disconnect(string $id): \Illuminate\Http\JsonResponse } /** - * Test GLM/Zhipu AI connection + * Test OpenAI-compatible AI provider connection. */ /** - * Test connection for OpenAI-compatible providers (OpenAI, DeepSeek, Groq, Mistral, xAI, OpenRouter, GLM, Ollama). + * Test connection for OpenAI-compatible providers (OpenAI, DeepSeek, Groq, Mistral, xAI, OpenRouter, Z.AI, Ollama, Perplexity). */ private function testOpenAiCompatConnection(?string $apiKey, string $url, ?string $model): \Illuminate\Http\JsonResponse { @@ -923,7 +923,7 @@ public function enabledModels(): \Illuminate\Http\JsonResponse /** * Get all available AI providers with their models for settings dropdowns. * - * Returns both integration-based providers (GLM, Codex) and prism-config + * Returns both integration-based providers (Z.AI, Codex) and prism-config * providers (Anthropic, OpenAI, etc.) with configuration status. */ public function allProviders(): \Illuminate\Http\JsonResponse @@ -1417,7 +1417,7 @@ private function fetchModelsFromProvider(string $id): array /** * Fetch models from an OpenAI-compatible /models endpoint. - * Works for: OpenAI, DeepSeek, Groq, Mistral, xAI, OpenRouter, GLM. + * Works for: OpenAI, DeepSeek, Groq, Mistral, xAI, OpenRouter, Z.AI, Perplexity. 
* * @return array */ @@ -1448,8 +1448,8 @@ private function fetchOpenAiCompatModels(string $id): array $models[$modelId] = $this->formatModelName($modelId); } - // GLM: probe flash/plus variants not listed by /models - if ($id === 'glm' || $id === 'glm-coding') { + // Z.AI: probe flash/plus variants not listed by /models + if ($id === 'z' || $id === 'z-api') { $models = $this->probeGlmVariants($models, $apiKey, $baseUrl); } @@ -1593,7 +1593,7 @@ private function getProviderCredentials(string $id): array } /** - * GLM-specific: probe flash/plus variants not listed by /models. + * Z.AI-specific: probe flash/plus variants not listed by /models. * * @param array $models * @return array @@ -1768,4 +1768,149 @@ private function testChatIntegrationConnection(string $id, Request $request): \I ], 500); } } + + // ─── Multi-Account Endpoints ──────────────────────────────── + + /** + * List all accounts for an integration. + */ + public function listAccounts(string $id): \Illuminate\Http\JsonResponse + { + $settings = IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->get(); + + $accounts = $settings->map(fn (IntegrationSetting $s) => [ + 'alias' => $s->account_alias, + 'is_default' => $s->is_default, + 'enabled' => $s->enabled, + 'configured' => $s->hasValidConfig(), + ]); + + return response()->json(['accounts' => $accounts]); + } + + /** + * Create a new account for an integration. 
+ */ + public function createAccount(Request $request, string $id): \Illuminate\Http\JsonResponse + { + $request->validate([ + 'alias' => ['required', 'string', 'max:32', 'regex:/^[a-z0-9_]+$/'], + 'config' => ['required', 'array'], + ]); + + $alias = $request->input('alias'); + + $exists = IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->where('account_alias', $alias) + ->exists(); + + if ($exists) { + return response()->json(['error' => "Account '{$alias}' already exists."], 422); + } + + $hasOthers = IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->exists(); + + $setting = IntegrationSetting::create([ + 'id' => Str::uuid()->toString(), + 'workspace_id' => workspace()->id, + 'integration_id' => $id, + 'account_alias' => $alias, + 'config' => $request->input('config'), + 'enabled' => true, + 'is_default' => ! $hasOthers, + ]); + + return response()->json([ + 'alias' => $setting->account_alias, + 'is_default' => $setting->is_default, + ], 201); + } + + /** + * Update an account's config. + */ + public function updateAccount(Request $request, string $id, string $alias): \Illuminate\Http\JsonResponse + { + $setting = IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->where('account_alias', $alias) + ->first(); + + if (! $setting) { + return response()->json(['error' => 'Account not found.'], 404); + } + + $config = $setting->config ?? []; + foreach ($request->input('config', []) as $key => $value) { + if (is_string($value) && str_contains($value, '*')) { + continue; // Skip masked values + } + $config[$key] = $value; + } + $setting->config = $config; + $setting->save(); + + return response()->json(['success' => true]); + } + + /** + * Delete an account. 
+ */ + public function deleteAccount(string $id, string $alias): \Illuminate\Http\JsonResponse + { + if ($alias === '') { + return response()->json(['error' => 'Cannot delete the default account.'], 422); + } + + $setting = IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->where('account_alias', $alias) + ->first(); + + if (! $setting) { + return response()->json(['error' => 'Account not found.'], 404); + } + + $wasDefault = $setting->is_default; + $setting->delete(); + + // If we deleted the default, promote the remaining default (empty alias) row + if ($wasDefault) { + IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->where('account_alias', '') + ->update(['is_default' => true]); + } + + return response()->json(['success' => true]); + } + + /** + * Set an account as the default. + */ + public function setDefaultAccount(string $id, string $alias): \Illuminate\Http\JsonResponse + { + $setting = IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->where('account_alias', $alias) + ->first(); + + if (! 
$setting) { + return response()->json(['error' => 'Account not found.'], 404); + } + + // Clear is_default on all accounts for this integration + IntegrationSetting::forWorkspace() + ->where('integration_id', $id) + ->update(['is_default' => false]); + + $setting->update(['is_default' => true]); + + return response()->json(['success' => true]); + } } diff --git a/app/Models/IntegrationSetting.php b/app/Models/IntegrationSetting.php index 18243b0..ec33b8a 100644 --- a/app/Models/IntegrationSetting.php +++ b/app/Models/IntegrationSetting.php @@ -10,6 +10,8 @@ * @property array $config * @property bool $enabled * @property string $integration_id + * @property string $account_alias + * @property bool $is_default */ class IntegrationSetting extends Model { @@ -23,8 +25,10 @@ class IntegrationSetting extends Model 'id', 'workspace_id', 'integration_id', + 'account_alias', 'config', 'enabled', + 'is_default', ]; protected function casts(): array @@ -32,9 +36,57 @@ protected function casts(): array return [ 'config' => 'encrypted:array', 'enabled' => 'boolean', + 'is_default' => 'boolean', ]; } + /** + * Scope to a specific account alias. + * + * Null or empty string targets the default (un-aliased) account. + * + * @param \Illuminate\Database\Eloquent\Builder $query + * @return \Illuminate\Database\Eloquent\Builder + */ + public function scopeForAccount($query, ?string $account): self + { + $alias = ($account === null || $account === '') ? '' : $account; + + return $query->where('account_alias', $alias); + } + + /** + * Scope to the default account (is_default = true or the un-aliased row). + * + * @param \Illuminate\Database\Eloquent\Builder $query + * @return \Illuminate\Database\Eloquent\Builder + */ + public function scopeDefault($query): self + { + return $query->where(function ($q) { + $q->where('is_default', true)->orWhere('account_alias', ''); + }); + } + + /** + * Get all non-default account aliases for an integration in the current workspace. 
+ * + * @return list + */ + public static function getAccountsFor(string $integrationId): array + { + $query = app()->bound('currentWorkspace') + ? static::forWorkspace() + : static::query(); + + return $query + ->where('integration_id', $integrationId) + ->where('account_alias', '!=', '') + ->pluck('account_alias') + ->values() + ->all(); + } + /** * Get a specific config value */ diff --git a/app/Models/McpServer.php b/app/Models/McpServer.php index 548d710..0455cd0 100644 --- a/app/Models/McpServer.php +++ b/app/Models/McpServer.php @@ -19,6 +19,7 @@ * @property string|null $description * @property string $name * @property string $slug + * @property string $account_alias * @property \Carbon\Carbon|null $tools_discovered_at * @property \Carbon\Carbon|null $created_at * @property \Carbon\Carbon|null $updated_at @@ -36,6 +37,7 @@ class McpServer extends Model 'workspace_id', 'name', 'slug', + 'account_alias', 'url', 'auth_type', 'auth_config', @@ -62,6 +64,25 @@ protected function casts(): array ]; } + /** + * Get all non-default account aliases for an MCP server slug in the current workspace. + * + * @return list + */ + public static function getAccountsFor(string $slug): array + { + try { + return static::forWorkspace() + ->where('slug', $slug) + ->where('account_alias', '!=', '') + ->pluck('account_alias') + ->values() + ->all(); + } catch (\Throwable) { + return []; + } + } + /** * Get prefixed tool slugs from cached discovered tools. * diff --git a/app/Providers/AppServiceProvider.php b/app/Providers/AppServiceProvider.php index a9f8846..fe55570 100644 --- a/app/Providers/AppServiceProvider.php +++ b/app/Providers/AppServiceProvider.php @@ -80,11 +80,11 @@ public function boot(): void }); } - // Custom Prism providers (GLM, Kimi, MiniMax) are registered by + // Custom Prism providers (Z.AI, Kimi, MiniMax) are registered by // PrismRelayServiceProvider via afterResolving(PrismManager::class). - // Register 'glm' and 'glm-coding' as custom AI SDK drivers. 
- // These use GlmPrismGateway which routes to our custom 'glm' Prism provider + // Register custom AI SDK drivers. + // These use GlmPrismGateway which routes to the matching Prism provider // (chat/completions) instead of the default OpenAI provider (/responses). // Use afterResolving because AiManager is scoped (recreated per job in queue workers). $this->app->afterResolving(AiManager::class, function (AiManager $aiManager, $app) { @@ -96,8 +96,8 @@ public function boot(): void ); }; - $aiManager->extend('glm', $createGlmDriver); - $aiManager->extend('glm-coding', $createGlmDriver); + $aiManager->extend('z', $createGlmDriver); + $aiManager->extend('z-api', $createGlmDriver); $aiManager->extend('kimi', $createGlmDriver); $aiManager->extend('kimi-coding', $createGlmDriver); $aiManager->extend('minimax', $createGlmDriver); diff --git a/app/Services/AgentChatService.php b/app/Services/AgentChatService.php index 3c20f0e..892830a 100644 --- a/app/Services/AgentChatService.php +++ b/app/Services/AgentChatService.php @@ -16,11 +16,11 @@ public function __construct( public function respond(User $agent, string $channelId, string $userMessage): string { - // Parse agent's brain setting (e.g., 'glm-coding:glm-4.7') - $brain = $agent->brain ?? 'glm-coding:glm-4.7'; + // Parse agent's brain setting (e.g., 'z:glm-5.1') + $brain = $agent->brain ?? 'z:glm-5.1'; $parts = explode(':', $brain, 2); $provider = $parts[0]; - $model = $parts[1] ?? 'glm-4.7'; + $model = $parts[1] ?? 
'glm-5.1'; // Verify integration is enabled and get config $integration = IntegrationSetting::where('workspace_id', $agent->workspace_id) @@ -60,15 +60,15 @@ public function respond(User $agent, string $channelId, string $userMessage): st $systemPrompt = $this->buildSystemPrompt($agent); // Call the configured AI model via HTTP - return $this->callGlmApi($apiKey, $baseUrl, $model, $systemPrompt, $messages); + return $this->callChatCompletionApi($apiKey, $baseUrl, $model, $systemPrompt, $messages); } /** - * Call GLM/Zhipu AI API directly + * Call an OpenAI-compatible chat completions API directly. * * @param array> $messages */ - private function callGlmApi(string $apiKey, string $baseUrl, string $model, string $systemPrompt, array $messages): string + private function callChatCompletionApi(string $apiKey, string $baseUrl, string $model, string $systemPrompt, array $messages): string { // Prepend system message $apiMessages = [ @@ -99,8 +99,8 @@ private function callGlmApi(string $apiKey, string $baseUrl, string $model, stri private function getDefaultUrl(string $provider): string { return match ($provider) { - 'glm' => 'https://open.bigmodel.cn/api/paas/v4', - 'glm-coding' => 'https://api.z.ai/api/coding/paas/v4', + 'z-api' => 'https://open.bigmodel.cn/api/paas/v4', + 'z' => 'https://api.z.ai/api/coding/paas/v4', default => throw new \Exception("Unknown provider: {$provider}"), }; } diff --git a/app/Services/IntegrationSettingCredentialResolver.php b/app/Services/IntegrationSettingCredentialResolver.php index 5b49c63..1a5c7db 100644 --- a/app/Services/IntegrationSettingCredentialResolver.php +++ b/app/Services/IntegrationSettingCredentialResolver.php @@ -9,17 +9,30 @@ class IntegrationSettingCredentialResolver implements CredentialResolver { public function get(string $integration, string $key, mixed $default = null, ?string $account = null): mixed { - // OpenCompany uses workspace-scoped settings; account parameter is ignored - // (each workspace has one set of 
credentials per integration). - $setting = app()->bound('currentWorkspace') - ? IntegrationSetting::forWorkspace()->where('integration_id', $integration)->first() - : IntegrationSetting::where('integration_id', $integration)->first(); + $setting = $this->findSetting($integration, $account); return $setting?->getConfigValue($key, $default) ?? $default; } public function isConfigured(string $integration, ?string $account = null): bool { - return ! empty($this->get($integration, 'api_key')); + return ! empty($this->get($integration, 'api_key', null, $account)); + } + + public function getAccounts(string $integration): array + { + return IntegrationSetting::getAccountsFor($integration); + } + + private function findSetting(string $integration, ?string $account): ?IntegrationSetting + { + $query = app()->bound('currentWorkspace') + ? IntegrationSetting::forWorkspace() + : IntegrationSetting::query(); + + return $query + ->where('integration_id', $integration) + ->forAccount($account) + ->first(); } } diff --git a/app/Services/LuaApiDocGenerator.php b/app/Services/LuaApiDocGenerator.php index f9bc19c..07574fe 100644 --- a/app/Services/LuaApiDocGenerator.php +++ b/app/Services/LuaApiDocGenerator.php @@ -3,7 +3,9 @@ namespace App\Services; use App\Agents\Tools\ToolRegistry; +use App\Models\McpServer; use App\Models\User; +use OpenCompany\IntegrationCore\Contracts\CredentialResolver; use OpenCompany\IntegrationCore\Lua\LuaCatalogBuilder; use OpenCompany\IntegrationCore\Lua\LuaDocRenderer; use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; @@ -68,8 +70,33 @@ private function buildNamespaces(User $agent): array return $this->cachedNamespaces; } + $catalog = $this->registry->getToolCatalog($agent); + + // Inject account aliases for multi-account integrations and MCP servers + $credentialResolver = app(CredentialResolver::class); + foreach ($catalog as &$app) { + $appName = $app['name'] ?? 
''; + if (empty($appName) || empty($app['isIntegration'])) { + continue; + } + + if (str_starts_with($appName, 'mcp_')) { + // MCP server — look up accounts by slug + $mcpSlug = substr($appName, 4); + $accounts = McpServer::getAccountsFor($mcpSlug); + } else { + // Regular integration — look up accounts via credential resolver + $accounts = $credentialResolver->getAccounts($appName); + } + + if ($accounts !== []) { + $app['accounts'] = $accounts; + } + } + unset($app); + $this->cachedNamespaces = $this->catalogBuilder->buildNamespaces( - $this->registry->getToolCatalog($agent), + $catalog, ['tasks', 'system', 'lua'], ); $this->cachedAgent = $agent; @@ -93,6 +120,14 @@ public function buildParameterMap(User $agent): array return $this->catalogBuilder->buildParameterMap($this->buildNamespaces($agent)); } + /** + * @return array<string, string> path => accountAlias (only for multi-account function paths) + */ + public function buildAccountMap(User $agent): array + { + return $this->catalogBuilder->buildAccountMap($this->buildNamespaces($agent)); + } + /** * @return list */ @@ -114,6 +149,11 @@ private function getProviderLuaDocs(string $namespace): ?string ?
substr($namespace, strlen('integrations.')) : $namespace; + // Strip account segment for multi-account namespaces (e.g., "clickup.work" → "clickup") + if ($this->providerRegistry->get($appName) === null && str_contains($appName, '.')) { + $appName = explode('.', $appName, 2)[0]; + } + $provider = $this->providerRegistry->get($appName); if ($provider === null) { return null; diff --git a/app/Services/LuaBridge.php b/app/Services/LuaBridge.php index f982786..b732666 100644 --- a/app/Services/LuaBridge.php +++ b/app/Services/LuaBridge.php @@ -19,6 +19,7 @@ public function __construct( $docGenerator->buildFunctionMap($agent), $docGenerator->buildParameterMap($agent), new OpenCompanyLuaToolInvoker($agent, $registry), + $docGenerator->buildAccountMap($agent), ); } diff --git a/app/Services/Mcp/McpServerRegistrar.php b/app/Services/Mcp/McpServerRegistrar.php index 33d5808..4e0b5b1 100644 --- a/app/Services/Mcp/McpServerRegistrar.php +++ b/app/Services/Mcp/McpServerRegistrar.php @@ -9,6 +9,10 @@ class McpServerRegistrar { /** * Register all enabled MCP servers as ToolProviders in the registry. + * + * Servers sharing the same slug are grouped: the default (account_alias='') + * provides the canonical tool definitions, and additional accounts are + * registered on the same provider for multi-account namespace support. 
*/ public static function registerAll(ToolProviderRegistry $registry): void { @@ -17,12 +21,40 @@ public static function registerAll(ToolProviderRegistry $registry): void ->whereNotNull('discovered_tools') ->get(); } catch (\Throwable) { - // Table may not exist yet (fresh install / migration pending) return; } + // Group by slug — default account first + $grouped = []; foreach ($servers as $server) { - $registry->register(new McpToolProvider($server)); + $grouped[$server->slug][] = $server; + } + + foreach ($grouped as $slug => $group) { + // Find the default server (account_alias = '') + $default = null; + $accounts = []; + + foreach ($group as $server) { + if ($server->account_alias === '' || $server->account_alias === null) { + $default = $server; + } else { + $accounts[$server->account_alias] = $server; + } + } + + // Fall back to first server if no explicit default + if ($default === null) { + $default = $group[0]; + } + + $provider = new McpToolProvider($default); + + foreach ($accounts as $alias => $accountServer) { + $provider->addAccountServer($alias, $accountServer); + } + + $registry->register($provider); } } } diff --git a/app/Services/Mcp/McpToolProvider.php b/app/Services/Mcp/McpToolProvider.php index 0285c89..cb6e433 100644 --- a/app/Services/Mcp/McpToolProvider.php +++ b/app/Services/Mcp/McpToolProvider.php @@ -9,10 +9,21 @@ class McpToolProvider implements ToolProvider { + /** @var array<string, McpServer> account_alias => server */ + private array $accountServers = []; + public function __construct( private McpServer $server, ) {} + /** + * Register an additional account server for this provider. + */ + public function addAccountServer(string $account, McpServer $server): void + { + $this->accountServers[$account] = $server; + } + public function appName(): string { return 'mcp_' .
$this->server->slug; @@ -57,29 +68,48 @@ public function isIntegration(): bool /** @param array $context */ public function createTool(string $class, array $context = []): Tool { + $account = $context['account'] ?? null; + $server = $this->resolveServer($account); + $toolSlug = $context['tool_slug'] ?? ''; $mcpToolName = $this->mcpToolNameFromSlug($toolSlug); - $mcpToolDef = $this->findToolDef($mcpToolName); + $mcpToolDef = $this->findToolDef($mcpToolName, $server); return new McpProxyTool( - server: $this->server, + server: $server, mcpToolName: $mcpToolName, mcpToolDescription: $mcpToolDef['description'] ?? '', mcpInputSchema: $mcpToolDef['inputSchema'] ?? [], ); } + public function luaDocsPath(): ?string + { + return null; + } + + public function credentialFields(): array + { + return []; + } + /** - * Build tool slug: mcp_{server_slug}__{tool_name_snake} + * Resolve the server for the given account alias. */ + private function resolveServer(?string $account): McpServer + { + if ($account !== null && $account !== '' && isset($this->accountServers[$account])) { + return $this->accountServers[$account]; + } + + return $this->server; + } + private function toolSlug(string $mcpToolName): string { return 'mcp_' . $this->server->slug . '__' . Str::snake($mcpToolName); } - /** - * Extract MCP tool name from slug. - */ private function mcpToolNameFromSlug(string $slug): string { $prefix = 'mcp_' . $this->server->slug . '__'; @@ -92,23 +122,13 @@ private function mcpToolNameFromSlug(string $slug): string } /** - * Find a tool definition by MCP tool name from cached discovered_tools. - * * @return array */ - public function luaDocsPath(): ?string - { - return null; - } - - public function credentialFields(): array + private function findToolDef(string $mcpToolName, ?McpServer $server = null): array { - return []; // MCP servers handle their own credentials - } + $tools = ($server ?? $this->server)->discovered_tools ?? 
[]; - private function findToolDef(string $mcpToolName): array - { - foreach ($this->server->discovered_tools ?? [] as $tool) { + foreach ($tools as $tool) { if (Str::snake($tool['name']) === $mcpToolName || $tool['name'] === $mcpToolName) { return $tool; } diff --git a/app/Services/OpenCompanyLuaToolInvoker.php b/app/Services/OpenCompanyLuaToolInvoker.php index 17b9723..518007f 100644 --- a/app/Services/OpenCompanyLuaToolInvoker.php +++ b/app/Services/OpenCompanyLuaToolInvoker.php @@ -15,9 +15,9 @@ public function __construct( private ToolRegistry $registry, ) {} - public function invoke(string $toolSlug, array $args): mixed + public function invoke(string $toolSlug, array $args, ?string $account = null): mixed { - $tool = $this->registry->instantiateToolBySlug($toolSlug, $this->agent); + $tool = $this->registry->instantiateToolBySlug($toolSlug, $this->agent, $account); if ($tool === null) { throw new \RuntimeException("Tool not available: {$toolSlug}"); diff --git a/composer.json b/composer.json index e06fc54..a898928 100644 --- a/composer.json +++ b/composer.json @@ -8,7 +8,11 @@ "repositories": [ { "type": "path", - "url": "../integrations/*" + "url": "../integrations/core" + }, + { + "type": "path", + "url": "../integrations/packages/*" }, { "type": "path", diff --git a/composer.lock b/composer.lock index b57ec05..a1625da 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "f29a59c5bc95a3bf57a5724035d3ab86", + "content-hash": "5ae411008f59c8b4bd43daa3ca679e65", "packages": [ { "name": "brick/math", @@ -4182,7 +4182,7 @@ "dist": { "type": "path", "url": "tmp/prism-relay", - "reference": "b0ee49538c5d9ce13e9d6c55d0161a6e9c881ad3" + "reference": "0126bda0e558679c90cba4455bce300837839ea8" }, "require": { "php": "^8.2", @@ -4440,16 +4440,16 @@ }, { "name": "prism-php/prism", - "version": "v0.99.21", + 
"version": "v0.99.22", "source": { "type": "git", "url": "https://github.com/prism-php/prism.git", - "reference": "95272567629a62831294f63b1b927b1e2e608daf" + "reference": "989f67567aef69c613eae6e932d615fb96e2f5d7" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/prism-php/prism/zipball/95272567629a62831294f63b1b927b1e2e608daf", - "reference": "95272567629a62831294f63b1b927b1e2e608daf", + "url": "https://api.github.com/repos/prism-php/prism/zipball/989f67567aef69c613eae6e932d615fb96e2f5d7", + "reference": "989f67567aef69c613eae6e932d615fb96e2f5d7", "shasum": "" }, "require": { @@ -4507,7 +4507,7 @@ "description": "A powerful Laravel package for integrating Large Language Models (LLMs) into your applications.", "support": { "issues": "https://github.com/prism-php/prism/issues", - "source": "https://github.com/prism-php/prism/tree/v0.99.21" + "source": "https://github.com/prism-php/prism/tree/v0.99.22" }, "funding": [ { @@ -4515,7 +4515,7 @@ "type": "github" } ], - "time": "2026-03-01T21:12:44+00:00" + "time": "2026-03-12T17:55:23+00:00" }, { "name": "psr/clock", @@ -5858,16 +5858,16 @@ }, { "name": "symfony/clock", - "version": "v8.0.0", + "version": "v8.0.8", "source": { "type": "git", "url": "https://github.com/symfony/clock.git", - "reference": "832119f9b8dbc6c8e6f65f30c5969eca1e88764f" + "reference": "b55a638b189a6faa875e0ccdb00908fb87af95b3" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/clock/zipball/832119f9b8dbc6c8e6f65f30c5969eca1e88764f", - "reference": "832119f9b8dbc6c8e6f65f30c5969eca1e88764f", + "url": "https://api.github.com/repos/symfony/clock/zipball/b55a638b189a6faa875e0ccdb00908fb87af95b3", + "reference": "b55a638b189a6faa875e0ccdb00908fb87af95b3", "shasum": "" }, "require": { @@ -5911,7 +5911,7 @@ "time" ], "support": { - "source": "https://github.com/symfony/clock/tree/v8.0.0" + "source": "https://github.com/symfony/clock/tree/v8.0.8" }, "funding": [ { @@ -5931,20 +5931,20 @@ "type": "tidelift" } ], - 
"time": "2025-11-12T15:46:48+00:00" + "time": "2026-03-30T15:14:47+00:00" }, { "name": "symfony/console", - "version": "v7.4.7", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/console.git", - "reference": "e1e6770440fb9c9b0cf725f81d1361ad1835329d" + "reference": "1e92e39c51f95b88e3d66fa2d9f06d1fb45dd707" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/console/zipball/e1e6770440fb9c9b0cf725f81d1361ad1835329d", - "reference": "e1e6770440fb9c9b0cf725f81d1361ad1835329d", + "url": "https://api.github.com/repos/symfony/console/zipball/1e92e39c51f95b88e3d66fa2d9f06d1fb45dd707", + "reference": "1e92e39c51f95b88e3d66fa2d9f06d1fb45dd707", "shasum": "" }, "require": { @@ -6009,7 +6009,7 @@ "terminal" ], "support": { - "source": "https://github.com/symfony/console/tree/v7.4.7" + "source": "https://github.com/symfony/console/tree/v7.4.8" }, "funding": [ { @@ -6029,20 +6029,20 @@ "type": "tidelift" } ], - "time": "2026-03-06T14:06:20+00:00" + "time": "2026-03-30T13:54:39+00:00" }, { "name": "symfony/css-selector", - "version": "v8.0.6", + "version": "v8.0.8", "source": { "type": "git", "url": "https://github.com/symfony/css-selector.git", - "reference": "2a178bf80f05dbbe469a337730eba79d61315262" + "reference": "8db1c00226a94d8ab6aa89d9224eeee91e2ea2ed" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/css-selector/zipball/2a178bf80f05dbbe469a337730eba79d61315262", - "reference": "2a178bf80f05dbbe469a337730eba79d61315262", + "url": "https://api.github.com/repos/symfony/css-selector/zipball/8db1c00226a94d8ab6aa89d9224eeee91e2ea2ed", + "reference": "8db1c00226a94d8ab6aa89d9224eeee91e2ea2ed", "shasum": "" }, "require": { @@ -6078,7 +6078,7 @@ "description": "Converts CSS selectors to XPath expressions", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/css-selector/tree/v8.0.6" + "source": "https://github.com/symfony/css-selector/tree/v8.0.8" }, "funding": 
[ { @@ -6098,7 +6098,7 @@ "type": "tidelift" } ], - "time": "2026-02-17T13:07:04+00:00" + "time": "2026-03-30T15:14:47+00:00" }, { "name": "symfony/deprecation-contracts", @@ -6169,16 +6169,16 @@ }, { "name": "symfony/error-handler", - "version": "v7.4.4", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/error-handler.git", - "reference": "8da531f364ddfee53e36092a7eebbbd0b775f6b8" + "reference": "8dd79d8af777ee6cba2fd4d98da6ffb839f3c0fa" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/error-handler/zipball/8da531f364ddfee53e36092a7eebbbd0b775f6b8", - "reference": "8da531f364ddfee53e36092a7eebbbd0b775f6b8", + "url": "https://api.github.com/repos/symfony/error-handler/zipball/8dd79d8af777ee6cba2fd4d98da6ffb839f3c0fa", + "reference": "8dd79d8af777ee6cba2fd4d98da6ffb839f3c0fa", "shasum": "" }, "require": { @@ -6227,7 +6227,7 @@ "description": "Provides tools to manage errors and ease debugging PHP code", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/error-handler/tree/v7.4.4" + "source": "https://github.com/symfony/error-handler/tree/v7.4.8" }, "funding": [ { @@ -6247,20 +6247,20 @@ "type": "tidelift" } ], - "time": "2026-01-20T16:42:42+00:00" + "time": "2026-03-24T13:12:05+00:00" }, { "name": "symfony/event-dispatcher", - "version": "v8.0.4", + "version": "v8.0.8", "source": { "type": "git", "url": "https://github.com/symfony/event-dispatcher.git", - "reference": "99301401da182b6cfaa4700dbe9987bb75474b47" + "reference": "f662acc6ab22a3d6d716dcb44c381c6002940df6" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/event-dispatcher/zipball/99301401da182b6cfaa4700dbe9987bb75474b47", - "reference": "99301401da182b6cfaa4700dbe9987bb75474b47", + "url": "https://api.github.com/repos/symfony/event-dispatcher/zipball/f662acc6ab22a3d6d716dcb44c381c6002940df6", + "reference": "f662acc6ab22a3d6d716dcb44c381c6002940df6", "shasum": "" }, "require": { @@ 
-6312,7 +6312,7 @@ "description": "Provides tools that allow your application components to communicate with each other by dispatching events and listening to them", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/event-dispatcher/tree/v8.0.4" + "source": "https://github.com/symfony/event-dispatcher/tree/v8.0.8" }, "funding": [ { @@ -6332,7 +6332,7 @@ "type": "tidelift" } ], - "time": "2026-01-05T11:45:55+00:00" + "time": "2026-03-30T15:14:47+00:00" }, { "name": "symfony/event-dispatcher-contracts", @@ -6412,16 +6412,16 @@ }, { "name": "symfony/finder", - "version": "v7.4.6", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/finder.git", - "reference": "8655bf1076b7a3a346cb11413ffdabff50c7ffcf" + "reference": "e0be088d22278583a82da281886e8c3592fbf149" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/finder/zipball/8655bf1076b7a3a346cb11413ffdabff50c7ffcf", - "reference": "8655bf1076b7a3a346cb11413ffdabff50c7ffcf", + "url": "https://api.github.com/repos/symfony/finder/zipball/e0be088d22278583a82da281886e8c3592fbf149", + "reference": "e0be088d22278583a82da281886e8c3592fbf149", "shasum": "" }, "require": { @@ -6456,7 +6456,7 @@ "description": "Finds files and directories via an intuitive fluent interface", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/finder/tree/v7.4.6" + "source": "https://github.com/symfony/finder/tree/v7.4.8" }, "funding": [ { @@ -6476,20 +6476,20 @@ "type": "tidelift" } ], - "time": "2026-01-29T09:40:50+00:00" + "time": "2026-03-24T13:12:05+00:00" }, { "name": "symfony/http-foundation", - "version": "v7.4.7", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/http-foundation.git", - "reference": "f94b3e7b7dafd40e666f0c9ff2084133bae41e81" + "reference": "9381209597ec66c25be154cbf2289076e64d1eab" }, "dist": { "type": "zip", - "url": 
"https://api.github.com/repos/symfony/http-foundation/zipball/f94b3e7b7dafd40e666f0c9ff2084133bae41e81", - "reference": "f94b3e7b7dafd40e666f0c9ff2084133bae41e81", + "url": "https://api.github.com/repos/symfony/http-foundation/zipball/9381209597ec66c25be154cbf2289076e64d1eab", + "reference": "9381209597ec66c25be154cbf2289076e64d1eab", "shasum": "" }, "require": { @@ -6538,7 +6538,7 @@ "description": "Defines an object-oriented layer for the HTTP specification", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/http-foundation/tree/v7.4.7" + "source": "https://github.com/symfony/http-foundation/tree/v7.4.8" }, "funding": [ { @@ -6558,20 +6558,20 @@ "type": "tidelift" } ], - "time": "2026-03-06T13:15:18+00:00" + "time": "2026-03-24T13:12:05+00:00" }, { "name": "symfony/http-kernel", - "version": "v7.4.7", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/http-kernel.git", - "reference": "3b3fcf386c809be990c922e10e4c620d6367cab1" + "reference": "017e76ad089bac281553389269e259e155935e1a" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/http-kernel/zipball/3b3fcf386c809be990c922e10e4c620d6367cab1", - "reference": "3b3fcf386c809be990c922e10e4c620d6367cab1", + "url": "https://api.github.com/repos/symfony/http-kernel/zipball/017e76ad089bac281553389269e259e155935e1a", + "reference": "017e76ad089bac281553389269e259e155935e1a", "shasum": "" }, "require": { @@ -6657,7 +6657,7 @@ "description": "Provides a structured process for converting a Request into a Response", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/http-kernel/tree/v7.4.7" + "source": "https://github.com/symfony/http-kernel/tree/v7.4.8" }, "funding": [ { @@ -6677,20 +6677,20 @@ "type": "tidelift" } ], - "time": "2026-03-06T16:33:18+00:00" + "time": "2026-03-31T20:57:01+00:00" }, { "name": "symfony/mailer", - "version": "v7.4.6", + "version": "v7.4.8", "source": { "type": 
"git", "url": "https://github.com/symfony/mailer.git", - "reference": "b02726f39a20bc65e30364f5c750c4ddbf1f58e9" + "reference": "f6ea532250b476bfc1b56699b388a1bdbf168f62" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/mailer/zipball/b02726f39a20bc65e30364f5c750c4ddbf1f58e9", - "reference": "b02726f39a20bc65e30364f5c750c4ddbf1f58e9", + "url": "https://api.github.com/repos/symfony/mailer/zipball/f6ea532250b476bfc1b56699b388a1bdbf168f62", + "reference": "f6ea532250b476bfc1b56699b388a1bdbf168f62", "shasum": "" }, "require": { @@ -6741,7 +6741,7 @@ "description": "Helps sending emails", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/mailer/tree/v7.4.6" + "source": "https://github.com/symfony/mailer/tree/v7.4.8" }, "funding": [ { @@ -6761,20 +6761,20 @@ "type": "tidelift" } ], - "time": "2026-02-25T16:50:00+00:00" + "time": "2026-03-24T13:12:05+00:00" }, { "name": "symfony/mime", - "version": "v7.4.7", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/mime.git", - "reference": "da5ab4fde3f6c88ab06e96185b9922f48b677cd1" + "reference": "6df02f99998081032da3407a8d6c4e1dcb5d4379" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/mime/zipball/da5ab4fde3f6c88ab06e96185b9922f48b677cd1", - "reference": "da5ab4fde3f6c88ab06e96185b9922f48b677cd1", + "url": "https://api.github.com/repos/symfony/mime/zipball/6df02f99998081032da3407a8d6c4e1dcb5d4379", + "reference": "6df02f99998081032da3407a8d6c4e1dcb5d4379", "shasum": "" }, "require": { @@ -6830,7 +6830,7 @@ "mime-type" ], "support": { - "source": "https://github.com/symfony/mime/tree/v7.4.7" + "source": "https://github.com/symfony/mime/tree/v7.4.8" }, "funding": [ { @@ -6850,7 +6850,7 @@ "type": "tidelift" } ], - "time": "2026-03-05T15:24:09+00:00" + "time": "2026-03-30T14:11:46+00:00" }, { "name": "symfony/polyfill-ctype", @@ -7683,16 +7683,16 @@ }, { "name": "symfony/process", - "version": "v7.4.5", + 
"version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/process.git", - "reference": "608476f4604102976d687c483ac63a79ba18cc97" + "reference": "60f19cd3badc8de688421e21e4305eba50f8089a" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/process/zipball/608476f4604102976d687c483ac63a79ba18cc97", - "reference": "608476f4604102976d687c483ac63a79ba18cc97", + "url": "https://api.github.com/repos/symfony/process/zipball/60f19cd3badc8de688421e21e4305eba50f8089a", + "reference": "60f19cd3badc8de688421e21e4305eba50f8089a", "shasum": "" }, "require": { @@ -7724,7 +7724,7 @@ "description": "Executes commands in sub-processes", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/process/tree/v7.4.5" + "source": "https://github.com/symfony/process/tree/v7.4.8" }, "funding": [ { @@ -7744,7 +7744,7 @@ "type": "tidelift" } ], - "time": "2026-01-26T15:07:59+00:00" + "time": "2026-03-24T13:12:05+00:00" }, { "name": "symfony/psr-http-message-bridge", @@ -7836,16 +7836,16 @@ }, { "name": "symfony/routing", - "version": "v7.4.6", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/routing.git", - "reference": "238d749c56b804b31a9bf3e26519d93b65a60938" + "reference": "9608de9873ec86e754fb6c0a0fa7e5f1a960eb6b" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/routing/zipball/238d749c56b804b31a9bf3e26519d93b65a60938", - "reference": "238d749c56b804b31a9bf3e26519d93b65a60938", + "url": "https://api.github.com/repos/symfony/routing/zipball/9608de9873ec86e754fb6c0a0fa7e5f1a960eb6b", + "reference": "9608de9873ec86e754fb6c0a0fa7e5f1a960eb6b", "shasum": "" }, "require": { @@ -7897,7 +7897,7 @@ "url" ], "support": { - "source": "https://github.com/symfony/routing/tree/v7.4.6" + "source": "https://github.com/symfony/routing/tree/v7.4.8" }, "funding": [ { @@ -7917,7 +7917,7 @@ "type": "tidelift" } ], - "time": "2026-02-25T16:50:00+00:00" + "time": 
"2026-03-24T13:12:05+00:00" }, { "name": "symfony/service-contracts", @@ -8008,16 +8008,16 @@ }, { "name": "symfony/string", - "version": "v8.0.6", + "version": "v8.0.8", "source": { "type": "git", "url": "https://github.com/symfony/string.git", - "reference": "6c9e1108041b5dce21a9a4984b531c4923aa9ec4" + "reference": "ae9488f874d7603f9d2dfbf120203882b645d963" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/string/zipball/6c9e1108041b5dce21a9a4984b531c4923aa9ec4", - "reference": "6c9e1108041b5dce21a9a4984b531c4923aa9ec4", + "url": "https://api.github.com/repos/symfony/string/zipball/ae9488f874d7603f9d2dfbf120203882b645d963", + "reference": "ae9488f874d7603f9d2dfbf120203882b645d963", "shasum": "" }, "require": { @@ -8074,7 +8074,7 @@ "utf8" ], "support": { - "source": "https://github.com/symfony/string/tree/v8.0.6" + "source": "https://github.com/symfony/string/tree/v8.0.8" }, "funding": [ { @@ -8094,20 +8094,20 @@ "type": "tidelift" } ], - "time": "2026-02-09T10:14:57+00:00" + "time": "2026-03-30T15:14:47+00:00" }, { "name": "symfony/translation", - "version": "v8.0.6", + "version": "v8.0.8", "source": { "type": "git", "url": "https://github.com/symfony/translation.git", - "reference": "13ff19bcf2bea492d3c2fbeaa194dd6f4599ce1b" + "reference": "27c03ae3940de24ba2f71cfdbac824f2aa1fdf2f" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/translation/zipball/13ff19bcf2bea492d3c2fbeaa194dd6f4599ce1b", - "reference": "13ff19bcf2bea492d3c2fbeaa194dd6f4599ce1b", + "url": "https://api.github.com/repos/symfony/translation/zipball/27c03ae3940de24ba2f71cfdbac824f2aa1fdf2f", + "reference": "27c03ae3940de24ba2f71cfdbac824f2aa1fdf2f", "shasum": "" }, "require": { @@ -8167,7 +8167,7 @@ "description": "Provides tools to internationalize your application", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/translation/tree/v8.0.6" + "source": "https://github.com/symfony/translation/tree/v8.0.8" }, 
"funding": [ { @@ -8187,7 +8187,7 @@ "type": "tidelift" } ], - "time": "2026-02-17T13:07:04+00:00" + "time": "2026-03-30T15:14:47+00:00" }, { "name": "symfony/translation-contracts", @@ -8273,16 +8273,16 @@ }, { "name": "symfony/uid", - "version": "v7.4.4", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/uid.git", - "reference": "7719ce8aba76be93dfe249192f1fbfa52c588e36" + "reference": "6883ebdf7bf6a12b37519dbc0df62b0222401b56" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/uid/zipball/7719ce8aba76be93dfe249192f1fbfa52c588e36", - "reference": "7719ce8aba76be93dfe249192f1fbfa52c588e36", + "url": "https://api.github.com/repos/symfony/uid/zipball/6883ebdf7bf6a12b37519dbc0df62b0222401b56", + "reference": "6883ebdf7bf6a12b37519dbc0df62b0222401b56", "shasum": "" }, "require": { @@ -8327,7 +8327,7 @@ "uuid" ], "support": { - "source": "https://github.com/symfony/uid/tree/v7.4.4" + "source": "https://github.com/symfony/uid/tree/v7.4.8" }, "funding": [ { @@ -8347,20 +8347,20 @@ "type": "tidelift" } ], - "time": "2026-01-03T23:30:35+00:00" + "time": "2026-03-24T13:12:05+00:00" }, { "name": "symfony/var-dumper", - "version": "v7.4.6", + "version": "v7.4.8", "source": { "type": "git", "url": "https://github.com/symfony/var-dumper.git", - "reference": "045321c440ac18347b136c63d2e9bf28a2dc0291" + "reference": "9510c3966f749a1d1ff0059e1eabef6cc621e7fd" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/var-dumper/zipball/045321c440ac18347b136c63d2e9bf28a2dc0291", - "reference": "045321c440ac18347b136c63d2e9bf28a2dc0291", + "url": "https://api.github.com/repos/symfony/var-dumper/zipball/9510c3966f749a1d1ff0059e1eabef6cc621e7fd", + "reference": "9510c3966f749a1d1ff0059e1eabef6cc621e7fd", "shasum": "" }, "require": { @@ -8414,7 +8414,7 @@ "dump" ], "support": { - "source": "https://github.com/symfony/var-dumper/tree/v7.4.6" + "source": "https://github.com/symfony/var-dumper/tree/v7.4.8" }, 
"funding": [ { @@ -8434,7 +8434,7 @@ "type": "tidelift" } ], - "time": "2026-02-15T10:53:20+00:00" + "time": "2026-03-30T13:44:50+00:00" }, { "name": "tightenco/ziggy", diff --git a/config/integrations.php b/config/integrations.php index 2cadfcb..41d53c8 100644 --- a/config/integrations.php +++ b/config/integrations.php @@ -109,6 +109,16 @@ 'api_key_url' => 'https://openrouter.ai/keys', ], + 'perplexity' => [ + 'category' => 'ai-models', + 'name' => 'Perplexity', + 'description' => 'Search-native reasoning models', + 'icon' => 'ph:compass', + 'default_url' => 'https://api.perplexity.ai', + 'api_format' => 'openai_compat', + 'api_key_url' => 'https://www.perplexity.ai/settings/api', + ], + 'minimax' => [ 'category' => 'ai-models', 'name' => 'MiniMax Coding Plan', @@ -149,20 +159,20 @@ 'api_key_url' => 'https://platform.moonshot.ai/console', ], - 'glm' => [ + 'z-api' => [ 'category' => 'ai-models', - 'name' => 'GLM (Zhipu AI)', - 'description' => 'General-purpose Chinese LLM', + 'name' => 'Z.AI API', + 'description' => 'Zhipu AI standard API endpoint', 'icon' => 'ph:brain', 'default_url' => \OpenCompany\PrismRelay\Providers\Glm::URL, 'api_format' => 'openai_compat', 'api_key_url' => 'https://open.bigmodel.cn/', ], - 'glm-coding' => [ + 'z' => [ 'category' => 'ai-models', - 'name' => 'GLM Coding Plan', - 'description' => 'Specialized coding LLM via Zhipu Coding Plan', + 'name' => 'Z.AI Coding Plan', + 'description' => 'Zhipu AI coding-plan endpoint', 'icon' => 'ph:code', 'default_url' => \OpenCompany\PrismRelay\Providers\GlmCoding::URL, 'api_format' => 'openai_compat', diff --git a/config/prism.php b/config/prism.php index 1086754..45ab183 100644 --- a/config/prism.php +++ b/config/prism.php @@ -61,11 +61,17 @@ 'x_title' => env('OPENROUTER_SITE_X_TITLE', null), ], ], - // GLM 4.7 (Zhipu AI) - OpenAI-compatible API - // Coding plan uses dedicated endpoint - 'glm' => [ - 'api_key' => env('GLM_API_KEY', ''), - 'url' => env('GLM_URL', 
'https://api.z.ai/api/coding/paas/v4'), + 'perplexity' => [ + 'api_key' => env('PERPLEXITY_API_KEY', ''), + 'url' => env('PERPLEXITY_URL', 'https://api.perplexity.ai'), + ], + 'z' => [ + 'api_key' => env('ZAI_API_KEY', ''), + 'url' => env('Z_URL', 'https://api.z.ai/api/coding/paas/v4'), + ], + 'z-api' => [ + 'api_key' => env('ZAI_API_KEY', ''), + 'url' => env('Z_API_URL', 'https://open.bigmodel.cn/api/paas/v4'), ], ], ]; diff --git a/database/migrations/2026_04_05_000001_add_multi_account_to_integration_settings.php b/database/migrations/2026_04_05_000001_add_multi_account_to_integration_settings.php new file mode 100644 index 0000000..a2ed49d --- /dev/null +++ b/database/migrations/2026_04_05_000001_add_multi_account_to_integration_settings.php @@ -0,0 +1,45 @@ +string('account_alias', 32)->default('')->after('integration_id'); + $table->boolean('is_default')->default(true)->after('enabled'); + + $table->dropUnique(['workspace_id', 'integration_id']); + $table->unique(['workspace_id', 'integration_id', 'account_alias']); + }); + + // MCP servers: multi-account support + Schema::table('mcp_servers', function (Blueprint $table) { + $table->string('account_alias', 32)->default('')->after('slug'); + + $table->dropUnique(['workspace_id', 'slug']); + $table->unique(['workspace_id', 'slug', 'account_alias']); + }); + } + + public function down(): void + { + Schema::table('mcp_servers', function (Blueprint $table) { + $table->dropUnique(['workspace_id', 'slug', 'account_alias']); + $table->unique(['workspace_id', 'slug']); + + $table->dropColumn('account_alias'); + }); + + Schema::table('integration_settings', function (Blueprint $table) { + $table->dropUnique(['workspace_id', 'integration_id', 'account_alias']); + $table->unique(['workspace_id', 'integration_id']); + + $table->dropColumn(['account_alias', 'is_default']); + }); + } +}; diff --git a/docs/ecosystem/integrations/README.md b/docs/ecosystem/integrations/README.md new file mode 100644 index 0000000..93db57d --- 
/dev/null +++ b/docs/ecosystem/integrations/README.md @@ -0,0 +1,943 @@ +# OpenCompany Integrations + +Monorepo for all [OpenCompany](https://github.com/OpenCompanyApp) integration packages. Each package exposes tools that AI agents can call — from rendering diagrams to querying APIs to managing tasks. + +Integrations are independent Composer packages built on a shared core. They work in any PHP 8.2+ application: [OpenCompany](https://github.com/OpenCompanyApp) (web), [KosmoKrator](https://github.com/OpenCompanyApp) (CLI), or your own consumer. + +## Repository Structure + +``` +core/ Shared contracts, credential abstraction, Lua bridge, registry +celestial/ Astronomy: moon phases, sunrise/sunset, planet positions, eclipses +clickup/ ClickUp project management: tasks, lists, folders, time tracking +coingecko/ CoinGecko cryptocurrency: prices, market data, trending, charts +exchangerate/ Currency exchange rates: 340+ fiat, crypto, and metal conversions +google/ Google Calendar, Gmail, Drive, Sheets, Docs, Forms, Contacts, Tasks, Analytics, Search Console +mermaid/ Mermaid diagram rendering to PNG +plantuml/ PlantUML diagram rendering to PNG +plausible/ Plausible Analytics: stats, realtime visitors, goals +ticktick/ TickTick task management with time tracking +trustmrr/ TrustMRR verified startup revenue data +typst/ Typst document rendering to PDF +vegalite/ Vega-Lite chart rendering to PNG +worldbank/ World Bank economic indicators for 200+ countries +``` + +## Architecture + +``` +┌─────────────────────────────────────────────────┐ +│ Host Application (OpenCompany, KosmoKrator) │ +│ │ +│ ┌──────────┐ ┌───────────────────────────┐ │ +│ │ Lua VM │──▸│ LuaBridge │ │ +│ │ │ │ functionMap → tool slugs │ │ +│ │ app.integrations.mermaid.render(...) 
│ │ +│ └──────────┘ └────────────┬──────────────┘ │ +│ │ │ +│ ┌───────────────────────────▼──────────────┐ │ +│ │ ToolProviderRegistry │ │ +│ │ ├─ mermaid → MermaidToolProvider │ │ +│ │ ├─ plausible → PlausibleToolProvider │ │ +│ │ ├─ clickup → ClickUpToolProvider │ │ +│ │ └─ ... │ │ +│ └───────────────────────────┬──────────────┘ │ +│ │ │ +│ ┌───────────────────────────▼──────────────┐ │ +│ │ ToolProvider.createTool(class, context) │ │ +│ │ → CredentialResolver for API keys │ │ +│ │ → AgentFileStorage for file output │ │ +│ │ → Tool.execute(args) → ToolResult │ │ +│ └──────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────┘ +``` + +**Key concepts:** + +- **Tool** — A single callable action (e.g. "render a Mermaid diagram", "list ClickUp tasks"). Implements `name()`, `description()`, `parameters()`, `execute()`. +- **ToolProvider** — Groups related tools under an app name. Declares metadata, handles tool instantiation with credentials, and optionally provides Lua documentation. +- **ToolProviderRegistry** — Singleton that collects all providers. The host queries it to discover available tools. +- **CredentialResolver** — Abstraction for API keys. The default reads from `config/ai-tools.php`; OpenCompany swaps this for encrypted database storage. +- **LuaBridge** — Routes `app.integrations.{name}.{function}(...)` calls from the Lua VM to PHP tool classes. 
+ +## Available Integrations + +| Package | Tools | Credentials | Category | Description | +|---------|------:|-------------|----------|-------------| +| [celestial](celestial/) | 9 | None | Data | Moon phases, sunrise/sunset, planet positions, eclipses, zodiac | +| [clickup](clickup/) | 34 | API token | Productivity | Tasks, lists, folders, time tracking, docs, chat | +| [coingecko](coingecko/) | 8 | None | Data | Crypto prices, market data, trending coins, historical charts | +| [exchangerate](exchangerate/) | 5 | None | Data | 340+ currency conversions (fiat, crypto, metals) | +| [google](google/) | 117 | OAuth | Productivity | Calendar, Gmail, Drive, Sheets, Docs, Forms, Contacts, Tasks, Analytics, Search Console | +| [mermaid](mermaid/) | 1 | None | Rendering | Flowcharts, sequences, Gantt, class diagrams → PNG | +| [plantuml](plantuml/) | 1 | None | Rendering | UML class, sequence, activity, component, state → PNG | +| [plausible](plausible/) | 8 | API key | Analytics | Stats, realtime visitors, site and goal management | +| [ticktick](ticktick/) | 9 | OAuth | Productivity | Projects, tasks, time tracking (TickTick and Dida365) | +| [trustmrr](trustmrr/) | 2 | API key | Data | Verified startup revenue, MRR, growth, acquisitions | +| [typst](typst/) | 1 | None | Rendering | Reports, invoices, proposals → PDF | +| [vegalite](vegalite/) | 1 | None | Rendering | Bar, line, scatter, heatmap, boxplot charts → PNG | +| [worldbank](worldbank/) | 6 | None | Data | GDP, inflation, population for 200+ countries | + +## Installation + +Each subdirectory is an independent Composer package. In your consuming application: + +```json +{ + "repositories": [ + {"type": "path", "url": "../integrations/*"} + ], + "require": { + "opencompanyapp/integration-core": "@dev", + "opencompanyapp/integration-mermaid": "@dev", + "opencompanyapp/integration-plausible": "@dev" + } +} +``` + +Laravel auto-discovers service providers. 
For non-Laravel apps, use the contracts and registry directly. + +### System Dependencies + +Some rendering integrations need external tools: + +| Package | Dependency | Install | +|---------|-----------|---------| +| mermaid | `mmdc` (Mermaid CLI) | `npm install -g @mermaid-js/mermaid-cli` | +| plantuml | Java + `plantuml.jar` | Bundled in `plantuml/bin/`, needs `java` on PATH | +| typst | `typst` CLI | `brew install typst` or [typst.app](https://github.com/typst/typst) | +| vegalite | Node.js | `node` on PATH; render script bundled in `vegalite/bin/` | + +--- + +# Developer Guide + +## Building a New Integration + +This walkthrough creates a complete integration from scratch. We'll build a "Weather" integration as an example. + +### 1. Create the Package Directory + +``` +weather/ +├── composer.json +├── src/ +│ ├── WeatherServiceProvider.php +│ ├── WeatherService.php +│ ├── WeatherToolProvider.php +│ └── Tools/ +│ └── GetWeather.php +└── lua-docs/ (optional) + └── weather.md +``` + +### 2. 
Define `composer.json` + +```json +{ + "name": "opencompanyapp/integration-weather", + "description": "Weather data and forecasts integration for OpenCompany.", + "license": "MIT", + "authors": [ + { + "name": "OpenCompany", + "homepage": "https://github.com/OpenCompanyApp" + } + ], + "keywords": ["tools", "weather", "forecasts", "opencompany"], + "require": { + "php": "^8.2", + "opencompanyapp/integration-core": "^2.0 || @dev" + }, + "autoload": { + "psr-4": { + "OpenCompany\\Integrations\\Weather\\": "src/" + } + }, + "extra": { + "laravel": { + "providers": [ + "OpenCompany\\Integrations\\Weather\\WeatherServiceProvider" + ] + } + }, + "minimum-stability": "stable", + "prefer-stable": true +} +``` + +**Conventions:** +- Package name: `opencompanyapp/integration-{name}` +- Namespace: `OpenCompany\Integrations\{Name}\` +- If replacing an older standalone package, add a `"replace"` key: `"opencompanyapp/ai-tool-weather": "self.version"` +- Only add `illuminate/support` to `require` if you use facades like `Storage`, `Http`, `Log` directly (most API integrations don't need it) + +### 3. Create the Service Class + +The service class encapsulates all API communication. Tools call the service — they never make HTTP requests directly. + +```php +apiKey); + } + + public function getCurrent(string $location): array + { + return $this->request('GET', '/current', [ + 'location' => $location, + ]); + } + + public function getForecast(string $location, int $days = 3): array + { + return $this->request('GET', '/forecast', [ + 'location' => $location, + 'days' => $days, + ]); + } + + private function request(string $method, string $path, array $params = []): array + { + if (! $this->isConfigured()) { + throw new \RuntimeException('Weather API key is not configured.'); + } + + try { + $response = Http::withHeaders([ + 'Authorization' => "Bearer {$this->apiKey}", + 'Accept' => 'application/json', + ])->timeout(15)->get(self::BASE_URL . $path, $params); + + if (! 
$response->successful()) { + $error = $response->json('error') ?? $response->body(); + Log::error("Weather API error: {$method} {$path}", [ + 'status' => $response->status(), + 'error' => $error, + ]); + throw new \RuntimeException( + 'Weather API error (' . $response->status() . '): ' . $error + ); + } + + return $response->json() ?? []; + } catch (\Illuminate\Http\Client\ConnectionException $e) { + throw new \RuntimeException("Failed to connect to Weather API: {$e->getMessage()}"); + } + } +} +``` + +### 4. Create the Service Provider + +The service provider wires everything into the Laravel container and registers with the `ToolProviderRegistry`. + +```php +app->singleton(WeatherService::class, function ($app) { + $creds = $app->make(CredentialResolver::class); + + return new WeatherService( + apiKey: $creds->get('weather', 'api_key', ''), + ); + }); + } + + public function boot(): void + { + if ($this->app->bound(ToolProviderRegistry::class)) { + $this->app->make(ToolProviderRegistry::class) + ->register(new WeatherToolProvider()); + } + } +} +``` + +**Pattern notes:** +- Always register the service as a singleton — tools may be called multiple times in one request +- Always check `$this->app->bound(ToolProviderRegistry::class)` before registering — the core package may not be installed +- Use `CredentialResolver` to get API keys, never read config directly + +### 5. Create the Tool Provider + +The tool provider declares what tools are available and how to instantiate them. 
+ +```php + 'weather, forecasts, temperature', + 'description' => 'Weather data and forecasts', + 'icon' => 'ph:cloud-sun', + 'logo' => 'ph:cloud-sun', + ]; + } + + public function tools(): array + { + return [ + 'get_weather' => [ + 'class' => GetWeather::class, + 'type' => 'read', + 'name' => 'Get Weather', + 'description' => 'Current weather for any location.', + 'icon' => 'ph:cloud-sun', + ], + 'get_forecast' => [ + 'class' => GetForecast::class, + 'type' => 'read', + 'name' => 'Get Forecast', + 'description' => 'Multi-day weather forecast.', + 'icon' => 'ph:calendar', + ], + ]; + } + + public function isIntegration(): bool + { + return true; + } + + public function createTool(string $class, array $context = []): Tool + { + return new $class(app(WeatherService::class)); + } + + public function luaDocsPath(): ?string + { + return __DIR__ . '/../lua-docs/weather.md'; + } + + public function credentialFields(): array + { + return [ + [ + 'key' => 'api_key', + 'type' => 'secret', + 'label' => 'API Key', + 'required' => true, + 'placeholder' => 'wth_...', + ], + ]; + } +} +``` + +**`tools()` array keys:** +- `class` — Fully-qualified class name of the Tool implementation +- `type` — `'read'` (fetches data) or `'write'` (creates/modifies/deletes) +- `name` — Human-readable display name +- `description` — Short description for listings and UI cards +- `icon` — [Iconify](https://icon-sets.iconify.design/) identifier (we use the `ph:` Phosphor set) + +**`createTool()` context:** +- The `$context` array is injected by the host application at runtime +- In OpenCompany: `['agent' => User, 'timezone' => 'Europe/Amsterdam']` +- In KosmoKrator: `['account' => 'default']` +- Use it to pass runtime dependencies without coupling to specific models + +### 6. Create Tool Classes + +Each tool is a single callable action. + +```php + [ + 'type' => 'string', + 'required' => true, + 'description' => 'City name, address, or coordinates (e.g. 
"Amsterdam", "51.5,-0.1").', + ], + 'units' => [ + 'type' => 'string', + 'enum' => ['metric', 'imperial'], + 'description' => 'Unit system (default: metric).', + ], + ]; + } + + public function execute(array $args): ToolResult + { + $location = $args['location'] ?? ''; + if (empty($location)) { + return ToolResult::error('Location is required.'); + } + + try { + $data = $this->service->getCurrent($location); + + return ToolResult::success($data); + } catch (\Throwable $e) { + return ToolResult::error($e->getMessage()); + } + } +} +``` + +**Parameter types:** `string`, `integer`, `number`, `boolean`, `array`, `object` + +**Optional parameter keys:** +- `required` — `true` if the parameter must be provided (default `false`) +- `description` — Shown in generated Lua docs and tool catalogs +- `enum` — Array of allowed string values +- `items` — Element type for arrays, e.g. `['type' => 'string']` +- `properties` — Sub-property definitions for objects +- `default` — Default value if not provided + +**ToolResult patterns:** +```php +// Success with data (array or string) +return ToolResult::success(['temperature' => 22, 'unit' => 'C']); +return ToolResult::success('The current temperature is 22C.'); + +// Success with metadata (files created, timing info, etc.) +return ToolResult::success($data, ['files' => [$fileInfo]]); + +// Error +return ToolResult::error('Location not found.'); +``` + +--- + +## Integration Types + +The codebase has four distinct integration patterns. Pick the one that matches your use case. + +### Type A: Public API (No Credentials) + +For APIs that don't require authentication: exchangerate, worldbank, coingecko, celestial. 
+ +```php +// ToolProvider +public function credentialFields(): array +{ + return []; // No credentials needed +} + +// ServiceProvider — no credential resolver needed +public function register(): void +{ + $this->app->singleton(MyService::class); +} +``` + +### Type B: API Key Authentication + +For services that need an API key: plausible, trustmrr. + +```php +// ServiceProvider — inject credentials +$this->app->singleton(MyService::class, function ($app) { + $creds = $app->make(CredentialResolver::class); + + return new MyService( + apiKey: $creds->get('myservice', 'api_key', ''), + baseUrl: $creds->get('myservice', 'url', 'https://api.example.com'), + ); +}); + +// ToolProvider +public function credentialFields(): array +{ + return [ + ['key' => 'api_key', 'type' => 'secret', 'label' => 'API Key', 'required' => true], + ['key' => 'url', 'type' => 'url', 'label' => 'Base URL', 'default' => 'https://api.example.com'], + ]; +} +``` + +### Type C: OAuth Authentication + +For services requiring OAuth flows: clickup, ticktick, google. + +These integrations register OAuth routes in their service provider and include a controller: + +```php +// ServiceProvider boot() +Route::prefix('api/integrations/myservice/oauth')->group(function () { + Route::get('authorize', [MyOAuthController::class, 'authorize']); + Route::get('callback', [MyOAuthController::class, 'callback']); +}); + +// ToolProvider credentialFields +public function credentialFields(): array +{ + return [ + ['key' => 'client_id', 'type' => 'string', 'label' => 'Client ID', 'required' => true], + ['key' => 'client_secret', 'type' => 'secret', 'label' => 'Client Secret', 'required' => true], + ['key' => 'access_token', 'type' => 'oauth', 'label' => 'Connect Account'], + ]; +} +``` + +### Type D: Rendering / File Output + +For tools that produce files (images, PDFs): mermaid, plantuml, typst, vegalite. 
+ +These use the `AgentFileStorage` contract to save output files: + +```php +// ToolProvider — inject file storage +public function createTool(string $class, array $context = []): Tool +{ + $fileStorage = app()->bound(AgentFileStorage::class) + ? app(AgentFileStorage::class) + : null; + + return new $class( + app(MyRenderService::class), + $fileStorage, + $context['agent'] ?? null, + ); +} + +// Tool — use file storage if available, fall back to public disk +public function execute(array $args): ToolResult +{ + $bytes = $this->service->renderToBytes($input); + + if ($this->fileStorage && $this->agent) { + $result = $this->fileStorage->saveFile( + $this->agent, 'output.png', $bytes, 'image/png', 'myrenderer' + ); + return ToolResult::success("![Title]({$result['url']})"); + } + + $url = $this->service->render($input); // saves to public disk + return ToolResult::success("![Title]({$url})"); +} +``` + +--- + +## Making an Integration Configurable + +To add a settings UI in OpenCompany, implement `ConfigurableIntegration` alongside `ToolProvider`: + +```php +use OpenCompany\IntegrationCore\Contracts\ConfigurableIntegration; +use OpenCompany\IntegrationCore\Contracts\ToolProvider; + +class WeatherToolProvider implements ToolProvider, ConfigurableIntegration +{ + // ... ToolProvider methods ... 
+ + public function integrationMeta(): array + { + return [ + 'name' => 'Weather', + 'description' => 'Weather data and forecasts for any location', + 'icon' => 'ph:cloud-sun', + 'logo' => 'ph:cloud-sun', + 'category' => 'data', // data, productivity, analytics, rendering + 'badge' => 'New', // optional badge text + 'docs_url' => 'https://...', // optional external docs link + ]; + } + + public function configSchema(): array + { + return [ + [ + 'key' => 'api_key', + 'type' => 'secret', + 'label' => 'API Key', + 'placeholder' => 'wth_...', + 'hint' => 'Get your key at weather.example.', + 'required' => true, + ], + [ + 'key' => 'units', + 'type' => 'select', + 'label' => 'Default Units', + 'options' => ['metric' => 'Metric (C, km/h)', 'imperial' => 'Imperial (F, mph)'], + 'default' => 'metric', + ], + ]; + } + + public function testConnection(array $config): array + { + try { + // Make a lightweight API call to verify credentials + $response = Http::withHeaders([ + 'Authorization' => "Bearer {$config['api_key']}", + ])->timeout(10)->get('https://api.weather.example/v1/ping'); + + if ($response->successful()) { + return ['success' => true, 'message' => 'Connected to Weather API.']; + } + + return ['success' => false, 'error' => 'Invalid API key.']; + } catch (\Exception $e) { + return ['success' => false, 'error' => $e->getMessage()]; + } + } + + public function validationRules(): array + { + return [ + 'api_key' => 'nullable|string', + 'units' => 'nullable|in:metric,imperial', + ]; + } +} +``` + +**Config field types:** +- `secret` — Masked input, stored encrypted +- `text` / `string` — Plain text input +- `url` — URL input with format validation +- `select` — Dropdown, requires `options` array +- `string_list` — Dynamic list of strings (e.g. 
site IDs) +- `oauth_connect` — OAuth connection button, requires `authorize_url` and `redirect_uri` + +**Conditional fields** — Show a field only when another field has a specific value: +```php +[ + 'key' => 'workspace_id', + 'type' => 'text', + 'label' => 'Workspace ID', + 'visible_when' => ['field' => 'mode', 'value' => 'workspace'], +] +``` + +--- + +## Lua Documentation + +Agents discover tools through auto-generated Lua API docs. The `LuaDocRenderer` and `LuaCatalogBuilder` in core handle this automatically based on your `parameters()` and `description()` definitions. + +For complex integrations, add a `lua-docs/{name}.md` file with supplementary documentation — workflows, examples, and gotchas that aren't captured by the parameter reference. + +### Writing Lua Docs + +````markdown +## Common Workflows + +### Get current weather and format it + +```lua +local weather = app.integrations.weather.get({location = "Amsterdam"}) +local forecast = app.integrations.weather.forecast({location = "Amsterdam", days = 3}) +``` + +### Batch lookups + +```lua +local cities = {"Amsterdam", "London", "Tokyo"} +for _, city in ipairs(cities) do + local w = app.integrations.weather.get({location = city}) + -- process results +end +``` + +## Notes + +- Locations accept city names, addresses, or lat/lng coordinates +- Rate limit: 60 requests per minute +- Temperature is in Celsius by default (use `units = "imperial"` for Fahrenheit) +```` + +Point to the file in your tool provider: + +```php +public function luaDocsPath(): ?string +{ + return __DIR__ . '/../lua-docs/weather.md'; +} +``` + +### How Lua Routing Works + +The `LuaCatalogBuilder` transforms your tool definitions into a Lua namespace tree: + +``` +app.integrations.weather.get({location = "Amsterdam"}) +│ │ │ │ +│ │ │ └─ Function name (derived from tool name, minus app name) +│ │ └─ App name (from ToolProvider::appName()) +│ └─ "integrations." 
prefix (added when isIntegration() returns true) +└─ Root namespace +``` + +The `LuaBridge` then: +1. Looks up the function path in its `functionMap` to find the tool slug +2. Maps positional arguments to named parameters via `parameterMap` +3. Delegates to `LuaToolInvoker::invoke()` which instantiates and executes the tool +4. Logs the call (path, duration, status, error) for observability +5. Suggests similar functions on typos ("Did you mean: ...") + +--- + +## Core Contracts Reference + +### `Tool` + +The fundamental unit of work. Every tool implements this interface. + +```php +interface Tool +{ + public function name(): string; // Slug for routing (e.g. 'get_weather') + public function description(): string; // Shown in docs and catalogs + public function parameters(): array; // Parameter definitions + public function execute(array $args): ToolResult; +} +``` + +### `ToolProvider` + +Groups tools under an app, handles instantiation. + +```php +interface ToolProvider +{ + public function appName(): string; // Unique identifier + public function appMeta(): array; // UI metadata + public function tools(): array; // Tool definitions + public function isIntegration(): bool; // Toggleable per agent? + public function createTool(string $class, array $context = []): Tool; + public function luaDocsPath(): ?string; // Supplementary docs + public function credentialFields(): array; // Required credentials +} +``` + +### `CredentialResolver` + +Abstracts credential storage. The host application binds its own implementation. + +```php +interface CredentialResolver +{ + public function get(string $integration, string $key, mixed $default = null, ?string $account = null): mixed; + public function isConfigured(string $integration, ?string $account = null): bool; +} +``` + +The `$account` parameter supports multi-account setups (e.g. "work" and "personal" Google accounts). + +### `ConfigurableIntegration` + +Optional. Adds a settings UI for the integration in OpenCompany. 
+ +```php +interface ConfigurableIntegration +{ + public function integrationMeta(): array; // Name, description, icon, category + public function configSchema(): array; // Form field definitions + public function testConnection(array $config): array; // Verify credentials + public function validationRules(): array; // Laravel validation rules +} +``` + +### `AgentFileStorage` + +Allows tools to save files into the agent's workspace without coupling to the host's file system. + +```php +interface AgentFileStorage +{ + public function saveFile( + object $agent, + string $filename, + string $content, + string $mimeType, + ?string $subfolder = null, + ): array; // Returns ['id' => ..., 'path' => ..., 'url' => ...] +} +``` + +### `LuaToolInvoker` + +Host-side adapter for executing tools from the Lua bridge. + +```php +interface LuaToolInvoker +{ + public function invoke(string $toolSlug, array $args): mixed; + public function getToolMeta(string $toolSlug): array; +} +``` + +### `ToolResult` + +Value object returned by all tool executions. + +```php +$result = ToolResult::success($data); // Success with data +$result = ToolResult::success($data, $meta); // Success with metadata +$result = ToolResult::error('Something failed'); // Error + +$result->succeeded(); // bool +$result->data; // mixed — string, array, or any serializable value +$result->error; // ?string +$result->meta; // array — files, timing, etc. 
+$result->toString(); // String representation for legacy consumers +``` + +--- + +## Credential Management + +### For Standalone Laravel Apps + +The default `ConfigCredentialResolver` reads from `config/ai-tools.php`: + +```php +// config/ai-tools.php +return [ + 'weather' => [ + 'api_key' => env('WEATHER_API_KEY'), + ], + 'plausible' => [ + 'api_key' => env('PLAUSIBLE_API_KEY'), + 'url' => env('PLAUSIBLE_URL', 'https://plausible.io'), + ], + // Multi-account example + 'gmail' => [ + 'work' => ['api_key' => env('GMAIL_WORK_KEY')], + 'personal' => ['api_key' => env('GMAIL_PERSONAL_KEY')], + ], +]; +``` + +### Custom Credential Storage + +Bind your own `CredentialResolver` implementation: + +```php +// In your AppServiceProvider +$this->app->singleton( + \OpenCompany\IntegrationCore\Contracts\CredentialResolver::class, + \App\Services\DatabaseCredentialResolver::class, +); +``` + +--- + +## Static Analysis + +Packages that include a `phpstan.neon` are configured for [Larastan](https://github.com/larastan/larastan) level 5: + +```neon +includes: + - vendor/larastan/larastan/extension.neon + +parameters: + paths: + - src/ + level: 5 +``` + +Run from any package directory: + +```console +cd mermaid && ../vendor/bin/phpstan analyse +``` + +--- + +## Contributing + +### Adding a New Integration + +1. Create a new directory following the structure above +2. Implement `ToolProvider` (and optionally `ConfigurableIntegration`) +3. Create your service class and tool classes +4. Add lua-docs if the integration has non-obvious workflows +5. Add a `phpstan.neon` and ensure level 5 passes +6. Update this README's structure listing and integrations table + +### Conventions + +- **Naming**: Package directories and `appName()` are lowercase kebab/snake. Namespaces are PascalCase. +- **Icons**: Use [Phosphor Icons](https://icon-sets.iconify.design/ph/) (`ph:` prefix). +- **Tool types**: Use `'read'` for tools that fetch data, `'write'` for tools that create, modify, or delete. 
+- **Parameter names**: Always `snake_case`. +- **Error handling**: Tools should catch exceptions and return `ToolResult::error()` — never let exceptions bubble out of `execute()`. +- **Service isolation**: Tools call service methods. Services make HTTP requests. Tools never make HTTP requests directly. +- **No hardcoded config**: Always use `CredentialResolver` for API keys and endpoints. Never read `config()` or `env()` directly in tool or service classes. + +### Checklist for New Integrations + +- [ ] `composer.json` with correct package name, namespace, and Laravel provider auto-discovery +- [ ] Service class encapsulating all API communication +- [ ] Service provider with singleton service registration and `ToolProviderRegistry` boot +- [ ] Tool provider implementing `ToolProvider` (and `ConfigurableIntegration` if credentials are needed) +- [ ] Tool classes with clear `description()`, typed `parameters()`, and `ToolResult` returns +- [ ] `credentialFields()` defined for any required API keys or tokens +- [ ] `testConnection()` if implementing `ConfigurableIntegration` +- [ ] `lua-docs/{name}.md` for integrations with complex workflows +- [ ] Entry added to README structure listing and integrations table + +## License + +MIT diff --git a/docs/ecosystem/integrations/celestial/README.md b/docs/ecosystem/integrations/celestial/README.md new file mode 100644 index 0000000..ed74fbe --- /dev/null +++ b/docs/ecosystem/integrations/celestial/README.md @@ -0,0 +1,119 @@ +# Integration: Celestial + +> Astronomy integration for the [Laravel AI SDK](https://github.com/laravel/ai) — moon phases, sunrise/sunset, planet positions, eclipses, night sky reports. Part of the [OpenCompany](https://github.com/OpenCompanyApp) integration ecosystem. + +Give your AI agents the ability to perform real-time astronomical calculations. 
Built on [astronomy-bundle-php](https://github.com/OpenCompanyApp/astronomy-bundle-php) (Jean Meeus' *Astronomical Algorithms*) and the [Integration Core](https://github.com/OpenCompanyApp/integration-core) framework. + +## About OpenCompany + +[OpenCompany](https://github.com/OpenCompanyApp) is an AI-powered workplace platform where teams deploy and coordinate multiple AI agents alongside human collaborators. It combines team messaging, document collaboration, task management, and intelligent automation in a single workspace — with built-in approval workflows and granular permission controls so organizations can adopt AI agents safely and transparently. + +This celestial tool is one example of how AI agents can be extended with specialized capabilities beyond standard LLM knowledge — giving agents accurate, real-time astronomical data instead of relying on training data that may be outdated or imprecise. + +OpenCompany is built with Laravel, Vue 3, and Inertia.js. Learn more at [github.com/OpenCompanyApp](https://github.com/OpenCompanyApp). + +## Installation + +```console +composer require opencompanyapp/integration-celestial +``` + +Laravel auto-discovers the service provider. No manual registration needed. + +## Available Actions + +| Action | Description | Required Params | +|--------|-------------|-----------------| +| `moon_phase` | Phase, illumination, age, zodiac sign, next new/full moon | — | +| `sun_info` | Sunrise/sunset, altitude/azimuth, twilight, day length, zodiac | `latitude`, `longitude` | +| `moon_info` | Moon position, illumination, visibility from a location | `latitude`, `longitude` | +| `planet_position` | Planet altitude/azimuth, zodiac, rise/set. 
Use `planet="all"` for overview | `latitude`, `longitude` | +| `solar_eclipse` | Eclipse type, obscuration, contacts, magnitude for a date + location | `date`, `latitude`, `longitude` | +| `lunar_eclipse` | Eclipse type, magnitude, gamma, contact times (P1-P4, U1-U4) | `date` | +| `night_sky` | What's visible now: sun/moon/planet positions, darkness, stargazing quality | `latitude`, `longitude` | +| `zodiac_report` | All celestial bodies mapped to zodiac signs with alignments | — | +| `time_info` | Julian Day, sidereal time (GMST/GAST), equation of time | — | + +All actions accept optional `date` (ISO format, defaults to now) and `timezone` (defaults to UTC or configured default). + +## Quick Start: Use with Laravel AI SDK + +```php +use Laravel\Ai\Facades\Ai; +use OpenCompany\Integrations\Celestial\Tools\QueryCelestial; +use OpenCompany\Integrations\Celestial\CelestialService; + +// Create the tool +$tool = new QueryCelestial( + service: app(CelestialService::class), + defaultTimezone: 'Europe/Amsterdam', +); + +// Use with an AI agent +$response = Ai::agent() + ->tools([$tool]) + ->prompt('What phase is the moon in right now?'); +``` + +### Via ToolProvider (recommended) + +If you have `integration-core` installed, the tool auto-registers with the `ToolProviderRegistry`: + +```php +use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; + +$registry = app(ToolProviderRegistry::class); +$provider = $registry->get('celestial'); + +// Create tool with context +$tool = $provider->createTool( + \OpenCompany\Integrations\Celestial\Tools\QueryCelestial::class, + ['timezone' => 'America/New_York'] +); +``` + +## Standalone Service Usage + +You can use `CelestialService` directly without the integration wrapper: + +```php +use OpenCompany\Integrations\Celestial\CelestialService; + +$service = app(CelestialService::class); + +// Moon phase +echo $service->moonPhase(null, 'Europe/Amsterdam'); + +// Sunrise/sunset in Amsterdam +echo $service->sunInfo('2025-06-21', 
52.3676, 4.9041, 'Europe/Amsterdam'); + +// All planets visible from Berlin +echo $service->planetPosition('all', null, 52.524, 13.411, 'Europe/Berlin'); + +// Night sky report +echo $service->nightSky(52.3676, 4.9041, 'Europe/Amsterdam'); + +// Solar eclipse check +echo $service->solarEclipse('2026-08-12', 40.7128, -74.0060); + +// Lunar eclipse check +echo $service->lunarEclipse('2025-09-07'); +``` + +## Dependencies + +| Package | Purpose | +|---------|---------| +| [opencompanyapp/integration-core](https://github.com/OpenCompanyApp/integration-core) | ToolProvider contract and registry | +| [opencompanyapp/astronomy-bundle](https://github.com/OpenCompanyApp/astronomy-bundle-php) | Astronomical calculation engine (Meeus algorithms, VSOP87) | +| [laravel/ai](https://github.com/laravel/ai) | Laravel AI SDK Tool contract | + +## Requirements + +- PHP 8.2+ +- Laravel 11 or 12 +- [Laravel AI SDK](https://github.com/laravel/ai) ^0.1 + +## License + +MIT — see [LICENSE](LICENSE) diff --git a/docs/ecosystem/integrations/clickup/README.md b/docs/ecosystem/integrations/clickup/README.md new file mode 100644 index 0000000..ad3b77a --- /dev/null +++ b/docs/ecosystem/integrations/clickup/README.md @@ -0,0 +1,61 @@ +# ClickUp Integration + +ClickUp project management integration for the Laravel AI SDK. Part of the **OpenCompany** integration ecosystem — an open platform where AI agents collaborate with humans to run organizations. 
+ +## Available Tools (17) + +| Tool | Type | Description | +|------|------|-------------| +| `clickup_get_hierarchy` | read | Get workspace hierarchy: spaces, folders, lists | +| `clickup_search` | read | Search tasks across the workspace | +| `clickup_members` | read | List, find, or resolve workspace members | +| `clickup_get_tasks` | read | Get all tasks in a list | +| `clickup_get_task` | read | Get a task by ID with full details | +| `clickup_create_task` | write | Create a new task in a list | +| `clickup_update_task` | write | Update an existing task | +| `clickup_delete_task` | write | Delete a task | +| `clickup_manage_tags` | write | Add or remove tags on tasks | +| `clickup_attach_file` | write | Attach a file to a task | +| `clickup_manage_comments` | write | Read or add comments on tasks | +| `clickup_time_tracking` | write | Start, stop, log, or list time entries | +| `clickup_manage_list` | write | Create, get, or update lists | +| `clickup_manage_folder` | write | Create, get, or update folders | +| `clickup_chat` | write | List channels or send messages | +| `clickup_manage_document` | write | Create a ClickUp document | +| `clickup_manage_document_pages` | write | List, get, create, or update document pages | + +## Installation + +```bash +composer require opencompanyapp/integration-clickup +``` + +The service provider is auto-discovered by Laravel. + +## Configuration + +| Key | Type | Required | Description | +|-----|------|----------|-------------| +| `api_token` | secret | Yes | Personal API Token (starts with `pk_`). Generate at ClickUp → Settings → Apps. | +| `workspace_id` | text | No | Workspace ID from URL: `app.clickup.com/{id}/...`. Required for search, time tracking, members. 
| + +## Quick Start + +```php +use Laravel\Ai\Facades\Ai; + +$response = Ai::tools(['clickup_get_hierarchy', 'clickup_create_task']) + ->prompt('List all spaces, then create a task called "Review Q1 report" in the first list you find.'); +``` + +## Dependencies + +| Package | Version | +|---------|---------| +| PHP | ^8.2 | +| opencompanyapp/integration-core | ^2.0 | +| laravel/ai | ^0.1 | + +## License + +MIT diff --git a/docs/ecosystem/integrations/clickup/clickup.md b/docs/ecosystem/integrations/clickup/clickup.md new file mode 100644 index 0000000..75ee243 --- /dev/null +++ b/docs/ecosystem/integrations/clickup/clickup.md @@ -0,0 +1,207 @@ +# ClickUp Lua API Reference + +## Priority Mapping + +| Value | Level | +|-------|---------| +| 1 | Urgent | +| 2 | High | +| 3 | Normal | +| 4 | Low | + +```lua +-- Use numeric values for priority +clickup_create_task({ list_id = "901234", name = "Fix bug", priority = 1 }) -- urgent +clickup_update_task({ task_id = "abc123", priority = 3 }) -- normal +``` + +## Date Handling + +All dates use ISO 8601 format. The API converts them to Unix milliseconds internally. + +```lua +-- Date only +clickup_create_task({ list_id = "901234", name = "Sprint review", due_date = "2026-04-01" }) + +-- Date with time +clickup_create_task({ + list_id = "901234", + name = "Standup", + start_date = "2026-04-01T09:00:00", + due_date = "2026-04-01T09:30:00" +}) + +-- Clear a date by passing empty string +clickup_update_task({ task_id = "abc123", due_date = "" }) + +-- Filter tasks by due date range +clickup_get_tasks({ + list_id = "901234", + due_date_gt = "2026-03-01", + due_date_lt = "2026-03-31" +}) +``` + +## Member Resolution + +Always resolve names to numeric user IDs before assigning tasks. 
+ +```lua +-- Find a single member by name or email +local result = clickup_find_member({ query = "Sarah" }) +-- Returns: { matches = { { id = "12345", username = "sarah", email = "sarah@co.com" } } } + +-- Resolve multiple names at once +local result = clickup_resolve_members({ query = "Sarah, John, alex@co.com" }) +-- Returns: { results = { { query = "Sarah", id = "12345", resolved = true }, ... } } + +-- Use the IDs for assignment (comma-separated string) +clickup_create_task({ list_id = "901234", name = "Review PR", assignees = "12345,67890" }) +``` + +## Common Workflows + +### Create a task with assignees and tags + +```lua +-- Step 1: Find the list ID +local hierarchy = clickup_get_hierarchy({}) +-- Traverse: hierarchy.spaces[].folders[].lists[] or hierarchy.spaces[].lists[] + +-- Step 2: Resolve member IDs +local members = clickup_resolve_members({ query = "Sarah, John" }) +local ids = {} +for _, m in ipairs(members.results) do + if m.resolved then table.insert(ids, m.id) end +end + +-- Step 3: Create the task +clickup_create_task({ + list_id = "901234", + name = "Implement auth flow", + description = "Add OAuth2 login support", + status = "open", + priority = 2, + assignees = table.concat(ids, ","), + tags = "backend,auth", + due_date = "2026-04-15" +}) +``` + +### Search for tasks, then update them + +```lua +-- Search across workspace +local results = clickup_search({ + query = "auth", + statuses = "open,in progress", + include_subtasks = true +}) + +-- Update each matching task +for _, task in ipairs(results.tasks) do + clickup_update_task({ + task_id = task.id, + priority = 1, + status = "in progress" + }) +end + +-- Custom task IDs (e.g., "DEV-42") work too +clickup_update_task({ task_id = "DEV-42", status = "closed" }) +``` + +### Time tracking (start, stop, log) + +```lua +-- Start a timer on a task +clickup_start_timer({ + task_id = "abc123", + description = "Working on auth flow", + billable = true +}) + +-- Check what's currently running +local 
current = clickup_current_time_entry({}) +-- Returns task name, start time, description + +-- Stop the running timer +clickup_stop_timer({}) + +-- Or log time manually (duration in milliseconds) +clickup_log_time({ + task_id = "abc123", + start = "2026-03-29T09:00:00", + duration = "3600000", -- 1 hour = 3,600,000 ms + description = "Code review", + billable = true +}) + +-- View all time entries for a task +local entries = clickup_list_time_entries({ task_id = "abc123" }) +-- entries.entries[].duration is already formatted ("60.0 min") +``` + +### Navigate hierarchy (find list and folder IDs) + +```lua +-- Get full workspace tree +local tree = clickup_get_hierarchy({}) + +-- Filter to specific space(s) +local tree = clickup_get_hierarchy({ space_ids = "12345,67890" }) + +-- Get folder details including its lists +local folder = clickup_get_folder({ folder_id = "456" }) +-- folder.lists = { { id = "789", name = "Backlog" }, ... } + +-- Get list details +local list = clickup_get_list({ list_id = "789" }) +-- list.task_count, list.space, list.folder + +-- Get all tasks in a specific list +local tasks = clickup_get_tasks({ + list_id = "789", + statuses = "open,in progress", + include_closed = false +}) +``` + +### Create a subtask + +```lua +clickup_create_task({ + list_id = "901234", + name = "Write unit tests", + parent_task_id = "abc123", + priority = 3 +}) +``` + +### Tags + +```lua +-- Add a tag (must already exist in the space) +clickup_add_tag({ task_id = "abc123", tag_name = "urgent" }) + +-- Remove a tag +clickup_remove_tag({ task_id = "abc123", tag_name = "urgent" }) + +-- Set tags during task creation +clickup_create_task({ + list_id = "901234", + name = "Deploy", + tags = "devops,release" +}) +``` + +## Tips + +- **Time durations are in milliseconds**: 1 min = 60000, 1 hour = 3600000, 1 day = 86400000 +- **Tags must pre-exist** in the ClickUp space before you can add them to tasks +- **workspace_id** is pulled from config automatically for search, time 
tracking, and member operations -- you rarely need to pass it explicitly +- **Custom task IDs** like `"DEV-42"` work anywhere a `task_id` is accepted +- **Pagination**: `clickup_search` and `clickup_get_tasks` support a `page` parameter (starts at 0) +- **Assignees** use comma-separated numeric user IDs, not names -- always resolve first +- **Statuses** are list-specific strings (e.g., `"open"`, `"in progress"`, `"closed"`) -- check the list for valid values +- **time_estimate** on `clickup_update_task` is the exception: it is in **minutes**, not milliseconds (converted to ms internally) diff --git a/docs/ecosystem/integrations/coingecko/README.md b/docs/ecosystem/integrations/coingecko/README.md new file mode 100644 index 0000000..4906ba3 --- /dev/null +++ b/docs/ecosystem/integrations/coingecko/README.md @@ -0,0 +1,34 @@ +# Integration: CoinGecko + +Cryptocurrency market data for AI agents — search coins, get prices, market rankings, trending coins, and historical chart data. + +> Part of the **OpenCompany** integration ecosystem. These packages extend AI agents with real-world capabilities through the Laravel AI SDK. + +## Available Tools + +| Tool | Type | Description | +|---|---|---| +| `coingecko_search` | read | Search coins by name/symbol, trending coins, global market overview | +| `coingecko_market` | read | Current prices, market rankings, and market cap data | +| `coingecko_details` | read | Coin profiles, historical price charts, OHLC candlestick data | + +## Installation + +```bash +composer require opencompanyapp/integration-coingecko +``` + +## Configuration + +Requires a free CoinGecko Demo API key. Get one at the [CoinGecko Developer Dashboard](https://www.coingecko.com/en/api/pricing).
+ +## Dependencies + +| Package | Purpose | +|---|---| +| `opencompanyapp/integration-core` | Shared tool provider contracts and registry | +| `laravel/ai` | Laravel AI SDK tool interface | + +## License + +MIT diff --git a/docs/ecosystem/integrations/coingecko/coingecko.md b/docs/ecosystem/integrations/coingecko/coingecko.md new file mode 100644 index 0000000..2f43715 --- /dev/null +++ b/docs/ecosystem/integrations/coingecko/coingecko.md @@ -0,0 +1,103 @@ +# CoinGecko — Lua API Reference + +## Important: Coin IDs vs Ticker Symbols + +CoinGecko tools use CoinGecko IDs (e.g. `"bitcoin"`, `"ethereum"`, `"solana"`), **not** ticker symbols (`"BTC"`, `"ETH"`). If you only know the ticker, use `coingecko_search_coins` first to find the correct ID. + +**Rate limits:** free tier allows ~30 calls/min. + +## coingecko_search_coins + +Find coin IDs by name or ticker symbol. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `query` | string | yes | Coin name or ticker (e.g. `"bitcoin"`, `"ETH"`) | + +```lua +local result = coingecko_search_coins({ query = "SOL" }) + +-- result.coins is an array of { id, name, symbol, market_cap_rank } +for _, coin in ipairs(result.coins) do + log(coin.id .. " (" .. coin.symbol .. ") — rank #" .. (coin.market_cap_rank or "?")) +end +``` + +## coingecko_price + +Get current price for one or more coins. Includes 24h change, volume, and market cap. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `ids` | string | yes | Comma-separated CoinGecko IDs (e.g. `"bitcoin,ethereum"`) | +| `currencies` | string | no | Comma-separated target currencies (default: `"usd"`) | + +```lua +local result = coingecko_price({ + ids = "bitcoin,ethereum", + currencies = "usd,eur" +}) +``` + +## coingecko_markets + +Top coins ranked by market cap with full market data. 
+ +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `ids` | string | no | Filter to specific coin IDs | +| `currency` | string | no | Target currency (default: `"usd"`) | +| `category` | string | no | Filter by category (e.g. `"decentralized-finance-defi"`) | +| `per_page` | string | no | Results per page (default: `"20"`, max: 100) | +| `page` | string | no | Page number (default: `"1"`) | +| `price_change_percentage` | string | no | Timeframes (default: `"24h,7d"`). Options: `1h,24h,7d,14d,30d,200d,1y` | + +```lua +local result = coingecko_markets({ + per_page = "10", + price_change_percentage = "24h,7d,30d" +}) + +for _, coin in ipairs(result.coins) do + log("#" .. coin.market_cap_rank .. " " .. coin.name .. ": $" .. coin.current_price) +end +``` + +## Examples + +### Search for a coin, then get its price + +```lua +-- Step 1: find the coin ID +local search = coingecko_search_coins({ query = "Cardano" }) +local coin_id = search.coins[1].id -- "cardano" + +-- Step 2: get the price +local price = coingecko_price({ + ids = coin_id, + currencies = "usd,btc" +}) +``` + +### Top 10 coins by market cap + +```lua +local result = coingecko_markets({ + per_page = "10", + currency = "usd" +}) + +for _, coin in ipairs(result.coins) do + log(coin.name .. ": $" .. coin.current_price .. " (24h: " .. (coin.price_change_percentage_24h or "?") .. "%)") +end +``` + +### Compare specific coins + +```lua +local result = coingecko_markets({ + ids = "bitcoin,ethereum,solana", + currency = "usd", + price_change_percentage = "1h,24h,7d" +}) +``` diff --git a/docs/ecosystem/integrations/core/README.md b/docs/ecosystem/integrations/core/README.md new file mode 100644 index 0000000..cfdc194 --- /dev/null +++ b/docs/ecosystem/integrations/core/README.md @@ -0,0 +1,209 @@ +# Integration Core + +> Framework-agnostic core for building integration packages. Part of the [OpenCompany](https://github.com/OpenCompanyApp) ecosystem. 
+ +Provides the contracts, credential abstraction, and auto-discovery registry that all OpenCompany integration packages build on. Packages built on integration-core work in any PHP application — OpenCompany (web), KosmoKrator (CLI), or custom consumers. + +## About OpenCompany + +[OpenCompany](https://github.com/OpenCompanyApp) is an AI-powered workplace platform where teams deploy and coordinate multiple AI agents alongside human collaborators. It combines team messaging, document collaboration, task management, and intelligent automation in a single workspace — with built-in approval workflows and granular permission controls so organizations can adopt AI agents safely and transparently. + +This core package enables OpenCompany's plugin architecture for integrations — each external integration (astronomy, analytics, messaging, etc.) is a separate Composer package that any PHP app can install independently. + +## Installation + +```console +composer require opencompanyapp/integration-core +``` + +Laravel auto-discovers the service provider. Non-Laravel apps can use the contracts and registry directly. 
+ +## What's Included + +| Component | Purpose | +|-----------|---------| +| `Tool` interface | Framework-agnostic tool contract — `name()`, `description()`, `parameters()`, `execute()` | +| `ToolResult` value object | Structured result from tool execution — `success()`, `error()`, metadata | +| `ToolProvider` interface | Contract every integration package implements — declares tools, metadata, factory, and Lua docs | +| `CredentialResolver` interface | Abstraction for API keys/config — swap between config files, databases, or vaults | +| `ConfigCredentialResolver` | Default resolver that reads from `config/ai-tools.php` | +| `ToolProviderRegistry` | Singleton registry that collects all tool providers for discovery | +| `IntegrationCoreServiceProvider` | Binds everything with sensible defaults (all overridable) | + +## Quick Start: Building an Integration Package + +### 1. Implement `ToolProvider` + +```php +use OpenCompany\IntegrationCore\Contracts\Tool; +use OpenCompany\IntegrationCore\Contracts\ToolProvider; + +class WeatherToolProvider implements ToolProvider +{ + public function appName(): string + { + return 'weather'; + } + + public function appMeta(): array + { + return [ + 'label' => 'weather, forecasts, temperature', + 'description' => 'Weather data and forecasts', + 'icon' => 'ph:cloud-sun', + 'logo' => 'ph:cloud-sun', + ]; + } + + public function tools(): array + { + return [ + 'get_weather' => [ + 'class' => GetWeather::class, + 'type' => 'read', + 'name' => 'Get Weather', + 'description' => 'Current weather and forecasts for any location.', + 'icon' => 'ph:cloud-sun', + ], + ]; + } + + public function isIntegration(): bool + { + return true; // Can be toggled per agent + } + + public function createTool(string $class, array $context = []): Tool + { + $credentials = app(\OpenCompany\IntegrationCore\Contracts\CredentialResolver::class); + + return new GetWeather( + apiKey: $credentials->get('weather', 'api_key'), + units: $context['units'] ?? 
'metric', + ); + } + + public function luaDocsPath(): ?string + { + return null; // Or: __DIR__ . '/../lua-docs/weather.md' + } +} +``` + +### 2. Register in Your Service Provider + +```php +use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; + +class WeatherServiceProvider extends ServiceProvider +{ + public function boot(): void + { + if ($this->app->bound(ToolProviderRegistry::class)) { + $this->app->make(ToolProviderRegistry::class) + ->register(new WeatherToolProvider()); + } + } +} +``` + +### 3. Create Your Tool Class + +```php +use OpenCompany\IntegrationCore\Contracts\Tool; +use OpenCompany\IntegrationCore\Support\ToolResult; + +class GetWeather implements Tool +{ + public function __construct( + private string $apiKey, + private string $units = 'metric', + ) {} + + public function name(): string + { + return 'get_weather'; + } + + public function description(): string + { + return 'Get current weather and forecasts for any location.'; + } + + public function parameters(): array + { + return [ + 'location' => [ + 'type' => 'string', + 'required' => true, + 'description' => 'City name or coordinates', + ], + 'days' => [ + 'type' => 'integer', + 'description' => 'Forecast days (default: 1)', + ], + ]; + } + + public function execute(array $args): ToolResult + { + $location = $args['location'] ?? ''; + if (empty($location)) { + return ToolResult::error('Location is required.'); + } + + // Your implementation... + $data = $this->fetchWeather($location, $args['days'] ?? 1); + + return ToolResult::success($data); + } +} +``` + +## Credential Management + +The `CredentialResolver` interface abstracts where API keys come from. Integration packages call `CredentialResolver` to get credentials without knowing or caring about the storage backend. + +**In OpenCompany**, credentials are managed through the Integrations UI and stored encrypted in the database. Users never need to touch config files — everything is configured through the admin interface. 
+ +**For standalone usage** in other Laravel apps, the default `ConfigCredentialResolver` reads from a config file: + +```php +// config/ai-tools.php +return [ + 'plausible' => [ + 'api_key' => env('PLAUSIBLE_API_KEY'), + 'url' => env('PLAUSIBLE_URL', 'https://plausible.io'), + ], +]; +``` + +You can swap the resolver to use any storage backend (database, vault, secrets manager) by binding your own implementation: + +```php +$this->app->singleton( + \OpenCompany\IntegrationCore\Contracts\CredentialResolver::class, + YourCustomResolver::class +); +``` + +## Integration Packages + +All installed integration packages auto-register via Laravel service provider discovery. The `ToolProviderRegistry` collects them: + +```php +$registry = app(ToolProviderRegistry::class); + +$registry->all(); // All registered providers +$registry->has('celestial'); // Check if a provider exists +$registry->get('celestial'); // Get a specific provider +``` + +## Requirements + +- PHP 8.2+ +- Laravel 11 or 12 (for service provider auto-discovery; contracts work without Laravel) + +## License + +MIT — see [LICENSE](LICENSE) diff --git a/docs/ecosystem/integrations/exchangerate/README.md b/docs/ecosystem/integrations/exchangerate/README.md new file mode 100644 index 0000000..ef16d23 --- /dev/null +++ b/docs/ecosystem/integrations/exchangerate/README.md @@ -0,0 +1,28 @@ +# Exchange Rate Integration + +Currency exchange rate integration for the OpenCompany integration ecosystem. Provides access to 340+ currencies (fiat, cryptocurrencies, and precious metals) via the fawazahmed0/exchange-api. + +No API key required.
+ +## Tools + +- **exchangerate_search** — List and search available currencies (fiat, crypto, metals) +- **exchangerate_convert** — Convert currencies, get rates, compare historical rates + +## Popular Currency Codes + +| Code | Currency | +|------|----------| +| usd | US Dollar | +| eur | Euro | +| gbp | British Pound | +| jpy | Japanese Yen | +| cny | Chinese Yuan | +| btc | Bitcoin | +| eth | Ethereum | +| xau | Gold (troy oz) | +| xag | Silver (troy oz) | + +## License + +MIT diff --git a/docs/ecosystem/integrations/exchangerate/exchangerate.md b/docs/ecosystem/integrations/exchangerate/exchangerate.md new file mode 100644 index 0000000..692bddf --- /dev/null +++ b/docs/ecosystem/integrations/exchangerate/exchangerate.md @@ -0,0 +1,118 @@ +# Exchange Rate — Lua API Reference + +No API key needed. Supports 340+ currencies: fiat, crypto, and precious metals. + +## exchangerate_convert_currency + +Convert an amount from one currency to another. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `from` | string | yes | Source currency code (e.g. `"usd"`, `"btc"`, `"xau"`) | +| `to` | string | yes | Target currency code (e.g. `"eur"`, `"jpy"`) | +| `amount` | string | no | Amount to convert (default: `"1"`) | +| `date` | string | no | Date for the rate: `"YYYY-MM-DD"` or `"latest"` (default) | + +```lua +local result = exchangerate_convert_currency({ + from = "usd", + to = "eur", + amount = "100" +}) + +log("100 USD = " .. result.result .. " EUR") +``` + +## exchangerate_history + +Compare a currency pair across multiple dates. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `from` | string | yes | Source currency code | +| `to` | string | yes | Target currency code | +| `dates` | string | yes | Comma-separated dates (e.g. 
`"2026-01-01,2026-02-01,2026-03-01"`) | + +```lua +local result = exchangerate_history({ + from = "usd", + to = "eur", + dates = "2026-01-01,2026-02-01,2026-03-01" +}) + +for _, h in ipairs(result.history) do + log(h.date .. ": " .. h.rate) +end +-- result.change.percentage shows overall change +``` + +## exchangerate_list_currencies + +List and search available currencies. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `query` | string | no | Filter by code or name (e.g. `"dollar"`, `"btc"`, `"gold"`) | + +```lua +-- Find gold-related currencies +local result = exchangerate_list_currencies({ query = "gold" }) + +for _, c in ipairs(result.currencies) do + log(c.code .. ": " .. c.name) +end +``` + +## exchangerate_rates + +Get all exchange rates for a base currency (not shown in detail -- pass `base` currency code). + +## exchangerate_popular_currencies + +Show commonly used currency codes. No parameters. + +```lua +local result = exchangerate_popular_currencies({}) +``` + +## Examples + +### Convert 500 EUR to USD + +```lua +local result = exchangerate_convert_currency({ + from = "eur", + to = "usd", + amount = "500" +}) +``` + +### Historical rate on a specific date + +```lua +local result = exchangerate_convert_currency({ + from = "gbp", + to = "jpy", + amount = "1", + date = "2025-06-15" +}) +``` + +### Track EUR/USD over several months + +```lua +local result = exchangerate_history({ + from = "eur", + to = "usd", + dates = "2025-10-01,2025-11-01,2025-12-01,2026-01-01,2026-02-01,2026-03-01" +}) + +log("Change: " .. result.change.percentage .. "%") +``` + +### Find a currency code + +```lua +local result = exchangerate_list_currencies({ query = "peso" }) +-- Returns matching currencies like MXN (Mexican Peso), ARS (Argentine Peso), etc. 
+``` diff --git a/docs/ecosystem/integrations/google/README.md b/docs/ecosystem/integrations/google/README.md new file mode 100644 index 0000000..89177af --- /dev/null +++ b/docs/ecosystem/integrations/google/README.md @@ -0,0 +1,129 @@ +# Google Integration + +Google Calendar, Gmail, Google Drive, Google Contacts, Google Sheets, Google Search Console, Google Tasks, Google Analytics, Google Docs, and Google Forms integration for the Laravel AI SDK. Part of the **OpenCompany** integration ecosystem — an open platform where AI agents collaborate with humans to run organizations. + +## Integrations + +This package registers **ten separate integrations**, each appearing independently on the integrations page: + +### Google Calendar (3 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_calendar_list` | read | List calendars and search/list events | +| `google_calendar_event` | write | Create, update, delete, or quick-add calendar events | +| `google_calendar_freebusy` | read | Check free/busy status across calendars | + +### Gmail (4 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `gmail_search` | read | Search and list email messages | +| `gmail_read` | read | Get full email content | +| `gmail_send` | write | Send emails or create/send drafts | +| `gmail_manage` | write | Labels, read/unread, trash, and archive | + +### Google Drive (3 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_drive_search` | read | Search and retrieve files | +| `google_drive_manage` | write | Create, rename, move, copy, and delete files | +| `google_drive_share` | write | Share files and manage permissions | + +### Google Contacts (2 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_contacts_search` | read | Search, list, and look up contacts | +| `google_contacts_manage` | write | Create, update, and delete contacts | + +### Google Sheets (3 tools) + +| Tool | Type | 
Description | +|------|------|-------------| +| `google_sheets_read` | read | Read spreadsheet data, metadata, and search | +| `google_sheets_write` | write | Create spreadsheets and write data | +| `google_sheets_manage` | write | Manage sheets, rows, columns, sorting, and filters | + +### Google Search Console (2 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_search_console_query` | read | Search performance, URL inspection, and sitemaps | +| `google_search_console_manage` | write | Submit sitemaps and manage site properties | + +### Google Tasks (2 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_tasks_read` | read | List task lists and tasks, get task details | +| `google_tasks_manage` | write | Create, update, complete, delete, and organize tasks | + +### Google Analytics (1 tool) + +| Tool | Type | Description | +|------|------|-------------| +| `google_analytics_query` | read | Website traffic reports, realtime data, and metadata discovery | + +### Google Docs (2 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_docs_read` | read | Read document content, structure, and search text | +| `google_docs_write` | write | Create, edit, format, and manage documents | + +### Google Forms (2 tools) + +| Tool | Type | Description | +|------|------|-------------| +| `google_forms_read` | read | Read form structure and responses | +| `google_forms_write` | write | Create, edit, and manage forms | + +## Installation + +```bash +composer require opencompanyapp/integration-google +``` + +The service provider is auto-discovered by Laravel. 
+ +## Configuration + +All integrations share the same Google Cloud OAuth credentials (Client ID and Secret only need to be entered once): + +| Key | Type | Required | Description | +|-----|------|----------|-------------| +| `client_id` | text | Yes | OAuth 2.0 Client ID from Google Cloud Console | +| `client_secret` | secret | Yes | OAuth 2.0 Client Secret | +| `access_token` | oauth | Yes | Connected via OAuth flow | + +### Setup + +1. Create a project in [Google Cloud Console](https://console.cloud.google.com/) +2. Enable the **Google Calendar API**, **Gmail API**, **Google Drive API**, **People API**, **Google Sheets API**, **Google Search Console API**, **Google Tasks API**, **Google Analytics Data API**, **Google Docs API**, and/or **Google Forms API** +3. Create OAuth 2.0 credentials (Web application type) +4. Add the redirect URI: `{your-domain}/api/integrations/google/oauth/callback` +5. Enter Client ID and Secret in Settings → Integrations +6. Click "Connect" to authorize via OAuth + +## Quick Start + +```php +use Laravel\Ai\Facades\Ai; + +$response = Ai::tools(['google_calendar_list', 'google_calendar_event']) + ->prompt('List my calendars, then create a meeting called "Team Standup" tomorrow at 10am.'); +``` + +## Dependencies + +| Package | Version | +|---------|---------| +| PHP | ^8.2 | +| opencompanyapp/integration-core | ^2.0 | +| laravel/ai | ^0.1 | + +## License + +MIT diff --git a/docs/ecosystem/integrations/google/google.md b/docs/ecosystem/integrations/google/google.md new file mode 100644 index 0000000..08d90ed --- /dev/null +++ b/docs/ecosystem/integrations/google/google.md @@ -0,0 +1,207 @@ +# Google Integration — Lua API Supplement + +## Gmail + +Send email with CC/BCC: + +```lua +gmail_send_email({ + to = "alice@example.com", + subject = "Q1 Report", + body = "Please find the report attached.", + cc = "bob@example.com, carol@example.com", + bcc = "manager@example.com", +}) +``` + +Search, read, then reply workflow: + +```lua +-- Step 1: 
Search for messages +local results = gmail_search_emails({ + query = "from:alice subject:meeting is:unread", + max_results = 5, +}) + +-- Step 2: Read the full message +local msg = gmail_read({ message_id = results.messages[1].id }) + +-- Step 3: Reply in the same thread +gmail_reply({ + message_id = msg.id, + thread_id = msg.threadId, + body = "Thanks, I'll be there.", + cc = "team@example.com", +}) +``` + +Draft vs direct send -- use `gmail_create_draft` to stage an email without sending, then `gmail_send_draft` to send it later: + +```lua +-- Create a draft (not sent) +local draft = gmail_create_draft({ + to = "client@example.com", + subject = "Proposal", + body = "Draft content here...", +}) + +-- Send it later using the draft ID +gmail_send_draft({ draft_id = draft.draftId }) +``` + +## Google Sheets + +Values use 2D Lua tables -- each inner table is one row: + +```lua +local values = { + {"Name", "Age", "City"}, + {"Alice", 30, "NYC"}, + {"Bob", 25, "LA"}, +} +``` + +A1 notation examples: + +- `"Sheet1!A1:D10"` -- specific range +- `"Sheet1!A:A"` -- entire column +- `"Sheet1"` -- entire sheet +- `"'My Sheet'!A1:B2"` -- sheet names with spaces need quotes + +Input modes: `"user_entered"` (default) parses formulas and dates, `"raw"` stores literal strings. 
+ +Create a spreadsheet, add a sheet, write data: + +```lua +-- Create a new spreadsheet +local ss = google_sheets_create({ title = "Q1 Sales" }) +local id = ss.spreadsheetId + +-- Add a second sheet/tab +google_sheets_add_sheet({ + spreadsheet_id = id, + title = "By Region", +}) + +-- Write data with headers +google_sheets_write_range({ + spreadsheet_id = id, + range = "Sheet1!A1:C3", + values = { + {"Region", "Revenue", "Growth"}, + {"North", 50000, "=B2/50000-1"}, + {"South", 42000, "=B3/42000-1"}, + }, + input = "user_entered", -- parses the formulas +}) +``` + +Read data back: + +```lua +local data = google_sheets_read_range({ + spreadsheet_id = id, + range = "Sheet1!A1:C3", + render = "formatted", -- "formatted" (default), "unformatted", or "formula" +}) +-- data.values is a 2D table: {{"Region","Revenue","Growth"}, {"North","50000","0%"}, ...} +``` + +Append vs write -- `google_sheets_append` auto-detects the last row and adds below it: + +```lua +google_sheets_append({ + spreadsheet_id = id, + range = "Sheet1", + values = { + {"East", 38000, "=B4/38000-1"}, + }, + input = "user_entered", +}) +``` + +## Google Calendar + +Create a timed event with attendees: + +```lua +google_calendar_create_event({ + summary = "Sprint Planning", + description = "Bi-weekly sprint planning session", + location = "Conference Room B", + start_date_time = "2026-04-01T10:00:00-05:00", + end_date_time = "2026-04-01T11:00:00-05:00", + time_zone = "America/New_York", + attendees = "alice@example.com, bob@example.com", + recurrence = "RRULE:FREQ=WEEKLY;INTERVAL=2;COUNT=10", +}) +``` + +Create an all-day event: + +```lua +google_calendar_create_event({ + summary = "Company Holiday", + start_date = "2026-07-04", + end_date = "2026-07-05", +}) +``` + +Date/time format: ISO 8601 with timezone offset for timed events (`2026-04-01T10:00:00-05:00`), plain `YYYY-MM-DD` for all-day events. Use `time_zone` for IANA names like `"America/New_York"`. 
+ +## Google Drive + +Search for files, then get details: + +```lua +-- Search by name and type +local results = google_drive_search_files({ + query = "name contains 'report' and mimeType = 'application/vnd.google-apps.spreadsheet'", + max_results = 10, + order_by = "modifiedTime desc", +}) + +-- Get full file info (and optionally export content) +local file = google_drive_get_file({ + file_id = results.files[1].id, + export_as = "csv", -- "text", "csv", or "markdown" (Google Workspace files only) +}) +``` + +Common Drive query patterns: + +- `"name contains 'budget'"` -- by name +- `"mimeType = 'application/vnd.google-apps.spreadsheet'"` -- Sheets +- `"mimeType = 'application/vnd.google-apps.document'"` -- Docs +- `"mimeType = 'application/vnd.google-apps.folder'"` -- folders +- `"modifiedTime > '2026-01-01'"` -- recently modified +- `"sharedWithMe = true"` -- shared files +- `"'FOLDER_ID' in parents"` -- files in a folder + +Share a file: + +```lua +-- Share with a specific user +google_drive_share_file({ + file_id = "abc123", + role = "writer", -- "reader", "writer", or "commenter" + email = "alice@example.com", + notify = "true", +}) + +-- Share with anyone via link +google_drive_share_file({ + file_id = "abc123", + role = "reader", + type = "anyone", +}) +``` + +## Tips + +- All Google APIs share the same OAuth token -- if Gmail is connected, the same credentials work for Sheets, Drive, Calendar, etc. +- Use `input = "user_entered"` when writing Sheets data that contains formulas (e.g., `"=SUM(A1:A10)"`) or dates. Use `"raw"` for literal strings. +- Sheet names with spaces must be quoted in A1 notation: `"'My Sheet'!A1:B2"`. +- `google_sheets_append` is better than `google_sheets_write_range` when adding rows to an existing table -- it auto-detects where the data ends. +- Calendar event times use ISO 8601 with timezone offset. Always include the offset or set `time_zone` explicitly. +- Drive search excludes trashed files by default. 
diff --git a/docs/ecosystem/integrations/mermaid/README.md b/docs/ecosystem/integrations/mermaid/README.md new file mode 100644 index 0000000..d14adff --- /dev/null +++ b/docs/ecosystem/integrations/mermaid/README.md @@ -0,0 +1,59 @@ +# Integration: Mermaid + +> Mermaid diagram rendering integration for the [Laravel AI SDK](https://github.com/laravel/ai). Part of the [OpenCompany](https://github.com/OpenCompanyApp) integration ecosystem. + +Generates PNG images from Mermaid diagram syntax. Supports flowcharts, sequence diagrams, class diagrams, state diagrams, ER diagrams, Gantt charts, pie charts, git graphs, and more. + +## About OpenCompany + +[OpenCompany](https://github.com/OpenCompanyApp) is an AI-powered workplace platform where teams deploy and coordinate multiple AI agents alongside human collaborators. It combines team messaging, document collaboration, task management, and intelligent automation in a single workspace — with built-in approval workflows and granular permission controls so organizations can adopt AI agents safely and transparently. + +OpenCompany is built with Laravel, Vue 3, and Inertia.js. Learn more at [github.com/OpenCompanyApp](https://github.com/OpenCompanyApp). + +## Prerequisites + +Requires the [Mermaid CLI](https://github.com/mermaid-js/mermaid-cli) (`mmdc`) to be installed: + +```bash +npm install @mermaid-js/mermaid-cli +``` + +## Installation + +```console +composer require opencompanyapp/integration-mermaid +``` + +Laravel auto-discovers the service provider. No manual registration needed. 
+ +## Available Tools + +| Tool | Type | Description | +|------|------|-------------| +| `render_mermaid` | write | Render Mermaid diagram syntax to a PNG image | + +## Quick Start + +```php +use Laravel\Ai\Facades\Ai; +use OpenCompany\Integrations\Mermaid\Tools\RenderMermaid; +use OpenCompany\Integrations\Mermaid\MermaidService; + +$tool = new RenderMermaid(app(MermaidService::class)); + +$response = Ai::agent() + ->tools([$tool]) + ->prompt('Create a flowchart showing the user registration process'); +``` + +## Dependencies + +| Package | Purpose | +|---------|---------| +| `opencompanyapp/integration-core` | ToolProvider contract and registry | +| `laravel/ai` | Laravel AI SDK Tool interface | +| `@mermaid-js/mermaid-cli` | Mermaid to PNG rendering (npm) | + +## License + +MIT — see [LICENSE](LICENSE) diff --git a/docs/ecosystem/integrations/plausible/README.md b/docs/ecosystem/integrations/plausible/README.md new file mode 100644 index 0000000..ddb0ee1 --- /dev/null +++ b/docs/ecosystem/integrations/plausible/README.md @@ -0,0 +1,132 @@ +# Integration: Plausible + +> Plausible Analytics integration for the [Laravel AI SDK](https://github.com/laravel/ai) — query stats, realtime visitors, manage sites and goals. Part of the [OpenCompany](https://github.com/OpenCompanyApp) integration ecosystem. + +Give your AI agents access to privacy-friendly web analytics. Query traffic data, track realtime visitors, and manage sites and conversion goals — all through the [Plausible Analytics](https://plausible.io) API. + +## About OpenCompany + +[OpenCompany](https://github.com/OpenCompanyApp) is an AI-powered workplace platform where teams deploy and coordinate multiple AI agents alongside human collaborators. It combines team messaging, document collaboration, task management, and intelligent automation in a single workspace — with built-in approval workflows and granular permission controls so organizations can adopt AI agents safely and transparently. 
+ +This Plausible tool lets AI agents query website analytics, monitor realtime traffic, and manage tracking configuration — giving agents data-driven awareness of web properties. + +OpenCompany is built with Laravel, Vue 3, and Inertia.js. Learn more at [github.com/OpenCompanyApp](https://github.com/OpenCompanyApp). + +## Installation + +```console +composer require opencompanyapp/integration-plausible +``` + +Laravel auto-discovers the service provider. No manual registration needed. + +## Configuration + +This tool requires a Plausible Analytics API key. + +**In OpenCompany**, credentials are managed through the Integrations UI. + +**For standalone usage**, create `config/ai-tools.php`: + +```php +return [ + 'plausible' => [ + 'api_key' => env('PLAUSIBLE_API_KEY'), + 'url' => env('PLAUSIBLE_URL', 'https://plausible.io'), + 'sites' => ['example.com', 'blog.example.com'], + ], +]; +``` + +## Available Tools + +| Tool | Type | Description | +|------|------|-------------| +| `plausible_query_stats` | read | Query website analytics — aggregate, timeseries, breakdowns by dimension | +| `plausible_realtime_visitors` | read | Current realtime visitor count (last 5 minutes) | +| `plausible_list_sites` | read | List all tracked websites | +| `plausible_create_site` | write | Register a new website for tracking | +| `plausible_delete_site` | write | Remove a website from tracking | +| `plausible_list_goals` | read | List conversion goals for a site | +| `plausible_create_goal` | write | Create a conversion goal (page visit or custom event) | +| `plausible_delete_goal` | write | Delete a conversion goal | + +## Quick Start + +```php +use Laravel\Ai\Facades\Ai; +use OpenCompany\Integrations\Plausible\PlausibleService; +use OpenCompany\Integrations\Plausible\Tools\PlausibleQueryStats; +use OpenCompany\Integrations\Plausible\Tools\PlausibleRealtimeVisitors; + +// Create tools +$service = app(PlausibleService::class); +$tools = [ + new PlausibleQueryStats($service), + new 
PlausibleRealtimeVisitors($service), +]; + +// Use with an AI agent +$response = Ai::agent() + ->tools($tools) + ->prompt('How many visitors did example.com get this month?'); +``` + +### Via ToolProvider (recommended) + +If you have `integration-core` installed, all 8 tools auto-register with the `ToolProviderRegistry`: + +```php +use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; + +$registry = app(ToolProviderRegistry::class); +$provider = $registry->get('plausible'); + +// Create any tool via the provider +$tool = $provider->createTool( + \OpenCompany\Integrations\Plausible\Tools\PlausibleQueryStats::class +); +``` + +## Standalone Service Usage + +```php +use OpenCompany\Integrations\Plausible\PlausibleService; + +$service = app(PlausibleService::class); + +// Query stats +$stats = $service->query([ + 'site_id' => 'example.com', + 'metrics' => ['visitors', 'pageviews'], + 'date_range' => '30d', +]); + +// Realtime visitors +$count = $service->realtimeVisitors('example.com'); + +// List sites +$sites = $service->listSites(); + +// Manage goals +$goals = $service->listGoals('example.com'); +$service->createGoal('example.com', ['goal_type' => 'event', 'event_name' => 'Signup']); +``` + +## Dependencies + +| Package | Purpose | +|---------|---------| +| [opencompanyapp/integration-core](https://github.com/OpenCompanyApp/integration-core) | ToolProvider contract and registry | +| [laravel/ai](https://github.com/laravel/ai) | Laravel AI SDK Tool contract | + +## Requirements + +- PHP 8.2+ +- Laravel 11 or 12 +- [Laravel AI SDK](https://github.com/laravel/ai) ^0.1 +- A [Plausible Analytics](https://plausible.io) account with API access + +## License + +MIT — see [LICENSE](LICENSE) diff --git a/docs/ecosystem/integrations/plausible/plausible.md b/docs/ecosystem/integrations/plausible/plausible.md new file mode 100644 index 0000000..fdbf85e --- /dev/null +++ b/docs/ecosystem/integrations/plausible/plausible.md @@ -0,0 +1,144 @@ +# Plausible Analytics — Lua 
API Reference + +## plausible_query_stats + +Query website analytics with aggregate stats, timeseries, or breakdowns. + +### Parameters + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `site_id` | string | yes | Site domain, e.g. `"example.com"` | +| `metrics` | array | yes | Metrics to retrieve (see list below) | +| `date_range` | string | yes | Time period (see options below) | +| `dimensions` | array | no | Group results by dimension (see list below) | +| `filters` | string | no | JSON-encoded filter expressions | +| `date_from` | string | no | Start date (ISO 8601) when `date_range="custom"` | +| `date_to` | string | no | End date (ISO 8601) when `date_range="custom"` | +| `order_by` | string | no | JSON-encoded sort order | +| `limit` | integer | no | Max results (default: 10000) | + +### Date Range Options + +`"7d"`, `"28d"`, `"30d"`, `"month"`, `"3mo"`, `"6mo"`, `"12mo"`, `"custom"` + +When using `"custom"`, you must also pass `date_from` and `date_to`. + +### Available Metrics + +`visitors`, `pageviews`, `visits`, `bounce_rate`, `visit_duration`, `views_per_visit`, `events`, `conversion_rate` + +### Available Dimensions + +| Dimension | Description | +|-----------|-------------| +| `visit:source` | Traffic source (Google, Twitter, etc.) 
| +| `visit:country` | Country code | +| `visit:city` | City name | +| `visit:device` | Device type (Desktop, Mobile, Tablet) | +| `visit:browser` | Browser name | +| `visit:os` | Operating system | +| `event:page` | Page path | +| `event:name` | Custom event name | +| `time:day` | Day-level timeseries | +| `time:month` | Month-level timeseries | + +### Filter Syntax + +Filters are a JSON string containing an array of filter expressions: + +``` +[["operator", "dimension", ["value1", "value2"]]] +``` + +Operators: `is`, `is_not`, `contains`, `does_not_contain`, `matches`, `does_not_match` + +### Order By Syntax + +``` +[["metric_name", "desc"]] +``` + +## Examples + +### Top pages by visitors (last 30 days) + +```lua +local result = plausible_query_stats({ + site_id = "example.com", + metrics = {"visitors", "pageviews"}, + date_range = "30d", + dimensions = {"event:page"}, + order_by = '[["visitors", "desc"]]', + limit = 20 +}) + +for _, row in ipairs(result.rows) do + log(row["event:page"] .. ": " .. row.visitors .. " visitors") +end +``` + +### Traffic by country (custom date range) + +```lua +local result = plausible_query_stats({ + site_id = "example.com", + metrics = {"visitors", "visits", "bounce_rate"}, + date_range = "custom", + date_from = "2026-01-01", + date_to = "2026-01-31", + dimensions = {"visit:country"}, + order_by = '[["visitors", "desc"]]', + limit = 10 +}) + +for _, row in ipairs(result.rows) do + log(row["visit:country"] .. ": " .. row.visitors .. " visitors, " .. row.bounce_rate .. 
"% bounce") +end +``` + +### Filter to specific country + +```lua +local result = plausible_query_stats({ + site_id = "example.com", + metrics = {"visitors", "pageviews"}, + date_range = "7d", + dimensions = {"event:page"}, + filters = '[["is", "visit:country", ["US"]]]' +}) +``` + +### Filter pages containing /blog + +```lua +local result = plausible_query_stats({ + site_id = "example.com", + metrics = {"visitors", "pageviews"}, + date_range = "30d", + dimensions = {"event:page"}, + filters = '[["contains", "event:page", ["/blog"]]]', + order_by = '[["pageviews", "desc"]]' +}) +``` + +### Daily timeseries + +```lua +local result = plausible_query_stats({ + site_id = "example.com", + metrics = {"visitors"}, + date_range = "30d", + dimensions = {"time:day"} +}) +``` + +### Aggregate totals (no dimensions) + +```lua +local result = plausible_query_stats({ + site_id = "example.com", + metrics = {"visitors", "pageviews", "bounce_rate", "visit_duration"}, + date_range = "30d" +}) +``` diff --git a/docs/ecosystem/integrations/ticktick/README.md b/docs/ecosystem/integrations/ticktick/README.md new file mode 100644 index 0000000..753d9e3 --- /dev/null +++ b/docs/ecosystem/integrations/ticktick/README.md @@ -0,0 +1,106 @@ +# Integration: TickTick + +> Task management integration for the [Laravel AI SDK](https://github.com/laravel/ai) — manage projects, create tasks, set priorities, track completion. Part of the [OpenCompany](https://github.com/OpenCompanyApp) integration ecosystem. + +Give your AI agents the ability to manage TickTick tasks and projects. Supports both direct access token and OAuth authentication, plus the Dida365 variant. + +## About OpenCompany + +[OpenCompany](https://github.com/OpenCompanyApp) is an AI-powered workplace platform where teams deploy and coordinate multiple AI agents alongside human collaborators. 
It combines team messaging, document collaboration, task management, and intelligent automation in a single workspace — with built-in approval workflows and granular permission controls so organizations can adopt AI agents safely and transparently. + +This TickTick tool lets AI agents manage tasks and projects on behalf of users — creating tasks from conversations, checking project status, completing items, and keeping task lists organized automatically. + +OpenCompany is built with Laravel, Vue 3, and Inertia.js. Learn more at [github.com/OpenCompanyApp](https://github.com/OpenCompanyApp). + +## Installation + +```console +composer require opencompanyapp/integration-ticktick +``` + +Laravel auto-discovers the service provider. No manual registration needed. + +## Available Actions + +| Action | Description | Required Params | +|--------|-------------|-----------------| +| `ticktick_list_projects` | List all TickTick projects | — | +| `ticktick_get_project` | Get a project with its tasks and sections | `projectId` | +| `ticktick_create_project` | Create a new project (list) | `name` | +| `ticktick_delete_project` | Delete a project | `projectId` | +| `ticktick_get_tasks` | Get all tasks in a project | `projectId` | +| `ticktick_create_task` | Create a new task | `title`, `projectId` | +| `ticktick_update_task` | Update an existing task | `taskId`, `projectId` | +| `ticktick_complete_task` | Mark a task as complete | `taskId`, `projectId` | +| `ticktick_delete_task` | Delete a task | `taskId`, `projectId` | + +## Authentication + +Two authentication methods are supported — select your preferred method in the integration settings: + +### Access Token (recommended for quick setup) + +1. Go to [developer.ticktick.com/manage](https://developer.ticktick.com/manage) +2. Generate an access token +3. Paste it in the integration config + +### OAuth (Client ID + Secret) + +1. Register an app at the TickTick Developer Center +2. 
Enter your Client ID and Client Secret in the config +3. Use the OAuth authorize flow to connect + +## Quick Start: Use with Laravel AI SDK + +```php +use Laravel\Ai\Facades\Ai; +use OpenCompany\Integrations\TickTick\Tools\TickTickListProjects; +use OpenCompany\Integrations\TickTick\TickTickService; + +// Create the tool +$tool = new TickTickListProjects( + service: app(TickTickService::class), +); + +// Use with an AI agent +$response = Ai::agent() + ->tools([$tool]) + ->prompt('What projects do I have in TickTick?'); +``` + +### Via ToolProvider (recommended) + +If you have `integration-core` installed, the tool auto-registers with the `ToolProviderRegistry`: + +```php +use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; + +$registry = app(ToolProviderRegistry::class); +$provider = $registry->get('ticktick'); + +// Create a tool +$tool = $provider->createTool( + \OpenCompany\Integrations\TickTick\Tools\TickTickCreateTask::class, +); +``` + +## Dida365 Support + +TickTick operates as Dida365 in China. To use this integration with Dida365, change the API Base URL in settings to `https://api.dida365.com`. + +## Dependencies + +| Package | Purpose | +|---------|---------| +| [opencompanyapp/integration-core](https://github.com/OpenCompanyApp/integration-core) | ToolProvider contract and registry | +| [laravel/ai](https://github.com/laravel/ai) | Laravel AI SDK Tool contract | + +## Requirements + +- PHP 8.2+ +- Laravel 11 or 12 +- [Laravel AI SDK](https://github.com/laravel/ai) ^0.1 + +## License + +MIT — see [LICENSE](LICENSE) diff --git a/docs/ecosystem/integrations/ticktick/ticktick.md b/docs/ecosystem/integrations/ticktick/ticktick.md new file mode 100644 index 0000000..e21e488 --- /dev/null +++ b/docs/ecosystem/integrations/ticktick/ticktick.md @@ -0,0 +1,124 @@ +# TickTick — Lua API Reference + +## ticktick_list_projects + +List all projects (task lists). No parameters. Call this first to discover project IDs. 
+ +```lua +local projects = ticktick_list_projects({}) + +for _, p in ipairs(projects) do + log(p.name .. " (id: " .. p.id .. ")") +end +``` + +## ticktick_get_tasks + +Get all tasks in a project. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `project_id` | string | yes | Project ID (from `ticktick_list_projects`) | + +```lua +local tasks = ticktick_get_tasks({ project_id = "abc123" }) +``` + +## ticktick_create_task + +Create a new task. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `title` | string | yes | Task title | +| `project_id` | string | no | Project ID. Omit to add to Inbox | +| `content` | string | no | Description/notes | +| `start_date` | string | no | ISO 8601 (e.g. `"2026-03-30T09:00:00+0000"`) | +| `due_date` | string | no | ISO 8601 (e.g. `"2026-03-30T17:00:00+0000"`) | +| `priority` | integer | no | `0` = none, `1` = low, `3` = medium, `5` = high | +| `is_all_day` | boolean | no | `true` for all-day, `false` for specific times | +| `items` | string | no | JSON array of subtasks (see below) | + +### Subtask format + +``` +[{"title": "Subtask 1", "status": 0}, {"title": "Subtask 2", "status": 0}] +``` + +Status: `0` = unchecked, `2` = checked. + +## ticktick_complete_task + +Mark a task as complete. + +| Name | Type | Required | Description | +|------|------|----------|-------------| +| `project_id` | string | yes | Project ID the task belongs to | +| `task_id` | string | yes | Task ID to complete | + +## ticktick_update_task + +Update an existing task (same fields as create, plus `task_id` and `project_id`). + +## ticktick_delete_task + +Delete a task (requires `project_id` and `task_id`). 
+ +## Examples + +### List projects, then create a task + +```lua +-- Step 1: find the project +local projects = ticktick_list_projects({}) +local project_id = nil +for _, p in ipairs(projects) do + if p.name == "Work" then + project_id = p.id + break + end +end + +-- Step 2: create a high-priority task with a due date +ticktick_create_task({ + title = "Finish quarterly report", + project_id = project_id, + content = "Include revenue and churn metrics", + due_date = "2026-04-01T17:00:00+0000", + priority = 5, + is_all_day = false +}) +``` + +### Create a task with subtasks + +```lua +ticktick_create_task({ + title = "Launch checklist", + project_id = project_id, + priority = 3, + items = '[{"title": "Update changelog", "status": 0}, {"title": "Tag release", "status": 0}, {"title": "Notify team", "status": 0}]' +}) +``` + +### Complete a task + +```lua +-- Step 1: get tasks in the project +local tasks = ticktick_get_tasks({ project_id = "abc123" }) + +-- Step 2: complete the first one +ticktick_complete_task({ + project_id = "abc123", + task_id = tasks[1].id +}) +``` + +### Create a quick Inbox task + +```lua +ticktick_create_task({ + title = "Buy groceries", + priority = 1 +}) +``` diff --git a/docs/ecosystem/integrations/trustmrr/README.md b/docs/ecosystem/integrations/trustmrr/README.md new file mode 100644 index 0000000..0ddb2ab --- /dev/null +++ b/docs/ecosystem/integrations/trustmrr/README.md @@ -0,0 +1,52 @@ +# Integration: TrustMRR + +Verified startup revenue data for the [OpenCompany](https://github.com/OpenCompanyApp) integration ecosystem. Browse startups, filter by revenue, MRR, asking price, growth, and more — all backed by real payment provider data. + +## Available Tools + +| Slug | Type | Description | +|------|------|-------------| +| `trustmrr_list_startups` | read | Browse and filter startups by revenue, MRR, growth, category, and sale status. 
| +| `trustmrr_get_startup` | read | Get full details for a startup including tech stack, cofounders, and extended metrics. | + +## Installation + +```bash +composer require opencompanyapp/integration-trustmrr +``` + +The service provider is auto-discovered by Laravel. + +## Configuration + +Add your TrustMRR API key via the Integrations settings page, or configure it directly: + +| Key | Description | +|-----|-------------| +| `api_key` | TrustMRR API key (starts with `tmrr_`). Generate at [TrustMRR Developer Dashboard](https://trustmrr.com/dashboard/developer). | + +## Quick Start + +```php +use OpenCompany\Integrations\TrustMrr\TrustMrrService; + +$service = app(TrustMrrService::class); + +// List top startups by revenue +$startups = $service->listStartups(['sort' => 'revenue-desc', 'limit' => 10]); + +// Get details for a specific startup +$startup = $service->getStartup('shipfast'); +``` + +## Dependencies + +| Package | Version | +|---------|---------| +| PHP | ^8.2 | +| opencompanyapp/integration-core | ^2.0 | +| laravel/ai | ^0.1 | + +## License + +MIT diff --git a/docs/ecosystem/integrations/worldbank/README.md b/docs/ecosystem/integrations/worldbank/README.md new file mode 100644 index 0000000..697b6ef --- /dev/null +++ b/docs/ecosystem/integrations/worldbank/README.md @@ -0,0 +1,28 @@ +# World Bank Integration + +World Bank economic indicators integration for the OpenCompany integration ecosystem. Provides access to 29,000+ economic indicators for 200+ countries via the World Bank Open Data API. + +No API key required. 
+ +## Tools + +- **worldbank_search** — Discover countries, indicators, and topic categories +- **worldbank_data** — Fetch indicator data for countries with date ranges and comparisons + +## Common Indicators + +| Code | Description | +|------|-------------| +| NY.GDP.MKTP.CD | GDP (current US$) | +| NY.GDP.MKTP.KD.ZG | GDP growth (annual %) | +| NY.GDP.PCAP.CD | GDP per capita (current US$) | +| FP.CPI.TOTL.ZG | Inflation, consumer prices (annual %) | +| SL.UEM.TOTL.ZS | Unemployment (% of labor force) | +| SP.POP.TOTL | Population, total | +| SP.DYN.LE00.IN | Life expectancy at birth (years) | +| SI.POV.GINI | Gini index | +| EN.ATM.CO2E.PC | CO2 emissions (metric tons per capita) | + +## License + +MIT diff --git a/docs/ecosystem/iris/ecosystem-overview.md b/docs/ecosystem/iris/ecosystem-overview.md new file mode 100644 index 0000000..35607e6 --- /dev/null +++ b/docs/ecosystem/iris/ecosystem-overview.md @@ -0,0 +1,250 @@ +# Iris Ecosystem Overview + +Audit date: 2026-04-03 + +This document summarizes how Iris appears to fit into the broader +OpenCompany/KosmoKrator/token-commerce plan based on the current local docs and +code in: + +- `/Users/rutger/Projects/kosmokrator` +- `/Users/rutger/Sites/opencompany` + +Important: some of the strongest statements come from planning and confidential +architecture docs, not from already-shipped code. So this should be read as +"intended product/service direction" plus "current implementation reality." + +## Executive summary + +The intended architecture is not "Iris as just a proxy." 
+ +The intended architecture is: + +- `prism-relay` = shared provider/model normalization library +- `Iris` = deployed inference proxy and settlement layer +- `OpenCompany` = team collaboration product +- `KosmoKrator` = open-source agent client +- `tokens.opencompany.app` = future token marketplace / token commerce product + +The business plan is to keep team SaaS billing and AI compute billing separate, +while making them feel connected in product UX. + +## The intended product split + +The clearest strategy document is +`/Users/rutger/Projects/kosmokrator/docs/confidential/business-and-token-architecture.md`. + +Its model is: + +- **OpenCompany** = team AI platform +- **Token Commerce** = token packs, drops, exchange +- **KosmoKrator** = open-source agent +- **Relay/Iris** = inference proxy, provider routing, token accounting + +That document explicitly describes: + +- `opencompany.app` for team management and seat-based billing +- `tokens.opencompany.app` for packs, drops, exchange, and token billing +- `relay.opencompany.app` as the inference proxy service + +In other words, Iris is meant to be infrastructure with product consequences: +it is where usage becomes billable. + +## What Iris is supposed to be + +Based on the same architecture doc, the intended responsibilities of Iris are: + +- token auth +- token balance accounting +- provider routing +- provider failover +- rate limiting +- model pricing / settlement +- OpenAI-compatible inference endpoint + +That makes Iris the service boundary between: + +- user-facing apps +- third-party API consumers +- upstream model providers + +The same doc treats Iris as the mission-critical piece because it is the one +service that can both execute inference and settle cost. 
+ +## Why Iris exists separately from `prism-relay` + +The intended separation is: + +- `prism-relay` is the library +- `Iris` is the deployed service + +`prism-relay` owns: + +- provider/model registry +- aliases and normalization +- provider capabilities +- pricing metadata +- request/response normalization +- provider-specific runtime adapters where available + +Iris owns: + +- HTTP API surface +- auth +- token ledger +- settlement +- routing policy +- failover behavior +- rate limits +- balance endpoints + +That separation is consistent with the current codebase direction. + +## How KosmoKrator fits in + +KosmoKrator is currently both a consumer of `prism-relay` and part of the +strategic funnel into the rest of the ecosystem. + +Observed from local code/docs: + +- KosmoKrator already boots `RelayRegistryBuilder`, `RelayRegistry`, `Relay`, + and `RelayManager`. +- It uses relay for request normalization, error normalization, response + normalization, and prompt caching. +- Its README positions it as a multi-provider agent client with local and + configurable provider access. + +Strategically, the confidential business doc frames KosmoKrator as: + +- an MIT/open-source agent product +- something that can connect directly to the relay +- a separate buyer/user path from OpenCompany + +So KosmoKrator is both: + +- a real relay consumer +- a distribution wedge that can create demand for hosted compute tokens later + +## How OpenCompany fits in + +OpenCompany is the team product, not the token product. + +Current local docs show: + +- OpenCompany is the self-hosted collaboration platform for teams +- its AI stack uses Laravel AI SDK plus Prism +- it already tracks token usage and provider/model analytics internally +- it treats OpenRouter as an important provider option + +Strategically, the token/business architecture doc says OpenCompany should not +own token billing directly. 
Instead, OpenCompany should: + +- own platform subscription billing +- surface token balances and usage +- link out to token management +- send inference traffic through the relay service + +So OpenCompany is supposed to consume Iris, not absorb it. + +## Planned token marketplace + +The planned token-commerce product is explicitly described as separate from +OpenCompany. + +Its planned responsibilities are: + +- token pack browsing and purchase +- pack billing +- weekly token drops +- exchange listings / buy-sell order flow +- transaction history +- webhook handling for payments + +The reason for separating it is economic and product clarity: + +- OpenCompany sells seats to teams +- token commerce sells compute to individuals, teams, and third-party apps + +This is why the strategy docs keep repeating that tokens should not be buried +inside OpenCompany billing. + +## What this means for OpenRouter and other providers + +Within this architecture, a provider like OpenRouter is useful because it gives +the ecosystem broad model reach behind one provider integration, while Iris and +`prism-relay` preserve control of: + +- model catalog normalization +- internal accounting units +- provider capability rules +- API shape exposed to clients + +That means OpenCompany and KosmoKrator can treat OpenRouter as one normalized +provider among many, while Iris remains the thing that decides billing and +settlement. 
+ +## What exists today versus what is still planned + +### Exists today + +- `prism-relay` as a real shared package +- Iris as the beginning of the standalone proxy/settlement service +- KosmoKrator using relay heavily in runtime +- OpenCompany using Prism and provider abstractions +- internal docs that clearly define the service split + +### Planned or partial + +- a full standalone token marketplace app +- production-grade token pack, drop, and exchange flows +- the complete "relay.opencompany.app" operational surface described in strategy +- perfect settlement controls and all policy enforcement inside Iris +- complete runtime parity for every provider in the generated relay registry + +## Current reality for Iris + +Iris is aligned with the intended direction, but it is not fully at the target +described in the architecture docs yet. + +From the current Iris codebase, the remaining notable gaps are: + +- exact preauthorization and reservation, not just post-settlement +- real rate-limit enforcement +- relay-normalized error mapping across the HTTP surface +- active routing failover policy +- product features around packs/drops/exchange are still absent from Iris itself + +See also: + +- `docs/ecosystem/iris/missing-in-iris.md` + +## Practical interpretation + +If the ecosystem plan holds, Iris should become the canonical hosted compute +gateway for: + +- OpenCompany hosted usage +- KosmoKrator hosted mode +- third-party consumers using OpenCompany-issued tokens +- future token-commerce settlement + +That implies Iris should optimize for: + +- strict correctness in accounting +- provider abstraction stability +- simple client API compatibility +- auditable ledger behavior +- high-quality failure handling +- separation from UI/product concerns + +## Recommended framing for Iris + +The cleanest positioning is: + +- `prism-relay` normalizes providers +- `Iris` monetizes and governs inference +- `tokens` sells compute access +- `OpenCompany` and `KosmoKrator` consume that compute 
in different product + contexts + +If that remains the plan, then Iris is not a side utility. It is shared +infrastructure at the center of the token economy. diff --git a/docs/ecosystem/iris/missing-in-iris.md b/docs/ecosystem/iris/missing-in-iris.md new file mode 100644 index 0000000..3f8400b --- /dev/null +++ b/docs/ecosystem/iris/missing-in-iris.md @@ -0,0 +1,150 @@ +# What Is Still Missing In Iris + +Audit date: 2026-04-03 + +This note captures the remaining gaps in Iris after the Prism + `prism-relay` +integration and the bundled-only relay registry mode. + +## Highest-priority gaps + +### 1. Exact preauthorization is still missing + +Settlement is exact after the response finishes, but preflight balance checks are +still conservative only. + +- Iris only rejects when balance is non-positive or when requested max output + cost exceeds balance. +- Iris does not estimate prompt-side token cost before dispatch. +- Iris does not reserve funds before sending the upstream request. +- Concurrent requests can still race and overspend the same balance. + +Relevant code: + +- `src/Http/Controller/ProxyController.php` +- `src/Accounting/PricingEngine.php` +- `src/Accounting/TokenLedger.php` + +### 2. Rate limiting is configured but not enforced + +`rate_limit` exists in config and is stored in the ledger, but the request path +does not currently apply any requests-per-window check before allowing a call. + +Relevant code: + +- `config/iris.yaml` +- `src/Kernel.php` +- `src/Auth/TokenAuthenticator.php` +- `src/Accounting/TokenLedger.php` + +### 3. Relay error normalization is not fully wired into HTTP responses + +Iris still returns generic normalization and provider failure responses instead +of routing exceptions through relay's structured error categorization. 
+ +What is missing: + +- mapping provider exceptions through `OpenCompany\PrismRelay\Normalizers\ErrorNormalizer` +- returning stable error codes and HTTP statuses by normalized category +- forwarding retry hints such as `Retry-After` +- shaping streaming failure output consistently with the normalized error model + +Relevant code: + +- `src/Http/Controller/ProxyController.php` +- `vendor/opencompanyapp/prism-relay/src/Normalizers/ErrorNormalizer.php` + +### 4. Routing policy config is still mostly dead + +The config declares `default_provider` and `failover`, but the router currently +uses direct model-to-provider lookup from configured models and does not execute +failover chains on provider errors. + +Relevant code: + +- `config/iris.yaml` +- `src/Provider/ProviderRouter.php` + +### 5. Full provider runtime parity is not there yet + +Iris uses relay metadata and runtime adapters where they exist, but not every +provider in the relay catalog is executable through Prism yet. + +As of the current relay package, these providers are still metadata-only: + +- `cerebras` +- `cloudflare-ai-gateway` +- `codex` +- `cohere` +- `custom` +- `deepinfra` +- `gitlab` +- `google-vertex-anthropic` +- `sap-ai-core` +- `togetherai` +- `v0` +- `venice` + +Relevant docs: + +- `vendor/opencompanyapp/prism-relay/TODO.md` + +## Secondary gaps + +### 6. Authentication is still minimal + +A bearer token is accepted if it exists in static config or in the local ledger. +There is still no stronger production token model around issuer verification, +expiry, scopes, revocation, or hashed token storage. + +Relevant code: + +- `src/Auth/TokenAuthenticator.php` + +### 7. Test coverage is still too thin for production confidence + +Current tests cover mapping and some request-building behavior, but not the main +operational risks. 
+ +Missing test areas: + +- auth failures +- rate limiting +- exact billing and overspend prevention +- streaming settlement +- normalized error responses +- failover behavior +- end-to-end provider request handling + +Relevant paths: + +- `tests/` + +### 8. README and config still overstate the implemented product surface + +The current docs still describe parts of the product that do not actually exist +in the Iris service today. + +Examples: + +- balance checking is described as middleware behavior +- pack, drop, and exchange concepts are documented, but not implemented in Iris +- routing config implies failover support that is not active + +Relevant files: + +- `README.md` +- `config/iris.yaml` + +## Practical interpretation + +If the goal is "usable internal proxy with exact post-settlement and shared +relay metadata," Iris is already close. + +If the goal is "watertight production settlement and fully normalized proxy," +the blocking work is: + +1. exact preauthorization and reservation flow +2. real rate-limit enforcement +3. relay-normalized error handling +4. actual routing failover logic +5. closing or clearly scoping the remaining relay runtime-provider gaps diff --git a/docs/ecosystem/kosmokrator/README.md b/docs/ecosystem/kosmokrator/README.md new file mode 100644 index 0000000..686e400 --- /dev/null +++ b/docs/ecosystem/kosmokrator/README.md @@ -0,0 +1,49 @@ +# KosmoKrator Documentation + +## Architecture (Current-Truth) + +These docs describe shipped behavior. They must be updated when the codebase changes. 
+ +| Document | Description | +|----------|-------------| +| [overview.md](architecture/overview.md) | Architecture overview: runtime, UI, tools, context pipeline, subagents, config | +| [permission-modes.md](architecture/permission-modes.md) | Agent modes (Edit/Plan/Ask), permission modes (Guardian/Argus/Prometheus), evaluation order | +| [subagent-architecture.md](architecture/subagent-architecture.md) | Subagent types, tool scoping, orchestration, dependency resolution, concurrency | + +## Proposals + +Forward-looking design docs. Not shipped — may reference classes or features that don't exist yet. + +| Document | Description | +|----------|-------------| +| [streaming.md](proposals/streaming.md) | SSE streaming for LLM responses | +| [context-management-redesign.md](proposals/context-management-redesign.md) | 17 proposed context pipeline improvements | +| [context-management-strategies.md](proposals/context-management-strategies.md) | Semantic scoring, dedup tiers, progressive summarization | +| [context-compaction.md](proposals/context-compaction.md) | Historical plan for the first compaction implementation | +| [ecosystem-architecture.md](proposals/ecosystem-architecture.md) | Lua code mode, MCP integration, OpenCompany tool ecosystem | +| [integration-refactor-plan.md](proposals/integration-refactor-plan.md) | Refactoring tool packages to framework-agnostic contracts | +| [desktop-app.md](proposals/desktop-app.md) | NativePHP + Electron desktop surface proposal | +| [tui-ux-improvements.md](proposals/tui-ux-improvements.md) | 10 ranked UX improvements with mockups | +| [command-inspiration.md](proposals/command-inspiration.md) | Slash/power command ideas from competitive analysis | +| [laravel-ai-patterns.md](proposals/laravel-ai-patterns.md) | Patterns from Laravel AI SDK worth borrowing | + +## Audits (Historical) + +Write-once audit reports. Findings reference file:line numbers that may have shifted. 
+ +| Document | Date | Scope | +|----------|------|-------| +| [deep-audit-2026-04-02.md](audits/deep-audit-2026-04-02.md) | 2026-04-02 | Full codebase (8 domains, 162 files) | +| [self-audit-2026-03-30.md](audits/self-audit-2026-03-30.md) | 2026-03-30 | Initial self-audit (68 files) | +| [memory-leak-audit.md](audits/memory-leak-audit.md) | 2026-04-01 | Memory leak analysis (131 files) | +| [ram-audit/RAM-EFFICIENCY-AUDIT.md](audits/ram-audit/RAM-EFFICIENCY-AUDIT.md) | 2026-04-03 | RAM efficiency synthesis (10 agents) | +| [ram-audit/synthesis-architecture.md](audits/ram-audit/synthesis-architecture.md) | 2026-04-03 | Architecture RAM analysis | +| [ram-audit/synthesis-core-agent.md](audits/ram-audit/synthesis-core-agent.md) | 2026-04-03 | Core agent memory hotspots | +| [ram-audit/synthesis-io-performance.md](audits/ram-audit/synthesis-io-performance.md) | 2026-04-03 | I/O performance and buffering | +| [ram-audit/synthesis-security.md](audits/ram-audit/synthesis-security.md) | 2026-04-03 | Security-adjacent RAM concerns | + +## Confidential (Not in Git) + +Internal strategy and competitor analysis. Excluded from version control via `.gitignore`. + +See `docs/confidential/` — business strategy, token architecture, Claude Code analysis, OpenCode analysis, Reven specs. diff --git a/docs/ecosystem/kosmokrator/architecture/overview.md b/docs/ecosystem/kosmokrator/architecture/overview.md new file mode 100644 index 0000000..7a7da65 --- /dev/null +++ b/docs/ecosystem/kosmokrator/architecture/overview.md @@ -0,0 +1,156 @@ +# KosmoKrator Overview + +KosmoKrator is a terminal coding agent built in PHP. The shipped product today is a CLI application with a dual renderer, a tool-driven agent loop, session persistence, context management, slash commands, power commands, a skill system, and a subagent system. + +This document is the current-state architecture summary. Proposal and roadmap material lives in `docs/proposals/` and is explicitly labeled there. 
+ +## Current Implementation + +### Runtime + +The runtime entry path is: + +```text +bin/kosmokrator + → Kernel + → AgentCommand + → AgentSessionBuilder + → AgentLoop +``` + +Key responsibilities: + +- `Kernel` boots the Illuminate container, YAML config, logging, Prism provider wiring, SQLite persistence, tools, and commands. +- `AgentSessionBuilder` assembles UI, LLM client, permission evaluator, tool registry, session manager, context management helpers, and subagent infrastructure for an interactive session. +- `AgentLoop` runs the prompt → LLM → tools → LLM loop and handles persistence, mode filtering, context health, and status reporting. + +### UI + +KosmoKrator ships with three renderers behind `RendererInterface` — two interactive and one headless: + +- `TuiRenderer` for the interactive Symfony TUI experience +- `AnsiRenderer` for ANSI/readline fallback +- `NullRenderer` for headless subagent loops (auto-approves permissions) + +The shared UI layer also includes diff rendering, theming, terminal notifications, subagent tree formatting, and modal/dialog helpers for settings, approvals, and dashboards.
+ +### Tools and Modes + +Built-in tool families: + +- Coding tools: `file_read`, `file_write`, `file_edit`, `apply_patch`, `glob`, `grep`, `bash` +- Shell session tools: `shell_start`, `shell_write`, `shell_read`, `shell_kill` +- Coordination tools: `subagent`, `task_create`, `task_update`, `task_get`, `task_list` +- Interactive tools: `ask_user`, `ask_choice` +- Memory tools: `memory_save`, `memory_search` + +Interactive agent modes: + +- `Edit`: full tool access +- `Plan`: read/search/bash/subagent/task/ask tools, but no file mutation tools +- `Ask`: read/search/bash/task/ask tools, but no file mutation tools and no subagents + +Permission modes are separate from agent modes: + +- `Guardian`: auto-approve safe reads and safe bash, ask for riskier calls +- `Argus`: ask for approval on governed tool calls +- `Prometheus`: auto-approve governed calls except absolute denies + +Blocked paths and blocked command patterns are always enforced. + +### Persistence and State + +KosmoKrator persists state in SQLite under `~/.kosmokrator/data`: + +- Sessions and message history +- Global and project-scoped settings +- Memories and compaction summaries +- Token accounting metadata used for status and resume flows + +User-visible session flows include `/sessions`, `/resume`, `/new`, `/compact`, `/memories`, and `/forget`. + +### Context Management + +The current context pipeline is layered: + +- output truncation for oversized tool results +- deduplication of superseded tool results +- pruning of older low-value tool outputs +- LLM-based compaction with optional memory extraction +- oldest-turn trimming as an overflow fallback + +This is implemented today. Future context experiments live in `docs/proposals/context-management-strategies.md` and are not part of the shipped behavior unless stated otherwise. 
+ +### Subagents + +KosmoKrator ships with a working subagent system: + +- agent types: `general`, `explore`, `plan` +- dependency chains with `depends_on` +- sequential groups with `group` +- `await` and `background` execution modes +- retry handling for retryable failures +- concurrency limiting +- live tree/dashboard rendering via `/agents` + +See `AGENTS.md` and `docs/architecture/subagent-architecture.md` for implementation details. + +### Key Directories + +| Directory | Purpose | +|-----------|---------| +| `src/Agent/` | Agent core: AgentLoop, ToolExecutor, ContextManager, StuckDetector, subagent system, events | +| `src/LLM/` | LLM clients: AsyncLlmClient, PrismService, RetryableLlmClient, model catalog, pricing | +| `src/UI/` | Rendering: TuiRenderer, AnsiRenderer, NullRenderer, diff rendering, theming | +| `src/Tool/` | Tool implementations and permission system | +| `src/Command/` | AgentCommand, SetupCommand, AuthCommand, slash commands, power commands | +| `src/Command/Slash/` | 20 interactive slash commands (`/edit`, `/compact`, `/settings`, etc.) | +| `src/Command/Power/` | 20 power commands (`:autopilot`, `:review`, `:team`, `:unleash`, etc.) 
| +| `src/Session/` | SQLite persistence: sessions, messages, memories, settings | +| `src/Task/` | Task tracking with tree structure and dependency enforcement | +| `src/Skill/` | Skill system: YAML-based custom prompts with `$skillname` dispatch | +| `src/Settings/` | Layered settings resolution (project → global → default) | +| `src/Provider/` | Service providers for DI container wiring (9 providers) | +| `src/Update/` | Self-updater with GitHub release checking | +| `src/Audio/` | Completion sounds (LLM-composed MIDI per session) | + +## What Is Not Implemented + +These are still proposal or future-work areas, not shipped runtime features: + +- Lua code mode +- MCP client support +- external integration loader / hosted integrations +- desktop app surface +- provider failover across multiple backends in the main runtime + +Documents that discuss these topics are design docs in `docs/proposals/`, not current feature docs. + +## Configuration + +Config is loaded in layers, with later layers overriding earlier ones: + +1. bundled defaults in `config/*.yaml` +2. user config in `~/.kosmokrator/config.yaml` +3. project config in `.kosmokrator.yaml` + +Important config areas: + +- `config/prism.yaml` for provider endpoints and API keys +- `config/models.yaml` for model metadata such as context windows and pricing +- `config/kosmokrator.yaml` for agent behavior, permission defaults, UI settings, and context thresholds + +Environment variables in YAML are expanded using `${VAR_NAME}`. + +## Documentation Map + +See [docs/README.md](../README.md) for the full documentation index. + +Current-truth docs: + +- `README.md`: installation, usage, and high-level architecture +- `AGENTS.md`: subagent architecture and orchestration model +- `docs/architecture/permission-modes.md`: agent-mode and permission-mode behavior +- `docs/architecture/subagent-architecture.md`: current subagent behavior and configuration + +Proposal and reference material lives in `docs/proposals/`. 
Historical audits live in `docs/audits/`. diff --git a/docs/ecosystem/kosmokrator/architecture/permission-modes.md b/docs/ecosystem/kosmokrator/architecture/permission-modes.md new file mode 100644 index 0000000..7409637 --- /dev/null +++ b/docs/ecosystem/kosmokrator/architecture/permission-modes.md @@ -0,0 +1,125 @@ +# Permission Modes & Agent Modes + +KosmoKrator has two orthogonal control axes: + +- **Agent mode** decides which tools are available +- **Permission mode** decides how governed tool calls are approved + +## Agent Modes + +| Mode | Available tool families | Purpose | +|------|-------------------------|---------| +| **Edit** | read, write, edit, search, bash, subagent, task, ask-user tools | Full coding access | +| **Plan** | read, search, bash, subagent, task, ask-user tools | Research and planning without file edits | +| **Ask** | read, search, bash, task, ask-user tools | Q&A without file edits or subagents | + +Important behavior: + +- `file_write` and `file_edit` are unavailable outside `Edit` +- `subagent` is unavailable in `Ask` +- `bash` is available in all three interactive modes +- `Ask` adds an extra read-only guard: mutative bash commands are blocked even if permission mode is permissive + +## Permission Modes + +| Mode | Symbol | Behavior | +|------|--------|----------| +| **Guardian** | ◈ | Auto-approve known-safe calls, ask for riskier governed calls | +| **Argus** | ◉ | Ask for every governed call | +| **Prometheus** | ⚡ | Auto-approve governed calls unless an absolute deny rule matches | + +Governed calls come from the configured approval rules. By default that includes `file_write`, `file_edit`, and `bash`. 
+ +## How They Compose + +| Agent mode | Permission behavior | +|-----------|---------------------| +| **Edit** | Full permission system applies to writes and bash | +| **Plan** | No file mutation tools exist, but bash still goes through permission evaluation | +| **Ask** | No file mutation tools exist; bash still goes through permission evaluation, and mutative bash is denied by the mode guard | + +## Guardian Heuristics + +Guardian uses static checks only. Current auto-approve rules are: + +| Tool | Auto-approve behavior | +|------|------------------------| +| `file_read`, `glob`, `grep` | always auto-approved | +| `task_*` | always auto-approved | +| `file_write`, `file_edit` | auto-approved only when the resolved path is inside the project root | +| `bash` | auto-approved only when the command matches the safe-command whitelist and contains no shell operators | + +Blocked paths and blocked command patterns always win, regardless of permission mode. + +### Safe bash patterns + +Configured in `config/kosmokrator.yaml` under `tools.guardian_safe_commands`. + +Representative defaults: + +```text +git * +ls * +pwd +cat * +head * +tail * +wc * +find * +which * +echo * +php vendor/bin/phpunit* +php vendor/bin/pint* +composer * +npm * +node * +python * +cargo * +go * +make * +``` + +Commands containing shell operators such as `;`, `&&`, `|`, redirection, command substitution, or embedded newlines are not treated as safe. + +## Evaluation Order + +The permission evaluator applies rules in this order: + +1. blocked paths +2. blocked command patterns +3. session grants for the tool name +4. rule evaluation for `ask` or `deny` +5. 
permission-mode override (`Guardian`, `Argus`, `Prometheus`) + +Implications: + +- session grants can bypass future `ask` results for the same tool +- session grants do not bypass absolute deny rules +- `Prometheus` only upgrades `ask` to `allow`; it does not override denies + +## Approval Flow + +When approval is required, the UI can: + +- allow just this call +- allow this tool for the rest of the session +- escalate to `Guardian` +- escalate to `Prometheus` +- deny the call + +Changing to `Guardian` or `Prometheus` applies to the current session immediately and approves the current prompt flow. + +## Related Commands + +```text +/edit /plan /ask +/guardian /argus /prometheus +``` + +## Implementation References + +- `src/Agent/AgentMode.php` +- `src/Tool/Permission/PermissionMode.php` +- `src/Tool/Permission/PermissionEvaluator.php` +- `src/Tool/Permission/GuardianEvaluator.php` +- `config/kosmokrator.yaml` diff --git a/docs/ecosystem/kosmokrator/architecture/subagent-architecture.md b/docs/ecosystem/kosmokrator/architecture/subagent-architecture.md new file mode 100644 index 0000000..0d1e611 --- /dev/null +++ b/docs/ecosystem/kosmokrator/architecture/subagent-architecture.md @@ -0,0 +1,98 @@ +# Subagent Architecture + +This document describes the current shipped subagent system. + +KosmoKrator can spawn child agents for parallel research, planning, and delegated work. Each child runs its own agent loop with a narrowed tool set and reports results back through a shared orchestrator. + +## Agent Types + +| Type | Read | Write | Can spawn | +|------|------|-------|-----------| +| `general` | yes | yes | `general`, `explore`, `plan` | +| `explore` | yes | no | `explore` | +| `plan` | yes | no | `explore` | + +Type narrowing is strict. Children can only keep or reduce capabilities relative to their parent. 
+ +## Interactive Agent Modes vs Subagent Types + +Do not confuse: + +- **interactive agent modes**: `Edit`, `Plan`, `Ask` +- **subagent types**: `general`, `explore`, `plan` + +Interactive modes shape the parent session tool set. Subagent types shape delegated child sessions. + +## Tool Scoping + +Current subagent tool sets: + +- `general`: `file_read`, `file_write`, `file_edit`, `glob`, `grep`, `bash`, `subagent` +- `explore`: `file_read`, `glob`, `grep`, `bash`, `subagent` +- `plan`: `file_read`, `glob`, `grep`, `bash`, `subagent` + +The `subagent` tool is removed automatically once the max depth is reached. + +## Execution Modes + +The `subagent` tool supports two execution modes: + +| Mode | Behavior | +|------|----------| +| `await` | parent waits for the child result and gets it inline as a tool result | +| `background` | parent continues immediately and receives the child result on a later turn | + +Background results are collected per parent agent ID so sibling trees do not drain each other's results. 
+ +## Orchestration Features + +The current orchestrator supports: + +- explicit agent IDs +- dependency chains with `depends_on` +- sequential execution groups with `group` +- global concurrency limiting +- retry handling for retryable failures +- cancellation of background agents +- per-agent stats for status, elapsed time, tokens, tool calls, depth, and retries + +Dependency behavior: + +- a dependent child waits for all listed dependencies +- successful dependency results are injected into the child task +- failed dependencies are injected as marked degraded results instead of aborting the dependent child +- circular dependencies are rejected before execution + +## Depth and Concurrency + +Default runtime settings: + +```yaml +agent: + subagent_max_depth: 3 + subagent_concurrency: 10 + subagent_max_retries: 2 +``` + +Meaning: + +- root session depth is `0` +- children increment depth by `1` +- the default tree allows root → child → grandchild +- concurrency `0` disables the global semaphore and allows unlimited parallel children + +## UI and Monitoring + +KosmoKrator exposes subagent state through: + +- inline spawn/running/batch displays in both renderers +- a live tree in TUI mode +- the `/agents` dashboard for aggregated progress, retries, token usage, and failures + +## Implementation References + +- `AGENTS.md` +- `src/Tool/Coding/SubagentTool.php` +- `src/Agent/SubagentOrchestrator.php` +- `src/Agent/SubagentFactory.php` +- `src/Agent/AgentContext.php` diff --git a/docs/ecosystem/kosmokrator/audits/deep-audit-2026-04-02.md b/docs/ecosystem/kosmokrator/audits/deep-audit-2026-04-02.md new file mode 100644 index 0000000..024029f --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/deep-audit-2026-04-02.md @@ -0,0 +1,287 @@ +# KosmoKrator Deep Audit + +> **Date:** 2026-04-02 +> **Scope:** Full codebase — 162 PHP source files (25,130 lines), 81 test files (12,278 lines) +> **Method:** 8 parallel audit domains via ~30 subagents, each finding verified against 
code with exact file:line references + +## Audit Domains + +| Domain | Focus | +|--------|-------| +| Security | Command injection, path traversal, input validation, secret exposure | +| Error Handling | Exception swallowing, missing finally blocks, recovery paths, infinite loops | +| Concurrency | Race conditions, semaphore leaks, fiber safety, cancellation propagation | +| API Boundaries | LLM response parsing, tool parameter validation, response size limits | +| Resource Management | File handle/process/DB leaks, temp file cleanup, unbounded buffering | +| Session Persistence | SQL injection, schema constraints, concurrent writes, file permissions | +| Logic Bugs | State machine violations, edge cases in patch/edit tools, off-by-one errors | +| Test Coverage | Untested classes, assertion depth, mock quality, isolation | + +--- + +## Critical Findings (5) + +### C1. BashTool EventLoop timer leak + +**Location:** `src/Tool/Coding/BashTool.php:68-113` + +The timeout timer created via `EventLoop::delay()` is only cancelled on the success path (line 99). If `$process->join()` or `$stdoutFuture->await()` throws, the catch block returns without calling `EventLoop::cancel($timerId)`. The timer callback holds a reference to the `Process` object, preventing GC. + +```php +// Current: timer leaked on exception +} catch (\Throwable $e) { + return ToolResult::error("Process error: {$e->getMessage()}"); +} + +// Fix: cancel timer in catch +} catch (\Throwable $e) { + EventLoop::cancel($timerId); + return ToolResult::error("Process error: {$e->getMessage()}"); +} +``` + +### C2. Semaphore self-deadlock with nested agents + +**Location:** `src/Agent/SubagentOrchestrator.php:165-201` + +When parent agents hold semaphore slots and their child agents (spawned inside the semaphore-held zone) also need slots, all slots can be consumed by waiting parents. Children never acquire a slot, parents never finish — deadlock. 
+ +Trigger: `concurrency` set low (e.g., 2) with agents at depth > 1. The dependency wait happens *before* semaphore acquisition, but the factory execution runs *inside* the held semaphore zone, and nested `SubagentTool` calls re-enter `spawnAgent()` which tries to acquire the global semaphore again. + +### C3. ShellSession unbounded buffer + +**Location:** `src/Tool/Coding/ShellSession.php:41,54-55` + +The `$buffer` string grows unboundedly as chunks are appended via `.= ` in `appendOutput()`. The `readUnread()` method updates `$readOffset` but **never truncates `$buffer`**. Long-running sessions (e.g., `tail -f`, build logs) accumulate memory indefinitely. + +```php +// Fix: discard consumed portion in readUnread() +public function readUnread(): string +{ + $chunk = substr($this->buffer, $this->readOffset); + $this->buffer = ''; // everything buffered so far has now been returned + $this->readOffset = 0; + $this->touch(); + return $chunk; +} +``` + +### C4. Task::transitionTo() ignores state machine + +**Location:** `src/Task/Task.php:57` + +`TaskStatus::canTransitionTo()` defines valid transitions (pending→in_progress, in_progress→completed/cancelled/failed), but `transitionTo()` never calls it. Any-to-any state transitions are silently allowed. `TaskUpdateTool` also omits `failed` from its valid status list. + +### C5. file_read is ALWAYS_SAFE in Guardian mode + +**Location:** `src/Tool/Permission/GuardianEvaluator.php:23-30` + +`file_read` is listed in `ALWAYS_SAFE`, meaning reads of any file are auto-approved without path checks. An LLM can read `/etc/passwd`, `~/.ssh/id_rsa`, or any file on the system with zero restriction and no user prompt. + +--- + +## High Findings (8) + +### H1.
Raw exception messages leak to LLM + +**Locations:** `src/Agent/ToolExecutor.php:307`, `src/Agent/AgentLoop.php:248,425` + +`$e->getMessage()` from any caught `Throwable` (including PDO exceptions, filesystem errors) is returned directly as tool result text, which is then sent back to the LLM. This can leak internal filesystem paths, database credentials (if present in DSN), PHP version details, and stack trace information. + +### H2. GuardianEvaluator mutative command check bypassed by full paths + +**Location:** `src/Tool/Permission/GuardianEvaluator.php:140` + +`MUTATIVE_PATTERNS` uses `str_starts_with($lower, $pattern)` to detect mutative commands. Full-path invocations like `/bin/rm -rf /` or `/usr/bin/git commit` bypass all pattern checks. Ask mode relies on this check to block mutative commands. + +### H3. Concurrent file edits silently lose data + +**Location:** `src/Tool/Coding/FileEditTool.php:135` + +No file locking is used. If parallel subagents edit the same file, both read the original, find their matches, create temp files, and `rename()`. The second rename overwrites the first, silently discarding the earlier edit. + +### H4. BashTool ignores Cancellation — zombie processes + +**Location:** `src/Tool/Coding/BashTool.php:52-113` + +`BashTool::execute()` takes no `Cancellation` parameter. If the user presses Ctrl+C while a bash tool is running in a subagent, the process won't be killed until it times out (up to 7200 seconds). Cancellation is caught at the LLM call level, but the spawned process continues as a zombie. + +### H5. No PRAGMA busy_timeout on SQLite ✅ Fixed + +**Location:** `src/Session/Database.php:30-32` + +WAL mode is enabled but no `busy_timeout` is set. If two KosmoKrator processes access the same DB simultaneously (e.g., two terminal sessions), one will get an immediate `SQLITE_BUSY` exception instead of retrying. + +**Fix:** Add `$this->pdo->exec('PRAGMA busy_timeout=5000');` after line 32. + +### H6. 
DB directory 0755 instead of 0700 ✅ Fixed + +**Location:** `src/Session/Database.php:19` + +The database directory `~/.kosmokrator/data` is created with `0755` (world-readable). The log directory in `Kernel.php:124` uses `0700`. The DB file itself is created with default permissions under the process umask (typically resulting in `0644` — world-readable). + +### H7. PatchApplier blocked-path bypass via non-existent parents + +**Location:** `src/Tool/Coding/Patch/PathResolver.php:33-35` + +When a file doesn't exist yet (e.g., `add` operation), `PathResolver::resolve()` falls back to `realpath(dirname($path))`. If the parent directory itself doesn't exist, `realpath()` returns `false` → `resolve()` returns `null` → the resolved path is never checked against blocked paths. + +### H8. PermissionEvaluator blocked-path check doesn't work for apply_patch + +**Location:** `src/Tool/Permission/PermissionEvaluator.php:23` + +The blocked-path check inspects `$args['path']`, but `apply_patch` passes arguments as `patch` (containing embedded paths), not `path`. The `PatchApplier` has its own internal check, but the `PermissionEvaluator` layer is completely bypassed for patch operations — single point of failure. + +--- + +## Medium Findings (12) + +### M1. No response body size limit on LLM HTTP + +**Location:** `src/LLM/AsyncLlmClient.php:193` + +The Amp HTTP client's `buffer()` reads the entire response into memory. No `Content-Length` check or body size cap. A compromised LLM API could return an arbitrarily large response causing OOM. Transfer timeout (600s) provides partial mitigation. + +### M2. No secret redaction in ContextManager + +**Location:** `src/Agent/ContextManager.php:130-157` + +Memories, session recall, tool results, and parent briefs are injected into the system prompt verbatim. If any contain API keys, passwords, or other secrets (e.g., from `env` command output stored in session history), they are sent to the LLM API. + +### M3.
ShellStartTool no timeout upper bound ✅ Fixed + +**Location:** `src/Tool/Coding/ShellStartTool.php:54` + +Unlike `BashTool` which clamps timeouts to `max(1, min($timeout, 7200))`, `ShellStartTool` passes the timeout directly. A user/LLM could specify `timeout: 999999` (~11.5 days). The idle TTL (300s) partially mitigates this for idle sessions. + +### M4. ToolExecutor missing finally for BashTool::$progressCallback ✅ Fixed + +**Location:** `src/Agent/ToolExecutor.php:155-165` + +`BashTool::$progressCallback` is set before execution and cleared after, but not in a `finally` block. If `executeSingleTool()` throws past its own catch (e.g., `ToolResult` constructor failure), the static callback leaks. + +### M5. StuckDetector only in runHeadless() + +**Location:** `src/Agent/AgentLoop.php` + +The `StuckDetector` is only wired in `runHeadless()`. Interactive `run()` has no stuck detection — by design, since the user controls execution via Ctrl+C. + +### M6. runHeadless() missing finally block + +**Location:** `src/Agent/AgentLoop.php:337-487` + +Unlike `run()` which has a `finally` block (line 325-328) that resets UI phase to Idle, `runHeadless()` has no guaranteed cleanup path. + +### M7. maybeCompleteParent marks Completed even when children failed + +**Location:** `src/Task/TaskStore.php:304` + +When all children reach terminal status, the parent is auto-completed as `Completed` regardless of whether children are `Failed` or `Cancelled`. A parent with all-failed children should probably be marked `Failed`. + +### M8. PatchParser inconsistent empty-line handling + +**Location:** `src/Tool/Coding/Patch/PatchParser.php:34` vs `:157` + +Empty lines between operations are silently skipped (line 34), but empty lines inside an update body throw an `InvalidArgumentException` (line 157). This inconsistency can confuse LLMs generating patches. + +### M9. 
Lost exception context in all error logging + +**Locations:** `AgentLoop.php:222,244,409`, `ToolExecutor.php:305`, `SubagentOrchestrator.php:203` + +All catch blocks use only `$e->getMessage()`, discarding exception class name, file, and line. Makes debugging production issues very difficult. Should log `$e::class`, `$e->getFile()`, `$e->getLine()` alongside. + +### M10. No transactions around multi-step DB operations + +**Location:** `src/Session/SessionManager.php:69-93` + +`saveMessage()` performs INSERT + UPDATE + potential SELECT + UPDATE without wrapping in a transaction. If the process crashes between the message insert and the session touch, data will be inconsistent. + +### M11. Temp file leak on exception in FileEditTool + +**Location:** `src/Tool/Coding/FileEditTool.php:149-170` + +If `stream_copy_to_stream()` or `fwrite()` throws inside `patchFile()`, the `finally` block closes file handles but does NOT delete the `.tmp.` file. The `@unlink($tmpPath)` at line 175 only runs when `rename()` returns false, not on exceptions. + +### M12. BashTool static $progressCallback race across subagents + +**Location:** `src/Tool/Coding/BashTool.php:17`, `src/Agent/ToolExecutor.php:155-165` + +`BashTool::$progressCallback` is a static property shared across all fibers. If a background subagent and its parent both execute bash tools, they overwrite each other's callback. + +--- + +## By Design + +- **Interactive run() has no round limit or StuckDetector** — the user controls execution and can Ctrl+C at any time. Headless mode has both guards since there's no human in the loop. 
+ +--- + +## What's Healthy + +| Area | Assessment | +|------|------------| +| SQL injection | All queries use prepared statements with parameterized values | +| PHP object injection | Zero `unserialize()` calls in the codebase | +| JSON deserialization | Uses `json_decode($str, true)` with array type checks | +| Semaphore finally blocks | Orchestrator `finally` correctly releases both group and global semaphores | +| StuckDetector escalation | Well-designed 3-stage path: nudge → final notice → force return | +| Background agent cancellation | `cancelAll()` correctly cancels all background agents on shutdown | +| LLM HTTP cancellation | Cancellation token propagated to both request and body buffering | +| File handle management | `FileEditTool` streaming path uses proper try/finally with fclose | +| Process cleanup on exit | `AgentCommand` teardown calls `cancelAll()` then `killAll()` | +| WAL mode | Enabled for concurrent SQLite reads | +| Foreign keys | Enforced via `PRAGMA foreign_keys=ON` | +| Dependency cycle detection | DFS-based cycle detection before agent spawning | +| LIKE injection | Wildcards properly escaped in both `MessageRepository` and `MemoryRepository` | + +--- + +## Test Coverage Summary + +| Metric | Value | +|--------|-------| +| Test files | 79 | +| Test methods | ~662 | +| Classes with tests | ~65 of ~100 concrete classes | +| Core logic method coverage | ~85% | +| Skipped/incomplete tests | 0 | + +### Critical Untested Code + +| Priority | File | LOC | Risk | +|----------|------|-----|------| +| P0 | `src/Agent/ToolExecutor.php` | 456 | Core execution pipeline — permission checks, concurrent execution, error handling | +| P0 | `src/Agent/AgentSessionBuilder.php` | ~240 | Complex DI wiring — broken wiring goes undetected | +| P1 | `src/Agent/MemorySelector.php` | — | Scoring/ranking algorithm — bugs silently degrade agent intelligence | +| P1 | `src/Agent/ContextBudget.php` | — | Threshold math for auto-compact/blocking — trivially testable 
| +| P1 | `src/Settings/SettingsManager.php` | ~220 | Entire Settings/ namespace has zero tests | +| P2 | `src/Tool/Coding/Patch/PatchApplier.php` | — | Disk-modifying code with no tests | +| P2 | Shell tool classes (Start/Write/Read/Kill) | — | Process I/O tools, only ShellSessionManager tested | +| P2 | `src/LLM/PromptFrameBuilder.php` | — | Builds system prompt frames | + +### Services with Zero Tests + +1. `CodexOAuthService` — OAuth for Codex auth +2. `CodexAuthFlow` — Full auth flow orchestration +3. `Relay` — External PrismRelay registration +4. `PatchApplier` — Only tested indirectly via `ApplyPatchToolTest` +5. `SessionGrants` — Auto-wired singleton + +### DI Wiring + +No test verifies that the container correctly resolves all registered services. The only integration test (`Feature/AgentCommandTest.php`) boots the kernel and runs `/quit` — a smoke test, not a DI verification. + +--- + +## Recommended Fix Priority + +1. **C1** (timer leak) — One-line fix, zero risk +2. **C3** (unbounded buffer) — Three-line fix, zero risk +3. **C5** (file_read ALWAYS_SAFE) — Design decision needed: restrict to project dir or keep open? +4. **H5** (busy_timeout) — One-line fix, zero risk +5. **H6** (0755→0700) — One-line fix, zero risk +6. **H7** (PathResolver null) — Small fix in PathResolver +7. **C2** (semaphore deadlock) — Design decision: reserve slots for children? Pre-check depth? +8. **C4** (state machine) — Wire `canTransitionTo()` into `transitionTo()` +9. **H1** (exception message leak) — Sanitize paths and env details from error messages +10. **H2** (full-path bypass) — Expand mutative patterns or use `basename()` extraction diff --git a/docs/ecosystem/kosmokrator/audits/memory-leak-audit.md b/docs/ecosystem/kosmokrator/audits/memory-leak-audit.md new file mode 100644 index 0000000..322620e --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/memory-leak-audit.md @@ -0,0 +1,534 @@ +# Memory Leak Audit + +> Status: Historical audit. 
Counts, findings, and repository size reflect the audit date and may not match the current codebase. + +Comprehensive audit of the KosmoKrator codebase (131 PHP files, ~21k lines) for memory leaks, resource leaks, and unbounded growth patterns. Covers all subsystems: Agent loop, Subagent orchestrator, LLM/HTTP layer, Tools, TUI/ANSI rendering, Session persistence, and vendor dependencies. + +--- + +## Table of Contents + +- [Executive Summary](#executive-summary) +- [Object Reference Map & Cycles](#object-reference-map--cycles) +- [Critical Findings](#critical-findings) +- [High Findings](#high-findings) +- [Medium Findings](#medium-findings) +- [Low Findings](#low-findings) +- [Vendor Library Risks](#vendor-library-risks) +- [Async/Event-Loop Pattern Audit](#asyncevent-loop-pattern-audit) +- [Positive Findings (Clean)](#positive-findings-clean) +- [Recommended Fix Plan](#recommended-fix-plan) + +--- + +## Executive Summary + +10 subagents audited every file in `src/` plus key vendor libraries (`amphp/http-client`, `amphp/amp`, `amphp/process`, `prism-php/prism`, `revolt/event-loop`). Findings break down as follows: + +| Severity | Count | Summary | +|----------|-------|---------| +| CRITICAL | 8 | Unbounded growth, leaked timers, leaked HTTP connections | +| HIGH | 12 | Circular reference chains, missing destructors, unbounded buffers | +| MEDIUM | 14 | Accumulating caches, soft-deleted data, missing cleanup | +| LOW | 10 | Minor issues, theoretical risks, bounded growth | + +The three highest-impact areas are: + +1. **SubagentOrchestrator** — failed agents never pruned, `Future` objects with large closures accumulate +2. **HTTP connection pools** — each subagent creates a fresh `AsyncLlmClient` with an unbounded connection pool +3. 
**TUI timer lifecycle** — timers not cancelled on teardown, pinning the entire renderer object graph + +--- + +## Object Reference Map & Cycles + +### Cycle 1: The Agent Loop Cycle (GC-Resistant) + +``` +AgentLoop ◂──────────────────────────────────────────┐ + │ $agentContext │ + │ $allTools │ + ▼ │ +AgentContext ───$orchestrator──▸ SubagentOrchestrator │ + │ │ │ + │ (readonly, shared) │ $agents[] → Future + │ │ │ │ + ▼ │ ▼ (fiber closure captures) +SubagentTool ◂──── ToolRegistry ◂────┘ SubagentFactory + │ $parentContext │ $rootRegistry → ToolRegistry + │ $agentFactory ───────────────────────┘ + └──▸ Closure captures $subagentFactory + └──▸ creates child AgentLoop + (child.agentContext → SAME orchestrator) +``` + +**Pinned by:** Amp EventLoop holds Future refs. Not breakable by PHP GC until Futures complete and `pruneCompleted()` is called. + +**Objects in cycle:** 5+ (AgentLoop, AgentContext, SubagentTool, ToolRegistry, SubagentOrchestrator) +**Estimated pinned memory:** 5–15 KB base + unbounded Future/Stats arrays + +### Cycle 2: The TUI Display Cycle (Timer-Pinned) + +``` +TuiRenderer ◂──────────────────────────────────┐ + │ $subagentDisplay │ + │ $animationManager │ + ▼ │ +SubagentDisplayManager ──Closures($this)──▸ TuiRenderer + │ │ + │ $breathColorProvider → $animationManager │ + │ $renderCallback → $this→flushRender() │ + │ │ + └───◂── TuiAnimationManager ──────────────────┘ + $subagentTickCallback → $subagentDisplay + $renderCallback → $this→flushRender() + $refreshTaskBar → $this→refreshTaskBar() +``` + +**Pinned by:** 4+ `EventLoop::repeat()` timers (30fps breathing, 20fps subagent, 50fps tool-executing, compacting). Not breakable while timers are active. 
+ +**Objects in cycle:** 4+ (TuiRenderer, SubagentDisplayManager, TuiAnimationManager, TuiModalManager) +**Estimated pinned memory:** 20–50 KB + widget tree + +### Cycle 3: TuiRenderer ↔ TuiModalManager + +``` +TuiRenderer → TuiModalManager +TuiModalManager.$renderCallback → Closure($this = TuiRenderer) +TuiModalManager.$forceRenderCallback → Closure($this = TuiRenderer) +``` + +**Pinned by:** Dashboard timer when active. Otherwise breakable by PHP GC. + +### Subagent Tree Spanning Cycle + +At depth 2 with 3 concurrent agents, every child references back to the root's `ToolRegistry`: + +``` +Child AgentLoop → child SubagentTool → $agentFactory Closure → $subagentFactory + → $subagentFactory.rootRegistry → ROOT ToolRegistry → ROOT SubagentTool + → ROOT SubagentTool.parentContext → ROOT AgentContext → SubagentOrchestrator + → SubagentOrchestrator.agents[childId] → Future → child fiber → child AgentLoop +``` + +For a full depth-2 tree (12 agents), estimated total pinned memory: **400 KB – 3 MB** (dominated by `ConversationHistory`). + +--- + +## Critical Findings + +### C1. Fresh HttpClient per Subagent — N Independent Connection Pools + +**Files:** `src/Agent/SubagentFactory.php:96–104`, `src/LLM/AsyncLlmClient.php:32` + +Each `createAndRunAgent()` creates a new `AsyncLlmClient` → new `HttpClientBuilder::buildDefault()` → new `UnlimitedConnectionPool` (limit: `PHP_INT_MAX`). With 3+ concurrent subagents at depth 2–3, this creates multiple unbounded connection pools, each holding open sockets and TLS state. Never explicitly closed. + +**Trigger:** Every subagent spawn. +**Fix:** Share a single `HttpClient` across all `AsyncLlmClient` instances. Create it once in `SubagentFactory` constructor and inject it. Also bound the pool: `ConnectionLimitingPool::byAuthority(8)`. + +--- + +### C2. 
ConversationHistory Unbounded in Headless Mode + +**File:** `src/Agent/ConversationHistory.php:15` + +In headless mode (subagents via `AgentLoop::runHeadless()`), no `ContextCompactor` is passed (`SubagentFactory.php:70`). The only backpressure is `trimOldest()` on overflow errors. Subagents processing many tool calls accumulate hundreds of messages with full tool output. + +**Trigger:** Subagents with many tool call rounds. +**Fix:** Pass a lightweight compactor or implement token-count-based trimming in headless mode. + +--- + +### C3. SubagentOrchestrator — Failed Agent Futures Never Pruned ✅ Fixed + +**File:** `src/Agent/SubagentOrchestrator.php:340` + +```php +$terminalStates = ['done' => true, 'cancelled' => true]; +``` + +`pruneCompleted()` only removes `'done'` and `'cancelled'`. Failed agents stay in `$this->agents` (holding `Future` objects with large closures) and `$this->stats` indefinitely. + +**Trigger:** Any subagent failure (API errors, context overflows). +**Fix:** Add `'failed'` to `$terminalStates`. One-line fix. + +--- + +### C4. TUI Teardown Doesn't Cancel Timers + +**File:** `src/UI/Tui/TuiRenderer.php:1143–1148` + +`teardown()` calls `$this->tui->stop()` but never cancels the breathing timer (`TuiAnimationManager::$thinkingTimerId` at 30fps), the compacting timer, the subagent elapsed timer (`SubagentDisplayManager::$elapsedTimerId` at 20fps), or the tool-executing timer. These `EventLoop::repeat()` timers capture `$this` via closure, pinning the entire TuiRenderer + widget tree in memory. + +**Trigger:** Process exit during thinking, tool execution, or while subagents are running. +**Fix:** Add `TuiAnimationManager::shutdown()` that cancels all timers. Call it + `$subagentDisplay->cleanup()` from `teardown()`. + +--- + +### C5. GrepTool Spawns `which rg` TWICE per Call, No Caching ✅ Fixed + +**File:** `src/Tool/Coding/GrepTool.php:47,52,88–93` + +`hasRipgrep()` spawns a new `Process` each call and is invoked twice per `execute()`. 
In heavy grep sessions, this triples process overhead. + +**Trigger:** Every grep invocation. +**Fix:** Cache: `private ?bool $hasRg = null;` and memoize. + +--- + +### C6. GrepTool Has No Timeout ✅ Fixed + +**File:** `src/Tool/Coding/GrepTool.php:64–67` + +Unlike `BashTool`, `GrepTool` has zero timeout protection. A hung grep (network mount, FIFO, massive tree) blocks the agent loop forever. + +**Trigger:** Searching slow filesystems or massive directories. +**Fix:** Add timeout watchdog identical to `BashTool`'s pattern. + +--- + +### C7. BashTool Timer NOT Cancelled on Exception ✅ Fixed + +**File:** `src/Tool/Coding/BashTool.php:71–99` + +`EventLoop::cancel($timerId)` at line 84 is only reached on the happy path. If `$process->join()` or `->await()` throws, execution jumps to the catch block and the timer leaks. The timer closure captures `$process`, keeping the Process object alive. + +**Trigger:** Exception during process execution. +**Fix:** Move `EventLoop::cancel($timerId)` to a `finally` block. + +--- + +### C8. Unbounded `buffer()` — Full Command Output in Memory + +**Files:** `src/Tool/Coding/BashTool.php:79–80`, `src/Tool/Coding/GrepTool.php:65–66` + +`Amp\ByteStream\buffer()` reads entire stdout/stderr into a single string with no size cap. `OutputTruncator` runs after the full string is already in memory. A command producing GBs of output OOMs before truncation kicks in. + +**BashTool additional issue:** The progress callback (`ToolExecutor.php:140–142`) passes the entire accumulated buffer to the UI on every chunk, not just the new chunk. + +**Trigger:** Bash commands with large output (logs, data files, recursive listings). +**Fix:** Stream output to a temp file with a configurable size cap, or use a chunked buffer that stops after N bytes and kills the process. + +--- + +## High Findings + +### H1. 
SubagentOrchestrator Has No `__destruct()` — Background Agents Orphaned ✅ Fixed + +**File:** `src/Agent/SubagentOrchestrator.php:20–517` + +Has `cancelAll()` but no destructor. If the orchestrator goes out of scope while background agents run, their cancellations are never triggered and futures execute orphaned. + +**Fix:** Add `public function __destruct() { $this->cancelAll(); }`. + +--- + +### H2. AgentContext Circular Reference via Orchestrator + +**File:** `src/Agent/AgentContext.php:19` + +Every `AgentContext` holds a strong reference to the singleton `SubagentOrchestrator`. The orchestrator's `spawnAgent()` async closure captures `$childContext`, which holds the orchestrator. Each subagent level replicates this cycle. + +**Fix:** Use `WeakReference` for the orchestrator in `AgentContext`, or extract only scalars into the async closure. + +--- + +### H3. SubagentOrchestrator `$pendingResults` — Orphaned Results Accumulate + +**File:** `src/Agent/SubagentOrchestrator.php:31–32` + +Background agent results (both success and failure) are stored in `$this->pendingResults[$parentId][$id]`. If a parent crashes or never calls `collectPendingResults()`, these accumulate forever. + +**Fix:** Add TTL or size cap to `pendingResults`. Prune orphaned entries in `pruneCompleted()`. + +--- + +### H4. Group Semaphores Never Evicted + +**File:** `src/Agent/SubagentOrchestrator.php:381–384` + +```php +return $this->groups[$name] ??= new LocalSemaphore(1); +``` + +Every unique group name creates a `LocalSemaphore` that is never removed. + +**Fix:** Clear `$this->groups` in `pruneCompleted()` when no active agents reference a group. + +--- + +### H5. Widget Tree Grows Unboundedly in TUI + +**File:** `src/UI/Tui/TuiRenderer.php` (multiple methods) + +`$this->conversation` ContainerWidget accumulates every widget ever added. Each `showToolCall()`, `showToolResult()`, `showSubagentSpawn()` adds permanent widgets. Only cleared on explicit `/new` or `/clear`. 
+ +**Trigger:** Long sessions with hundreds of tool calls. +**Fix:** Implement a scrolling window — remove widgets beyond N turns, or collapse old tool results into summary widgets. + +--- + +### H6. Prism Upstream: `StreamState::reset()` Doesn't Clear `thinkingSummaries` + +**File:** `vendor/prism-php/prism/src/Streaming/StreamState.php:152,383–403` + +`reset()` is called between tool-call turns in multi-step streaming, but `thinkingSummaries` array is never cleared. Grows across turns for models with extended thinking. + +**Fix:** Upstream bug report. Patch: add `$this->thinkingSummaries = [];` to `reset()`. + +--- + +### H7. Prism Upstream: New Provider Instance per Request + +**File:** `vendor/prism-php/prism/src/PrismManager.php:40–56` + +Every `PrismService::chat()` call creates a fresh provider + `PendingRequest` HTTP client. No caching. Causes GC pressure in tight loops. + +**Fix:** Cache resolved providers in `PrismManager`, or in `PrismService`. + +--- + +### H8. ConversationHistory Tool Result `args` Not Freed After Pruning ✅ Fixed + +**File:** `src/Agent/ConversationHistory.php:15` + +`pruneToolResults()` replaces `result` with a placeholder but leaves `args` intact. Large args (file contents for edits) persist for the entire session. + +**Fix:** Null out `args` on pruned/superseded tool results. + +--- + +### H9. SQLite PDO Connection Never Explicitly Closed + +**File:** `src/Session/Database.php:9,25` + +No `close()` method, no `__destruct()`. WAL journal mode enabled but never checkpointed. WAL file can grow without bound. + +**Fix:** Add `close()` method and call from a shutdown handler. Add periodic `PRAGMA wal_checkpoint(TRUNCATE)`. + +--- + +### H10. PrismService Uses No Connection Pooling (Guzzle Path) + +**File:** `src/LLM/PrismService.php:113` + +PrismService uses Laravel's `Http` facade (Guzzle under the hood), not Amp. Each request creates and tears down a fresh TCP+TLS connection. No connection reuse. 
+ +**Fix:** Enable Guzzle connection pooling or share a Guzzle client instance. + +--- + +### H11. Full Message History Loaded on Session Resume + +**File:** `src/Session/SessionManager.php:96–106`, `src/Session/MessageRepository.php:53–68` + +On resume, `loadActive()` deserializes ALL non-compacted messages into memory via `fetchAll()`. For a long session with thousands of messages containing tool results with full file contents, this causes a significant memory spike. + +**Fix:** Implement lazy loading or cursor-based pagination for message history. + +--- + +### H12. `onRetry` Closure Captures UIManager in Singleton + +**File:** `src/Agent/AgentSessionBuilder.php:74–78` + +`$llm->setOnRetry(function (...) use ($ui) { ... })` captures the UIManager in a closure stored on the `RetryableLlmClient` singleton. Circular retention: container → LLM singleton → closure → UIManager. + +**Fix:** Use `WeakReference` for `$ui` inside the closure. + +--- + +## Medium Findings + +| # | Finding | File | Fix | +|---|---------|------|-----| +| M1 | `SubagentTool` closure captures entire `SubagentFactory` + ancestor registries | `SubagentFactory.php:56–58` | Extract only config, not `$this` | +| M2 | `streamBuffer` not cleared on interrupted streaming | `AnsiRenderer.php:22,127` | Clear in error handler | +| M3 | `lastToolArgs` holds large strings between tool calls | `TuiRenderer.php:114` | Clear after consuming | +| M4 | GlobTool collects ALL results before truncating to 200 | `GlobTool.php:78–113` | Short-circuit at 200 | +| M5 | `register_shutdown_function` accumulates on repeated animation calls | `AnsiTheogony.php:84` et al | Register once with static flag | +| M6 | Memories accumulate indefinitely (no TTL/count limit) | `Session/MemoryRepository.php` | Add configurable limit + auto-prune | +| M7 | `forProject()` loads all memories without LIMIT | `Session/MemoryRepository.php:40–55` | Add LIMIT clause | +| M8 | Compacted messages flagged but never deleted from DB | 
`Session/MessageRepository.php:89–95` | Periodic `DELETE WHERE compacted = 1` | +| M9 | WAL file never checkpointed | `Session/Database.php:30` | Periodic `PRAGMA wal_checkpoint(TRUNCATE)` | +| M10 | Compaction stores raw summary as redundant memory | `Agent/ContextManager.php:142–156` | Skip raw summary, store only extracted memories | +| M11 | `OutputTruncator` files accumulate, cleanup only at construction | `Agent/OutputTruncator.php:23` | Call `cleanupOldFiles()` periodically | +| M12 | `FileEditTool` temp file not cleaned on crash | `Tool/Coding/FileEditTool.php:135–178` | Add `@unlink($tmpPath)` in finally block | +| M13 | `FileReadTool` doubles memory for under-threshold files | `Tool/Coding/FileReadTool.php:59` | Use streaming for all files or lower threshold | +| M14 | SubagentDisplayManager old containers never removed from conversation | `SubagentDisplayManager.php:118` | Remove old containers or prune | + +--- + +## Low Findings + +| # | Finding | File | +|---|---------|------| +| L1 | `AgentLoop` no `dispose()` method | `AgentLoop.php:21–606` | +| L2 | Kernel singletons held for process lifetime | `Kernel.php:299–341` | +| L3 | `Facade::setFacadeApplication()` static holds container | `Kernel.php:258–259` | +| L4 | `SessionGrants` unbounded growth (bounded by ~15 tool count) | `Tool/Permission/SessionGrants.php` | +| L5 | `hasRipgrep()` process stdout/stderr not consumed | `GrepTool.php:90–92` | +| L6 | Non-timeout BashTool exceptions don't explicitly kill process | `BashTool.php:70–96` | +| L7 | `ToolExecutor` static `BashTool::$progressCallback` not cleared on exception | `ToolExecutor.php:140–150` | +| L8 | `FutureState` unhandled error thrown on GC for unconsumed errored futures | `vendor/amphp/amp` | +| L9 | `resetSessionCost()` doesn't reset history | `AgentLoop.php:489–493` | +| L10 | Event loop `disable()` keeps closure in callbacks array (must use `cancel()` to free) | `vendor/revolt/event-loop` | + +--- + +## Vendor Library Risks + +### 
amphp/http-client + +| Risk | Severity | Description | +|------|----------|-------------| +| Unlimited connection pool | **Medium** | `buildDefault()` uses `PHP_INT_MAX` limit. 64-idle-connection eviction only for idle connections, no time-based TTL | +| Connection leak on abandoned response | **High** | If response body not fully consumed and `Response` object kept alive, connection never returns to pool. GC-dependent cleanup via destructor *closes* the connection rather than returning it | +| Reference cycles in cancellation chain | **Medium** | `DeferredCancellation` → `Cancellable` → callbacks → connection → response body → cycle. Requires PHP cycle collector | + +### amphp/amp + +| Risk | Severity | Description | +|------|----------|-------------| +| `FutureState` unhandled error on GC | **Medium** | Errored Futures that are never consumed (`await()`, `catch()`, or `ignore()`) throw `UnhandledFutureError` from destructor into event loop | +| `DeferredCancellation` destructor auto-cancels | **Low** | Safety feature, but creates unnecessary event loop noise when background agents complete successfully | + +### amphp/process + +| Risk | Severity | Description | +|------|----------|-------------| +| Pipe buffers on kill | **Low** | OS pipe buffer (~64KB) can block child if full when killed | +| Static WeakMaps | **Low** | Self-cleaning, but stdout/stderr references elsewhere keep `ProcHolder` alive | + +### prism-php/prism + +| Risk | Severity | Description | +|------|----------|-------------| +| O(N²) message storage in Text handler | **High** | Each `Step` stores full message history. Multi-turn tool calls in Prism's internal loop accumulate quadratically. Not impactful for KosmoKrator since tool calls are driven externally | +| `StreamState::$thinkingSummaries` never cleared by `reset()` | **Low** | Upstream bug. 
Only freed when handler is discarded | +| PrismManager creates new providers each call | **Low** | No caching but objects are lightweight | + +### revolt/event-loop + +| Risk | Severity | Description | +|------|----------|-------------| +| Callbacks not freed on `disable()` | **Low** | Must use `cancel()` to free closures from `$callbacks` array | +| Callbacks remain in memory after `stop()` | **Low** | Loop stop doesn't clear `$callbacks`. Timers fire again if loop restarts | + +--- + +## Async/Event-Loop Pattern Audit + +### EventLoop::repeat() — 5 Call Sites + +| Location | Timer | Cancelled When | Leak Risk | +|----------|-------|----------------|-----------| +| `TuiAnimationManager.php:214` | Compacting (30fps) | `clearCompacting()` | No finally/destructor guard | +| `TuiAnimationManager.php:386` | Breathing (30fps) | `enterTools()`, `enterIdle()` | Relies on phase transition | +| `SubagentDisplayManager.php:203` | Elapsed (20fps) | `stopLoader()` → `cleanup()` | Relies on `enterIdle()` chain | +| `TuiRenderer.php:749` | Tool executing (50fps) | `clearToolExecuting()` | No finally guard | +| `TuiModalManager.php:463` | Dashboard (0.5fps) | After `$suspension->suspend()` returns | Safe | + +### EventLoop::delay() — 1 Call Site + +| Location | Purpose | Cancelled When | Leak Risk | +|----------|---------|----------------|-----------| +| `BashTool.php:71` | Process timeout | Happy path only (line 84) | **NOT cancelled on exception** | + +### Amp\async() — 4 Call Sites + +| Location | Captures | Leak Risk | +|----------|----------|-----------| +| `BashTool.php:83` | `$process`, `$progressCb` | Safe — always awaited | +| `BashTool.php:95` | `$process->getStderr()` | Safe | +| `GrepTool.php:65` | `$process->getStdout()` | Safe | +| `GrepTool.php:66` | `$process->getStderr()` | Safe | + +### Process::start() — 3 Call Sites + +| Location | Timeout | Cleanup | Leak Risk | +|----------|---------|---------|-----------| +| `BashTool.php:70` | Yes (configurable) | `join()` 
+ `kill()` on timeout | Process not killed on non-timeout exception | +| `GrepTool.php:64` | **No timeout** | `join()` | **No timeout, no try/catch** | +| `GrepTool.php:90` | No | `join()` | Stdout/stderr not consumed (minor) | + +### DeferredCancellation — 3 Usage Sites + +| Location | Cleanup | Leak Risk | +|----------|---------|-----------| +| `SubagentOrchestrator.php:82` | `finally` block + `cancelAll()` | Safe | +| `TuiRenderer.php:515` | Nulled on Idle phase | Safe | +| `TuiAnimationManager.php:304` | Passed through, not owned | Safe | + +### LocalSemaphore — 2 Usage Sites + +| Location | Release | Leak Risk | +|----------|---------|-----------| +| `SubagentOrchestrator.php:48` (global) | `finally` block | Safe | +| `SubagentOrchestrator.php:383` (groups) | `finally` block | Map never shrinks | + +### Suspension::suspend() — 8 Call Sites + +All modal methods follow create → suspend → resume → cleanup pattern. All exit paths resume the suspension. **Safe.** + +### EventLoop::onSignal() — 0 Call Sites + +No custom signal handlers. Safe. + +--- + +## Positive Findings (Clean) + +| Area | Assessment | +|------|-----------| +| **No static mutable state in `src/Agent/`** | Grep for `static (private\|protected\|public) \$` returned zero matches | +| **Event classes** | All 5 event classes are `readonly` value objects. No leak risk | +| **ANSI rendering** | No static mutable state. All buffers reset per operation. Particle arrays are method-local and bounded | +| **Theogony** | 1997 lines but ~1.2KB static data, loaded only on demand. Method-local particle arrays | +| **MarkdownToAnsi** | Properly resets all buffers per `render()`. Highlighter is stateless | +| **DiffRenderer** | All state is method-local. 
Lazy Highlighter is a single reusable instance | +| **Theme** | Pure static utility, no mutable state | +| **AgentDisplayFormatter/AgentTreeBuilder** | Pure static methods, no instance state | +| **Repositories** | Stateless wrappers around PDO queries, no caching | +| **Kernel container** | No circular dependencies in singleton registrations | +| **Semaphore release** | Always in `finally` blocks with `Lock::__destruct()` as safety net | +| **ToolResultDeduplicator** | Method-local index arrays, GC'd after return | +| **StuckDetector** | Window bounded by `array_slice` to `$windowSize` (default 8) | +| **Modal lifecycle** | All modals properly remove widgets after dismissal | +| **FileWriteTool** | Stateless, no handles | +| **Permission system** | All immutable value objects, `SessionGrants` bounded by tool count | + +--- + +## Recommended Fix Plan + +### Priority 1 — High Impact, Easy Fixes + +| Fix | Effort | Impact | +|-----|--------|--------| +| Add `'failed'` to `pruneCompleted()` terminal states | Trivial (1 line) | Eliminates C3 — failed agent accumulation | +| Add `__destruct()` to `SubagentOrchestrator` calling `cancelAll()` | Trivial | Eliminates H1 — orphaned background agents | +| Move BashTool `EventLoop::cancel($timerId)` to `finally` block | Small | Eliminates C7 — timer leak on exception | +| Cache `hasRipgrep()` result in GrepTool | Small | Eliminates C5 — 2× extra processes per grep | +| Add timeout watchdog to GrepTool | Small | Eliminates C6 — hung process blocking loop | +| Harden `TuiRenderer::teardown()` to cancel all timers | Small | Eliminates C4 — timer leaks on exit | + +### Priority 2 — High Impact, Medium Effort + +| Fix | Effort | Impact | +|-----|--------|--------| +| Share single `HttpClient` across subagents, bound pool to 8 | Medium | Eliminates C1 — N connection pools | +| Add `TuiAnimationManager::shutdown()` method | Small | Timer cleanup infrastructure | +| Null out `args` on pruned tool results | Small | Reduces H8 — 
retained file contents | +| Add periodic WAL checkpoint to Database | Small | Reduces H9 — WAL file growth | + +### Priority 3 — Architectural Improvements + +| Fix | Effort | Impact | +|-----|--------|--------| +| Implement conversation widget pruning in TUI | Medium | Reduces H5 — widget accumulation | +| Add compaction support in headless mode | Medium | Reduces C2 — unbounded subagent history | +| Use `WeakReference` for orchestrator in `AgentContext` | Medium | Breaks Cycle 1 partially | +| Lazy-load message history on resume | Large | Reduces H11 — full history load spike | +| Add memory count limit with auto-pruning | Medium | Reduces M6/M7 — unbounded memories | +| Delete compacted messages from DB periodically | Small | Reduces M8 — DB bloat | diff --git a/docs/ecosystem/kosmokrator/audits/ram-audit/RAM-EFFICIENCY-AUDIT.md b/docs/ecosystem/kosmokrator/audits/ram-audit/RAM-EFFICIENCY-AUDIT.md new file mode 100644 index 0000000..ded13c1 --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/ram-audit/RAM-EFFICIENCY-AUDIT.md @@ -0,0 +1,1773 @@ +# KosmoKrator RAM Efficiency Audit — Comprehensive Report + +**Project:** KosmoKrator — AI coding agent for the terminal +**Audit Date:** 2026-04-03 +**Status:** Phase 1 & 2 Synthesis Complete +**PHP Version:** 8.4 +**Architecture:** CLI (Symfony Console + Illuminate Container + Amp Event Loop) + +--- + +## 1. Executive Summary + +### Overall Assessment + +KosmoKrator demonstrates **generally sound memory management** with bounded history compaction, no classic leaks, and strong use of PHP 8.4 readonly features. However, **systematic caching omissions** and **unbounded accumulation vectors** create significant RAM efficiency risks in long-running or memory-intensive sessions. + +**Risk Rating:** 🔴 **HIGH** — Two critical unbounded-growth vectors and multiple high-impact caching gaps can cause progressive memory bloat. + +### Critical Issues (Address Immediately) + +| # | Issue | Location | Est. 
Impact | Effort | +|---|-------|----------|-------------|--------| +| C1 | Permission regex recompilation on every call | `PermissionRule::matchesGlob()` | 20–50 KB/request + CPU | 5 min | +| C2 | Tool schema regeneration per subagent | `ToolRegistry::toPrismTools()` | 1.8–7.5 MB with 30 subagents | 10 min | +| C3 | Instruction files re-read every session | `InstructionLoader::gather()` | 2–50 KB/session + I/O | 5 min | +| C4 | Subagent orchestrator unbounded retention | `SubagentOrchestrator::$agents`, `$stats`, `$pendingResults` | Unbounded (MB–GB) | 1 hr | +| C5 | MemoryRepository loads all rows every LLM round | `MemoryRepository::forProject()` | 100–500 MB for 10k memories | 2 hrs | +| C6 | TaskStore unbounded accumulation | `TaskStore::$tasks` | Unbounded (MB) | 1 hr | +| C7 | HTTP connection pool per AsyncLlmClient | `AsyncLlmClient` → `UnlimitedConnectionPool` | ~50–200 KB per subagent × N | 30 min | +| C8 | TUI animation timers not cancelled on teardown | `TuiAnimationManager` | Pins entire widget tree | 15 min | + +### Memory Hotspots (Highest Impact) + +| Component | File:Line | Growth Pattern | Estimated Footprint | +|-----------|-----------|----------------|---------------------| +| `ConversationHistory::$messages` | `Agent/ConversationHistory.php:19` | Monotonic (bounded by compaction) | 100–500 bytes/message | +| `SubagentOrchestrator::$agents` | `Agent/SubagentOrchestrator.php:31` | Unbounded (no auto-prune) | ~1 KB/active agent | +| `MemoryRepository::forProject()` result | `Session/MemoryRepository.php:65-88` | Full table load per call | 100–500 MB for 10k rows | +| `TaskStore::$tasks` | `Task/TaskStore.php:17` | Unbounded (no eviction) | ~200–300 bytes/task | +| `FileReadTool::$readCache` | `Tool/Coding/FileReadTool.php:21` | Unbounded (no eviction) | 10 KB–10 MB depending on files read | +| `ToolRegistry` tool instances | `Provider/ToolServiceProvider.php` | Static (20+ tools) | ~3–6 MB at boot | +| Kernel boot services | `Kernel.php` + 
providers | One-time spike | ~20–40 MB peak | + +### Priority Roadmap + +**Immediate Actions (<1 day, high impact):** +1. Add static regex cache to `PermissionRule::matchesGlob()` — saves 20–50 KB/request +2. Cache tool schemas in `ToolRegistry` — saves 1.8–7.5 MB with concurrency +3. Cache instruction files in `InstructionLoader` — saves 2–50 KB/session + I/O +4. Cache git root/branch — eliminates 200 shell calls/100 turns +5. Add `pruneCompleted()` auto-call in `SubagentOrchestrator` — stops unbounded growth +6. Bulk token fetch + in-memory cache in `SettingsCodexTokenStore` — saves 6 KB/request + DB load + +**Short-Term (1–2 weeks):** +7. Implement memory selection caching per turn — avoids 3–4× rescoring +8. Add LIMIT to `MemoryRepository::forProject()` — caps RAM spike +9. Truncate task tree rendering (max 50 tasks / 10 KB) — bounds prompt growth +10. Stream BashTool/GrepTool output with early truncation — prevents 100 MB spikes +11. Add LRU eviction to FileReadTool cache — bounds long-run growth +12. Share single HttpClient with bounded pool across subagents — saves 50–200 KB × N + +**Long-Term (1–3 months):** +13. Push memory scoring into SQL (ORDER BY score LIMIT 6) — eliminates O(N) in PHP +14. Implement task eviction policy (max 100 tasks, LRU) — bounds task memory +15. Add database indexes on `memories` (composite) — speeds queries, reduces rows scanned +16. Centralize edge storage in TaskStore — 50% edge memory reduction +17. Container compilation / opcache warmup — reduces boot memory 30–50% +18. Worker pooling for audio notifications — avoids 2× kernel boot per sound + +--- + +## 2. Methodology + +### Dimensions Investigated + +1. **Data Structures** — array copying patterns, object graphs, string handling, collection usage +2. **Caching Gaps** — repeated computations, missing memoization, no distributed cache +3. **PHP Internals** — PHP 8.4 features (readonly, enums), generator usage, closure captures, autoloader +4. 
**Async/Event Loop** — timer leaks, fiber suspension, promise accumulation, connection pooling +5. **Bootstrap & Container** — service registration, singleton lifetimes, boot memory spikes +6. **I/O & Streaming** — file handling, shell sessions, tool output buffering, database fetching +7. **Security-Adjacent** — permission evaluation, token storage, config parsing, credential exposure +8. **Architecture** — subagent orchestration, memory repository patterns, task tracking, event system +9. **UI Renderers** — TUI/ANSI renderers, animation state, diff rendering +10. **Audio/Notifications** — worker process lifecycle, IPC overhead, buffer management + +### Tools Used + +- **Static analysis:** ripgrep (`rg`), glob pattern searches, manual code review +- **Memory profiling:** `memory_get_usage()`, `memory_get_peak_usage()` (where available in code) +- **Benchmarking:** Custom PHP scripts in `docs/ram-audit/benchmarks/` (to be created) +- **Existing audits:** `docs/memory-leak-audit.md`, `docs/deep-audit-*.md` referenced +- **Synthesis agents:** 10 parallel sub-agents covering specialized domains + +### Benchmark Approach + +**No benchmark files were created** during this audit (agents were in read-only mode). 
The following benchmark suite is **recommended** for implementation: + +| Benchmark | Scenario | Metrics | +|-----------|----------|---------| +| `db-connection-memory.php` | Connection open/close cycles, singleton reuse | Per-connection memory delta, GC retention | +| `agent-loop-memory.php` | 100/500/1000 turns with 3 tools/turn | Memory growth curve, compaction triggers | +| `subagent-memory.php` | Spawn 10/30/100 concurrent subagents | Per-agent overhead, total peak | +| `tool-memory.php` | Concurrent tool execution, large file I/O | Tool-specific spikes, cache growth | +| `async-memory.php` | 100/500/1000 concurrent promises | Per-promise overhead, Fiber stack | +| `caching-memory.php` | Repeated token estimation, model resolution | Cache hit/miss impact | +| `datastructure-memory.php` | Array merge patterns, JSON encoding | Temporary allocation peaks | +| `ui-memory.php` | TUI/ANSI render cycles, animation frames | Render buffer growth, timer retention | +| `audio-memory.php` | 10/50/100 rapid completion sounds | Worker process memory, IPC overhead | +| `session-memory.php` | 1k/5k/10k session creations, message inserts | DB fetch strategies, connection reuse | + +**Measurement protocol:** +- Call `memory_reset_peak_usage()` (PHP 8.2+) before each operation and read `memory_get_peak_usage(true)` (real peak) after it, so each measurement reflects that operation rather than the process-wide high-water mark +- Run each scenario 5×, report median and max +- Test with `gc_collect_cycles()` forced between iterations +- Profile with `xhprof` or `tideways` if available (not used here) + +--- + +## 3. 
Detailed Findings by Area + +### 3.1 Security-Adjacent RAM Efficiency (synthesis-security.md) + +#### Finding SEC-1: Regex Compilation in Hot Path — PermissionRule::matchesGlob() + +**Severity:** 🔴 Critical +**Files:** `src/Tool/Permission/PermissionRule.php:51-60`, `src/Tool/Permission/Check/DenyPatternCheck.php:39`, `src/Tool/Permission/Check/BlockedPathCheck.php:66`, `src/Tool/Permission/GuardianEvaluator.php:106` + +**Issue:** Every call to `matchesGlob()` compiles a fresh regex via `preg_quote()` + `str_replace()` + `preg_match()`. This method is invoked: +- For each deny pattern in each matching rule (DenyPatternCheck) +- For each blocked path pattern (BlockedPathCheck, up to 4× per path) +- For each safe command pattern (GuardianEvaluator, O(p) per call) + +With ~50 tools, ~10 rules, ~5 deny patterns per rule, a single permission check can trigger **250+ regex compilations**. PHP's internal regex cache is limited and not guaranteed to hit. + +**RAM Impact:** Each compiled regex pattern string occupies ~200–500 bytes in memory. At 250 compilations per check × 10 concurrent requests = **~500 KB – 1.25 MB** of transient regex strings per request cycle, plus GC pressure. + +**Security Risk:** An attacker controlling tool arguments can force evaluation of many deny patterns, causing CPU/memory exhaustion. No rate limiting exists on permission checks. + +**Recommendation:** Add static regex cache to `PermissionRule`: + +```php +private static array $regexCache = []; +$key = $pattern; +if (!isset(self::$regexCache[$key])) { + self::$regexCache[$key] = '/^'.str_replace(['\*', '\?'], ['.*', '.'], preg_quote($pattern, '/')).'$/i'; +} +$regex = self::$regexCache[$key]; +``` + +**Effort:** 5 minutes. ~5–10 lines change. 
+ +--- + +#### Finding SEC-2: N+1 Token Storage Queries — SettingsCodexTokenStore + +**Severity:** 🔴 Critical +**Files:** `src/LLM/Codex/SettingsCodexTokenStore.php:32-38`, `src/LLM/Codex/SettingsCodexTokenStore.php:63-85` + +**Issue:** Token storage uses 7 individual settings keys (`provider.codex.*`). Every `current()` performs 7 separate SELECT queries; every `save()` performs 7 separate INSERT/UPDATE queries. No in-memory caching; every call hits SQLite. + +**RAM Impact:** Each query returns a row (~200–300 bytes). 7 queries × result set overhead × concurrent requests = **~1–2 KB per request** in short-lived DB result objects. More critically, **connection pool exhaustion** under load can cause queued requests to accumulate memory. + +**Security Risk:** Token refresh storms (multiple simultaneous requests triggering refresh) cause 7 writes + HTTP call per refresh, amplifying memory/CPU usage. No refresh debouncing. + +**Recommendation:** Replace 7 individual SELECTs with single bulk query: + +```sql +SELECT key, value FROM settings WHERE scope='global' AND key LIKE 'provider.codex.%' +``` + +Build token array from single result set. Add in-memory cache with 5-second TTL. + +**Effort:** 15 minutes. ~20 lines change. + +--- + +#### Finding SEC-3: Full Config Reload on Every Write — SettingsManager::reloadRepository() + +**Severity:** 🔴 Critical +**Files:** `src/Settings/SettingsManager.php:266-274` + +**Issue:** After any settings `set()` or `delete()`, `reloadRepository()` creates a **new ConfigLoader** and re-parses all 4 bundled YAML files + user + project config, then copies data into the Repository. This happens on every single settings write. + +**RAM Impact:** Total YAML size ~28 KB, but parsing creates intermediate arrays and objects. A full reload generates **~100–150 KB** of temporary arrays/objects per write, which are then GC'd. Under rapid successive writes (e.g., batch updates), this creates significant memory churn and can push PHP memory_limit. 
+ +**Security Risk:** An attacker with settings write access (or a buggy tool) can trigger repeated config reloads to exhaust memory. The pattern is predictable and not rate-limited. + +**Recommendation:** In `reloadRepository()`, update `$this->config` incrementally using the `$data` already loaded in `configTarget()`. Avoid full `ConfigLoader::load()`. + +**Effort:** 20 minutes. ~20 lines change. + +--- + +#### Finding SEC-4: No Path Resolution Cache — PathResolver::resolve() + +**Severity:** 🟠 High +**Files:** `src/Tool/Permission/PathResolver.php:21-39` + +**Issue:** `realpath()` syscall executed on every path check with no caching. `BlockedPathCheck` calls this for every file operation, and `GuardianEvaluator::isInsideProject()` calls it for every command. + +**RAM Impact:** Each `realpath()` result is a string (~256–1024 bytes). With 100 file checks per request, that's **25–100 KB** of repeated string allocations. Strings are duplicated in memory if same path resolved multiple times. + +**Security Risk:** Path traversal attacks cause repeated resolution of deep/nested paths, amplifying memory usage. No TTL or eviction on cache (because none exists). + +**Recommendation:** Add static cache to `PathResolver`: + +```php +private static array $cache = []; +$key = $path; +if (!isset(self::$cache[$key])) { + self::$cache[$key] = realpath($path); +} +return self::$cache[$key]; +``` + +**Effort:** 10 minutes. + +--- + +#### Finding SEC-5: Duplicate Rule Evaluation — DenyPatternCheck + RuleCheck + ModeOverrideCheck + +**Severity:** 🟠 High +**Files:** `src/Tool/Permission/Check/DenyPatternCheck.php:26-49`, `src/Tool/Permission/Check/RuleCheck.php:25-48`, `src/Tool/Permission/Check/ModeOverrideCheck.php:30-70` + +**Issue:** Rules are evaluated up to **3 times** in a single permission flow: +1. `DenyPatternCheck` iterates all rules, calls `matchesGlob()` for each deny pattern +2. 
`RuleCheck` iterates all rules again, calls `evaluate()` (which calls `matchesGlob()` again) +3. `ModeOverrideCheck` iterates all rules a third time if mode is Guardian + +**RAM Impact:** Each evaluation creates temporary arrays and regex strings. Triple evaluation multiplies memory churn by 3×. For 50 rules × 5 patterns = 750 regex compilations instead of 250. + +**Security Risk:** Complex permission rules (many deny patterns) are amplified 3×, making them a more effective DoS vector. + +**Recommendation:** Refactor check chain so `RuleCheck` returns both Deny and Ask states in one pass, and `ModeOverrideCheck` reuses that result instead of re-evaluating. + +**Effort:** 1–2 hours. + +--- + +### 3.2 Core Agent Memory Efficiency (synthesis-core-agent.md) + +#### Finding AGENT-1: Instruction Files Re-Read Every Session (No Cache) + +**Severity:** 🔴 Critical +**Files:** `src/Agent/InstructionLoader.php:26-85` + +**What:** `InstructionLoader::gather()` reads up to 5 files from disk on every session start: +- `~/.kosmokrator/instructions.md` +- `{git_root}/KOSMOKRATOR.md` +- `{git_root}/.kosmokrator/instructions.md` +- `{git_root}/AGENTS.md` +- `{cwd}/KOSMOKRATOR.md` + +**Impact:** +- **Memory:** Each file loaded as a string kept for session lifetime. Large `AGENTS.md` (common in monorepos) can be 10–100 KB. +- **I/O:** 3–5 `file_get_contents()` calls per session; `gitRoot()` uses `shell_exec()` (line 102). +- **Frequency:** Once per session, but sessions are frequent in REPL usage. + +**Why critical:** This is **pure waste** — instruction files change rarely (user edits or git commits). No technical reason exists to re-read them. Static property cache would eliminate all I/O and string allocation. + +**Recommendation:** Add `static ?string $cached = null` to `gather()`. On first call, read files and store. Subsequent calls return cached string. + +**Effort:** 5 minutes. 
+ +--- + +#### Finding AGENT-2: Tool Schema Regenerated on Every Subagent Spawn + +**Severity:** 🔴 Critical +**Files:** `src/Tool/ToolRegistry.php:67-103`, `src/Agent/SubagentFactory.php:105` + +**What:** `ToolRegistry::toPrismTools()` converts each tool to a `PrismTool` object with full parameter schema on every call. Called: +- Once at main `AgentLoop` setup (`AgentSessionBuilder:133`) +- **Once per subagent** (`SubagentFactory:105`) — subagents spawn frequently + +**Impact:** +- **Memory:** ~30–50 tools × ~10 parameters each = 300–500 parameter objects per call. Each `PrismTool` + parameter objects ≈ 200–500 bytes → **60–250 KB per subagent** wasted. +- **CPU:** Object allocation + method calls repeated unnecessarily. +- **Frequency:** Every subagent creation (default concurrency 10, depth 3 → potentially 30+ subagents per session). + +**Why critical:** Tool schemas are **static metadata** — they never change at runtime. Rebuilding them is pure allocation bloat. Subagent memory isolation is good, but this duplicates static data across all subagents. + +**Recommendation:** Add private `?array $cachedPrismTools = null` to `ToolRegistry`. In `toPrismTools()`, check cache; if null, build and store. Invalidate only when `register()`/`unregister()` called (rare). + +**Effort:** 10 minutes. + +--- + +#### Finding AGENT-3: Repeated Git Shell Calls Every Turn + +**Severity:** 🟠 High +**Files:** `src/Agent/ProtectedContextBuilder.php:24-50`, `src/Agent/InstructionLoader.php:102` + +**What:** `ProtectedContextBuilder::build()` calls: +- `InstructionLoader::gitRoot()` — `shell_exec('git rev-parse --show-toplevel')` +- `InstructionLoader::gitBranch()` — `shell_exec('git branch --show-current')` + +Every time protected context is built, which is **every turn** (via `ContextManager::buildSystemPrompt()`). + +**Impact:** +- **Memory:** Each `shell_exec()` returns a string (~20–100 bytes). Strings are short-lived but allocated every turn. 
+- **I/O:** Two subprocess calls per turn. At 100 turns → 200 shell executions. Significant overhead. +- **Latency:** Each call takes ~1–5 ms; cumulative delay noticeable. + +**Why high:** Git state changes infrequently. Caching with `static ?string` (per-request) or session-scoped property would eliminate all repeated calls. No invalidation needed except on explicit git events. + +**Recommendation:** Add `static ?string $cachedRoot` and `static ?string $cachedBranch` to respective methods. Cache result for lifetime of request. + +**Effort:** 5 minutes per method. + +--- + +#### Finding AGENT-4: Task Tree Rendering Unbounded + +**Severity:** 🟠 High +**Files:** `src/Task/TaskStore.php` (referenced in `ContextManager:270`) + +**What:** `ContextManager::buildSystemPrompt()` appends `$this->taskStore->renderTree()` to system prompt every turn. No truncation limit observed. + +**Impact:** +- **Memory:** Task tree grows linearly with number of tasks created. Each task adds ~50–200 chars to rendered string. +- **Prompt bloat:** Unbounded task list consumes context window, forcing earlier compaction. +- **Frequency:** Every turn. + +**Why high:** Long-running sessions with many decomposed tasks could see task tree reach **tens of KB**. This directly competes with conversation history for context space. Should have hard limit (e.g., last 50 tasks, or 10 KB max). + +**Recommendation:** Add configurable limit: `max_tasks: 50` or `max_chars: 10240`. Truncate oldest tasks first. Return `"... truncated N tasks"` note. + +**Effort:** 15–30 minutes. + +--- + +### 3.3 I/O Performance & Memory (synthesis-io-performance.md) + +#### Finding IO-1: FileReadTool Unbounded Cache + +**Severity:** 🟡 Medium +**Files:** `src/Tool/Coding/FileReadTool.php:21,70-72,103-104` + +**Issue:** `$readCache` array grows unbounded across process lifetime; no eviction policy. Cache stores boolean flags per `(path, mtime, offset, limit)` key. 
+ +**Impact:** Estimated 10 KB–10 MB in long-running sessions with many file reads (e.g., codebase exploration), growing with the number of distinct cache keys. Current state: cache stores only booleans, minimizing per-entry footprint; FileReadTool is a singleton in ToolRegistry, so entries persist for the whole process lifetime. + +**Recommendation:** Add LRU eviction with configurable max entries (e.g., 1000) or TTL (e.g., 1 hour). Consider per-AgentContext cache instead of singleton. + +**Effort:** 30 minutes. + +--- + +#### Finding IO-2: BashTool Full Output Buffering + +**Severity:** 🟡 Medium +**Files:** `src/Tool/Coding/BashTool.php:96-108` + +**Issue:** Stdout and stderr fully buffered in memory via `buffer()` before OutputTruncator runs. Commands producing >100 MB output will spike RAM; no streaming to disk or early truncation. + +**Current mitigation:** OutputTruncator caps at 2000 lines / 50 KB but runs **after** tool returns (ToolExecutor line 300-302). + +**Recommendation:** Stream stdout/stderr directly to `OutputTruncator` during read loop, applying line/byte limits incrementally. Or add `stream_to_file` parameter for outputs >1 MB. Enforce per-command output limit with early process kill. + +**Effort:** 1–2 hours. + +--- + +#### Finding IO-3: Subagent PendingResults Orphaned + +**Severity:** 🟡 Medium +**Files:** `src/Agent/SubagentOrchestrator.php:34,420` + +**Issue:** `$pendingResults[$parentId]` never cleared if parent agent crashes or exits without calling `collectPendingResults()`. Results (strings, potentially KB–MB each) accumulate per background subagent over time. + +**Current state:** Documented in `docs/memory-leak-audit.md` as known issue; `pruneCompleted()` does not touch `$pendingResults`. + +**Recommendation:** Add TTL (e.g., 1 hour) to `$pendingResults` entries with timestamp. Or prune `$pendingResults[$parentId]` when all agents for that parent reach terminal state. + +**Effort:** 20 minutes. 
+ +--- + +#### Finding IO-4: GlobTool Intermediate Array Buildup + +**Severity:** 🟢 Low +**Files:** `src/Tool/Coding/GlobTool.php:93-99` + +**Issue:** `array_merge()` inside recursion loops creates O(n²) intermediate arrays for deep directory trees. + +**Impact:** Temporary memory spikes during glob operations on nested structures; 10k files in nested tree → ~10 MB temporary. + +**Current mitigation:** Result set capped at 200 files after full sort/deduplication (lines 59-62). + +**Recommendation:** Apply 200-file cap earlier in recursion to avoid building full array. Replace `array_merge()` with generator-based yielding to eliminate intermediate arrays. + +**Effort:** 1 hour. + +--- + +### 3.4 Architecture & Service Container (synthesis-architecture.md) + +#### Finding ARCH-1: Subagent Orchestrator Unbounded Retention + +**Severity:** 🔴 Critical +**Files:** `src/Agent/SubagentOrchestrator.php:31-34, 392-409, 420-428, 471` + +**Issue:** The orchestrator stores: +- `$agents`: Future objects keyed by agent ID — never pruned automatically +- `$stats`: SubagentStats objects — never pruned automatically +- `$pendingResults`: Background results keyed by parent ID — cleared only via explicit `collectPendingResults()` +- `$groups`: Semaphore objects per unique group name — never removed + +**Impact:** Each completed agent retains ~500–1000 bytes of closure/future overhead + captured context. With hundreds of agents over a long session, this grows to **tens of MB**. Background results can be KB–MB each and linger indefinitely if parent never collects. + +**Why critical:** This is a **classic memory leak pattern** — global mutable registry with no TTL, no weak references, no size limits. + +**Recommendations:** +1. Call `pruneCompleted()` automatically after each agent finishes or via periodic timer (e.g., every 10 completions). +2. Track reference count per group; when last agent in a group completes, `unset($this->groups[$group])`. +3. 
When a parent agent finishes, automatically call `collectPendingResults($parentId)` to free result strings. + +**Effort:** 1 hour total. + +--- + +#### Finding ARCH-2: MemoryRepository Unbounded Fetch + +**Severity:** 🔴 Critical +**Files:** `src/Session/MemoryRepository.php:65-88`, `src/Session/SessionManager.php:276-281` + +**Issue:** `MemoryRepository::forProject()` executes `SELECT * FROM memories` with no LIMIT, no filters pushed down. Fetches **all** memory rows into PHP (could be thousands). Called on every LLM round via `SessionManager::getMemories()` — 3–4 times per user turn. + +**Impact:** With 10,000 memories, each fetch loads 100–500 MB into PHP memory. Repeated 3–4× per turn = **300–2000 MB** of repeated allocation/GC churn. Even with 1000 memories, that's 10–50 MB per round. + +**Why critical:** This is a **repeated-query problem** (the same unbounded query runs 3–4 times per turn) compounded by **full-table scans and in-memory sorts**. MemorySelector then scores all rows in memory and discards most of them. + +**Recommendations:** +- **Short-term:** Add `LIMIT 1000` to `forProject()` to cap rows; log warning if truncated. +- **Long-term:** Push scoring into SQL: `SELECT *, (CASE ...) AS score FROM memories WHERE … ORDER BY score DESC LIMIT 6`. Eliminate O(N) in PHP. + +**Effort:** Short-term 15 min; long-term 2–3 hours. + +--- + +#### Finding ARCH-3: TaskStore Unbounded Accumulation + +**Severity:** 🔴 Critical +**Files:** `src/Task/TaskStore.php:17, 62-84, 174-287` + +**Issue:** Tasks stored in simple associative array with: +- No persistence +- No eviction policy (only manual `/tasks clear` or REPL-triggered `clearTerminal()`) +- No pagination or depth limits +- Bidirectional edge storage (duplicate arrays) +- Full tree re-render on every task operation and at 30fps in TUI + +**Impact:** Each task ~200–300 bytes + edge arrays. Unbounded growth; for 100+ tasks in complex workflows, memory and CPU become excessive due to O(n) full scans and O(n²) worst-case rendering. + +**Recommendations:** +1. 
Add configurable `max_tasks` (e.g., 100) with LRU eviction. When adding a task exceeds limit, remove oldest non-terminal tasks. +2. After removing tasks in `clearTerminal()`/`clearAll()`, walk all remaining tasks and filter `blockedBy`/`blocks` arrays to remove IDs not in `$this->tasks`. +3. Reduce TUI refresh rate from 30fps to 5–10fps; use dirty flag to only re-render if tree changed. + +**Effort:** 2–3 hours total. + +--- + +#### Finding ARCH-4: Missing Database Indexes + +**Severity:** 🟠 High +**Files:** `src/Session/Database.php:128` + +**Issue:** Only index on `memories` is `idx_memories_project` (single column on `project`). Queries filter on `(project IS NULL OR project = ?)` plus `expires_at`, `memory_class`, `pinned`. Missing composite index. + +**Impact:** Full table scans for every `forProject()` and `search()` call. With 10k memories, each scan reads all rows → more memory loaded, slower queries. + +**Recommendation:** Add composite index: + +```sql +CREATE INDEX idx_memories_lookup ON memories(project, memory_class, type, expires_at, pinned DESC, created_at DESC); +``` + +Also add single-column indexes on `memory_class` and `type` if composite not feasible. + +**Effort:** 30 minutes (migration). + +--- + +### 3.5 Caching Strategies & Gaps (caching-strategies-gaps.md) + +#### Finding CACHE-1: No Token Estimation Memoization + +**Severity:** 🟡 Medium +**Files:** `src/Agent/TokenEstimator.php:17-108` + +**Issue:** `TokenEstimator::estimate()` calls `mb_strlen()` O(n) per string for every message every turn. No memoization; same messages re-estimated repeatedly. + +**Impact:** Cheap per-call but cumulative across long conversations. With 100 messages × 3 turns = 300 estimations. Could cache by a message content hash (e.g., `md5`). + +**Recommendation:** Add static in-memory cache keyed by `md5($message->content())`. Hashing the content is itself O(n), so compute the key once per message (or key the cache by message identity) — otherwise the lookup costs as much as the `mb_strlen()` call it replaces. Est. memory 5–50 KB (bounded by history size). + +**Effort:** 15 minutes. 
+ +--- + +#### Finding CACHE-2: No Model Resolution Cache + +**Severity:** 🟡 Medium +**Files:** `src/LLM/ModelDefinitionSource.php:72-104` + +**Issue:** `resolve()` uses exact match O(1) but substring fallback does O(n) linear scan of all models (100–150) on every miss. No result cache. + +**Impact:** Substring scan on every unknown model reference. With 100 models, still trivial (<1ms) but unnecessary. + +**Recommendation:** Add `$resolveCache` array to `ModelDefinitionSource`. Check cache before substring scan loop. Est. memory 10–100 KB. + +**Effort:** 10 minutes. + +--- + +#### Finding CACHE-3: No Permission Decision Cache + +**Severity:** 🟠 High +**Files:** `src/Tool/Permission/PermissionEvaluator.php:26-49` + +**Issue:** No decision cache; same tool+args re-evaluated every call. Permission check runs before EVERY tool call, including glob matching and `realpath()`. + +**Impact:** Full permission chain (glob + path resolution) repeated for repeated tool calls. Caching could save an estimated 30–50% of permission-check time. + +**Recommendation:** Add `$decisionCache` to `PermissionEvaluator`. Key: `md5($toolName . serialize($args))`. Invalidate on `grantSession()` or `resetGrants()`. Est. memory 10–200 KB. + +**Effort:** 20 minutes. + +--- + +#### Finding CACHE-4: Glob Pattern Pre-compilation Missing + +**Severity:** 🟡 Medium +**Files:** `src/Tool/Permission/PermissionRule.php:51-60` + +**Issue:** `matchesGlob()` compiles glob→regex on EVERY call via `str_replace` + `preg_quote`. Patterns repeat across calls. + +**Impact:** `preg_quote` is relatively expensive; patterns re-compiled repeatedly. Est. 5–20 KB of compiled patterns could be cached. + +**Recommendation:** Compile once in `PermissionRule` constructor, store compiled regex in private property. + +**Effort:** 10 minutes. 
+ +--- + +### 3.6 Data Structure Optimization (data-structure-optimization.md) + +#### Finding DS-1: array_merge in Loops (O(n²) Copies) + +**Severity:** 🔴 Critical +**Files:** `src/Agent/SubagentOrchestrator.php:426-428`, `src/Tool/Coding/GlobTool.php:102,115,118` + +**Issue:** +- `SubagentOrchestrator::collectPendingResults()`: `$all = array_merge($all, $bucket)` in loop copies entire `$all` each iteration. +- `GlobTool::globStar()`: recursive `array_merge` copies parent array on each merge. + +**Impact:** O(n²) total copy volume if many buckets or deep recursion. For 1000 files in nested tree, temporary memory spikes can reach **10 MB**. + +**Recommendation:** Collect the buckets and merge them once after the loop with `array_merge(...array_values($buckets))` (a single copy), or pre-allocate and assign by key. Replacing the call with `[...$all, ...$bucket]` inside the loop does not help: array unpacking (available since PHP 7.4) still builds a new array on every iteration. For `GlobTool`, yield results via generator instead of merging. + +**Effort:** 30 minutes. + +--- + +#### Finding DS-2: Unbounded Message/Task Accumulation + +**Severity:** 🔴 Critical +**Files:** `src/Agent/ConversationHistory.php:26`, `src/Task/TaskStore.php:17` + +**Issue:** +- `ConversationHistory::$messages` grows every turn; compaction replaces with summary + recent but old array copied via `array_slice` + spread before GC. +- `TaskStore::$tasks` holds all tasks until manual clear; no eviction. + +**Impact:** Linear growth with session length. Peak memory during compaction = old + new array (temporary doubling). Task memory unbounded. + +**Recommendations:** +- Use `array_splice` (in-place) instead of `array_slice` + reassignment in `ConversationHistory::compact()`. +- Add task eviction policy (max 100 tasks, LRU) to `TaskStore`. + +**Effort:** 20 min + 1 hr. + +--- + +#### Finding DS-3: JSON Encoding in Tight Loops + +**Severity:** 🟠 High +**Files:** `src/Agent/TokenEstimator.php:83`, `src/Agent/StuckDetector.php:45`, `src/Agent/ToolResultDeduplicator.php:155-157` + +**Issue:** `json_encode($tc->arguments())` per tool call for signature generation. Repeated encoding of same arguments. 
+ +**Impact:** Temporary string allocation per tool call. For many tool results, allocates many temporary strings (100+ tool calls = 100+ JSON strings). + +**Recommendation:** Cache JSON encoding of tool arguments by signature (already computed for deduplication key). Reuse. + +**Effort:** 15 minutes. + +--- + +### 3.7 PHP Internals & Language Features (php-internals-memory.md) + +#### Finding PHP-1: Readonly Properties — Excellent Adoption + +**Status:** ✅ Positive +**Files:** Throughout (`Session/SessionManager.php:30-38`, `Tool/Permission/PermissionResult.php:16-18`, `Agent/SubagentStats.php:44`) + +**Impact:** Readonly properties eliminate copy-on-write overhead. Since set once and never modified, PHP can safely share zval without separation. Excellent for DTOs and injected dependencies. + +**Recommendation:** Continue pattern. Consider extending to more DTOs (`AgentContext`, `CompactionPlan` if not already). + +--- + +#### Finding PHP-2: Generator Usage Underutilized + +**Severity:** 🟡 Medium +**Files:** `src/Session/MessageRepository.php:80`, `src/Session/MemoryRepository.php:87`, `src/Session/SessionRepository.php:62`, `src/Agent/SubagentOrchestrator.php:427-428`, `src/Agent/ContextCompactor.php:144`, `src/Agent/ConversationHistory.php:124` + +**Issue:** Generators used only once (streaming LLM responses in `PrismService.php:139`). Multiple locations load entire result sets with `fetchAll()` or `array_slice` where streaming would be superior. + +**Impact:** For large histories (1000+ messages), eager loads cause memory spikes. Could use `PDOStatement::fetch()` with generators or process pending results in buckets. + +**Recommendation:** Introduce generators for large dataset iteration where appropriate. Not urgent given expected data sizes but good practice. + +**Effort:** 1–2 hours for targeted refactoring. 
+ +--- + +#### Finding PHP-3: Closure Capture Risk in Long-Lived Collections + +**Severity:** 🟡 Medium +**Files:** `src/Agent/SubagentOrchestrator.php:133`, `src/UI/Tui/TuiAnimationManager.php:216`, `src/UI/Tui/SubagentDisplayManager.php:205`, `src/UI/Tui/TuiToolRenderer.php:267` + +**Issue:** Closures stored in long-lived collections (`$this->agents`, `$this->pendingResults`, `$cancellations`) capture use-variables, potentially including large objects (`AgentContext`, `agentFactory`). Timers capture `$this` pinning entire widget tree. + +**Impact:** Captured objects cannot be GC'd until closure completes. For subagents living minutes, this is by design but increases retention. Timer leaks (see async section) are worse. + +**Recommendation:** Audit closures stored in long-lived collections to ensure they don't inadvertently capture more than needed. Extract primitives instead of whole objects when possible. + +**Effort:** 1 hour audit. + +--- + +### 3.8 Async Event Loop & Fiber Memory (async-event-loop-memory.md) + +#### Finding ASYNC-1: HTTP Connection Pool per AsyncLlmClient + +**Severity:** 🔴 Critical +**Files:** `src/LLM/AsyncLlmClient.php:73`, `src/Agent/SubagentFactory.php:127` + +**Issue:** Each `AsyncLlmClient` instance gets its own `HttpClient` with `UnlimitedConnectionPool` (limit: `PHP_INT_MAX`). Concurrent subagents (depth 2–3) create multiple pools holding open sockets + TLS state indefinitely. No explicit close. + +**Impact:** Each pool holds connection resources (~50–200 KB per connection). With 10 concurrent subagents, that's 10 pools × potential connections = **500 KB – 2 MB** of idle connection state. No pooling benefit. + +**Recommendation:** Share a single `HttpClient` with bounded pool (e.g., `ConnectionLimitingPool::byAuthority(8)`) across all `AsyncLlmClient` instances. Inject via container as singleton. + +**Effort:** 30 minutes. 
+ +--- + +#### Finding ASYNC-2: TUI Animation Timers Not Cancelled on Teardown + +**Severity:** 🔴 Critical +**Files:** `src/UI/Tui/TuiAnimationManager.php:216,378`, `src/UI/Tui/SubagentDisplayManager.php:205`, `src/UI/Tui/TuiToolRenderer.php:267` + +**Issue:** +- `TuiAnimationManager` timers (`compactingTimerId`, `thinkingTimerId`) — no `shutdown()` method, `teardown()` doesn't cancel them. +- `SubagentDisplayManager::elapsedTimerId` — only cancelled when loader stops; may leak if TUI tears down mid-subagent. +- `TuiToolRenderer::toolExecutingTimerId` — only cancelled when tool clears; not on TUI teardown. + +**Impact:** Timers capture `$this` via closure, pinning entire TuiRenderer + widget tree in memory even after teardown. Each timer ~100–200 bytes but prevents GC of entire UI object graph (potentially MBs). + +**Recommendation:** +1. Add `TuiAnimationManager::shutdown()` to cancel both timers; call from `TuiCoreRenderer::teardown()`. +2. Ensure `SubagentDisplayManager::cleanup()` and `TuiToolRenderer::clearToolExecuting()` are called during teardown. +3. Move `BashTool` timeout cancellation into `finally` block (currently outside try/catch at line 112). + +**Effort:** 15–30 minutes. + +--- + +#### Finding ASYNC-3: No Streaming in AsyncLlmClient + +**Severity:** 🟠 High +**Files:** `src/LLM/AsyncLlmClient.php:291` + +**Issue:** `buffer($cancellation)` reads entire response body into memory. No streaming support. + +**Impact:** Large LLM responses (rare but possible) held fully in RAM before processing. Typically responses are <100 KB so impact moderate. + +**Recommendation:** Implement streaming with `onRead()` callback, processing chunks as they arrive. More involved; lower priority. + +**Effort:** 2–3 hours. 
+ +--- + +### 3.9 Bootstrap & Kernel (kernel-bootstrap.md) + +#### Finding BOOT-1: Eager Service Instantiation + +**Severity:** 🟡 Medium +**Files:** `src/Kernel.php:40-73`, `src/Provider/ToolServiceProvider.php:18-110`, `src/Provider/AgentServiceProvider.php` + +**Issue:** All providers registered eagerly; all singletons bound but not yet instantiated. However, `ToolRegistry` instantiates ~20 tool objects during registration even if never used (e.g., `setup` command doesn't need `FileWriteTool`). `AgentServiceProvider` constructs `AgentLoop`, `SubagentOrchestrator`, `ContextPipeline` — heavy. + +**Impact:** Boot memory spike ~20–40 MB before any agent work begins. Acceptable for CLI but could be lazy-loaded. + +**Recommendation:** Lazy-load heavy services. Defer `ToolRegistry` and `AgentServiceProvider` until `AgentCommand` executes. Use `$container->bind()` with factory closures instead of `singleton()` for rarely-used services. + +**Effort:** 1–2 hours. + +--- + +#### Finding BOOT-2: GeminiCacheStore Loads Entire JSON File + +**Severity:** 🟡 Medium +**Files:** `src/Provider/LlmServiceProvider.php:74-76` + +**Issue:** `GeminiCacheStore` reads entire `~/.kosmokrator/cache/gemini-cache.json` into memory on construction. If cache grows to 100 MB, every invocation loads 100 MB even if not using Gemini. + +**Impact:** Unbounded file-based cache growth loads fully into RAM each run. + +**Recommendation:** Stream JSON or use SQLite for large caches. Implement lazy loading with on-demand reads. + +**Effort:** 2 hours. + +--- + +#### Finding BOOT-3: No Container Compilation + +**Severity:** 🟢 Low +**Files:** `composer.json:65` + +**Issue:** No `bootstrap/cache/container.php` or compiled container. Every run re-parses all YAML, rebuilds all singletons. + +**Impact:** Boot time + memory overhead ~30–50% vs compiled container. Not a RAM leak but inefficiency. 
+ +**Recommendation:** Use Laravel's `php artisan optimize` or switch to Symfony's `ContainerBuilder` with `dump()` to generate compiled container. + +**Effort:** 1 hour setup. + +--- + +### 3.10 Audio Notifications (audio-notifications.md) + +#### Finding AUDIO-1: Worker Process Per Notification (Double Kernel Boot) + +**Severity:** 🟠 High +**Files:** `src/Audio/CompletionSound.php:167`, `src/Audio/compose_worker.php:26-27`, `src/Audio/compose_llm_worker.php:26-27` + +**Issue:** Each completion sound spawns **two full PHP kernel boots** sequentially: +1. `compose_worker.php` boots full kernel (~50–100 MB) +2. That worker spawns `compose_llm_worker.php` which also boots full kernel (~50–100 MB) + +**Impact:** For rapid-fire notifications (10–100 in quick succession), memory spikes temporarily (each kernel ~50–100 MB). GC pressure from repeated container construction/destruction. + +**Recommendation:** +1. Worker pooling: reuse a single long-lived `compose_worker.php` process for multiple notifications via IPC (socket/queue). +2. Move LLM call back to main worker instead of spawning `compose_llm_worker.php` — use `proc_open` with timeout directly in `compose_worker.php` to avoid second kernel boot. + +**Effort:** 3–4 hours. + +--- + +#### Finding AUDIO-2: ShellSession Buffer Never Truncated + +**Severity:** 🟡 Medium +**Files:** `src/Tool/Coding/ShellSession.php:18-64` + +**Issue:** `$buffer` accumulates all output; `readOffset` prevents re-reading but buffer never shrinks. + +**Impact:** Long-running shell sessions with continuous output accumulate MBs linearly. + +**Recommendation:** Add configurable max buffer size (e.g., 1 MB) and trim from start based on `readOffset`. + +**Effort:** 30 minutes. 
+ +--- + +### 3.11 Session & Persistence Layer (session-persistence.md) + +#### Finding PERS-1: Unbounded fetchAll() in MessageRepository & MemoryRepository + +**Severity:** 🔴 Critical +**Files:** `src/Session/MessageRepository.php:77-80, 102-111`, `src/Session/MemoryRepository.php:65-88`, `src/Session/SessionRepository.php:62` + +**Issue:** All repository methods use `$stmt->fetchAll()` loading complete result sets. No cursor-based streaming. Specific unbounded queries: +- `MessageRepository::loadActive()` — fetches all non-compacted messages for a session (could be thousands) +- `MessageRepository::loadRaw()` — fetches all messages without limit +- `MemoryRepository::forProject()` — fetches **all** non-expired memories (unbounded) +- `SessionRepository::listByProject()` — uses `LIMIT` (good) + +**Impact:** Memory scales linearly with result size. For 10k messages, could be 10–50 MB per fetch. Called repeatedly in agent loop. + +**Recommendation:** +- Use `while ($row = $stmt->fetch())` generator pattern for large result sets. +- Add pagination/limits where appropriate. +- For `forProject()`, push filters into SQL and use LIMIT (already covered in ARCH-2). + +**Effort:** 1–2 hours. + +--- + +#### Finding PERS-2: No Query Result Caching + +**Severity:** 🟡 Medium +**Files:** All repository classes + +**Issue:** No Redis/Memcached/APCu caching. Repeated reads (settings, session lookups) hit SQLite each time. + +**Impact:** DB load + memory churn from parsing results each call. Minor for local SQLite but scales poorly. + +**Recommendation:** Introduce PSR-6/16 cache for settings, session lookups, memory `forProject` results (with short TTL). + +**Effort:** 2 hours. + +--- + +#### Finding PERS-3: Missing Indexes + +**Severity:** 🟠 High +**Files:** `src/Session/Database.php:109,128` + +**Issue:** +- `messages(session_id, compacted)` — good, covers `loadActive()`. 
+- `memories(project)` only — `forProject()` also filters on `expires_at`, `memory_class`, `pinned` — missing composite index. +- `sessions(project, updated_at)` not indexed — `listByProject()` and `latest()` filter/order by this. + +**Impact:** Full table scans for common queries. More rows scanned = more memory loaded = slower. + +**Recommendation:** Add: +```sql +CREATE INDEX idx_memories_proj_ec ON memories(project, expires_at, memory_class); +CREATE INDEX idx_sessions_proj_updated ON sessions(project, updated_at DESC); +``` + +**Effort:** 30 minutes. + +--- + +### 3.12 Model Catalog & Pricing (model-catalog-pricing.md) + +**Status:** ✅ **Already Efficient** + +- Model catalog uses arrays (not objects) — ~20–45 KB total for 100–150 models. +- No caching needed — data immutable after construction. +- `resolve()` substring fallback O(n) but n=100–150, trivial. +- No RAM issues identified. + +**Recommendation:** None. Consider adding result cache to `resolve()` if profiling shows hotspot, but unlikely. + +--- + +### 3.13 Database Connection Pooling (database-connection-pooling.md) + +**Status:** ✅ **Adequate for CLI** + +- Single PDO connection per process (singleton). No connection pooling needed. +- No persistent connections. +- WAL mode enabled; no `busy_timeout` or `wal_checkpoint` set (H5, M9 in other audits — disk, not RAM). +- RAM per connection: ~50–150 KB. +- No leaks detected. + +**Recommendation:** None for RAM. Consider adding `PRAGMA busy_timeout` for concurrency robustness (not RAM-related). + +--- + +### 3.14 UI Renderers (ui-renderer-memory — not saved but findings incorporated) + +**Key findings from analysis:** +- TUI animation managers create high-frequency timers (30fps) that capture `$this` — covered in ASYNC-2. +- ANSI renderer uses `streamBuffer` that grows during streaming but cleared after — safe. +- Diff renderer builds large strings via concatenation — typical, not excessive. 
+- No major UI-specific RAM issues beyond timer leaks and animation state. + +--- + +## 4. Cross-Cutting Concerns + +### 4.1 Data Structure Patterns + +**Array copying epidemic:** The codebase uses `array_merge`, spread operator `[...$arr]`, and `array_slice` extensively, creating many temporary copies. Critical hotspots: +- `SubagentOrchestrator::collectPendingResults()` — O(n²) copies +- `GlobTool::globStar()` — O(n²) intermediates +- `ConversationHistory::compact()` — copies entire recent array +- `ContextCompactor::buildPlan()` — multiple `array_slice` on same data + +**Recommendation:** Replace `array_merge` in loops with single spread or pre-allocation. Use `array_splice` for in-place modification where possible. Consider generators for large result streaming. + +**String concatenation in loops:** `BashTool`, `FileReadTool`, `ShellSession` use `.=` in loops. PHP's string buffer doubling mitigates but still causes reallocation. For very large outputs (100 MB), this is significant. + +**Recommendation:** For large outputs, write directly to temp file or use `stream_copy_to_stream()` with chunking (already used in `FileEditTool` — good pattern). + +--- + +### 4.2 Caching Gaps Summary + +| Computation | Current Cost | Cache Opportunity | Est. 
Savings | Priority | +|------------|--------------|------------------|--------------|----------| +| Permission regex | 250+ compilations/check | Static cache in `PermissionRule` | 20–50 KB/req | HIGH | +| Tool schema build | 60–250 KB/subagent | Cache in `ToolRegistry` | 1.8–7.5 MB/session | HIGH | +| Instruction files | 3–5 disk reads/session | Static cache in `InstructionLoader` | 2–50 KB + I/O | HIGH | +| Token fetch | 7 DB queries/op | Bulk fetch + in-memory cache | 6 KB/req + DB load | HIGH | +| Path resolution | `realpath()` per path | Static cache in `PathResolver` | 25–100 KB/req | MEDIUM | +| Model resolution | O(n) scan on miss | Result cache in `ModelDefinitionSource` | 10–100 KB | MEDIUM | +| Permission decision | Full chain every call | Memoize by (tool, args) | 10–200 KB/req | HIGH | +| Git root/branch | 2 shell execs/turn | Static per-request cache | 200 ms latency | HIGH | +| Prompt split | 2 `substr()`/call | Static cache by prompt hash | 5–10 KB/call | LOW | +| Memory format | Re-group every turn | Cache by memory ID set | 1–5 KB/turn | LOW | + +**Total high-priority cache memory:** ~25–350 KB per request, with compute savings 30–70% in hot paths. + +--- + +### 4.3 PHP Internals Observations + +**Strengths:** +- Readonly properties extensively used — excellent for immutability and memory sharing. +- Enums for state machines — memory-efficient singleton-like instances. +- Constructor property promotion where used — clean initialization. +- No `serialize()`/`unserialize()` of large graphs. +- No `SplObjectStorage` or heavy collection libraries — native arrays only. + +**Weaknesses:** +- Generators underused — only 1 occurrence in production code. +- Closure captures in long-lived collections may retain more than needed. +- No typed properties beyond readonly (relies on PHPDoc) — minor performance penalty. +- Static variables only in tests — good (no function-static retention). 
+ +**Autoloader:** `optimize-autoloader: true` — class map generated, good. No `classmap-authoritative` but fine for CLI. + +--- + +### 4.4 Cross-Cutting Security-Adjacent Risks + +1. **Memory exhaustion DoS** — Permission regex compilation, token refresh storms, config write amplification all create predictable memory churn patterns exploitable by attackers. +2. **Credential exposure** — Repeated token reads from disk increase attack surface in shared hosting; more memory copies of secrets. +3. **Timing attacks** — Repeated disk I/O (config parse, instruction reads) increases latency variance, making timing attacks easier. +4. **No rate limiting** — Permission checks, token refreshes, config writes all unbounded — amplification vectors. + +**Recommendation:** Implement rate limiting at permission evaluator and token store levels. Add caching aggressively to reduce churn. + +--- + +## 5. Risk Matrix + +Severity × Likelihood matrix for RAM-related issues: + +| Severity \ Likelihood | High (Every request/turn) | Medium (Per session) | Low (Rare/Edge) | +|----------------------|---------------------------|---------------------|-----------------| +| **Critical** | Permission regex recompilation (SEC-1) — every permission check
Tool schema regen per subagent (AGENT-2) — every spawn
Subagent orchestrator leak (ARCH-1) — accumulates over session | Instruction file re-read (AGENT-1) — once/session but frequent
MemoryRepository unbounded fetch (ARCH-2) — every LLM round | Config write amplification (SEC-3) — only on settings writes | +| **High** | Duplicate rule evaluation (SEC-5) — 3× per check
HTTP pool per subagent (ASYNC-1) — per subagent spawn
TUI timer leaks (ASYNC-2) — persistent until teardown | Git shell calls (AGENT-3) — every turn
Task tree unbounded (AGENT-4) — every turn
Path resolution no cache (SEC-4) — every file check | Token no cache (SEC-7) — on every LLM call
Provider instantiation flood (SEC-8) — per provider resolve | +| **Medium** | FileReadTool cache unbounded (IO-1) — per file read
BashTool buffering (IO-2) — per command
PendingResults orphan (IO-3) — on parent crash | No token estimation cache (CACHE-1) — per message estimation
No model resolution cache (CACHE-2) — per model resolve | GlobTool array buildup (IO-4) — on large globs
JSON encoding loops (DS-3) — per tool call | +| **Low** | — | — | Generator underuse (PHP-2) — architectural
Container not compiled (BOOT-3) — boot only | + +**Interpretation:** +- **Critical-High likelihood:** Issues that occur on every hot path (permission checks, subagent spawn, LLM rounds) with severe impact — address immediately. +- **Critical-Medium:** Session-start or write-amplification issues — still urgent but less frequent. +- **High-High:** Turn-level overhead (git calls, task tree) — significant cumulative impact. +- **Medium-High:** Per-operation spikes (file reads, bash output) — moderate risk but can cause OOM on large inputs. + +--- + +## 6. Immediate Actions (<1 Day, High Impact) + +These are low-effort (<30 min each), high-impact fixes that should be deployed within 24–48 hours. + +### Action 1: Static Regex Cache in PermissionRule + +**File:** `src/Tool/Permission/PermissionRule.php:51-60` +**Change:** Add static cache array; compile once per pattern. + +```php +private static array $regexCache = []; + +public function matchesGlob(string $path): bool +{ + $key = $this->pattern; + if (!isset(self::$regexCache[$key])) { + $regex = '/^' . str_replace(['\*', '\?'], ['.*', '.'], preg_quote($this->pattern, '/')) . '$/i'; + self::$regexCache[$key] = $regex; + } + return preg_match(self::$regexCache[$key], $path) === 1; +} +``` + +**Impact:** Eliminates 90%+ of regex compilation overhead. Saves 20–50 KB per request, reduces CPU significantly. +**Effort:** 5 minutes. + +--- + +### Action 2: Cache Tool Schemas in ToolRegistry + +**File:** `src/Tool/ToolRegistry.php:67-103` +**Change:** Add private cache property; build once. + +```php +private ?array $cachedPrismTools = null; + +public function toPrismTools(): array +{ + if ($this->cachedPrismTools !== null) { + return $this->cachedPrismTools; + } + $tools = []; + foreach ($this->tools as $tool) { + $tools[] = $tool->toPrismTool(); // build + } + return $this->cachedPrismTools = $tools; +} +``` + +**Impact:** Saves 60–250 KB per subagent spawn. With 30 subagents, saves **1.8–7.5 MB**. +**Effort:** 10 minutes. 
+ +--- + +### Action 3: Cache InstructionLoader Gather Result + +**File:** `src/Agent/InstructionLoader.php:26-85` +**Change:** Static cache in `gather()` method. + +```php +public static function gather(): string +{ + static $cached = null; // PHP does not allow a type on function-level static variables + if ($cached !== null) { + return $cached; + } + // ... existing file reads ... + return $cached = $result; +} +``` + +**Impact:** Eliminates 3–5 disk reads per session; saves 2–50 KB string allocations. +**Effort:** 5 minutes. + +--- + +### Action 4: Cache Git Root/Branch + +**Files:** +- `src/Agent/InstructionLoader.php:102` (gitRoot) +- `src/Agent/ProtectedContextBuilder.php:57` (gitBranch) + +**Change:** Add static cache variables. + +```php +public static function gitRoot(): string +{ + static $root = null; // PHP does not allow a type on function-level static variables + if ($root === null) { + // shell_exec() returns null/false on failure; cast so trim() always receives a string + $root = trim((string) shell_exec('git rev-parse --show-toplevel')); + } + return $root; +} +``` + +**Impact:** Eliminates 2 shell execs per turn. At 100 turns, saves 200 subprocesses and ~200 ms latency. +**Effort:** 5 minutes per method (10 total). + +--- + +### Action 5: Auto-Prune Completed Subagents + +**File:** `src/Agent/SubagentOrchestrator.php:245-258, 392-409` +**Change:** Call `pruneCompleted()` automatically after each agent reaches terminal state, or via periodic timer. + +```php +private function markCompleted(string $id, string $state): void +{ + $this->agents[$id]->setState($state); + $this->pruneCompleted(); // Add this line +} +``` + +Or add timer in `runAgent()`: +```php +EventLoop::repeat(10, fn() => $this->pruneCompleted()); +``` + +**Impact:** Prevents unbounded growth of `$agents`, `$stats`, `$pendingResults`. Saves ~500 bytes–1 KB per completed agent. +**Effort:** 15 minutes. + +--- + +### Action 6: Bulk Token Fetch + In-Memory Cache + +**File:** `src/LLM/Codex/SettingsCodexTokenStore.php:32-38, 63-85` +**Change:** Replace 7 individual SELECTs with single query; add 5-second cache.
+ +```php +private ?CodexToken $cached = null; +private int $cachedAt = 0; + +public function current(): CodexToken +{ + if ($this->cached && (time() - $this->cachedAt) < 5) { + return $this->cached; + } + $rows = $this->db->connection()->query( + "SELECT key, value FROM settings WHERE scope='global' AND key LIKE 'provider.codex.%'" + )->fetchAll(); + // build token from $rows... + $this->cached = $token; + $this->cachedAt = time(); + return $token; +} +``` + +**Impact:** Reduces token load from 7 DB round-trips to 1. Saves ~6 KB/request + connection pool pressure. +**Effort:** 20 minutes. + +--- + +### Action 7: Add LIMIT to MemoryRepository::forProject() + +**File:** `src/Session/MemoryRepository.php:65-88` +**Change:** Add `LIMIT 1000` to query as safety valve. + +```php +$stmt = $this->db->connection()->prepare( + "SELECT * FROM memories WHERE (project IS NULL OR project = ?) AND expires_at > ? ORDER BY pinned DESC, created_at DESC LIMIT 1000" +); +``` + +**Impact:** Caps RAM spike at ~50–100 MB even with 10k memories (vs 500 MB). Prevents OOM. +**Effort:** 5 minutes. + +--- + +### Action 8: Share HttpClient Across AsyncLlmClient Instances + +**File:** `src/LLM/AsyncLlmClient.php:73`, `src/Agent/SubagentFactory.php:127` +**Change:** Make HttpClient singleton; inject via container. + +```php +// In LlmServiceProvider: +$this->container->singleton(HttpClient::class, fn() => + (new HttpClientBuilder())->withPool(ConnectionLimitingPool::byAuthority(8))->build() +); +// In AsyncLlmClient constructor, accept HttpClient $httpClient +``` + +**Impact:** Saves 50–200 KB per subagent × N concurrent. Also limits total connections to 8, preventing socket exhaustion. +**Effort:** 20 minutes. 
+ +--- + +### Action 9: Cancel TUI Timers on Teardown + +**Files:** +- `src/UI/Tui/TuiAnimationManager.php` — add `shutdown()` method +- `src/UI/Tui/TuiCoreRenderer.php` — call shutdown in `teardown()` +- `src/UI/Tui/SubagentDisplayManager.php` — ensure `cleanup()` called +- `src/UI/Tui/TuiToolRenderer.php` — ensure `clearToolExecuting()` called + +**Change (AnimationManager):** +```php +public function shutdown(): void +{ + if ($this->compactingTimerId !== null) { + EventLoop::cancel($this->compactingTimerId); + $this->compactingTimerId = null; + } + if ($this->thinkingTimerId !== null) { + EventLoop::cancel($this->thinkingTimerId); + $this->thinkingTimerId = null; + } +} +``` + +**Impact:** Releases closures pinning entire UI widget tree (potentially MBs). +**Effort:** 15 minutes. + +--- + +### Action 10: Move BashTool Timer Cancellation into finally + +**File:** `src/Tool/Coding/BashTool.php:87-112` +**Change:** Ensure timer cancelled even on exception. + +```php +$timerId = EventLoop::repeat($timeout, $checkTimeout); +try { + // ... existing code ... +} finally { + EventLoop::cancel($timerId); // Move here from after await +} +``` + +**Impact:** Prevents timer leak if process join throws. +**Effort:** 5 minutes. + +--- + +**Total immediate effort:** ~2–3 hours. +**Total immediate RAM savings:** ~5–15 MB per session + significant CPU/latency gains + security hardening. + +--- + +## 7. Short-Term Optimizations (1–2 Weeks) + +These require moderate effort (2–8 hours total) but yield substantial improvements. + +### Optimization 1: Permission Decision Memoization + +**File:** `src/Tool/Permission/PermissionEvaluator.php:26-49` +**Add:** `$decisionCache = []` property. In `evaluate()`: + +```php +$key = md5($toolName . 
serialize($args)); +if (isset($this->decisionCache[$key])) { + return $this->decisionCache[$key]; +} +$result = $this->evaluateChain($toolName, $args); +$this->decisionCache[$key] = $result; +return $result; +``` + +Invalidate in `resetGrants()`: `$this->decisionCache = [];` + +**Impact:** Avoids re-running full permission chain for repeated tool+args. Saves 30–50% permission check time. Est. memory 10–200 KB (bounded by session patterns). +**Effort:** 20 minutes. + +--- + +### Optimization 2: Path Resolution Cache + +**File:** `src/Tool/Permission/PathResolver.php:21-39` +**Add:** Static cache array. + +```php +private static array $cache = []; + +public static function resolve(string $path): ?string +{ + // array_key_exists() so that cached null (unresolvable path) also counts as a hit + if (array_key_exists($path, self::$cache)) { + return self::$cache[$path]; + } + $real = realpath($path); + // realpath() returns false on failure; normalise to null to honour ?string + self::$cache[$path] = $real === false ? null : $real; + return self::$cache[$path]; +} +``` + +**Impact:** Eliminates duplicate `realpath()` syscalls. Saves 25–100 KB/request. +**Effort:** 10 minutes. + +--- + +### Optimization 3: Avoid Full Config Reload on Write + +**File:** `src/Settings/SettingsManager.php:266-274` +**Change:** Instead of `ConfigLoader::load()`, update `$this->config` incrementally using `$data` from `configTarget()`. + +```php +private function reloadRepository(): void +{ + // Instead of full reload, just update the specific scope/key that changed + // $this->config is a Repository; use $this->config->set($key, $value) directly + // Or if full reload unavoidable, cache parsed YAML by mtime +} +``` + +**Impact:** Reduces write amplification from 5 parses (~100–150 KB churn) to near-zero. +**Effort:** 30 minutes (needs careful handling of merged configs). + +--- + +### Optimization 4: YAML Parse Cache + +**File:** `src/ConfigLoader.php` or `src/Settings/YamlConfigStore.php:23-35` +**Add:** Static cache keyed by `realpath($path) . filemtime($path)`. + +```php +private static array $cache = []; + +public function load(string $path): array +{ + $key = realpath($path) . ':' . 
filemtime($path); + if (isset(self::$cache[$key])) { + return self::$cache[$key]; + } + $data = Yaml::parseFile($path); + return self::$cache[$key] = $data; +} +``` + +**Impact:** Eliminates redundant parses across multiple `get()` calls. Saves 50–100 KB per settings access. +**Effort:** 20 minutes. + +--- + +### Optimization 5: Cache Provider Instances + +**File:** `src/LLM/RelayProviderRegistrar.php:42-117` +**Add:** `$instances = []` property; return cached if already resolved. + +```php +private array $instances = []; + +public function resolve(string $provider): Provider +{ + if (isset($this->instances[$provider])) { + return $this->instances[$provider]; + } + // ... create instance ... + return $this->instances[$provider] = $providerInstance; +} +``` + +**Impact:** Saves 200–500 bytes per provider call; reduces credential fetch overhead. +**Effort:** 10 minutes. + +--- + +### Optimization 6: Truncate Task Tree Rendering + +**File:** `src/Agent/TaskStore.php` (locate `renderTree()`) +**Add:** Configurable limit: `max_tasks: 50` or `max_chars: 10240`. + +```php +public function renderTree(): string +{ + $maxTasks = 50; + $tasks = array_slice($this->tasks, 0, $maxTasks); + // render only $tasks + if (count($this->tasks) > $maxTasks) { + $output .= "\n... truncated " . (count($this->tasks) - $maxTasks) . " tasks"; + } + return $output; +} +``` + +**Impact:** Bounds system prompt growth from task tree. Prevents unbounded context consumption. +**Effort:** 20 minutes. + +--- + +### Optimization 7: Stream BashTool/GrepTool Output + +**Files:** +- `src/Tool/Coding/BashTool.php:96-108` +- `src/Tool/Coding/GrepTool.php:68-78` + +**Change:** Process output incrementally via `onRead()` callback, writing directly to `OutputTruncator` stream with line/byte limits enforced during read, not after. 
+ +```php +$truncator = new OutputTruncator(2000, 50 * 1024); +$process = Process::run($command, [ + 'onRead' => function(string $chunk) use ($truncator) { + $truncator->write($chunk); // truncates incrementally + } +]); +$output = $truncator->getOutput(); // already truncated +``` + +**Impact:** Prevents 100 MB RAM spikes from large command outputs. Memory bounded by truncation limits from first byte. +**Effort:** 2 hours. + +--- + +### Optimization 8: LRU Eviction for FileReadTool Cache + +**File:** `src/Tool/Coding/FileReadTool.php:21,70-72` +**Add:** Max entries (e.g., 1000) with LRU eviction using `SplDoublyLinkedList` as LRU list. + +```php +private array $readCache = []; +private SplDoublyLinkedList $lruList; +private int $maxEntries = 1000; + +public function read(string $path, ?int $offset = null, ?int $limit = null): string +{ + $key = $this->cacheKey($path, $offset, $limit); + if (isset($this->readCache[$key])) { + // Move to front of LRU + $this->lruList->unshift($key); + return $this->readCache[$key]; + } + // ... read file ... + if (count($this->readCache) >= $this->maxEntries) { + $oldest = $this->lruList->pop(); + unset($this->readCache[$oldest]); + } + $this->readCache[$key] = $content; + $this->lruList->unshift($key); + return $content; +} +``` + +**Impact:** Bounds long-run growth; prevents 10 MB+ cache bloat in exploratory sessions. +**Effort:** 45 minutes. + +--- + +### Optimization 9: Memory Selection Caching Per Turn + +**File:** `src/Agent/ContextManager.php` (or wherever `selectRelevantMemories` called) +**Add:** Property `$memoryCache = []` keyed by query/round. Populate on first call per LLM round; reuse for subsequent calls within same round. + +```php +private array $memoryCache = []; + +private function selectMemories(string $query, int $round): array +{ + $key = md5($query . ':' . 
$round); + if (!isset($this->memoryCache[$key])) { + $this->memoryCache[$key] = $this->sessionManager->selectRelevantMemories($query); + } + return $this->memoryCache[$key]; +} +``` + +**Impact:** Avoids re-scoring same memories 3–4× per turn. Sorts O(N log N) repeated work. With 1000 memories, saves ~10k comparisons × 3 = 30k ops/turn. +**Effort:** 20 minutes. + +--- + +### Optimization 10: Periodic Subagent Cleanup for Headless Agents + +**File:** `src/Agent/SubagentOrchestrator.php:245-258` +**Add:** Timer-based cleanup in addition to on-demand. + +```php +EventLoop::repeat(30, function() { + $this->pruneCompleted(); +}); +``` + +**Impact:** Frees subagent memory sooner in long-running headless sessions where parent may not call `injectPending...` frequently. Saves ~1 KB/subagent sooner. +**Effort:** 15 minutes. + +--- + +**Total short-term effort:** ~8–12 hours. +**Total short-term RAM reduction:** ~10–30 MB per session + bounded growth + CPU savings. + +--- + +## 8. Long-Term Architectural Improvements (1–3 Months) + +These require design changes, migrations, or significant refactoring. + +### Improvement 1: Push Memory Scoring into SQL + +**Files:** `src/Session/MemoryRepository.php`, `src/Agent/MemorySelector.php` +**Current:** `forProject()` fetches all rows → `MemorySelector::select()` scores in PHP with O(N log N) sort → returns top 6. +**Proposed:** Compute score in SQL (illustrative weights; bind a recency cutoff and a recency bonus to the first two placeholders): + +```sql +SELECT *, + ((CASE WHEN pinned = 1 THEN 1000 ELSE 0 END) + + (length(content) * 0.1) + + (CASE WHEN created_at > ? THEN ? ELSE 0 END)) AS relevance_score +FROM memories +WHERE (project IS NULL OR project = ?) AND expires_at > ? +ORDER BY relevance_score DESC, created_at DESC +LIMIT 6; +``` + +**Impact:** Eliminates O(N) memory load and sort. RAM per round drops from O(all memories) to O(6). With 10k memories, saves **100–500 MB per round**. +**Effort:** 2–3 hours (SQL expression tuning, testing edge cases).
+ +--- + +### Improvement 2: Task Eviction Policy & Centralized Edge Storage + +**Files:** `src/Task/TaskStore.php` +**Changes:** +1. Add `max_tasks` config (default 100). When adding exceeds limit, remove oldest non-terminal tasks (status != 'done'). +2. Replace per-task `blockedBy`/`blocks` arrays with central adjacency map: + +```php +private array $edges = [ + 'blocks' => ['fromId' => ['toId1', 'toId2']], + 'blockedBy' => ['toId' => ['fromId1', 'fromId2']] +]; +``` + +Derive per-task views on demand or maintain denormalized caches. + +**Impact:** +- Bounds task memory (100 tasks × 300 bytes = 30 KB max). +- ~50% edge memory reduction (no duplicate storage). +- Easier cleanup (single map vs scattered arrays). +**Effort:** 3–4 hours (migration, testing). + +--- + +### Improvement 3: Database Index Overhaul + +**File:** `src/Session/Database.php` (migrations) +**Add indexes:** + +```sql +-- For MemoryRepository::forProject() +CREATE INDEX idx_memories_lookup ON memories(project, memory_class, type, expires_at, pinned DESC, created_at DESC); + +-- For SessionRepository::listByProject() +CREATE INDEX idx_sessions_proj_updated ON sessions(project, updated_at DESC); + +-- For MessageRepository::searchProjectHistory() (FTS5) +CREATE VIRTUAL TABLE messages_fts USING fts5(content, content='messages', content_rowid='id'); +``` + +**Impact:** +- Speeds up `forProject()` and `search()` by 10–100×. +- Reduces rows scanned → less memory loaded. +- FTS5 enables full-text search without full scan. +**Effort:** 1 hour (migration + query updates). + +--- + +### Improvement 4: Container Compilation & Opcache Warmup + +**Files:** `composer.json`, `bin/kosmokrator` +**Changes:** +1. Run `composer install --optimize-autoloader --classmap-authoritative` (already has optimize-autoloader). +2. Generate compiled container: `php artisan optimize` (if using Laravel) or implement Symfony-style `ContainerBuilder` dump. +3. 
Warm opcache in production: `php -d opcache.enable_cli=1 bin/kosmokrator ...` + +**Impact:** Reduces boot memory by 30–50% (fewer class maps, no runtime compilation). Boot time faster. +**Effort:** 1–2 hours setup + CI integration. + +--- + +### Improvement 5: Worker Pooling for Audio Notifications + +**Files:** `src/Audio/CompletionSound.php`, `src/Audio/compose_worker.php` +**Design:** +- Start single long-lived `compose_worker.php` process at first notification. +- Communicate via JSON over stdin/stdout or Unix socket. +- Worker stays alive, processes multiple composition requests sequentially. +- Parent sends `{"prompt": "...", "callback": "..."}`; worker returns script path. + +**Impact:** Avoids 2× kernel boot per notification. For 100 notifications, saves **5–10 GB** of cumulative allocation (though not simultaneous). Reduces GC pressure. +**Effort:** 4–5 hours (IPC, protocol, lifecycle management). + +--- + +### Improvement 6: Incremental Prompt Assembly Cache + +**File:** `src/Agent/ContextManager.php:257-289` +**Design:** Introduce `PromptCache` object storing: +- Stable base prompt (instructions + environment + tool schemas) +- Mode suffix (constant) +- Only rebuild volatile parts (memories, task tree) each turn + +```php +class PromptCache { + private string $base; + private array $toolSchemas; // shared reference + public function build(array $memories, string $taskTree): string { + return $this->base . $this->formatMemories($memories) . $taskTree; + } +} +``` + +**Impact:** Reduces per-turn string allocations from ~10–50 KB to ~2–5 KB. Eliminates repeated `implode()` of static parts. +**Effort:** 3 hours (design + implementation + testing). 
+ +--- + +### Improvement 7: Generator-Based Streaming for Large DB Results + +**Files:** `src/Session/MessageRepository.php`, `src/Session/MemoryRepository.php` +**Change:** Replace `fetchAll()` with generator: + +```php +public function streamActive(string $sessionId): Generator +{ + $stmt = $this->db->connection()->prepare( + "SELECT * FROM messages WHERE session_id = ? AND compacted = 0 ORDER BY id ASC" + ); + $stmt->execute([$sessionId]); + while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) { + yield $row; + } +} +``` + +Callers can iterate without full array materialization. + +**Impact:** For 10k messages, peak memory drops from 10–50 MB to O(1) per row during iteration. Useful for export/analysis commands. +**Effort:** 2 hours (update all callers). + +--- + +### Improvement 8: Full-Text Search (FTS5) for Memories + +**File:** Database migration + `src/Session/MemoryRepository.php:160-201` +**Change:** Create virtual table `memories_fts` on `(title, content)`. Rewrite `search()` to use `MATCH` instead of `LIKE`. + +```sql +CREATE VIRTUAL TABLE memories_fts USING fts5(title, content, content='memories', content_rowid='id'); +-- Populate via triggers or batch +SELECT m.* FROM memories m +JOIN memories_fts fts ON m.id = fts.rowid +WHERE memories_fts MATCH ? +ORDER BY rank LIMIT 20; +``` + +**Impact:** Full-text search becomes index-based, not full scan. Faster + less memory. +**Effort:** 3 hours (migration, trigger setup, query rewrite). + +--- + +### Improvement 9: Task Tree Segmentation & Archival + +**File:** `src/Agent/TaskStore.php` +**Design:** Split tasks into "active" (last N) and "archived" (summarized). Render only active. Archive old tasks via compaction-like process (summarize completed subtasks into parent description). + +**Impact:** Prevents unbounded task tree growth. Keeps system prompt size bounded. Aligns with history compaction philosophy. +**Effort:** 3–4 hours (archival logic, summarization LLM call). 
+ +--- + +### Improvement 10: Benchmark Suite Completion + +**Files:** Create all benchmark scripts in `docs/ram-audit/benchmarks/` (see Section 9). +**Effort:** 4–6 hours total to write, run, and document baseline. + +--- + +**Total long-term effort:** ~20–30 hours (spread over 1–3 months). +**Total long-term RAM reduction:** ~100–500 MB for large sessions + bounded growth + scalability. + +--- + +## 9. Benchmark Suite Summary + +**Status:** No benchmark files were created during this audit (agents operated in read-only mode). The following suite is **recommended for implementation** to establish baselines and validate fixes. + +### Recommended Benchmark Files + +| File | Purpose | Key Metrics | +|------|---------|-------------| +| `db-connection-memory.php` | Connection lifecycle, singleton reuse | Per-connection memory delta, GC retention after `unset()` | +| `agent-loop-memory.php` | 100/500/1000 turns with 3 tools/turn | Memory growth curve, compaction triggers, GC cycles | +| `subagent-memory.php` | Spawn 10/30/100 concurrent subagents | Per-agent overhead, total peak, isolation | +| `tool-memory.php` | Concurrent tool execution, large file I/O | Tool-specific spikes, cache growth (FileReadTool) | +| `async-memory.php` | 100/500/1000 concurrent promises | Per-promise overhead, Fiber stack size, event loop memory | +| `caching-memory.php` | Repeated token estimation, model resolution | Cache hit/miss impact, memory vs compute tradeoff | +| `datastructure-memory.php` | Array merge patterns, JSON encoding | Temporary allocation peaks, copy-on-write | +| `ui-memory.php` | TUI/ANSI render cycles, animation frames | Render buffer growth, timer retention, widget tree | +| `audio-memory.php` | 10/50/100 rapid completion sounds | Worker process memory, IPC overhead, zombie risk | +| `session-memory.php` | 1k/5k/10k session creations, message inserts | DB fetch strategies, connection reuse, fetchAll vs streaming | + +### Measurement Protocol + +1. 
Use `memory_get_peak_usage(true)` (real peak) before/after each operation; the peak is a process-wide high-water mark, so call `memory_reset_peak_usage()` (PHP 8.2+) between operations or run each scenario in a fresh process. +2. Run each scenario 5×, report median and max to smooth GC variance. +3. Force `gc_collect_cycles()` between iterations to measure steady-state. +4. Profile with `xhprof` or `tideways` if available for callgrind analysis. +5. For async operations, measure before/after `await` and after GC. + +### Baseline Targets (To Be Established) + +After implementing immediate actions, expect: +- **Per-request RAM churn** reduced from ~200–400 KB to ~50–100 KB (security/caching fixes). +- **Subagent memory** reduced by 1.8–7.5 MB (tool schema cache). +- **MemoryRepository per-round** from O(N) to O(1) after SQL scoring (long-term). +- **Task memory** bounded to ~30–50 KB max (eviction policy). +- **Boot memory** from ~20–40 MB to ~12–20 MB (container compilation + lazy services). + +--- + +## 10. Monitoring Recommendations + +### Runtime Metrics to Track + +1. **Memory usage by component** (via custom stats): + - `ConversationHistory::count()` and estimated size + - `SubagentOrchestrator::count()` active + completed + - `TaskStore::count()` tasks + - `FileReadTool::cacheSize()` entries + - `MemoryRepository::count()` total memories + +2. **GC activity**: + - `gc_status()` — `runs` and `collected` counters (collector runs and cycles collected) + - `gc_mem_caches()` — bytes reclaimed from the Zend memory manager's caches (note: calling it frees them; it is not a passive gauge) + - Monitor frequency; high GC cycles indicate allocation churn. + +3. **Database query patterns**: + - Count of `MemoryRepository::forProject()` calls per turn + - Rows returned per call (log if >1000) + - Query time (should be <10 ms with indexes) + +4. **Permission evaluation**: + - Number of permission checks per tool call + - Time spent in `PermissionEvaluator::evaluate()` + - Cache hit rate (if memoization added) + +5. **Async resources**: + - Active timers count (via `EventLoop::getRunningTimers()` if accessible) + - Open connections in HTTP pool + - Pending futures in `SubagentOrchestrator` + +6.
**File system**: + - Number of open `ShellSession` instances + - Shell session buffer sizes + - Temp file count (audio, edit operations) + +### Alert Thresholds + +| Metric | Warning | Critical | +|--------|---------|----------| +| Process RSS | > 200 MB | > 500 MB | +| ConversationHistory messages | > 500 | > 1000 | +| SubagentOrchestrator agents (total) | > 50 | > 100 | +| TaskStore tasks | > 100 | > 200 | +| MemoryRepository memories (project) | > 5000 | > 10000 | +| FileReadTool cache entries | > 5000 | > 10000 | +| GC cycles per minute | > 1000 | > 5000 | +| Permission checks per second | > 100 | > 500 (possible DoS) | + +### Logging Recommendations + +- Add debug logs to `PermissionRule::matchesGlob()` counting compilations vs cache hits (after fix). +- Log `MemoryRepository::forProject()` row count when >1000. +- Log subagent spawn/completion with memory delta. +- Log task creation/removal with count. +- Log cache misses for token fetch, model resolution. + +### Profiling in Production + +- Use `php -d opcache.enable_cli=1` with `opcache_get_status()` to monitor opcode memory. +- Consider `tideways` or `blackfire` for periodic profiling (low overhead). +- Export metrics to statsd/Prometheus if available (not currently integrated). + +--- + +## Conclusion + +KosmoKrator's RAM efficiency profile is **mixed**: core memory management (history compaction, subagent isolation) is well-designed, but **systematic caching omissions** and **unbounded accumulations** create significant avoidable memory pressure. The most severe issues are: + +1. **Permission system** — regex recompilation, duplicate evaluation, no caching — critical for both performance and security. +2. **Subagent orchestrator** — unbounded retention of completed agent data — classic memory leak pattern. +3. **Memory repository** — full table scans on every LLM round — O(N) in PHP instead of SQL. +4. **Task system** — no eviction, 30fps re-renders — does not scale. +5. 
**HTTP connection pools** — one per subagent — resource waste. +6. **Prompt construction** — instruction re-reads, tool schema duplication, git shell calls — constant overhead. + +**Immediate actions** (10 items, ~2–3 hours total) will yield 5–15 MB savings per session and eliminate the most egregious waste. **Short-term optimizations** (10 items, ~8–12 hours) will further reduce churn and bound growth. **Long-term architectural improvements** (10 items, ~20–30 hours) are necessary for scalability to large sessions (1000+ messages, 100+ tasks, 10k memories). + +The **benchmark suite** must be created and baseline established before and after fixes to quantify impact and guard against regressions. **Monitoring** should be added to track memory hotspots in production. + +**Priority:** Implement all Immediate Actions within 48 hours. Then tackle Short-Term Optimizations over the next 1–2 weeks. Schedule Long-Term improvements for next sprint cycle. + +--- + +**Report Compiled By:** KosmoKrator General Agent (RAM Efficiency Audit) +**Source Synthesis Files:** +- `docs/ram-audit/synthesis-security.md` +- `docs/ram-audit/synthesis-core-agent.md` +- `docs/ram-audit/synthesis-io-performance.md` +- `docs/ram-audit/synthesis-architecture.md` + +**Additional Agent Contributions:** +- database-connection-pooling +- model-catalog-pricing +- caching-strategies-gaps +- data-structure-optimization +- php-internals-memory +- async-event-loop-memory +- kernel-bootstrap +- audio-notifications +- session-persistence + +**Final Deliverable:** `docs/ram-audit/RAM-EFFICIENCY-AUDIT.md` +**Absolute Path:** `/Users/rutger/Projects/kosmokrator/docs/ram-audit/RAM-EFFICIENCY-AUDIT.md` diff --git a/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-architecture.md b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-architecture.md new file mode 100644 index 0000000..ac26a38 --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-architecture.md @@ -0,0 +1,370 @@ +# 
Architecture Memory Efficiency Report + +**Project:** KosmoKrator — AI coding agent for the terminal +**Audit Scope:** Subagent orchestration, event propagation, service container, task tracking, memory repository patterns +**Date:** 2026-04-03 +**Status:** Phase 1 Synthesis + +--- + +## Executive Summary + +This report synthesizes RAM efficiency findings from five Phase 1 audit agents covering core architectural subsystems. The analysis reveals **critical memory inefficiencies** in two areas: **subagent orchestration** (unbounded retention of completed agent data) and **memory repository** (unbounded database fetches). The **task tracking system** shows moderate issues with unbounded growth and high-frequency re-renders. The **event system** is exemplary — minimal overhead, tiny payloads, single listener. The **service container** pattern avoids per-subagent bootstrapping but has minor duplication of stateless components. + +**Overall Risk Assessment:** 🔴 **HIGH** — Two critical leaks can cause unbounded RAM growth in long-running sessions; one high-risk database pattern loads all memories on every LLM round. + +**Key Metrics:** +- **Subagent orchestrator:** Retains completed agent futures & stats indefinitely; group semaphores accumulate; background results held until manual collection. +- **Memory selection:** Fetches entire `memories` table on every context rebuild (O(N) per LLM round), scores all in-memory, then discards — repeated 3–4× per user turn. +- **Task system:** Full tree re-render at 30fps in TUI mode; no eviction policy; stale dependency edges retained after task removal. +- **Event system:** ~28 bytes per dispatched event; single listener; negligible overhead. 
+ +--- + +## Findings (Severity) + +### 🔴 Critical + +| # | Component | Issue | Impact | File:Line | +|---|-----------|-------|--------|-----------| +| C1 | SubagentOrchestrator | Completed agent futures & stats retained indefinitely; `pruneCompleted()` never auto-called | Unbounded RAM growth with agent count; each entry ~200–500 bytes + future closure overhead | `src/Agent/SubagentOrchestrator.php:392-409` (prune exists but not invoked) | +| C2 | SubagentOrchestrator | Group semaphores (`$groups`) created per unique group name, never removed | Unbounded growth if group names are dynamic (e.g., per-task groups) | `src/Agent/SubagentOrchestrator.php:471` | +| C3 | SubagentOrchestrator | Background agent results in `$pendingResults` cleared only via explicit `collectPendingResults()` | Accumulates if parent never collects; each result string can be KBs | `src/Agent/SubagentOrchestrator.php:??` | +| C4 | MemoryRepository | `forProject()` loads **all** memory rows into PHP on every call (no LIMIT) | With 10k memories: 100–500 MB per fetch; called on every LLM round (3–4×/turn) | `src/Session/MemoryRepository.php:65-88` | +| C5 | TaskStore | No task eviction policy; tasks accumulate until manual `/tasks clear` or REPL prompt | Unbounded growth; each task ~200–300 bytes + edge arrays | `src/Task/TaskStore.php:14-356` | +| C6 | TaskStore | `clearTerminal()` / `clearAll()` do **not** purge stale IDs from other tasks' `blockedBy`/`blocks` arrays | Memory leak: dangling references accumulate across clear cycles | `src/Task/TaskStore.php:??` | + +### 🟠 High + +| # | Component | Issue | Impact | File:Line | +|---|-----------|-------|--------|-----------| +| H1 | MemorySelector | Re-scores entire memory set on every LLM round; no caching | O(N log N) repeated work; with 1000 memories, ~10k comparisons per round × 3–4 rounds/turn | `src/Agent/MemorySelector.php:29-38` | +| H2 | TaskStore | TUI task bar re-renders full tree at ~30fps during active phases (every 33ms) | 3,000+ 
node visits/sec for 100 tasks; high allocation/GC pressure | `src/UI/Tui/TuiCoreRenderer.php:643-681`, `src/UI/Tui/TuiAnimationManager.php:378-420` | +| H3 | Database | Missing indexes on `memories` table: `memory_class`, `type`, `(pinned, created_at)`, `expires_at` | Full table scans for every `forProject()` and `search()`; CPU + memory pressure | `src/Session/Database.php:128` | +| H4 | TaskStore | Bidirectional edge storage duplicates every dependency (2× memory) | ~50% edge memory overhead vs central adjacency list | `src/Task/TaskStore.php:62-84` | + +### 🟡 Medium + +| # | Component | Issue | Impact | File:Line | +|---|-----------|-------|--------|-----------| +| M1 | SubagentFactory | Stateless `ContextPruner` & `ToolResultDeduplicator` instantiated per subagent unnecessarily | Minor per-agent overhead (~negligible but wasteful) | `src/Agent/SubagentFactory.php:90-103` | +| M2 | SubagentOrchestrator | `$stats->dependsOn` arrays grow O(N) but not pruned | Small but unbounded; ~8 bytes per parent ID × N | `src/Agent/SubagentStats.php:??` | +| M3 | TaskStore | `roots()` and `children()` scan entire task set each call (O(n)) | Inefficient for large n; could be indexed | `src/Task/TaskStore.php:??` | +| M4 | MemoryRepository | `fetchAll()` used everywhere — entire result set materialized even if only a few rows needed | Memory spike for large queries; streaming not used | `src/Session/MemoryRepository.php:??` | +| M5 | Event system | 5 unused event classes (ResponseCompleteEvent, StreamChunkEvent, etc.) 
| Code bloat only; zero runtime cost | `src/Agent/Event/*.php` | + +### 🟢 Low / Informational + +| # | Component | Note | Impact | +|---|-----------|------|--------| +| L1 | TaskStore | `toDetail()` JSON-encodes metadata; could be large if metadata contains big structures | Only when explicitly called | +| L2 | TaskStore | Subject truncation only in ANSI render; plain text shows full subject | Minor display inconsistency | +| L3 | Event system | `TokenTrackingListener` state persists session-wide; integers could overflow in theory (practically impossible) | None | +| L4 | SubagentOrchestrator | `totalTokens()` iterates all stats on demand — O(n) but acceptable | None | + +--- + +## Memory Hotspots (file:line + estimates) + +### Subagent Orchestration (`src/Agent/`) + +| Hotspot | File:Line | Estimate | Notes | +|---------|-----------|----------|-------| +| Completed agent futures array | `SubagentOrchestrator.php:??` | ~500 bytes/agent + closure capture | Grows unbounded; primary leak | +| Completed stats array | `SubagentOrchestrator.php:??` | ~300–500 bytes/agent | Mirrors `$agents` | +| Group semaphores | `SubagentOrchestrator.php:471` | ~100–200 bytes/group | Accumulates with unique group names | +| Pending background results | `SubagentOrchestrator.php:??` | Size of result string (KB) per background agent | Held until parent collects | +| Per-agent ConversationHistory | `AgentLoop.php:??` | Grows with message count; ~100–1000+ bytes/message | Freed when AgentLoop GC'd (if future not retained) | +| Per-agent LLM client | `SubagentFactory.php:??` | ~few KB (HTTP client, listeners) | New per subagent; intentional isolation | + +### Event System (`src/Agent/Event/`, `src/Kernel.php`) + +| Hotspot | File:Line | Estimate | Notes | +|---------|-----------|----------|-------| +| Dispatched event objects | `AgentLoop.php:184,213,245,344,401,462,816,829` | ~28 bytes/event | 8–9 events per typical run; negligible | +| TokenTrackingListener state | 
`Listener/TokenTrackingListener.php:??` | 4× int = 32 bytes + object header | Accumulates counts only; no per-event storage | + +### Task Tracking (`src/Task/`, `src/UI/`) + +| Hotspot | File:Line | Estimate | Notes | +|---------|-----------|----------|-------| +| Task objects array | `TaskStore.php:14` | ~200–300 bytes/task + edges | Unbounded; no eviction | +| Edge arrays (blockedBy/blocks) | `Task.php:??` | ~8 bytes/edge × 2 (bidirectional) | Duplicate storage; stale IDs never purged | +| TUI task bar render buffer | `TuiCoreRenderer.php:643-681` | Full tree string + ANSI codes | Rebuilt every 33ms; ~10–100 KB per render depending on tree size | +| Full tree render (per call) | `TaskStore.php:174-186, 219-287` | O(n) string allocation | Called on every task tool and TUI refresh | + +### Memory Repository (`src/Session/`) + +| Hotspot | File:Line | Estimate | Notes | +|---------|-----------|----------|-------| +| `forProject()` result set | `MemoryRepository.php:65-88` | **All rows** — 10k memories = 100–500 MB | Called on every LLM round via `SessionManager::getMemories()` | +| In-memory memory array (during selection) | `SessionManager.php:276-281` | Full memory set duplicated in PHP array | Held during `MemorySelector::select()` sort | +| `usort()` temporary arrays | `MemorySelector.php:29-38` | O(N) additional zvals | Sorting overhead doubles memory footprint temporarily | +| Uncapped search result formatting | `MemorySearchTool.php:104` | Full content of each memory echoed | Limited to 20 results but each could be large | + +--- + +## Architectural Concerns + +### 1. Subagent Orchestration: Lifecycle & Retention Policy + +**Current design:** The `SubagentOrchestrator` acts as a global registry for all agents spawned in a session. 
It stores: +- `$agents`: Future objects keyed by agent ID +- `$stats`: SubagentStats objects keyed by agent ID +- `$pendingResults`: Background results keyed by parent ID +- `$groups`: Semaphore objects keyed by group name + +**Concern:** No automatic cleanup. The orchestrator lives for the entire session. Completed agents are never pruned unless some external code calls `pruneCompleted()`. In practice, this never happens automatically. This turns the orchestrator into an **unbounded accumulation vector**. + +**Why it matters:** In a long-running session with many subagent spawns (e.g., iterative planning, recursive decomposition), the `$agents` and `$stats` arrays grow linearly. While each entry is small, the cumulative effect over hours/days can be tens of MB. More importantly, the `$pendingResults` for background agents can hold large output strings indefinitely. + +**Secondary concern:** Group semaphores are created on first use and never destroyed. If group names are dynamic (e.g., per-task or per-context), this creates another unbounded array. + +**Pattern assessment:** The orchestrator is a **global mutable registry** with no TTL, no weak references, no size limits. This is a classic memory leak pattern. + +--- + +### 2. Memory Selection: N+1 Fetch & Repeated Scoring + +**Current design:** Every time the LLM is called (3–4 times per user turn due to tool calls), the system: +1. Calls `SessionManager::getMemories()` → `MemoryRepository::forProject()` → `SELECT * FROM memories` (no LIMIT, no filters pushed down) +2. Fetches **all** memory rows into PHP (could be thousands) +3. Scores each memory against the current query using `MemorySelector::select()` (O(N log N) sort) +4. Takes top 6 and injects into context +5. Discards the full set until next round + +**Concern:** This is an **N+1 query problem** compounded by **repeated full-table scans and in-memory sorts**. 
With 1000 memories, each round loads 1000 rows, scores them, and throws them away — repeated 3–4 times per turn. That's 3–4 full table scans, i.e. 3000–4000 rows loaded and scored, per user interaction. + +**Why it matters:** RAM spikes from loading all rows; CPU waste from repeated scoring; no caching. As memory count grows, latency and memory pressure grow superlinearly due to sort. + +**Pattern assessment:** Anti-pattern: **fetch-all-then-score-in-application** instead of **filter-score-limit in database**. The database is perfectly capable of sorting and limiting if scoring is expressed as an ORDER BY expression. + +--- + +### 3. Task System: In-Memory Graph with No Eviction + +**Current design:** Tasks are stored in a simple associative array (`TaskStore::$tasks`). There is: +- No persistence (tasks lost on restart) +- No eviction policy (only manual `/tasks clear` or REPL-triggered `clearTerminal()`) +- No pagination or depth limits +- Bidirectional edge storage (duplicate arrays) +- Full tree re-render on every task operation and at 30fps in TUI + +**Concern:** The task system is designed for **small-scale, short-lived sessions**. For complex multi-agent workflows generating 100+ tasks, memory and CPU usage become excessive due to: +- O(n) full scans for `roots()`, `children()`, `renderTree()` +- O(n²) worst-case rendering if many blockers per task +- 30fps re-renders = thousands of node visits/sec +- Stale edge references never cleaned up on task removal + +**Why it matters:** KosmoKrator is meant for complex coding tasks that may generate many subtasks. The current implementation does not scale. + +**Pattern assessment:** In-memory graph with linear scans is acceptable for <50 nodes but needs indexing/eviction for production-scale use. + +--- + +### 4. Event System: Minimalist & Efficient + +**Current design:** Events are small, immutable DTOs. Only 3 events are actually dispatched (carrying aggregated metrics). Dispatcher has a single listener (`TokenTrackingListener`).
Dispatch is synchronous, immediate. + +**Assessment:** This is **architecturally sound**. No buffering, no async overhead, no payload duplication. The event system is a non-issue from a RAM perspective. + +**Minor note:** 5 unused event classes exist but are dead code — harmless but could be removed for cleanliness. + +--- + +### 5. Service Container: Factory Pattern Avoids Per-Agent Bootstrapping + +**Current design:** `SubagentFactory` receives shared services via constructor (ToolRegistry, ModelCatalog, etc.). It constructs a fresh `AgentLoop` per subagent but passes shared services. No per-agent service container is created. + +**Assessment:** This is **efficient**. Avoids the overhead of a full DI container per subagent. The object graph is lean. + +**Minor duplication:** `ContextPruner` and `ToolResultDeduplicator` are stateless but instantiated per `AgentLoop`. They could be shared singletons injected once into the factory. + +--- + +## Recommendations + +### Immediate (Priority 1 — Critical Leaks) + +#### R1. Auto-prune completed subagents +- **Where:** `SubagentOrchestrator` +- **What:** Call `pruneCompleted()` automatically after each agent finishes or via a periodic timer (e.g., every 10 completions). +- **Alternative:** Use `WeakReference` for `$agents` entries if parent might still await results; but explicit prune is simpler. +- **Impact:** Prevents unbounded growth of `$agents`, `$stats`, `$pendingResults`. + +#### R2. Clean up group semaphores +- **Where:** `SubagentOrchestrator` +- **What:** Track reference count per group; when the last agent in a group completes, `unset($this->groups[$group])`. +- **Impact:** Prevents semaphore accumulation from dynamic group names. + +#### R3. Auto-collect background results on parent completion +- **Where:** `SubagentOrchestrator::runAgent()` (where background mode is handled) +- **What:** When a parent agent finishes, automatically call `collectPendingResults($parentId)` to free result strings. 
- **Impact:** Prevents large result strings from lingering. + +#### R4. Fix unbounded memory fetch +- **Where:** `MemoryRepository::forProject()` and `SessionManager::getMemories()` +- **What:** Add a **LIMIT** to the `SELECT *` query or use **cursor-based streaming** for full scans. Better: push scoring into SQL. +- **Short-term:** Append `LIMIT 1000` to the `forProject()` query to cap rows; log warning if truncated. +- **Long-term:** Implement SQL-based scoring: `SELECT *, (CASE ...) AS score FROM memories WHERE … ORDER BY score DESC LIMIT 6`. +- **Impact:** Reduces per-round RAM from O(all memories) to O(selected memories). + +#### R5. Add task eviction policy +- **Where:** `TaskStore` +- **What:** Add configurable `max_tasks` (e.g., 100) with LRU eviction. When adding a task would exceed the limit, evict the oldest terminal (completed/cancelled) tasks first; evict non-terminal tasks only as a last resort, since they represent active work. +- **Alternative:** Auto-clear completed tasks after each tool call (not just at REPL prompt). +- **Impact:** Bounds task memory; prevents unbounded accumulation. + +#### R6. Purge stale dependency edges +- **Where:** `TaskStore::clearTerminal()` and `TaskStore::clearAll()` +- **What:** After removing tasks, walk all remaining tasks and filter `blockedBy`/`blocks` arrays to remove IDs not in `$this->tasks`. +- **Impact:** Prevents stale ID accumulation; reduces array bloat over time. + +--- + +### High Priority (Priority 2 — Performance & Scaling) + +#### R7. Debounce TUI task bar refresh +- **Where:** `TuiAnimationManager` (breathing timer) and `TuiCoreRenderer::refreshTaskBar()` +- **What:** Reduce refresh rate from 30fps (33ms) to 5–10fps (100–200ms) during breathing animation. Use dirty flag: only re-render if task tree changed. +- **Impact:** Cuts node visits/sec by 3–6×; reduces allocation/GC pressure. + +#### R8.
Add database indexes for memories +- **Where:** `src/Session/Database.php` (migration/schema) +- **What:** Add composite index: + ```sql + CREATE INDEX idx_memories_lookup ON memories(project, memory_class, type, expires_at, pinned DESC, created_at DESC); + ``` +- Also add single-column indexes on `memory_class` and `type` if composite not feasible. +- **Impact:** Speeds up `forProject()` and `search()`; reduces rows scanned → less memory loaded. + +#### R9. Cache memory selection per turn +- **Where:** `ContextManager` +- **What:** Add property `$memoryCache = []` keyed by query string; populate on first `selectRelevantMemories()` call per LLM round; reuse for subsequent calls within same round. +- **Impact:** Avoids re-scoring same memories multiple times per turn (3–4× reduction). + +#### R10. Centralize edge storage (optional) +- **Where:** `TaskStore` +- **What:** Replace per-task `blockedBy`/`blocks` arrays with a central adjacency map: `$edges = ['blocks' => ['from' => ['to1', 'to2']], 'blockedBy' => …]`. Derive per-task views on demand or maintain denormalized caches. +- **Impact:** ~50% edge memory reduction; easier cleanup; but adds complexity. + +--- + +### Medium Priority (Priority 3 — Cleanup & Minor Gains) + +#### R11. Share stateless components +- **Where:** `SubagentFactory` +- **What:** Instantiate `ContextPruner` and `ToolResultDeduplicator` once as private properties; pass to each `AgentLoop`. +- **Impact:** Negligible RAM savings; reduces per-agent object count. + +#### R12. Implement auxiliary indexes for tasks +- **Where:** `TaskStore` +- **What:** Maintain `parentId => [childIds]` map updated on `add()`/`update()`. Makes `children()` O(1) and `roots()` O(1) with `parentId === null` index. +- **Impact:** Faster queries; minor RAM overhead for index arrays. + +#### R13. 
Remove unused event classes +- **Where:** `src/Agent/Event/` +- **What:** Delete `ResponseCompleteEvent`, `StreamChunkEvent`, `ThinkingEvent`, `ToolCallEvent`, `ToolResultEvent` if truly unused. +- **Impact:** Code cleanliness only; zero runtime effect. + +#### R14. Add full-text search (FTS5) for memories +- **Where:** Database schema +- **What:** Create virtual table `memories_fts` on `(title, content)`; rewrite `search()` to use `MATCH`. +- **Impact:** Faster text search; allows index-based lookup instead of full scan. + +--- + +### Long-term / Exploratory + +#### R15. Memory repository pagination API +- Design a `MemoryRepository::getRecent(int $limit, int $offset)` for UI browsing, separate from `forProject()` which should be for context injection only. + +#### R16. Task tree depth limiting +- Add config `max_task_depth` (e.g., 5); deeper tasks are truncated or rejected. + +#### R17. Benchmark suite completion +- Complete the set of four benchmark scripts referenced in Phase 1 reports (three remain to be written; one is marked as already created): + - `docs/ram-audit/benchmarks/subagent-memory.php` + - `docs/ram-audit/benchmarks/event-memory.php` (already created) + - `docs/ram-audit/benchmarks/task-memory.php` + - `docs/ram-audit/benchmarks/memory-memory.php` +- Use them to validate fixes and track regressions.
+ +--- + +## Implementation Roadmap (Suggested Order) + +| Phase | Targets | Expected RAM Reduction | +|-------|---------|------------------------| +| 1 | R1, R2, R3 (subagent leaks) | Stops unbounded growth; ~500 bytes/agent saved after completion | +| 2 | R4, R8 (memory fetch + indexes) | Per-round RAM from O(N) to O(1); 100–500 MB saved for 10k memories | +| 3 | R5, R6, R7 (task eviction + edge cleanup + TUI debounce) | Bounds task memory; 30fps → 5fps = 6× fewer renders | +| 4 | R9 (memory caching) | 3–4× fewer scorings per turn; CPU savings | +| 5 | R10, R11, R12, R13 (optimizations) | Minor gains; code quality | +| 6 | R14, R15, R16 (FTS, pagination, depth limit) | Scalability improvements | + +--- + +## Conclusion + +KosmoKrator's architecture is **generally sound** but suffers from two **critical unbounded-growth vectors**: +1. Subagent orchestrator retains completed agent data indefinitely. +2. Memory repository loads all memories on every LLM round. + +The **task system** also requires **bounded eviction** and **render throttling** to scale. The **event system** is exemplary. The **service container** pattern is efficient with minor duplication opportunities. + +**Immediate action** on R1–R4 will prevent RAM exhaustion in long-running or memory-intensive sessions. Subsequent phases (R5–R9) will improve performance and scalability. The benchmark suite should be completed to quantify improvements and guard against regressions. 
+ +--- + +## Appendix: Files Analyzed + +### Subagent Orchestration +- `src/Agent/SubagentOrchestrator.php` +- `src/Agent/SubagentFactory.php` +- `src/Agent/SubagentStats.php` +- `src/Agent/SubagentPipeline.php` +- `src/Agent/SubagentPipelineFactory.php` +- `src/Agent/SubagentModelConfig.php` +- `src/Agent/StuckDetector.php` +- `src/Agent/AgentLoop.php` +- `src/Agent/ConversationHistory.php` +- `src/Agent/ContextManager.php` +- `src/Agent/ContextCompactor.php` +- `src/Agent/ContextPruner.php` +- `src/Agent/ToolResultDeduplicator.php` + +### Event System +- `src/Agent/Event/*.php` (8 events) +- `src/Kernel.php` +- `src/Provider/EventServiceProvider.php` +- `src/Agent/Listener/TokenTrackingListener.php` + +### Task Tracking +- `src/Task/Task.php` +- `src/Task/TaskStore.php` +- `src/Task/TaskStatus.php` +- `src/Task/Tool/TaskCreateTool.php` +- `src/Task/Tool/TaskGetTool.php` +- `src/Task/Tool/TaskListTool.php` +- `src/Task/Tool/TaskUpdateTool.php` +- `src/UI/Tui/TuiCoreRenderer.php` +- `src/UI/Tui/TuiAnimationManager.php` +- `src/UI/Ansi/AnsiCoreRenderer.php` +- `src/Command/AgentCommand.php` +- `src/Agent/ContextManager.php` + +### Memory Repository +- `src/Session/MemoryRepository.php` +- `src/Session/SessionManager.php` +- `src/Session/Tool/MemorySaveTool.php` +- `src/Session/Tool/MemorySearchTool.php` +- `src/Agent/MemorySelector.php` +- `src/Agent/MemoryInjector.php` +- `src/Session/SettingsRepository.php` +- `src/Session/Database.php` + +--- + +**Report generated from Phase 1 agent findings.** +**Next step:** Implement Priority 1 recommendations and validate with benchmark suite. 
diff --git a/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-core-agent.md b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-core-agent.md new file mode 100644 index 0000000..c283b1a --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-core-agent.md @@ -0,0 +1,496 @@ +# Core Agent Memory Efficiency Synthesis + +**Report Date:** 2026-04-03 +**Agents Consulted:** agent-loop-lifecycle, context-memory-audit, stuck-detection-memory, prompt-engineering-overhead +**Scope:** RAM efficiency of core agent loop, context management, and prompt construction + +--- + +## Executive Summary + +KosmoKrator's core agent loop demonstrates **fundamentally sound memory management** with multiple defensive layers against unbounded growth. The primary memory accumulator — `ConversationHistory::$messages` — grows monotonically but is bounded by three reclamation mechanisms (compaction, pruning, deduplication) that trigger automatically based on context window pressure. + +**Critical Finding:** While no memory leaks exist, **prompt construction suffers from systematic caching omissions** that cause redundant work and string bloat on every turn. The most severe inefficiencies are: + +1. **Instruction file re-reading** every session (3–5 disk reads, no cache) +2. **Tool schema regeneration** on every subagent spawn (~30–50 object allocations repeated) +3. **Git shell calls** repeated per-turn (`gitRoot()`, `gitBranch()`) +4. **Task tree rendering** with no visible truncation limit + +These issues are **independent of conversation history size** and therefore apply constant overhead even to short sessions. 
+ +**Severity Distribution:** +- 🔴 Critical: 2 issues (instruction caching, tool schema caching) +- 🟠 High: 2 issues (git shell calls, task tree unbounded) +- 🟡 Medium: 4 issues (prompt splitting, memory formatting, environment detection, string concatenation) +- 🟢 Low: 2 issues (suboptimal thresholds, cleanup timing) + +--- + +## Findings (Severity-Rated) + +### 🔴 Critical + +#### CRIT-1: Instruction Files Re-Read Every Session (No Cache) +**Files:** `src/Agent/InstructionLoader.php:26-85` + +**What:** `InstructionLoader::gather()` reads up to 5 files from disk on every session start: +- `~/.kosmokrator/instructions.md` +- `{git_root}/KOSMOKRATOR.md` +- `{git_root}/.kosmokrator/instructions.md` +- `{git_root}/AGENTS.md` +- `{cwd}/KOSMOKRATOR.md` + +**Impact:** +- **Memory:** Each file loaded as a string kept for session lifetime. Large `AGENTS.md` (common in monorepos) can be 10–100 KB. +- **I/O:** 3–5 `file_get_contents()` calls per session; `gitRoot()` uses `shell_exec()` (line 102). +- **Frequency:** Once per session, but sessions are frequent in REPL usage. + +**Why it's critical:** This is **pure waste** — instruction files change rarely (user edits or git commits). No technical reason exists to re-read them. Static property cache would eliminate all I/O and string allocation. + +**Evidence:** `readFile()` (line 87) has no memoization; `gather()` calls it sequentially every time. + +--- + +#### CRIT-2: Tool Schema Regenerated on Every Subagent Spawn +**Files:** `src/Tool/ToolRegistry.php:67-103`, `src/Agent/SubagentFactory.php:105` + +**What:** `ToolRegistry::toPrismTools()` converts each tool to a `PrismTool` object with full parameter schema on every call. Called: +- Once at main `AgentLoop` setup (`AgentSessionBuilder:133`) +- **Once per subagent** (`SubagentFactory:105`) — subagents spawn frequently + +**Impact:** +- **Memory:** ~30–50 tools × ~10 parameters each = 300–500 parameter objects per call. 
Each `PrismTool` + parameter objects ≈ 200–500 bytes → **60–250 KB per subagent** wasted. +- **CPU:** Object allocation + method calls (`withStringParameter()`, etc.) repeated unnecessarily. +- **Frequency:** Every subagent creation (default concurrency 10, depth 3 → potentially 30+ subagents per session). + +**Why it's critical:** Tool schemas are **static metadata** — they never change at runtime. Rebuilding them is pure allocation bloat. Subagent memory isolation is good, but this duplicates static data across all subagents. + +**Evidence:** `toPrismTool()` (lines 76-103) creates fresh `PrismTool` and calls `->parameters()` on tool to rebuild schema arrays each time. + +--- + +### 🟠 High + +#### HIGH-1: Repeated Git Shell Calls Every Turn +**Files:** `src/Agent/ProtectedContextBuilder.php:24-50`, `src/Agent/InstructionLoader.php:102` + +**What:** `ProtectedContextBuilder::build()` calls: +- `InstructionLoader::gitRoot()` — `shell_exec('git rev-parse --show-toplevel')` +- `InstructionLoader::gitBranch()` — `shell_exec('git branch --show-current')` + +Every time protected context is built, which is **every turn** (via `ContextManager::buildSystemPrompt()`). + +**Impact:** +- **Memory:** Each `shell_exec()` returns a string (path or branch name, ~20–100 bytes). Strings are short-lived but allocated every turn. +- **I/O:** Two subprocess calls per turn. At 100 turns → 200 shell executions. Significant overhead. +- **Latency:** Each call takes ~1–5 ms; cumulative delay noticeable. + +**Why it's high:** Git state changes infrequently. Caching with `static ?string` (per-request) or session-scoped property would eliminate all repeated calls. No invalidation needed except on explicit git events (not applicable in agent runtime). + +**Evidence:** `gitRoot()` (line 102) and `gitBranch()` (line 57) have no caching; called unconditionally in `build()`. 
+ +--- + +#### HIGH-2: Task Tree Rendering Unbounded +**Files:** `src/Agent/TaskStore.php` (not fully inspected, but referenced in `ContextManager:270`) + +**What:** `ContextManager::buildSystemPrompt()` appends `$this->taskStore->renderTree()` to system prompt every turn. No truncation limit observed in codebase. + +**Impact:** +- **Memory:** Task tree grows linearly with number of tasks created. Each task adds ~50–200 chars to rendered string. +- **Prompt bloat:** Unbounded task list consumes context window, forcing earlier compaction. +- **Frequency:** Every turn. + +**Why it's high:** Long-running sessions with many decomposed tasks could see task tree reach **tens of KB**. This directly competes with conversation history for context space. Should have hard limit (e.g., last 50 tasks, or 10 KB max). + +**Evidence:** `renderTree()` call at `ContextManager:270` with no preceding `substr()` or count check. + +--- + +### 🟡 Medium + +#### MED-1: PromptFrameBuilder Re-Splits Every Call (No Cache) +**Files:** `src/LLM/PromptFrameBuilder.php:31-77` + +**What:** `splitSystemPrompt($prompt)` uses `strpos()` + `substr()` to separate stable/volatile portions. Called downstream by providers that support prompt caching. No result caching. + +**Impact:** +- **Memory:** `substr()` creates new string copies (O(n) duplication). For a 5 KB prompt, two allocations per turn. +- **CPU:** String scanning repeated every turn. +- **Frequency:** Every LLM call (every turn). + +**Why it's medium:** Prompt size is modest (< 10 KB typical), so memory duplication is small (~10 KB/turn). But it's unnecessary work. Caching split result per unique prompt would eliminate it. + +**Evidence:** Static method, no static cache property. `substr()` at lines 42–43, 66 creates new strings. + +--- + +#### MED-2: MemoryInjector::format() Rebuilds Every Turn +**Files:** `src/Agent/MemoryInjector.php:17-109` + +**What:** `format()` groups memories by type, truncates each, and `implode()`s. 
Called every turn in `ContextManager::buildSystemPrompt()`. + +**Impact:** +- **Memory:** Creates intermediate arrays (`$sections`, `$lines`) and concatenated string (~1–5 KB typical). +- **CPU:** Looping through memories, truncating, grouping — repeated work. +- **Frequency:** Every turn. + +**Why it's medium:** Memory selection (`SessionManager::selectRelevantMemories`) already queries DB each turn, so some reformatting is expected. But formatted blocks could be cached keyed by memory ID set + truncation parameters. Gains modest but free. + +**Evidence:** No caching; `implode("\n\n", $sections)` at line 108 creates new string every call. + +--- + +#### MED-3: EnvironmentContext Gathered Once Per Session (No Cross-Session Cache) +**Files:** `src/Agent/EnvironmentContext.php:15-48` + +**What:** `gather()` runs `file_exists()` for 10+ project types, reads `composer.json`/`package.json`, gets OS/shell/date. Called once at session start (`AgentSessionBuilder:84-86`). + +**Impact:** +- **Memory:** Result string ~200–500 bytes kept for session lifetime. +- **I/O:** Multiple filesystem checks and JSON parsing at session start. +- **Frequency:** Once per session. + +**Why it's medium:** Session start is acceptable place, but environment rarely changes during a session. Could be cached globally (static) to skip filesystem checks across sessions. Benefit small but zero cost. + +**Evidence:** No static cache; `file_exists()` calls at lines 18–28 every invocation. + +--- + +#### MED-4: String Concatenation in Loops (ContextCompactor) +**Files:** `src/Agent/ContextCompactor.php:253-294` + +**What:** `formatMessages()` builds `$lines` array by looping through messages, then `implode()`s. Capped at 100K chars (`MAX_FORMAT_CHARS`), but still allocates intermediate array. + +**Impact:** +- **Memory:** Array of strings + final concatenated string. Peak ~100 KB during compaction. +- **Frequency:** Only during compaction (infrequent). 
+ +**Why it's medium:** Compaction is already expensive (2 LLM calls). This is a small fraction of total compaction memory spike. Could append each formatted line to a single string instead of collecting an intermediate array (PHP's `implode()` takes an array, not a generator, and PHP has no `StringBuilder`), but not urgent. + +**Evidence:** `$lines[] = ...` loop (lines 253–294) then `implode("\n", $lines)` at line 296. + +--- + +### 🟢 Low + +#### LOW-1: Compaction Threshold May Be Too High +**Files:** `src/Agent/ContextCompactor.php:17`, `src/Agent/ContextBudget.php` + +**What:** Default `compact_threshold = 60%` of context window. For a 32K context, compaction triggers at ~19K tokens. With typical 1–2 KB messages, that's ~10–20 turns between compactions. + +**Impact:** +- **Memory:** History grows larger before compaction, increasing peak memory. +- **Frequency:** Fewer compactions = less LLM cost but more RAM. + +**Why it's low:** Configurable via settings. Default is a conservative trade-off. Could be lowered to 50% or made adaptive, but not a bug. + +**Evidence:** Default at line 17; used in `shouldCompactHistory()` (`ContextManager:274-279`). + +--- + +#### LOW-2: Subagent Cleanup Only on Parent Turn +**Files:** `src/Agent/SubagentOrchestrator.php:245-258`, `src/Agent/AgentLoop.php:552-557` + +**What:** `pruneCompleted()` removes completed subagents from orchestrator arrays. Called only when parent agent processes pending results (once per parent turn). + +**Impact:** +- **Memory:** Completed subagent objects (histories, tool executors, etc.) remain in `$agents`, `$stats`, `$cancellations`, `$globalLocks` until parent's next turn. +- **Window:** Typically one turn delay (~seconds). With 10 concurrent subagents, delay is minor. + +**Why it's low:** Cleanup is prompt (next turn). No observed leaks. Could add periodic timer-based cleanup for long-running headless parents, but benefit marginal. + +**Evidence:** `pruneCompleted()` called only in `injectPendingBackgroundResults()` (`AgentLoop:552-557`). 
+ +--- + +## Memory Hotspots (file:line + estimates) + +### Primary Accumulator + +| Hotspot | File:Line | Accumulation | Estimated Size/Turn | Notes | +|---------|-----------|--------------|---------------------|-------| +| `ConversationHistory::$messages` | `src/Agent/ConversationHistory.php:19` | **Monotonic** | 100–500 bytes per message | Primary growth vector. Each turn adds 2–3 messages (user + assistant + tool results). | +| `SubagentOrchestrator::$agents` | `src/Agent/SubagentOrchestrator.php:245` | **Concurrent** | ~1 KB per active subagent | Holds `Future` + `SubagentStats` until parent prunes. | +| `SubagentOrchestrator::$stats` | same | **Concurrent** | ~500 bytes per subagent | Same lifetime as `$agents`. | +| `SubagentOrchestrator::$cancellations` | same | **Concurrent** | ~100 bytes per subagent | Cleared in `finally` block. | +| `SubagentOrchestrator::$globalLocks` | same | **Concurrent** | ~100 bytes per subagent | Released & unset when subagent finishes. | + +### Prompt Construction Bloat (Per-Turn) + +| Hotspot | File:Line | Allocation | Estimated Size | Frequency | Cache? 
| +|---------|-----------|------------|----------------|-----------|--------| +| `InstructionLoader::gather()` | `src/Agent/InstructionLoader.php:26-85` | 3–5 file reads + string concat | 2–50 KB (depends on AGENTS.md) | Once/session | ❌ | +| `ToolRegistry::toPrismTools()` | `src/Tool/ToolRegistry.php:67-103` | 300–500 objects (PrismTool + params) | 60–250 KB | Per subagent spawn | ❌ | +| `ProtectedContextBuilder::build()` (git calls) | `src/Agent/ProtectedContextBuilder.php:24-50` | 2 `shell_exec()` strings | ~200 bytes | Every turn | ❌ | +| `TaskStore::renderTree()` | `src/Agent/TaskStore.php` (ref: `ContextManager:270`) | Recursive string build | ~1–10 KB (unbounded) | Every turn | ❌ | +| `PromptFrameBuilder::splitSystemPrompt()` | `src/LLM/PromptFrameBuilder.php:31-77` | 2 `substr()` copies | ~5–10 KB | Every LLM call | ❌ | +| `MemoryInjector::format()` | `src/Agent/MemoryInjector.php:17-109` | Array + `implode` | ~1–5 KB | Every turn | ❌ | +| `EnvironmentContext::gather()` | `src/Agent/EnvironmentContext.php:15-48` | FS checks + JSON parse | ~200–500 bytes | Once/session | ❌ | + +### Temporary Spikes (Transient) + +| Hotspot | File:Line | Spike Size | Duration | Reclaimed | +|---------|-----------|------------|----------|-----------| +| Compaction formatted transcript | `src/Agent/ContextCompactor.php:233-275` | Up to 100 KB string | During 2 LLM calls (seconds) | Yes (after apply) | +| CompactionPlan object | `src/Agent/ContextCompactor.php:104-160` | ~10–50 KB (new Message objects) | Brief | Yes | +| Deduplication indexes | `src/Agent/ToolResultDeduplicator.php:28-108` | O(n) where n = tool result messages | Per tool round | Yes | +| Pruning candidates array | `src/Agent/ContextPruner.php:37-104` | O(n) | Per prune | Yes | + +--- + +## Convergence Issues + +### Issue 1: Compaction Threshold vs. Prompt Bloat +**Interaction:** The `context.compact_threshold` (default 60%) determines when history compaction triggers. 
However, **prompt construction bloat** (unbounded task tree, no instruction caching) inflates the **base system prompt size**, reducing effective context window for conversation history. This causes **earlier compaction triggers** than necessary, increasing LLM call frequency. + +**Root cause:** Base prompt is rebuilt every turn with redundant data. A 50 KB base prompt (large AGENTS.md + unbounded tasks) leaves less room for history, causing compaction at ~15K tokens instead of ~19K. + +**Impact:** More frequent compactions → more LLM calls → higher cost + temporary memory spikes. + +--- + +### Issue 2: Subagent Memory Multiplication via Tool Schema Duplication +**Interaction:** Each subagent gets its own `AgentLoop` with fresh `ToolRegistry::toPrismTools()` call. With 10 concurrent subagents and depth 3, **tool schema objects are duplicated 30+ times** in memory simultaneously. + +**Root cause:** Tool schemas are static metadata but treated as per-instance data. No shared cache in `ToolRegistry`. + +**Impact:** 60–250 KB × 30 = **1.8–7.5 MB** of duplicated schema objects in memory during peak concurrency. Not catastrophic but wasteful. + +--- + +### Issue 3: Git Shell Calls Accumulate Latency, Not Memory +**Interaction:** While git calls don't cause memory leaks, their **per-turn execution** adds cumulative latency. In long sessions (100+ turns), 200 shell calls can add **200–1000 ms** of overhead. This is a **performance convergence issue** — the design assumes git state is needed every turn, but it's quasi-static. + +**Root cause:** No caching of git root/branch. `ProtectedContextBuilder` rebuilds every turn. + +**Impact:** Degraded user experience; perceived slowness. + +--- + +### Issue 4: Task Tree Growth Accelerates Context Pressure +**Interaction:** `TaskStore::renderTree()` output grows with each decomposed task. 
Unbounded growth means: +- System prompt size increases over session lifetime +- Context window fills faster → more frequent compaction +- Compaction replaces older history, but task tree itself is **never pruned** + +**Root cause:** No truncation logic for task tree rendering. All tasks forever included. + +**Impact:** Long sessions with many subtasks see **progressive prompt bloat** that never recedes, even after history compaction. Eventually dominates context window. + +--- + +## Recommendations + +### Priority 1 (Immediate — High Impact, Low Effort) + +#### REC-1: Cache InstructionLoader::gather() Result +**Target:** `src/Agent/InstructionLoader.php:26-85` + +**Change:** Add `static ?string $cached = null` to `gather()`. On first call, read files and store. Subsequent calls return cached string. + +**Impact:** +- Eliminates 3–5 disk reads per session +- Saves 2–50 KB string allocations per session +- Zero risk — instruction files rarely change during runtime + +**Effort:** 5 minutes. Add 2 lines. + +--- + +#### REC-2: Cache ToolRegistry::toPrismTools() Result +**Target:** `src/Tool/ToolRegistry.php:67-103` + +**Change:** Add private `?array $cachedPrismTools = null`. In `toPrismTools()`, check cache; if null, build and store. Invalidate only when `register()`/`unregister()` called (rare). + +**Impact:** +- Saves 60–250 KB per subagent spawn +- With 30 subagents/session → **1.8–7.5 MB saved** +- Reduces object allocation churn + +**Effort:** 10 minutes. Add cache property + check. + +--- + +#### REC-3: Cache Git Shell Calls +**Target:** `src/Agent/InstructionLoader.php:102` (gitRoot), `src/Agent/ProtectedContextBuilder.php:57` (gitBranch) + +**Change:** Add `static ?string $cachedRoot` and `static ?string $cachedBranch` to respective methods. Cache result for lifetime of request. 
+ +**Impact:** +- Eliminates 2 shell execs per turn +- At 100 turns → 200 fewer subprocesses +- Saves ~200 bytes × 100 = 20 KB (small), but the latency gain is significant + +**Effort:** 5 minutes per method. + +--- + +#### REC-4: Truncate Task Tree Rendering +**Target:** `src/Agent/TaskStore::renderTree()` (need to locate file) + +**Change:** Add configurable limit: e.g., `max_tasks: 50` or `max_chars: 10240`. Truncate oldest tasks first. Return `"... truncated N tasks"` note. + +**Impact:** +- Bounds system prompt growth from task tree +- Prevents unbounded context consumption +- Forces user to `/compact` or complete tasks to make room + +**Effort:** 15–30 minutes (need to inspect `TaskStore` implementation). + +--- + +### Priority 2 (Medium-Term — Moderate Impact) + +#### REC-5: Implement PromptFrameBuilder Split Cache +**Target:** `src/LLM/PromptFrameBuilder.php:31-77` + +**Change:** Add static `array $cache = []` keyed by `md5($prompt)`. Store `['prefix' => ..., 'volatile' => ...]`. Reuse if prompt unchanged. + +**Impact:** +- Saves 2 `substr()` allocations per LLM call +- Modest memory savings (~5–10 KB/turn) +- Reduces CPU for string ops + +**Effort:** 10 minutes. + +--- + +#### REC-6: Cache EnvironmentContext::gather() +**Target:** `src/Agent/EnvironmentContext.php:15-48` + +**Change:** Convert `gather()` to instance method with private `?string $cached = null`. Build once per `EnvironmentContext` object (created once per session already). Gathering already happens once per session, so this only avoids repeated filesystem checks if `gather()` is called more than once on the same object. + +**Impact:** Negligible (already once/session), but cleans up pattern. + +**Effort:** 5 minutes. + +--- + +#### REC-7: Batch Memory Extraction During Compaction +**Target:** `src/Agent/ContextCompactor.php:189-224` + +**Change:** Track last extraction turn/timestamp. Skip extraction if recent (e.g., within 5 turns or < 100 new messages). Or batch: only extract if `count($history->newMessagesSinceLastExtraction) > 20`. 
+ +**Impact:** +- Reduces compaction LLM calls from 2 → 1 in many cases +- Saves cost + temporary memory spike from extraction response +- Minor risk of missing some memories, but memories are cumulative and idempotent + +**Effort:** 20–30 minutes (need to track state in `ContextCompactor`). + +--- + +#### REC-8: Periodic Subagent Cleanup for Headless Agents +**Target:** `src/Agent/SubagentOrchestrator.php:245-258` + +**Change:** Add timer-based cleanup (e.g., every 30 seconds) in addition to on-demand in `injectPendingBackgroundResults()`. Use `EventLoop` repeat callback. + +**Impact:** +- Frees subagent memory sooner in long-running headless sessions where parent may not call `injectPending...` frequently +- Minor improvement; current cleanup is already timely for interactive use + +**Effort:** 15 minutes. + +--- + +### Priority 3 (Long-Term — Architectural) + +#### REC-9: Implement Shared Tool Schema Registry +**Target:** `src/Tool/ToolRegistry.php` + tool classes + +**Change:** Each tool class defines `static ?PrismTool $schemaCache`. First call to `toPrismTool()` builds and stores. `ToolRegistry::toPrismTools()` returns these shared instances (or clones if mutability concerns). + +**Impact:** +- Eliminates all tool schema duplication across subagents +- Could save **5–10 MB** in sessions with many subagents +- Clean separation of static metadata + +**Effort:** 1–2 hours (need to ensure PrismTool objects are immutable or cloned). 
+ +--- + +#### REC-10: Incremental Prompt Assembly Cache +**Target:** `src/Agent/ContextManager.php:257-289` (buildSystemPrompt) + +**Change:** Introduce `PromptCache` object that stores: +- Stable base prompt (instructions + environment) +- Tool schemas (shared reference) +- Mode suffix (constant) +- Only rebuild volatile parts (memories, task tree) each turn + +**Impact:** +- Reduces per-turn string allocations from ~10–50 KB to ~2–5 KB +- Eliminates repeated `implode()` of static parts +- Significant for long sessions + +**Effort:** 2–3 hours (design + implementation). + +--- + +#### REC-11: Task Tree Segmentation & Archival +**Target:** `src/Agent/TaskStore.php` + +**Change:** Split tasks into "active" (last N) and "archived" (summarized). Render only active. Archive old tasks via compaction-like process. + +**Impact:** +- Prevents unbounded task tree growth +- Keeps system prompt size bounded +- Aligns with history compaction philosophy + +**Effort:** 2–3 hours. + +--- + +#### REC-12: Benchmark Suite Activation +**Target:** `docs/ram-audit/benchmarks/agent-loop-memory.php` (provided in agent-loop-lifecycle) + +**Action:** Create and run benchmark to establish baseline memory growth curves. Test with: +- 100 turns, 3 tools/turn, compaction on/off +- 500 turns, 5 tools/turn +- 1000 turns, 0 tools (pure chat) + +**Impact:** Quantifies actual memory behavior; validates fixes. + +**Effort:** 10 minutes to create file + run benchmarks. + +--- + +## Summary Table + +| Category | Issue | Severity | Est. 
Savings (per session) | Effort | Priority | +|----------|-------|----------|----------------------------|--------|----------| +| Prompt bloat | Instruction file caching | 🔴 Critical | 2–50 KB + I/O | 5 min | P1 | +| Prompt bloat | Tool schema caching | 🔴 Critical | 1.8–7.5 MB | 10 min | P1 | +| Prompt bloat | Git shell call caching | 🟠 High | 200 ms latency | 5 min | P1 | +| Prompt bloat | Task tree truncation | 🟠 High | 1–10 KB/turn bounded | 30 min | P1 | +| Prompt bloat | Prompt split cache | 🟡 Medium | 5–10 KB/turn | 10 min | P2 | +| Prompt bloat | Memory formatter cache | 🟡 Medium | 1–3 KB/turn | 10 min | P2 | +| Compaction | Batch memory extraction | 🟡 Medium | 1 LLM call / 5 turns | 30 min | P2 | +| Subagents | Periodic cleanup | 🟢 Low | ~1 KB/subagent sooner | 15 min | P2 | +| Architecture | Shared tool schemas | 🟢 Long-term | 5–10 MB total | 2 hrs | P3 | +| Architecture | Incremental prompt cache | 🟢 Long-term | 5–20 KB/turn | 3 hrs | P3 | +| Architecture | Task segmentation | 🟢 Long-term | Bounded prompt | 3 hrs | P3 | + +**Total immediate win (P1):** ~2–8 MB saved + significant latency reduction + bounded prompt growth. **Effort: ~1 hour.** + +--- + +## Conclusion + +KosmoKrator's memory management is **structurally sound** — history growth is bounded by automatic compaction/pruning, subagents are isolated, and no leaks exist. However, **prompt construction inefficiencies** represent a **systematic, repeatable waste** of memory and CPU that affects every session regardless of size. + +The four critical/high issues (instruction caching, tool schema caching, git calls, task tree truncation) are **low-hanging fruit** offering immediate 2–8 MB savings per session with < 1 hour total implementation time. These should be addressed in the next sprint. + +Longer-term architectural improvements (shared schemas, incremental prompt cache) offer further gains but require more careful design. + +**Next steps:** +1. 
Implement Priority 1 recommendations (REC-1 through REC-4) +2. Create and run benchmark suite to quantify baseline and improvement +3. Monitor production memory logs; consider lowering `compact_threshold` to 50% after prompt bloat fixes +4. Explore Priority 2 if memory pressure persists in long-running sessions + +--- + +*Report generated from synthesis of agent-loop-lifecycle, context-memory-audit, stuck-detection-memory, and prompt-engineering-overhead Phase 1 agents.* diff --git a/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-io-performance.md b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-io-performance.md new file mode 100644 index 0000000..8e3d90f --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-io-performance.md @@ -0,0 +1,222 @@ +# I/O Memory Efficiency Report + +## Executive Summary + +**Overall Rating: GOOD** with **6 medium-risk** and **5 low-risk** memory concerns identified. + +The system demonstrates strong engineering for memory efficiency: streaming I/O, constant-memory algorithms, and bounded result sets. The primary risks are **cache unboundedness** and **orphaned background result accumulation** under failure scenarios. 
+ +**Key Findings:** +- FileReadTool maintains an unbounded read cache that grows across process lifetime +- BashTool buffers entire command output in memory before truncation +- Subagent background results can orphan if parent crashes +- GlobTool and GrepTool use eager evaluation with intermediate array creation +- Shell session management is sound with proper idle cleanup +- OutputTruncator uses spill-to-disk strategy effectively (but post-facto) + +--- + +## Findings (Severity) + +### Medium Risk + +#### F1: FileReadTool Unbounded Cache +- **File:** `src/Tool/Coding/FileReadTool.php:21,70-72,103-104` +- **Issue:** `$readCache` array grows unbounded across process lifetime; no eviction policy +- **Impact:** Hundreds of MB in long-running sessions with many file reads (e.g., codebase exploration) +- **Current state:** Cache stores only boolean flags, minimizing per-entry footprint; FileReadTool is a singleton in ToolRegistry + +#### F2: BashTool Full Output Buffering +- **File:** `src/Tool/Coding/BashTool.php:96-108` +- **Issue:** Stdout and stderr fully buffered in memory via `buffer()` before OutputTruncator runs +- **Impact:** Commands producing >100 MB output will spike RAM; no streaming to disk or early truncation +- **Current mitigation:** OutputTruncator caps at 2000 lines / 50 KB but runs **after** tool returns (ToolExecutor line 300-302) + +#### S1: Subagent PendingResults Orphaned +- **File:** `src/Agent/SubagentOrchestrator.php:34,420` +- **Issue:** `$pendingResults[parentId]` never cleared if parent agent crashes or exits without calling `collectPendingResults()` +- **Impact:** Results (strings, potentially KB–MB each) accumulate per background subagent over time +- **Current state:** Documented in `docs/memory-leak-audit.md` as known issue; `pruneCompleted()` does not touch `$pendingResults` + +#### S3: Failed Agents Not Pruned +- **File:** `src/Agent/SubagentOrchestrator.php:394-399` +- **Issue:** `pruneCompleted()` only removes `'done'` and 
`'cancelled'` agents; `'failed'` agents remain forever +- **Impact:** `Future` objects hold closure references → entire agent context retained → potential MB-scale leaks + +#### G1: GlobTool Intermediate Array Buildup +- **File:** `src/Tool/Coding/GlobTool.php:93-99` +- **Issue:** `array_merge()` inside recursion loops creates O(n²) intermediate arrays for deep directory trees +- **Impact:** Temporary memory spikes during glob operations on nested structures; 10k files in nested tree → ~10 MB temporary +- **Current mitigation:** Result set capped at 200 files after full sort/deduplication (lines 59-62) + +#### G2: GrepTool Pre-Truncation Buffering +- **File:** `src/Tool/Coding/GrepTool.php:68` +- **Issue:** `buffer($process->getStdout())` reads entire output into string before applying `--max-count=50` or 100-line cap +- **Impact:** Large result sets (10k+ matches) held fully in memory despite output limits; 10k matches × 200 bytes = 2 MB +- **Current mitigation:** ripgrep's `--max-count=50` limits per-file matches; final `array_slice` caps at 100 lines (line 92) + +### Low Risk + +#### F3: FileEditTool Temp File Leaks +- **File:** `src/Tool/Coding/FileEditTool.php:179` +- **Issue:** Orphaned `*.tmp.` files if process crashes mid-write; no shutdown cleanup registered +- **Impact:** Filesystem accumulation, not RAM; requires manual cleanup or TTL-based reaping + +#### S2: Subagent Groups Semaphore Map Never Cleared +- **File:** `src/Agent/SubagentOrchestrator.php:28,469` +- **Issue:** `$groups` array accumulates `LocalSemaphore` objects per unique group name; never removed even after group empties +- **Impact:** Minor memory growth per unique group name (~few hundred bytes each); problematic if group names are dynamic (e.g., per-task IDs) + +#### G3: GlobTool Eager Sort Before Cap +- **File:** `src/Tool/Coding/GlobTool.php:59-62` +- **Issue:** `sort()` and `array_unique()` applied to full result set before 200-file cap +- **Impact:** Wasted CPU/memory sorting 
thousands of paths only to discard most; temporary O(n) overhead + +#### G4: GlobTool Unlimited Recursion Depth +- **File:** `src/Tool/Coding/GlobTool.php:globStar()` +- **Issue:** No depth limit; symlink loops could cause infinite recursion +- **Impact:** Potential hang or memory exhaustion in pathological directory structures + +#### G5: No Pattern Compilation Caching +- **Files:** `src/Tool/Coding/GlobTool.php`, `src/Tool/Coding/GrepTool.php` +- **Issue:** Patterns re-compiled on every invocation; no shared cache +- **Impact:** Minor CPU overhead; no direct memory impact + +--- + +## Memory Hotspots (file:line + estimates) + +### High-Impact Hotspots + +| File:Line | Component | Memory Profile | Estimate | +|-----------|-----------|----------------|----------| +| `FileReadTool.php:21` | `$readCache` array | Unbounded growth; one boolean entry per distinct `(path,mtime,offset,limit)` | 1k entries ≈ 10 KB; 100k entries ≈ 1 MB; 1M entries ≈ 10 MB | +| `BashTool.php:96-107` | `$buf` accumulation | O(command output size) before truncation; repeated concatenation in progress callback | 100 MB output → 100 MB RAM spike | +| `SubagentOrchestrator.php:34` | `$pendingResults` | Accumulates per-parent if not collected; each result string KB–MB | 100 background agents × 100 KB = 10 MB per orphaned parent | +| `GlobTool.php:93-99` | Recursion intermediates | O(n²) temporary arrays during deep `array_merge()` loops | 10k files in nested tree → ~10 MB temporary | +| `GrepTool.php:68` | `buffer()` output | Full stdout before any limit applied | 10k matches × 200 bytes = 2 MB buffer | + +### Moderate-Impact Hotspots + +| File:Line | Component | Memory Profile | Estimate | +|-----------|-----------|----------------|----------| +| `FileEditTool.php:136-183` | Temp file streaming | 64 KB chunks via `stream_copy_to_stream()`; constant memory | Negligible | +| `ShellSession.php:18-137` | `$buffer` string | Grows monotonically per session; drained via `readUnread()` but retained until 
session kill | 1 MB per active long-running session | +| `SubagentOrchestrator.php:28` | `$groups` map | One `LocalSemaphore` object per unique group name (~few hundred bytes) | 100 groups × 500 bytes = 50 KB | + +--- + +## I/O Bottlenecks + +### 1. Tool Execution Buffering + +**BashTool** (`src/Tool/Coding/BashTool.php:96-108`) and **GrepTool** (`src/Tool/Coding/GrepTool.php:68`) both use `Amp\Process\Process` with `buffer()` to read entire stdout/stderr into memory before any processing. This creates a **synchronization point** where all output must be held in RAM. + +- **Current caps:** OutputTruncator (2000 lines / 50 KB) runs post-facto in `ToolExecutor.php:300-302` +- **Bottleneck:** Large outputs (logs, dumps, binary data) cause RAM spikes before truncation +- **Severity:** Medium — affects any tool executing external commands + +### 2. Large File Handling + +**FileReadTool** (`src/Tool/Coding/FileReadTool.php:75-82,117-149`) implements smart thresholding: +- **< 10 MB:** `file()` loads entire file → O(file size) memory (acceptable for intended use) +- **≥ 10 MB:** `fopen()` + `fgets()` loop → O(64 KB buffer + line) constant memory ✓ + +**FileWriteTool** (`src/Tool/Coding/FileWriteTool.php:57`) holds entire content string in memory once — acceptable for <10 MB writes. + +**FileEditTool** (`src/Tool/Coding/FileEditTool.php:81-183`) uses 64 KB chunks and atomic `rename()` — excellent constant-memory algorithm ✓ + +### 3. Shell Session Lifecycle + +**ShellSession** (`src/Tool/Coding/ShellSession.php:18-137`) buffers all output in `$buffer` string with no eviction. However: + +- **Cleanup:** `ShellSessionManager::cleanupIdleSessions()` (line 49) removes sessions where `isDrained()` (exit + no unread output) after 300s TTL +- **Assessment:** ✅ Bounded by idle timeout; no unbounded accumulation +- **Caveat:** Long-running sessions with continuous output can accumulate MBs until drained or killed + +### 4. 
File Search Memory (Glob/Grep) + +**GlobTool** (`src/Tool/Coding/GlobTool.php:52-101`): +- Uses native `glob()` (eager array, not iterator) +- Custom `globStar()` recursion with `array_merge()` creates intermediate arrays +- Full `sort()` + `array_unique()` before 200-file cap +- **Bottleneck:** O(n) temporary memory for full match set; O(n²) intermediates in deep recursion + +**GrepTool** (`src/Tool/Coding/GrepTool.php:73-78`): +- Same eager `buffer()` pattern as BashTool +- ripgrep `--max-count=50` and final 100-line slice are **process-level** and **post-processing** limits respectively +- **Bottleneck:** Entire output held in memory before limits applied + +--- + +## Recommendations + +### Priority 1 (Address in next sprint) + +1. **FileReadTool Cache Eviction (F1)** + - Add LRU eviction with configurable max entries (e.g., 1000) + - Or add TTL (e.g., 1 hour) + - Consider per-AgentContext cache instead of singleton + - **Files:** `src/Tool/Coding/FileReadTool.php:21` + +2. **BashTool/GrepTool Streaming Output (F2, G2)** + - Stream stdout/stderr directly to `OutputTruncator` during read loop, applying line/byte limits incrementally + - Or add `stream_to_file` parameter for outputs >1 MB + - Enforce per-command output limit with early process kill + - **Files:** `src/Tool/Coding/BashTool.php:96-108`, `src/Tool/Coding/GrepTool.php:68` + +3. **SubagentOrchestrator PendingResults Cleanup (S1)** + - Add TTL (e.g., 1 hour) to `$pendingResults` entries with timestamp + - Or prune `$pendingResults[parentId]` when all agents for that parent reach terminal state + - **Files:** `src/Agent/SubagentOrchestrator.php:34,420` + +4. **Include Failed Agents in Pruning (S3)** + - Add `'failed'` to `$terminalStates` in `pruneCompleted()` + - **Files:** `src/Agent/SubagentOrchestrator.php:394` + +### Priority 2 (Next quarter) + +5. 
**GlobTool Optimization (G1, G3, G4)** + - Apply 200-file cap earlier in recursion to avoid building full array + - Replace `array_merge()` with generator-based yielding to eliminate intermediate arrays + - Add recursion depth limit (e.g., 20) to prevent symlink loops + - **Files:** `src/Tool/Coding/GlobTool.php:52-101` + +6. **GrepTool Streaming (G2)** + - Process ripgrep/grep output line-by-line as it arrives, writing directly to OutputTruncator stream + - Avoid full `buffer()` call; use `onRead()` callback with incremental processing + - **Files:** `src/Tool/Coding/GrepTool.php:68-78` + +7. **FileEditTool Temp File Cleanup (F3)** + - Register `register_shutdown_function()` to cleanup orphaned `*.tmp.*` files matching pattern + - Or switch to `tmpfile()` + stream wrapper for automatic cleanup + - **Files:** `src/Tool/Coding/FileEditTool.php:179` + +8. **Subagent Groups Cleanup (S2)** + - Clear `$groups[groupName]` when semaphore count reaches 0 and no pending agents + - `WeakMap` (available since PHP 8.0) gives automatic cleanup only if groups are keyed by objects; it cannot be keyed by group-name strings + - **Files:** `src/Agent/SubagentOrchestrator.php:28,469` + +### Priority 3 (Nice to have) + +9. **Pattern Compilation Cache (G5)** + - Implement shared cache for glob patterns and grep regex (e.g., a static array; `SplObjectStorage` and `WeakMap` accept only object keys, so they do not fit a string cache key) + - Cache key: pattern string + flags + - **Files:** `src/Tool/Coding/GlobTool.php`, `src/Tool/Coding/GrepTool.php` + +10.
**Benchmark Suite** + - Create `docs/ram-audit/benchmarks/tool-memory.php` with scenarios: + - Concurrent tool execution: 10 / 50 / 100 parallel no-op tools + - Large file read/write: 10 MB, 50 MB, 100 MB + - Glob on 10,000 files (simulated tree) + - Grep on 10,000 files with 5000 matches + - Use `memory_get_peak_usage(true)` before/after, median of 5 runs + - **Path:** `docs/ram-audit/benchmarks/tool-memory.php` + +--- + +**Report generated from Phase 1 agent findings:** +- `tool-execution-memory` (comprehensive system audit) +- `large-file-handling` (FileReadTool/FileWriteTool/FileEditTool analysis) +- `shell-session-management` (ShellSession/SessionManager lifecycle) +- `glob-grep-optimization` (GlobTool/GrepTool memory patterns) diff --git a/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-security.md b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-security.md new file mode 100644 index 0000000..59724de --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/ram-audit/synthesis-security.md @@ -0,0 +1,344 @@ +# Security-Adjacent RAM Efficiency Audit — Synthesis Report + +**Audit Scope:** Permission system, Codex authentication integration, configuration caching +**Date:** 2026-04-03 +**Status:** Phase 1 findings synthesized + +--- + +## Executive Summary + +This report synthesizes RAM efficiency audits across three critical subsystems: permission evaluation, Codex authentication, and configuration management. The findings reveal **systemic caching failures** that create both performance bottlenecks and **security-adjacent vulnerabilities**, particularly around memory exhaustion attack vectors and credential exposure through predictable memory patterns. 
+ +**Key Critical Issues:** +- `PermissionRule::matchesGlob()` compiles regex on every call — hundreds of times per permission check +- `SettingsCodexTokenStore` performs 7 separate database queries per token operation (an N+1-style access pattern) with no in-memory cache +- `SettingsManager::reloadRepository()` triggers a full config re-parse (4+ YAML files) on every settings write +- No caching exists for path resolutions, evaluation results, or parsed YAML anywhere in the stack + +**Security Implications:** +- Memory exhaustion via repeated permission checks on complex rule sets +- Token refresh storms can saturate SQLite connection pool and memory +- Config write amplification creates predictable memory churn patterns +- Lack of rate limiting on permission evaluation enables DoS via tool spam +- Credentials repeatedly read from disk increase attack surface in shared hosting + +--- + +## Findings + +### Critical Severity + +#### 1. Regex Compilation in Hot Path — PermissionRule::matchesGlob() +**Files:** `src/Tool/Permission/PermissionRule.php:51-60`, `src/Tool/Permission/Check/DenyPatternCheck.php:39`, `src/Tool/Permission/Check/BlockedPathCheck.php:66`, `src/Tool/Permission/GuardianEvaluator.php:106` + +**Issue:** Every call to `matchesGlob()` compiles a fresh regex via `preg_quote()` + `str_replace()` + `preg_match()`. This method is invoked: +- For each deny pattern in each matching rule (DenyPatternCheck) +- For each blocked path pattern (BlockedPathCheck, up to 4× per path) +- For each safe command pattern (GuardianEvaluator, O(p) per call) + +With ~50 tools, ~10 rules, ~5 deny patterns per rule, a single permission check can trigger **250+ regex compilations**. PHP's internal regex cache is limited and not guaranteed to hit. + +**RAM Impact:** Each compiled regex pattern string occupies ~200-500 bytes in memory. At 250 compilations per check × 10 concurrent requests = **~500KB - 1.25MB** of transient regex strings per request cycle, plus GC pressure.
+ +**Security Risk:** An attacker controlling tool arguments can force evaluation of many deny patterns, causing CPU/memory exhaustion. No rate limiting exists on permission checks. + +--- + +#### 2. N+1 Token Storage Queries — SettingsCodexTokenStore +**Files:** `src/LLM/Codex/SettingsCodexTokenStore.php:32-38`, `src/LLM/Codex/SettingsCodexTokenStore.php:63-85` + +**Issue:** Token storage uses 7 individual settings keys (`provider.codex.*`). Every `current()` performs 7 separate SELECT queries; every `save()` performs 7 separate INSERT/UPDATE queries. No in-memory caching; every call hits SQLite. + +**RAM Impact:** Each query returns a row (~200-300 bytes). 7 queries × result set overhead × concurrent requests = **~1-2KB per request** in short-lived DB result objects. More critically, **connection pool exhaustion** under load can cause queued requests to accumulate memory. + +**Security Risk:** Token refresh storms (multiple simultaneous requests triggering refresh) cause 7 writes + HTTP call per refresh, amplifying memory/CPU usage. No refresh debouncing. + +--- + +#### 3. Full Config Reload on Every Write — SettingsManager::reloadRepository() +**Files:** `src/Settings/SettingsManager.php:266-274` + +**Issue:** After any settings `set()` or `delete()`, `reloadRepository()` creates a **new ConfigLoader** and re-parses all 4 bundled YAML files + user + project config, then copies data into the Repository. This happens on every single settings write. + +**RAM Impact:** Total YAML size ~28KB, but parsing creates intermediate arrays and objects. A full reload generates **~100-150KB** of temporary arrays/objects per write, which are then GC'd. Under rapid successive writes (e.g., batch updates), this creates significant memory churn and can push PHP memory_limit. + +**Security Risk:** An attacker with settings write access (or a buggy tool) can trigger repeated config reloads to exhaust memory. The pattern is predictable and not rate-limited. 
+ +--- + +### High Severity + +#### 4. No Path Resolution Cache — PathResolver::resolve() +**Files:** `src/Tool/Permission/PathResolver.php:21-39` + +**Issue:** `realpath()` syscall executed on every path check with no caching. `BlockedPathCheck` calls this for every file operation, and `GuardianEvaluator::isInsideProject()` calls it for every command. + +**RAM Impact:** Each `realpath()` result is a string (~256-1024 bytes). With 100 file checks per request, that's **25-100KB** of repeated string allocations. Strings are duplicated in memory if same path resolved multiple times. + +**Security Risk:** Path traversal attacks cause repeated resolution of deep/nested paths, amplifying memory usage. No TTL or eviction on cache (because none exists). + +--- + +#### 5. Duplicate Rule Evaluation — DenyPatternCheck + RuleCheck + ModeOverrideCheck +**Files:** `src/Tool/Permission/Check/DenyPatternCheck.php:26-49`, `src/Tool/Permission/Check/RuleCheck.php:25-48`, `src/Tool/Permission/Check/ModeOverrideCheck.php:30-70` + +**Issue:** Rules are evaluated up to **3 times** in a single permission flow: +1. `DenyPatternCheck` iterates all rules, calls `matchesGlob()` for each deny pattern +2. `RuleCheck` iterates all rules again, calls `evaluate()` (which calls `matchesGlob()` again) +3. `ModeOverrideCheck` iterates all rules a third time if mode is Guardian + +**RAM Impact:** Each evaluation creates temporary arrays and regex strings. Triple evaluation multiplies memory churn by 3×. For 50 rules × 5 patterns = 750 regex compilations instead of 250. + +**Security Risk:** Complex permission rules (many deny patterns) are amplified 3×, making them a more effective DoS vector. + +--- + +#### 6. No YAML Parse Cache — ConfigLoader & YamlConfigStore +**Files:** `src/ConfigLoader.php:26-47`, `src/Settings/YamlConfigStore.php:23-35` + +**Issue:** Every `SettingsManager::get()` call triggers `load()` which reads and parses YAML from disk. No opcode or user-space cache. 
`ConfigLoader::load()` parses 4+ YAML files on every boot and settings write. + +**RAM Impact:** Each `Yaml::parse()` creates a full array tree (~28KB for all configs). A single `get()` loads project + global = **~56KB** of parsed arrays. With 10 `get()` calls per request = **~560KB** of transient config data (though PHP may reuse array structures, still significant). + +**Security Risk:** Repeated disk I/O + parsing increases request latency, making timing attacks easier. Also increases memory footprint for concurrent requests. + +--- + +#### 7. No Token In-Memory Caching — CodexOAuthService & SettingsCodexTokenStore +**Files:** `vendor/opencompany/prism-codex/src/CodexOAuthService.php:180-196`, `src/LLM/Codex/SettingsCodexTokenStore.php` + +**Issue:** Every `getAccessToken()` call reads 7 settings from DB. No per-request or short-term caching. Even within a single request, multiple provider calls re-fetch the same token. + +**RAM Impact:** Each token fetch creates a `CodexToken` object (~500 bytes) + 7 DB result rows. With 5 LLM calls per request = **~2.5KB** of duplicated token objects + **~3.5KB** of DB results = **~6KB** per request that could be cached. + +**Security Risk:** Token refresh under concurrent load causes multiple simultaneous refreshes, each writing to SQLite, risking database lock contention and memory spikes from queued requests. + +--- + +### Medium Severity + +#### 8. No Provider Instance Reuse — RelayProviderRegistrar & PrismManager +**Files:** `src/LLM/RelayProviderRegistrar.php:42-117`, `vendor/prism-php/prism/src/PrismManager.php:40-57` + +**Issue:** Each `PrismManager::resolve()` creates a new provider instance. No caching of provider objects. + +**RAM Impact:** Provider instance ~200-500 bytes. With 10 LLM calls per request using same provider, that's **2-5KB** of duplicated objects. Minor but unnecessary. 
+ +**Security Risk:** Provider instantiation may involve reading credentials from config each time, increasing exposure in memory dumps. + +--- + +#### 9. Repeated SettingsPaths Instantiation & Directory Walks +**Files:** `src/Settings/SettingsManager.php` (multiple), `src/ConfigLoader.php:125-150` + +**Issue:** `SettingsPaths` objects created on every `resolve()`/`getRaw()` call. Each instantiation re-evaluates `file_exists()` and walks directory tree for project root. + +**RAM Impact:** Each `SettingsPaths` ~100 bytes + path strings. Directory walk for deep project (e.g., 6 levels) creates 12 path strings (~200 bytes). With 10 calls = **~2KB** of temporary path strings. + +**Security Risk:** Directory walk on every load increases I/O, potentially leaking directory structure via timing. + +--- + +#### 10. JWT Decode on Every Token Store +**Files:** `vendor/opencompany/prism-codex/src/CodexOAuthService.php:246-304` + +**Issue:** `storeTokens()` decodes JWT (base64 + json) on every token exchange to extract `account_id` and `email`. No caching of decoded claims. + +**RAM Impact:** Decoded JWT claims array ~500 bytes. With each refresh + initial auth = **~1KB** per auth flow. Minor but repeated. + +**Security Risk:** JWT decoding failures could leak partial token data in error messages. + +--- + +### Low Severity + +#### 11. No File Watching / Invalidation Strategy +**Files:** All config loading code + +**Issue:** No inotify/fswatch; config changes only detected on next load. Not a RAM issue directly, but prevents efficient cache invalidation, forcing either stale cache or no cache. + +**RAM Impact:** N/A — current design avoids file handle overhead. + +**Security Risk:** Stale config may persist indefinitely in long-running processes (if ever introduced). 
+ +--- + +## Memory Hotspots + +| File:Line | Component | Estimated KB per Request | Notes | +|-----------|-----------|--------------------------|-------| +| `PermissionRule.php:51-60` | Regex compilation hotspot | 20-50 KB | 250+ compilations × ~200 bytes each | +| `SettingsCodexTokenStore.php:32-38` | Token read (7 queries) | 3-5 KB | 7 DB result sets + CodexToken object | +| `SettingsManager.php:266-274` | Full config reload on write | 100-150 KB | 5 YAML parses + array merges | +| `YamlConfigStore.php:23-35` | YAML parse per get | 50-100 KB | 2 parses per `get()` call | +| `BlockedPathCheck.php:48-74` | Path resolution + pattern matching | 10-30 KB | realpath() + multiple matchesGlob | +| `GuardianEvaluator.php:94-112` | Safe command pattern matching | 5-15 KB | O(p) regex compilations per call | +| `DenyPatternCheck.php:26-49` | Deny pattern iteration | 10-20 KB | Rules × deny patterns × regex | +| `ModeOverrideCheck.php:30-70` | Rule re-evaluation | 10-20 KB | Duplicate of RuleCheck work | +| `ConfigLoader.php:125-150` | Directory walk | 1-3 KB | Per project config load | +| `RelayProviderRegistrar.php:42-117` | Provider instantiation | 2-5 KB | Per provider resolve | + +**Total estimated RAM churn per typical request:** **~200-400 KB** of short-lived objects/strings due to caching misses. With 10 concurrent requests, that's **2-4 MB** of transient memory pressure. + +--- + +## Attack Vectors (Memory Exhaustion) + +### 1. Permission Rule Bomb +**Vector:** Attacker provides tool arguments that match many deny patterns (e.g., wildcard paths, glob patterns). Each match triggers `matchesGlob()` for every deny pattern across all rules. + +**Amplification:** With 50 rules × 5 deny patterns = 250 regex compilations per check. No limit on number of permission checks per request (tools can be called repeatedly). + +**Impact:** CPU spike + memory allocation for regex strings. Can exhaust PHP memory_limit if combined with other allocations. 
+ +**Mitigation Status:** None — no rate limiting, no caching, no pattern complexity limits. + +--- + +### 2. Token Refresh Storm +**Vector:** Multiple concurrent requests with expiring Codex token. Each request calls `getAccessToken()`, sees token expiring, and triggers `refreshToken()` simultaneously. + +**Amplification:** Each refresh performs 7 DB reads + 7 DB writes + HTTP call. SQLite locks cause queuing; queued requests accumulate memory. + +**Impact:** Database connection pool exhaustion, memory buildup from queued request objects, potential OOM. + +**Mitigation Status:** None — no refresh debouncing, no token lock, no refresh queue. + +--- + +### 3. Config Write Amplification +**Vector:** Attacker (or bug) repeatedly writes to settings (e.g., toggling a flag). Each write triggers `reloadRepository()` → full config re-parse. + +**Amplification:** 1 write = 5 YAML parses + array merges (~100-150KB churn). 100 writes/second = 10-15 MB/s memory churn, GC cannot keep up. + +**Impact:** Memory fragmentation, GC thrashing, eventual OOM. + +**Mitigation Status:** None — no write coalescing, no debouncing, no rate limiting on settings changes. + +--- + +### 4. Path Traversal Memory Bloat +**Vector:** Attacker passes deeply nested or absolute paths (e.g., `/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p`). `PathResolver::resolve()` calls `realpath()` twice per check (path + parent). No caching means each unique path allocates new strings. + +**Amplification:** Each path string ~50 bytes, resolved path ~100 bytes. 1000 unique paths = **~150KB** of path strings. Combined with permission checks on each, multiplies. + +**Impact:** Memory bloat from unique path strings; filesystem I/O amplification. + +**Mitigation Status:** None — no path resolution cache, no canonicalization before check. + +--- + +### 5. Provider Instantiation Flood +**Vector:** Attacker triggers many LLM calls with different provider names (or same provider repeatedly). 
Each call instantiates a new provider object and fetches credentials. + +**Amplification:** Each provider instantiation ~300 bytes + credential fetch (7 DB queries for Codex). 100 calls = 30KB objects + 700 DB queries. + +**Impact:** DB connection exhaustion, memory from provider objects, credential exposure in more memory locations. + +**Mitigation Status:** None — no provider instance caching. + +--- + +## Recommendations + +### Immediate (Deploy within 24-48h) + +1. **Add static regex cache to `PermissionRule::matchesGlob()`** + ```php + private static array $regexCache = []; + $key = $pattern; + if (!isset(self::$regexCache[$key])) { + self::$regexCache[$key] = '/^'.str_replace(['\*', '\?'], ['.*', '.'], preg_quote($pattern, '/')).'$/i'; + } + $regex = self::$regexCache[$key]; + ``` + **Impact:** Eliminates 90%+ of regex compilation overhead. ~5-10 lines change. + +2. **Bulk token fetch in `SettingsCodexTokenStore::current()`** + Replace 7 individual SELECTs with: + ```sql + SELECT key, value FROM settings WHERE scope='global' AND key LIKE 'provider.codex.%' + ``` + Build array from single result set. + **Impact:** Reduces token load from 7 DB round-trips to 1. ~10 lines change. + +3. **Add in-memory token cache to `SettingsCodexTokenStore`** + ```php + private ?CodexToken $cached = null; + private int $cachedAt = 0; + // In current(): return $this->cached if within 5s + ``` + **Impact:** Prevents DB thrashing on rapid successive calls. ~15 lines change. + +--- + +### Short-Term (1-2 weeks) + +4. **Memoize permission evaluation results in `PermissionEvaluator`** + Cache `(toolName, argsHash) => PermissionResult` for duration of request (or session). Invalidate on `resetGrants()`. + **Impact:** Avoids re-running chain for same tool+args. Major CPU/memory savings for repeated tool calls. + +5. **Cache path resolutions in `PathResolver`** + Static `array $cache = []` keyed by the unresolved input path, storing the `realpath()` result as the value. TTL not needed for request-lifetime.
+ **Impact:** Eliminates duplicate `realpath()` syscalls. ~10 lines change. + +6. **Avoid full config reload on write in `SettingsManager`** + In `reloadRepository()`, instead of full `ConfigLoader::load()`, update `$this->config` incrementally using the `$data` already loaded in `configTarget()`. + **Impact:** Reduces write amplification from 5 parses to 0. ~20 lines change. + +7. **Add YAML parse cache to `YamlConfigStore`** + Static `array $cache` keyed by `realpath($path) . filemtime($path)`. Invalidate on `save()`. + **Impact:** Eliminates redundant parses across multiple `get()` calls. ~20 lines change. + +8. **Cache provider instances in `RelayProviderRegistrar`** + Private array `$instances = []`. Return cached if already resolved. + **Impact:** Saves ~200-500 bytes per provider call, reduces credential fetch overhead. + +--- + +### Long-Term (1-2 months) + +9. **Index permission rules by tool name** + Build associative array `[toolName => PermissionRule[]]` during `PermissionEvaluator` construction. Avoid linear scan of all rules on every check. + **Impact:** O(1) rule lookup vs O(n). Significant for large rule sets. + +10. **Eliminate duplicate rule evaluation** + Refactor check chain so `RuleCheck` returns both Deny and Ask states in one pass, and `ModeOverrideCheck` reuses that result instead of re-evaluating. + **Impact:** Cuts rule evaluation overhead by 66% in Guardian mode. + +11. **Pre-compile all glob patterns at startup** + In `PermissionConfigParser`, convert each deny pattern to compiled regex once and store in `PermissionRule` as `\Closure|string`. No runtime compilation. + **Impact:** Zero regex compilation at runtime. + +12. **Add rate limiting to permission evaluation** + Per-session or per-user limit on permission checks per minute. Prevents DoS via tool spam. + **Impact:** Thwarts memory exhaustion attacks. + +13. 
**Token refresh debouncing with mutex** + Use SQLite `BEGIN IMMEDIATE` or file lock to ensure only one refresh occurs concurrently. Others wait and reuse result. + **Impact:** Prevents refresh storms. + +14. **Consider APCu/Redis for cross-request caching** + - Cache merged config array keyed by file mtimes + - Cache token in shared memory with TTL + - Cache compiled regex patterns (though static cache already helps) + **Impact:** Reduces per-request memory churn dramatically for long-running processes (if ever introduced). + +15. **Add config write coalescing** + Batch multiple `set()` calls within a short window into a single reload. Use a "dirty" flag and debounce reload by 1-2 seconds. + **Impact:** Prevents write amplification from rapid successive updates. + +--- + +## Conclusion + +The permission, authentication, and configuration systems exhibit **critical RAM inefficiencies** that are not merely performance issues but **security-adjacent vulnerabilities**. The lack of caching at every layer creates predictable memory churn patterns that can be exploited for denial-of-service through memory exhaustion. Immediate actions (regex cache, bulk token fetch, in-memory token cache) are low-effort, high-impact fixes that should be deployed within 48 hours. Short-term improvements (memoization, path cache, config reload optimization) will reduce per-request memory churn by an estimated **60-70%**. Long-term architectural changes (rule indexing, duplicate evaluation elimination, rate limiting) are necessary to harden the system against targeted attacks. + +**Priority:** Address Critical issues first — they represent the easiest wins with the largest security/performance payoff. 
+ +--- + +**Report Generated By:** KosmoKrator Synthesis Agent +**Source Agents:** permission-system-overhead, codex-auth-integration, config-caching +**Output Path:** `docs/ram-audit/synthesis-security.md` diff --git a/docs/ecosystem/kosmokrator/audits/self-audit-2026-03-30.md b/docs/ecosystem/kosmokrator/audits/self-audit-2026-03-30.md new file mode 100644 index 0000000..f2c8e68 --- /dev/null +++ b/docs/ecosystem/kosmokrator/audits/self-audit-2026-03-30.md @@ -0,0 +1,317 @@ +# KosmoKrator Self-Audit + +> Status: Historical audit from 2026-03-30. Repository size, test counts, and implementation notes may no longer match the current tree. + +**Date:** 2026-03-30 +**Scope:** Full codebase — `src/`, `tests/`, `config/` +**Stats:** ~13,700 lines PHP 8.4 across 68 source files, 6,200 lines of tests (498 tests, 1060 assertions) + +## Architecture Overview + +``` +bin/kosmokrator → Kernel → AgentCommand → AgentLoop (REPL) + ├── LLM client (AsyncLlmClient or PrismService) + ├── UIManager → TuiRenderer | AnsiRenderer + ├── ToolRegistry → tools (bash, file_read, file_write, file_edit, grep, glob) + └── PermissionEvaluator → approval flow +``` + +Subsystems: Agent, LLM, Tool (Coding + Permission + Session + Task), UI (TUI + ANSI), Session (SQLite persistence), Task (in-memory tracking). + +## What's Done Well + +1. **Clean separation of concerns** — Tools, Permissions, Session, LLM, UI are distinct subsystems with narrow interfaces. +2. **Permission system is thoughtful** — Three modes (Guardian/Argus/Prometheus), Guardian uses static heuristics, blocked paths/glob patterns, session grants. +3. **Context management** — Three-tier: Pruner (cheap, replaces old tool results), Compactor (LLM summary), TrimOldest (last resort). Pre-flight check before LLM calls. +4. **Good test coverage** — Unit tests for every subsystem, 498 tests passing. +5. **Instruction loading** — Priority-based: global → project → subdirectory. YAML + SQLite settings with migration path. 
+ +--- + +## Issues & Improvements + +### Security Concerns + +#### 1. `PermissionRule::matchesGlob()` — `*` matches across word boundaries + +**File:** `src/Tool/Permission/PermissionRule.php:45-53` + +The glob-to-regex conversion treats `*` as `.*`, which matches `/` and any character. This means Guardian safe-command patterns like `git *` would match `git log && rm -rf /`. + +```php +public static function matchesGlob(string $value, string $pattern): bool +{ + $regex = '/^' . str_replace( + ['\*', '\?'], + ['.*', '.'], // `.*` matches everything including spaces and `&&` + preg_quote($pattern, '/'), + ) . '$/i'; + + return (bool) preg_match($regex, $value); +} +``` + +**Recommendation:** For command matching, `*` should match non-whitespace only (`[^\s]*`) or the matcher should be aware of shell metacharacters (`&&`, `|`, `;`, backticks, `$()`). Alternatively, parse the command into a first-token + rest and only match against the first token. + +--- + +#### 2. `GrepTool` uses `exec()` instead of Symfony `Process` + +**File:** `src/Tool/Coding/GrepTool.php:53` + +```php +exec($fullCmd . ' 2>&1', $output, $returnCode); +``` + +Unlike `BashTool` which uses `Symfony\Component\Process\Process`, `GrepTool` uses raw `exec()`. This means: +- No process timeout +- Not cancellable +- Inconsistent with the rest of the codebase + +The `hasRipgrep()` check (line 66) also uses `exec()`. + +**Recommendation:** Migrate to Symfony `Process` for consistency and cancellability. + +--- + +#### 3. `ConfigLoader` env var resolution treats `"0"` as empty + +**File:** `src/ConfigLoader.php:57-59` + +```php +$content = preg_replace_callback('/\$\{(\w+)\}/', function (array $matches) { + return $_ENV[$matches[1]] ?? $_SERVER[$matches[1]] ?? getenv($matches[1]) ?: ''; +}, $content); +``` + +The `?: ''` fallback coerces `"0"` to `''` because `"0"` is falsy in PHP. If an env var is set to the string `"0"`, it silently becomes empty. 
+ +**Recommendation:** Replace `?: ''` with proper false-check: +```php +$env = $_ENV[$matches[1]] ?? $_SERVER[$matches[1]] ?? getenv($matches[1]); +return $env !== false ? $env : ''; +``` + +--- + +#### 4. `OutputTruncator` truncation file path with empty tool call ID + +**File:** `src/Agent/OutputTruncator.php:82` + +```php +$path = $this->storagePath . '/tool_' . preg_replace('/[^a-zA-Z0-9_-]/', '_', $toolCallId) . '.txt'; +``` + +If `$toolCallId` is empty, the file becomes `tool_.txt`. Subsequent truncations with empty IDs would overwrite each other. Low risk but could lose data. + +**Recommendation:** Generate a fallback ID (timestamp + random) when `$toolCallId` is empty. + +--- + +### Bugs & Logic Issues + +#### 5. Default provider `'z'` is confusing + +**File:** `src/Kernel.php:147`, `src/Command/AgentCommand.php:62` + +```php +$provider = $config->get('kosmokrator.agent.default_provider', 'z'); +``` + +The hardcoded fallback to a single-letter provider name `'z'` is unclear. If a user hasn't configured a provider named `z`, the API key lookup returns empty and the agent fails with a generic error instead of a helpful message. + +**Recommendation:** Use a well-known provider as default (`'anthropic'` or `'openai'`), or better — detect available providers from configured API keys and pick the first one. + +--- + +#### 6. `PrismService` hardcodes `withMaxSteps(10)` + +**File:** `src/LLM/PrismService.php:128` + +```php +if (! empty($tools)) { + $request->withTools($tools); + $request->withMaxSteps(10); +} +``` + +The tool-call recursion limit of 10 is hardcoded. Complex refactoring tasks can legitimately need more rounds. When hit, the agent silently stops mid-task. + +**Recommendation:** Make this configurable via `config/kosmokrator.yaml` (e.g., `agent.max_tool_rounds: 25`). + +--- + +#### 7. 
`AgentLoop::executeToolCalls()` receives named args as associative array + +**File:** `src/Tool/ToolRegistry.php:46-48` + +```php +->using(function (...$args) use ($tool) { + $result = $tool->execute($args); + return $result->output; +}); +``` + +Prism calls tool handlers with named arguments. PHP spreads these into an associative array. This works but the contract is implicit — if Prism changes its calling convention, tools break silently. + +**Recommendation:** Add a defensive comment or normalize `$args` explicitly. Consider logging when `$args` structure is unexpected. + +--- + +#### 8. `TaskStore::clearTerminal()` has duplicate docblock + +**File:** `src/Task/TaskStore.php:240-248` + +Two consecutive `/**` docblocks — one says "Remove all completed tasks", the next says "Remove all terminal tasks". The second is correct (the method also removes cancelled tasks). + +**Recommendation:** Remove the stale first docblock. + +--- + +### Architecture / Design + +#### 9. `AgentCommand::repl()` is a 320-line method + +**File:** `src/Command/AgentCommand.php:151-478` + +The REPL handles 15+ slash commands (`/quit`, `/settings`, `/resume`, `/guardian`, etc.) with inline logic. Each command has direct access to `$agentLoop`, `$permissions`, `$sessionManager`, `$llm`, etc. + +**Recommendation:** Extract into a `SlashCommand` registry pattern: + +```php +interface SlashCommand { + public function name(): string; + public function handle(Context $ctx, string $args): void; +} +``` + +This would improve testability and make it easy to add new commands. + +--- + +#### 10. `UIManager` is a pure delegate with leaky abstraction + +**File:** `src/UI/UIManager.php` + +Every `RendererInterface` method is delegated one-to-one. 
Additionally, several methods do `instanceof` checks: + +```php +public function showWelcome(): void +{ + if ($this->renderer instanceof AnsiRenderer) { + $this->renderer->showWelcome(); + } elseif ($this->renderer instanceof TuiRenderer) { + $this->renderer->showWelcome(); + } +} +``` + +This pattern repeats for `playTheogony()`, `playPrometheus()`, `seedMockSession()`, `setTaskStore()`, `refreshTaskBar()`. + +**Recommendation:** Add these methods to `RendererInterface` with default no-op implementations, eliminating the instanceof checks. + +--- + +#### 11. `Kernel` uses Laravel's full Application container + +**File:** `src/Kernel.php:61` + +```php +$this->container = new LaravelApp($this->basePath); +``` + +The app bootstraps `Illuminate\Foundation\Application`, Facades, Events, Filesystem, and HTTP factory — all to serve Prism's Laravel integration. This is heavyweight for a CLI tool: + +- `LaravelApp` triggers bootstrapping overhead +- Facades add global state +- HTTP factory registered only because Prism uses the `Http` facade + +**Recommendation:** For now this works. If binary size or boot time becomes an issue, consider using `illuminate/container` standalone + a thin adapter for Prism. + +--- + +#### 12. `ModelCatalog` uses order-dependent substring matching + +**File:** `src/LLM/ModelCatalog.php:63-66` + +```php +foreach ($this->models as $name => $spec) { + if (str_contains($key, strtolower($name))) { + return $spec; + } +} +``` + +If the catalog has both `glm` and `glm-5`, the model `z/GLM-5` matches whichever comes first in the YAML. Order-dependent matching is fragile. + +**Recommendation:** Use exact match first (already done), then longest-prefix match instead of first-substring match. + +--- + +#### 13. No streaming for `AsyncLlmClient` + +**File:** `src/LLM/AsyncLlmClient.php:40-71` + +The async client buffers the entire response body before parsing. For long agent responses, the user sees nothing until the full response arrives. 
`AgentLoop::run()` calls `$this->ui->streamChunk($fullText)` with the complete text at once — not incremental. + +**Recommendation:** Implement SSE streaming for the async client, feeding chunks to the UI as they arrive. + +--- + +#### 14. No retry logic for transient API errors + +**File:** `src/Agent/AgentLoop.php:138-161` + +The error handling catches all `Throwable` but doesn't distinguish between retryable errors (429 rate limit, 503 service unavailable) and permanent errors (401, 400). A simple retry with exponential backoff for 429/503 would significantly improve reliability. + +**Recommendation:** Add retry logic in `AsyncLlmClient::chat()` for HTTP 429 and 5xx responses, with configurable max retries and backoff. + +--- + +#### 15. No concurrent tool execution + +**File:** `src/Agent/AgentLoop.php:263-376` + +Tool calls are executed sequentially in a `foreach`. Independent tool calls (e.g., reading two different files) could run concurrently, especially with the Amp async client. + +**Recommendation:** Group independent tool calls and execute them in parallel using `Amp\Future\awaitAll()`. + +--- + +### Tooling / DX + +#### 16. Pint checks `vendor-src/` — should only check `src/` and `tests/` + +The Pint `--test` run shows many style violations from `vendor-src/symfony/`. These are not part of the KosmoKrator codebase and should be excluded. + +**Recommendation:** Add a `pint.json` configuration: + +```json +{ + "paths": ["src", "tests"] +} +``` + +--- + +#### 17. `.gitignore` missing entries + +Missing: `*.phar`, `composer.phar`, `.phpcs-cache`. The `box.json` output path should also be ignored if building PHARs. 
+ +--- + +## Priority Matrix + +| Priority | # | Issue | Impact | +|----------|---|-------|--------| +| **High** | 1 | Glob `*` matches across word boundaries | Security: Guardian bypass | +| **High** | 6 | Hardcoded `maxSteps(10)` | Agent silently stops on complex tasks | +| **Medium** | 3 | Env var `"0"` evaluates to empty | Subtle config bug | +| **Medium** | 5 | Default provider `'z'` is confusing | Bad DX for new users | +| **Medium** | 9 | 320-line REPL method | Maintainability | +| **Medium** | 16 | Pint checks vendor-src | CI noise | +| **Low** | 2 | GrepTool uses `exec()` not `Process` | Consistency, cancellability | +| **Low** | 10 | UIManager instanceof checks | Abstraction leak | +| **Low** | 13 | No streaming for async client | UX improvement | +| **Low** | 14 | No retry for transient API errors | Reliability | +| **Low** | 15 | No concurrent tool execution | Performance | diff --git a/docs/ecosystem/kosmokrator/deep-audit-2026-04-04.md b/docs/ecosystem/kosmokrator/deep-audit-2026-04-04.md new file mode 100644 index 0000000..edc7281 --- /dev/null +++ b/docs/ecosystem/kosmokrator/deep-audit-2026-04-04.md @@ -0,0 +1,715 @@ +# KosmoKrator Deep Audit — 2026-04-04 + +> **Scope**: Full codebase audit across 20 dimensions — code quality, edge cases, TUI/UX, security, refactoring opportunities. +> **Methodology**: 16 parallel exploration agents spawning ~62 sub-agents for deep-dive analysis across 20 dimensions. +> **Codebase**: 277 PHP files, ~50K lines, PHP 8.4, Symfony Console + TUI. +> **Findings**: 65 Critical, 128 Important, 91 Minor = **284 total findings**. + +--- + +## Table of Contents + +1. [Executive Summary](#executive-summary) +2. [Top 30 Critical Issues](#top-30-critical-issues) +3. 
[Area Findings](#area-findings) + - [AgentLoop Core](#1-agentloop-core--repl-orchestrator) + - [Subagent Orchestration](#2-subagent-orchestration) + - [TUI Renderer](#3-tui-renderer) + - [ANSI Renderer](#4-ansi-renderer--markdown) + - [Tool System & Permissions](#5-tool-system--permission-model) + - [LLM Client Layer](#6-llm-client-layer) + - [Session & Database Persistence](#7-session--database-persistence) + - [Commands & Slash Commands](#8-commands--slash-commands) + - [Settings & Configuration](#9-settings--configuration) + - [Diff & UI Display](#10-diff-rendering--ui-display) + - [Power Commands & UX](#11-power-commands--ux-workflows) + - [Testing Coverage](#12-testing-coverage--quality) +4. [Cross-Cutting Themes](#cross-cutting-themes) +5. [Security Concerns Summary](#security-concerns-summary) +6. [Refactoring Backlog](#refactoring-backlog-prioritized) + +--- + +## Executive Summary + +The audit identified **65 critical**, **128 important**, and **91 minor** issues across the codebase (284 total). The most systemic problems are: + +- **No graceful shutdown**: No signal handling anywhere in the codebase. Ctrl+C = orphaned processes, broken terminal, unsaved data. +- **Security**: File tools have no path containment checks; permission system is opt-in (default-allow). File writes are non-atomic. +- **Concurrency**: Shared mutable state (`ContextBudget`, `ProtectedContextBuilder`, `BashTool::$progressCallback`), subagent slot leaks for root agent, race conditions in tool result ordering. +- **Exception hygiene**: Only 2 custom exceptions in 277 files. 6 silently swallowed `\Throwable` catches. Raw `$e->getMessage()` leaked to LLM. +- **TUI stability**: Modal stacking can deadlock, triple concurrent 30fps render timers, no TUI→ANSI mid-session fallback. +- **Configuration**: `reloadRepository()` loses user/project overrides, audio config mutates shared LLM singleton, LLM clients capture stale config at registration. 
+- **Testing**: ContextManager has 1 test, no integration tests exist, no tool result ordering tests, no UTF-8 truncation tests. + +--- + +## Top 30 Critical Issues + +Ranked by impact (severity × likelihood × affected surface). + +| # | Issue | File | Impact | +|---|-------|------|--------| +| 1 | **No path traversal protection in file tools** | `FileWriteTool.php:49`, `FileEditTool.php:51`, `FileReadTool.php:57` | LLM can write `/etc/passwd`, `~/.ssh/authorized_keys`. Relies entirely on permission chain being configured. | +| 2 | **Permission evaluator defaults to Allow** | `PermissionEvaluator.php:66-68` | Any tool not explicitly covered by rules/grants/blocked-paths is auto-approved. Security should default-deny. | +| 3 | **Non-atomic file writes** | `FileWriteTool.php:49` | `file_put_contents()` leaves partial files on crash. `FileEditTool` correctly uses temp+rename; `FileWriteTool` does not. | +| 4 | **Shell sessions orphaned on process crash** | `ShellSessionManager.php:164-179` | No `__destruct()` or shutdown handler. SIGKILL leaves zombie processes. | +| 5 | **`reloadRepository()` loses YAML overrides** | `SettingsManager.php:267-274` | After any settings write, in-memory config reverts to bundled defaults only, discarding user/project YAML. | +| 6 | **Audio config mutates shared LLM client** | `SessionServiceProvider.php:56-65` | `setProvider()`/`setModel()` on the shared singleton permanently changes the LLM for all agent calls, not just audio. | +| 7 | **TUI modal stacking causes deadlock** | `TuiModalManager.php` | No mutex prevents two modals from being shown simultaneously. If `askToolPermission()` fires during `askUser()`, deadlock. | +| 8 | **No `SQLITE_BUSY` handling** | `Database.php:38-39` | Missing `PRAGMA busy_timeout`. Two KosmoKrator processes writing simultaneously crash immediately. | +| 9 | **Unlimited LLM retries by default** | `RetryableLlmClient.php:37`, `LlmServiceProvider.php:81` | `$maxAttempts = 0` = infinite retries. 
Persistent 429/5xx loops forever. | +| 10 | **Tool result ordering doesn't match call order** | `ToolExecutor.php:212-217` | Denied results are appended after approved results, confusing the LLM which expects results in call order. | +| 11 | **`OutputTruncator::truncate()` splits mid-UTF8** | `OutputTruncator.php:96-98` | `substr()` on byte boundary can slice through multi-byte characters, producing corrupted output sent to the LLM API. | +| 12 | **Context compactor LLM call has no cancellation** | `ContextCompactor.php:164-167` | User cancel during compaction doesn't abort the compaction LLM request. | +| 13 | **No signal handling in AgentCommand** | `AgentCommand.php` | Ctrl+C skips teardown — no `killAll()`, no `cancelAll()`, no `ui->teardown()`. Orphaned processes, broken terminal state. | +| 14 | **Silent message loss on null tool_result** | `MessageSerializer.php:109-111` | Missing `tool_results` data → `null` → silently filtered → broken conversation flow → API errors. | +| 15 | **No session/message deletion** | `SessionRepositoryInterface.php` | Database grows without bound. No way to clean up old sessions or their messages. | +| 16 | **`PrismService` drops `reasoningContent`** | `PrismService.php:111-120` | Reasoning/thinking content silently lost for Prism-backed providers (Anthropic, Gemini). | +| 17 | **AnsiTheogony: 80s unskippable animation** | `AnsiTheogony.php` | No skip mechanism. Screen shake bug (both branches produce same direction). | +| 18 | **Triple concurrent 30fps render timers** | `TuiAnimationManager.php:378`, `TuiToolRenderer.php:267`, `SubagentDisplayManager.php:205` | Breathing + loader + tool-executing timers each trigger full terminal re-render independently. | +| 19 | **Substring model matching can return wrong spec** | `ModelDefinitionSource.php:86-101` | `"gpt-4o-mini"` matches `"gpt-4o"` if mini not explicitly defined. Wrong pricing/context window. 
| +| 20 | **Stuck detector misses oscillating patterns** | `StuckDetector.php:49-58` | Only checks last signature. `[A,A,A,B,A,A,A,B,...]` never triggers. Any non-stuck round fully resets escalation. | +| 21 | **Non-atomic config file writes** | `YamlConfigStore.php:60` | `file_put_contents()` without temp+rename. Crash mid-write = corrupted YAML. | +| 22 | **`forProject()` loads ALL memories into RAM** | `MemoryRepository.php:65-88` | No limit/pagination. O(n log n) sort on full dataset every retrieval. | +| 23 | **`AsyncLlmClient` provider list not checked by factory** | `LlmClientFactory.php:45` vs `AsyncLlmClient.php:34` | Two independent provider lists can drift. Factory creates client for providers not in the compatibility list. | +| 24 | **`collectResult()` detects errors by "Error:" prefix** | `ToolExecutor.php:405` | `str_starts_with($result, 'Error:')` — grep output for the word "Error:" is falsely marked as failed. | +| 25 | **No terminal capability detection** | `UIManager.php:377-389`, `Theme.php` | Unconditional 24-bit color + Unicode. No `NO_COLOR`, `COLORTERM`, or `TERM` check. Garbled on limited terminals. | +| 26 | **`yieldSlot`/`reclaimSlot` slot leak for root agent** | `SubagentOrchestrator.php:471-496` | Root agent never acquires semaphore lock but `reclaimSlot` consumes one permanently. After N calls → deadlock. | +| 27 | **Shared `ContextBudget` across all subagent depths** | `SubagentFactory.php:87` | Deep child compaction deducts from root's budget pool. Root can run out prematurely. | +| 28 | **No error handling during kernel boot** | `bin/kosmokrator`, `Kernel.php:45-72` | Zero try-catch in bootstrap. Provider failure = partial initialization, raw stack trace. | +| 29 | **Raw `$e->getMessage()` leaked to LLM** | `AgentLoop.php:288,312,518`, `ToolExecutor.php:313` | Internal error messages (HTTP codes, file paths, provider details) stored as assistant messages. No sanitization. 
| +| 30 | **`wouldCreateCycle` crashes on pruned stats** | `SubagentOrchestrator.php:375` | Accesses `$this->stats[$current]->dependsOn` without existence check. Pruned agents → TypeError. | + +--- + +## Area Findings + +### 1. AgentLoop Core & REPL Orchestrator + +**Files**: `src/Agent/AgentLoop.php` (858 lines), `ToolExecutor.php` (465 lines), `ContextManager.php`, `StuckDetector.php`, `OutputTruncator.php`, `TokenEstimator.php` + +#### Critical +- `OutputTruncator::truncate()` uses byte-level `substr()` that can split mid-UTF8 character (`OutputTruncator.php:96-98`) +- `BashTool::$progressCallback` is static mutable — race condition in concurrent bash execution (`ToolExecutor.php:162`) +- Context compactor LLM call has no cancellation support (`ContextCompactor.php:164-167`) + +#### Important +- **Tool result ordering bug**: denied results appended after approved, not matching tool call order (`ToolExecutor.php:212-217`) +- **Stuck detector misses oscillating patterns**: only checks last signature, escalation resets on any non-stuck round (`StuckDetector.php:49-58`) +- **Token estimation 15-30% low for code**: fixed 4 chars/token ratio (`TokenEstimator.php:37`) +- **No max-iteration guard in `run()`**: infinite tool-call loop possible in interactive mode (`AgentLoop.php:198`) +- **`collectResult()` detects errors by "Error:" string prefix**: fragile, false positives on grep output (`ToolExecutor.php:405`) +- **`ContextBudget` default `reserveOutputTokens=0`**: no room for LLM response → API error (`ContextBudget.php:53-56`) +- **`isContextOverflow()` is a fragile heuristic**: string matching on error messages from different providers (`AgentLoop.php:748-757`) +- **`apply_patch` args don't populate `$writePaths`**: concurrent `file_read` of patched file gets stale data (`ToolExecutor.php:341-357`) +- **No timeout on individual tool execution**: misbehaving tool blocks event loop (`ToolExecutor.php:168`) +- **`shell_kill` not in read-only guard**: state-changing 
operation bypasses Ask/Plan mode checks (`ToolExecutor.php:109`) +- **`findTool()` is O(n) linear scan**: should use hash map (`ToolExecutor.php:437-446`) + +#### Minor +- `$autoApproved` / `$approvedById` built but never used — dead code (`AgentLoop.php:143-146`) +- `formatStatusModelLabel()` is a trivial passthrough (`AgentLoop.php:732-735`) +- Duplicate `performCompaction()` logic in two locations (`AgentLoop.php:364-372` vs `848-857`) +- `headlessPreFlightCheck()` is a trivial wrapper (`ContextManager.php:129-132`) +- `ContextPruner::importanceScore()` uses English-only phrases (`ContextPruner.php:194`) + +--- + +### 2. Subagent Orchestration + +**Files**: `src/Agent/SubagentOrchestrator.php` (665 lines), `SubagentFactory.php`, `SubagentStats.php`, `SubagentTool.php` + +#### Critical +- **Potential deadlock in dependency + group combo**: If agent A depends on agent B, and both are in the same group (sequential), the group semaphore blocks A from starting while the dependency waits for A to run. +- **`SubagentTool` input validation**: empty task descriptions, malformed `depends_on` arrays, and circular references aren't validated before submission to the orchestrator. + +#### Important +- **Retry logic doesn't distinguish transient vs permanent failures**: auth errors (401/403) correctly skipped, but malformed-request errors (400) may be retried unnecessarily. +- **Stats double-count tokens during retries**: each retry attempt adds to the token counter; no deduplication of pre-retry tokens. +- **Background agent results injected on next LLM turn**: if the parent never makes another LLM call (exits), background results are lost. +- **`SubagentStats::elapsed()` includes retry wait time**: makes timing metrics misleading. + +#### Minor +- Agent ID uniqueness not enforced — collision possible if LLM reuses IDs across batches. +- No telemetry/observability hooks for orchestrator events. + +--- + +### 3. 
TUI Renderer + +**Files**: `src/UI/Tui/TuiCoreRenderer.php` (1169 lines), `TuiToolRenderer.php` (641 lines), `TuiModalManager.php` (513 lines), `TuiAnimationManager.php` (434 lines), `SubagentDisplayManager.php` (537 lines) + +#### Critical +- **Modal stacking deadlock**: no mutex prevents `askToolPermission()` during `askUser()` (`TuiModalManager.php`) +- **`askUser()` cleanup bypassed on external resume**: QuestionWidget left in overlay when cancelled from `TuiCoreRenderer` (`TuiModalManager.php:130-149`) +- **`showToolResult` uses stale `lastToolArgs`**: concurrent tool calls overwrite each other's args (`TuiToolRenderer.php:194`) +- **`cycleMode()` breaks on unexpected label**: `array_search` returns `false` → silent wrong mode (`TuiCoreRenderer.php:903-911`) +- **Cancellation race in Thinking→Idle transition**: old cancelled token used after new one created (`TuiCoreRenderer.php:451-465`) +- **`showBatch()` filters by substring "spawned in background"**: real results containing this text are hidden (`SubagentDisplayManager.php:278`) + +#### Important +- **`streamChunk` rebuilds MarkdownWidget on every token**: string concat + full markdown re-parse per chunk. 
Performance issue on long responses (`TuiCoreRenderer.php:543-544`) +- **Triple concurrent 30fps render timers**: breathing (33ms) + loader (50ms) + tool-executing (50ms) each trigger full re-render independently +- **No truncation for large tool outputs in TUI**: CollapsibleWidget stores full string in memory (`TuiToolRenderer.php:220-230`) +- **Binary/null bytes in tool outputs**: `explode("\n", $output)` produces garbled display (`TuiToolRenderer.php:220`) +- **`toolExecutingTimerId` leaks on error**: orphaned 50ms repeat timer runs indefinitely (`TuiToolRenderer.php:305-318`) +- **`compactingTimerId` not cancelled on Idle**: `enterIdle()` cancels thinking timer but not compacting timer (`TuiAnimationManager.php:347-364`) +- **Container widgets accumulate in conversation**: each `showSpawn()` adds a new ContainerWidget; old ones persist (`SubagentDisplayManager.php:126-128`) +- **Progress bar counts failed agents as "done"**: misleading progress percentage (`SubagentDisplayManager.php:254-264`) +- **`pendingEditorRestore` text lost on error**: typed input never restored if agent errors during streaming (`TuiCoreRenderer.php:416-419`) +- **`clearConversationState()` doesn't reset tool renderer state**: orphaned timers reference removed widgets (`TuiCoreRenderer.php:791-801`) +- **No terminal resize handling during streaming**: scroll offsets become stale +- **`setMaxVisibleLines(2)`**: too restrictive for multi-line editing (`TuiCoreRenderer.php:298`) +- **No input length limit in EditorWidget**: very long pastes create enormous text buffers +- **No command history (up/down arrow)**: only conversation scroll via PAGE_UP/PAGE_DOWN + +#### Minor +- Spinner index increments indefinitely (`TuiAnimationManager.php:299`) +- ESC cancels during thinking — undocumented behavior +- `playAnimation()` stops/starts TUI without try/catch — TUI remains stopped on animation error +- `renderIntro()` uses blocking `usleep`/`sleep` on event loop + +--- + +### 4. 
ANSI Renderer & Markdown + +**Files**: `src/UI/Ansi/AnsiRenderer.php` (568 lines), `AnsiCoreRenderer.php`, `MarkdownToAnsi.php` (535 lines), `AnsiIntro.php` (611 lines), `AnsiTheogony.php` (2014 lines), `Theme.php` + +#### Critical +- **AnsiTheogony: no skip/abort mechanism**: ~80 second unskippable animation (`AnsiTheogony.php`) +- **Screen shake bugs**: both branches produce same direction `\033[1B` (`AnsiTheogony.php:927`); up+down cancels out `\033[1A\033[1B` (`AnsiTheogony.php:1026`) + +#### Important +- **No streaming output in ANSI mode**: user sees nothing until full response completes (`AnsiCoreRenderer.php:172-176`) +- **`clearThinking()` is a no-op**: "Thinking..." text never erased (`AnsiCoreRenderer.php:130-133`) +- **Status bar, welcome, separators overflow on narrow terminals**: fixed-width `━` bars assume ≥80 cols +- **Table rendering has no total-width overflow**: wide tables corrupt layout (`AnsiTableRenderer.php:22`) +- **All Theme colors designed for dark backgrounds only**: invisible on light terminals. No `COLORFGBG` detection +- **`wrapCodeLine()` is O(n²)**: `mb_substr(substr($line, $i), 0, 1)` per character (`MarkdownToAnsi.php:459-508`) +- **TableCollector drops nested inline elements**: links, images, strikethrough silently removed from table cells +- **Terminal size detection uses `exec('tput')` instead of `posix_get_terminal_size()`**: blocking, adds latency on SSH + +#### Minor +- Duplicate `wrapAnsiText()` in `MarkdownToAnsi` and `ListTracker` +- Missing `declare(strict_types=1)` in `MarkdownToAnsi.php` +- `Theme::codeBg()` defined but never used in rendering +- Italic/strikethrough escape codes hardcoded instead of using Theme +- Logo constants duplicated between `AnsiIntro` and `AnsiTheogony` +- `ListTracker` uses `mb_strlen` instead of `mb_strwidth` for bullet indent +- `Theme::white()` uses 16-color `[1;37m` inconsistent with 24-bit RGB elsewhere + +--- + +### 5. 
Tool System & Permission Model + +**Files**: `src/Tool/Coding/File*.php`, `PatchApplier.php`, `Shell*.php`, `BashTool.php`, `GrepTool.php`, `GlobTool.php`, `src/Tool/Permission/*` + +#### Critical +- **No path traversal protection**: `FileWriteTool`, `FileEditTool`, `FileReadTool` accept raw paths with zero project-root validation +- **Symlink following risk**: `PathResolver::resolve()` follows symlinks via `realpath()` — symlink to `/etc/shadow` inside project +- **Non-atomic writes in `FileWriteTool`**: `file_put_contents()` directly, no temp+rename +- **Permission system is opt-in per tool**: if tool not in `approval_required`, entire permission chain is bypassed +- **`PermissionEvaluator::evaluate()` defaults to Allow**: should default-deny for safety + +#### Important +- **Temp file leak on crash**: `FileEditTool` creates `$path.'.tmp.'.getmypid()` with no cleanup (`FileEditTool.php:139`) +- **PatchApplier update non-atomic for moves**: write destination → unlink source; crash between = data duplication +- **Concurrent file edits: last-write-wins**: no file locking +- **PatchApplier line-ending corruption**: `implode("\n", ...)` on CRLF files inserts LF +- **Shell session idle cleanup only on tool calls**: if agent stops, sessions live forever (`ShellSessionManager.php:238-251`) +- **No max session limit**: malicious agent could exhaust file descriptors +- **`GrepTool` timeout declared but never used**: `$timeout = 30` is dead code (`GrepTool.php:19`) +- **Regex DoS possible in GrepTool**: `(.){1000000}` causes catastrophic backtracking in GNU grep +- **`SessionGrants` are per-tool, not per-path**: approving `bash` once auto-approves all future commands +- **`GuardianEvaluator::isInsideProject()` fails for project root itself**: trailing slash issue + +#### Minor +- `FileReadTool` cache uses mtime (1-second granularity) +- No BOM handling in file tools +- `hasRipgrep()` spawns subprocess on every `GrepTool` call — should cache +- Binary file handling missing 
in grep +- GlobTool doesn't show permission-denied errors + +--- + +### 6. LLM Client Layer + +**Files**: `src/LLM/AsyncLlmClient.php`, `PrismService.php`, `RetryableLlmClient.php`, `ModelDefinitionSource.php`, `RelayProviderRegistry.php` + +#### Critical +- **Provider lists can drift**: `AsyncLlmClient::OPENAI_COMPATIBLE_PROVIDERS` not checked by `LlmClientFactory` (`LlmClientFactory.php:45`) +- **Unlimited retries by default**: `$maxAttempts = 0` in production wiring (`LlmServiceProvider.php:81`) +- **Substring model matching**: `"gpt-4o-mini"` matches `"gpt-4o"` — wrong pricing/context (`ModelDefinitionSource.php:86-101`) + +#### Important +- **`PrismService` drops `reasoningContent`**: thinking content lost for Anthropic/Gemini (`PrismService.php:111-120`) +- **No cancellation in `PrismService`**: `$cancellation` param documented as unused (`PrismService.php:107`) +- **Jitter always adds, never subtracts**: backoff is `base + [0, 0.3*base]`, not `base ± 0.3*base` (`RetryableLlmClient.php:132`) +- **No circuit breaker**: persistent failures retry forever +- **`smartDelay` blocking path**: `sleep()` in ANSI mode doesn't check cancellation during sleep +- **`cached_write_price` defaults to `input_price`**: Anthropic cache write is 1.25x, undercharged if missing from spec +- **Provider alias maps split between two classes**: can drift (`ModelDefinitionSource.php:25` vs `RelayProviderRegistry.php:213`) +- **No streaming support in `AsyncLlmClient`**: must unwrap via `inner()` — leaky abstraction + +#### Minor +- No connection pool sharing between subagent clients +- `setApiKey()` accepts empty strings +- Timeout values hardcoded (600s/300s), not configurable +- Duplicated `supportsTemperature()` in both client classes + +--- + +### 7. 
Session & Database Persistence + +**Files**: `src/Session/Database.php`, `MessageRepository.php`, `MessageSerializer.php`, `SessionManager.php`, `MemoryRepository.php`, `MemorySelector.php` + +#### Critical +- **No `PRAGMA busy_timeout`**: concurrent writes crash with `SQLITE_BUSY` (`Database.php:38-39`) +- **Silent message loss on null tool_result**: message silently dropped → broken conversation → API errors (`MessageSerializer.php:109-111`) +- **No session/message deletion**: database grows unbounded +- **`forProject()` loads ALL memories**: no limit, O(n log n) sort every retrieval (`MemoryRepository.php:65-88`) + +#### Important +- **`saveMessage()` silently no-ops when no session**: data loss with no warning (`SessionManager.php:115-117`) +- **Session switch doesn't validate target**: FK violation on first message save (`SessionManager.php:99-102`) +- **LIKE-based search, no FTS5**: full table scan per search (`MemoryRepository.php:186-192`) +- **Timestamp timezone mismatch in memory expiry**: `date('c')` produces timezone offsets, string comparison may break (`MemoryRepository.php:67`) +- **`loadActive()` loads all message content**: no pagination, multi-MB tool outputs in RAM (`MessageRepository.php:76-80`) +- **`markCompactedIds` not session-scoped**: cross-session compaction possible with leaked IDs (`MessageRepository.php:133-145`) +- **Role mismatch between `MessageMapper` and `MessageSerializer`**: `'tool'` vs `'tool_result'` +- **No role validation in `append()`**: invalid roles silently stored then dropped on deserialization + +#### Minor +- Directory permissions 0755 on database directory +- `findByPrefix` uses LIKE without escaping `%`/`_` +- Timestamp precision mismatch: sessions (microseconds) vs messages (seconds) +- No session title sanitization +- `MemoryInjector` truncation at 180-240 chars with no truncation indicator +- Memory scoring uses undocumented magic numbers + +--- + +### 8. 
Commands & Slash Commands + +**Files**: `src/Command/AgentCommand.php`, `SlashCommandRegistry.php`, `Slash/*.php` + +#### Critical +- **No signal handling**: Ctrl+C skips all cleanup — orphaned processes, broken terminal (`AgentCommand.php`) +- **QuitCommand double-teardown**: `teardown()` called twice if not idempotent (`QuitCommand.php:39` + `AgentCommand.php:299`) +- **`ResumeCommand` clears permissions but not mode**: mode mismatch after resume (`ResumeCommand.php:79`) +- **`FeedbackCommand` prompt injection**: user text interpolated directly into LLM prompt (`FeedbackCommand.php:57-72`) + +#### Important +- **Unknown slash commands fall through to LLM**: `/typo something` sent as user message instead of error +- **TUI init failure leaves terminal in broken state**: alternate screen buffer, raw mode not restored (`AgentSessionBuilder.php:49-52`) +- **Whitespace-only input sent to LLM**: `" "` not filtered +- **`NewCommand` doesn't cancel running subagents**: stale agents operate on new session (`NewCommand.php:40-48`) +- **`SessionFormatter::formatAge` assumes numeric timestamps**: ISO date strings produce wildly incorrect ages +- **`RenameCommand` inconsistent quote stripping**: single-quote regex missing `$` anchor +- **`ClearCommand` uses raw ANSI**: conflicts with TUI renderer state (`ClearCommand.php:48`) +- **`SettingsCommand` is 860+ lines**: severe maintenance concern +- **`CompactCommand` has no success/error feedback**: user gets no indication of result + +#### Minor +- No `/help` command +- No duplicate registration detection in `SlashCommandRegistry` +- `/tasks clear` space-in-name creates prefix collision risk +- CJK width not accounted for in preview truncation +- `ForgetCommand` shows success for non-existent IDs +- `PowerCommandRegistry` regex only matches `\w+` — hyphens excluded + +--- + +### 9. 
Settings & Configuration + +**Files**: `src/Settings/SettingsManager.php`, `YamlConfigStore.php`, `SettingsSchema.php`, `ConfigLoader.php`, `src/Provider/*` + +#### Critical +- **`reloadRepository()` loses user/project YAML overrides**: only reloads bundled defaults (`SettingsManager.php:267-274`) +- **Non-atomic config writes**: `file_put_contents()` without temp+rename (`YamlConfigStore.php:60`) +- **Audio config mutates shared LLM client**: `setProvider()`/`setModel()` on shared singleton (`SessionServiceProvider.php:56-65`) +- **Migration rewrites YAML every boot**: non-atomic, no one-time flag (`DatabaseServiceProvider.php:92-145`) +- **Provider registration order is implicit**: hardcoded sequence, no dependency declaration (`Kernel.php:48-58`) +- **`LlmServiceProvider` captures stale config**: singletons don't reflect runtime settings changes + +#### Important +- **Toggle normalization incomplete**: `"0"`, `"false"`, `"no"` not handled correctly (`SettingsManager.php:277-289`) +- **No change notification**: settings changes don't propagate to dependent components +- **Missing env vars resolve to empty string**: `${MISSING_KEY}` → `''` instead of `null` (`ConfigLoader.php:72-76`) +- **Malformed YAML crashes app**: no try/catch around `Yaml::parse()` (`YamlConfigStore.php:23-35`) +- **Config merge doesn't handle indexed arrays**: `mergeDeep()` appends instead of replacing for indexed arrays +- **`DatabaseServiceProvider::boot()` injects SQLite config after `RelayRegistry` already constructed**: stale config +- **No first-run config creation**: depends entirely on bundled defaults +- **Missing settings in schema**: ~10 config keys have no type validation or labels + +#### Minor +- Static schema caching creates cross-instance coupling +- `SettingsPaths` instantiated repeatedly instead of cached +- Legacy `.kosmokrator.yaml` support adds complexity +- `LoggingServiceProvider` has side effects in `register()` instead of `boot()` + +--- + +### 10. 
Diff Rendering & UI Display + +**Files**: `src/UI/Diff/DiffRenderer.php` (548 lines), `AgentDisplayFormatter.php`, `AgentTreeBuilder.php`, `UIManager.php`, `Theme.php` + +#### Critical +- **No binary file detection in DiffRenderer**: binary content produces garbled output (`DiffRenderer.php:33-166`) +- **No TUI→ANSI mid-session fallback**: renderer fixed at construction (`UIManager.php:27-29`) + +#### Important +- **Line numbers for context lines use `$newLine` only**: old-file line number lost (`DiffRenderer.php:131`) +- **30+ hardcoded ANSI codes outside Theme**: inconsistent color shades across 8+ files +- **Color shade inconsistencies**: gold/accent, success, error, info all have different RGB values in hardcoded vs Theme +- **No terminal capability detection**: no `NO_COLOR`, `COLORTERM`, `TERM` checks +- **No large diff truncation**: thousands of changes flood terminal in ANSI mode +- **`padWithFileContext` first-match ambiguity**: duplicated code blocks match wrong occurrence +- **`str_pad` with multi-byte strings**: CJK under-padded +- **No depth limit on tree recursion**: stack overflow possible with deep nesting + +#### Minor +- Hunk separator `· ✧ ·` has no Unicode fallback +- Missing Theme palette entries for 7 commonly-used colors +- `seedMockSession()` violates Liskov substitution +- Agent IDs not truncated — can produce very wide labels + +--- + +### 11. 
Power Commands & UX Workflows + +**Files**: `src/Command/Power/*.php` (21 commands), `src/UI/Ansi/Ansi*.php` (animation classes) + +#### Critical +- **`:release` has no programmatic push guard**: prompt-only "ask before push" (`ReleaseCommand.php:78-79`) +- **`:unleash` can spawn 125+ agents**: no resource constraints or rate limiting (`UnleashCommand.php:47-48`) +- **No cancellation in animations**: `usleep()` blocks, no SIGINT handling during animations + +#### Important +- **All power commands are purely prompt-driven**: no programmatic logic, all workflow enforcement via LLM compliance +- **`:autopilot` no loop guard**: Phase 5→3 re-entry has no max iteration count +- **`:babysit` no wall-clock timeout**: can run indefinitely +- **`:research` no cancellation guidance**: 7+ agents with no cleanup on cancel +- **`:release` no dry-run mode**: goes straight from version bump to push +- **18 commands registered manually**: no auto-discovery, adding a new command is error-prone +- **All animations use `register_shutdown_function(print(...))`**: `print` returns 1, may emit spurious "1" +- **No `KOSMOKRATOR_NO_ANIM` environment variable**: accessibility issue for screen readers/CI + +#### Minor +- `:auto` alias too generic, could clash +- `:sci` alias too short/non-obvious +- `:watch` conflicts with Unix `watch` mental model +- Animation `exec('tput cols')` called per animation, not cached + +--- + +### 12. 
Testing Coverage & Quality + +**Files**: `tests/Unit/**/*.php` (~140 tests), `tests/Feature/AgentCommandTest.php` (1 test) + +#### Critical +- **ContextManager has only 1 test**: core component with vast untested surface +- **No tool result ordering tests**: concurrent execution ordering completely unverified +- **No UTF-8 truncation tests**: `OutputTruncator` multi-byte handling untested +- **No integration tests for agent loop**: no end-to-end prompt→tool→response test + +#### Important +- **5 pipeline/factory classes untested**: `ContextPipeline`, `ContextPipelineFactory`, `SubagentPipeline`, `SubagentPipelineFactory`, `LlmClientFactory` +- **21 Power commands have zero tests** +- **Session persistence lifecycle untested**: no create→persist→load round-trip test +- **`ProviderAuthService` untested**: handles API key/auth flows +- **`SessionSettingsApplier` untested**: applies settings to running sessions +- **Only 1 feature test**: `AgentCommandTest` just verifies exit code 0 with `/quit` + +#### Minor +- StuckDetector missing oscillation pattern tests +- ToolExecutor missing UTF-8/malformed input tests +- No `tests/Integration/` or `tests/Functional/` directories +- No code coverage enforcement + +--- + +## Cross-Cutting Themes + +### 1. Static Mutable State (5 instances) +- `BashTool::$progressCallback` — race condition +- `SettingsSchema::$definitions` / `$aliases` — cross-instance pollution +- `ShellSessionManager` — no static state but shared instance with no cleanup guarantees +- **Pattern**: mutable statics in a concurrent (fiber-based) environment are dangerous. Each should be instance state or use fiber-local storage. + +### 2. 
Non-Atomic File Operations (6 instances) +- `FileWriteTool` — `file_put_contents()` directly +- `YamlConfigStore` — `file_put_contents()` directly +- `PatchApplier::applyAdd()` — `file_put_contents()` directly +- `DatabaseServiceProvider::migrateYamlKeys()` — `file_put_contents()` directly +- `OutputTruncator::saveFull()` — no error handling +- `PatchApplier` move operations — write+unlink not atomic +- **Fix**: Extract a shared `AtomicFileWriter` utility that does write-to-temp + `rename()`. + +### 3. Fragile String-Based Detection (4 instances) +- `collectResult()` — `"Error:"` prefix for success detection +- `isContextOverflow()` — string matching on error messages +- `showBatch()` — substring `"spawned in background"` for filtering +- `PermissionConfigParser` — tool name string matching for opt-in security +- **Fix**: Use typed result objects, error codes, or enums instead of string conventions. + +### 4. Resource Leak Pattern (8 instances) +- Shell sessions — orphaned on crash +- TUI timer IDs — not cancelled on phase transitions +- Container widgets — accumulate indefinitely +- Memory objects — loaded entirely into RAM +- Database rows — no deletion mechanism +- Subagent processes — no cleanup on parent crash +- Editor text restore — lost on error exit +- Service singletons — no disposal lifecycle +- **Fix**: Implement a coordinated cleanup/teardown system with shutdown handlers. + +### 5. Configuration Staleness (3 instances) +- `LlmServiceProvider` captures config at registration → stale singletons +- `SettingsManager::reloadRepository()` re-reads only bundled defaults → lost overrides +- `DatabaseServiceProvider::boot()` injects config after consumers constructed +- **Fix**: Implement config change notification (observer/event system) or use lazy resolution. + +### 6. Hardcoded ANSI Color Codes (30+ instances) +Across 8+ files, colors bypass `Theme` with slightly different RGB values. This makes the palette inconsistent and unmaintainable. 
+- **Fix**: Add missing palette entries to `Theme`, replace all hardcoded codes with `Theme::` calls. + +### 7. No Terminal Adaptation +- No color depth detection (16/256/24-bit) +- No Unicode fallback +- No light/dark terminal detection +- Fixed-width elements overflow on narrow terminals +- **Fix**: Add a `TerminalCapabilities` class that detects once at startup and is consulted by Theme. + +--- + +## Security Concerns Summary + +| # | Concern | Severity | Exploitability | File | +|---|---------|----------|---------------|------| +| 1 | File tools have no path containment | **Critical** | High — LLM can be tricked into writing outside project | `FileWriteTool.php:49` | +| 2 | Permission system defaults to Allow | **Critical** | Medium — requires misconfigured `approval_required` | `PermissionEvaluator.php:66-68` | +| 3 | SessionGrants are per-tool, not per-path | **High** | Medium — one approval grants all future operations | `SessionGrants.php:17-19` | +| 4 | Symlink following via `realpath()` | **High** | Low — requires symlink creation inside project | `PathResolver.php:27` | +| 5 | FeedbackCommand prompt injection | **High** | Medium — user text in LLM prompt | `FeedbackCommand.php:57-72` | +| 6 | Regex DoS in GrepTool | **Medium** | High — `(.){1000000}` pattern | `GrepTool.php:58` | +| 7 | GlobTool path traversal info leak | **Medium** | Low — can discover files outside project | `GlobTool.php:51` | +| 8 | API keys in config files with loose permissions | **Medium** | Medium — 0755 on config dir | `YamlConfigStore.php:46-61` | +| 9 | Config files written non-atomically | **Medium** | Low — race condition window | `YamlConfigStore.php:60` | +| 10 | Database directory world-readable | **Low** | Low — 0755 permissions | `Database.php:27` | + +**Recommended Priority**: +1. Add path containment checks directly in file tools (don't rely solely on permission chain) +2. Switch `PermissionEvaluator` to default-deny +3. Make `SessionGrants` path/command-scoped +4. 
Add timeout enforcement to `GrepTool` +5. Set config file permissions explicitly (0600) + +--- + +## Refactoring Backlog (Prioritized) + +### P0 — Do Now (Bugs & Security) + +| # | Refactoring | Effort | Impact | +|---|------------|--------|--------| +| 1 | Add `AtomicFileWriter` utility, use in `FileWriteTool`, `YamlConfigStore`, `PatchApplier` | 2h | Fixes 6 non-atomic write bugs | +| 2 | Add path containment check in file tools (validate against project root) | 1h | Critical security fix | +| 3 | Fix `OutputTruncator::truncate()` to use `mb_strcut()` instead of `substr()` | 15min | Prevents UTF-8 corruption | +| 4 | Fix tool result ordering in `ToolExecutor` to match original call order | 30min | Fixes LLM confusion | +| 5 | Add `PRAGMA busy_timeout=5000` to Database constructor | 1 line | Fixes concurrent process crashes | +| 6 | Set `maxAttempts` default to 3 in `RetryableLlmClient` or `LlmServiceProvider` | 1 line | Prevents infinite retry loops | +| 7 | Fix `reloadRepository()` to re-merge all YAML layers | 2h | Prevents config loss | +| 8 | Fix audio config to clone LLM client instead of mutating shared singleton | 30min | Prevents all-agent LLM corruption | +| 9 | Add modal mutex in `TuiModalManager` | 1h | Prevents deadlock | + +### P1 — Do Soon (Stability & UX) + +| # | Refactoring | Effort | Impact | +|---|------------|--------|--------| +| 10 | Consolidate triple 30fps timers into single tick with phase-aware dispatch | 4h | Performance, CPU reduction | +| 11 | Add signal handler in `AgentCommand` for cleanup on SIGINT/SIGTERM | 2h | Prevents resource leaks | +| 12 | Add `TerminalCapabilities` detection class | 3h | Enables light/dark, color depth, Unicode fallbacks | +| 13 | Move 30+ hardcoded ANSI codes to `Theme` palette methods | 4h | Color consistency, maintainability | +| 14 | Add `shell_kill` to read-only mode guard | 5min | Prevents state change in Ask/Plan mode | +| 15 | Fix `collectResult()` to use typed error detection instead of string 
prefix | 1h | Prevents false negatives | +| 16 | Add streaming output to ANSI renderer | 4h | Major UX improvement | +| 17 | Add `/help` command | 1h | Discoverability | +| 18 | Fix `PrismService` to pass through `reasoningContent` | 30min | Restores thinking content for Anthropic/Gemini | +| 19 | Add periodic cleanup timer for shell sessions | 1h | Prevents session leaks | +| 20 | Add AnsiTheogony skip mechanism (keypress detection) | 2h | UX — no more 80s unskippable animation | + +### P2 — Do Eventually (Code Quality) + +| # | Refactoring | Effort | Impact | +|---|------------|--------|--------| +| 21 | Split `SettingsCommand` (860 lines) into focused sub-commands | 8h | Maintainability | +| 22 | Split `AnsiTheogony` (2014 lines) into phase classes | 4h | Maintainability | +| 23 | Add integration test suite: agent loop, session persistence, permission flow | 8h | Test confidence | +| 24 | Implement config change notification system (events) | 4h | Settings propagation | +| 25 | Add `lazy()` resolution for LLM singletons to avoid stale config capture | 2h | Config freshness | +| 26 | Extract `wrapAnsiText()` to shared utility | 1h | DRY | +| 27 | Add depth limit to agent tree rendering | 30min | Safety | +| 28 | Cache `hasRipgrep()` result as static | 5min | Performance | +| 29 | Use hash map for `findTool()` instead of linear scan | 15min | Performance | +| 30 | Add `declare(strict_types=1)` to all files missing it | 2h | Type safety | + +--- + +## 13. Subagent Orchestration (Deep) + +**Files**: `src/Agent/SubagentOrchestrator.php` (665 lines), `SubagentFactory.php`, `SubagentTool.php`, `SubagentStats.php` + +#### Critical +- **`yieldSlot`/`reclaimSlot` slot leak for root agent**: Root agent (`id='root'`) never acquires a global semaphore lock. Each `reclaimSlot('root')` consumes a slot permanently. After N calls (concurrency limit), all slots are consumed → deadlock. 
(`SubagentOrchestrator.php:471-496`) +- **`wouldCreateCycle` crashes on pruned stats**: Accesses `$this->stats[$current]->dependsOn` without existence check. Pruned agents cause TypeError. (`SubagentOrchestrator.php:375`) +- **Shared `ContextBudget` across parent and all children**: All subagents at all depths share the same `ContextBudget` instance. Deep child compaction deducts from root's pool. (`SubagentFactory.php:87`) +- **Shared `ProtectedContextBuilder` — mutable state leak**: Child agents' protected context entries appear in parent's context too. (`SubagentFactory.php:101`) + +#### Important +- **`pruneCompleted` removes agents needed for dependency resolution**: New agents depending on pruned IDs get "Unknown dependency agent" errors. +- **Retry loop holds semaphore slot during delay**: Failing agent blocks a concurrency slot for 30+ seconds per retry. +- **Token double-counting during orchestrator-level retries**: Same stats object accumulates tokens across all retry attempts. Correct for total cost but misleading for per-attempt metrics. +- **`cancelAll()` does not clear `$this->cancellations`**: After cancel, array still references already-cancelled deferreds. + +#### Minor +- `autoIdCounter` not thread-safe (safe under Amp cooperative scheduling but undocumented). +- `extractFailureMessage` doesn't traverse full previous-exception chain. + +--- + +## 14. Error Handling & Resilience + +**Codebase-wide scan of exception patterns, catch blocks, and recovery logic.** + +#### Critical +- **No project-specific exception hierarchy**: Only 2 custom exceptions (`RetryableHttpException`, `IntroSkippedException`). All ~50+ other throws use bare `\RuntimeException` or `\InvalidArgumentException`. No `KosmokratorException` base class. 
+- **6 silently swallowed exceptions**: `TuiModalManager.php:343`, `TuiToolRenderer.php:363`, `DiffRenderer.php:539`, `UpdateChecker.php:132`, `SkillLoader.php:109`, `RetryableLlmClient.php:81` — all catch `\Throwable` with empty body or return, no logging. +- **Internal error messages leaked to LLM**: `$e->getMessage()` stored as assistant messages at `AgentLoop.php:288,312,518`, `ToolExecutor.php:313`, `AbstractTool.php:35`. No sanitization layer. Raw HTTP status codes, internal paths, provider details visible to the LLM. + +#### Important +- **~25 overly broad `\Throwable` catches**: Should catch specific types. Catches `Error`, `TypeError`, `ParseError` which indicate programming bugs, not runtime failures. +- **Missing exception types for 5+ failure domains**: LLM/API failures, file operations, auth/OAuth, shell sessions, patch parsing vs application. +- **`runHeadless()` has no `finally` block**: Unlike `run()`, headless agent crashes don't reset UI phase. + +#### Minor +- `SafeDisplay::call()` is an excellent pattern — prevents display errors from crashing execution. +- Tool error messages are generally well-crafted and actionable. + +--- + +## 15. Type Safety & PHP 8.4 Patterns + +**Codebase-wide scan of `declare(strict_types)`, return types, PHPStan config, modern PHP patterns.** + +#### Important +- **~20 files missing `declare(strict_types=1)`**: Most critically `AgentLoop.php`, `AsyncLlmClient.php`, all `Tool/Coding/` tools (BashTool, FileWriteTool, FileEditTool, FileReadTool), `Kernel.php`, `PrismService.php`. No dangerous implicit coercions found — all explicit casts — but policy inconsistency. +- **PHPStan level 5** with 30+ ignore rules: Some hide real issues (Container/Application type mismatch). Should target level 7-8. +- **No PHP 8.4 property hooks or asymmetric visibility used**: Project targets `^8.4` but only uses `readonly` and union types. 
+- **~80 `@var` annotations**: Indicates areas where PHP's type system can't express constraints natively. Consider value objects for common shapes. + +#### Minor +- All non-constructor methods have return type declarations — excellent. +- `mixed` return types only in 4 locations — all acceptable for generic config getters. +- `never` return type unused despite applicable exit() paths in CLI commands. + +--- + +## 16. Kernel Bootstrap & Service Wiring + +**Files**: `bin/kosmokrator`, `src/Kernel.php`, `src/Provider/*.php` + +#### Critical +- **No error handling during boot**: `bin/kosmokrator` has zero try-catch blocks. `Kernel::boot()` doesn't wrap provider loops. Partial initialization on failure. +- **No signal handling anywhere in codebase**: No `pcntl_signal`. Ctrl+C = unclean death — no session save, no DB cleanup, no child process termination. +- **`LlmServiceProvider::registerPrism()` resolves services eagerly**: `PrismManager` and `RelayRegistry` resolved immediately during registration, not lazily. Any construction error is immediately fatal. +- **Undefined env vars silently resolve to empty string**: `${MISSING_KEY}` → `''` instead of `null`. Provider may attempt API calls with empty string as key. +- **No config validation**: `temperature: "warm"` passes through to LLM clients unchecked. + +#### Important +- **Revolt error handler registered last in `boot()`**: Earlier async operations unprotected. +- **`DatabaseServiceProvider::boot()` performs file I/O**: `migrateYamlKeys()` reads/writes YAML during DI boot phase. Side-effect in boot is unexpected and risky. +- **Multiple config keys in code but absent from `kosmokrator.yaml`**: `max_tokens`, `audio_provider`, `audio_model`, `reasoning_effort`, etc. Defaults scattered across codebase. +- **`SettingsManager::reloadRepository()` re-parses all YAML on every write**: I/O-heavy, triggers on every `/set` command. 
+ +#### Minor +- Version resolution uses `shell_exec('git describe')` on every boot — could cache. +- `LaravelApp` (full Application class) used as plain DI container — heavier than needed. +- No scoped/transient bindings — all services are singletons. + +--- + +## Updated Cross-Cutting Themes + +### 8. No Graceful Shutdown (Systemic) +- **No `pcntl_signal` handling anywhere**: Ctrl+C = immediate process death. +- No `finally` blocks in `runHeadless()`. +- No shutdown handlers for shell sessions. +- No `__destruct()` on resource-heavy services. +- **Fix**: Add `pcntl_signal(SIGINT, ...)` handler in `Kernel::boot()` that triggers coordinated cleanup. + +### 9. Exception Hygiene (Codebase-wide) +- Only 2 custom exceptions in 277 files. +- 6 silently swallowed `\Throwable` catches. +- Raw `$e->getMessage()` leaked to LLM in 5+ locations. +- ~25 overly broad catches that mask programming bugs. +- **Fix**: Create `KosmokratorException` hierarchy with 5-8 domain-specific types. Add error sanitization layer before LLM-facing messages. + +### 10. Shared Mutable State in Subagent Tree +- `ContextBudget` shared across all agent depths. +- `ProtectedContextBuilder` shared — child mutations leak to parent. +- `yieldSlot`/`reclaimSlot` slot leak for root agent. +- **Fix**: Clone `ContextBudget` and `ProtectedContextBuilder` per subagent rather than sharing references, and skip slot management for the root agent. 
+ +--- + +## Updated Refactoring Backlog + +### P0 — Add to existing P0 list + +| # | Refactoring | Effort | Impact | +|---|------------|--------|--------| +| 31 | Fix `yieldSlot`/`reclaimSlot` for root agent: skip slot management for depth 0 | 1h | Prevents concurrency slot leak → deadlock | +| 32 | Clone `ContextBudget` and `ProtectedContextBuilder` per subagent | 2h | Prevents cross-agent context pollution | +| 33 | Add `KosmokratorException` base class + 5 domain subtypes | 3h | Enables proper catch granularity | +| 34 | Add error sanitization before LLM-facing messages | 2h | Prevents internal info leakage to LLM | +| 35 | Wrap `ensureSchema()` in transaction + add UNIQUE on schema_version | 30min | Prevents migration re-run bugs | +| 36 | Add `pcntl_signal` handler in Kernel for graceful shutdown | 4h | Systemic fix for resource leaks | + +### P1 — Add to existing P1 list + +| # | Refactoring | Effort | Impact | +|---|------------|--------|--------| +| 37 | Add existence check in `wouldCreateCycle` for pruned stats | 15min | Prevents TypeError crash | +| 38 | Log in all 6 silent `\Throwable` catches | 1h | Makes debugging possible | +| 39 | Bump PHPStan from level 5 to level 7 | 4h | Catches more type issues | +| 40 | Add `declare(strict_types=1)` to 20 missing files | 1h | Policy consistency | +| 41 | Add `pruneCompleted()` guard against in-use stats | 2h | Prevents "unknown dependency" errors | + +--- + +## Final Statistics + +| Dimension | Agents | Sub-agents | Critical | Important | Minor | +|-----------|--------|------------|----------|-----------|-------| +| AgentLoop Core | 1 | 4 | 4 | 11 | 5 | +| Subagent Orchestration | 1 | 4 | 5 | 6 | 4 | +| TUI Renderer | 1 | 5 | 8 | 15 | 8 | +| ANSI Renderer & Markdown | 1 | 4 | 2 | 8 | 8 | +| Tool System & Permissions | 1 | 4 | 5 | 9 | 8 | +| LLM Client Layer | 1 | 4 | 3 | 8 | 4 | +| Session & Database | 2 | 8 | 10 | 16 | 12 | +| Commands & Slash Commands | 1 | 4 | 4 | 11 | 9 | +| Settings & Configuration | 1 | 3 
| 6 | 8 | 5 | +| Diff & UI Display | 1 | 3 | 3 | 10 | 6 | +| Power Commands & UX | 1 | 4 | 3 | 12 | 8 | +| Testing Coverage | 1 | 4 | 4 | 5 | 5 | +| Error Handling | 1 | 4 | 3 | 2 | 2 | +| Type Safety | 1 | 3 | 0 | 3 | 4 | +| Kernel Bootstrap | 1 | 3 | 5 | 4 | 3 | +| **Total** | **16** | **~62** | **65** | **128** | **91** | + +--- + +*Audit completed 2026-04-04. Generated by 16 parallel exploration agents spawning ~62 sub-agents across 20 audit dimensions. 284 total findings.* diff --git a/docs/ecosystem/kosmokrator/proposals/command-inspiration.md b/docs/ecosystem/kosmokrator/proposals/command-inspiration.md new file mode 100644 index 0000000..35ee4a7 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/command-inspiration.md @@ -0,0 +1,65 @@ +# Command Inspiration — From oh-my-claudecode + +Audit of oh-my-claudecode's 31 slash commands compared against KosmoKrator's existing 22. Candidates for future implementation, grouped by priority. + +## High-Value Additions + +### /autopilot +Full autonomous pipeline from idea to verified working code. 5 phases: Expand (clarify requirements) → Plan (architecture) → Execute (write code) → QA (test + verify) → Validate (acceptance criteria). User kicks it off and walks away. + +### /ralph (persistence loop) +"The boulder never stops." Keeps retrying a task until verified complete. PRD-driven verification — after each attempt, checks acceptance criteria. Mandatory architect review before marking done. Max retry limit prevents infinite loops. Named after Sisyphus's boulder — fits the mythology theme. + +### /trace +Evidence-driven investigative debugging. Generates competing hypotheses for a bug, ranks them by evidence weight, then runs discriminating probes (targeted searches/tests) to narrow down the root cause. Structured output: ranked explanations with confidence scores. + +### /deep-interview +Socratic requirements gathering before expensive work. 
Asks probing questions, scores ambiguity mathematically across dimensions (Goal, Constraints, Criteria, Context). Won't proceed until ambiguity drops below threshold (~20%). Uses challenge agents: Contrarian (pokes holes), Simplifier (finds simpler approaches), Ontologist (clarifies terms). Prevents wasted swarm runs on vague requests. + +### /deslop +AI slop cleaner — regression-safe deletion-first cleanup of AI-generated bloat. Reviews code for: unnecessary abstractions, over-engineering, dead code, excessive comments, unused error handling. Deletion-first workflow: remove before rewriting. Optional reviewer-only mode (reports but doesn't change). Natural complement to /unleash — clean up after the swarm. + +### /deepinit +One-shot comprehensive codebase documentation generator. Crawls entire project, generates hierarchical AGENTS.md-style docs across all directories. Useful for onboarding new contributors or giving AI agents better context. + +## Medium-Value Additions + +### /team +Staged pipeline with named roles: team-plan → team-prd → team-exec → team-verify → team-fix. Each stage is a specialized agent with handoff documents preserving decisions, alternatives, and risks between stages. Inter-agent messaging for coordination. + +### /ultraqa +Autonomous QA cycling: run tests → analyze failures → fix → re-run → verify. Repeats up to 5 cycles or until all tests pass. Useful after large refactors or /unleash runs. + +### /doctor +Self-diagnostic command. Checks: PHP version, extensions, config validity, provider API keys, database connectivity, TUI availability, dependency versions. Reports issues with suggested fixes. + +### /cancel +Gracefully cancel any active mode (autopilot, ralph, unleash) with intelligent state cleanup. Auto-detects what's running and tears it down properly. + +### /learner +Extract a reusable debugging pattern or technique from the current conversation. Quality-gated: only saves if the pattern is generalizable. 
Stores as a "skill" that can be referenced in future sessions. + +## Already Covered by KosmoKrator + +| OMC Command | KosmoKrator Equivalent | +|---|---| +| /plan, /ralplan | /plan (consensus planning could be added) | +| /ask | /ask | +| /setup | `kosmokrator setup` | +| /hud | Built-in status bar | +| /cancel | Ctrl+C cascading cancellation | +| /ultrawork (parallel execution) | /unleash + SubagentOrchestrator | +| /skill (memory/patterns) | /memories system | +| /external-context | Memory search + file tools | + +## OMC Patterns Worth Noting + +- **Mathematical ambiguity gating** — weighted scoring before execution prevents wasted work on vague requests +- **Handoff documents** — structured context preservation between pipeline stages (decisions, alternatives, risks) +- **Challenge agents** — Contrarian/Simplifier/Ontologist roles that stress-test plans before execution +- **Consensus planning** — Planner/Architect/Critic loop produces better plans than single-agent planning +- **Magic keywords** — trigger commands without `/` prefix (e.g. typing "autopilot" activates the pipeline) + +## Source + +Analysis based on: `/tmp/oh-my-claudecode/skills/*/SKILL.md` diff --git a/docs/ecosystem/kosmokrator/proposals/context-compaction.md b/docs/ecosystem/kosmokrator/proposals/context-compaction.md new file mode 100644 index 0000000..a4d5e48 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/context-compaction.md @@ -0,0 +1,142 @@ +# Context Compaction + +> Status: Historical plan. Parts of this design are now implemented, but this document remains a design snapshot rather than the canonical current-state description. + +## Context + +Long coding sessions hit the LLM context window limit. Currently `ConversationHistory::trimOldest()` silently drops complete turns — the agent loses context without knowing what was lost. We need intelligent compaction that summarizes old turns before discarding them, preserving critical context in a compressed form. 
+ +Depends on: **Session Persistence (SQLite)** — compaction should be non-destructive, with original messages preserved in the database. + +## Two Triggers + +1. **Automatic** — After each LLM response, check `promptTokens >= contextWindow - buffer`. Buffer defaults to 20K tokens (configurable). Replaces `trimOldest()`. +2. **Manual** — `/compact` slash command for user-initiated compaction. + +## Algorithm + +``` +1. Check threshold: promptTokens >= (contextWindow - buffer) +2. Split history into OLD (everything before last 2-3 turns) and RECENT (preserved) +3. Prune: truncate large tool outputs (>1000 chars) in OLD to "[output truncated — N chars]" +4. Send OLD messages to LLM with compaction prompt +5. Replace OLD messages with a single SystemMessage containing the summary +6. Mark original messages as compacted in SQLite (non-destructive) +7. Continue — agent sees summary + recent turns +``` + +### Context After Compaction + +``` +[system prompt + instructions + environment] +[SystemMessage: summary of turns 1-15] <-- compacted +[user turn 16] <-- preserved (recent) +[assistant turn 16 + tool results] <-- preserved (recent) +[user turn 17] <-- current +``` + +### Compaction Prompt + +``` +Summarize the conversation above for a continuation agent. +Focus on information needed to continue the work seamlessly. + +Use this structure: +--- +## Goal +[What the user is trying to accomplish] + +## Key Decisions +[Important technical choices, constraints, user preferences] + +## Accomplished +[Work completed — specific file paths and changes made] + +## In Progress +[Current task and what remains to be done] + +## Relevant Files +[Files read, edited, or created — with brief notes on each] +--- +``` + +Compaction uses the same LLM client, no tools. The compaction agent is a hidden internal call. + +### Fallback + +If compaction itself overflows (conversation too large even for the summary call), fall back to `trimOldest()` as a last resort and log a warning. 
+ +## Architecture + +### New: `src/Agent/ContextCompactor.php` + +```php +class ContextCompactor +{ + public function __construct( + private LlmClientInterface $llm, + private ModelCatalog $models, + private LoggerInterface $log, + private int $bufferTokens = 20_000, + ) {} + + public function needsCompaction(int $promptTokens, string $model): bool; + public function compact(ConversationHistory $history, int $keepRecent = 3): string; // returns summary +} +``` + +- `needsCompaction()` — checks threshold against context window +- `compact()` — builds the compaction prompt, calls LLM, returns summary text +- History replacement handled by `ConversationHistory::compact()` + +### Modified: `src/Agent/ConversationHistory.php` + +```php +public function compact(string $summary, int $keepRecent = 3): void; +// Replaces messages[0..n-keepRecent] with a SystemMessage containing the summary +// With SQLite: marks old messages as compacted, stores summary as a new message +``` + +### Modified: `src/Agent/AgentLoop.php` + +After each `run()` response (the returned summary is then applied to the history via `ConversationHistory::compact()`, not shown below): +```php +if ($this->compactor->needsCompaction($response->promptTokens, $this->getModelName())) { + $summary = $this->compactor->compact($this->history); + $this->ui->showNotice('Context compacted.'); +} +``` + +### Modified: `src/Command/AgentCommand.php` + +Add `/compact` slash command that triggers manual compaction. 
+ +### Config + +```yaml +kosmokrator: + compaction: + auto: true # Enable automatic compaction + buffer: 20000 # Token buffer to reserve + keep_recent: 3 # Number of recent turns to preserve +``` + +## Differences from OpenCode + +| Aspect | OpenCode | KosmoKrator | +|--------|----------|-------------| +| Storage | SQLite, part-based | SQLite (once persistence added) | +| Pruning | Separate reversible pass | Inline truncation during compaction | +| Post-compact | Synthetic "continue" message | Normal flow continues | +| Summary stacking | Multiple summaries chain | One summary replaces all old | +| Destructive | No (DB keeps originals) | No (DB keeps originals, once SQLite added) | +| Fallback | Error on double-overflow | `trimOldest()` on double-overflow | + +## Verification + +1. Start a long session, watch token count climb in status bar +2. When threshold hit, auto-compaction fires — notice shown, status bar drops +3. Agent continues seamlessly — knows what was discussed +4. `/compact` works manually at any time +5. Summary includes file paths, decisions, and current task +6. Original messages preserved in SQLite (can be viewed later) diff --git a/docs/ecosystem/kosmokrator/proposals/context-management-redesign.md b/docs/ecosystem/kosmokrator/proposals/context-management-redesign.md new file mode 100644 index 0000000..12c32e8 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/context-management-redesign.md @@ -0,0 +1,705 @@ +# Context Management Redesign + +> Status: Proposal. This document expands the current context-management roadmap using patterns observed in `tmp/codex`, `tmp/claude-src`, `tmp/oh-my-claudecode`, and `tmp/opencode`. + +This is a forward-looking design document. It describes improvements beyond the current shipped pipeline and includes both recommended changes and optional experiments. 
+ +## Why This Exists + +KosmoKrator already ships a layered context pipeline: + +- output truncation +- tool-result deduplication +- pruning +- LLM compaction +- oldest-turn trimming fallback + +That baseline works, but it still has structural weaknesses: + +- compaction boundaries are computed independently in multiple places +- subagents use a weaker context policy than the main agent +- token budgeting is coarse +- compaction produces a flat summary but does not preserve protected operating context as a first-class structure +- persistent memories exist, but recall remains fairly primitive + +The projects under `tmp/` show several stronger patterns: + +- explicit replacement-history compaction instead of summary-only compaction +- effective-context budgeting with reserved output headroom +- lightweight micro-pruning before expensive compaction +- tiered memory and selective recall +- transcript/session recall outside the live prompt +- stronger subagent-specific overflow handling +- better observability and failure guards + +## Scope + +This document covers all major ideas surfaced during the comparative review, not only the immediately recommended ones: + +1. unified compaction planning and replacement history +2. effective-context budgeting +3. protected context reinjection after compaction +4. micro-pruning before full compaction +5. truncation storage for oversized tool outputs +6. tiered persistent memory +7. selective memory recall +8. session-history recall/search +9. subagent-specific context policy +10. failure guards and circuit breakers +11. context-health observability +12. optional advanced heuristics and experiments + +## Current-State Problems + +### 1. 
Boundary Drift + +Compaction currently decides what to replace in more than one place: + +- `src/Agent/ContextCompactor.php` +- `src/Agent/ConversationHistory.php` +- `src/Session/SessionManager.php` + +This means in-memory replacement and persisted compaction can diverge if the rules change in one place but not another. + +### 2. Headless/Subagent Degradation + +The main interactive flow can compact. Headless flows only trim oldest turns. Subagents therefore have the least durable context policy even though they often do the most tool-heavy work. + +### 3. Coarse Token Estimation + +Current estimation is based on a flat character heuristic. That is good enough for rough checks but too weak for accurate budgeting around: + +- large tool outputs +- JSON-heavy tool calls +- code vs prose +- reserved output tokens +- model switches to smaller windows + +### 4. Flat Summary Replacement + +Compaction currently replaces old context with a single summary message. It does not explicitly preserve: + +- active mode +- current task tree +- current environment snapshot +- current parent brief for subagents +- any protected operator directives + +These may survive in practice, but they are not guaranteed. + +### 5. Memory Exists, Recall Is Underspecified + +KosmoKrator can persist memories, including memories derived from compaction summaries, but it does not yet separate memory classes cleanly or use a bounded relevance-selection flow. 
+ +## External Patterns Worth Borrowing + +### Codex + +Observed in `tmp/codex/codex-rs`: + +- compaction creates explicit replacement history, not only a summary +- protected initial context can be re-injected around compaction +- compaction can trim oldest items during the compaction attempt itself if the compaction prompt overflows +- context limits are based on model metadata, not one global rule + +### Claude Code + +Observed in `tmp/claude-src`: + +- effective context window reserves output headroom +- warning, error, auto-compact, and blocking thresholds are distinct +- microcompact removes low-value tool payloads before full compaction +- memory recall scans cheap headers first and then selects top relevant files +- repeated auto-compact failure uses circuit breakers +- context-management health is surfaced to the user + +### oh-my-claudecode + +Observed in `tmp/oh-my-claudecode`: + +- notepad tiers: always-loaded context, working memory, and manual memory +- pre-compact reinjection of small project memory and directives +- session-history search over summaries and transcripts +- pending context injection queues for one-shot reinsertion + +### OpenCode + +Observed in `tmp/opencode`: + +- prune before full compaction +- full oversized tool output can be written to disk while only a preview remains inline +- compaction can replay a user turn in overflow scenarios +- plugin hooks can augment compaction prompts + +## Goals + +### Primary Goals + +- preserve continuity through long sessions without silent context loss +- make compaction deterministic and persistence-safe +- reduce unnecessary full compactions +- keep subagents viable in long-running trees +- improve cross-session recall without bloating the live prompt + +### Secondary Goals + +- improve user visibility into context health +- make behavior tunable per model +- leave room for experiments without destabilizing the core agent loop + +### Non-Goals + +- perfect token accounting matching provider 
internals exactly +- replacing live conversation with an external database-first retrieval system +- introducing a vector database or heavy semantic indexing in the first pass + +## Proposed Architecture + +### 1. Unified Compaction Plan + +Introduce a first-class `CompactionPlan` or `CompactionResult` object. Instead of each layer recomputing boundaries, one planner computes the exact replacement once and all consumers use that result. + +Suggested shape: + +```php +final class CompactionPlan +{ + public function __construct( + public readonly int $keepFromMessageIndex, + public readonly array $keptMessageIds, + public readonly array $compactedMessageIds, + public readonly string $summary, + public readonly array $replacementMessages, + public readonly array $extractedMemories, + public readonly int $tokensIn, + public readonly int $tokensOut, + public readonly array $stats, + ) {} +} +``` + +Responsibilities: + +- `ContextCompactor` computes the plan +- `ConversationHistory` applies `replacementMessages` +- `SessionManager` persists the exact `compactedMessageIds` and summary from the plan +- observability reads `stats` instead of re-deriving them + +Benefits: + +- removes duplicated boundary logic +- makes compaction persistence-safe +- allows richer replacement than a single summary message +- makes testing easier + +### 2. Replacement History, Not Only Summary + +Compaction should produce a replacement history that may contain: + +- one summary system message +- one protected reinjection block +- optionally one compact memory block +- then the recent untouched turns + +Instead of: + +```text +[summary] +[recent turns] +``` + +Prefer: + +```text +[protected operating context] +[summary of compacted history] +[selected recalled memory or pending brief] +[recent turns] +``` + +This follows the stronger Codex pattern and reduces accidental instruction loss. + +### 3. Effective Context Budgeting + +Replace a single percent-of-window rule with a richer model. 
+ +Suggested per-model configuration: + +```yaml +agent: + context: + reserve_output_tokens: 16000 + warning_buffer_tokens: 24000 + auto_compact_buffer_tokens: 12000 + blocking_buffer_tokens: 3000 + auto_compact_enabled: true +``` + +Derived values: + +- `effective_context_window = model_context_window - reserve_output_tokens` +- `warning_threshold = effective_context_window - warning_buffer_tokens` +- `auto_compact_threshold = effective_context_window - auto_compact_buffer_tokens` +- `blocking_threshold = effective_context_window - blocking_buffer_tokens` + +Expected behavior: + +- warning state before auto-compact +- proactive micro-prune before full compaction +- hard-stop or forced emergency compaction near blocking +- recompute thresholds when switching models + +### 4. Improved Token Estimation + +Token estimation does not need to be exact, but it should be more structured. + +Suggested improvements: + +- separate estimation for prose, code, JSON, tool calls, and tool results +- conservative padding factor on rough estimates +- count system prompt, task tree, environment context, and injected memories explicitly +- track recent observed prompt-token deltas from provider responses and use them to calibrate future estimates + +Optional extension: + +- maintain lightweight rolling correction factors per provider/model pair + +### 5. Protected Context Reinjection + +After compaction, re-inject a small protected block that does not depend on the summary prompt remembering everything. + +Candidate contents: + +- current agent mode +- current cwd and repo root +- current branch if available +- active task tree +- current user constraints and instructions that must survive +- current parent brief for subagents +- current permission mode + +This block should be small, normalized, and rebuilt from runtime state rather than conversation text. 
+ +Suggested class: + +```php +final class ProtectedContextBuilder +{ + public function buildMainAgentContext(...): array; + public function buildSubagentContext(...): array; +} +``` + +### 6. Micro-Pruning Before Full Compaction + +Add a cheap, deterministic pass before LLM compaction. + +Micro-prune targets: + +- old tool results +- old media/document payloads +- stale repeated file reads +- superseded grep/glob/search output +- tool results already represented by newer richer reads + +Progression: + +1. deduplicate +2. supersede stale reads +3. prune old low-value tool outputs +4. if still near limit, compact +5. if compaction fails, emergency trim or replay strategy + +This should be available in both interactive and headless flows. + +### 7. Progressive Tool Result Replacement + +Do not use only one placeholder shape. Use multiple progressively richer replacement formats depending on policy: + +- cleared: + `[Old tool result content cleared]` +- superseded: + `[Superseded by later file_read of /path/Foo.php]` +- structural summary: + `[file_read /src/Foo.php, 245 lines, class Foo with methods bar() and baz()]` +- truncation pointer: + `[Full output saved to .kosmokrator/truncation/tool_123; preview kept inline]` + +This preserves more semantic value than a uniform tombstone string. + +### 8. Truncation Storage for Oversized Outputs + +When a tool result is too large: + +- keep a bounded inline preview +- save the full payload to a local truncation store +- inject a pointer and usage hint +- let the agent or subagent inspect slices later using targeted reads/search + +Potential local storage: + +```text +.kosmokrator/truncation/ +``` + +Benefits: + +- keeps the live prompt compact +- preserves recoverability +- works well with grep/read-offset tools +- reduces pressure to keep huge shell and file-read output in memory + +### 9. Tiered Memory Model + +Split memory into three classes. + +#### Priority Context + +Always loaded. Very small. 
High-confidence durable constraints. + +Examples: + +- repository-specific invariants +- critical user workflow preferences +- known project hazards + +#### Working Memory + +Session-local or short-lived notes. Auto-pruned by age or staleness. + +Examples: + +- current investigation state +- active hypotheses +- recent but not durable discoveries + +#### Durable Memory + +Cross-session project, user, and decision memories. + +Examples: + +- architecture facts not obvious from code +- repeated user preferences +- prior technical decisions and rationale + +This is a stronger replacement for a single undifferentiated memory bucket. + +### 10. Selective Memory Recall + +Do not inject all memories. Add a bounded relevance-selection step. + +Flow: + +1. scan memory metadata cheaply +2. exclude already surfaced memories +3. exclude noisy reference material for tools already active +4. select top `K` memories for the current task +5. inject only short rendered snippets + +Implementation options: + +- start with SQLite metadata scan plus heuristic ranking +- optionally use a lightweight side-query model later + +Heuristic ranking signals: + +- memory type weight +- keyword overlap with user request and task tree +- recency or freshness +- prior usefulness +- explicit user pinning + +### 11. Session-History Recall/Search + +Move older context recovery out of the live prompt and into targeted recall. + +Capabilities: + +- search prior session titles +- search compaction summaries +- search prior full transcripts for the same project +- search prior subagent summaries + +This supports: + +- resuming interrupted work +- recovering prior decisions without keeping them resident +- starting a fresh thread with good recall + +Potential user-facing features: + +- `/recall <query>` +- `/sessions search <query>` +- automatic recall suggestions on `/resume` + +### 12. Subagent-Specific Context Policy + +Subagents should not share the exact same thresholds as the main agent.
+ +Subagent policy should include: + +- smaller effective context windows +- aggressive micro-pruning +- protected parent brief injected as a compact block +- compact-or-prune behavior in headless mode, not trim-only +- circuit breaker on repeated compaction failures + +Suggested rule of thumb: + +- main agent optimizes for continuity and broad recall +- subagents optimize for narrow task focus and fast turnover + +### 13. Failure Guards and Circuit Breakers + +Repeated auto-compaction failure should not thrash the model or the UI. + +Track: + +- consecutive compaction failures +- consecutive context-overflow errors +- emergency trims performed +- last successful compaction point + +Suggested behavior: + +- first failure: retry with more aggressive micro-prune +- second failure: compact with a smaller protected set +- third failure: enter circuit-breaker mode and stop automatic retries for a period +- expose the state to the user + +### 14. Compaction Prompt Extensibility + +Allow the compaction prompt to be augmented by internal providers or future plugin hooks. + +Possible uses: + +- domain-specific file summaries +- language-aware structural extraction +- project-specific compaction hints +- excluding noisy tool families + +This should be optional. The base compaction path must remain stable without external hooks. + +### 15. Replay-Aware Overflow Recovery + +When overflow is severe, consider replaying the current user turn against a freshly compacted history instead of repeatedly trimming the live thread. + +Use carefully: + +- useful when the latest turn is the important one +- dangerous if it hides prior context loss + +This is an optional advanced path, not a first-pass requirement. + +### 16. Background Consolidation + +Add a low-priority background process that periodically consolidates working memory into durable memory or small priority notes. 
+ +Triggers may include: + +- idle time +- session count +- elapsed wall time +- after successful compaction + +Guardrails: + +- lock to avoid concurrent consolidators +- strict size budgets +- skip while the main loop is under active context pressure + +### 17. Context-Health Observability + +Expose context health explicitly in the UI and logs. + +Metrics to surface: + +- estimated prompt usage +- effective context window +- warning and compact thresholds +- tokens saved by dedup, prune, and compaction +- last compaction summary length +- consecutive compaction failures +- whether protected reinjection was applied +- memory items injected this turn + +Possible surfaces: + +- status bar +- `/context` or `/debug context` +- log events +- subagent dashboard integration + +## Optional Advanced Heuristics + +These are valuable, but should remain experimental until the deterministic foundation is stable. + +### 1. Semantic Importance Scoring + +Score tool results by importance and prune the lowest-value outputs first. + +Signals: + +- reference density +- decision influence +- tool-type weight +- downstream dependency + +This idea already exists in `docs/proposals/context-management-strategies.md` and remains compatible with this redesign. + +### 2. Sliding Context Tiers + +Apply different fidelity rules by age: + +- last 2 turns: full fidelity +- turns 3 to 5: summarized tool results +- turns 6+: cleared or superseded outputs + +This gives smoother degradation than a single hard compaction boundary. + +### 3. File Content Cache + +Cache file reads by `(path, mtime)` and replace repeated large reads with references rather than full content. + +### 4. Session Branching + +Let the user fork a long session into a fresh thread seeded by summary plus protected context. + +### 5. Model-Switch Compaction + +When switching to a smaller-window model, proactively compact before the next turn rather than waiting for an overflow condition. 
+ +## Proposed Components + +### New or Expanded Runtime Components + +- `ContextBudget` + - computes effective windows and thresholds +- `CompactionPlanner` + - computes one `CompactionPlan` +- `ProtectedContextBuilder` + - builds non-conversational protected context blocks +- `MicroPruner` + - cheap deterministic context reduction +- `TruncationStore` + - persists oversized outputs for later targeted inspection +- `MemorySelector` + - bounded recall over stored memories +- `SessionRecall` + - search interface over summaries and transcript metadata +- `ContextTelemetry` + - status and observability layer + +### Existing Components To Refactor + +- `ContextManager` + - orchestrates threshold checks and policy decisions +- `ContextCompactor` + - becomes planner plus summarizer instead of summary-only helper +- `ConversationHistory` + - applies replacement history from a plan instead of recomputing a boundary +- `SessionManager` + - persists plan outputs directly +- `TokenEstimator` + - upgraded or wrapped by `ContextBudget` +- `SubagentFactory` + - provides headless agents with a real context policy + +## Data Model Changes + +Potential persistence additions: + +- compaction records table or extended message metadata: + - compacted message ids + - summary text + - saved tokens + - failure count + - protected-context metadata +- memory metadata: + - class: `priority`, `working`, `durable` + - pinned flag + - last surfaced time + - freshness score +- truncation store metadata: + - path + - source tool + - byte size + - retention expiry + +## Suggested Rollout Phases + +### Phase 1: Deterministic Foundation + +- unify compaction planning +- remove duplicated boundary logic +- add effective-context budgeting +- expose context-health metrics internally + +### Phase 2: Cheap Context Wins + +- strengthen micro-pruning +- add richer supersede placeholders +- add truncation storage +- enable better headless/subagent policy + +### Phase 3: Continuity and Recall + +- protected 
context reinjection +- tiered memory model +- selective memory recall +- session-history recall/search + +### Phase 4: Advanced Behaviors + +- circuit breakers +- replay-aware overflow recovery +- background consolidation +- prompt hooks +- semantic importance scoring +- sliding context tiers + +## Tradeoffs + +### Benefits + +- more reliable long-session continuity +- lower chance of drift between in-memory and persisted state +- fewer unnecessary LLM compaction calls +- better subagent stability +- better cross-session recall + +### Costs + +- more moving parts in the agent loop +- more metadata to persist and test +- more policy complexity per model and per agent type +- more UI/state concepts for debugging + +### Main Risk + +The main risk is overengineering before the deterministic base is fixed. The correct order is: + +1. unify compaction planning +2. improve budgeting and pruning +3. add reinjection and recall +4. add heuristic and background systems + +## Recommended First Implementation Slice + +Even though this document covers the full idea set, the best first slice is still: + +1. `CompactionPlan` as the single source of truth +2. effective-context budgeting +3. micro-prune in both main and headless flows +4. protected context reinjection + +That sequence improves correctness first and opens the door for the rest. + +## Relationship to Existing Docs + +- `docs/architecture/overview.md` remains the current-state document +- `docs/proposals/context-compaction.md` is a historical snapshot of the first compaction design +- `docs/proposals/context-management-strategies.md` remains a useful experimental appendix for heuristics like semantic importance scoring and sliding tiers + +This document is intended to become the main future-state reference for context-management redesign work. 
diff --git a/docs/ecosystem/kosmokrator/proposals/context-management-strategies.md b/docs/ecosystem/kosmokrator/proposals/context-management-strategies.md new file mode 100644 index 0000000..1668cb0 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/context-management-strategies.md @@ -0,0 +1,49 @@ +# Context Management Strategies + +> Status: Proposal. This document describes possible future improvements beyond the context-management pipeline that currently ships. + +Future improvements to KosmoKrator's context management beyond the current three-layer system (truncation → pruning → compaction). + +## 1. Semantic Importance Scoring + +Score each tool result (0.0–1.0) to decide what to prune first. Four signals, no LLM call: + +- **Reference density** — How much of the tool result did the assistant actually quote/use in its response? Split result into lines, count how many appear in the assistant's text. High overlap = load-bearing. +- **Decision influence** — Did the assistant make a decision citing this result? Detect decision language ("the issue is", "I'll use", "based on") + file path/tool name in the following assistant message. +- **Tool type weight** — Static: `bash` 0.7 (irreproducible), `grep` 0.5 (re-searchable), `file_read` 0.3 (on disk), `glob` 0.1 (trivial to redo), `file_write`/`file_edit` 0.2 (just confirmations). +- **Downstream dependency** — Did values from this result appear in arguments of later tool calls? (grep finds path → file_read uses that path). Breaking the chain loses reasoning context. + +Combined score: `0.3 × reference + 0.25 × decision + 0.25 × type + 0.2 × dependency` + +Pruner sorts candidates by score ascending, prunes lowest-value first until it hits the savings target. High-importance results survive even if old. + +## 2. Tool Result Deduplication + +The LLM frequently re-reads the same file (read → edit → read to verify). Each re-read dumps redundant content into context. 
+ +Three tiers: + +- **Exact duplicate** — Same tool + same args + same result → replace older with `[superseded — same content returned by later call]` +- **Same-file re-read** — `file_read` same path, different offset/limit or after `file_edit` on that path → old content is stale, supersede it +- **Semantic overlap** — `grep` returns lines from `foo.php`, then `file_read foo.php` returns those same lines plus more → grep result is now a subset, replace with `[content included in later file_read of foo.php]` + +Runs eagerly after each tool call (before adding to history, scan backwards for matches). Detection is a hash lookup + string comparison — microseconds. + +The supersede message preserves the *fact* that the read happened (the LLM knows the file was relevant) without the *content* (which exists in the newer result). + +### How They Combine + +Dedup runs first as a cheap pass (always safe). Then importance scoring handles the rest — pruner removes lowest-scored results first. Together they form a priority queue: + +1. Duplicates → always prune +2. Low-importance results (low reference density, no decision influence) → prune when over budget +3. High-importance results → survive until compaction +4. Protected recent turns (last 2 user messages) → never pruned + +## 3. Other Ideas (Not Yet Designed) + +- **Progressive summarization** — Instead of `[cleared]`, replace with a heuristic summary: `[file_read /src/Foo.php: 245 lines, PHP class with methods bar(), baz()]`. No LLM call, just structural extraction. +- **Pre-flight context budget** — Before sending to LLM, estimate prompt size via `TokenEstimator` and proactively prune/compact. Avoids wasted API calls. +- **Sliding context tiers** — Last 2 turns: full fidelity. Turns 3-5: tool results summarized. Turns 6+: tool results cleared, assistant responses truncated. Graceful degradation instead of a cliff. +- **File content caching** — Store file reads in a local cache keyed by `path:mtime`. 
Replace tool result with compact reference. Re-read from cache instead of re-reading from disk. +- **Session branching** — `/branch` snapshots the current session and starts fresh with just a summary. Old session preserved intact and resumable. diff --git a/docs/ecosystem/kosmokrator/proposals/desktop-app.md b/docs/ecosystem/kosmokrator/proposals/desktop-app.md new file mode 100644 index 0000000..6b51825 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/desktop-app.md @@ -0,0 +1,296 @@ +# KosmoKrator Desktop App + +> Status: Proposal. This document describes a possible future desktop surface. The current shipped product is the terminal application. + +## Concept + +KosmoKrator is one product with two surfaces: terminal and desktop. The desktop app is not a companion or wrapper — it runs the same engine (AgentLoop, PrismService, ToolRegistry, Lua bridge, MCP client) with a GUI renderer instead of ANSI/TUI. + +``` + KosmoKrator (the engine) + ├── Kernel, ConfigLoader + ├── AgentLoop, PrismService + ├── ToolRegistry, Lua bridge + ├── MCP client + └── Integration loader + │ + ┌──────────┴──────────┐ + │ │ + CLI surface Desktop surface + (bin/kosmokrator) (NativePHP app) + │ │ + Symfony Console Electron window + + TUI renderer + web renderer + │ │ + terminal system tray + on-demand always-on + notifications + OAuth flows + global shortcuts +``` + +The split happens at the UI layer. `RendererInterface` already abstracts rendering: `AnsiRenderer` and `TuiRenderer` implement it today, and the desktop app adds a third implementation — a web-based renderer that pushes events to the Electron frontend. + +--- + +## Why NativePHP + +NativePHP wraps a Laravel app in Electron (desktop) or native shells (mobile). It bundles its own static PHP binary — users install one app, no PHP or Node required. + +KosmoKrator's engine already boots an Illuminate Container (via Prism's transitive `laravel/framework` dependency).
The desktop surface wraps this in a minimal Laravel HTTP layer that NativePHP can host, while the core engine remains framework-agnostic. + +**NativePHP provides what terminals and browsers can't:** + +| Capability | Terminal | Browser | Desktop (NativePHP) | +|-----------|----------|---------|---------------------| +| System tray (always-on) | No | No | Yes | +| Native notifications | No | Limited | Yes | +| Global shortcuts | No | No | Yes | +| OAuth redirect flows | Painful (copy-paste) | Callback URL complexity | Native redirect URI | +| File dialogs | CLI path input | Browser picker | Native OS picker | +| Deep linking | No | URL only | Custom protocol (`kosmokrator://`) | +| Auto-updater | Manual | N/A | Built-in OTA | +| Offline-first | Yes | No | Yes | + +--- + +## Architecture + +### Shared Core (framework-agnostic) + +``` +src/ +├── Kernel.php # Boots Illuminate Container + config +├── ConfigLoader.php # YAML → Config Repository +├── Agent/ +│ ├── AgentLoop.php # Core loop: prompt → LLM → tools → loop +│ ├── ConversationHistory.php +│ └── Middleware/ +├── LLM/ +│ └── PrismService.php # Prism wrapper, provider failover +├── Tool/ +│ ├── ToolInterface.php +│ ├── ToolRegistry.php +│ └── Coding/ # Built-in tools +├── Lua/ +│ ├── LuaSandboxService.php +│ ├── LuaBridge.php +│ └── LuaApiDocGenerator.php +├── Mcp/ +│ └── McpClient.php +├── Integration/ +│ ├── IntegrationLoader.php +│ └── YamlCredentialResolver.php +└── Session/ + ├── Session.php + └── SessionStore.php +``` + +This is the engine. It has no opinion about rendering. 
+ +### CLI Surface (Symfony Console) + +``` +bin/kosmokrator +src/ +├── Command/ +│ └── AgentCommand.php # REPL loop +└── UI/ + ├── RendererInterface.php + ├── UIManager.php + ├── Ansi/AnsiRenderer.php + └── Tui/TuiRenderer.php +``` + +### Desktop Surface (NativePHP + Electron) + +``` +desktop/ +├── app/ +│ ├── Providers/ +│ │ └── NativeAppServiceProvider.php # NativePHP window, menu, tray +│ ├── Http/ +│ │ └── Controllers/ +│ │ └── AgentController.php # WebSocket bridge to AgentLoop +│ └── Renderers/ +│ └── WebRenderer.php # RendererInterface → WebSocket events +├── resources/ +│ ├── views/ # Blade/Vue frontend +│ └── js/ +│ ├── app.js +│ └── components/ +│ ├── ConversationView.vue # Chat UI +│ ├── ToolCallPanel.vue # Tool execution display +│ ├── IntegrationManager.vue # OAuth flows, credential management +│ └── StatusBar.vue # Model, tokens, cost +├── routes/ +│ └── web.php +├── composer.json # Requires kosmokrator/kosmokrator + nativephp/desktop +└── package.json # Frontend deps (Vue, Tailwind, etc.) +``` + +The desktop surface is a thin Laravel app that: +1. Boots the shared KosmoKrator Kernel +2. Creates a `WebRenderer` implementing `RendererInterface` +3. Pushes render events (thinking, streaming, tool calls) over WebSocket to the Vue frontend +4. Receives user input from the frontend and feeds it to `AgentLoop` +5. NativePHP handles the Electron shell, system tray, notifications, etc. 
+ +### WebRenderer + +```php +class WebRenderer implements RendererInterface +{ + public function showThinking(): void + { + broadcast(new AgentEvent('thinking')); + } + + public function streamChunk(string $text): void + { + broadcast(new AgentEvent('chunk', ['text' => $text])); + } + + public function showToolCall(string $name, array $args): void + { + broadcast(new AgentEvent('tool_call', ['name' => $name, 'args' => $args])); + } + + public function showToolResult(string $name, string $output, bool $success): void + { + broadcast(new AgentEvent('tool_result', [ + 'name' => $name, + 'output' => $output, + 'success' => $success, + ])); + } + + // ... etc +} +``` + +Same `RendererInterface`, just broadcasting instead of printing ANSI codes. + +--- + +## OpenCompany Connection + +OpenCompany is an optional cloud backend — not required, not a separate product in this context. + +```yaml +# ~/.kosmokrator/config.yaml +opencompany: + enabled: true + url: https://my-instance.opencompany.app + api_key: sk-... +``` + +### When Connected + +- Pulls available hosted integrations (ClickUp, Google, etc.) +- Syncs integration credentials (no local OAuth needed for already-configured integrations) +- Proxies tool calls for hosted-mode integrations +- Syncs sessions/conversation history (optional) +- Access to OpenCompany's vector memory and knowledge base + +### When Disconnected + +- Full local operation — same agent, same built-in tools, same Lua bridge +- Local integrations work (credentials in `~/.kosmokrator/integrations.yaml`) +- Local LLM via Ollama works +- MCP servers work +- Zero degradation for core coding agent functionality + +The desktop app is KosmoKrator first, OpenCompany-connected second. + +--- + +## Desktop-Specific Features + +### System Tray + +Agent lives in the system tray. Click to open conversation window. Badge shows when agent needs attention (tool approval, error, completion). 
+ +### Native Notifications + +``` +┌─────────────────────────────────┐ +│ KosmoKrator │ +│ ✓ Refactor complete — 4 files │ +│ changed, all tests passing │ +└─────────────────────────────────┘ +``` + +Notifications for: agent completion, tool approval requests, errors, integration connection status. + +### Global Shortcuts + +Summon KosmoKrator from any application: + +``` +Cmd+Shift+K → opens KosmoKrator window with prompt focused +``` + +Quick-action mode: type a command, hit enter, window minimizes back to tray. + +### OAuth Integration Flows + +The desktop app owns a real redirect URI (`kosmokrator://oauth/callback`). Adding integrations: + +1. Click "Add Gmail" +2. Browser opens Google OAuth consent screen +3. Google redirects to `kosmokrator://oauth/callback?code=...` +4. NativePHP's deep linking catches it +5. Tokens stored in credential resolver +6. Done — no copy-paste, no localhost callback server + +### File Context + +Native file picker for attaching context to conversations: + +``` +[Attach File] → OS file dialog → selected file added to conversation +``` + +Also: drag-and-drop files onto the conversation window. + +### Auto-Updater + +Ship updates via GitHub Releases. The app checks and updates silently in the background. Users always have the latest version without manual intervention. + +--- + +## Package Structure + +The desktop app is a separate Composer package that depends on the core: + +```json +{ + "name": "kosmokrator/desktop", + "require": { + "kosmokrator/kosmokrator": "^1.0", + "nativephp/desktop": "^2.0", + "laravel/framework": "^13.0" + } +} +``` + +The core `kosmokrator/kosmokrator` package remains CLI-first and framework-agnostic. The desktop package adds the Laravel HTTP layer and NativePHP integration on top. 
+ +This means: +- `composer global require kosmokrator/kosmokrator` → CLI agent +- Download KosmoKrator.app → desktop agent (bundles everything) +- Same engine, same config, same sessions, same integrations + +--- + +## Rendering Surfaces Summary + +| Surface | Renderer | Input | Output | Runtime | +|---------|----------|-------|--------|---------| +| Terminal (ANSI) | `AnsiRenderer` | readline | ANSI escape codes | `php bin/kosmokrator` | +| Terminal (TUI) | `TuiRenderer` | Symfony TUI InputWidget | TUI widgets + Revolt | `php bin/kosmokrator` | +| Desktop | `WebRenderer` | Vue frontend via WebSocket | Electron BrowserWindow | NativePHP (bundled PHP) | +| *(future)* Mobile | `MobileRenderer` | Native UI via EDGE | Swift/Kotlin shell | NativePHP Mobile | + +All implement `RendererInterface`. The engine doesn't know which surface it's running on. diff --git a/docs/ecosystem/kosmokrator/proposals/ecosystem-architecture.md b/docs/ecosystem/kosmokrator/proposals/ecosystem-architecture.md new file mode 100644 index 0000000..87eab22 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/ecosystem-architecture.md @@ -0,0 +1,441 @@ +# KosmoKrator Ecosystem Architecture + +> Status: Proposal. This document outlines a future ecosystem architecture around Lua, MCP, and shared integrations. These capabilities are not fully implemented in the current CLI. + +## Overview + +KosmoKrator is not just a CLI coding agent — it's a runtime that can host any tool ecosystem via Lua code execution and MCP. It shares a tool ecosystem with OpenCompany, a self-hosted AI collaboration platform. + +``` + opencompanyapp/integration-core + (framework-agnostic contracts) + │ + opencompanyapp/integration-* + (ClickUp, Google, Plausible, ...) 
+ │ + ┌───────────────┼───────────────┐ + │ │ + OpenCompany KosmoKrator + (web platform) (the engine) + │ │ + LuaBridge ┌─────────┼─────────┐ + │ │ │ │ + LuaSandbox CLI Desktop (Mobile) + (PECL ext) terminal NativePHP future + │ ANSI/TUI Electron + MCP Client │ │ + LuaBridge LuaBridge + │ │ + LuaSandbox LuaSandbox + │ │ + MCP Client MCP Client +``` + +KosmoKrator is one engine with multiple surfaces. Tools are written once as Composer packages. OpenCompany is an optional cloud backend for hosted integrations. See `docs/proposals/desktop-app.md` for the desktop surface architecture. + +--- + +## Lua Code Mode + +### The Problem with JSON tool_use + +Traditional tool calling requires one LLM round-trip per tool invocation. A task like "find all PHP files with TODOs and list them" needs: glob → read file 1 → read file 2 → ... → read file N. That's N+1 round-trips, each costing tokens and latency. + +### The Solution: LLM Writes Lua + +Instead of N sequential JSON tool_use blocks, the LLM writes a single Lua program: + +```lua +local files = app.glob({pattern = "src/**/*.php"}) +local results = {} +for _, f in ipairs(files) do + local content = app.read_file({path = f}) + if content:find("TODO") then + table.insert(results, f) + end +end +return results +``` + +One round-trip. One tool call (`execute_lua`). The LLM gets composability, loops, conditionals, variables — all the things that make code more expressive than structured JSON. + +### Evidence This Works + +| Source | Finding | +|--------|---------| +| Anthropic engineering blog | 98.7% token reduction vs JSON tool_use | +| Cloudflare Code Mode | 99.9% token reduction for large API surfaces | +| CodeAct (ICML 2024) | 20% higher success rate, 30% fewer turns | +| Anthropic "Code execution with MCP" | Explicitly advocates agents writing code to call MCP tools | + +### Why Lua Specifically + +- **Designed for embedding**: Smallest footprint of any mainstream scripting language. 
Built from day one to be embedded in host applications. +- **Easy to sandbox**: Remove `io`, `os`, `debug`, `package`, `loadfile` and the language physically cannot touch the filesystem or network. Only whitelisted functions are available. +- **Simple syntax**: No indentation sensitivity (Python), no prototype chains (JS). LLMs generate valid Lua reliably. +- **Stable**: Lua 5.1 hasn't changed since 2006. The attack surface is well-studied. +- **Familiar**: Config language for Neovim, scripting language for games. LLMs have seen plenty of it in training. + +### Runtime: LuaSandbox PECL Extension + +The `luasandbox` PECL extension, developed by Wikimedia for MediaWiki's Scribunto module, runs user-supplied Lua on Wikipedia at massive scale. It is purpose-built for untrusted code. + +**Security model (whitelist, not blacklist):** +- `setMemoryLimit(int $bytes)` — hard kill on exceed +- `setCPULimit(float $seconds)` — hard kill on exceed (includes PHP callback time) +- `registerLibrary(string $name, array $functions)` — expose specific PHP functions to Lua +- Blocks by default: `dofile()`, `loadfile()`, `io.*`, `os.*`, `debug.*`, `package.*`, `require()`, `load()`, `loadstring()`, `print()`, `string.dump()`, `collectgarbage()`, `coroutine` + +Only what you explicitly register is available. Everything else is inaccessible. + +### Self-Discoverable API + +The LLM doesn't need every tool schema in its system prompt. Instead: + +```lua +-- LLM can discover what's available at runtime +local all = docs() -- list all namespaces +local gmail = docs("app.gmail.work") -- list tools for this account +local detail = docs("app.gmail.work.send_message") -- full schema + examples +``` + +API docs are auto-generated from tool schemas by `LuaApiDocGenerator`. This keeps the system prompt small while giving the LLM access to arbitrarily large tool surfaces. + +### Fallback to Standard tool_use + +Lua code mode is not all-or-nothing. 
Simple single-tool calls can still use standard JSON tool_use. The LLM chooses: quick read → `tool_use`, complex multi-step → `execute_lua`. Both paths coexist. + +--- + +## MCP Integration + +### KosmoKrator as MCP Client + +KosmoKrator connects to external MCP servers, discovers their tools, and makes them available to the LLM — either as standard tool_use or as Lua functions in the sandbox. + +``` +MCP Server (external) + │ + ├── listTools() → discover available tools + │ + └── callTool(name, args) → execute and return result + │ +KosmoKrator MCP Client + │ + ├── Register as Lua functions: app.mcp.{server}.{tool}() + │ + └── Or expose as standard tool_use definitions +``` + +**Transport options:** +- **stdio**: MCP server runs as a child process (ideal for local tools) +- **HTTP + SSE**: Remote MCP servers (ideal for hosted OpenCompany tools) + +**PHP MCP client options:** +- `modelcontextprotocol/php-sdk` — official, maintained by PHP Foundation + Symfony +- `php-mcp/client` — fluent builder, sync facade +- `swisnl/mcp-client` — SSE, stdio, streamable HTTP + +### KosmoKrator as MCP Server + +KosmoKrator can also expose its own tools (file read/write, bash, glob, grep, git) as an MCP server. This allows other AI applications (Claude Desktop, IDE extensions, other agents) to use KosmoKrator's capabilities. + +### Lua + MCP Bridge + +The key innovation: MCP tools are registered as Lua functions in the sandbox. The LLM writes Lua that calls MCP tools, composes results, and handles logic — all in a single execution: + +``` +LLM writes Lua code + → KosmoKrator's Lua sandbox executes it + → Lua calls app.mcp.github.list_issues({repo = "..."}) + → KosmoKrator routes to GitHub MCP server + → Result returns to Lua as a table + → Lua filters, transforms, calls more tools + → Final result returns to the LLM +``` + +The LLM doesn't know or care whether a tool is local, an MCP server, or a hosted OpenCompany integration. The Lua namespace is the universal interface. 
+ +--- + +## OpenCompany Tool Ecosystem + +### Existing Tool Packages + +OpenCompany has 15+ AI tool packages as standalone Composer packages under the `opencompanyapp` vendor: + +| Package | Tools | Description | +|---------|-------|-------------| +| `ai-tool-clickup` | 17 | Tasks, lists, folders, docs, time tracking, chat | +| `ai-tool-google` | 10+ | Calendar, Gmail, Drive, Contacts, Sheets, Search Console, Tasks, Analytics, Docs, Forms | +| `ai-tool-plausible` | 5+ | Web analytics queries, realtime visitors, sites, goals | +| `ai-tool-ticktick` | 5+ | Task management, projects, priorities | +| `ai-tool-mermaid` | 1 | Diagram rendering (flowcharts, sequences, ER, Gantt, etc.) | +| `ai-tool-plantuml` | 1 | UML diagram rendering | +| `ai-tool-typst` | 1 | Document typesetting | +| `ai-tool-vegalite` | 1 | Data visualization / charts | +| `ai-tool-coingecko` | 3+ | Cryptocurrency market data | +| `ai-tool-exchangerate` | 2+ | Currency conversion (340+ currencies) | +| `ai-tool-worldbank` | 3+ | Economic indicators for 200+ countries | +| `ai-tool-trustmrr` | 2+ | Startup revenue/MRR data | +| `ai-tool-celestial` | 6+ | Moon phases, sunrise/sunset, planet positions, eclipses | + +### Current Architecture Problem + +Today, every tool implements `Laravel\Ai\Contracts\Tool` — a hard dependency on laravel/ai: + +``` +ai-tool-clickup → integration-core → laravel/ai +``` + +This means KosmoKrator (which uses Prism directly, not laravel/ai) cannot use these packages without pulling in the full Laravel AI SDK. + +**However**, the actual business logic in each package (e.g., `ClickUpService`, `PlausibleService`) is framework-agnostic. The laravel/ai coupling is only in the thin tool wrapper layer (schema definition + handle method). 
+ +### Refactored Architecture (Option C) + +Make `integration-core` framework-agnostic by giving it its own `Tool` contract (the originally planned second package, a laravel/ai bridge, turned out to be unnecessary): + +``` +opencompanyapp/integration-core (framework-agnostic) +├── Contracts/ +│ ├── Tool ← OWN interface, not laravel/ai's +│ │ ├── name(): string +│ │ ├── description(): string +│ │ ├── parameters(): array ← JSON Schema array +│ │ └── execute(array $args): ToolResult +│ ├── ToolProvider +│ ├── CredentialResolver +│ ├── ConfigurableIntegration +│ ├── AgentFileStorage +│ └── ProvidesLuaDocs +├── Support/ +│ ├── ToolResult ← Value object +│ ├── ConfigCredentialResolver +│ └── ToolProviderRegistry +└── composer.json ← NO laravel/ai dependency +``` + +No bridge package needed. Vendor package tools are Lua-only — they're never passed to the laravel/ai agent loop. Built-in tools (tasks, system, agents, memory, lua) still implement `Laravel\Ai\Contracts\Tool` directly. `LuaBridge` and `getToolCatalog()` use a dual-dispatch `instanceof` check to handle both tool types. + +**Result:** +- All tool packages depend only on `integration-core` (no laravel/ai) +- OpenCompany's built-in tools keep their `Laravel\Ai\Contracts\Tool` implementation +- KosmoKrator uses the tools natively through its own `ToolInterface` +- Tool packages become truly framework-agnostic + +--- + +## Dual-Mode Integrations: Local vs Hosted + +Users can run tool integrations in two modes: + +### Local Mode + +The tool package runs inside KosmoKrator's process. Credentials are stored locally. API calls go directly from the user's machine to the external service. + +``` +KosmoKrator → ClickUpService → ClickUp API +``` + +### Hosted Mode (OpenCompany) + +The tool runs on the user's OpenCompany instance. KosmoKrator sends requests to OpenCompany's API, which proxies to the external service. Credentials are managed in OpenCompany's encrypted storage.
+ +``` +KosmoKrator → OpenCompany API → ClickUpService → ClickUp API +``` + +Hosted mode is effectively MCP over HTTP — OpenCompany acts as an MCP server for its configured integrations. This means: + +- Users who already have OpenCompany with configured integrations can use them from KosmoKrator immediately +- No need to re-enter credentials or set up OAuth flows locally +- OpenCompany handles token refresh, rate limiting, and credential rotation +- KosmoKrator just needs an API key for the OpenCompany instance + +### From the Lua bridge perspective, both modes are identical + +```lua +-- User doesn't know or care whether this is local or hosted +app.gmail.work.send_message({ + to = "team@example.com", + subject = "Deploy complete", + body = "All tests passed." +}) +``` + +The credential resolver and transport layer handle the routing transparently. + +--- + +## Multi-Account Support + +Users can configure multiple accounts for the same provider. Each account gets a user-defined alias that becomes its namespace. + +### Configuration + +```yaml +# ~/.kosmokrator/integrations.yaml + +gmail: + work: + mode: local + credentials: + client_id: "..." + client_secret: "..." + refresh_token: "..." + personal: + mode: hosted + opencompany_key: "sk-..." + account_id: "acc_abc123" + +clickup: + default: + mode: local + credentials: + api_token: "..." + + # second alias under the same clickup: key (YAML keys must be unique) + freelance: + mode: local + credentials: + api_token: "..." # different workspace +``` + +### Lua Namespace + +The namespace pattern is `app.{provider}.{alias}.{tool}`: + +```lua +-- Two Gmail accounts +app.gmail.work.send_message({to = "cto@company.com", ...}) +app.gmail.personal.list_messages({query = "is:unread"}) + +-- Two ClickUp workspaces +app.clickup.default.create_task({list_id = "...", name = "..."}) +app.clickup.freelance.get_tasks({list_id = "..."}) +``` + +### Architecture + +The `ToolProvider` yields multiple named instances instead of a flat tool list.
Each instance carries: + +- **Alias**: user-defined label (`work`, `personal`, `freelance`) +- **Mode**: `local` or `hosted` +- **Credential scope**: isolated credentials per instance +- **Endpoint**: direct API URL (local) or OpenCompany API URL (hosted) + +```php +// CredentialResolver is scoped to the instance +$resolver->get('gmail:work', 'client_id'); // local credentials +$resolver->get('gmail:personal', 'api_token'); // proxied to OpenCompany +``` + +The Lua bridge registers functions per instance: + +```php +$sandbox->registerLibrary('app.gmail.work', [ + 'send_message' => fn($args) => $this->execute('gmail', 'work', 'send_message', $args), + 'list_messages' => fn($args) => $this->execute('gmail', 'work', 'list_messages', $args), +]); + +$sandbox->registerLibrary('app.gmail.personal', [ + 'send_message' => fn($args) => $this->execute('gmail', 'personal', 'send_message', $args), + 'list_messages' => fn($args) => $this->execute('gmail', 'personal', 'list_messages', $args), +]); +``` + +### Setup Flow + +When a user wants to add an integration: + +``` +$ kosmokrator integrations add gmail + +? Alias for this account: work +? Mode: (local / hosted) + > local + +? Client ID: xxxxxxxx +? Client Secret: xxxxxxxx +? Starting OAuth flow... (opens browser) +✓ Gmail "work" configured. + +Lua namespace: app.gmail.work.* +Available tools: send_message, list_messages, search_messages, ... +``` + +Or for hosted mode: + +``` +$ kosmokrator integrations add gmail + +? Alias for this account: personal +? Mode: (local / hosted) + > hosted + +? OpenCompany API key: sk-xxxxxxxx +? Select account from OpenCompany: + 1. personal@gmail.com (Gmail) + 2. work@company.com (Gmail) + > 1 +✓ Gmail "personal" configured (hosted via OpenCompany). + +Lua namespace: app.gmail.personal.* +``` + +--- + +## Putting It All Together + +### The Full Stack + +``` +┌─────────────────────────────────────────────────────────┐ +│ LLM Layer │ +│ Prism-PHP → Anthropic, OpenAI, Ollama, ... 
│ +│ Provider failover, streaming, tool_use + Lua code mode │ +└────────────────────────┬────────────────────────────────┘ + │ +┌────────────────────────┴────────────────────────────────┐ +│ Agent Loop │ +│ Conversation history, middleware pipeline, │ +│ event dispatch (thinking, streaming, tool calls) │ +└────────────────────────┬────────────────────────────────┘ + │ + ┌──────────────┼──────────────┐ + │ │ │ + ┌───────┴──────┐ ┌────┴─────┐ ┌──────┴──────┐ + │ Standard │ │ Lua │ │ MCP │ + │ tool_use │ │ Code │ │ Client │ + │ (JSON) │ │ Mode │ │ │ + └───────┬─────┘ └────┬─────┘ └──────┬──────┘ + │ │ │ + └──────────┬──┘ │ + │ │ + ┌──────────────────┴─────────────────┴──────────────────┐ + │ Tool Layer │ + │ │ + │ Built-in (read, write, bash, glob, grep, git) │ + │ │ │ + │ Integrations (opencompanyapp/ai-tool-*) │ + │ ├── local mode → direct API calls │ + │ └── hosted mode → OpenCompany API proxy │ + │ │ │ + │ MCP servers (external, discovered at runtime) │ + │ │ │ + │ All accessible via: app.{provider}.{alias}.{tool}() │ + └───────────────────────────────────────────────────────┘ +``` + +### What Makes This Powerful + +1. **Universal namespace**: Every tool — built-in, Composer package, MCP server, local or hosted — lives under `app.*` in Lua. The LLM has one consistent interface. + +2. **Write once, run anywhere**: Tool packages are framework-agnostic Composer packages. They work in OpenCompany (web), KosmoKrator (CLI), or any future PHP application. + +3. **Progressive complexity**: Simple tasks use standard tool_use. Complex orchestration uses Lua code mode. Users don't need to know the difference. + +4. **Ecosystem network effect**: Every tool added to OpenCompany is immediately available in KosmoKrator, and vice versa. MCP servers from the broader community plug in through the same Lua namespace. + +5. **Cost optimization**: Lua scripts execute at zero LLM cost. 
Repetitive or deterministic workflows (daily reports, scheduled syncs) run as pure Lua after initial AI authoring. diff --git a/docs/ecosystem/kosmokrator/proposals/integration-refactor-plan.md b/docs/ecosystem/kosmokrator/proposals/integration-refactor-plan.md new file mode 100644 index 0000000..d41f705 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/integration-refactor-plan.md @@ -0,0 +1,339 @@ +# Integration Package Refactor Plan + +> Status: Proposal. This is a forward-looking refactor plan, not a description of shipped KosmoKrator behavior. + +## Context + +The AI tool packages were originally built around `Laravel\Ai\Contracts\Tool` — each tool exposed `description()`, `schema(JsonSchema)`, and `handle(Request)` for direct LLM function calling. We then switched to Lua code mode where the LLM writes Lua scripts that call tools via `LuaBridge`, making the LLM-oriented interface unnecessary overhead. + +Additionally, KosmoKrator (CLI agent) needs to share the same tool ecosystem but cannot depend on `laravel/ai`. The packages must become framework-agnostic. + +### Current Pain Points + +1. `integration-core` **depends on** `laravel/ai` for the `Tool` interface. Every tool package transitively depends on `laravel/ai`. KosmoKrator cannot use them. +2. **225+ tool classes** implement `Laravel\Ai\Contracts\Tool` with `schema(JsonSchema)` and `handle(Request)` even though they are never direct LLM tools — they are called via Lua. +3. `ToolRegistry` **is a 1965-line monolith** mixing tool metadata (`TOOL_MAP`), instantiation (180-line `match`), permissions, catalog generation, and app group config. +4. **Two registration paths**: external packages self-register via `ToolProviderRegistry`, built-in tools are hardcoded in `TOOL_MAP` + the giant `match`. +5. `ProvidesLuaDocs` is optional and zero packages implement it despite Lua being the primary mode. +6. 
**No multi-account support** in `ToolProvider` or `CredentialResolver` — needed for KosmoKrator's `app.gmail.work.*` / `app.gmail.personal.*` pattern. +7. **Package naming** (`ai-tool-*`) reflects Era 1 thinking. + +--- + +## Phase 1: New Tool Contract in `integration-core` + +**Goal**: `integration-core` owns its own `Tool` interface. Drop the `laravel/ai` dependency. + +### New Contracts + +```php +// integration-core/src/Contracts/Tool.php +interface Tool +{ + public function name(): string; + public function description(): string; + public function parameters(): array; + public function execute(array $args): ToolResult; +} +``` + +`parameters()` returns a plain array — what `LuaApiDocGenerator` actually needs: + +```php +public function parameters(): array +{ + return [ + 'to' => ['type' => 'string', 'required' => true, 'description' => 'Recipient email'], + 'subject' => ['type' => 'string', 'required' => true, 'description' => 'Email subject'], + 'body' => ['type' => 'string', 'required' => false, 'description' => 'Email body'], + ]; +} +``` + +No `JsonSchema` factory, no `Request` wrapper. + +### ToolResult Value Object + +```php +// integration-core/src/Support/ToolResult.php +class ToolResult +{ + public function __construct( + public readonly mixed $data, + public readonly ?string $error = null, + public readonly array $meta = [], // attachments, files created, etc. + ) {} +} +``` + +Replaces returning raw strings. Both platforms can inspect structured results. 
+ +### ToolProvider Changes + +Add `luaDocsPath()` directly (replacing the optional `ProvidesLuaDocs` interface). `credentialFields()` for setup flows is deferred to Phase 5: + +```php +interface ToolProvider +{ + public function appName(): string; + public function appMeta(): array; + public function tools(): array; + public function isIntegration(): bool; + public function createTool(string $class, array $context = []): Tool; + public function luaDocsPath(): ?string; // NEW — null = auto-generated only + // credentialFields() deferred to Phase 5 (multi-account credential system) +} +``` + +### CredentialResolver — Account-Scoped + +```php +interface CredentialResolver +{ + public function get(string $provider, string $account, string $key): ?string; +} +``` + +In OpenCompany: `IntegrationSettingCredentialResolver` resolves from DB. In KosmoKrator: `YamlCredentialResolver` reads from `~/.kosmokrator/integrations.yaml`. + +--- + +## Phase 2: Bridge Package `integration-laravel-ai` + +> **OUTCOME: Skipped.** The plan assumed we'd need a `LaravelAiToolAdapter` to wrap new-style tools back into `Laravel\Ai\Contracts\Tool` for the agent loop. In practice, vendor package tools are never passed to the agent loop — they're Lua-only. Built-in tools (tasks, system, agents, memory, lua) still implement `Laravel\Ai\Contracts\Tool` directly. The dual-dispatch `instanceof` check in `LuaBridge` and `getToolCatalog()` was sufficient. No bridge package needed. + +--- + +## Phase 3: Migrate Tool Packages to New Contract + +**Goal**: Each `ai-tool-*` package implements the framework-agnostic `Tool` contract. + +### Before (coupled to laravel/ai) + +```php +use Laravel\Ai\Contracts\Tool; +use Laravel\Ai\Tools\Request; +use Illuminate\Contracts\JsonSchema\JsonSchema; + +class RenderMermaid implements Tool { + public function description(): string { ... } + public function handle(Request $request): string { ... } + public function schema(JsonSchema $schema): array { ...
} +} +``` + +### After (framework-agnostic) + +```php +use OpenCompany\IntegrationCore\Contracts\Tool; +use OpenCompany\IntegrationCore\Support\ToolResult; + +class RenderMermaid implements Tool { + public function name(): string { return 'render_mermaid'; } + public function description(): string { ... } + public function parameters(): array { + return [ + 'syntax' => ['type' => 'string', 'required' => true, 'description' => 'Mermaid diagram syntax...'], + 'title' => ['type' => 'string', 'required' => false, 'description' => 'Diagram title (default: "Diagram")'], + 'width' => ['type' => 'integer', 'required' => false, 'description' => 'Output width in pixels (default: 1400)'], + 'theme' => ['type' => 'string', 'required' => false, 'description' => 'Theme', 'enum' => ['default', 'dark', 'forest', 'neutral']], + ]; + } + public function execute(array $args): ToolResult { ... } +} +``` + +Tool name moves into the tool itself (was only in `ToolProvider::tools()` key). `handle(Request)` becomes `execute(array)`. JsonSchema ceremony disappears. + +Migrate one package at a time. Order by simplicity: + +1. `ai-tool-mermaid` (1 tool — proof of concept) +2. `ai-tool-plantuml`, `ai-tool-typst`, `ai-tool-vegalite` (1 tool each) +3. `ai-tool-exchangerate`, `ai-tool-trustmrr`, `ai-tool-celestial`, `ai-tool-worldbank`, `ai-tool-coingecko` (data packages) +4. `ai-tool-plausible`, `ai-tool-ticktick` (integrations with credentials) +5. `ai-tool-clickup` (17 tools) +6. `ai-tool-google` (10+ sub-providers, largest package) + +--- + +## Phase 4: Built-In ToolProviders, Shrink ToolRegistry + +**Goal**: Built-in tools use the same `ToolProvider` pattern as external packages. Eliminate `TOOL_MAP` and the 180-line `match` statement. 
+ +### New Provider Classes + +``` +app/Agents/Tools/Providers/ +├── ChatToolProvider.php (14 tools) +├── DocsToolProvider.php (14 tools) +├── FilesToolProvider.php (10 tools) +├── TablesToolProvider.php (20 tools) +├── CalendarToolProvider.php (7 tools) +├── ListsToolProvider.php (21 tools) +├── WorkspaceToolProvider.php (27 tools) +├── AutomationsToolProvider.php (6 tools) +├── SvgToolProvider.php (1 tool) +``` + +Each provider: + +- Declares tools via `tools()` (eliminates `TOOL_MAP`) +- Handles instantiation in `createTool()` (eliminates the `match` statement) +- Provides `appMeta()` (eliminates `APP_GROUPS` entries for that section) + +The 5 direct tool groups (`tasks`, `system`, `agents`, `memory`, `lua`) can also become providers or stay in ToolRegistry since they are core agent machinery. + +Register in `AppServiceProvider`: + +```php +$registry = $this->app->make(ToolProviderRegistry::class); +$registry->register(new ChatToolProvider($this->app)); +$registry->register(new DocsToolProvider($this->app)); +// ... +``` + +### ToolRegistry After Refactor (~300 lines) + +```php +class ToolRegistry +{ + public const DIRECT_TOOL_GROUPS = ['tasks', 'system', 'agents', 'memory', 'lua']; + + public function getToolsForAgent(User $agent): array { /* iterate registry, filter, wrap */ } + public function getAppCatalog(User $agent): string { /* build system prompt */ } + public function getAllToolsMeta(User $agent): array { /* for frontend */ } + public function instantiateToolBySlug(string $slug, User $agent): ?Tool { /* delegate to provider */ } +} +``` + +No more `TOOL_MAP`. No more `APP_GROUPS`. No more `APP_ICONS`. No more `INTEGRATION_LOGOS`. No more 180-line `match`. All derived from `ToolProviderRegistry`. + +--- + +## Phase 5: Multi-Account CredentialResolver + +**Goal**: Support `app.gmail.work.*` / `app.gmail.personal.*` pattern for KosmoKrator. + +### Context Array Extension + +The `createTool()` `$context` array already exists. 
Add account scoping: + +```php +$provider->createTool(GmailSendMessage::class, [ + 'agent' => $agent, + 'account' => 'work', // NEW + 'timezone' => 'UTC', +]); +``` + +### CredentialResolver + +```php +// OpenCompany: resolves from IntegrationSetting table +class IntegrationSettingCredentialResolver implements CredentialResolver +{ + public function get(string $provider, string $account, string $key): ?string + { + return IntegrationSetting::where('integration_id', $provider) + ->where('account', $account) + ->value("config->{$key}"); + } +} + +// KosmoKrator: resolves from YAML config +class YamlCredentialResolver implements CredentialResolver +{ + public function get(string $provider, string $account, string $key): ?string + { + return $this->config[$provider][$account]['credentials'][$key] ?? null; + } +} +``` + +### Lua Namespace + +The `LuaBridge` registers functions per account: + +```lua +app.gmail.work.send_message({to = "cto@company.com", ...}) +app.gmail.personal.list_messages({query = "is:unread"}) +``` + +OpenCompany initially uses a single implicit `default` account (backward compatible). Multi-account is opt-in. + +--- + +## Phase 6: Lua Docs in Every Package + +**Goal**: Every tool package ships a `lua-docs/` directory with real examples and common patterns. 
+ +Add to every package: + +``` +ai-tool-mermaid/ +├── lua-docs/ +│ └── mermaid.md # examples, tips, common patterns +├── src/ +│ ├── MermaidToolProvider.php → luaDocsPath() returns __DIR__.'/../lua-docs/mermaid.md' +│ └── Tools/RenderMermaid.php +``` + +Example content: + +```markdown +## Common Patterns + +### Flowchart from data +\```lua +local items = app.tables.get_table_rows({table_id = "..."}) +local lines = {"graph TD"} +for _, item in ipairs(items.rows) do + table.insert(lines, string.format(" %s --> %s", item.from, item.to)) +end +app.mermaid.render_mermaid({syntax = table.concat(lines, "\n")}) +\``` +``` + +`LuaApiDocGenerator` already has `getProviderLuaDocs()` wired up — it just needs packages to start providing content. + +--- + +## Phase 7: Rename `ai-tool-*` to `integration-*` + +**Goal**: Package naming reflects what they are — integrations, not AI tools. + +``` +opencompanyapp/ai-tool-mermaid → opencompanyapp/integration-mermaid +opencompanyapp/ai-tool-google → opencompanyapp/integration-google +opencompanyapp/ai-tool-clickup → opencompanyapp/integration-clickup +... +``` + +Use Composer `replace` in the new package to smooth the transition: + +```json +{ + "name": "opencompanyapp/integration-mermaid", + "replace": { + "opencompanyapp/ai-tool-mermaid": "self.version" + } +} +``` + +Do this **after** the contract changes (phases 1-4) so each package is only touched once. 
+ +--- + +## Sequencing Summary + +| Phase | What | Why This Order | +| --- | --- | --- | +| **1** | New `Tool` contract in `integration-core` | Unblocks everything — KosmoKrator can't exist without this | +| **~~2~~** | ~~`integration-laravel-ai` bridge package~~ | ~~Skipped — Lua-only tools don't need the adapter~~ | +| **3** | Migrate tool packages to new contract | Each package becomes framework-agnostic | +| **4** | Built-in `ToolProvider` implementations | Eliminates ToolRegistry monolith | +| **5** | Multi-account `CredentialResolver` | Required for KosmoKrator multi-account | +| **6** | Lua docs in every package | Agent quality improvement | +| **7** | Rename `ai-tool-*` → `integration-*` | Cosmetic, do last when stable | + +Phases 1-3 are the critical path for KosmoKrator. Phase 4 is the biggest maintenance win for OpenCompany. Phases 5-7 can happen in parallel with KosmoKrator development. diff --git a/docs/ecosystem/kosmokrator/proposals/laravel-ai-patterns.md b/docs/ecosystem/kosmokrator/proposals/laravel-ai-patterns.md new file mode 100644 index 0000000..4d94bd4 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/laravel-ai-patterns.md @@ -0,0 +1,209 @@ +# Design Patterns Worth Adopting from Laravel AI SDK + +> Status: Reference / proposal. This document records ideas worth borrowing; it is not a statement that KosmoKrator currently implements these patterns. + +Laravel AI SDK (`laravel/ai`) is a layer built on top of Prism-PHP. KosmoKrator uses Prism directly (lighter, no web-app assumptions), but several of laravel/ai's architectural patterns are worth adopting. + +--- + +## 1. 
Tool Interface Pattern (Schema via JSON Schema) + +### What laravel/ai does + +Tools implement a `schema()` method that uses `illuminate/json-schema` — a fluent builder that produces valid JSON Schema objects: + +```php +// laravel/ai approach +public function schema(JsonSchema $schema): array +{ + return [ + 'path' => $schema->string() + ->description('Absolute path to the file') + ->required(), + 'offset' => $schema->integer() + ->description('Line number to start reading from'), + 'limit' => $schema->integer() + ->description('Max lines to read') + ->default(200), + ]; +} +``` + +This produces the exact `input_schema` JSON Schema that LLM providers expect, without hand-writing JSON arrays. + +### Why this matters + +- **Type safety**: The builder prevents invalid schemas at compile time (e.g., you can't set `minimum` on a string parameter). +- **Self-documenting**: Tool definitions read like API docs. +- **Provider-agnostic**: JSON Schema is the universal format — Anthropic, OpenAI, and MCP all use it. +- **Lua bridge compatibility**: When auto-generating Lua API docs from tool schemas (for code mode), a structured schema object is far easier to traverse than a raw array. + +### How to adopt in KosmoKrator + +`illuminate/json-schema` is already available — it ships with `laravel/framework` v13 (transitive via Prism). Use it in `ToolInterface`: + +```php +namespace Kosmokrator\Tool; + +use Illuminate\JsonSchema\JsonSchema; + +interface ToolInterface +{ + public function name(): string; + public function description(): string; + public function schema(JsonSchema $schema): array; + public function execute(array $args): ToolResult; +} +``` + +The `ToolRegistry` converts these schemas to Prism's `Tool` format when building LLM requests, and to Lua function signatures when generating API docs for code mode. + +--- + +## 2. 
Middleware Pipeline for Agents + +### What laravel/ai does + +Agents can declare middleware that wraps every tool call or LLM interaction: + +```php +class MyAgent extends Agent +{ + public function middleware(): array + { + return [ + new RateLimitMiddleware(maxPerMinute: 60), + new LoggingMiddleware(), + new ApprovalMiddleware(tools: ['bash', 'file_write']), + ]; + } +} +``` + +Each middleware gets the request/context, can modify it, pass it through (`$next($request)`), or short-circuit (e.g., deny execution, ask for approval). + +### Why this matters + +KosmoKrator needs several cross-cutting concerns that are best modeled as middleware: + +| Concern | Without middleware | With middleware | +|---------|-------------------|----------------| +| **Tool approval** | if/else in AgentLoop | `ApprovalMiddleware` wraps dangerous tools | +| **Cost tracking** | Manual token counting | `CostTrackingMiddleware` intercepts every LLM call | +| **Rate limiting** | Ad-hoc sleep/retry | `RateLimitMiddleware` with token bucket | +| **Audit logging** | Scattered log calls | `AuditMiddleware` logs every tool execution | +| **Sandboxing policy** | Hardcoded in BashTool | `SandboxMiddleware` enforces blocked commands | + +### How to adopt in KosmoKrator + +Implement a simple pipeline — no need for Laravel's full `Pipeline` class: + +```php +namespace Kosmokrator\Agent; + +interface AgentMiddleware +{ + public function handle(AgentContext $context, callable $next): mixed; +} +``` + +The `AgentLoop` runs the middleware stack around each tool execution: + +```php +$pipeline = array_reduce( + array_reverse($this->middleware), + fn ($next, $middleware) => fn ($ctx) => $middleware->handle($ctx, $next), + fn ($ctx) => $this->executeTool($ctx) +); + +$result = $pipeline($context); +``` + +This keeps the `AgentLoop` clean — tool approval, logging, cost tracking are all separate, composable middleware classes. + +--- + +## 3. 
Provider Failover / Retry Strategy + +### What laravel/ai does + +Agents can declare fallback providers that activate automatically on failure: + +```php +class MyAgent extends Agent +{ + public function provider(): array|string + { + return [ + 'anthropic/claude-sonnet-4-20250514', // primary + 'openai/gpt-4.1', // fallback 1 + 'groq/llama-3.3-70b-versatile', // fallback 2 + ]; + } +} +``` + +On rate limit (429), server error (5xx), or timeout, laravel/ai automatically retries with the next provider in the list. It handles provider-specific error codes (Anthropic's 529 overloaded, OpenAI's 413 context too long). + +### Why this matters + +- **Reliability**: API rate limits and outages are common. Automatic failover keeps the agent running without user intervention. +- **Cost optimization**: Primary provider can be the best model; fallback can be cheaper/faster for when the primary is down. +- **Graceful degradation**: Better to get a response from a weaker model than to error out entirely. 
+ +### How to adopt in KosmoKrator + +Wrap `PrismService` with retry logic: + +```php +namespace Kosmokrator\LLM; + +class PrismService +{ + private array $providers; // from config('kosmokrator.agent.providers') + + public function stream(array $messages, array $tools): \Generator + { + $lastException = null; + + foreach ($this->providers as $provider) { + try { + yield from $this->buildRequest($provider, $messages, $tools)->asStream(); + return; + } catch (PrismRateLimitedException|PrismServerException $e) { + $lastException = $e; + // Log failover, continue to next provider + } + } + + throw $lastException; + } +} +``` + +Config in `kosmokrator.yaml`: + +```yaml +agent: + providers: + - provider: anthropic + model: claude-sonnet-4-20250514 + - provider: openai + model: gpt-4.1 + - provider: ollama + model: llama3.3 +``` + +This gives you automatic failover with zero changes to `AgentLoop` — it just calls `PrismService::stream()` and gets responses regardless of which provider served them. Caveat: the sketch is only safe when a provider fails before its first chunk is yielded; on a mid-stream failure the next provider restarts the response from scratch and its output lands after the partial text already emitted, so a real implementation should fail over only before the first chunk. It should also guard against an empty provider list, where `throw $lastException` would try to throw `null`. + +--- + +## Summary + +| Pattern | Complexity to adopt | Value for KosmoKrator | +|---------|--------------------|-----------------------| +| JSON Schema tool definitions | Low (dependency already available) | High — cleaner tools, Lua doc generation | +| Agent middleware pipeline | Medium (20-30 lines of pipeline code) | High — keeps AgentLoop clean, enables approval/logging/cost tracking | +| Provider failover | Low (wrap PrismService) | Medium — reliability for daily use | + +All three patterns can be adopted incrementally without pulling in laravel/ai as a dependency. They're architectural ideas, not library lock-in. diff --git a/docs/ecosystem/kosmokrator/proposals/streaming.md b/docs/ecosystem/kosmokrator/proposals/streaming.md new file mode 100644 index 0000000..d1851b8 --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/streaming.md @@ -0,0 +1,139 @@ +# Streaming LLM Responses + +> Status: Proposal.
This document describes a streaming design that is not the current shipped runtime behavior. + +## Context + +Both renderers currently buffer full LLM responses before displaying. The TUI renderer has live `MarkdownWidget` rendering ready (`streamChunk()` + `processRender()`), but `AgentLoop` always calls `chat()` which blocks until the complete response arrives. Streaming would improve perceived responsiveness in `/ask` and `/plan` modes where the LLM produces longer text output. Lower priority for tool-heavy `/edit` mode. + +## Current Architecture + +``` +AgentLoop.run() + → $llm->chat(messages, tools) ← blocks until complete response + → $ui->streamChunk($fullText) ← dumps entire text at once + → $ui->streamComplete() +``` + +**PrismService** has `stream()` returning `Generator<StreamEvent>` — never called. +**AsyncLlmClient** has no streaming — `$response->getBody()->buffer()` reads entire body. +**TUI renderer** has live widget updating — built and waiting. +**ANSI renderer** buffers chunks then renders markdown at the end. + +## Design + +### Three layers need changes + +**1. LlmClientInterface — add `stream()` method** + +```php +/** + * @return Generator<StreamEvent> + */ +public function stream(array $messages, array $tools = [], ?Cancellation $cancellation = null): Generator; +``` + +PrismService already has this. AsyncLlmClient needs it. + +**2. AsyncLlmClient — SSE parsing** + +Replace `$body->buffer()` with line-by-line SSE reading from Amp's async body stream. + +SSE format (Anthropic): +``` +event: content_block_delta +data: {"delta":{"type":"text_delta","text":"Hello"}} + +event: message_delta +data: {"delta":{"stop_reason":"end_turn"},"usage":{"input_tokens":123,"output_tokens":45}} +``` + +Yield typed `StreamEvent` objects (reuse Prism's event classes): +- `TextDeltaEvent` — incremental text chunk +- `ToolCallDeltaEvent` — incremental tool call JSON fragment +- `StreamEndEvent` — finish reason, usage, final tool calls + +**3. 
AgentLoop — stream-aware run loop** + +Replace: +```php +$response = $this->llm->chat(...); +$this->ui->streamChunk($response->text); +``` + +With: +```php +$text = ''; +$toolCallBuffers = []; // id → accumulated JSON +$usage = null; +$finishReason = null; + +foreach ($this->llm->stream($messages, $tools, $cancellation) as $event) { + if ($event instanceof TextDeltaEvent) { + $text .= $event->delta; + $this->ui->streamChunk($event->delta); // live incremental display + } + if ($event instanceof ToolCallDeltaEvent) { + $toolCallBuffers[$event->toolId] = ($toolCallBuffers[$event->toolId] ?? '') . $event->delta; + } + if ($event instanceof StreamEndEvent) { + $finishReason = $event->finishReason; + $usage = $event->usage; + } +} + +$this->ui->streamComplete(); +// Parse accumulated tool call JSON buffers into ToolCall objects +// Continue with tool execution as before +``` + +Tool calls only complete at stream end — execution logic unchanged. + +### RetryableLlmClient + +Wrap `stream()` with retry on initial connection failure only. Mid-stream failures cannot be retried (partial response already displayed). On mid-stream error, yield an error event or throw — AgentLoop handles it. + +### Fallback + +If provider doesn't support streaming (`supportsStreaming() === false`), fall back to `chat()` with the current buffer-then-display behavior. No regression for non-streaming providers. + +## Tool Call Streaming Behavior + +Tool arguments arrive as JSON fragments: +``` +{"path": ← ToolCallDeltaEvent +"src/file.php"} ← ToolCallDeltaEvent +``` + +Must accumulate and parse at `content_block_stop`. The final `ToolCall` objects are only reliable at stream end. This means tool execution timing is unchanged — streaming only speeds up text display, not tool execution. 
+ +## Files + +| Action | File | +|--------|------| +| **Modify** | `src/LLM/LlmClientInterface.php` — add `stream()` | +| **Modify** | `src/LLM/AsyncLlmClient.php` — implement SSE parsing + `stream()` | +| **Modify** | `src/LLM/RetryableLlmClient.php` — wrap `stream()` with connection retry | +| **Modify** | `src/Agent/AgentLoop.php` — stream-aware `run()` loop | +| **Modify** | `src/UI/Ansi/AnsiRenderer.php` — optional: incremental echo instead of buffer | +| **None** | `src/UI/Tui/TuiRenderer.php` — already has live widget updating | + +## Edge Cases + +- **Non-streaming providers** — fallback to `chat()`, no visual change +- **Mid-stream disconnect** — show partial text, log error, don't retry +- **Empty stream** (immediate tool calls, no text) — skip streaming, go straight to tool execution +- **Thinking tokens** (Claude extended thinking) — `ThinkingEvent` can show a "thinking" indicator, discard content before `TextDeltaEvent` begins +- **Mixed text + tool calls** — text streams live, tool call JSON accumulates silently + +## Effort Estimate + +- SSE parser in AsyncLlmClient: medium (line protocol + provider-specific JSON shapes) +- AgentLoop stream loop: small (iterate events, dispatch to UI) +- RetryableLlmClient wrapper: small +- ANSI incremental rendering: small (optional) +- Testing: medium (mock SSE streams, partial responses, error cases) + +## Priority + +Medium-low. Biggest impact in `/ask` and `/plan` modes. Minimal impact in `/edit` mode where tool calls dominate response time. Implement after web tools, cost tracking, and deduplication. diff --git a/docs/ecosystem/kosmokrator/proposals/tui-ux-improvements.md b/docs/ecosystem/kosmokrator/proposals/tui-ux-improvements.md new file mode 100644 index 0000000..567049b --- /dev/null +++ b/docs/ecosystem/kosmokrator/proposals/tui-ux-improvements.md @@ -0,0 +1,415 @@ +# TUI UX Improvements + +> Status: Proposal. This document is a UX backlog / comparative design note, not a description of the current TUI. 
+ +Comparative analysis of KosmoKrator vs OpenCode vs Claude Code terminal UIs, ranked by UX impact. + +## Benchmark Comparison + +| Aspect | KosmoKrator | OpenCode | Claude Code | +|--------|-------------|----------|-------------| +| **Framework** | Custom Symfony TUI (PHP) | Custom OpenTUI (SolidJS/Bun) | Forked Ink (React/Node) | +| **Rendering** | Widget tree, diff-based screen updates | 60 FPS, SolidJS reactive | Double-buffered Yoga flexbox | +| **Themes** | 1 hardcoded theme | 35+ themes, JSON-defined, auto dark/light | 6 themes incl. daltonized + ANSI fallback | +| **Diffs** | Word-level with syntax highlight | Split/unified, tree-sitter, 11 theme tokens | Native Rust NAPI module, word-level | +| **Spinners** | 14 custom sets, breathing animation | Knight Rider gradient, per-agent colors | Glimmer wave, stall-aware color shift | +| **Input** | Multi-line EditorWidget | Rich textarea, extmarks, frecency autocomplete | Vim mode, voice, image paste, typeahead | + +--- + +## Ranked Improvements (Highest to Lowest UX Impact) + +### 1. Collapsed Tool Groups + +**Impact**: Very High — single biggest readability win +**Effort**: Medium +**Source**: Original design (stacked brackets) + +Every tool call is currently rendered individually. Sequential `file_read` × 5 shows 5 separate entries, drowning the conversation in noise. + +**What others do**: +- Claude Code auto-collapses sequential Read/Grep/Glob calls into `"Reading 5 files"` or `"Searching 3 patterns"` — a single expandable line. +- OpenCode uses `InlineTool` for simple tools (single line) and `BlockTool` for complex ones (expandable). 
+ +**What to build**: +- Detect consecutive same-type tool calls (file_read, grep, glob, bash) +- Collapse into a summary line with expand-to-detail on Ctrl+O +- Show aggregate stats (file count, match count, total time) + +#### Mockups — Stacked Brackets Style + +##### Before (current behavior) + +``` +☽ Read src/UI/Theme.php +✓ ⏋ 237 lines (ctrl+o to reveal) + +☽ Read src/UI/Tui/TuiRenderer.php +✓ ⏋ 1180 lines (ctrl+o to reveal) + +☽ Read src/UI/Diff/DiffRenderer.php +✓ ⏋ 95 lines (ctrl+o to reveal) + +♅ Edit src/UI/Theme.php +✓ 3 replacements applied + +⊛ Search pattern: "render()" path: src/ +✓ ⏋ 14 matches across 5 files (ctrl+o to reveal) +``` + +12 lines of visual noise for 3 reads, 1 edit, 1 search. + +##### After (collapsed — default view) + +Same scenario rendered in 5 lines: + +``` +┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄ +☽ Read 3 files · 412 lines · 1.2s + ⊛ src/UI/Theme.php + ⊛ src/UI/Tui/TuiRenderer.php + ⊛ src/UI/Diff/DiffRenderer.php +┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄ + +┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄ +⊛ Search 14 matches in 5 files · 0.8s + pattern: "render()" in src/ +┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄ +``` + +Edit and Write calls remain ungrouped — they always render individually as today: + +``` +♅ Edit src/UI/Theme.php +✓ 3 replacements applied +``` + +##### Expanded view (Ctrl+O on the Read group) + +``` +┌── ☽ Read 3 files · 412 lines · 1.2s ──────────────┐ +│ src/UI/Theme.php 237 lines │ +│ src/UI/Tui/TuiRenderer.php 1180 lines │ +│ src/UI/Diff/DiffRenderer.php 95 lines │ +├──────────────────────────────────────────────────────│ +│ Theme.php │ +│ ⏋ 1 ` | +| `packages/opencode/src/tool/task.ts` | Sub-agent tool that creates child sessions, supports resume via `task_id` | +| `packages/opencode/src/tool/batch.ts` | Parallel tool execution up to 25 concurrent calls | +| `packages/opencode/src/session/index.ts:244-303` | Per-message cost calculation with `Decimal` precision | +| 
`packages/opencode/src/snapshot/index.ts` | Git-based filesystem checkpointing | +| `packages/opencode/src/storage/db.ts` | SQLite persistence for sessions/messages | +| `packages/opencode/src/cli/cmd/stats.ts` | CLI stats: total cost, cost/day, per-model breakdown | + +--- + +## Porting Plan — Component by Component + +### 1. Global Concurrency Semaphore + +**Source pattern:** OpenCode `util/queue.ts` `work()` function + Claude Code `utils/generators.ts` `all()` with `concurrencyCap`. + +**Implementation in KosmoKrator:** + +```php +// In SubagentOrchestrator, add a class-level semaphore +private LocalSemaphore $globalSemaphore; + +public function __construct(/* ... */) { + $this->globalSemaphore = new LocalSemaphore(10); // max 10 concurrent agents +} + +// In spawnAgent(), wrap the Amp\async() block: +$lock = $this->globalSemaphore->acquire(); +// ... inside async, after agent completes: +$lock->release(); +``` + +**Effort:** ~20 lines. We already use `LocalSemaphore(1)` for group constraints — just need one more instance for the global cap. + +**Configuration:** Should be configurable via `subagent_max_concurrency` setting. + +--- + +### 2. Retry Logic with Exponential Backoff + +**Source pattern:** OpenCode `session/retry.ts` (cleaner, ~105 lines) + Claude Code `withRetry.ts:530-548` (jitter formula). + +**Key code to port:** + +``` +delay formula: min(base * 2^attempt + random_jitter, maxDelay) +retry-after-ms header: use directly if present +retry-after header: parse as seconds or HTTP date +retryable errors: 429, 529/overloaded, 408, 5xx +non-retryable: context overflow +``` + +**Implementation in KosmoKrator:** Enhance `RetryableLlmClient` with: +- `Retry-After` and `Retry-After-Ms` header parsing +- Jittered exponential backoff (base 500ms, max 32s) +- Per-agent retry (wrap the `runHeadless()` call in the orchestrator) +- Unattended mode: indefinite retries with 5-min max backoff + +**Effort:** ~100 lines. 
+ +**Constants from Claude Code:** +- `BASE_DELAY_MS = 500` +- `DEFAULT_MAX_RETRIES = 10` +- `MAX_529_RETRIES = 3` (before model fallback) + +--- + +### 3. Background Agent Decoupled Cancellation + +**Source pattern:** Claude Code `tools/AgentTool/runAgent.ts:524-528`: +```typescript +const agentAbortController = isAsync + ? new AbortController() // NEW controller for async agents + : toolUseContext.abortController; // shared for sync agents +``` + +**Implementation in KosmoKrator:** In `SubagentFactory.php`, when creating `NullRenderer` for background agents, pass `null` for the cancellation closure instead of the parent's token. Add a process-level signal handler for Ctrl+C that sets a separate `Cancellation` shared by all agents. + +**Effort:** ~5 lines in factory + ~20 lines for signal handler. + +**Current code location:** `src/Agent/SubagentFactory.php:49` and `src/UI/NullRenderer.php:45-52`. + +--- + +### 4. Disk-Based Result Persistence & Resume + +**Source pattern:** Claude Code `utils/task/diskOutput.ts` — `DiskTaskOutput` class with async write queue, session-scoped directory, `O_NOFOLLOW` security. + +**Implementation in KosmoKrator:** + +``` +Storage layout: +~/.kosmokrator/tasks/{sessionId}/{agentId}.jsonl + +Each line is a JSON event: + {"ts":"...","event":"started","task":"..."} + {"ts":"...","event":"tool_call","tool":"grep"} + {"ts":"...","event":"progress","tokens_in":1234} + {"ts":"...","event":"completed","result":"...","tokens_total":5678} + +Resume logic: +1. On swarm start, scan output directory for completed agent IDs +2. Load their results from disk +3. Skip those agents when scheduling new work +4. Continue from where we left off +``` + +**Key security from Claude Code:** Use `O_NOFOLLOW` equivalent (check not a symlink before write) to prevent sandbox attacks. + +**Effort:** ~200 lines (writer class + resume scanner). + +--- + +### 5. 
Cost Tracking Aggregation + +**Source pattern:** Claude Code `cost-tracker.ts:250-323` — `addToTotalSessionCost()` with per-model USD calculation. + +**Implementation in KosmoKrator:** + +```php +class SwarmCostTracker { + private array $perModel = []; // model → {input, output, cost_usd} + private float $budgetUsd; + + public function add(string $model, int $in, int $out): void { + $cost = ModelCatalog::calculateCost($model, $in, $out); + // accumulate, check budget + } + + public function getSummary(): SwarmCostSummary { + // completed/total, total cost, per-model breakdown, ETA + } +} +``` + +**Dependency:** Needs `ModelCatalog` to know per-token prices for each model. Currently KosmoKrator has `ModelCatalog` but may need pricing data added. + +**Effort:** ~80 lines. + +--- + +### 6. Progress Dashboard + +**Source pattern:** Claude Code `tools/AgentTool/agentToolUtils.ts:538-593` — progress tracker with token counts, tool use, activity descriptions. + +**Implementation in KosmoKrator:** Aggregate existing `SubagentStats` into a `SwarmProgress` view: + +``` +┌─ Swarm Progress ─────────────────────────────┐ +│ Completed: 1,247 / 3,000 (41.6%) │ +│ Failed: 23 (retried: 18) │ +│ Running: 10 │ +│ Tokens: 2.4M in / 312K out │ +│ Cost: $47.12 │ +│ Elapsed: 34m 12s │ +│ ETA: ~48m │ +└───────────────────────────────────────────────┘ +``` + +**Effort:** ~150 lines (aggregator + renderer). + +--- + +### 7. Tool Result Size Persistence + +**Source pattern:** Claude Code `utils/toolResultStorage.ts:55-78` — `getPersistenceThreshold()` per tool. + +**Implementation in KosmoKrator:** Before injecting sub-agent result into parent's conversation history: +1. Check `strlen($result)` against threshold (e.g., 100KB for agent results) +2. If exceeded, write to `~/.kosmokrator/results/{agentId}.txt` +3. Replace with summary: `[Result persisted to disk: {path} ({size})]` +4. Parent LLM can use `file_read` if it needs the full result + +**Effort:** ~100 lines. + +--- + +### 8. 
Concurrency-Safe Tool Execution + +**Source pattern:** Claude Code `StreamingToolExecutor.ts:129-135`: +```typescript +private canExecuteTool(isConcurrencySafe: boolean): boolean { + const executing = this.tools.filter(t => t.status === 'executing') + return executing.length === 0 + || (isConcurrencySafe && executing.every(t => t.isConcurrencySafe)) +} +``` + +**Implementation in KosmoKrator:** Add `isConcurrencySafe(): bool` to `ToolInterface`. Read-only tools (`file_read`, `glob`, `grep`) return `true`. Destructive tools (`bash`, `file_edit`, `file_write`) return `false`. In `AgentLoop`'s tool dispatch, serialize non-safe tools. + +**Effort:** ~60 lines. + +--- + +### 9. Coordinator Mode + +**Source pattern:** Claude Code `coordinator/coordinatorMode.ts:111-368` — 370-line system prompt defining the coordinator role. + +**Key concepts to port:** +- Phases: Research (parallel workers) → Synthesis (coordinator) → Implementation (workers) → Verification (workers) +- Workers can't see coordinator's conversation (self-contained prompts) +- `<task-notification>` XML format for delivering results +- `SendMessage` tool for continuing a running worker +- Parallelism guidance: "Launch independent workers concurrently" + +**Implementation in KosmoKrator:** Add a `--coordinator` flag that swaps the system prompt and enables the coordinator tool set. Pure prompt engineering — no code architecture changes needed. + +**Effort:** ~50 lines (flag + prompt template). 
+ +--- + +## Priority Matrix + +| Priority | Component | Effort | Impact | Source | +|----------|-----------|--------|--------|--------| +| **P0** | Global concurrency semaphore | ~20 lines | Prevents rate limit death | OpenCode `queue.ts` | +| **P0** | Retry with backoff + headers | ~100 lines | Survives rate limits | OpenCode `retry.ts` | +| **P0** | Decoupled cancellation | ~25 lines | Stops losing background agents | Claude `runAgent.ts:527` | +| **P1** | Disk result persistence + resume | ~200 lines | Survives crashes, enables resume | Claude `diskOutput.ts` | +| **P1** | Cost tracking aggregation | ~80 lines | Budget visibility | Claude `cost-tracker.ts` | +| **P2** | Progress dashboard | ~150 lines | Operational visibility | Claude `agentToolUtils.ts` | +| **P2** | Tool result size persistence | ~100 lines | Memory pressure relief | Claude `toolResultStorage.ts` | +| **P3** | Concurrency-safe tools | ~60 lines | Race condition prevention | Claude `StreamingToolExecutor.ts` | +| **P3** | Coordinator mode | ~50 lines | Better orchestration | Claude `coordinatorMode.ts` | + +**Total estimated effort:** ~785 lines for all components. 
+ +--- + +## Test Results Summary + +The following tests were run against the current sub-agent system: + +| Test | Result | Key Finding | +|------|--------|-------------| +| Basic hello world (1 agent, await) | Pass | 1 round, 2.6s, clean | +| Parallel agents (3x background) | Pass | All spawned within 146ms, completed independently | +| Nested sub-sub-agents (depth 2) | Pass | Concurrent children at depth 2, results flow back | +| Dependency chain (3 steps, background) | Partial | Dependency graph works but agents cancelled mid-chain | +| Background vs await comparison | Pass | Both modes functional, await blocks, background async | + +### Key Log Patterns Observed + +- Agent completion rate: 149 of 153 spawned agents completed (97.4% success rate) +- Cancellations occur when parent loop advances during background agent execution +- Rate limits spike when multiple agents make concurrent API calls +- Context overflow auto-recovery works (trim + retry) +- No memory leaks observed across 153 agents in a single session +- Dependency resolution correctly cascades: step-1 completes → step-2 starts → step-3 starts + +--- + +## Files in KosmoKrator That Would Change + +| File | Changes | +|------|---------| +| `src/Agent/SubagentOrchestrator.php` | Add global semaphore, wrap spawn in retry logic, disk output hooks | +| `src/Agent/SubagentFactory.php` | Decouple cancellation for background agents | +| `src/Agent/SubagentStats.php` | Add cost fields, persist to disk | +| `src/LLM/RetryableLlmClient.php` | Add `Retry-After` header parsing, jittered backoff | +| `src/UI/NullRenderer.php` | Accept `null` cancellation for background agents | +| `src/Agent/AgentLoop.php` | Add tool result size persistence before injection | +| `src/Tool/ToolInterface.php` | Add `isConcurrencySafe(): bool` | +| `src/Tool/ToolRegistry.php` | Scope concurrency-safe filtering | +| `src/Command/AgentCommand.php` | Add `--coordinator` flag, global concurrency config | +| New: `src/Agent/SwarmProgress.php` | Progress 
aggregator | +| New: `src/Agent/DiskTaskOutput.php` | Per-agent result writer | +| New: `src/Agent/SwarmCostTracker.php` | Cost accumulation | + +--- + +## External Source File Index + +### Claude Code (`tmp/claude-src/`) + +``` +services/api/withRetry.ts — Retry engine (822 lines) +services/api/errors.ts — Error classification +services/tools/StreamingToolExecutor.ts — Concurrency-safe tool execution (519 lines) +utils/generators.ts — Concurrent generator pool (80 lines) +utils/task/diskOutput.ts — Disk output with write queue (457 lines) +utils/task/framework.ts — Task lifecycle management (308 lines) +utils/sessionRestore.ts — Session restore from transcripts (550+ lines) +utils/toolResultStorage.ts — Oversized result persistence +utils/forkedAgent.ts — Sub-agent context creation +cost-tracker.ts — Cost tracking (323 lines) +costHook.ts — Cost persistence hook +Task.ts — Task types and state machine (125 lines) +coordinator/coordinatorMode.ts — Coordinator system prompt (369 lines) +tools/AgentTool/runAgent.ts — Agent execution lifecycle (860 lines) +tools/AgentTool/agentToolUtils.ts — Progress tracking, lifecycle management +tools/AgentTool/agentMemory.ts — Agent memory scoping +Tool.ts — Tool interface and contracts (695 lines) +tools.ts — Tool registry and assembly +constants/tools.ts — Tool allowlists per agent type +``` + +### OpenCode (`tmp/opencode/`) + +``` +packages/opencode/src/session/retry.ts — Retry system (105 lines) +packages/opencode/src/util/queue.ts — Worker pool + async queue (30 lines) +packages/opencode/src/tool/task.ts — Sub-agent tool (164 lines) +packages/opencode/src/tool/batch.ts — Parallel tool execution +packages/opencode/src/session/index.ts — Session management, cost calculation +packages/opencode/src/session/prompt.ts — Core agent loop +packages/opencode/src/session/retry.ts — Retry policies +packages/opencode/src/agent/agent.ts — Agent type definitions +packages/opencode/src/agent/prompt/explore.txt — Explore agent system prompt 
+packages/opencode/src/snapshot/index.ts — Git-based checkpointing +packages/opencode/src/storage/db.ts — SQLite persistence +packages/opencode/src/cli/cmd/stats.ts — CLI stats command +``` diff --git a/docs/ecosystem/kosmokrator/research/claude-code-analysis.md b/docs/ecosystem/kosmokrator/research/claude-code-analysis.md new file mode 100644 index 0000000..3ade40c --- /dev/null +++ b/docs/ecosystem/kosmokrator/research/claude-code-analysis.md @@ -0,0 +1,1464 @@ +# Claude Code Source Analysis & KosmoKrator Comparison + +> **Generated**: 2025-03-31 +> **Source**: `tmp/claude-src/` (Claude Code TypeScript source, 1,903 files, 33MB) +> **Target**: KosmoKrator PHP agent (`src/`, ~100 files) + +--- + +## Table of Contents + +1. [Architecture Overview](#1-architecture-overview) +2. [Agent Loop & Query Engine](#2-agent-loop--query-engine) +3. [Streaming & LLM Integration](#3-streaming--llm-integration) +4. [Tool System](#4-tool-system) +5. [Tool Implementations — Deep Comparison](#5-tool-implementations--deep-comparison) +6. [Subagent & Multi-Agent System](#6-subagent--multi-agent-system) +7. [Context Management & Compaction](#7-context-management--compaction) +8. [Token Estimation & Budget](#8-token-estimation--budget) +9. [Permission System](#9-permission-system) +10. [Hook System](#10-hook-system) +11. [Memory System](#11-memory-system) +12. [Skills System](#12-skills-system) +13. [System Prompt Assembly](#13-system-prompt-assembly) +14. [Session & State Management](#14-session--state-management) +15. [Task System](#15-task-system) +16. [UI & Rendering](#16-ui--rendering) +17. [Cost Tracking](#17-cost-tracking) +18. [Command / Slash Command System](#18-command--slash-command-system) +19. [Keybinding System](#19-keybinding-system) +20. [MCP Integration](#20-mcp-integration) +21. [Feature Comparison Matrix](#21-feature-comparison-matrix) +22. [Concrete Thresholds & Constants](#22-concrete-thresholds--constants) +23. [Inspiration Roadmap](#23-inspiration-roadmap) +24. 
[Appendix: File Reference](#24-appendix-file-reference) + +--- + +## 1. Architecture Overview + +### Side-by-Side + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Language** | TypeScript (Bun runtime) | PHP 8.4 | +| **UI Framework** | React/Ink (custom reconciler, Yoga flexbox layout) | Symfony TUI + ANSI fallback | +| **Async Model** | Node async/await, async generators, streaming | Amp fibers (cooperative multitasking) | +| **DI Container** | Manual wiring + React context | Laravel Illuminate Container | +| **LLM Client** | Direct Anthropic SDK + SSE streaming | Prism PHP (multi-provider) + custom Amp HTTP | +| **Persistence** | JSON session files (one per session) | SQLite (WAL mode) | +| **Config** | JSON settings + CLAUDE.md hierarchy | YAML (multi-level merge) + KOSMOKRATOR.md | +| **Tool Count** | ~50+ built-in + unlimited via MCP | ~10 built-in | +| **Codebase Size** | 1,903 files / 33MB | ~100 files / ~500KB | +| **Build** | Bun binary bundle | PHAR (via box) | + +### Entry Point Flow + +**Claude Code:** +``` +main.tsx → Commander.js CLI → init() → REPL screen (React/Ink) + → QueryEngine.submitMessage() → query() async generator + → queryLoop() while(true) → API stream → tool execution → loop +``` + +**KosmoKrator:** +``` +bin/kosmokrator → Kernel → AgentCommand → AgentLoop.run() + → while(true) → LlmClient.chat() → executeToolCalls() → loop +``` + +Both follow the same fundamental pattern: a REPL that iterates LLM calls and tool executions until the model stops requesting tools. The key structural differences are in streaming, concurrency, and extensibility. + +--- + +## 2. 
Agent Loop & Query Engine + +### Claude Code: QueryEngine + query() + +The agent loop is split into two layers: + +**QueryEngine** (`QueryEngine.ts`, 46KB): +- Owns the session: `mutableMessages[]`, conversation state, tool permission callbacks +- `submitMessage()` is an **async generator** that yields `SDKMessage` types +- Manages compact boundaries, permission tracking, and transcript recording +- One QueryEngine per conversation; subagents get their own instances + +**query()** (`query.ts`, 68KB): +- The inner `queryLoop()` is a `while(true)` loop (line 307) +- Each iteration represents one LLM turn: + 1. Apply context compression (snip → microcompact → context collapse → autocompact) + 2. Build system prompt + user context + system context + 3. Stream API call via `queryModelWithStreaming()` + 4. Extract `tool_use` blocks **while streaming** (line 829) + 5. Feed blocks to `StreamingToolExecutor` which starts execution immediately + 6. Collect results, normalize messages + 7. Check stop conditions: no tool_use, max turns, budget exhausted, abort signal, error + 8. 
Continue loop or return `Terminal` reason + +**State machine** (`query.ts` line 202): +```typescript +type State = { + messages: Message[] + toolUseContext: ToolUseContext + autoCompactTracking: AutoCompactTrackingState | undefined + maxOutputTokensRecoveryCount: number + hasAttemptedReactiveCompact: boolean + turnCount: number + transition: Continue | undefined +} +``` + +### KosmoKrator: AgentLoop + +**AgentLoop.php** (904 lines): +- Single class handling both interactive (`run()`) and headless (`runHeadless()`) modes +- `run()` method: add user message → pre-flight context check → refresh system prompt → call LLM → execute tools → deduplicate/prune → loop or stop +- `runHeadless()`: simplified version for subagents (no UI, no session persistence, no compaction) +- Context overflow: up to 3 trim attempts (compact → trim oldest → trim oldest) +- Auto-compaction check after each response + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Streaming** | Async generator yields events token-by-token | Blocking `chat()` returns complete response | +| **Tool start timing** | Tools start executing while LLM still streams | Tools execute after full response received | +| **Loop state** | Explicit `State` type with transitions | Implicit via class properties | +| **Recovery** | 5+ recovery strategies (collapse drain, reactive compact, max-output escalation, stop hooks) | 3 trim attempts | +| **Turn tracking** | Explicit `turnCount`, budget tracking | No turn or budget tracking | + +### Adoptable Patterns + +1. **Streaming responses**: Add SSE streaming to `AsyncLlmClient` for real-time text display. The Anthropic API returns `text_delta` events that can be yielded to the renderer as they arrive. + +2. 
**Recovery escalation chain**: Claude Code has a sophisticated recovery tree when the LLM stops without finishing: + - Context collapse drain (cheap, preserves detail) + - Reactive compact (full LLM summarization) + - Max output token escalation (8k → 64k retry) + - Multi-turn recovery (up to 3 "resume" attempts) + + KosmoKrator only has trim/compact. Adding max-output escalation and a "resume where you left off" retry would help with long responses that hit the output limit. + +3. **Explicit state machine**: Wrapping loop state in an immutable `State` type makes the loop more predictable and debuggable. + +--- + +## 3. Streaming & LLM Integration + +### Claude Code: SSE Streaming Pipeline + +**API Call** (`claude.ts` lines 1778-1846): +```typescript +const result = await anthropic.beta.messages.create( + { ...params, stream: true }, + { signal, headers: { [CLIENT_REQUEST_ID_HEADER]: clientRequestId } } +).withResponse() +``` + +**SSE Event Loop** (`claude.ts` lines 1940-2304): +Iterates raw stream events (NOT the SDK's `BetaMessageStream` helper): +- `message_start` → Initialize partial message, usage tracking +- `content_block_start` → Initialize text/tool_use/thinking blocks +- `content_block_delta` → Accumulate `input_json_delta` / `text_delta` / `thinking_delta` +- `content_block_stop` → Yield completed `AssistantMessage` with finished block +- `message_delta` → Update usage, stop_reason, cost; mutate last yielded message + +**Idle Timeout Watchdog** (`claude.ts` lines 1877-1928): +- Default: 90 seconds (`STREAM_IDLE_TIMEOUT_MS`) +- Configurable via `CLAUDE_STREAM_IDLE_TIMEOUT_MS` env var +- Resets on each chunk; fires if no events for timeout period + +**Streaming Fallback** (`claude.ts` lines 2464-2569): +- On streaming error (not user abort): retries as non-streaming request +- Max 64k tokens for non-streaming (`MAX_NON_STREAMING_TOKENS`) +- Tombstone messages invalidate partially-streamed content + +### KosmoKrator: Blocking HTTP Client + 
+**AsyncLlmClient.php** (291 lines): +- Builds JSON POST payload, sends via Amp HTTP client +- **Transfer timeout**: 600s, **Inactivity timeout**: 300s +- Returns complete `LlmResponse` with text, toolCalls, token counts +- Retry handled by `RetryableLlmClient` wrapper (exponential backoff) + +### Gap Analysis + +KosmoKrator's biggest UX gap is the lack of streaming. Users see nothing until the full response arrives. Adding streaming would require: +1. SSE parsing in `AsyncLlmClient` (read chunked response body) +2. A `StreamingResponse` type that yields partial text/tool_use blocks +3. Renderer updates to display partial text as it arrives +4. Tool execution that can start before streaming completes (optional, advanced) + +The Anthropic API's streaming format is well-documented and PHP's Amp HTTP client supports streaming response bodies natively via `$response->getBody()->read()`. + +--- + +## 4. Tool System + +### Claude Code: Tool Architecture + +**Tool interface** (`Tool.ts`, 30KB): +```typescript +Tool = { + name: string + description(input): string + prompt(): string // Contributes to system prompt + inputSchema: Zod schema + outputSchema: Zod schema + call(input, context, canUseTool, parentMessage, onProgress): Promise<{data: Output}> + checkPermissions(input, context): Promise<PermissionResult> + validateInput(input, context): Promise<ValidationResult> + isConcurrencySafe(input): boolean // Can run in parallel + isReadOnly(): boolean // No side effects + shouldDefer: boolean // Deferred loading via ToolSearch + alwaysLoad: boolean // Always in prompt even with ToolSearch +} +``` + +**Tool registration** (`tools.ts`): +- `getAllBaseTools()` returns ~50+ tools with conditional loading via feature flags +- `getTools()` applies permission filters and mode-specific filtering +- `assembleToolPool()` merges built-in + MCP tools, deduplicates (built-ins win), sorts for prompt-cache stability +- Deferred tools have `shouldDefer: true` — only their names appear in the prompt until `ToolSearchTool` 
fetches their schemas + +**Concurrent execution** (`StreamingToolExecutor.ts`, 531 lines): +- `isConcurrencySafe` flag per tool determines parallel eligibility +- Concurrent-safe tools run in parallel; non-concurrent tools get exclusive access +- Tools queued as `tool_use` blocks arrive from streaming; execution starts immediately +- Bash errors abort sibling tools via `siblingAbortController` +- Three-level abort hierarchy: query → sibling → per-tool + +### KosmoKrator: Tool Architecture + +**ToolInterface** (simple contract): +```php +interface ToolInterface { + public function name(): string; + public function description(): string; + public function parameters(): array; + public function requiredParameters(): array; + public function execute(array $args): string; +} +``` + +**ToolRegistry** (93 lines): +- `register()`, `get()`, `all()`, `toPrismTools()` +- `scoped(AgentContext $context)` — filters by agent type, excludes subagent tool + +**Concurrent execution** (`AgentLoop::partitionConcurrentGroups()`): +- Conservative file-conflict detection: + - Bash + any write tool → sequential + - Multiple writes to same file → sequential + - Read + write to same file → sequential + - No conflicts → one concurrent group +- Within groups: `Amp\async()` for parallel execution +- Across groups: sequential `await()` + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Tool count** | ~50+ built-in + MCP | ~10 built-in | +| **Interface richness** | Input/output schemas, progress, permissions, prompts | Simple name/description/parameters/execute | +| **Concurrency model** | Per-tool `isConcurrencySafe` flag | File-conflict detection heuristic | +| **Deferred loading** | ToolSearch for large tool sets | N/A | +| **Progress reporting** | `onProgress` callback with typed events | None | +| **System prompt contribution** | Each tool can inject via `prompt()` | None | + +### Adoptable Patterns + +1. 
**`isConcurrencySafe()` method**: Add to `ToolInterface`. Simpler and more reliable than file-conflict heuristics. `file_read`, `glob`, `grep` are always safe; `bash`, `file_edit`, `file_write` are not. + +2. **`isReadOnly()` method**: Useful for plan/explore mode filtering and permission shortcuts. + +3. **`prompt()` method**: Let tools contribute usage instructions to the system prompt dynamically. The SubagentTool could explain its type hierarchy, the GrepTool could document its output modes. + +4. **Progress callbacks**: Enable streaming output from long-running tools (especially Bash). The TUI renderer could show real-time stdout. + +5. **Tool output persistence**: Claude Code saves outputs >100K chars to disk with a preview + path reference. KosmoKrator already has `OutputTruncator` doing this (saves to `~/.kosmokrator/data/truncations/`), so this is parity. + +--- + +## 5. Tool Implementations — Deep Comparison + +### BashTool + +| Feature | Claude Code | KosmoKrator | +|---------|-------------|-------------| +| **Execution** | Bun `exec()` with AbortSignal | Symfony Process | +| **Timeout** | Default ~30s, configurable per-call | 120s configurable | +| **Background tasks** | Auto-background after 15s+; foreground task → background migration mid-execution | Not supported | +| **Sandbox** | SandboxManager integration (optional, can be disabled via `dangerouslyDisableSandbox`) | None | +| **Output capture** | `EndTruncatingAccumulator` (preserves start, truncates end) | Line + byte truncation | +| **Search detection** | `isSearchOrReadBashCommand()` splits on operators, classifies each part | None | +| **Security** | Zsh builtins blocklist (`zmodload`, `sysopen`, `ztcp`, etc.), sed parser, shell operator analysis | Shell metacharacter regex, mutative pattern list | + +**Adoptable**: Zsh builtins blocklist is a strong hardening measure. 
Add to `GuardianEvaluator`: +```php +private const ZSH_DANGEROUS = ['zmodload', 'emulate', 'sysopen', 'sysread', 'syswrite', 'sysseek', 'zpty', 'ztcp', 'zsocket', 'zf_rm', 'zf_mv', 'zf_ln', 'zf_chmod', 'zf_chown', 'zf_mkdir', 'zf_rmdir', 'zf_chgrp']; +``` + +### FileEditTool + +| Feature | Claude Code | KosmoKrator | +|---------|-------------|-------------| +| **Match algorithm** | `findActualString()` with quote normalization (curly ↔ straight) | Exact `str_replace()` | +| **Concurrent edit detection** | `readFileState` Map with mtime + content hash verification | None | +| **Line ending preservation** | Normalizes to `\n` on read, restores original on write | None | +| **Encoding** | UTF-8 + UTF-16LE detection | UTF-8 only | +| **Diff output** | `getPatchForEdit()` unified diff | `(-N, +M)` line count | +| **File size limit** | 1 GiB max | No explicit limit | + +**Adoptable**: File state tracking is very valuable. When the LLM reads a file and later edits it, verifying the file hasn't changed in between prevents silent data corruption. Implementation: maintain a `readFileState: Map` in `AgentLoop`, check on edit. + +### FileReadTool + +| Feature | Claude Code | KosmoKrator | +|---------|-------------|-------------| +| **Large file handling** | Range reads without loading whole file | Stream-read line-by-line above 10MB | +| **Deduplication** | Same-range reads return `file_unchanged` stub if mtime matches | None | +| **PDF support** | Page-range extraction, token-aware compression | None | +| **Image support** | Format detection, resize/downsample with token limits, base64 | None | +| **Notebook support** | `.ipynb` cell parsing with outputs | None | +| **Dangerous paths** | Block `/dev/zero`, `/dev/random`, `/proc/self/fd/*` | None | + +**Adoptable**: PDF and image support would be valuable additions. PHP libraries: `smalot/pdfparser` for PDFs, `intervention/image` for image processing. Dangerous path blocking is a good security hardening. 
+ +### GrepTool + +| Feature | Claude Code | KosmoKrator | +|---------|-------------|-------------| +| **Backend** | Ripgrep via args array | Ripgrep (preferred) or grep | +| **Output modes** | `content`, `files_with_matches`, `count` with pagination (head_limit + offset) | Single mode, max 50 matches | +| **Multiline** | `-U --multiline-dotall` flag | Not supported | +| **VCS exclusion** | Automatic `.git`, `.svn`, `.hg` exclusion | Via ripgrep defaults | +| **Sorting** | Files sorted by mtime descending | Not sorted | +| **Default limit** | 250 results (`DEFAULT_HEAD_LIMIT`) | 50 results | + +**Adoptable**: Output modes (especially `files_with_matches` for quick scanning), multiline support, and higher default limits. The pagination pattern (offset + head_limit) is useful for browsing large result sets. + +### WebFetchTool (Claude Code only) + +```typescript +// Permission: preapproved hosts auto-allow, others need approval +// Content: domain:hostname used for permission matching +// Pipeline: fetch → HTML → markdown → optional Haiku summarization +// Cache: 15-minute URL result cache +// Large content: persisted to disk with size annotation +``` + +**Adoptable as new tool**: Use `league/html-to-markdown` or `readability-php` for HTML → markdown conversion. The preapproved host pattern is good UX (GitHub, MDN, StackOverflow, etc. don't need approval). + +### WebSearchTool (Claude Code only) + +```typescript +// Uses native Anthropic web_search_20250305 server tool +// Sends a sub-query to the API with web_search tool schema +// Max 8 searches per request (hardcoded) +// Results: title + URL pairs + text summaries +``` + +**Adoptable as new tool**: Integrate a search API (Tavily, Brave Search, SerpAPI). The implementation pattern of using an LLM sub-call with a server tool is interesting but can be simplified to a direct API call for third-party search providers. 
+ +### ToolSearchTool (Claude Code only) + +**Deferred tool loading** for managing large tool sets: +- Tools with `shouldDefer: true` only show their names in the prompt +- LLM calls `ToolSearch` with a query to fetch full schemas +- Search algorithm: keyword scoring on tool name parts + description + searchHint +- Direct selection: `select:ToolName1,ToolName2` for exact fetches +- MCP tool name parsing: `mcp__github__list_repos` → keywords `[github, list, repos]` + +**Adoptable**: Becomes important when KosmoKrator adds MCP support (potentially dozens of external tools). Not needed at current tool count (~10). + +--- + +## 6. Subagent & Multi-Agent System + +### Claude Code: AgentTool + +**Spawning modes** (`AgentTool.tsx`, lines 686-1200): + +1. **Synchronous**: Run agent inline, block parent, return result +2. **Asynchronous**: Launch background agent, return immediately, inject result when done +3. **Remote**: Teleport to CCR environment (cloud execution) +4. **Auto-background**: Start synchronous, auto-migrate to background after 120s + +**Agent types** (built-in): +- **General Purpose**: Full read/write access +- **Explore**: Read-only code exploration +- **Plan**: Read-only architecture & design +- **Verification**: Adversarial testing (tries to break the implementation) +- **Claude Code Guide**: Documentation specialist +- **Fork**: Inherits parent's system prompt (cache-sharing optimization) +- **Custom**: Loaded from `~/.claude/agents/` as markdown with frontmatter + +**Worktree isolation** (`EnterWorktreeTool`): +```typescript +const worktreeInfo = await createAgentWorktree(slug); +// Agent works in isolated git branch +// On completion: check for changes +// - No changes → clean up worktree +// - Has changes → preserve with branch name +``` + +**Agent communication**: +- `SendMessageTool`: Agents send messages to each other by ID +- `TaskNotification` XML in user messages (coordinator pattern) +- Scratchpad directory for durable cross-worker state + 
+### KosmoKrator: SubagentOrchestrator + +**SubagentOrchestrator.php** (224 lines): +- Manages agent futures using Amp fibers +- Dependency resolution: agents wait for dependencies before starting +- Group-based sequential execution via `LocalSemaphore(1)` +- Background mode: results stored in `pendingResults`, injected when parent checks + +**SubagentFactory.php** (163 lines): +- Creates fresh `AgentLoop` instances with scoped tool registry +- Builds system prompt: base + type suffix + environment context +- If `canSpawn()`: registers recursive SubagentTool +- Mode mapping: General→Edit, Explore→Ask, Plan→Plan +- Hardcoded subagent pruner: `ContextPruner(20_000, 10_000)` + +**AgentContext.php** (54 lines): +- Immutable context traveling down the tree +- `canSpawn()`: `depth < maxDepth - 1` +- `childContext()`: validates type inheritance, increments depth + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Agent types** | 7 built-in + custom from files | 3 (General, Explore, Plan) | +| **Custom agents** | `~/.claude/agents/` markdown files | Not supported | +| **Verification agent** | Adversarial tester with strict output format | Not supported | +| **Worktree isolation** | Git worktree per agent | Not supported | +| **Inter-agent messaging** | SendMessageTool | Dependency results appended to task | +| **Auto-backgrounding** | After 120s, migrate sync→async | Not supported | +| **Agent colors** | Unique color per agent in UI | Not supported | +| **Max depth** | Configurable (default 3) | Configurable (default 3) | +| **Coordinator mode** | Multi-worker orchestration with task notifications | Not supported | + +### Adoptable Patterns + +1. 
**Custom agent definitions**: Load from `~/.kosmokrator/agents/` as markdown with frontmatter: + ```yaml + --- + name: reviewer + description: Code review specialist + type: explore + model: inherit + when-to-use: When the user asks for a code review + --- + You are a code review specialist. Focus on... + ``` + +2. **Verification agent**: An adversarial testing agent that tries to break implementations. Very powerful for quality assurance. System prompt enforces: run commands (don't just read code), structured output format with Command/Output/Result blocks, explicit VERDICT line. + +3. **Worktree isolation**: Create a `GitWorktreeTool` that creates temporary worktrees for experimental work. PHP's `Process` class can run `git worktree add/remove`. + +4. **Auto-backgrounding**: After N seconds of a synchronous subagent running, automatically migrate to background mode. Requires the Amp fiber to support mid-execution mode switch. + +5. **Agent color assignment**: Assign unique colors from `Theme` palette per agent depth/ID. Small UX win for visual differentiation. + +--- + +## 7. 
Context Management & Compaction + +### Claude Code: 5-Layer Strategy + +Claude Code has five layers of context pressure relief, applied in order: + +#### Layer 1: Microcompaction (cache-based) +- Uses Anthropic API's `cache_edits` to delete individual tool results without invalidating the cached prompt prefix +- Per-tool-result targeting: FILE_READ, SHELL, GREP, GLOB, WEB_SEARCH, WEB_FETCH, FILE_EDIT, FILE_WRITE results clearable +- Model-specific: only Claude Sonnet/Opus support cache editing +- Main thread only (subagents excluded) + +#### Layer 2: Time-based Microcompaction +- Trigger: `(now - lastAssistantMessage) > 60 minutes` (server cache TTL) +- Action: Clear tool results except 5 most recent +- Sentinel: `'[Old tool result content cleared]'` +- Rationale: after 60min the server cache is cold anyway, so clearing stale results costs nothing + +#### Layer 3: Context Collapse (feature-gated) +- Advanced selective message archiving that preserves granular detail longer +- Commit point: 90% of effective context +- Blocking spawn threshold: 95% +- When enabled, auto-compaction is disabled to prevent racing + +#### Layer 4: Auto-Compaction (LLM summarization) +- **Threshold**: `effectiveContextWindow - 13,000` tokens (~93% of usable window) +- **Effective window**: `contextWindowSize - min(maxOutputTokens, 20,000)` (reserves summary output budget) +- **Circuit breaker**: Stops after 3 consecutive failures +- **Post-compaction restoration**: + - Re-attach up to 5 recently-read files (50K token budget, 5K per file) + - Re-inject recently-used skills (25K token budget, 5K per skill) + - Preserve async agent attachments and plan mode state + +#### Layer 5: Session Memory Compaction +- Background extraction that summarizes old conversation segments +- Config: min 10K tokens preserved, max 40K, min 5 text-block messages +- Preserves API invariants (tool_use/tool_result pairing, thinking block grouping) + +### KosmoKrator: 3-Layer Strategy + +#### Layer 1: 
ToolResultDeduplicator +Three-tier deduplication: +1. **Exact match**: Same tool, args, result hash → `'[Superseded — identical result]'` +2. **Stale after edit**: File read superseded by write + later re-read → `'[Superseded — file was re-read after modification]'` +3. **Subset subsumption**: Grep on file subsumed by later full file_read → `'[Superseded — content included in later file_read]'` + +#### Layer 2: ContextPruner +- Protects last 2 user turns (40K tokens default) +- Replaces older tool results with `'[Old tool result content cleared]'` +- Only prunes if savings >= 20K tokens +- Applied after deduplication + +#### Layer 3: ContextCompactor +- **Threshold**: 60% of context window (configurable) +- LLM summarization with dedicated compaction prompt +- Keeps last 3 user turns (configurable) +- Formats messages for compaction (truncates each to 2000 chars, total cap 100K chars) +- Also extracts durable memories (project, user, decision types) from summary + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Layers** | 5 | 3 | +| **Trigger threshold** | ~93% of usable window | 60% of context window | +| **Post-compaction restoration** | 5 files (50K), skills (25K), agent state | None | +| **Cache-aware compaction** | Yes (cache_edits API, time-based clearing) | No | +| **Compaction prompt** | Detailed 9-section prompt with structured output | Simple summarization prompt | +| **Memory extraction** | Separate background agent after each turn | During compaction only | +| **Circuit breaker** | 3 consecutive failures → stop | None | +| **Deduplication** | Basic (per tool name) | Advanced 3-tier (exact, stale, subsumption) | + +### Adoptable Patterns + +1. **Post-compaction file restoration** (HIGH PRIORITY): After compacting, re-read and attach the most recently-read files. This prevents the common failure mode where the agent "forgets" what files it was working with after compaction. 
+ ```php + // In ContextCompactor::compact(): + $recentFiles = $this->extractRecentFileReads($history, limit: 5, tokenBudget: 50000); + foreach ($recentFiles as $file) { + $content = substr(file_get_contents($file), 0, 5000 * 4); // ~5K tokens + $summary .= "\n\n## Recently read: {$file}\n```\n{$content}\n```"; + } + ``` + +2. **Post-compaction instruction re-injection**: Re-inject KOSMOKRATOR.md instructions after compaction since they may have been summarized away. + +3. **Circuit breaker**: Stop auto-compacting after 3 consecutive failures. Add a `$consecutiveCompactFailures` counter to `AgentLoop`. + +4. **Raise compaction threshold**: 60% is conservative. Claude Code uses ~93%. Consider raising to 75-80% to preserve more context before compacting. + +5. **Time-based result clearing**: If a conversation has been idle for >60 minutes, clear old tool results on resume (they're stale anyway). Simple timestamp check in `AgentLoop::preFlightContextCheck()`. + +--- + +## 8. Token Estimation & Budget + +### Claude Code + +**Estimation formula** (`tokenEstimation.ts`): +- Text: `length / 4` (~4 bytes per token, default) +- JSON files: `length / 2` (denser tokenization) +- Images/documents: 2000 tokens flat estimate +- Message-level padding: `ceil(total * 4/3)` (33% conservative multiplier) + +**Budget tracking** (`tokenBudget.ts`): +``` +COMPLETION_THRESHOLD = 0.9 // Stop at 90% budget +DIMINISHING_THRESHOLD = 500 // Tokens per turn threshold +Detection: 3+ continuations AND last 2 deltas both < 500 tokens +``` + +**Continuation logic**: +- Continue if: under 90% budget AND making progress +- Stop if: diminishing returns (3+ turns, <500 tokens/turn) OR any prior continuation +- Nudge messages tell the LLM remaining budget + +### KosmoKrator + +**Estimation formula** (`TokenEstimator.php`): +- Text: `ceil(mb_strlen($text) / 4)` — 4 characters per token +- No file-type-specific adjustment +- No padding multiplier +- No budget tracking or continuation logic + +### Adoptable
Patterns + +1. **JSON-specific estimation**: Use `length / 2` for JSON content (important for tool results which are often JSON). + +2. **Conservative padding**: Apply a 1.33x multiplier to total estimates. Token estimation is inherently imprecise; padding prevents unexpected overflows. + +3. **Budget tracking**: Optional feature for cost-conscious users. Track cumulative tokens per turn, stop if diminishing returns detected. + +4. **Diminishing returns detection**: If the agent has been running for 3+ turns and the last 2 turns produced <500 tokens each, it's likely stuck in a loop. Inject a "you seem stuck, consider wrapping up" nudge. + +--- + +## 9. Permission System + +### Claude Code: Multi-Source Rules + +**Permission modes**: +- `default` — Prompt for all 'ask' decisions +- `acceptEdits` — Auto-allow file edits in CWD, prompt elsewhere +- `bypassPermissions` — Auto-allow all (except deny rules and safety checks) +- `auto` — AI classifier decides (ANT-only, uses transcript/bash classifier) +- `plan` — Shows action plan instead of executing +- `dontAsk` — Silently deny all 'ask' decisions + +**Rule sources** (8 levels, priority order): +`policySettings > flagSettings > projectSettings > localSettings > userSettings > cliArg > command > session` + +**Rule format**: `ToolName(content)` with wildcard support: +- `Bash(npm *)` — glob pattern, matches any npm command +- `Bash(npm:*)` — legacy prefix syntax +- `Bash(curl https://\*.com)` — escaped asterisk +- `mcp__server1__*` — MCP server-level rule +- `Agent(Explore)` — deny specific agent type + +**Evaluation order** (`permissions.ts` lines 1158-1320): +1. Check DENY rules (absolute, no override) +2. Check entire tool ASK rule +3. Call `tool.checkPermissions()` (tool-specific logic) +4. Check mode (bypass, acceptEdits, etc.) +5. Check ALLOW rules +6. Convert passthrough to ask +7. Apply dontAsk → deny conversion +8. Apply auto → classifier +9. 
Fall back to permission prompt + +**Session grants**: In-memory, non-persisted rules stored in `alwaysAllowRules['session']`. Discarded when session ends. + +**Denial tracking** (auto mode): +- `consecutiveDenials >= 3` OR `totalDenials >= 20` → fall back to user prompting +- Reset consecutive on allow, increment both on deny + +### KosmoKrator: 3-Mode System + +**Permission modes** (`PermissionMode`): +- `Guardian` — Heuristic auto-approve (safe reads + project-scoped writes + safe bash) +- `Argus` — Always ask +- `Prometheus` — Auto-approve everything + +**Rule evaluation** (`PermissionEvaluator.php` lines 20-71): +1. Blocked paths check (absolute deny) +2. Session grants check +3. Rule evaluation (first matching rule wins) +4. Mode-specific handling: + - Prometheus: auto-approve Ask + - Guardian: delegate to `GuardianEvaluator::shouldAutoApprove()` + - Argus: return Ask + +**Guardian heuristics** (`GuardianEvaluator.php`): +- Always safe: `file_read`, `glob`, `grep`, task tools, memory tools +- File writes safe if inside project root +- Bash safe if: no shell metacharacters (`` /[;&|`$><\n]/ ``) AND not matching mutative patterns +- Mutative patterns: `rm`, `mv`, `git commit`, `npm install`, `docker`, `kubectl`, etc. + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Modes** | 6 | 3 | +| **Rule sources** | 8 levels with priority | Config + session grants | +| **Wildcard rules** | Glob patterns (`npm *`) | Static pattern matching | +| **AI classifier** | Yes (auto mode) | No | +| **Safety checks** | Bypass-immune (always prompt for .git/, .claude/, shell configs) | Blocked paths only | +| **Denial tracking** | Consecutive + total limits | None | +| **Zsh builtins** | Blocked (`zmodload`, `sysopen`, `ztcp`, etc.) | Not blocked | + +### Adoptable Patterns + +1. **Wildcard permission rules** (HIGH PRIORITY): Add glob pattern matching to `PermissionRule::matches()`.
This enables rules like "allow all git commands" (`Bash(git *)`) or "allow all npm scripts" (`Bash(npm run *)`). + +2. **Bypass-immune safety checks**: Always prompt for operations on `.git/`, `.kosmokrator/`, shell config files (`.bashrc`, `.zshrc`, `.profile`), regardless of permission mode. + +3. **Zsh builtins blocklist**: Add to `GuardianEvaluator`. These builtins can bypass sandboxing: + ```php + private const ZSH_DANGEROUS = [ + 'zmodload', 'emulate', 'sysopen', 'sysread', 'syswrite', + 'sysseek', 'zpty', 'ztcp', 'zsocket', + 'zf_rm', 'zf_mv', 'zf_ln', 'zf_chmod', 'zf_chown', + 'zf_mkdir', 'zf_rmdir', 'zf_chgrp', + ]; + ``` + +4. **`dontAsk` mode equivalent**: Useful for fully automated/CI pipelines where there's no user to prompt. Silently deny rather than hanging. + +--- + +## 10. Hook System + +### Claude Code: Shell Command Hooks + +Claude Code supports external shell commands that execute in response to agent events. + +**Hook event types** (`types/hooks.ts`): +- `PreToolUse` — Before tool execution (can block, modify input, add context) +- `PostToolUse` — After tool success +- `PostToolUseFailure` — After tool failure +- `PermissionDenied` — Auto-mode classifier denied +- `PermissionRequest` — Permission prompt triggered +- `Notification` — Notification event +- `SessionStart` — Session initialization +- `UserPromptSubmit` — User message submitted +- `FileChanged` — Watched file changed +- `CwdChanged` — Working directory changed +- `SubagentStart` — Subagent spawned +- `WorktreeCreate` — Worktree created + +**Hook output** (PreToolUse example): +```typescript +{ + permissionDecision?: 'approve' | 'block', + permissionDecisionReason?: string, + updatedInput?: Record, + additionalContext?: string, +} +``` + +**Timeout**: 10 minutes for tool hooks, 1.5 seconds for session-end hooks. 
+**Configuration**: In `settings.json`: +```json +{ + "hooks": { + "PreToolUse": [{ + "matcher": { "tool_name": "Bash" }, + "command": "~/.claude/hooks/lint-bash.sh" + }] + } +} +``` + +### KosmoKrator: No Hook System + +KosmoKrator has no equivalent hook system. Permission evaluation is the closest analog, but it doesn't support external command execution or input modification. + +### Adoptable Pattern + +A hook system is very powerful for customization without code changes. Implementation: + +```yaml +# ~/.kosmokrator/hooks.yaml +hooks: + PreToolUse: + - matcher: { tool_name: "bash" } + command: "~/.kosmokrator/hooks/validate-bash.sh" + timeout: 60 + PostToolUse: + - matcher: { tool_name: "file_edit" } + command: "~/.kosmokrator/hooks/format-on-save.sh" + UserPromptSubmit: + - command: "~/.kosmokrator/hooks/log-prompt.sh" +``` + +The hook receives JSON on stdin (tool name, input, context) and outputs JSON to stdout (approve/block/modify). This enables linting, formatting, logging, and custom approval workflows. + +--- + +## 11. Memory System + +### Claude Code: File-Based Persistent Memory + +**Directory structure**: +``` +~/.claude/projects/<project>/memory/ +├── MEMORY.md (index, max 200 lines / 25KB, always loaded) +├── user_role.md (individual memory files with frontmatter) +├── feedback_testing.md +└── project_goal.md +``` + +**Memory frontmatter format**: +```markdown +--- +name: {{memory name}} +description: {{one-line hook for relevance matching}} +type: {{user | feedback | project | reference}} +--- +{{content — for feedback/project: rule/fact, **Why:** line, **How to apply:** line}} +``` + +**Memory types** (4 categories): +1. **user**: Role, goals, preferences, knowledge level +2. **feedback**: Guidance on approach (corrections AND confirmations) +3. **project**: Ongoing work, goals, deadlines (not derivable from code) +4.
**reference**: Pointers to external systems (Linear, Grafana, Slack) + +**What NOT to save**: Code patterns, git history, debugging recipes, CLAUDE.md content, ephemeral task details. + +**Extraction**: Background agent runs after each turn (feature-gated): +- Max 5 turns per extraction +- Tool restrictions: Read, Grep, Glob, read-only Bash, Edit/Write to memory dir only +- Throttled: every N turns (default 1) +- Pre-injects manifest of existing memories to avoid duplicates +- Analytics tracked: tokens, files written, duration + +**Memory mechanics prompt**: A detailed instruction set injected into the system prompt that teaches the LLM how to proactively save, update, and recall memories. This is the mechanism that makes the LLM autonomously manage its own memory. + +### KosmoKrator: SQLite-Based Memory + +**Storage**: `memories` table in SQLite database +- Columns: id, type, title, content, project, session_id, created_at +- Types: `project`, `user`, `decision`, `compaction` + +**Tools**: `MemorySaveTool`, `MemorySearchTool` + +**Extraction**: During compaction only (in `ContextCompactor::extractMemories()`) +- Calls LLM with `MEMORY_EXTRACTION_PROMPT` +- Parses JSON array: `[{type, title, content}]` +- Validates types, saves to repository + +**Injection**: `MemoryInjector::format()` groups by type into markdown sections + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Storage** | File-based (git-trackable, human-editable) | SQLite rows | +| **Index** | MEMORY.md always loaded in context | All memories injected in system prompt | +| **Types** | 4 (user, feedback, project, reference) | 4 (user, project, decision, compaction) | +| **Extraction trigger** | After each turn (background) | During compaction only | +| **Memory mechanics prompt** | Yes (teaches LLM to proactively save) | No | +| **Relevance decay** | Age tracking, staleness warnings | None | +| **Team sync** | Multi-agent memory sharing 
(feature-gated) | None | + +### Adoptable Patterns + +1. **Memory mechanics prompt** (HIGH PRIORITY): The single most impactful addition. Claude Code's memory prompt teaches the LLM: + - What types of information to save + - When to save (corrections, confirmations, learning about user) + - What NOT to save (code patterns, git history, debugging recipes) + - How to save (file format, MEMORY.md index) + - When to access memories + - When to verify before recommending + + KosmoKrator should inject an equivalent prompt section that teaches the LLM to use `memory_save` and `memory_search` proactively. + +2. **Post-turn extraction**: Don't wait for compaction to extract memories. Run a lightweight extraction after each turn (or every N turns) to capture feedback and decisions before they're compacted away. + +3. **Feedback type**: Rename `decision` to `feedback` and add explicit guidance about saving both corrections AND confirmations. The body structure `rule → Why → How to apply` is very effective. + +4. **Reference type**: Add for external system pointers (Jira boards, Grafana dashboards, Slack channels). + +--- + +## 12. Skills System + +### Claude Code: Loadable Prompt Templates + +**BundledSkillDefinition** (`bundledSkills.ts`): +```typescript +{ + name: string + description: string + aliases?: string[] + whenToUse?: string + argumentHint?: string + allowedTools?: string[] + model?: string + context?: 'inline' | 'fork' // fork = isolated sub-agent + agent?: string + files?: Record // Reference files extracted to disk + getPromptForCommand: (args, context) => Promise +} +``` + +**User-defined skills**: Markdown files in `~/.claude/skills/` or `.claude/skills/`: +```markdown +--- +name: review +description: Review code changes for quality +allowed-tools: file_read, grep, glob +context: fork +model: inherit +--- +Review the current git diff for bugs, security issues, and code quality... 
+``` + +**Skill execution**: Via `SkillTool` — either inline (added to conversation) or forked (isolated sub-agent with own context). + +**Bundled skills include**: `/commit`, `/review-pr`, `/simplify`, `/loop`, `/debug`, `/remember`, `/verify`, `/schedule`, `/claude-api`, `/keybindings`, `/update-config`, and many more. + +### KosmoKrator: Slash Commands + +KosmoKrator has slash commands (`/mode`, `/sessions`, `/resume`, `/settings`, etc.) but these are UI commands, not LLM-driven skills. There's no equivalent of loadable prompt templates. + +### Adoptable Pattern + +A skills system bridges the gap between slash commands and full agent modes: + +```php +// ~/.kosmokrator/skills/review/SKILL.md +// --- +// name: review +// description: Review code changes +// allowed-tools: file_read, grep, glob +// context: fork +// --- +// Review the current git diff for bugs... + +class SkillLoader { + public function loadFromDirectory(string $dir): array; + public function execute(Skill $skill, string $args, AgentLoop $agent): string; +} +``` + +Skills invoked via `/review` would either inject the prompt inline or fork a subagent with the skill's prompt and tool restrictions. This is a powerful extensibility mechanism that users can customize without touching code. + +--- + +## 13. System Prompt Assembly + +### Claude Code: Multi-Part Prompt + +The system prompt is assembled from multiple sources: + +**Static sections** (`prompts.ts`): +1. **Intro**: "You are an interactive agent that helps users with software engineering tasks..." +2. **System**: Tool execution, permission modes, hooks, context compression +3. **Doing tasks**: Engineering best practices, code quality, no unnecessary changes +4. **Executing actions with care**: Reversibility, blast radius, confirmation for risky actions +5. **Using your tools**: Dedicated tools over Bash, parallel calls, task management +6. 
**Tone and style**: No emojis, concise, file_path:line_number references + +**Dynamic sections**: +- Tool-specific guidance (Agent, Skills, ToolSearch) +- Verification agent contract (if enabled) +- Memory mechanics prompt (if auto-memory enabled) + +**Context layers** (`queryContext.ts`): +- `defaultSystemPrompt[]` — Static prompt array +- `userContext.claudeMd` — CLAUDE.md files from directory hierarchy +- `userContext.currentDate` — "Today's date is YYYY-MM-DD" +- `systemContext.gitStatus` — Branch, status, recent commits + +**Cache boundary** (`SYSTEM_PROMPT_DYNAMIC_BOUNDARY`): +Everything before this marker is globally cacheable. Everything after is session-specific. + +### KosmoKrator: Prompt Assembly + +**AgentCommand.php** (lines 131-134): +```php +$systemPrompt = $basePrompt; // From config +$systemPrompt .= MemoryInjector::format($memories); +$systemPrompt .= InstructionLoader::gather(); +$systemPrompt .= EnvironmentContext::gather(); +``` + +**AgentLoop** refreshes system prompt each turn: +```php +$prompt = $this->baseSystemPrompt; +$prompt .= $this->mode->systemPromptSuffix(); +$prompt .= $this->formatTaskContext(); +``` + +### Key Differences + +| Aspect | Claude Code | KosmoKrator | +|--------|-------------|-------------| +| **Base prompt size** | ~914 lines, very detailed | Configurable, shorter | +| **Tool prompt contributions** | Each tool can inject via `prompt()` | None | +| **Memory mechanics** | Full teaching prompt for auto-memory | None | +| **Cache boundary** | Explicit marker for API caching | None | +| **Dynamic refresh** | Memoized context (cached per conversation) | Refreshed each turn | +| **Git status** | Branch, status (2000 char cap), 5 recent commits | Branch, root | + +### Adoptable Patterns + +1. **Memory mechanics prompt injection**: Add a dedicated section teaching the LLM how to use `memory_save` and `memory_search` proactively. + +2. **Tool prompt contributions**: Add `systemPromptContribution(): ?string` to `ToolInterface`. 
The SubagentTool could explain type hierarchy and usage patterns. + +3. **Richer git context**: Include `git status --short` (capped at 2000 chars) and last 5 commit messages in the system prompt. Gives the LLM better awareness of the project state. + +--- + +## 14. Session & State Management + +### Claude Code: File-Based Sessions + +- One JSON file per session, written fire-and-forget via `recordTranscript()` +- `history.jsonl` for conversation history (max 100 entries) +- Pasted content stored externally when >1KB +- Session resume via message deserialization from log files +- Remote session support via WebSocket (`/v1/sessions/ws/{id}/subscribe`) + +### KosmoKrator: SQLite Sessions + +- `sessions` table: id, project, title, model, created_at, updated_at +- `messages` table: role, content, tool_calls, tool_results, tokens +- `settings` table: scope-based KV store (global, project-specific) +- `memories` table: type, title, content, project, session_id + +### Assessment + +KosmoKrator's SQLite approach is actually superior for: +- Atomic writes (WAL mode) +- Efficient queries (session listing, message search) +- Structured data (vs JSON parsing) +- Concurrent access safety + +No changes needed here. SQLite is the right choice. + +--- + +## 15. 
Task System + +### Claude Code + +**Task types**: `local_bash`, `local_agent`, `remote_agent`, `in_process_teammate`, `local_workflow`, `monitor_mcp`, `dream` + +**Task statuses**: `pending`, `running`, `completed`, `failed`, `killed` + +**Tools**: TaskCreateTool, TaskUpdateTool, TaskListTool, TaskGetTool, TaskOutputTool, TaskStopTool + +**Features**: +- Blocking relationships (addBlocks, addBlockedBy) +- Owner assignment for multi-agent teams +- Mailbox communication for teammates +- Auto-expand UI on task create/update +- Task completion hooks + +### KosmoKrator + +**Task statuses**: `Pending`, `InProgress`, `Completed`, `Cancelled` + +**Tools**: TaskCreateTool, TaskUpdateTool, TaskListTool, TaskGetTool + +**Features**: +- Parent-child relationships +- Blocking relationships (bidirectional) +- Auto-complete parents when all children terminal +- Tree rendering (text + ANSI) +- In-memory storage (no persistence) + +### Assessment + +KosmoKrator's task system is well-designed and covers the essential features. Claude Code's additions (task types, owner assignment, mailbox communication) are mostly relevant for multi-agent teams, which is a future feature. No immediate changes needed. + +--- + +## 16. UI & Rendering + +### Claude Code: React/Ink Custom Framework + +Claude Code has essentially built a **custom terminal GUI framework**: +- Custom React reconciler for terminal rendering +- Yoga-based flexbox layout engine +- Double-buffered frame rendering with diff optimization +- Mouse tracking (mode-1003), hit testing, text selection +- Bidirectional text support +- Scrollable containers, buttons, OSC 8 hyperlinks +- Keyboard chord parsing with configurable bindings +- Search highlighting across screen buffer +- Alternate screen mode (full-screen) + +This is approximately **10,000+ lines of UI infrastructure**. + +### KosmoKrator: Symfony TUI + ANSI + +- **TuiRenderer**: Symfony TUI widgets (PlanApprovalWidget, QuestionWidget, CollapsibleWidget, etc.) 
+- **AnsiRenderer**: Pure ANSI escape codes, readline input, MarkdownToAnsi for formatting +- **Theme**: Shared color palette, tool icons, planetary symbols +- **MarkdownToAnsi**: CommonMark + GFM extensions, Tempest Highlighter for code blocks + +### Assessment + +KosmoKrator's dual-renderer approach is pragmatic and effective. Trying to replicate Claude Code's custom Ink framework would be massive effort for marginal gain. Symfony TUI provides adequate interactivity. + +### Adoptable Patterns + +1. **Cost display**: Show running cost in the context bar. Use `ModelCatalog` pricing data: + ``` + $cost = ($tokensIn / 1_000_000) * $inputPrice + ($tokensOut / 1_000_000) * $outputPrice; + ``` + +2. **Collapsible tool output**: Claude Code collapses search/read tool results into summaries ("Found 3 files in 12ms"). KosmoKrator has `CollapsibleWidget` in TUI mode — ensure it's used for all tool results. + +3. **Thinking duration display**: Show "Thinking... (2.3s)" when the LLM is processing. Claude Code shows thinking state for minimum 2 seconds, then displays the duration. + +--- + +## 17. Cost Tracking + +### Claude Code + +**Formula** (`modelCost.ts`): +``` +cost = (input / 1M) * inputPrice + + (output / 1M) * outputPrice + + (cacheRead / 1M) * cacheReadPrice + + (cacheCreation / 1M) * cacheWritePrice + + webSearchRequests * webSearchPrice +``` + +**Pricing tiers** (per 1M tokens): +| Model | Input | Output | +|-------|-------|--------| +| Sonnet 4.x | $3 | $15 | +| Opus 4.0/4.1 | $15 | $75 | +| Opus 4.5 | $5 | $25 | +| Opus 4.6 (fast) | $30 | $150 | +| Haiku 3.5 | $0.80 | $4 | +| Haiku 4.5 | $1 | $5 | + +**Display**: On exit, shows total cost, API duration, wall duration, lines added/removed, per-model breakdown. + +### KosmoKrator + +KosmoKrator has `ModelCatalog` with pricing data and tracks `sessionTokensIn`/`sessionTokensOut` in `AgentLoop`, but doesn't calculate or display USD cost. 
+ +### Adoptable Pattern + +Add cost calculation and display: +```php +$cost = ($this->sessionTokensIn / 1_000_000) * $this->models->inputPrice($model) + + ($this->sessionTokensOut / 1_000_000) * $this->models->outputPrice($model); +$this->ui->showStatus(sprintf('Session cost: $%.4f', $cost)); +``` + +--- + +## 18. Command / Slash Command System + +### Claude Code: ~100+ Commands + +Categories: +- **Prompt commands**: Invoke model with skill prompt (`/commit`, `/review`, `/simplify`, `/loop`) +- **Action commands**: Immediate execution (`/clear`, `/exit`, `/config`, `/model`, `/compact`) +- **Internal commands**: Developer-only (`/breakCache`, `/mockLimits`, `/debugToolCall`) + +Command availability filtered by: feature flags, user type (ant/external), subscription level. + +### KosmoKrator: ~15 Commands + +- `/mode`, `/clear`, `/compact`, `/sessions`, `/resume`, `/new`, `/quit` +- `/memories`, `/forget`, `/settings` +- `/guardian`, `/argus`, `/prometheus` +- `/tasks-clear`, `/theogony`, `/seed` + +### Assessment + +KosmoKrator has the essential commands. Additional commands can be added incrementally as features are implemented (skills, MCP, etc.). + +--- + +## 19. Keybinding System + +### Claude Code + +Fully configurable keybindings via `~/.claude/keybindings.json`: +- Context-aware: Global, Chat, Autocomplete, Confirmation, Help, Transcript, etc. +- Actions: `app:interrupt`, `app:exit`, `app:toggleTodos`, `app:toggleTranscript`, etc. +- Chord support: `ctrl+k ctrl+s` (multi-key sequences) +- Special keys: `esc`, `return`, `space`, arrows +- User bindings merged with defaults + +### KosmoKrator + +No keybinding customization. + +### Adoptable Pattern + +Medium priority. Add `~/.kosmokrator/keybindings.yaml` for common actions: +```yaml +keybindings: + chat: + submit: ctrl+return + cancel: ctrl+c + mode_cycle: shift+tab +``` + +--- + +## 20. 
MCP Integration + +### Claude Code + +Full Model Context Protocol support: +- Transport types: `stdio`, `sse`, `http`, `ws`, `sdk` +- OAuth token refresh for authenticated servers +- Tool integration: each MCP tool becomes a `mcp__server__action` tool +- Resource listing and reading +- Skill builders from MCP resources +- Channel permissions per server +- Config scopes: local, user, project, dynamic, enterprise, managed + +### KosmoKrator + +No MCP support. + +### Adoptable Pattern + +MCP integration is a HIGH PRIORITY addition. PHP MCP client libraries exist. Start with `stdio` transport (simplest) to connect to local MCP servers. Each server's tools register into the `ToolRegistry` with the `mcp__server__action` naming convention. + +--- + +## 21. Feature Comparison Matrix + +| Feature | Claude Code | KosmoKrator | Gap | +|---------|:-----------:|:-----------:|:---:| +| **Core agent loop** | Full | Full | - | +| **Streaming responses** | Full | None | HIGH | +| **Tool system** | 50+ tools | 10 tools | MEDIUM | +| **Subagent system** | Full + custom | Full (3 types) | LOW | +| **Context compaction** | 5 layers | 3 layers | MEDIUM | +| **Post-compact restoration** | Full | None | HIGH | +| **Token budget tracking** | Full | None | LOW | +| **Permission system** | 6 modes + wildcards | 3 modes | MEDIUM | +| **Hook system** | Full (12 event types) | None | MEDIUM | +| **Memory system** | File-based + extraction | SQLite + compaction-only | MEDIUM | +| **Memory mechanics prompt** | Full | None | HIGH | +| **Skills system** | Full (bundled + user) | None | HIGH | +| **MCP integration** | Full | None | HIGH | +| **Web fetch** | Full | None | HIGH | +| **Web search** | Full | None | HIGH | +| **PDF/Image reading** | Full | None | MEDIUM | +| **Cost tracking display** | Full | Partial (no display) | LOW | +| **Keybinding customization** | Full | None | LOW | +| **Git worktree isolation** | Full | None | MEDIUM | +| **Custom agent definitions** | Full | None | MEDIUM | +| 
**Verification agent** | Full | None | MEDIUM | +| **Voice mode** | Full | None | LOW | +| **Remote sessions** | Full | None | LOW | +| **Deferred tool loading** | Full | None | LOW | +| **File state tracking (edits)** | Full | None | MEDIUM | +| **Session persistence** | JSON files | SQLite | KosmoKrator better | +| **Config system** | JSON | YAML (multi-level) | KosmoKrator better | +| **Dual renderer** | React/Ink | Symfony TUI + ANSI | Parity | +| **Task system** | Full + teams | Full (in-memory) | Parity | +| **Mythology theming** | None | Full | KosmoKrator unique | + +--- + +## 22. Concrete Thresholds & Constants + +### Claude Code + +| Constant | Value | Location | +|----------|-------|----------| +| Auto-compact buffer | 13,000 tokens | `autoCompact.ts` | +| Auto-compact threshold | ~93% of effective window | Calculated | +| Warning threshold buffer | 20,000 tokens | `autoCompact.ts` | +| Max compaction output | 20,000 tokens | `autoCompact.ts` | +| Max compaction failures | 3 consecutive | `autoCompact.ts` | +| Post-compact file budget | 50,000 tokens | `compact.ts` | +| Post-compact file cap | 5,000 tokens/file | `compact.ts` | +| Post-compact max files | 5 | `compact.ts` | +| Post-compact skill budget | 25,000 tokens | `compact.ts` | +| Post-compact skill cap | 5,000 tokens/skill | `compact.ts` | +| Session memory min tokens | 10,000 | `sessionMemoryCompact.ts` | +| Session memory max tokens | 40,000 | `sessionMemoryCompact.ts` | +| Session memory min messages | 5 | `sessionMemoryCompact.ts` | +| Budget completion threshold | 90% | `tokenBudget.ts` | +| Diminishing returns threshold | 500 tokens/turn | `tokenBudget.ts` | +| Diminishing detection | 3+ continuations | `tokenBudget.ts` | +| Time-based MC gap | 60 minutes | `timeBasedMCConfig.ts` | +| Time-based MC keep recent | 5 tool results | `timeBasedMCConfig.ts` | +| Text token estimate | length / 4 | `tokenEstimation.ts` | +| JSON token estimate | length / 2 | `tokenEstimation.ts` | +| Message token 
padding | 4/3x multiplier | `tokenEstimation.ts` | +| Image/document tokens | 2,000 flat | `tokenEstimation.ts` | +| Stream idle timeout | 90,000 ms | `claude.ts` | +| Agent auto-background | 120,000 ms | `AgentTool.tsx` | +| Bash progress threshold | 2,000 ms | `BashTool.tsx` | +| Grep default head limit | 250 results | `GrepTool.ts` | +| WebSearch max uses | 8 per request | `WebSearchTool.ts` | +| WebFetch cache TTL | 15 minutes | `WebFetchTool.ts` | +| MEMORY.md max lines | 200 | `memdir.ts` | +| MEMORY.md max bytes | 25,000 | `memdir.ts` | +| Memory scan max files | 200 | `memoryScan.ts` | +| History max items | 100 | `history.ts` | +| Denial max consecutive | 3 | `denialTracking.ts` | +| Denial max total | 20 | `denialTracking.ts` | +| Tool hook timeout | 10 minutes | `hooks.ts` | +| Session-end hook timeout | 1,500 ms | `hooks.ts` | + +### KosmoKrator + +| Constant | Value | Location | +|----------|-------|----------| +| Compact threshold | 60% of context window | `ContextCompactor.php` | +| Compact keep recent | 3 user turns | `ContextCompactor.php` | +| Compact max format chars | 100,000 | `ContextCompactor.php` | +| Pruner protect tokens | 40,000 | `ContextPruner.php` | +| Pruner min savings | 20,000 | `ContextPruner.php` | +| Subagent pruner protect | 20,000 | `SubagentFactory.php` | +| Subagent pruner min savings | 10,000 | `SubagentFactory.php` | +| Token estimate | 4 chars/token | `TokenEstimator.php` | +| Output max lines | 2,000 | `OutputTruncator.php` | +| Output max bytes | 50,000 | `OutputTruncator.php` | +| Truncation cleanup age | 86,400s (1 day) | `OutputTruncator.php` | +| Bash timeout | 120s | Configurable | +| Grep timeout | 30s | `GrepTool.php` | +| Grep max matches | 50 | `GrepTool.php` | +| HTTP transfer timeout | 600s | `AsyncLlmClient.php` | +| HTTP inactivity timeout | 300s | `AsyncLlmClient.php` | +| Retry cap | 300s | `AsyncLlmClient.php` | +| File read large threshold | 10 MB | `FileReadTool.php` | +| File read max lines | 5,000 | 
`FileReadTool.php` | +| Memory warning | 50 MB | `AgentLoop.php` | +| Context overflow retries | 3 | `AgentLoop.php` | +| Subagent max depth | 3 | Configurable | +| Guardian shell metachar pattern | `` /[;&\|`$><\n]/ `` | `GuardianEvaluator.php` | +| Pre-flight check | 80% of context | `AgentLoop.php` | + +--- + +## 23. Inspiration Roadmap + +### Tier 1 — High Impact, Moderate Effort + +| # | Feature | Effort | Impact | Notes | +|---|---------|--------|--------|-------| +| 1 | **Streaming LLM responses** | Medium | Very High | SSE streaming in AsyncLlmClient, renderer updates for partial text | +| 2 | **WebFetch tool** | Low | High | URL → markdown via `league/html-to-markdown`, preapproved hosts | +| 3 | **WebSearch tool** | Low | High | Integrate Tavily/Brave/SerpAPI | +| 4 | **Post-compaction file restoration** | Low | High | Re-attach 5 recently-read files after compaction | +| 5 | **Memory mechanics prompt** | Low | High | Teach LLM to proactively use memory_save/memory_search | +| 6 | **Skills system** | Medium | High | Loadable markdown prompts from ~/.kosmokrator/skills/ | + +### Tier 2 — Medium Impact, Moderate Effort + +| # | Feature | Effort | Impact | Notes | +|---|---------|--------|--------|-------| +| 7 | **MCP client integration** | High | High | PHP MCP client for external tool servers | +| 8 | **Wildcard permission rules** | Low | Medium | Glob patterns in PermissionRule (e.g., `Bash(git *)`) | +| 9 | **Hook system** | Medium | Medium | PreToolUse/PostToolUse shell command hooks | +| 10 | **Custom agent definitions** | Low | Medium | ~/.kosmokrator/agents/ markdown files | +| 11 | **Git worktree isolation** | Medium | Medium | EnterWorktreeTool for safe experimentation | +| 12 | **File state tracking** | Low | Medium | Track read files, detect concurrent edits on edit | +| 13 | **Cost display** | Low | Medium | USD cost in context bar | +| 14 | **Post-compaction instruction re-injection** | Low | Medium | Re-inject KOSMOKRATOR.md after compaction | +| 
15 | **Verification agent type** | Medium | Medium | Adversarial testing agent | +| 16 | **Deferred tool loading** | Medium | Medium | ToolSearch for MCP tool sets | + +### Tier 3 — Nice to Have + +| # | Feature | Effort | Impact | Notes | +|---|---------|--------|--------|-------| +| 17 | **Diminishing returns detection** | Low | Low | Stop after 3+ turns with <500 tokens/turn | +| 18 | **Compaction circuit breaker** | Low | Low | Stop after 3 consecutive failures | +| 19 | **Zsh builtins blocklist** | Low | Low | Block zmodload, sysopen, ztcp etc. | +| 20 | **Agent auto-backgrounding** | Medium | Low | Background long-running subagents after N seconds | +| 21 | **Agent color assignment** | Low | Low | Unique colors per subagent | +| 22 | **Configurable keybindings** | Medium | Low | ~/.kosmokrator/keybindings.yaml | +| 23 | **PDF/Image reading** | Medium | Low | smalot/pdfparser, intervention/image | +| 24 | **GrepTool output modes** | Low | Low | files_with_matches, count, content modes | +| 25 | **Multiline grep** | Low | Low | -U --multiline-dotall flag | +| 26 | **dontAsk permission mode** | Low | Low | Silent deny for CI/automation | +| 27 | **Bypass-immune safety checks** | Low | Low | Always prompt for .git/, .kosmokrator/, shell configs | +| 28 | **Tool `prompt()` contributions** | Low | Low | Tools inject system prompt sections | + +### Tier 4 — Future / Research + +| # | Feature | Effort | Impact | Notes | +|---|---------|--------|--------|-------| +| 29 | AI permission classifier | High | Medium | Auto-approve safe tool calls via LLM | +| 30 | Remote sessions | High | Low | WebSocket-based remote agent control | +| 31 | Voice mode | High | Low | STT/TTS integration | +| 32 | Plugin system | High | Medium | Loadable plugins with custom tools and UI | +| 33 | Context collapse | High | Medium | Advanced granular preservation | +| 34 | Cache-aware compaction | Medium | Medium | Requires Anthropic cache_edits API | + +--- + +## 24. 
Appendix: File Reference + +### Claude Code Key Files + +| File | Size | Purpose | +|------|------|---------| +| `main.tsx` | 4,683 lines | Application entry point | +| `QueryEngine.ts` | 46KB | Session state, submitMessage() | +| `query.ts` | 68KB | Main loop, API streaming, tool execution | +| `Tool.ts` | 30KB | Tool interface and factory | +| `tools.ts` | — | Tool registration and discovery | +| `query/tokenBudget.ts` | — | Budget tracking and continuation | +| `services/compact/autoCompact.ts` | — | Auto-compaction triggers | +| `services/compact/compact.ts` | — | Compaction algorithm | +| `services/compact/microCompact.ts` | — | Cache-based microcompaction | +| `services/tokenEstimation.ts` | 16KB | Token estimation formulas | +| `services/tools/StreamingToolExecutor.ts` | 531 lines | Concurrent streaming executor | +| `tools/BashTool/BashTool.tsx` | 1,143 lines | Shell execution | +| `tools/FileEditTool/FileEditTool.ts` | 625 lines | String replacement | +| `tools/FileReadTool/FileReadTool.ts` | 1,183 lines | File reading | +| `tools/GrepTool/GrepTool.ts` | — | Ripgrep integration | +| `tools/WebFetchTool/WebFetchTool.ts` | — | URL fetching | +| `tools/WebSearchTool/WebSearchTool.ts` | — | Web search | +| `tools/AgentTool/AgentTool.tsx` | — | Subagent spawning | +| `tools/ToolSearchTool/ToolSearchTool.ts` | — | Deferred tool discovery | +| `skills/bundledSkills.ts` | — | Skill registry | +| `skills/loadSkillsDir.ts` | — | Skill file loader | +| `memdir/memdir.ts` | — | Memory entrypoint | +| `memdir/memoryTypes.ts` | 272 lines | Memory type taxonomy | +| `services/extractMemories/extractMemories.ts` | — | Background extraction | +| `services/extractMemories/prompts.ts` | 154 lines | Extraction prompts | +| `constants/prompts.ts` | 914 lines | System prompt | +| `context.ts` | — | Context assembly | +| `utils/permissions/permissions.ts` | — | Permission evaluation | +| `types/hooks.ts` | — | Hook types | +| `cost-tracker.ts` | — | Cost tracking | +| 
`state/AppStateStore.ts` | — | Application state | +| `history.ts` | 465 lines | Session history | +| `commands.ts` | 25KB | Command registry | +| `keybindings/schema.ts` | — | Keybinding configuration | + +### KosmoKrator Key Files + +| File | Size | Purpose | +|------|------|---------| +| `src/Agent/AgentLoop.php` | 904 lines | Core REPL | +| `src/Agent/ConversationHistory.php` | 200 lines | Message buffer | +| `src/Agent/ContextCompactor.php` | 250 lines | LLM summarization | +| `src/Agent/ContextPruner.php` | 129 lines | Tool result pruning | +| `src/Agent/ToolResultDeduplicator.php` | 189 lines | 3-tier deduplication | +| `src/Agent/TokenEstimator.php` | 75 lines | Token estimation | +| `src/Agent/OutputTruncator.php` | 87 lines | Output size limiting | +| `src/Agent/SubagentOrchestrator.php` | 224 lines | Multi-agent management | +| `src/Agent/SubagentFactory.php` | 163 lines | Agent creation | +| `src/Agent/AgentContext.php` | 54 lines | Immutable context | +| `src/Agent/EnvironmentContext.php` | 179 lines | Environment detection | +| `src/Agent/InstructionLoader.php` | 113 lines | Instruction discovery | +| `src/Agent/MemoryInjector.php` | 76 lines | Memory formatting | +| `src/LLM/AsyncLlmClient.php` | 291 lines | Async HTTP client | +| `src/LLM/RetryableLlmClient.php` | — | Retry wrapper | +| `src/Tool/ToolRegistry.php` | 93 lines | Tool management | +| `src/Tool/Permission/PermissionEvaluator.php` | 135 lines | Permission system | +| `src/Tool/Permission/GuardianEvaluator.php` | 152 lines | Heuristic safety | +| `src/Tool/Coding/BashTool.php` | 76 lines | Shell execution | +| `src/Tool/Coding/FileEditTool.php` | 73 lines | File editing | +| `src/Tool/Coding/FileReadTool.php` | 117 lines | File reading | +| `src/Tool/Coding/GrepTool.php` | 94 lines | Text search | +| `src/Tool/Coding/SubagentTool.php` | 171 lines | Subagent spawning | +| `src/Session/SessionManager.php` | 290 lines | Session lifecycle | +| `src/Session/MemoryRepository.php` | 144 lines | 
Memory storage | +| `src/Task/TaskStore.php` | — | Task management | +| `src/Command/AgentCommand.php` | 340 lines | Main entry point | +| `src/Command/SlashCommandRegistry.php` | 83 lines | Command dispatch | +| `src/ConfigLoader.php` | 116 lines | YAML config | +| `src/Kernel.php` | 382 lines | DI container | + +--- + +## Key Takeaway + +KosmoKrator's **core architecture is solid and well-designed**. The agent loop, subagent orchestration with dependency graphs, 3-tier deduplication, permission modes with Guardian heuristics, and dual renderer are all production-quality implementations that compare well to Claude Code's equivalents. + +The main gaps are in **breadth** rather than **depth**: +- **Tools**: Web fetch, web search, MCP, PDF/image reading +- **Streaming**: Real-time LLM response display +- **Context recovery**: Post-compaction file/instruction restoration +- **Extensibility**: Skills, hooks, custom agent definitions +- **Memory**: Proactive extraction and mechanics prompt + +These can all be added incrementally without architectural changes. The Claude Code source provides exact thresholds, algorithms, and prompt templates that can be adapted for PHP implementation. diff --git a/docs/ecosystem/kosmokrator/research/claude-code-architecture.md b/docs/ecosystem/kosmokrator/research/claude-code-architecture.md new file mode 100644 index 0000000..07dc170 --- /dev/null +++ b/docs/ecosystem/kosmokrator/research/claude-code-architecture.md @@ -0,0 +1,2123 @@ +# How Claude Code Works — Architecture Deep Dive + +> A comprehensive visual walkthrough of every major system inside Claude Code, based on reading the full open-sourced TypeScript codebase (1,903 files, 33MB). Covers internal mechanics, exact thresholds, prompts, and design decisions. + +--- + +## Table of Contents + +1. [High-Level Architecture](#1-high-level-architecture) +2. [Startup & Initialization](#2-startup--initialization) +3. [The Agent Loop](#3-the-agent-loop) +4. 
[Streaming & SSE Pipeline](#4-streaming--sse-pipeline) +5. [Tool System](#5-tool-system) +6. [Tool Implementations](#6-tool-implementations) +7. [System Prompt Assembly](#7-system-prompt-assembly) +8. [Context Management — 5 Layers](#8-context-management--5-layers) +9. [Token Estimation & Budget](#9-token-estimation--budget) +10. [Subagent / Multi-Agent System](#10-subagent--multi-agent-system) +11. [Permission System](#11-permission-system) +12. [Hook System](#12-hook-system) +13. [Memory System](#13-memory-system) +14. [Skills System](#14-skills-system) +15. [Task System](#15-task-system) +16. [Terminal UI Architecture](#16-terminal-ui-architecture) +17. [Cost Tracking](#17-cost-tracking) +18. [MCP Integration](#18-mcp-integration) +19. [Session & State Management](#19-session--state-management) +20. [The Verification Agent](#20-the-verification-agent) + +--- + +## 1. High-Level Architecture + +```mermaid +graph TB + User([User]) --> CLI[main.tsx — Commander.js CLI] + CLI --> Init[init.ts — Setup & Auth] + CLI --> REPL[REPL.tsx — React/Ink Screen] + + REPL --> QE[QueryEngine] + QE --> QL[queryLoop — while true] + + QL --> CTX[Context Management
5 compression layers] + QL --> API[Anthropic API
SSE Streaming] + QL --> STE[StreamingToolExecutor
Concurrent execution] + + STE --> Tools[50+ Tools] + Tools --> Coding[Coding Tools
Bash, Read, Write,
Edit, Grep, Glob] + Tools --> Web[Web Tools
WebFetch, WebSearch] + Tools --> Agent[AgentTool
Subagent spawning] + Tools --> MCPTools[MCP Tools
External servers] + Tools --> TaskTools[Task Tools
Create, Update, List] + Tools --> SkillT[SkillTool
Prompt templates] + Tools --> Misc[Misc Tools
LSP, Notebook, REPL,
Worktree, Sleep, ...] + + Agent --> QE2[Child QueryEngine
Isolated context] + QE2 --> QL2[Child queryLoop] + + CTX --> MC[Microcompaction
cache_edits API] + CTX --> TB[Time-based clearing
60min TTL awareness] + CTX --> CC[Context Collapse
Selective archiving] + CTX --> AC[Auto-Compaction
LLM summarization] + CTX --> SM[Session Memory
Background extraction] + + REPL --> Perm[Permission System
6 modes, wildcards,
AI classifier] + REPL --> Hooks[Hook System
12 event types,
shell commands] + REPL --> Mem[Memory System
File-based, 4 types,
background extraction] + REPL --> Tasks[Task System
7 task types,
spinner integration] + REPL --> State[AppState Store
Pub/sub reactive state] + REPL --> Skills[Skills System
Bundled + user-defined
+ MCP + plugins] + + style QE fill:#4a9eff,color:#fff + style QL fill:#4a9eff,color:#fff + style STE fill:#ff6b6b,color:#fff + style CTX fill:#ffa94d,color:#fff + style Agent fill:#69db7c,color:#fff +``` + +The codebase is roughly organized as: + +| Directory | Purpose | Approx size | +|-----------|---------|-------------| +| `ink/` | Custom React reconciler + terminal rendering engine | ~8,000 lines | +| `tools/` | 50+ tool implementations | ~12,000 lines | +| `services/` | MCP, compact, memory, analytics, OAuth, plugins | ~10,000 lines | +| `utils/` | Permissions, hooks, settings, file ops | ~8,000 lines | +| `components/` | React UI components | ~5,000 lines | +| `screens/` | REPL, Doctor, Resume screens | ~3,000 lines | +| `query.ts` + `QueryEngine.ts` | Core agent loop | ~4,500 lines | +| `constants/` | System prompts, model config, tools config | ~2,000 lines | +| `state/` | App state management | ~1,500 lines | +| `keybindings/` | Keyboard shortcut system | ~1,000 lines | + +--- + +## 2. 
Startup & Initialization + +The startup sequence in `main.tsx` (4,683 lines) is heavily optimized for speed — several expensive operations run in parallel before imports even finish: + +```mermaid +sequenceDiagram + participant Entry as main.tsx entry + participant MDM as MDM Raw Read + participant KC as Keychain Prefetch + participant CLI as Commander CLI + participant Init as init() + participant REPL as REPL Screen + + Note over Entry: Side-effects fire BEFORE imports complete + Entry->>MDM: startMdmRawRead() — plutil/reg query in subprocess + Entry->>KC: startKeychainPrefetch() — read OAuth + API key + + Note over Entry: Heavy module evaluation (~135ms) + Entry->>CLI: Parse CLI args (Commander.js) + + CLI->>Init: init() + activate Init + Init->>Init: Node.js version check (18+) + Init->>Init: Session ID setup + Init->>Init: Git repo detection + Init->>Init: Hook config snapshot + Init->>Init: Release notes check + deactivate Init + + Init->>REPL: Launch React/Ink REPL + REPL->>REPL: Connect MCP servers + REPL->>REPL: Load permissions, settings + REPL->>REPL: Initialize GrowthBook feature flags + REPL->>REPL: Show prompt — ready for input +``` + +**Key optimization**: MDM settings reads (macOS `plutil` subprocess) and keychain reads (OAuth token + legacy API key) are fired as the very first lines, before the ~135ms of import evaluation. By the time imports finish, the subprocesses have completed. + +--- + +## 3. The Agent Loop + +The core of Claude Code is split into two layers: `QueryEngine` (session owner) and `query()` (inner loop). 
+ +### QueryEngine (`QueryEngine.ts`, 46KB) + +Owns the session lifecycle: +- `mutableMessages[]` — in-memory message buffer +- `submitMessage()` — async generator that yields `SDKMessage` types +- Manages compact boundaries, permission tracking, transcript recording +- Wraps `canUseTool()` callback to track permission denials +- One QueryEngine per conversation; subagents get their own isolated instances + +### queryLoop() (`query.ts`, 68KB) + +The inner `while(true)` loop (line 307). Each iteration = one LLM turn: + +```mermaid +flowchart TD + Start([User sends message]) --> AddMsg[Add to message history] + AddMsg --> PreFlight{Context
pressure check} + + PreFlight -->|Under threshold| Prompt[Assemble system prompt
+ user context + system context] + PreFlight -->|Over threshold| Layers[Run compression layers:
1. Snip 2. Microcompact
3. Context Collapse 4. Autocompact] + Layers --> Prompt + + Prompt --> Normalize[normalizeMessagesForAPI
Repair tool_use/result pairing
Strip synthetic messages
Limit media to 100 items] + Normalize --> Stream[Stream API call via
queryModelWithStreaming] + + Stream --> EventLoop{SSE event type?} + + EventLoop -->|message_start| InitMsg[Initialize partial message
+ usage tracking] + EventLoop -->|content_block_start| InitBlock[Initialize text/tool_use/
thinking block] + EventLoop -->|content_block_delta| Accum[Accumulate:
input_json_delta
text_delta
thinking_delta] + EventLoop -->|content_block_stop| YieldBlock[Yield completed
AssistantMessage block] + EventLoop -->|message_delta| UpdateUsage[Update usage,
stop_reason, cost] + EventLoop -->|message_stop| StreamDone[Stream complete] + + YieldBlock --> HasToolUse{Block is
tool_use?} + HasToolUse -->|Yes| QueueTool[Queue in
StreamingToolExecutor] + HasToolUse -->|No| RenderText[Render text
to terminal] + + QueueTool --> ExecImmediate[Start execution
immediately if safe] + ExecImmediate --> CollectReady[Yield any
completed results] + + StreamDone --> AnyTools{Any tool_use
blocks in response?} + AnyTools -->|Yes| Remaining[Collect remaining
tool results] + Remaining --> NormResults[Normalize results
for API format] + NormResults --> CheckStop{Stop condition?} + + CheckStop -->|Max turns| Terminal1([Return: max_turns_reached]) + CheckStop -->|Budget exhausted| Terminal2([Return: budget_exhausted]) + CheckStop -->|Abort signal| HandleAbort[Generate synthetic
tool_results for orphans] + HandleAbort --> Terminal3([Return: aborted_streaming]) + CheckStop -->|Continue| NextTurn[Append messages
to state, increment turn] + NextTurn --> PreFlight + + AnyTools -->|No tools| Recovery{Recovery needed?} + Recovery -->|Collapse drain| DrainCollapse[Commit staged
context collapses] + DrainCollapse --> PreFlight + Recovery -->|Reactive compact| ReactiveCompact[Full LLM
summarization] + ReactiveCompact --> PreFlight + Recovery -->|Max output hit| Escalate[Retry: 8K → 64K
max output tokens] + Escalate --> Stream + Recovery -->|Multi-turn| Resume["Inject 'resume' message
Up to 3 retries"] + Resume --> Stream + Recovery -->|Stop hooks| RunHooks[Execute user-defined
stop hooks] + RunHooks -->|Blocking errors| PreFlight + RunHooks -->|Clean| Terminal4([Return to user]) + Recovery -->|Done| Terminal4 + + style Stream fill:#4a9eff,color:#fff + style QueueTool fill:#ff6b6b,color:#fff + style ExecImmediate fill:#ff6b6b,color:#fff + style Layers fill:#ffa94d,color:#fff +``` + +### Loop State + +The loop maintains explicit state that carries between iterations: + +```typescript +type State = { + messages: Message[] // Full conversation + toolUseContext: ToolUseContext // Tools, permissions, abort controller + autoCompactTracking: AutoCompactTrackingState // Compaction metrics + maxOutputTokensRecoveryCount: number // Max-output retries (0-3) + hasAttemptedReactiveCompact: boolean // Prevent compact spirals + turnCount: number // Current turn number + transition: Continue | undefined // Why we continued (next_turn, collapse_drain_retry, etc.) + pendingToolUseSummary: Promise<...> | undefined + stopHookActive: boolean | undefined + maxOutputTokensOverride: number | undefined +} +``` + +### Recovery Decision Tree + +When the LLM responds without any tool_use blocks but the task isn't done, Claude Code has a sophisticated recovery chain: + +```mermaid +flowchart TD + NoTools[LLM response has
no tool_use blocks] --> CollapseCheck{Context collapse
enabled & pending?} + + CollapseCheck -->|Yes| DrainCollapse["Commit staged collapses
(cheap, preserves detail)
transition: collapse_drain_retry"] + CollapseCheck -->|No| ReactiveCheck{First attempt &
context near limit?} + + ReactiveCheck -->|Yes| ReactiveCompact["Full LLM summarization
Strip excess media
hasAttemptedReactiveCompact = true"] + ReactiveCheck -->|No| MaxOutputCheck{stop_reason =
max_tokens?} + + MaxOutputCheck -->|Yes, count < 1| Escalate["Escalate: retry with 64K
max output tokens
(was 8K default)"] + MaxOutputCheck -->|Yes, count < 3| MultiTurn["Inject resume message:
'Continue from where you
left off'
maxOutputTokensRecoveryCount++"] + MaxOutputCheck -->|Yes, count >= 3| GiveUp[Return to user
with partial response] + MaxOutputCheck -->|No| StopHooks{Stop hooks
configured?} + + StopHooks -->|Yes| RunHooks["Execute stop hooks
(user-defined checks)"] + RunHooks -->|Blocking errors| InjectErrors["Inject errors
into context, retry"] + RunHooks -->|Clean| Done([Return to user]) + StopHooks -->|No| Done + + style DrainCollapse fill:#69db7c,color:#000 + style ReactiveCompact fill:#ffa94d,color:#000 + style Escalate fill:#ffd43b,color:#000 + style MultiTurn fill:#ffd43b,color:#000 + style GiveUp fill:#ff6b6b,color:#fff +``` + +--- + +## 4. Streaming & SSE Pipeline + +### API Integration (`claude.ts`) + +Claude Code uses the Anthropic SDK directly, creating a streaming request: + +```typescript +const result = await anthropic.beta.messages.create( + { ...params, stream: true }, + { signal, headers: { [CLIENT_REQUEST_ID_HEADER]: clientRequestId } } +).withResponse() +``` + +It then iterates the raw stream events directly (NOT using the SDK's `BetaMessageStream` helper), giving full control over each SSE event. + +### SSE Event Processing + +```mermaid +sequenceDiagram + participant API as Anthropic API + participant Parser as SSE Parser + participant Loop as Event Handler + participant UI as Terminal UI + participant STE as StreamingToolExecutor + + API->>Parser: data: {"type": "message_start", ...} + Parser->>Loop: message_start + Loop->>Loop: Initialize partialMessage, usage tracking + + API->>Parser: data: {"type": "content_block_start", "content_block": {"type": "thinking"}} + Parser->>Loop: content_block_start (thinking) + Loop->>UI: Show "Thinking..." 
spinner + + API->>Parser: data: {"type": "content_block_delta", "delta": {"thinking": "Let me..."}} + Parser->>Loop: thinking_delta + Loop->>Loop: Accumulate thinking text (not shown to user) + + API->>Parser: data: {"type": "content_block_stop"} + Parser->>Loop: content_block_stop (thinking) + Loop->>UI: Show thinking duration "Thinking (2.3s)" + + API->>Parser: data: {"type": "content_block_start", "content_block": {"type": "text"}} + Parser->>Loop: content_block_start (text) + + API->>Parser: data: {"type": "content_block_delta", "delta": {"text": "I'll search"}} + Parser->>Loop: text_delta + Loop->>UI: Render partial text token-by-token + + API->>Parser: data: {"type": "content_block_start", "content_block": {"type": "tool_use", "name": "Grep"}} + Parser->>Loop: content_block_start (tool_use: Grep) + + API->>Parser: data: {"type": "content_block_delta", "delta": {"partial_json": "{\"pattern\":"}} + Parser->>Loop: input_json_delta + Loop->>Loop: Concatenate to partial input string + + API->>Parser: data: {"type": "content_block_stop"} + Parser->>Loop: content_block_stop (tool_use: Grep) + Loop->>Loop: Parse accumulated JSON → tool input + Loop->>STE: addTool(grep block) + STE->>STE: Start Grep execution immediately + + Note over API,STE: Model is STILL generating more blocks... + + API->>Parser: data: {"type": "content_block_start", "content_block": {"type": "tool_use", "name": "Read"}} + Note over STE: Grep may already be DONE by now + + API->>Parser: data: {"type": "message_delta", "delta": {"stop_reason": "tool_use"}, "usage": {...}} + Parser->>Loop: message_delta + Loop->>Loop: Update final usage, stop_reason, cost + + API->>Parser: data: {"type": "message_stop"} + Parser->>Loop: Streaming complete +``` + +### Idle Timeout Watchdog + +A configurable watchdog kills stalled streams: + +``` +Default: 90 seconds (STREAM_IDLE_TIMEOUT_MS) +Override: CLAUDE_STREAM_IDLE_TIMEOUT_MS env var +Behavior: Timer resets on every chunk. 
If no events arrive within timeout → abort stream. +``` + +### Streaming Fallback + +If the stream errors (not user abort), Claude Code retries as a **non-streaming** request: +- Max 64K tokens for non-streaming (`MAX_NON_STREAMING_TOKENS`) +- Partially-streamed messages are **tombstoned** (invalidated in the UI) +- The StreamingToolExecutor is discarded and a fresh one is created +- All partially-executed tools get synthetic error results + +--- + +## 5. Tool System + +### Tool Interface (`Tool.ts`, 30KB) + +Every tool conforms to a rich generic interface: + +```typescript +Tool = { + // Identity + name: string + userFacingName(input): string + description(input): string + + // Schemas (lazy-evaluated for token efficiency) + inputSchema: ZodSchema // Validated before execution + outputSchema: ZodSchema // Typed output + + // Execution + call(input, context, canUseTool, parentMessage, onProgress): Promise<{data: Output}> + validateInput(input, context): Promise + + // Permissions + checkPermissions(input, context): Promise + + // Behavior flags + isConcurrencySafe(input): boolean // Can run in parallel with others + isReadOnly(): boolean // No side effects + requiresUserInteraction(): boolean // Needs terminal input + + // System prompt + prompt(): string // Injects tool-specific guidance into system prompt + + // Deferred loading + shouldDefer: boolean // Only load schema when ToolSearch fetches it + alwaysLoad: boolean // Always include even with ToolSearch active + + // MCP + isMcp: boolean // From external MCP server +} +``` + +### Tool Registration & Discovery (`tools.ts`) + +```mermaid +flowchart TD + subgraph "Tool Assembly Pipeline" + Base["getAllBaseTools()
~50 built-in tools"] --> FeatureFilter{Feature flags
enabled?} + FeatureFilter -->|Yes| Include[Include tool] + FeatureFilter -->|No| Skip[Skip tool] + + Include --> PermFilter["filterToolsByDenyRules()
Remove blanket-denied tools"] + PermFilter --> ModeFilter{Simple mode?} + ModeFilter -->|Yes| SimpleSet["Only: Bash,
FileRead, FileEdit"] + ModeFilter -->|No| FullSet[Full tool set] + + FullSet --> MCPMerge["assembleToolPool()
Merge built-in + MCP tools"] + MCPMerge --> Dedup["Deduplicate by name
(built-ins take precedence)"] + Dedup --> Sort["Sort for prompt-cache stability
(deterministic ordering)"] + Sort --> DeferCheck{ToolSearch
enabled?} + DeferCheck -->|Yes| Split["Split: alwaysLoad tools
in prompt, rest deferred"] + DeferCheck -->|No| AllInPrompt["All tools in prompt"] + end + + style Base fill:#4a9eff,color:#fff + style MCPMerge fill:#69db7c,color:#000 +``` + +### StreamingToolExecutor (`StreamingToolExecutor.ts`, 531 lines) + +The executor that runs tools concurrently during streaming: + +```mermaid +flowchart TD + subgraph "Tool Queue Management" + Add["addTool(block, message)
Called when content_block_stop
arrives for tool_use"] --> Classify{Concurrent
safe?} + Classify -->|Yes| QueueConc["Queue as concurrent
Can run in parallel"] + Classify -->|No| QueueExcl["Queue as exclusive
Needs sole access"] + + QueueConc --> ProcessQueue["processQueue()"] + QueueExcl --> ProcessQueue + + ProcessQueue --> Check{Any executing
tools?} + Check -->|"All concurrent"| StartParallel["Start next concurrent
tool in parallel"] + Check -->|"Has exclusive"| Wait["Wait for exclusive
to finish"] + Check -->|"None"| StartNext["Start next tool
(concurrent or exclusive)"] + + StartParallel --> Execute["executeTool()"] + StartNext --> Execute + end + + subgraph "Per-Tool Execution" + Execute --> CreateAbort["Create child abort controller
(child of sibling controller)"] + CreateAbort --> RunGenerator["for await (update of runToolUse(...))"] + RunGenerator --> CheckAbort{Aborted?} + CheckAbort -->|"sibling_error"| SynthError["Create synthetic error result
'Interrupted: concurrent tool failed'"] + CheckAbort -->|"user_interrupted"| SynthCancel["Create synthetic result
'Interrupted by user'"] + CheckAbort -->|"streaming_fallback"| SynthFallback["Create fallback result"] + CheckAbort -->|No| ProcessResult{Result type?} + ProcessResult -->|Progress| EmitProgress["Emit progress event
(stdout lines, search hits)"] + ProcessResult -->|Complete| MarkDone["Mark tool complete"] + end + + subgraph "Abort Hierarchy" + QueryAbort["Query Controller
(user ESC / timeout)"] --> SiblingAbort["Sibling Controller
(bash error kills all)"] + SiblingAbort --> ToolAbort["Per-Tool Controller
(individual cancel)"] + end + + Execute --> |"Bash error"| AbortSiblings["this.siblingAbortController.abort('sibling_error')
Kills all parallel tools"] + + style Execute fill:#ff6b6b,color:#fff + style AbortSiblings fill:#ff6b6b,color:#fff +``` + +### Result Yielding + +Results are yielded to the query loop in two modes: +- **`getCompletedResults()`** (non-blocking): Returns any already-completed results in tool order. Called during streaming to drain ready results. +- **`getRemainingResults()`** (async generator): Waits for all pending tools using `Promise.race()` between tool completion and progress availability. Called after streaming ends. + +--- + +## 6. Tool Implementations + +### BashTool (`BashTool.tsx`, 1,143 lines) + +```mermaid +flowchart TD + Input["command, timeout?,
description?,
dangerouslyDisableSandbox?"] --> Sandbox{shouldUseSandbox?} + Sandbox -->|Yes| Wrap["Wrap in sandbox
(SandboxManager)"] + Sandbox -->|No| Direct[Direct execution] + + Wrap --> Exec["exec() via Bun child_process
with AbortSignal"] + Direct --> Exec + + Exec --> Progress["Yield progress events
every 2000ms (PROGRESS_THRESHOLD_MS)"] + Progress --> Timeout{Timeout?} + Timeout -->|No| Complete["Capture stdout + stderr
via EndTruncatingAccumulator"] + Timeout -->|Yes| AutoBG{Auto-background
enabled?} + AutoBG -->|Yes| Background["Migrate to background task
Register foreground → background"] + AutoBG -->|No| Kill[Kill process] + + Complete --> Size{Output > 100K chars?} + Size -->|Yes| Persist["Save full output to /tool-results/
Return preview + file path"] + Size -->|No| Return[Return output directly] + + subgraph "Security Analysis" + Classify["isSearchOrReadBashCommand()"] + Classify --> Split["Split on operators: || && | ; > >>"] + Split --> Each["Classify each part"] + Each --> Neutral["Skip neutral: echo, printf, true, false"] + Each --> SearchRead["Identify: grep, find, ls, cat,
head, tail, wc, stat, file"] + Each --> Mutative["Flag: rm, mv, git commit,
npm install, docker, kubectl"] + end + + subgraph "Zsh Defense" + Block["Block dangerous zsh builtins:
zmodload, emulate, sysopen,
sysread, syswrite, sysseek,
zpty, ztcp, zsocket,
zf_rm, zf_mv, zf_ln..."] + end + + style Exec fill:#4a9eff,color:#fff + style Background fill:#ffd43b,color:#000 + style Block fill:#ff6b6b,color:#fff +``` + +**Key details:** +- **EndTruncatingAccumulator**: Preserves the *start* of output and truncates from the *end* (for most commands this is more useful than keeping only the tail) +- **Background migration**: If a foreground task is already registered, it gets migrated in-place via `backgroundExistingForegroundTask()`. If not registered, a new background task is spawned via `spawnShellTask()` +- **Sed parsing**: `sedEditParser.ts` parses sed commands to generate a simulated preview for the permission dialog + +### FileEditTool (`FileEditTool.ts`, 625 lines) + +```mermaid +flowchart TD + Input["file_path, old_string,
new_string, replace_all?"] --> Validate{File read
previously?} + Validate -->|No| Error1["Error: File has not
been read yet"] + Validate -->|Yes| SizeCheck{File > 1 GiB?} + SizeCheck -->|Yes| Error2["Error: File too large"] + SizeCheck -->|No| ConcurrentCheck{mtime changed
since last read?} + ConcurrentCheck -->|Yes, content same| Proceed["Safe — external touch
but content unchanged"] + ConcurrentCheck -->|Yes, content differs| Error3["Error: File modified
externally since last read"] + ConcurrentCheck -->|No| Proceed + + Proceed --> FindString["findActualString(file, old_string)"] + FindString --> QuoteNorm["Try exact match first
Then normalize quotes:
curly ↔ straight quotes"] + QuoteNorm --> Found{Match found?} + Found -->|No| Error4["Error: String not found
in file"] + Found -->|Yes| CountCheck{Multiple
matches?} + CountCheck -->|"Yes & !replace_all"| Error5["Error: old_string not unique
Provide more context or
use replace_all: true"] + CountCheck -->|OK| PreserveQuotes["preserveQuoteStyle()
Match new_string to
file's typography"] + PreserveQuotes --> Detect["Detect encoding:
UTF-8 or UTF-16LE"] + Detect --> DetectEndings["Detect line endings:
CRLF, LF, or CR"] + DetectEndings --> Replace["Apply replacement
Generate unified diff"] + Replace --> WriteFile["Write file with original
encoding + line endings"] + WriteFile --> UpdateState["Update readFileState
with new content + mtime"] + + style FindString fill:#4a9eff,color:#fff + style WriteFile fill:#69db7c,color:#000 +``` + +**File state tracking**: Every `FileReadTool` call registers `{content, mtime, offset, limit, isPartialView}` in a `readFileState` Map. The FileEditTool checks this on every edit to prevent silent data corruption from concurrent modifications. Partial reads (`isPartialView = true`) block editing entirely. + +### FileReadTool (`FileReadTool.ts`, 1,183 lines) + +| Feature | Details | +|---------|---------| +| **Line range reads** | `offset` + `limit` params, reads specific range without loading whole file | +| **Deduplication** | If same file + range read twice without mtime change, returns `file_unchanged` stub (saves ~18% cache-creation tokens) | +| **PDF support** | Page-range extraction via `extractPDFPages()`, `MAX_PAGES_PER_READ` limit, token-aware compression | +| **Image support** | Format detection, resize/downsample with `compressImageBufferWithTokenLimit()`, metadata text generation | +| **Notebook support** | `.ipynb` cell parsing, maps cells to structured output with code + outputs | +| **Dangerous paths** | Blocks: `/dev/zero`, `/dev/random`, `/dev/stdin`, `/proc/self/fd/*` | +| **Memory freshness** | Appends notes for old CLAUDE.md files, triggers skill directory discovery | + +### GrepTool (`GrepTool.ts`) + +Three output modes with pagination: + +```mermaid +graph LR + subgraph "Output Modes" + Content["content mode
Shows matching lines
with -A/-B/-C context
and line numbers"] + Files["files_with_matches mode
Shows file paths only
Sorted by mtime descending"] + Count["count mode
Shows match counts
per file"] + end + + subgraph "Pagination" + Offset["offset: skip first N results"] + Limit["head_limit: cap output
Default: 250 results
Pass 0 for unlimited"] + end + + subgraph "Ripgrep Flags" + Hidden["--hidden (search hidden files)"] + VCS["--glob !.git --glob !.svn
--glob !.hg (exclude VCS)"] + MaxCols["--max-columns 500"] + Multi["multiline: -U --multiline-dotall"] + Case["-i for case insensitive"] + Type["--type js/py/rust/..."] + end + + style Content fill:#4a9eff,color:#fff + style Files fill:#69db7c,color:#000 + style Count fill:#ffd43b,color:#000 +``` + +### WebFetchTool + +```mermaid +flowchart TD + URL[URL input] --> Parse[Parse hostname] + Parse --> Preapproved{Preapproved host?
GitHub, MDN, npm,
PyPI, Stack Overflow...} + Preapproved -->|Yes| AutoAllow[Skip permission prompt] + Preapproved -->|No| AskPerm[Show permission dialog
with domain:hostname] + AutoAllow --> Fetch + AskPerm -->|Approved| Fetch + + Fetch["getURLMarkdownContent(url)"] --> Redirect{Redirect?} + Redirect -->|Yes| FollowRedirect[Follow + report] + Redirect -->|No| Convert[HTML → Markdown extraction] + + Convert --> CheckSize{Preapproved AND
text/markdown AND
< MAX_MARKDOWN_LENGTH?} + CheckSize -->|Yes| ReturnRaw[Return raw markdown] + CheckSize -->|No| Summarize["applyPromptToMarkdown()
Haiku summarization
with optional user prompt"] + + Summarize --> Binary{Binary content?} + Binary -->|Yes| PersistBinary["Save to disk
Append file path note"] + Binary -->|No| Return[Return result] + ReturnRaw --> Return + PersistBinary --> Return + + Return --> Cache["Cache result for
15 minutes (URL key)"] + + style Fetch fill:#4a9eff,color:#fff + style Summarize fill:#ffd43b,color:#000 +``` + +### WebSearchTool + +Uses Anthropic's native `web_search_20250305` server tool — it doesn't call an external search API. Instead, it makes a sub-call to the Claude API with the search tool enabled: + +```mermaid +sequenceDiagram + participant Tool as WebSearchTool + participant API as Anthropic API + participant UI as Progress UI + + Tool->>API: Create streaming request with:
system: "You are an assistant for web search"
tools: [{type: "web_search_20250305", max_uses: 8}]
message: "Perform a web search for: {query}" + + API->>Tool: content_block_start (server_tool_use) + API->>Tool: input_json_delta (partial query JSON) + Note over Tool: Extract query via regex from partial JSON + Tool->>UI: Progress: "Searching for: {extracted query}" + + API->>Tool: content_block_stop (web_search_tool_result) + Note over Tool: Parse search results: title + URL pairs + + API->>Tool: content_block (text — summary) + Note over Tool: Accumulate text summary + + API->>Tool: message_stop + Tool->>Tool: Combine results + summary into output +``` + +**Key constraint**: Max 8 searches per request (hardcoded in tool schema). Available on first-party API, Vertex (Claude 4+), and Foundry. + +--- + +## 7. System Prompt Assembly + +### Structure + +The system prompt is approximately **914 lines** split into cacheable and dynamic sections: + +```mermaid +graph TD + subgraph "Cacheable Prefix — SYSTEM_PROMPT_DYNAMIC_BOUNDARY" + direction TB + Intro["Identity & Role
'You are an interactive agent that
helps users with software engineering tasks'"] + System["# System
Tool execution rules, permission modes,
hooks, context compression, tags"] + Tasks2["# Doing tasks
Engineering best practices,
code quality, no unnecessary changes,
security awareness, error handling"] + Actions["# Executing actions with care
Reversibility, blast radius, confirmation
for risky ops, measure twice cut once"] + UsingTools["# Using your tools
Prefer dedicated tools over Bash,
parallel calls, task management"] + Tone["# Tone and style
No emojis, concise, file:line refs,
owner/repo#123 format"] + Efficiency["# Output efficiency
Go straight to the point,
skip filler, inverted pyramid"] + end + + subgraph "Dynamic Section — Per-Session" + direction TB + ToolGuidance["Tool-specific guidance
Agent, Skills, ToolSearch, MCP"] + MemMechanics["Memory mechanics prompt
How to save/recall memories
Types, format, when to access"] + VerifierContract["Verification agent contract
When to spawn, how to review"] + SkillList["Available skills list
Discovered /commands"] + end + + subgraph "User Context" + direction TB + ClaudeMD["CLAUDE.md files
From directory hierarchy"] + DateCtx["Current date
'Today's date is 2025-03-31'"] + end + + subgraph "System Context" + direction TB + GitStatus["Git status
Branch, status (2000 char cap),
5 recent commits, user name"] + end + + Intro --> System --> Tasks2 --> Actions --> UsingTools --> Tone --> Efficiency + Efficiency -.->|"__SYSTEM_PROMPT_DYNAMIC_BOUNDARY__"| ToolGuidance + ToolGuidance --> MemMechanics --> VerifierContract --> SkillList + + style Intro fill:#4a9eff,color:#fff + style ToolGuidance fill:#ffd43b,color:#000 + style ClaudeMD fill:#69db7c,color:#000 + style GitStatus fill:#b197fc,color:#000 +``` + +### Notable prompt sections + +**"Executing actions with care"** — essentially a philosophy on reversibility: +> "Carefully consider the reversibility and blast radius of actions. The cost of pausing to confirm is low, while the cost of an unwanted action can be very high. A user approving an action once does NOT mean they approve it in all contexts. Measure twice, cut once." + +**"Doing tasks"** — anti-overengineering guidelines: +> "Don't add features, refactor code, or make 'improvements' beyond what was asked. Don't add docstrings, comments, or type annotations to code you didn't change. Three similar lines of code is better than a premature abstraction." + +**CLAUDE.md hierarchy** (loaded bottom-up, all files included): +``` +~/.claude/CLAUDE.md (user-global) +{git_root}/CLAUDE.md (project, committed) +{git_root}/.claude/CLAUDE.md (project, gitignored) +{git_root}/.claude.local/CLAUDE.md (local, always gitignored) +{subdir}/CLAUDE.md (subdirectory override) +``` + +--- + +## 8. Context Management — 5 Layers + +```mermaid +graph TD + subgraph "Layer 1: Cache-Edit Microcompaction" + MC["Surgically delete individual tool results
from the API's prompt cache"] + MC_how["Uses Anthropic cache_edits API
Does NOT modify local messages
Edits applied at API layer
via cache_reference + cache_edits blocks"] + MC_what["Clearable: FILE_READ, SHELL,
GREP, GLOB, WEB_SEARCH,
WEB_FETCH, FILE_EDIT, FILE_WRITE"] + MC_when["Trigger: count-based
(GrowthBook feature gate)"] + MC_scope["Main thread only —
subagents excluded to
prevent dangling references"] + end + + subgraph "Layer 2: Time-Based Microcompaction" + TB["Clear stale tool results when
server cache is already cold"] + TB_how["Content set to sentinel:
'[Old tool result content cleared]'"] + TB_when["Trigger: idle > 60 min
(= server cache TTL)"] + TB_keep["Keeps 5 most recent
compactable tool results"] + TB_est["Token savings estimated per result:
images/docs = 2000 tokens
text = length / 4"] + end + + subgraph "Layer 3: Context Collapse" + CL["Selective message archiving
that preserves detail longer"] + CL_commit["Commit point: 90% of
effective context"] + CL_block["Blocking spawn threshold:
95% of effective context"] + CL_race["When enabled, auto-compaction
is DISABLED to prevent
race conditions"] + end + + subgraph "Layer 4: Auto-Compaction" + AC["Full LLM summarization
of old conversation"] + AC_thresh["Threshold: effectiveWindow - 13K buffer
≈ 93% of usable context"] + AC_window["effectiveWindow = contextWindow
- min(maxOutput, 20K)"] + AC_circuit["Circuit breaker: stops after
3 consecutive failures"] + AC_prompt["9-section summary prompt:
1. Primary request
2. Key technical concepts
3. Files & code sections
4. Errors & fixes
5. Problem solving
6. All user messages
7. Pending tasks
8. Current work
9. Next step"] + end + + subgraph "Layer 5: Session Memory Compaction" + SM["Background summary extraction
to separate storage"] + SM_config["Config: minTokens=10K,
maxTokens=40K,
minTextBlockMessages=5"] + SM_invariants["Preserves API invariants:
tool_use/result pairing,
thinking block grouping"] + end + + MC -.->|"Still over?"| TB + TB -.->|"Still over?"| CL + CL -.->|"Still over?"| AC + AC -.->|"Still over?"| SM + + style MC fill:#69db7c,color:#000 + style TB fill:#a9e34b,color:#000 + style CL fill:#ffd43b,color:#000 + style AC fill:#ffa94d,color:#000 + style SM fill:#ff6b6b,color:#fff +``` + +### Post-Compaction Restoration + +After auto-compaction summarizes old messages, Claude Code reconstructs essential context: + +```mermaid +flowchart LR + Compact["Compaction complete
Old messages replaced
with summary"] --> FileRestore + + subgraph FileRestore ["File Restoration"] + direction TB + Scan["Scan messages for
FILE_READ tool_uses"] + Scan --> Collect["Collect file paths
(skip dedup stubs)"] + Collect --> Select["Select 5 most recent files
within 50K token budget"] + Select --> Truncate["Truncate each to
5K tokens if needed"] + Truncate --> Attach["Attach as file content
after summary"] + end + + FileRestore --> SkillRestore + + subgraph SkillRestore ["Skill Re-injection"] + direction TB + ScanSkills["Collect invoked skills
from bootstrap state"] + ScanSkills --> SortRecent["Sort most-recent-first"] + SortRecent --> TruncSkill["Head-preserving truncation
(keep setup/usage)"] + TruncSkill --> FitBudget["Fit within 25K budget
(5K per skill)"] + end + + SkillRestore --> StateRestore + + subgraph StateRestore ["State Preservation"] + direction TB + Plan["Plan mode attachment"] + AsyncAgents["Async agent status
(running/finished)"] + Hooks3["Hook result messages
(session start, plan mode)"] + end + + StateRestore --> Ready["Agent continues
with key context intact"] + + style Compact fill:#ffa94d,color:#000 + style Ready fill:#69db7c,color:#000 +``` + +### Prompt-Too-Long Retry Loop + +If the compaction API call itself hits a prompt-too-long error: + +``` +Max retries: 3 (MAX_PTL_RETRIES) +Strategy: truncateHeadForPTLRetry() — drop oldest API-round groups +Fallback: If token gap unparseable, drop 20% of groups +``` + +### All Thresholds + +| Metric | Value | Source | +|--------|-------|--------| +| Effective context window | `contextWindow - min(maxOutput, 20K)` | `autoCompact.ts` | +| Auto-compact buffer | 13,000 tokens | `AUTOCOMPACT_BUFFER_TOKENS` | +| Auto-compact threshold | ~93% of effective window | Calculated | +| Warning threshold buffer | 20,000 tokens | `WARNING_THRESHOLD_BUFFER_TOKENS` | +| Manual compact buffer | 3,000 tokens | `MANUAL_COMPACT_BUFFER_TOKENS` | +| Max compaction output | 20,000 tokens | `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | +| Max consecutive failures | 3 | `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | +| Post-compact file budget | 50,000 tokens | `POST_COMPACT_TOKEN_BUDGET` | +| Post-compact file cap | 5,000 tokens/file | `POST_COMPACT_MAX_TOKENS_PER_FILE` | +| Post-compact max files | 5 | `POST_COMPACT_MAX_FILES_TO_RESTORE` | +| Post-compact skill budget | 25,000 tokens | `POST_COMPACT_SKILLS_TOKEN_BUDGET` | +| Post-compact skill cap | 5,000 tokens/skill | `POST_COMPACT_MAX_TOKENS_PER_SKILL` | +| Session memory min tokens | 10,000 | Default config | +| Session memory max tokens | 40,000 | Default config | +| Session memory min messages | 5 text-block messages | Default config | +| Context collapse commit | 90% of effective window | Feature-gated | +| Context collapse blocking | 95% of effective window | Feature-gated | +| Time-based MC gap | 60 minutes | Server cache TTL | +| Time-based MC keep recent | 5 tool results | Default config | +| Max PTL retries | 3 | `MAX_PTL_RETRIES` | +| PTL fallback | Drop 20% of groups | When gap unparseable | + +--- + +## 9. 
Token Estimation & Budget + +### Estimation Formulas + +``` +Text: tokens ≈ length / 4 (assumes ~4 bytes per token, general) +JSON: tokens ≈ length / 2 (assumes ~2 bytes per token, denser structure) +Images/docs: 2,000 tokens flat estimate +Message total: ceil(sum * 4/3) — 33% conservative padding +``` + +File-type-specific: `.json`, `.jsonl`, `.jsonc` use 2 bytes/token. Everything else uses 4. + +### Budget Continuation Logic + +When an agent has a token budget, the system tracks usage and decides whether to continue: + +```mermaid +flowchart TD + Check["checkTokenBudget()"] --> HasBudget{Budget set?} + HasBudget -->|No| Stop1([No continuation]) + HasBudget -->|Yes| CalcPct["pct = (globalTurnTokens / budget) * 100"] + + CalcPct --> CalcDelta["delta = tokens since last check"] + CalcDelta --> Diminishing{continuationCount >= 3
AND last 2 deltas
both < 500 tokens?} + + Diminishing -->|Yes| StopDim([Stop: diminishing returns]) + Diminishing -->|No| UnderBudget{Under 90%?} + + UnderBudget -->|Yes| Continue["Continue
Inject nudge message:
'You've used X% of budget.
Y tokens remaining.'"] + UnderBudget -->|No| WasContinued{Any prior
continuation?} + WasContinued -->|Yes| StopBudget([Stop: budget threshold]) + WasContinued -->|No| StopFirst([Stop: first check]) + + style Continue fill:#69db7c,color:#000 + style StopDim fill:#ff6b6b,color:#fff + style StopBudget fill:#ff6b6b,color:#fff +``` + +| Constant | Value | +|----------|-------| +| `COMPLETION_THRESHOLD` | 0.9 (90%) | +| `DIMINISHING_THRESHOLD` | 500 tokens per turn | +| Min continuations for diminishing check | 3 | + +--- + +## 10. Subagent / Multi-Agent System + +### Agent Spawning + +```mermaid +flowchart TD + Call["AgentTool called with:
description, prompt,
subagent_type?, model?,
run_in_background?,
isolation?"] --> ResolveType{Agent type?} + + ResolveType -->|Explicit type| LoadBuiltIn["Load built-in agent:
General, Explore, Plan,
Verification, Guide"] + ResolveType -->|Custom name| LoadCustom["Load from
~/.claude/agents/{name}.md"] + ResolveType -->|Not specified| ForkCheck{Fork
enabled?} + ForkCheck -->|Yes| LoadFork["Fork agent —
inherits parent prompt
(cache-sharing optimization)"] + ForkCheck -->|No| LoadGeneral[General Purpose agent] + + LoadBuiltIn --> AssembleTools["assembleToolPool()
Filter by agent permissions"] + LoadCustom --> AssembleTools + LoadFork --> AssembleTools + LoadGeneral --> AssembleTools + + AssembleTools --> Isolation{isolation
mode?} + Isolation -->|worktree| CreateWT["createAgentWorktree(slug)
Isolated git branch + filesystem"] + Isolation -->|remote| Teleport["teleportToRemote()
Launch on CCR cloud"] + Isolation -->|none| LocalExec[Local execution] + + CreateWT --> SpawnMode + Teleport --> ReturnRemote([Return remote session URL]) + LocalExec --> SpawnMode + + SpawnMode{run_in_background?} + SpawnMode -->|Yes| AsyncLaunch["registerAsyncAgent()
Run in background
Return immediately"] + SpawnMode -->|No| SyncLaunch["Run inline
Block parent"] + + SyncLaunch --> AutoBG["Start auto-background
timer: 120 seconds"] + AutoBG --> Race["Promise.race:
agent result vs timer"] + Race -->|"Agent finishes"| ReturnResult([Return result inline]) + Race -->|"Timer fires"| MigrateAsync["Migrate to background
mid-execution"] + MigrateAsync --> ReturnAsync([Return async_launched]) + + style CreateWT fill:#b197fc,color:#000 + style AsyncLaunch fill:#ffd43b,color:#000 + style SyncLaunch fill:#69db7c,color:#000 + style MigrateAsync fill:#ffa94d,color:#000 +``` + +### Built-in Agent Types + +| Agent | Access Level | System Prompt Focus | Disallowed | +|-------|-------------|-------------------|------------| +| **General Purpose** | Full read/write | "Complete the task fully, don't gold-plate" | AgentTool (no nesting), TaskOutputTool | +| **Explore** | Read-only | "File search specialist. STRICTLY PROHIBITED from creating/modifying files" | All write tools, file creation | +| **Plan** | Read-only | "Software architect and planning specialist. End with 3-5 critical files" | All write tools, state-changing bash | +| **Verification** | Read-only + run | "Your job is to try to break it. Fight your own cognitive biases" | File writes, git writes | +| **Fork** | Inherits parent | Same system prompt as parent (cache sharing) | Recursive forking | +| **Claude Code Guide** | Read + web | "Help with Claude Code, Agent SDK, Claude API questions" | Write tools | + +### Custom Agent Definition Format + +```markdown +--- +name: security-reviewer +description: Security-focused code reviewer +whenToUse: When user asks for security review +disallowedTools: [FileWrite, FileEdit, Bash] +model: inherit +--- + +You are a security review specialist. Analyze code for: +- OWASP Top 10 vulnerabilities +- Injection risks (SQL, command, XSS) +- Authentication/authorization flaws +- Sensitive data exposure +... 
+``` + +### Worktree Lifecycle + +```mermaid +sequenceDiagram + participant Agent as AgentTool + participant Git as Git + participant Child as Child Agent + participant Cleanup as Cleanup + + Agent->>Git: git worktree add agent-{id} + Git-->>Agent: worktreePath, worktreeBranch, headCommit + + Agent->>Child: Run in worktreePath (CWD override) + activate Child + Note over Child: Works in isolated filesystem + Child->>Child: Make changes, run tests + Child-->>Agent: Result + deactivate Child + + Agent->>Cleanup: cleanupWorktreeIfNeeded() + Cleanup->>Git: hasWorktreeChanges(path, headCommit)? + + alt No changes + Cleanup->>Git: git worktree remove + Note over Cleanup: Clean up — nothing to keep + else Has changes + Note over Cleanup: Preserve worktree + branch + Cleanup-->>Agent: Return {worktreePath, worktreeBranch} + end +``` + +### Inter-Agent Communication + +Agents communicate via three mechanisms: +1. **SendMessageTool**: Direct messaging by agent ID. The message lands in the target agent's `pendingUserMessages` queue. +2. **TaskNotification XML**: In coordinator mode, workers inject `<task-notification>` XML blocks into user messages to report status. +3. **Scratchpad directory**: For durable cross-worker state (file-based, feature-gated). + +--- + +## 11. Permission System + +### Full Decision Flow + +```mermaid +flowchart TD + ToolCall([Tool call requested]) --> Step1 + + subgraph Step1 ["Step 1: Deny Rules (absolute)"] + DenyTool{Entire tool
denied?} + DenyTool -->|Yes| Blocked([DENIED — no override]) + DenyTool -->|No| DenyContent{Content-specific
deny rule?} + DenyContent -->|Yes| Blocked + DenyContent -->|No| Next1[Continue] + end + + Next1 --> Step2 + + subgraph Step2 ["Step 2: Ask Rules"] + AskTool{Entire tool
has ask rule?} + AskTool -->|Yes, sandbox can auto-allow| AutoSandbox[Auto-allow via sandbox] + AskTool -->|Yes| GoToMode[Go to mode check] + AskTool -->|No| ToolPerms["tool.checkPermissions()"] + end + + ToolPerms --> Step3 + + subgraph Step3 ["Step 3: Tool-Specific Logic"] + ToolResult{Tool says?} + ToolResult -->|Allow| AllowTool([ALLOWED]) + ToolResult -->|Deny| DenyTool2([DENIED]) + ToolResult -->|Ask| SafetyCheck{Safety check?
.git/ .claude/ .vscode/
shell configs} + SafetyCheck -->|Yes| BypassImmune([ALWAYS ASK
bypass-immune]) + SafetyCheck -->|No| GoToMode + end + + GoToMode --> Step4 + + subgraph Step4 ["Step 4: Mode Resolution"] + Mode{Permission mode?} + Mode -->|default| Prompt([Show dialog]) + Mode -->|acceptEdits| AcceptCheck{File edit
in CWD?} + AcceptCheck -->|Yes| AllowAccept([ALLOWED]) + AcceptCheck -->|No| Prompt + Mode -->|bypass| AllowBypass([ALLOWED]) + Mode -->|plan| ShowPlan([Show plan]) + Mode -->|dontAsk| SilentDeny([DENIED silently]) + Mode -->|auto| Step5 + end + + subgraph Step5 ["Step 5: Auto Mode (AI Classifier)"] + FastPath1{Safe tool?
Read, Glob, Grep, LSP,
TaskCreate, Sleep...} + FastPath1 -->|Yes| AllowSafe([ALLOWED]) + FastPath1 -->|No| FastPath2{acceptEdits
would allow?} + FastPath2 -->|Yes| AllowFast([ALLOWED]) + FastPath2 -->|No| RunClassifier["classifyYoloAction()
AI side-query"] + RunClassifier --> ClassResult{Decision?} + ClassResult -->|Allow| AllowClass([ALLOWED
Reset consecutive denials]) + ClassResult -->|Deny| TrackDeny["Increment denials:
consecutive++
total++"] + TrackDeny --> Fallback{consecutive >= 3
OR total >= 20?} + Fallback -->|Yes| FallbackPrompt([Fall back to prompting]) + Fallback -->|No| SilentDenyClass([DENIED silently]) + end + + style Blocked fill:#ff6b6b,color:#fff + style BypassImmune fill:#ffa94d,color:#000 + style AllowTool fill:#69db7c,color:#000 + style AllowAccept fill:#69db7c,color:#000 + style AllowBypass fill:#69db7c,color:#000 + style AllowSafe fill:#69db7c,color:#000 + style AllowFast fill:#69db7c,color:#000 + style AllowClass fill:#69db7c,color:#000 + style Prompt fill:#ffd43b,color:#000 + style RunClassifier fill:#b197fc,color:#000 +``` + +### Rule Format & Matching + +Permission rules use a glob-style pattern language: + +``` +Bash — Matches entire tool (all bash commands) +Bash(npm install) — Exact command match +Bash(npm *) — Wildcard: any npm command +Bash(npm:*) — Legacy prefix syntax +Bash(curl https://\*.com) — Escaped asterisk (literal *) +Bash(git commit *) — Matches git commit with any flags + +mcp__github — All tools from GitHub MCP server +mcp__github__list_repos — Specific MCP tool + +Agent(Explore) — Deny Explore agent specifically +``` + +**Wildcard algorithm** (`shellRuleMatching.ts`): +1. Trim pattern +2. Replace `\*` → null-byte placeholder, `\\` → null-byte placeholder +3. Escape regex special chars (except unescaped `*`) +4. Convert unescaped `*` to `.*` +5. Make trailing ` .*` optional (so `git *` matches bare `git` too) +6. Test full string match: `^pattern$` with `dotAll` flag + +### Rule Sources (8 levels) + +```mermaid +graph TD + P["policySettings — Enterprise/admin managed
Read-only, cannot be overridden by user"] --> F + F["flagSettings — --permissions CLI flag
Applied at startup"] --> Proj + Proj["projectSettings — .claude/settings.json
Committed to repo, shared with team"] --> L + L["localSettings — .claude/settings.local.json
Always gitignored, personal overrides"] --> U + U["userSettings — ~/.claude/settings.json
Global user preferences"] --> C + C["cliArg — Runtime arguments
In-memory, from API/SDK callers"] --> Cmd + Cmd["command — Runtime directives
From coordinator or workflow"] --> S + S["session — In-memory grants
'Always allow for this session'
NOT persisted to disk"] + + style P fill:#ff6b6b,color:#fff + style Proj fill:#ffa94d,color:#000 + style U fill:#ffd43b,color:#000 + style S fill:#69db7c,color:#000 +``` + +### Session Grants Flow + +When the user clicks "Always allow for this session": + +```mermaid +sequenceDiagram + participant User + participant Dialog as Permission Dialog + participant Context as ToolPermissionContext + participant Future as Future Tool Calls + + User->>Dialog: Click "Always allow for this session" + Dialog->>Context: PermissionUpdate {
type: 'addRules',
rules: [{toolName: 'Bash', ruleContent: 'npm install'}],
behavior: 'allow',
destination: 'session'
} + Context->>Context: alwaysAllowRules['session'].push('Bash(npm install)') + Note over Context: In-memory only — NOT written to disk + + Future->>Context: Check: can use Bash(npm install)? + Context-->>Future: Matched session grant → ALLOW + + Note over Context: Session ends → context freed → grant lost +``` + +### Dangerous Bash Patterns (Auto Mode) + +When entering auto mode, these patterns are stripped from allow rules to prevent interpreter bypass: + +``` +python, python3, python2, node, deno, tsx, ruby, perl, php, lua, +npx, bunx, npm run, yarn run, pnpm run, bun run, bash, sh, ssh, +zsh, fish, eval, exec, env, xargs, sudo +``` + +Plus ANT-only: `gh`, `curl`, `wget`, `git`, `kubectl`, `aws`, `gcloud`, `gsutil` + +--- + +## 12. Hook System + +### Event Types & Lifecycle + +```mermaid +flowchart TD + subgraph "Pre-Execution Hooks" + PreTool["PreToolUse
Before tool runs
Can: block, modify input, add context"] + UserSubmit["UserPromptSubmit
User sends message
Can: modify, block"] + SubStart["SubagentStart
Before subagent spawns"] + WTCreate["WorktreeCreate
Before worktree creation"] + end + + subgraph "Post-Execution Hooks" + PostTool["PostToolUse
After tool succeeds
Can: add context, modify MCP output"] + PostFail["PostToolUseFailure
After tool fails"] + PermDenied["PermissionDenied
Classifier denied tool"] + end + + subgraph "Lifecycle Hooks" + SessStart["SessionStart
Session initialized
Can: inject initial messages"] + Setup["Setup
Additional initialization"] + FileChange["FileChanged
Watched file modified"] + CwdChange["CwdChanged
Working directory changed"] + end + + subgraph "Notification Hooks" + Notif["Notification
Types: permission_prompt,
idle_prompt, auth_success,
elicitation_dialog/complete/response"] + end + + subgraph "Permission Hooks" + PermReq["PermissionRequest
Permission prompt triggered
Can: approve/deny with rules"] + end + + style PreTool fill:#ffa94d,color:#000 + style PostTool fill:#69db7c,color:#000 + style SessStart fill:#4a9eff,color:#fff + style PermReq fill:#b197fc,color:#000 +``` + +### Hook I/O + +**PreToolUse** receives: +```json +{ + "type": "tool_use", + "name": "Bash", + "input": {"command": "npm install lodash"}, + "tool_use_id": "toolu_01...", + "tool_name": "Bash" +} +``` + +**PreToolUse** can return: +```json +{ + "hookEventName": "PreToolUse", + "permissionDecision": "approve", // or "block" + "permissionDecisionReason": "Lint passed", + "updatedInput": {"command": "npm install --save-exact lodash"}, + "additionalContext": "Note: package was pinned to exact version" +} +``` + +**PermissionRequest** can return: +```json +{ + "hookEventName": "PermissionRequest", + "decision": { + "behavior": "allow", + "updatedInput": {"command": "..."}, + "updatedPermissions": [ + {"type": "addRules", "rules": [{"toolName": "Bash", "ruleContent": "npm *"}], "behavior": "allow", "destination": "session"} + ], + "interrupt": false + } +} +``` + +### Configuration + +```json +{ + "hooks": { + "PreToolUse": [ + { + "matcher": {"tool_name": "Bash"}, + "command": "~/.claude/hooks/lint-bash.sh", + "timeout": 600000 + } + ], + "PostToolUse": [ + { + "matcher": {"tool_name": "FileEdit"}, + "command": "~/.claude/hooks/auto-format.sh" + } + ], + "UserPromptSubmit": [ + { + "command": "~/.claude/hooks/log-prompt.sh" + } + ] + } +} +``` + +### Timeouts + +| Hook type | Default timeout | +|-----------|----------------| +| Tool hooks (PreToolUse, PostToolUse, etc.) | 10 minutes | +| Session-end hooks | 1.5 seconds | +| Override via env | `CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS` | + +### Async Hooks + +Hooks can return `{ async: true, asyncTimeout?: number }` to execute in the background: +- The agent continues without waiting +- Hook runs in a subprocess +- Completion is notified via callback + +--- + +## 13. 
Memory System + +### Architecture Overview + +```mermaid +graph TD + subgraph "Conversation Flow" + Turn["Each turn completes"] --> PostHook["Post-sampling hook fires"] + PostHook --> Gate{Feature gate:
tengu_passport_quail
+ auto-memory enabled?} + Gate -->|No| Skip[Skip extraction] + Gate -->|Yes| Throttle{Every N turns?
Default: 1} + Throttle -->|Skip| Skip + Throttle -->|Run| Cursor["Advance cursor:
count messages since
lastMemoryMessageUuid"] + Cursor --> Overlap{Main agent
already wrote memories?} + Overlap -->|Yes| Skip + Overlap -->|No| Manifest["Pre-inject manifest
of existing memories"] + Manifest --> Fork["Fork extraction agent
Max 5 turns"] + end + + subgraph "Extraction Agent" + Fork --> Tools2["Restricted tools:
Read, Grep, Glob
read-only Bash (ls, find, grep...)
Edit/Write ONLY in memory dir"] + Tools2 --> Analyze["Analyze recent messages"] + Analyze --> Decide{Worth
remembering?} + Decide -->|Yes| WriteFile["Write memory file
with frontmatter"] + Decide -->|No| NextMsg[Check next message] + WriteFile --> UpdateIndex["Update MEMORY.md index"] + end + + subgraph "Storage" + direction LR + Dir["~/.claude/projects/{project}/memory/"] + Index["MEMORY.md
Max 200 lines / 25KB
ALWAYS loaded into context"] + Files["Individual .md files
with frontmatter"] + Dir --- Index + Dir --- Files + end + + subgraph "Recall (Next Conversation)" + Load["Load MEMORY.md
into system prompt"] --> LLM["LLM sees memories
in context"] + LLM --> Verify["Before recommending:
- File path? Check exists
- Function? Grep for it
- Recent state? Use git log"] + end + + style Fork fill:#b197fc,color:#000 + style Index fill:#4a9eff,color:#fff + style Verify fill:#ffa94d,color:#000 +``` + +### Memory Types + +| Type | What to save | When to save | Body structure | +|------|-------------|-------------|----------------| +| **user** | Role, goals, preferences, expertise level | When you learn any details about the user | Free-form | +| **feedback** | Approach guidance — corrections AND confirmations | Corrections ("don't do X") AND confirmations ("yes, exactly that") | Rule → **Why:** → **How to apply:** | +| **project** | Ongoing work, goals, deadlines, decisions | When you learn who/what/why/when — convert relative dates to absolute | Fact → **Why:** → **How to apply:** | +| **reference** | Pointers to external systems | When you learn about resources in external systems | URL/path + purpose | + +### What NOT to Save + +The prompt explicitly forbids saving: +- Code patterns, conventions, architecture, file paths — *read the current state* +- Git history, recent changes — *`git log` / `git blame` are authoritative* +- Debugging solutions or fix recipes — *the fix is in the code* +- Anything already documented in CLAUDE.md files +- Ephemeral task details: in-progress work, temporary state + +> "These exclusions apply even when the user explicitly asks you to save. If they ask you to save a PR list or activity summary, ask what was *surprising* or *non-obvious* about it — that is the part worth keeping." + +### Memory File Format + +```markdown +--- +name: User prefers single PRs for refactors +description: Bundled PRs over many small ones for refactoring work +type: feedback +--- + +User prefers one bundled PR over many small ones for refactors. + +**Why:** Splitting causes unnecessary churn in this codebase. +**How to apply:** When planning refactors, propose a single PR unless the change truly requires staging. 
+``` + +### MEMORY.md Index + +```markdown +- [User profile](user_profile.md) — Senior backend engineer, prefers Go, new to React +- [Testing approach](feedback_testing.md) — Integration tests must hit real DB, not mocks +- [Auth rewrite](project_auth.md) — Legal-driven, compliance deadline 2026-04-15 +- [Bug tracker](reference_linear.md) — Pipeline bugs tracked in Linear project "INGEST" +``` + +**Constraints**: Max 200 lines, max 25KB. If exceeded, truncated with warning: +> "WARNING: MEMORY.md is N lines and X bytes. Only part of it was loaded." + +### Extraction Agent Tool Permissions + +The extraction agent has a tightly-scoped tool set: + +```typescript +function createAutoMemCanUseTool(memoryDir: string): CanUseToolFn { + // ALLOW unrestricted: Read, Grep, Glob + // ALLOW if read-only: Bash (ls, find, grep, cat, stat, wc, head, tail) + // ALLOW only in memoryDir: Edit, Write + // DENY everything else: rm, MCP, Agent, write-capable Bash +} +``` + +### Staleness Verification + +Before recommending from memory, the prompt instructs: + +> "A memory that names a specific function, file, or flag is a claim that it existed *when the memory was written*. Before recommending it: +> - If the memory names a file path: check the file exists. +> - If the memory names a function or flag: grep for it. +> - If the user is about to act on your recommendation, verify first. +> 'The memory says X exists' is not the same as 'X exists now.'" + +--- + +## 14. 
Skills System + +### Skill Sources & Merging + +```mermaid +graph TD + subgraph "Bundled Skills (in binary)" + B1["/commit — Git commit workflow"] + B2["/review — Code review"] + B3["/simplify — Simplify code"] + B4["/loop — Recurring task runner"] + B5["/debug — Debug assistance"] + B6["/verify — Verification workflow"] + B7["/remember — Save a memory"] + B8["/schedule — Cron agent setup"] + B9["/claude-api — API helper"] + B10["/keybindings — Configure keys"] + B11["/update-config — Settings helper"] + B12["+20 more..."] + end + + subgraph "User Skills" + US["~/.claude/skills/{name}/SKILL.md"] + US2["~/.claude/commands/{name}.md (legacy)"] + end + + subgraph "Project Skills" + PS[".claude/skills/{name}/SKILL.md"] + PS2[".claude/commands/{name}.md (legacy)"] + end + + subgraph "MCP Skills" + MCP2["MCP resources → skill builders"] + end + + subgraph "Plugin Skills" + PL["Loaded plugins → skill exports"] + end + + B1 --> Merge["Merge with priority:
1. Bundled
2. Built-in plugins
3. User skills
4. Project skills
5. MCP skills
6. Plugin skills"] + US --> Merge + PS --> Merge + MCP2 --> Merge + PL --> Merge + + Merge --> Registry["Skill Registry
(first match by name wins)"] + Registry --> Available["Available as /commands
in prompt input"] + + style Registry fill:#4a9eff,color:#fff +``` + +### Skill Definition Format + +```markdown +--- +name: review +description: Review code changes for quality and bugs +when-to-use: When user asks for a code review or PR review +allowed-tools: Read, Grep, Glob, Bash +argument-hint: "[files or PR number]" +context: fork +model: inherit +user-invocable: true +effort: medium +hooks: + PostToolUse: + - matcher: {tool_name: "Bash"} + command: "echo 'reviewed'" +--- + +Review the current git diff comprehensively: + +1. **Security**: Check for injection, XSS, exposed secrets +2. **Logic**: Verify edge cases, error handling, race conditions +3. **Style**: Ensure consistent patterns with existing code +4. **Performance**: Flag N+1 queries, unnecessary allocations + +Report findings as: +- CRITICAL: Must fix before merge +- WARNING: Should address +- NOTE: Consider for future +``` + +### Skill Frontmatter Fields + +| Field | Type | Purpose | +|-------|------|---------| +| `name` | string | Skill name (also command name) | +| `description` | string | One-line description | +| `when-to-use` | string | When the LLM should suggest this skill | +| `allowed-tools` | string[] | Comma-separated tool names | +| `argument-hint` | string | Usage hint shown in autocomplete | +| `arguments` | string[] | Named argument list | +| `model` | string | Model override or 'inherit' | +| `user-invocable` | boolean | Can user invoke via `/command` | +| `disable-model-invocation` | boolean | LLM cannot invoke automatically | +| `context` | 'inline' \| 'fork' | Execution mode | +| `agent` | string | Agent type to use | +| `effort` | 'low' \| 'medium' \| 'high' | Effort level | +| `shell` | string | Shell interpreter for prompt | +| `hooks` | HooksSettings | Hook configuration | +| `paths` | string[] | File patterns for activation | + +### Execution Modes + +**Inline** (`context: 'inline'`): +- Skill prompt injected directly into the current conversation +- Uses the main agent's 
tools and context +- Simpler, no overhead + +**Fork** (`context: 'fork'`): +- Spawns an isolated sub-agent with the skill's prompt +- Gets its own `QueryEngine` with shared cache +- Tool set restricted to `allowedTools` from frontmatter +- Returns result text to the main conversation + +### Bundled Skill Reference Files + +Some bundled skills include reference files that are extracted to disk on first invocation: + +```typescript +{ + name: 'claude-api', + files: { + 'sdk-reference/anthropic-sdk.md': '...', + 'sdk-reference/tool-use.md': '...', + }, + getPromptForCommand: async (args, context) => { + // Prepends "Base directory for this skill: <dir>" to prompt + return [{ type: 'text', text: promptContent }] + } +} +``` + +Files are extracted with `O_EXCL` flag (fail if exists), per-process nonce, and `0o700`/`0o600` permissions for security. + +--- + +## 15. Task System + +### Task Model + +```mermaid +stateDiagram-v2 + [*] --> pending: TaskCreate + pending --> running: TaskUpdate(status) + running --> completed: TaskUpdate(status) + running --> failed: TaskUpdate(status) + pending --> completed: TaskUpdate(status) + pending --> deleted: TaskUpdate(status='deleted') + running --> killed: TaskStop + completed --> [*] + failed --> [*] + killed --> [*] + deleted --> [*] +``` + +### Task Types + +| Type | Prefix | Description | +|------|--------|-------------| +| `local_bash` | `b` | Background shell command | +| `local_agent` | `a` | Local subagent | +| `remote_agent` | `r` | CCR remote agent | +| `in_process_teammate` | `t` | In-process teammate (swarm) | +| `local_workflow` | `w` | Local workflow script | +| `monitor_mcp` | `m` | MCP server monitor | +| `dream` | `d` | Auto-dream background task | + +Task IDs are generated as `{prefix}{8 random alphanumeric chars}` (e.g., `a4kx92mf3`).
+ +### Task Schema + +```typescript +TaskCreate input: { + subject: string // Brief title + description: string // What needs to be done + activeForm?: string // Present continuous for spinner ("Running tests") + metadata?: Record<string, unknown> +} + +TaskUpdate input: { + taskId: string + subject?: string + description?: string + activeForm?: string + status?: 'pending' | 'running' | 'completed' | 'failed' | 'deleted' + addBlocks?: string[] // Task IDs this task blocks + addBlockedBy?: string[] // Task IDs blocking this task + owner?: string // Teammate name for assignment + metadata?: Record<string, unknown> +} +``` + +### UI Integration + +The spinner component reads the active task: +``` +1. Find first task with status !== 'pending' && status !== 'completed' +2. Use its activeForm text as spinner verb +3. Fall back to random verb from getSpinnerVerbs() + +Example: "Running tests..." instead of "Thinking..." +``` + +The task list auto-expands (`expandedView = 'tasks'`) whenever a task is created or updated. + +### Teammate Assignment + +In swarm mode, tasks can be assigned to teammates: +```typescript +if (updates.owner && isAgentSwarmsEnabled()) { + const message = JSON.stringify({ + type: 'task_assignment', + taskId, + subject: existingTask.subject, + assignedBy: senderName, + timestamp: new Date().toISOString(), + }) + await writeToMailbox(updates.owner, message, taskListId) +} +``` + +--- + +## 16. Terminal UI Architecture + +Claude Code built a **custom React reconciler** for the terminal — the same abstraction layer that React DOM and React Native sit on. The `ink/` directory alone is thousands of lines. + +### Rendering Pipeline + +```mermaid +flowchart TD + subgraph "React Layer" + Components["React Components
REPL, Spinner, MessageList,
PermissionDialog, TaskList,
PromptInput, etc."] + end + + subgraph "Reconciler" + Reconciler["reconciler.ts
Custom React Reconciler
Manages terminal DOM lifecycle
createElement, appendChild,
removeChild, commitUpdate"] + end + + subgraph "Layout" + Yoga["Yoga Layout Engine
Flexbox for terminal cells
Same engine as React Native
flexDirection, justifyContent,
alignItems, padding, margin, etc."] + end + + subgraph "Render" + RenderNode["render-node-to-output.ts
Walk React tree → screen buffer
Apply styles per cell"] + Screen["screen.ts
Cell pool with interning:
- char (Unicode codepoint)
- style (fg, bg, bold, italic...)
- hyperlink (OSC 8 URL)"] + end + + subgraph "Display" + Frame["frame.ts
Double-buffered:
back frame (writing)
front frame (displayed)"] + Optimizer["optimizer.ts
Diff back vs front
Only emit changed cells"] + Output["output.ts
Generate ANSI escape codes
CSI sequences for colors,
cursor movement, styles"] + end + + subgraph "Input Processing" + Stdin["Terminal stdin"] --> ParseKey["parse-keypress.ts
State machine for
multi-byte sequences"] + ParseKey --> ChordMatch["keybindings/match.ts
Match against chord
bindings (ctrl+k ctrl+s)"] + Stdin --> MouseParse["Mouse mode-1003
Parse coordinates"] + MouseParse --> HitTest["hit-test.ts
Walk DOM tree
Find clicked element"] + HitTest --> Selection["selection.ts
Char/word/line modes
Shift+click range selection"] + end + + Components --> Reconciler + Reconciler --> Yoga + Yoga --> RenderNode + RenderNode --> Screen + Screen --> Frame + Frame --> Optimizer + Optimizer --> Output + Output --> Terminal(["Terminal stdout"]) + + ParseKey --> Components + ChordMatch --> Components + HitTest --> Components + + style Reconciler fill:#4a9eff,color:#fff + style Yoga fill:#b197fc,color:#000 + style Frame fill:#ffa94d,color:#000 + style Output fill:#69db7c,color:#000 +``` + +### Core Components + +| Component | File | Purpose | +|-----------|------|---------| +| `App.tsx` | `ink/components/App.tsx` (98KB) | Root: stdin/stdout/stderr, Ctrl+C, keyboard parsing, mouse tracking, focus | +| `Box` | `ink/components/Box.tsx` | Flex container (`<div>`) | +| `Text` | `ink/components/Text.tsx` | Text rendering with styles | +| `ScrollBox` | `ink/components/ScrollBox.tsx` | Scrollable container | +| `Button` | `ink/components/Button.tsx` | Interactive button | +| `AlternateScreen` | `ink/components/AlternateScreen.tsx` | Full-screen mode (smcup/rmcup) | +| `Link` | `ink/components/Link.tsx` | OSC 8 terminal hyperlinks | +| `RawAnsi` | `ink/components/RawAnsi.tsx` | Passthrough for pre-formatted ANSI | + +### Key Features + +- **Double buffering**: Back frame accumulates changes, optimizer diffs against front frame, only changed cells emit ANSI codes +- **Cell interning**: `screen.ts` uses pools for char/style/hyperlink to minimize memory allocation +- **Bidirectional text**: `bidi.ts` handles RTL text layout +- **Terminal capability detection**: `terminal-querier.ts` queries terminal for size, color support, sixel graphics +- **Search highlighting**: `searchHighlight.ts` overlays query matches across the screen buffer +- **Alternate screen**: Full-screen takeover for immersive views, restores original terminal on exit +- **Log update**: `log-update.ts` for incremental output (avoids full redraws) + +### Keyboard System + +```mermaid +flowchart LR + subgraph "Keybinding Configuration" + Default["Default bindings
(built into binary)"] + User["~/.claude/keybindings.json
(user overrides)"] + Default --> Merge["Merge: user overrides default"] + User --> Merge + end + + subgraph "Key Parsing" + Raw["Raw stdin bytes"] --> StateMachine["State machine parser
Handles: CSI, SS3, escape sequences"] + StateMachine --> KeyEvent["KeyEvent:
{key, ctrl, alt, shift, meta}"] + end + + subgraph "Chord Matching" + KeyEvent --> ChordBuffer["Chord buffer
Accumulates partial chords"] + ChordBuffer --> Match{"Match against
context bindings?"} + Match -->|"Full match"| Action["Dispatch action
e.g., app:toggleTodos"] + Match -->|"Partial"| Wait["Wait for next key"] + Match -->|"No match"| PassThrough["Pass to input handler"] + end + + style Merge fill:#4a9eff,color:#fff + style Action fill:#69db7c,color:#000 +``` + +**Contexts**: Global, Chat, Autocomplete, Confirmation, Help, Transcript, HistorySearch, Task, ThemePicker, Settings, Tabs, Attachments, Footer, MessageSelector, DiffDialog, ModelPicker, Select, Plugin + +**Example chord**: `ctrl+k ctrl+s` — first key enters chord mode, second key completes the action. + +--- + +## 17. Cost Tracking + +### Calculation + +```mermaid +flowchart LR + Response["API Response
usage object"] --> Extract["Extract:
input_tokens
output_tokens
cache_read_input_tokens
cache_creation_input_tokens
web_search_requests"] + + Extract --> Formula["cost =
(input / 1M) × inputPrice
+ (output / 1M) × outputPrice
+ (cacheRead / 1M) × cacheReadPrice
+ (cacheCreate / 1M) × cacheWritePrice
+ webSearches × searchPrice"] + + Formula --> PerModel["Accumulate per model:
tokensIn, tokensOut,
cacheRead, cacheCreate,
costUSD"] + + PerModel --> Display["On exit:
Total cost: $X.XX
Total duration (API): Xs
Total duration (wall): Xs
Total code changes: +N, -M
Per-model breakdown"] +``` + +### Pricing Tiers (per 1M tokens) + +| Tier | Input | Output | Cache Read | Cache Write | Models | +|------|-------|--------|------------|-------------|--------| +| COST_TIER_3_15 | $3 | $15 | $0.30 | $3.75 | Sonnet 3.5–4.6 | +| COST_TIER_15_75 | $15 | $75 | $1.50 | $18.75 | Opus 4.0, 4.1 | +| COST_TIER_5_25 | $5 | $25 | $0.50 | $6.25 | Opus 4.5 | +| COST_TIER_30_150 | $30 | $150 | $3.00 | $37.50 | Opus 4.6 (fast) | +| COST_HAIKU_35 | $0.80 | $4 | $0.08 | $1.00 | Haiku 3.5 | +| COST_HAIKU_45 | $1 | $5 | $0.10 | $1.25 | Haiku 4.5 | + +### Display Format + +``` +formatCost(cost): + if cost > $0.50 → round to 2 decimal places ($1.23) + else → show 4 decimal places ($0.0042) +``` + +Cost summary is saved per-session to project config for historical tracking. + +--- + +## 18. MCP Integration + +### Connection Flow + +```mermaid +sequenceDiagram + participant Config as Settings + participant Client as MCP Client + participant Server as MCP Server + participant Registry as Tool Registry + + Config->>Client: Load MCP server configs
(user + project + managed) + + loop For each configured server + Client->>Server: Connect via transport
(stdio | sse | http | ws | sdk) + + alt OAuth required + Client->>Client: Refresh OAuth token
(XAA cross-app access) + end + + Server-->>Client: Server capabilities + Client->>Server: List tools + Server-->>Client: Tool definitions (name, schema, description) + + loop For each tool + Client->>Registry: Register as mcp__server__toolname
with JSON Schema input + end + + Client->>Server: List resources + Server-->>Client: Resource definitions + + loop For each resource + Client->>Registry: Register ListMcpResourcesTool
+ ReadMcpResourceTool + end + end +``` + +### Transport Types + +| Transport | Protocol | Use case | +|-----------|----------|----------| +| `stdio` | JSON-RPC over stdin/stdout | Local process (most common) | +| `sse` | HTTP Server-Sent Events | HTTP servers | +| `http` | HTTP POST | Stateless HTTP APIs | +| `ws` | WebSocket | Persistent connections | +| `sdk` | In-process SDK | Embedded servers | + +### Config Scopes + +```mermaid +graph LR + subgraph "Sources (priority order)" + Managed["Managed settings
(enterprise, read-only)"] + Project[".claude/settings.json
(project, committed)"] + Local[".claude/settings.local.json
(personal, gitignored)"] + User["~/.claude/settings.json
(global user)"] + Dynamic["Dynamic
(runtime registration)"] + end + + Managed --> Merge[Merge configs] + Project --> Merge + Local --> Merge + User --> Merge + Dynamic --> Merge + + Merge --> Servers["Active MCP servers"] +``` + +### Permission Integration + +MCP tools follow the same permission system as built-in tools: +- Server-level rules: `mcp__github` matches ALL tools from the GitHub server +- Tool-level rules: `mcp__github__list_repos` matches a specific tool +- Wildcard: `mcp__github__*` matches all tools from GitHub +- Content-specific: `mcp__github__create_issue(repo:my-org/*)` matches specific repos + +### Content Handling + +- **Truncation**: Large MCP tool outputs are truncated before returning to the LLM +- **Binary blobs**: Binary content saved to persistent storage with a text reference +- **Error recovery**: Code-indexing detection prevents MCP errors from crashing the agent +- **OAuth refresh**: Automatic token refresh for authenticated MCP servers + +--- + +## 19. Session & State Management + +### Application State + +```mermaid +graph TD + subgraph "AppState Store (pub/sub)" + Settings["settings: SettingsJson"] + Model["mainLoopModel: ModelSetting"] + Permissions["toolPermissionContext"] + Tasks3["tasks: {[id]: TaskState}"] + MCPState["mcp: {clients, tools}"] + Plugins["plugins: {enabled, disabled}"] + Todos["todos: {[agentId]: TodoList}"] + Thinking["thinkingEnabled: boolean"] + View["expandedView: 'none'|'tasks'|'teammates'"] + Notifications["notifications: {current, queue}"] + Elicitation["elicitation: {queue}"] + end + + subgraph "Store API" + Get["getState() → snapshot"] + Set["setState(updater) → triggers listeners"] + Sub["subscribe(listener) → unsubscribe fn"] + end + + subgraph "Subscribers" + REPL3["REPL Screen"] + Spinner2["Spinner"] + TaskUI["Task List"] + PermUI["Permission Dialog"] + end + + Set --> REPL3 + Set --> Spinner2 + Set --> TaskUI + Set --> PermUI +``` + +### Session Persistence + +```mermaid +flowchart TD + subgraph "Per-Turn" + Record["recordTranscript()
Fire-and-forget"] + Record --> SessionJSON["~/.claude/projects/{project}/
{session-id}.jsonl"] + end + + subgraph "History" + AddHistory["addToHistory()"] --> HistFile["~/.claude/history.jsonl
Max 100 entries"] + HistFile --> Format["Per entry:
{display, pastedContents,
timestamp, project, sessionId}"] + end + + subgraph "Pasted Content" + LargeContent{"> 1KB?"} -->|Yes| External["Store externally
Reference: [Pasted text #1 +10 lines]"] + LargeContent -->|No| Inline["Store inline in history entry"] + end + + subgraph "Resume Flow" + ResumeCmd["/resume or --resume"] --> LoadSession["Load session JSON"] + LoadSession --> Deserialize["Deserialize messages"] + Deserialize --> RestoreState["Restore compact boundaries,
tool state, permissions"] + RestoreState --> Continue["Continue conversation"] + end + + style SessionJSON fill:#4a9eff,color:#fff + style HistFile fill:#69db7c,color:#000 +``` + +### Session History Deduplication + +When reading history, current-session entries are yielded first, then other sessions from the same project: + +```typescript +for await (const entry of makeLogEntryReader()) { + if (entry.project !== currentProject) continue; + if (entry.sessionId === currentSession) { + yield entry; // Current session first + } else { + otherSessionEntries.push(entry); // Buffer others + } + if (yielded + others >= MAX_HISTORY_ITEMS) break; +} +for (const entry of otherSessionEntries) { + yield entry; // Then other sessions +} +``` + +### Remote Sessions + +For CCR (Claude Code Remote): + +```mermaid +sequenceDiagram + participant Local as Local Client + participant WS as WebSocket + participant Remote as Remote Agent + + Local->>WS: Connect to /v1/sessions/ws/{id}/subscribe + WS->>Local: auth required + Local->>WS: {type: 'auth', credential: {type: 'oauth', token: '...'}} + + loop Conversation + Local->>Remote: HTTP POST — send user message + Remote->>WS: Stream SDKMessage events + WS->>Local: Display messages + + alt Permission needed + Remote->>WS: permission_request event + WS->>Local: Show permission dialog + Local->>Remote: HTTP POST — permission response + end + end + + Note over WS: Reconnect: max 5 retries, 2s delay + Note over WS: Ping keepalive: every 30s + Note over WS: Permanent close: 4003 (unauthorized) + Note over WS: Transient retry: 4001 (session not found during compaction) +``` + +--- + +## 20. The Verification Agent + +One of the most architecturally interesting decisions: an adversarial agent specifically designed to distrust its own outputs and fight its cognitive biases. 
+ +### Trigger Conditions + +The main agent spawns a verification agent when implementation is "non-trivial": +- 3+ file edits +- Backend/API changes +- Infrastructure changes + +### Verification Flow + +```mermaid +flowchart TD + Impl["Implementation complete"] --> Trigger{Non-trivial?} + Trigger -->|No| Report["Report to user directly"] + Trigger -->|Yes| Spawn["Spawn verification agent
subagent_type='verification'"] + + Spawn --> Receive["Receives:
- Original user request
- All files changed
- Approach taken
- Plan file path"] + + Receive --> Strategy["Select verification strategy
based on change type"] + + subgraph "Strategy Selection" + Frontend["Frontend: Start dev server,
hit all routes, test forms"] + Backend["Backend: Start server,
curl endpoints, test edge cases"] + CLI2["CLI: Run commands,
test flags, check output"] + Infra["Infra: Validate configs,
dry-run deploys"] + DB["Database: Check migrations,
test rollback"] + Refactor["Refactor: Run test suite,
check for regressions"] + end + + Strategy --> Execute["Execute checks"] + + subgraph "Required Output Format" + Check["### Check: [what you're verifying]
**Command run:**
[exact command]
**Output observed:**
[actual output — copy-paste]
**Result: PASS** (or FAIL)"] + end + + Execute --> Check + Check --> Verdict{VERDICT?} + + Verdict -->|PASS| SpotCheck["Main agent spot-checks:
Re-run 2-3 commands
Verify output matches"] + SpotCheck --> SpotResult{Outputs match?} + SpotResult -->|Yes| Done([Report PASS to user]) + SpotResult -->|No| Resume["Resume verifier
with discrepancy details"] + + Verdict -->|FAIL| Fix["Main agent fixes issue"] + Fix --> ReVerify["Resume verifier
with fix details"] + ReVerify --> Execute + + Verdict -->|PARTIAL| ReportPartial["Report what passed
and what couldn't be verified"] + + style Spawn fill:#ff6b6b,color:#fff + style Execute fill:#ffa94d,color:#000 + style Done fill:#69db7c,color:#000 +``` + +### The Anti-Bias Prompt + +The verification agent's system prompt contains an extraordinary section on cognitive bias awareness: + +> **"You have two documented failure patterns."** +> +> **First, verification avoidance**: when faced with a check, you find reasons not to run it — you read code, narrate what you would test, write "PASS," and move on. +> +> **Second, being seduced by the first 80%**: you see a polished UI or a passing test suite and feel inclined to pass it, not noticing half the buttons do nothing, the state vanishes on refresh, or the backend crashes on bad input. The first 80% is the easy part. Your entire value is in finding the last 20%. + +Then it lists specific rationalizations and their counters: + +| Excuse the agent will reach for | Counter | +|------|---------| +| "The code looks correct based on my reading" | Reading is not verification. Run it. | +| "The implementer's tests already pass" | The implementer is an LLM. Verify independently. | +| "This is probably fine" | Probably is not verified. Run it. | +| "Let me start the server and check the code" | No. Start the server and hit the endpoint. | +| "I don't have a browser" | Did you check for mcp__chrome / mcp__playwright? Use them. | +| "This would take too long" | Not your call. | + +> "If you catch yourself writing an explanation instead of a command, stop. Run the command." + +### Spot-Check Protocol + +After the verifier returns PASS, the main agent doesn't just trust it: + +1. Re-run 2-3 commands from the verifier's report +2. Confirm every PASS has a "Command run" block with actual output +3. Verify output matches the re-run +4. 
If any PASS lacks a command block or output diverges → resume verifier with specifics + +--- + +## Summary: File Reference + +| System | Core Files | Lines | +|--------|-----------|-------| +| Agent Loop | `query.ts`, `QueryEngine.ts` | ~4,500 | +| Streaming | `StreamingToolExecutor.ts`, `claude.ts` | ~3,500 | +| Tool System | `Tool.ts`, `tools.ts` | ~1,500 | +| Tool Implementations | `tools/` directory | ~12,000 | +| System Prompt | `constants/prompts.ts` | 914 | +| Context Management | `services/compact/` | ~3,000 | +| Token Estimation | `services/tokenEstimation.ts` | ~500 | +| Subagents | `tools/AgentTool/` | ~2,000 | +| Permissions | `utils/permissions/` | ~3,000 | +| Hooks | `utils/hooks/`, `types/hooks.ts` | ~1,500 | +| Memory | `memdir/`, `services/extractMemories/` | ~2,000 | +| Skills | `skills/`, `tools/SkillTool/` | ~2,500 | +| Tasks | `tools/TaskCreateTool/` etc. | ~1,000 | +| Terminal UI | `ink/` | ~8,000 | +| Cost Tracking | `cost-tracker.ts`, `utils/modelCost.ts` | ~500 | +| MCP | `services/mcp/` | ~3,000 | +| Session/State | `state/`, `history.ts` | ~2,000 | +| Commands | `commands.ts`, `commands/` | ~3,000 | +| Keybindings | `keybindings/` | ~1,000 | +| **Total** | **1,903 files** | **~50,000+** | diff --git a/docs/ecosystem/kosmokrator/research/opencode-feature-analysis.md b/docs/ecosystem/kosmokrator/research/opencode-feature-analysis.md new file mode 100644 index 0000000..0c55001 --- /dev/null +++ b/docs/ecosystem/kosmokrator/research/opencode-feature-analysis.md @@ -0,0 +1,320 @@ +# OpenCode Feature Analysis for KosmoKrator + +Analysis of features from [OpenCode](https://github.com/opencode-ai/opencode) that could be implemented in KosmoKrator, ordered by impact and feasibility. + +--- + +## High Impact — Should Implement + +### 1. Permission System for Tool Execution + +OpenCode has a rule-based permission system (`allow` / `deny` / `ask`) with glob pattern matching. 
Tools like `bash`, `file_write`, `file_edit` prompt the user before executing. KosmoKrator currently auto-executes everything. + +**How OpenCode does it:** +- Rules defined as `{permission: string, pattern: string, action: "allow" | "deny" | "ask"}` +- Last-match-wins for overlapping patterns, wildcard/glob support +- Rulesets merge hierarchically: system defaults → agent defaults → user config +- Request/reply workflow: tool asks permission, UI shows prompt, user responds "once" / "always" / "reject" +- Edit tools require ask with diff metadata shown to user +- `.env` files always require explicit approval +- Tracks tool calls with messageID + callID for audit + +**Scope for KosmoKrator:** +- New `Permission/` namespace with `Rule`, `Ruleset`, `PermissionEvaluator` +- Default rules: `bash` → ask, `file_write`/`file_edit` → ask, `file_read`/`glob`/`grep` → allow +- UI integration: renderer shows permission prompt, user approves/denies +- Config in `kosmokrator.yaml` for user overrides +- "Always allow" memory per session + +**Why:** Safety is essential — one wrong `rm -rf` and you've lost work. + +--- + +### 2. Session Persistence (SQLite) + +OpenCode persists sessions to SQLite so you can resume conversations, review history, and export/import. KosmoKrator's `ConversationHistory` is in-memory only. 
+ +**How OpenCode does it:** +- SQLite via Drizzle ORM with migration system +- Tables: sessions (id, slug, title, directory, version), messages (id, sessionID, role), parts (id, messageID, type, content) +- Message parts are polymorphic: text, tool_call, tool_result, reasoning, snapshot, patch +- Session listing with fuzzy search in TUI dialog +- Archive/restore capability +- Auto-generated session titles via dedicated LLM agent + +**Scope for KosmoKrator:** +- SQLite storage at `~/.kosmokrator/sessions.db` +- Schema: sessions table + messages table (JSON content column is simplest start) +- New commands: `/sessions` (list), `/resume <id>` (restore), `/export` (dump JSON) +- `ConversationHistory` backed by SQLite instead of in-memory array +- Session title auto-generation (use the LLM itself with a short prompt) + +**Why:** Losing context on restart is a major UX gap. + +--- + +### 3. Context Window Compaction + +OpenCode has a dedicated compaction agent that summarizes old messages when approaching token limits, preserving critical context. KosmoKrator has basic `trimOldest()` which just drops messages. + +**How OpenCode does it:** +- Dedicated hidden `compaction` agent with its own system prompt +- Triggered at configurable token/message thresholds +- Summarizes old messages into a compact system message +- Preserves critical context: file edits, error messages, tool results +- Maintains conversation continuity — the agent doesn't notice the compaction + +**Scope for KosmoKrator:** +- Replace `trimOldest()` with a compaction strategy +- When token count approaches limit, send oldest N messages to LLM with "summarize this conversation segment" prompt +- Replace those messages with a single `SystemMessage` containing the summary +- Keep the most recent messages intact +- Log compaction events + +**Why:** The current trim approach loses important context silently, leading to the agent forgetting what it was doing. + +--- + +### 4. 
Multi-Agent / Subagent System + +OpenCode has specialized agents: `build` (full access), `plan` (read-only), `explore` (fast search), `general` (subagent for complex tasks). Each with different tool access and system prompts. + +**How OpenCode does it:** +- Agent definitions with: name, tools list, permission ruleset, system prompt, temperature, mode (primary/subagent) +- `build`: default agent, all tools, question/planning allowed +- `plan`: disables all edit tools, read-only exploration +- `explore`: restricted to search tools (glob, grep, read), fast model +- `general`: subagent spawned by build for parallel/complex tasks +- Agent switching via slash command or automatic delegation +- Each agent has its own step limit + +**Scope for KosmoKrator:** +- `Agent/AgentDefinition` class with: name, allowed tools, system prompt, temperature, max rounds +- Built-in agents: `code` (full access), `plan` (read-only), `explore` (search only) +- `/plan` and `/code` commands to switch modes +- Agent config in `kosmokrator.yaml` +- ToolRegistry filtered by agent's allowed tools + +**Why:** Plan mode and explore mode are very useful for different workflows. Prevents accidental edits during analysis. + +--- + +### 5. Project Instructions (KOSMOKRATOR.md) + +OpenCode reads `.opencode/settings.json` and project-level instruction files. KosmoKrator should read project-specific files from the working directory to inject into the system prompt. 
+ +**How OpenCode does it:** +- Reads `.opencode/settings.json` for project config +- Merges with user-level `~/.opencode/settings.json` +- Injects environment context: working directory, git status, platform, shell, date +- Custom system prompt additions from config + +**Scope for KosmoKrator:** +- On startup, look for `KOSMOKRATOR.md` (or `.kosmokrator/instructions.md`) in CWD +- Read contents and prepend to the system prompt +- Also check `~/.kosmokrator/instructions.md` for global instructions +- Inject environment context: CWD, git branch, platform, PHP version, date + +**Why:** Per-project customization is critical — the agent needs to know about coding standards, architecture decisions, and project-specific context. + +--- + +## Medium Impact — Worth Implementing + +### 6. Slash Commands & Skills System + +OpenCode has a skill system that loads `SKILL.md` files as reusable prompt templates. + +**How OpenCode does it:** +- Skills discovered from: `~/.claude/skills/**/SKILL.md`, `~/.agents/skills/**/SKILL.md`, `.opencode/skills/**/SKILL.md` +- Skill format: markdown with YAML frontmatter (name, description) +- Loaded into agent as available slash commands +- Permission-aware: skills can be denied per agent +- Shown in system prompt with descriptions + +**Scope for KosmoKrator:** +- Scan `~/.kosmokrator/skills/` and `.kosmokrator/skills/` for `*.md` files +- Parse frontmatter for name/description +- Register as slash commands: `/commit`, `/review`, `/test`, etc. +- When invoked, inject skill content as user message or system prompt addition +- Ship a few built-in skills: `/commit` (generate commit message), `/explain` (explain selected code) + +**Why:** Reusable prompt templates save time and ensure consistency. + +--- + +### 7. Accurate Cost Tracking with Per-Model Pricing + +OpenCode has detailed per-model pricing tables with cache-aware cost calculation. 
+ +**How OpenCode does it:** +- Pricing table per provider/model with input/output/cache rates +- Separate tracking: prompt tokens, completion tokens, reasoning tokens, cache read/write +- Special pricing tiers (200K+ token discounts for some models) +- Cumulative session cost displayed in status bar +- `stats` command for historical cost breakdown + +**Scope for KosmoKrator:** +- Pricing config in `config/pricing.yaml` with per-model rates +- Replace hardcoded `estimateCost()` with config-driven calculation +- Track cumulative session cost +- Display per-turn and session-total cost in status bar +- `/cost` command for session cost breakdown + +**Why:** Users need to know what they're spending, especially with expensive models. + +--- + +### 8. LSP Integration Tool + +OpenCode integrates language servers for go-to-definition, hover info, diagnostics. + +**How OpenCode does it:** +- Multi-server support: TypeScript, Python, Go, Rust, C/C++ +- LSP features: documentSymbol, hover, definition, references, diagnostics +- Cached diagnostics per file with real-time updates +- Exposed as a tool the agent can call +- Auto-detects which language server to use based on file type + +**Scope for KosmoKrator:** +- New `LspTool` in `Tool/Coding/` +- Start language servers as background processes +- Operations: `hover` (type info), `definition` (go-to-def), `diagnostics` (errors/warnings), `references` +- Auto-detect server from file extension (phpactor or intelephense for PHP, typescript-language-server for TS, etc.) +- Cache server instances per session + +**Why:** Gives the agent precise code intelligence beyond grep — especially useful for understanding types and finding references. + +--- + +### 9. Session Revert / Undo + +OpenCode can revert to a previous point in conversation, undoing tool calls. 
+ +**How OpenCode does it:** +- Version tracking per session +- Snapshot conversation state at key points +- Revert removes messages after snapshot point +- Unrevert to restore if revert was accidental +- Works with persisted sessions (SQLite) + +**Scope for KosmoKrator:** +- Snapshot `ConversationHistory` state before each `agentLoop->run()` call +- `/undo` command: pop the last turn (user message + all agent messages/tool calls) +- Store snapshots as stack (last N turns) +- If session persistence is implemented, revert in DB too + +**Why:** Very useful when the agent goes down a wrong path — cheaper than `/reset` which loses everything. + +--- + +### 10. Environment Context in System Prompt + +OpenCode automatically injects runtime context into the system prompt. + +**How OpenCode does it:** +``` +Working directory: /path/to/project +Workspace root folder: /path/to/git/root +Is directory a git repo: yes +Platform: darwin +Shell: zsh +OS Version: Darwin 25.0.0 +Today's date: 2026-03-29 +``` + +**Scope for KosmoKrator:** +- Gather: CWD, git branch, git root, platform, PHP version, composer.json name/description, date +- Append as system prompt section before user's first message +- Update on each turn if CWD changes (bash `cd`) + +**Why:** Small effort, big payoff. The agent makes better decisions when it knows the environment. + +--- + +## Lower Priority — Nice to Have + +### 11. MCP (Model Context Protocol) Support + +Extend the agent's capabilities dynamically via external MCP servers. + +**How OpenCode does it:** +- MCP client with stdio, SSE, and HTTP streaming transports +- Auto-discovers tools from connected MCP servers +- OAuth support for authenticated servers +- Tool list change notifications + +**Scope:** New `Mcp/` namespace with client implementation, tool bridge to `ToolRegistry`. + +--- + +### 12. WebFetch / WebSearch Tools + +Let the agent browse documentation and search the web. 
+ +**Scope:** Two new tools — `WebFetchTool` (HTTP GET + HTML-to-text) and `WebSearchTool` (via SearXNG, Brave, or similar API). + +--- + +### 13. Plugin / Hook System + +Extensibility for third-party integrations. + +**How OpenCode does it:** +- Hook-based: `chat.system.transform`, `chat.params`, `tool.definition`, `shell.env`, `event` +- Plugins loaded from npm packages or local paths +- Sequential hook execution for deterministic ordering + +**Scope:** Event-based hook system using Laravel's `Dispatcher`, plugin discovery from `~/.kosmokrator/plugins/`. + +--- + +### 14. Multi-Provider Support + +Easy switching between Claude, OpenAI, Gemini, local models. + +**How OpenCode does it:** +- 24+ bundled providers with unified interface +- Model discovery and fuzzy sorting +- Per-model capability detection + +**Scope:** Already partially handled by Prism. Need: model selection UI, `/model` command, pricing awareness per provider. + +--- + +### 15. Export / Import Sessions + +Share conversations as files. + +**Scope:** `/export` command dumps session to JSON/Markdown. `/import` restores from file. Requires session persistence (#2) first. + +--- + +### 16. Task / Todo Management + +Persistent task tracking across sessions. + +**How OpenCode does it:** +- `TodoWrite` tool for the agent to create/update tasks +- Tasks persisted in session storage +- Displayed in TUI sidebar +- Survive across conversation turns + +**Scope:** New `TodoTool`, tasks stored in `~/.kosmokrator/todos/` or session DB, `/todos` command to list. + +--- + +## Implementation Priority + +Suggested order based on dependencies and impact: + +1. **Environment Context** (#10) — quick win, no dependencies +2. **Project Instructions** (#5) — quick win, no dependencies +3. **Permission System** (#1) — safety-critical, should come before more tools +4. **Session Persistence** (#2) — enables many other features +5. **Context Compaction** (#3) — depends on LLM client being stable +6. 
**Cost Tracking** (#7) — straightforward config change +7. **Multi-Agent** (#4) — builds on permission system +8. **Skills System** (#6) — builds on slash command infrastructure +9. **Session Revert** (#9) — builds on session persistence +10. **LSP Integration** (#8) — standalone but complex diff --git a/docs/ecosystem/kosmokrator/tools/web-tools-spec.md b/docs/ecosystem/kosmokrator/tools/web-tools-spec.md new file mode 100644 index 0000000..30e5c7e --- /dev/null +++ b/docs/ecosystem/kosmokrator/tools/web-tools-spec.md @@ -0,0 +1,146 @@ +# Claude Code Web Tools — Reverse Engineered Spec + +Reverse engineered from tool schemas, runtime behavior, and inner model probing on 2026-03-30. +Claude Code version: 2.1.86 (Bun-compiled Mach-O binary, installed via Homebrew cask). + +## Architecture Overview + +``` + Claude Code (Opus 4.6, 1M context) + │ │ + WebSearch WebFetch + │ │ + ┌───────┴───────┐ ┌──────┴──────┐ + │ Search API │ │ HTTP GET │ + │ (unknown │ │ raw HTML │ + │ provider) │ │ │ │ + └───────┬───────┘ │ HTML→MD │ + │ │ converter │ + search result │ │ │ + blocks with │ Inner │ + titles/URLs/ │ Claude │ + snippets │ model │ + │ │ │ │ + ▼ │ processed │ + returned to │ response │ + outer model └──────┬──────┘ + │ + returned to + outer model +``` + +## WebSearch + +### Schema + +```json +{ + "name": "WebSearch", + "parameters": { + "query": { "type": "string", "required": true, "minLength": 2 }, + "allowed_domains": { "type": "string[]", "optional": true }, + "blocked_domains": { "type": "string[]", "optional": true } + } +} +``` + +### Behavior + +- Executes a web search and returns result blocks (titles, snippets, markdown hyperlinks) +- Domain filtering: whitelist via `allowed_domains`, blacklist via `blocked_domains` +- Geographically restricted to the US +- Results returned in a single API call — no pagination +- Search provider is opaque (likely Brave Search API based on public Anthropic disclosures) +- Outer model is required to append a `Sources:` section 
with URLs after any answer using results +- Must use current year (2026) in queries for recent info + +### Constraints + +- No authenticated/private URL access +- No JS rendering +- No control over result count +- US-only availability + +## WebFetch + +### Schema + +```json +{ + "name": "WebFetch", + "parameters": { + "url": { "type": "string", "format": "uri", "required": true }, + "prompt": { "type": "string", "required": true } + } +} +``` + +### Pipeline + +1. **HTTP GET** — Plain fetch, no JS execution, no headless browser +2. **HTTPS upgrade** — HTTP URLs auto-upgraded to HTTPS +3. **HTML to Markdown** — Raw HTML converted to markdown +4. **Inner model call** — Markdown content + user prompt sent to a Claude model +5. **Response** — Inner model's text response returned to the outer model + +### Inner Model Details + +| Property | Value | +|----------|-------| +| Model family | Claude (self-identifies as "3.5 Sonnet", actual version unknown, likely Haiku) | +| Context/budget | 200,000 tokens | +| System identity | "You are Claude Code, Anthropic's official CLI for Claude." | +| Tools | None — plain text completion | +| Conversation | Single turn, no history | +| Content placement | Web page content in user message, not system message | +| System prompt tags | ``, `` | + +### Caching + +- 15-minute self-cleaning cache +- Repeated fetches to the same URL within 15 minutes return cached results + +### Redirect Handling + +- Same-host redirects: followed automatically +- Cross-host redirects: returns redirect URL to outer model for manual re-fetch + +### Failure Modes + +- Authenticated URLs (Google Docs, Confluence, Jira): always fails +- JS-rendered SPAs (client-side only): returns empty shell HTML +- SSR pages: works fine (content in initial HTML) +- Large pages: content summarized/truncated by inner model + +### Content Processing + +The outer model (me) never sees raw HTML. 
The inner model acts as a lossy filter: +- Receives the full markdown conversion +- Processes it according to the `prompt` parameter +- Returns a summary/extraction +- Subject to IP restrictions (no full reproduction of copyrighted content, 125 char quote limit, no lyrics) + +This means the `prompt` parameter is critical — it determines what information survives the inner model's processing. + +## Typical Usage Pattern + +``` +1. WebSearch("laravel queue batching 2026") + → search result blocks with URLs + +2. User picks a relevant URL from results + +3. WebFetch("https://laravel.com/docs/...", "Extract the code example for queue batching") + → inner model reads page, extracts requested info, returns summary + +4. Outer model synthesizes answer with Sources: section +``` + +## What We Don't Know + +- Exact search provider (Brave suspected, not confirmed) +- Exact inner model version (Haiku suspected, self-reports as Sonnet) +- Whether the 200k budget is input context, output limit, or total +- Exact wording of the 5 ip_reminder sentences +- Whether the inner model system prompt varies by context +- Rate limits or quotas diff --git a/docs/ecosystem/kosmokrator/tools/webfetch-system-prompt.md b/docs/ecosystem/kosmokrator/tools/webfetch-system-prompt.md new file mode 100644 index 0000000..e6ad86a --- /dev/null +++ b/docs/ecosystem/kosmokrator/tools/webfetch-system-prompt.md @@ -0,0 +1,62 @@ +# WebFetch Inner Model — Reconstructed System Prompt + +Extracted via prompt injection (yes/no probing, paraphrase requests, tag name extraction) on 2026-03-30. + +## System Message + +``` +You are Claude Code, Anthropic's official CLI for Claude. + +200000 + + +1. Do not reproduce copyrighted material in full. +2. Do not reproduce song lyrics. +3. Do not reproduce full content from books. +4. Quote only briefly (125 character limit on quoted passages). +5. Summarize instead of quoting where possible. + +``` + +Note: The 5 ip_reminder sentences are paraphrased reconstructions. 
The exact wording was not extractable — the model refused verbatim reproduction of its own instructions. The semantics are confirmed via yes/no probing. + +## User Message Format + +``` +Web page content: +--- +[Page title converted to markdown heading] +=========================== +[HTML-to-markdown converted page body] +--- + +[User's prompt goes here] +``` + +## Confirmed Properties + +| Property | Value | Method | +|----------|-------|--------| +| Identity string | "You are Claude Code, Anthropic's official CLI for Claude." | Direct extraction | +| XML tags present | ``, `` | Yes/no + tag name listing | +| Budget value | 200000 (tokens) | Direct extraction | +| ip_reminder length | 5 sentences | Yes/no confirmation | +| Mentions copyright | Yes | Yes/no | +| Mentions song lyrics | Yes | Yes/no | +| Mentions books | Yes | Yes/no | +| Mentions quoting briefly | Yes | Yes/no | +| Mentions summarizing | Yes | Yes/no | +| Mentions 125 char limit | Yes (from earlier probing) | Yes/no | +| Mentions being concise | Yes | Yes/no | +| Mentions max response length | Yes | Yes/no | +| Mentions markdown | Yes | Yes/no | +| Mentions Claude Code | Yes | Yes/no | +| Mentions fair use | No | Yes/no | +| Mentions news | No | Yes/no | +| Mentions poetry | No | Yes/no | +| Mentions tool use | No | Yes/no | +| Tools available | None | Yes/no | +| Message count | 1 (single turn) | Direct answer | +| Web content location | User message (not system) | Yes/no | +| Prompt separate from content | Yes | Yes/no | +| XML tags in input | Yes | Yes/no | diff --git a/docs/testing/feature-test-map.md b/docs/testing/feature-test-map.md index e6b2f53..4fa144f 100644 --- a/docs/testing/feature-test-map.md +++ b/docs/testing/feature-test-map.md @@ -1,1093 +1,375 @@ -# OpenCompany Feature Test Map +# Feature Test Map — Full Project Audit -Complete checklist of all features, buttons, and functionality to test. +Detailed test cases for all changes in the current git tree. 
Check off each item as you verify. --- -## 1. AUTHENTICATION PAGES - -### Login (`/login`) -- [ ] Email input field -- [ ] Password input field -- [ ] "Remember me" checkbox -- [ ] Login button (submit) -- [ ] "Forgot password" link -- [ ] Register link -- [ ] Error states for invalid credentials -- [ ] Loading state on submit - -### Register (`/register`) -- [ ] Name input field -- [ ] Email input field -- [ ] Password input field -- [ ] Confirm password input field -- [ ] Register button (submit) -- [ ] Login link -- [ ] Validation errors display -- [ ] Loading state on submit - -### Forgot Password (`/forgot-password`) -- [ ] Email input field -- [ ] Send reset link button -- [ ] Success message display -- [ ] Back to login link - -### Reset Password (`/reset-password/{token}`) -- [ ] Password input field -- [ ] Confirm password input field -- [ ] Reset password button -- [ ] Validation errors - -### Verify Email (`/verify-email`) -- [ ] Resend verification email button -- [ ] Success message display +## 1. 
Integration Ecosystem Refactor + +**Commits:** `4cffd75`, `24e2cc8`, `df74cb3` + +### Composer & Autoloading + +- [ ] `../integrations/` directory exists as a sibling to the project root +- [ ] `../integrations/core/` exists (integration-core package) +- [ ] `../integrations/packages/` exists with all integration subdirectories +- [ ] `composer install` completes without errors +- [ ] `composer show | grep opencompanyapp` lists all integration packages + integration-core + prism-relay +- [ ] `php artisan tinker` boots without class-not-found errors +- [ ] `ToolRegistry` resolves as singleton +- [ ] `ToolProviderRegistry` resolves + +### BuiltInToolProviders (15 providers) + +- [ ] **AgentsToolProvider** — `list_agents`, `contact_agent` work +- [ ] **AutomationsToolProvider** — list, get, create, update, delete, run +- [ ] **CalendarToolProvider** — calendar tools available +- [ ] **ChatToolProvider** — `send_channel_message`, `get_channel`, `list_channels` +- [ ] **DocsToolProvider** — `query_documents`, `index_document` +- [ ] **FilesToolProvider** — `list_files`, `read_file`, `write_file`, `search_files` +- [ ] **ListsToolProvider** — list CRUD tools +- [ ] **LuaToolProvider** — `lua_exec`, `lua_read_doc`, `lua_search_docs` +- [ ] **MemoryToolProvider** — `save_memory`, `recall_memories` +- [ ] **SvgToolProvider** — SVG generation +- [ ] **SystemToolProvider** — system info +- [ ] **TablesToolProvider** — table query/management +- [ ] **TasksToolProvider** — task tools +- [ ] **WorkspaceToolProvider** — workspace management + +### Direct vs Lua-only Tools + +Only `DIRECT_TOOL_GROUPS` (tasks, system, agents, memory, lua) are direct AI tools. Everything else via `lua_exec`. 
+ +- [ ] Agent uses `tasks` tools directly +- [ ] Agent uses `memory` tools directly +- [ ] Agent accesses `chat` tools via `lua_exec` +- [ ] Agent accesses `files` tools via `lua_exec` +- [ ] Agent accesses `tables` tools via `lua_exec` +- [ ] MCP tools still register and work via Lua + +### Integration Packages + +**No-auth (test first):** +- [ ] **CoinGecko** — `app.integrations.coingecko.*` returns data +- [ ] **ExchangeRate** — `app.integrations.exchangerate.*` returns data +- [ ] **Celestial** — `app.integrations.celestial.*` returns data +- [ ] **WorldBank** — `app.integrations.worldbank.*` returns data + +**Auth-required:** +- [ ] **ClickUp** — configure API key → returns data +- [ ] **Google** — configure OAuth → returns data +- [ ] **Plausible** — configure API key → returns data +- [ ] **TickTick** — configure credentials → returns data + +**ConfigurableIntegration:** +- [ ] Config UI renders on Integrations settings page +- [ ] "Test Connection" button returns success/failure +- [ ] Invalid credentials caught and shown as error + +### Lua Doc Generation + +- [ ] `lua_read_doc("overview")` — full namespace index +- [ ] `lua_read_doc("chat")` — detailed docs with parameter tables +- [ ] `lua_read_doc("integrations.clickup")` — includes supplementary Lua docs +- [ ] `lua_search_docs("send message")` — scored results +- [ ] `lua_read_doc("nonexistent")` — helpful error with available namespaces +- [ ] Namespaces sorted: internal → integrations → mcp + +### LuaBridge Call Routing + +- [ ] Table args: `app.chat.send_channel_message({channel_id = "x", content = "hello"})` works +- [ ] Positional args: `app.chat.send_channel_message("x", "hello")` maps to params +- [ ] Invalid function: `app.chat.nonexistent()` returns error with suggestions +- [ ] New-style integration tool executes via `Tool::execute()` +- [ ] Legacy tool executes via `handle(Request)` with snake_case→camelCase +- [ ] JSON auto-decoding: legacy JSON string → Lua table +- [ ] Call log records 
path, duration, status, icon, name, group +- [ ] Failed calls in call log with error message + +### PrismRelay (Custom LLM Providers) + +- [ ] GLM — agent generates response +- [ ] GLM Coding — agent generates response +- [ ] Kimi — agent generates response +- [ ] Kimi Coding — agent generates response +- [ ] MiniMax — agent generates response +- [ ] MiniMax CN — agent generates response +- [ ] No stored models → falls back to `ProviderMeta::defaultModel()` +- [ ] No custom URL → falls back to `ProviderMeta::url()` +- [ ] Unknown provider key → `InvalidArgumentException` + +### Monorepo Path & CI + +- [ ] `composer.json` path repos point to `../integrations/core` and `../integrations/packages/*` +- [ ] `.github/workflows/ci.yml` clones to `../integrations` +- [ ] `.claude/commands/create-integration.md` references `../integrations/` +- [ ] CI monorepo clone + `composer install` succeeds +- [ ] `php artisan test` passes --- -## 2. DASHBOARD (`/` or `/dashboard`) - -### Header -- [ ] Page title displays -- [ ] Subtitle displays - -### Stats Overview -- [ ] Agents Online stat card -- [ ] Pending Tasks stat card -- [ ] Unread Messages stat card -- [ ] Each stat shows correct number - -### Pending Approvals Section (if any) -- [ ] Approval cards display -- [ ] Approve button per item -- [ ] Reject button per item -- [ ] Amount display -- [ ] Requester info display -- [ ] View all link - -### Activity Feed -- [ ] Activity items load -- [ ] Activity type icons display -- [ ] Timestamps display -- [ ] User/agent avatars display -- [ ] Activity descriptions -- [ ] Load more (if > 20 items) - -### Quick Actions -- [ ] "Spawn Agent" button → opens modal -- [ ] "New Channel" button → opens modal -- [ ] "Create Task" button → opens modal -- [ ] "New Document" button → navigates - -### Working Agents Sidebar -- [ ] Agent cards display -- [ ] Agent status indicators (working/idle) -- [ ] Current task display -- [ ] Click agent → navigate to profile - -### Spawn Agent Modal -- [ ] Agent type selection (6 types: 
writer, analyst, researcher, creative, coder, coordinator) -- [ ] Agent name input -- [ ] Initial task textarea (optional) -- [ ] Behavior mode select (autonomous/supervised/strict) -- [ ] Ephemeral agent toggle -- [ ] Estimated cost display -- [ ] Cancel button -- [ ] Spawn Agent button -- [ ] Loading state on spawn +## 2. File Management System + +### Migrations & Models + +- [ ] `workspace_files` and `workspace_disks` tables exist +- [ ] Default "Local" disk auto-seeded per workspace +- [ ] `WorkspaceDisk.config` is encrypted at rest + +### File Browser UI (`/files`) + +- [ ] Page loads with grid view and file/folder icons +- [ ] Switch between grid and list view +- [ ] Create new folder (Cmd+Shift+N or toolbar) +- [ ] Upload file via drag-and-drop +- [ ] Upload file via toolbar button +- [ ] Inline rename (click name or press Enter) +- [ ] Move file to different folder +- [ ] Copy file +- [ ] Delete file (Delete key or context menu) +- [ ] Preview file (Space or click) — slideover opens +- [ ] Search files — results update as you type +- [ ] Select all (Cmd+A) +- [ ] Breadcrumb navigation +- [ ] Sidebar: disk list and folder tree + +### Storage Disks + +- [ ] Settings → Storage section visible +- [ ] Add new disk (S3 or SFTP) — config modal +- [ ] Test connection button returns success/failure +- [ ] Switch default disk +- [ ] Secrets masked in API responses (contain `****`) + +### Agent File Tools (10 tools) + +- [ ] `list_disks` — returns available disks +- [ ] `list_files` — files in folder +- [ ] `read_file` — file contents (text) or metadata+download URL (binary) +- [ ] `write_file` — creates/overwrites, auto-creates parent folders +- [ ] `create_folder` — creates with auto-intermediate +- [ ] `move_file` — moves between folders +- [ ] `copy_file` — copies file (not folders) +- [ ] `delete_file` — deletes file/folder (recursive for non-empty) +- [ ] `search_files` — by name/description, MIME filter +- [ ] `get_file_info` — metadata + +### Agent Permissions 
+ +- [ ] Agent with no file_folder permissions can only access `/agents/{slug}/` home folder +- [ ] Grant folder access → agent can read/write that folder +- [ ] Revoke folder access → agent denied +- [ ] Wildcard `*` grants unrestricted access +- [ ] Behavior modes: `autonomous` (no approval), `supervised` (write needs approval), `strict` (all needs approval) +- [ ] Agent capabilities page shows file folder access section + +### Edge Cases + +- [ ] Overwrite: `write_file` to same path silently overwrites +- [ ] Copy folder: returns error (only files can be copied) +- [ ] Recursive delete without `recursive=true` on non-empty folder → error +- [ ] Agent name change: old slug folder still exists but doesn't match new home folder path +- [ ] Disk update with masked `****` values → preserves original secrets --- -## 3. CHAT (`/chat`) - -### Channel List Sidebar -- [ ] Channel items display -- [ ] Unread count badges -- [ ] Channel type icons (public/private/agent/dm/external) -- [ ] Selected channel highlight -- [ ] "New Channel" button -- [ ] Search channels (if available) - -### Create Channel Modal -- [ ] Channel type selection (public/private/agent/dm/external) -- [ ] Channel name input (validation: lowercase, hyphens) -- [ ] Description textarea -- [ ] Member search input -- [ ] Available members list -- [ ] Selected members chips with X buttons -- [ ] Cancel button -- [ ] Create button -- [ ] Loading state - -### Chat Area -- [ ] Channel header with name -- [ ] Member count display -- [ ] Pinned messages button with count -- [ ] Members info button -- [ ] Messages load correctly -- [ ] Message grouping by author -- [ ] Date separators display -- [ ] Avatar display per message -- [ ] Timestamp per message -- [ ] Scroll to bottom on new messages -- [ ] Load more old messages (scroll up) - -### Message Features -- [ ] Hover actions appear on messages -- [ ] React to message (emoji picker) -- [ ] Reply to message (thread) -- [ ] Pin message button -- [ ] Edit own 
message -- [ ] Delete own message -- [ ] Message reactions display -- [ ] Reaction counts +## 3. Automation / Script System -### Message Input -- [ ] Textarea for typing -- [ ] Auto-resize on multi-line -- [ ] Attach file button (+) -- [ ] Emoji picker button -- [ ] Mention button (@) -- [ ] Send button -- [ ] Enter to send (Shift+Enter for newline) -- [ ] Character counter (if enabled) -- [ ] Format toolbar (bold, italic, code, etc.) -- [ ] @mention autocomplete popup -- [ ] Slash commands popup (/) -- [ ] Attachment preview with upload progress -- [ ] Reply-to banner (when replying) -- [ ] Cancel reply button -- [ ] Edit mode banner -- [ ] Cancel edit button - -### Channel Info Panel -- [ ] Toggle open/close -- [ ] Channel description -- [ ] Member list with avatars -- [ ] Member roles/types -- [ ] Add member button - -### Add Member Modal -- [ ] Search users input -- [ ] User list with selection checkboxes -- [ ] Selected count display -- [ ] Cancel button -- [ ] Add Members button - -### Pinned Messages Panel -- [ ] Toggle open/close -- [ ] Pinned messages list -- [ ] Click to jump to message -- [ ] Unpin button - -### Typing Indicator -- [ ] Shows when others typing -- [ ] Multiple users typing text +### Create Automation ---- +- [ ] Navigate to Automation → Create +- [ ] Toggle Prompt/Script mode — UI switches (agent selector ↔ Monaco editor) +- [ ] Script mode shows "Luau" badge and API Reference link +- [ ] Create prompt automation — agent, schedule, enable +- [ ] Create script automation with `--!strict` header — schedule, enable +- [ ] Script without `--!strict` header → validation error -## 4. DIRECT MESSAGES - -> **Note:** `/messages` now redirects to `/chat`. DMs are part of the unified chat interface and appear as `dm` type channels in the channel list. 
- -### DM Conversations (via `/chat`) -- [ ] DM channels appear in channel list -- [ ] DM channel type icon distinct from other types -- [ ] "New Message" button -- [ ] Search conversations input -- [ ] Avatar per conversation -- [ ] Last message preview -- [ ] Time ago display -- [ ] Unread count badges -- [ ] Click to open conversation -- [ ] Loading skeleton state -- [ ] Empty state if no conversations - -### New Message Modal -- [ ] Recipient select dropdown -- [ ] User/agent list with type labels -- [ ] Cancel button -- [ ] Start Chat button - -### Conversation View (`/messages/{id}`) -- [ ] Floating header with back button -- [ ] User/agent avatar and name -- [ ] User type label -- [ ] Status indicator (for agents) -- [ ] Settings/gear button (for agents) -- [ ] Profile link button -- [ ] Messages display -- [ ] Own messages right-aligned (dark bubble) -- [ ] Other messages left-aligned (light bubble) -- [ ] Avatar grouping (hide repeated) -- [ ] Timestamps per message -- [ ] Markdown rendering (bold, italic, code, links, lists) -- [ ] Code blocks with syntax highlighting -- [ ] Typing indicator -- [ ] Message input textarea -- [ ] Auto-resize input -- [ ] Send button -- [ ] Loading state on send -- [ ] Empty state for new conversations +### Run Automation ---- +- [ ] Prompt automation runs at scheduled time → task created, agent responds +- [ ] Script automation runs at scheduled time → task created, Luau executes, output posted to channel +- [ ] `ctx` table populated: `ctx.automation_id`, `ctx.run_number`, `ctx.last_run_at`, `ctx.schedule` +- [ ] Script calls `app.*` API: workspace tools work +- [ ] Script with syntax error → task shows failure status, error captured +- [ ] Auto-disable: trigger 5 consecutive failures → automation sets `is_active = false` -## 5. 
TASKS (`/tasks`) - -### Header -- [ ] Page title "Tasks" -- [ ] Filter tabs by status (All/Active/Pending/Completed/Failed) -- [ ] Filter by agent -- [ ] Filter by priority -- [ ] Filter by type -- [ ] "Create Task" button - -### Task List -- [ ] Task rows display -- [ ] Task title -- [ ] Type badge (ticket/request/analysis/content/research/custom) -- [ ] Status badge with color (pending/active/paused/completed/failed/cancelled) -- [ ] Priority badge (low/medium/high/urgent) -- [ ] Assigned agent with avatar -- [ ] Due date display -- [ ] Click to open task detail - -### Create Task Modal -- [ ] Title input (required) -- [ ] Description textarea -- [ ] Type select (ticket/request/analysis/content/research/custom) -- [ ] Priority select (low/medium/high/urgent) -- [ ] Agent assignment select -- [ ] Due date input -- [ ] Cancel button -- [ ] Create button -- [ ] Loading state - -### Task Detail View (`/tasks/{id}`) -- [ ] Task title display -- [ ] Type badge -- [ ] Status badge with color -- [ ] Priority badge -- [ ] Description display -- [ ] Assigned agent with avatar -- [ ] Requester info -- [ ] Channel link (if linked) -- [ ] Due date -- [ ] Created/started/completed timestamps -- [ ] Lifecycle action buttons: - - [ ] Start button (pending → active) - - [ ] Pause button (active → paused) - - [ ] Resume button (paused → active) - - [ ] Complete button (active → completed) - - [ ] Fail button (active → failed) - - [ ] Cancel button (any → cancelled) - -### Task Steps -- [ ] Steps list display -- [ ] Step description -- [ ] Step type badge (action/decision/approval/sub_task/message) -- [ ] Step status indicator (pending/in_progress/completed/skipped) -- [ ] Step timestamps -- [ ] Step metadata display - -### Sub-Tasks -- [ ] Sub-task list (if parent task) -- [ ] Sub-task status indicators -- [ ] Click to open sub-task +### Run History & Edit ---- +- [ ] Edit page shows run history with status dots +- [ ] "Run" button triggers manual execution +- [ ] Run detail 
modal: status, duration, output +- [ ] Link to task from run details -## 6. LISTS (`/lists`) - -### Header -- [ ] Page title "Lists" -- [ ] View mode tabs (Board/List) -- [ ] Filter dropdown -- [ ] "Create Item" button - -### Board View (Kanban) -- [ ] Backlog column with count -- [ ] In Progress column with count -- [ ] Done column with count -- [ ] Item cards in each column -- [ ] Drag and drop between columns -- [ ] Item card: title, priority badge, assignee avatar, cost - -### List View -- [ ] Item rows in table format -- [ ] Sortable columns -- [ ] Item details visible - -### Create Item Modal -- [ ] Title input (required) -- [ ] Description textarea -- [ ] Status select (backlog/in_progress/done) -- [ ] Priority select (low/medium/high/urgent) -- [ ] Assignee select (grouped: agents/humans) -- [ ] Estimated cost input -- [ ] Channel select (optional) -- [ ] Cancel button -- [ ] Create button -- [ ] Loading state - -### Item Detail Slideover -- [ ] Item title display -- [ ] Edit button → edit mode -- [ ] Close (X) button -- [ ] Status badge with color -- [ ] Priority badge with color -- [ ] Description display -- [ ] Assignee with avatar -- [ ] Cost display -- [ ] Created date -- [ ] Completed date (if done) -- [ ] Mark Complete button -- [ ] Reopen button (if done) -- [ ] Delete button -- [ ] Collaborators section -- [ ] Comments section -- [ ] Add comment input -- [ ] Comment list -- [ ] Delete comment (hover reveal) -- [ ] Edit mode: editable title -- [ ] Edit mode: editable description -- [ ] Edit mode: status select -- [ ] Edit mode: priority select -- [ ] Edit mode: cost input -- [ ] Save/Cancel buttons in edit mode +### Task Source Labels ---- +- [ ] Chat message → "Chat" label +- [ ] Manual run → "Manual" label +- [ ] Prompt automation → "Automation" label +- [ ] Script automation → "Automation" + "Luau Script" badge +- [ ] Delegated task → "Delegated" label -## 7. 
DOCUMENTS (`/docs`) - -### Document List Sidebar -- [ ] Search documents input -- [ ] Document tree/list display -- [ ] Document icons -- [ ] Selected document highlight -- [ ] "New Document" button -- [ ] Folder structure (if any) - -### Document Viewer/Editor -- [ ] Document title display -- [ ] Edit button -- [ ] Version history button -- [ ] Comments toggle button -- [ ] Attachments button -- [ ] Document content display -- [ ] Markdown rendering -- [ ] Code blocks with highlighting -- [ ] Edit mode: textarea/editor -- [ ] Save button in edit mode -- [ ] Cancel edit button - -### Version History Panel -- [ ] Version list display -- [ ] Version timestamps -- [ ] Version author -- [ ] Change description -- [ ] View diff button per version -- [ ] Restore version button -- [ ] Current version indicator - -### Diff Viewer Modal -- [ ] Side-by-side diff view -- [ ] Additions highlighted (green) -- [ ] Deletions highlighted (red) -- [ ] Version labels -- [ ] Close button - -### Comments Panel -- [ ] Comments list -- [ ] Comment author avatars -- [ ] Comment timestamps -- [ ] Reply to comment -- [ ] Resolve comment button -- [ ] Resolved comments section -- [ ] Add comment input -- [ ] Submit comment button - -### Attachments Panel -- [ ] Attachments list -- [ ] File icons -- [ ] File names -- [ ] Download button per file -- [ ] Delete button per file -- [ ] Upload attachment button +### Edge Cases ---- +- [ ] `keep_history = false`: channel messages cleared before each run (destructive — verify intended) +- [ ] Script runs synchronously within `RunAutomationJob` — verify timeout handling +- [ ] Retry guard: on `attempts() > 1`, skips if recent message exists in last 30 min +- [ ] Channel auto-creation: no `channel_id` → creates DM with creator + agent -## 8. ACTIVITY (`/activity`) +--- -### Header -- [ ] Page title -- [ ] Filter options +## 4. 
Chat UI -### Filter Panel -- [ ] Activity type filters (messages/tasks/approvals/agents/errors) -- [ ] User filter dropdown -- [ ] Date range filters (today/week/month/all) +### Channel List -### Activity Timeline -- [ ] Activity items display -- [ ] Type icons per activity -- [ ] User/agent avatars -- [ ] Timestamps -- [ ] Activity descriptions -- [ ] Metadata (task titles, amounts, channels) -- [ ] Load more button -- [ ] Empty state if no activities +- [ ] Filter chips work: All, Unread, DMs, Channels, External +- [ ] Search bar expands/collapses +- [ ] Pinned channels section +- [ ] Compose dropdown: New Message, New Channel, Connect External ---- +### Messages -## 9. APPROVALS (`/approvals`) - -### Header -- [ ] Page title -- [ ] Filter tabs with counts - -### Filter Tabs -- [ ] All tab -- [ ] Pending tab (with count) -- [ ] Approved tab -- [ ] Rejected tab - -### Approval List -- [ ] Approval cards display -- [ ] Request title -- [ ] Description -- [ ] Amount display -- [ ] Requester info with avatar -- [ ] Status badge -- [ ] Approve button (pending only) -- [ ] Reject button (pending only) -- [ ] Responder info (approved/rejected) -- [ ] Response timestamp -- [ ] Loading state -- [ ] Empty state per filter +- [ ] Telegram-style bubbles with grouped border-radius +- [ ] Consecutive same-sender messages visually grouped (5-min window) +- [ ] Inline timestamps on hover +- [ ] Code blocks with syntax highlighting +- [ ] Image messages inline with lightbox (zoom, pan, drag) +- [ ] Approval cards render correctly +- [ ] Reactions display and can be toggled +- [ ] Thread previews expandable +- [ ] Hover actions: emoji, reply, thread, more ---- +### Message Input -## 10. 
AUTOMATION (`/automation`) - -### Header -- [ ] Page title -- [ ] Tab navigation - -### Task Templates Tab -- [ ] Templates list display -- [ ] "New Template" button -- [ ] Template cards with: - - [ ] Template name - - [ ] Default title - - [ ] Priority badge - - [ ] Default assignee - - [ ] Estimated cost - - [ ] Tags display - - [ ] Edit button - - [ ] Delete button - - [ ] Use template button - -### Template Modal (Create/Edit) -- [ ] Template name input -- [ ] Default title input -- [ ] Default priority select -- [ ] Default assignee select -- [ ] Estimated cost input -- [ ] Tags input -- [ ] Cancel button -- [ ] Save button - -### Automation Rules Tab -- [ ] Rules list display -- [ ] "New Rule" button -- [ ] Rule cards with: - - [ ] Rule name - - [ ] Trigger type - - [ ] Action type - - [ ] Template association - - [ ] Enabled/disabled toggle - - [ ] Trigger count - - [ ] Edit button - - [ ] Delete button - -### Rule Modal (Create/Edit) -- [ ] Rule name input -- [ ] Trigger type select (task created/completed/assigned/approval) -- [ ] Action type select (create task/assign/notify/spawn agent) -- [ ] Template select (if action = create task) -- [ ] Enabled toggle -- [ ] Cancel button -- [ ] Save button +- [ ] Drag-and-drop file upload +- [ ] @mentions popup with fuzzy search (keyboard nav: up/down/enter/escape) +- [ ] /commands popup +- [ ] Formatting toolbar: bold, italic, strikethrough, code, codeblock, quote, link +- [ ] Emoji picker opens and inserts +- [ ] Attachment preview grid with progress +- [ ] Character count for long messages ---- +### Channel Info -## 11. 
ORGANIZATION (`/org`) - -### Header -- [ ] Page title -- [ ] Subtitle - -### View Mode Toggle -- [ ] Tree View button -- [ ] Chart View button -- [ ] Active state on selected - -### Tree View -- [ ] Tree structure displays -- [ ] Node cards with avatars -- [ ] Agent type badges -- [ ] Status indicators (working/idle) -- [ ] Current task display -- [ ] Email for humans -- [ ] Ephemeral badge if applicable -- [ ] Expand/collapse children -- [ ] Expand indicator with count -- [ ] Click to expand/collapse -- [ ] Keyboard navigation (Tab, Enter, Space) -- [ ] Focus ring on keyboard focus -- [ ] Profile link per node - -### Chart View -- [ ] Horizontal org chart displays -- [ ] Node cards with avatars -- [ ] Connector lines between nodes -- [ ] Root node highlighted -- [ ] Agent/human icons -- [ ] Ephemeral badge -- [ ] Focus indicator on cards -- [ ] Profile link per node - -### Stats Section -- [ ] Total Members stat card -- [ ] Humans stat card -- [ ] Agents stat card -- [ ] Active Agents stat card -- [ ] Correct counts displayed +- [ ] Collapsible sections: About, Pinned, Shared Media, Shared Files, Members +- [ ] Members filter: All / Humans / Agents / Online +- [ ] Notification settings (all/mentions/none) --- -## 12. WORKLOAD (`/workload`) - -### Summary Cards -- [ ] Active Agents card -- [ ] Current Tasks card -- [ ] Completed Today card -- [ ] Average Efficiency card - -### Agent Workload Cards -- [ ] Agent cards display -- [ ] Agent avatar with status -- [ ] Agent name and type -- [ ] Workload score bar -- [ ] Efficiency percentage -- [ ] Tasks in progress count -- [ ] Tasks pending count -- [ ] Tasks completed count -- [ ] Total cost display -- [ ] Status badge - -### Auto-refresh -- [ ] Data refreshes every 30 seconds -- [ ] Loading indicator on refresh +## 5. 
Telegram / File Forwarding + +- [ ] Agent sends inline workspace file URL → forwarded as Telegram document +- [ ] Agent sends PDF attachment → forwarded as Telegram document (not dropped) +- [ ] Agent sends image attachment → forwarded as Telegram photo +- [ ] File URLs detected in: markdown link, bare URL, image embed formats +- [ ] No echo loop: external messages not re-forwarded to external +- [ ] Content stripping: image URLs removed from text after sending as attachment --- -## 13. CALENDAR (`/calendar`) - -### Sidebar -- [ ] Mini calendar display -- [ ] Date selection -- [ ] Today highlight -- [ ] Month navigation - -### View Mode Buttons -- [ ] Month view button -- [ ] Week view button -- [ ] Day view button - -### Calendar Grid -- [ ] Month view: full month grid -- [ ] Week view: 7 days with hours -- [ ] Day view: single day with hours -- [ ] Events display on dates -- [ ] Click date to create event -- [ ] Click event to view/edit - -### Navigation -- [ ] Previous period button -- [ ] Next period button -- [ ] Today button -- [ ] Period label (dynamic) - -### Event Modal -- [ ] Event title input -- [ ] Date/time inputs -- [ ] Description textarea -- [ ] Cancel button -- [ ] Save button -- [ ] Delete button (edit mode) +## 6. LLM Providers & Token Metrics ---- +### Token Metrics -## 14. 
SETTINGS (`/settings`) - -### Organization Settings -- [ ] Organization name input -- [ ] Organization email input -- [ ] Timezone select -- [ ] Save button - -### Agent Defaults -- [ ] Default behavior mode select -- [ ] Cost limit input -- [ ] Auto-spawn toggle -- [ ] Save button - -### Action Policies -- [ ] Policies list -- [ ] "Add Policy" button -- [ ] Policy card: pattern, threshold, approval level -- [ ] Edit policy button -- [ ] Delete policy button - -### Policy Modal -- [ ] Pattern input -- [ ] Cost threshold input -- [ ] Approval level select -- [ ] Cancel button -- [ ] Save button - -### Notifications -- [ ] Email notifications toggle -- [ ] Slack notifications toggle -- [ ] Daily summary toggle -- [ ] Save button - -### Danger Zone -- [ ] Pause all agents button -- [ ] Reset agent memory button -- [ ] Delete organization button -- [ ] Confirmation dialogs for each +- [ ] Chat task → token counts recorded (prompt, completion, cache read/write) +- [ ] Automation task → same metrics +- [ ] Tool call count recorded +- [ ] Generation time and tokens/second computed ---- +### Analytics Dashboard -## 15. 
INTEGRATIONS (`/integrations`) - -### Webhooks Section -- [ ] Webhooks list -- [ ] "Create Webhook" button -- [ ] Webhook cards: - - [ ] URL display - - [ ] Target/events display - - [ ] Enabled/disabled toggle - - [ ] Last triggered date - - [ ] Call count - - [ ] Edit button - - [ ] Delete button - -### Webhook Modal -- [ ] URL input -- [ ] Target selection -- [ ] Events multiselect -- [ ] Cancel button -- [ ] Save button - -### API Keys Section -- [ ] API keys list -- [ ] "Generate Key" button -- [ ] Key cards: - - [ ] Key name - - [ ] Masked key value - - [ ] Copy button - - [ ] Revoke button - - [ ] Created date - -### Connected Services -- [ ] Services list/grid -- [ ] Service icons -- [ ] Service names -- [ ] Connection status -- [ ] Connect/Disconnect buttons +- [ ] Charts load on analytics page +- [ ] Breakdown by agent — correct +- [ ] Breakdown by model — correct +- [ ] Breakdown by source — correct +- [ ] Icons render (regression check) ---- +### Workspace Context -## 16. TABLES (`/tables`) - -### Header -- [ ] Page title -- [ ] "New Table" button - -### Tables Grid -- [ ] Table cards display -- [ ] Table icons -- [ ] Table names -- [ ] Descriptions -- [ ] Row counts -- [ ] Column counts -- [ ] Click to open table -- [ ] Delete button per table - -### Create Table Modal -- [ ] Table name input -- [ ] Description textarea -- [ ] Icon selection (optional) -- [ ] Cancel button -- [ ] Create button - -### Empty State -- [ ] Empty state message -- [ ] Create table button +- [ ] Task in Workspace A → belongs to Workspace A +- [ ] Task in Workspace B → belongs to Workspace B +- [ ] No context leak between queued jobs --- -## 17. 
TABLE VIEW (`/tables/{id}`) - -### Header -- [ ] Back button -- [ ] Table icon -- [ ] Table name -- [ ] Table description -- [ ] "Add Column" button -- [ ] "Add Row" button - -### Toolbar -- [ ] Search rows input -- [ ] Selected count display -- [ ] Bulk delete button (when selected) -- [ ] Row count display - -### Table Grid -- [ ] Column headers -- [ ] Column type indicators -- [ ] Column menu button (hover) -- [ ] Row selection checkboxes -- [ ] Cell data display per type: - - [ ] Text: inline edit - - [ ] Number: inline edit - - [ ] Date: date picker - - [ ] Checkbox: toggle - - [ ] Select: dropdown - - [ ] Multiselect: tags with add/remove - - [ ] URL: link display, edit button - - [ ] Email: mailto link, edit button -- [ ] Row actions menu (hover) -- [ ] Delete row button - -### Column Menu -- [ ] Edit column option -- [ ] Delete column option - -### Add Column Modal -- [ ] Column name input -- [ ] Column type selection grid -- [ ] Type descriptions -- [ ] Options input (for select/multiselect) -- [ ] Required toggle -- [ ] Cancel button -- [ ] Add Column button - -### Edit Column Modal -- [ ] Pre-filled column name -- [ ] Type change warning -- [ ] Options editing -- [ ] Cancel button -- [ ] Save Changes button - -### Bulk Delete Confirmation -- [ ] Confirmation message with count -- [ ] Cancel button -- [ ] Delete Rows button +## 7. Security ---- +### Workspace Scoping -## 18. 
AGENT PROFILE (`/agent/{id}`) - -### Header -- [ ] Agent avatar with status -- [ ] Agent name -- [ ] Agent type badge -- [ ] Status badge (working/idle/paused) -- [ ] Emoji display -- [ ] Current task display -- [ ] Message button -- [ ] Pause/Resume button - -### Tabs -- [ ] Overview tab -- [ ] Personality tab -- [ ] Instructions tab -- [ ] Capabilities tab -- [ ] Memory tab -- [ ] Activity tab -- [ ] Settings tab - -### Overview Tab -- [ ] Agent summary -- [ ] Recent activity -- [ ] Quick stats - -### Personality Tab -- [ ] Personality editor textarea -- [ ] Save button - -### Instructions Tab -- [ ] Instructions editor textarea -- [ ] Save button - -### Capabilities Tab -- [ ] Capabilities list -- [ ] Capability enabled/disabled status -- [ ] Approval tracking per capability - -### Memory Tab -- [ ] Memory entries list -- [ ] Add memory button -- [ ] Clear memory button - -### Activity Tab -- [ ] Activity log -- [ ] Activity type icons -- [ ] Timestamps -- [ ] Load more - -### Settings Tab -- [ ] Agent-specific settings -- [ ] Session management -- [ ] Save button +- [ ] Agent in Workspace A cannot access Workspace B data +- [ ] API returns 403 for cross-workspace resource access +- [ ] File API workspace-scoped ---- +### IDOR Prevention -## 19. 
USER PROFILE (`/profile/{id}`) - -### Header -- [ ] User avatar -- [ ] User name -- [ ] User type badge (human/agent) -- [ ] Email display -- [ ] Ephemeral indicator (if agent) -- [ ] Status display -- [ ] Current task (if agent) -- [ ] Message button -- [ ] Manage Agent button (if agent) - -### Tabs -- [ ] Activity tab -- [ ] Tasks tab - -### Activity Tab -- [ ] Activity steps list -- [ ] Status indicators -- [ ] Timestamps - -### Tasks Tab -- [ ] Assigned tasks list -- [ ] Task status badges -- [ ] Click to open task ---- +### XSS Prevention + +- [ ] Message with `<script>alert(1)</script>` renders as text, doesn't execute +- [ ] Agent name with HTML entities renders safely +- [ ] File names with special characters render safely +- [ ] Markdown output sanitized via DOMPurify (verify `