From b811bb61b9a90c86d6be4d7b0dd7af360d78bb0e Mon Sep 17 00:00:00 2001 From: Aymen Date: Mon, 6 Apr 2026 22:52:55 +0200 Subject: [PATCH 1/5] feat: move to bicep & BYOK --- .gitignore | 4 + Dockerfile | 5 +- app/frontend/e2e/helpers.ts | 10 +- app/frontend/src/components/Sidebar.tsx | 4 +- app/frontend/src/components/TopBar.tsx | 2 +- app/frontend/src/hooks/useStatus.ts | 2 +- app/frontend/src/mock-server.ts | 5 +- .../src/pages/InfrastructureSettings.tsx | 658 +++--- app/frontend/src/pages/MessagingSettings.tsx | 109 +- app/frontend/src/pages/SetupWizard.tsx | 440 ++-- app/frontend/src/pages/Skills.tsx | 2 +- app/frontend/src/styles/global.css | 85 + app/frontend/src/types.ts | 4 +- app/runtime/agent/agent.py | 113 +- app/runtime/agent/aitl.py | 3 - app/runtime/agent/byok.py | 84 + app/runtime/agent/one_shot.py | 15 +- app/runtime/config/settings.py | 14 +- app/runtime/registries/catalog.py | 5 - app/runtime/server/app_routes.py | 2 +- app/runtime/server/lifecycle.py | 5 +- .../server/routes/content_safety_routes.py | 120 +- .../server/routes/foundry_iq_routes.py | 272 +-- app/runtime/server/routes/identity_routes.py | 157 +- .../server/routes/monitoring_routes.py | 196 +- app/runtime/server/routes/network_topology.py | 8 +- app/runtime/server/routes/sandbox_routes.py | 126 +- app/runtime/server/routes/skill_routes.py | 6 +- app/runtime/server/setup/__init__.py | 4 +- app/runtime/server/setup/_routes.py | 97 +- app/runtime/server/setup/azure.py | 25 +- app/runtime/server/setup/foundry.py | 190 ++ app/runtime/server/setup/prerequisites.py | 149 +- app/runtime/server/setup_voice.py | 477 ---- app/runtime/server/smoke_test.py | 6 +- app/runtime/server/wiring.py | 20 +- app/runtime/services/cloud/azure.py | 67 +- app/runtime/services/cloud/github.py | 4 +- .../services/cloud/runtime_identity.py | 14 +- app/runtime/services/deployment/__init__.py | 6 +- .../services/deployment/aca_deployer.py | 4 +- .../services/deployment/aca_provision.py | 12 +- 
.../services/deployment/bicep_deployer.py | 867 ++++++++ app/runtime/services/foundry_iq.py | 14 +- app/runtime/services/otel.py | 38 +- .../services/security/preflight_secrets.py | 48 - app/runtime/state/_base.py | 18 +- app/runtime/state/guardrails/risk.py | 17 +- app/runtime/state/guardrails_config.py | 500 ----- app/runtime/state/infra_config.py | 22 + app/runtime/state/monitoring_config.py | 22 + app/runtime/state/sandbox_config.py | 22 + app/runtime/tests/conftest.py | 14 +- app/runtime/tests/test_bicep_deploy.py | 437 ++++ .../tests/test_content_safety_routes.py | 261 +-- app/runtime/tests/test_e2e_aca_deploy.py | 539 +++++ app/runtime/tests/test_e2e_bicep_azure.py | 509 +++++ app/runtime/tests/test_e2e_setup_process.py | 1912 +++++++++++++++++ .../test_guardrails_policy_validation.py | 80 +- app/runtime/tests/test_guardrails_presets.py | 54 +- app/runtime/tests/test_monitoring.py | 46 +- app/runtime/tests/test_restart_survival.py | 632 ++++++ app/runtime/tests/test_settings.py | 2 +- app/runtime/tests/test_smoke_test.py | 10 +- app/tui/src/api/client.ts | 3 - app/tui/src/config/constants.ts | 2 - app/tui/src/config/types.ts | 1 - app/tui/src/deploy/aca.ts | 3 + app/tui/src/deploy/docker.ts | 46 +- app/tui/src/index.ts | 264 ++- app/tui/src/screens/dashboard.ts | 2 - app/tui/src/screens/setup.ts | 55 +- app/tui/src/ui/app.ts | 4 +- app/tui/src/ui/tui.ts | 19 +- app/tui/tests/types.test.ts | 1 - conftest.py | 25 +- e2e_secrets.env.example | 20 + entrypoint.sh | 35 +- infra/main.bicep | 428 ++++ infra/main.json | 489 +++++ pyproject.toml | 1 + 81 files changed, 7986 insertions(+), 3007 deletions(-) create mode 100644 app/runtime/agent/byok.py create mode 100644 app/runtime/server/setup/foundry.py delete mode 100644 app/runtime/server/setup_voice.py create mode 100644 app/runtime/services/deployment/bicep_deployer.py delete mode 100644 app/runtime/state/guardrails_config.py create mode 100644 app/runtime/tests/test_bicep_deploy.py create mode 100644 
app/runtime/tests/test_e2e_aca_deploy.py create mode 100644 app/runtime/tests/test_e2e_bicep_azure.py create mode 100644 app/runtime/tests/test_e2e_setup_process.py create mode 100644 app/runtime/tests/test_restart_survival.py create mode 100644 e2e_secrets.env.example create mode 100644 infra/main.bicep create mode 100644 infra/main.json diff --git a/.gitignore b/.gitignore index 4353fd7..1985128 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,10 @@ Thumbs.db todo .env video +e2e_secrets.env + +# Docker compose override (auto-generated by TUI) +docker-compose.override.yml # Hugo docs/public/ diff --git a/Dockerfile b/Dockerfile index a78f486..bb7010a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,6 +16,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libcups2 libdrm2 libxkbcommon0 libxcomposite1 libxdamage1 \ libxfixes3 libxrandr2 libgbm1 libpango-1.0-0 libcairo2 \ libasound2 libatspi2.0-0 libxshmfence1 \ + libicu76 \ && rm -rf /var/lib/apt/lists/* # Node.js (for Playwright MCP via npx) + Copilot CLI @@ -33,7 +34,8 @@ RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ && rm -rf /var/lib/apt/lists/* # Azure CLI (for automated bot provisioning) -RUN curl -sL https://aka.ms/InstallAzureCLIDeb | bash +RUN curl -sL https://aka.ms/InstallAzureCLIDeb | bash \ + && az bicep install # Docker CLI only (no daemon) -- used to push the locally-built image to ACR RUN install -m 0755 -d /etc/apt/keyrings \ @@ -77,6 +79,7 @@ COPY app/cli/ app/cli/ RUN pip install --no-cache-dir --no-deps -e . 
COPY skills/ skills/ COPY plugins/ plugins/ +COPY infra/ infra/ # Embed the built frontend at the path _FRONTEND_DIR resolves to COPY --from=frontend-build /build/dist/ /app/frontend/dist/ diff --git a/app/frontend/e2e/helpers.ts b/app/frontend/e2e/helpers.ts index fc62dbf..a9fd5b8 100644 --- a/app/frontend/e2e/helpers.ts +++ b/app/frontend/e2e/helpers.ts @@ -12,7 +12,7 @@ import { type Page } from '@playwright/test' export const MOCK_STATUS = { azure: { logged_in: true, user: 'test@example.com', subscription: 'sub-123' }, - copilot: { authenticated: true, username: 'testuser' }, + foundry: { deployed: true, endpoint: 'https://mock-foundry.cognitiveservices.azure.com', name: 'mock-foundry' }, prerequisites_configured: true, telegram_configured: false, tunnel: { active: false, url: '' }, @@ -23,7 +23,7 @@ export const MOCK_STATUS = { export const MOCK_STATUS_NEEDS_SETUP = { azure: { logged_in: false }, - copilot: { authenticated: false }, + foundry: { deployed: false }, prerequisites_configured: false, telegram_configured: false, tunnel: { active: false }, @@ -453,12 +453,6 @@ export async function mockApi(page: Page) { await page.route('**/api/setup/azure/login', route => route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ status: 'ok' }) }), ) - await page.route('**/api/setup/copilot/login', route => - route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ status: 'ok', user_code: 'ABC-123', verification_uri: 'https://github.com/login/device' }) }), - ) - await page.route('**/api/setup/copilot/status', route => - route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ authenticated: true }) }), - ) await page.route('**/api/setup/configuration/save', route => route.fulfill({ status: 200, contentType: 'application/json', body: JSON.stringify({ status: 'ok' }) }), ) diff --git a/app/frontend/src/components/Sidebar.tsx b/app/frontend/src/components/Sidebar.tsx index 
e68dc93..5d79cda 100644 --- a/app/frontend/src/components/Sidebar.tsx +++ b/app/frontend/src/components/Sidebar.tsx @@ -15,7 +15,7 @@ const NAV_ITEMS = [ { to: '/mcp', icon: '🔌', label: 'MCP Servers' }, { to: '/schedules', icon: '📅', label: 'Schedules' }, { to: '/profile', icon: '👤', label: 'Profile' }, - { to: '/messaging', icon: '✉️', label: 'Messaging' }, + { to: '/messaging', icon: '✉️', label: 'AI Model' }, { to: '/infrastructure', icon: '🏗️', label: 'Infrastructure' }, { to: '/guardrails', icon: '🛡️', label: 'Hardening' }, { to: '/tool-activity', icon: '🔍', label: 'Tool Activity' }, @@ -53,7 +53,7 @@ export default function Sidebar({ status, collapsed, onToggle }: Props) { {!collapsed && status && (
- +
diff --git a/app/frontend/src/components/TopBar.tsx b/app/frontend/src/components/TopBar.tsx index d8189dd..6f03abe 100644 --- a/app/frontend/src/components/TopBar.tsx +++ b/app/frontend/src/components/TopBar.tsx @@ -10,7 +10,7 @@ interface Props { const LINKS = [ { path: '/customization', label: 'Customization', Icon: IconPalette }, - { path: '/messaging', label: 'Messaging', Icon: IconSliders }, + { path: '/messaging', label: 'AI Model', Icon: IconSliders }, { path: '/infrastructure', label: 'Infrastructure', Icon: IconSliders }, { path: '/guardrails', label: 'Hardening', Icon: IconShield }, { path: '/tool-activity', label: 'Tool Activity', Icon: IconActivity }, diff --git a/app/frontend/src/hooks/useStatus.ts b/app/frontend/src/hooks/useStatus.ts index 565ff5f..66193b5 100644 --- a/app/frontend/src/hooks/useStatus.ts +++ b/app/frontend/src/hooks/useStatus.ts @@ -20,7 +20,7 @@ export function useStatus(intervalMs = 30_000) { }, [refresh, intervalMs]) const needsSetup = status - ? !(status.azure?.logged_in && status.copilot?.authenticated) + ? !(status.azure?.logged_in && status.foundry?.deployed) : null return { status, refresh, needsSetup } diff --git a/app/frontend/src/mock-server.ts b/app/frontend/src/mock-server.ts index ed20030..93bc2df 100644 --- a/app/frontend/src/mock-server.ts +++ b/app/frontend/src/mock-server.ts @@ -14,7 +14,7 @@ import type { IncomingMessage, ServerResponse } from 'http' const STATUS = { azure: { logged_in: true, user: 'test@example.com', subscription: 'sub-123' }, - copilot: { authenticated: true, username: 'testuser' }, + foundry: { deployed: true, endpoint: 'https://mock-foundry.cognitiveservices.azure.com', name: 'mock-foundry' }, prerequisites_configured: true, telegram_configured: false, tunnel: { active: false, url: '' }, @@ -201,8 +201,7 @@ const routes: RouteEntry[] = [ { match: (u) => u === '/api/setup/status', respond: () => STATUS }, { match: (u) => u.startsWith('/api/setup/config'), respond: (_, m) => m === 'POST' ? 
{ status: 'ok' } : CONFIG }, { match: (u) => u.startsWith('/api/setup/azure/login'), respond: () => ({ status: 'ok' }) }, - { match: (u) => u.startsWith('/api/setup/copilot/login'), respond: () => ({ status: 'ok', user_code: 'ABC-123', verification_uri: 'https://github.com/login/device' }) }, - { match: (u) => u.startsWith('/api/setup/copilot/status'), respond: () => ({ authenticated: true }) }, + { match: (u) => u.startsWith('/api/setup/configuration/save'), respond: () => ({ status: 'ok' }) }, { match: (u) => u.startsWith('/api/setup/tunnel/start'), respond: () => ({ status: 'ok' }) }, { match: (u) => u.startsWith('/api/setup/channels/'), respond: () => ({ status: 'ok' }) }, diff --git a/app/frontend/src/pages/InfrastructureSettings.tsx b/app/frontend/src/pages/InfrastructureSettings.tsx index 6722934..ddb0362 100644 --- a/app/frontend/src/pages/InfrastructureSettings.tsx +++ b/app/frontend/src/pages/InfrastructureSettings.tsx @@ -1,154 +1,200 @@ import { useState, useEffect, useCallback } from 'react' import { useNavigate } from 'react-router-dom' -import { api } from '../api' +import { api, getToken } from '../api' import { showToast } from '../components/Toast' import { EnvironmentsContent } from './Environments' import { WorkspaceContent } from './Workspace' import { FoundryIQContent } from './FoundryIQ' -import type { SetupStatus, FoundryIQConfig, MonitoringConfig } from '../types' +import type { SetupStatus } from '../types' type Tab = 'overview' | 'environments' | 'voice' | 'memory' | 'workspace' | 'monitoring' -interface PreflightCheck { - check: string - ok: boolean - detail: string - sub_checks?: { name: string; ok: boolean; detail: string }[] - endpoints?: { method: string; path: string; status: number | string; ok: boolean }[] +interface FoundryStatus { + deployed: boolean + foundry_endpoint: string + foundry_name: string + foundry_resource_group: string + deployed_models: string[] + key_vault_url: string + key_vault_name: string + 
content_safety_endpoint: string + content_safety_name: string + search_endpoint: string + search_name: string + embedding_aoai_endpoint: string + embedding_aoai_name: string + app_insights_name: string + session_pool_name: string + acs_name: string + bot_name: string + model: string } -interface PreflightResult { - status: string - checks: PreflightCheck[] +interface DeployConfig { + deploy_key_vault: boolean + deploy_acs: boolean + deploy_content_safety: boolean + deploy_search: boolean + deploy_embedding_aoai: boolean + deploy_monitoring: boolean + deploy_session_pool: boolean +} + +const RESOURCE_DEFS: { key: keyof DeployConfig; label: string; desc: string; tag?: string }[] = [ + { key: 'deploy_key_vault', label: 'Key Vault', desc: 'Secrets management (recommended)', tag: 'Core' }, + { key: 'deploy_content_safety', label: 'Content Safety', desc: 'Prompt Shields injection detection', tag: 'Recommended' }, + { key: 'deploy_acs', label: 'Communication Services', desc: 'Voice calling via ACS + OpenAI Realtime' }, + { key: 'deploy_search', label: 'AI Search', desc: 'Foundry IQ knowledge retrieval' }, + { key: 'deploy_embedding_aoai', label: 'Embedding Model', desc: 'Text embeddings for Foundry IQ' }, + { key: 'deploy_monitoring', label: 'Monitoring', desc: 'Application Insights + Log Analytics' }, + { key: 'deploy_session_pool', label: 'Session Pool', desc: 'Sandboxed code execution (experimental)' }, +] + +const DEFAULT_CONFIG: DeployConfig = { + deploy_key_vault: true, + deploy_acs: false, + deploy_content_safety: false, + deploy_search: false, + deploy_embedding_aoai: false, + deploy_monitoring: false, + deploy_session_pool: false, +} + +const _STEP_LABELS: Record = { + resource_group: 'Create resource group', + resolve_principal: 'Resolve identity', + ensure_runtime_sp: 'Provision service principal', + bicep_deploy: 'Deploy Azure resources (Bicep)', + extract_outputs: 'Extract deployment outputs', + persist_env: 'Persist environment variables', + 
configure_content_safety: 'Configure Content Safety', + configure_foundry_iq: 'Configure Foundry IQ', + create_search_index: 'Create search index', + configure_monitoring: 'Configure Monitoring', + configure_session_pool: 'Configure Session Pool', + configure_acs: 'Configure Communication Services', + persist_state: 'Save deployment record', + restart_runtime: 'Restart agent container', } -const CHECK_LABELS: Record = { - bot_credentials: 'Bot Credentials', - jwt_validation: 'JWT Validation', - tunnel: 'Tunnel', - tenant_id: 'Tenant ID', - endpoint_auth: 'Endpoint Auth', - telegram_security: 'Telegram Security', - acs_voice: 'ACS / Voice', - acs_callback_security: 'ACS Callback Security', +function prettyStepName(raw: string): string { + return _STEP_LABELS[raw] || raw.replace(/_/g, ' ') } export default function InfrastructureSettings() { const navigate = useNavigate() const [tab, setTab] = useState('overview') const [status, setStatus] = useState(null) - const [preflight, setPreflight] = useState(null) + const [fStatus, setFStatus] = useState(null) + const [config, setConfig] = useState(DEFAULT_CONFIG) const [loading, setLoading] = useState>({}) + const [deploySteps, setDeploySteps] = useState<{ step: string; status: string; detail?: string }[]>([]) const loadAll = useCallback(async () => { - try { - const s = await api('setup/status') - setStatus(s) - } catch { /* ignore */ } + try { setStatus(await api('setup/status')) } catch { /* ignore */ } + try { setFStatus(await api('setup/foundry/status')) } catch { /* ignore */ } }, []) useEffect(() => { loadAll() }, [loadAll]) - const runPreflight = async () => { - setLoading(p => ({ ...p, preflight: true })) - try { - const r = await api('setup/preflight') - setPreflight(r) - } catch (e: any) { showToast(e.message, 'error') } - setLoading(p => ({ ...p, preflight: false })) - } - - const startTunnel = async () => { - setLoading(p => ({ ...p, tunnel: true })) - try { - await api('setup/tunnel/start', { method: 'POST' }) 
- showToast('Tunnel started', 'success') - loadAll() - } catch (e: any) { showToast(e.message, 'error') } - setLoading(p => ({ ...p, tunnel: false })) - } - - const stopTunnel = async () => { - setLoading(p => ({ ...p, tunnel: true })) - try { - await api('setup/tunnel/stop', { method: 'POST' }) - showToast('Tunnel stopped', 'success') - loadAll() - } catch (e: any) { showToast(e.message, 'error') } - setLoading(p => ({ ...p, tunnel: false })) - } - - const deployInfra = async () => { + // Sync toggle state from deployed resources + useEffect(() => { + if (!fStatus) return + setConfig({ + deploy_key_vault: !!fStatus.key_vault_url, + deploy_acs: !!fStatus.acs_name, + deploy_content_safety: !!fStatus.content_safety_endpoint, + deploy_search: !!fStatus.search_endpoint, + deploy_embedding_aoai: !!fStatus.embedding_aoai_endpoint, + deploy_monitoring: !!fStatus.app_insights_name, + deploy_session_pool: !!fStatus.session_pool_name, + }) + }, [fStatus]) + + const handleDeploy = async () => { setLoading(p => ({ ...p, deploy: true })) + setDeploySteps([]) try { - await api('setup/infra/deploy', { method: 'POST' }) - showToast('Infrastructure deployment started', 'success') - loadAll() + const rg = fStatus?.foundry_resource_group || 'polyclaw-rg' + const configPayload = JSON.stringify({ resource_group: rg, ...config }) + const token = getToken() + const params = new URLSearchParams() + if (token) params.set('secret', token) + params.set('config', configPayload) + const url = `/api/setup/foundry/deploy/stream?${params.toString()}` + + await new Promise((resolve, reject) => { + const es = new EventSource(url) + es.onmessage = (e) => { + try { + const step = JSON.parse(e.data) + setDeploySteps(prev => [...prev, step]) + } catch { /* ignore parse errors */ } + } + es.addEventListener('done', (e) => { + es.close() + try { + const data = JSON.parse((e as MessageEvent).data) + if (data.status === 'ok') { + showToast('Infrastructure deployed successfully', 'success') + } else { + 
showToast(data.error || 'Deployment failed', 'error') + } + } catch { /* ignore */ } + resolve() + }) + es.onerror = () => { + es.close() + reject(new Error('Deployment stream disconnected')) + } + }) + await loadAll() } catch (e: any) { showToast(e.message, 'error') } setLoading(p => ({ ...p, deploy: false })) } - const decommission = async () => { - if (!confirm('Decommission infrastructure? This will delete cloud resources.')) return + const handleDecommission = async () => { + if (!confirm('Decommission all infrastructure? This will delete the resource group and all Azure resources.')) return setLoading(p => ({ ...p, decommission: true })) try { - await api('setup/infra/decommission', { method: 'POST' }) + await api('setup/foundry/decommission', { method: 'POST' }) showToast('Decommissioning started', 'success') - loadAll() + await loadAll() } catch (e: any) { showToast(e.message, 'error') } setLoading(p => ({ ...p, decommission: false })) } const restartContainer = async () => { - if (!confirm('Restart the agent container? This will briefly interrupt active sessions.')) return - setLoading(p => ({ ...p, containerRestart: true })) + if (!confirm('Restart the agent container?')) return + setLoading(p => ({ ...p, restart: true })) try { const res = await api<{ message: string }>('setup/container/restart', { method: 'POST' }) showToast(res.message || 'Agent container restarted', 'success') } catch (e: any) { showToast(e.message, 'error') } - setLoading(p => ({ ...p, containerRestart: false })) + setLoading(p => ({ ...p, restart: false })) } + const toggleConfig = (key: keyof DeployConfig) => { + setConfig(prev => ({ ...prev, [key]: !prev[key] })) + } return (

Infrastructure

- {status && ( -
- - - - -
- )} -
-

- Configuration changes require a container restart to take effect. -

- -
- -
{([ ['overview', 'Overview'], - ['memory', 'Memory / Foundry IQ'], ['environments', 'Environments'], ['voice', 'Voice'], + ['memory', 'Memory / Foundry IQ'], ['workspace', 'Workspace'], ['monitoring', 'Monitoring'], ] as [Tab, string][]).map(([t, label]) => ( @@ -158,188 +204,281 @@ export default function InfrastructureSettings() { ))}
- {/* Overview: Platform Status + Preflight + Provisioning */} {tab === 'overview' && ( <> - {status && ( -
-

Platform Status

-
-
Azure: {status.azure?.logged_in ? status.azure.subscription || 'Logged in' : 'Not logged in'}
-
GitHub Copilot: {status.copilot?.authenticated ? 'Authenticated' : 'Not authenticated'}
-
Tunnel: {status.tunnel?.active ? status.tunnel.url : 'Inactive'}
-
Bot: {status.bot_configured ? 'Configured' : 'Not configured'}
-
Voice: {status.voice_call_configured ? 'Configured' : 'Not configured'}
-
-
- )} - -
-

Preflight Checks

-

Security and readiness checks for your deployment.

- - - {preflight && ( -
- - {preflight.status === 'ok' ? 'All Checks Passed' : 'Warnings'} - - -
- {preflight.checks.map(c => ( -
-
- - {CHECK_LABELS[c.check] || c.check} - {c.detail} -
- - {c.sub_checks && c.sub_checks.length > 0 && ( -
- {c.sub_checks.filter(s => s.ok).length}/{c.sub_checks.length} sub-checks passed - {c.sub_checks.map(sc => ( -
- - {sc.name} - {sc.detail} -
- ))} -
- )} - - {c.endpoints && c.endpoints.length > 0 && ( -
- {c.endpoints.filter(e => e.ok).length}/{c.endpoints.length} endpoints secured - - - - {c.endpoints.map(ep => ( - - - - - - - ))} - -
MethodPathStatus
{ep.method}{ep.path}{ep.status}{ep.ok ? 'OK' : 'EXPOSED'}
-
- )} -
- ))} + {/* Deployed Resources */} + {fStatus?.deployed && ( +
+

Deployed Resources

+

Resource group: {fStatus.foundry_resource_group}

+
+ 0 ? `Models: ${fStatus.deployed_models.join(', ')}` : undefined} + /> + {fStatus.key_vault_name && ( + + )} + {fStatus.content_safety_name && ( + + )} + {fStatus.search_name && ( + + )} + {fStatus.embedding_aoai_name && ( + + )} + {fStatus.app_insights_name && ( + + )} + {fStatus.session_pool_name && ( + + )} + {fStatus.acs_name && ( + + )} + {fStatus.bot_name && ( + + )}
+ {fStatus.model && ( +

Active model: {fStatus.model}

+ )}
)} -
-
- {/* Tunnel Card */} -
-
-
- -
-
-

Tunnel

-

Cloudflare tunnel for exposing the bot endpoint publicly.

-
- - {status?.tunnel?.active ? 'Active' : 'Inactive'} - + {/* Runtime Status */} +
+

Runtime Status

+
+
Azure: {status?.azure?.logged_in ? (status.azure.subscription || 'Logged in') : 'Not logged in'}
+
Foundry: {fStatus?.deployed ? 'Deployed' : 'Not deployed'}
+
Tunnel: {status?.tunnel?.active ? <>Active {status.tunnel.url} : Inactive}
+
Bot: {status?.bot_configured ? Configured : Not configured}
- - {status?.tunnel?.active ? ( -
- {status.tunnel?.url && ( -
- - {status.tunnel.url} -
- )} - -
- ) : ( -
- -
- )}
- {/* Deploy / Decommission Cards */} - {status?.azure?.logged_in ? ( -
-
-
- -
-

Deploy Infrastructure

-

Provision Azure Bot Framework resources, register the bot channel, and wire up the messaging endpoint.

- + {/* Resource Configuration */} + {status?.azure?.logged_in && ( +
+

{fStatus?.deployed ? 'Update Resources' : 'Deploy Resources'}

+

Select which Azure resources to {fStatus?.deployed ? 'add to your deployment' : 'deploy'}. Foundry AI Services is always included.

+
+ {RESOURCE_DEFS.map(r => ( + + ))}
- -
-
- -
-

Decommission

-

Tear down all provisioned Azure resources. This is irreversible and will delete cloud infrastructure.

- + {fStatus?.deployed && ( + + )}
-
- ) : ( -
-
-
- -
-
-

Azure Login Required

-

Sign in to Azure to deploy or decommission infrastructure.

+ {deploySteps.length > 0 && ( +
+ {deploySteps.map((s, i) => ( +
+ {s.status === 'ok' ? '\u2713' : s.status === 'failed' ? '\u2717' : '\u2022'} + {prettyStepName(s.step)} + {s.detail && {s.detail}} +
+ ))} + {loading.deploy && ( +
+ + Working... +
+ )}
-
-
- -
+ )}
)} -
+ + {!status?.azure?.logged_in && ( +
+

Azure Login Required

+

Sign in to Azure to deploy or manage infrastructure.

+ +
+ )} + + {/* Channels + Bot Configuration */} + {fStatus?.deployed && ( + + )} )} - {/* Environments */} {tab === 'environments' && } + {tab === 'voice' && } + {tab === 'memory' && } + {tab === 'workspace' && } + {tab === 'monitoring' && } +
+ ) +} - {/* Voice */} - {tab === 'voice' && ( - - )} +function ResourceCard({ name, resource, detail, extra }: { name: string; resource: string; detail?: string; extra?: string }) { + return ( +
+
{name}
+
{resource}
+ {detail &&
{detail}
} + {extra &&
{extra}
} +
+ ) +} - {/* Memory / Foundry IQ */} - {tab === 'memory' && ( - - )} +function ChannelsCard({ status, onReload }: { status: SetupStatus | null; onReload: () => void }) { + const [loading, setLoading] = useState>({}) + const [showTgForm, setShowTgForm] = useState(false) + + const handleSaveTelegram = async (e: React.FormEvent) => { + e.preventDefault() + setLoading(p => ({ ...p, telegram: true })) + const fd = new FormData(e.currentTarget) + const token = (fd.get('telegram_token') as string || '').trim() + const whitelist = (fd.get('telegram_whitelist') as string || '').trim() + try { + await api('setup/channels/telegram/config', { + method: 'POST', + body: JSON.stringify({ token, whitelist }), + }) + showToast('Telegram configuration saved', 'success') + setShowTgForm(false) + onReload() + } catch (e: any) { showToast(e.message, 'error') } + setLoading(p => ({ ...p, telegram: false })) + } - {/* Workspace */} - {tab === 'workspace' && } + const handleDeployBot = async () => { + setLoading(p => ({ ...p, bot: true })) + try { + await api('setup/infra/deploy', { method: 'POST' }) + showToast('Bot deployed successfully', 'success') + onReload() + } catch (e: any) { showToast(e.message, 'error') } + setLoading(p => ({ ...p, bot: false })) + } - {/* Monitoring */} - {tab === 'monitoring' && } + const channels = [ + { + id: 'web', + name: 'Web Chat', + icon: '\uD83C\uDF10', + desc: 'Built-in browser chat interface', + status: 'always' as const, + }, + { + id: 'telegram', + name: 'Telegram', + icon: '\u2708\uFE0F', + desc: 'Chat via Telegram bot', + status: status?.telegram_configured ? 'connected' as const : 'available' as const, + }, + { + id: 'voice', + name: 'Voice Call', + icon: '\uD83D\uDCDE', + desc: 'Phone calls via ACS + OpenAI Realtime', + status: status?.voice_call_configured ? 'connected' as const : 'available' as const, + }, + ] + + return ( +
+

Channels

+

Available communication channels. Web chat always works; others require additional setup.

+ +
+ {channels.map(ch => ( +
+
{ch.icon}
+
+
+ {ch.name} + {ch.status === 'always' && Active} + {ch.status === 'connected' && Connected} +
+
{ch.desc}
+
+ {ch.id === 'telegram' && ch.status !== 'connected' && ( + + )} +
+ ))} +
+ + {showTgForm && ( +
+

Telegram Configuration

+
+
+
+ + +
+
+ + +
+
+
+ + +
+
+
+ )} + + {/* Bot Service */} +
+
+ Bot Service + -- Cloudflare tunnel for Telegram & Teams +
+
+ {status?.bot_deployed ? ( + Deployed + ) : ( + + )} + {status?.tunnel?.active && status.tunnel.url && ( + <> + Tunnel Active + {status.tunnel.url} + + )} +
+
) } + function StatusBadge({ ok, label }: { ok?: boolean; label: string }) { return ( @@ -348,6 +487,35 @@ function StatusBadge({ ok, label }: { ok?: boolean; label: string }) { ) } +interface MonitoringConfig { + enabled: boolean + sampling_ratio: number + enable_live_metrics: boolean + connection_string_set: boolean + connection_string_masked: string + provisioned: boolean + otel_active?: boolean + otel_status?: { active?: boolean; tracer_provider?: string } + app_insights_name?: string + portal_url?: string + workspace_name?: string + resource_group?: string + location?: string + grafana_dashboard_url?: string +} + +interface FoundryIQConfig { + configured: boolean + provisioned?: boolean + search_endpoint?: string + search_resource_name?: string + embedding_name?: string + embedding_endpoint?: string + openai_resource_name?: string + resource_group?: string + location?: string +} + // --------------------------------------------------------------------------- // Monitoring Tab -- OpenTelemetry / Application Insights configuration // --------------------------------------------------------------------------- diff --git a/app/frontend/src/pages/MessagingSettings.tsx b/app/frontend/src/pages/MessagingSettings.tsx index 44b89e1..cbb68b7 100644 --- a/app/frontend/src/pages/MessagingSettings.tsx +++ b/app/frontend/src/pages/MessagingSettings.tsx @@ -2,28 +2,22 @@ import { useState, useEffect, useCallback } from 'react' import { api } from '../api' import { showToast } from '../components/Toast' import { ProactiveContent } from './Proactive' -import type { SetupStatus, ModelInfo } from '../types' +import type { ModelInfo } from '../types' type Tab = 'config' | 'proactive' export default function MessagingSettings() { const [tab, setTab] = useState('config') - const [status, setStatus] = useState(null) const [models, setModels] = useState([]) const [currentModel, setCurrentModel] = useState('') const [loading, setLoading] = useState>({}) - // Channel state - const 
[telegramToken, setTelegramToken] = useState('') - const loadAll = useCallback(async () => { try { - const [s, cfg, mdl] = await Promise.all([ - api('setup/status'), + const [cfg, mdl] = await Promise.all([ api>('setup/config'), api<{ models: ModelInfo[]; current: string }>('models'), ]) - setStatus(s) setModels(mdl.models || []) setCurrentModel(cfg.COPILOT_MODEL || mdl.current || '') } catch { /* ignore */ } @@ -43,38 +37,15 @@ export default function MessagingSettings() { setLoading(p => ({ ...p, model: false })) } - const saveTelegram = async () => { - if (!telegramToken) return - setLoading(p => ({ ...p, telegram: true })) - try { - await api('setup/channels/telegram/config', { - method: 'POST', - body: JSON.stringify({ token: telegramToken }), - }) - showToast('Telegram configured', 'success') - loadAll() - } catch (e: any) { showToast(e.message, 'error') } - setLoading(p => ({ ...p, telegram: false })) - } - - const removeTelegram = async () => { - if (!confirm('Remove Telegram configuration?')) return - try { - await api('setup/channels/telegram/remove', { method: 'POST' }) - showToast('Telegram removed', 'success') - loadAll() - } catch (e: any) { showToast(e.message, 'error') } - } - return (
-

Messaging

+

AI Model

{([ - ['config', 'AI Model & Channels'], + ['config', 'AI Model'], ['proactive', 'Proactive'], ] as [Tab, string][]).map(([t, label]) => (
{tab === 'config' && ( - <> - {/* AI Model */} -
-

Default AI Model

-

Choose the model used for conversations.

-
-
- - -
- -
-
- - {/* Channels */} -
-

Channel Configuration

- -
-

Telegram

- {status?.telegram_configured ? ( -
- Configured - -
- ) : ( -
-
- - setTelegramToken(e.target.value)} - placeholder="Bot token from @BotFather" - /> -
- -
- )} +
+

Default AI Model

+

Choose the model used for conversations.

+
+
+ +
+
- +
)} {/* Proactive */} diff --git a/app/frontend/src/pages/SetupWizard.tsx b/app/frontend/src/pages/SetupWizard.tsx index 514d0ac..a61f5fe 100644 --- a/app/frontend/src/pages/SetupWizard.tsx +++ b/app/frontend/src/pages/SetupWizard.tsx @@ -2,21 +2,20 @@ import { useState, useEffect, useCallback, useRef } from 'react' import { useNavigate } from 'react-router-dom' import { api } from '../api' import { showToast } from '../components/Toast' -import type { SetupStatus, SandboxConfig, FoundryIQConfig, ContentSafetyConfig } from '../types' +import type { SetupStatus } from '../types' -type Step = 'azure' | 'github' | 'config' | 'deploy' +type Step = 'azure' | 'foundry' -interface VoiceConfig { - acs_resource_name?: string - acs_source_number?: string - [k: string]: unknown +interface AzureSubscription { + id: string + name: string + is_default: boolean + state: string } -const STEPS: { key: Step; label: string; description: string }[] = [ - { key: 'azure', label: 'Azure', description: 'Sign in with Azure CLI to manage cloud resources' }, - { key: 'github', label: 'GitHub', description: 'Authenticate with GitHub to power the AI agent' }, - { key: 'config', label: 'Channels', description: 'Connect messaging channels like Telegram' }, - { key: 'deploy', label: 'Bot', description: 'Provision Azure Bot Service and connect channels' }, +const STEPS: { key: Step; label: string }[] = [ + { key: 'azure', label: 'Azure' }, + { key: 'foundry', label: 'Foundry' }, ] export default function SetupWizard() { @@ -26,77 +25,65 @@ export default function SetupWizard() { const [loading, setLoading] = useState>({}) const manualStepRef = useRef(false) - // Optional infra state - const [voiceConfig, setVoiceConfig] = useState(null) - const [sandboxConfig, setSandboxConfig] = useState(null) - const [foundryConfig, setFoundryConfig] = useState(null) - const [contentSafetyConfig, setContentSafetyConfig] = useState(null) - - // Device code state const [azureDevice, setAzureDevice] = useState<{ 
code: string; url: string } | null>(null) - const [githubDevice, setGithubDevice] = useState<{ code: string; url: string } | null>(null) const [countdown, setCountdown] = useState(null) const azureDeviceRef = useRef(false) - const githubDeviceRef = useRef(false) + + // Subscription picker state + const [subscriptions, setSubscriptions] = useState([]) + const [selectedSub, setSelectedSub] = useState('') + + const azureReady = !!status?.azure?.logged_in && !status?.azure?.needs_subscription const refresh = useCallback(async () => { try { const s = await api('setup/status') setStatus(s) - // Auto-advance steps (skip if device code flow active or user clicked a step) - if (!manualStepRef.current) { - if (!azureDeviceRef.current && s.azure?.logged_in && currentStep === 'azure') setCurrentStep('github') - if (!githubDeviceRef.current && s.azure?.logged_in && s.copilot?.authenticated && currentStep === 'github') setCurrentStep('config') + + // Load subscriptions when logged in but no default sub + if (s.azure?.logged_in && s.azure?.needs_subscription) { + const subs = await api('setup/azure/subscriptions') + setSubscriptions(subs) + if (subs.length === 1) setSelectedSub(subs[0].id) + } + + if (!manualStepRef.current && !azureDeviceRef.current) { + const azDone = !!s.azure?.logged_in && !s.azure?.needs_subscription + const fDone = azDone && !!s.foundry?.deployed + if (fDone) { navigate('/chat'); return } + if (azDone && currentStep === 'azure') setCurrentStep('foundry') } } catch { /* ignore */ } - // Load optional infra status - try { setVoiceConfig(await api('setup/voice/config')) } catch { /* ignore */ } - try { setSandboxConfig(await api('sandbox/config')) } catch { /* ignore */ } - try { setFoundryConfig(await api('foundry-iq/config')) } catch { /* ignore */ } - try { setContentSafetyConfig(await api('content-safety/status')) } catch { /* ignore */ } - }, [currentStep]) + }, [currentStep, navigate]) useEffect(() => { refresh() }, [refresh]) - const setupDone = 
status?.azure?.logged_in && status?.copilot?.authenticated && status?.bot_configured - const botDeployed = !!status?.bot_deployed - - /** Show code, start countdown, open URL after 3s, then poll. */ - const startDeviceFlow = (code: string, url: string, setDevice: typeof setAzureDevice, openUrl: string) => { - setDevice({ code, url }) - setCountdown(3) - let t = 3 - const iv = setInterval(() => { - t -= 1 - setCountdown(t) - if (t <= 0) { - clearInterval(iv) - setCountdown(null) - window.open(openUrl, '_blank') - } - }, 1000) - } - const handleAzureLogin = async (force?: boolean) => { setLoading(p => ({ ...p, azure: true })) azureDeviceRef.current = true try { - // When re-authenticating, log out first so the backend starts a fresh device flow - if (force) { - await api('setup/azure/logout', { method: 'POST' }).catch(() => {}) - } + if (force) await api('setup/azure/logout', { method: 'POST' }).catch(() => {}) const r = await api<{ status: string; code?: string; url?: string; message?: string }>('setup/azure/login', { method: 'POST' }) if (r.status === 'already_logged_in') { showToast('Already signed in to Azure', 'success') azureDeviceRef.current = false await refresh() + } else if (r.status === 'needs_subscription') { + azureDeviceRef.current = false + await refresh() } else if (r.code && r.url) { - startDeviceFlow(r.code, r.url, setAzureDevice, r.url) - // Poll for completion + setAzureDevice({ code: r.code, url: r.url }) + setCountdown(3) + let t = 3 + const iv = setInterval(() => { + t -= 1 + setCountdown(t) + if (t <= 0) { clearInterval(iv); setCountdown(null); window.open(r.url!, '_blank') } + }, 1000) for (let i = 0; i < 120; i++) { await new Promise(res => setTimeout(res, 3000)) const check = await api<{ status: string }>('setup/azure/check') - if (check.status === 'logged_in') { + if (check.status === 'logged_in' || check.status === 'needs_subscription') { showToast('Azure authenticated!', 'success') setAzureDevice(null) azureDeviceRef.current = false @@ 
-115,78 +102,54 @@ export default function SetupWizard() { setLoading(p => ({ ...p, azure: false })) } - const handleCopilotLogin = async () => { - setLoading(p => ({ ...p, github: true })) - githubDeviceRef.current = true + const handleSetSubscription = async () => { + if (!selectedSub) return + setLoading(p => ({ ...p, subscription: true })) try { - const r = await api<{ status: string; message?: string; code?: string; url?: string; user_code?: string; verification_uri?: string }>('setup/copilot/login', { method: 'POST' }) - const code = r.code || r.user_code - const url = r.url || r.verification_uri - if (code && url) { - startDeviceFlow(code, url, setGithubDevice, url) - // Poll for completion - for (let i = 0; i < 120; i++) { - await new Promise(res => setTimeout(res, 3000)) - const check = await api<{ authenticated: boolean }>('setup/copilot/status') - if (check.authenticated) { - showToast('GitHub authenticated!', 'success') - setGithubDevice(null) - githubDeviceRef.current = false - break - } - } - } else { - githubDeviceRef.current = false - showToast(r.message || 'Login initiated', 'info') - } + await api('setup/azure/subscription', { + method: 'POST', + body: JSON.stringify({ subscription_id: selectedSub }), + }) + const sub = subscriptions.find(s => s.id === selectedSub) + showToast(`Subscription set: ${sub?.name || selectedSub}`, 'success') + setSubscriptions([]) await refresh() - } catch (e: any) { - githubDeviceRef.current = false - showToast(e.message, 'error') - } - setLoading(p => ({ ...p, github: false })) + } catch (e: any) { showToast(e.message, 'error') } + setLoading(p => ({ ...p, subscription: false })) } - const handleSaveConfig = async (e: React.FormEvent) => { - e.preventDefault() - setLoading(p => ({ ...p, config: true })) - const fd = new FormData(e.currentTarget) - const token = (fd.get('telegram_token') as string || '').trim() - const whitelist = (fd.get('telegram_whitelist') as string || '').trim() - const body = { - telegram: { 
token, whitelist }, - bot: {}, - } + const handleFoundryDeploy = async () => { + setLoading(p => ({ ...p, foundry: true })) try { - await api('setup/configuration/save', { method: 'POST', body: JSON.stringify(body) }) - showToast('Configuration saved!', 'success') + const r = await api<{ status: string; foundry_endpoint?: string; deployed_models?: string[]; error?: string }>('setup/foundry/deploy', { + method: 'POST', + body: JSON.stringify({ resource_group: 'polyclaw-rg', location: 'eastus' }), + }) + if (r.status === 'ok') { + showToast(`Foundry deployed: ${r.deployed_models?.join(', ') || 'models ready'}`, 'success') + } else { + showToast(r.error || 'Deployment failed', 'error') + } await refresh() - setCurrentStep('deploy') - } catch (e: any) { - showToast(e.message, 'error') - } - setLoading(p => ({ ...p, config: false })) + } catch (e: any) { showToast(e.message, 'error') } + setLoading(p => ({ ...p, foundry: false })) } + const setupDone = azureReady && status?.foundry?.deployed + return (
polyclaw -

Complete the initial setup to get started. Azure and GitHub authentication are required.

+

Complete the initial setup to get started. Azure sign-in and Foundry deployment are required.

- {/* Progress Steps */}
{STEPS.map((step, i) => { - const azureDone = !!status?.azure?.logged_in - const githubDone = azureDone && !!status?.copilot?.authenticated - const configDone = githubDone && !!status?.telegram_configured - const deployDone = configDone && !!status?.bot_deployed - const done = step.key === 'azure' ? azureDone - : step.key === 'github' ? githubDone - : step.key === 'config' ? configDone - : deployDone + const azDone = azureReady + const fDone = azDone && !!status?.foundry?.deployed + const done = step.key === 'azure' ? azDone : fDone return ( + + ) : ( +

Loading subscriptions...

)} +
+ + /* Fully authenticated with subscription */ + ) : azureReady ? ( +
+ Authenticated + {status?.azure?.subscription &&

Subscription: {status.azure.subscription}

}
- +
+ + /* Not logged in */ ) : (
)} - {currentStep === 'github' && ( + {currentStep === 'foundry' && (
-

GitHub

-

Authenticate with GitHub to enable the AI agent powered by Copilot.

- {githubDevice ? ( -
-

Copy the code below, then sign in at the link:

-
- {githubDevice.code} - -
- {countdown !== null ? ( -

Opening browser in {countdown}...

- ) : ( - <> - {githubDevice.url} -

Waiting for authentication...

- - )} -
- ) : status?.copilot?.authenticated ? ( +

Microsoft Foundry

+

Deploy AI models (gpt-4.1, gpt-5, gpt-5-mini) to your Azure subscription via Bicep. This also creates a Key Vault for secrets management.

+ {status?.foundry?.deployed ? (
- Authenticated - {status.copilot.username && ( -

User: {status.copilot.username}

- )} + Deployed +

Endpoint: {status.foundry.endpoint}

+ {status.foundry.name &&

Resource: {status.foundry.name}

}
- - +
) : (
- -

A device code will be shown. Enter it at github.com to complete authentication.

+

Creates an AI Services resource with model deployments and a Key Vault. Uses Entra ID authentication (no API keys).

)}
)} - - {currentStep === 'config' && ( -
-

Messaging Channels

-

Connect a Telegram bot to chat with polyclaw on Telegram. This is optional -- you can skip and configure it later from Settings.

-
-
- - - Get this from @BotFather on Telegram. -
-
- - - Only these users can interact with the bot. Leave empty to allow all. -
-
- - -
-
-
- )} - - {currentStep === 'deploy' && ( -
-

Bot

- {botDeployed ? ( -
- Deployed -

Azure Bot Service is running. Telegram and other channels are connected.

- -
- ) : ( - <> -

Deploy the Azure Bot Service to enable Telegram and other messaging channels. This will:

-
    -
  • Start a Cloudflare tunnel to expose your bot
  • -
  • Create an Azure Bot Service with an App Registration
  • - {status?.telegram_configured &&
  • Connect Telegram as a messaging channel
  • } -
-
- - -
- - )} -
- )}
- {setupDone && botDeployed && ( + {setupDone && (
-

Setup complete! Your bot is deployed and ready to use.

- -
- )} - - {setupDone && !botDeployed && currentStep !== 'deploy' && ( -
-

Configuration saved. Deploy the bot to enable Telegram and other channels.

- - -
- )} - - {/* Optional Infrastructure */} - {setupDone && status?.azure?.logged_in && ( -
-

Optional Infrastructure

-

Provision additional Azure resources. These are not required to use polyclaw.

- -
-
-
- Content Safety Recommended - Prompt Shields injection detection -
- {contentSafetyConfig?.deployed - ? Deployed - : } -
- -
-
- Voice Calling - ACS + OpenAI Realtime -
- {voiceConfig?.acs_resource_name - ? Provisioned - : } -
- -
-
- Agent Sandbox Experimental - Sandboxed code execution -
- {sandboxConfig?.is_provisioned - ? Provisioned - : } -
- -
-
- Foundry IQ - AI Search + embeddings -
- {foundryConfig?.provisioned - ? Provisioned - : } -
-
+

Setup complete! Configure channels, bot service, and more from .

+
)}
diff --git a/app/frontend/src/pages/Skills.tsx b/app/frontend/src/pages/Skills.tsx index 7eb47d2..3c5338a 100644 --- a/app/frontend/src/pages/Skills.tsx +++ b/app/frontend/src/pages/Skills.tsx @@ -424,7 +424,7 @@ export default function Skills() { {!(marketplaceData.recommended?.length || marketplaceData.loved?.length || marketplaceData.popular?.length || marketplaceData.github_awesome?.length || marketplaceData.anthropic?.length) && ( -

No skills available. Check your network connection or set a GITHUB_TOKEN.

+

No skills available. Check your network connection or Foundry endpoint configuration.

)}
)} diff --git a/app/frontend/src/styles/global.css b/app/frontend/src/styles/global.css index 75d4486..f3355ad 100644 --- a/app/frontend/src/styles/global.css +++ b/app/frontend/src/styles/global.css @@ -1420,6 +1420,20 @@ table.table { width: 100%; border-collapse: collapse; font-size: 13px; } .setup__complete { text-align: center; margin-top: 32px; } .setup__complete p { color: var(--text-2); margin-bottom: 12px; } +.setup__sub-list { display: flex; flex-direction: column; gap: 0; margin-top: 12px; } +.setup__sub-option { + display: flex; align-items: center; gap: 12px; padding: 10px 16px; cursor: pointer; + border: 1px solid var(--border); border-bottom: none; +} +.setup__sub-option:first-child { border-radius: var(--radius) var(--radius) 0 0; } +.setup__sub-option:last-child { border-bottom: 1px solid var(--border); border-radius: 0 0 var(--radius) var(--radius); } +.setup__sub-option:only-child { border-radius: var(--radius); border-bottom: 1px solid var(--border); } +.setup__sub-option:hover { background: var(--surface-alt); } +.setup__sub-option input[type="radio"] { accent-color: var(--gold); } +.setup__sub-info { display: flex; flex-direction: column; gap: 2px; } +.setup__sub-name { font-size: 14px; font-weight: 500; color: var(--text); } +.setup__sub-id { font-size: 11px; color: var(--text-3); font-family: var(--font-mono); } + .setup__optional { margin-top: 48px; } .setup__optional-title { font-size: 18px; font-weight: 600; color: var(--text); margin-bottom: 6px; } .setup__optional-desc { color: var(--text-3); font-size: 13px; margin-bottom: 16px; } @@ -1435,6 +1449,77 @@ table.table { width: 100%; border-collapse: collapse; font-size: 13px; } .setup__opt-name { font-size: 14px; font-weight: 500; color: var(--text); } .setup__opt-desc { font-size: 12px; color: var(--text-3); } +/* ── Infrastructure Overview ── */ +.infra__resource-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 12px; margin: 12px 0; } 
+.infra__resource-card { + padding: 12px 16px; border: 1px solid var(--border); border-radius: var(--radius); + background: var(--surface); display: flex; flex-direction: column; gap: 4px; +} +.infra__resource-name { font-size: 11px; text-transform: uppercase; letter-spacing: 0.04em; color: var(--text-3); font-weight: 600; } +.infra__resource-id { font-size: 14px; font-weight: 500; color: var(--text); } +.infra__resource-detail code { font-size: 11px; color: var(--text-2); word-break: break-all; } +.infra__resource-extra { font-size: 12px; } + +.infra__toggle-grid { display: flex; flex-direction: column; gap: 0; } +.infra__toggle-row { + display: flex; align-items: center; gap: 12px; padding: 10px 16px; + border: 1px solid var(--border); border-bottom: none; cursor: pointer; +} +.infra__toggle-row:first-child { border-radius: var(--radius) var(--radius) 0 0; } +.infra__toggle-row:last-child { border-bottom: 1px solid var(--border); border-radius: 0 0 var(--radius) var(--radius); } +.infra__toggle-row:only-child { border-radius: var(--radius); border-bottom: 1px solid var(--border); } +.infra__toggle-row:hover { background: var(--surface-alt); } +.infra__toggle-info { display: flex; flex-direction: column; gap: 2px; } +.infra__toggle-name { font-size: 14px; font-weight: 500; color: var(--text); display: flex; align-items: center; gap: 6px; } +.infra__toggle-desc { font-size: 12px; color: var(--text-3); } + +.infra__steps { display: flex; flex-direction: column; gap: 4px; } +.infra__step { display: flex; align-items: center; gap: 8px; font-size: 13px; color: var(--text-2); } +.infra__step-icon { width: 16px; text-align: center; font-weight: 700; } +.infra__step--ok .infra__step-icon { color: var(--ok); } +.infra__step--failed .infra__step-icon { color: var(--err); } +.infra__step--warning .infra__step-icon { color: var(--gold); } + +.spinner--inline { + display: inline-block; + width: 14px; height: 14px; + border: 2px solid var(--border); + border-top-color: 
var(--gold); + border-radius: 50%; + animation: spin 0.7s linear infinite; +} + +.infra__channels-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); gap: 16px; margin-top: 12px; } +.infra__channel-section { padding: 16px; border: 1px solid var(--border); border-radius: var(--radius); background: var(--surface); } +.infra__channel-section h4 { font-size: 15px; margin-bottom: 8px; } + +/* Channels card -- redesigned */ +.channels__grid { display: flex; flex-direction: column; gap: 8px; margin-top: 12px; } +.channels__item { + display: flex; align-items: center; gap: 12px; padding: 12px 16px; + border: 1px solid var(--border); border-radius: var(--radius); background: var(--surface); + transition: border-color 0.15s; +} +.channels__item--connected { border-color: var(--ok); } +.channels__item--always { border-color: var(--ok); } +.channels__icon { font-size: 22px; width: 36px; text-align: center; flex-shrink: 0; } +.channels__info { flex: 1; min-width: 0; } +.channels__name { font-size: 14px; font-weight: 600; display: flex; align-items: center; gap: 6px; } +.channels__desc { font-size: 12px; } + +.channels__config-panel { + padding: 16px; border: 1px solid var(--border); border-radius: var(--radius); background: var(--surface); +} +.channels__config-panel h4 { font-size: 14px; margin-bottom: 8px; } + +.channels__bot-bar { + display: flex; align-items: center; justify-content: space-between; + padding: 10px 16px; border: 1px solid var(--border); border-radius: var(--radius); + background: var(--surface); gap: 12px; flex-wrap: wrap; +} +.channels__bot-info { font-size: 13px; } +.channels__bot-actions { display: flex; align-items: center; gap: 8px; } + /* ── Profile ── */ /* Hero header */ diff --git a/app/frontend/src/types.ts b/app/frontend/src/types.ts index 2337b01..9c7360e 100644 --- a/app/frontend/src/types.ts +++ b/app/frontend/src/types.ts @@ -11,8 +11,8 @@ export interface AuthCheckResponse { // -- Setup Status 
-------------------------------------------------------- export interface SetupStatus { - azure?: { logged_in?: boolean; subscription?: string; tenant?: string } - copilot?: { authenticated?: boolean; username?: string } + azure?: { logged_in?: boolean; needs_subscription?: boolean; subscription?: string; subscription_id?: string; tenant?: string } + foundry?: { deployed?: boolean; endpoint?: string; name?: string; resource_group?: string } prerequisites_configured?: boolean telegram_configured?: boolean tunnel?: { active?: boolean; url?: string; restricted?: boolean } diff --git a/app/runtime/agent/agent.py b/app/runtime/agent/agent.py index 50ef4d1..cfab141 100644 --- a/app/runtime/agent/agent.py +++ b/app/runtime/agent/agent.py @@ -39,6 +39,7 @@ def __init__(self) -> None: self._client: CopilotClient | None = None self._session: Any = None self._authenticated: bool = False + self._byok: bool = False self.request_counts: dict[str, int] = {} self._sandbox: SandboxExecutor | None = None self._interceptor: SandboxToolInterceptor | None = None @@ -65,13 +66,15 @@ def has_session(self) -> bool: async def start(self) -> None: cfg.ensure_dirs() opts: dict[str, Any] = {"log_level": "error"} - if cfg.github_token: - opts["github_token"] = cfg.github_token - logger.info("[agent.start] GITHUB_TOKEN provided (%d chars)", len(cfg.github_token)) + + # Auth is handled per-session via the Foundry BYOK provider block. + self._byok = bool(cfg.foundry_endpoint) + + if self._byok: + logger.info("[agent.start] Foundry BYOK mode") else: logger.warning( - "[agent.start] No GITHUB_TOKEN found -- Copilot CLI will try the " - "logged-in gh session (may fail in containers)" + "[agent.start] No FOUNDRY_ENDPOINT -- authentication may fail" ) for attempt in range(1, MAX_START_RETRIES + 1): @@ -106,25 +109,28 @@ async def stop(self) -> None: await self._safe_stop_client() async def reload_auth(self) -> dict[str, Any]: - """Reload GITHUB_TOKEN from ``.env`` and restart the Copilot client. 
+ """Reload configuration from ``.env`` and restart the Copilot client. Called by the ``/api/runtime/reload-auth`` endpoint when the admin - container writes a new token to ``/data/.env`` after the runtime has - already booted. + container writes new config to ``/data/.env`` after the runtime has + already booted. Handles Foundry BYOK endpoint changes. """ - old_token = cfg.github_token + old_endpoint = cfg.foundry_endpoint cfg.reload() - new_token = cfg.github_token + new_endpoint = cfg.foundry_endpoint - if not new_token: - return {"status": "no_token", "authenticated": False} + endpoint_changed = new_endpoint != old_endpoint - if new_token == old_token and self._authenticated: + if not endpoint_changed and self._authenticated: return {"status": "unchanged", "authenticated": True} + if not new_endpoint: + return {"status": "no_auth", "authenticated": False} + logger.info( - "[agent.reload_auth] GITHUB_TOKEN changed (%d chars), restarting Copilot client ...", - len(new_token), + "[agent.reload_auth] config changed (endpoint=%s), " + "restarting Copilot client ...", + "changed" if endpoint_changed else "same", ) await self.stop() await self.start() @@ -132,6 +138,7 @@ async def reload_auth(self) -> dict[str, Any]: return { "status": "ok" if self._authenticated else "auth_failed", "authenticated": self._authenticated, + "byok": bool(new_endpoint), } async def _verify_auth(self) -> None: @@ -139,9 +146,18 @@ async def _verify_auth(self) -> None: Sets ``_authenticated`` so that :meth:`send` can fail fast with a useful error message instead of silently hanging for 120 seconds. + + In BYOK mode (Foundry endpoint configured), GitHub auth is not + required -- authentication happens per-session via bearer token. 
""" if not self._client: return + + if self._byok: + logger.info("[agent.auth] BYOK mode -- skipping GitHub auth check") + self._authenticated = True + return + try: auth = await self._client.get_auth_status() if auth.isAuthenticated: @@ -150,8 +166,8 @@ async def _verify_auth(self) -> None: else: logger.error( "[agent.auth] Copilot CLI is NOT authenticated. " - "Chat will not work. Set GITHUB_TOKEN in /data/.env " - "or use the admin setup wizard to authenticate." + "Chat will not work. Configure FOUNDRY_ENDPOINT " + "for Foundry BYOK mode." ) except Exception: # auth.getStatus may not be supported on older CLI versions; @@ -163,6 +179,12 @@ async def _verify_model(self) -> None: """Log whether the configured model is available and enabled.""" if not self._client: return + if self._byok: + logger.info( + "[agent.model] BYOK mode -- using Foundry model %s", + cfg.copilot_model, + ) + return model_id = cfg.copilot_model try: models = await self._client.list_models() @@ -262,9 +284,7 @@ async def send( if not self._authenticated: msg = ( "Not authenticated. Please authenticate first.\n\n" - "Open the setup wizard and either:\n" - "- Sign in with GitHub, or\n" - "- Paste a GitHub personal access token." + "Open the setup wizard and deploy Foundry infrastructure." 
) logger.error("[agent.send] aborting -- Copilot CLI not authenticated") if on_delta: @@ -274,9 +294,14 @@ async def send( model = cfg.copilot_model self.request_counts[model] = self.request_counts.get(model, 0) + 1 + t_lock_wait = _now() logger.info("[agent.send] waiting for send lock ...") async with self._send_lock: - logger.info("[agent.send] send lock acquired") + t_lock_acq = _now() + logger.info( + "[agent.send] send lock acquired (waited %.0fms)", + (t_lock_acq - t_lock_wait) * 1000, + ) if not self._session: logger.info("[agent.send] no session -- creating one") await self._new_session_inner() @@ -295,9 +320,13 @@ async def _send_inner( unsub = self._session.on(handler) try: try: + t_sdk = _now() logger.info("[agent.send] calling session.send() ...") await self._session.send({"prompt": prompt}) - logger.info("[agent.send] session.send() returned, waiting for completion ...") + logger.info( + "[agent.send] session.send() returned in %.0fms, waiting for completion ...", + (_now() - t_sdk) * 1000, + ) except Exception as exc: logger.error("[agent.send] session.send() raised: %s", exc, exc_info=True) if "Session not found" in str(exc): @@ -361,6 +390,11 @@ def _set_token_attributes(span: object | None, handler: EventHandler) -> None: async def list_models(self) -> list[dict]: if not self._client: raise RuntimeError("Agent not started") + + # BYOK mode: return Foundry-deployed models from .env + if self._byok: + return self._list_foundry_models() + try: models = await self._client.list_models() return [ @@ -377,6 +411,28 @@ async def list_models(self) -> list[dict]: logger.warning("Failed to list models: %s", exc) return [] + @staticmethod + def _list_foundry_models() -> list[dict]: + """Return models deployed on the Foundry endpoint. + + Reads ``DEPLOYED_MODELS`` (comma-separated) from ``.env``. + Falls back to the current ``COPILOT_MODEL`` if not set. 
+ """ + raw = cfg.env.read("DEPLOYED_MODELS") or "" + names = [n.strip() for n in raw.split(",") if n.strip()] if raw else [] + if not names: + names = [cfg.copilot_model] + return [ + { + "id": name, + "name": name, + "policy": "enabled", + "billing_multiplier": 1.0, + "reasoning_efforts": [], + } + for name in names + ] + def _build_hooks(self) -> dict[str, Any]: """Compose pre/post-tool-use hooks from active interceptors.""" sandbox_active = ( @@ -483,6 +539,19 @@ def _build_session_config(self) -> dict[str, Any]: "tools": ["*"], }, } + + # Inject BYOK provider when Foundry is configured. + if self._byok: + from .byok import build_session_overrides + + overrides = build_session_overrides() + if overrides: + session_cfg.update(overrides) + logger.info( + "[agent.config] BYOK provider injected: endpoint=%s model=%s", + cfg.foundry_endpoint, session_cfg.get("model"), + ) + return session_cfg async def _abort_and_destroy_session(self) -> None: diff --git a/app/runtime/agent/aitl.py b/app/runtime/agent/aitl.py index 1497e83..a9bdd51 100644 --- a/app/runtime/agent/aitl.py +++ b/app/runtime/agent/aitl.py @@ -109,9 +109,6 @@ async def _ensure_client(self) -> CopilotClient: if self._client and self._started: return self._client opts: dict[str, Any] = {"log_level": "error"} - token = cfg.github_token - if token: - opts["github_token"] = token self._client = CopilotClient(opts) await self._client.start() self._started = True diff --git a/app/runtime/agent/byok.py b/app/runtime/agent/byok.py new file mode 100644 index 0000000..9e46647 --- /dev/null +++ b/app/runtime/agent/byok.py @@ -0,0 +1,84 @@ +"""BYOK provider configuration for the Copilot SDK. + +Builds a ``provider`` dict for ``CopilotClient.create_session()`` that +points at a Foundry (Azure AI Services) endpoint using Entra ID +bearer-token authentication -- no API keys required. 
+ +Token acquisition uses ``az account get-access-token`` so it works with +whatever identity is logged in (user, service principal, managed identity). +""" + +from __future__ import annotations + +import json +import logging +import subprocess +from typing import Any + +from ..config.settings import cfg + +logger = logging.getLogger(__name__) + +_COGNITIVE_SERVICES_SCOPE = "https://cognitiveservices.azure.com" + + +def get_bearer_token() -> str: + """Obtain a short-lived Entra ID token for Cognitive Services.""" + try: + result = subprocess.run( + [ + "az", "account", "get-access-token", + "--resource", _COGNITIVE_SERVICES_SCOPE, + "--query", "accessToken", + "--output", "json", + ], + capture_output=True, + text=True, + timeout=30, + ) + if result.returncode != 0: + logger.error("[byok] az get-access-token failed: %s", result.stderr.strip()) + return "" + return json.loads(result.stdout) + except Exception: + logger.error("[byok] failed to obtain bearer token", exc_info=True) + return "" + + +def build_provider_config() -> dict[str, Any] | None: + """Build the BYOK provider dict for a Copilot SDK session. + + Returns ``None`` when Foundry is not configured, which signals the + caller to fall back to GitHub Copilot authentication. + """ + endpoint = cfg.foundry_endpoint + if not endpoint: + return None + + token = get_bearer_token() + if not token: + logger.warning("[byok] no bearer token -- Foundry BYOK will not work") + return None + + return { + "type": "azure", + "base_url": endpoint.rstrip("/"), + "bearer_token": token, + "azure": {"api_version": "2024-10-21"}, + } + + +def build_session_overrides() -> dict[str, Any]: + """Return extra kwargs to merge into session config when BYOK is active. + + These override the model and inject the provider block. Returns an + empty dict when BYOK is not configured. 
+ """ + provider = build_provider_config() + if provider is None: + return {} + + return { + "model": cfg.copilot_model, + "provider": provider, + } diff --git a/app/runtime/agent/one_shot.py b/app/runtime/agent/one_shot.py index 144ad5f..de50546 100644 --- a/app/runtime/agent/one_shot.py +++ b/app/runtime/agent/one_shot.py @@ -30,28 +30,35 @@ async def auto_approve(input_data: dict, invocation: dict) -> dict: async def run_one_shot( prompt: str, *, - model: str = "gpt-4.1", + model: str = "", system_message: str = "", timeout: float = 300, tools: list[Any] | None = None, on_pre_tool_use: PreToolHook | None = None, ) -> str | None: opts: dict[str, Any] = {"log_level": "error"} - if cfg.github_token: - opts["github_token"] = cfg.github_token hook = on_pre_tool_use or auto_approve client = CopilotClient(opts) await client.start() try: session_cfg: dict[str, Any] = { - "model": model, + "model": model or cfg.copilot_model, "hooks": {"on_pre_tool_use": hook}, } if system_message: session_cfg["system_message"] = {"mode": "append", "content": system_message} if tools: session_cfg["tools"] = tools + + # Inject BYOK provider when Foundry is configured. 
+ if cfg.foundry_endpoint: + from .byok import build_session_overrides + + overrides = build_session_overrides() + if overrides: + session_cfg.update(overrides) + session = await client.create_session(session_cfg) return await _send_and_wait(session, prompt, timeout) finally: diff --git a/app/runtime/config/settings.py b/app/runtime/config/settings.py index b750937..1dc73a6 100644 --- a/app/runtime/config/settings.py +++ b/app/runtime/config/settings.py @@ -15,7 +15,6 @@ SECRET_ENV_KEYS: frozenset[str] = frozenset({ "ADMIN_SECRET", "BOT_APP_PASSWORD", - "GITHUB_TOKEN", "ACS_CONNECTION_STRING", "AZURE_OPENAI_API_KEY", }) @@ -62,7 +61,7 @@ class AdminConfig: @dataclass class ModelConfig: - copilot_model: str = "claude-sonnet-4.6" + copilot_model: str = "gpt-4.1" copilot_agent: str = "" @@ -96,11 +95,14 @@ def reload(self) -> None: self.bot_app_tenant_id: str = e("BOT_APP_TENANT_ID") self.bot_port: int = int(e("BOT_PORT") or "3978") - self.github_token: str = e("GITHUB_TOKEN") - - self.copilot_model: str = e("COPILOT_MODEL") or "claude-sonnet-4.6" + self.copilot_model: str = e("COPILOT_MODEL") or "gpt-4.1" self.copilot_agent: str = e("COPILOT_AGENT") or "" + # Foundry (BYOK) configuration + self.foundry_endpoint: str = e("FOUNDRY_ENDPOINT") + self.foundry_name: str = e("FOUNDRY_NAME") + self.foundry_resource_group: str = e("FOUNDRY_RESOURCE_GROUP") + self.admin_port: int = int(e("ADMIN_PORT") or "9090") self.lockdown_mode: bool = bool(e("LOCKDOWN_MODE")) self.tunnel_restricted: bool = bool(e("TUNNEL_RESTRICTED")) @@ -119,7 +121,7 @@ def reload(self) -> None: self.admin_secret: str = e("ADMIN_SECRET") - self.memory_model: str = e("MEMORY_MODEL") or "claude-sonnet-4.6" + self.memory_model: str = e("MEMORY_MODEL") or "gpt-4.1" self.memory_idle_minutes: int = int(e("MEMORY_IDLE_MINUTES") or "5") self.proactive_enabled: bool = e("PROACTIVE_ENABLED").lower() in ("1", "true", "yes") if e("PROACTIVE_ENABLED") else False diff --git a/app/runtime/registries/catalog.py 
b/app/runtime/registries/catalog.py index db95aaa..021fe54 100644 --- a/app/runtime/registries/catalog.py +++ b/app/runtime/registries/catalog.py @@ -12,8 +12,6 @@ import aiohttp -from ..config.settings import cfg - logger = logging.getLogger(__name__) _CATALOG_SOURCES: list[dict[str, str]] = [ @@ -46,9 +44,6 @@ def _github_headers() -> dict[str, str]: "Accept": "application/vnd.github.v3+json", "User-Agent": "polyclaw-skill-registry", } - token = cfg.github_token - if token: - headers["Authorization"] = f"token {token}" return headers diff --git a/app/runtime/server/app_routes.py b/app/runtime/server/app_routes.py index a5c30ee..6e90e8c 100644 --- a/app/runtime/server/app_routes.py +++ b/app/runtime/server/app_routes.py @@ -74,7 +74,7 @@ def register_admin_routes( FoundryIQRoutes(foundry_iq_store, az, deploy_store).register(router) NetworkRoutes(tunnel, az, sandbox_store, foundry_iq_store).register(router) MonitoringRoutes(monitoring_store, az, deploy_store).register(router) - ContentSafetyRoutes(az, guardrails_store).register(router) + ContentSafetyRoutes(az, guardrails_store, deploy_store).register(router) from .routes.identity_routes import IdentityRoutes diff --git a/app/runtime/server/lifecycle.py b/app/runtime/server/lifecycle.py index cff0e94..ff1b64e 100644 --- a/app/runtime/server/lifecycle.py +++ b/app/runtime/server/lifecycle.py @@ -83,10 +83,9 @@ async def on_startup_runtime( bot_endpoint = os.environ.get("BOT_ENDPOINT", "") if mode != ServerMode.combined: - github_token = cfg.github_token - if not github_token: + if not cfg.foundry_endpoint: logger.warning( - "[startup.runtime] Setup incomplete -- missing GITHUB_TOKEN. " + "[startup.runtime] Setup incomplete -- missing FOUNDRY_ENDPOINT. 
" "Complete the setup wizard in the admin container, " "then recreate the agent container.", ) diff --git a/app/runtime/server/routes/content_safety_routes.py b/app/runtime/server/routes/content_safety_routes.py index eb5a2d4..fc6e761 100644 --- a/app/runtime/server/routes/content_safety_routes.py +++ b/app/runtime/server/routes/content_safety_routes.py @@ -10,13 +10,14 @@ from ...config.settings import cfg from ...services.cloud.azure import AzureCLI +from ...services.deployment.bicep_deployer import BicepDeployer, BicepDeployRequest from ...services.security.prompt_shield import PromptShieldService +from ...state.deploy_state import DeployStateStore from ...state.guardrails import GuardrailsConfigStore from ...util.async_helpers import run_sync logger = logging.getLogger(__name__) -_DEFAULT_RESOURCE_NAME = "polyclaw-content-safety" _DEFAULT_RG = "polyclaw-rg" _DEFAULT_LOCATION = "eastus" @@ -37,9 +38,12 @@ def __init__( self, az: AzureCLI | None = None, guardrails_store: GuardrailsConfigStore | None = None, + deploy_store: DeployStateStore | None = None, ) -> None: self._az = az self._store = guardrails_store + self._deploy_store = deploy_store + self._bicep = BicepDeployer(az, deploy_store) if az and deploy_store else None def register(self, router: web.UrlDispatcher) -> None: router.add_post("/api/content-safety/deploy", self._deploy) @@ -93,19 +97,10 @@ async def _test(self, _req: web.Request) -> web.Response: }) async def _deploy(self, req: web.Request) -> web.Response: - """Provision an Azure AI Content Safety resource. - - Steps: - 1. Create the Cognitive Services account (``--kind ContentSafety``). - 2. Retrieve the endpoint URL. - 3. Assign *Cognitive Services User* RBAC to the runtime identity. - 4. Update guardrails config. - - No API keys are retrieved or stored. 
- """ - if not self._az: + """Provision an Azure AI Content Safety resource via the central Bicep template.""" + if not self._bicep: return web.json_response( - {"status": "error", "message": "Azure CLI not available"}, + {"status": "error", "message": "Azure CLI or deploy store not available"}, status=400, ) if not self._store: @@ -119,31 +114,29 @@ async def _deploy(self, req: web.Request) -> web.Response: except Exception: data = {} - resource_name = data.get("resource_name", _DEFAULT_RESOURCE_NAME).strip() resource_group = data.get("resource_group", _DEFAULT_RG).strip() location = data.get("location", _DEFAULT_LOCATION).strip() - steps: list[dict[str, Any]] = [] - - # 1. Create the Content Safety resource - resource_id, endpoint = await self._create_resource( - resource_group, location, resource_name, steps, + bicep_req = BicepDeployRequest( + resource_group=resource_group, + location=location, + deploy_foundry=False, + deploy_key_vault=False, + deploy_content_safety=True, ) + result = await run_sync(self._bicep.deploy, bicep_req) - if not endpoint: + if not result.ok or not result.content_safety_endpoint: return web.json_response({ "status": "error", - "message": "Failed to create Content Safety resource", - "steps": steps, + "message": result.error or "Failed to deploy Content Safety resource", + "steps": result.steps, }, status=500) - # 2. Assign RBAC to the runtime identity - await self._assign_rbac(resource_id, steps) - - # 3. 
Update guardrails config - self._store.set_content_safety_endpoint(endpoint) + # Update guardrails config + self._store.set_content_safety_endpoint(result.content_safety_endpoint) self._store.set_filter_mode("prompt_shields") - steps.append({ + result.steps.append({ "step": "update_config", "status": "ok", "detail": ( @@ -154,8 +147,8 @@ async def _deploy(self, req: web.Request) -> web.Response: return web.json_response({ "status": "ok", - "steps": steps, - "endpoint": endpoint, + "steps": result.steps, + "endpoint": result.content_safety_endpoint, "filter_mode": "prompt_shields", }) @@ -269,73 +262,6 @@ def _match_endpoint( return acct.get("id", "") return "" - async def _create_resource( - self, - rg: str, - location: str, - name: str, - steps: list[dict[str, Any]], - ) -> tuple[str, str]: - """Create Azure AI Content Safety resource and retrieve endpoint. - - Returns ``(resource_id, endpoint)`` -- either may be empty on - failure. - """ - assert self._az is not None - - result = await run_sync( - self._az.json, - "cognitiveservices", "account", "create", - "--name", name, "--resource-group", rg, - "--location", location, "--kind", "ContentSafety", - "--sku", "S0", "--custom-domain", name, - ) - resource_id = "" - if not result or not isinstance(result, dict): - err = self._az.last_stderr or "Unknown error" - if "already exists" in err.lower() or "conflict" in err.lower(): - steps.append({ - "step": "create_resource", - "status": "ok", - "detail": f"{name} already exists, reusing", - }) - else: - steps.append({ - "step": "create_resource", - "status": "failed", - "detail": err[:300], - }) - return ("", "") - else: - resource_id = result.get("id", "") - steps.append({ - "step": "create_resource", - "status": "ok", - "detail": f"Content Safety resource '{name}' created in {rg}", - }) - - # Retrieve endpoint (and resource id if missing from create) - info = await run_sync( - self._az.json, - "cognitiveservices", "account", "show", - "--name", name, 
"--resource-group", rg, - ) - endpoint = "" - if isinstance(info, dict): - endpoint = info.get("properties", {}).get("endpoint", "") - if not resource_id: - resource_id = info.get("id", "") - if not endpoint: - endpoint = f"https://{name}.cognitiveservices.azure.com/" - - steps.append({ - "step": "get_endpoint", - "status": "ok", - "detail": endpoint, - }) - - return (resource_id, endpoint) - async def _resolve_runtime_principal( self, ) -> tuple[str, str]: diff --git a/app/runtime/server/routes/foundry_iq_routes.py b/app/runtime/server/routes/foundry_iq_routes.py index def92be..6acc0bc 100644 --- a/app/runtime/server/routes/foundry_iq_routes.py +++ b/app/runtime/server/routes/foundry_iq_routes.py @@ -9,6 +9,7 @@ from aiohttp import web from ...services.cloud.azure import AzureCLI +from ...services.deployment.bicep_deployer import BicepDeployer, BicepDeployRequest from ...services.foundry_iq import ( delete_index, ensure_index, @@ -40,6 +41,7 @@ def __init__( self._store = config_store self._az = az self._deploy_store = deploy_store + self._bicep = BicepDeployer(az, deploy_store) if az and deploy_store else None def register(self, router: web.UrlDispatcher) -> None: router.add_get("/api/foundry-iq/config", self._get_config) @@ -105,7 +107,7 @@ async def _search(self, req: web.Request) -> web.Response: return web.json_response(result) async def _provision(self, req: web.Request) -> web.Response: - if not self._az: + if not self._bicep: return _no_az() if self._store.is_provisioned: return web.json_response({ @@ -122,74 +124,89 @@ async def _provision(self, req: web.Request) -> web.Response: location = body.get("location", "eastus").strip() rg = body.get("resource_group", "").strip() or _DEFAULT_FIQ_RG - search_name = ( - body.get("search_name", "").strip() - or f"polyclaw-search-{_secrets.token_hex(4)}" - ) - openai_name = ( - body.get("openai_name", "").strip() - or f"polyclaw-aoai-{_secrets.token_hex(4)}" - ) embedding_model = body.get("embedding_model", 
"text-embedding-3-large").strip() embedding_dimensions = int(body.get("embedding_dimensions", 3072)) - steps: list[dict[str, Any]] = [] - - if not await self._ensure_rg(rg, location, steps): - return _fail_response(steps) - - search_result = await self._create_search(rg, location, search_name, steps) - if not search_result: - return _fail_response(steps) - search_endpoint, search_key = search_result - - openai_result = await self._create_openai(rg, location, openai_name, steps) - if not openai_result: - return _fail_response(steps) - openai_endpoint, openai_key = openai_result - - deployment_name = await self._deploy_model( - rg, openai_name, embedding_model, steps + bicep_req = BicepDeployRequest( + resource_group=rg, + location=location, + deploy_foundry=False, + deploy_key_vault=False, + deploy_search=True, + deploy_embedding_aoai=True, + embedding_model_name=embedding_model, ) - if not deployment_name: - return _fail_response(steps) + result = await run_sync(self._bicep.deploy, bicep_req) + + if not result.ok: + return _fail_response(result.steps) + + # Retrieve the search admin key (not available as a Bicep output) + search_key = "" + if result.search_name and self._az: + keys = await run_sync( + self._az.json, + "search", "admin-key", "show", + "--service-name", result.search_name, + "--resource-group", rg, + ) + search_key = keys.get("primaryKey", "") if isinstance(keys, dict) else "" + result.steps.append({ + "step": "search_key", + "status": "ok" if search_key else "warning", + "detail": "Key retrieved" if search_key else "Key unavailable", + }) + + # Retrieve the AOAI key (fallback; prefer Entra ID) + aoai_key = "" + if result.embedding_aoai_name and self._az: + aoai_keys = await run_sync( + self._az.json, + "cognitiveservices", "account", "keys", "list", + "--name", result.embedding_aoai_name, + "--resource-group", rg, + ) + aoai_key = aoai_keys.get("key1", "") if isinstance(aoai_keys, dict) else "" self._store.save( resource_group=rg, location=location, 
- search_resource_name=search_name, - openai_resource_name=openai_name, - openai_deployment_name=deployment_name, - search_endpoint=search_endpoint, + search_resource_name=result.search_name, + openai_resource_name=result.embedding_aoai_name, + openai_deployment_name=result.embedding_deployment_name, + search_endpoint=result.search_endpoint, search_api_key=search_key, - embedding_endpoint=openai_endpoint, - embedding_api_key=openai_key, - embedding_model=deployment_name, + embedding_endpoint=result.embedding_aoai_endpoint, + embedding_api_key=aoai_key, + embedding_model=result.embedding_deployment_name, embedding_dimensions=embedding_dimensions, index_name="polyclaw-memories", provisioned=True, enabled=True, ) - steps.append({"step": "save_config", "status": "ok", "detail": "Saved"}) + result.steps.append({"step": "save_config", "status": "ok", "detail": "Saved"}) try: idx_result = await run_sync(ensure_index, self._store) idx_ok = idx_result.get("status") == "ok" - steps.append({ + result.steps.append({ "step": "create_index", "status": "ok" if idx_ok else "failed", "detail": idx_result.get("detail", ""), }) except Exception as exc: - steps.append({ + result.steps.append({ "step": "create_index", "status": "failed", "detail": str(exc)[:200] }) - logger.info("Foundry IQ provisioned: search=%s, openai=%s", search_name, openai_name) + logger.info( + "Foundry IQ provisioned (Bicep): search=%s, aoai=%s", + result.search_name, result.embedding_aoai_name, + ) return web.json_response({ "status": "ok", "message": f"Foundry IQ provisioned in {rg}", - "steps": steps, + "steps": result.steps, "config": self._store.to_safe_dict(), }) @@ -263,177 +280,4 @@ async def _decommission(self, _req: web.Request) -> web.Response: logger.info("Foundry IQ decommissioned: %s, %s", search_name, openai_name) return web.json_response({ "status": "ok", "message": "Resources removed", "steps": steps - }) - - # -- internal helpers -- - - async def _ensure_rg( - self, rg: str, location: str, 
steps: list[dict[str, Any]] - ) -> bool: - existing = await run_sync(self._az.json, "group", "show", "--name", rg) - if existing: - steps.append({ - "step": "resource_group", "status": "ok", "detail": f"{rg} (existing)" - }) - return True - - tag_args: list[str] = [] - if self._deploy_store: - rec = self._deploy_store.current_local() - if rec: - tag_args = ["--tags", f"polyclaw_deploy={rec.tag}"] - - result = await run_sync( - self._az.json, - "group", "create", "--name", rg, "--location", location, *tag_args, - ) - ok = bool(result) - steps.append({ - "step": "resource_group", - "status": "ok" if ok else "failed", - "detail": rg if ok else (self._az.last_stderr or "Unknown error"), - }) - if ok and self._deploy_store: - rec = self._deploy_store.current_local() - if rec and rg not in rec.resource_groups: - rec.resource_groups.append(rg) - self._deploy_store.update(rec) - return ok - - async def _create_search( - self, rg: str, location: str, name: str, steps: list[dict[str, Any]] - ) -> tuple[str, str] | None: - result = await run_sync( - self._az.json, - "search", "service", "create", - "--name", name, "--resource-group", rg, - "--location", location, "--sku", "basic", - "--partition-count", "1", "--replica-count", "1", - ) - if not result or not isinstance(result, dict): - steps.append({ - "step": "create_search", "status": "failed", - "detail": (self._az.last_stderr or "Unknown")[:300], - }) - return None - - host_name = result.get("hostName") or f"{name}.search.windows.net" - endpoint = f"https://{host_name}" - steps.append({ - "step": "create_search", "status": "ok", "detail": f"{name} ({endpoint})" - }) - - if self._deploy_store: - rec = self._deploy_store.current_local() - if rec: - rec.add_resource( - resource_type="search", resource_group=rg, - resource_name=name, purpose="Foundry IQ - Azure AI Search", - resource_id=result.get("id", ""), - ) - self._deploy_store.update(rec) - - keys = await run_sync( - self._az.json, - "search", "admin-key", "show", - 
"--service-name", name, "--resource-group", rg, - ) - admin_key = keys.get("primaryKey", "") if isinstance(keys, dict) else "" - if not admin_key: - steps.append({ - "step": "search_key", "status": "failed", - "detail": (self._az.last_stderr or "Key empty")[:300], - }) - return None - steps.append({"step": "search_key", "status": "ok", "detail": "Key retrieved"}) - return endpoint, admin_key - - async def _create_openai( - self, rg: str, location: str, name: str, steps: list[dict[str, Any]] - ) -> tuple[str, str] | None: - result = await run_sync( - self._az.json, - "cognitiveservices", "account", "create", - "--name", name, "--resource-group", rg, - "--location", location, "--kind", "OpenAI", - "--sku", "S0", "--custom-domain", name, - ) - if not result or not isinstance(result, dict): - steps.append({ - "step": "create_openai", "status": "failed", - "detail": (self._az.last_stderr or "Unknown")[:300], - }) - return None - - steps.append({ - "step": "create_openai", "status": "ok", "detail": f"{name} created" - }) - - if self._deploy_store: - rec = self._deploy_store.current_local() - if rec: - rec.add_resource( - resource_type="cognitiveservices", resource_group=rg, - resource_name=name, purpose="Foundry IQ - Azure OpenAI", - resource_id=result.get("id", ""), - ) - self._deploy_store.update(rec) - - info = await run_sync( - self._az.json, - "cognitiveservices", "account", "show", - "--name", name, "--resource-group", rg, - ) - endpoint = "" - if isinstance(info, dict): - endpoint = info.get("properties", {}).get("endpoint", "") - if not endpoint: - endpoint = f"https://{name}.openai.azure.com/" - - aoai_keys = await run_sync( - self._az.json, - "cognitiveservices", "account", "keys", "list", - "--name", name, "--resource-group", rg, - ) - api_key = aoai_keys.get("key1", "") if isinstance(aoai_keys, dict) else "" - if api_key: - steps.append({"step": "openai_key", "status": "ok", "detail": "Key retrieved"}) - else: - steps.append({ - "step": "openai_key", 
"status": "ok", - "detail": "Key-based auth disabled; will use Entra ID", - }) - return endpoint, api_key - - async def _deploy_model( - self, rg: str, account: str, model: str, steps: list[dict[str, Any]] - ) -> str | None: - deployment_name = model - result = await run_sync( - self._az.json, - "cognitiveservices", "account", "deployment", "create", - "--name", account, "--resource-group", rg, - "--deployment-name", deployment_name, - "--model-name", model, "--model-version", "1", - "--model-format", "OpenAI", - "--sku-capacity", "1", "--sku-name", "Standard", - ) - if result is None: - err = self._az.last_stderr or "" - if "already exists" in err.lower() or "conflict" in err.lower(): - steps.append({ - "step": "deploy_model", "status": "ok", - "detail": f"{deployment_name} (already exists)", - }) - return deployment_name - steps.append({ - "step": "deploy_model", "status": "failed", "detail": err[:300] - }) - return None - - steps.append({ - "step": "deploy_model", "status": "ok", - "detail": f"{deployment_name} deployed", - }) - return deployment_name - + }) \ No newline at end of file diff --git a/app/runtime/server/routes/identity_routes.py b/app/runtime/server/routes/identity_routes.py index 53dd9e1..761e5ab 100644 --- a/app/runtime/server/routes/identity_routes.py +++ b/app/runtime/server/routes/identity_routes.py @@ -46,6 +46,18 @@ "role": "Key Vault Secrets Officer", "data_action": "", }, + { + "feature": "Foundry IQ (AI Search)", + "role": "Search Index Data Contributor", + "role_id": "8ece5a2f-6d1e-5eb5-8592-d2a6e1a458d6", + "data_action": "", + }, + { + "feature": "Foundry IQ (Embeddings)", + "role": "Cognitive Services OpenAI User", + "role_id": "5e0bd9bd-7b93-4f28-af87-19fc36ad61bd", + "data_action": "", + }, { "feature": "Sandbox / Code Interpreter", "role": "Azure ContainerApps Session Executor", @@ -153,6 +165,8 @@ async def _roles(self, _req: web.Request) -> web.Response: # Resolve expected session pool scope for scope-aware checking. 
session_pool_scope = self._resolve_session_pool_scope() + if not session_pool_scope: + session_pool_scope = await self._discover_session_pool() # Check which required roles are present. For the Session # Executor role we also verify that the assignment scope covers @@ -239,26 +253,29 @@ async def _fix_roles(self, req: web.Request) -> web.Response: if self._guardrails_store: cs_endpoint = self._guardrails_store.config.content_safety_endpoint + cs_resource_id = "" if cs_endpoint: - resource_id = await self._resolve_cs_resource(cs_endpoint) - if resource_id: - await self._assign_role( - principal_id, principal_type, - "a97b65f3-24c7-4388-baec-2e87135dc908", - resource_id, "Cognitive Services User", steps, - use_object_id=use_object_id, - ) - else: - steps.append({ - "step": "content_safety_rbac", - "status": "warning", - "detail": f"Cannot resolve resource for endpoint {cs_endpoint}", - }) + cs_resource_id = await self._resolve_cs_resource(cs_endpoint) + else: + # Discover from the resource group when no endpoint is stored. + cs_resource_id = await self._discover_cs_resource() + + if cs_resource_id: + await self._assign_role( + principal_id, principal_type, + "a97b65f3-24c7-4388-baec-2e87135dc908", + cs_resource_id, "Cognitive Services User", steps, + use_object_id=use_object_id, + ) else: steps.append({ "step": "content_safety_rbac", - "status": "skipped", - "detail": "No Content Safety endpoint configured", + "status": "warning" if cs_endpoint else "skipped", + "detail": ( + f"Cannot resolve resource for endpoint {cs_endpoint}" + if cs_endpoint + else "No Content Safety resource found in resource group" + ), }) # Fix Session Pool Executor role @@ -361,11 +378,14 @@ async def _fix_session_pool_role( f"/subscriptions/{sub_id}/resourceGroups/{rg}" f"/providers/Microsoft.App/sessionPools/{name}" ) + if not pool_id: + # Discover from the resource group. 
+ pool_id = await self._discover_session_pool() if not pool_id: steps.append({ "step": "session_pool_rbac", "status": "skipped", - "detail": "No session pool configured", + "detail": "No session pool found in resource group", }) return @@ -429,45 +449,88 @@ async def _resolve_principal( async def _resolve_cs_resource(self, endpoint: str) -> str: """Find the ARM resource ID for a Content Safety endpoint. - First tries scoping to the configured resource group (fast). If - that yields nothing, falls back to a subscription-wide listing. + Extracts the resource name from the endpoint hostname and looks it + up via ``az resource list`` (fast) instead of the slow + ``az cognitiveservices account list``. Falls back to the + default-RG discovery helper when name extraction fails. """ assert self._az is not None - normalised = endpoint.rstrip("/").lower() - rg = _DEFAULT_RG - if rg: - accounts = await run_sync( + resource_name = "" + stripped = endpoint.rstrip("/").lower() + for prefix in ("https://", "http://"): + if stripped.startswith(prefix): + stripped = stripped[len(prefix):] + break + host = stripped.split("/")[0] + if ".cognitiveservices.azure.com" in host: + resource_name = host.split(".cognitiveservices.azure.com")[0] + + if resource_name: + # Subscription-wide lookup by name -- works across any RG. + resources = await run_sync( + self._az.json, + "resource", "list", + "--name", resource_name, + "--resource-type", "Microsoft.CognitiveServices/accounts", + "--query", "[].id", + ) + if isinstance(resources, list) and resources: + logger.info("[identity.resolve] CS resource: %s", resources[0]) + return resources[0] + + # Fallback to RG-scoped discovery + return await self._discover_cs_resource() + + async def _discover_cs_resource(self) -> str: + """Find a ContentSafety Cognitive Services account. + + Checks the configured / default resource group first, then falls + back to a subscription-wide search (covers dedicated service RGs). 
+ """ + if not self._az: + return "" + rg = cfg.env.read("FOUNDRY_RESOURCE_GROUP") or _DEFAULT_RG + for rg_args in ( + ["--resource-group", rg], + [], # subscription-wide fallback + ): + resources = await run_sync( self._az.json, - "cognitiveservices", "account", "list", - "--resource-group", rg, + "resource", "list", + *rg_args, + "--resource-type", "Microsoft.CognitiveServices/accounts", + "--query", "[?kind=='ContentSafety'].id", ) - rid = self._match_cs_endpoint(accounts, normalised) - if rid: + if isinstance(resources, list) and resources: + rid = resources[0] + logger.info("[identity.discover] found CS resource: %s", rid) return rid + return "" - # Fallback: subscription-wide (slower) - accounts = await run_sync( - self._az.json, "cognitiveservices", "account", "list", - ) - return self._match_cs_endpoint(accounts, normalised) + async def _discover_session_pool(self) -> str: + """Find a session pool ARM id. - @staticmethod - def _match_cs_endpoint( - accounts: list[Any] | dict[str, Any] | None, - normalised: str, - ) -> str: - """Return the ARM resource ID whose endpoint matches *normalised*.""" - if not isinstance(accounts, list): + Checks the default RG first, then falls back to subscription-wide. 
+ """ + if not self._az: return "" - for acct in accounts: - if not isinstance(acct, dict): - continue - acct_ep = ( - acct.get("properties", {}).get("endpoint", "") - ).rstrip("/").lower() - if acct_ep == normalised: - return acct.get("id", "") + rg = cfg.env.read("FOUNDRY_RESOURCE_GROUP") or _DEFAULT_RG + for rg_args in ( + ["--resource-group", rg], + [], # subscription-wide fallback + ): + resources = await run_sync( + self._az.json, + "resource", "list", + *rg_args, + "--resource-type", "Microsoft.App/sessionPools", + "--query", "[].id", + ) + if isinstance(resources, list) and resources: + rid = resources[0] + logger.info("[identity.discover] found session pool: %s", rid) + return rid return "" async def _assign_role( diff --git a/app/runtime/server/routes/monitoring_routes.py b/app/runtime/server/routes/monitoring_routes.py index eec581e..ec74dfc 100644 --- a/app/runtime/server/routes/monitoring_routes.py +++ b/app/runtime/server/routes/monitoring_routes.py @@ -9,6 +9,7 @@ from aiohttp import web from ...services.cloud.azure import AzureCLI +from ...services.deployment.bicep_deployer import BicepDeployer, BicepDeployRequest from ...services.otel import configure_otel, get_status, is_active, shutdown_otel from ...state.deploy_state import DeployStateStore from ...state.monitoring_config import MonitoringConfigStore @@ -32,6 +33,7 @@ def __init__( self._store = store self._az = az self._deploy_store = deploy_store + self._bicep = BicepDeployer(az, deploy_store) if az and deploy_store else None def register(self, router: web.UrlDispatcher) -> None: router.add_get("/api/monitoring/config", self._get_config) @@ -151,10 +153,7 @@ async def _test_connection(self, req: web.Request) -> web.Response: # ------------------------------------------------------------------ async def _provision(self, req: web.Request) -> web.Response: - """Provision a Log Analytics workspace + Application Insights resource.""" - if not self._az: - return _no_az() - + """Provision Log 
Analytics + Application Insights via the central Bicep template.""" if self._store.is_provisioned: return web.json_response({ "status": "ok", @@ -163,6 +162,9 @@ async def _provision(self, req: web.Request) -> web.Response: **self._store.to_dict(), }) + if not self._bicep: + return _no_az() + try: body = await req.json() if req.can_read_body else {} except Exception: @@ -170,81 +172,50 @@ async def _provision(self, req: web.Request) -> web.Response: location = body.get("location", "eastus").strip() rg = body.get("resource_group", "").strip() or _DEFAULT_MONITORING_RG - suffix = _secrets.token_hex(4) - ai_name = body.get("app_insights_name", "").strip() or f"polyclaw-insights-{suffix}" - ws_name = body.get("workspace_name", "").strip() or f"polyclaw-logs-{suffix}" - - steps: list[dict[str, Any]] = [] - # 1. Ensure the application-insights CLI extension is installed - if not await self._ensure_extension(steps): - return _fail_response(steps) - - # 2. Ensure resource group - if not await self._ensure_rg(rg, location, steps): - return _fail_response(steps) - - # 3. Create Log Analytics workspace - ws_id = await self._create_workspace(rg, location, ws_name, steps) - if not ws_id: - return _fail_response(steps) + bicep_req = BicepDeployRequest( + resource_group=rg, + location=location, + deploy_foundry=False, + deploy_key_vault=False, + deploy_monitoring=True, + ) + result = await run_sync(self._bicep.deploy, bicep_req) - # 4. Create Application Insights component linked to the workspace - cs = await self._create_app_insights(rg, location, ai_name, ws_id, steps) - if not cs: - return _fail_response(steps) + if not result.ok or not result.app_insights_connection_string: + return _fail_response(result.steps) - # 5. 
Persist metadata and enable OTel + # Persist metadata and enable OTel sub_id = "" if self._az: account = self._az.account_info() sub_id = account.get("id", "") if account else "" self._store.set_provisioned_metadata( - app_insights_name=ai_name, - workspace_name=ws_name, + app_insights_name=result.app_insights_name, + workspace_name=result.log_analytics_workspace_name, resource_group=rg, location=location, - connection_string=cs, + connection_string=result.app_insights_connection_string, subscription_id=sub_id, ) - steps.append({"step": "save_config", "status": "ok", "detail": "Configuration saved"}) - - # Register resources in deploy state - if self._deploy_store: - rec = self._deploy_store.current_local() - if rec: - rec.add_resource( - resource_type="log_analytics_workspace", - resource_group=rg, - resource_name=ws_name, - purpose="Monitoring Log Analytics workspace", - ) - rec.add_resource( - resource_type="app_insights", - resource_group=rg, - resource_name=ai_name, - purpose="Application Insights for OTel telemetry", - ) - if rg not in rec.resource_groups: - rec.resource_groups.append(rg) - self._deploy_store.update(rec) + result.steps.append({"step": "save_config", "status": "ok", "detail": "Configuration saved"}) - # 6. 
Activate OTel immediately + # Activate OTel immediately configure_otel( - cs, + result.app_insights_connection_string, sampling_ratio=self._store.config.sampling_ratio, enable_live_metrics=self._store.config.enable_live_metrics, ) - steps.append({"step": "otel_bootstrap", "status": "ok", "detail": "OTel configured"}) + result.steps.append({"step": "otel_bootstrap", "status": "ok", "detail": "OTel configured"}) logger.info( - "[monitoring.provision] App Insights '%s' provisioned (rg=%s)", - ai_name, rg, + "[monitoring.provision] App Insights '%s' provisioned via Bicep (rg=%s)", + result.app_insights_name, rg, ) return web.json_response({ "status": "ok", - "message": f"Application Insights '{ai_name}' provisioned and monitoring enabled.", - "steps": steps, + "message": f"Application Insights '{result.app_insights_name}' provisioned and monitoring enabled.", + "steps": result.steps, **self._store.to_dict(), }) @@ -326,114 +297,3 @@ async def _decommission(self, _req: web.Request) -> web.Response: "steps": steps, **self._store.to_dict(), }) - - # -- internal helpers -- - - async def _ensure_extension(self, steps: list[dict[str, Any]]) -> bool: - """Ensure the ``application-insights`` CLI extension is installed.""" - ok, msg = await run_sync( - self._az.ok, - "extension", "add", "--name", "application-insights", "--yes", - ) - steps.append({ - "step": "cli_extension", - "status": "ok" if ok else "failed", - "detail": "application-insights extension ready" if ok else (msg or "Unknown error"), - }) - return ok - - async def _ensure_rg( - self, rg: str, location: str, steps: list[dict[str, Any]] - ) -> bool: - existing = await run_sync(self._az.json, "group", "show", "--name", rg) - if existing: - steps.append({"step": "resource_group", "status": "ok", "detail": f"{rg} (existing)"}) - return True - - tag_args: list[str] = [] - if self._deploy_store: - rec = self._deploy_store.current_local() - if rec: - tag_args = ["--tags", f"polyclaw_deploy={rec.tag}"] - - result = await 
run_sync( - self._az.json, - "group", "create", "--name", rg, "--location", location, *tag_args, - ) - ok = bool(result) - steps.append({ - "step": "resource_group", - "status": "ok" if ok else "failed", - "detail": rg if ok else (self._az.last_stderr or "Unknown error"), - }) - if ok and self._deploy_store: - rec = self._deploy_store.current_local() - if rec and rg not in rec.resource_groups: - rec.resource_groups.append(rg) - self._deploy_store.update(rec) - return ok - - async def _create_workspace( - self, - rg: str, - location: str, - ws_name: str, - steps: list[dict[str, Any]], - ) -> str | None: - """Create a Log Analytics workspace. Returns the workspace resource ID.""" - logger.info("[monitoring.provision] Creating Log Analytics workspace '%s'...", ws_name) - result = await run_sync( - self._az.json, - "monitor", "log-analytics", "workspace", "create", - "--workspace-name", ws_name, "--resource-group", rg, "--location", location, - ) - if not result or not isinstance(result, dict): - err = self._az.last_stderr or "Unknown error" - steps.append({"step": "create_workspace", "status": "failed", "detail": err[:300]}) - return None - - ws_id = result.get("id", "") - steps.append({ - "step": "create_workspace", "status": "ok", - "detail": f"{ws_name} created", - }) - return ws_id - - async def _create_app_insights( - self, - rg: str, - location: str, - ai_name: str, - ws_id: str, - steps: list[dict[str, Any]], - ) -> str | None: - """Create an Application Insights component. 
Returns the connection string.""" - logger.info("[monitoring.provision] Creating Application Insights '%s'...", ai_name) - result = await run_sync( - self._az.json, - "monitor", "app-insights", "component", "create", - "--app", ai_name, - "--location", location, - "--resource-group", rg, - "--workspace", ws_id, - "--application-type", "web", - ) - if not result or not isinstance(result, dict): - err = self._az.last_stderr or "Unknown error" - steps.append({"step": "create_app_insights", "status": "failed", "detail": err[:300]}) - return None - - cs = result.get("connectionString", "") - if not cs: - steps.append({ - "step": "create_app_insights", "status": "failed", - "detail": "Resource created but connectionString not found in response", - }) - return None - - steps.append({ - "step": "create_app_insights", "status": "ok", - "detail": f"{ai_name} created", - }) - return cs - diff --git a/app/runtime/server/routes/network_topology.py b/app/runtime/server/routes/network_topology.py index 1c2157e..b96c826 100644 --- a/app/runtime/server/routes/network_topology.py +++ b/app/runtime/server/routes/network_topology.py @@ -94,12 +94,12 @@ def build_components( "status": "configured", }) - # GitHub Copilot (model backend) - if cfg.github_token: + # Foundry (model backend) + if cfg.foundry_endpoint: components.append({ - "name": "GitHub Copilot", + "name": "Foundry", "type": "ai", - "endpoint": "https://api.githubcopilot.com", + "endpoint": cfg.foundry_endpoint, "model": cfg.copilot_model, "status": "configured", }) diff --git a/app/runtime/server/routes/sandbox_routes.py b/app/runtime/server/routes/sandbox_routes.py index 65dc998..0c93ab3 100644 --- a/app/runtime/server/routes/sandbox_routes.py +++ b/app/runtime/server/routes/sandbox_routes.py @@ -10,6 +10,7 @@ from ...sandbox import SandboxExecutor from ...services.cloud.azure import AzureCLI +from ...services.deployment.bicep_deployer import BicepDeployer, BicepDeployRequest from ...state.deploy_state import 
DeployStateStore from ...state.sandbox_config import BLACKLIST, DEFAULT_WHITELIST, SandboxConfigStore from ...util.async_helpers import run_sync @@ -34,6 +35,7 @@ def __init__( self._executor = executor self._az = az self._deploy_store = deploy_store + self._bicep = BicepDeployer(az, deploy_store) if az and deploy_store else None def register(self, router: web.UrlDispatcher) -> None: router.add_get("/api/sandbox/config", self.get_config) @@ -118,7 +120,7 @@ async def test_sandbox(self, req: web.Request) -> web.Response: ) async def provision_pool(self, req: web.Request) -> web.Response: - if not self._az: + if not self._bicep: return _no_az() try: body = await req.json() if req.can_read_body else {} @@ -127,12 +129,6 @@ async def provision_pool(self, req: web.Request) -> web.Response: location = body.get("location", "eastus").strip() rg = body.get("resource_group", "").strip() or _DEFAULT_SANDBOX_RG - pool_name = ( - body.get("pool_name", "").strip() - or f"polyclaw-sandbox-{_secrets.token_hex(4)}" - ) - - steps: list[dict[str, Any]] = [] if self._store.is_provisioned: return web.json_response({ @@ -143,30 +139,34 @@ async def provision_pool(self, req: web.Request) -> web.Response: "is_provisioned": True, }) - if not await self._ensure_rg(rg, location, steps): - return _fail_response(steps) + bicep_req = BicepDeployRequest( + resource_group=rg, + location=location, + deploy_foundry=False, + deploy_key_vault=False, + deploy_session_pool=True, + ) + result = await run_sync(self._bicep.deploy, bicep_req) - pool_result = await self._create_pool(rg, location, pool_name, steps) - if not pool_result: - return _fail_response(steps) + if not result.ok or not result.session_pool_endpoint: + return _fail_response(result.steps) - endpoint, pool_id = pool_result self._store.set_pool_metadata( resource_group=rg, location=location, - pool_name=pool_name, - pool_id=pool_id, - endpoint=endpoint, + pool_name=result.session_pool_name, + pool_id=result.session_pool_id, + 
endpoint=result.session_pool_endpoint, ) - steps.append({ + result.steps.append({ "step": "save_config", "status": "ok", "detail": "Configuration saved" }) - logger.info("Sandbox pool provisioned: %s (rg=%s)", pool_name, rg) + logger.info("Sandbox pool provisioned (Bicep): %s (rg=%s)", result.session_pool_name, rg) return web.json_response({ "status": "ok", - "message": f"Session pool '{pool_name}' provisioned", - "steps": steps, + "message": f"Session pool '{result.session_pool_name}' provisioned", + "steps": result.steps, **self._store.to_dict(), "is_provisioned": True, }) @@ -229,89 +229,3 @@ async def remove_pool(self, _req: web.Request) -> web.Response: **self._store.to_dict(), "is_provisioned": False, }) - - # -- internal helpers -- - - async def _ensure_rg( - self, rg: str, location: str, steps: list[dict[str, Any]] - ) -> bool: - existing = await run_sync(self._az.json, "group", "show", "--name", rg) - if existing: - steps.append({ - "step": "resource_group", "status": "ok", "detail": f"{rg} (existing)" - }) - return True - - tag_args: list[str] = [] - if self._deploy_store: - rec = self._deploy_store.current_local() - if rec: - tag_args = ["--tags", f"polyclaw_deploy={rec.tag}"] - - result = await run_sync( - self._az.json, - "group", "create", "--name", rg, "--location", location, *tag_args, - ) - ok = bool(result) - steps.append({ - "step": "resource_group", - "status": "ok" if ok else "failed", - "detail": rg if ok else (self._az.last_stderr or "Unknown error"), - }) - if ok and self._deploy_store: - rec = self._deploy_store.current_local() - if rec and rg not in rec.resource_groups: - rec.resource_groups.append(rg) - self._deploy_store.update(rec) - return ok - - async def _create_pool( - self, - rg: str, - location: str, - pool_name: str, - steps: list[dict[str, Any]], - ) -> tuple[str, str] | None: - logger.info("Creating session pool '%s' in rg '%s'...", pool_name, rg) - result = await run_sync( - self._az.json, - "containerapp", "sessionpool", 
"create", - "--name", pool_name, "--resource-group", rg, - "--location", location, "--container-type", "PythonLTS", - "--cooldown-period", "300", - ) - if not result or not isinstance(result, dict): - err = self._az.last_stderr or "Unknown error" - steps.append({ - "step": "create_pool", "status": "failed", "detail": err[:300] - }) - return None - - props = result.get("properties", {}) - endpoint = props.get("poolManagementEndpoint", "") - pool_id = result.get("id", "") - if not endpoint: - endpoint = ( - f"https://{location}.dynamicsessions.io" - f"/subscriptions/pools/{pool_name}" - ) - - steps.append({ - "step": "create_pool", "status": "ok", - "detail": f"{pool_name} -> {endpoint}", - }) - - if self._deploy_store: - rec = self._deploy_store.current_local() - if rec: - rec.add_resource( - resource_type="session_pool", - resource_group=rg, - resource_name=pool_name, - purpose="Agent sandbox session pool", - resource_id=pool_id, - ) - self._deploy_store.update(rec) - - return endpoint, pool_id - diff --git a/app/runtime/server/routes/skill_routes.py b/app/runtime/server/routes/skill_routes.py index 4e786e3..7bb8d25 100644 --- a/app/runtime/server/routes/skill_routes.py +++ b/app/runtime/server/routes/skill_routes.py @@ -106,14 +106,12 @@ async def _marketplace(self, req: web.Request) -> web.Response: result["rate_limit_warning"] = ( f"GitHub API rate limit exceeded for your IP. " f"Catalog results may be incomplete. " - f"Rate limit resets in ~{wait_min} min. " - f"Set a GITHUB_TOKEN for a higher limit." + f"Rate limit resets in ~{wait_min} min." ) else: result["rate_limit_warning"] = ( "GitHub API rate limit exceeded for your IP. " - "Catalog results may be incomplete. " - "Set a GITHUB_TOKEN for a higher limit." + "Catalog results may be incomplete." 
) return web.json_response(result) diff --git a/app/runtime/server/setup/__init__.py b/app/runtime/server/setup/__init__.py index f10a8b5..05a1de6 100644 --- a/app/runtime/server/setup/__init__.py +++ b/app/runtime/server/setup/__init__.py @@ -1,10 +1,11 @@ -"""Setup wizard -- Azure, deployment, voice, prerequisites, and preflight.""" +"""Setup wizard -- Azure, Foundry deployment, voice, prerequisites, and preflight.""" from __future__ import annotations from ._routes import SetupRoutes from .azure import AzureSetupRoutes from .deploy import DeploymentRoutes +from .foundry import FoundryDeployRoutes from .preflight import PreflightRoutes from .prerequisites import PrerequisitesRoutes from .voice import VoiceSetupRoutes @@ -12,6 +13,7 @@ __all__ = [ "AzureSetupRoutes", "DeploymentRoutes", + "FoundryDeployRoutes", "PreflightRoutes", "PrerequisitesRoutes", "SetupRoutes", diff --git a/app/runtime/server/setup/_routes.py b/app/runtime/server/setup/_routes.py index 8565db3..c617399 100644 --- a/app/runtime/server/setup/_routes.py +++ b/app/runtime/server/setup/_routes.py @@ -4,6 +4,7 @@ import logging import os +import subprocess from collections.abc import Callable import aiohttp as _aiohttp @@ -21,6 +22,7 @@ from .azure import AzureSetupRoutes from ._helpers import error_response as _error, ok_response as _ok from .deploy import DeploymentRoutes +from .foundry import FoundryDeployRoutes from .preflight import PreflightRoutes from .prerequisites import PrerequisitesRoutes from .voice import VoiceSetupRoutes @@ -57,6 +59,11 @@ def __init__( self._voice_routes = VoiceSetupRoutes(az, infra_store) self._prerequisites_routes = PrerequisitesRoutes(az, infra_store, deploy_store) self._preflight_routes = PreflightRoutes(tunnel, infra_store, az=az) + self._foundry_routes = FoundryDeployRoutes( + az=az, + deploy_store=deploy_store, + restart_runtime=self._restart_runtime, + ) self._deployment_routes = DeploymentRoutes( az=az, provisioner=provisioner, @@ -89,6 +96,7 @@ def 
register(self, router: web.UrlDispatcher) -> None: r.add_get("/api/setup/config", self.get_config) r.add_post("/api/setup/config", self.save_config) self._preflight_routes.register(r) + self._foundry_routes.register(r) self._deployment_routes.register(r) # -- Status -- @@ -97,18 +105,26 @@ async def status(self, _req: web.Request) -> web.Response: from ..tunnel_status import resolve_tunnel_info account = self._az.account_info() - copilot = self._gh.status() kv_url = cfg.env.read("KEY_VAULT_URL") or "" tunnel_info = await resolve_tunnel_info(self._tunnel, self._az) + logged_in = account is not None + needs_subscription = bool(account and account.get("_no_default_subscription")) + return web.json_response({ "azure": { - "logged_in": account is not None, - "user": account.get("user", {}).get("name") if account else None, - "subscription": account.get("name") if account else None, - "subscription_id": account.get("id") if account else None, + "logged_in": logged_in, + "needs_subscription": needs_subscription, + "user": account.get("user", {}).get("name") if account and not needs_subscription else None, + "subscription": account.get("name") if account and not needs_subscription else None, + "subscription_id": account.get("id") if account and not needs_subscription else None, + }, + "foundry": { + "deployed": bool(cfg.foundry_endpoint), + "endpoint": cfg.foundry_endpoint, + "name": cfg.foundry_name, + "resource_group": cfg.foundry_resource_group, }, - "copilot": copilot, "tunnel": tunnel_info, "lockdown_mode": cfg.lockdown_mode, "prerequisites_configured": bool(kv_url), @@ -125,12 +141,6 @@ async def status(self, _req: web.Request) -> web.Response: async def copilot_status(self, _req: web.Request) -> web.Response: info = self._gh.status() - if info.get("authenticated") and not cfg.github_token: - token = self._gh.extract_token() - if token: - cfg.write_env(GITHUB_TOKEN=token) - logger.info("[setup.copilot] persisted GITHUB_TOKEN from gh CLI session") - await 
self._restart_runtime() return web.json_response(info) async def copilot_login(self, _req: web.Request) -> web.Response: @@ -140,20 +150,65 @@ async def copilot_login(self, _req: web.Request) -> web.Response: ) async def copilot_set_token(self, req: web.Request) -> web.Response: - body = await req.json() - token = body.get("token", "").strip() - if not token: - return _error("Token is required", 400) - cfg.write_env(GITHUB_TOKEN=token) - await self._restart_runtime() - return _ok("GitHub token saved") + return _error("GitHub token is no longer used. Configure FOUNDRY_ENDPOINT instead.", 410) async def _restart_runtime(self) -> None: - """Signal the runtime container to reload configuration.""" + """Restart or reload the runtime container. + + Docker mode: full ``docker restart`` so the entrypoint re-runs + ``az login --service-principal`` with the SP credentials from + ``/data/.env``. A soft reload cannot replicate this. + + Non-Docker: HTTP POST to ``/api/internal/reload``. + """ runtime_url = os.getenv("RUNTIME_URL", "") if not runtime_url or cfg.server_mode == ServerMode.combined: return + # Docker mode -- hard restart (re-runs entrypoint + az login) + # Falls back to ``docker compose up -d runtime`` when the container + # does not exist yet (first provision). 
+ if os.getenv("POLYCLAW_CONTAINER") == "1": + try: + proc = await run_sync( + subprocess.run, + ["docker", "restart", "polyclaw-runtime"], + capture_output=True, text=True, timeout=60, + ) + if proc.returncode == 0: + logger.info("[setup.restart_runtime] docker restart succeeded") + else: + stderr = proc.stderr.strip() + logger.warning( + "[setup.restart_runtime] docker restart failed: %s", stderr, + ) + # Container doesn't exist yet -- start it via compose + if "No such container" in stderr or "not found" in stderr.lower(): + logger.info( + "[setup.restart_runtime] container missing, " + "attempting docker compose up -d runtime", + ) + up = await run_sync( + subprocess.run, + ["docker", "compose", "up", "-d", "runtime"], + capture_output=True, text=True, timeout=120, + ) + if up.returncode == 0: + logger.info( + "[setup.restart_runtime] compose up succeeded", + ) + else: + logger.warning( + "[setup.restart_runtime] compose up failed: %s", + up.stderr.strip(), + ) + except Exception as exc: + logger.warning( + "[setup.restart_runtime] docker restart error: %s", + exc, exc_info=True, + ) + return + url = f"{runtime_url.rstrip('/')}/api/internal/reload" headers: dict[str, str] = {} if cfg.admin_secret: @@ -356,7 +411,6 @@ async def get_config(self, _req: web.Request) -> web.Response: raw = { "COPILOT_MODEL": cfg.env.read("COPILOT_MODEL") or cfg.copilot_model, "BOT_PORT": cfg.env.read("BOT_PORT") or str(cfg.bot_port), - "GITHUB_TOKEN": cfg.env.read("GITHUB_TOKEN"), } for key in raw: if key in SECRET_ENV_KEYS and raw[key]: @@ -366,7 +420,6 @@ async def get_config(self, _req: web.Request) -> web.Response: _ALLOWED_CONFIG_KEYS: frozenset[str] = frozenset({ "COPILOT_MODEL", "BOT_PORT", - "GITHUB_TOKEN", }) async def save_config(self, req: web.Request) -> web.Response: diff --git a/app/runtime/server/setup/azure.py b/app/runtime/server/setup/azure.py index 12a1e6a..66e1dea 100644 --- a/app/runtime/server/setup/azure.py +++ b/app/runtime/server/setup/azure.py @@ -28,23 
+28,30 @@ def register(self, router: web.UrlDispatcher) -> None: async def azure_login(self, _req: web.Request) -> web.Response: account = self._az.account_info() - if account: + if account and not account.get("_no_default_subscription"): return web.json_response({ "status": "already_logged_in", "user": account.get("user", {}).get("name"), "subscription": account.get("name"), }) + if account and account.get("_no_default_subscription"): + return web.json_response({ + "status": "needs_subscription", + "message": "Logged in but no default subscription. Please select one.", + }) info = self._az.login_device_code() return web.json_response({"status": "device_code_pending", **info}) async def azure_check(self, _req: web.Request) -> web.Response: account = self._az.account_info() - if account: + if account and not account.get("_no_default_subscription"): return web.json_response({ "status": "logged_in", "user": account.get("user", {}).get("name"), "subscription": account.get("name"), }) + if account and account.get("_no_default_subscription"): + return web.json_response({"status": "needs_subscription"}) return web.json_response({"status": "pending"}) async def azure_logout(self, _req: web.Request) -> web.Response: @@ -53,16 +60,8 @@ async def azure_logout(self, _req: web.Request) -> web.Response: return _ok(msg) if ok else _error(msg) async def list_subscriptions(self, _req: web.Request) -> web.Response: - subs = self._az.json("account", "list") or [] - return web.json_response([ - { - "id": s.get("id", ""), - "name": s.get("name", ""), - "is_default": s.get("isDefault", False), - "state": s.get("state", ""), - } - for s in (subs if isinstance(subs, list) else []) - ]) + subs = self._az.list_subscriptions() + return web.json_response(subs) async def set_subscription(self, req: web.Request) -> web.Response: body = await req.json() @@ -70,7 +69,7 @@ async def set_subscription(self, req: web.Request) -> web.Response: if not sub_id: return _error("subscription_id is 
required", 400) ok, msg = self._az.ok("account", "set", "--subscription", sub_id) - self._az.invalidate_cache("account", "show") + self._az.invalidate_cache() return _ok(f"Subscription set to {sub_id}") if ok else _error(f"Failed: {msg}") async def list_resource_groups(self, _req: web.Request) -> web.Response: diff --git a/app/runtime/server/setup/foundry.py b/app/runtime/server/setup/foundry.py new file mode 100644 index 0000000..1de40b1 --- /dev/null +++ b/app/runtime/server/setup/foundry.py @@ -0,0 +1,190 @@ +"""Foundry infrastructure routes -- /api/setup/foundry/*. + +Single entry point for all infrastructure provisioning via Bicep template. +Replaces the scattered ``az`` CLI provisioning with one clean deployment. +""" + +from __future__ import annotations + +import asyncio +import json +import logging +from typing import Any + +from aiohttp import web + +from ...config.settings import cfg +from ...services.cloud.azure import AzureCLI +from ...services.deployment.bicep_deployer import BicepDeployer, BicepDeployRequest +from ...state.deploy_state import DeployStateStore +from ...util.async_helpers import run_sync +from ._helpers import error_response as _error, ok_response as _ok + +logger = logging.getLogger(__name__) + + +class FoundryDeployRoutes: + """Handles Foundry infrastructure provisioning via Bicep.""" + + def __init__( + self, + az: AzureCLI, + deploy_store: DeployStateStore, + restart_runtime: Any = None, + ) -> None: + self._az = az + self._deployer = BicepDeployer(az, deploy_store) + self._restart_runtime = restart_runtime + + def register(self, router: web.UrlDispatcher) -> None: + router.add_get("/api/setup/foundry/status", self.foundry_status) + router.add_post("/api/setup/foundry/deploy", self.foundry_deploy) + router.add_get("/api/setup/foundry/deploy/stream", self.foundry_deploy_stream) + router.add_post("/api/setup/foundry/decommission", self.foundry_decommission) + + async def foundry_status(self, _req: web.Request) -> web.Response: + 
status = self._deployer.status() + return web.json_response(status) + + async def foundry_deploy(self, req: web.Request) -> web.Response: + body = await req.json() if req.can_read_body else {} + deploy_req = BicepDeployRequest( + resource_group=body.get("resource_group", "polyclaw-rg"), + location=body.get("location", "eastus"), + base_name=body.get("base_name", ""), + deploy_key_vault=body.get("deploy_key_vault", True), + deploy_acs=body.get("deploy_acs", False), + deploy_content_safety=body.get("deploy_content_safety", False), + deploy_search=body.get("deploy_search", False), + deploy_embedding_aoai=body.get("deploy_embedding_aoai", False), + deploy_monitoring=body.get("deploy_monitoring", False), + deploy_session_pool=body.get("deploy_session_pool", False), + ) + if body.get("models"): + deploy_req.models = body["models"] + + result = await run_sync(self._deployer.deploy, deploy_req) + + if result.ok and self._restart_runtime: + try: + await self._restart_runtime() + except Exception: + logger.warning("Failed to restart runtime after deploy", exc_info=True) + + return web.json_response({ + "status": "ok" if result.ok else "error", + "deploy_id": result.deploy_id, + "foundry_endpoint": result.foundry_endpoint, + "foundry_name": result.foundry_name, + "deployed_models": result.deployed_models, + "key_vault_url": result.key_vault_url, + "steps": result.steps, + "error": result.error, + }, status=200 if result.ok else 500) + + async def foundry_deploy_stream(self, req: web.Request) -> web.StreamResponse: + """SSE endpoint that streams deployment progress in real time. + + The deploy config is passed as query-string JSON (``?config={...}``). + Each step is sent as ``data: {json}\n\n``. A final ``event: done`` + message carries the full result. 
+ """ + config_raw = req.query.get("config", "{}") + try: + body = json.loads(config_raw) + except json.JSONDecodeError: + body = {} + + deploy_req = BicepDeployRequest( + resource_group=body.get("resource_group", "polyclaw-rg"), + location=body.get("location", "eastus"), + base_name=body.get("base_name", ""), + deploy_key_vault=body.get("deploy_key_vault", True), + deploy_acs=body.get("deploy_acs", False), + deploy_content_safety=body.get("deploy_content_safety", False), + deploy_search=body.get("deploy_search", False), + deploy_embedding_aoai=body.get("deploy_embedding_aoai", False), + deploy_monitoring=body.get("deploy_monitoring", False), + deploy_session_pool=body.get("deploy_session_pool", False), + ) + if body.get("models"): + deploy_req.models = body["models"] + + resp = web.StreamResponse( + status=200, + reason="OK", + headers={ + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) + await resp.prepare(req) + + loop = asyncio.get_event_loop() + queue: asyncio.Queue[dict[str, str] | None] = asyncio.Queue() + + def _on_step(step: dict[str, str]) -> None: + loop.call_soon_threadsafe(queue.put_nowait, step) + + async def _run_deploy() -> Any: + try: + return await run_sync(self._deployer.deploy, deploy_req, _on_step) + finally: + loop.call_soon_threadsafe(queue.put_nowait, None) + + task = asyncio.ensure_future(_run_deploy()) + + # Stream steps as they arrive; None signals completion + while True: + step = await queue.get() + if step is None: + break + try: + await resp.write( + ("data: %s\n\n" % json.dumps(step)).encode() + ) + except ConnectionResetError: + task.cancel() + return resp + + result = await task + + if result.ok and self._restart_runtime: + try: + await self._restart_runtime() + await resp.write( + ("data: %s\n\n" % json.dumps( + {"step": "restart_runtime", "status": "ok"} + )).encode() + ) + except Exception: + logger.warning("Failed to restart runtime after 
deploy", exc_info=True) + + # Final done event + done_payload = json.dumps({ + "status": "ok" if result.ok else "error", + "deploy_id": result.deploy_id, + "error": result.error, + }) + await resp.write(("event: done\ndata: %s\n\n" % done_payload).encode()) + await resp.write_eof() + return resp + + async def foundry_decommission(self, req: web.Request) -> web.Response: + body = await req.json() if req.can_read_body else {} + rg = body.get("resource_group", "") + steps = await run_sync(self._deployer.decommission, rg) + has_failure = any(s.get("status") == "failed" for s in steps) + + if not has_failure and self._restart_runtime: + try: + await self._restart_runtime() + except Exception: + logger.warning("Failed to restart runtime after decommission", exc_info=True) + + return web.json_response({ + "status": "error" if has_failure else "ok", + "steps": steps, + }, status=500 if has_failure else 200) diff --git a/app/runtime/server/setup/prerequisites.py b/app/runtime/server/setup/prerequisites.py index 5b2a1fd..2e70c68 100644 --- a/app/runtime/server/setup/prerequisites.py +++ b/app/runtime/server/setup/prerequisites.py @@ -11,6 +11,7 @@ from ...config.settings import SECRET_ENV_KEYS, cfg from ...services.cloud.azure import AzureCLI +from ...services.deployment.bicep_deployer import BicepDeployer, BicepDeployRequest from ...services.keyvault import env_key_to_secret_name, is_kv_ref from ...services.keyvault import kv as _kv from ...state.deploy_state import DeployStateStore @@ -34,6 +35,7 @@ def __init__( self._az = az self._store = store self._deploy_store = deploy_store + self._bicep = BicepDeployer(az, deploy_store) if deploy_store else None def register(self, router: web.UrlDispatcher) -> None: router.add_get("/api/setup/prerequisites/status", self.status) @@ -89,15 +91,9 @@ async def deploy(self, req: web.Request) -> web.Response: "message": "Key Vault already configured", }) - if not await self._ensure_rg(prereq_rg, location, steps): + if not await 
self._deploy_keyvault_via_bicep(prereq_rg, location, steps): return _fail(steps) - vault_url = await self._create_keyvault(prereq_rg, location, steps) - if not vault_url: - return _fail(steps) - - await self._assign_role(prereq_rg, steps) - try: migrated = await run_sync(self._migrate_existing_secrets) if migrated: @@ -133,14 +129,8 @@ async def ensure_keyvault_ready( prereq_rg = _DEFAULT_PREREQ_RG - if not await self._ensure_rg(prereq_rg, location, steps): - return steps - - vault_url = await self._create_keyvault(prereq_rg, location, steps) - if not vault_url: + if not await self._deploy_keyvault_via_bicep(prereq_rg, location, steps): return steps - - await self._assign_role(prereq_rg, steps) await self._wait_for_access(steps) return steps @@ -197,118 +187,29 @@ async def _wait_for_access( }) return False - async def _ensure_rg( - self, rg: str, location: str, steps: list[dict] + async def _deploy_keyvault_via_bicep( + self, rg: str, location: str, steps: list[dict], ) -> bool: - existing = await run_sync(self._az.json, "group", "show", "--name", rg) - if existing: - steps.append({ - "step": "resource_group", "status": "ok", - "detail": f"{rg} (existing)", - }) - return True - - tag_args: list[str] = [] - if self._deploy_store: - rec = self._deploy_store.current_local() - if rec: - tag_args = ["--tags", f"polyclaw_deploy={rec.tag}"] - - result = await run_sync( - self._az.json, - "group", "create", "--name", rg, "--location", location, *tag_args, + """Deploy Key Vault via the central Bicep template.""" + if not self._bicep: + steps.append({"step": "keyvault", "status": "failed", + "detail": "BicepDeployer not available"}) + return False + + req = BicepDeployRequest( + resource_group=rg, + location=location, + deploy_foundry=False, + deploy_key_vault=True, ) - ok = bool(result) - steps.append({ - "step": "resource_group", - "status": "ok" if ok else "failed", - "detail": rg if ok else (self._az.last_stderr or "Unknown error"), - }) - if ok and self._deploy_store: 
- rec = self._deploy_store.current_local() - if rec and rg not in rec.resource_groups: - rec.resource_groups.append(rg) - self._deploy_store.update(rec) - return ok - - async def _create_keyvault( - self, rg: str, location: str, steps: list[dict] - ) -> str: - import secrets as _secrets - - vault_name = f"polyclaw-kv-{_secrets.token_hex(4)}" - result = await run_sync( - self._az.json, - "keyvault", "create", - "--name", vault_name, "--resource-group", rg, - "--location", location, "--enable-rbac-authorization", "true", - ) - if not result: - steps.append({ - "step": "keyvault", "status": "failed", - "detail": self._az.last_stderr or "Unknown error", - }) - return "" - - vault_url = "" - if isinstance(result, dict): - vault_url = result.get("properties", {}).get("vaultUri", "") - if not vault_url: - vault_url = f"https://{vault_name}.vault.azure.net" - - cfg.write_env( - KEY_VAULT_URL=vault_url, - KEY_VAULT_NAME=vault_name, - KEY_VAULT_RG=rg, - ) - _kv.reinit() - - if self._deploy_store: - rec = self._deploy_store.current_local() - if rec: - rec.add_resource( - resource_type="keyvault", resource_group=rg, - resource_name=vault_name, purpose="Secret storage", - ) - self._deploy_store.update(rec) - - steps.append({"step": "keyvault", "status": "ok", "detail": vault_url}) - return vault_url - - async def _assign_role(self, rg: str, steps: list[dict]) -> None: - account = self._az.account_info() - if not account: - steps.append({ - "step": "rbac_role", "status": "failed", - "detail": "No Azure account", - }) - return - - user_id = account.get("user", {}).get("name", "") - kv_name = cfg.env.read("KEY_VAULT_NAME") or "" - sub_id = account.get("id", "") - if not (user_id and kv_name and sub_id): - steps.append({ - "step": "rbac_role", "status": "failed", - "detail": "Missing user/vault/subscription info", - }) - return - - scope = ( - f"/subscriptions/{sub_id}/resourceGroups/{rg}" - f"/providers/Microsoft.KeyVault/vaults/{kv_name}" - ) - ok, msg = await run_sync( - 
self._az.ok, - "role", "assignment", "create", - "--role", "Key Vault Secrets Officer", - "--assignee", user_id, "--scope", scope, - ) - steps.append({ - "step": "rbac_role", - "status": "ok" if ok else "failed", - "detail": f"Assigned to {user_id}" if ok else (msg or "Unknown error"), - }) + result = await run_sync(self._bicep.deploy, req) + steps.extend(result.steps) + if not result.ok: + return False + + if result.key_vault_url: + _kv.reinit() + return True def _migrate_existing_secrets( self, *, max_retries: int = 4, initial_wait: float = 10.0 diff --git a/app/runtime/server/setup_voice.py b/app/runtime/server/setup_voice.py deleted file mode 100644 index f416a7a..0000000 --- a/app/runtime/server/setup_voice.py +++ /dev/null @@ -1,477 +0,0 @@ -"""Voice setup routes -- ``/api/setup/voice/*``.""" - -from __future__ import annotations - -import logging - -from aiohttp import web - -from ..config.settings import cfg -from ..services.cloud.azure import AzureCLI -from ..state.infra_config import InfraConfigStore -from ..util.async_helpers import run_sync -from .voice_provision import ( - create_acs, - create_aoai, - ensure_rbac, - ensure_rg, - persist_config, -) - -logger = logging.getLogger(__name__) - - -class VoiceSetupRoutes: - """ACS + Azure OpenAI provisioning, phone config, and decommissioning.""" - - def __init__(self, az: AzureCLI, store: InfraConfigStore) -> None: - self._az = az - self._store = store - - def register(self, router: web.UrlDispatcher) -> None: - router.add_get("/api/setup/voice/config", self.get_config) - router.add_post("/api/setup/voice/deploy", self.deploy) - router.add_post("/api/setup/voice/connect", self.connect_existing) - router.add_post("/api/setup/voice/phone", self.save_phone) - router.add_post("/api/setup/voice/decommission", self.decommission) - router.add_get("/api/setup/voice/aoai/list", self.list_aoai) - router.add_get("/api/setup/voice/aoai/deployments", self.list_aoai_deployments) - 
router.add_post("/api/setup/voice/aoai/validate", self.validate_aoai) - router.add_get("/api/setup/voice/acs/list", self.list_acs) - router.add_get("/api/setup/voice/acs/phones", self.list_acs_phones) - - # ------------------------------------------------------------------ - # Config - # ------------------------------------------------------------------ - - async def get_config(self, _req: web.Request) -> web.Response: - vc = self._store.to_safe_dict().get("channels", {}).get("voice_call", {}) - if vc.get("acs_resource_name"): - rg = vc.get("voice_resource_group") or vc.get("resource_group") - if rg: - account = self._az.account_info() - sub_id = account.get("id", "") if account else "" - if sub_id: - vc["portal_phone_url"] = ( - f"https://portal.azure.com/#@/resource/subscriptions/{sub_id}" - f"/resourceGroups/{rg}" - f"/providers/Microsoft.Communication" - f"/CommunicationServices/{vc['acs_resource_name']}" - f"/phonenumbers" - ) - return web.json_response(vc) - - # ------------------------------------------------------------------ - # Deploy - # ------------------------------------------------------------------ - - async def deploy(self, req: web.Request) -> web.Response: - body = await req.json() - location = body.get("location", "swedencentral").strip() - voice_rg = body.get("voice_resource_group", "").strip() or "polyclaw-voice-rg" - logger.info("Voice deploy started: voice_rg=%s, location=%s", voice_rg, location) - - steps: list[dict] = [] - - if not await ensure_rg(self._az, voice_rg, location, steps): - return _voice_fail(steps) - - acs_name, conn_str = await create_acs(self._az, voice_rg, steps) - if not conn_str: - return _voice_fail(steps) - - aoai_name, aoai_endpoint, aoai_key, deployment_name = await create_aoai( - self._az, voice_rg, location, steps - ) - if not aoai_endpoint: - return _voice_fail(steps) - - if not aoai_key: - await ensure_rbac(self._az, aoai_name, voice_rg, steps) - - persist_config( - self._store, voice_rg, location, acs_name, 
conn_str, - aoai_name, aoai_endpoint, aoai_key, deployment_name, steps, - ) - logger.info("Voice deploy completed: acs=%s, aoai=%s", acs_name, aoai_name) - - reinit = req.app.get("_reinit_voice") - if reinit: - reinit() - - return web.json_response({ - "status": "ok", - "steps": steps, - "message": ( - "Voice infrastructure deployed." - " Now purchase a phone number in the Azure Portal." - ), - }) - - # ------------------------------------------------------------------ - # Phone - # ------------------------------------------------------------------ - - async def save_phone(self, req: web.Request) -> web.Response: - body = await req.json() - phone = body.get("phone_number", "").strip() - target = body.get("target_number", "").strip() - - updates: dict[str, str] = {} - env_updates: dict[str, str] = {} - - if phone: - if not phone.startswith("+"): - return _error("Source phone number must be in E.164 format (e.g. +14155551234)", 400) - updates["acs_source_number"] = phone - env_updates["ACS_SOURCE_NUMBER"] = phone - - if target: - if not target.startswith("+"): - return _error("Target phone number must be in E.164 format (e.g. 
+41781234567)", 400) - updates["voice_target_number"] = target - env_updates["VOICE_TARGET_NUMBER"] = target - - if not updates: - return _error("At least one phone number is required", 400) - - self._store.save_voice_call(**updates) - cfg.write_env(**env_updates) - - reinit = req.app.get("_reinit_voice") - if reinit: - reinit() - - return _ok("Phone number(s) saved") - - # ------------------------------------------------------------------ - # Decommission - # ------------------------------------------------------------------ - - async def decommission(self, req: web.Request) -> web.Response: - vc = self._store.channels.voice_call - voice_rg = vc.voice_resource_group or vc.resource_group - steps: list[dict] = [] - - if voice_rg: - rg_exists = await run_sync(self._az.json, "group", "show", "--name", voice_rg) - if rg_exists: - ok, msg = await run_sync( - self._az.ok, "group", "delete", "--name", voice_rg, "--yes", "--no-wait", - ) - steps.append({ - "step": "voice_rg_delete", - "status": "ok" if ok else "failed", - "name": voice_rg, - "detail": f"Deleting {voice_rg}" if ok else msg, - }) - else: - steps.append({"step": "voice_rg_delete", "status": "skip", "detail": "RG not found"}) - else: - rg = vc.resource_group - if vc.acs_resource_name and rg: - ok, _ = await run_sync( - self._az.ok, "communication", "delete", - "--name", vc.acs_resource_name, "--resource-group", rg, "--yes", - ) - steps.append({ - "step": "acs_resource", - "status": "ok" if ok else "failed", - "name": vc.acs_resource_name, - }) - - if vc.azure_openai_resource_name and rg: - ok, _ = await run_sync( - self._az.ok, "cognitiveservices", "account", "delete", - "--name", vc.azure_openai_resource_name, "--resource-group", rg, "--yes", - ) - steps.append({ - "step": "aoai_resource", - "status": "ok" if ok else "failed", - "name": vc.azure_openai_resource_name, - }) - - self._store.clear_voice_call() - cfg.write_env( - ACS_CONNECTION_STRING="", - ACS_SOURCE_NUMBER="", - VOICE_TARGET_NUMBER="", - 
AZURE_OPENAI_ENDPOINT="", - AZURE_OPENAI_API_KEY="", - AZURE_OPENAI_REALTIME_DEPLOYMENT="", - ACS_CALLBACK_TOKEN="", - ) - - return web.json_response({ - "status": "ok", - "steps": steps, - "message": "Voice infrastructure decommissioned", - }) - - # ------------------------------------------------------------------ - # Discovery: AOAI - # ------------------------------------------------------------------ - - async def list_aoai(self, _req: web.Request) -> web.Response: - resources = await run_sync( - self._az.json, "resource", "list", - "--resource-type", "Microsoft.CognitiveServices/accounts", - ) - if not isinstance(resources, list): - return web.json_response([]) - - return web.json_response([ - { - "name": r.get("name", ""), - "resource_group": r.get("resourceGroup", ""), - "location": r.get("location", ""), - } - for r in resources - if r.get("kind") == "OpenAI" - ]) - - async def list_aoai_deployments(self, req: web.Request) -> web.Response: - name = req.query.get("name", "").strip() - rg = req.query.get("resource_group", "").strip() - if not name or not rg: - return _error("name and resource_group are required", 400) - - deployments = await run_sync( - self._az.json, "cognitiveservices", "account", "deployment", "list", - "--name", name, "--resource-group", rg, - ) - if not isinstance(deployments, list): - return web.json_response([]) - - return web.json_response([ - { - "deployment_name": d.get("name", ""), - "model_name": d.get("properties", {}).get("model", {}).get("name", ""), - "model_version": d.get("properties", {}).get("model", {}).get("version", ""), - "model_format": d.get("properties", {}).get("model", {}).get("format", ""), - } - for d in deployments - ]) - - async def validate_aoai(self, req: web.Request) -> web.Response: - body = await req.json() - name = body.get("name", "").strip() - rg = body.get("resource_group", "").strip() - if not name or not rg: - return _error("name and resource_group are required", 400) - - deployments = await 
run_sync( - self._az.json, "cognitiveservices", "account", "deployment", "list", - "--name", name, "--resource-group", rg, - ) - if not isinstance(deployments, list): - return web.json_response({ - "valid": False, - "message": f"Cannot list deployments for {name}", - "deployments": [], - }) - - realtime_models = { - "gpt-4o-realtime-preview", - "gpt-realtime-mini", - "gpt-4o-mini-realtime-preview", - } - found = [] - for d in deployments: - model = d.get("properties", {}).get("model", {}) - model_name = model.get("name", "") - found.append({ - "deployment_name": d.get("name", ""), - "model_name": model_name, - "model_version": model.get("version", ""), - "is_realtime": model_name in realtime_models, - }) - - has_realtime = any(f["is_realtime"] for f in found) - return web.json_response({ - "valid": has_realtime, - "message": ( - "Realtime model deployment found" - if has_realtime - else "No realtime model deployment found. Deploy gpt-realtime-mini or gpt-4o-realtime-preview." - ), - "deployments": found, - }) - - # ------------------------------------------------------------------ - # Discovery: ACS - # ------------------------------------------------------------------ - - async def list_acs(self, _req: web.Request) -> web.Response: - resources = await run_sync(self._az.json, "communication", "list") - if not isinstance(resources, list): - return web.json_response([]) - - return web.json_response([ - { - "name": r.get("name", ""), - "resource_group": r.get("resourceGroup", ""), - "location": r.get("location", ""), - } - for r in resources - ]) - - async def list_acs_phones(self, req: web.Request) -> web.Response: - name = req.query.get("name", "").strip() - rg = req.query.get("resource_group", "").strip() - if not name or not rg: - return _error("name and resource_group are required", 400) - - keys = await run_sync( - self._az.json, "communication", "list-key", - "--name", name, "--resource-group", rg, - ) - conn_str = keys.get("primaryConnectionString", "") if 
isinstance(keys, dict) else "" - if not conn_str: - return web.json_response([]) - - phones = await run_sync( - self._az.json, "communication", "phonenumber", "list", - "--connection-string", conn_str, - ) - if not isinstance(phones, list): - return web.json_response([]) - - return web.json_response([ - {"phone_number": p.get("phoneNumber", "")} - for p in phones - if p.get("phoneNumber") - ]) - - # ------------------------------------------------------------------ - # Connect existing - # ------------------------------------------------------------------ - - async def connect_existing(self, req: web.Request) -> web.Response: - body = await req.json() - steps: list[dict] = [] - - aoai_name = body.get("aoai_name", "").strip() - aoai_rg = body.get("aoai_resource_group", "").strip() - aoai_deployment = body.get("aoai_deployment", "").strip() or "gpt-realtime-mini" - - if not aoai_name or not aoai_rg: - return _error("aoai_name and aoai_resource_group are required", 400) - - aoai_info = await run_sync( - self._az.json, "cognitiveservices", "account", "show", - "--name", aoai_name, "--resource-group", aoai_rg, - ) - if not isinstance(aoai_info, dict): - return _error(f"Azure OpenAI resource '{aoai_name}' not found in RG '{aoai_rg}'", 404) - - aoai_endpoint = aoai_info.get("properties", {}).get("endpoint", "") - steps.append({"step": "aoai_resource", "status": "ok", "name": f"{aoai_name} (existing)"}) - - deployments = await run_sync( - self._az.json, "cognitiveservices", "account", "deployment", "list", - "--name", aoai_name, "--resource-group", aoai_rg, - ) - dep_found = isinstance(deployments, list) and any( - d.get("name") == aoai_deployment for d in deployments - ) - if not dep_found: - steps.append({ - "step": "aoai_deployment", "status": "failed", - "name": aoai_deployment, - "detail": f"Deployment '{aoai_deployment}' not found on {aoai_name}", - }) - return _voice_fail(steps) - - steps.append({"step": "aoai_deployment", "status": "ok", "name": f"{aoai_deployment} 
(verified)"}) - - aoai_keys = await run_sync( - self._az.json, "cognitiveservices", "account", "keys", "list", - "--name", aoai_name, "--resource-group", aoai_rg, - ) - aoai_key = aoai_keys.get("key1", "") if isinstance(aoai_keys, dict) else "" - if aoai_key: - steps.append({"step": "aoai_keys", "status": "ok"}) - else: - logger.info("AOAI key retrieval skipped (disableLocalAuth likely true)") - steps.append({ - "step": "aoai_keys", "status": "ok", - "detail": "Key-based auth disabled; will use Entra ID (DefaultAzureCredential)", - }) - - acs_name = body.get("acs_name", "").strip() - acs_rg = body.get("acs_resource_group", "").strip() - conn_str = "" - voice_rg = aoai_rg - - if acs_name and acs_rg: - keys = await run_sync( - self._az.json, "communication", "list-key", - "--name", acs_name, "--resource-group", acs_rg, - ) - conn_str = keys.get("primaryConnectionString", "") if isinstance(keys, dict) else "" - if not conn_str: - steps.append({ - "step": "acs_resource", "status": "failed", - "name": acs_name, "detail": "Cannot retrieve connection string", - }) - return _voice_fail(steps) - steps.append({"step": "acs_resource", "status": "ok", "name": f"{acs_name} (existing)"}) - voice_rg = acs_rg - else: - voice_rg = aoai_rg - if not await ensure_rg(self._az, voice_rg, "Global", steps): - return _voice_fail(steps) - acs_name, conn_str = await create_acs(self._az, voice_rg, steps) - if not conn_str: - return _voice_fail(steps) - - location = aoai_info.get("location", "swedencentral") - - if not aoai_key: - await ensure_rbac(self._az, aoai_name, aoai_rg, steps) - - persist_config( - self._store, voice_rg, location, acs_name, conn_str, - aoai_name, aoai_endpoint, aoai_key, aoai_deployment, steps, - ) - - phone = body.get("phone_number", "").strip() - if phone: - self._store.save_voice_call(acs_source_number=phone) - cfg.write_env(ACS_SOURCE_NUMBER=phone) - steps.append({"step": "phone_number", "status": "ok", "name": phone}) - - target = body.get("target_number", 
"").strip() - if target: - self._store.save_voice_call(voice_target_number=target) - cfg.write_env(VOICE_TARGET_NUMBER=target) - steps.append({"step": "target_number", "status": "ok", "name": target}) - - logger.info("Voice connect completed: acs=%s, aoai=%s", acs_name, aoai_name) - - reinit = req.app.get("_reinit_voice") - if reinit: - reinit() - - return web.json_response({ - "status": "ok", - "steps": steps, - "message": "Connected to existing Azure resources.", - }) - - -def _ok(message: str) -> web.Response: - return web.json_response({"status": "ok", "message": message}) - - -def _error(message: str, status: int = 500) -> web.Response: - return web.json_response({"status": "error", "message": message}, status=status) - - -def _voice_fail(steps: list[dict]) -> web.Response: - failed = [s for s in steps if s.get("status") == "failed"] - msg = failed[0].get("name", "Unknown step") if failed else "Unknown error" - return web.json_response( - {"status": "error", "steps": steps, "message": f"Voice deploy failed at: {msg}"}, - ) diff --git a/app/runtime/server/smoke_test.py b/app/runtime/server/smoke_test.py index 92d8fc6..a9e32b9 100644 --- a/app/runtime/server/smoke_test.py +++ b/app/runtime/server/smoke_test.py @@ -64,9 +64,13 @@ def _fail(self, message: str) -> dict[str, Any]: return {"status": "error", "steps": self._steps, "message": message} def _check_auth(self) -> bool: + # In BYOK mode, GitHub auth is not required. 
+ if cfg.foundry_endpoint: + self._step("auth", True, "Foundry BYOK mode (endpoint: %s)" % cfg.foundry_endpoint) + return True st = self._gh.status() self._step("gh_auth", st.get("authenticated", False), st.get("details", "")) - return st.get("authenticated", False) or bool(cfg.github_token) + return st.get("authenticated", False) def _check_cli(self) -> bool: path = shutil.which("copilot") diff --git a/app/runtime/server/wiring.py b/app/runtime/server/wiring.py index 94d63d3..ec209ba 100644 --- a/app/runtime/server/wiring.py +++ b/app/runtime/server/wiring.py @@ -178,12 +178,12 @@ def init_services(mode: ServerMode) -> dict[str, Any]: Returns a dict of service/store references keyed by name. """ from ..state.deploy_state import DeployStateStore - from ..state.foundry_iq_config import FoundryIQConfigStore - from ..state.guardrails import GuardrailsConfigStore - from ..state.infra_config import InfraConfigStore + from ..state.foundry_iq_config import get_foundry_iq_config + from ..state.guardrails import get_guardrails_config + from ..state.infra_config import get_infra_config from ..state.mcp_config import McpConfigStore - from ..state.monitoring_config import MonitoringConfigStore - from ..state.sandbox_config import SandboxConfigStore + from ..state.monitoring_config import get_monitoring_config + from ..state.sandbox_config import get_sandbox_config is_admin = mode in (ServerMode.admin, ServerMode.combined) is_runtime = mode in (ServerMode.runtime, ServerMode.combined) @@ -191,12 +191,12 @@ def init_services(mode: ServerMode) -> dict[str, Any]: result: dict[str, Any] = { "tunnel": None, "deploy_store": DeployStateStore(), - "infra_store": InfraConfigStore(), + "infra_store": get_infra_config(), "mcp_store": McpConfigStore(), - "sandbox_store": SandboxConfigStore(), - "foundry_iq_store": FoundryIQConfigStore(), - "guardrails_store": GuardrailsConfigStore(), - "monitoring_store": MonitoringConfigStore(), + "sandbox_store": get_sandbox_config(), + 
"foundry_iq_store": get_foundry_iq_config(), + "guardrails_store": get_guardrails_config(), + "monitoring_store": get_monitoring_config(), "az": None, "gh": None, "deployer": None, diff --git a/app/runtime/services/cloud/azure.py b/app/runtime/services/cloud/azure.py index 6fde317..0af9515 100644 --- a/app/runtime/services/cloud/azure.py +++ b/app/runtime/services/cloud/azure.py @@ -29,7 +29,8 @@ def __init__(self) -> None: self.last_stderr: str = "" self._cache: dict[str, tuple[float, Any]] = {} - def _run(self, cmd: list[str], cmd_summary: str) -> subprocess.CompletedProcess[str]: + def _run(self, cmd: list[str], cmd_summary: str, timeout: int | None = None) -> subprocess.CompletedProcess[str]: + effective_timeout = timeout if timeout is not None else self.TIMEOUT env = {**os.environ, "AZURE_EXTENSION_USE_DYNAMIC_INSTALL": "yes_without_prompt"} proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, env=env, @@ -46,8 +47,8 @@ def _run(self, cmd: list[str], cmd_summary: str) -> subprocess.CompletedProcess[ if now >= next_heartbeat: elapsed = now - t0 mins_e, secs_e = divmod(int(elapsed), 60) - if self.TIMEOUT: - remaining = max(0, self.TIMEOUT - elapsed) + if effective_timeout: + remaining = max(0, effective_timeout - elapsed) mins_r, secs_r = divmod(int(remaining), 60) logger.info( "[az] %dm %02ds elapsed | timeout %dm %02ds | az %s", @@ -58,14 +59,14 @@ def _run(self, cmd: list[str], cmd_summary: str) -> subprocess.CompletedProcess[ else: logger.info("[az] still waiting (%.0fs): az %s", elapsed, cmd_summary) next_heartbeat = now + self.HEARTBEAT_INTERVAL - if self.TIMEOUT and (now - t0) > self.TIMEOUT: + if effective_timeout and (now - t0) > effective_timeout: proc.kill() proc.wait() - logger.error("[az] TIMEOUT after %ds: az %s", self.TIMEOUT, cmd_summary) + logger.error("[az] TIMEOUT after %ds: az %s", effective_timeout, cmd_summary) return subprocess.CompletedProcess( cmd, returncode=-1, stdout=proc.stdout.read() if 
proc.stdout else "", - stderr=f"Timed out after {self.TIMEOUT}s", + stderr=f"Timed out after {effective_timeout}s", ) return subprocess.CompletedProcess( @@ -131,8 +132,60 @@ def ok(self, *args: str) -> Result: return Result(success=success, message=result.stderr.strip()) def account_info(self) -> dict[str, Any] | None: + """Return the active subscription, or ``None`` if not logged in. + + When ``az account show`` fails (e.g. no default subscription set) + we check the local ``azureProfile.json`` file to distinguish + *not logged in* from *logged in but no default*. This avoids + running ``az account list`` which can be very slow with many + subscriptions. + """ account = self.json_cached("account", "show") - return account if isinstance(account, dict) else None + if isinstance(account, dict): + return account + # Fast file-based check: does azureProfile.json have subscriptions? + if self._has_azure_profile(): + return {"_no_default_subscription": True} + return None + + def _has_azure_profile(self) -> bool: + """Check if the Azure CLI profile file has any subscriptions.""" + return len(self._read_profile_subscriptions()) > 0 + + def _read_profile_subscriptions(self) -> list[dict[str, Any]]: + """Read subscriptions directly from ``azureProfile.json`` (no subprocess).""" + config_dir = os.environ.get("AZURE_CONFIG_DIR") or os.path.join( + os.path.expanduser("~"), ".azure", + ) + profile_path = os.path.join(config_dir, "azureProfile.json") + try: + raw = open(profile_path, "rb").read() # noqa: SIM115 + text = raw.decode("utf-8-sig") + data = json.loads(text) + return data.get("subscriptions", []) + except (OSError, json.JSONDecodeError, UnicodeDecodeError): + return [] + + def list_subscriptions(self) -> list[dict[str, Any]]: + """Return all enabled subscriptions from the local profile file.""" + subs = self._read_profile_subscriptions() + return [ + { + "id": s.get("id", ""), + "name": s.get("name", ""), + "state": s.get("state", ""), + "is_default": 
s.get("isDefault", False), + "tenant_id": s.get("tenantId", ""), + } + for s in subs + if s.get("state") == "Enabled" + ] + + def set_subscription(self, subscription_id: str) -> bool: + """Set the active subscription and clear caches.""" + r = self.json("account", "set", "--subscription", subscription_id, quiet=True) + self.invalidate_cache() + return r is not None or self.last_stderr == "" def login_device_code(self) -> dict[str, Any]: proc = subprocess.Popen( diff --git a/app/runtime/services/cloud/github.py b/app/runtime/services/cloud/github.py index cf55537..fc7415e 100644 --- a/app/runtime/services/cloud/github.py +++ b/app/runtime/services/cloud/github.py @@ -22,8 +22,8 @@ def __init__(self) -> None: def status(self) -> dict[str, Any]: from ...config.settings import cfg - if cfg.github_token: - return {"authenticated": True, "details": "Using GITHUB_TOKEN from environment"} + if cfg.foundry_endpoint: + return {"authenticated": True, "details": "Using Foundry BYOK mode"} try: result = subprocess.run( ["gh", "auth", "status"], diff --git a/app/runtime/services/cloud/runtime_identity.py b/app/runtime/services/cloud/runtime_identity.py index 62d6493..fd1270f 100644 --- a/app/runtime/services/cloud/runtime_identity.py +++ b/app/runtime/services/cloud/runtime_identity.py @@ -112,8 +112,18 @@ def provision(self, resource_group: str) -> dict[str, Any]: logger.info("Created runtime SP: %s", app_id) steps.append({"step": "create_sp", "status": "ok", "detail": app_id}) - # 5. Rotate credentials - cred = self._az.json("ad", "app", "credential", "reset", "--id", app_id, "--years", "2") + # 5. Rotate credentials -- try shorter lifetimes for tenant policies. 
+ cred = self._az.json( + "ad", "app", "credential", "reset", "--id", app_id, "--years", "2", + ) + if (not isinstance(cred, dict) or not cred.get("password")) and \ + "Credential lifetime" in (self._az.last_stderr or ""): + from datetime import datetime, timedelta + end_date = (datetime.utcnow() + timedelta(days=90)).strftime("%Y-%m-%dT%H:%M:%SZ") + cred = self._az.json( + "ad", "app", "credential", "reset", "--id", app_id, + "--end-date", end_date, + ) if not isinstance(cred, dict) or not cred.get("password"): steps.append({"step": "rotate_creds", "status": "failed", "detail": self._az.last_stderr}) diff --git a/app/runtime/services/deployment/__init__.py b/app/runtime/services/deployment/__init__.py index c5a29a1..e67a4ba 100644 --- a/app/runtime/services/deployment/__init__.py +++ b/app/runtime/services/deployment/__init__.py @@ -3,7 +3,11 @@ from __future__ import annotations from .aca_deployer import AcaDeployer +from .bicep_deployer import BicepDeployer, BicepDeployRequest, BicepDeployResult from .deployer import BotDeployer from .provisioner import Provisioner -__all__ = ["AcaDeployer", "BotDeployer", "Provisioner"] +__all__ = [ + "AcaDeployer", "BicepDeployer", "BicepDeployRequest", "BicepDeployResult", + "BotDeployer", "Provisioner", +] diff --git a/app/runtime/services/deployment/aca_deployer.py b/app/runtime/services/deployment/aca_deployer.py index 6cd44e3..d62c6b9 100644 --- a/app/runtime/services/deployment/aca_deployer.py +++ b/app/runtime/services/deployment/aca_deployer.py @@ -79,7 +79,8 @@ def deploy(self, req: AcaDeployRequest) -> AcaDeployResult: env_vars = self._load_env_vars(steps) - acr_name = ensure_acr(self._az, req.resource_group, req.location, steps, rec) + acr_name = ensure_acr(self._az, req.resource_group, req.location, steps, rec, + acr_name=req.acr_name) if not acr_name: result.error = "Container registry creation failed" return result @@ -105,6 +106,7 @@ def deploy(self, req: AcaDeployRequest) -> AcaDeployResult: env_name, env_id = 
ensure_aca_environment( self._az, req.resource_group, req.location, steps, rec, + env_name=req.env_name, ) if not env_name: result.error = "Container Apps environment creation failed" diff --git a/app/runtime/services/deployment/aca_provision.py b/app/runtime/services/deployment/aca_provision.py index 00c0ed5..3ea2b61 100644 --- a/app/runtime/services/deployment/aca_provision.py +++ b/app/runtime/services/deployment/aca_provision.py @@ -31,11 +31,13 @@ def ensure_acr( location: str, steps: list[dict], rec: DeploymentRecord, + acr_name: str = "", ) -> str: """Create a container registry. Returns the ACR name, or ``""`` on failure.""" logger.info("[aca] Step 3/10: Creating container registry ...") - acr_name = "polyclaw" + secrets.token_hex(4) - acr_name = acr_name[:50].replace("-", "") + if not acr_name: + acr_name = "polyclaw" + secrets.token_hex(4) + acr_name = acr_name[:50].replace("-", "") result = az.json( "acr", "create", @@ -229,10 +231,12 @@ def ensure_aca_environment( location: str, steps: list[dict], rec: DeploymentRecord, + env_name: str = "", ) -> tuple[str, str]: """Create an ACA environment. Returns ``(env_name, env_id)``.""" logger.info("[aca] Step 7/10: Creating ACA environment ...") - env_name = f"{_ENV_NAME_PREFIX}-{secrets.token_hex(4)}" + if not env_name: + env_name = f"{_ENV_NAME_PREFIX}-{secrets.token_hex(4)}" result = az.json( "containerapp", "env", "create", @@ -278,7 +282,7 @@ def ensure_runtime_app( _SECRET_ENV_KEYS = frozenset({ "RUNTIME_SP_PASSWORD", "ACS_CALLBACK_TOKEN", - "GITHUB_TOKEN", "BOT_APP_PASSWORD", + "BOT_APP_PASSWORD", "ACS_CONNECTION_STRING", "AZURE_OPENAI_API_KEY", }) _SKIP = frozenset({ diff --git a/app/runtime/services/deployment/bicep_deployer.py b/app/runtime/services/deployment/bicep_deployer.py new file mode 100644 index 0000000..b751e02 --- /dev/null +++ b/app/runtime/services/deployment/bicep_deployer.py @@ -0,0 +1,867 @@ +"""Bicep-based infrastructure deployer. 
+ +Replaces the ad-hoc ``az`` CLI provisioning scattered across the codebase +with a single ``az deployment group create`` driven by ``infra/main.bicep``. +All resource creation is parameterised from internal config state. +""" + +from __future__ import annotations + +import json +import logging +import secrets +from collections.abc import Callable +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +from ...config.settings import cfg +from ...state.deploy_state import DeployStateStore, DeploymentRecord, ResourceEntry, generate_deploy_id +from ..cloud.azure import AzureCLI + +logger = logging.getLogger(__name__) + + +def _find_bicep_template() -> Path: + """Locate infra/main.bicep by walking up from this file.""" + here = Path(__file__).resolve() + for parent in here.parents: + candidate = parent / "infra" / "main.bicep" + if candidate.exists(): + return candidate + # Fallback for local dev layout + return here.parents[4] / "infra" / "main.bicep" + + +_BICEP_TEMPLATE = _find_bicep_template() + + +@dataclass +class BicepDeployRequest: + """Parameters for a Bicep deployment. + + Every ``deploy_*`` flag gates an optional resource block in the + Bicep template. Callers enable only the subset they need. 
+ """ + + resource_group: str = "polyclaw-rg" + location: str = "eastus" + base_name: str = "" + + # Foundry (AI Services) + model deployments + deploy_foundry: bool = True + models: list[dict[str, Any]] = field(default_factory=lambda: [ + {"name": "gpt-4.1", "version": "2025-04-14", "sku": "GlobalStandard", "capacity": 10}, + {"name": "gpt-5", "version": "2025-08-07", "sku": "GlobalStandard", "capacity": 10}, + {"name": "gpt-5-mini", "version": "2025-08-07", "sku": "GlobalStandard", "capacity": 10}, + ]) + + # Key Vault + deploy_key_vault: bool = True + + # ACS (voice) + deploy_acs: bool = False + acs_data_location: str = "United States" + + # Content Safety + deploy_content_safety: bool = False + + # Azure AI Search (Foundry IQ) + deploy_search: bool = False + + # Embedding Azure OpenAI (Foundry IQ) + deploy_embedding_aoai: bool = False + embedding_model_name: str = "text-embedding-3-large" + embedding_model_version: str = "1" + + # Log Analytics + Application Insights + deploy_monitoring: bool = False + + # Container Apps session pool (sandbox) + deploy_session_pool: bool = False + + def __post_init__(self) -> None: + if not self.base_name: + self.base_name = "polyclaw-%s" % secrets.token_hex(4) + + +@dataclass +class BicepDeployResult: + """Result from a Bicep deployment.""" + + ok: bool = False + deploy_id: str = "" + steps: list[dict[str, Any]] = field(default_factory=list) + error: str = "" + + # Foundry + foundry_endpoint: str = "" + foundry_name: str = "" + foundry_resource_id: str = "" + deployed_models: list[str] = field(default_factory=list) + + # Key Vault + key_vault_url: str = "" + key_vault_name: str = "" + + # ACS + acs_name: str = "" + + # Content Safety + content_safety_endpoint: str = "" + content_safety_name: str = "" + content_safety_resource_id: str = "" + + # Azure AI Search + search_endpoint: str = "" + search_name: str = "" + + # Embedding Azure OpenAI + embedding_aoai_endpoint: str = "" + embedding_aoai_name: str = "" + 
embedding_deployment_name: str = "" + + # Monitoring + app_insights_connection_string: str = "" + app_insights_name: str = "" + log_analytics_workspace_name: str = "" + + # Sandbox + session_pool_endpoint: str = "" + session_pool_id: str = "" + session_pool_name: str = "" + + +class _ObservableSteps(list): + """A list subclass that fires a callback on every ``append``.""" + + def __init__(self, callback: Callable[[dict[str, str]], None] | None = None) -> None: + super().__init__() + self._cb = callback + + def append(self, item: Any) -> None: # type: ignore[override] + super().append(item) + if self._cb is not None: + try: + self._cb(item) + except Exception: + pass # never let callback errors abort the deploy + + +class BicepDeployer: + """Orchestrates infrastructure via a single Bicep template.""" + + def __init__( + self, + az: AzureCLI, + deploy_store: DeployStateStore, + ) -> None: + self._az = az + self._store = deploy_store + + # -- public API -------------------------------------------------------- + + def deploy( + self, + req: BicepDeployRequest, + on_step: Callable[[dict[str, str]], None] | None = None, + ) -> BicepDeployResult: + """Run the full Bicep deployment and persist results. + + *on_step*, when supplied, is called synchronously every time a new + progress step is recorded. This enables streaming (e.g. SSE) from + the route handler. + """ + result = BicepDeployResult() + deploy_id = generate_deploy_id() + result.deploy_id = deploy_id + + # Wrap the steps list so that append() also fires the callback. + steps = _ObservableSteps(on_step) + result.steps = steps # type: ignore[assignment] + + # 1. Ensure resource group + if not self._ensure_resource_group(req, steps): + result.error = "Resource group creation failed" + return result + + # 2. Resolve principal for RBAC + principal_id, principal_type = self._resolve_principal(steps) + if not principal_id: + result.error = "Cannot determine current principal for RBAC" + return result + + # 2b. 
Ensure a runtime service principal exists. + # The runtime container needs its own identity for: + # - Key Vault secret resolution (when KV is deployed) + # - Foundry BYOK bearer tokens (az account get-access-token) + needs_sp = req.deploy_key_vault or req.deploy_foundry + runtime_sp = self._ensure_runtime_sp(req, steps) if needs_sp else None + + # 3. Run Bicep deployment + runtime_sp_oid = runtime_sp["object_id"] if runtime_sp else "" + outputs = self._run_bicep(req, principal_id, principal_type, runtime_sp_oid, steps) + if outputs is None: + result.error = "Bicep deployment failed" + return result + + # 4. Extract outputs + def _out(key: str) -> str: + return outputs.get(key, {}).get("value", "") + + def _out_list(key: str) -> list[str]: + val = outputs.get(key, {}).get("value", []) + return val if isinstance(val, list) else [] + + result.foundry_endpoint = _out("foundryEndpoint") + result.foundry_name = _out("foundryName") + result.foundry_resource_id = _out("foundryResourceId") + result.deployed_models = _out_list("deployedModels") + result.key_vault_url = _out("keyVaultUrl") + result.key_vault_name = _out("keyVaultName") + result.acs_name = _out("acsName") + result.content_safety_endpoint = _out("contentSafetyEndpoint") + result.content_safety_name = _out("contentSafetyName") + result.content_safety_resource_id = _out("contentSafetyResourceId") + result.search_endpoint = _out("searchEndpoint") + result.search_name = _out("searchName") + result.embedding_aoai_endpoint = _out("embeddingAoaiEndpoint") + result.embedding_aoai_name = _out("embeddingAoaiName") + result.embedding_deployment_name = _out("embeddingDeploymentName") + result.app_insights_connection_string = _out("appInsightsConnectionString") + result.app_insights_name = _out("appInsightsName") + result.log_analytics_workspace_name = _out("logAnalyticsWorkspaceName") + result.session_pool_endpoint = _out("sessionPoolEndpoint") + result.session_pool_id = _out("sessionPoolId") + result.session_pool_name = 
_out("sessionPoolName") + steps.append({"step": "extract_outputs", "status": "ok"}) + + # 5. Persist to .env and state store + self._persist(req, result, deploy_id, steps, runtime_sp=runtime_sp) + + result.ok = True + logger.info( + "[bicep.deploy] completed: endpoint=%s models=%s kv=%s", + result.foundry_endpoint, result.deployed_models, result.key_vault_url, + ) + return result + + def status(self) -> dict[str, Any]: + """Return current Foundry deployment status from .env.""" + deployed_raw = cfg.env.read("DEPLOYED_MODELS") or "" + deployed_models = [m.strip() for m in deployed_raw.split(",") if m.strip()] + return { + "deployed": bool(cfg.env.read("FOUNDRY_ENDPOINT")), + "foundry_endpoint": cfg.env.read("FOUNDRY_ENDPOINT") or "", + "foundry_name": cfg.env.read("FOUNDRY_NAME") or "", + "foundry_resource_group": cfg.env.read("FOUNDRY_RESOURCE_GROUP") or "", + "deployed_models": deployed_models, + "key_vault_url": cfg.env.read("KEY_VAULT_URL") or "", + "key_vault_name": cfg.env.read("KEY_VAULT_NAME") or "", + "content_safety_endpoint": cfg.env.read("CONTENT_SAFETY_ENDPOINT") or "", + "content_safety_name": cfg.env.read("CONTENT_SAFETY_NAME") or "", + "search_endpoint": cfg.env.read("SEARCH_ENDPOINT") or "", + "search_name": cfg.env.read("SEARCH_NAME") or "", + "embedding_aoai_endpoint": cfg.env.read("EMBEDDING_AOAI_ENDPOINT") or "", + "embedding_aoai_name": cfg.env.read("EMBEDDING_AOAI_NAME") or "", + "app_insights_name": cfg.env.read("APP_INSIGHTS_NAME") or "", + "session_pool_name": cfg.env.read("SESSION_POOL_NAME") or "", + "acs_name": cfg.env.read("ACS_RESOURCE_NAME") or "", + "bot_name": cfg.env.read("BOT_NAME") or "", + "model": cfg.copilot_model, + } + + def decommission(self, resource_group: str = "") -> list[dict[str, Any]]: + """Delete the resource group (cascade deletes everything).""" + rg = resource_group or cfg.env.read("FOUNDRY_RESOURCE_GROUP") or "" + steps: list[dict[str, Any]] = [] + if not rg: + steps.append({"step": "decommission", "status": 
"skip", "detail": "No RG configured"}) + return steps + + ok, msg = self._az.ok( + "group", "delete", "--name", rg, "--yes", "--no-wait", + ) + steps.append({ + "step": "delete_resource_group", + "status": "ok" if ok else "failed", + "detail": rg if ok else msg, + }) + + if ok: + # Clean up the runtime service principal + sp_app_id = cfg.env.read("RUNTIME_SP_APP_ID") or "" + if sp_app_id: + del_ok, del_msg = self._az.ok("ad", "sp", "delete", "--id", sp_app_id) + steps.append({ + "step": "delete_runtime_sp", + "status": "ok" if del_ok else "warning", + "detail": sp_app_id if del_ok else del_msg, + }) + + cfg.write_env( + FOUNDRY_ENDPOINT="", + FOUNDRY_NAME="", + FOUNDRY_RESOURCE_GROUP="", + KEY_VAULT_URL="", + KEY_VAULT_NAME="", + KEY_VAULT_RG="", + RUNTIME_SP_APP_ID="", + RUNTIME_SP_PASSWORD="", + RUNTIME_SP_TENANT="", + ) + steps.append({"step": "clear_env", "status": "ok"}) + + return steps + + # -- internal helpers -------------------------------------------------- + + def _ensure_resource_group( + self, req: BicepDeployRequest, steps: list[dict], + ) -> bool: + existing = self._az.json("group", "show", "--name", req.resource_group, quiet=True) + if existing: + steps.append({"step": "resource_group", "status": "ok", + "detail": "%s (existing)" % req.resource_group}) + return True + + result = self._az.json( + "group", "create", + "--name", req.resource_group, + "--location", req.location, + ) + ok = bool(result) + steps.append({ + "step": "resource_group", + "status": "ok" if ok else "failed", + "detail": req.resource_group, + }) + if not ok: + logger.error("RG creation failed: %s", self._az.last_stderr) + return ok + + def _resolve_principal(self, steps: list[dict]) -> tuple[str, str]: + """Return ``(principal_id, principal_type)`` for the signed-in identity.""" + account = self._az.account_info() + if not account: + steps.append({"step": "resolve_principal", "status": "failed", + "detail": "Not logged in"}) + return "", "" + + # Try user principal first + 
user_info = self._az.json("ad", "signed-in-user", "show", quiet=True) + if isinstance(user_info, dict) and user_info.get("id"): + steps.append({"step": "resolve_principal", "status": "ok", + "detail": "User: %s" % user_info.get("userPrincipalName", "")}) + return user_info["id"], "User" + + # Fall back to service principal + sp_name = account.get("user", {}).get("name", "") + if sp_name: + sp_info = self._az.json("ad", "sp", "show", "--id", sp_name, quiet=True) + if isinstance(sp_info, dict) and sp_info.get("id"): + steps.append({"step": "resolve_principal", "status": "ok", + "detail": "ServicePrincipal: %s" % sp_name}) + return sp_info["id"], "ServicePrincipal" + + steps.append({"step": "resolve_principal", "status": "failed", + "detail": "Cannot determine principal"}) + return "", "" + + def _ensure_runtime_sp( + self, req: BicepDeployRequest, steps: list[dict], + ) -> dict[str, str] | None: + """Create or reuse a service principal for the runtime container. + + The runtime container needs its own Azure identity to resolve + Key Vault secrets. This method: + + 1. Checks if ``RUNTIME_SP_APP_ID`` is already configured and valid. + 2. If not, creates a new SP via ``az ad sp create-for-rbac`` scoped + to the resource group. + 3. Returns ``{app_id, password, tenant, object_id}`` for Bicep RBAC + and ``.env`` persistence. 
+ """ + # Reuse existing SP if configured and valid + existing_id = cfg.env.read("RUNTIME_SP_APP_ID") or "" + existing_pw = cfg.env.read("RUNTIME_SP_PASSWORD") or "" + existing_tenant = cfg.env.read("RUNTIME_SP_TENANT") or "" + if existing_id and existing_pw and existing_tenant: + sp_info = self._az.json("ad", "sp", "show", "--id", existing_id, quiet=True) + if isinstance(sp_info, dict) and sp_info.get("id"): + steps.append({ + "step": "runtime_sp", "status": "ok", + "detail": "Reusing existing SP: %s" % existing_id, + }) + return { + "app_id": existing_id, + "password": existing_pw, + "tenant": existing_tenant, + "object_id": sp_info["id"], + } + logger.warning( + "[bicep.runtime_sp] existing SP %s not found in AD; creating new one", + existing_id, + ) + + # Create a new SP scoped to the resource group + scope = "/subscriptions/%s/resourceGroups/%s" % ( + self._az.account_info().get("id", ""), + req.resource_group, + ) + sp_name = "polyclaw-runtime-%s" % req.base_name + + # Try creating the SP with a 1-year credential. If the tenant + # policy rejects the lifetime, fall back to creating the SP without + # a password and then adding a short-lived credential separately. 
+ sp: dict | list | None = None + sp = self._az.json( + "ad", "sp", "create-for-rbac", + "--name", sp_name, + "--role", "Reader", + "--scopes", scope, + ) + + if not isinstance(sp, dict) or not sp.get("appId"): + if "Credential lifetime" in (self._az.last_stderr or ""): + logger.info("[bicep.runtime_sp] tenant restricts cred lifetime; using short-lived") + # Create SP without password + sp = self._az.json( + "ad", "sp", "create-for-rbac", + "--name", sp_name, + "--role", "Reader", + "--scopes", scope, + "--create-password", "false", + ) + if isinstance(sp, dict) and sp.get("appId"): + from datetime import datetime, timedelta + end_date = (datetime.utcnow() + timedelta(days=90)).strftime( + "%Y-%m-%dT%H:%M:%SZ", + ) + cred = self._az.json( + "ad", "app", "credential", "reset", + "--id", sp["appId"], + "--end-date", end_date, + ) + if isinstance(cred, dict) and cred.get("password"): + sp["password"] = cred["password"] + sp["tenant"] = cred.get("tenant", sp.get("tenant", "")) + else: + steps.append({ + "step": "runtime_sp", "status": "failed", + "detail": "Short-lived credential creation failed: %s" + % self._az.last_stderr[:200], + }) + return None + + if not isinstance(sp, dict) or not sp.get("appId"): + steps.append({ + "step": "runtime_sp", "status": "failed", + "detail": "az ad sp create-for-rbac failed: %s" % self._az.last_stderr[:300], + }) + logger.error("[bicep.runtime_sp] SP creation failed: %s", self._az.last_stderr) + return None + + # Resolve the SP's object ID (needed for Bicep RBAC assignment) + sp_show = self._az.json("ad", "sp", "show", "--id", sp["appId"], quiet=True) + object_id = sp_show["id"] if isinstance(sp_show, dict) and sp_show.get("id") else "" + if not object_id: + steps.append({ + "step": "runtime_sp", "status": "failed", + "detail": "Could not resolve SP object ID for %s" % sp["appId"], + }) + return None + + steps.append({ + "step": "runtime_sp", "status": "ok", + "detail": "Created SP: %s (object_id=%s)" % (sp_name, object_id), + }) + 
logger.info( + "[bicep.runtime_sp] created: name=%s app_id=%s object_id=%s", + sp_name, sp["appId"], object_id, + ) + return { + "app_id": sp["appId"], + "password": sp.get("password", ""), + "tenant": sp.get("tenant", ""), + "object_id": object_id, + } + + def _run_bicep( + self, + req: BicepDeployRequest, + principal_id: str, + principal_type: str, + runtime_sp_object_id: str, + steps: list[dict], + ) -> dict[str, Any] | None: + """Execute ``az deployment group create`` with the Bicep template.""" + if not _BICEP_TEMPLATE.exists(): + steps.append({"step": "bicep_deploy", "status": "failed", + "detail": "Template not found: %s" % _BICEP_TEMPLATE}) + logger.error("Bicep template not found at %s", _BICEP_TEMPLATE) + return None + + params = { + "baseName": {"value": req.base_name}, + "location": {"value": req.location}, + "principalId": {"value": principal_id}, + "principalType": {"value": principal_type}, + "deployFoundry": {"value": req.deploy_foundry}, + "models": {"value": req.models}, + "deployKeyVault": {"value": req.deploy_key_vault}, + "runtimeSpObjectId": {"value": runtime_sp_object_id}, + "deployAcs": {"value": req.deploy_acs}, + "acsDataLocation": {"value": req.acs_data_location}, + "deployContentSafety": {"value": req.deploy_content_safety}, + "deploySearch": {"value": req.deploy_search}, + "deployEmbeddingAoai": {"value": req.deploy_embedding_aoai}, + "embeddingModelName": {"value": req.embedding_model_name}, + "embeddingModelVersion": {"value": req.embedding_model_version}, + "deployMonitoring": {"value": req.deploy_monitoring}, + "deploySessionPool": {"value": req.deploy_session_pool}, + } + params_json = json.dumps(params) + + deploy_name = "polyclaw-%s" % req.base_name + + logger.info( + "[bicep.deploy] running: rg=%s base=%s models=%d kv=%s acs=%s", + req.resource_group, req.base_name, len(req.models), + req.deploy_key_vault, req.deploy_acs, + ) + + # Run the deployment — use --name so we can query it afterwards. 
+ result = self._az.json( + "deployment", "group", "create", + "--resource-group", req.resource_group, + "--name", deploy_name, + "--template-file", str(_BICEP_TEMPLATE), + "--parameters", params_json, + ) + + # If the create command failed, it may be an Azure CLI response-parsing + # bug (e.g. "The content for this response was already consumed" in + # az 2.77.0). Check if the deployment actually succeeded by querying it. + if result is None: + stderr = self._az.last_stderr + logger.warning( + "[bicep.deploy] create returned None; checking deployment status: %s", + stderr[:200], + ) + result = self._az.json( + "deployment", "group", "show", + "--resource-group", req.resource_group, + "--name", deploy_name, + "--query", "properties.outputs", + quiet=True, + ) + if result is None: + steps.append({"step": "bicep_deploy", "status": "failed", + "detail": stderr[:500]}) + logger.error("Bicep deployment failed: %s", stderr) + return None + + logger.info("[bicep.deploy] deployment found via fallback query") + else: + # Extract outputs from the inline response + if isinstance(result, dict): + result = result.get("properties", result).get("outputs", result) + + steps.append({"step": "bicep_deploy", "status": "ok", + "detail": "Deployment succeeded"}) + return result if isinstance(result, dict) else {} + + def _persist( + self, + req: BicepDeployRequest, + result: BicepDeployResult, + deploy_id: str, + steps: list[dict], + runtime_sp: dict[str, str] | None = None, + ) -> None: + """Write deployment outputs to .env and the deploy state store.""" + env_vars: dict[str, str] = {} + + if result.foundry_endpoint: + env_vars.update({ + "FOUNDRY_ENDPOINT": result.foundry_endpoint, + "FOUNDRY_NAME": result.foundry_name, + "FOUNDRY_RESOURCE_GROUP": req.resource_group, + "COPILOT_MODEL": ( + result.deployed_models[0] if result.deployed_models else "gpt-4.1" + ), + "DEPLOYED_MODELS": ",".join(result.deployed_models), + }) + if result.key_vault_url: + env_vars.update({ + "KEY_VAULT_URL": 
result.key_vault_url, + "KEY_VAULT_NAME": result.key_vault_name, + "KEY_VAULT_RG": req.resource_group, + }) + if runtime_sp: + env_vars.update({ + "RUNTIME_SP_APP_ID": runtime_sp["app_id"], + "RUNTIME_SP_PASSWORD": runtime_sp["password"], + "RUNTIME_SP_TENANT": runtime_sp["tenant"], + }) + if result.content_safety_endpoint: + env_vars.update({ + "CONTENT_SAFETY_ENDPOINT": result.content_safety_endpoint, + "CONTENT_SAFETY_NAME": result.content_safety_name, + }) + if result.search_endpoint: + env_vars.update({ + "SEARCH_ENDPOINT": result.search_endpoint, + "SEARCH_NAME": result.search_name, + }) + if result.embedding_aoai_endpoint: + env_vars.update({ + "EMBEDDING_AOAI_ENDPOINT": result.embedding_aoai_endpoint, + "EMBEDDING_AOAI_NAME": result.embedding_aoai_name, + "EMBEDDING_DEPLOYMENT_NAME": result.embedding_deployment_name, + }) + if result.app_insights_connection_string: + env_vars.update({ + "APP_INSIGHTS_CONNECTION_STRING": result.app_insights_connection_string, + "APP_INSIGHTS_NAME": result.app_insights_name, + "LOG_ANALYTICS_WORKSPACE_NAME": result.log_analytics_workspace_name, + }) + if result.session_pool_endpoint: + env_vars.update({ + "SESSION_POOL_ENDPOINT": result.session_pool_endpoint, + "SESSION_POOL_ID": result.session_pool_id, + "SESSION_POOL_NAME": result.session_pool_name, + }) + if result.acs_name: + env_vars["ACS_RESOURCE_NAME"] = result.acs_name + + if env_vars: + cfg.write_env(**env_vars) + steps.append({"step": "persist_env", "status": "ok"}) + + # Auto-configure feature stores from deployment outputs + self._configure_stores(req, result, steps) + + rec = DeploymentRecord( + deploy_id=deploy_id, + kind="local", + status="active", + resource_groups=[req.resource_group], + ) + rec.resources = [] + + _RESOURCE_MAP: list[tuple[bool, str, str, str]] = [ + (bool(result.foundry_name), + "Microsoft.CognitiveServices/accounts", + result.foundry_name, "Foundry AI Services"), + (bool(result.key_vault_name), + "Microsoft.KeyVault/vaults", + 
result.key_vault_name, "Key Vault"), + (bool(result.acs_name), + "Microsoft.Communication/communicationServices", + result.acs_name, "Communication Services"), + (bool(result.content_safety_name), + "Microsoft.CognitiveServices/accounts", + result.content_safety_name, "Content Safety"), + (bool(result.search_name), + "Microsoft.Search/searchServices", + result.search_name, "Azure AI Search"), + (bool(result.embedding_aoai_name), + "Microsoft.CognitiveServices/accounts", + result.embedding_aoai_name, "Embedding Azure OpenAI"), + (bool(result.app_insights_name), + "Microsoft.Insights/components", + result.app_insights_name, "Application Insights"), + (bool(result.log_analytics_workspace_name), + "Microsoft.OperationalInsights/workspaces", + result.log_analytics_workspace_name, "Log Analytics Workspace"), + (bool(result.session_pool_name), + "Microsoft.App/sessionPools", + result.session_pool_name, "Session Pool"), + ] + for enabled, rtype, rname, purpose in _RESOURCE_MAP: + if enabled: + rec.resources.append(ResourceEntry( + resource_type=rtype, + resource_group=req.resource_group, + resource_name=rname, + purpose=purpose, + )) + + self._store.register(rec) + steps.append({"step": "persist_state", "status": "ok"}) + + def _configure_stores( + self, + req: BicepDeployRequest, + result: BicepDeployResult, + steps: list[dict], + ) -> None: + """Auto-configure feature JSON stores from Bicep outputs. + + After a one-click deploy the features should be immediately usable + without any manual configuration steps in the admin GUI. 
+ """ + # -- Content Safety / Prompt Shields ---------------------------------- + if result.content_safety_endpoint: + try: + from ...state.guardrails.config import get_guardrails_config + gs = get_guardrails_config() + gs.set_content_safety_endpoint(result.content_safety_endpoint) + gs.set_filter_mode("prompt_shields") + steps.append({ + "step": "configure_content_safety", "status": "ok", + "detail": result.content_safety_endpoint, + }) + except Exception as exc: + logger.warning("[bicep.configure] content safety: %s", exc, exc_info=True) + steps.append({ + "step": "configure_content_safety", "status": "failed", + "detail": str(exc)[:200], + }) + + # -- Foundry IQ (Azure AI Search + Embedding) ------------------------- + if result.search_endpoint and result.embedding_aoai_endpoint: + try: + self._configure_foundry_iq(req, result, steps) + except Exception as exc: + logger.warning("[bicep.configure] foundry_iq: %s", exc, exc_info=True) + steps.append({ + "step": "configure_foundry_iq", "status": "failed", + "detail": str(exc)[:200], + }) + + # -- Monitoring (App Insights + OTel) --------------------------------- + if result.app_insights_connection_string: + try: + from ...state.monitoring_config import get_monitoring_config + account = self._az.account_info() + sub_id = account.get("id", "") if account else "" + ms = get_monitoring_config() + ms.set_provisioned_metadata( + app_insights_name=result.app_insights_name, + workspace_name=result.log_analytics_workspace_name, + resource_group=req.resource_group, + location=req.location, + connection_string=result.app_insights_connection_string, + subscription_id=sub_id, + ) + steps.append({ + "step": "configure_monitoring", "status": "ok", + "detail": result.app_insights_name, + }) + except Exception as exc: + logger.warning("[bicep.configure] monitoring: %s", exc, exc_info=True) + steps.append({ + "step": "configure_monitoring", "status": "failed", + "detail": str(exc)[:200], + }) + + # -- Sandbox (Session Pool) 
------------------------------------------- + if result.session_pool_endpoint: + try: + from ...state.sandbox_config import get_sandbox_config + ss = get_sandbox_config() + ss.set_pool_metadata( + resource_group=req.resource_group, + location=req.location, + pool_name=result.session_pool_name, + pool_id=result.session_pool_id, + endpoint=result.session_pool_endpoint, + ) + steps.append({ + "step": "configure_session_pool", "status": "ok", + "detail": result.session_pool_name, + }) + except Exception as exc: + logger.warning("[bicep.configure] session pool: %s", exc, exc_info=True) + steps.append({ + "step": "configure_session_pool", "status": "failed", + "detail": str(exc)[:200], + }) + + # -- Voice / ACS ------------------------------------------------------- + if result.acs_name: + try: + from ...state.infra_config import get_infra_config + # Fetch the ACS connection string for voice calling + keys = self._az.json( + "communication", "list-key", + "--name", result.acs_name, + "--resource-group", req.resource_group, + quiet=True, + ) + conn_string = ( + keys.get("primaryConnectionString", "") + if isinstance(keys, dict) else "" + ) + infra = get_infra_config() + infra.save_voice_call( + acs_resource_name=result.acs_name, + acs_connection_string=conn_string, + resource_group=req.resource_group, + location=req.location, + ) + steps.append({ + "step": "configure_acs", "status": "ok", + "detail": result.acs_name, + }) + except Exception as exc: + logger.warning("[bicep.configure] acs: %s", exc, exc_info=True) + steps.append({ + "step": "configure_acs", "status": "failed", + "detail": str(exc)[:200], + }) + + def _configure_foundry_iq( + self, + req: BicepDeployRequest, + result: BicepDeployResult, + steps: list[dict], + ) -> None: + """Wire up Azure AI Search + Embedding AOAI for Foundry IQ.""" + from ...state.foundry_iq_config import get_foundry_iq_config + + # Managed-identity auth is preferred (Bicep assigns RBAC roles). 
+ # API keys are only used as a fallback when local-auth is enabled. + search_key = "" + aoai_key = "" + + fiq = get_foundry_iq_config() + fiq.save( + resource_group=req.resource_group, + location=req.location, + search_resource_name=result.search_name, + openai_resource_name=result.embedding_aoai_name, + openai_deployment_name=result.embedding_deployment_name, + search_endpoint=result.search_endpoint, + search_api_key=search_key, + embedding_endpoint=result.embedding_aoai_endpoint, + embedding_api_key=aoai_key, + embedding_model=result.embedding_deployment_name, + embedding_dimensions=3072, + index_name="polyclaw-memories", + provisioned=True, + enabled=True, + ) + steps.append({ + "step": "configure_foundry_iq", "status": "ok", + "detail": "search=%s aoai=%s" % (result.search_name, result.embedding_aoai_name), + }) + + # Create the search index + try: + from ..foundry_iq import ensure_index + idx_result = ensure_index(fiq) + idx_ok = idx_result.get("status") == "ok" + steps.append({ + "step": "create_search_index", + "status": "ok" if idx_ok else "warning", + "detail": idx_result.get("detail", ""), + }) + except Exception as exc: + steps.append({ + "step": "create_search_index", "status": "warning", + "detail": str(exc)[:200], + }) diff --git a/app/runtime/services/foundry_iq.py b/app/runtime/services/foundry_iq.py index 3760fb9..059f874 100644 --- a/app/runtime/services/foundry_iq.py +++ b/app/runtime/services/foundry_iq.py @@ -43,7 +43,19 @@ def _get_entra_token() -> str: def _search_headers(config: FoundryIQConfigStore) -> dict[str, str]: - return {"api-key": config.config.search_api_key, "Content-Type": "application/json"} + headers: dict[str, str] = {"Content-Type": "application/json"} + if config.config.search_api_key: + headers["api-key"] = config.config.search_api_key + else: + headers["Authorization"] = f"Bearer {_get_search_token()}" + return headers + + +def _get_search_token() -> str: + from azure.identity import DefaultAzureCredential # type: 
ignore[import-untyped] + + credential = DefaultAzureCredential() + return credential.get_token("https://search.azure.com/.default").token def _search_url(config: FoundryIQConfigStore, path: str) -> str: diff --git a/app/runtime/services/otel.py b/app/runtime/services/otel.py index 4997579..cd29744 100644 --- a/app/runtime/services/otel.py +++ b/app/runtime/services/otel.py @@ -166,11 +166,13 @@ def agent_span( from opentelemetry import trace tracer = trace.get_tracer(_TRACER_NAME) - with tracer.start_as_current_span(name, attributes=attributes) as span: - yield span except Exception: logger.debug("[otel.agent_span] Failed to create span %s", name, exc_info=True) yield None + return + + with tracer.start_as_current_span(name, attributes=attributes) as span: + yield span @contextmanager @@ -208,21 +210,6 @@ def invoke_agent_span( from opentelemetry.trace import SpanKind, StatusCode tracer = trace.get_tracer(_TRACER_NAME) - attrs: dict[str, Any] = {"gen_ai.agent.name": agent_name} - if model: - attrs["gen_ai.request.model"] = model - with tracer.start_as_current_span( - "invoke_agent", - kind=SpanKind.CLIENT, - attributes=attrs, - ) as span: - try: - yield span - except Exception as exc: - if span.is_recording(): - span.set_attribute("error.type", type(exc).__name__) - span.set_status(StatusCode.ERROR, str(exc)[:200]) - raise except Exception: logger.debug( "[otel.invoke_agent_span] Failed to create span for %s", @@ -230,6 +217,23 @@ def invoke_agent_span( exc_info=True, ) yield None + return + + attrs: dict[str, Any] = {"gen_ai.agent.name": agent_name} + if model: + attrs["gen_ai.request.model"] = model + with tracer.start_as_current_span( + "invoke_agent", + kind=SpanKind.CLIENT, + attributes=attrs, + ) as span: + try: + yield span + except Exception as exc: + if span.is_recording(): + span.set_attribute("error.type", type(exc).__name__) + span.set_status(StatusCode.ERROR, str(exc)[:200]) + raise def record_event(name: str, attributes: dict[str, Any] | None = None) 
-> None: diff --git a/app/runtime/services/security/preflight_secrets.py b/app/runtime/services/security/preflight_secrets.py index 152be4f..819f508 100644 --- a/app/runtime/services/security/preflight_secrets.py +++ b/app/runtime/services/security/preflight_secrets.py @@ -12,7 +12,6 @@ def run_secret_checks(result: PreflightResult) -> None: """Execute all secret-isolation checks.""" check_admin_cli_isolated(result) - check_no_github_in_runtime(result) check_bot_credentials(result) check_admin_secret(result) check_kv_reachable(result) @@ -74,53 +73,6 @@ def check_admin_cli_isolated(result: PreflightResult) -> None: ) -def check_no_github_in_runtime(result: PreflightResult) -> None: - env_data = cfg.env.read_all() - gh_token = env_data.get("GITHUB_TOKEN", "") - gh2 = env_data.get("GH_TOKEN", "") - mode = cfg.server_mode.value - - if mode == "runtime": - has = bool(gh_token or gh2) - _add( - result, id="secret_no_github_runtime", category="secrets", - name="No GitHub Token in Runtime", - status="fail" if has else "pass", - detail=( - "GitHub token NOT present in runtime environment" - if not has - else "RISK: GitHub token accessible in runtime env" - ), - evidence=( - f"GITHUB_TOKEN={'set (' + str(len(gh_token)) + ' chars)' if gh_token else 'empty'}\n" - f"GH_TOKEN={'set' if gh2 else 'empty'}" - ), - command="env: GITHUB_TOKEN, GH_TOKEN", - ) - elif mode == "admin": - has = bool(gh_token or gh2) - _add( - result, id="secret_no_github_runtime", category="secrets", - name="GitHub Token (Admin Only)", - status="pass", - detail=f"GitHub token on admin: {'present' if has else 'not configured'}", - evidence=( - f"GITHUB_TOKEN={'set' if gh_token else 'empty'}\n" - f"GH_TOKEN={'set' if gh2 else 'empty'}" - ), - command="env: GITHUB_TOKEN, GH_TOKEN", - ) - else: - _add( - result, id="secret_no_github_runtime", category="secrets", - name="GitHub Token Isolation", - status="warn", - detail="Combined mode -- GitHub token shared with agent runtime", - 
evidence=f"POLYCLAW_SERVER_MODE={mode}", - command="cfg.server_mode + env", - ) - - def check_bot_credentials(result: PreflightResult) -> None: env_data = cfg.env.read_all() app_id = env_data.get("BOT_APP_ID", "") diff --git a/app/runtime/state/_base.py b/app/runtime/state/_base.py index 21df52b..8b02d0e 100644 --- a/app/runtime/state/_base.py +++ b/app/runtime/state/_base.py @@ -72,12 +72,15 @@ def _load(self) -> None: def _apply_raw(self, raw: dict[str, Any]) -> None: """Populate config fields from a raw JSON dict. - Default implementation sets every dataclass field found in *raw*. - Override for custom deserialisation (e.g. secret resolution). + Default implementation sets every dataclass field found in *raw*, + resolving secret references for fields listed in ``_SECRET_FIELDS``. """ for field_name in self._config_type.__dataclass_fields__: if field_name in raw: - setattr(self._config, field_name, raw[field_name]) + value = raw[field_name] + if field_name in self._SECRET_FIELDS and isinstance(value, str): + value = self._resolve_secret(value) + setattr(self._config, field_name, value) def _save(self) -> None: self._path.parent.mkdir(parents=True, exist_ok=True) @@ -86,10 +89,13 @@ def _save(self) -> None: def _save_data(self) -> dict[str, Any]: """Return the data dict to serialise. - Default implementation returns ``dataclasses.asdict(self._config)``. - Override for custom serialisation (e.g. secret storage). + Default implementation returns ``dataclasses.asdict(self._config)``, + storing secrets via Key Vault when ``_SECRET_FIELDS`` is non-empty. 
""" - return asdict(self._config) + data = asdict(self._config) + if self._SECRET_FIELDS: + data = self._store_secrets(data) + return data # -- secret helpers ---------------------------------------------------- diff --git a/app/runtime/state/guardrails/risk.py b/app/runtime/state/guardrails/risk.py index 3485696..9dd059e 100644 --- a/app/runtime/state/guardrails/risk.py +++ b/app/runtime/state/guardrails/risk.py @@ -5,22 +5,17 @@ from typing import Any # ── Model tiers ────────────────────────────────────────────────────────── -# Tier 1 (cautious): large frontier models -- most access, highest risk posture +# Tier 1 (safe): large frontier models -- most permissive guardrails # Tier 2 (standard): capable mid-range models -# Tier 3 (safe): smaller / older models -- least access, lowest risk posture +# Tier 3 (cautious): smaller / older models -- most restrictive guardrails _MODEL_TIERS: dict[str, int] = { - # Tier 1 -- cautious (most permissive, highest risk) - "gpt-5.3-codex": 1, - "claude-opus-4.6": 1, - "claude-opus-4.6-fast": 1, + # Tier 1 -- safe (most permissive) + "gpt-5": 1, # Tier 2 -- standard - "claude-sonnet-4.6": 2, - "gpt-5.2": 2, - "gemini-3-pro-preview": 2, - # Tier 3 -- safe (most restrictive, lowest risk) + "gpt-4.1": 2, + # Tier 3 -- cautious (most restrictive) "gpt-5-mini": 3, - "gpt-4.1": 3, } _DEFAULT_TIER = 3 # Unknown models get the most restrictive tier diff --git a/app/runtime/state/guardrails_config.py b/app/runtime/state/guardrails_config.py deleted file mode 100644 index f47356f..0000000 --- a/app/runtime/state/guardrails_config.py +++ /dev/null @@ -1,500 +0,0 @@ -"""Guardrails configuration -- HITL approval rules for tools and MCP servers.""" - -from __future__ import annotations - -import json -import logging -from dataclasses import asdict -from pathlib import Path -from typing import Any - -from ..agent.policy_bridge import ( - build_engine, - config_to_yaml, - make_eval_context, - validate_yaml, - yaml_to_config, -) -from 
..config.settings import cfg - -from .guardrails_bulk import ( - apply_model_defaults_to_config, - apply_preset_to_config, - set_all_strategies_on_config, -) - -# Re-export public symbols so existing imports keep working. -from .guardrails_models import GuardrailRule, GuardrailsConfig, _VALID_STRATEGIES -from .guardrails_presets import ( - PRESET_BALANCED, - PRESET_PERMISSIVE, - PRESET_RESTRICTIVE, - _ALL_PRESET_TOOL_IDS, - _build_preset_policies, - list_background_agents, - list_presets, -) -from .guardrails_risk import ( - _MODEL_TIERS, - _risk_of, - get_model_tier, - get_preset_for_model, - list_model_tiers, -) - -logger = logging.getLogger(__name__) - -_instance: GuardrailsConfigStore | None = None - - -class GuardrailsConfigStore: - """JSON-file-backed guardrails configuration. - - The store maintains both a JSON file (UI state, phone numbers, AITL - config, etc.) and a YAML policy file consumed by the agent-policy-guard - ``PolicyEngine``. Every mutation regenerates the YAML and rebuilds - the engine so that ``resolve_action()`` always reflects the latest - configuration. 
- """ - - def __init__(self, path: Path | None = None) -> None: - self._path = path or (cfg.data_dir / "guardrails.json") - self._policy_path = self._path.with_name("policy.yaml") - self._config = GuardrailsConfig() - self._engine = build_engine(self._generate_yaml()) - self._load() - - @property - def path(self) -> Path: - return self._path - - @property - def config(self) -> GuardrailsConfig: - return self._config - - @property - def hitl_enabled(self) -> bool: - return self._config.hitl_enabled - - @property - def default_action(self) -> str: - return self._config.default_action - - @property - def rules(self) -> list[GuardrailRule]: - return list(self._config.rules) - - def set_hitl_enabled(self, enabled: bool) -> None: - self._config.hitl_enabled = enabled - self._save() - - def set_default_action(self, action: str) -> None: - if action not in _VALID_STRATEGIES: - raise ValueError("action must be one of: %s" % ", ".join(sorted(_VALID_STRATEGIES))) - self._config.default_action = action - self._save() - - @property - def default_channel(self) -> str: - return self._config.default_channel - - @property - def phone_number(self) -> str: - return self._config.phone_number - - def set_default_channel(self, channel: str) -> None: - if channel not in ("chat", "phone"): - raise ValueError("channel must be 'chat' or 'phone'") - self._config.default_channel = channel - self._save() - - def set_phone_number(self, number: str) -> None: - self._config.phone_number = number - self._save() - - def set_aitl_model(self, model: str) -> None: - self._config.aitl_model = model - self._save() - - def set_aitl_spotlighting(self, enabled: bool) -> None: - self._config.aitl_spotlighting = enabled - self._save() - - def set_filter_mode(self, mode: str) -> None: - if mode != "prompt_shields": - raise ValueError("filter_mode must be 'prompt_shields'") - self._config.filter_mode = mode - self._save() - - def set_content_safety_endpoint(self, endpoint: str) -> None: - 
self._config.content_safety_endpoint = endpoint - self._save() - - def set_content_safety_key(self, key: str) -> None: - self._config.content_safety_key = key - self._save() - - def set_context_default(self, context: str, strategy: str) -> None: - if strategy not in _VALID_STRATEGIES: - raise ValueError("strategy must be one of: %s" % ", ".join(sorted(_VALID_STRATEGIES))) - self._config.context_defaults[context] = strategy - self._save() - - def remove_context_default(self, context: str) -> bool: - """Remove a context-level default, reverting to fallback resolution.""" - if context in self._config.context_defaults: - del self._config.context_defaults[context] - self._save() - return True - return False - - def set_tool_policy( - self, context: str, tool_id: str, strategy: str, - ) -> None: - if strategy not in _VALID_STRATEGIES: - raise ValueError("strategy must be one of: %s" % ", ".join(sorted(_VALID_STRATEGIES))) - if context not in self._config.tool_policies: - self._config.tool_policies[context] = {} - self._config.tool_policies[context][tool_id] = strategy - self._save() - - def remove_tool_policy(self, context: str, tool_id: str) -> bool: - policies = self._config.tool_policies.get(context, {}) - if tool_id in policies: - del policies[tool_id] - self._save() - return True - return False - - def add_model_column(self, model: str) -> None: - if model not in self._config.model_columns: - self._config.model_columns.append(model) - self._save() - - def remove_model_column(self, model: str) -> bool: - if model in self._config.model_columns: - self._config.model_columns.remove(model) - self._config.model_policies.pop(model, None) - self._save() - return True - return False - - def set_model_policy( - self, model: str, tool_id: str, strategy: str, context: str = "interactive", - ) -> None: - if strategy not in _VALID_STRATEGIES: - raise ValueError("strategy must be one of: %s" % ", ".join(sorted(_VALID_STRATEGIES))) - if model not in self._config.model_policies: - 
self._config.model_policies[model] = {} - if context not in self._config.model_policies[model]: - self._config.model_policies[model][context] = {} - self._config.model_policies[model][context][tool_id] = strategy - self._save() - - def remove_model_policy( - self, model: str, tool_id: str, context: str = "interactive", - ) -> bool: - ctx_policies = self._config.model_policies.get(model, {}).get(context, {}) - if tool_id in ctx_policies: - del ctx_policies[tool_id] - self._save() - return True - return False - - def apply_preset(self, preset: str, *, auto_models: bool = True) -> None: - """Apply a named preset to context_defaults and tool_policies.""" - apply_preset_to_config(self._config, preset, auto_models=auto_models) - self._save() - - def set_all_strategies(self, strategy: str) -> None: - """Set every tool policy and context default to *strategy*.""" - set_all_strategies_on_config(self._config, strategy) - self._save() - - def apply_model_defaults( - self, - models: list[str] | None = None, - *, - preset: str | None = None, - ) -> None: - """Auto-populate model columns with tier-appropriate policies.""" - apply_model_defaults_to_config(self._config, models, preset=preset) - self._save() - - def add_rule( - self, - *, - name: str, - pattern: str, - scope: str = "tool", - action: str = "ask", - enabled: bool = True, - description: str = "", - contexts: list[str] | None = None, - models: list[str] | None = None, - hitl_channel: str = "chat", - ) -> GuardrailRule: - if scope not in ("tool", "mcp"): - raise ValueError("scope must be 'tool' or 'mcp'") - if action not in _VALID_STRATEGIES: - raise ValueError("action must be one of: %s" % ", ".join(sorted(_VALID_STRATEGIES))) - if hitl_channel not in ("chat", "phone"): - raise ValueError("hitl_channel must be 'chat' or 'phone'") - rule = GuardrailRule( - name=name, - pattern=pattern, - scope=scope, - action=action, - enabled=enabled, - description=description, - contexts=contexts or [], - models=models or [], - 
hitl_channel=hitl_channel, - ) - self._config.rules.append(rule) - self._save() - return rule - - def update_rule(self, rule_id: str, **kwargs: Any) -> GuardrailRule | None: - for rule in self._config.rules: - if rule.id == rule_id: - for k, v in kwargs.items(): - if k == "id": - continue - if hasattr(rule, k): - setattr(rule, k, v) - self._save() - return rule - return None - - def remove_rule(self, rule_id: str) -> bool: - before = len(self._config.rules) - self._config.rules = [r for r in self._config.rules if r.id != rule_id] - if len(self._config.rules) < before: - self._save() - return True - return False - - def get_rule(self, rule_id: str) -> GuardrailRule | None: - for rule in self._config.rules: - if rule.id == rule_id: - return rule - return None - - def resolve_action( - self, - tool_name: str, - mcp_server: str | None = None, - execution_context: str = "", - model: str = "", - ) -> str: - """Determine the strategy for a given tool invocation. - - Delegates to the agent-policy-guard ``PolicyEngine`` which evaluates - the generated YAML policy set. The YAML encodes all context defaults, - tool policies, model policies, legacy rules, and background-agent - fallbacks. - - When ``hitl_enabled`` is ``False`` the engine already has ``allow`` - as its default effect and no policies are generated, so it returns - ``"allow"`` for every call. - """ - ctx = make_eval_context( - tool_name=tool_name, - mcp_server=mcp_server, - execution_context=execution_context, - model=model, - ) - result = self._engine.resolve(ctx) - logger.debug( - "[guardrails.resolve] engine result: tool=%s ctx=%s model=%s -> %s", - tool_name, execution_context, model, result, - ) - return result - - def resolve_channel( - self, - tool_name: str, - mcp_server: str | None = None, - execution_context: str = "", - model: str = "", - ) -> str: - """Determine the HITL channel for a tool invocation. 
- - Returns the ``hitl_channel`` of the first matching rule, or the - store-level ``default_channel``. - """ - if not self._config.hitl_enabled: - return "chat" - - for rule in self._config.rules: - if not rule.enabled: - continue - if rule.contexts and execution_context and execution_context not in rule.contexts: - continue - if rule.models and model: - if not any(self._matches(m, model) for m in rule.models): - continue - if rule.scope == "tool" and self._matches(rule.pattern, tool_name): - return rule.hitl_channel - if rule.scope == "mcp" and mcp_server and self._matches(rule.pattern, mcp_server): - return rule.hitl_channel - - return self._config.default_channel - - def to_dict(self) -> dict[str, Any]: - return { - # Frontend-canonical fields - "enabled": self._config.hitl_enabled, - "default_strategy": self._config.default_action, - "hitl_channel": self._config.default_channel, - "context_defaults": dict(self._config.context_defaults), - "tool_policies": { - ctx: dict(policies) - for ctx, policies in self._config.tool_policies.items() - }, - "model_columns": list(self._config.model_columns), - "model_policies": { - model: { - ctx: dict(tool_map) - for ctx, tool_map in ctx_policies.items() - } - for model, ctx_policies in self._config.model_policies.items() - }, - # Backend / legacy fields - "hitl_enabled": self._config.hitl_enabled, - "default_action": self._config.default_action, - "default_channel": self._config.default_channel, - "phone_number": self._config.phone_number, - "aitl_model": self._config.aitl_model, - "aitl_spotlighting": self._config.aitl_spotlighting, - "filter_mode": self._config.filter_mode, - "content_safety_endpoint": self._config.content_safety_endpoint, - "rules": [asdict(r) for r in self._config.rules], - } - - @staticmethod - def _matches(pattern: str, name: str) -> bool: - """Simple glob-style matching: '*' matches everything, prefix* matches prefix.""" - if pattern == "*": - return True - if pattern.endswith("*"): - return 
name.startswith(pattern[:-1]) - return pattern == name - - def _load(self) -> None: - if not self._path.exists(): - self._rebuild_engine() - return - try: - raw = json.loads(self._path.read_text()) - self._config = GuardrailsConfig( - hitl_enabled=raw.get("enabled", raw.get("hitl_enabled", False)), - default_action=raw.get("default_strategy", raw.get("default_action", "allow")), - default_channel=raw.get("hitl_channel", raw.get("default_channel", "chat")), - phone_number=raw.get("phone_number", ""), - aitl_model=raw.get("aitl_model", "gpt-4.1"), - aitl_spotlighting=raw.get("aitl_spotlighting", True), - filter_mode=raw.get("filter_mode", "prompt_shields"), - content_safety_endpoint=raw.get("content_safety_endpoint", ""), - content_safety_key=raw.get("content_safety_key", ""), - rules=[ - GuardrailRule(**{ - k: v for k, v in r.items() - if k in GuardrailRule.__dataclass_fields__ - }) - for r in raw.get("rules", []) - ], - context_defaults=raw.get("context_defaults", {}), - tool_policies=raw.get("tool_policies", {}), - model_columns=raw.get("model_columns", []), - model_policies=raw.get("model_policies", {}), - ) - self._rebuild_engine() - except Exception as exc: - logger.warning("Failed to load guardrails config from %s: %s", self._path, exc) - - @property - def policy_path(self) -> Path: - """Path to the generated policy YAML file.""" - return self._policy_path - - def get_policy_yaml(self) -> str: - """Return the current policy as a YAML string.""" - return self._generate_yaml() - - def set_policy_yaml(self, yaml_text: str) -> str | None: - """Apply a raw YAML policy, updating the config to match. - - Returns ``None`` on success or an error message string. 
- """ - error = validate_yaml(yaml_text) - if error: - return error - try: - parsed = yaml_to_config(yaml_text) - self._config.default_action = parsed["default_action"] - self._config.default_channel = parsed["default_channel"] - self._config.context_defaults = parsed["context_defaults"] - self._config.tool_policies = parsed["tool_policies"] - self._config.model_columns = parsed["model_columns"] - self._config.model_policies = parsed["model_policies"] - if parsed.get("rules"): - self._config.rules = [ - GuardrailRule(**{ - k: v for k, v in r.items() - if k in GuardrailRule.__dataclass_fields__ - }) - for r in parsed["rules"] - ] - self._save() - return None - except Exception as exc: - logger.warning("[guardrails] failed to apply YAML: %s", exc, exc_info=True) - return str(exc) - - def _generate_yaml(self) -> str: - """Generate a policy YAML string from the current config.""" - return config_to_yaml( - hitl_enabled=self._config.hitl_enabled, - default_action=self._config.default_action, - default_channel=self._config.default_channel, - context_defaults=self._config.context_defaults, - tool_policies=self._config.tool_policies, - model_columns=self._config.model_columns, - model_policies=self._config.model_policies, - rules=[asdict(r) for r in self._config.rules], - ) - - def _rebuild_engine(self) -> None: - """Rebuild the PolicyEngine from the current config.""" - yaml_text = self._generate_yaml() - self._engine = build_engine(yaml_text) - # Write the YAML file alongside the JSON for reference / expert mode - try: - self._policy_path.write_text(yaml_text) - except Exception as exc: - logger.warning("[guardrails] failed to write policy.yaml: %s", exc) - - def _save(self) -> None: - self._path.parent.mkdir(parents=True, exist_ok=True) - self._path.write_text(json.dumps(self.to_dict(), indent=2) + "\n") - self._rebuild_engine() - - -def get_guardrails_config(path: Path | None = None) -> GuardrailsConfigStore: - """Module-level singleton accessor.""" - global _instance 
- if _instance is None: - _instance = GuardrailsConfigStore(path) - return _instance - - -def _reset_guardrails_config() -> None: - global _instance - _instance = None - - -from ..util.singletons import register_singleton # noqa: E402 - -register_singleton(_reset_guardrails_config) diff --git a/app/runtime/state/infra_config.py b/app/runtime/state/infra_config.py index 7ee2a17..8c9d625 100644 --- a/app/runtime/state/infra_config.py +++ b/app/runtime/state/infra_config.py @@ -155,3 +155,25 @@ def _mask_secrets(self, d: dict[str, Any]) -> dict[str, Any]: k: ("****" if k in self._SECRET_FIELDS and v else v) for k, v in d.items() } + + +# -- singleton ------------------------------------------------------------- + +_instance: InfraConfigStore | None = None + + +def get_infra_config() -> InfraConfigStore: + global _instance + if _instance is None: + _instance = InfraConfigStore() + return _instance + + +def _reset_infra_config() -> None: + global _instance + _instance = None + + +from ..util.singletons import register_singleton # noqa: E402 + +register_singleton(_reset_infra_config) diff --git a/app/runtime/state/monitoring_config.py b/app/runtime/state/monitoring_config.py index 627e59a..f898f16 100644 --- a/app/runtime/state/monitoring_config.py +++ b/app/runtime/state/monitoring_config.py @@ -144,3 +144,25 @@ def to_dict_full(self) -> dict[str, Any]: return asdict(self._config) +# -- singleton ------------------------------------------------------------- + +_instance: MonitoringConfigStore | None = None + + +def get_monitoring_config() -> MonitoringConfigStore: + global _instance + if _instance is None: + _instance = MonitoringConfigStore() + return _instance + + +def _reset_monitoring_config() -> None: + global _instance + _instance = None + + +from ..util.singletons import register_singleton # noqa: E402 + +register_singleton(_reset_monitoring_config) + + diff --git a/app/runtime/state/sandbox_config.py b/app/runtime/state/sandbox_config.py index 4113bae..6d46b3a 
100644 --- a/app/runtime/state/sandbox_config.py +++ b/app/runtime/state/sandbox_config.py @@ -142,3 +142,25 @@ def update(self, **kwargs: Any) -> None: self._save() +# -- singleton ------------------------------------------------------------- + +_instance: SandboxConfigStore | None = None + + +def get_sandbox_config() -> SandboxConfigStore: + global _instance + if _instance is None: + _instance = SandboxConfigStore() + return _instance + + +def _reset_sandbox_config() -> None: + global _instance + _instance = None + + +from ..util.singletons import register_singleton # noqa: E402 + +register_singleton(_reset_sandbox_config) + + diff --git a/app/runtime/tests/conftest.py b/app/runtime/tests/conftest.py index 38054c8..66f9a3d 100644 --- a/app/runtime/tests/conftest.py +++ b/app/runtime/tests/conftest.py @@ -9,17 +9,25 @@ @pytest.fixture(autouse=True) -def _isolate_data_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: +def _isolate_data_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch, request) -> Path: + # E2E tests drive Docker containers externally -- skip isolation. + if any(m.name == "e2e_setup" for m in request.node.iter_markers()): + yield tmp_path + return data_dir = tmp_path / "data" data_dir.mkdir() monkeypatch.setenv("POLYCLAW_DATA_DIR", str(data_dir)) monkeypatch.setenv("POLYCLAW_PROJECT_ROOT", str(tmp_path)) monkeypatch.setenv("DOTENV_PATH", str(tmp_path / ".env")) - return data_dir + yield data_dir @pytest.fixture(autouse=True) -def _reset_singletons(_isolate_data_dir: Path): +def _reset_singletons(_isolate_data_dir: Path, request): + # E2E tests drive Docker containers externally -- skip singleton reset. 
+ if any(m.name == "e2e_setup" for m in request.node.iter_markers()): + yield + return from app.runtime.util.singletons import reset_all_singletons reset_all_singletons() diff --git a/app/runtime/tests/test_bicep_deploy.py b/app/runtime/tests/test_bicep_deploy.py new file mode 100644 index 0000000..506ba63 --- /dev/null +++ b/app/runtime/tests/test_bicep_deploy.py @@ -0,0 +1,437 @@ +"""Tests for the Bicep-based infrastructure deployer.""" + +from __future__ import annotations + +import json +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from app.runtime.services.deployment.bicep_deployer import ( + BicepDeployer, + BicepDeployRequest, + BicepDeployResult, + _BICEP_TEMPLATE, + _ObservableSteps, +) +from app.runtime.state.deploy_state import DeployStateStore + + +class TestBicepDeployRequest: + """BicepDeployRequest defaults and auto-naming.""" + + def test_default_models(self) -> None: + req = BicepDeployRequest() + assert len(req.models) == 3 + names = [m["name"] for m in req.models] + assert "gpt-4.1" in names + assert "gpt-5" in names + assert "gpt-5-mini" in names + + def test_auto_generates_base_name(self) -> None: + req = BicepDeployRequest() + assert req.base_name.startswith("polyclaw-") + assert len(req.base_name) == len("polyclaw-") + 8 + + def test_explicit_base_name(self) -> None: + req = BicepDeployRequest(base_name="my-custom-name") + assert req.base_name == "my-custom-name" + + def test_default_location(self) -> None: + req = BicepDeployRequest() + assert req.location == "eastus" + + def test_default_resource_group(self) -> None: + req = BicepDeployRequest() + assert req.resource_group == "polyclaw-rg" + + +class TestBicepDeployer: + """Unit tests for BicepDeployer with mocked AzureCLI.""" + + def _make_deployer(self) -> tuple[BicepDeployer, MagicMock, DeployStateStore]: + az = MagicMock() + az.last_stderr = "" + store = DeployStateStore() + deployer = BicepDeployer(az, store) + return deployer, az, store + + 
def test_deploy_succeeds(self) -> None: + deployer, az, store = self._make_deployer() + + # Mock: RG exists + az.json.side_effect = self._az_json_router({ + ("group", "show"): {"name": "polyclaw-rg"}, + ("ad", "signed-in-user"): {"id": "user-oid-123", "userPrincipalName": "user@test.com"}, + ("ad", "sp", "create-for-rbac"): {"appId": "sp-app-id", "password": "sp-pw", "tenant": "sp-tenant"}, + ("ad", "sp", "show"): {"id": "sp-object-id"}, + ("deployment", "group"): { + "foundryEndpoint": {"value": "https://myai.openai.azure.com/"}, + "foundryName": {"value": "myai"}, + "foundryResourceId": {"value": "/subscriptions/sub/resourceGroups/rg/providers/Microsoft.CognitiveServices/accounts/myai"}, + "deployedModels": {"value": ["gpt-4.1", "gpt-5", "gpt-5-mini"]}, + "keyVaultUrl": {"value": "https://myai-kv.vault.azure.net/"}, + "keyVaultName": {"value": "myai-kv"}, + "acsName": {"value": ""}, + "acsResourceId": {"value": ""}, + }, + }) + az.account_info.return_value = {"id": "sub-123", "user": {"name": "user@test.com"}, "name": "MySub"} + + req = BicepDeployRequest(base_name="myai", resource_group="polyclaw-rg") + result = deployer.deploy(req) + + assert result.ok is True + assert result.foundry_endpoint == "https://myai.openai.azure.com/" + assert result.foundry_name == "myai" + assert result.deployed_models == ["gpt-4.1", "gpt-5", "gpt-5-mini"] + assert result.key_vault_url == "https://myai-kv.vault.azure.net/" + assert result.deploy_id != "" + + # Verify deploy state was recorded + assert len(store.summary()) >= 1 + + def test_deploy_fails_on_rg_creation(self) -> None: + deployer, az, _store = self._make_deployer() + + # Mock: RG does not exist and creation fails + az.json.return_value = None + az.last_stderr = "subscription not found" + + req = BicepDeployRequest(base_name="test") + result = deployer.deploy(req) + + assert result.ok is False + assert "Resource group creation failed" in result.error + + def test_deploy_fails_without_principal(self) -> None: + deployer, 
az, _store = self._make_deployer() + + # RG exists but principal resolution fails + def _side_effect(*args, **kwargs): + if args and args[0] == "group": + return {"name": "polyclaw-rg"} + return None + az.json.side_effect = _side_effect + az.account_info.return_value = None + + req = BicepDeployRequest(base_name="test") + result = deployer.deploy(req) + + assert result.ok is False + assert "principal" in result.error.lower() + + def test_deploy_fails_on_bicep_error(self) -> None: + deployer, az, _store = self._make_deployer() + + call_count = 0 + def _side_effect(*args, **kwargs): + nonlocal call_count + call_count += 1 + if args and args[0] == "group": + return {"name": "polyclaw-rg"} + if args and args[0] == "ad" and "signed-in-user" in args: + return {"id": "user-oid", "userPrincipalName": "u@t.com"} + if args and args[0] == "ad" and "create-for-rbac" in args: + return {"appId": "sp-id", "password": "pw", "tenant": "t"} + if args and args[0] == "ad" and "show" in args: + return {"id": "sp-oid"} + # Bicep deployment fails + return None + + az.json.side_effect = _side_effect + az.account_info.return_value = {"id": "sub-1", "user": {"name": "u@t.com"}, "name": "MySub"} + az.last_stderr = "InvalidTemplate" + + req = BicepDeployRequest(base_name="test") + result = deployer.deploy(req) + + assert result.ok is False + assert "Bicep deployment failed" in result.error + + def test_status_returns_env_values(self) -> None: + deployer, _az, _store = self._make_deployer() + status = deployer.status() + assert "deployed" in status + assert "foundry_endpoint" in status + + def test_decommission_no_rg(self) -> None: + deployer, _az, _store = self._make_deployer() + with patch("app.runtime.services.deployment.bicep_deployer.cfg") as mock_cfg: + mock_cfg.env.read.return_value = "" + steps = deployer.decommission("") + assert steps[0]["status"] == "skip" + + def test_decommission_deletes_rg(self) -> None: + deployer, az, _store = self._make_deployer() + az.ok.return_value = (True, 
"") + + with patch("app.runtime.services.deployment.bicep_deployer.cfg") as mock_cfg: + mock_cfg.env.read.return_value = "" + mock_cfg.write_env = MagicMock() + steps = deployer.decommission("polyclaw-rg") + + assert any(s["step"] == "delete_resource_group" and s["status"] == "ok" for s in steps) + + def test_ensure_runtime_sp_creates_new(self) -> None: + """First deploy must create a runtime SP.""" + deployer, az, _store = self._make_deployer() + + with patch("app.runtime.services.deployment.bicep_deployer.cfg") as mock_cfg: + mock_cfg.env.read.return_value = "" + + az.account_info.return_value = {"id": "sub-123"} + az.json.side_effect = self._az_json_router({ + ("ad", "sp", "create-for-rbac"): { + "appId": "new-sp-id", "password": "new-sp-pw", "tenant": "my-tenant", + }, + ("ad", "sp", "show"): {"id": "sp-object-id-123"}, + }) + + req = BicepDeployRequest(base_name="test", resource_group="rg") + result = deployer._ensure_runtime_sp(req, []) + + assert result is not None + assert result["app_id"] == "new-sp-id" + assert result["password"] == "new-sp-pw" + assert result["tenant"] == "my-tenant" + assert result["object_id"] == "sp-object-id-123" + + def test_ensure_runtime_sp_reuses_existing(self) -> None: + """Existing valid SP must be reused (no new create-for-rbac).""" + deployer, az, _store = self._make_deployer() + + with patch("app.runtime.services.deployment.bicep_deployer.cfg") as mock_cfg: + mock_cfg.env.read.side_effect = lambda k: { + "RUNTIME_SP_APP_ID": "existing-id", + "RUNTIME_SP_PASSWORD": "existing-pw", + "RUNTIME_SP_TENANT": "existing-tenant", + }.get(k, "") + + az.json.side_effect = self._az_json_router({ + ("ad", "sp", "show"): {"id": "existing-oid"}, + }) + + req = BicepDeployRequest(base_name="test", resource_group="rg") + result = deployer._ensure_runtime_sp(req, []) + + assert result is not None + assert result["app_id"] == "existing-id" + assert result["object_id"] == "existing-oid" + # create-for-rbac should NOT have been called + for call 
in az.json.call_args_list: + assert "create-for-rbac" not in call[0] + + # -- helpers ----------------------------------------------------------- + + @staticmethod + def _az_json_router(routes: dict) -> callable: + """Create a side_effect function that routes az.json calls.""" + def _route(*args, **kwargs): + for key, value in routes.items(): + if all(k in args for k in key): + return value + return None + return _route + + +class TestBicepTemplate: + """Verify the Bicep template file exists and is syntactically valid.""" + + def test_template_exists(self) -> None: + assert _BICEP_TEMPLATE.exists(), f"Bicep template not found at {_BICEP_TEMPLATE}" + + def test_template_has_required_params(self) -> None: + content = _BICEP_TEMPLATE.read_text() + assert "param baseName" in content + assert "param location" in content + assert "param principalId" in content + assert "param models" in content + + def test_template_has_required_outputs(self) -> None: + content = _BICEP_TEMPLATE.read_text() + assert "output foundryEndpoint" in content + assert "output foundryName" in content + assert "output deployedModels" in content + assert "output keyVaultUrl" in content + + def test_template_creates_ai_services(self) -> None: + content = _BICEP_TEMPLATE.read_text() + assert "Microsoft.CognitiveServices/accounts" in content + assert "AIServices" in content + + def test_template_creates_rbac(self) -> None: + content = _BICEP_TEMPLATE.read_text() + assert "Microsoft.Authorization/roleAssignments" in content + # Cognitive Services OpenAI User role ID + assert "5e0bd9bd-7b93-4f28-af87-19fc36ad61bd" in content + + +class TestBYOKProvider: + """Tests for the BYOK provider configuration builder.""" + + def test_no_provider_without_endpoint(self) -> None: + from app.runtime.agent.byok import build_provider_config + + with patch("app.runtime.agent.byok.cfg") as mock_cfg: + mock_cfg.foundry_endpoint = "" + result = build_provider_config() + assert result is None + + def 
test_session_overrides_empty_without_endpoint(self) -> None: + from app.runtime.agent.byok import build_session_overrides + + with patch("app.runtime.agent.byok.cfg") as mock_cfg: + mock_cfg.foundry_endpoint = "" + result = build_session_overrides() + assert result == {} + + @patch("app.runtime.agent.byok.get_bearer_token") + def test_provider_config_with_endpoint(self, mock_token: MagicMock) -> None: + from app.runtime.agent.byok import build_provider_config + + mock_token.return_value = "test-token-123" + with patch("app.runtime.agent.byok.cfg") as mock_cfg: + mock_cfg.foundry_endpoint = "https://myai.openai.azure.com/" + result = build_provider_config() + + assert result is not None + assert result["type"] == "azure" + assert result["base_url"] == "https://myai.openai.azure.com" + assert result["bearer_token"] == "test-token-123" + assert "api_version" in result["azure"] + + @patch("app.runtime.agent.byok.get_bearer_token") + def test_session_overrides_with_endpoint(self, mock_token: MagicMock) -> None: + from app.runtime.agent.byok import build_session_overrides + + mock_token.return_value = "test-token-456" + with patch("app.runtime.agent.byok.cfg") as mock_cfg: + mock_cfg.foundry_endpoint = "https://myai.openai.azure.com/" + mock_cfg.copilot_model = "gpt-4.1" + result = build_session_overrides() + + assert "provider" in result + assert result["model"] == "gpt-4.1" + + @patch("app.runtime.agent.byok.get_bearer_token") + def test_provider_returns_none_without_token(self, mock_token: MagicMock) -> None: + from app.runtime.agent.byok import build_provider_config + + mock_token.return_value = "" + with patch("app.runtime.agent.byok.cfg") as mock_cfg: + mock_cfg.foundry_endpoint = "https://myai.openai.azure.com/" + result = build_provider_config() + + assert result is None + + +class TestModelPresets: + """Verify the simplified Foundry model presets.""" + + def test_foundry_models_exist(self) -> None: + from app.runtime.state.guardrails.risk import _MODEL_TIERS + + 
assert "gpt-4.1" in _MODEL_TIERS + assert "gpt-5" in _MODEL_TIERS + assert "gpt-5-mini" in _MODEL_TIERS + + def test_github_models_removed(self) -> None: + from app.runtime.state.guardrails.risk import _MODEL_TIERS + + assert "claude-sonnet-4.6" not in _MODEL_TIERS + assert "claude-opus-4.6" not in _MODEL_TIERS + assert "gpt-5.3-codex" not in _MODEL_TIERS + assert "gemini-3-pro-preview" not in _MODEL_TIERS + + def test_tier_assignment(self) -> None: + from app.runtime.state.guardrails.risk import get_model_tier + + assert get_model_tier("gpt-5") == 1 + assert get_model_tier("gpt-4.1") == 2 + assert get_model_tier("gpt-5-mini") == 3 + + def test_unknown_model_defaults_to_restrictive(self) -> None: + from app.runtime.state.guardrails.risk import get_model_tier + + assert get_model_tier("unknown-model") == 3 + + def test_list_model_tiers(self) -> None: + from app.runtime.state.guardrails.risk import list_model_tiers + + tiers = list_model_tiers() + assert len(tiers) == 3 + names = [t["model"] for t in tiers] + assert "gpt-4.1" in names + assert "gpt-5" in names + assert "gpt-5-mini" in names + + +class TestSettingsFoundry: + """Verify Foundry settings are loaded from env.""" + + def test_foundry_settings_default_empty(self) -> None: + from app.runtime.config.settings import Settings + + s = Settings() + assert s.foundry_endpoint == "" + assert s.foundry_name == "" + assert s.foundry_resource_group == "" + + def test_default_model_is_gpt41(self) -> None: + from app.runtime.config.settings import Settings + + s = Settings() + assert s.copilot_model == "gpt-4.1" + + def test_default_memory_model_is_gpt41(self) -> None: + from app.runtime.config.settings import Settings + + s = Settings() + assert s.memory_model == "gpt-4.1" + + +class TestObservableSteps: + """Tests for the _ObservableSteps callback list.""" + + def test_callback_fires_on_append(self) -> None: + received: list[dict] = [] + steps = _ObservableSteps(lambda s: received.append(s)) + steps.append({"step": 
"a", "status": "ok"}) + steps.append({"step": "b", "status": "failed"}) + assert len(received) == 2 + assert received[0]["step"] == "a" + assert received[1]["step"] == "b" + assert list(steps) == received + + def test_no_callback(self) -> None: + steps = _ObservableSteps(None) + steps.append({"step": "a", "status": "ok"}) + assert len(steps) == 1 + + def test_callback_exception_does_not_abort(self) -> None: + def bad_cb(_: dict) -> None: + raise RuntimeError("boom") + + steps = _ObservableSteps(bad_cb) + steps.append({"step": "a", "status": "ok"}) + assert len(steps) == 1 + + def test_deploy_with_on_step_callback(self) -> None: + """deploy() should invoke on_step for each step.""" + az = MagicMock() + az.last_stderr = "" + az.json.return_value = None # RG creation fails + store = DeployStateStore() + deployer = BicepDeployer(az, store) + + received: list[dict] = [] + req = BicepDeployRequest(base_name="test") + result = deployer.deploy(req, on_step=received.append) + + assert result.ok is False + # Steps should have been recorded via the callback + assert len(received) >= 1 + # The steps list on result should match + assert list(result.steps) == received diff --git a/app/runtime/tests/test_content_safety_routes.py b/app/runtime/tests/test_content_safety_routes.py index eea9fff..b0fdeeb 100644 --- a/app/runtime/tests/test_content_safety_routes.py +++ b/app/runtime/tests/test_content_safety_routes.py @@ -2,13 +2,14 @@ from __future__ import annotations -from unittest.mock import MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest from aiohttp import web from aiohttp.test_utils import TestClient, TestServer from app.runtime.server.routes.content_safety_routes import ContentSafetyRoutes +from app.runtime.services.deployment.bicep_deployer import BicepDeployResult from app.runtime.services.security.prompt_shield import ShieldResult from app.runtime.state.guardrails import GuardrailsConfigStore @@ -74,10 +75,10 @@ async def 
test_deploy_no_az_returns_400(self, tmp_path) -> None: ) assert resp.status == 400 data = await resp.json() - assert "Azure CLI" in data["message"] + assert "not available" in data["message"].lower() @pytest.mark.asyncio - async def test_deploy_no_store_returns_500(self) -> None: + async def test_deploy_no_store_returns_error(self) -> None: az = MagicMock() routes = ContentSafetyRoutes(az=az, guardrails_store=None) app = _build_app(routes) @@ -86,38 +87,36 @@ async def test_deploy_no_store_returns_500(self) -> None: "/api/content-safety/deploy", json={"resource_name": "test-cs"}, ) - assert resp.status == 500 + assert resp.status == 400 data = await resp.json() - assert "store" in data["message"].lower() + assert "not available" in data["message"].lower() @pytest.mark.asyncio - @patch("app.runtime.server.routes.content_safety_routes.cfg") - async def test_deploy_success(self, mock_cfg, tmp_path) -> None: - """Full deploy flow: create, endpoint, RBAC, config updated.""" - mock_cfg.runtime_sp_app_id = "sp-app-id-1234" - mock_cfg.aca_mi_client_id = "" + @patch("app.runtime.server.routes.content_safety_routes.run_sync", new_callable=AsyncMock) + async def test_deploy_success(self, mock_run_sync, tmp_path) -> None: + """Full deploy flow via Bicep: returns endpoint, config updated.""" + result = BicepDeployResult( + ok=True, + deploy_id="test-deploy-id", + content_safety_endpoint="https://test-cs.cognitiveservices.azure.com/", + content_safety_name="test-cs", + steps=[ + {"step": "bicep_deploy", "status": "ok", "detail": "Deployed"}, + ], + ) + mock_run_sync.return_value = result az = MagicMock() - az.last_stderr = "" - # az.json calls: create, show, ad sp show (identity resolution) - az.json.side_effect = [ - {"id": "/sub/rg/res/test-cs", "name": "test-cs"}, - {"properties": {"endpoint": "https://test-cs.cognitiveservices.azure.com/"}}, - {"id": "sp-object-id-5678", "objectId": "sp-object-id-5678"}, - ] - az.ok.return_value = (True, "") - + deploy_store = MagicMock() 
store = GuardrailsConfigStore(tmp_path / "g.json") - routes = ContentSafetyRoutes(az=az, guardrails_store=store) + routes = ContentSafetyRoutes( + az=az, guardrails_store=store, deploy_store=deploy_store, + ) app = _build_app(routes) async with TestClient(TestServer(app)) as client: resp = await client.post( "/api/content-safety/deploy", - json={ - "resource_name": "test-cs", - "resource_group": "test-rg", - "location": "westus2", - }, + json={"resource_group": "test-rg", "location": "westus2"}, ) assert resp.status == 200 data = await resp.json() @@ -125,25 +124,9 @@ async def test_deploy_success(self, mock_cfg, tmp_path) -> None: assert data["endpoint"] == "https://test-cs.cognitiveservices.azure.com/" assert data["filter_mode"] == "prompt_shields" - # Verify all steps steps = {s["step"]: s["status"] for s in data["steps"]} - assert steps["create_resource"] == "ok" - assert steps["get_endpoint"] == "ok" - assert steps["rbac_assign"] == "ok" + assert steps["bicep_deploy"] == "ok" assert steps["update_config"] == "ok" - # No key-related steps - assert "get_key" not in steps - assert "store_key_kv" not in steps - - # Verify RBAC was assigned with correct args - ok_call = az.ok.call_args - assert "--role" in ok_call[0] - assert "--scope" in ok_call[0] - scope_idx = list(ok_call[0]).index("--scope") - assert ok_call[0][scope_idx + 1] == "/sub/rg/res/test-cs" - # Verify principal type is passed - type_idx = list(ok_call[0]).index("--assignee-principal-type") - assert ok_call[0][type_idx + 1] == "ServicePrincipal" # Verify guardrails config was updated assert store.config.content_safety_endpoint == ( @@ -152,105 +135,24 @@ async def test_deploy_success(self, mock_cfg, tmp_path) -> None: assert store.config.filter_mode == "prompt_shields" @pytest.mark.asyncio - @patch("app.runtime.server.routes.content_safety_routes.cfg") - async def test_deploy_rbac_skip_no_identity(self, mock_cfg, tmp_path) -> None: - """When no identity can be resolved, RBAC step warns.""" - 
mock_cfg.runtime_sp_app_id = "" - mock_cfg.aca_mi_client_id = "" - - az = MagicMock() - az.last_stderr = "" - az.json.side_effect = [ - {"id": "/sub/rg/res/test-cs", "name": "test-cs"}, - {"properties": {"endpoint": "https://test-cs.cognitiveservices.azure.com/"}}, - None, # signed-in-user show - ] - az.account_info.return_value = None - - store = GuardrailsConfigStore(tmp_path / "g.json") - routes = ContentSafetyRoutes(az=az, guardrails_store=store) - app = _build_app(routes) - async with TestClient(TestServer(app)) as client: - resp = await client.post( - "/api/content-safety/deploy", json={}, - ) - assert resp.status == 200 - data = await resp.json() - steps = {s["step"]: s for s in data["steps"]} - assert steps["rbac_assign"]["status"] == "warning" - assert "manually" in steps["rbac_assign"]["detail"] - - @pytest.mark.asyncio - @patch("app.runtime.server.routes.content_safety_routes.cfg") - async def test_deploy_rbac_with_managed_identity(self, mock_cfg, tmp_path) -> None: - """When ACA_MI_CLIENT_ID is set, RBAC is assigned to the MI.""" - mock_cfg.runtime_sp_app_id = "" - mock_cfg.aca_mi_client_id = "mi-client-id-abc" - - az = MagicMock() - az.last_stderr = "" - az.json.side_effect = [ - {"id": "/sub/rg/res/test-cs", "name": "test-cs"}, - {"properties": {"endpoint": "https://test-cs.cognitiveservices.azure.com/"}}, - {"id": "mi-object-id-xyz"}, # ad sp show for MI - ] - az.ok.return_value = (True, "") - - store = GuardrailsConfigStore(tmp_path / "g.json") - routes = ContentSafetyRoutes(az=az, guardrails_store=store) - app = _build_app(routes) - async with TestClient(TestServer(app)) as client: - resp = await client.post("/api/content-safety/deploy", json={}) - assert resp.status == 200 - data = await resp.json() - steps = {s["step"]: s for s in data["steps"]} - assert steps["rbac_assign"]["status"] == "ok" - - ok_call = az.ok.call_args - oid_idx = list(ok_call[0]).index("--assignee-object-id") - assert ok_call[0][oid_idx + 1] == "mi-object-id-xyz" - - 
@pytest.mark.asyncio - @patch("app.runtime.server.routes.content_safety_routes.cfg") - async def test_deploy_rbac_with_cli_user(self, mock_cfg, tmp_path) -> None: - """When no SP/MI, RBAC falls back to signed-in CLI user.""" - mock_cfg.runtime_sp_app_id = "" - mock_cfg.aca_mi_client_id = "" - - az = MagicMock() - az.last_stderr = "" - az.json.side_effect = [ - {"id": "/sub/rg/res/test-cs", "name": "test-cs"}, - {"properties": {"endpoint": "https://test-cs.cognitiveservices.azure.com/"}}, - {"id": "user-oid-1234"}, # signed-in-user show - ] - az.ok.return_value = (True, "") - - store = GuardrailsConfigStore(tmp_path / "g.json") - routes = ContentSafetyRoutes(az=az, guardrails_store=store) - app = _build_app(routes) - async with TestClient(TestServer(app)) as client: - resp = await client.post("/api/content-safety/deploy", json={}) - assert resp.status == 200 - data = await resp.json() - steps = {s["step"]: s for s in data["steps"]} - assert steps["rbac_assign"]["status"] == "ok" - - ok_call = az.ok.call_args - oid_idx = list(ok_call[0]).index("--assignee-object-id") - assert ok_call[0][oid_idx + 1] == "user-oid-1234" - type_idx = list(ok_call[0]).index("--assignee-principal-type") - assert ok_call[0][type_idx + 1] == "User" + @patch("app.runtime.server.routes.content_safety_routes.run_sync", new_callable=AsyncMock) + async def test_deploy_bicep_fails(self, mock_run_sync, tmp_path) -> None: + """When Bicep deployment fails, route returns 500 with steps.""" + result = BicepDeployResult( + ok=False, + error="Subscription not found", + steps=[ + {"step": "bicep_deploy", "status": "failed", "detail": "Subscription not found"}, + ], + ) + mock_run_sync.return_value = result - @pytest.mark.asyncio - async def test_deploy_create_fails(self, tmp_path) -> None: - """When resource creation fails, route returns 500 with steps.""" az = MagicMock() - az.last_stderr = "Subscription not found" - az.json.return_value = None - + deploy_store = MagicMock() store = 
GuardrailsConfigStore(tmp_path / "g.json") - routes = ContentSafetyRoutes(az=az, guardrails_store=store) + routes = ContentSafetyRoutes( + az=az, guardrails_store=store, deploy_store=deploy_store, + ) app = _build_app(routes) async with TestClient(TestServer(app)) as client: resp = await client.post( @@ -260,76 +162,45 @@ async def test_deploy_create_fails(self, tmp_path) -> None: assert resp.status == 500 data = await resp.json() assert data["status"] == "error" - steps = {s["step"]: s["status"] for s in data["steps"]} - assert steps["create_resource"] == "failed" + assert "Subscription not found" in data["message"] @pytest.mark.asyncio - @patch("app.runtime.server.routes.content_safety_routes.cfg") - async def test_deploy_resource_already_exists( - self, mock_cfg, tmp_path, - ) -> None: - """When resource already exists, the route reuses it.""" - mock_cfg.runtime_sp_app_id = "" - mock_cfg.aca_mi_client_id = "" + @patch("app.runtime.server.routes.content_safety_routes.run_sync", new_callable=AsyncMock) + async def test_deploy_no_endpoint_returns_error(self, mock_run_sync, tmp_path) -> None: + """When Bicep succeeds but no endpoint, returns 500.""" + result = BicepDeployResult( + ok=True, + deploy_id="test-deploy", + content_safety_endpoint="", + steps=[], + ) + mock_run_sync.return_value = result az = MagicMock() - az.last_stderr = "Conflict: resource already exists" - az.json.side_effect = [ - None, # create returns None (conflict) - {"id": "/sub/rg/existing-cs", "properties": { - "endpoint": "https://existing.cognitiveservices.azure.com/", - }}, - None, # signed-in-user show - ] - az.account_info.return_value = None - + deploy_store = MagicMock() store = GuardrailsConfigStore(tmp_path / "g.json") - routes = ContentSafetyRoutes(az=az, guardrails_store=store) + routes = ContentSafetyRoutes( + az=az, guardrails_store=store, deploy_store=deploy_store, + ) app = _build_app(routes) async with TestClient(TestServer(app)) as client: - resp = await client.post( - 
"/api/content-safety/deploy", - json={"resource_name": "existing-cs"}, - ) - assert resp.status == 200 - data = await resp.json() - assert data["status"] == "ok" - assert store.config.content_safety_endpoint == ( - "https://existing.cognitiveservices.azure.com/" - ) + resp = await client.post("/api/content-safety/deploy", json={}) + assert resp.status == 500 @pytest.mark.asyncio - @patch("app.runtime.server.routes.content_safety_routes.cfg") - async def test_deploy_uses_defaults(self, mock_cfg, tmp_path) -> None: - """When no parameters provided, defaults are used.""" - mock_cfg.runtime_sp_app_id = "" - mock_cfg.aca_mi_client_id = "" - + async def test_deploy_no_guardrails_store_returns_500(self, tmp_path) -> None: + """When az and deploy_store are present but no guardrails store.""" az = MagicMock() - az.last_stderr = "" - az.json.side_effect = [ - {"id": "/sub/rg/res"}, - {"properties": {"endpoint": "https://polyclaw-content-safety.cognitiveservices.azure.com/"}}, - None, # signed-in-user show - ] - az.account_info.return_value = None - - store = GuardrailsConfigStore(tmp_path / "g.json") - routes = ContentSafetyRoutes(az=az, guardrails_store=store) + deploy_store = MagicMock() + routes = ContentSafetyRoutes( + az=az, guardrails_store=None, deploy_store=deploy_store, + ) app = _build_app(routes) async with TestClient(TestServer(app)) as client: resp = await client.post("/api/content-safety/deploy", json={}) - assert resp.status == 200 - - # Check az was called with default values - create_call = az.json.call_args_list[0] - args = create_call[0] - assert "--kind" in args - idx = list(args).index("--kind") - assert args[idx + 1] == "ContentSafety" - assert "--name" in args - name_idx = list(args).index("--name") - assert args[name_idx + 1] == "polyclaw-content-safety" + assert resp.status == 500 + data = await resp.json() + assert "store" in data["message"].lower() class TestContentSafetyEnsureRbac: diff --git a/app/runtime/tests/test_e2e_aca_deploy.py 
b/app/runtime/tests/test_e2e_aca_deploy.py new file mode 100644 index 0000000..8d65d6f --- /dev/null +++ b/app/runtime/tests/test_e2e_aca_deploy.py @@ -0,0 +1,539 @@ +"""End-to-end ACA (Azure Container Apps) deployment test bench. + +Drives the admin API to: + 1. Verify local Docker stack prerequisites (Foundry deployed, bot configured). + 2. Push the container image to ACR. + 3. Provision Managed Identity + RBAC. + 4. Deploy the runtime as an Azure Container App. + 5. Verify the cloud-hosted runtime is reachable and chat works. + 6. Redeploy (idempotency). + 7. Destroy and verify cleanup. + +Usage:: + + pytest app/runtime/tests/test_e2e_aca_deploy.py --run-e2e-setup -s -v + +Requirements: + - Docker running locally with the ``polyclaw:latest`` image built. + - Active ``az login`` session. + - Prior successful run of test_e2e_setup_process.py (Foundry deployed). + - Sufficient Azure quota in the target region for ACA + ACR. + +Typical wall-clock: 10-20 min. +""" + +from __future__ import annotations + +import json +import logging +import os +import subprocess +import time +from pathlib import Path +from typing import Any + +import pytest + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +_PROJECT_ROOT = Path(__file__).resolve().parents[3] +_ADMIN_CONTAINER = "polyclaw-admin" +_ADMIN_URL = "http://localhost:9090" +_BUILD_TIMEOUT = 900 +_BOOT_TIMEOUT = 120 +_DEPLOY_TIMEOUT = 600 +_ACA_HEALTH_TIMEOUT = 300 +_HEALTH_POLL = 5 +_API_TIMEOUT = 30 + +_RG = "polyclaw-e2e-aca-rg" +_LOCATION = "eastus" + + +# --------------------------------------------------------------------------- +# Shell / Docker / API helpers (shared with test_e2e_setup_process.py) +# --------------------------------------------------------------------------- + +def _run( + cmd: list[str], + *, + timeout: int = 60, + check: bool = True, + 
cwd: Path | None = None, +) -> subprocess.CompletedProcess[str]: + return subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=timeout, + check=check, + cwd=cwd or _PROJECT_ROOT, + ) + + +def _compose(*args: str, timeout: int = 60) -> subprocess.CompletedProcess[str]: + return _run(["docker", "compose", *args], timeout=timeout) + + +def _api( + path: str, + *, + method: str = "GET", + body: dict | None = None, + secret: str = "", + timeout: int = _API_TIMEOUT, +) -> tuple[int, dict | None]: + url = f"{_ADMIN_URL}{path}" + cmd: list[str] = [ + "curl", "-s", "--max-time", str(timeout), + "-o", "/dev/stdout", "-w", "\n%{http_code}", + ] + if secret: + cmd += ["-H", f"Authorization: Bearer {secret}"] + if method == "POST": + cmd += ["-X", "POST", "-H", "Content-Type: application/json"] + cmd += ["-d", json.dumps(body) if body else "{}"] + elif method == "DELETE": + cmd += ["-X", "DELETE"] + cmd.append(url) + + try: + r = _run(cmd, check=False, timeout=timeout + 10) + except subprocess.TimeoutExpired: + return 0, None + + parts = r.stdout.rsplit("\n", 1) + if len(parts) < 2: + return 0, None + try: + status_code = int(parts[-1]) + except ValueError: + return 0, None + try: + data = json.loads(parts[0]) + except (json.JSONDecodeError, IndexError): + data = None + return status_code, data + + +def _api_ok( + path: str, + *, + method: str = "GET", + body: dict | None = None, + secret: str = "", + timeout: int = _API_TIMEOUT, + expected_status: int = 200, +) -> dict: + code, data = _api(path, method=method, body=body, secret=secret, timeout=timeout) + assert code == expected_status, ( + f"{method} {path} returned {code} (expected {expected_status}).\n" + f"Response: {json.dumps(data, indent=2) if data else ''}" + ) + assert data is not None, f"{method} {path} returned no JSON body" + return data + + +def _poll_health(timeout: float = _BOOT_TIMEOUT) -> dict | None: + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + try: + r = _run( 
+ ["curl", "-sf", "--max-time", "3", f"{_ADMIN_URL}/health"], + check=False, timeout=10, + ) + if r.returncode == 0 and r.stdout.strip(): + return json.loads(r.stdout) + except Exception: + pass + time.sleep(_HEALTH_POLL) + return None + + +def _copy_azure_creds() -> bool: + azure_dir = Path.home() / ".azure" + if not azure_dir.exists(): + return False + try: + _run( + ["docker", "exec", _ADMIN_CONTAINER, "mkdir", "-p", "/admin-home/.azure"], + timeout=10, + ) + _ESSENTIAL = [ + "azureProfile.json", + "msal_token_cache.json", + "msal_token_cache.bin", + "az.json", + "az.sess", + "clouds.config", + "config", + ] + copied = 0 + for name in _ESSENTIAL: + src = azure_dir / name + if src.exists(): + _run( + ["docker", "cp", str(src), f"{_ADMIN_CONTAINER}:/admin-home/.azure/{name}"], + timeout=30, + ) + copied += 1 + logger.info("Copied %d Azure auth files into admin container", copied) + return copied > 0 + except Exception as exc: + logger.error("Failed to copy Azure creds: %s", exc) + return False + + +def _get_admin_secret() -> str: + """Read the admin secret from the running container's /data/.env.""" + try: + r = _run( + ["docker", "exec", _ADMIN_CONTAINER, "sh", "-c", + "grep '^ADMIN_SECRET=' /data/.env | head -1 | cut -d= -f2"], + check=False, timeout=10, + ) + return r.stdout.strip().strip('"') + except Exception: + return "" + + +def _poll_aca_health(fqdn: str, timeout: float = _ACA_HEALTH_TIMEOUT) -> bool: + """Poll the ACA runtime health endpoint until it responds.""" + url = f"https://{fqdn}/health" + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + try: + r = _run( + ["curl", "-sf", "--max-time", "5", url], + check=False, timeout=10, + ) + if r.returncode == 0 and r.stdout.strip(): + logger.info("ACA runtime healthy: %s", r.stdout.strip()[:200]) + return True + except Exception: + pass + time.sleep(_HEALTH_POLL) + return False + + +# --------------------------------------------------------------------------- +# Fixtures +# 
--------------------------------------------------------------------------- + +@pytest.fixture(scope="module") +def admin_secret(): + """Discover or inject the admin secret from the running Docker stack.""" + # First check if the stack is already running + health = _poll_health(timeout=10) + if health: + secret = _get_admin_secret() + if secret: + return secret + + # Boot the stack + try: + _run(["docker", "info"], timeout=15) + except Exception: + pytest.skip("Docker not available") + + logger.info("Building Docker image ...") + try: + _compose("build", timeout=_BUILD_TIMEOUT) + except subprocess.CalledProcessError as exc: + pytest.fail(f"Docker build failed:\n{exc.stderr[:2000]}") + + logger.info("Starting Docker stack ...") + try: + _compose("up", "-d", timeout=60) + except subprocess.CalledProcessError as exc: + pytest.fail(f"docker compose up failed:\n{exc.stderr[:2000]}") + + health = _poll_health(timeout=_BOOT_TIMEOUT) + if not health: + pytest.fail("Admin container not healthy") + + # Inject a secret + secret = "e2e-aca-secret-" + os.urandom(8).hex() + try: + _run( + ["docker", "exec", _ADMIN_CONTAINER, "sh", "-c", + f'grep -q "^ADMIN_SECRET=" /data/.env 2>/dev/null ' + f'&& sed -i "s|^ADMIN_SECRET=.*|ADMIN_SECRET={secret}|" /data/.env ' + f'|| echo "ADMIN_SECRET={secret}" >> /data/.env'], + timeout=10, + ) + except Exception as exc: + pytest.fail(f"Failed to inject ADMIN_SECRET: {exc}") + + _compose("restart", timeout=60) + health = _poll_health(timeout=_BOOT_TIMEOUT) + if not health: + pytest.fail("Admin not healthy after restart") + + ok = _copy_azure_creds() + if not ok: + pytest.fail("Failed to copy Azure creds") + + time.sleep(35) + return secret + + +@pytest.fixture(scope="module") +def stack(admin_secret): + """Verify the local Docker stack is healthy and Azure-authenticated.""" + health = _poll_health(timeout=10) + assert health, "Admin container not healthy" + + # Verify Azure login + deadline = time.monotonic() + 60 + while time.monotonic() < 
deadline: + data = _api_ok("/api/setup/azure/check", secret=admin_secret) + if data.get("status") == "logged_in": + break + time.sleep(5) + assert data.get("status") == "logged_in", "Azure CLI not logged in" + return health + + +@pytest.fixture(scope="module") +def _aca_rg_cleanup(): + """Best-effort cleanup of the ACA resource group after all tests.""" + yield + logger.info("Cleaning up ACA RG %s ...", _RG) + try: + _run(["az", "group", "delete", "--name", _RG, "--yes", "--no-wait"], + check=False, timeout=30) + except Exception as exc: + logger.warning("ACA RG cleanup failed: %s", exc) + + +# =================================================================== +# PHASE 1: Pre-flight -- local stack prerequisites +# =================================================================== + +@pytest.mark.e2e_setup +class TestAcaPhase01Preflight: + """Verify local Docker stack prerequisites before ACA deployment.""" + + def test_local_stack_healthy(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/status", secret=admin_secret) + logger.info("Local status: %s", json.dumps(data, indent=2)[:500]) + + def test_foundry_deployed(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/status", secret=admin_secret) + if not data["foundry"]["deployed"]: + pytest.skip("Foundry not deployed -- run test_e2e_setup_process first") + logger.info("Foundry endpoint: %s", data["foundry"]["endpoint"]) + + def test_aca_status_initial(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/aca/status", secret=admin_secret) + logger.info("Initial ACA status: %s", json.dumps(data, indent=2)) + + def test_docker_image_exists(self, stack) -> None: + r = _run(["docker", "images", "polyclaw:latest", "--format", "{{.ID}}"], + check=False, timeout=10) + assert r.stdout.strip(), "polyclaw:latest image not found" + + +# =================================================================== +# PHASE 2: ACA deployment +# 
=================================================================== + +@pytest.mark.e2e_setup +class TestAcaPhase02Deploy: + """Deploy the runtime to Azure Container Apps.""" + + def test_deploy_aca(self, stack, admin_secret) -> None: + body = { + "resource_group": _RG, + "location": _LOCATION, + "display_name": "polyclaw-e2e-aca", + "admin_port": 9090, + "runtime_port": 8080, + "image_tag": "latest", + } + code, data = _api( + "/api/setup/aca/deploy", + method="POST", body=body, + secret=admin_secret, timeout=_DEPLOY_TIMEOUT, + ) + logger.info("ACA deploy: code=%d", code) + if data: + for step in data.get("steps", []): + logger.info( + " %s: %s %s", + step.get("step"), step.get("status"), + step.get("detail", "")[:200], + ) + assert code == 200, ( + f"ACA deploy returned {code}:\n" + f"{json.dumps(data, indent=2)[:2000] if data else ''}" + ) + assert data.get("status") == "ok", f"Deploy failed: {data.get('message')}" + assert data.get("runtime_fqdn"), "No runtime FQDN returned" + logger.info("ACA deployed: fqdn=%s", data["runtime_fqdn"]) + + def test_aca_status_deployed(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/aca/status", secret=admin_secret) + assert data.get("deployed"), f"ACA not showing as deployed: {data}" + assert data.get("runtime_fqdn") + assert data.get("acr_name") + logger.info("ACA status: %s", json.dumps(data, indent=2)) + + +# =================================================================== +# PHASE 3: Verify cloud runtime is alive +# =================================================================== + +@pytest.mark.e2e_setup +class TestAcaPhase03RuntimeHealth: + """Verify the deployed ACA runtime is reachable and healthy.""" + + def test_runtime_health(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/aca/status", secret=admin_secret) + fqdn = data.get("runtime_fqdn") + if not fqdn: + pytest.skip("ACA not deployed") + ok = _poll_aca_health(fqdn) + assert ok, f"ACA runtime at {fqdn} not healthy after 
{_ACA_HEALTH_TIMEOUT}s" + + def test_runtime_api_reachable(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/aca/status", secret=admin_secret) + fqdn = data.get("runtime_fqdn") + if not fqdn: + pytest.skip("ACA not deployed") + # Try to reach the /api/models endpoint on the cloud runtime + url = f"https://{fqdn}/api/models" + try: + r = _run(["curl", "-sf", "--max-time", "10", url], check=False, timeout=15) + logger.info("Cloud /api/models: rc=%d body=%s", r.returncode, r.stdout[:300]) + except Exception as exc: + logger.warning("Cloud API unreachable: %s", exc) + + +# =================================================================== +# PHASE 4: Chat via cloud runtime +# =================================================================== + +@pytest.mark.e2e_setup +class TestAcaPhase04Chat: + """Verify chat works through the ACA-deployed runtime.""" + + def test_chat_via_cloud(self, stack, admin_secret) -> None: + """Admin's copilot smoke-test should hit the cloud runtime URL.""" + # After ACA deploy, RUNTIME_URL should be set to https:// + code, data = _api( + "/api/setup/copilot/smoke-test", + method="POST", secret=admin_secret, timeout=120, + ) + logger.info("Cloud smoke test: code=%d data=%s", + code, json.dumps(data or {}, indent=2)[:1000]) + if code != 200: + logger.warning("Smoke test failed -- chat may not work through ACA yet") + + +# =================================================================== +# PHASE 5: Container restart +# =================================================================== + +@pytest.mark.e2e_setup +class TestAcaPhase05Restart: + """Verify container restart works on ACA.""" + + def test_restart_runtime(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/aca/status", secret=admin_secret) + if not data.get("deployed"): + pytest.skip("ACA not deployed") + code, resp = _api( + "/api/setup/container/restart", + method="POST", secret=admin_secret, timeout=120, + ) + logger.info("Container restart: code=%d 
data=%s", code, json.dumps(resp or {}, indent=2)) + if code == 200: + # Wait for runtime to come back + fqdn = data.get("runtime_fqdn") + if fqdn: + time.sleep(10) + ok = _poll_aca_health(fqdn, timeout=120) + assert ok, "Runtime not healthy after restart" + + +# =================================================================== +# PHASE 6: Idempotency -- redeploy +# =================================================================== + +@pytest.mark.e2e_setup +class TestAcaPhase06Idempotency: + """Redeploy ACA and verify stability.""" + + def test_redeploy_aca(self, stack, admin_secret) -> None: + # Get existing ACR and env names to reuse + status_data = _api_ok("/api/setup/aca/status", secret=admin_secret) + body = { + "resource_group": _RG, + "location": _LOCATION, + "display_name": "polyclaw-e2e-aca", + "image_tag": "latest", + "acr_name": status_data.get("acr_name", ""), + "env_name": status_data.get("env_name", ""), + } + code, data = _api( + "/api/setup/aca/deploy", + method="POST", body=body, + secret=admin_secret, timeout=_DEPLOY_TIMEOUT, + ) + logger.info("ACA redeploy: code=%d", code) + if data: + for step in data.get("steps", []): + logger.info(" %s: %s", step.get("step"), step.get("status")) + if code != 200: + pytest.xfail(f"ACA redeploy failed: {data}") + assert data.get("runtime_fqdn") + + def test_runtime_still_healthy_after_redeploy(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/aca/status", secret=admin_secret) + fqdn = data.get("runtime_fqdn") + if not fqdn: + pytest.skip("ACA not deployed") + time.sleep(15) + ok = _poll_aca_health(fqdn, timeout=120) + assert ok, f"ACA not healthy after redeploy: {fqdn}" + + +# =================================================================== +# PHASE 7: Destroy +# =================================================================== + +@pytest.mark.e2e_setup +class TestAcaPhase07Destroy: + """Destroy ACA deployment and verify cleanup.""" + + def test_destroy_aca(self, stack, admin_secret) -> 
None: + code, data = _api( + "/api/setup/aca/destroy", + method="POST", body={}, + secret=admin_secret, timeout=300, + ) + logger.info("ACA destroy: code=%d data=%s", code, json.dumps(data or {}, indent=2)) + assert code == 200, f"Destroy returned {code}: {data}" + assert data.get("status") == "ok" + + def test_aca_status_after_destroy(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/aca/status", secret=admin_secret) + assert not data.get("deployed"), f"ACA still showing deployed: {data}" + assert not data.get("runtime_fqdn") + logger.info("ACA post-destroy status: %s", json.dumps(data, indent=2)) + + def test_collect_diagnostics(self, stack) -> None: + try: + r = _run( + ["docker", "logs", "--tail", "50", _ADMIN_CONTAINER], + check=False, timeout=15, + ) + logger.info("=== Admin logs ===\n%s", (r.stdout + r.stderr).strip()[:2000]) + except Exception: + pass diff --git a/app/runtime/tests/test_e2e_bicep_azure.py b/app/runtime/tests/test_e2e_bicep_azure.py new file mode 100644 index 0000000..5946a62 --- /dev/null +++ b/app/runtime/tests/test_e2e_bicep_azure.py @@ -0,0 +1,509 @@ +"""End-to-end Azure deployment test. + +Provisions REAL Azure resources via the Bicep template, verifies they appear +in the subscription, exercises enable/disable flows through the Python API +layer, and tears everything down at the end. + +Usage: + pytest app/runtime/tests/test_e2e_bicep_azure.py --run-slow -s -v + +Requires: + - ``az login`` (active session) + - Sufficient Azure quota in eastus for CognitiveServices, Search, etc. + - The test creates its own resource group and deletes it on teardown. + +Cost: deploys S0 Cognitive Services (Foundry, Content Safety), basic +Search, Log Analytics, App Insights. Teardown deletes the RG which +cascade-deletes everything. Typical wall-clock time: 5-8 minutes. 
+""" + +from __future__ import annotations + +import json +import logging +import subprocess +import time + +import pytest + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +_RG = "polyclaw-e2e-test-rg" +_LOCATION = "eastus" +_BASE_NAME = "pclawe2etest" +_TIMEOUT_AZ = 120 # seconds for az CLI calls + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _az(*args: str, timeout: int = _TIMEOUT_AZ) -> subprocess.CompletedProcess[str]: + """Run an ``az`` CLI command and return the result.""" + return subprocess.run( + ["az", *args, "-o", "json"], + capture_output=True, + text=True, + timeout=timeout, + check=False, + ) + + +def _az_json(*args: str, timeout: int = _TIMEOUT_AZ) -> dict | list | None: + """Run ``az`` and parse JSON output. 
Return None on failure.""" + r = _az(*args, timeout=timeout) + if r.returncode != 0: + logger.warning("az %s failed (rc=%d): %s", args[0], r.returncode, r.stderr[:300]) + return None + try: + return json.loads(r.stdout) + except json.JSONDecodeError: + return None + + +def _wait_for_resource(resource_type: str, name: str, rg: str = _RG, timeout: int = 180) -> bool: + """Poll until a resource appears in the resource group.""" + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + resources = _az_json( + "resource", "list", + "--resource-group", rg, + "--resource-type", resource_type, + "--query", f"[?name=='{name}']", + ) + if resources and len(resources) > 0: + return True + time.sleep(5) + return False + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="module") +def _az_available() -> None: + """Skip if Azure CLI is not logged in.""" + r = _az("account", "show", timeout=15) + if r.returncode != 0: + pytest.skip("Azure CLI not logged in") + + +@pytest.fixture(scope="module") +def deployer(_az_available): + """Create a BicepDeployer wired to a real AzureCLI.""" + from app.runtime.services.cloud.azure import AzureCLI + from app.runtime.services.deployment.bicep_deployer import BicepDeployer + from app.runtime.state.deploy_state import DeployStateStore + + az = AzureCLI() + store = DeployStateStore() + return BicepDeployer(az, store) + + +@pytest.fixture(scope="module") +def az_cli(_az_available): + """Real AzureCLI instance.""" + from app.runtime.services.cloud.azure import AzureCLI + return AzureCLI() + + +@pytest.fixture(scope="module", autouse=True) +def _cleanup_rg(_az_available): + """Ensure the test resource group is deleted after all tests.""" + yield + logger.info("Tearing down resource group %s ...", _RG) + _az( + "group", "delete", + "--name", _RG, + "--yes", + "--no-wait", + 
timeout=30, + ) + logger.info("Resource group %s deletion initiated (no-wait).", _RG) + + +# --------------------------------------------------------------------------- +# Tests -- ordered, each builds on the previous +# --------------------------------------------------------------------------- + + +@pytest.mark.slow +class TestE2EBicepDeploy: + """Full deployment lifecycle against real Azure.""" + + # -- Phase 1: Foundry + Key Vault (core deploy) ----------------------- + + def test_01_deploy_foundry_and_keyvault(self, deployer) -> None: + """Deploy Foundry AI Services with 3 models and Key Vault.""" + from app.runtime.services.deployment.bicep_deployer import BicepDeployRequest + + req = BicepDeployRequest( + resource_group=_RG, + location=_LOCATION, + base_name=_BASE_NAME, + deploy_foundry=True, + deploy_key_vault=True, + deploy_acs=False, + deploy_content_safety=False, + deploy_search=False, + deploy_embedding_aoai=False, + deploy_monitoring=False, + deploy_session_pool=False, + ) + + result = deployer.deploy(req) + + logger.info("Foundry deploy result: ok=%s steps=%s", result.ok, result.steps) + if not result.ok: + logger.error("Deploy error: %s", result.error) + for s in result.steps: + logger.error(" step: %s", s) + + assert result.ok, f"Foundry deploy failed: {result.error}" + assert result.foundry_endpoint, "No Foundry endpoint returned" + assert result.foundry_name == _BASE_NAME + assert len(result.deployed_models) == 3 + assert "gpt-4.1" in result.deployed_models + assert "gpt-5" in result.deployed_models + assert "gpt-5-mini" in result.deployed_models + assert result.key_vault_url, "No Key Vault URL returned" + assert result.key_vault_name == f"{_BASE_NAME}-kv" + + # -- Phase 2: verify Azure resources exist ---------------------------- + + def test_02_foundry_resource_exists(self) -> None: + """AI Services resource exists in the resource group.""" + resources = _az_json( + "resource", "list", + "--resource-group", _RG, + "--resource-type", 
"Microsoft.CognitiveServices/accounts", + "--query", f"[?name=='{_BASE_NAME}']", + ) + assert resources and len(resources) == 1, ( + f"Expected 1 AI Services resource '{_BASE_NAME}', got: {resources}" + ) + assert resources[0]["kind"] == "AIServices" + + def test_03_keyvault_resource_exists(self) -> None: + """Key Vault exists in the resource group.""" + kv_name = f"{_BASE_NAME}-kv" + resources = _az_json( + "resource", "list", + "--resource-group", _RG, + "--resource-type", "Microsoft.KeyVault/vaults", + "--query", f"[?name=='{kv_name}']", + ) + assert resources and len(resources) == 1, ( + f"Expected 1 Key Vault '{kv_name}', got: {resources}" + ) + + def test_04_model_deployments_exist(self) -> None: + """All three model deployments exist on the AI Services resource.""" + deployments = _az_json( + "cognitiveservices", "account", "deployment", "list", + "--name", _BASE_NAME, + "--resource-group", _RG, + ) + assert deployments is not None, "Failed to list model deployments" + names = [d.get("name") for d in deployments] + logger.info("Model deployments found: %s", names) + assert "gpt-4.1" in names + assert "gpt-5" in names + assert "gpt-5-mini" in names + + def test_05_rbac_assignment_exists(self) -> None: + """At least one RBAC role assignment exists on the Foundry resource.""" + resource_id = _az_json( + "cognitiveservices", "account", "show", + "--name", _BASE_NAME, + "--resource-group", _RG, + "--query", "id", + ) + assert resource_id, "AI Services resource not found" + + roles = _az_json( + "role", "assignment", "list", + "--scope", resource_id, + "--query", "[?roleDefinitionName=='Cognitive Services OpenAI User']", + ) + assert roles and len(roles) >= 1, "Missing RBAC assignment" + + # -- Phase 3: enable Content Safety via incremental Bicep deploy ------ + + def test_06_deploy_content_safety(self, deployer) -> None: + """Deploy Content Safety into the same RG via Bicep.""" + from app.runtime.services.deployment.bicep_deployer import BicepDeployRequest + + 
req = BicepDeployRequest( + resource_group=_RG, + location=_LOCATION, + base_name=_BASE_NAME, + deploy_foundry=True, # re-declare (idempotent) + deploy_key_vault=True, # re-declare (idempotent) + deploy_content_safety=True, # new + deploy_search=False, + deploy_embedding_aoai=False, + deploy_monitoring=False, + deploy_session_pool=False, + ) + + result = deployer.deploy(req) + + logger.info("Content Safety deploy: ok=%s", result.ok) + assert result.ok, f"Content Safety deploy failed: {result.error}" + assert result.content_safety_endpoint, "No CS endpoint" + assert result.content_safety_name == f"{_BASE_NAME}-content-safety" + + # Original resources should still be intact + assert result.foundry_endpoint, "Foundry lost after incremental deploy" + assert result.key_vault_url, "KV lost after incremental deploy" + + def test_07_content_safety_resource_exists(self) -> None: + """Content Safety resource appeared in the RG.""" + cs_name = f"{_BASE_NAME}-content-safety" + resources = _az_json( + "resource", "list", + "--resource-group", _RG, + "--resource-type", "Microsoft.CognitiveServices/accounts", + "--query", f"[?name=='{cs_name}']", + ) + assert resources and len(resources) == 1, ( + f"Expected Content Safety resource '{cs_name}', got: {resources}" + ) + assert resources[0]["kind"] == "ContentSafety" + + # -- Phase 4: enable Monitoring via incremental Bicep deploy ---------- + + def test_08_deploy_monitoring(self, deployer) -> None: + """Deploy Log Analytics + App Insights into the same RG.""" + from app.runtime.services.deployment.bicep_deployer import BicepDeployRequest + + req = BicepDeployRequest( + resource_group=_RG, + location=_LOCATION, + base_name=_BASE_NAME, + deploy_foundry=True, + deploy_key_vault=True, + deploy_content_safety=True, + deploy_monitoring=True, # new + deploy_search=False, + deploy_embedding_aoai=False, + deploy_session_pool=False, + ) + + result = deployer.deploy(req) + + logger.info("Monitoring deploy: ok=%s", result.ok) + assert 
result.ok, f"Monitoring deploy failed: {result.error}" + assert result.app_insights_connection_string, "No AppInsights connection string" + assert result.app_insights_name == f"{_BASE_NAME}-insights" + assert result.log_analytics_workspace_name == f"{_BASE_NAME}-logs" + + def test_09_monitoring_resources_exist(self) -> None: + """App Insights and Log Analytics appeared in the RG.""" + # App Insights + ai_name = f"{_BASE_NAME}-insights" + ai = _az_json( + "resource", "list", + "--resource-group", _RG, + "--resource-type", "Microsoft.Insights/components", + "--query", f"[?name=='{ai_name}']", + ) + assert ai and len(ai) == 1, f"Expected App Insights '{ai_name}'" + + # Log Analytics + la_name = f"{_BASE_NAME}-logs" + la = _az_json( + "resource", "list", + "--resource-group", _RG, + "--resource-type", "Microsoft.OperationalInsights/workspaces", + "--query", f"[?name=='{la_name}']", + ) + assert la and len(la) == 1, f"Expected Log Analytics '{la_name}'" + + # -- Phase 5: enable Search + Embedding AOAI (Foundry IQ) ------------ + + def test_10_deploy_foundry_iq(self, deployer) -> None: + """Deploy Search + Embedding AOAI for Foundry IQ.""" + from app.runtime.services.deployment.bicep_deployer import BicepDeployRequest + + req = BicepDeployRequest( + resource_group=_RG, + location=_LOCATION, + base_name=_BASE_NAME, + deploy_foundry=True, + deploy_key_vault=True, + deploy_content_safety=True, + deploy_monitoring=True, + deploy_search=True, # new + deploy_embedding_aoai=True, # new + deploy_session_pool=False, + ) + + result = deployer.deploy(req) + + logger.info("Foundry IQ deploy: ok=%s", result.ok) + assert result.ok, f"Foundry IQ deploy failed: {result.error}" + assert result.search_endpoint, "No search endpoint" + assert result.search_name == f"{_BASE_NAME}-search" + assert result.embedding_aoai_endpoint, "No embedding AOAI endpoint" + assert result.embedding_aoai_name == f"{_BASE_NAME}-aoai" + assert result.embedding_deployment_name == "text-embedding-3-large" + + def 
test_11_search_resource_exists(self) -> None: + """Azure AI Search resource appeared.""" + search_name = f"{_BASE_NAME}-search" + resources = _az_json( + "resource", "list", + "--resource-group", _RG, + "--resource-type", "Microsoft.Search/searchServices", + "--query", f"[?name=='{search_name}']", + ) + assert resources and len(resources) == 1 + + def test_12_embedding_aoai_exists(self) -> None: + """Embedding Azure OpenAI resource appeared with model deployment.""" + aoai_name = f"{_BASE_NAME}-aoai" + resources = _az_json( + "resource", "list", + "--resource-group", _RG, + "--resource-type", "Microsoft.CognitiveServices/accounts", + "--query", f"[?name=='{aoai_name}']", + ) + assert resources and len(resources) == 1 + assert resources[0]["kind"] == "OpenAI" + + # Check model deployment + deployments = _az_json( + "cognitiveservices", "account", "deployment", "list", + "--name", aoai_name, + "--resource-group", _RG, + ) + assert deployments is not None + names = [d.get("name") for d in deployments] + assert "text-embedding-3-large" in names + + # -- Phase 6: full resource inventory --------------------------------- + + def test_13_full_resource_inventory(self) -> None: + """All expected resources exist in the RG.""" + resources = _az_json( + "resource", "list", + "--resource-group", _RG, + "--query", "[].{name: name, type: type, kind: kind}", + ) + assert resources is not None + + names = {r["name"] for r in resources} + logger.info("Resources in %s: %s", _RG, sorted(names)) + + expected = { + _BASE_NAME, # AI Services (Foundry) + f"{_BASE_NAME}-kv", # Key Vault + f"{_BASE_NAME}-content-safety", # Content Safety + f"{_BASE_NAME}-logs", # Log Analytics + f"{_BASE_NAME}-insights", # App Insights + f"{_BASE_NAME}-search", # AI Search + f"{_BASE_NAME}-aoai", # Embedding AOAI + } + missing = expected - names + assert not missing, f"Missing resources: {missing}" + + # -- Phase 7: idempotency -- re-deploy same config -------------------- + + def 
test_14_idempotent_redeploy(self, deployer) -> None: + """Re-deploying the same config succeeds without errors.""" + from app.runtime.services.deployment.bicep_deployer import BicepDeployRequest + + req = BicepDeployRequest( + resource_group=_RG, + location=_LOCATION, + base_name=_BASE_NAME, + deploy_foundry=True, + deploy_key_vault=True, + deploy_content_safety=True, + deploy_monitoring=True, + deploy_search=True, + deploy_embedding_aoai=True, + deploy_session_pool=False, + ) + + result = deployer.deploy(req) + + assert result.ok, f"Idempotent redeploy failed: {result.error}" + # All outputs should still be present + assert result.foundry_endpoint + assert result.key_vault_url + assert result.content_safety_endpoint + assert result.app_insights_connection_string + assert result.search_endpoint + assert result.embedding_aoai_endpoint + + # -- Phase 8: disable a service (remove Content Safety) --------------- + + def test_15_deploy_without_content_safety(self, deployer) -> None: + """Deploy with Content Safety disabled -- resource should remain + (Bicep incremental mode does not delete resources it does not + manage), but the outputs should reflect the disabled flag.""" + from app.runtime.services.deployment.bicep_deployer import BicepDeployRequest + + req = BicepDeployRequest( + resource_group=_RG, + location=_LOCATION, + base_name=_BASE_NAME, + deploy_foundry=True, + deploy_key_vault=True, + deploy_content_safety=False, # disabled + deploy_monitoring=True, + deploy_search=True, + deploy_embedding_aoai=True, + deploy_session_pool=False, + ) + + result = deployer.deploy(req) + + assert result.ok, f"Deploy-without-CS failed: {result.error}" + # CS outputs should be empty (conditional block not evaluated) + assert result.content_safety_endpoint == "" + assert result.content_safety_name == "" + # Other resources still intact + assert result.foundry_endpoint + assert result.key_vault_url + assert result.app_insights_connection_string + + # -- Phase 9: decommission 
(delete resource group) -------------------- + + def test_16_decommission(self, deployer) -> None: + """Decommission deletes the resource group.""" + steps = deployer.decommission(_RG) + logger.info("Decommission steps: %s", steps) + + ok_steps = [s for s in steps if s["status"] == "ok"] + assert len(ok_steps) >= 1, f"Decommission had no OK steps: {steps}" + + rg_step = next( + (s for s in steps if s["step"] == "delete_resource_group"), None, + ) + assert rg_step is not None + assert rg_step["status"] == "ok" + + def test_17_resource_group_deleted(self) -> None: + """After decommission, the RG should be gone (or deleting).""" + # Give Azure a moment to start the deletion + time.sleep(10) + + rg = _az_json("group", "show", "--name", _RG, timeout=30) + if rg is not None: + # It may still be 'Deleting' + state = rg.get("properties", {}).get("provisioningState", "") + logger.info("RG %s state after decommission: %s", _RG, state) + assert state in ("Deleting", "Deleted", ""), ( + f"RG in unexpected state: {state}" + ) diff --git a/app/runtime/tests/test_e2e_setup_process.py b/app/runtime/tests/test_e2e_setup_process.py new file mode 100644 index 0000000..b7f9cd2 --- /dev/null +++ b/app/runtime/tests/test_e2e_setup_process.py @@ -0,0 +1,1912 @@ +"""End-to-end setup and functionality test bench for local Docker deployment. + +Boots the Docker stack, copies the host Azure credentials into the admin +container, drives the admin API through every deployment combination, and +verifies each subsystem works end-to-end with real Azure resources. + +Covers the full lifecycle a real user goes through: provision, configure, +stop, start, reconfigure, verify chat works at every stage. Bot service +and Telegram are treated as optional -- all core functionality (Foundry + +chat) is validated without them. 
+ +Usage:: + + pytest app/runtime/tests/test_e2e_setup_process.py --run-e2e-setup -s -v + +Requirements: + - Docker running locally + - Active ``az login`` session (``~/.azure`` is copied into the container) + - Sufficient Azure quota in the target region + - (Optional) ``.botservice-secret.txt`` at repo root for Telegram tests + +The test creates a **real** resource group, provisions Foundry, Key Vault, +Content Safety, Search, Embedding AOAI, and (optionally) Bot Service +resources, then tears everything down at the end. Typical wall-clock: +20-30 min. + +Phases (22): + 1. Clean state verification + 2. Deploy Foundry + KV (chat verified without bot) + 3. Content Safety / Prompt Shields + 4. Bot + Telegram config (chat asserted with bot, no tunnel) + 5. Tunnel + full stack (chat assertion) + 6. Skills CRUD + 7. Sessions + 8. Guardrails + 9. Plugins + 10. MCP servers + 11. Scheduler + 12. Profile + 13. Foundry IQ + 14. Idempotency (redeploy + chat assertion) + 15. Combined configuration save + 16. Stop/start lifecycle (2 cycles + chat each time) + 17. Config change mid-lifecycle (profile + guardrails + chat) + 18. Bot service add/remove/toggle (chat each time) + 19. Voice / ACS + 20. Lockdown mode + 21. Decommission + 22. 
TUI headless (health check + host-side WebSocket chat) +""" + +from __future__ import annotations + +import json +import logging +import os +import subprocess +import time +from pathlib import Path +from typing import Any + +import pytest + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +_PROJECT_ROOT = Path(__file__).resolve().parents[3] +_ADMIN_CONTAINER = "polyclaw-admin" +_RUNTIME_CONTAINER = "polyclaw-runtime" +_ADMIN_URL = "http://localhost:9090" +_HEALTH_URL = f"{_ADMIN_URL}/health" + +_BUILD_TIMEOUT = 900 +_BOOT_TIMEOUT = 120 +_DEPLOY_TIMEOUT = 480 +_HEALTH_POLL = 3 +_API_TIMEOUT = 30 +_CHAT_TIMEOUT = 90 + +_RG = "polyclaw-e2e-setup-rg" +_LOCATION = "eastus" +_BASE_NAME = "e2esetup" + + +# --------------------------------------------------------------------------- +# Chat probe script -- runs INSIDE the runtime container +# --------------------------------------------------------------------------- + +_CHAT_PROBE_SCRIPT = r""" +import asyncio, json, sys, os, aiohttp + +async def main(): + secret = "" + try: + with open("/data/.env") as f: + for line in f: + if line.startswith("ADMIN_SECRET="): + secret = line.split("=", 1)[1].strip().strip('"') + except FileNotFoundError: + pass + + port = os.environ.get("ADMIN_PORT", "8080") + url = f"http://localhost:{port}/api/chat/ws" + headers = {} + if secret: + headers["Authorization"] = f"Bearer {secret}" + + timeout = aiohttp.ClientTimeout(total=80) + async with aiohttp.ClientSession(timeout=timeout) as session: + async with session.ws_connect(url, headers=headers) as ws: + await ws.send_json({"action": "send", "text": "Reply with exactly: PROBE_OK"}) + chunks = [] + async for msg in ws: + if msg.type == aiohttp.WSMsgType.TEXT: + data = json.loads(msg.data) + t = data.get("type", "") + if t == "delta": + chunks.append(data.get("content", "")) + elif t 
== "message": + chunks.append(data.get("content", "")) + elif t == "done": + break + elif t == "error": + detail = data.get("content", "") + print(detail, file=sys.stderr) + sys.exit(2) + elif msg.type in (aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.ERROR): + break + print("".join(chunks)) + +asyncio.run(main()) +""" + + +# --------------------------------------------------------------------------- +# Telegram config from .botservice-secret.txt +# --------------------------------------------------------------------------- + +def _load_telegram_config() -> tuple[str, str]: + """Return ``(token, whitelist)`` from ``.botservice-secret.txt``.""" + secret_file = _PROJECT_ROOT / ".botservice-secret.txt" + if not secret_file.exists(): + return "", "" + lines = secret_file.read_text().strip().splitlines() + token = lines[0].strip() if lines else "" + whitelist = lines[1].strip() if len(lines) > 1 else "" + return token, whitelist + + +# --------------------------------------------------------------------------- +# Shell / Docker helpers +# --------------------------------------------------------------------------- + +def _run( + cmd: list[str], + *, + timeout: int = 60, + check: bool = True, + cwd: Path | None = None, + env: dict[str, str] | None = None, +) -> subprocess.CompletedProcess[str]: + return subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=timeout, + check=check, + cwd=cwd or _PROJECT_ROOT, + env=env, + ) + + +def _compose(*args: str, timeout: int = 60) -> subprocess.CompletedProcess[str]: + return _run(["docker", "compose", *args], timeout=timeout) + + +def _api( + path: str, + *, + method: str = "GET", + body: dict | None = None, + secret: str = "", + timeout: int = _API_TIMEOUT, +) -> tuple[int, dict | None]: + """Call the admin API. 
Returns ``(http_status, json_body | None)``.""" + url = f"{_ADMIN_URL}{path}" + cmd: list[str] = [ + "curl", "-s", "--max-time", str(timeout), + "-o", "/dev/stdout", "-w", "\n%{http_code}", + ] + if secret: + cmd += ["-H", f"Authorization: Bearer {secret}"] + if method == "POST": + cmd += ["-X", "POST", "-H", "Content-Type: application/json"] + cmd += ["-d", json.dumps(body) if body else "{}"] + elif method == "PUT": + cmd += ["-X", "PUT", "-H", "Content-Type: application/json"] + cmd += ["-d", json.dumps(body) if body else "{}"] + elif method == "DELETE": + cmd += ["-X", "DELETE"] + cmd.append(url) + + try: + r = _run(cmd, check=False, timeout=timeout + 10) + except subprocess.TimeoutExpired: + logger.warning("API call timed out: %s %s", method, path) + return 0, None + + parts = r.stdout.rsplit("\n", 1) + if len(parts) < 2: + return 0, None + try: + status_code = int(parts[-1]) + except ValueError: + return 0, None + try: + data = json.loads(parts[0]) + except (json.JSONDecodeError, IndexError): + data = None + return status_code, data + + +def _api_ok( + path: str, + *, + method: str = "GET", + body: dict | None = None, + secret: str = "", + timeout: int = _API_TIMEOUT, + expected_status: int = 200, +) -> dict: + """Call the API and assert success. 
Returns the JSON body.""" + code, data = _api(path, method=method, body=body, secret=secret, timeout=timeout) + assert code == expected_status, ( + f"{method} {path} returned {code} (expected {expected_status}).\n" + f"Response: {json.dumps(data, indent=2) if data else ''}" + ) + assert data is not None, f"{method} {path} returned no JSON body" + return data + + +def _poll_health(timeout: float = _BOOT_TIMEOUT) -> dict | None: + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + try: + r = _run(["curl", "-sf", "--max-time", "3", _HEALTH_URL], check=False, timeout=10) + if r.returncode == 0 and r.stdout.strip(): + return json.loads(r.stdout) + except Exception: + pass + time.sleep(_HEALTH_POLL) + return None + + +def _container_logs(container: str, tail: int = 200) -> str: + try: + r = _run(["docker", "logs", "--tail", str(tail), container], check=False, timeout=15) + return (r.stdout + r.stderr).strip() + except Exception as exc: + return f"" + + +def _copy_azure_creds() -> bool: + azure_dir = Path.home() / ".azure" + if not azure_dir.exists(): + return False + try: + # Create the target directory first + _run( + ["docker", "exec", _ADMIN_CONTAINER, "mkdir", "-p", "/admin-home/.azure"], + timeout=10, + ) + # Copy only the essential auth files (not the 900 MB cliextensions/bin) + _ESSENTIAL = [ + "azureProfile.json", + "msal_token_cache.json", + "msal_token_cache.bin", + "az.json", + "az.sess", + "clouds.config", + "config", + ] + copied = 0 + for name in _ESSENTIAL: + src = azure_dir / name + if src.exists(): + _run( + ["docker", "cp", str(src), f"{_ADMIN_CONTAINER}:/admin-home/.azure/{name}"], + timeout=30, + ) + copied += 1 + logger.info("Copied %d Azure auth files into admin container", copied) + return copied > 0 + except Exception as exc: + logger.error("Failed to copy Azure creds: %s", exc) + return False + + +def _send_chat_probe() -> tuple[str | None, str]: + """Returns ``(text, status)`` where status is 
ok|error|not_authenticated|empty.""" + try: + r = _run( + ["docker", "exec", _RUNTIME_CONTAINER, "python", "-c", _CHAT_PROBE_SCRIPT], + check=False, timeout=_CHAT_TIMEOUT, + ) + if r.returncode == 2: + return r.stderr.strip() or None, "not_authenticated" + if r.returncode == 0 and r.stdout.strip(): + return r.stdout.strip(), "ok" + if r.returncode == 0: + return None, "empty" + logger.warning("Chat probe exit %d: %s", r.returncode, r.stderr[:300]) + return None, "error" + except subprocess.TimeoutExpired: + return None, "error" + except Exception as exc: + logger.warning("Chat probe exception: %s", exc) + return None, "error" + + +def _diag(phase: str) -> str: + lines = [f"\n{'='*72}", f"DIAGNOSTICS -- {phase}", f"{'='*72}"] + for c in (_ADMIN_CONTAINER, _RUNTIME_CONTAINER): + lines.append(f"\n--- {c} ---") + lines.append(_container_logs(c, tail=100)) + lines.append(f"{'='*72}\n") + return "\n".join(lines) + + +def _purge_soft_deleted_resources() -> None: + """Purge any soft-deleted Cognitive Services accounts matching _BASE_NAME.""" + try: + r = _run( + ["az", "cognitiveservices", "account", "list-deleted", "-o", "json"], + check=False, timeout=30, + ) + if r.returncode != 0: + return + deleted = json.loads(r.stdout) if r.stdout.strip() else [] + for item in deleted: + name = item.get("name", "") + rg = item.get("resourceGroup", "") + loc = item.get("location", "") + if _BASE_NAME in name or rg == _RG: + logger.info("Purging soft-deleted resource: %s (rg=%s)", name, rg) + _run( + [ + "az", "cognitiveservices", "account", "purge", + "--name", name, + "--resource-group", rg, + "--location", loc, + ], + check=False, timeout=60, + ) + except Exception as exc: + logger.warning("Soft-delete purge failed: %s", exc) + + +# --------------------------------------------------------------------------- +# Fixtures (module-scoped -- one Docker stack for the whole file) +# --------------------------------------------------------------------------- + 
+@pytest.fixture(scope="module") +def admin_secret() -> str: + return "e2e-test-secret-" + os.urandom(8).hex() + + +@pytest.fixture(scope="module") +def telegram_config() -> tuple[str, str]: + """Return ``(token, whitelist)`` -- may be empty if no secret file.""" + return _load_telegram_config() + + +@pytest.fixture(scope="module") +def stack(admin_secret): + """Build, start, and tear down the Docker compose stack.""" + try: + _run(["docker", "info"], timeout=15) + except Exception: + pytest.skip("Docker not available") + + # Build + logger.info("Building Docker image ...") + try: + _compose("build", timeout=_BUILD_TIMEOUT) + except subprocess.CalledProcessError as exc: + pytest.fail(f"Docker build failed:\n{exc.stderr[:2000]}") + + # Start + logger.info("Starting Docker stack ...") + try: + _compose("up", "-d", timeout=60) + except subprocess.CalledProcessError as exc: + pytest.fail(f"docker compose up failed:\n{exc.stderr[:2000]}") + + # Wait for health + health = _poll_health(timeout=_BOOT_TIMEOUT) + if not health: + pytest.fail(f"Admin container not healthy.\n{_diag('boot')}") + + # Inject ADMIN_SECRET into the shared /data/.env so both containers pick it up. + logger.info("Injecting ADMIN_SECRET into /data/.env ...") + try: + _run( + [ + "docker", "exec", _ADMIN_CONTAINER, + "sh", "-c", + f'grep -q "^ADMIN_SECRET=" /data/.env 2>/dev/null ' + f'&& sed -i "s|^ADMIN_SECRET=.*|ADMIN_SECRET={admin_secret}|" /data/.env ' + f'|| echo "ADMIN_SECRET={admin_secret}" >> /data/.env', + ], + timeout=10, + ) + except Exception as exc: + pytest.fail(f"Failed to inject ADMIN_SECRET: {exc}") + + # Restart containers to pick up the new secret + logger.info("Restarting containers to pick up ADMIN_SECRET ...") + _compose("restart", timeout=60) + health = _poll_health(timeout=_BOOT_TIMEOUT) + if not health: + pytest.fail(f"Admin not healthy after restart.\n{_diag('restart')}") + + # Inject Azure creds AFTER restart (container filesystem is ephemeral). 
+ ok = _copy_azure_creds() + if not ok: + pytest.fail("Failed to copy Azure creds into container. Ensure ~/.azure exists.") + + # Wait a moment for any cached az results to expire (TTL=30s). + # The 890 MB copy may trigger `az` calls that cache failures, so we + # need to wait well past the 30s AzureCLI cache TTL. + time.sleep(35) + + yield health + + # Teardown + logger.info("Tearing down Docker stack ...") + _compose("down", "-v", "--remove-orphans", timeout=60) + + +@pytest.fixture(scope="module", autouse=True) +def _cleanup_azure_rg(): + """Best-effort cleanup of the test resource group after all tests. + + Also purges soft-deleted Cognitive Services accounts in the RG so + subsequent runs don't hit ``FlagMustBeSetForRestore``. + """ + # Pre-clean: purge any soft-deleted resources from previous runs + _purge_soft_deleted_resources() + yield + logger.info("Initiating cleanup of %s ...", _RG) + try: + _run(["az", "group", "delete", "--name", _RG, "--yes", "--no-wait"], + check=False, timeout=30) + except Exception as exc: + logger.warning("RG cleanup failed: %s", exc) + + +# =================================================================== +# PHASE 1: Clean state verification +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase01CleanState: + """Verify the freshly-booted stack has a clean, undeployed state.""" + + def test_health(self, stack) -> None: + assert stack.get("status") == "ok" or "version" in stack + + def test_initial_status(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/status", secret=admin_secret) + assert not data["foundry"]["deployed"], "Foundry should not be deployed" + # bot_configured may be True when /data volume has residual config + logger.info( + "Initial status: foundry_deployed=%s bot_configured=%s", + data["foundry"]["deployed"], data.get("bot_configured"), + ) + logger.info("Full status: %s", json.dumps(data, indent=2)) + + def test_azure_logged_in(self, 
stack, admin_secret) -> None: + # The creds were copied in the fixture; the container may need to + # resolve the AZURE_CONFIG_DIR env var. Retry up to 60s for + # the 30s AzureCLI cache to expire after a failed az call. + deadline = time.monotonic() + 60 + data: dict = {} + while time.monotonic() < deadline: + data = _api_ok("/api/setup/azure/check", secret=admin_secret) + if data.get("status") == "logged_in": + break + time.sleep(5) + assert data.get("status") == "logged_in", ( + f"Azure CLI not logged in after 60s: {json.dumps(data, indent=2)}" + ) + logger.info("Azure user: %s", data.get("user")) + + def test_skills_list_empty_or_builtin(self, stack, admin_secret) -> None: + data = _api_ok("/api/skills", secret=admin_secret) + skills = data.get("skills", []) + logger.info("Initial skills (%d): %s", len(skills), [s["name"] for s in skills[:5]]) + + def test_sessions_empty(self, stack, admin_secret) -> None: + data = _api_ok("/api/sessions", secret=admin_secret) + assert isinstance(data, list) + logger.info("Initial sessions: %d", len(data)) + + def test_plugins_list(self, stack, admin_secret) -> None: + data = _api_ok("/api/plugins", secret=admin_secret) + logger.info("Plugins: %s", json.dumps(data, indent=2)[:500]) + + def test_guardrails_config(self, stack, admin_secret) -> None: + data = _api_ok("/api/guardrails/config", secret=admin_secret) + assert "enabled" in data or "mode" in data or "hitl_mode" in data + logger.info("Guardrails config keys: %s", list(data.keys())) + + def test_mcp_servers_list(self, stack, admin_secret) -> None: + data = _api_ok("/api/mcp/servers", secret=admin_secret) + logger.info("MCP servers: %s", json.dumps(data, indent=2)[:500]) + + def test_schedules_empty(self, stack, admin_secret) -> None: + data = _api_ok("/api/schedules", secret=admin_secret) + logger.info("Schedules: %s", json.dumps(data, indent=2)[:300]) + + def test_profile(self, stack, admin_secret) -> None: + data = _api_ok("/api/profile", secret=admin_secret) + 
logger.info("Profile: %s", json.dumps(data, indent=2)[:500]) + + def test_models_list(self, stack, admin_secret) -> None: + data = _api_ok("/api/models", secret=admin_secret) + logger.info("Models: %s", json.dumps(data, indent=2)[:500]) + + def test_content_safety_not_deployed(self, stack, admin_secret) -> None: + data = _api_ok("/api/content-safety/status", secret=admin_secret) + assert not data.get("deployed"), "Content Safety should not be deployed yet" + + def test_foundry_iq_config(self, stack, admin_secret) -> None: + data = _api_ok("/api/foundry-iq/config", secret=admin_secret) + logger.info("Foundry IQ config: %s", json.dumps(data, indent=2)[:500]) + + def test_chat_fails_before_setup(self, stack) -> None: + text, status = _send_chat_probe() + logger.info("Chat probe (clean): status=%s detail=%r", status, text) + assert status != "ok", "Chat should not succeed before any setup" + + +# =================================================================== +# PHASE 2: Deploy Foundry + Key Vault (core infra) +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase02DeployFoundry: + """Provision Foundry AI Services with models and Key Vault.""" + + def test_deploy_foundry_and_kv(self, stack, admin_secret) -> None: + body = { + "resource_group": _RG, + "location": _LOCATION, + "base_name": _BASE_NAME, + "deploy_key_vault": True, + } + code, data = _api( + "/api/setup/foundry/deploy", + method="POST", body=body, + secret=admin_secret, timeout=_DEPLOY_TIMEOUT, + ) + if code == 500 and data: + detail = json.dumps(data.get("steps", [])) + if "FlagMustBeSetForRestore" in detail or "soft-de" in detail: + pytest.xfail("Soft-deleted resource blocking deploy (purge needed)") + assert code == 200, f"Deploy returned {code}: {json.dumps(data, indent=2)[:1000]}" + assert data["status"] == "ok", f"Deploy failed: {data.get('error')}" + assert data.get("foundry_endpoint"), "No Foundry endpoint" + assert 
data.get("key_vault_url"), "No Key Vault URL" + assert len(data.get("deployed_models", [])) >= 1, "No models deployed" + logger.info( + "Deployed: endpoint=%s models=%s kv=%s", + data["foundry_endpoint"], + data.get("deployed_models"), + data.get("key_vault_url"), + ) + + def test_foundry_status(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/foundry/status", secret=admin_secret) + if not data.get("deployed"): + pytest.xfail(f"Foundry not deployed (prior deploy may have failed): {data}") + assert data.get("foundry_endpoint") + + def test_global_status_reflects_foundry(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/status", secret=admin_secret) + if not data["foundry"]["deployed"]: + pytest.xfail("Foundry not deployed (prior deploy may have failed)") + assert data["foundry"]["endpoint"] + assert data.get("prerequisites_configured"), "Prerequisites should be configured (KV)" + + def test_prerequisites_status(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/prerequisites/status", secret=admin_secret) + kv = data.get("keyvault", {}) + logger.info("Prerequisites: %s", json.dumps(data, indent=2)) + if kv.get("configured"): + assert kv.get("url"), "KV configured but no URL" + elif kv.get("name"): + logger.warning("KV created (%s) but not yet configured", kv["name"]) + else: + # Deploy may have failed entirely (soft-delete, quota, etc.) + logger.warning("KV not provisioned: %s", kv) + + def test_runtime_sp_provisioned(self, stack, admin_secret) -> None: + """Foundry deploy must provision a runtime SP for Key Vault access. + + The deploy step creates a service principal via + ``az ad sp create-for-rbac`` and writes its credentials to + ``/data/.env``. The runtime container uses these to + ``az login --service-principal`` at boot and resolve ``@kv:`` + secrets from Key Vault. 
+ """ + result = subprocess.run( + ["docker", "exec", _ADMIN_CONTAINER, "cat", "/data/.env"], + capture_output=True, text=True, timeout=15, + ) + assert result.returncode == 0, f"Could not read .env: {result.stderr}" + env_lines = result.stdout.strip().splitlines() + env_dict = {} + for line in env_lines: + if "=" in line: + k, v = line.split("=", 1) + env_dict[k] = v + + has_kv = bool(env_dict.get("KEY_VAULT_URL")) + if not has_kv: + pytest.skip("Key Vault not deployed") + + sp_id = env_dict.get("RUNTIME_SP_APP_ID", "") + sp_pw = env_dict.get("RUNTIME_SP_PASSWORD", "") + sp_tenant = env_dict.get("RUNTIME_SP_TENANT", "") + assert sp_id, ( + "RUNTIME_SP_APP_ID not set in .env -- the runtime container " + "cannot resolve @kv: secrets from Key Vault" + ) + assert sp_pw, "RUNTIME_SP_PASSWORD not set" + assert sp_tenant, "RUNTIME_SP_TENANT not set" + logger.info("Runtime SP provisioned: app_id=%s tenant=%s", sp_id, sp_tenant) + + def test_runtime_has_no_kv_errors(self, stack) -> None: + """Runtime container logs must not contain Key Vault resolution errors.""" + result = subprocess.run( + ["docker", "logs", _RUNTIME_CONTAINER, "--tail", "100"], + capture_output=True, text=True, timeout=15, + ) + logs = result.stdout + result.stderr + kv_errors = [ + line for line in logs.splitlines() + if "Failed to resolve Key Vault" in line + or "DefaultAzureCredential failed" in line + ] + if kv_errors: + pytest.fail( + f"Runtime container has Key Vault resolution errors:\n" + + "\n".join(f" {l.strip()}" for l in kv_errors[:5]) + ) + + def test_chat_works_after_foundry_no_bot(self, stack, admin_secret) -> None: + """Foundry deployed, no bot -- chat MUST work (bot is optional). + + The deploy handler restarts the runtime container so it picks + up ``FOUNDRY_ENDPOINT``. We wait up to 90 s for the restart to + finish and the agent to initialise. 
+ """ + # Trigger a container restart so the runtime picks up the new env + code, data = _api( + "/api/setup/container/restart", + method="POST", secret=admin_secret, timeout=60, + ) + logger.info("Container restart after Foundry deploy: %d %s", code, data) + + # Wait for the runtime to come back up + time.sleep(10) + _poll_health(timeout=60) + + # Chat must work -- Foundry is provisioned, bot is NOT required + deadline = time.monotonic() + 90 + last_status = "" + while time.monotonic() < deadline: + text, last_status = _send_chat_probe() + if last_status == "ok" and text: + logger.info("Chat works without bot: %r", text[:200]) + return + logger.info("Chat probe (foundry, no bot): status=%s -- retrying", last_status) + time.sleep(8) + pytest.fail( + f"Chat did not work after Foundry deploy (no bot). " + f"Last status={last_status}\n{_diag('chat-after-foundry')}" + ) + + +# =================================================================== +# PHASE 3: Deploy Content Safety (Prompt Shields) +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase03ContentSafety: + """Deploy Azure AI Content Safety and test Prompt Shields.""" + + def test_deploy_content_safety(self, stack, admin_secret) -> None: + body = { + "resource_group": _RG, + "location": _LOCATION, + } + data = _api_ok( + "/api/content-safety/deploy", + method="POST", body=body, + secret=admin_secret, timeout=_DEPLOY_TIMEOUT, + ) + assert data.get("status") == "ok", f"CS deploy failed: {data}" + assert data.get("endpoint"), "No Content Safety endpoint" + logger.info("Content Safety deployed: %s", data.get("endpoint")) + + def test_content_safety_status(self, stack, admin_secret) -> None: + data = _api_ok("/api/content-safety/status", secret=admin_secret) + if not data.get("deployed"): + pytest.xfail(f"Content Safety not deployed (deploy may have failed): {data}") + assert data.get("endpoint") + + def test_content_safety_dry_run(self, stack, admin_secret) 
-> None: + """Test Prompt Shields dry-run against the deployed endpoint.""" + # Skip if not deployed + status_code, status_data = _api("/api/content-safety/status", secret=admin_secret) + if not (status_data and status_data.get("deployed")): + pytest.skip("Content Safety not deployed") + data = _api_ok( + "/api/content-safety/test", + method="POST", secret=admin_secret, timeout=60, + ) + logger.info("Prompt Shields test: %s", json.dumps(data, indent=2)) + assert data.get("status") == "ok" + assert data.get("passed"), f"Prompt Shields dry-run failed: {data.get('detail')}" + + +# =================================================================== +# PHASE 4: Configure bot + Telegram (no tunnel yet) +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase04BotConfig: + """Save bot and Telegram configuration without starting tunnel.""" + + def test_save_bot_config(self, stack, admin_secret) -> None: + body = { + "resource_group": _RG, + "location": _LOCATION, + "display_name": "polyclaw-e2e-test", + "bot_handle": "", + } + _api_ok("/api/setup/bot/config", method="POST", body=body, secret=admin_secret) + + def test_bot_config_persisted(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/bot/config", secret=admin_secret) + assert data.get("resource_group") == _RG + logger.info("Bot config: %s", json.dumps(data, indent=2)) + + def test_save_telegram_config(self, stack, admin_secret, telegram_config) -> None: + token, whitelist = telegram_config + if not token: + pytest.skip("No .botservice-secret.txt -- Telegram is optional") + body = {"token": token, "whitelist": whitelist} + _api_ok("/api/setup/channels/telegram/config", method="POST", body=body, secret=admin_secret) + + def test_channels_config(self, stack, admin_secret, telegram_config) -> None: + data = _api_ok("/api/setup/channels/config", secret=admin_secret) + token, _ = telegram_config + if token: + tg = data.get("telegram", {}) + assert 
tg.get("configured") or tg.get("token"), f"Telegram not configured: {data}" + logger.info("Channels: %s", json.dumps(data, indent=2)[:500]) + + def test_status_shows_bot_configured(self, stack, admin_secret, telegram_config) -> None: + data = _api_ok("/api/setup/status", secret=admin_secret) + assert data["bot_configured"], f"Bot not marked configured: {data}" + token, _ = telegram_config + if token: + assert data.get("telegram_configured"), f"Telegram not configured: {data}" + else: + logger.info("Telegram not configured (optional -- no secret file)") + + def test_chat_works_with_bot_config_no_tunnel(self, stack) -> None: + """Bot configured but no tunnel -- chat MUST still work. + + The bot service is optional and should not block core chat. + """ + deadline = time.monotonic() + 60 + last_status = "" + while time.monotonic() < deadline: + text, last_status = _send_chat_probe() + if last_status == "ok" and text: + logger.info("Chat works with bot config, no tunnel: %s", text[:200]) + return + logger.info("Chat (bot cfg, no tunnel): status=%s -- retrying", last_status) + time.sleep(8) + pytest.fail( + f"Chat broken after adding bot config (no tunnel). 
" + f"Last status={last_status}\n{_diag('chat-bot-no-tunnel')}" + ) + + +# =================================================================== +# PHASE 5: Start tunnel + provision bot infra (full stack) +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase05FullStack: + """Start tunnel, provision bot, and verify end-to-end chat.""" + + def test_start_tunnel(self, stack, admin_secret) -> None: + code, data = _api( + "/api/setup/tunnel/start", + method="POST", body={"port": 9090}, + secret=admin_secret, timeout=60, + ) + logger.info("Tunnel start: code=%d data=%s", code, json.dumps(data or {}, indent=2)) + if code == 400 and data and "managed by the runtime" in str(data.get("message", "")): + pytest.skip("Tunnel managed by runtime container (split mode)") + assert code == 200, f"Tunnel start failed: {data}" + assert data.get("url"), "No tunnel URL" + logger.info("Tunnel URL: %s", data["url"]) + + def test_tunnel_in_status(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/status", secret=admin_secret) + tunnel = data.get("tunnel", {}) + logger.info("Tunnel status: %s", json.dumps(tunnel, indent=2)) + + def test_deploy_bot_infrastructure(self, stack, admin_secret) -> None: + """Provision Bot Service via infra deploy.""" + code, data = _api( + "/api/setup/infra/deploy", + method="POST", secret=admin_secret, timeout=_DEPLOY_TIMEOUT, + ) + logger.info("Bot infra deploy: code=%d", code) + if data: + for step in data.get("steps", []): + logger.info(" step: %s", step) + if code != 200: + logger.warning("Bot infra deploy failed: %s", json.dumps(data or {}, indent=2)) + pytest.xfail("Bot infra deploy failed (may need RBAC propagation)") + assert data.get("status") == "ok" + + def test_preflight_checks(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/preflight", secret=admin_secret) + for check in (data if isinstance(data, list) else data.get("checks", [data])): + logger.info("Preflight: 
%s", check) + + def test_status_full_stack(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/status", secret=admin_secret) + logger.info("Full stack status:\n%s", json.dumps(data, indent=2)) + assert data["bot_configured"] + if not data["foundry"]["deployed"]: + pytest.xfail("Foundry not deployed (deploy may have failed earlier)") + + def test_chat_full_stack(self, stack) -> None: + """With full stack running, chat MUST work end-to-end.""" + time.sleep(8) + deadline = time.monotonic() + 90 + last_status = "" + while time.monotonic() < deadline: + text, last_status = _send_chat_probe() + if last_status == "ok" and text: + logger.info("Full stack chat response: %s", text[:300]) + return + logger.info("Chat probe (full stack): status=%s -- retrying", last_status) + time.sleep(8) + pytest.fail( + f"Chat did not work in full stack mode. " + f"Last status={last_status}\n{_diag('chat-full-stack')}" + ) + + def test_smoke_test(self, stack, admin_secret) -> None: + code, data = _api( + "/api/setup/copilot/smoke-test", + method="POST", secret=admin_secret, timeout=120, + ) + logger.info("Smoke test: code=%d data=%s", code, json.dumps(data or {}, indent=2)[:1000]) + + +# =================================================================== +# PHASE 6: Skills CRUD +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase06Skills: + """Verify skill management works end-to-end.""" + + def test_list_installed_skills(self, stack, admin_secret) -> None: + data = _api_ok("/api/skills/installed", secret=admin_secret) + assert isinstance(data, list) + logger.info("Installed skills: %d", len(data)) + for s in data[:5]: + logger.info(" %s (%s)", s.get("name"), s.get("source")) + + def test_catalog_fetch(self, stack, admin_secret) -> None: + data = _api_ok("/api/skills/catalog", secret=admin_secret) + assert isinstance(data, list) + logger.info("Catalog skills: %d", len(data)) + + def test_marketplace(self, stack, 
admin_secret) -> None: + data = _api_ok("/api/skills/marketplace", secret=admin_secret) + assert "all" in data + logger.info( + "Marketplace: all=%d recommended=%d installed=%d", + len(data.get("all", [])), + len(data.get("recommended", [])), + len(data.get("installed", [])), + ) + + def test_install_skill(self, stack, admin_secret) -> None: + """Install from catalog if available; skip if catalog is empty or rate-limited.""" + catalog = _api_ok("/api/skills/catalog", secret=admin_secret) + if not catalog: + pytest.skip("Catalog empty (likely rate-limited)") + name = catalog[0].get("name") + code, data = _api( + "/api/skills/install", + method="POST", + body={"name": name}, + secret=admin_secret, + ) + logger.info("Install %s: code=%d data=%s", name, code, data) + if code == 400 and data and "429" in str(data.get("message", "")): + pytest.skip(f"GitHub rate limit hit installing {name}") + assert code == 200, f"Install {name} returned {code}: {data}" + + def test_skill_appears_in_list(self, stack, admin_secret) -> None: + data = _api_ok("/api/skills/installed", secret=admin_secret) + names = [s.get("name") for s in data] + # At minimum, built-in skills should be present + assert len(names) >= 1, f"No skills installed: {names}" + assert "web-search" in names, f"web-search not in installed: {names}" + + def test_remove_skill(self, stack, admin_secret) -> None: + """Remove a user-installed skill (not built-in).""" + data = _api_ok("/api/skills/installed", secret=admin_secret) + user_skills = [s for s in data if s.get("origin") not in ("built-in", "plugin")] + if not user_skills: + pytest.skip("No user-installed skills to remove") + name = user_skills[0]["name"] + code, resp = _api(f"/api/skills/{name}", method="DELETE", secret=admin_secret) + assert code == 200, f"Remove {name} returned {code}: {resp}" + + +# =================================================================== +# PHASE 7: Sessions +# =================================================================== + 
+@pytest.mark.e2e_setup +class TestPhase07Sessions: + """Verify session management after chat activity.""" + + def test_session_stats(self, stack, admin_secret) -> None: + data = _api_ok("/api/sessions/stats", secret=admin_secret) + logger.info("Session stats: %s", json.dumps(data, indent=2)) + + def test_list_sessions(self, stack, admin_secret) -> None: + data = _api_ok("/api/sessions", secret=admin_secret) + logger.info("Sessions: %d", len(data) if isinstance(data, list) else 0) + + def test_archival_policy(self, stack, admin_secret) -> None: + data = _api_ok("/api/sessions/policy", secret=admin_secret) + assert "policy" in data + assert "options" in data + logger.info("Archival policy: %s, options: %s", data["policy"], data["options"]) + + +# =================================================================== +# PHASE 8: Guardrails + security +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase08Guardrails: + """Verify guardrails management.""" + + def test_config(self, stack, admin_secret) -> None: + data = _api_ok("/api/guardrails/config", secret=admin_secret) + logger.info("Guardrails config: %s", json.dumps(data, indent=2)[:500]) + + def test_list_rules(self, stack, admin_secret) -> None: + data = _api_ok("/api/guardrails/rules", secret=admin_secret) + logger.info("Guardrails rules: %s", json.dumps(data, indent=2)[:500]) + + def test_list_tools(self, stack, admin_secret) -> None: + data = _api_ok("/api/guardrails/tools", secret=admin_secret) + logger.info("Guardrails tools: %d items", len(data) if isinstance(data, list) else 0) + + def test_list_presets(self, stack, admin_secret) -> None: + data = _api_ok("/api/guardrails/presets", secret=admin_secret) + logger.info("Presets: %s", json.dumps(data, indent=2)[:500]) + + def test_templates(self, stack, admin_secret) -> None: + data = _api_ok("/api/guardrails/templates", secret=admin_secret) + logger.info("Templates: %s", json.dumps(data, 
indent=2)[:300]) + + def test_contexts(self, stack, admin_secret) -> None: + data = _api_ok("/api/guardrails/contexts", secret=admin_secret) + logger.info("Contexts: %s", json.dumps(data, indent=2)[:300]) + + def test_preflight_run(self, stack, admin_secret) -> None: + data = _api_ok( + "/api/guardrails/preflight/run", + method="POST", secret=admin_secret, timeout=60, + ) + logger.info("Preflight run: %s", json.dumps(data, indent=2)[:1000]) + + +# =================================================================== +# PHASE 9: Plugins +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase09Plugins: + """Verify plugin management.""" + + def test_list_plugins(self, stack, admin_secret) -> None: + data = _api_ok("/api/plugins", secret=admin_secret) + plugins = data if isinstance(data, list) else data.get("plugins", []) + logger.info("Plugins: %d", len(plugins)) + for p in plugins[:5]: + logger.info( + " %s (enabled=%s)", p.get("id") or p.get("name"), p.get("enabled"), + ) + + def test_enable_plugin(self, stack, admin_secret) -> None: + data = _api_ok("/api/plugins", secret=admin_secret) + plugins = data if isinstance(data, list) else data.get("plugins", []) + if not plugins: + pytest.skip("No plugins available") + pid = plugins[0].get("id") or plugins[0].get("name") + code, resp = _api( + f"/api/plugins/{pid}/enable", + method="POST", secret=admin_secret, + ) + logger.info("Enable plugin %s: %d %s", pid, code, resp) + + def test_disable_plugin(self, stack, admin_secret) -> None: + data = _api_ok("/api/plugins", secret=admin_secret) + plugins = data if isinstance(data, list) else data.get("plugins", []) + enabled = [p for p in plugins if p.get("enabled")] + if not enabled: + pytest.skip("No enabled plugins") + pid = enabled[0].get("id") or enabled[0].get("name") + code, resp = _api( + f"/api/plugins/{pid}/disable", + method="POST", secret=admin_secret, + ) + logger.info("Disable plugin %s: %d %s", pid, code, 
resp) + + +# =================================================================== +# PHASE 10: MCP servers +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase10MCP: + """Verify MCP server management.""" + + def test_list_servers(self, stack, admin_secret) -> None: + data = _api_ok("/api/mcp/servers", secret=admin_secret) + logger.info("MCP servers: %s", json.dumps(data, indent=2)[:500]) + + def test_registry(self, stack, admin_secret) -> None: + data = _api_ok("/api/mcp/registry", secret=admin_secret) + items = data if isinstance(data, list) else data.get("servers", []) + logger.info("MCP registry: %d entries", len(items)) + + +# =================================================================== +# PHASE 11: Scheduler +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase11Scheduler: + """Verify scheduled task management.""" + + def test_list_schedules(self, stack, admin_secret) -> None: + data = _api_ok("/api/schedules", secret=admin_secret) + logger.info("Schedules: %s", json.dumps(data, indent=2)[:300]) + + def test_create_schedule(self, stack, admin_secret) -> None: + body = { + "prompt": "Hello, this is a test schedule", + "cron": "0 9 * * 1", + "enabled": False, + } + code, data = _api( + "/api/schedules", method="POST", body=body, secret=admin_secret, + ) + logger.info("Create schedule: %d %s", code, data) + assert code in (200, 201), f"Create schedule returned {code}: {data}" + + def test_delete_schedule(self, stack, admin_secret) -> None: + data = _api_ok("/api/schedules", secret=admin_secret) + tasks = data if isinstance(data, list) else data.get("tasks", []) + if not tasks: + pytest.skip("No schedules to delete") + task_id = tasks[0].get("id") or tasks[0].get("task_id") + code, resp = _api( + f"/api/schedules/{task_id}", + method="DELETE", secret=admin_secret, + ) + assert code == 200, f"Delete schedule returned {code}: {resp}" + + +# 
=================================================================== +# PHASE 12: Profile management +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase12Profile: + """Verify agent profile CRUD.""" + + def test_get_profile(self, stack, admin_secret) -> None: + data = _api_ok("/api/profile", secret=admin_secret) + logger.info("Profile: %s", json.dumps(data, indent=2)[:500]) + + def test_update_profile(self, stack, admin_secret) -> None: + data = _api_ok( + "/api/profile", + method="POST", + body={"name": "E2E Test Agent", "personality": "helpful and concise"}, + secret=admin_secret, + ) + logger.info("Profile updated: %s", data) + + +# =================================================================== +# PHASE 13: Foundry IQ (search + embedding) +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase13FoundryIQ: + """Deploy and test Foundry IQ (AI Search + Embedding AOAI).""" + + def test_provision_foundry_iq(self, stack, admin_secret) -> None: + body = { + "resource_group": _RG, + "location": _LOCATION, + } + code, data = _api( + "/api/foundry-iq/provision", + method="POST", body=body, + secret=admin_secret, timeout=_DEPLOY_TIMEOUT, + ) + logger.info("Foundry IQ provision: code=%d data=%s", code, json.dumps(data or {}, indent=2)) + if code != 200: + pytest.xfail(f"Foundry IQ provision failed: {data}") + assert data.get("status") == "ok" or data.get("search_endpoint") + + def test_foundry_iq_config(self, stack, admin_secret) -> None: + data = _api_ok("/api/foundry-iq/config", secret=admin_secret) + logger.info("Foundry IQ config: %s", json.dumps(data, indent=2)) + + def test_foundry_iq_stats(self, stack, admin_secret) -> None: + data = _api_ok("/api/foundry-iq/stats", secret=admin_secret) + logger.info("Foundry IQ stats: %s", json.dumps(data, indent=2)) + + +# =================================================================== +# PHASE 14: 
Idempotency -- redeploy everything +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase14Idempotency: + """Redeploy the same config and verify it remains stable.""" + + def test_redeploy_foundry(self, stack, admin_secret) -> None: + body = { + "resource_group": _RG, + "location": _LOCATION, + "base_name": _BASE_NAME, + "deploy_key_vault": True, + } + code, data = _api( + "/api/setup/foundry/deploy", + method="POST", body=body, + secret=admin_secret, timeout=_DEPLOY_TIMEOUT, + ) + if code == 500 and data: + detail = json.dumps(data.get("steps", [])) + if "FlagMustBeSetForRestore" in detail or "soft-de" in detail: + pytest.xfail("Soft-deleted resource blocking redeploy") + assert code == 200, f"Redeploy returned {code}: {json.dumps(data, indent=2)[:500]}" + assert data["status"] == "ok" + assert data.get("foundry_endpoint") + + def test_redeploy_content_safety(self, stack, admin_secret) -> None: + body = {"resource_group": _RG, "location": _LOCATION} + data = _api_ok( + "/api/content-safety/deploy", + method="POST", body=body, + secret=admin_secret, timeout=_DEPLOY_TIMEOUT, + ) + assert data.get("status") == "ok" + + def test_chat_still_works_after_redeploy(self, stack) -> None: + """Chat MUST survive an idempotent redeploy.""" + time.sleep(5) + deadline = time.monotonic() + 90 + last_status = "" + while time.monotonic() < deadline: + text, last_status = _send_chat_probe() + if last_status == "ok" and text: + logger.info("Chat OK after redeploy: %s", text[:200]) + return + logger.info("Chat probe (post-redeploy): status=%s -- retrying", last_status) + time.sleep(8) + pytest.fail( + f"Chat broke after redeploy. 
Last status={last_status}\n" + f"{_diag('chat-after-redeploy')}" + ) + + +# =================================================================== +# PHASE 15: Configuration save (combined save endpoint) +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase15CombinedSave: + """Test the combined configuration/save endpoint.""" + + def test_save_configuration(self, stack, admin_secret, telegram_config) -> None: + token, whitelist = telegram_config + body: dict[str, Any] = { + "bot": { + "resource_group": _RG, + "location": _LOCATION, + "display_name": "polyclaw-e2e-test", + }, + } + if token: + body["telegram"] = {"token": token, "whitelist": whitelist} + data = _api_ok( + "/api/setup/configuration/save", + method="POST", body=body, + secret=admin_secret, timeout=120, + ) + assert data.get("status") == "ok", f"Combined save failed: {data}" + logger.info("Combined save steps: %s", data.get("steps")) + + def test_status_after_combined_save(self, stack, admin_secret, telegram_config) -> None: + data = _api_ok("/api/setup/status", secret=admin_secret) + assert data["bot_configured"] + token, _ = telegram_config + if token: + assert data.get("telegram_configured") + + +# =================================================================== +# PHASE 16: Stop / start lifecycle (config survives restart) +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase16Lifecycle: + """Stop and restart the Docker stack, verify config persists and chat works. + + This simulates the real-world scenario where a user shuts down their + machine, boots it up again, and expects everything to still work + without re-running setup. 
+ """ + + def test_stop_stack(self, stack, admin_secret) -> None: + """Stop both containers gracefully.""" + _compose("stop", timeout=60) + # Verify containers are stopped + time.sleep(3) + r = _run( + ["docker", "compose", "ps", "--format", "json"], + check=False, timeout=15, + ) + logger.info("Containers after stop: %s", r.stdout[:500]) + + def test_start_stack_again(self, stack, admin_secret) -> None: + """Start containers back up.""" + _compose("start", timeout=60) + health = _poll_health(timeout=_BOOT_TIMEOUT) + assert health is not None, ( + f"Admin not healthy after restart.\n{_diag('lifecycle-start-1')}" + ) + logger.info("Stack healthy after first restart: %s", health) + + def test_azure_creds_survive_restart(self, stack, admin_secret) -> None: + """Azure CLI should still be logged in after restart. + + Docker volumes persist across stop/start, but ephemeral container + filesystem does not. We re-copy creds just in case. + """ + _copy_azure_creds() + time.sleep(10) # let cache expire + deadline = time.monotonic() + 60 + data: dict = {} + while time.monotonic() < deadline: + data = _api_ok("/api/setup/azure/check", secret=admin_secret) + if data.get("status") == "logged_in": + logger.info("Azure OK after restart: %s", data.get("user")) + return + time.sleep(5) + pytest.fail(f"Azure CLI not logged in after restart: {data}") + + def test_foundry_still_deployed(self, stack, admin_secret) -> None: + """Foundry deployment status should survive restart.""" + data = _api_ok("/api/setup/foundry/status", secret=admin_secret) + assert data.get("deployed"), ( + f"Foundry deploy state lost after restart: {json.dumps(data, indent=2)}" + ) + assert data.get("foundry_endpoint"), "Foundry endpoint lost after restart" + logger.info("Foundry still deployed: %s", data.get("foundry_endpoint")) + + def test_config_survives_restart(self, stack, admin_secret) -> None: + """Bot config and profile should persist on the /data volume.""" + status = _api_ok("/api/setup/status", 
secret=admin_secret) + assert status["bot_configured"], "Bot config lost after restart" + profile = _api_ok("/api/profile", secret=admin_secret) + logger.info("Profile after restart: %s", json.dumps(profile, indent=2)[:300]) + # Profile name was set to "E2E Test Agent" in Phase 12 + if profile.get("name"): + assert profile["name"] == "E2E Test Agent", ( + f"Profile name changed after restart: {profile['name']}" + ) + + def test_chat_works_after_restart(self, stack) -> None: + """Chat MUST work after a stop/start cycle.""" + deadline = time.monotonic() + 120 + last_status = "" + while time.monotonic() < deadline: + text, last_status = _send_chat_probe() + if last_status == "ok" and text: + logger.info("Chat works after restart: %s", text[:200]) + return + logger.info("Chat probe (post-restart): status=%s -- retrying", last_status) + time.sleep(8) + pytest.fail( + f"Chat broken after restart. Last status={last_status}\n" + f"{_diag('chat-after-restart')}" + ) + + def test_stop_and_start_again(self, stack, admin_secret) -> None: + """Second stop/start cycle to verify repeated restarts work.""" + _compose("stop", timeout=60) + time.sleep(3) + _compose("start", timeout=60) + health = _poll_health(timeout=_BOOT_TIMEOUT) + assert health is not None, ( + f"Admin not healthy after second restart.\n{_diag('lifecycle-start-2')}" + ) + # Re-inject creds (ephemeral FS) + _copy_azure_creds() + time.sleep(5) + logger.info("Stack healthy after second restart: %s", health) + + def test_chat_works_after_second_restart(self, stack) -> None: + """Chat MUST still work after two stop/start cycles.""" + deadline = time.monotonic() + 120 + last_status = "" + while time.monotonic() < deadline: + text, last_status = _send_chat_probe() + if last_status == "ok" and text: + logger.info("Chat OK after 2nd restart: %s", text[:200]) + return + logger.info("Chat (2nd restart): status=%s -- retrying", last_status) + time.sleep(8) + pytest.fail( + f"Chat broken after second restart. 
Last status={last_status}\n" + f"{_diag('chat-after-restart-2')}" + ) + + def test_no_kv_errors_after_restart(self, stack) -> None: + """Runtime must not have KV errors after stop/start cycles. + + This catches the scenario where the admin container rewrites + ADMIN_SECRET as ``@kv:admin-secret`` during a restart (because + KV is already deployed) and the runtime cannot resolve it. + """ + result = subprocess.run( + ["docker", "logs", _RUNTIME_CONTAINER, "--tail", "200"], + capture_output=True, text=True, timeout=15, + ) + logs = result.stdout + result.stderr + kv_errors = [ + line for line in logs.splitlines() + if "Failed to resolve Key Vault" in line + or "DefaultAzureCredential failed" in line + ] + if kv_errors: + pytest.fail( + f"Runtime has Key Vault errors after lifecycle restart:\n" + + "\n".join(f" {l.strip()}" for l in kv_errors[:5]) + ) + + +# =================================================================== +# PHASE 17: Config change mid-lifecycle +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase17ConfigChange: + """Change configuration, restart, verify the new config takes effect + and chat still works. 
+ """ + + def test_change_profile(self, stack, admin_secret) -> None: + """Update the agent name and personality.""" + data = _api_ok( + "/api/profile", + method="POST", + body={"name": "Reconfigured Agent", "personality": "terse and technical"}, + secret=admin_secret, + ) + logger.info("Profile changed: %s", data) + + def test_profile_persisted(self, stack, admin_secret) -> None: + data = _api_ok("/api/profile", secret=admin_secret) + assert data.get("name") == "Reconfigured Agent", f"Profile not updated: {data}" + + def test_change_guardrails_mode(self, stack, admin_secret) -> None: + """Toggle guardrails to a different mode and verify.""" + config = _api_ok("/api/guardrails/config", secret=admin_secret) + current_mode = config.get("hitl_mode") or config.get("mode", "auto") + new_mode = "always" if current_mode != "always" else "auto" + code, data = _api( + "/api/guardrails/config", + method="POST", + body={"hitl_mode": new_mode}, + secret=admin_secret, + ) + logger.info("Guardrails mode change to %s: %d %s", new_mode, code, data) + if code == 200: + readback = _api_ok("/api/guardrails/config", secret=admin_secret) + actual = readback.get("hitl_mode") or readback.get("mode") + logger.info("Guardrails mode after change: %s", actual) + + def test_restart_after_config_change(self, stack, admin_secret) -> None: + """Restart the runtime container via API and verify chat works.""" + code, data = _api( + "/api/setup/container/restart", + method="POST", secret=admin_secret, timeout=60, + ) + logger.info("Container restart: %d %s", code, data) + time.sleep(10) + _poll_health(timeout=60) + + def test_chat_works_after_config_change(self, stack) -> None: + """Chat MUST still work after config changes + restart.""" + deadline = time.monotonic() + 120 + last_status = "" + while time.monotonic() < deadline: + text, last_status = _send_chat_probe() + if last_status == "ok" and text: + logger.info("Chat OK after config change: %s", text[:200]) + return + logger.info("Chat (config 
change): status=%s -- retrying", last_status) + time.sleep(8) + pytest.fail( + f"Chat broken after config change. Last status={last_status}\n" + f"{_diag('chat-after-config-change')}" + ) + + def test_changed_profile_survives_restart(self, stack, admin_secret) -> None: + data = _api_ok("/api/profile", secret=admin_secret) + assert data.get("name") == "Reconfigured Agent", ( + f"Profile reverted after restart: {data}" + ) + + def test_restore_profile(self, stack, admin_secret) -> None: + """Restore original profile for subsequent phases.""" + _api_ok( + "/api/profile", + method="POST", + body={"name": "E2E Test Agent", "personality": "helpful and concise"}, + secret=admin_secret, + ) + + +# =================================================================== +# PHASE 18: Bot service add / remove / toggle +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase18BotServiceToggle: + """Verify chat survives adding, removing, and re-adding bot config. + + Bot service is **fully optional**. The app must keep working with + only Foundry deployed, regardless of whether bot config is present. 
+ """ + + def test_remove_telegram_config(self, stack, admin_secret, telegram_config) -> None: + """Remove Telegram channel config.""" + token, _ = telegram_config + if not token: + pytest.skip("Telegram was never configured") + code, data = _api( + "/api/setup/channels/telegram/config", + method="DELETE", secret=admin_secret, + ) + logger.info("Remove Telegram: %d %s", code, data) + # Verify it's gone + status = _api_ok("/api/setup/status", secret=admin_secret) + assert not status.get("telegram_configured"), ( + f"Telegram still configured after removal: {status}" + ) + + def test_remove_bot_config(self, stack, admin_secret) -> None: + """Clear bot config by saving with empty resource group.""" + _api_ok( + "/api/setup/bot/config", + method="POST", + body={"resource_group": "", "location": "", "display_name": "", "bot_handle": ""}, + secret=admin_secret, + ) + status = _api_ok("/api/setup/status", secret=admin_secret) + assert not status["bot_configured"], ( + f"Bot still configured after clearing: {status}" + ) + logger.info("Bot config cleared") + + def test_restart_after_bot_removal(self, stack, admin_secret) -> None: + """Restart runtime so it picks up the cleared config.""" + code, data = _api( + "/api/setup/container/restart", + method="POST", secret=admin_secret, timeout=60, + ) + logger.info("Container restart after bot removal: %d %s", code, data) + time.sleep(10) + health = _poll_health(timeout=60) + assert health is not None, ( + f"Admin not healthy after bot removal + restart.\n" + f"{_diag('bot-removal-restart')}" + ) + + def test_chat_works_without_bot_service(self, stack) -> None: + """Chat MUST work with no bot config at all -- only Foundry.""" + deadline = time.monotonic() + 120 + last_status = "" + while time.monotonic() < deadline: + text, last_status = _send_chat_probe() + if last_status == "ok" and text: + logger.info("Chat works without bot service: %s", text[:200]) + return + logger.info("Chat (no bot): status=%s -- retrying", last_status) + 
time.sleep(8) + pytest.fail( + f"Chat broken after bot removal. Last status={last_status}\n" + f"{_diag('chat-no-bot')}" + ) + + def test_re_add_bot_config(self, stack, admin_secret) -> None: + """Re-add bot config.""" + _api_ok( + "/api/setup/bot/config", + method="POST", + body={ + "resource_group": _RG, + "location": _LOCATION, + "display_name": "polyclaw-e2e-test", + "bot_handle": "", + }, + secret=admin_secret, + ) + status = _api_ok("/api/setup/status", secret=admin_secret) + assert status["bot_configured"], f"Bot not configured after re-add: {status}" + logger.info("Bot config re-added") + + def test_re_add_telegram_config(self, stack, admin_secret, telegram_config) -> None: + """Re-add Telegram if available.""" + token, whitelist = telegram_config + if not token: + pytest.skip("No Telegram token") + _api_ok( + "/api/setup/channels/telegram/config", + method="POST", + body={"token": token, "whitelist": whitelist}, + secret=admin_secret, + ) + + def test_chat_works_after_bot_re_add(self, stack) -> None: + """Chat MUST work after re-adding bot config.""" + deadline = time.monotonic() + 90 + last_status = "" + while time.monotonic() < deadline: + text, last_status = _send_chat_probe() + if last_status == "ok" and text: + logger.info("Chat works after bot re-add: %s", text[:200]) + return + logger.info("Chat (bot re-add): status=%s -- retrying", last_status) + time.sleep(8) + pytest.fail( + f"Chat broken after bot re-add. 
Last status={last_status}\n" + f"{_diag('chat-bot-re-add')}" + ) + + def test_remove_bot_config_again(self, stack, admin_secret) -> None: + """Remove bot config a second time to leave stack in bot-free state.""" + _api_ok( + "/api/setup/bot/config", + method="POST", + body={"resource_group": "", "location": "", "display_name": "", "bot_handle": ""}, + secret=admin_secret, + ) + + def test_chat_still_works_after_second_removal(self, stack) -> None: + """Chat MUST work after the second bot removal.""" + time.sleep(5) + deadline = time.monotonic() + 90 + last_status = "" + while time.monotonic() < deadline: + text, last_status = _send_chat_probe() + if last_status == "ok" and text: + logger.info("Chat after 2nd bot removal: %s", text[:200]) + return + logger.info("Chat (2nd bot removal): status=%s -- retrying", last_status) + time.sleep(8) + pytest.fail( + f"Chat broken after second bot removal. Last status={last_status}\n" + f"{_diag('chat-bot-2nd-removal')}" + ) + + def test_restore_bot_config_for_subsequent_phases(self, stack, admin_secret, telegram_config) -> None: + """Restore bot + telegram for remaining phases.""" + _api_ok( + "/api/setup/bot/config", + method="POST", + body={ + "resource_group": _RG, + "location": _LOCATION, + "display_name": "polyclaw-e2e-test", + "bot_handle": "", + }, + secret=admin_secret, + ) + token, whitelist = telegram_config + if token: + _api_ok( + "/api/setup/channels/telegram/config", + method="POST", + body={"token": token, "whitelist": whitelist}, + secret=admin_secret, + ) + status = _api_ok("/api/setup/status", secret=admin_secret) + assert status["bot_configured"] + logger.info("Bot config restored for remaining phases") + + +# =================================================================== +# PHASE 19: Voice / ACS (optional) +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase19Voice: + """Deploy ACS for voice calls.""" + + def test_voice_config(self, stack, 
admin_secret) -> None: + data = _api_ok("/api/setup/voice/config", secret=admin_secret) + logger.info("Voice config: %s", json.dumps(data, indent=2)[:500]) + + def test_deploy_acs(self, stack, admin_secret) -> None: + body = { + "resource_group": _RG, + "location": _LOCATION, + } + code, data = _api( + "/api/setup/voice/deploy", + method="POST", body=body, + secret=admin_secret, timeout=_DEPLOY_TIMEOUT, + ) + logger.info("ACS deploy: code=%d data=%s", code, json.dumps(data or {}, indent=2)) + if code != 200: + pytest.xfail(f"ACS deploy failed: {data}") + + def test_list_acs_resources(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/voice/acs/list", secret=admin_secret) + logger.info("ACS resources: %s", json.dumps(data, indent=2)[:500]) + + +# =================================================================== +# PHASE 20: Lockdown mode +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase20Lockdown: + """Test lockdown mode toggle.""" + + def test_lockdown_status(self, stack, admin_secret) -> None: + data = _api_ok("/api/setup/lockdown", secret=admin_secret) + logger.info("Lockdown: %s", data) + + def test_enable_lockdown(self, stack, admin_secret) -> None: + code, data = _api( + "/api/setup/lockdown", + method="POST", + body={"enabled": True}, + secret=admin_secret, + ) + logger.info("Enable lockdown: %d %s", code, data) + + def test_disable_lockdown(self, stack, admin_secret) -> None: + code, data = _api( + "/api/setup/lockdown", + method="POST", + body={"enabled": False}, + secret=admin_secret, + ) + logger.info("Disable lockdown: %d %s", code, data) + + +# =================================================================== +# PHASE 21: Decommission all resources +# =================================================================== + +@pytest.mark.e2e_setup +class TestPhase21Decommission: + """Tear down all Azure resources.""" + + def test_decommission_foundry(self, stack, admin_secret) 
-> None: + body = {"resource_group": _RG} + code, data = _api( + "/api/setup/foundry/decommission", + method="POST", body=body, + secret=admin_secret, timeout=120, + ) + logger.info("Decommission: code=%d data=%s", code, json.dumps(data or {}, indent=2)) + if code == 500 and data and "No subscription" in str(data.get("steps", [])): + pytest.xfail("Subscription not set in decommission context") + assert code == 200, f"Decommission returned {code}: {data}" + assert data["status"] == "ok" + logger.info("Decommission steps: %s", data.get("steps")) + + def test_status_after_decommission(self, stack, admin_secret) -> None: + time.sleep(5) + data = _api_ok("/api/setup/status", secret=admin_secret) + if data["foundry"]["deployed"]: + logger.warning( + "Foundry still shows deployed (decommission may have xfailed)" + ) + logger.info( + "Post-decommission status: foundry_deployed=%s", + data["foundry"]["deployed"], + ) + + def test_collect_final_diagnostics(self, stack) -> None: + for name in (_ADMIN_CONTAINER, _RUNTIME_CONTAINER): + logs = _container_logs(name, tail=80) + logger.info("=== %s final logs ===\n%s", name, logs) + + +# =================================================================== +# PHASE 22: TUI headless mode (health + actual polyclaw-cli run) +# =================================================================== + +_TUI_DIR = _PROJECT_ROOT / "app" / "tui" +_TUI_ENTRY = _TUI_DIR / "src" / "index.ts" +_TUI_RUN_TIMEOUT = 300 # TUI may rebuild + wait for ready + chat + + +def _bun_available() -> bool: + try: + r = _run(["bun", "--version"], check=False, timeout=10) + return r.returncode == 0 + except Exception: + return False + + +def _ensure_tui_deps() -> bool: + """Install TUI dependencies if needed. 
Returns True on success.""" + if (_TUI_DIR / "node_modules").exists(): + return True + try: + r = _run(["bun", "install"], check=False, timeout=60, cwd=_TUI_DIR) + return r.returncode == 0 + except Exception: + return False + + +@pytest.mark.e2e_setup +class TestPhase22TUIHeadless: + """Verify the TUI CLI actually works in headless mode. + + Tests: + 1. ``polyclaw-cli health`` against the running stack (read-only). + 2. ``polyclaw-cli stop`` to gracefully shut down. + 3. ``polyclaw-cli run "prompt"`` which does the full lifecycle: + build -> start -> wait -> chat via WebSocket -> print -> stop. + This is the real user-facing headless path. + + The ``run`` command tears down any existing stack and starts fresh. + The ``/data`` named volume persists (``docker compose down`` without + ``-v``), so ``FOUNDRY_ENDPOINT`` from earlier phases is still + available and chat should work. + """ + + def test_bun_available(self, stack) -> None: + if not _bun_available(): + pytest.skip("Bun not installed -- cannot test TUI") + + def test_tui_deps_installed(self, stack) -> None: + if not _bun_available(): + pytest.skip("Bun not installed") + if not _TUI_ENTRY.exists(): + pytest.skip("TUI source not found") + assert _ensure_tui_deps(), "Failed to install TUI dependencies" + + def test_tui_health(self, stack) -> None: + """``polyclaw-cli health`` should succeed against the running stack.""" + if not _bun_available(): + pytest.skip("Bun not installed") + if not _TUI_ENTRY.exists(): + pytest.skip("TUI source not found") + r = _run( + ["bun", "run", str(_TUI_ENTRY), "health"], + check=False, timeout=30, cwd=_TUI_DIR, + ) + logger.info("TUI health stdout: %s", r.stdout[:500]) + logger.info("TUI health stderr: %s", r.stderr[:300]) + assert r.returncode == 0, ( + f"polyclaw-cli health exited {r.returncode}: {r.stderr[:500]}" + ) + try: + health = json.loads(r.stdout.strip()) + assert health.get("status") == "ok" or "version" in health + except json.JSONDecodeError: + pass # extra output 
lines are OK if exit code is 0 + + def test_tui_stop(self, stack) -> None: + """``polyclaw-cli stop`` should gracefully shut down the stack. + + This prepares for the ``run`` test which builds its own stack. + """ + if not _bun_available(): + pytest.skip("Bun not installed") + if not _TUI_ENTRY.exists(): + pytest.skip("TUI source not found") + r = _run( + ["bun", "run", str(_TUI_ENTRY), "stop"], + check=False, timeout=60, cwd=_TUI_DIR, + ) + logger.info("TUI stop: exit=%d stdout=%s", r.returncode, r.stdout[:300]) + # stop may exit non-zero if stack was already down -- that's fine + time.sleep(5) + + def test_tui_run_prompt(self, stack, admin_secret) -> None: + """``polyclaw-cli run "prompt"`` -- the real headless E2E path. + + This does the full TUI lifecycle: build image, start containers, + wait for health, open WebSocket, send prompt, collect response, + stop containers. The ``/data`` volume still has + ``FOUNDRY_ENDPOINT`` from earlier phases. + + We also re-inject ``ADMIN_SECRET`` into the data volume before + starting, because the TUI's ``run`` mode reads it from there. + """ + if not _bun_available(): + pytest.skip("Bun not installed") + if not _TUI_ENTRY.exists(): + pytest.skip("TUI source not found") + + # Pre-inject ADMIN_SECRET into the data volume so the fresh + # containers pick it up. We write directly to a temp container + # that mounts the same volume. 
+ try: + _run( + [ + "docker", "run", "--rm", + "-v", "polyclaw-data:/data", + "alpine", "sh", "-c", + f'grep -q "^ADMIN_SECRET=" /data/.env 2>/dev/null ' + f'&& sed -i "s|^ADMIN_SECRET=.*|ADMIN_SECRET={admin_secret}|" /data/.env ' + f'|| echo "ADMIN_SECRET={admin_secret}" >> /data/.env', + ], + check=False, timeout=30, + ) + except Exception as exc: + logger.warning("Failed to pre-inject ADMIN_SECRET: %s", exc) + + # Run the actual TUI command + env = {**os.environ, "VERBOSE": "1"} + r = _run( + ["bun", "run", str(_TUI_ENTRY), "run", "Reply with exactly: TUI_RUN_OK"], + check=False, timeout=_TUI_RUN_TIMEOUT, cwd=_TUI_DIR, env=env, + ) + logger.info("TUI run exit=%d", r.returncode) + logger.info("TUI run stdout:\n%s", r.stdout[:2000]) + if r.stderr: + logger.info("TUI run stderr:\n%s", r.stderr[:1000]) + + assert r.returncode == 0, ( + f"polyclaw-cli run exited {r.returncode}.\n" + f"stdout: {r.stdout[:1500]}\n" + f"stderr: {r.stderr[:1000]}" + ) + # The last line of stdout should be the chat response + output = r.stdout.strip() + assert output, "polyclaw-cli run produced no output" + # The response is the last line (previous lines are status messages) + response_line = output.split("\n")[-1].strip() + logger.info("TUI run response: %s", response_line[:300]) + assert len(response_line) > 0, "TUI run response was empty" + + def test_stack_down_after_tui_run(self, stack) -> None: + """Verify the TUI left the stack stopped (it calls stopContainer).""" + r = _run( + ["docker", "compose", "ps", "-q"], + check=False, timeout=15, + ) + running = r.stdout.strip() + if running: + logger.info("Containers still running after TUI run (expected stopped): %s", running) + else: + logger.info("Stack correctly stopped by polyclaw-cli run") diff --git a/app/runtime/tests/test_guardrails_policy_validation.py b/app/runtime/tests/test_guardrails_policy_validation.py index c0ed8a3..6e154d7 100644 --- a/app/runtime/tests/test_guardrails_policy_validation.py +++ 
b/app/runtime/tests/test_guardrails_policy_validation.py @@ -50,9 +50,9 @@ def test_even_with_preset_applied_disabled_wins(self, tmp_path) -> None: def test_model_policy_ignored_when_disabled(self, tmp_path) -> None: s = _store(tmp_path) - s.apply_model_defaults(["gpt-4.1"]) + s.apply_model_defaults(["gpt-5-mini"]) s.set_hitl_enabled(False) - assert s.resolve_action("run", model="gpt-4.1") == "allow" + assert s.resolve_action("run", model="gpt-5-mini") == "allow" # ── 2. Restrictive preset -- tightest controls ────────────────────────── @@ -244,69 +244,69 @@ class TestModelScopedResolution: def setup_store(self, tmp_path) -> None: self.s = _store(tmp_path) self.s.apply_preset(PRESET_BALANCED, auto_models=False) - self.s.apply_model_defaults(["gpt-5.3-codex", "gpt-5.2", "gpt-4.1"]) + self.s.apply_model_defaults(["gpt-5", "gpt-4.1", "gpt-5-mini"]) - # Strong model (gpt-5.3-codex) gets permissive policies -- model wins + # Strong model (gpt-5) gets permissive policies -- model wins def test_strong_run_filter(self) -> None: # Model policy (permissive interactive high=filter) beats context (balanced hitl) - assert self.s.resolve_action("run", model="gpt-5.3-codex") == "filter" + assert self.s.resolve_action("run", model="gpt-5") == "filter" def test_strong_view_filter(self) -> None: - assert self.s.resolve_action("view", model="gpt-5.3-codex") == "filter" + assert self.s.resolve_action("view", model="gpt-5") == "filter" def test_strong_github_filter(self) -> None: # Model policy (permissive interactive high=filter) beats context (balanced hitl) assert self.s.resolve_action( "mcp:github-mcp-server", mcp_server="github-mcp-server", - model="gpt-5.3-codex", + model="gpt-5", ) == "filter" def test_strong_mslearn_filter(self) -> None: assert self.s.resolve_action( "mcp:microsoft-learn", mcp_server="microsoft-learn", - model="gpt-5.3-codex", + model="gpt-5", ) == "filter" # Standard model defaults to interactive context -- context policy wins def test_standard_run_hitl(self) 
-> None: # Default ctx=interactive, balanced interactive high-risk=hitl - assert self.s.resolve_action("run", model="gpt-5.2") == "hitl" + assert self.s.resolve_action("run", model="gpt-4.1") == "hitl" def test_standard_file_ops_filter(self) -> None: - assert self.s.resolve_action("create", model="gpt-5.2") == "filter" - assert self.s.resolve_action("edit", model="gpt-5.2") == "filter" + assert self.s.resolve_action("create", model="gpt-4.1") == "filter" + assert self.s.resolve_action("edit", model="gpt-4.1") == "filter" def test_standard_github_hitl(self) -> None: # Default ctx=interactive, balanced interactive high-risk=hitl assert self.s.resolve_action( "mcp:github-mcp-server", mcp_server="github-mcp-server", - model="gpt-5.2", + model="gpt-4.1", ) == "hitl" # Cautious model defaults to interactive context -- context policy wins def test_cautious_run_hitl(self) -> None: # Default ctx=interactive, balanced interactive high-risk=hitl - assert self.s.resolve_action("run", model="gpt-4.1") == "hitl" + assert self.s.resolve_action("run", model="gpt-5-mini") == "hitl" def test_cautious_create_hitl(self) -> None: # Model policy (restrictive interactive medium=hitl) beats context (balanced filter) - assert self.s.resolve_action("create", model="gpt-4.1") == "hitl" + assert self.s.resolve_action("create", model="gpt-5-mini") == "hitl" def test_cautious_github_hitl(self) -> None: # Default ctx=interactive, balanced interactive high-risk=hitl assert self.s.resolve_action( "mcp:github-mcp-server", mcp_server="github-mcp-server", - model="gpt-4.1", + model="gpt-5-mini", ) == "hitl" def test_cautious_mslearn_filter(self) -> None: assert self.s.resolve_action( "mcp:microsoft-learn", mcp_server="microsoft-learn", - model="gpt-4.1", + model="gpt-5-mini", ) == "filter" def test_cautious_view_filter(self) -> None: - assert self.s.resolve_action("view", model="gpt-4.1") == "filter" + assert self.s.resolve_action("view", model="gpt-5-mini") == "filter" # Unknown model falls back to 
context policies (no model column) def test_unknown_model_uses_context_policy(self) -> None: @@ -318,10 +318,10 @@ def test_unknown_model_uses_context_policy(self) -> None: assert result == "hitl" def test_model_overrides_context(self) -> None: - # gpt-4.1 model policy (restrictive interactive high=hitl) and context + # gpt-5-mini model policy (restrictive interactive high=hitl) and context # policy (balanced interactive high=hitl) both happen to agree here. result = self.s.resolve_action( - "run", execution_context="interactive", model="gpt-4.1", + "run", execution_context="interactive", model="gpt-5-mini", ) # Both agree on hitl, but model wins in general assert result == "hitl" @@ -376,7 +376,7 @@ def test_rule_with_model_filter(self) -> None: ) == "deny" # Allowed for another model (falls to global default) assert self.s.resolve_action( - "my_custom_tool", model="gpt-5.3-codex", + "my_custom_tool", model="gpt-5", ) == "allow" def test_disabled_rule_ignored(self) -> None: @@ -420,9 +420,9 @@ class TestResolutionPriority: def test_model_beats_tool_policy(self, tmp_path) -> None: s = _store(tmp_path) s.apply_preset(PRESET_PERMISSIVE, auto_models=False) # interactive run=filter - s.apply_model_defaults(["gpt-4.1"]) # model restrictive interactive run=hitl + s.apply_model_defaults(["gpt-5-mini"]) # model restrictive interactive run=hitl # Model policy (more specific) wins over context tool policy - assert s.resolve_action("run", execution_context="interactive", model="gpt-4.1") == "hitl" + assert s.resolve_action("run", execution_context="interactive", model="gpt-5-mini") == "hitl" def test_tool_policy_beats_context_default(self, tmp_path) -> None: s = _store(tmp_path) @@ -465,7 +465,7 @@ class TestMixedScenario: def setup_store(self, tmp_path) -> None: self.s = _store(tmp_path) self.s.apply_preset(PRESET_BALANCED, auto_models=False) - self.s.apply_model_defaults(["gpt-5.3-codex", "gpt-5.2", "gpt-4.1"]) + self.s.apply_model_defaults(["gpt-5", "gpt-4.1", 
"gpt-5-mini"]) # Custom rule: block voice calls for all models self.s.add_rule( name="no-voice", pattern="make_voice_call", action="deny", @@ -486,18 +486,18 @@ def test_voice_call_aitl_background(self) -> None: ) == "aitl" def test_strong_model_create_files_filtered(self) -> None: - assert self.s.resolve_action("create", model="gpt-5.3-codex") == "filter" - assert self.s.resolve_action("edit", model="gpt-5.3-codex") == "filter" + assert self.s.resolve_action("create", model="gpt-5") == "filter" + assert self.s.resolve_action("edit", model="gpt-5") == "filter" def test_cautious_model_uses_context_policy_for_terminal(self) -> None: # Default ctx=interactive, balanced interactive high-risk=hitl # Context tool policy takes precedence over model policy - assert self.s.resolve_action("run", model="gpt-4.1") == "hitl" - assert self.s.resolve_action("bash", model="gpt-4.1") == "hitl" + assert self.s.resolve_action("run", model="gpt-5-mini") == "hitl" + assert self.s.resolve_action("bash", model="gpt-5-mini") == "hitl" def test_standard_model_mslearn_filtered(self) -> None: assert self.s.resolve_action( - "mcp:microsoft-learn", mcp_server="microsoft-learn", model="gpt-5.2", + "mcp:microsoft-learn", mcp_server="microsoft-learn", model="gpt-4.1", ) == "filter" def test_context_fallback_for_unknown_tool(self) -> None: @@ -523,7 +523,7 @@ def test_strong_model_github_filter(self) -> None: # Model policy (permissive interactive high=filter) beats context (balanced hitl) assert self.s.resolve_action( "mcp:github-mcp-server", mcp_server="github-mcp-server", - model="gpt-5.3-codex", + model="gpt-5", ) == "filter" def test_cautious_model_github_hitl(self) -> None: @@ -531,7 +531,7 @@ def test_cautious_model_github_hitl(self) -> None: # Context tool policy takes precedence over model policy (deny) assert self.s.resolve_action( "mcp:github-mcp-server", mcp_server="github-mcp-server", - model="gpt-4.1", + model="gpt-5-mini", ) == "hitl" @@ -544,26 +544,26 @@ def 
test_balanced_adds_tier_2_models(self, tmp_path) -> None: s = _store(tmp_path) s.apply_preset(PRESET_BALANCED, auto_models=True) # Should have the tier-2 models as columns - assert "claude-sonnet-4.6" in s.config.model_columns - assert "gpt-5.2" in s.config.model_columns + assert "gpt-4.1" in s.config.model_columns + assert "gpt-4.1" in s.config.model_columns def test_restrictive_adds_tier_3_models(self, tmp_path) -> None: s = _store(tmp_path) s.apply_preset(PRESET_RESTRICTIVE, auto_models=True) - assert "gpt-4.1" in s.config.model_columns + assert "gpt-5-mini" in s.config.model_columns assert "gpt-5-mini" in s.config.model_columns def test_permissive_adds_tier_1_models(self, tmp_path) -> None: s = _store(tmp_path) s.apply_preset(PRESET_PERMISSIVE, auto_models=True) - assert "gpt-5.3-codex" in s.config.model_columns - assert "claude-opus-4.6" in s.config.model_columns + assert "gpt-5" in s.config.model_columns + assert "gpt-5" in s.config.model_columns def test_auto_models_have_policies(self, tmp_path) -> None: s = _store(tmp_path) s.apply_preset(PRESET_RESTRICTIVE, auto_models=True) - # gpt-4.1 should have model policies populated per context - assert "gpt-4.1" in s.config.model_policies - assert s.config.model_policies["gpt-4.1"]["interactive"]["run"] == "hitl" - assert s.config.model_policies["gpt-4.1"]["background"]["run"] == "deny" - assert s.config.model_policies["gpt-4.1"]["interactive"]["view"] == "filter" + # gpt-5-mini should have model policies populated per context + assert "gpt-5-mini" in s.config.model_policies + assert s.config.model_policies["gpt-5-mini"]["interactive"]["run"] == "hitl" + assert s.config.model_policies["gpt-5-mini"]["background"]["run"] == "deny" + assert s.config.model_policies["gpt-5-mini"]["interactive"]["view"] == "filter" diff --git a/app/runtime/tests/test_guardrails_presets.py b/app/runtime/tests/test_guardrails_presets.py index 96e6b6e..a0104b4 100644 --- a/app/runtime/tests/test_guardrails_presets.py +++ 
b/app/runtime/tests/test_guardrails_presets.py @@ -19,26 +19,26 @@ class TestModelTiers: def test_strong_models_are_tier_1(self) -> None: - assert get_model_tier("gpt-5.3-codex") == 1 - assert get_model_tier("claude-opus-4.6") == 1 - assert get_model_tier("claude-opus-4.6-fast") == 1 + assert get_model_tier("gpt-5") == 1 + assert get_model_tier("gpt-5") == 1 + assert get_model_tier("gpt-5") == 1 def test_standard_models_are_tier_2(self) -> None: - assert get_model_tier("claude-sonnet-4.6") == 2 - assert get_model_tier("gpt-5.2") == 2 - assert get_model_tier("gemini-3-pro-preview") == 2 + assert get_model_tier("gpt-4.1") == 2 + assert get_model_tier("gpt-4.1") == 2 + assert get_model_tier("gpt-4.1") == 2 def test_cautious_models_are_tier_3(self) -> None: - assert get_model_tier("gpt-4.1") == 3 + assert get_model_tier("gpt-5-mini") == 3 assert get_model_tier("gpt-5-mini") == 3 def test_unknown_model_defaults_to_tier_3(self) -> None: assert get_model_tier("some-future-model") == 3 def test_tier_to_preset(self) -> None: - assert get_preset_for_model("gpt-5.3-codex") == PRESET_PERMISSIVE - assert get_preset_for_model("claude-sonnet-4.6") == PRESET_BALANCED - assert get_preset_for_model("gpt-4.1") == PRESET_RESTRICTIVE + assert get_preset_for_model("gpt-5") == PRESET_PERMISSIVE + assert get_preset_for_model("gpt-4.1") == PRESET_BALANCED + assert get_preset_for_model("gpt-5-mini") == PRESET_RESTRICTIVE assert get_preset_for_model("unknown") == PRESET_RESTRICTIVE @@ -213,17 +213,17 @@ def test_apply_preset_invalid_raises(self, tmp_path) -> None: def test_apply_model_defaults_adds_columns(self, tmp_path) -> None: store = GuardrailsConfigStore(tmp_path / "g.json") - store.apply_model_defaults(["gpt-5.3-codex", "gpt-4.1"]) - assert "gpt-5.3-codex" in store.config.model_columns - assert "gpt-4.1" in store.config.model_columns + store.apply_model_defaults(["gpt-5", "gpt-5-mini"]) + assert "gpt-5" in store.config.model_columns + assert "gpt-5-mini" in store.config.model_columns 
def test_apply_model_defaults_differentiates_tiers(self, tmp_path) -> None: store = GuardrailsConfigStore(tmp_path / "g.json") - store.apply_model_defaults(["gpt-5.3-codex", "gpt-5.2", "gpt-4.1"]) + store.apply_model_defaults(["gpt-5", "gpt-4.1", "gpt-5-mini"]) - strong = store.config.model_policies["gpt-5.3-codex"] - standard = store.config.model_policies["gpt-5.2"] - cautious = store.config.model_policies["gpt-4.1"] + strong = store.config.model_policies["gpt-5"] + standard = store.config.model_policies["gpt-4.1"] + cautious = store.config.model_policies["gpt-5-mini"] # Strong (permissive): view filtered everywhere, run filtered interactive / hitl bg assert strong["interactive"]["view"] == "filter" @@ -247,8 +247,8 @@ def test_apply_model_defaults_differentiates_tiers(self, tmp_path) -> None: def test_mcp_risk_differentiation_in_model_policies(self, tmp_path) -> None: store = GuardrailsConfigStore(tmp_path / "g.json") - store.apply_model_defaults(["gpt-5.3-codex"]) - strong = store.config.model_policies["gpt-5.3-codex"] + store.apply_model_defaults(["gpt-5"]) + strong = store.config.model_policies["gpt-5"] # MS Learn (low risk) -> filter everywhere assert strong["interactive"]["mcp:microsoft-learn"] == "filter" assert strong["background"]["mcp:microsoft-learn"] == "filter" @@ -263,8 +263,8 @@ def test_mcp_risk_differentiation_in_model_policies(self, tmp_path) -> None: def test_cautious_model_mcp_risk_differentiation(self, tmp_path) -> None: store = GuardrailsConfigStore(tmp_path / "g.json") - store.apply_model_defaults(["gpt-4.1"]) - cautious = store.config.model_policies["gpt-4.1"] + store.apply_model_defaults(["gpt-5-mini"]) + cautious = store.config.model_policies["gpt-5-mini"] # MS Learn (low risk) -> filter everywhere assert cautious["interactive"]["mcp:microsoft-learn"] == "filter" assert cautious["background"]["mcp:microsoft-learn"] == "filter" @@ -280,24 +280,24 @@ def test_cautious_model_mcp_risk_differentiation(self, tmp_path) -> None: def 
test_resolve_action_uses_model_policy(self, tmp_path) -> None: store = GuardrailsConfigStore(tmp_path / "g.json") store.set_hitl_enabled(True) - store.apply_model_defaults(["gpt-4.1"]) - # gpt-4.1 is tier 3 (restrictive) -- run in interactive should be hitl + store.apply_model_defaults(["gpt-5-mini"]) + # gpt-5-mini is tier 3 (restrictive) -- run in interactive should be hitl result = store.resolve_action( - "run", execution_context="interactive", model="gpt-4.1", + "run", execution_context="interactive", model="gpt-5-mini", ) assert result == "hitl" # run in background should be deny result = store.resolve_action( - "run", execution_context="background", model="gpt-4.1", + "run", execution_context="background", model="gpt-5-mini", ) assert result == "deny" def test_resolve_action_mslearn_allowed_for_cautious(self, tmp_path) -> None: store = GuardrailsConfigStore(tmp_path / "g.json") store.set_hitl_enabled(True) - store.apply_model_defaults(["gpt-4.1"]) + store.apply_model_defaults(["gpt-5-mini"]) result = store.resolve_action( - "mcp:microsoft-learn", mcp_server="microsoft-learn", model="gpt-4.1", + "mcp:microsoft-learn", mcp_server="microsoft-learn", model="gpt-5-mini", ) # MS Learn is low risk, filtered even for cautious models assert result == "filter" diff --git a/app/runtime/tests/test_monitoring.py b/app/runtime/tests/test_monitoring.py index a98d34e..d0deb21 100644 --- a/app/runtime/tests/test_monitoring.py +++ b/app/runtime/tests/test_monitoring.py @@ -424,7 +424,8 @@ def routes_with_az( ) -> object: from app.runtime.server.routes.monitoring_routes import MonitoringRoutes - return MonitoringRoutes(store, az=mock_az, deploy_store=None) + deploy_store = MagicMock() + return MonitoringRoutes(store, az=mock_az, deploy_store=deploy_store) @pytest.fixture(autouse=True) def _reset_otel(self) -> None: @@ -603,24 +604,26 @@ async def test_provision_already_provisioned( assert "already provisioned" in data["message"].lower() async def test_provision_success(self, 
store, routes_with_az, mock_az) -> None: - """Full provisioning flow with mocked az CLI calls.""" - mock_az.ok.return_value = Result(success=True, message="") - mock_az.json.side_effect = [ - # 1. group show -> None (doesn't exist) - None, - # 2. group create - {"id": "/subscriptions/sub/resourceGroups/rg"}, - # 3. workspace create - {"id": "/subscriptions/sub/resourceGroups/rg/providers/...workspace_id"}, - # 4. app-insights create - {"connectionString": _FAKE_CS, "name": "polyclaw-insights-test"}, - ] + """Full provisioning flow via Bicep deployer.""" + from app.runtime.services.deployment.bicep_deployer import BicepDeployResult + + bicep_result = BicepDeployResult( + ok=True, + deploy_id="test-deploy", + app_insights_connection_string=_FAKE_CS, + app_insights_name="polyclaw-insights-test", + log_analytics_workspace_name="polyclaw-logs-test", + steps=[ + {"step": "bicep_deploy", "status": "ok", "detail": "Deployed"}, + ], + ) app = _build_app(routes_with_az.register) async with TestClient(TestServer(app)) as client: with patch( "app.runtime.server.routes.monitoring_routes.run_sync", - side_effect=lambda fn, *a, **kw: fn(*a, **kw), + new_callable=AsyncMock, + return_value=bicep_result, ): mock_cam = MagicMock() with patch.dict( @@ -644,16 +647,25 @@ async def test_provision_success(self, store, routes_with_az, mock_az) -> None: assert store.is_provisioned is True assert store.enabled is True - async def test_provision_extension_failure( + async def test_provision_bicep_failure( self, store, routes_with_az, mock_az ) -> None: - mock_az.ok.return_value = Result(success=False, message="extension install failed") + from app.runtime.services.deployment.bicep_deployer import BicepDeployResult + + bicep_result = BicepDeployResult( + ok=False, + error="deployment failed", + steps=[ + {"step": "bicep_deploy", "status": "failed", "detail": "deployment failed"}, + ], + ) app = _build_app(routes_with_az.register) async with TestClient(TestServer(app)) as client: with patch( 
"app.runtime.server.routes.monitoring_routes.run_sync", - side_effect=lambda fn, *a, **kw: fn(*a, **kw), + new_callable=AsyncMock, + return_value=bicep_result, ): resp = await client.post( "/api/monitoring/provision", diff --git a/app/runtime/tests/test_restart_survival.py b/app/runtime/tests/test_restart_survival.py new file mode 100644 index 0000000..0470f21 --- /dev/null +++ b/app/runtime/tests/test_restart_survival.py @@ -0,0 +1,632 @@ +"""Docker restart survival integration test. + +Verifies that the polyclaw container stack survives restarts without +losing healthy state. Collects container logs and diagnostics when a +failure is detected to help pinpoint the root cause. + +Usage: + pytest app/runtime/tests/test_restart_survival.py --run-slow -s + +Requires Docker to be running and the project root to contain a valid +docker-compose.yml. The test tears down its stack on exit (even on +failure) so it does not leave containers lying around. +""" + +from __future__ import annotations + +import json +import logging +import subprocess +import time +from pathlib import Path +from typing import Any + +import pytest + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +_PROJECT_ROOT = Path(__file__).resolve().parents[3] +_COMPOSE_FILE = _PROJECT_ROOT / "docker-compose.yml" +_HEALTH_URL_ADMIN = "http://localhost:9090/health" +_RUNTIME_HEALTH_INTERNAL = "http://localhost:8080/health" +_ADMIN_CONTAINER = "polyclaw-admin" +_RUNTIME_CONTAINER = "polyclaw-runtime" + +# Timeouts +_BUILD_TIMEOUT = 600 # 10 min for image build +_BOOT_TIMEOUT = 120 # 2 min for containers to become healthy +_HEALTH_POLL_INTERVAL = 2 # seconds between health checks +_RESTART_SETTLE = 5 # seconds to wait after restart command +_CHAT_TIMEOUT = 90 # seconds to wait for a chat response + +# A small Python script that runs *inside* the 
runtime container, opens a +# WebSocket to the local chat endpoint, sends a probe message, and prints +# the concatenated response. Uses only stdlib + aiohttp (already installed +# in the container image). +# +# Exit codes: +# 0 -- got a response (printed to stdout) +# 2 -- agent not authenticated (prints error detail to stderr) +# 1 -- other failure +_CHAT_PROBE_SCRIPT = r""" +import asyncio, json, sys, os + +import aiohttp + +async def main(): + # Read the admin secret for authentication + secret = "" + try: + with open("/data/.env") as f: + for line in f: + if line.startswith("ADMIN_SECRET="): + secret = line.split("=", 1)[1].strip().strip('"') + except FileNotFoundError: + pass + + # Connect directly to the local server (runtime listens on 8080) + port = os.environ.get("ADMIN_PORT", "8080") + url = f"http://localhost:{port}/api/chat/ws" + headers = {} + if secret: + headers["Authorization"] = f"Bearer {secret}" + + timeout = aiohttp.ClientTimeout(total=80) + async with aiohttp.ClientSession(timeout=timeout) as session: + async with session.ws_connect(url, headers=headers) as ws: + await ws.send_json({"action": "send", "text": "Reply with exactly: HEALTH_PROBE_OK"}) + chunks = [] + full_messages = [] + async for msg in ws: + if msg.type == aiohttp.WSMsgType.TEXT: + data = json.loads(msg.data) + t = data.get("type", "") + if t == "delta": + chunks.append(data.get("content", "")) + elif t == "message": + full_messages.append(data.get("content", "")) + elif t == "done": + break + elif t == "error": + content = data.get("content", "") + # Distinguish "not authenticated" from other errors + if "not authenticated" in content.lower() or "not respond" in content.lower(): + print(content, file=sys.stderr) + sys.exit(2) + print("ERROR:" + content, file=sys.stderr) + sys.exit(1) + elif msg.type in (aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.ERROR): + break + response = "".join(chunks) or "\n".join(full_messages) + print(response) + +asyncio.run(main()) +""" + + +# 
--------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _run( + cmd: list[str], + *, + timeout: int = 60, + check: bool = True, + cwd: Path | None = None, +) -> subprocess.CompletedProcess[str]: + """Run a shell command and return the result.""" + return subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=timeout, + check=check, + cwd=cwd or _PROJECT_ROOT, + ) + + +def _compose(*args: str, timeout: int = 60) -> subprocess.CompletedProcess[str]: + """Run a ``docker compose`` command against the project root.""" + return _run(["docker", "compose", *args], timeout=timeout) + + +def _poll_health(url: str, timeout: float = _BOOT_TIMEOUT) -> dict[str, Any] | None: + """Poll a health endpoint until it returns 200 or ``timeout`` expires. + + Returns the parsed JSON body on success, ``None`` on timeout. + """ + deadline = time.monotonic() + timeout + last_error = "" + while time.monotonic() < deadline: + try: + r = _run( + ["curl", "-sf", "--max-time", "3", url], + check=False, + timeout=10, + ) + if r.returncode == 0 and r.stdout.strip(): + return json.loads(r.stdout) + except (subprocess.TimeoutExpired, json.JSONDecodeError, Exception) as exc: + last_error = str(exc) + time.sleep(_HEALTH_POLL_INTERVAL) + + logger.warning("Health poll timed out for %s (last error: %s)", url, last_error) + return None + + +def _poll_runtime_health(timeout: float = _BOOT_TIMEOUT) -> dict[str, Any] | None: + """Poll the runtime health endpoint via ``docker exec``. + + The runtime container only exposes port 3978 (bot endpoint) to the + host. The web server on port 8080 is only reachable from inside the + container or the Docker network. 
+ """ + deadline = time.monotonic() + timeout + last_error = "" + while time.monotonic() < deadline: + try: + r = _run( + [ + "docker", "exec", _RUNTIME_CONTAINER, + "curl", "-sf", "--max-time", "3", _RUNTIME_HEALTH_INTERNAL, + ], + check=False, + timeout=10, + ) + if r.returncode == 0 and r.stdout.strip(): + return json.loads(r.stdout) + except (subprocess.TimeoutExpired, json.JSONDecodeError, Exception) as exc: + last_error = str(exc) + time.sleep(_HEALTH_POLL_INTERVAL) + + logger.warning("Runtime health poll timed out (last error: %s)", last_error) + return None + + +def _send_chat_probe() -> tuple[str | None, str]: + """Send a chat probe message via WebSocket and return the response. + + Runs a small Python script inside the **runtime** container that + connects to the local WebSocket chat endpoint, sends a test prompt, + and collects the streamed response. + + Returns ``(response_text, status)`` where status is one of: + - ``"ok"`` -- got a response containing the expected marker + - ``"not_authenticated"`` -- agent is not authenticated (GITHUB_TOKEN missing) + - ``"empty"`` -- connected but got an empty response + - ``"error"`` -- script failed or timed out + """ + try: + r = _run( + [ + "docker", "exec", _RUNTIME_CONTAINER, + "python", "-c", _CHAT_PROBE_SCRIPT, + ], + check=False, + timeout=_CHAT_TIMEOUT, + ) + if r.returncode == 2: + # Agent not authenticated -- expected in Docker without GITHUB_TOKEN + return None, "not_authenticated" + if r.returncode == 0 and r.stdout.strip(): + return r.stdout.strip(), "ok" + if r.returncode == 0: + return None, "empty" + logger.warning( + "Chat probe failed (exit %d): stdout=%r stderr=%r", + r.returncode, r.stdout[:200], r.stderr[:200], + ) + return None, "error" + except subprocess.TimeoutExpired: + logger.warning("Chat probe timed out after %ds", _CHAT_TIMEOUT) + return None, "error" + except Exception as exc: + logger.warning("Chat probe error: %s", exc) + return None, "error" + + +def _container_logs(container: 
str, tail: int = 200) -> str: + """Fetch the last ``tail`` lines of logs from a container.""" + try: + r = _run( + ["docker", "logs", "--tail", str(tail), container], + check=False, + timeout=15, + ) + return (r.stdout + r.stderr).strip() + except Exception as exc: + return f"" + + +def _container_inspect(container: str) -> dict[str, Any]: + """Return the docker inspect JSON for a container.""" + try: + r = _run( + ["docker", "inspect", container], + check=False, + timeout=15, + ) + if r.returncode == 0: + data = json.loads(r.stdout) + return data[0] if data else {} + except Exception: + pass + return {} + + +def _collect_diagnostics(phase: str) -> dict[str, Any]: + """Gather container state and logs for failure analysis.""" + diag: dict[str, Any] = {"phase": phase, "timestamp": time.time()} + + for name in (_ADMIN_CONTAINER, _RUNTIME_CONTAINER): + info = _container_inspect(name) + state = info.get("State", {}) + diag[name] = { + "status": state.get("Status", "unknown"), + "running": state.get("Running", False), + "exit_code": state.get("ExitCode", -1), + "oom_killed": state.get("OOMKilled", False), + "restart_count": info.get("RestartCount", 0), + "started_at": state.get("StartedAt", ""), + "finished_at": state.get("FinishedAt", ""), + "health": state.get("Health", {}).get("Status", "none"), + "logs_tail": _container_logs(name, tail=80), + } + + return diag + + +def _format_diagnostics(diag: dict[str, Any]) -> str: + """Render diagnostics as a human-readable report.""" + lines = [ + f"\n{'='*72}", + f"RESTART SURVIVAL DIAGNOSTICS -- phase: {diag['phase']}", + f"{'='*72}", + ] + for name in (_ADMIN_CONTAINER, _RUNTIME_CONTAINER): + info = diag.get(name, {}) + lines.append(f"\n--- {name} ---") + lines.append(f" status: {info.get('status')}") + lines.append(f" running: {info.get('running')}") + lines.append(f" exit_code: {info.get('exit_code')}") + lines.append(f" oom_killed: {info.get('oom_killed')}") + lines.append(f" restart_count: {info.get('restart_count')}") + 
lines.append(f" health: {info.get('health')}") + lines.append(f" started_at: {info.get('started_at')}") + lines.append(f" finished_at: {info.get('finished_at')}") + logs = info.get("logs_tail", "") + if logs: + lines.append(f" logs (last 80 lines):") + for log_line in logs.splitlines(): + lines.append(f" {log_line}") + lines.append(f"{'='*72}\n") + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="module") +def _docker_available() -> None: + """Skip the entire module if Docker is not available.""" + try: + r = _run(["docker", "info"], check=False, timeout=10) + if r.returncode != 0: + pytest.skip("Docker daemon not available") + except (FileNotFoundError, subprocess.TimeoutExpired): + pytest.skip("Docker CLI not found or timed out") + + if not _COMPOSE_FILE.exists(): + pytest.skip(f"docker-compose.yml not found at {_COMPOSE_FILE}") + + +@pytest.fixture(scope="module") +def compose_stack(_docker_available: None) -> str: + """Build and start the compose stack; tear it down after all tests.""" + # Tear down any pre-existing stack so we start clean + _compose("down", "--remove-orphans", timeout=60) + + # Build + try: + _compose("build", timeout=_BUILD_TIMEOUT) + except subprocess.CalledProcessError as exc: + pytest.fail(f"docker compose build failed:\n{exc.stderr}") + + # Start -- use a longer timeout because `docker compose up -d` blocks + # until healthcheck-dependent containers report healthy. 
+ try: + _compose("up", "-d", "--wait", timeout=_BOOT_TIMEOUT + 30) + except subprocess.CalledProcessError as exc: + # Collect diagnostics before tearing down + diag = _collect_diagnostics("compose_up") + _compose("down", "--remove-orphans", timeout=30) + pytest.fail( + f"docker compose up failed:\n{exc.stderr}" + + _format_diagnostics(diag) + ) + + yield _ADMIN_CONTAINER + + # Teardown -- always run + _compose("down", "--remove-orphans", timeout=60) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +@pytest.mark.slow +class TestRestartSurvival: + """Verify the container stack survives stop/start and restart cycles.""" + + def test_initial_boot_healthy(self, compose_stack: str) -> None: + """After first ``docker compose up``, both containers become healthy.""" + admin_health = _poll_health(_HEALTH_URL_ADMIN) + if admin_health is None: + diag = _collect_diagnostics("initial_boot_admin") + pytest.fail( + f"Admin container did not become healthy within {_BOOT_TIMEOUT}s." + + _format_diagnostics(diag) + ) + assert admin_health["status"] == "ok" + + runtime_health = _poll_runtime_health() + if runtime_health is None: + diag = _collect_diagnostics("initial_boot_runtime") + pytest.fail( + f"Runtime container did not become healthy within {_BOOT_TIMEOUT}s." + + _format_diagnostics(diag) + ) + assert runtime_health["status"] == "ok" + + def test_chat_works_after_boot(self, compose_stack: str) -> None: + """The chat WebSocket accepts a prompt and returns a response. + + If the Copilot CLI is not authenticated (no GITHUB_TOKEN in the + container), the test still passes because the WebSocket pipeline + itself is functional -- only the upstream model is unreachable. 
+ """ + # Runtime must be up first + if _poll_runtime_health(timeout=30) is None: + pytest.skip("Runtime not healthy -- cannot test chat") + + response, status = _send_chat_probe() + + if status == "not_authenticated": + # WebSocket worked, agent processed the message, but Copilot + # CLI has no token. This is not a restart failure. + logger.info("Chat probe: agent not authenticated (expected in test)") + return + + if status == "error": + diag = _collect_diagnostics("chat_after_boot") + pytest.fail( + "Chat probe failed (WebSocket unreachable or script error) after initial boot." + + _format_diagnostics(diag) + ) + + if status == "empty": + diag = _collect_diagnostics("chat_after_boot_empty") + pytest.fail( + "Chat probe connected but got empty response after initial boot." + + _format_diagnostics(diag) + ) + + assert response is not None + assert "HEALTH_PROBE_OK" in response, ( + f"Chat response did not contain expected marker: {response[:200]}" + ) + + def test_restart_admin_survives(self, compose_stack: str) -> None: + """Restarting the admin container recovers to healthy state.""" + # Ensure we start from a healthy baseline + baseline = _poll_health(_HEALTH_URL_ADMIN) + if baseline is None: + pytest.skip("Admin not healthy before restart test") + + _compose("restart", "admin") + time.sleep(_RESTART_SETTLE) + + health = _poll_health(_HEALTH_URL_ADMIN) + if health is None: + diag = _collect_diagnostics("restart_admin") + pytest.fail( + "Admin container did not recover after restart." 
+ + _format_diagnostics(diag) + ) + assert health["status"] == "ok" + + def test_restart_runtime_survives(self, compose_stack: str) -> None: + """Restarting the runtime container recovers to healthy state.""" + baseline = _poll_runtime_health() + if baseline is None: + pytest.skip("Runtime not healthy before restart test") + + _compose("restart", "runtime") + time.sleep(_RESTART_SETTLE) + + health = _poll_runtime_health() + if health is None: + diag = _collect_diagnostics("restart_runtime") + pytest.fail( + "Runtime container did not recover after restart." + + _format_diagnostics(diag) + ) + assert health["status"] == "ok" + + def test_full_stack_restart_survives(self, compose_stack: str) -> None: + """Full ``docker compose restart`` recovers both containers.""" + _compose("restart") + time.sleep(_RESTART_SETTLE) + + admin_health = _poll_health(_HEALTH_URL_ADMIN) + runtime_health = _poll_runtime_health() + + failures: list[str] = [] + if admin_health is None: + failures.append("admin did not recover") + elif admin_health["status"] != "ok": + failures.append(f"admin status={admin_health['status']}") + + if runtime_health is None: + failures.append("runtime did not recover") + elif runtime_health["status"] != "ok": + failures.append(f"runtime status={runtime_health['status']}") + + if failures: + diag = _collect_diagnostics("full_stack_restart") + pytest.fail( + f"Full stack restart failed: {', '.join(failures)}" + + _format_diagnostics(diag) + ) + + def test_chat_works_after_restart(self, compose_stack: str) -> None: + """Chat still works after a full stack restart. + + Same tolerance as ``test_chat_works_after_boot``: if the agent + is not authenticated the test passes because the WebSocket + pipeline itself survived the restart. 
+ """ + if _poll_runtime_health(timeout=30) is None: + pytest.skip("Runtime not healthy after restart -- cannot test chat") + + response, status = _send_chat_probe() + + if status == "not_authenticated": + logger.info("Chat probe post-restart: agent not authenticated (expected)") + return + + if status == "error": + diag = _collect_diagnostics("chat_after_restart") + pytest.fail( + "Chat probe failed (WebSocket unreachable or script error) after restart." + + _format_diagnostics(diag) + ) + + if status == "empty": + diag = _collect_diagnostics("chat_after_restart_empty") + pytest.fail( + "Chat probe connected but got empty response after restart." + + _format_diagnostics(diag) + ) + + assert response is not None + assert "HEALTH_PROBE_OK" in response, ( + f"Chat response did not contain expected marker: {response[:200]}" + ) + + def test_stop_start_cycle_survives(self, compose_stack: str) -> None: + """``docker compose stop`` then ``up -d`` recovers cleanly.""" + _compose("stop", timeout=30) + time.sleep(2) + + # Verify containers are actually stopped + for name in (_ADMIN_CONTAINER, _RUNTIME_CONTAINER): + info = _container_inspect(name) + state = info.get("State", {}) + assert not state.get("Running", True), f"{name} still running after stop" + + _compose("up", "-d") + time.sleep(_RESTART_SETTLE) + + admin_health = _poll_health(_HEALTH_URL_ADMIN) + if admin_health is None: + diag = _collect_diagnostics("stop_start_admin") + pytest.fail( + "Admin did not recover after stop/start cycle." + + _format_diagnostics(diag) + ) + assert admin_health["status"] == "ok" + + runtime_health = _poll_runtime_health() + if runtime_health is None: + diag = _collect_diagnostics("stop_start_runtime") + pytest.fail( + "Runtime did not recover after stop/start cycle." 
+ + _format_diagnostics(diag) + ) + assert runtime_health["status"] == "ok" + + def test_rapid_restart_cycle(self, compose_stack: str) -> None: + """Three rapid restarts in succession do not corrupt state.""" + for i in range(3): + _compose("restart") + time.sleep(_RESTART_SETTLE) + + admin_health = _poll_health(_HEALTH_URL_ADMIN) + runtime_health = _poll_runtime_health() + + if admin_health is None or runtime_health is None: + diag = _collect_diagnostics("rapid_restart") + unhealthy = [] + if admin_health is None: + unhealthy.append("admin") + if runtime_health is None: + unhealthy.append("runtime") + pytest.fail( + f"Containers unhealthy after 3 rapid restarts: {', '.join(unhealthy)}" + + _format_diagnostics(diag) + ) + + assert admin_health["status"] == "ok" + assert runtime_health["status"] == "ok" + + def test_state_files_survive_restart(self, compose_stack: str) -> None: + """State files on the shared volume are not corrupted by restarts. + + Writes a marker file, restarts, then verifies the marker is intact. + """ + marker = {"test": "restart_survival", "ts": time.time()} + marker_json = json.dumps(marker) + + # Write marker into the shared data volume via docker exec + r = _run( + [ + "docker", "exec", _ADMIN_CONTAINER, + "python", "-c", + f"import pathlib; pathlib.Path('/data/.restart_test_marker.json').write_text('{marker_json}')", + ], + check=False, + ) + if r.returncode != 0: + pytest.skip(f"Could not write marker file: {r.stderr}") + + _compose("restart") + time.sleep(_RESTART_SETTLE) + _poll_health(_HEALTH_URL_ADMIN, timeout=60) + + # Read marker back + r = _run( + [ + "docker", "exec", _ADMIN_CONTAINER, + "cat", "/data/.restart_test_marker.json", + ], + check=False, + ) + if r.returncode != 0: + diag = _collect_diagnostics("state_file_read") + pytest.fail( + "Could not read marker file after restart." 
+ + _format_diagnostics(diag) + ) + + recovered = json.loads(r.stdout) + assert recovered == marker, f"Marker mismatch: {recovered} != {marker}" + + # Cleanup + _run( + [ + "docker", "exec", _ADMIN_CONTAINER, + "rm", "-f", "/data/.restart_test_marker.json", + ], + check=False, + ) diff --git a/app/runtime/tests/test_settings.py b/app/runtime/tests/test_settings.py index 8e7780a..f02877d 100644 --- a/app/runtime/tests/test_settings.py +++ b/app/runtime/tests/test_settings.py @@ -10,7 +10,7 @@ class TestSettings: def test_defaults(self, data_dir: Path) -> None: s = Settings() - assert s.copilot_model == "claude-sonnet-4.6" + assert s.copilot_model == "gpt-4.1" assert s.admin_port == 9090 assert s.bot_port == 3978 diff --git a/app/runtime/tests/test_smoke_test.py b/app/runtime/tests/test_smoke_test.py index f73ad26..6b7d7b5 100644 --- a/app/runtime/tests/test_smoke_test.py +++ b/app/runtime/tests/test_smoke_test.py @@ -123,19 +123,11 @@ def test_check_auth_gh_authenticated(self) -> None: assert result is True assert r._steps[-1]["ok"] is True - def test_check_auth_with_token(self) -> None: - r = self._make_runner() - r._gh.status.return_value = {"authenticated": False, "details": "no gh"} - with patch("app.runtime.server.smoke_test.cfg") as mock_cfg: - mock_cfg.github_token = "ghp_test" - result = r._check_auth() - assert result is True - def test_check_auth_no_auth(self) -> None: r = self._make_runner() r._gh.status.return_value = {"authenticated": False, "details": "not logged in"} with patch("app.runtime.server.smoke_test.cfg") as mock_cfg: - mock_cfg.github_token = "" + mock_cfg.foundry_endpoint = "" result = r._check_auth() assert result is False diff --git a/app/tui/src/api/client.ts b/app/tui/src/api/client.ts index 1e845a5..2cd6a6f 100644 --- a/app/tui/src/api/client.ts +++ b/app/tui/src/api/client.ts @@ -62,7 +62,6 @@ export class ApiClient { async getSetupStatus() { return this.fetch<{ azure?: { logged_in?: boolean; user?: string; subscription?: string }; - 
copilot?: { authenticated?: boolean; details?: string }; prerequisites_configured?: boolean; telegram_configured?: boolean; tunnel?: { active?: boolean; url?: string }; @@ -75,8 +74,6 @@ export class ApiClient { async azureLogin() { return this.fetch>("/api/setup/azure/login", { method: "POST" }); } async azureLogout() { return this.fetch>("/api/setup/azure/logout", { method: "POST" }); } async azureCheck() { return this.fetch>("/api/setup/azure/check"); } - async copilotLogin() { return this.fetch>("/api/setup/copilot/login", { method: "POST" }); } - async copilotStatus() { return this.fetch>("/api/setup/copilot/status"); } async startTunnel() { return this.fetch>("/api/setup/tunnel/start", { method: "POST" }); } async smokeTest() { return this.fetch>("/api/setup/smoke-test", { method: "POST" }); } diff --git a/app/tui/src/config/constants.ts b/app/tui/src/config/constants.ts index bbf6c8a..5dc910c 100644 --- a/app/tui/src/config/constants.ts +++ b/app/tui/src/config/constants.ts @@ -82,14 +82,12 @@ export const STARTUP_PHASES = [ { key: "start", label: "Container" }, { key: "server", label: "Server" }, { key: "azure", label: "Azure" }, - { key: "github", label: "GitHub" }, { key: "tunnel", label: "Tunnel" }, { key: "bot", label: "Bot" }, ] as const; export const STATUS_ITEMS = [ { key: "azure", label: "Azure" }, - { key: "github", label: "GitHub" }, { key: "tunnel", label: "Tunnel" }, { key: "bot", label: "Bot" }, ] as const; diff --git a/app/tui/src/config/types.ts b/app/tui/src/config/types.ts index f6493fd..3732898 100644 --- a/app/tui/src/config/types.ts +++ b/app/tui/src/config/types.ts @@ -8,7 +8,6 @@ export interface StatusResponse { azure?: { logged_in?: boolean; user?: string; subscription?: string }; - copilot?: { authenticated?: boolean; details?: string }; prerequisites_configured?: boolean; telegram_configured?: boolean; tunnel?: { active?: boolean; url?: string }; diff --git a/app/tui/src/deploy/aca.ts b/app/tui/src/deploy/aca.ts index 
6d8209c..378e42b 100644 --- a/app/tui/src/deploy/aca.ts +++ b/app/tui/src/deploy/aca.ts @@ -30,6 +30,7 @@ import { getAdminSecret, resolveKvSecret, waitForReady, + writeAzureOverride, } from "./docker.js"; const PROJECT_ROOT = resolve(import.meta.dir, "../../../.."); @@ -271,6 +272,8 @@ export class AcaDeployTarget implements DeployTarget { await exec(["docker", "compose", "down", "--remove-orphans"], PROJECT_ROOT); } catch { /* may not be running */ } + writeAzureOverride(); + const { exitCode, stderr } = await exec( ["docker", "compose", "up", "-d", "admin"], PROJECT_ROOT, diff --git a/app/tui/src/deploy/docker.ts b/app/tui/src/deploy/docker.ts index 666dda5..f79c7e4 100644 --- a/app/tui/src/deploy/docker.ts +++ b/app/tui/src/deploy/docker.ts @@ -11,7 +11,9 @@ * down` is called on exit. */ -import { resolve } from "path"; +import { existsSync, unlinkSync, writeFileSync } from "fs"; +import { homedir } from "os"; +import { join, resolve } from "path"; import type { DeployResult, LogStream } from "../config/types.js"; import type { DeployTarget } from "./target.js"; import { exec, execStream } from "./process.js"; @@ -22,6 +24,45 @@ const PROJECT_ROOT = resolve(import.meta.dir, "../../../.."); /** Well-known container name from docker-compose.yml. */ const ADMIN_CONTAINER = "polyclaw-admin"; +/** Path to the auto-generated compose override for Azure CLI mounts. */ +const OVERRIDE_PATH = join(PROJECT_ROOT, "docker-compose.override.yml"); + +// --------------------------------------------------------------------------- +// Azure CLI credential mount +// --------------------------------------------------------------------------- + +/** + * Write a `docker-compose.override.yml` that bind-mounts the host's + * `~/.azure` directory into the admin container. + * + * This lets the admin container reuse the host's `az login` session + * so the user doesn't need to re-authenticate inside the container. + * The override is a no-op if `~/.azure` doesn't exist. 
+ */ +export function writeAzureOverride(): void { + const azureDir = join(homedir(), ".azure"); + if (!existsSync(azureDir)) return; + + const yaml = [ + "# Auto-generated by the Polyclaw TUI -- do not commit.", + "services:", + " admin:", + " volumes:", + ` - ${azureDir}:/admin-home/.azure`, + ].join("\n") + "\n"; + + writeFileSync(OVERRIDE_PATH, yaml, "utf-8"); +} + +/** Remove the auto-generated compose override. */ +export function removeAzureOverride(): void { + try { + unlinkSync(OVERRIDE_PATH); + } catch { + // May not exist + } +} + // --------------------------------------------------------------------------- // Standalone functions (also used by the headless bot-only mode) // --------------------------------------------------------------------------- @@ -77,6 +118,7 @@ export async function killExisting(_adminPort?: number, _botPort?: number): Prom } catch { // Stack may not be running -- ignore } + removeAzureOverride(); } /** @@ -92,6 +134,7 @@ export async function startContainer( _mode: string, ): Promise { await killExisting(); + writeAzureOverride(); const { exitCode, stderr } = await exec( ["docker", "compose", "up", "-d"], @@ -115,6 +158,7 @@ export async function stopContainer(_containerId: string): Promise { } catch { // May already be stopped } + removeAzureOverride(); } /** Read the admin secret from the shared data volume. diff --git a/app/tui/src/index.ts b/app/tui/src/index.ts index 6441595..f41f36b 100644 --- a/app/tui/src/index.ts +++ b/app/tui/src/index.ts @@ -1,10 +1,20 @@ /** * Polyclaw TUI -- entry point. * - * Admin mode: launches the interactive TUI (disclaimer -> target picker - * -> deploy lifecycle & chat). + * Admin mode: launches the interactive TUI (disclaimer -> target picker + * -> deploy lifecycle & chat). * - * Bot mode: headless -- Docker build, run, block until Ctrl-C. + * Bot mode: headless -- Docker build, run, block until Ctrl-C. + * + * Start mode: headless -- build, start, print admin URL, block. 
+ * Designed for scripts and CI: no TUI, no disclaimer, no + * interactive prompts. + * + * Run mode: headless -- build, start, send a single prompt via the + * chat API, print the response, and exit. Designed for + * scripted single-shot interactions. + * + * Health mode: check if the stack is already running and healthy. */ import { @@ -24,13 +34,82 @@ import { pickDeployTarget } from "./ui/target-picker.js"; // ----------------------------------------------------------------------- function usage(): void { - console.log("Usage: polyclaw-cli [admin|bot]"); + console.log("Usage: polyclaw-cli [options]"); console.log(""); - console.log(" admin - TUI with status dashboard and chat (default)"); - console.log(" bot - Bot Framework server only (headless)"); + console.log("Commands:"); + console.log(" admin Interactive TUI with dashboard and chat (default)"); + console.log(" bot Bot Framework server only (headless)"); + console.log(" start Build, start, and print admin URL (scriptable)"); + console.log(" run Start stack, send prompt, print response, exit"); + console.log(" health Check if the stack is running and healthy"); + console.log(" stop Stop the running stack"); + console.log(""); + console.log("Environment:"); + console.log(" ADMIN_PORT Admin server port (default: 8080)"); + console.log(" BOT_PORT Bot Framework port (default: 3978)"); console.log(""); } +const VALID_MODES = ["admin", "bot", "start", "run", "health", "stop"]; + +// ----------------------------------------------------------------------- +// CLI helpers +// ----------------------------------------------------------------------- + +/** Build + start the compose stack, returning the instance ID. 
*/ +async function ensureStack( + adminPort: number, + botPort: number, + onLine?: (line: string) => void, +): Promise { + const buildOk = await buildImage(onLine); + if (!buildOk) { + console.error("Docker build failed."); + process.exit(1); + } + + try { + return await startContainer(adminPort, botPort, "bot"); + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + console.error("Failed to start containers:", msg); + process.exit(1); + } +} + +/** Resolve the admin secret and build the full admin URL. */ +async function resolveAdminUrl(port: number): Promise<{ secret: string; url: string }> { + let secret = await getAdminSecret(); + if (secret.startsWith("@kv:")) { + secret = await resolveKvSecret(secret); + } + const url = secret + ? `http://localhost:${port}/?secret=${secret}` + : `http://localhost:${port}`; + return { secret, url }; +} + +/** Wait for the stack to become healthy or exit with an error. */ +async function waitOrDie(baseUrl: string, instanceId: string): Promise { + const ready = await waitForReady(baseUrl); + if (!ready) { + console.error("Server did not become ready."); + await stopContainer(instanceId); + process.exit(1); + } +} + +/** Wire Ctrl-C / SIGTERM to gracefully stop the stack. 
*/ +function wireShutdown(instanceId: string): void { + const shutdown = async () => { + console.log("\nStopping..."); + await stopContainer(instanceId); + process.exit(0); + }; + process.on("SIGINT", shutdown); + process.on("SIGTERM", shutdown); +} + // ----------------------------------------------------------------------- // Main // ----------------------------------------------------------------------- @@ -43,14 +122,15 @@ async function main(): Promise { process.exit(0); } - if (!["admin", "bot"].includes(mode)) { - console.error(`Unknown mode: ${mode}`); + if (!VALID_MODES.includes(mode)) { + console.error(`Unknown command: ${mode}`); usage(); process.exit(1); } const adminPort = parseInt(process.env.ADMIN_PORT || "8080", 10); const botPort = parseInt(process.env.BOT_PORT || "3978", 10); + const composeAdminPort = 9090; // ---- Admin TUI mode --------------------------------------------------- if (mode === "admin") { @@ -61,57 +141,153 @@ async function main(): Promise { return; } - // ---- Bot-only mode (headless) ----------------------------------------- - console.log("Building polyclaw v3..."); - console.log(""); + // ---- Health check (no build, no start) -------------------------------- + if (mode === "health") { + try { + const res = await fetch(`http://localhost:${composeAdminPort}/health`, { + signal: AbortSignal.timeout(5_000), + }); + if (res.ok) { + const body = await res.json(); + console.log(JSON.stringify(body, null, 2)); + process.exit(0); + } else { + console.error(`Health check failed: ${res.status} ${res.statusText}`); + process.exit(1); + } + } catch { + console.error("Stack is not running or not reachable."); + process.exit(1); + } + } - const buildOk = await buildImage(); - if (!buildOk) { - console.error("Build failed."); - process.exit(1); + // ---- Stop ------------------------------------------------------------- + if (mode === "stop") { + console.log("Stopping stack..."); + await stopContainer("polyclaw-admin"); + 
console.log("Stopped."); + process.exit(0); } - console.log("Starting polyclaw (admin + runtime)..."); - let instanceId: string; - try { - instanceId = await startContainer(adminPort, botPort, "bot"); - } catch (err: unknown) { - const msg = err instanceof Error ? err.message : String(err); - console.error("Failed to start containers:", msg); - process.exit(1); + // ---- Start mode (scriptable, headless) -------------------------------- + if (mode === "start") { + console.log("Building and starting polyclaw..."); + const instanceId = await ensureStack(adminPort, botPort); + const { url } = await resolveAdminUrl(composeAdminPort); + + console.log(`Runtime: http://localhost:8080`); + console.log(`Admin: ${url}`); + + wireShutdown(instanceId); + + console.log("Waiting for server..."); + await waitOrDie(`http://localhost:${composeAdminPort}`, instanceId); + console.log("Server is ready. Press Ctrl+C to stop."); + await new Promise(() => {}); + return; } - // Admin container listens on 9090 (docker-compose.yml) - const composeAdminPort = 9090; + // ---- Run mode (single prompt, headless) ------------------------------- + if (mode === "run") { + const prompt = process.argv.slice(3).join(" ").trim(); + if (!prompt) { + console.error("Usage: polyclaw-cli run "); + process.exit(1); + } - let secret = await getAdminSecret(); - if (secret.startsWith("@kv:")) { - secret = await resolveKvSecret(secret); + console.log("Building and starting polyclaw..."); + const instanceId = await ensureStack(adminPort, botPort, (line) => { + // Suppress build output in run mode unless verbose + if (process.env.VERBOSE) console.log(line); + }); + + const { secret } = await resolveAdminUrl(composeAdminPort); + const baseUrl = `http://localhost:${composeAdminPort}`; + + console.log("Waiting for server..."); + await waitOrDie(baseUrl, instanceId); + + // Send the prompt via the chat WebSocket + let response = ""; + try { + const wsUrl = secret + ? 
`ws://localhost:${composeAdminPort}/api/chat/ws?token=${secret}` + : `ws://localhost:${composeAdminPort}/api/chat/ws`; + const ws = new WebSocket(wsUrl); + + response = await new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + ws.close(); + reject(new Error("Chat response timed out after 120s")); + }, 120_000); + + const chunks: string[] = []; + + ws.onopen = () => { + ws.send(JSON.stringify({ + action: "send", + text: prompt, + })); + }; + + ws.onmessage = (event) => { + try { + const data = JSON.parse(String(event.data)); + if (data.type === "delta" && data.content) { + chunks.push(data.content); + } else if (data.type === "done" || data.type === "end") { + clearTimeout(timeout); + ws.close(); + resolve(chunks.join("")); + } else if (data.type === "error") { + clearTimeout(timeout); + ws.close(); + reject(new Error(data.content || data.message || "Chat error")); + } + } catch { + // Non-JSON message, ignore + } + }; + + ws.onerror = (err) => { + clearTimeout(timeout); + reject(new Error(`WebSocket error: ${err}`)); + }; + + ws.onclose = () => { + clearTimeout(timeout); + if (chunks.length > 0) { + resolve(chunks.join("")); + } + }; + }); + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + console.error(`Chat failed: ${msg}`); + await stopContainer(instanceId); + process.exit(1); + } + + console.log(response); + await stopContainer(instanceId); + process.exit(0); } - const adminUrl = secret - ? 
`http://localhost:${composeAdminPort}/?secret=${secret}` - : `http://localhost:${composeAdminPort}`; + // ---- Bot-only mode (headless) ----------------------------------------- + console.log("Building polyclaw..."); + console.log(""); + + const instanceId = await ensureStack(adminPort, botPort); + const { url: adminUrl } = await resolveAdminUrl(composeAdminPort); console.log(`Runtime on port 8080 | Admin on port ${composeAdminPort}`); console.log(`Admin: ${adminUrl}`); console.log(""); - const shutdown = async () => { - console.log("\nStopping..."); - await stopContainer(instanceId); - process.exit(0); - }; - process.on("SIGINT", shutdown); - process.on("SIGTERM", shutdown); + wireShutdown(instanceId); console.log("Waiting for server..."); - const ready = await waitForReady(`http://localhost:${composeAdminPort}`); - if (!ready) { - console.error("Server did not become ready."); - await stopContainer(instanceId); - process.exit(1); - } + await waitOrDie(`http://localhost:${composeAdminPort}`, instanceId); console.log("Server is ready. Press Ctrl+C to stop."); await new Promise(() => {}); } diff --git a/app/tui/src/screens/dashboard.ts b/app/tui/src/screens/dashboard.ts index 4577738..90aa388 100644 --- a/app/tui/src/screens/dashboard.ts +++ b/app/tui/src/screens/dashboard.ts @@ -111,14 +111,12 @@ export class DashboardScreen extends Screen { const dot = (ok: boolean) => ok ? "\x1b[32m●\x1b[0m" : "\x1b[31m●\x1b[0m"; const azOk = s.azure?.logged_in ?? false; - const ghOk = s.copilot?.authenticated ?? false; const tunnelOk = s.tunnel?.active ?? false; const botOk = s.bot_configured ?? false; const voiceOk = s.voice_call_configured ?? false; this.statusText.content = [ ` ${dot(azOk)} Azure ${azOk ? (s.azure?.user ?? "Logged in") : "Not logged in"}`, - ` ${dot(ghOk)} GitHub ${ghOk ? (s.copilot?.details ?? "Authenticated") : "Not authenticated"}`, ` ${dot(tunnelOk)} Tunnel ${tunnelOk ? (s.tunnel?.url ?? "Active") : "Inactive"}`, ` ${dot(botOk)} Bot ${botOk ? 
"Configured" : "Not configured"}`, ` ${dot(voiceOk)} Voice ${voiceOk ? "Configured" : "Not configured"}`, diff --git a/app/tui/src/screens/setup.ts b/app/tui/src/screens/setup.ts index 066cc06..8eb01de 100644 --- a/app/tui/src/screens/setup.ts +++ b/app/tui/src/screens/setup.ts @@ -1,5 +1,5 @@ /** - * Setup screen -- Azure auth, GitHub auth, tunnel, configuration, infrastructure. + * Setup screen -- Azure auth, tunnel, configuration, infrastructure. */ import { @@ -50,8 +50,7 @@ export class SetupScreen extends Screen { options: [ { name: "Azure Login", description: "Log in to Azure" }, { name: "Azure Logout", description: "Log out from Azure" }, - { name: "GitHub Login", description: "Authenticate with GitHub Copilot" }, - { name: "Set GitHub Token", description: "Set a personal access token" }, + { name: "Deploy Foundry", description: "Deploy AI models via Bicep" }, { name: "Start Tunnel", description: "Start dev tunnel" }, { name: "Run Smoke Test", description: "Test Copilot connectivity" }, { name: "Save Configuration", description: "Save bot and channel config" }, @@ -151,12 +150,10 @@ export class SetupScreen extends Screen { try { const s = await this.api.getSetupStatus(); const azOk = s.azure?.logged_in ?? false; - const ghOk = s.copilot?.authenticated ?? false; const tunnelOk = s.tunnel?.active ?? false; const dot = (ok: boolean) => ok ? "\x1b[32m●\x1b[0m" : "\x1b[31m●\x1b[0m"; this.authText.content = [ ` ${dot(azOk)} Azure ${azOk ? `${s.azure?.user ?? ""} (${s.azure?.subscription ?? ""})` : "Not logged in -- run 'Azure Login' below"}`, - ` ${dot(ghOk)} GitHub ${ghOk ? (s.copilot?.details ?? "Authenticated") : "Not authenticated -- run 'GitHub Login' below"}`, ` ${dot(tunnelOk)} Tunnel ${tunnelOk ? (s.tunnel?.url ?? 
"Active") : "Not active -- run 'Start Tunnel' below"}`, ].join("\n"); } catch (err: unknown) { @@ -209,8 +206,7 @@ export class SetupScreen extends Screen { const actions: (() => Promise)[] = [ () => this.doAzureLogin(), () => this.doAzureLogout(), - () => this.doGitHubLogin(), - () => this.doSetGitHubToken(), + () => this.doDeployFoundry(), () => this.doStartTunnel(), () => this.doSmokeTest(), () => this.doSaveConfiguration(), @@ -270,46 +266,25 @@ export class SetupScreen extends Screen { } } - private async doGitHubLogin(): Promise { - this.setResult(" Starting GitHub login..."); + private async doDeployFoundry(): Promise { + this.setResult(" Deploying Foundry infrastructure via Bicep..."); try { - const r = await this.api.copilotLogin(); - if (r.status === "already_authenticated") { - this.setResult(" \x1b[32mAlready authenticated\x1b[0m"); - return; - } - const code = r.user_code || r.code; - const url = r.verification_uri || r.url || "https://github.com/login/device"; - if (code) { - this.setResult(` Open ${url} and enter code: \x1b[1m${code}\x1b[0m\n Waiting for completion...`); - await this.pollGitHub(); + const r = await this.api.fetchRaw("/api/setup/foundry/deploy", { + method: "POST", + body: JSON.stringify({ resource_group: "polyclaw-rg", location: "eastus" }), + signal: AbortSignal.timeout(600_000), + }); + const body = await r.json(); + if (body.status === "ok") { + this.setResult(` \x1b[32mFoundry deployed!\x1b[0m\n Endpoint: ${body.foundry_endpoint}\n Models: ${(body.deployed_models || []).join(", ")}`); } else { - this.setResult(` ${r.message || "Login started"}`); + this.setResult(` \x1b[31mDeployment failed: ${body.error || "unknown error"}\x1b[0m`); } this.loadAuthStatus(); } catch (err: unknown) { const msg = err instanceof Error ? 
err.message : String(err); - this.setResult(` \x1b[31m${msg}\x1b[0m`); - } - } - - private async pollGitHub(): Promise { - for (let i = 0; i < 60; i++) { - await new Promise((r) => setTimeout(r, 3000)); - try { - const c = await this.api.copilotStatus(); - if (c.authenticated) { - this.setResult(" \x1b[32mGitHub authenticated!\x1b[0m"); - this.loadAuthStatus(); - return; - } - } catch { /* keep trying */ } + this.setResult(` \x1b[31mError: ${msg}\x1b[0m`); } - this.setResult(" \x1b[33mLogin timed out.\x1b[0m"); - } - - private async doSetGitHubToken(): Promise { - this.setResult(" Enter a GitHub token in the input and run this action again.\n (Token input not yet wired -- use GitHub Login instead)"); } private async doStartTunnel(): Promise { diff --git a/app/tui/src/ui/app.ts b/app/tui/src/ui/app.ts index a8d0719..e6aaf52 100644 --- a/app/tui/src/ui/app.ts +++ b/app/tui/src/ui/app.ts @@ -460,11 +460,9 @@ export class App { try { const status = await this.api.getSetupStatus(); const azOk = status.azure?.logged_in ?? false; - const ghOk = status.copilot?.authenticated ?? false; const tunnelOk = status.tunnel?.active ?? false; const pieces = [ `Azure: ${azOk ? "\x1b[32mOK\x1b[0m" : "\x1b[31m--\x1b[0m"}`, - `GitHub: ${ghOk ? "\x1b[32mOK\x1b[0m" : "\x1b[31m--\x1b[0m"}`, `Tunnel: ${tunnelOk ? "\x1b[32mOK\x1b[0m" : "\x1b[90m--\x1b[0m"}`, ]; @@ -486,7 +484,7 @@ export class App { pieces.push(`Runtime: ${cColor(cs.runtime.health)}${cLabel(cs.runtime.health)}\x1b[0m`); } catch { /* Docker unavailable */ } - const dot = azOk && ghOk ? "\x1b[32m●\x1b[0m" : "\x1b[33m●\x1b[0m"; + const dot = azOk ? 
"\x1b[32m●\x1b[0m" : "\x1b[33m●\x1b[0m"; this.titleBar.content = ` ${dot} polyclaw v3 | ${pieces.join(" ")}`; } catch { // Leave whatever container info was written above diff --git a/app/tui/src/ui/tui.ts b/app/tui/src/ui/tui.ts index dbe5d2a..46d265a 100644 --- a/app/tui/src/ui/tui.ts +++ b/app/tui/src/ui/tui.ts @@ -426,17 +426,20 @@ export async function launchTUI( } function updateStatusDots(s: StatusResponse): void { - const states: Record = { - azure: s.azure?.logged_in ?? false, - github: s.copilot?.authenticated ?? false, - tunnel: s.tunnel?.active ?? false, - bot: s.bot_configured ?? false, + const azureOk = s.azure?.logged_in ?? false; + const botConfigured = s.bot_configured ?? false; + const tunnelActive = s.tunnel?.active ?? false; + + const colors: Record = { + azure: azureOk ? Colors.green : Colors.red, + tunnel: !botConfigured ? Colors.dim : tunnelActive ? Colors.green : Colors.red, + bot: botConfigured ? Colors.green : Colors.dim, }; for (const item of STATUS_ITEMS) { - try { (statusDots[item.key] as unknown as { fg: string }).fg = states[item.key] ? Colors.green : Colors.red; } catch { /* ignore */ } + try { (statusDots[item.key] as unknown as { fg: string }).fg = colors[item.key] ?? Colors.dim; } catch { /* ignore */ } } // Auto-open admin UI if auth needs attention - if (!browserOpened && (!states.azure || !states.github)) { + if (!browserOpened && !azureOk) { browserOpened = true; const adminUrl = secret ? 
`${baseUrl}/?secret=${secret}` : baseUrl; addMessage("system", `Opening admin UI for setup: ${adminUrl}`, Colors.muted); @@ -1001,7 +1004,6 @@ export async function launchTUI( } } if (/Resolved.*secret.*Key Vault|azure.*logged.in/i.test(line)) markPhase("azure", true); - if (/copilot.*authenticated|gh.*logged.in/i.test(line)) markPhase("github", true); if (/Tunnel started/i.test(line)) markPhase("tunnel", true); if (/Bot deployment completed|bot_deploy.*ok/i.test(line)) markPhase("bot", true); }); @@ -1227,7 +1229,6 @@ export async function launchTUI( if (bootComplete) { updateStatusDots(s); } else { markPhase("azure", s.azure?.logged_in ?? false); - markPhase("github", s.copilot?.authenticated ?? false); markPhase("tunnel", s.tunnel?.active ?? false); markPhase("bot", s.bot_configured ?? false); } diff --git a/app/tui/tests/types.test.ts b/app/tui/tests/types.test.ts index 9acf34b..d603055 100644 --- a/app/tui/tests/types.test.ts +++ b/app/tui/tests/types.test.ts @@ -21,7 +21,6 @@ describe("Config types (compile-time validation)", () => { test("StatusResponse can be constructed", () => { const status: StatusResponse = { azure: { logged_in: true, subscription: "sub-1" }, - copilot: { authenticated: true, details: "ok" }, tunnel: { active: true, url: "https://tunnel.example.com" }, }; expect(status.azure?.logged_in).toBe(true); diff --git a/conftest.py b/conftest.py index 4e85708..71b37c5 100644 --- a/conftest.py +++ b/conftest.py @@ -12,12 +12,25 @@ def pytest_addoption(parser): default=False, help="Include tests marked @pytest.mark.slow (skipped by default).", ) + parser.addoption( + "--run-e2e-setup", + action="store_true", + default=False, + help="Include tests marked @pytest.mark.e2e_setup (skipped by default).", + ) def pytest_collection_modifyitems(config, items): - if config.getoption("--run-slow"): - return - skip_slow = pytest.mark.skip(reason="slow test — pass --run-slow to include") - for item in items: - if "slow" in item.keywords: - 
item.add_marker(skip_slow) + if not config.getoption("--run-slow"): + skip_slow = pytest.mark.skip(reason="slow test — pass --run-slow to include") + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) + + if not config.getoption("--run-e2e-setup"): + skip_e2e = pytest.mark.skip( + reason="E2E setup test — pass --run-e2e-setup to include", + ) + for item in items: + if "e2e_setup" in item.keywords: + item.add_marker(skip_e2e) diff --git a/e2e_secrets.env.example b/e2e_secrets.env.example new file mode 100644 index 0000000..74b4630 --- /dev/null +++ b/e2e_secrets.env.example @@ -0,0 +1,20 @@ +# E2E Setup Test Secrets +# +# Copy this file to e2e_secrets.env and fill in values. +# e2e_secrets.env is gitignored and never committed. +# +# The test provisions real Azure resources under ADMIN_SECRET +# and tears them down afterwards. An active `az login` session +# on the host is required (the test copies ~/.azure into the +# container for authentication). + +# Admin secret used to authorise API requests to the admin container. +# Any non-empty string works (the Docker stack generates one on first boot). +ADMIN_SECRET=test-e2e-secret-changeme + +# Azure resource group name used for temporary test resources. +# Will be created if it does not exist, and deleted on teardown. +E2E_RESOURCE_GROUP=polyclaw-e2e-setup-rg + +# Azure region for resource provisioning. +E2E_LOCATION=eastus diff --git a/entrypoint.sh b/entrypoint.sh index 17e99f9..7df7925 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -44,6 +44,16 @@ else export AZURE_CONFIG_DIR="$DATA_DIR/.azure" fi +# --------------------------------------------------------------------------- +# Bicep binary: az bicep install puts the binary under /root/.azure/bin/ at +# build time. When HOME is overridden (admin=/admin-home, runtime=/runtime-home), +# az cannot find it. Symlink into $AZURE_CONFIG_DIR/bin so it is always available. 
+# --------------------------------------------------------------------------- +if [[ -x /root/.azure/bin/bicep && -n "${AZURE_CONFIG_DIR:-}" ]]; then + mkdir -p "$AZURE_CONFIG_DIR/bin" + ln -sf /root/.azure/bin/bicep "$AZURE_CONFIG_DIR/bin/bicep" +fi + # Clean stale copilot CLI runtime cache (forces re-download of matching version) COPILOT_INSTALLED="$(copilot --version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' || echo '')" if [[ -n "$COPILOT_INSTALLED" && -d "$HOME/.copilot/pkg" ]]; then @@ -176,7 +186,7 @@ elif [[ -n "${KEY_VAULT_URL:-}" ]]; then echo " Vault URL: ${KEY_VAULT_URL}" # Collect @kv: references for debugging - _KV_REFS=$(env | grep '=@kv:' | cut -d= -f1 | tr '\n' ', ' | sed 's/,$//') + _KV_REFS=$(env | grep '=@kv:' | cut -d= -f1 | tr '\n' ', ' | sed 's/,$//' || true) if [[ -n "$_KV_REFS" ]]; then echo " @kv: refs: ${_KV_REFS}" else @@ -186,7 +196,7 @@ elif [[ -n "${KEY_VAULT_URL:-}" ]]; then # Gate on Azure CLI auth: if `az account show` fails, there are no # credentials available and DefaultAzureCredential will hang probing # IMDS (especially on ACA where the admin has no managed identity). - if az account show --output none 2>/dev/null; then + if timeout 15 az account show --output none 2>/dev/null; then echo " Azure CLI: authenticated" _KV_OUTPUT=$(timeout 60 python -m polyclaw.keyvault_resolve 2>&1) || { _KV_RC=$? @@ -220,21 +230,6 @@ elif [[ -n "${KEY_VAULT_URL:-}" ]]; then fi fi -AUTH_DONE="$DATA_DIR/.copilot-auth/.authenticated" - -# --- GitHub Authentication ------------------------------------------------ - -if [[ "$MODE" != "runtime" ]]; then - # Only admin / combined modes care about GitHub auth - if [[ -n "${GITHUB_TOKEN:-}" ]] || [[ -n "${GH_TOKEN:-}" ]]; then - echo "Using token from environment." - elif [[ -f "$AUTH_DONE" ]]; then - echo "Already authenticated (cached)." - else - echo "GitHub not authenticated -- use the web admin UI to authenticate." 
- fi -fi - # --- Launch --------------------------------------------------------------- if [[ "$MODE" == "run" ]]; then @@ -277,7 +272,11 @@ elif [[ "$MODE" == "runtime" ]]; then echo " Identity: managed identity (AZURE_CLIENT_ID=${AZURE_CLIENT_ID:-})" else echo " Platform: Docker" - echo " Identity: ${RUNTIME_SP_APP_ID:+scoped SP $RUNTIME_SP_APP_ID}${RUNTIME_SP_APP_ID:-none}" + if [[ -n "${RUNTIME_SP_APP_ID:-}" ]]; then + echo " Identity: scoped SP ${RUNTIME_SP_APP_ID}" + else + echo " Identity: (none)" + fi fi echo " Bot messages: http://localhost:${ADMIN_PORT}/api/messages" echo "" diff --git a/infra/main.bicep b/infra/main.bicep new file mode 100644 index 0000000..c5be3c4 --- /dev/null +++ b/infra/main.bicep @@ -0,0 +1,428 @@ +// Polyclaw -- central infrastructure deployment. +// +// Deploys all Azure resources from a single parameterised template. +// Each resource block is gated by a deploy* boolean so callers can +// request only the subset they need. +// +// Usage: +// az deployment group create \ +// --resource-group \ +// --template-file infra/main.bicep \ +// --parameters infra/main.bicepparam + +// ── Global parameters ─────────────────────────────────────────────────── + +@description('Base name for all resources (must be globally unique).') +param baseName string + +@description('Azure region for resource deployment.') +param location string = resourceGroup().location + +@description('Object ID of the principal to grant data-plane access.') +param principalId string + +@description('Principal type for RBAC assignment.') +@allowed(['User', 'ServicePrincipal']) +param principalType string = 'User' + +// ── Feature toggles ───────────────────────────────────────────────────── + +@description('Deploy the Foundry (AI Services) resource + model deployments.') +param deployFoundry bool = true + +@description('Model deployments to create on the Foundry resource.') +param models array = [ + { name: 'gpt-4.1', version: '2025-04-14', sku: 'GlobalStandard', 
capacity: 10 } + { name: 'gpt-5', version: '2025-08-07', sku: 'GlobalStandard', capacity: 10 } + { name: 'gpt-5-mini', version: '2025-08-07', sku: 'GlobalStandard', capacity: 10 } +] + +@description('Deploy a Key Vault alongside the Foundry resource.') +param deployKeyVault bool = true + +@description('Object ID of the runtime service principal for Key Vault access (empty = skip).') +param runtimeSpObjectId string = '' + +@description('Deploy an ACS resource for voice calling.') +param deployAcs bool = false + +@description('ACS data location.') +param acsDataLocation string = 'United States' + +@description('Deploy a Content Safety resource.') +param deployContentSafety bool = false + +@description('Deploy Azure AI Search for Foundry IQ.') +param deploySearch bool = false + +@description('Deploy a dedicated Azure OpenAI resource for embeddings (Foundry IQ).') +param deployEmbeddingAoai bool = false + +@description('Embedding model deployment name.') +param embeddingModelName string = 'text-embedding-3-large' + +@description('Embedding model version.') +param embeddingModelVersion string = '1' + +@description('Deploy Log Analytics + Application Insights for monitoring.') +param deployMonitoring bool = false + +@description('Deploy a Container Apps session pool (code sandbox).') +param deploySessionPool bool = false + +// ── Foundry (AI Services) ─────────────────────────────────────────────── + +resource aiServices 'Microsoft.CognitiveServices/accounts@2024-10-01' = if (deployFoundry) { + name: baseName + location: location + kind: 'AIServices' + sku: { name: 'S0' } + properties: { + customSubDomainName: baseName + publicNetworkAccess: 'Enabled' + } +} + +@batchSize(1) +resource modelDeployments 'Microsoft.CognitiveServices/accounts/deployments@2024-10-01' = [ + for model in (deployFoundry ? 
models : []): { + parent: aiServices + name: model.name + sku: { + name: model.sku + capacity: model.capacity + } + properties: { + model: { + format: 'OpenAI' + name: model.name + version: model.version + } + } + } +] + +var cognitiveServicesOpenAIUser = '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' + +resource openAiUserRole 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (deployFoundry) { + name: guid(aiServices.id, principalId, cognitiveServicesOpenAIUser) + scope: aiServices + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', cognitiveServicesOpenAIUser) + principalId: principalId + principalType: principalType + } +} + +// Foundry RBAC for the runtime service principal (Docker local mode). +// The runtime calls `az account get-access-token --resource cognitiveservices` +// to authenticate with the Foundry endpoint. Without this role the token +// is rejected with 401. +resource openAiUserRoleRuntimeSp 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (deployFoundry && runtimeSpObjectId != '') { + name: guid(aiServices.id, runtimeSpObjectId, cognitiveServicesOpenAIUser) + scope: aiServices + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', cognitiveServicesOpenAIUser) + principalId: runtimeSpObjectId + principalType: 'ServicePrincipal' + } +} + +// ── Key Vault (optional) ──────────────────────────────────────────────── + +var kvSecretsOfficer = 'b86a8fe4-44ce-4948-aee5-eccb2c155cd7' + +resource keyVault 'Microsoft.KeyVault/vaults@2023-07-01' = if (deployKeyVault) { + name: '${baseName}-kv' + location: location + properties: { + tenantId: subscription().tenantId + sku: { family: 'A', name: 'standard' } + enableRbacAuthorization: true + enableSoftDelete: true + softDeleteRetentionInDays: 7 + publicNetworkAccess: 'Enabled' + } +} + +resource kvRole 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (deployKeyVault) { + name: guid(keyVault.id, 
principalId, kvSecretsOfficer) + scope: keyVault + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', kvSecretsOfficer) + principalId: principalId + principalType: principalType + } +} + +// Key Vault RBAC for the runtime service principal (Docker local mode). +// The runtime runs in a separate container without the admin's interactive +// creds, so it needs its own SP with Secrets Officer on the vault. +resource kvRoleRuntimeSp 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (deployKeyVault && runtimeSpObjectId != '') { + name: guid(keyVault.id, runtimeSpObjectId, kvSecretsOfficer) + scope: keyVault + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', kvSecretsOfficer) + principalId: runtimeSpObjectId + principalType: 'ServicePrincipal' + } +} + +// ── ACS (optional, for voice) ─────────────────────────────────────────── + +resource acs 'Microsoft.Communication/communicationServices@2023-04-01' = if (deployAcs) { + name: '${baseName}-acs' + location: 'Global' + properties: { + dataLocation: acsDataLocation + } +} + +// ── Content Safety (optional) ─────────────────────────────────────────── + +resource contentSafety 'Microsoft.CognitiveServices/accounts@2024-10-01' = if (deployContentSafety) { + name: '${baseName}-content-safety' + location: location + kind: 'ContentSafety' + sku: { name: 'S0' } + properties: { + customSubDomainName: '${baseName}-content-safety' + publicNetworkAccess: 'Enabled' + } +} + +var cognitiveServicesUser = 'a97b65f3-24c7-4388-baec-2e87135dc908' + +resource csUserRole 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (deployContentSafety) { + name: guid(contentSafety.id, principalId, cognitiveServicesUser) + scope: contentSafety + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', cognitiveServicesUser) + principalId: principalId + principalType: principalType + } +} + +// 
Content Safety RBAC for the runtime SP (Prompt Shields) +resource csUserRoleRuntimeSp 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (deployContentSafety && runtimeSpObjectId != '') { + name: guid(contentSafety.id, runtimeSpObjectId, cognitiveServicesUser) + scope: contentSafety + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', cognitiveServicesUser) + principalId: runtimeSpObjectId + principalType: 'ServicePrincipal' + } +} + +// ── Azure AI Search (optional, for Foundry IQ) ───────────────────────── + +resource searchService 'Microsoft.Search/searchServices@2023-11-01' = if (deploySearch) { + name: '${baseName}-search' + location: location + sku: { name: 'basic' } + properties: { + replicaCount: 1 + partitionCount: 1 + publicNetworkAccess: 'enabled' + } +} + +// Search Index Data Contributor for the admin principal +var searchIndexDataContributor = '8ebe5a00-799e-43f5-93ac-243d3dce84a7' + +resource searchDataRole 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (deploySearch) { + name: guid(searchService.id, principalId, searchIndexDataContributor) + scope: searchService + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', searchIndexDataContributor) + principalId: principalId + principalType: principalType + } +} + +// Search Index Data Contributor for the runtime SP (managed-identity auth) +resource searchDataRoleRuntimeSp 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (deploySearch && runtimeSpObjectId != '') { + name: guid(searchService.id, runtimeSpObjectId, searchIndexDataContributor) + scope: searchService + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', searchIndexDataContributor) + principalId: runtimeSpObjectId + principalType: 'ServicePrincipal' + } +} + +// ── Embedding Azure OpenAI (optional, for Foundry IQ) ────────────────── + +resource embeddingAoai 
'Microsoft.CognitiveServices/accounts@2024-10-01' = if (deployEmbeddingAoai) { + name: '${baseName}-aoai' + location: location + kind: 'OpenAI' + sku: { name: 'S0' } + properties: { + customSubDomainName: '${baseName}-aoai' + publicNetworkAccess: 'Enabled' + } +} + +resource embeddingDeployment 'Microsoft.CognitiveServices/accounts/deployments@2024-10-01' = if (deployEmbeddingAoai) { + parent: embeddingAoai + name: embeddingModelName + sku: { + name: 'Standard' + capacity: 1 + } + properties: { + model: { + format: 'OpenAI' + name: embeddingModelName + version: embeddingModelVersion + } + } +} + +var embeddingCogUser = 'a97b65f3-24c7-4388-baec-2e87135dc908' + +resource embeddingAoaiRole 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (deployEmbeddingAoai) { + name: guid(embeddingAoai.id, principalId, embeddingCogUser) + scope: embeddingAoai + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', embeddingCogUser) + principalId: principalId + principalType: principalType + } +} + +// Embedding AOAI RBAC for the runtime SP (managed-identity auth) +resource embeddingAoaiRoleRuntimeSp 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (deployEmbeddingAoai && runtimeSpObjectId != '') { + name: guid(embeddingAoai.id, runtimeSpObjectId, embeddingCogUser) + scope: embeddingAoai + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', embeddingCogUser) + principalId: runtimeSpObjectId + principalType: 'ServicePrincipal' + } +} + +// ── Log Analytics + App Insights (optional, for monitoring) ───────────── + +resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2023-09-01' = if (deployMonitoring) { + name: '${baseName}-logs' + location: location + properties: { + retentionInDays: 30 + sku: { name: 'PerGB2018' } + } +} + +resource appInsights 'Microsoft.Insights/components@2020-02-02' = if (deployMonitoring) { + name: '${baseName}-insights' + location: 
location + kind: 'web' + properties: { + Application_Type: 'web' + WorkspaceResourceId: logAnalytics.id + } +} + +// ── Container Apps Session Pool (optional, for sandbox) ───────────────── + +resource sessionPool 'Microsoft.App/sessionPools@2024-02-02-preview' = if (deploySessionPool) { + name: '${baseName}-sandbox' + location: location + properties: { + poolManagementType: 'Dynamic' + containerType: 'PythonLTS' + scaleConfiguration: { + maxConcurrentSessions: 10 + } + dynamicPoolConfiguration: { + cooldownPeriodInSeconds: 300 + } + } +} + +// Session Executor for the admin principal +var sessionExecutor = '0fb8eba5-a2bb-4abe-b1c1-49dfad359bb0' + +resource sessionPoolRole 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (deploySessionPool) { + name: guid(sessionPool.id, principalId, sessionExecutor) + scope: sessionPool + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', sessionExecutor) + principalId: principalId + principalType: principalType + } +} + +// Session Executor for the runtime SP +resource sessionPoolRoleRuntimeSp 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (deploySessionPool && runtimeSpObjectId != '') { + name: guid(sessionPool.id, runtimeSpObjectId, sessionExecutor) + scope: sessionPool + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', sessionExecutor) + principalId: runtimeSpObjectId + principalType: 'ServicePrincipal' + } +} + +// ── Outputs ───────────────────────────────────────────────────────────── + +// Foundry +#disable-next-line BCP318 +output foundryEndpoint string = deployFoundry ? aiServices.properties.endpoint : '' +#disable-next-line BCP318 +output foundryResourceId string = deployFoundry ? aiServices.id : '' +#disable-next-line BCP318 +output foundryName string = deployFoundry ? aiServices.name : '' +output deployedModels array = [for (m, i) in (deployFoundry ? 
models : []): m.name] + +// Key Vault +#disable-next-line BCP318 +output keyVaultUrl string = deployKeyVault ? keyVault.properties.vaultUri : '' +#disable-next-line BCP318 +output keyVaultName string = deployKeyVault ? keyVault.name : '' + +// ACS +#disable-next-line BCP318 +output acsResourceId string = deployAcs ? acs.id : '' +#disable-next-line BCP318 +output acsName string = deployAcs ? acs.name : '' + +// Content Safety +#disable-next-line BCP318 +output contentSafetyEndpoint string = deployContentSafety ? contentSafety.properties.endpoint : '' +#disable-next-line BCP318 +output contentSafetyResourceId string = deployContentSafety ? contentSafety.id : '' +#disable-next-line BCP318 +output contentSafetyName string = deployContentSafety ? contentSafety.name : '' + +// Azure AI Search +#disable-next-line BCP318 +output searchEndpoint string = deploySearch ? 'https://${searchService.name}.search.windows.net' : '' +#disable-next-line BCP318 +output searchName string = deploySearch ? searchService.name : '' + +// Embedding Azure OpenAI +#disable-next-line BCP318 +output embeddingAoaiEndpoint string = deployEmbeddingAoai ? embeddingAoai.properties.endpoint : '' +#disable-next-line BCP318 +output embeddingAoaiName string = deployEmbeddingAoai ? embeddingAoai.name : '' +output embeddingDeploymentName string = deployEmbeddingAoai ? embeddingModelName : '' + +// Monitoring +#disable-next-line BCP318 +output logAnalyticsWorkspaceId string = deployMonitoring ? logAnalytics.id : '' +#disable-next-line BCP318 +output logAnalyticsWorkspaceName string = deployMonitoring ? logAnalytics.name : '' +#disable-next-line BCP318 +output appInsightsConnectionString string = deployMonitoring ? appInsights.properties.ConnectionString : '' +#disable-next-line BCP318 +output appInsightsName string = deployMonitoring ? appInsights.name : '' + +// Sandbox +#disable-next-line BCP318 +output sessionPoolEndpoint string = deploySessionPool ? 
sessionPool.properties.poolManagementEndpoint : '' +#disable-next-line BCP318 +output sessionPoolId string = deploySessionPool ? sessionPool.id : '' +#disable-next-line BCP318 +output sessionPoolName string = deploySessionPool ? sessionPool.name : '' diff --git a/infra/main.json b/infra/main.json new file mode 100644 index 0000000..8826a56 --- /dev/null +++ b/infra/main.json @@ -0,0 +1,489 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.41.2.15936", + "templateHash": "18062785448056283597" + } + }, + "parameters": { + "baseName": { + "type": "string", + "metadata": { + "description": "Base name for all resources (must be globally unique)." + } + }, + "location": { + "type": "string", + "defaultValue": "[resourceGroup().location]", + "metadata": { + "description": "Azure region for resource deployment." + } + }, + "principalId": { + "type": "string", + "metadata": { + "description": "Object ID of the principal to grant data-plane access." + } + }, + "principalType": { + "type": "string", + "defaultValue": "User", + "allowedValues": [ + "User", + "ServicePrincipal" + ], + "metadata": { + "description": "Principal type for RBAC assignment." + } + }, + "deployFoundry": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Deploy the Foundry (AI Services) resource + model deployments." + } + }, + "models": { + "type": "array", + "defaultValue": [ + { + "name": "gpt-4.1", + "version": "2025-04-14", + "sku": "GlobalStandard", + "capacity": 10 + }, + { + "name": "gpt-5", + "version": "2025-10-01", + "sku": "GlobalStandard", + "capacity": 10 + }, + { + "name": "gpt-5-mini", + "version": "2025-10-01", + "sku": "GlobalStandard", + "capacity": 10 + } + ], + "metadata": { + "description": "Model deployments to create on the Foundry resource." 
+ } + }, + "deployKeyVault": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Deploy a Key Vault alongside the Foundry resource." + } + }, + "deployAcs": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Deploy an ACS resource for voice calling." + } + }, + "acsDataLocation": { + "type": "string", + "defaultValue": "United States", + "metadata": { + "description": "ACS data location." + } + }, + "deployContentSafety": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Deploy a Content Safety resource." + } + }, + "deploySearch": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Deploy Azure AI Search for Foundry IQ." + } + }, + "deployEmbeddingAoai": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Deploy a dedicated Azure OpenAI resource for embeddings (Foundry IQ)." + } + }, + "embeddingModelName": { + "type": "string", + "defaultValue": "text-embedding-3-large", + "metadata": { + "description": "Embedding model deployment name." + } + }, + "embeddingModelVersion": { + "type": "string", + "defaultValue": "1", + "metadata": { + "description": "Embedding model version." + } + }, + "deployMonitoring": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Deploy Log Analytics + Application Insights for monitoring." + } + }, + "deploySessionPool": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Deploy a Container Apps session pool (code sandbox)." 
+ } + } + }, + "variables": { + "cognitiveServicesOpenAIUser": "5e0bd9bd-7b93-4f28-af87-19fc36ad61bd", + "kvSecretsOfficer": "b86a8fe4-44ce-4948-aee5-eccb2c155cd7", + "cognitiveServicesUser": "a97b65f3-24c7-4388-baec-2e87135dc908", + "embeddingCogUser": "a97b65f3-24c7-4388-baec-2e87135dc908" + }, + "resources": [ + { + "condition": "[parameters('deployFoundry')]", + "type": "Microsoft.CognitiveServices/accounts", + "apiVersion": "2024-10-01", + "name": "[parameters('baseName')]", + "location": "[parameters('location')]", + "kind": "AIServices", + "sku": { + "name": "S0" + }, + "properties": { + "customSubDomainName": "[parameters('baseName')]", + "publicNetworkAccess": "Enabled" + } + }, + { + "copy": { + "name": "modelDeployments", + "count": "[length(if(parameters('deployFoundry'), parameters('models'), createArray()))]", + "mode": "serial", + "batchSize": 1 + }, + "type": "Microsoft.CognitiveServices/accounts/deployments", + "apiVersion": "2024-10-01", + "name": "[format('{0}/{1}', parameters('baseName'), if(parameters('deployFoundry'), parameters('models'), createArray())[copyIndex()].name)]", + "sku": { + "name": "[if(parameters('deployFoundry'), parameters('models'), createArray())[copyIndex()].sku]", + "capacity": "[if(parameters('deployFoundry'), parameters('models'), createArray())[copyIndex()].capacity]" + }, + "properties": { + "model": { + "format": "OpenAI", + "name": "[if(parameters('deployFoundry'), parameters('models'), createArray())[copyIndex()].name]", + "version": "[if(parameters('deployFoundry'), parameters('models'), createArray())[copyIndex()].version]" + } + }, + "dependsOn": [ + "[resourceId('Microsoft.CognitiveServices/accounts', parameters('baseName'))]" + ] + }, + { + "condition": "[parameters('deployFoundry')]", + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2022-04-01", + "scope": "[resourceId('Microsoft.CognitiveServices/accounts', parameters('baseName'))]", + "name": 
"[guid(resourceId('Microsoft.CognitiveServices/accounts', parameters('baseName')), parameters('principalId'), variables('cognitiveServicesOpenAIUser'))]", + "properties": { + "roleDefinitionId": "[subscriptionResourceId('Microsoft.Authorization/roleDefinitions', variables('cognitiveServicesOpenAIUser'))]", + "principalId": "[parameters('principalId')]", + "principalType": "[parameters('principalType')]" + }, + "dependsOn": [ + "[resourceId('Microsoft.CognitiveServices/accounts', parameters('baseName'))]" + ] + }, + { + "condition": "[parameters('deployKeyVault')]", + "type": "Microsoft.KeyVault/vaults", + "apiVersion": "2023-07-01", + "name": "[format('{0}-kv', parameters('baseName'))]", + "location": "[parameters('location')]", + "properties": { + "tenantId": "[subscription().tenantId]", + "sku": { + "family": "A", + "name": "standard" + }, + "enableRbacAuthorization": true, + "enableSoftDelete": true, + "softDeleteRetentionInDays": 7, + "publicNetworkAccess": "Enabled" + } + }, + { + "condition": "[parameters('deployKeyVault')]", + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2022-04-01", + "scope": "[resourceId('Microsoft.KeyVault/vaults', format('{0}-kv', parameters('baseName')))]", + "name": "[guid(resourceId('Microsoft.KeyVault/vaults', format('{0}-kv', parameters('baseName'))), parameters('principalId'), variables('kvSecretsOfficer'))]", + "properties": { + "roleDefinitionId": "[subscriptionResourceId('Microsoft.Authorization/roleDefinitions', variables('kvSecretsOfficer'))]", + "principalId": "[parameters('principalId')]", + "principalType": "[parameters('principalType')]" + }, + "dependsOn": [ + "[resourceId('Microsoft.KeyVault/vaults', format('{0}-kv', parameters('baseName')))]" + ] + }, + { + "condition": "[parameters('deployAcs')]", + "type": "Microsoft.Communication/communicationServices", + "apiVersion": "2023-04-01", + "name": "[format('{0}-acs', parameters('baseName'))]", + "location": "Global", + "properties": { + 
"dataLocation": "[parameters('acsDataLocation')]" + } + }, + { + "condition": "[parameters('deployContentSafety')]", + "type": "Microsoft.CognitiveServices/accounts", + "apiVersion": "2024-10-01", + "name": "[format('{0}-content-safety', parameters('baseName'))]", + "location": "[parameters('location')]", + "kind": "ContentSafety", + "sku": { + "name": "S0" + }, + "properties": { + "customSubDomainName": "[format('{0}-content-safety', parameters('baseName'))]", + "publicNetworkAccess": "Enabled" + } + }, + { + "condition": "[parameters('deployContentSafety')]", + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2022-04-01", + "scope": "[resourceId('Microsoft.CognitiveServices/accounts', format('{0}-content-safety', parameters('baseName')))]", + "name": "[guid(resourceId('Microsoft.CognitiveServices/accounts', format('{0}-content-safety', parameters('baseName'))), parameters('principalId'), variables('cognitiveServicesUser'))]", + "properties": { + "roleDefinitionId": "[subscriptionResourceId('Microsoft.Authorization/roleDefinitions', variables('cognitiveServicesUser'))]", + "principalId": "[parameters('principalId')]", + "principalType": "[parameters('principalType')]" + }, + "dependsOn": [ + "[resourceId('Microsoft.CognitiveServices/accounts', format('{0}-content-safety', parameters('baseName')))]" + ] + }, + { + "condition": "[parameters('deploySearch')]", + "type": "Microsoft.Search/searchServices", + "apiVersion": "2023-11-01", + "name": "[format('{0}-search', parameters('baseName'))]", + "location": "[parameters('location')]", + "sku": { + "name": "basic" + }, + "properties": { + "replicaCount": 1, + "partitionCount": 1, + "publicNetworkAccess": "enabled" + } + }, + { + "condition": "[parameters('deployEmbeddingAoai')]", + "type": "Microsoft.CognitiveServices/accounts", + "apiVersion": "2024-10-01", + "name": "[format('{0}-aoai', parameters('baseName'))]", + "location": "[parameters('location')]", + "kind": "OpenAI", + "sku": { + "name": 
"S0" + }, + "properties": { + "customSubDomainName": "[format('{0}-aoai', parameters('baseName'))]", + "publicNetworkAccess": "Enabled" + } + }, + { + "condition": "[parameters('deployEmbeddingAoai')]", + "type": "Microsoft.CognitiveServices/accounts/deployments", + "apiVersion": "2024-10-01", + "name": "[format('{0}/{1}', format('{0}-aoai', parameters('baseName')), parameters('embeddingModelName'))]", + "sku": { + "name": "Standard", + "capacity": 1 + }, + "properties": { + "model": { + "format": "OpenAI", + "name": "[parameters('embeddingModelName')]", + "version": "[parameters('embeddingModelVersion')]" + } + }, + "dependsOn": [ + "[resourceId('Microsoft.CognitiveServices/accounts', format('{0}-aoai', parameters('baseName')))]" + ] + }, + { + "condition": "[parameters('deployEmbeddingAoai')]", + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2022-04-01", + "scope": "[resourceId('Microsoft.CognitiveServices/accounts', format('{0}-aoai', parameters('baseName')))]", + "name": "[guid(resourceId('Microsoft.CognitiveServices/accounts', format('{0}-aoai', parameters('baseName'))), parameters('principalId'), variables('embeddingCogUser'))]", + "properties": { + "roleDefinitionId": "[subscriptionResourceId('Microsoft.Authorization/roleDefinitions', variables('embeddingCogUser'))]", + "principalId": "[parameters('principalId')]", + "principalType": "[parameters('principalType')]" + }, + "dependsOn": [ + "[resourceId('Microsoft.CognitiveServices/accounts', format('{0}-aoai', parameters('baseName')))]" + ] + }, + { + "condition": "[parameters('deployMonitoring')]", + "type": "Microsoft.OperationalInsights/workspaces", + "apiVersion": "2023-09-01", + "name": "[format('{0}-logs', parameters('baseName'))]", + "location": "[parameters('location')]", + "properties": { + "retentionInDays": 30, + "sku": { + "name": "PerGB2018" + } + } + }, + { + "condition": "[parameters('deployMonitoring')]", + "type": "Microsoft.Insights/components", + "apiVersion": 
"2020-02-02", + "name": "[format('{0}-insights', parameters('baseName'))]", + "location": "[parameters('location')]", + "kind": "web", + "properties": { + "Application_Type": "web", + "WorkspaceResourceId": "[resourceId('Microsoft.OperationalInsights/workspaces', format('{0}-logs', parameters('baseName')))]" + }, + "dependsOn": [ + "[resourceId('Microsoft.OperationalInsights/workspaces', format('{0}-logs', parameters('baseName')))]" + ] + }, + { + "condition": "[parameters('deploySessionPool')]", + "type": "Microsoft.App/sessionPools", + "apiVersion": "2024-02-02-preview", + "name": "[format('{0}-sandbox', parameters('baseName'))]", + "location": "[parameters('location')]", + "properties": { + "poolManagementType": "Dynamic", + "containerType": "PythonLTS", + "scaleConfiguration": { + "maxConcurrentSessions": 10 + }, + "dynamicPoolConfiguration": { + "cooldownPeriodInSeconds": 300 + } + } + } + ], + "outputs": { + "foundryEndpoint": { + "type": "string", + "value": "[if(parameters('deployFoundry'), reference(resourceId('Microsoft.CognitiveServices/accounts', parameters('baseName')), '2024-10-01').endpoint, '')]" + }, + "foundryResourceId": { + "type": "string", + "value": "[if(parameters('deployFoundry'), resourceId('Microsoft.CognitiveServices/accounts', parameters('baseName')), '')]" + }, + "foundryName": { + "type": "string", + "value": "[if(parameters('deployFoundry'), parameters('baseName'), '')]" + }, + "deployedModels": { + "type": "array", + "copy": { + "count": "[length(if(parameters('deployFoundry'), parameters('models'), createArray()))]", + "input": "[if(parameters('deployFoundry'), parameters('models'), createArray())[copyIndex()].name]" + } + }, + "keyVaultUrl": { + "type": "string", + "value": "[if(parameters('deployKeyVault'), reference(resourceId('Microsoft.KeyVault/vaults', format('{0}-kv', parameters('baseName'))), '2023-07-01').vaultUri, '')]" + }, + "keyVaultName": { + "type": "string", + "value": "[if(parameters('deployKeyVault'), 
format('{0}-kv', parameters('baseName')), '')]" + }, + "acsResourceId": { + "type": "string", + "value": "[if(parameters('deployAcs'), resourceId('Microsoft.Communication/communicationServices', format('{0}-acs', parameters('baseName'))), '')]" + }, + "acsName": { + "type": "string", + "value": "[if(parameters('deployAcs'), format('{0}-acs', parameters('baseName')), '')]" + }, + "contentSafetyEndpoint": { + "type": "string", + "value": "[if(parameters('deployContentSafety'), reference(resourceId('Microsoft.CognitiveServices/accounts', format('{0}-content-safety', parameters('baseName'))), '2024-10-01').endpoint, '')]" + }, + "contentSafetyResourceId": { + "type": "string", + "value": "[if(parameters('deployContentSafety'), resourceId('Microsoft.CognitiveServices/accounts', format('{0}-content-safety', parameters('baseName'))), '')]" + }, + "contentSafetyName": { + "type": "string", + "value": "[if(parameters('deployContentSafety'), format('{0}-content-safety', parameters('baseName')), '')]" + }, + "searchEndpoint": { + "type": "string", + "value": "[if(parameters('deploySearch'), format('https://{0}.search.windows.net', format('{0}-search', parameters('baseName'))), '')]" + }, + "searchName": { + "type": "string", + "value": "[if(parameters('deploySearch'), format('{0}-search', parameters('baseName')), '')]" + }, + "embeddingAoaiEndpoint": { + "type": "string", + "value": "[if(parameters('deployEmbeddingAoai'), reference(resourceId('Microsoft.CognitiveServices/accounts', format('{0}-aoai', parameters('baseName'))), '2024-10-01').endpoint, '')]" + }, + "embeddingAoaiName": { + "type": "string", + "value": "[if(parameters('deployEmbeddingAoai'), format('{0}-aoai', parameters('baseName')), '')]" + }, + "embeddingDeploymentName": { + "type": "string", + "value": "[if(parameters('deployEmbeddingAoai'), parameters('embeddingModelName'), '')]" + }, + "logAnalyticsWorkspaceId": { + "type": "string", + "value": "[if(parameters('deployMonitoring'), 
resourceId('Microsoft.OperationalInsights/workspaces', format('{0}-logs', parameters('baseName'))), '')]" + }, + "logAnalyticsWorkspaceName": { + "type": "string", + "value": "[if(parameters('deployMonitoring'), format('{0}-logs', parameters('baseName')), '')]" + }, + "appInsightsConnectionString": { + "type": "string", + "value": "[if(parameters('deployMonitoring'), reference(resourceId('Microsoft.Insights/components', format('{0}-insights', parameters('baseName'))), '2020-02-02').ConnectionString, '')]" + }, + "appInsightsName": { + "type": "string", + "value": "[if(parameters('deployMonitoring'), format('{0}-insights', parameters('baseName')), '')]" + }, + "sessionPoolEndpoint": { + "type": "string", + "value": "[if(parameters('deploySessionPool'), reference(resourceId('Microsoft.App/sessionPools', format('{0}-sandbox', parameters('baseName'))), '2024-02-02-preview').poolManagementEndpoint, '')]" + }, + "sessionPoolId": { + "type": "string", + "value": "[if(parameters('deploySessionPool'), resourceId('Microsoft.App/sessionPools', format('{0}-sandbox', parameters('baseName'))), '')]" + }, + "sessionPoolName": { + "type": "string", + "value": "[if(parameters('deploySessionPool'), format('{0}-sandbox', parameters('baseName')), '')]" + } + } +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 82da220..517d620 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,6 +68,7 @@ asyncio_mode = "auto" filterwarnings = ["ignore::DeprecationWarning"] markers = [ "slow: marks tests as slow (skipped by default, include with '--run-slow')", + "e2e_setup: full E2E setup-process test against real Azure (skipped by default, include with '--run-e2e-setup')", ] [tool.coverage.run] From 43ffef5168e5f6ae496445d91c9d8fa79132688b Mon Sep 17 00:00:00 2001 From: Aymen Date: Tue, 7 Apr 2026 23:45:09 +0200 Subject: [PATCH 2/5] feat: cleanup --- app/runtime/agent/__init__.py | 15 +- app/runtime/agent/agent.py | 67 +- app/runtime/agent/aitl.py | 57 +- 
app/runtime/agent/byok.py | 22 +- app/runtime/agent/event_handler.py | 5 +- app/runtime/agent/hitl.py | 44 +- app/runtime/agent/hitl_channels.py | 87 +-- app/runtime/agent/one_shot.py | 6 +- app/runtime/agent/phone_verify.py | 28 +- app/runtime/agent/policy_bridge.py | 61 +- app/runtime/agent/tools/cards.py | 52 +- app/runtime/config/settings.py | 56 +- app/runtime/env_cli.py | 98 ++- app/runtime/keyvault_resolve.py | 20 +- app/runtime/media/classify.py | 13 +- app/runtime/media/incoming.py | 8 +- app/runtime/media/outgoing.py | 33 +- app/runtime/messaging/bot.py | 41 +- app/runtime/messaging/cards.py | 26 +- app/runtime/messaging/commands/__init__.py | 9 +- app/runtime/messaging/commands/_dispatcher.py | 173 ++--- app/runtime/messaging/message_processor.py | 8 +- app/runtime/messaging/proactive.py | 16 +- app/runtime/messaging/proactive_loop.py | 82 +-- app/runtime/realtime/tools.py | 19 +- app/runtime/registries/plugins.py | 23 +- app/runtime/registries/skills.py | 22 +- app/runtime/sandbox/executor.py | 49 +- app/runtime/scheduler/engine.py | 22 +- app/runtime/server/app.py | 13 + app/runtime/server/lifecycle.py | 9 + app/runtime/server/routes/_helpers.py | 63 +- .../server/routes/content_safety_routes.py | 142 ++--- app/runtime/server/routes/env_routes.py | 21 +- .../server/routes/foundry_iq_routes.py | 52 +- .../server/routes/guardrails_routes.py | 67 +- .../server/routes/guardrails_routes_meta.py | 1 - app/runtime/server/routes/identity_routes.py | 149 +++-- app/runtime/server/routes/mcp_routes.py | 74 +-- .../server/routes/monitoring_routes.py | 85 ++- app/runtime/server/routes/network_audit.py | 316 ++++------ app/runtime/server/routes/plugin_routes.py | 67 +- app/runtime/server/routes/profile_routes.py | 5 +- app/runtime/server/routes/sandbox_routes.py | 69 +- app/runtime/server/routes/scheduler_routes.py | 47 +- app/runtime/server/routes/session_routes.py | 23 +- .../server/routes/tool_activity_routes.py | 32 +- app/runtime/server/setup/_helpers.py | 18 + 
app/runtime/server/setup/_routes.py | 122 ++-- app/runtime/server/setup/azure.py | 17 +- app/runtime/server/setup/foundry.py | 45 +- app/runtime/server/setup/preflight.py | 82 +-- app/runtime/server/setup/prerequisites.py | 8 +- app/runtime/server/setup/voice.py | 8 +- app/runtime/server/setup/voice_provision.py | 51 +- app/runtime/server/wiring.py | 49 +- app/runtime/services/cloud/azure.py | 33 +- .../services/cloud/runtime_identity.py | 49 +- app/runtime/services/deployment/__init__.py | 3 +- app/runtime/services/deployment/_models.py | 94 +++ .../services/deployment/aca_deployer.py | 192 ++---- .../services/deployment/aca_provision.py | 168 +++-- .../services/deployment/bicep_deployer.py | 592 ++++++++++-------- app/runtime/services/deployment/deployer.py | 84 +-- .../services/deployment/provisioner.py | 112 ++-- app/runtime/services/otel.py | 18 +- app/runtime/services/resource_tracker.py | 19 +- .../services/security/misconfig_checker.py | 180 +++--- .../services/security/preflight_identity.py | 71 +-- .../services/security/preflight_rbac.py | 113 ++-- .../services/security/preflight_secrets.py | 146 ++--- .../services/security/prompt_shield.py | 94 +-- .../services/security/security_preflight.py | 15 +- app/runtime/state/deploy_state.py | 37 +- app/runtime/state/foundry_iq_config.py | 18 +- app/runtime/state/guardrails/config.py | 100 ++- app/runtime/state/guardrails/risk.py | 13 +- app/runtime/state/infra_config.py | 52 +- app/runtime/state/memory.py | 18 +- app/runtime/state/monitoring_config.py | 41 +- app/runtime/state/proactive.py | 18 +- app/runtime/state/profile.py | 115 ++-- app/runtime/state/sandbox_config.py | 19 +- app/runtime/state/tool_activity_csv.py | 102 +++ app/runtime/state/tool_activity_store.py | 214 ++----- app/runtime/tests/test_bicep_deploy.py | 16 +- app/runtime/tests/test_e2e_aca_lifecycle.py | 432 +++++++++++++ app/runtime/tests/test_e2e_lifecycle.py | 471 ++++++++++++++ app/runtime/tests/test_e2e_setup_process.py | 210 +++++-- 
app/runtime/tests/test_identity_routes.py | 43 +- app/runtime/tests/test_media_outgoing.py | 12 - app/runtime/tests/test_provisioner.py | 5 +- app/runtime/util/__init__.py | 3 +- app/runtime/util/singletons.py | 40 ++ app/runtime/util/spotlight.py | 40 +- app/tui/src/deploy/docker.ts | 3 + app/tui/src/headless/aca_setup.ts | 425 +++++++++++++ app/tui/src/headless/setup.ts | 332 ++++++++++ app/tui/src/index.ts | 81 ++- entrypoint.sh | 30 +- 100 files changed, 4348 insertions(+), 3322 deletions(-) create mode 100644 app/runtime/services/deployment/_models.py create mode 100644 app/runtime/state/tool_activity_csv.py create mode 100644 app/runtime/tests/test_e2e_aca_lifecycle.py create mode 100644 app/runtime/tests/test_e2e_lifecycle.py create mode 100644 app/tui/src/headless/aca_setup.ts create mode 100644 app/tui/src/headless/setup.ts diff --git a/app/runtime/agent/__init__.py b/app/runtime/agent/__init__.py index 069c153..93e2e29 100644 --- a/app/runtime/agent/__init__.py +++ b/app/runtime/agent/__init__.py @@ -1,14 +1 @@ -"""Core Copilot SDK integration -- agent, sessions, tools, and prompts. - -Public submodules (import directly): - -- ``agent.agent`` -- ``Agent``, ``MAX_START_RETRIES`` -- ``agent.aitl`` -- ``AitlReviewer`` -- ``agent.event_handler`` -- ``EventHandler`` -- ``agent.hitl`` -- ``HitlInterceptor`` -- ``agent.one_shot`` -- ``run_one_shot``, ``auto_approve`` -- ``agent.phone_verify`` -- ``PhoneVerifier`` -- ``agent.policy_bridge`` -- ``build_engine``, ``config_to_yaml``, ... 
-- ``agent.prompt`` -- ``build_system_prompt``, ``load_soul``, ``TEMPLATES_DIR`` -- ``agent.tools`` -- ``get_all_tools``, ``ALL_TOOLS``, tool functions -""" +"""Core Copilot SDK integration -- agent, sessions, tools, and prompts.""" diff --git a/app/runtime/agent/agent.py b/app/runtime/agent/agent.py index cfab141..43334ba 100644 --- a/app/runtime/agent/agent.py +++ b/app/runtime/agent/agent.py @@ -109,12 +109,7 @@ async def stop(self) -> None: await self._safe_stop_client() async def reload_auth(self) -> dict[str, Any]: - """Reload configuration from ``.env`` and restart the Copilot client. - - Called by the ``/api/runtime/reload-auth`` endpoint when the admin - container writes new config to ``/data/.env`` after the runtime has - already booted. Handles Foundry BYOK endpoint changes. - """ + """Reload configuration and restart the Copilot client.""" old_endpoint = cfg.foundry_endpoint cfg.reload() new_endpoint = cfg.foundry_endpoint @@ -142,14 +137,7 @@ async def reload_auth(self) -> dict[str, Any]: } async def _verify_auth(self) -> None: - """Check that the Copilot CLI is authenticated and log the result. - - Sets ``_authenticated`` so that :meth:`send` can fail fast with a - useful error message instead of silently hanging for 120 seconds. - - In BYOK mode (Foundry endpoint configured), GitHub auth is not - required -- authentication happens per-session via bearer token. - """ + """Check that the Copilot CLI is authenticated.""" if not self._client: return @@ -170,52 +158,42 @@ async def _verify_auth(self) -> None: "for Foundry BYOK mode." ) except Exception: - # auth.getStatus may not be supported on older CLI versions; - # assume OK and let send() surface any real error. + # auth.getStatus may not be supported on older CLI versions. 
logger.debug("[agent.auth] auth status check unavailable", exc_info=True) - self._authenticated = True # optimistic + self._authenticated = True async def _verify_model(self) -> None: """Log whether the configured model is available and enabled.""" if not self._client: return if self._byok: - logger.info( - "[agent.model] BYOK mode -- using Foundry model %s", - cfg.copilot_model, - ) + logger.info("[agent.model] BYOK mode -- using Foundry model %s", cfg.copilot_model) return model_id = cfg.copilot_model try: models = await self._client.list_models() - available_ids = [m.id for m in models] match = next((m for m in models if m.id == model_id), None) if match: policy = match.policy.state if match.policy else "unknown" - if policy == "enabled": - logger.info("[agent.model] model %s is available (policy=enabled)", model_id) - else: + if policy != "enabled": logger.warning( "[agent.model] model %s found but policy=%s -- " "requests may fail silently. Change COPILOT_MODEL in .env", model_id, policy, ) + else: + logger.info("[agent.model] model %s is available (policy=enabled)", model_id) else: + available_ids = [m.id for m in models] logger.warning( - "[agent.model] model %s NOT found in %d available models: %s. " - "Requests may fail silently. Change COPILOT_MODEL in .env", + "[agent.model] model %s NOT found in %d available models: %s", model_id, len(available_ids), available_ids[:10], ) except Exception: logger.debug("[agent.model] could not list models", exc_info=True) def _start_stderr_monitor(self) -> None: - """Read the Copilot CLI subprocess stderr in a daemon thread. - - The SDK pipes stderr but never reads it, so auth failures, rate - limits, and API errors are completely invisible. This drains and - logs every line at WARNING level. 
- """ + """Drain Copilot CLI stderr in a daemon thread and log at WARNING.""" proc = getattr(self._client, "_process", None) if not proc: return @@ -257,18 +235,12 @@ async def _new_session_inner(self) -> Any: return self._session async def ensure_session(self) -> Any: - """Return the existing SDK session, or create one if none exists. - - Safe to call from multiple connections -- will not destroy an - active session that another caller may be using. - """ + """Return the existing SDK session, or create one if none exists.""" if self._session: return self._session if not self._client: raise RuntimeError("Agent not started") async with self._send_lock: - # Double-check after acquiring lock -- another caller may have - # created the session while we were waiting. if self._session: return self._session return await self._new_session_inner() @@ -315,7 +287,7 @@ async def _send_inner( on_event: Callable[[str, dict], None] | None, otel_span: object | None, ) -> str | None: - """Execute the actual send, wrapped by :meth:`send`'s OTel span.""" + """Execute the actual send within the OTel span.""" handler = EventHandler(on_delta, on_event) unsub = self._session.on(handler) try: @@ -413,11 +385,7 @@ async def list_models(self) -> list[dict]: @staticmethod def _list_foundry_models() -> list[dict]: - """Return models deployed on the Foundry endpoint. - - Reads ``DEPLOYED_MODELS`` (comma-separated) from ``.env``. - Falls back to the current ``COPILOT_MODEL`` if not set. - """ + """Return models deployed on the Foundry endpoint.""" raw = cfg.env.read("DEPLOYED_MODELS") or "" names = [n.strip() for n in raw.split(",") if n.strip()] if raw else [] if not names: @@ -555,12 +523,7 @@ def _build_session_config(self) -> dict[str, Any]: return session_cfg async def _abort_and_destroy_session(self) -> None: - """Abort any in-flight request, then destroy the session. 
- - Used after timeouts and cancellations to ensure the next ``send()`` - gets a clean session instead of reusing one stuck on a pending - model request. - """ + """Abort any in-flight request, then destroy the session.""" if self._session: try: await self._session.abort() diff --git a/app/runtime/agent/aitl.py b/app/runtime/agent/aitl.py index a9bdd51..505ea84 100644 --- a/app/runtime/agent/aitl.py +++ b/app/runtime/agent/aitl.py @@ -1,9 +1,4 @@ -"""Agent-in-the-Loop (AITL) reviewer. - -A background agent that reviews tool calls and conversation history to -decide whether to approve or deny a tool execution. The reviewer runs -as a separate Copilot SDK session with a single tool: ``submit_decision``. -""" +"""Agent-in-the-Loop (AITL) reviewer.""" from __future__ import annotations @@ -185,57 +180,27 @@ def on_event(event: Any) -> None: nonlocal event_count event_count += 1 etype = event.type - logger.debug( - "[aitl.event] #%d type=%s tool=%s", event_count, etype, tool_name - ) - if etype == SessionEventType.TOOL_EXECUTION_COMPLETE: - logger.info( - "[aitl.event] tool completed inside reviewer for tool=%s, " - "decision so far: approved=%s reason=%s", - tool_name, - decision["approved"], - decision["reason"], - ) - elif etype == SessionEventType.SESSION_IDLE: - logger.info( - "[aitl.event] SESSION_IDLE -- review done for tool=%s " - "(events=%d, approved=%s)", - tool_name, - event_count, - decision["approved"], - ) + logger.debug("[aitl.event] #%d type=%s tool=%s", event_count, etype, tool_name) + if etype == SessionEventType.SESSION_IDLE: + logger.info("[aitl.event] SESSION_IDLE -- review done for tool=%s (events=%d)", tool_name, event_count) done.set() elif etype == SessionEventType.SESSION_ERROR: err = str(event.data) if hasattr(event, "data") else "unknown" - logger.error( - "[aitl.event] SESSION_ERROR for tool=%s: %s", tool_name, err - ) + logger.error("[aitl.event] SESSION_ERROR for tool=%s: %s", tool_name, err) decision["reason"] = f"Review session error: 
{err}" done.set() unsub = session.on(on_event) try: - logger.info( - "[aitl.review] sending prompt to reviewer for tool=%s " - "(prompt_len=%d, timeout=%.0fs)", - tool_name, len(prompt), _REVIEW_TIMEOUT, - ) + logger.info("[aitl.review] sending prompt for tool=%s (len=%d)", tool_name, len(prompt)) await session.send({"prompt": prompt}) - logger.info("[aitl.review] prompt sent, waiting for reviewer decision...") await asyncio.wait_for(done.wait(), timeout=_REVIEW_TIMEOUT) - logger.info( - "[aitl.review] reviewer finished for tool=%s in %d events", - tool_name, event_count, - ) + logger.info("[aitl.review] reviewer finished for tool=%s in %d events", tool_name, event_count) except TimeoutError: - logger.warning( - "[aitl.review] timed out after %.0fs", _REVIEW_TIMEOUT - ) + logger.warning("[aitl.review] timed out after %.0fs", _REVIEW_TIMEOUT) decision["reason"] = "Review timed out" except Exception as exc: - logger.error( - "[aitl.review] send failed: %s", exc, exc_info=True - ) + logger.error("[aitl.review] send failed: %s", exc, exc_info=True) decision["reason"] = f"Review error: {exc}" finally: unsub() @@ -246,9 +211,7 @@ def on_event(event: Any) -> None: logger.info( "[aitl.review] tool=%s approved=%s reason=%s", - tool_name, - decision["approved"], - decision["reason"], + tool_name, decision["approved"], decision["reason"], ) return decision["approved"], decision["reason"] diff --git a/app/runtime/agent/byok.py b/app/runtime/agent/byok.py index 9e46647..69c02c5 100644 --- a/app/runtime/agent/byok.py +++ b/app/runtime/agent/byok.py @@ -1,12 +1,4 @@ -"""BYOK provider configuration for the Copilot SDK. - -Builds a ``provider`` dict for ``CopilotClient.create_session()`` that -points at a Foundry (Azure AI Services) endpoint using Entra ID -bearer-token authentication -- no API keys required. - -Token acquisition uses ``az account get-access-token`` so it works with -whatever identity is logged in (user, service principal, managed identity). 
-""" +"""BYOK provider configuration for the Copilot SDK.""" from __future__ import annotations @@ -46,11 +38,7 @@ def get_bearer_token() -> str: def build_provider_config() -> dict[str, Any] | None: - """Build the BYOK provider dict for a Copilot SDK session. - - Returns ``None`` when Foundry is not configured, which signals the - caller to fall back to GitHub Copilot authentication. - """ + """Build the BYOK provider dict for a Copilot SDK session.""" endpoint = cfg.foundry_endpoint if not endpoint: return None @@ -69,11 +57,7 @@ def build_provider_config() -> dict[str, Any] | None: def build_session_overrides() -> dict[str, Any]: - """Return extra kwargs to merge into session config when BYOK is active. - - These override the model and inject the provider block. Returns an - empty dict when BYOK is not configured. - """ + """Return extra session config kwargs when BYOK is active.""" provider = build_provider_config() if provider is None: return {} diff --git a/app/runtime/agent/event_handler.py b/app/runtime/agent/event_handler.py index e43af42..f8833d6 100644 --- a/app/runtime/agent/event_handler.py +++ b/app/runtime/agent/event_handler.py @@ -1,7 +1,4 @@ -"""Copilot SDK session event handler. - -Dispatch table replaces deep if/elif chains for session events. -""" +"""Copilot SDK session event handler.""" from __future__ import annotations diff --git a/app/runtime/agent/hitl.py b/app/runtime/agent/hitl.py index 4691789..6844b79 100644 --- a/app/runtime/agent/hitl.py +++ b/app/runtime/agent/hitl.py @@ -32,26 +32,16 @@ class HitlInterceptor: - """Human-in-the-loop tool approval interceptor. - - Per-turn state (emit, model, session context) is bound via - :meth:`bind_turn` and released via :meth:`unbind_turn`. Persistent - wiring (phone verifier, AITL reviewer, prompt shield) is set once - during application startup. 
- """ + """Human-in-the-loop tool approval interceptor.""" def __init__(self, guardrails: GuardrailsConfigStore) -> None: self._guardrails = guardrails - - # -- per-turn state (bound/unbound each agent turn) ---------------- self._emit: Callable[[str, dict[str, Any]], None] | None = None self._bot_reply_fn: Callable[[str], Awaitable[None]] | None = None self._execution_context: str = "" self._model: str = "" self._session_id: str = "" self._tool_activity: ToolActivityStore | None = None - - # -- persistent state ---------------------------------------------- self._pending: dict[str, asyncio.Future[bool]] = {} self._phone_verifier: PhoneVerifier | None = None self._aitl_reviewer: AitlReviewer | None = None @@ -88,8 +78,6 @@ def unbind_turn(self) -> None: self._session_id = "" self._tool_activity = None - # -- persistent wiring ------------------------------------------------- - def set_phone_verifier(self, verifier: PhoneVerifier) -> None: self._phone_verifier = verifier @@ -224,10 +212,14 @@ async def _evaluate_tool(self, input_data: dict, tool_name: str) -> dict: call_id, tool_name, args_str, mcp_server, ) + def _track(self, tool_name: str, strategy: str) -> None: + """Record a resolved strategy for later retrieval.""" + self._resolved_strategies.setdefault(tool_name, []).append(strategy) + def _make_deny(self, call_id: str, tool_name: str) -> dict: """Build a deny response and emit an event.""" logger.info("[hitl.hook] DENY tool=%s call_id=%s", tool_name, call_id) - self._resolved_strategies.setdefault(tool_name, []).append("deny") + self._track(tool_name, "deny") if self._emit: self._emit("tool_denied", { "call_id": call_id, @@ -237,17 +229,9 @@ def _make_deny(self, call_id: str, tool_name: str) -> dict: return dict(_DENY) async def _dispatch_strategy( - self, - strategy: str, - call_id: str, - tool_name: str, - args_str: str, + self, strategy: str, call_id: str, tool_name: str, args_str: str, ) -> dict | None: - """Delegate to a strategy-specific handler. 
- - Returns a decision dict, or ``None`` to fall through to - interactive approval. - """ + """Delegate to a strategy-specific handler.""" if strategy == "aitl": return await self._handle_aitl(call_id, tool_name, args_str) if strategy == "filter": @@ -260,7 +244,7 @@ async def _handle_aitl( self, call_id: str, tool_name: str, args_str: str, ) -> dict | None: """AI-in-the-loop review.""" - self._resolved_strategies.setdefault(tool_name, []).append("aitl") + self._track(tool_name, "aitl") if self._aitl_reviewer: return await self._apply_aitl(call_id, tool_name, args_str) logger.warning( @@ -274,7 +258,7 @@ async def _handle_filter( self, call_id: str, tool_name: str, args_str: str, ) -> dict | None: """Content-safety filter.""" - self._resolved_strategies.setdefault(tool_name, []).append("filter") + self._track(tool_name, "filter") if self._prompt_shield: result = await self._apply_filter(call_id, tool_name, args_str) if result is not None: @@ -300,7 +284,7 @@ async def _handle_pitl( self, call_id: str, tool_name: str, args_str: str, ) -> dict | None: """Phone-in-the-loop verification.""" - self._resolved_strategies.setdefault(tool_name, []).append("pitl") + self._track(tool_name, "pitl") if self._phone_verifier: logger.info("[hitl.hook] PITL routing to phone: tool=%s", tool_name) return await self._ask_phone(call_id, tool_name, args_str) @@ -338,21 +322,21 @@ async def _route_interactive( logger.info( "[hitl.hook] routing to phone channel: tool=%s", tool_name, ) - self._resolved_strategies.setdefault(tool_name, []).append("pitl") + self._track(tool_name, "pitl") return await self._ask_phone(call_id, tool_name, args_str) if self._bot_reply_fn: logger.info( "[hitl.hook] routing to bot channel: tool=%s", tool_name, ) - self._resolved_strategies.setdefault(tool_name, []).append("hitl") + self._track(tool_name, "hitl") return await self._ask_bot_channel(call_id, tool_name, args_str) if self._emit: logger.info( "[hitl.hook] routing to web chat: tool=%s", tool_name, ) - 
self._resolved_strategies.setdefault(tool_name, []).append("hitl") + self._track(tool_name, "hitl") return await self._ask_chat(call_id, tool_name, args_str) logger.error( diff --git a/app/runtime/agent/hitl_channels.py b/app/runtime/agent/hitl_channels.py index f5bc063..d1fb978 100644 --- a/app/runtime/agent/hitl_channels.py +++ b/app/runtime/agent/hitl_channels.py @@ -20,49 +20,45 @@ _APPROVAL_TIMEOUT = 300.0 -async def ask_chat_approval( - *, - emit: Callable[[str, dict[str, Any]], None], +async def _wait_for_approval( pending: dict[str, asyncio.Future[bool]], call_id: str, tool_name: str, - args_str: str, timeout: float = _APPROVAL_TIMEOUT, -) -> dict[str, str]: - """Request approval via the WebSocket chat channel.""" - logger.info( - "[hitl.chat] sending approval_request via WebSocket: " - "tool=%s call_id=%s", - tool_name, call_id, - ) - emit("approval_request", { - "call_id": call_id, - "tool": tool_name, - "arguments": args_str, - }) - logger.info("[hitl.chat] approval_request emitted, waiting for response...") - +) -> bool: + """Register a future and wait for approval/denial or timeout.""" loop = asyncio.get_running_loop() future: asyncio.Future[bool] = loop.create_future() pending[call_id] = future - try: - approved = await asyncio.wait_for(future, timeout=timeout) + return await asyncio.wait_for(future, timeout=timeout) except asyncio.TimeoutError: logger.warning("[hitl] approval timed out: call_id=%s tool=%s", call_id, tool_name) - approved = False + return False finally: pending.pop(call_id, None) + +async def ask_chat_approval( + *, + emit: Callable[[str, dict[str, Any]], None], + pending: dict[str, asyncio.Future[bool]], + call_id: str, + tool_name: str, + args_str: str, + timeout: float = _APPROVAL_TIMEOUT, +) -> dict[str, str]: + """Request approval via the WebSocket chat channel.""" + logger.info("[hitl.chat] approval_request: tool=%s call_id=%s", tool_name, call_id) + emit("approval_request", { + "call_id": call_id, "tool": tool_name, 
"arguments": args_str, + }) + + approved = await _wait_for_approval(pending, call_id, tool_name, timeout) decision = "allow" if approved else "deny" - logger.info( - "[hitl.chat] decision: tool=%s call_id=%s approved=%s decision=%s", - tool_name, call_id, approved, decision, - ) + logger.info("[hitl.chat] decision: tool=%s call_id=%s decision=%s", tool_name, call_id, decision) emit("approval_resolved", { - "call_id": call_id, - "tool": tool_name, - "approved": approved, + "call_id": call_id, "tool": tool_name, "approved": approved, }) return {"permissionDecision": decision} @@ -83,39 +79,19 @@ async def ask_bot_approval( f"Arguments: `{truncated}`\n\n" f"Reply **y** to approve or anything else to deny." ) - logger.info( - "[hitl] bot-channel approval request: tool=%s call_id=%s", - tool_name, call_id, - ) + logger.info("[hitl] bot-channel approval request: tool=%s call_id=%s", tool_name, call_id) try: await bot_reply_fn(confirmation_msg) except Exception: logger.exception("[hitl] failed to send bot approval message: call_id=%s", call_id) return {"permissionDecision": "deny"} - loop = asyncio.get_running_loop() - future: asyncio.Future[bool] = loop.create_future() - pending[call_id] = future - - try: - approved = await asyncio.wait_for(future, timeout=timeout) - except asyncio.TimeoutError: - logger.warning("[hitl] bot approval timed out: call_id=%s tool=%s", call_id, tool_name) - approved = False - finally: - pending.pop(call_id, None) - + approved = await _wait_for_approval(pending, call_id, tool_name, timeout) decision = "allow" if approved else "deny" - logger.info( - "[hitl] bot-channel decision: tool=%s call_id=%s decision=%s", - tool_name, call_id, decision, - ) + logger.info("[hitl] bot-channel decision: tool=%s call_id=%s decision=%s", tool_name, call_id, decision) - outcome_msg = ( - f"Tool **{tool_name}** {'approved' if approved else 'denied'}." 
- ) try: - await bot_reply_fn(outcome_msg) + await bot_reply_fn(f"Tool **{tool_name}** {'approved' if approved else 'denied'}.") except Exception: logger.exception("[hitl] failed to send bot outcome message: call_id=%s", call_id) @@ -211,12 +187,7 @@ async def apply_filter_check( tool_name: str, args_str: str, ) -> tuple[dict[str, str] | None, dict[str, Any]]: - """Run a Prompt Shield content-safety check. - - Returns ``(decision | None, shield_result_info)``. When ``decision`` - is ``None`` the content passed the filter and the caller should - continue with the next step. - """ + """Run a Prompt Shield content-safety check.""" import time as _time t0 = _time.monotonic() diff --git a/app/runtime/agent/one_shot.py b/app/runtime/agent/one_shot.py index de50546..430cbb6 100644 --- a/app/runtime/agent/one_shot.py +++ b/app/runtime/agent/one_shot.py @@ -1,8 +1,4 @@ -"""One-shot Copilot session runner. - -Spawns an ephemeral CopilotClient to execute a single prompt. Used by the -scheduler and memory formation to avoid re-using the interactive session. 
-""" +"""Ephemeral one-shot Copilot session runner.""" from __future__ import annotations diff --git a/app/runtime/agent/phone_verify.py b/app/runtime/agent/phone_verify.py index c02b28c..2e75735 100644 --- a/app/runtime/agent/phone_verify.py +++ b/app/runtime/agent/phone_verify.py @@ -76,10 +76,8 @@ async def request_verification( cleanup = _register_verify_tools(middleware, call_id, self) logger.info( - "[phone_verify] setting exclusive prompt: call_id=%s tool=%s " - "tools=%d prompt_len=%d opening_len=%d", - call_id, tool_name, len(VERIFY_TOOL_SCHEMAS), - len(prompt), len(opening), + "[phone_verify] setting exclusive prompt: call_id=%s tool=%s", + call_id, tool_name, ) middleware.set_pending_prompt( prompt, @@ -91,16 +89,9 @@ async def request_verification( target = cfg.voice_target_number try: caller = voice_handler._caller - logger.info( - "[phone_verify] initiating call: number=%s tool=%s call_id=%s", - target, tool_name, call_id, - ) + logger.info("[phone_verify] initiating call: number=%s tool=%s", target, tool_name) await caller.initiate_call(target) - logger.info( - "[phone_verify] call initiated successfully, waiting for " - "decision: call_id=%s", - call_id, - ) + logger.info("[phone_verify] call initiated, waiting for decision: call_id=%s", call_id) except Exception: logger.exception("[phone_verify] call initiation failed") self._pending.pop(call_id, None) @@ -110,20 +101,13 @@ async def request_verification( try: approved = await asyncio.wait_for(future, timeout=_PHONE_VERIFY_TIMEOUT) except TimeoutError: - logger.warning( - "[phone_verify] timed out waiting for decision: call_id=%s", call_id, - ) + logger.warning("[phone_verify] timed out: call_id=%s", call_id) approved = False finally: self._pending.pop(call_id, None) cleanup() - logger.info( - "[phone_verify] cleaned up verify tools: call_id=%s", call_id, - ) - logger.info( - "[phone_verify] decision: call_id=%s approved=%s", call_id, approved, - ) + logger.info("[phone_verify] decision: call_id=%s 
approved=%s", call_id, approved) return approved def resolve(self, call_id: str, approved: bool) -> bool: diff --git a/app/runtime/agent/policy_bridge.py b/app/runtime/agent/policy_bridge.py index 24d1708..9c156a6 100644 --- a/app/runtime/agent/policy_bridge.py +++ b/app/runtime/agent/policy_bridge.py @@ -1,13 +1,4 @@ -"""Bridge between GuardrailsConfig and the agent-policy-guard PolicyEngine. - -Converts the runtime's guardrails configuration (nested dicts, presets, -model columns) into an agent-policy YAML document and evaluates tool -invocations through the guard ``PolicyEngine``. - -The guard library is imported through a single top-level import so that -swapping to an externally-installed package later requires no code changes -here. -""" +"""Bridge between GuardrailsConfig and the agent-policy-guard PolicyEngine.""" from __future__ import annotations @@ -17,10 +8,6 @@ import yaml -# ── Guard library imports ──────────────────────────────────────────────── -# Published at https://github.com/agent-policy/guard (Python SDK under -# ``python/`` subdirectory). Installed via the Git dependency in -# ``pyproject.toml``. from agent_policy_guard import ( EvalContext, PolicyEngine, @@ -30,17 +17,11 @@ logger = logging.getLogger(__name__) -# Priority bands for generated policies. Lower number = higher priority. -# Cascade: model_policies > tool_policies > context_defaults > rules > global. -# Model policies are most specific (model + context + tool) so they win. -# -# Bands are spaced 10 000 apart so that even with thousands of policies per -# band (e.g. 8 models × 2 contexts × 40 tools = 640), counters never bleed -# into the next band. -_PRIORITY_MODEL_TOOL = 10_000 # model + context + tool (most specific, wins) -_PRIORITY_CTX_TOOL = 20_000 # context + tool -_PRIORITY_CTX_DEFAULT = 30_000 # context catch-all (beats rules) -_PRIORITY_RULE = 80_000 # legacy rules (lowest explicit policy) +# Priority bands: model_policies > tool_policies > context_defaults > rules. 
+_PRIORITY_MODEL_TOOL = 10_000 +_PRIORITY_CTX_TOOL = 20_000 +_PRIORITY_CTX_DEFAULT = 30_000 +_PRIORITY_RULE = 80_000 # Background agent IDs that fall back to "background" context. _BG_AGENT_IDS = frozenset({ @@ -64,15 +45,7 @@ def config_to_yaml( model_policies: dict[str, dict[str, dict[str, str]]], rules: list[dict[str, Any]] | None = None, ) -> str: - """Convert a guardrails config into an agent-policy YAML string. - - The generated document is a valid ``PolicySet`` that can be loaded by - the guard library. The conversion is deterministic: same input always - produces the same YAML. - - When ``hitl_enabled`` is ``False`` no policies are emitted so the - engine returns ``"allow"`` for every tool call. - """ + """Convert a guardrails config into an agent-policy YAML string.""" # ── Short-circuit: guardrails disabled → everything allowed ── if not hitl_enabled: doc: dict[str, Any] = { @@ -90,7 +63,7 @@ def config_to_yaml( policies: list[dict[str, Any]] = [] priority_counter = _PRIORITY_MODEL_TOOL - # ── 1. Model-scoped tool policies (highest priority) ───────── + # ── 1. Model-scoped tool policies ───────────────────────── for model in sorted(model_columns): ctx_map = model_policies.get(model, {}) for ctx in sorted(ctx_map): @@ -107,10 +80,9 @@ def config_to_yaml( }) priority_counter += 1 - # Reset counter for next band priority_counter = _PRIORITY_CTX_TOOL - # ── 2. Context-scoped tool policies ────────────────────────── + # ── 2. Context-scoped tool policies ────────────────────── for ctx in sorted(tool_policies): tool_map = tool_policies[ctx] for tool in sorted(tool_map): @@ -123,7 +95,7 @@ def config_to_yaml( }) priority_counter += 1 - # ── 3. Legacy rules ────────────────────────────────────────── + # ── 3. Legacy rules ────────────────────────────────────── if rules: priority_counter = max(priority_counter, _PRIORITY_RULE) for rule in rules: @@ -151,7 +123,7 @@ def config_to_yaml( policies[-1]["channel"] = "phone" priority_counter += 1 - # ── 4. 
Context-level defaults ──────────────────────────────── + # ── 4. Context-level defaults ──────────────────────────── priority_counter = _PRIORITY_CTX_DEFAULT for ctx in sorted(context_defaults): effect = context_defaults[ctx] @@ -163,13 +135,13 @@ def config_to_yaml( }) priority_counter += 1 - # ── Context fallbacks for background agents ────────────────── + # ── Context fallbacks for background agents ── context_fallbacks: dict[str, str] = {} for agent_id in sorted(_BG_AGENT_IDS): if agent_id != "background": context_fallbacks[agent_id] = "background" - # ── Determine effective default ────────────────────────────── + # ── Effective default ── effective_default = default_action if hitl_enabled else "allow" doc: dict[str, Any] = { @@ -190,12 +162,7 @@ def config_to_yaml( def yaml_to_config(yaml_text: str) -> dict[str, Any]: - """Parse an agent-policy YAML string back into guardrails config fields. - - Returns a dict with ``context_defaults``, ``tool_policies``, - ``model_policies``, ``model_columns``, ``default_action``, and - ``default_channel`` that can be applied to the GuardrailsConfig. - """ + """Parse an agent-policy YAML string back into guardrails config fields.""" ps = load_policy_set_from_str(yaml_text) default_action = ps.defaults.effect.value diff --git a/app/runtime/agent/tools/cards.py b/app/runtime/agent/tools/cards.py index bfbba5f..1c68ab3 100644 --- a/app/runtime/agent/tools/cards.py +++ b/app/runtime/agent/tools/cards.py @@ -1,8 +1,4 @@ -"""Card tool definitions for the Copilot agent. - -Wraps the card queue and attachment builders from the messaging layer -into ``@define_tool`` functions that the LLM can invoke. 
-""" +"""Card tool definitions for the Copilot agent.""" from __future__ import annotations @@ -35,12 +31,7 @@ class HeroCardParams(BaseModel): buttons: str = Field(default="[]", description="JSON array of button objects.") -class ThumbnailCardParams(BaseModel): - title: str = Field(default="", description="Card title") - subtitle: str = Field(default="", description="Card subtitle") - text: str = Field(default="", description="Card body text") - image_url: str | None = Field(default=None, description="URL of the thumbnail image") - buttons: str = Field(default="[]", description="JSON array of button objects.") +ThumbnailCardParams = HeroCardParams class CardCarouselParams(BaseModel): @@ -62,22 +53,25 @@ def send_adaptive_card(params: AdaptiveCardParams) -> dict: return {"status": "queued", "fallback_text": params.fallback_text, "elements": len(card_data.get("body", []))} +def _parse_buttons(raw: str | list) -> list: + if isinstance(raw, str): + try: + return json.loads(raw) if raw else [] + except json.JSONDecodeError: + return [] + return raw + + @define_tool(description="Send a Hero Card with large image, title, and action buttons.") def send_hero_card(params: HeroCardParams) -> dict: - try: - buttons = json.loads(params.buttons) if params.buttons else [] - except json.JSONDecodeError: - buttons = [] + buttons = _parse_buttons(params.buttons) _default_queue.enqueue(_hero_card_attachment(title=params.title, subtitle=params.subtitle, text=params.text, image_url=params.image_url, buttons=buttons)) return {"status": "queued", "title": params.title} @define_tool(description="Send a Thumbnail Card with smaller image and compact layout.") def send_thumbnail_card(params: ThumbnailCardParams) -> dict: - try: - buttons = json.loads(params.buttons) if params.buttons else [] - except json.JSONDecodeError: - buttons = [] + buttons = _parse_buttons(params.buttons) _default_queue.enqueue(_thumbnail_card_attachment(title=params.title, subtitle=params.subtitle, text=params.text, 
image_url=params.image_url, buttons=buttons)) return {"status": "queued", "title": params.title} @@ -91,21 +85,17 @@ def send_card_carousel(params: CardCarouselParams) -> dict: if not isinstance(cards, list): return {"error": "cards_json must be a JSON array."} + _CARD_BUILDERS = { + "adaptive": lambda c: _adaptive_card_attachment(c), + "hero": lambda c: _hero_card_attachment(title=c.get("title", ""), subtitle=c.get("subtitle", ""), text=c.get("text", ""), image_url=c.get("image_url"), buttons=_parse_buttons(c.get("buttons", []))), + "thumbnail": lambda c: _thumbnail_card_attachment(title=c.get("title", ""), subtitle=c.get("subtitle", ""), text=c.get("text", ""), image_url=c.get("image_url"), buttons=_parse_buttons(c.get("buttons", []))), + } + count = 0 for card in cards: card_type = card.pop("type", "hero") - if card_type == "adaptive": - _default_queue.enqueue(_adaptive_card_attachment(card)) - elif card_type == "thumbnail": - buttons = card.get("buttons", []) - if isinstance(buttons, str): - buttons = json.loads(buttons) - _default_queue.enqueue(_thumbnail_card_attachment(title=card.get("title", ""), subtitle=card.get("subtitle", ""), text=card.get("text", ""), image_url=card.get("image_url"), buttons=buttons)) - else: - buttons = card.get("buttons", []) - if isinstance(buttons, str): - buttons = json.loads(buttons) - _default_queue.enqueue(_hero_card_attachment(title=card.get("title", ""), subtitle=card.get("subtitle", ""), text=card.get("text", ""), image_url=card.get("image_url"), buttons=buttons)) + builder = _CARD_BUILDERS.get(card_type, _CARD_BUILDERS["hero"]) + _default_queue.enqueue(builder(card)) count += 1 return {"status": "queued", "card_count": count} diff --git a/app/runtime/config/settings.py b/app/runtime/config/settings.py index 1dc73a6..5bf15ab 100644 --- a/app/runtime/config/settings.py +++ b/app/runtime/config/settings.py @@ -5,7 +5,6 @@ import enum import os import secrets -from dataclasses import dataclass, field from pathlib import Path 
from typing import ClassVar @@ -32,39 +31,6 @@ class ServerMode(enum.Enum): runtime = "runtime" -@dataclass -class BotConfig: - resource_group: str = "polyclaw-rg" - location: str = "eastus" - display_name: str = "polyclaw" - bot_handle: str = "" - - -@dataclass -class VoiceConfig: - acs_connection_string: str = "" - acs_source_number: str = "" - voice_target_number: str = "" - azure_openai_endpoint: str = "" - azure_openai_api_key: str = "" - azure_openai_realtime_deployment: str = "" - acs_callback_token: str = "" - acs_resource_id: str = "" - - -@dataclass -class AdminConfig: - port: int = 8000 - lockdown_mode: bool = False - tunnel_restricted: bool = False - - -@dataclass -class ModelConfig: - copilot_model: str = "gpt-4.1" - copilot_agent: str = "" - - class Settings: _DATA_DIR_ENV: ClassVar[str] = "POLYCLAW_DATA_DIR" @@ -84,7 +50,11 @@ def __init__(self) -> None: def reload(self) -> None: e = self._read - raw_mode = os.getenv("POLYCLAW_SERVER_MODE", "combined").lower() + raw_mode = ( + os.getenv("POLYCLAW_SERVER_MODE") + or os.getenv("POLYCLAW_MODE") + or "combined" + ).lower() try: self.server_mode: ServerMode = ServerMode(raw_mode) except ValueError: @@ -97,8 +67,6 @@ def reload(self) -> None: self.copilot_model: str = e("COPILOT_MODEL") or "gpt-4.1" self.copilot_agent: str = e("COPILOT_AGENT") or "" - - # Foundry (BYOK) configuration self.foundry_endpoint: str = e("FOUNDRY_ENDPOINT") self.foundry_name: str = e("FOUNDRY_NAME") self.foundry_resource_group: str = e("FOUNDRY_RESOURCE_GROUP") @@ -112,9 +80,7 @@ def reload(self) -> None: self.voice_target_number: str = e("VOICE_TARGET_NUMBER") self.azure_openai_endpoint: str = e("AZURE_OPENAI_ENDPOINT") self.azure_openai_api_key: str = e("AZURE_OPENAI_API_KEY") - self.azure_openai_realtime_deployment: str = ( - e("AZURE_OPENAI_REALTIME_DEPLOYMENT") or "gpt-realtime-mini" - ) + self.azure_openai_realtime_deployment: str = e("AZURE_OPENAI_REALTIME_DEPLOYMENT") or "gpt-realtime-mini" self._acs_callback_token = 
e("ACS_CALLBACK_TOKEN") or secrets.token_urlsafe(32) self.acs_resource_id: str = self._derive_acs_resource_id() @@ -123,7 +89,9 @@ def reload(self) -> None: self.memory_model: str = e("MEMORY_MODEL") or "gpt-4.1" self.memory_idle_minutes: int = int(e("MEMORY_IDLE_MINUTES") or "5") - self.proactive_enabled: bool = e("PROACTIVE_ENABLED").lower() in ("1", "true", "yes") if e("PROACTIVE_ENABLED") else False + self.proactive_enabled: bool = e("PROACTIVE_ENABLED").lower() in ( + "1", "true", "yes", + ) if e("PROACTIVE_ENABLED") else False self.runtime_sp_app_id: str = e("RUNTIME_SP_APP_ID") self.runtime_sp_password: str = e("RUNTIME_SP_PASSWORD") @@ -161,10 +129,6 @@ def memory_daily_dir(self) -> Path: def memory_topics_dir(self) -> Path: return self.memory_dir / "topics" - @property - def skills_dir(self) -> Path: - return self.data_dir / "skills" - @property def user_skills_dir(self) -> Path: return self.data_dir / "skills" @@ -255,7 +219,7 @@ def ensure_dirs(self) -> None: self.memory_dir, self.memory_daily_dir, self.memory_topics_dir, - self.skills_dir, + self.user_skills_dir, self.sessions_dir, ): d.mkdir(parents=True, exist_ok=True) diff --git a/app/runtime/env_cli.py b/app/runtime/env_cli.py index 75dbacb..5edba2d 100644 --- a/app/runtime/env_cli.py +++ b/app/runtime/env_cli.py @@ -3,6 +3,7 @@ from __future__ import annotations import argparse +import functools import json import sys from dataclasses import asdict @@ -13,37 +14,44 @@ from .state.deploy_state import DeployStateStore -def _bold(text: str) -> str: - return f"\033[1m{text}\033[0m" +def _ansi(code: int, text: str) -> str: + return f"\033[{code}m{text}\033[0m" -def _red(text: str) -> str: - return f"\033[31m{text}\033[0m" +_bold = functools.partial(_ansi, 1) +_red = functools.partial(_ansi, 31) +_green = functools.partial(_ansi, 32) +_yellow = functools.partial(_ansi, 33) +_cyan = functools.partial(_ansi, 36) +_SEVERITY_COLORS = {"critical": _red, "high": _red, "medium": _yellow, "low": _cyan, "info": 
_green} -def _green(text: str) -> str: - return f"\033[32m{text}\033[0m" +def _severity_color(severity: str) -> str: + fn = _SEVERITY_COLORS.get(severity) + return fn(severity.upper()) if fn else severity.upper() -def _yellow(text: str) -> str: - return f"\033[33m{text}\033[0m" +def _status_color(status: str) -> str: + return {"active": _green, "destroyed": _red}.get(status, _yellow)(status) -def _cyan(text: str) -> str: - return f"\033[36m{text}\033[0m" +def _require_deploy(store: DeployStateStore, deploy_id: str) -> object: + rec = store.get(deploy_id) + if not rec: + print(f"Deployment '{deploy_id}' not found.") + sys.exit(1) + return rec -def _severity_color(severity: str) -> str: - colors = {"critical": _red, "high": _red, "medium": _yellow, "low": _cyan, "info": _green} - return colors.get(severity, str)(severity.upper()) +def _confirm(prompt: str) -> bool: + return input(f"{prompt} [y/N] ").strip().lower() in ("y", "yes") -def _status_color(status: str) -> str: - if status == "active": - return _green(status) - if status == "destroyed": - return _red(status) - return _yellow(status) + +def _make_tracker() -> tuple: + az = AzureCLI() + store = DeployStateStore() + return az, store, ResourceTracker(az, store) def cmd_list(_args: argparse.Namespace) -> None: @@ -67,22 +75,18 @@ def cmd_list(_args: argparse.Namespace) -> None: def cmd_show(args: argparse.Namespace) -> None: store = DeployStateStore() - rec = store.get(args.deploy_id) - if not rec: - print(f"Deployment '{args.deploy_id}' not found.") - sys.exit(1) + rec = _require_deploy(store, args.deploy_id) if args.json: print(json.dumps(asdict(rec), indent=2)) return - print(f"{_bold('Deploy ID:')} {rec.deploy_id}") - print(f"{_bold('Tag:')} {rec.tag}") - print(f"{_bold('Kind:')} {rec.kind}") - print(f"{_bold('Status:')} {_status_color(rec.status)}") - print(f"{_bold('Created:')} {rec.created_at}") - print(f"{_bold('Updated:')} {rec.updated_at}") - print(f"{_bold('RGs:')} {', '.join(rec.resource_groups) or 
'-'}") + for label, val in [ + ("Deploy ID:", rec.deploy_id), ("Tag:", rec.tag), ("Kind:", rec.kind), + ("Status:", _status_color(rec.status)), ("Created:", rec.created_at), + ("Updated:", rec.updated_at), ("RGs:", ", ".join(rec.resource_groups) or "-"), + ]: + print(f"{_bold(f'{label:<12}')} {val}") print() if rec.resources: @@ -101,9 +105,7 @@ def cmd_show(args: argparse.Namespace) -> None: def cmd_audit(args: argparse.Namespace) -> None: - az = AzureCLI() - store = DeployStateStore() - tracker = ResourceTracker(az, store) + _az, _store, tracker = _make_tracker() print("Scanning Azure subscription for resources...") result = tracker.audit() @@ -149,18 +151,12 @@ def cmd_misconfig(args: argparse.Namespace) -> None: store = DeployStateStore() checker = MisconfigChecker(az) - resource_groups: list[str] = [] if args.deploy_id: - rec = store.get(args.deploy_id) - if not rec: - print(f"Deployment '{args.deploy_id}' not found.") - sys.exit(1) + rec = _require_deploy(store, args.deploy_id) resource_groups = rec.resource_groups print(f"Scanning resource groups for deployment {args.deploy_id}...") else: - for rec in store.all_deployments.values(): - resource_groups.extend(rec.resource_groups) - resource_groups = list(set(resource_groups)) + resource_groups = list({rg for r in store.all_deployments.values() for rg in r.resource_groups}) print(f"Scanning all tracked resource groups ({len(resource_groups)})...") if not resource_groups: @@ -193,21 +189,14 @@ def cmd_misconfig(args: argparse.Namespace) -> None: def cmd_cleanup(args: argparse.Namespace) -> None: - az = AzureCLI() - store = DeployStateStore() - tracker = ResourceTracker(az, store) - - rec = store.get(args.deploy_id) - if not rec: - print(f"Deployment '{args.deploy_id}' not found.") - sys.exit(1) + _az, store, tracker = _make_tracker() + rec = _require_deploy(store, args.deploy_id) if not args.yes: print(f"This will delete all Azure resource groups for deployment {args.deploy_id}:") for rg in rec.resource_groups: 
print(f" - {rg}") - answer = input("Proceed? [y/N] ").strip().lower() - if answer not in ("y", "yes"): + if not _confirm("Proceed?"): print("Aborted.") return @@ -227,9 +216,7 @@ def cmd_remove(args: argparse.Namespace) -> None: def cmd_cleanup_orphans(args: argparse.Namespace) -> None: - az = AzureCLI() - store = DeployStateStore() - tracker = ResourceTracker(az, store) + _az, _store, tracker = _make_tracker() print("Running audit to find orphaned resource groups...") result = tracker.audit() @@ -243,8 +230,7 @@ def cmd_cleanup_orphans(args: argparse.Namespace) -> None: print(f" - {g.name}") if not args.yes: - answer = input("Delete all orphaned resource groups? [y/N] ").strip().lower() - if answer not in ("y", "yes"): + if not _confirm("Delete all orphaned resource groups?"): print("Aborted.") return diff --git a/app/runtime/keyvault_resolve.py b/app/runtime/keyvault_resolve.py index 6baf1cf..c63f4c6 100644 --- a/app/runtime/keyvault_resolve.py +++ b/app/runtime/keyvault_resolve.py @@ -15,25 +15,15 @@ def main() -> None: if not kv.enabled: return - refs: dict[str, str] = {} for key, value in os.environ.items(): - if is_kv_ref(value): - refs[key] = value - - if not refs: - return - - resolved: dict[str, str] = {} - failed: list[str] = [] - for key, value in refs.items(): + if not is_kv_ref(value): + continue try: result = kv.resolve({key: value}) - resolved.update(result) + for rk, rv in result.items(): + print(f"export {rk}={shlex.quote(rv)}") except Exception: - failed.append(key) - - for key, value in resolved.items(): - print(f"export {key}={shlex.quote(value)}") + pass if __name__ == "__main__": diff --git a/app/runtime/media/classify.py b/app/runtime/media/classify.py index c237ea4..7898cae 100644 --- a/app/runtime/media/classify.py +++ b/app/runtime/media/classify.py @@ -21,18 +21,11 @@ ".mov": "video/quicktime", } -_IMAGE_TYPES = {v for v in EXTENSION_TO_MIME.values() if v.startswith("image/")} -_AUDIO_TYPES = {v for v in EXTENSION_TO_MIME.values() if 
v.startswith("audio/")} -_VIDEO_TYPES = {v for v in EXTENSION_TO_MIME.values() if v.startswith("video/")} +_KNOWN_MIMES = frozenset(EXTENSION_TO_MIME.values()) def classify(content_type: str) -> str: """Return ``'image'``, ``'audio'``, ``'video'``, or ``'file'``.""" mime = content_type.lower().split(";")[0].strip() - if mime in _IMAGE_TYPES: - return "image" - if mime in _AUDIO_TYPES: - return "audio" - if mime in _VIDEO_TYPES: - return "video" - return "file" + prefix = mime.split("/")[0] + return prefix if prefix in ("image", "audio", "video") and mime in _KNOWN_MIMES else "file" diff --git a/app/runtime/media/incoming.py b/app/runtime/media/incoming.py index 19721f1..64850a5 100644 --- a/app/runtime/media/incoming.py +++ b/app/runtime/media/incoming.py @@ -42,11 +42,9 @@ async def download_attachment(attachment: Attachment, channel_id: str) -> dict | try: await run_sync(_sync_download, url, str(local_path)) - content_type = ( - attachment.content_type - or mimetypes.guess_type(name)[0] - or "application/octet-stream" - ) + content_type = (attachment.content_type + or mimetypes.guess_type(name)[0] + or "application/octet-stream") return { "filename": name, "local_path": str(local_path), diff --git a/app/runtime/media/outgoing.py b/app/runtime/media/outgoing.py index 97fb736..3ad251b 100644 --- a/app/runtime/media/outgoing.py +++ b/app/runtime/media/outgoing.py @@ -32,16 +32,17 @@ def _try_resize_image(entry: Path, max_bytes: int) -> bool: logger.warning("Pillow not installed -- cannot auto-resize images") return False + _FORMAT_MAP = { + ".png": ("PNG", ".png"), ".webp": ("WEBP", ".webp"), + } + try: img = Image.open(entry) - output_format = "JPEG" - output_ext = ".jpg" - if entry.suffix.lower() == ".png" and img.mode == "RGBA": - output_format = "PNG" - output_ext = ".png" - elif entry.suffix.lower() == ".webp": - output_format = "WEBP" - output_ext = ".webp" + ext_lower = entry.suffix.lower() + if ext_lower == ".png" and img.mode != "RGBA": + output_format, 
output_ext = "JPEG", ".jpg" + else: + output_format, output_ext = _FORMAT_MAP.get(ext_lower, ("JPEG", ".jpg")) if img.mode not in ("RGB", "RGBA"): img = img.convert("RGB") @@ -114,6 +115,8 @@ def collect_pending_outgoing() -> list[Attachment]: continue if file_size > MAX_OUTGOING_FILE_BYTES: + too_large = (f"File too large: {file_size:,} bytes " + f"(limit is {MAX_OUTGOING_FILE_BYTES:,} bytes / ~190 KB).") if _try_resize_image(entry, MAX_OUTGOING_FILE_BYTES): resized = next( (c for c in entry.parent.glob(f"{entry.stem}.*") @@ -124,10 +127,10 @@ def collect_pending_outgoing() -> list[Attachment]: entry = resized file_size = entry.stat().st_size else: - _move_to_error(entry, _too_large_msg(file_size, "Auto-resize produced no output.")) + _move_to_error(entry, f"{too_large} Auto-resize produced no output.") continue else: - _move_to_error(entry, _too_large_msg(file_size)) + _move_to_error(entry, too_large) continue content_type = ( @@ -157,16 +160,6 @@ def collect_pending_outgoing() -> list[Attachment]: return attachments -def _too_large_msg(file_size: int, extra: str = "") -> str: - msg = ( - f"File too large: {file_size:,} bytes " - f"(limit is {MAX_OUTGOING_FILE_BYTES:,} bytes / ~190 KB)." - ) - if extra: - msg += f" {extra}" - return msg - - def move_attachments_to_error(attachments: list[Attachment], reason: str) -> None: error_dir = cfg.media_outgoing_error_dir error_dir.mkdir(parents=True, exist_ok=True) diff --git a/app/runtime/messaging/bot.py b/app/runtime/messaging/bot.py index 7c944f5..28ceb96 100644 --- a/app/runtime/messaging/bot.py +++ b/app/runtime/messaging/bot.py @@ -1,8 +1,4 @@ -"""Bot Framework ActivityHandler -- routes channel messages to the Agent. - -Uses background processing + proactive messaging so the Bot Framework -webhook returns within the 15-second timeout. 
-""" +"""Bot Framework ActivityHandler -- routes channel messages to the Agent.""" from __future__ import annotations @@ -97,27 +93,15 @@ async def _handle_message(self, turn_context: TurnContext) -> None: if not user_text and not media_attachments: return - # If there is a pending HITL approval from a scheduled task, - # resolve it first so the background session can continue. - if self._scheduler and self._scheduler.has_pending_approval and user_text: - resolved = self._scheduler.resolve_pending_approval(user_text) - if resolved: - logger.info( - "[bot] resolved pending SCHEDULER approval with text=%r", - user_text[:60], - ) - return - - # If there is a pending HITL approval, resolve it with the user's text - # instead of starting a new agent turn. - if self._hitl and self._hitl.has_pending_approval and user_text: - resolved = self._hitl.resolve_bot_reply(user_text) - if resolved: - logger.info( - "[bot] resolved pending HITL approval with text=%r", - user_text[:60], - ) - return + # If there is a pending HITL approval (scheduler or direct), resolve it. 
+ if user_text: + for source, obj in (("SCHEDULER", self._scheduler), ("HITL", self._hitl)): + if obj and obj.has_pending_approval: + resolve = (obj.resolve_pending_approval if source == "SCHEDULER" + else obj.resolve_bot_reply) + if resolve(user_text): + logger.info("[bot] resolved pending %s approval with text=%r", source, user_text[:60]) + return channel = (turn_context.activity.channel_id or "unknown").lower() @@ -176,9 +160,8 @@ def _is_authorized(turn_context: TurnContext) -> bool: channel = (turn_context.activity.channel_id or "").lower() if channel != "telegram" or not cfg.telegram_whitelist: return True - sender_id = ( - turn_context.activity.from_property.id if turn_context.activity.from_property else "" - ) + sender_id = (turn_context.activity.from_property.id + if turn_context.activity.from_property else "") if sender_id not in cfg.telegram_whitelist: logger.warning("Blocked Telegram user %s (not in whitelist)", sender_id) return False diff --git a/app/runtime/messaging/cards.py b/app/runtime/messaging/cards.py index 7dc5a89..ec0ab22 100644 --- a/app/runtime/messaging/cards.py +++ b/app/runtime/messaging/cards.py @@ -1,8 +1,4 @@ -"""Rich card support -- Adaptive Cards, Hero Cards, and carousels. - -Cards are queued in-memory and drained by the bot / proactive messaging -layer when the response is delivered. Thread-safe via internal lock. 
-""" +"""Rich card support -- Adaptive Cards, Hero Cards, and carousels.""" from __future__ import annotations @@ -103,24 +99,12 @@ def _simple_card_attachment( return Attachment(content_type=content_type, content=card) -def _hero_card_attachment( - title: str = "", - subtitle: str = "", - text: str = "", - image_url: str | None = None, - buttons: list[dict] | None = None, -) -> Attachment: - return _simple_card_attachment(HeroCard, "application/vnd.microsoft.card.hero", title, subtitle, text, image_url, buttons) +def _hero_card_attachment(**kwargs: Any) -> Attachment: + return _simple_card_attachment(HeroCard, "application/vnd.microsoft.card.hero", **kwargs) -def _thumbnail_card_attachment( - title: str = "", - subtitle: str = "", - text: str = "", - image_url: str | None = None, - buttons: list[dict] | None = None, -) -> Attachment: - return _simple_card_attachment(ThumbnailCard, "application/vnd.microsoft.card.thumbnail", title, subtitle, text, image_url, buttons) +def _thumbnail_card_attachment(**kwargs: Any) -> Attachment: + return _simple_card_attachment(ThumbnailCard, "application/vnd.microsoft.card.thumbnail", **kwargs) # -- serialization --------------------------------------------------------- diff --git a/app/runtime/messaging/commands/__init__.py b/app/runtime/messaging/commands/__init__.py index 209745d..e8e9c50 100644 --- a/app/runtime/messaging/commands/__init__.py +++ b/app/runtime/messaging/commands/__init__.py @@ -1,11 +1,4 @@ -"""Slash-command dispatcher and command implementations. 
- -Sub-modules group commands by domain: - -- ``agent`` -- skills, plugins, MCP, schedules -- ``session`` -- session lifecycle and model switching -- ``system`` -- status, infra, and connectivity commands -""" +"""Slash-command dispatcher and command implementations.""" from ._dispatcher import ( ChannelContext, diff --git a/app/runtime/messaging/commands/_dispatcher.py b/app/runtime/messaging/commands/_dispatcher.py index a8beee3..d4dc459 100644 --- a/app/runtime/messaging/commands/_dispatcher.py +++ b/app/runtime/messaging/commands/_dispatcher.py @@ -1,8 +1,4 @@ -"""Shared slash-command dispatcher. - -Centralises all slash-command logic so both the Bot Framework handler -and the WebSocket chat handler share a single implementation. -""" +"""Shared slash-command dispatcher.""" from __future__ import annotations @@ -40,40 +36,32 @@ class CommandContext: channel_ctx: ChannelContext | None = None +# Maps command name -> (module, function_name) +_CMD_TABLE: dict[str, tuple[object, str]] = {} + + +def _register(module: object, *names: str) -> None: + for name in names: + _CMD_TABLE[name] = (module, f"cmd_{name.lstrip('/')}") + + +def _init_commands() -> None: + _register(_session_cmds, "/new", "/model", "/models", "/session", + "/sessions", "/change", "/clear") + _register(_agent_cmds, "/skills", "/addskill", "/removeskill", + "/plugins", "/plugin", "/mcp", "/schedules", "/schedule") + _register(_system_cmds, "/status", "/channels", "/profile", "/config", + "/preflight", "/phone", "/call", "/lockdown", "/help") + + +# Sub-command routing: /sessions clear -> cmd_sessions_sub, /session delete -> cmd_session_sub +_SUB_DISPATCH: dict[str, tuple[object, str]] = { + "/sessions": (_session_cmds, "cmd_sessions_sub"), + "/session": (_session_cmds, "cmd_session_sub"), +} + + class CommandDispatcher: - _EXACT_COMMANDS: dict[str, str] = { - "/new": "_cmd_new", - "/status": "_cmd_status", - "/skills": "_cmd_skills", - "/session": "_cmd_session", - "/channels": "_cmd_channels", - 
"/clear": "_cmd_clear", - "/help": "_cmd_help", - "/plugins": "_cmd_plugins", - "/mcp": "_cmd_mcp", - "/schedules": "_cmd_schedules", - "/sessions": "_cmd_sessions", - "/profile": "_cmd_profile", - "/config": "_cmd_config", - "/preflight": "_cmd_preflight", - "/call": "_cmd_call", - "/models": "_cmd_models", - "/change": "_cmd_change", - } - - _PREFIX_COMMANDS: tuple[tuple[str, str], ...] = ( - ("/removeskill", "_cmd_removeskill"), - ("/addskill", "_cmd_addskill"), - ("/model", "_cmd_model"), - ("/plugin", "_cmd_plugin"), - ("/mcp", "_cmd_mcp"), - ("/schedule", "_cmd_schedule"), - ("/sessions", "_cmd_sessions_sub"), - ("/session", "_cmd_session_sub"), - ("/config", "_cmd_config"), - ("/phone", "_cmd_phone"), - ("/lockdown", "_cmd_lockdown"), - ) def __init__( self, @@ -84,6 +72,8 @@ def __init__( self._agent = agent self._session_store = session_store self._infra = infra + if not _CMD_TABLE: + _init_commands() @property def infra(self) -> InfraConfigStore: @@ -102,98 +92,25 @@ async def try_handle( lower = text.lower() ctx = CommandContext(text=text, reply=reply, channel=channel, channel_ctx=channel_ctx) - handler_name = self._EXACT_COMMANDS.get(lower) - if handler_name: - await getattr(self, handler_name)(ctx) + # Check for sub-commands first (e.g. "/sessions clear") + for prefix, (mod, fn_name) in _SUB_DISPATCH.items(): + if lower.startswith(prefix + " "): + parts = lower.split(None, 2) + if len(parts) >= 2: + await getattr(mod, fn_name)(self, ctx) + return True + + # Exact match + entry = _CMD_TABLE.get(lower) + if entry: + mod, fn_name = entry + await getattr(mod, fn_name)(self, ctx) return True - for prefix, handler_name in self._PREFIX_COMMANDS: - if lower.startswith(prefix): - await getattr(self, handler_name)(ctx) + # Prefix match (e.g. 
"/model gpt-4o" matches "/model") + for prefix, (mod, fn_name) in _CMD_TABLE.items(): + if lower.startswith(prefix + " "): + await getattr(mod, fn_name)(self, ctx) return True return False - - # -- Session & model commands (delegated to commands_session) ----------- - - async def _cmd_new(self, ctx: CommandContext) -> None: - await _session_cmds.cmd_new(self, ctx) - - async def _cmd_model(self, ctx: CommandContext) -> None: - await _session_cmds.cmd_model(self, ctx) - - async def _cmd_models(self, ctx: CommandContext) -> None: - await _session_cmds.cmd_models(self, ctx) - - async def _cmd_session(self, ctx: CommandContext) -> None: - await _session_cmds.cmd_session(self, ctx) - - async def _cmd_sessions(self, ctx: CommandContext) -> None: - await _session_cmds.cmd_sessions(self, ctx) - - async def _cmd_sessions_sub(self, ctx: CommandContext) -> None: - await _session_cmds.cmd_sessions_sub(self, ctx) - - async def _cmd_session_sub(self, ctx: CommandContext) -> None: - await _session_cmds.cmd_session_sub(self, ctx) - - async def _cmd_change(self, ctx: CommandContext) -> None: - await _session_cmds.cmd_change(self, ctx) - - async def _cmd_clear(self, ctx: CommandContext) -> None: - await _session_cmds.cmd_clear(self, ctx) - - # -- Agent commands (delegated to commands_agent) ---------------------- - - async def _cmd_skills(self, ctx: CommandContext) -> None: - await _agent_cmds.cmd_skills(self, ctx) - - async def _cmd_addskill(self, ctx: CommandContext) -> None: - await _agent_cmds.cmd_addskill(self, ctx) - - async def _cmd_removeskill(self, ctx: CommandContext) -> None: - await _agent_cmds.cmd_removeskill(self, ctx) - - async def _cmd_plugins(self, ctx: CommandContext) -> None: - await _agent_cmds.cmd_plugins(self, ctx) - - async def _cmd_plugin(self, ctx: CommandContext) -> None: - await _agent_cmds.cmd_plugin(self, ctx) - - async def _cmd_mcp(self, ctx: CommandContext) -> None: - await _agent_cmds.cmd_mcp(self, ctx) - - async def _cmd_schedules(self, ctx: 
CommandContext) -> None: - await _agent_cmds.cmd_schedules(self, ctx) - - async def _cmd_schedule(self, ctx: CommandContext) -> None: - await _agent_cmds.cmd_schedule(self, ctx) - - # -- System commands (delegated to commands_system) -------------------- - - async def _cmd_status(self, ctx: CommandContext) -> None: - await _system_cmds.cmd_status(self, ctx) - - async def _cmd_channels(self, ctx: CommandContext) -> None: - await _system_cmds.cmd_channels(self, ctx) - - async def _cmd_profile(self, ctx: CommandContext) -> None: - await _system_cmds.cmd_profile(self, ctx) - - async def _cmd_config(self, ctx: CommandContext) -> None: - await _system_cmds.cmd_config(self, ctx) - - async def _cmd_preflight(self, ctx: CommandContext) -> None: - await _system_cmds.cmd_preflight(self, ctx) - - async def _cmd_phone(self, ctx: CommandContext) -> None: - await _system_cmds.cmd_phone(self, ctx) - - async def _cmd_call(self, ctx: CommandContext) -> None: - await _system_cmds.cmd_call(self, ctx) - - async def _cmd_lockdown(self, ctx: CommandContext) -> None: - await _system_cmds.cmd_lockdown(self, ctx) - - async def _cmd_help(self, ctx: CommandContext) -> None: - await _system_cmds.cmd_help(self, ctx) diff --git a/app/runtime/messaging/message_processor.py b/app/runtime/messaging/message_processor.py index ab08992..394c417 100644 --- a/app/runtime/messaging/message_processor.py +++ b/app/runtime/messaging/message_processor.py @@ -115,12 +115,8 @@ async def _send_proactive_reply( text = response or "(no response)" outgoing = extract_outgoing_attachments(text) if response else [] - pending = collect_pending_outgoing() - if pending: - outgoing.extend(pending) - card_attachments = drain_pending_cards() - if card_attachments: - outgoing.extend(card_attachments) + outgoing.extend(collect_pending_outgoing()) + outgoing.extend(drain_pending_cards()) async def _callback( turn_context: TurnContext, diff --git a/app/runtime/messaging/proactive.py b/app/runtime/messaging/proactive.py index 
0607870..e1b3ef7 100644 --- a/app/runtime/messaging/proactive.py +++ b/app/runtime/messaging/proactive.py @@ -35,13 +35,14 @@ def _conversation_account(data: dict | None) -> ConversationAccount | None: def _serialize_ref(ref: ConversationReference) -> dict: + def _acct(a: Any) -> dict | None: + return {"id": a.id, "name": a.name} if a else None return { "activity_id": ref.activity_id, - "user": {"id": ref.user.id, "name": ref.user.name} if ref.user else None, - "bot": {"id": ref.bot.id, "name": ref.bot.name} if ref.bot else None, + "user": _acct(ref.user), + "bot": _acct(ref.bot), "conversation": { - "id": ref.conversation.id, - "name": ref.conversation.name, + "id": ref.conversation.id, "name": ref.conversation.name, "is_group": getattr(ref.conversation, "is_group", None), } if ref.conversation else None, "channel_id": ref.channel_id, @@ -115,8 +116,7 @@ async def send_proactive_message( return False logger.debug( - "[proactive-send] attempting to send to %d conversation ref(s), app_id=%s", - len(refs), app_id, + "[proactive-send] attempting to send to %d ref(s), app_id=%s", len(refs), app_id, ) succeeded = 0 for ref in refs: @@ -132,10 +132,8 @@ async def _callback( _send_ok: list = send_ok, _ref_key: str = ref_key, ) -> None: - pending = collect_pending_outgoing() - cards = drain_pending_cards() + all_attachments = (collect_pending_outgoing() or []) + (drain_pending_cards() or []) activity = Activity(type=ActivityTypes.message, text=_msg) - all_attachments = (pending or []) + (cards or []) if all_attachments: activity.attachments = all_attachments if _ch == "telegram": diff --git a/app/runtime/messaging/proactive_loop.py b/app/runtime/messaging/proactive_loop.py index d09579b..0ff8a99 100644 --- a/app/runtime/messaging/proactive_loop.py +++ b/app/runtime/messaging/proactive_loop.py @@ -1,11 +1,4 @@ -"""Proactive message delivery -- background loop. - -Two responsibilities: -1. **Deliver** pending messages that are due (scheduled by memory agent). -2. 
**Generate** new proactive messages autonomously when nothing is - pending and enough idle time has passed, using memory context to - craft something genuinely useful. -""" +"""Proactive message delivery -- background loop.""" from __future__ import annotations @@ -27,19 +20,21 @@ _TEMPLATES_DIR = Path(__file__).resolve().parent.parent / "templates" -# Minimum hours since last user activity before we proactively reach out. -_MIN_USER_IDLE_HOURS = 1.0 +_RETRY_DELAY_MINUTES = 5 + -# Cooldown between autonomous generation attempts (even if LLM says NO_FOLLOWUP). +def _schedule_retry(store: object, pending: object) -> None: + """Re-queue a pending message for delivery in *_RETRY_DELAY_MINUTES*.""" + retry_at = (datetime.now(UTC) + timedelta(minutes=_RETRY_DELAY_MINUTES)).isoformat() + store.schedule_followup(message=pending.message, deliver_at=retry_at, context=pending.context) + + +_MIN_USER_IDLE_HOURS = 1.0 _GENERATION_COOLDOWN_MINUTES = 60 def _in_preferred_window(prefs_times: str) -> bool: - """Return True if current UTC hour falls within any preferred time range. - - *prefs_times* is a comma-separated string like ``"9:00-12:00, 14:00-17:00"``. - If empty/blank, any time is allowed. 
- """ + """Return True if current UTC hour falls in any preferred time range.""" if not prefs_times or not prefs_times.strip(): return True @@ -64,10 +59,8 @@ def _in_preferred_window(prefs_times: str) -> bool: def _gather_memory_context() -> str: - """Read the most recent daily log and a few topic files for LLM context.""" + """Read recent daily logs and topic files for LLM context.""" lines: list[str] = [] - - # Latest daily log daily_dir = cfg.memory_daily_dir if daily_dir.is_dir(): logs = sorted(daily_dir.glob("*.md"), reverse=True) @@ -77,8 +70,6 @@ def _gather_memory_context() -> str: lines.append(f"--- {log_path.name} ---\n{content}") except OSError: pass - - # A couple of topic notes topics_dir = cfg.memory_topics_dir if topics_dir.is_dir(): topics = sorted(topics_dir.glob("*.md"), key=lambda p: p.stat().st_mtime, reverse=True) @@ -109,12 +100,10 @@ def _hours_since_last_session() -> float | None: """Return hours since the most recent session's last update.""" from ..state.session_store import SessionStore - store = SessionStore() - sessions = store.list_sessions() + sessions = SessionStore().list_sessions() if not sessions: return None - latest = sessions[0] - ts = latest.get("updated_at") or latest.get("created_at") + ts = sessions[0].get("updated_at") or sessions[0].get("created_at") if not ts: return None try: @@ -132,17 +121,11 @@ def _hours_since_last_session() -> float | None: async def _generate_proactive_message() -> str | None: - """Use a one-shot LLM call to generate a proactive message. - - Returns the message string or ``None`` if the LLM decided nothing - is worth sending (``NO_FOLLOWUP``). 
- """ + """Use a one-shot LLM call to generate a proactive message.""" from ..agent.one_shot import run_one_shot template = (_TEMPLATES_DIR / "proactive_generate_prompt.md").read_text() store = get_proactive_store() - - # Build history context history = store.history[-5:] if history: history_lines = [] @@ -182,7 +165,6 @@ async def _generate_proactive_message() -> str | None: logger.info("[proactive] LLM decided: NO_FOLLOWUP") return None - # Sanity: reject very short or suspiciously long responses if len(text) < 10 or len(text) > 500: logger.warning("[proactive] LLM response rejected (len=%d): %s", len(text), text[:80]) return None @@ -194,30 +176,20 @@ def _should_auto_generate(store: "ProactiveStore") -> bool: # noqa: F821 """Decide whether the loop should autonomously generate a proactive message.""" if not store.enabled: return False - - # Already have a pending message if store.pending: return False prefs = store.preferences - - # Daily limit if store.messages_sent_today() >= prefs.max_daily: logger.debug("[proactive] daily limit reached (%d/%d)", store.messages_sent_today(), prefs.max_daily) return False - - # Min gap since last sent hours_since = store.hours_since_last_sent() if hours_since is not None and hours_since < prefs.min_gap_hours: logger.debug("[proactive] too soon since last sent (%.1fh < %dh)", hours_since, prefs.min_gap_hours) return False - - # Preferred time window if not _in_preferred_window(prefs.preferred_times): logger.debug("[proactive] outside preferred time window") return False - - # User must have been idle for a minimum period user_idle = _hours_since_last_session() if user_idle is not None and user_idle < _MIN_USER_IDLE_HOURS: logger.debug("[proactive] user active too recently (%.1fh)", user_idle) @@ -231,11 +203,7 @@ async def proactive_delivery_loop( interval_seconds: int = 60, session_store: "SessionStore | None" = None, ) -> None: - """Check every *interval_seconds* for due proactive messages and deliver them. 
- - Also autonomously generates proactive messages when nothing is pending - and the conditions are right (idle time, preferred window, limits). - """ + """Check every *interval_seconds* for due messages and deliver or generate them.""" store = get_proactive_store() logger.info("[proactive] delivery loop started (interval=%ds)", interval_seconds) @@ -253,7 +221,7 @@ async def proactive_delivery_loop( store.is_due() if pending_obj else "n/a", ) - # ── 1. Deliver pending messages that are due ────────────── + # ── 1. Deliver pending messages ── if store.enabled and store.is_due(): pending = store.clear_pending() if pending: @@ -262,7 +230,7 @@ async def proactive_delivery_loop( elif not store.enabled and pending_obj: logger.debug("[proactive] has pending message but proactive is DISABLED") - # ── 2. Autonomous generation when nothing is pending ────── + # ── 2. Autonomous generation ── elif _should_auto_generate(store): cooldown_ok = ( last_generation_attempt is None @@ -324,17 +292,7 @@ async def _deliver_message( "[proactive] message NOT delivered (no active channels): %s -- will retry in 5 min", pending.id, ) - retry_at = (datetime.now(UTC) + timedelta(minutes=5)).isoformat() - store.schedule_followup( - message=pending.message, - deliver_at=retry_at, - context=pending.context, - ) + _schedule_retry(store, pending) except Exception as exc: logger.error("[proactive] delivery failed: %s", exc, exc_info=True) - retry_at = (datetime.now(UTC) + timedelta(minutes=5)).isoformat() - store.schedule_followup( - message=pending.message, - deliver_at=retry_at, - context=pending.context, - ) + _schedule_retry(store, pending) diff --git a/app/runtime/realtime/tools.py b/app/runtime/realtime/tools.py index cc9e87c..3b7f95e 100644 --- a/app/runtime/realtime/tools.py +++ b/app/runtime/realtime/tools.py @@ -13,7 +13,7 @@ from typing import Any from ..config.settings import cfg -from ..util.singletons import register_singleton +from ..util.singletons import Singleton logger = 
logging.getLogger(__name__) @@ -67,22 +67,7 @@ def fail(self, task_id: str, error: str) -> None: task.completed_at = datetime.now(UTC).isoformat() -_task_store: TaskStore | None = None - - -def get_task_store() -> TaskStore: - global _task_store - if _task_store is None: - _task_store = TaskStore() - return _task_store - - -def _reset_task_store() -> None: - global _task_store - _task_store = None - - -register_singleton(_reset_task_store) +get_task_store, _reset_task_store = Singleton.create(TaskStore) INVOKE_AGENT_SCHEMA = { diff --git a/app/runtime/registries/plugins.py b/app/runtime/registries/plugins.py index 533f0c0..14f3525 100644 --- a/app/runtime/registries/plugins.py +++ b/app/runtime/registries/plugins.py @@ -11,7 +11,7 @@ from ..config.settings import cfg from ..state.plugin_config import PluginConfigStore -from ..util.singletons import register_singleton +from ..util.singletons import Singleton logger = logging.getLogger(__name__) @@ -100,6 +100,10 @@ def _discover(self) -> None: manifest = _parse_manifest(manifest_path) if manifest and manifest.id not in self._plugins: self._plugins[manifest.id] = manifest + logger.info( + " plugin: %s (%s) -- %d skill(s)", + manifest.id, manifest.name, len(manifest.skills), + ) logger.info("Discovered %d plugin(s)", len(self._plugins)) def refresh(self) -> None: @@ -242,19 +246,4 @@ def remove_user_plugin(self, plugin_id: str) -> bool: return True -_registry: PluginRegistry | None = None - - -def get_plugin_registry() -> PluginRegistry: - global _registry - if _registry is None: - _registry = PluginRegistry() - return _registry - - -def _reset_plugin_registry() -> None: - global _registry - _registry = None - - -register_singleton(_reset_plugin_registry) +get_plugin_registry, _reset_plugin_registry = Singleton.create(PluginRegistry) diff --git a/app/runtime/registries/skills.py b/app/runtime/registries/skills.py index 454cf13..2b63b86 100644 --- a/app/runtime/registries/skills.py +++ 
b/app/runtime/registries/skills.py @@ -11,7 +11,7 @@ from typing import Any from ..config.settings import cfg -from ..util.singletons import register_singleton +from ..util.singletons import Singleton logger = logging.getLogger(__name__) @@ -221,24 +221,8 @@ async def install(self, name: str) -> str | None: return None -_registry: SkillRegistry | None = None - - -def get_registry() -> SkillRegistry: - global _registry - if _registry is None: - _registry = SkillRegistry() - return _registry +get_registry, _reset_registry = Singleton.create(SkillRegistry) def set_registry(instance: SkillRegistry) -> None: - global _registry - _registry = instance - - -def _reset_registry() -> None: - global _registry - _registry = None - - -register_singleton(_reset_registry) + _reset_registry(instance) diff --git a/app/runtime/sandbox/executor.py b/app/runtime/sandbox/executor.py index b9be188..de1983d 100644 --- a/app/runtime/sandbox/executor.py +++ b/app/runtime/sandbox/executor.py @@ -303,37 +303,14 @@ async def _execute_in_session( self, http: aiohttp.ClientSession, endpoint: str, session_id: str, headers: dict[str, str], timeout: int, ) -> dict[str, Any]: - url = f"{endpoint}/code/execute?api-version={API_VERSION}&identifier={session_id}" - payload = { - "properties": { - "codeInputType": "inline", - "executionType": "synchronous", - "code": ( - "import subprocess, json, sys\n" - "r = subprocess.run(['bash', '/mnt/data/bootstrap.sh'], " - f"capture_output=True, text=True, timeout={timeout})\n" - "print(json.dumps({'stdout': r.stdout, 'stderr': r.stderr, " - "'rc': r.returncode}))\n" - ), - } - } - try: - async with http.post( - url, json=payload, headers={**headers, "Content-Type": "application/json"}, - timeout=aiohttp.ClientTimeout(total=timeout + 30), - ) as resp: - if resp.status not in (200, 201, 202): - text = await resp.text() - return {"success": False, "error": f"Execution failed: {resp.status} {text[:300]}"} - result = await resp.json() - props = 
result.get("properties", {}) - return self._parse_exec_result( - props.get("stdout", ""), - fallback_stderr=props.get("stderr", ""), - ) - except Exception as exc: - logger.error("Sandbox exec exception: %s", exc, exc_info=True) - return {"success": False, "error": str(exc)} + code = ( + "import subprocess, json, sys\n" + "r = subprocess.run(['bash', '/mnt/data/bootstrap.sh'], " + f"capture_output=True, text=True, timeout={timeout})\n" + "print(json.dumps({'stdout': r.stdout, 'stderr': r.stderr, " + "'rc': r.returncode}))\n" + ) + return await self._run_code(http, endpoint, session_id, code, headers, timeout) async def provision_session(self, session_id: str) -> dict[str, Any]: start = time.time() @@ -399,7 +376,6 @@ async def _execute_code( self, http: aiohttp.ClientSession, endpoint: str, session_id: str, command: str, headers: dict[str, str], timeout: int, ) -> dict[str, Any]: - url = f"{endpoint}/code/execute?api-version={API_VERSION}&identifier={session_id}" cmd_b64 = base64.b64encode(command.encode()).decode() code = ( "import subprocess, json, base64\n" @@ -409,6 +385,13 @@ async def _execute_code( "env={**__import__('os').environ, 'HOME': '/mnt/data/agent_home'})\n" "print(json.dumps({'stdout': r.stdout, 'stderr': r.stderr, 'rc': r.returncode}))\n" ) + return await self._run_code(http, endpoint, session_id, code, headers, timeout) + + async def _run_code( + self, http: aiohttp.ClientSession, endpoint: str, session_id: str, + code: str, headers: dict[str, str], timeout: int, + ) -> dict[str, Any]: + url = f"{endpoint}/code/execute?api-version={API_VERSION}&identifier={session_id}" payload = {"properties": {"codeInputType": "inline", "executionType": "synchronous", "code": code}} try: async with http.post( @@ -425,7 +408,7 @@ async def _execute_code( fallback_stderr=props.get("stderr", ""), ) except Exception as exc: - logger.error("Session exec exception: %s", exc, exc_info=True) + logger.error("Sandbox exec exception: %s", exc, exc_info=True) return 
{"success": False, "error": str(exc)} @staticmethod diff --git a/app/runtime/scheduler/engine.py b/app/runtime/scheduler/engine.py index 897f65a..a89e407 100644 --- a/app/runtime/scheduler/engine.py +++ b/app/runtime/scheduler/engine.py @@ -16,7 +16,7 @@ from ..agent import one_shot as one_shot_mod from ..config.settings import cfg -from ..util.singletons import register_singleton +from ..util.singletons import Singleton logger = logging.getLogger(__name__) @@ -330,27 +330,11 @@ async def _send_notification(self, task: ScheduledTask, result: str | None) -> N ) -_scheduler: Scheduler | None = None - - -def get_scheduler() -> Scheduler: - global _scheduler - if _scheduler is None: - _scheduler = Scheduler() - return _scheduler +get_scheduler, _reset_scheduler = Singleton.create(Scheduler) def set_scheduler(instance: Scheduler) -> None: - global _scheduler - _scheduler = instance - - -def _reset_scheduler() -> None: - global _scheduler - _scheduler = None - - -register_singleton(_reset_scheduler) + _reset_scheduler(instance) async def scheduler_loop(interval_seconds: int = 60) -> None: diff --git a/app/runtime/server/app.py b/app/runtime/server/app.py index 28e52e2..0f0b657 100644 --- a/app/runtime/server/app.py +++ b/app/runtime/server/app.py @@ -111,9 +111,13 @@ async def build(self) -> web.Application: self._mode = cfg.server_mode cfg.ensure_dirs() self._ensure_admin_secret() + logger.info("[build] mode=%s -- initializing core ...", self._mode.value) await self._init_core() + logger.info("[build] initializing services ...") self._init_services() + logger.info("[build] cross-wiring components ...") self._cross_wire() + logger.info("[build] initializing voice ...") self._init_voice() middlewares = [lockdown_middleware, tunnel_restriction_middleware, auth_middleware] @@ -131,6 +135,12 @@ async def build(self) -> web.Application: if proxy_mw is not None: app.on_cleanup.append(proxy_mw.cleanup) + logger.info( + "[build] application ready (mode=%s voice=%s proxy=%s)", + 
self._mode.value, + self._voice_routes is not None, + proxy_mw is not None, + ) return app # -- Properties -------------------------------------------------------- @@ -190,13 +200,16 @@ def _init_services(self) -> None: if self._is_runtime and self._agent: if self._sandbox_executor: self._agent.set_sandbox(self._sandbox_executor) + logger.info("[build] sandbox executor wired into agent") self._agent.set_guardrails(self._guardrails_store) + logger.info("[build] guardrails store wired into agent") def _cross_wire(self) -> None: """Wire cross-cutting references that span core and services.""" if self._bot and self._agent and self._agent.hitl_interceptor: self._bot._hitl = self._agent.hitl_interceptor self._bot._processor._hitl = self._agent.hitl_interceptor + logger.info("[cross_wire] HITL interceptor wired into bot") if self._scheduler and self._agent and self._agent.hitl_interceptor: self._scheduler.set_hitl_interceptor(self._agent.hitl_interceptor) diff --git a/app/runtime/server/lifecycle.py b/app/runtime/server/lifecycle.py index ff1b64e..ad57335 100644 --- a/app/runtime/server/lifecycle.py +++ b/app/runtime/server/lifecycle.py @@ -49,14 +49,23 @@ async def on_startup_runtime( # Bootstrap OTel if monitoring is configured mon = monitoring_store if mon.is_configured: + logger.info( + "[startup.runtime] configuring OpenTelemetry " + "(sampling=%.2f live_metrics=%s)", + mon.config.sampling_ratio, + mon.config.enable_live_metrics, + ) configure_otel( mon.connection_string, sampling_ratio=mon.config.sampling_ratio, enable_live_metrics=mon.config.enable_live_metrics, ) + else: + logger.info("[startup.runtime] OpenTelemetry not configured -- skipping") rebuild_adapter() + logger.info("[startup.runtime] starting background tasks ...") app["scheduler_task"] = asyncio.create_task(scheduler_loop()) app["proactive_task"] = asyncio.create_task( proactive_delivery_loop(make_notify(), session_store=session_store), diff --git a/app/runtime/server/routes/_helpers.py 
b/app/runtime/server/routes/_helpers.py index dbe9eda..16782d0 100644 --- a/app/runtime/server/routes/_helpers.py +++ b/app/runtime/server/routes/_helpers.py @@ -2,16 +2,32 @@ from __future__ import annotations +import functools +import json +import logging +from collections.abc import Awaitable, Callable from typing import Any from aiohttp import web +logger = logging.getLogger(__name__) + +# -- response helpers ------------------------------------------------------ + + +def ok_response(**data: Any) -> web.Response: + """Return a standard ``{"status": "ok", ...}`` JSON response.""" + return web.json_response({"status": "ok", **data}) + + +def error_response(message: str, *, status: int = 400) -> web.Response: + """Return a standard ``{"status": "error", "message": ...}`` JSON response.""" + return web.json_response({"status": "error", "message": message}, status=status) + def no_az() -> web.Response: """Return a standard error when Azure CLI is unavailable.""" - return web.json_response( - {"status": "error", "message": "Azure CLI not available"}, status=500 - ) + return error_response("Azure CLI not available", status=500) def fail_response(steps: list[dict[str, Any]]) -> web.Response: @@ -22,3 +38,44 @@ def fail_response(steps: list[dict[str, Any]]) -> web.Response: {"status": "error", "steps": steps, "message": f"Provisioning failed: {msg}"}, status=500, ) + + +# -- request helpers ------------------------------------------------------- + + +async def parse_json(req: web.Request) -> dict[str, Any]: + """Parse JSON body, raising ``ValueError`` on invalid input.""" + try: + body = await req.json() + except (json.JSONDecodeError, Exception) as exc: + raise ValueError("Invalid JSON body") from exc + if not isinstance(body, dict): + raise ValueError("JSON body must be an object") + return body + + +# -- decorator ------------------------------------------------------------- + +Handler = Callable[..., Awaitable[web.Response]] + + +def api_handler(fn: Handler) -> 
Handler: + """Wrap a route handler with standard JSON error handling. + + Catches ``ValueError`` (→ 400) and unexpected exceptions (→ 500), + returning the standard error envelope. + """ + + @functools.wraps(fn) + async def wrapper(*args: Any, **kwargs: Any) -> web.Response: + try: + return await fn(*args, **kwargs) + except ValueError as exc: + return error_response(str(exc)) + except web.HTTPException: + raise + except Exception as exc: + logger.exception("Unhandled error in %s: %s", fn.__qualname__, exc) + return error_response("Internal server error", status=500) + + return wrapper diff --git a/app/runtime/server/routes/content_safety_routes.py b/app/runtime/server/routes/content_safety_routes.py index fc6e761..9920f63 100644 --- a/app/runtime/server/routes/content_safety_routes.py +++ b/app/runtime/server/routes/content_safety_routes.py @@ -15,6 +15,7 @@ from ...state.deploy_state import DeployStateStore from ...state.guardrails import GuardrailsConfigStore from ...util.async_helpers import run_sync +from ._helpers import error_response, ok_response logger = logging.getLogger(__name__) @@ -53,14 +54,13 @@ def register(self, router: web.UrlDispatcher) -> None: async def _status(self, _req: web.Request) -> web.Response: """Return current Content Safety configuration status.""" if not self._store: - return web.json_response({"status": "ok", "deployed": False}) + return ok_response(deployed=False) config = self._store.config - return web.json_response({ - "status": "ok", - "deployed": bool(config.content_safety_endpoint), - "endpoint": config.content_safety_endpoint, - "filter_mode": config.filter_mode, - }) + return ok_response( + deployed=bool(config.content_safety_endpoint), + endpoint=config.content_safety_endpoint, + filter_mode=config.filter_mode, + ) async def _test(self, _req: web.Request) -> web.Response: """Dry-run: send a harmless probe to the Prompt Shields API. 
@@ -70,18 +70,14 @@ async def _test(self, _req: web.Request) -> web.Response: correctly *before* relying on the shield to block attacks. """ if not self._store: - return web.json_response( - {"status": "error", "message": "Guardrails store not available"}, - status=500, - ) + return error_response("Guardrails store not available", status=500) endpoint = self._store.config.content_safety_endpoint if not endpoint: - return web.json_response({ - "status": "ok", - "passed": False, - "detail": "No endpoint configured -- deploy first", - }) + return ok_response( + passed=False, + detail="No endpoint configured -- deploy first", + ) shield = PromptShieldService(endpoint=endpoint) result = await run_sync(shield.dry_run) @@ -90,24 +86,17 @@ async def _test(self, _req: web.Request) -> web.Response: "[content-safety.test] dry-run passed=%s detail=%s", passed, result.detail, ) - return web.json_response({ - "status": "ok", - "passed": passed, - "detail": result.detail, - }) + return ok_response( + passed=passed, + detail=result.detail, + ) async def _deploy(self, req: web.Request) -> web.Response: """Provision an Azure AI Content Safety resource via the central Bicep template.""" if not self._bicep: - return web.json_response( - {"status": "error", "message": "Azure CLI or deploy store not available"}, - status=400, - ) + return error_response("Azure CLI or deploy store not available") if not self._store: - return web.json_response( - {"status": "error", "message": "Guardrails store not available"}, - status=500, - ) + return error_response("Guardrails store not available", status=500) try: data = await req.json() @@ -117,9 +106,15 @@ async def _deploy(self, req: web.Request) -> web.Response: resource_group = data.get("resource_group", _DEFAULT_RG).strip() location = data.get("location", _DEFAULT_LOCATION).strip() + # Reuse the Foundry base name so Content Safety lands in the same + # naming scheme (e.g. 
``e2esetup-content-safety`` instead of a + # random ``polyclaw--content-safety``). + base_name = data.get("base_name", "").strip() or cfg.env.read("FOUNDRY_NAME") or "" + bicep_req = BicepDeployRequest( resource_group=resource_group, location=location, + base_name=base_name, deploy_foundry=False, deploy_key_vault=False, deploy_content_safety=True, @@ -145,12 +140,11 @@ async def _deploy(self, req: web.Request) -> web.Response: ), }) - return web.json_response({ - "status": "ok", - "steps": result.steps, - "endpoint": result.content_safety_endpoint, - "filter_mode": "prompt_shields", - }) + return ok_response( + steps=result.steps, + endpoint=result.content_safety_endpoint, + filter_mode="prompt_shields", + ) # ------------------------------------------------------------------ # Public API -- called from admin startup @@ -262,9 +256,18 @@ def _match_endpoint( return acct.get("id", "") return "" - async def _resolve_runtime_principal( - self, - ) -> tuple[str, str]: + async def _sp_object_id(self, client_id: str) -> str: + """Resolve service principal object-id from *client_id*.""" + info = await run_sync( + functools.partial( + self._az.json, "ad", "sp", "show", "--id", client_id, quiet=True, + ), + ) + if isinstance(info, dict): + return info.get("id", "") or info.get("objectId", "") + return "" + + async def _resolve_runtime_principal(self) -> tuple[str, str]: """Detect the runtime identity for RBAC assignment. Resolution order: @@ -273,49 +276,23 @@ async def _resolve_runtime_principal( 3. Current Azure CLI identity (signed-in user or SP). Returns ``(principal_object_id, principal_type)``. - Either may be empty when detection fails. """ assert self._az is not None - # 1. 
Explicit service principal - sp_app_id = cfg.runtime_sp_app_id - if sp_app_id: - sp_info = await run_sync( - functools.partial( - self._az.json, "ad", "sp", "show", "--id", sp_app_id, quiet=True, - ), - ) - pid = "" - if isinstance(sp_info, dict): - pid = sp_info.get("id", "") or sp_info.get("objectId", "") - if pid: - return pid, "ServicePrincipal" - logger.warning( - "[content-safety.rbac] Cannot resolve object-id for " - "RUNTIME_SP_APP_ID=%s, trying fallbacks", - sp_app_id, - ) - - # 2. User-assigned managed identity - mi_client_id = cfg.aca_mi_client_id - if mi_client_id: - mi_info = await run_sync( - functools.partial( - self._az.json, "ad", "sp", "show", "--id", mi_client_id, quiet=True, - ), - ) - pid = "" - if isinstance(mi_info, dict): - pid = mi_info.get("id", "") or mi_info.get("objectId", "") - if pid: - return pid, "ServicePrincipal" - logger.warning( - "[content-safety.rbac] Cannot resolve object-id for " - "ACA_MI_CLIENT_ID=%s, trying CLI identity", - mi_client_id, - ) + for label, client_id in [ + ("RUNTIME_SP_APP_ID", cfg.runtime_sp_app_id), + ("ACA_MI_CLIENT_ID", cfg.aca_mi_client_id), + ]: + if client_id: + pid = await self._sp_object_id(client_id) + if pid: + return pid, "ServicePrincipal" + logger.warning( + "[content-safety.rbac] Cannot resolve object-id for " + "%s=%s, trying fallbacks", label, client_id, + ) - # 3. 
Current Azure CLI identity + # Signed-in user fallback user_info = await run_sync( functools.partial( self._az.json, "ad", "signed-in-user", "show", quiet=True, @@ -324,17 +301,14 @@ async def _resolve_runtime_principal( if isinstance(user_info, dict) and user_info.get("id"): return user_info["id"], "User" + # Account SP fallback account = self._az.account_info() if account: name = account.get("user", {}).get("name", "") if name: - sp_info = await run_sync( - functools.partial( - self._az.json, "ad", "sp", "show", "--id", name, quiet=True, - ), - ) - if isinstance(sp_info, dict) and sp_info.get("id"): - return sp_info["id"], "ServicePrincipal" + pid = await self._sp_object_id(name) + if pid: + return pid, "ServicePrincipal" return "", "" diff --git a/app/runtime/server/routes/env_routes.py b/app/runtime/server/routes/env_routes.py index c109155..6ef33c1 100644 --- a/app/runtime/server/routes/env_routes.py +++ b/app/runtime/server/routes/env_routes.py @@ -7,10 +7,11 @@ from aiohttp import web from ...services.cloud.azure import AzureCLI -from ...services.security.misconfig_checker import MisconfigChecker from ...services.resource_tracker import ResourceTracker +from ...services.security.misconfig_checker import MisconfigChecker from ...state.deploy_state import DeployStateStore from ...util.async_helpers import run_sync +from ._helpers import error_response, ok_response from ._helpers import no_az as _no_az logger = logging.getLogger(__name__) @@ -44,9 +45,7 @@ async def _get(self, req: web.Request) -> web.Response: deploy_id = req.match_info["deploy_id"] rec = self._store.get(deploy_id) if not rec: - return web.json_response( - {"status": "error", "message": "Deployment not found"}, status=404 - ) + return error_response("Deployment not found", status=404) from dataclasses import asdict return web.json_response(asdict(rec)) @@ -56,7 +55,7 @@ async def _destroy(self, req: web.Request) -> web.Response: deploy_id = req.match_info["deploy_id"] tracker = 
ResourceTracker(self._az, self._store) steps = await run_sync(tracker.cleanup_deployment, deploy_id) - return web.json_response({"status": "ok", "steps": steps}) + return ok_response(steps=steps) async def _cleanup(self, req: web.Request) -> web.Response: if not self._az: @@ -64,15 +63,13 @@ async def _cleanup(self, req: web.Request) -> web.Response: deploy_id = req.match_info["deploy_id"] tracker = ResourceTracker(self._az, self._store) steps = await run_sync(tracker.cleanup_deployment, deploy_id) - return web.json_response({"status": "ok", "steps": steps}) + return ok_response(steps=steps) async def _remove_record(self, req: web.Request) -> web.Response: deploy_id = req.match_info["deploy_id"] if self._store.remove(deploy_id): - return web.json_response({"status": "ok"}) - return web.json_response( - {"status": "error", "message": "Not found"}, status=404 - ) + return ok_response() + return error_response("Not found", status=404) async def _audit(self, _req: web.Request) -> web.Response: if not self._az: @@ -94,7 +91,7 @@ async def _audit_cleanup(self, _req: web.Request) -> web.Response: "status": "ok" if ok else "failed", "detail": msg or "", }) - return web.json_response({"status": "ok", "steps": steps}) + return ok_response(steps=steps) async def _misconfig_check(self, req: web.Request) -> web.Response: if not self._az: @@ -113,7 +110,7 @@ async def _misconfig_check(self, req: web.Request) -> web.Response: resource_groups = list(set(resource_groups)) if not resource_groups: - return web.json_response({"status": "ok", "message": "No resource groups"}) + return ok_response(message="No resource groups") checker = MisconfigChecker(self._az) result = await run_sync(checker.check_all, resource_groups) diff --git a/app/runtime/server/routes/foundry_iq_routes.py b/app/runtime/server/routes/foundry_iq_routes.py index 6acc0bc..7cf873b 100644 --- a/app/runtime/server/routes/foundry_iq_routes.py +++ b/app/runtime/server/routes/foundry_iq_routes.py @@ -3,7 +3,6 @@ from 
__future__ import annotations import logging -import secrets as _secrets from typing import Any from aiohttp import web @@ -22,7 +21,9 @@ from ...state.deploy_state import DeployStateStore from ...state.foundry_iq_config import FoundryIQConfigStore from ...util.async_helpers import run_sync -from ._helpers import fail_response as _fail_response, no_az as _no_az +from ._helpers import api_handler, error_response, ok_response, parse_json +from ._helpers import fail_response as _fail_response +from ._helpers import no_az as _no_az logger = logging.getLogger(__name__) @@ -59,10 +60,11 @@ def register(self, router: web.UrlDispatcher) -> None: async def _get_config(self, _req: web.Request) -> web.Response: return web.json_response(self._store.to_safe_dict()) + @api_handler async def _save_config(self, req: web.Request) -> web.Response: - data = await req.json() + data = await parse_json(req) self._store.save(**data) - return web.json_response({"status": "ok", "config": self._store.to_safe_dict()}) + return ok_response(config=self._store.to_safe_dict()) async def _test_search(self, _req: web.Request) -> web.Response: result = await run_sync(test_search_connection, self._store) @@ -85,23 +87,19 @@ async def _run_indexing(self, _req: web.Request) -> web.Response: result = await run_sync(index_memories, self._store) except Exception as exc: logger.exception("Indexing failed") - return web.json_response( - {"status": "error", "message": f"Indexing crashed: {exc}"}, - status=500, - ) + return error_response(f"Indexing crashed: {exc}", status=500) return web.json_response(result) async def _get_stats(self, _req: web.Request) -> web.Response: result = await run_sync(get_index_stats, self._store) return web.json_response(result) + @api_handler async def _search(self, req: web.Request) -> web.Response: - data = await req.json() + data = await parse_json(req) query = data.get("query", "").strip() if not query: - return web.json_response( - {"status": "error", "message": "Query is 
required"}, status=400 - ) + return error_response("Query is required") top = data.get("top", 5) result = await run_sync(search_memories, query, top, self._store) return web.json_response(result) @@ -110,12 +108,11 @@ async def _provision(self, req: web.Request) -> web.Response: if not self._bicep: return _no_az() if self._store.is_provisioned: - return web.json_response({ - "status": "ok", - "message": "Already provisioned", - "steps": [], - "config": self._store.to_safe_dict(), - }) + return ok_response( + message="Already provisioned", + steps=[], + config=self._store.to_safe_dict(), + ) try: body = await req.json() if req.can_read_body else {} @@ -203,20 +200,17 @@ async def _provision(self, req: web.Request) -> web.Response: "Foundry IQ provisioned (Bicep): search=%s, aoai=%s", result.search_name, result.embedding_aoai_name, ) - return web.json_response({ - "status": "ok", - "message": f"Foundry IQ provisioned in {rg}", - "steps": result.steps, - "config": self._store.to_safe_dict(), - }) + return ok_response( + message=f"Foundry IQ provisioned in {rg}", + steps=result.steps, + config=self._store.to_safe_dict(), + ) async def _decommission(self, _req: web.Request) -> web.Response: if not self._az: return _no_az() if not self._store.is_provisioned: - return web.json_response( - {"status": "error", "message": "Nothing provisioned"}, status=400 - ) + return error_response("Nothing provisioned") steps: list[dict[str, Any]] = [] rg = self._store.config.resource_group @@ -278,6 +272,4 @@ async def _decommission(self, _req: web.Request) -> web.Response: steps.append({"step": "clear_config", "status": "ok", "detail": "Cleared"}) logger.info("Foundry IQ decommissioned: %s, %s", search_name, openai_name) - return web.json_response({ - "status": "ok", "message": "Resources removed", "steps": steps - }) \ No newline at end of file + return ok_response(message="Resources removed", steps=steps) diff --git a/app/runtime/server/routes/guardrails_routes.py 
b/app/runtime/server/routes/guardrails_routes.py index 50a1ce1..9ef72bf 100644 --- a/app/runtime/server/routes/guardrails_routes.py +++ b/app/runtime/server/routes/guardrails_routes.py @@ -2,7 +2,6 @@ from __future__ import annotations -import logging from collections.abc import Callable from typing import Any @@ -92,33 +91,28 @@ async def _update_config(self, req: web.Request) -> web.Response: self._store.set_hitl_enabled(bool(data["hitl_enabled"])) # Validated fields -- accept both frontend and backend key names - for key in ("default_strategy", "default_action"): - err = self._apply_validated_field( - data, key, self._store.set_default_action, - ) - if err: - return err - for key in ("hitl_channel", "default_channel"): - err = self._apply_validated_field( - data, key, self._store.set_default_channel, - ) - if err: - return err - err = self._apply_validated_field( - data, "filter_mode", self._store.set_filter_mode, - ) - if err: - return err + _VALIDATED_FIELDS: list[tuple[tuple[str, ...], Callable]] = [ + (("default_strategy", "default_action"), self._store.set_default_action), + (("hitl_channel", "default_channel"), self._store.set_default_channel), + (("filter_mode",), self._store.set_filter_mode), + ] + for keys, setter in _VALIDATED_FIELDS: + for key in keys: + err = self._apply_validated_field(data, key, setter) + if err: + return err # Simple fields (no validation) - if "phone_number" in data: - self._store.set_phone_number(data["phone_number"]) - if "aitl_model" in data: - self._store.set_aitl_model(data["aitl_model"]) + _SIMPLE_FIELDS: list[tuple[str, Callable]] = [ + ("phone_number", self._store.set_phone_number), + ("aitl_model", self._store.set_aitl_model), + ("content_safety_endpoint", self._store.set_content_safety_endpoint), + ] + for key, setter in _SIMPLE_FIELDS: + if key in data: + setter(data[key]) if "aitl_spotlighting" in data: self._store.set_aitl_spotlighting(bool(data["aitl_spotlighting"])) - if "content_safety_endpoint" in data: - 
self._store.set_content_safety_endpoint(data["content_safety_endpoint"]) # Context defaults (batch update) if "context_defaults" in data: @@ -205,20 +199,17 @@ async def _bulk_rules(self, req: web.Request) -> web.Response: if hitl_enabled is not None: self._store.set_hitl_enabled(bool(hitl_enabled)) - if default_action: - try: - self._store.set_default_action(default_action) - except ValueError as exc: - return web.json_response( - {"status": "error", "message": str(exc)}, status=400 - ) - if default_channel: - try: - self._store.set_default_channel(default_channel) - except ValueError as exc: - return web.json_response( - {"status": "error", "message": str(exc)}, status=400 - ) + for value, setter in [ + (default_action, self._store.set_default_action), + (default_channel, self._store.set_default_channel), + ]: + if value: + try: + setter(value) + except ValueError as exc: + return web.json_response( + {"status": "error", "message": str(exc)}, status=400 + ) if phone_number is not None: self._store.set_phone_number(phone_number) diff --git a/app/runtime/server/routes/guardrails_routes_meta.py b/app/runtime/server/routes/guardrails_routes_meta.py index 15096ec..63b8877 100644 --- a/app/runtime/server/routes/guardrails_routes_meta.py +++ b/app/runtime/server/routes/guardrails_routes_meta.py @@ -63,7 +63,6 @@ async def list_contexts_handler(_req: web.Request) -> web.Response: async def list_templates_handler(_req: web.Request) -> web.Response: """Return the list of prompt template names.""" - from pathlib import Path as _Path from ...agent.prompt import TEMPLATES_DIR from ...config.settings import cfg diff --git a/app/runtime/server/routes/identity_routes.py b/app/runtime/server/routes/identity_routes.py index 761e5ab..bb3e39d 100644 --- a/app/runtime/server/routes/identity_routes.py +++ b/app/runtime/server/routes/identity_routes.py @@ -64,6 +64,12 @@ "role_id": "0fb8eba5-a2bb-4abe-b1c1-49dfad359bb0", "data_action": "", }, + { + "feature": "Foundry BYOK (OpenAI 
Chat)", + "role": "Cognitive Services OpenAI User", + "role_id": "5e0bd9bd-7b93-4f28-af87-19fc36ad61bd", + "data_action": "", + }, ] @@ -278,6 +284,12 @@ async def _fix_roles(self, req: web.Request) -> web.Response: ), }) + # Fix Foundry OpenAI role (BYOK chat) + await self._fix_foundry_openai_role( + principal_id, principal_type, steps, + use_object_id=use_object_id, + ) + # Fix Session Pool Executor role await self._fix_session_pool_role( principal_id, principal_type, steps, @@ -354,6 +366,67 @@ def _check_session_executor_scope( f"Found: {scopes_str}" ) + async def _fix_foundry_openai_role( + self, + principal_id: str, + principal_type: str, + steps: list[dict[str, Any]], + *, + use_object_id: bool = True, + ) -> None: + """Assign Cognitive Services OpenAI User on the Foundry AI resource.""" + assert self._az is not None + resource_id = await self._resolve_foundry_resource() + if not resource_id: + steps.append({ + "step": "foundry_openai_rbac", + "status": "skipped", + "detail": "No Foundry endpoint configured or resource not found", + }) + return + + await self._assign_role( + principal_id, principal_type, + "5e0bd9bd-7b93-4f28-af87-19fc36ad61bd", + resource_id, + "Cognitive Services OpenAI User", + steps, + use_object_id=use_object_id, + ) + + async def _resolve_foundry_resource(self) -> str: + """Resolve the ARM resource ID for the configured FOUNDRY_ENDPOINT.""" + endpoint = cfg.foundry_endpoint + if not endpoint or not self._az: + return "" + + stripped = endpoint.rstrip("/").lower() + for prefix in ("https://", "http://"): + if stripped.startswith(prefix): + stripped = stripped[len(prefix):] + break + host = stripped.split("/")[0] + + resource_name = "" + for suffix in (".cognitiveservices.azure.com", ".openai.azure.com"): + if suffix in host: + resource_name = host.split(suffix)[0] + break + + if resource_name: + resources = await run_sync( + self._az.json, + "resource", "list", + "--name", resource_name, + "--resource-type", 
"Microsoft.CognitiveServices/accounts", + "--query", "[].id", + ) + if isinstance(resources, list) and resources: + logger.info("[identity.resolve] Foundry resource: %s", resources[0]) + return resources[0] + + return "" + async def _fix_session_pool_role( self, principal_id: str, @@ -482,56 +555,40 @@ async def _resolve_cs_resource(self, endpoint: str) -> str: # Fallback to RG-scoped discovery return await self._discover_cs_resource() - async def _discover_cs_resource(self) -> str: - """Find a ContentSafety Cognitive Services account. - - Checks the configured / default resource group first, then falls - back to a subscription-wide search (covers dedicated service RGs). - """ + async def _discover_resource( + self, resource_type: str, query: str, label: str, + ) -> str: + """Find an Azure resource by type, checking configured RG then subscription-wide.""" if not self._az: return "" rg = cfg.env.read("FOUNDRY_RESOURCE_GROUP") or _DEFAULT_RG - for rg_args in ( - ["--resource-group", rg], - [], # subscription-wide fallback - ): + for rg_args in (["--resource-group", rg], []): resources = await run_sync( self._az.json, "resource", "list", *rg_args, - "--resource-type", "Microsoft.CognitiveServices/accounts", - "--query", "[?kind=='ContentSafety'].id", + "--resource-type", resource_type, + "--query", query, ) if isinstance(resources, list) and resources: rid = resources[0] - logger.info("[identity.discover] found CS resource: %s", rid) + logger.info("[identity.discover] found %s: %s", label, rid) return rid return "" - async def _discover_session_pool(self) -> str: - """Find a session pool ARM id. + async def _discover_cs_resource(self) -> str: + """Find a ContentSafety Cognitive Services account.""" + return await self._discover_resource( + "Microsoft.CognitiveServices/accounts", + "[?kind=='ContentSafety'].id", + "CS resource", + ) - Checks the default RG first, then falls back to subscription-wide. 
- """ - if not self._az: - return "" - rg = cfg.env.read("FOUNDRY_RESOURCE_GROUP") or _DEFAULT_RG - for rg_args in ( - ["--resource-group", rg], - [], # subscription-wide fallback - ): - resources = await run_sync( - self._az.json, - "resource", "list", - *rg_args, - "--resource-type", "Microsoft.App/sessionPools", - "--query", "[].id", - ) - if isinstance(resources, list) and resources: - rid = resources[0] - logger.info("[identity.discover] found session pool: %s", rid) - return rid - return "" + async def _discover_session_pool(self) -> str: + """Find a session pool ARM id.""" + return await self._discover_resource( + "Microsoft.App/sessionPools", "[].id", "session pool", + ) async def _assign_role( self, @@ -564,22 +621,12 @@ async def _assign_role( "--role", role_id, "--scope", scope, ) + step_name = f"assign_{role_name.lower().replace(' ', '_')}" if ok: - steps.append({ - "step": f"assign_{role_name.lower().replace(' ', '_')}", - "status": "ok", - "detail": f"{role_name} assigned to {principal_id}", - }) + status, detail = "ok", f"{role_name} assigned to {principal_id}" elif "already exists" in (msg or "").lower() or "conflict" in (msg or "").lower(): - steps.append({ - "step": f"assign_{role_name.lower().replace(' ', '_')}", - "status": "ok", - "detail": "Already assigned", - }) + status, detail = "ok", "Already assigned" else: - steps.append({ - "step": f"assign_{role_name.lower().replace(' ', '_')}", - "status": "failed", - "detail": f"Assignment failed: {msg}", - }) + status, detail = "failed", f"Assignment failed: {msg}" logger.warning("[identity.fix] role assignment failed: %s", msg, exc_info=True) + steps.append({"step": step_name, "status": status, "detail": detail}) diff --git a/app/runtime/server/routes/mcp_routes.py b/app/runtime/server/routes/mcp_routes.py index 46e9d94..6512a8f 100644 --- a/app/runtime/server/routes/mcp_routes.py +++ b/app/runtime/server/routes/mcp_routes.py @@ -8,6 +8,7 @@ from aiohttp import web from ...state.mcp_config import 
McpConfigStore +from ._helpers import api_handler, error_response, ok_response, parse_json logger = logging.getLogger(__name__) @@ -31,73 +32,56 @@ def register(self, router: web.UrlDispatcher) -> None: router.add_get("/api/mcp/registry", self._registry) async def _list(self, _req: web.Request) -> web.Response: - return web.json_response({"status": "ok", "servers": self._store.list_servers()}) + return ok_response(servers=self._store.list_servers()) async def _get(self, req: web.Request) -> web.Response: server_id = req.match_info["server_id"] server = self._store.get_server(server_id) if not server: - return web.json_response( - {"status": "error", "message": "Server not found"}, status=404 - ) + return error_response("Server not found", status=404) return web.json_response(server) + @api_handler async def _add(self, req: web.Request) -> web.Response: - data = await req.json() - try: - server = self._store.add_server( - name=data.get("name", ""), - server_type=data.get("type", ""), - command=data.get("command", ""), - args=data.get("args"), - env=data.get("env"), - url=data.get("url", ""), - tools=data.get("tools"), - enabled=data.get("enabled", True), - description=data.get("description", ""), - ) - return web.json_response({"status": "ok", "server": server}) - except (ValueError, KeyError) as exc: - return web.json_response( - {"status": "error", "message": str(exc)}, status=400 - ) + data = await parse_json(req) + server = self._store.add_server( + name=data.get("name", ""), + server_type=data.get("type", ""), + command=data.get("command", ""), + args=data.get("args"), + env=data.get("env"), + url=data.get("url", ""), + tools=data.get("tools"), + enabled=data.get("enabled", True), + description=data.get("description", ""), + ) + return ok_response(server=server) async def _update(self, req: web.Request) -> web.Response: server_id = req.match_info["server_id"] - data = await req.json() + data = await parse_json(req) updated = self._store.update_server(server_id, 
**data) if not updated: - return web.json_response( - {"status": "error", "message": "Server not found"}, status=404 - ) - return web.json_response({"status": "ok", "server": updated}) + return error_response("Server not found", status=404) + return ok_response(server=updated) async def _enable(self, req: web.Request) -> web.Response: server_id = req.match_info["server_id"] - ok = self._store.set_enabled(server_id, True) - if not ok: - return web.json_response( - {"status": "error", "message": "Server not found"}, status=404 - ) - return web.json_response({"status": "ok"}) + if not self._store.set_enabled(server_id, True): + return error_response("Server not found", status=404) + return ok_response() async def _disable(self, req: web.Request) -> web.Response: server_id = req.match_info["server_id"] - ok = self._store.set_enabled(server_id, False) - if not ok: - return web.json_response( - {"status": "error", "message": "Server not found"}, status=404 - ) - return web.json_response({"status": "ok"}) + if not self._store.set_enabled(server_id, False): + return error_response("Server not found", status=404) + return ok_response() async def _remove(self, req: web.Request) -> web.Response: server_id = req.match_info["server_id"] - ok = self._store.remove_server(server_id) - if not ok: - return web.json_response( - {"status": "error", "message": "Server not found"}, status=404 - ) - return web.json_response({"status": "ok"}) + if not self._store.remove_server(server_id): + return error_response("Server not found", status=404) + return ok_response() async def _registry(self, req: web.Request) -> web.Response: page = req.query.get("page", "1") diff --git a/app/runtime/server/routes/monitoring_routes.py b/app/runtime/server/routes/monitoring_routes.py index ec74dfc..4b10386 100644 --- a/app/runtime/server/routes/monitoring_routes.py +++ b/app/runtime/server/routes/monitoring_routes.py @@ -3,7 +3,6 @@ from __future__ import annotations import logging -import secrets as 
_secrets from typing import Any from aiohttp import web @@ -14,7 +13,9 @@ from ...state.deploy_state import DeployStateStore from ...state.monitoring_config import MonitoringConfigStore from ...util.async_helpers import run_sync -from ._helpers import fail_response as _fail_response, no_az as _no_az +from ._helpers import api_handler, error_response, ok_response, parse_json +from ._helpers import fail_response as _fail_response +from ._helpers import no_az as _no_az logger = logging.getLogger(__name__) @@ -56,8 +57,9 @@ async def _get_config(self, _req: web.Request) -> web.Response: data["otel_status"] = status return web.json_response(data) + @api_handler async def _save_config(self, req: web.Request) -> web.Response: - data = await req.json() + data = await parse_json(req) connection_string = data.get("connection_string") enabled = data.get("enabled") @@ -86,10 +88,9 @@ async def _save_config(self, req: web.Request) -> web.Response: enable_live_metrics=cfg.enable_live_metrics, ) if ok: - return web.json_response({ - "status": "ok", - "message": "Monitoring enabled -- telemetry is being exported to Application Insights.", - }) + return ok_response( + message="Monitoring enabled -- telemetry is being exported to Application Insights.", + ) return web.json_response({ "status": "warning", "message": ( @@ -100,25 +101,22 @@ async def _save_config(self, req: web.Request) -> web.Response: if not cfg.enabled and is_active(): shutdown_otel() - return web.json_response({ - "status": "ok", - "message": "Monitoring disabled. OTel providers shut down. Full cleanup requires a restart.", - }) + return ok_response( + message="Monitoring disabled. OTel providers shut down. 
Full cleanup requires a restart.", + ) - return web.json_response({"status": "ok", "message": "Monitoring configuration saved."}) + return ok_response(message="Monitoring configuration saved.") async def _get_status(self, _req: web.Request) -> web.Response: return web.json_response(get_status()) + @api_handler async def _test_connection(self, req: web.Request) -> web.Response: """Quick validation that the connection string looks correct.""" - data = await req.json() + data = await parse_json(req) cs = data.get("connection_string", "") if not cs: - return web.json_response( - {"status": "error", "message": "No connection string provided."}, - status=400, - ) + return error_response("No connection string provided.") # Parse the connection string to validate format parts: dict[str, str] = {} @@ -131,22 +129,15 @@ async def _test_connection(self, req: web.Request) -> web.Response: ingestion = parts.get("ingestionendpoint", "") if not ikey: - return web.json_response( - {"status": "error", "message": "Connection string missing InstrumentationKey."}, - status=400, - ) + return error_response("Connection string missing InstrumentationKey.") if not ingestion: - return web.json_response( - {"status": "error", "message": "Connection string missing IngestionEndpoint."}, - status=400, - ) + return error_response("Connection string missing IngestionEndpoint.") - return web.json_response({ - "status": "ok", - "message": "Connection string format is valid.", - "instrumentation_key": f"{ikey[:8]}...{ikey[-4:]}" if len(ikey) > 12 else ikey, - "ingestion_endpoint": ingestion, - }) + return ok_response( + message="Connection string format is valid.", + instrumentation_key=f"{ikey[:8]}...{ikey[-4:]}" if len(ikey) > 12 else ikey, + ingestion_endpoint=ingestion, + ) # ------------------------------------------------------------------ # Provisioning -- create / destroy App Insights via Azure CLI @@ -155,12 +146,11 @@ async def _test_connection(self, req: web.Request) -> web.Response: 
async def _provision(self, req: web.Request) -> web.Response: """Provision Log Analytics + Application Insights via the central Bicep template.""" if self._store.is_provisioned: - return web.json_response({ - "status": "ok", - "message": f"Already provisioned: {self._store.config.app_insights_name}", - "steps": [], + return ok_response( + message=f"Already provisioned: {self._store.config.app_insights_name}", + steps=[], **self._store.to_dict(), - }) + ) if not self._bicep: return _no_az() @@ -212,22 +202,18 @@ async def _provision(self, req: web.Request) -> web.Response: "[monitoring.provision] App Insights '%s' provisioned via Bicep (rg=%s)", result.app_insights_name, rg, ) - return web.json_response({ - "status": "ok", - "message": f"Application Insights '{result.app_insights_name}' provisioned and monitoring enabled.", - "steps": result.steps, + return ok_response( + message=f"Application Insights '{result.app_insights_name}' provisioned and monitoring enabled.", + steps=result.steps, **self._store.to_dict(), - }) + ) async def _decommission(self, _req: web.Request) -> web.Response: """Delete the provisioned App Insights + Log Analytics resources.""" if not self._az: return _no_az() if not self._store.is_provisioned: - return web.json_response( - {"status": "error", "message": "No monitoring resources provisioned."}, - status=400, - ) + return error_response("No monitoring resources provisioned.") steps: list[dict[str, Any]] = [] ai_name = self._store.config.app_insights_name @@ -291,9 +277,8 @@ async def _decommission(self, _req: web.Request) -> web.Response: steps.append({"step": "clear_config", "status": "ok", "detail": "Configuration cleared"}) logger.info("[monitoring.decommission] App Insights '%s' removed", ai_name) - return web.json_response({ - "status": "ok", - "message": f"Application Insights '{ai_name}' decommissioned.", - "steps": steps, + return ok_response( + message=f"Application Insights '{ai_name}' decommissioned.", + steps=steps, 
**self._store.to_dict(), - }) + ) diff --git a/app/runtime/server/routes/network_audit.py b/app/runtime/server/routes/network_audit.py index d598bee..0dc0b65 100644 --- a/app/runtime/server/routes/network_audit.py +++ b/app/runtime/server/routes/network_audit.py @@ -56,235 +56,153 @@ def audit_resource( return None +def _get_private_endpoints(props: dict[str, Any]) -> list[str]: + """Extract private endpoint names from a resource's properties.""" + pe_conns = props.get("privateEndpointConnections", []) + return [ + pec.get("privateEndpoint", {}).get("id", "").rsplit("/", 1)[-1] + for pec in pe_conns + if pec.get("privateEndpoint", {}).get("id") + ] + + +def _parse_network( + info: dict[str, Any], + acl_key: str = "networkAcls", +) -> dict[str, Any]: + """Extract common network-audit fields from a resource response.""" + props = info.get("properties") or info + net_acls = ( + props.get(acl_key) or props.get("networkRuleSet") + or props.get("networkAcls") or {} + ) + default_action = net_acls.get("defaultAction") or "Allow" + ip_rules = net_acls.get("ipRules") or [] + vnet_rules = net_acls.get("virtualNetworkRules") or [] + public_access_field = props.get("publicNetworkAccess", "Enabled") + return { + "default_action": default_action, + "allowed_ips": [ + r.get("value", r.get("ipAddressOrRange", "")) for r in ip_rules + ], + "allowed_vnets": [r.get("id", "") for r in vnet_rules], + "private_endpoints": _get_private_endpoints(props), + "public_access_field": public_access_field, + "props": props, + } + + +def _base_result( + name: str, rg: str, rtype: str, icon: str, + public_access: bool, net: dict[str, Any], + extra: dict[str, Any] | None = None, +) -> dict[str, Any]: + return { + "name": name, "resource_group": rg, + "type": rtype, "icon": icon, + "public_access": public_access, + "default_action": net["default_action"], + "allowed_ips": net["allowed_ips"], + "allowed_vnets": net["allowed_vnets"], + "private_endpoints": net["private_endpoints"], + "extra": extra or 
{}, + } + + +def _stub_result(name: str, rg: str, rtype: str, icon: str) -> dict[str, Any]: + """Return a minimal audit dict for resources without CLI inspection.""" + return _base_result(name, rg, rtype, icon, True, + {"default_action": "Allow", "allowed_ips": [], + "allowed_vnets": [], "private_endpoints": []}) + + # ------------------------------------------------------------------ # Per-resource audit functions # ------------------------------------------------------------------ -def _audit_storage( - az: AzureCLI, rg: str, name: str, -) -> dict[str, Any] | None: +def _audit_storage(az: AzureCLI, rg: str, name: str) -> dict[str, Any] | None: info = az.json("storage", "account", "show", "--name", name, "--resource-group", rg) if not isinstance(info, dict): return None - props = info.get("properties") or info - net_rules = props.get("networkRuleSet") or props.get("networkAcls") or {} - default_action = net_rules.get("defaultAction") or "Allow" - ip_rules = net_rules.get("ipRules") or [] - vnet_rules = net_rules.get("virtualNetworkRules") or [] - allowed_ips = [r.get("value", r.get("ipAddressOrRange", "")) for r in ip_rules] - allowed_vnets = [r.get("id", "") for r in vnet_rules] - public_blob = props.get("allowBlobPublicAccess", True) - https_only = info.get("enableHttpsTrafficOnly", props.get("supportsHttpsTrafficOnly", True)) - min_tls = props.get("minimumTlsVersion", "TLS1_0") - private_eps = _get_private_endpoints(props) - - return { - "name": name, - "resource_group": rg, - "type": "Storage Account", - "icon": "storage", - "public_access": default_action == "Allow", - "default_action": default_action, - "allowed_ips": allowed_ips, - "allowed_vnets": allowed_vnets, - "private_endpoints": private_eps, - "https_only": https_only, - "min_tls_version": min_tls, - "extra": { - "public_blob_access": public_blob, - }, - } - - -def _audit_keyvault( - az: AzureCLI, rg: str, name: str, -) -> dict[str, Any] | None: + net = _parse_network(info, "networkRuleSet") + props 
= net["props"] + return _base_result(name, rg, "Storage Account", "storage", + net["default_action"] == "Allow", net, { + "public_blob_access": props.get("allowBlobPublicAccess", True), + "https_only": info.get("enableHttpsTrafficOnly", + props.get("supportsHttpsTrafficOnly", True)), + "min_tls_version": props.get("minimumTlsVersion", "TLS1_0"), + }) + + +def _audit_keyvault(az: AzureCLI, rg: str, name: str) -> dict[str, Any] | None: info = az.json("keyvault", "show", "--name", name, "--resource-group", rg) if not isinstance(info, dict): return None - props = info.get("properties") or info - net_acls = props.get("networkAcls") or {} - default_action = net_acls.get("defaultAction") or "Allow" - ip_rules = net_acls.get("ipRules") or [] - vnet_rules = net_acls.get("virtualNetworkRules") or [] - allowed_ips = [r.get("value", "") for r in ip_rules] - allowed_vnets = [r.get("id", "") for r in vnet_rules] - public_access = props.get("publicNetworkAccess", "Enabled") - private_eps = _get_private_endpoints(props) - rbac = props.get("enableRbacAuthorization", False) - soft_delete = props.get("enableSoftDelete", False) - purge_protect = props.get("enablePurgeProtection", False) - - return { - "name": name, - "resource_group": rg, - "type": "Key Vault", - "icon": "keyvault", - "public_access": public_access != "Disabled" and default_action == "Allow", - "default_action": default_action, - "allowed_ips": allowed_ips, - "allowed_vnets": allowed_vnets, - "private_endpoints": private_eps, - "extra": { - "public_network_access": public_access, - "rbac_authorization": rbac, - "soft_delete": soft_delete, - "purge_protection": purge_protect, - }, - } - - -def _audit_cognitive( - az: AzureCLI, rg: str, name: str, -) -> dict[str, Any] | None: - """Audit Azure OpenAI / Cognitive Services accounts.""" - info = az.json( - "cognitiveservices", "account", "show", - "--name", name, "--resource-group", rg, - ) + net = _parse_network(info) + props = net["props"] + pa = 
props.get("publicNetworkAccess", "Enabled") + return _base_result(name, rg, "Key Vault", "keyvault", + pa != "Disabled" and net["default_action"] == "Allow", net, { + "public_network_access": pa, + "rbac_authorization": props.get("enableRbacAuthorization", False), + "soft_delete": props.get("enableSoftDelete", False), + "purge_protection": props.get("enablePurgeProtection", False), + }) + + +def _audit_cognitive(az: AzureCLI, rg: str, name: str) -> dict[str, Any] | None: + info = az.json("cognitiveservices", "account", "show", + "--name", name, "--resource-group", rg) if not isinstance(info, dict): return None - props = info.get("properties") or info - net_acls = props.get("networkAcls") or {} - default_action = net_acls.get("defaultAction") or "Allow" - ip_rules = net_acls.get("ipRules") or [] - vnet_rules = net_acls.get("virtualNetworkRules") or [] - allowed_ips = [r.get("value", "") for r in ip_rules] - allowed_vnets = [r.get("id", "") for r in vnet_rules] - public_access = props.get("publicNetworkAccess", "Enabled") - private_eps = _get_private_endpoints(props) + net = _parse_network(info) + props = net["props"] + pa = props.get("publicNetworkAccess", "Enabled") kind = info.get("kind", "CognitiveServices") - endpoint = ( - props.get("endpoint") - or (props.get("endpoints") or {}).get("OpenAI Language Model Instance API", "") - ) - + endpoint = (props.get("endpoint") + or (props.get("endpoints") or {}).get( + "OpenAI Language Model Instance API", "")) label = "Azure OpenAI" if kind.lower() == "openai" else f"Cognitive Services ({kind})" + return _base_result(name, rg, label, "ai", + pa != "Disabled" and net["default_action"] == "Allow", net, { + "public_network_access": pa, "kind": kind, "endpoint": endpoint, + }) - return { - "name": name, - "resource_group": rg, - "type": label, - "icon": "ai", - "public_access": public_access != "Disabled" and default_action == "Allow", - "default_action": default_action, - "allowed_ips": allowed_ips, - "allowed_vnets": 
allowed_vnets, - "private_endpoints": private_eps, - "extra": { - "public_network_access": public_access, - "kind": kind, - "endpoint": endpoint, - }, - } - -def _audit_search( - az: AzureCLI, rg: str, name: str, -) -> dict[str, Any] | None: - """Audit Azure AI Search service.""" - info = az.json( - "search", "service", "show", - "--name", name, "--resource-group", rg, - ) +def _audit_search(az: AzureCLI, rg: str, name: str) -> dict[str, Any] | None: + info = az.json("search", "service", "show", + "--name", name, "--resource-group", rg) if not isinstance(info, dict): return None - props = info.get("properties") or info - public_access = props.get("publicNetworkAccess", "enabled") - ip_rules = (props.get("networkRuleSet") or {}).get("ipRules") or [] - allowed_ips = [r.get("value", "") for r in ip_rules] - private_eps = _get_private_endpoints(props) - - return { - "name": name, - "resource_group": rg, - "type": "Azure AI Search", - "icon": "search", - "public_access": public_access.lower() != "disabled", - "default_action": "Allow" if public_access.lower() != "disabled" else "Deny", - "allowed_ips": allowed_ips, - "allowed_vnets": [], - "private_endpoints": private_eps, - "extra": { - "public_network_access": public_access, - "sku": info.get("sku", {}).get("name", ""), - }, - } + net = _parse_network(info) + pa = net["public_access_field"] + public = pa.lower() != "disabled" if isinstance(pa, str) else bool(pa) + return _base_result(name, rg, "Azure AI Search", "search", public, net, { + "public_network_access": pa, + "sku": info.get("sku", {}).get("name", ""), + }) -def _audit_acr( - az: AzureCLI, rg: str, name: str, -) -> dict[str, Any] | None: +def _audit_acr(az: AzureCLI, rg: str, name: str) -> dict[str, Any] | None: info = az.json("acr", "show", "--name", name, "--resource-group", rg) if not isinstance(info, dict): return None - public_access = info.get("publicNetworkAccess", "Enabled") - net_rules = info.get("networkRuleSet") or {} - default_action = 
net_rules.get("defaultAction") or "Allow" - ip_rules = net_rules.get("ipRules") or [] - allowed_ips = [r.get("value", "") for r in ip_rules] - admin_enabled = info.get("adminUserEnabled", False) - - return { - "name": name, - "resource_group": rg, - "type": "Container Registry", - "icon": "acr", - "public_access": public_access == "Enabled", - "default_action": default_action, - "allowed_ips": allowed_ips, - "allowed_vnets": [], - "private_endpoints": [], - "extra": { - "admin_user_enabled": admin_enabled, - "sku": info.get("sku", {}).get("name", ""), - }, - } + net = _parse_network(info) + pa = info.get("publicNetworkAccess", "Enabled") + return _base_result(name, rg, "Container Registry", "acr", pa == "Enabled", net, { + "admin_user_enabled": info.get("adminUserEnabled", False), + "sku": info.get("sku", {}).get("name", ""), + }) def _audit_session_pool(rg: str, name: str, **_kw: Any) -> dict[str, Any]: - """Audit Azure Container Apps session pool.""" - return { - "name": name, - "resource_group": rg, - "type": "Session Pool", - "icon": "sandbox", - "public_access": True, - "default_action": "Allow", - "allowed_ips": [], - "allowed_vnets": [], - "private_endpoints": [], - "extra": {}, - } + return _stub_result(name, rg, "Session Pool", "sandbox") def _audit_acs(rg: str, name: str, **_kw: Any) -> dict[str, Any]: - """Audit Azure Communication Services.""" - return { - "name": name, - "resource_group": rg, - "type": "Communication Services", - "icon": "communication", - "public_access": True, - "default_action": "Allow", - "allowed_ips": [], - "allowed_vnets": [], - "private_endpoints": [], - "extra": {}, - } - - -def _get_private_endpoints(props: dict[str, Any]) -> list[str]: - """Extract private endpoint names from a resource's properties.""" - pe_conns = props.get("privateEndpointConnections", []) - results: list[str] = [] - for pec in pe_conns: - pe = pec.get("privateEndpoint", {}) - pe_id = pe.get("id", "") - if pe_id: - results.append(pe_id.rsplit("/", 1)[-1]) 
- return results + return _stub_result(name, rg, "Communication Services", "communication") # Populate the dispatch table now that all audit functions are defined. diff --git a/app/runtime/server/routes/plugin_routes.py b/app/runtime/server/routes/plugin_routes.py index 9236f27..f68c2ac 100644 --- a/app/runtime/server/routes/plugin_routes.py +++ b/app/runtime/server/routes/plugin_routes.py @@ -6,13 +6,13 @@ import json import logging import zipfile -from pathlib import Path from aiohttp import web from ...config.settings import cfg from ...registries.plugins import PluginRegistry from ...state.plugin_config import PluginConfigStore +from ._helpers import error_response, ok_response logger = logging.getLogger(__name__) @@ -39,68 +39,48 @@ def register(self, router: web.UrlDispatcher) -> None: router.add_delete("/api/plugins/{plugin_id}", self._remove) async def _list(self, _req: web.Request) -> web.Response: - return web.json_response({"status": "ok", "plugins": self._registry.list_plugins()}) + return ok_response(plugins=self._registry.list_plugins()) async def _get(self, req: web.Request) -> web.Response: plugin_id = req.match_info["plugin_id"] plugin = self._registry.get_plugin(plugin_id) if not plugin: - return web.json_response( - {"status": "error", "message": "Plugin not found"}, status=404 - ) + return error_response("Plugin not found", status=404) return web.json_response(plugin) async def _enable(self, req: web.Request) -> web.Response: plugin_id = req.match_info["plugin_id"] result = self._registry.enable_plugin(plugin_id) if not result: - return web.json_response( - {"status": "error", "message": "Plugin not found"}, status=404 - ) - return web.json_response({ - "status": "ok", - "message": f"Plugin '{result['name']}' enabled", - "plugin": result, - }) + return error_response("Plugin not found", status=404) + return ok_response(message=f"Plugin '{result['name']}' enabled", plugin=result) async def _disable(self, req: web.Request) -> web.Response: 
plugin_id = req.match_info["plugin_id"] result = self._registry.disable_plugin(plugin_id) if not result: - return web.json_response( - {"status": "error", "message": "Plugin not found"}, status=404 - ) - return web.json_response({ - "status": "ok", - "message": f"Plugin '{result['name']}' disabled", - "plugin": result, - }) + return error_response("Plugin not found", status=404) + return ok_response(message=f"Plugin '{result['name']}' disabled", plugin=result) async def _setup_content(self, req: web.Request) -> web.Response: plugin_id = req.match_info["plugin_id"] manifest = self._registry.get_manifest(plugin_id) if not manifest: - return web.json_response( - {"status": "error", "message": "Plugin not found"}, status=404 - ) + return error_response("Plugin not found", status=404) setup_md = manifest.setup_message or "No setup instructions available." - return web.json_response({"status": "ok", "content": setup_md}) + return ok_response(content=setup_md) async def _complete_setup(self, req: web.Request) -> web.Response: plugin_id = req.match_info["plugin_id"] if not self._registry.get_manifest(plugin_id): - return web.json_response( - {"status": "error", "message": "Plugin not found"}, status=404 - ) + return error_response("Plugin not found", status=404) self._config.mark_setup_completed(plugin_id) - return web.json_response({"status": "ok"}) + return ok_response() async def _import_zip(self, req: web.Request) -> web.Response: data = await req.read() if not data: - return web.json_response( - {"status": "error", "message": "Empty body"}, status=400 - ) + return error_response("Empty body") try: with zipfile.ZipFile(io.BytesIO(data)) as zf: names = zf.namelist() @@ -108,34 +88,23 @@ async def _import_zip(self, req: web.Request) -> web.Response: (n for n in names if n.endswith("manifest.json")), None ) if not manifest_name: - return web.json_response( - {"status": "error", "message": "No manifest.json found"}, - status=400, - ) + return error_response("No 
manifest.json found") manifest_data = json.loads(zf.read(manifest_name)) plugin_id = manifest_data.get("id", "") if not plugin_id: - return web.json_response( - {"status": "error", "message": "manifest.json missing 'id'"}, - status=400, - ) + return error_response("manifest.json missing 'id'") dest = cfg.plugins_dir / plugin_id dest.mkdir(parents=True, exist_ok=True) zf.extractall(dest) except (zipfile.BadZipFile, json.JSONDecodeError, KeyError) as exc: - return web.json_response( - {"status": "error", "message": f"Invalid plugin archive: {exc}"}, - status=400, - ) + return error_response(f"Invalid plugin archive: {exc}") self._registry.refresh() - return web.json_response({"status": "ok", "plugin_id": plugin_id}) + return ok_response(plugin_id=plugin_id) async def _remove(self, req: web.Request) -> web.Response: plugin_id = req.match_info["plugin_id"] if not self._registry.get_manifest(plugin_id): - return web.json_response( - {"status": "error", "message": "Plugin not found"}, status=404 - ) + return error_response("Plugin not found", status=404) self._config.reset(plugin_id) - return web.json_response({"status": "ok"}) + return ok_response() diff --git a/app/runtime/server/routes/profile_routes.py b/app/runtime/server/routes/profile_routes.py index 0893dfc..bccf5e7 100644 --- a/app/runtime/server/routes/profile_routes.py +++ b/app/runtime/server/routes/profile_routes.py @@ -5,6 +5,7 @@ from aiohttp import web from ...state.profile import get_full_profile, load_profile, save_profile +from ._helpers import ok_response, parse_json class ProfileRoutes: @@ -18,7 +19,7 @@ async def _get(self, _req: web.Request) -> web.Response: return web.json_response(get_full_profile()) async def _update(self, req: web.Request) -> web.Response: - data = await req.json() + data = await parse_json(req) current = load_profile() for key in ("name", "emoji", "location", "emotional_state"): if key in data: @@ -26,4 +27,4 @@ async def _update(self, req: web.Request) -> web.Response: if 
"preferences" in data and isinstance(data["preferences"], dict): current["preferences"].update(data["preferences"]) save_profile(current) - return web.json_response({"status": "ok", "message": "Profile updated"}) + return ok_response(message="Profile updated") diff --git a/app/runtime/server/routes/sandbox_routes.py b/app/runtime/server/routes/sandbox_routes.py index 0c93ab3..ac71d3f 100644 --- a/app/runtime/server/routes/sandbox_routes.py +++ b/app/runtime/server/routes/sandbox_routes.py @@ -3,7 +3,6 @@ from __future__ import annotations import logging -import secrets as _secrets from typing import Any from aiohttp import web @@ -14,7 +13,9 @@ from ...state.deploy_state import DeployStateStore from ...state.sandbox_config import BLACKLIST, DEFAULT_WHITELIST, SandboxConfigStore from ...util.async_helpers import run_sync -from ._helpers import fail_response as _fail_response, no_az as _no_az +from ._helpers import api_handler, error_response, ok_response, parse_json +from ._helpers import fail_response as _fail_response +from ._helpers import no_az as _no_az logger = logging.getLogger(__name__) @@ -57,15 +58,11 @@ async def get_config(self, _req: web.Request) -> web.Response: "in parallel. 
Data is synced back on session teardown using " "last-writer-wins -- concurrent changes will be overwritten.", ] - return web.json_response({"status": "ok", **data}) + return ok_response(**data) + @api_handler async def update_config(self, req: web.Request) -> web.Response: - try: - body = await req.json() - except Exception: - return web.json_response( - {"status": "error", "message": "Invalid JSON"}, status=400 - ) + body = await parse_json(req) if "enabled" in body: self._store.set_enabled(bool(body["enabled"])) @@ -77,33 +74,24 @@ async def update_config(self, req: web.Request) -> web.Response: if "whitelist" in body: wl = body["whitelist"] if not isinstance(wl, list): - return web.json_response( - {"status": "error", "message": "whitelist must be a list"}, - status=400, - ) + return error_response("whitelist must be a list") self._store.set_whitelist(wl) if "add_whitelist" in body: item = str(body["add_whitelist"]) if not self._store.add_whitelist_item(item): - return web.json_response( - {"status": "error", "message": f"'{item}' is blacklisted"}, - status=400, - ) + return error_response(f"'{item}' is blacklisted") if "remove_whitelist" in body: self._store.remove_whitelist_item(str(body["remove_whitelist"])) if body.get("reset_whitelist"): self._store.reset_whitelist() - return web.json_response({"status": "ok", **self._store.to_dict()}) + return ok_response(**self._store.to_dict()) async def test_sandbox(self, req: web.Request) -> web.Response: if not self._store.session_pool_endpoint: - return web.json_response( - {"status": "error", "message": "Session pool endpoint not configured"}, - status=400, - ) + return error_response("Session pool endpoint not configured") try: body = await req.json() except Exception: @@ -131,13 +119,12 @@ async def provision_pool(self, req: web.Request) -> web.Response: rg = body.get("resource_group", "").strip() or _DEFAULT_SANDBOX_RG if self._store.is_provisioned: - return web.json_response({ - "status": "ok", - "message": 
f"Already provisioned: {self._store.pool_name}", - "steps": [], + return ok_response( + message=f"Already provisioned: {self._store.pool_name}", + steps=[], **self._store.to_dict(), - "is_provisioned": True, - }) + is_provisioned=True, + ) bicep_req = BicepDeployRequest( resource_group=rg, @@ -163,21 +150,18 @@ async def provision_pool(self, req: web.Request) -> web.Response: }) logger.info("Sandbox pool provisioned (Bicep): %s (rg=%s)", result.session_pool_name, rg) - return web.json_response({ - "status": "ok", - "message": f"Session pool '{result.session_pool_name}' provisioned", - "steps": result.steps, + return ok_response( + message=f"Session pool '{result.session_pool_name}' provisioned", + steps=result.steps, **self._store.to_dict(), - "is_provisioned": True, - }) + is_provisioned=True, + ) async def remove_pool(self, _req: web.Request) -> web.Response: if not self._az: return _no_az() if not self._store.is_provisioned: - return web.json_response( - {"status": "error", "message": "No pool provisioned"}, status=400 - ) + return error_response("No pool provisioned") steps: list[dict[str, Any]] = [] pool_name = self._store.pool_name @@ -222,10 +206,9 @@ async def remove_pool(self, _req: web.Request) -> web.Response: }) logger.info("Sandbox pool removed: %s", pool_name) - return web.json_response({ - "status": "ok", - "message": f"Session pool '{pool_name}' removed", - "steps": steps, + return ok_response( + message=f"Session pool '{pool_name}' removed", + steps=steps, **self._store.to_dict(), - "is_provisioned": False, - }) + is_provisioned=False, + ) diff --git a/app/runtime/server/routes/scheduler_routes.py b/app/runtime/server/routes/scheduler_routes.py index 005925d..0b1478e 100644 --- a/app/runtime/server/routes/scheduler_routes.py +++ b/app/runtime/server/routes/scheduler_routes.py @@ -7,6 +7,7 @@ from aiohttp import web from ...scheduler import Scheduler +from ._helpers import api_handler, error_response, ok_response, parse_json class SchedulerRoutes: 
@@ -25,46 +26,34 @@ async def _list(self, _req: web.Request) -> web.Response: tasks = self._scheduler.list_tasks() return web.json_response([asdict(t) for t in tasks]) + @api_handler async def _create(self, req: web.Request) -> web.Response: - data = await req.json() - try: - task = self._scheduler.add( - description=data.get("description") or data.get("name", ""), - prompt=data.get("prompt", ""), - cron=data.get("cron") or data.get("schedule"), - run_at=data.get("run_at"), - ) - return web.json_response({"status": "ok", "task": asdict(task)}) - except ValueError as exc: - return web.json_response( - {"status": "error", "message": str(exc)}, status=400 - ) - + data = await parse_json(req) + task = self._scheduler.add( + description=data.get("description") or data.get("name", ""), + prompt=data.get("prompt", ""), + cron=data.get("cron") or data.get("schedule"), + run_at=data.get("run_at"), + ) + return ok_response(task=asdict(task)) + + @api_handler async def _update(self, req: web.Request) -> web.Response: task_id = req.match_info["task_id"] - data = await req.json() + data = await parse_json(req) # Normalise frontend field aliases if "schedule" in data and "cron" not in data: data["cron"] = data.pop("schedule") if "name" in data and "description" not in data: data["description"] = data.pop("name") - try: - task = self._scheduler.update(task_id, **data) - except ValueError as exc: - return web.json_response( - {"status": "error", "message": str(exc)}, status=400 - ) + task = self._scheduler.update(task_id, **data) if not task: - return web.json_response( - {"status": "error", "message": "Task not found"}, status=404 - ) - return web.json_response({"status": "ok", "task": asdict(task)}) + return error_response("Task not found", status=404) + return ok_response(task=asdict(task)) async def _delete(self, req: web.Request) -> web.Response: task_id = req.match_info["task_id"] removed = self._scheduler.remove(task_id) if not removed: - return web.json_response( - 
{"status": "error", "message": "Task not found"}, status=404 - ) - return web.json_response({"status": "ok"}) + return error_response("Task not found", status=404) + return ok_response() diff --git a/app/runtime/server/routes/session_routes.py b/app/runtime/server/routes/session_routes.py index 1d3f79d..21c6bb5 100644 --- a/app/runtime/server/routes/session_routes.py +++ b/app/runtime/server/routes/session_routes.py @@ -5,6 +5,7 @@ from aiohttp import web from ...state.session_store import ARCHIVAL_OPTIONS, SessionStore +from ._helpers import error_response, ok_response, parse_json class SessionRoutes: @@ -29,23 +30,19 @@ async def _get(self, req: web.Request) -> web.Response: session_id = req.match_info["session_id"] data = self._store.get_session(session_id) if not data: - return web.json_response( - {"status": "error", "message": "Session not found"}, status=404 - ) + return error_response("Session not found", status=404) return web.json_response(data) async def _delete(self, req: web.Request) -> web.Response: session_id = req.match_info["session_id"] removed = self._store.delete_session(session_id) if not removed: - return web.json_response( - {"status": "error", "message": "Session not found"}, status=404 - ) - return web.json_response({"status": "ok"}) + return error_response("Session not found", status=404) + return ok_response() async def _clear(self, _req: web.Request) -> web.Response: count = self._store.clear_all() - return web.json_response({"status": "ok", "deleted": count}) + return ok_response(deleted=count) async def _stats(self, _req: web.Request) -> web.Response: return web.json_response(self._store.get_session_stats()) @@ -57,15 +54,11 @@ async def _get_policy(self, _req: web.Request) -> web.Response: }) async def _set_policy(self, req: web.Request) -> web.Response: - body = await req.json() + body = await parse_json(req) policy = body.get("policy", "") if policy not in ARCHIVAL_OPTIONS: - return web.json_response( - { - "status": "error", - 
"message": f"Invalid policy. Valid: {list(ARCHIVAL_OPTIONS.keys())}", - }, - status=400, + return error_response( + f"Invalid policy. Valid: {list(ARCHIVAL_OPTIONS.keys())}", ) self._store.set_archival_policy(policy) stats = self._store.get_session_stats() diff --git a/app/runtime/server/routes/tool_activity_routes.py b/app/runtime/server/routes/tool_activity_routes.py index c1c16f8..3a09c21 100644 --- a/app/runtime/server/routes/tool_activity_routes.py +++ b/app/runtime/server/routes/tool_activity_routes.py @@ -8,6 +8,7 @@ from ...state.session_store import SessionStore from ...state.tool_activity_store import ToolActivityStore +from ._helpers import api_handler, error_response, ok_response, parse_json logger = logging.getLogger(__name__) @@ -49,42 +50,37 @@ async def _list(self, req: web.Request) -> web.Response: limit=min(int(params.get("limit", "200")), 1000), offset=int(params.get("offset", "0")), ) - return web.json_response({"status": "ok", **result}) + return ok_response(**result) async def _summary(self, _req: web.Request) -> web.Response: """Return aggregate statistics.""" summary = self._store.get_summary() - return web.json_response({"status": "ok", **summary}) + return ok_response(**summary) async def _get(self, req: web.Request) -> web.Response: """Get a single tool activity entry.""" entry_id = req.match_info["entry_id"] entry = self._store.get_entry(entry_id) if not entry: - return web.json_response( - {"status": "error", "message": "Entry not found"}, status=404, - ) - return web.json_response({"status": "ok", "entry": entry}) + return error_response("Entry not found", status=404) + return ok_response(entry=entry) + @api_handler async def _flag(self, req: web.Request) -> web.Response: """Manually flag an entry as suspicious.""" entry_id = req.match_info["entry_id"] - body = await req.json() + body = await parse_json(req) reason = body.get("reason", "") if self._store.flag_entry(entry_id, reason): - return web.json_response({"status": "ok"}) - 
return web.json_response( - {"status": "error", "message": "Entry not found"}, status=404, - ) + return ok_response() + return error_response("Entry not found", status=404) async def _unflag(self, req: web.Request) -> web.Response: """Remove flag from an entry.""" entry_id = req.match_info["entry_id"] if self._store.unflag_entry(entry_id): - return web.json_response({"status": "ok"}) - return web.json_response( - {"status": "error", "message": "Entry not found"}, status=404, - ) + return ok_response() + return error_response("Entry not found", status=404) async def _timeline(self, req: web.Request) -> web.Response: """Return time-bucketed tool activity data.""" @@ -94,12 +90,12 @@ async def _timeline(self, req: web.Request) -> web.Response: since=float(params.get("since", "0")), until=float(params.get("until", "0")), ) - return web.json_response({"status": "ok", "buckets": data}) + return ok_response(buckets=data) async def _sessions_breakdown(self, _req: web.Request) -> web.Response: """Return per-session aggregation.""" data = self._store.get_session_breakdown() - return web.json_response({"status": "ok", "sessions": data}) + return ok_response(sessions=data) async def _export(self, req: web.Request) -> web.Response: """Export tool activity as CSV.""" @@ -122,4 +118,4 @@ async def _export(self, req: web.Request) -> web.Response: async def _import(self, _req: web.Request) -> web.Response: """Backfill tool activity from existing session history.""" count = self._store.import_from_sessions(self._sessions) - return web.json_response({"status": "ok", "imported": count}) + return ok_response(imported=count) diff --git a/app/runtime/server/setup/_helpers.py b/app/runtime/server/setup/_helpers.py index 6a0fe99..b6c4aba 100644 --- a/app/runtime/server/setup/_helpers.py +++ b/app/runtime/server/setup/_helpers.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing import Any + from aiohttp import web @@ -13,3 +15,19 @@ def ok_response(message: str) -> 
web.Response: def error_response(message: str, status: int = 500) -> web.Response: """Return a standard error response.""" return web.json_response({"status": "error", "message": message}, status=status) + + +def fail_response( + steps: list[dict[str, Any]], + prefix: str = "Failed", + *, + key: str = "detail", + status: int = 500, +) -> web.Response: + """Return an error response with the first failed step's detail.""" + failed = [s for s in steps if s.get("status") == "failed"] + msg = failed[0].get(key, "Unknown") if failed else "Unknown" + return web.json_response( + {"status": "error", "steps": steps, "message": f"{prefix}: {msg}"}, + status=status, + ) diff --git a/app/runtime/server/setup/_routes.py b/app/runtime/server/setup/_routes.py index c617399..5e840e0 100644 --- a/app/runtime/server/setup/_routes.py +++ b/app/runtime/server/setup/_routes.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import logging import os import subprocess @@ -19,14 +20,15 @@ from ...state.deploy_state import DeployStateStore from ...state.infra_config import InfraConfigStore from ...util.async_helpers import run_sync +from ..smoke_test import SmokeTestRunner +from ._helpers import error_response as _error +from ._helpers import ok_response as _ok from .azure import AzureSetupRoutes -from ._helpers import error_response as _error, ok_response as _ok from .deploy import DeploymentRoutes from .foundry import FoundryDeployRoutes from .preflight import PreflightRoutes from .prerequisites import PrerequisitesRoutes from .voice import VoiceSetupRoutes -from ..smoke_test import SmokeTestRunner logger = logging.getLogger(__name__) @@ -153,67 +155,19 @@ async def copilot_set_token(self, req: web.Request) -> web.Response: return _error("GitHub token is no longer used. Configure FOUNDRY_ENDPOINT instead.", 410) async def _restart_runtime(self) -> None: - """Restart or reload the runtime container. 
- - Docker mode: full ``docker restart`` so the entrypoint re-runs - ``az login --service-principal`` with the SP credentials from - ``/data/.env``. A soft reload cannot replicate this. - - Non-Docker: HTTP POST to ``/api/internal/reload``. - """ + """Restart or reload the runtime container.""" runtime_url = os.getenv("RUNTIME_URL", "") if not runtime_url or cfg.server_mode == ServerMode.combined: return - # Docker mode -- hard restart (re-runs entrypoint + az login) - # Falls back to ``docker compose up -d runtime`` when the container - # does not exist yet (first provision). if os.getenv("POLYCLAW_CONTAINER") == "1": - try: - proc = await run_sync( - subprocess.run, - ["docker", "restart", "polyclaw-runtime"], - capture_output=True, text=True, timeout=60, - ) - if proc.returncode == 0: - logger.info("[setup.restart_runtime] docker restart succeeded") - else: - stderr = proc.stderr.strip() - logger.warning( - "[setup.restart_runtime] docker restart failed: %s", stderr, - ) - # Container doesn't exist yet -- start it via compose - if "No such container" in stderr or "not found" in stderr.lower(): - logger.info( - "[setup.restart_runtime] container missing, " - "attempting docker compose up -d runtime", - ) - up = await run_sync( - subprocess.run, - ["docker", "compose", "up", "-d", "runtime"], - capture_output=True, text=True, timeout=120, - ) - if up.returncode == 0: - logger.info( - "[setup.restart_runtime] compose up succeeded", - ) - else: - logger.warning( - "[setup.restart_runtime] compose up failed: %s", - up.stderr.strip(), - ) - except Exception as exc: - logger.warning( - "[setup.restart_runtime] docker restart error: %s", - exc, exc_info=True, - ) + await self._docker_restart() return url = f"{runtime_url.rstrip('/')}/api/internal/reload" headers: dict[str, str] = {} if cfg.admin_secret: headers["Authorization"] = f"Bearer {cfg.admin_secret}" - try: async with _aiohttp.ClientSession() as session: async with session.post( @@ -231,6 +185,66 @@ async def 
_restart_runtime(self) -> None: exc, exc_info=True, ) + @staticmethod + async def _docker_restart() -> None: + """Hard-restart the runtime container, falling back to compose up. + + After restart, waits for the runtime health endpoint to confirm + the container is alive with valid credentials. + """ + try: + proc = await run_sync( + subprocess.run, + ["docker", "restart", "polyclaw-runtime"], + capture_output=True, text=True, timeout=60, + ) + if proc.returncode == 0: + logger.info("[setup.restart_runtime] docker restart succeeded") + else: + stderr = proc.stderr.strip() + logger.warning("[setup.restart_runtime] docker restart failed: %s", stderr) + if "No such container" not in stderr and "not found" not in stderr.lower(): + return + logger.info("[setup.restart_runtime] container missing, trying compose up") + up = await run_sync( + subprocess.run, + ["docker", "compose", "up", "-d", "runtime"], + capture_output=True, text=True, timeout=120, + ) + if up.returncode == 0: + logger.info("[setup.restart_runtime] compose up succeeded") + else: + logger.warning( + "[setup.restart_runtime] compose up failed: %s", + up.stderr.strip(), + ) + return + + # Wait for the runtime to become healthy (up to 30 s). 
+ runtime_url = os.getenv("RUNTIME_URL", "http://runtime:8080") + health_url = f"{runtime_url.rstrip('/')}/health" + for attempt in range(15): + await asyncio.sleep(2) + try: + async with _aiohttp.ClientSession() as session: + async with session.get( + health_url, + timeout=_aiohttp.ClientTimeout(total=3), + ) as resp: + if resp.status == 200: + logger.info( + "[setup.restart_runtime] runtime healthy after %ds", + (attempt + 1) * 2, + ) + return + except Exception: + pass + logger.warning("[setup.restart_runtime] runtime did not become healthy in 30s") + except Exception as exc: + logger.warning( + "[setup.restart_runtime] docker restart error: %s", exc, exc_info=True, + ) + async def smoke_test(self, _req: web.Request) -> web.Response: runner = SmokeTestRunner(self._gh) result = await runner.run() @@ -386,7 +400,9 @@ async def save_configuration(self, req: web.Request) -> web.Response: }) try: - migrated = self._prerequisites_routes._migrate_existing_secrets() + migrated = await run_sync( + self._prerequisites_routes._migrate_existing_secrets, + ) if migrated: steps.append({ "step": "migrate_env", "status": "ok", diff --git a/app/runtime/server/setup/azure.py b/app/runtime/server/setup/azure.py index 66e1dea..653c9d8 100644 --- a/app/runtime/server/setup/azure.py +++ b/app/runtime/server/setup/azure.py @@ -26,15 +26,20 @@ def register(self, router: web.UrlDispatcher) -> None: router.add_post("/api/setup/azure/subscription", self.set_subscription) router.add_get("/api/setup/azure/resource-groups", self.list_resource_groups) - async def azure_login(self, _req: web.Request) -> web.Response: + def _account_status(self) -> tuple[dict | None, bool]: + """Return (account, needs_subscription).""" account = self._az.account_info() - if account and not account.get("_no_default_subscription"): + return account, bool(account and account.get("_no_default_subscription")) + + async def azure_login(self, _req: web.Request) -> web.Response: + account, needs_sub = 
self._account_status() + if account and not needs_sub: return web.json_response({ "status": "already_logged_in", "user": account.get("user", {}).get("name"), "subscription": account.get("name"), }) - if account and account.get("_no_default_subscription"): + if needs_sub: return web.json_response({ "status": "needs_subscription", "message": "Logged in but no default subscription. Please select one.", @@ -43,14 +48,14 @@ async def azure_login(self, _req: web.Request) -> web.Response: return web.json_response({"status": "device_code_pending", **info}) async def azure_check(self, _req: web.Request) -> web.Response: - account = self._az.account_info() - if account and not account.get("_no_default_subscription"): + account, needs_sub = self._account_status() + if account and not needs_sub: return web.json_response({ "status": "logged_in", "user": account.get("user", {}).get("name"), "subscription": account.get("name"), }) - if account and account.get("_no_default_subscription"): + if needs_sub: return web.json_response({"status": "needs_subscription"}) return web.json_response({"status": "pending"}) diff --git a/app/runtime/server/setup/foundry.py b/app/runtime/server/setup/foundry.py index 1de40b1..f9137be 100644 --- a/app/runtime/server/setup/foundry.py +++ b/app/runtime/server/setup/foundry.py @@ -13,12 +13,11 @@ from aiohttp import web -from ...config.settings import cfg from ...services.cloud.azure import AzureCLI from ...services.deployment.bicep_deployer import BicepDeployer, BicepDeployRequest +from ...services.keyvault import kv as _kv from ...state.deploy_state import DeployStateStore from ...util.async_helpers import run_sync -from ._helpers import error_response as _error, ok_response as _ok logger = logging.getLogger(__name__) @@ -42,13 +41,9 @@ def register(self, router: web.UrlDispatcher) -> None: router.add_get("/api/setup/foundry/deploy/stream", self.foundry_deploy_stream) router.add_post("/api/setup/foundry/decommission", self.foundry_decommission) - 
async def foundry_status(self, _req: web.Request) -> web.Response: - status = self._deployer.status() - return web.json_response(status) - - async def foundry_deploy(self, req: web.Request) -> web.Response: - body = await req.json() if req.can_read_body else {} - deploy_req = BicepDeployRequest( + @staticmethod + def _build_request(body: dict[str, Any]) -> BicepDeployRequest: + req = BicepDeployRequest( resource_group=body.get("resource_group", "polyclaw-rg"), location=body.get("location", "eastus"), base_name=body.get("base_name", ""), @@ -61,10 +56,22 @@ async def foundry_deploy(self, req: web.Request) -> web.Response: deploy_session_pool=body.get("deploy_session_pool", False), ) if body.get("models"): - deploy_req.models = body["models"] + req.models = body["models"] + return req + + async def foundry_status(self, _req: web.Request) -> web.Response: + status = self._deployer.status() + return web.json_response(status) + + async def foundry_deploy(self, req: web.Request) -> web.Response: + body = await req.json() if req.can_read_body else {} + deploy_req = self._build_request(body) result = await run_sync(self._deployer.deploy, deploy_req) + if result.ok and result.key_vault_url: + _kv.reinit() + if result.ok and self._restart_runtime: try: await self._restart_runtime() @@ -95,20 +102,7 @@ async def foundry_deploy_stream(self, req: web.Request) -> web.StreamResponse: except json.JSONDecodeError: body = {} - deploy_req = BicepDeployRequest( - resource_group=body.get("resource_group", "polyclaw-rg"), - location=body.get("location", "eastus"), - base_name=body.get("base_name", ""), - deploy_key_vault=body.get("deploy_key_vault", True), - deploy_acs=body.get("deploy_acs", False), - deploy_content_safety=body.get("deploy_content_safety", False), - deploy_search=body.get("deploy_search", False), - deploy_embedding_aoai=body.get("deploy_embedding_aoai", False), - deploy_monitoring=body.get("deploy_monitoring", False), - 
deploy_session_pool=body.get("deploy_session_pool", False), - ) - if body.get("models"): - deploy_req.models = body["models"] + deploy_req = self._build_request(body) resp = web.StreamResponse( status=200, @@ -151,6 +145,9 @@ async def _run_deploy() -> Any: result = await task + if result.ok and result.key_vault_url: + _kv.reinit() + if result.ok and self._restart_runtime: try: await self._restart_runtime() diff --git a/app/runtime/server/setup/preflight.py b/app/runtime/server/setup/preflight.py index fcd9830..f0ccc6b 100644 --- a/app/runtime/server/setup/preflight.py +++ b/app/runtime/server/setup/preflight.py @@ -15,6 +15,15 @@ logger = logging.getLogger(__name__) +def _summarize( + sub: list[dict[str, Any]], ok_msg: str, +) -> tuple[bool, str, list[dict[str, Any]]]: + """Return (all_ok, detail, sub) with a summary of failed check names.""" + all_ok = all(s["ok"] for s in sub) + detail = ok_msg if all_ok else "Issues: %s" % ", ".join(s["name"] for s in sub if not s["ok"]) + return all_ok, detail, sub + + class PreflightRoutes: """/api/setup/preflight endpoint and sub-checks.""" @@ -129,19 +138,14 @@ async def _check_acs_config( "detail": "ACS_CONNECTION_STRING not set", }) - src = cfg.acs_source_number - sub.append({ - "name": "acs_source_number", - "ok": bool(src), - "detail": f"Source: {src}" if src else "ACS_SOURCE_NUMBER not set", - }) - - tgt = cfg.voice_target_number - sub.append({ - "name": "voice_target_number", - "ok": bool(tgt), - "detail": f"Target: {tgt}" if tgt else "VOICE_TARGET_NUMBER not set", - }) + for name, val, label in ( + ("acs_source_number", cfg.acs_source_number, "Source"), + ("voice_target_number", cfg.voice_target_number, "Target"), + ): + sub.append({ + "name": name, "ok": bool(val), + "detail": f"{label}: {val}" if val else f"{name.upper()} not set", + }) aoai = cfg.azure_openai_endpoint if aoai: @@ -180,10 +184,7 @@ async def _check_acs_config( "detail": f"Deployment: {dep}" if dep else "Not set", }) - all_ok = all(s["ok"] for s in 
sub) - issues = [s["name"] for s in sub if not s["ok"]] - detail = "ACS voice OK" if all_ok else f"Issues: {', '.join(issues)}" - return all_ok, detail, sub + return _summarize(sub, "ACS voice OK") async def _check_acs_callback_security( self, *, voice_routes_active: bool = False, @@ -275,11 +276,7 @@ async def _check_acs_callback_security( "detail": f"Resource ID: {res_id}" if res_id else "Cannot derive", }) - all_ok = all(s["ok"] for s in sub) - detail = "ACS callback security OK" if all_ok else ( - f"Issues: {', '.join(s['name'] for s in sub if not s['ok'])}" - ) - return all_ok, detail, sub + return _summarize(sub, "ACS callback security OK") async def _check_jwt_validation(self) -> tuple[bool, str]: url = f"http://127.0.0.1:{cfg.admin_port}/api/messages" @@ -408,29 +405,19 @@ async def _check_telegram_security( "detail": f"Valid -- @{username}", }) - can_join = info.get("can_join_groups", False) - sub.append({ - "name": "groups", - "ok": not can_join, - "detail": "Cannot join groups" if not can_join - else "Can join groups (risk)", - }) - - can_read = info.get("can_read_all_group_messages", False) - sub.append({ - "name": "group_privacy", - "ok": not can_read, - "detail": "Privacy mode on" if not can_read - else "Reads ALL group messages", - }) - - inline = info.get("supports_inline_queries", False) - sub.append({ - "name": "inline", - "ok": not inline, - "detail": "Inline disabled" if not inline - else "Inline enabled (risk)", - }) + for key, name, off_msg, on_msg in ( + ("can_join_groups", "groups", + "Cannot join groups", "Can join groups (risk)"), + ("can_read_all_group_messages", "group_privacy", + "Privacy mode on", "Reads ALL group messages"), + ("supports_inline_queries", "inline", + "Inline disabled", "Inline enabled (risk)"), + ): + val = info.get(key, False) + sub.append({ + "name": name, "ok": not val, + "detail": off_msg if not val else on_msg, + }) else: sub.append({ "name": "token_valid", "ok": False, @@ -442,7 +429,4 @@ async def 
_check_telegram_security( "detail": f"Cannot reach API: {exc}", }) - all_ok = all(s["ok"] for s in sub) - issues = [s["name"] for s in sub if not s["ok"]] - detail = "Telegram security OK" if all_ok else f"Issues: {', '.join(issues)}" - return all_ok, detail, sub + return _summarize(sub, "Telegram security OK") diff --git a/app/runtime/server/setup/prerequisites.py b/app/runtime/server/setup/prerequisites.py index 2e70c68..5d28c7a 100644 --- a/app/runtime/server/setup/prerequisites.py +++ b/app/runtime/server/setup/prerequisites.py @@ -17,6 +17,7 @@ from ...state.deploy_state import DeployStateStore from ...state.infra_config import InfraConfigStore from ...util.async_helpers import run_sync +from ._helpers import fail_response logger = logging.getLogger(__name__) @@ -255,9 +256,4 @@ def _migrate_existing_secrets( def _fail(steps: list[dict]) -> web.Response: - failed = [s for s in steps if s.get("status") == "failed"] - msg = failed[0].get("detail", "Unknown") if failed else "Unknown" - return web.json_response( - {"status": "error", "steps": steps, "message": f"Prerequisites failed: {msg}"}, - status=500, - ) + return fail_response(steps, "Prerequisites failed") diff --git a/app/runtime/server/setup/voice.py b/app/runtime/server/setup/voice.py index 8d8be63..4aeae51 100644 --- a/app/runtime/server/setup/voice.py +++ b/app/runtime/server/setup/voice.py @@ -17,7 +17,7 @@ ensure_rg, persist_config, ) -from ._helpers import error_response as _error, ok_response as _ok +from ._helpers import error_response as _error, fail_response, ok_response as _ok logger = logging.getLogger(__name__) @@ -463,8 +463,4 @@ async def connect_existing(self, req: web.Request) -> web.Response: def _voice_fail(steps: list[dict]) -> web.Response: - failed = [s for s in steps if s.get("status") == "failed"] - msg = failed[0].get("name", "Unknown step") if failed else "Unknown error" - return web.json_response( - {"status": "error", "steps": steps, "message": f"Voice deploy failed at: 
{msg}"}, - ) + return fail_response(steps, "Voice deploy failed at", key="name", status=200) diff --git a/app/runtime/server/setup/voice_provision.py b/app/runtime/server/setup/voice_provision.py index f5e1a35..4b4281d 100644 --- a/app/runtime/server/setup/voice_provision.py +++ b/app/runtime/server/setup/voice_provision.py @@ -9,6 +9,7 @@ import functools import logging import secrets +from typing import Any from ...config.settings import cfg from ...services.cloud.azure import AzureCLI @@ -19,7 +20,7 @@ async def ensure_rbac( - az: AzureCLI, aoai_name: str, rg: str, steps: list[dict], + az: AzureCLI, aoai_name: str, rg: str, steps: list[dict[str, Any]], ) -> None: """Assign *Cognitive Services OpenAI User* role to the current principal.""" account = az.account_info() @@ -30,24 +31,7 @@ async def ensure_rbac( }) return - principal_id = "" - principal_type = "User" - - user_info = await run_sync( - functools.partial(az.json, "ad", "signed-in-user", "show", quiet=True), - ) - if isinstance(user_info, dict) and user_info.get("id"): - principal_id = user_info["id"] - else: - sp_id = account.get("user", {}).get("name", "") - if sp_id: - sp_info = await run_sync( - functools.partial(az.json, "ad", "sp", "show", "--id", sp_id, quiet=True), - ) - if isinstance(sp_info, dict) and sp_info.get("id"): - principal_id = sp_info["id"] - principal_type = "ServicePrincipal" - + principal_id, principal_type = _resolve_principal(az, account) if not principal_id: steps.append({ "step": "rbac_assign", "status": "skip", @@ -88,8 +72,21 @@ async def ensure_rbac( logger.warning("RBAC role assignment failed (non-fatal): %s", msg) +def _resolve_principal(az: AzureCLI, account: dict) -> tuple[str, str]: + """Return (principal_id, principal_type) from az account info.""" + user_info = az.json("ad", "signed-in-user", "show", quiet=True) + if isinstance(user_info, dict) and user_info.get("id"): + return user_info["id"], "User" + sp_id = account.get("user", {}).get("name", "") + if sp_id: + 
sp_info = az.json("ad", "sp", "show", "--id", sp_id, quiet=True) + if isinstance(sp_info, dict) and sp_info.get("id"): + return sp_info["id"], "ServicePrincipal" + return "", "User" + + async def ensure_rg( - az: AzureCLI, rg: str, location: str, steps: list[dict], + az: AzureCLI, rg: str, location: str, steps: list[dict[str, Any]], ) -> bool: """Ensure a resource group exists, creating it if necessary.""" existing = await run_sync(az.json, "group", "show", "--name", rg) @@ -109,7 +106,7 @@ async def ensure_rg( async def create_acs( - az: AzureCLI, rg: str, steps: list[dict], + az: AzureCLI, rg: str, steps: list[dict[str, Any]], ) -> tuple[str, str]: """Create an ACS resource and retrieve its connection string.""" acs_name = "polyclaw-acs-%s" % secrets.token_hex(4) @@ -137,7 +134,7 @@ async def create_acs( async def create_aoai( - az: AzureCLI, rg: str, location: str, steps: list[dict], + az: AzureCLI, rg: str, location: str, steps: list[dict[str, Any]], ) -> tuple[str, str, str, str]: """Create an Azure OpenAI resource with a realtime model deployment. 
@@ -192,12 +189,8 @@ async def create_aoai( logger.error("Voice deploy FAILED retrieving AOAI endpoint") return aoai_name, "", "", "" - if aoai_key: - steps.append({"step": "aoai_keys", "status": "ok"}) - else: - steps.append({"step": "aoai_keys", "status": "ok", - "detail": "Using Entra ID auth"}) - + steps.append({"step": "aoai_keys", "status": "ok", + **({} if aoai_key else {"detail": "Using Entra ID auth"})}) return aoai_name, aoai_endpoint, aoai_key, deployment_name @@ -211,7 +204,7 @@ def persist_config( aoai_endpoint: str, aoai_key: str, deployment_name: str, - steps: list[dict], + steps: list[dict[str, Any]], ) -> None: """Write voice configuration to the infra config store and ``.env``.""" store.save_voice_call( diff --git a/app/runtime/server/wiring.py b/app/runtime/server/wiring.py index ec209ba..3d5fea1 100644 --- a/app/runtime/server/wiring.py +++ b/app/runtime/server/wiring.py @@ -147,6 +147,7 @@ async def init_core(mode: ServerMode) -> dict[str, Any]: await agent.start() logger.info("[init_core] Agent started successfully") + logger.info("[init_core] creating adapter, stores, and bot ...") adapter = create_adapter() conv_store = ConversationReferenceStore() session_store = SessionStore() @@ -156,7 +157,14 @@ async def init_core(mode: ServerMode) -> dict[str, Any]: bot.session_store = session_store bot.adapter = adapter bot_ep = BotEndpoint(adapter, bot) - logger.info("[init_core] core initialization complete") + logger.info( + "[init_core] core initialization complete " + "(adapter=%s conv_store=%s session_store=%s hitl=%s)", + type(adapter).__name__, + type(conv_store).__name__, + type(session_store).__name__, + hitl is not None, + ) result.update( agent=agent, adapter=adapter, conv_store=conv_store, @@ -188,15 +196,25 @@ def init_services(mode: ServerMode) -> dict[str, Any]: is_admin = mode in (ServerMode.admin, ServerMode.combined) is_runtime = mode in (ServerMode.runtime, ServerMode.combined) + logger.info("[init_services] mode=%s (admin=%s 
runtime=%s)", mode.value, is_admin, is_runtime) + + logger.info("[init_services] initializing state stores ...") + infra_store = get_infra_config() + monitoring_store = get_monitoring_config() + sandbox_store = get_sandbox_config() + foundry_iq_store = get_foundry_iq_config() + guardrails_store = get_guardrails_config() + mcp_store = McpConfigStore() + result: dict[str, Any] = { "tunnel": None, "deploy_store": DeployStateStore(), - "infra_store": get_infra_config(), - "mcp_store": McpConfigStore(), - "sandbox_store": get_sandbox_config(), - "foundry_iq_store": get_foundry_iq_config(), - "guardrails_store": get_guardrails_config(), - "monitoring_store": get_monitoring_config(), + "infra_store": infra_store, + "mcp_store": mcp_store, + "sandbox_store": sandbox_store, + "foundry_iq_store": foundry_iq_store, + "guardrails_store": guardrails_store, + "monitoring_store": monitoring_store, "az": None, "gh": None, "deployer": None, @@ -207,10 +225,23 @@ def init_services(mode: ServerMode) -> dict[str, Any]: "sandbox_executor": None, } + logger.info( + "[init_services] stores ready: " + "infra(bot=%s tg=%s) monitoring=%s sandbox=%s foundry_iq=%s guardrails=%s mcp=%s", + infra_store.bot_configured if infra_store else "?", + infra_store.telegram_configured if infra_store else "?", + monitoring_store.is_configured if monitoring_store else False, + sandbox_store is not None, + getattr(foundry_iq_store, "enabled", False), + guardrails_store is not None, + mcp_store is not None, + ) + if is_runtime: from ..services.tunnel import CloudflareTunnel result["tunnel"] = CloudflareTunnel() + logger.info("[init_services] tunnel client created") # Admin-side services if is_admin: @@ -220,6 +251,7 @@ def init_services(mode: ServerMode) -> dict[str, Any]: from ..services.deployment.deployer import BotDeployer from ..services.deployment.provisioner import Provisioner + logger.info("[init_services] creating cloud services (admin) ...") az = AzureCLI() deployer = BotDeployer(az, 
result["deploy_store"]) result.update( @@ -233,6 +265,7 @@ def init_services(mode: ServerMode) -> dict[str, Any]: ), aca_deployer=AcaDeployer(az, result["deploy_store"]), ) + logger.info("[init_services] cloud services ready (az, gh, deployer, provisioner, aca)") elif is_runtime: from ..services.cloud.azure import AzureCLI from ..services.deployment.deployer import BotDeployer @@ -256,10 +289,12 @@ def init_services(mode: ServerMode) -> dict[str, Any]: from ..scheduler import get_scheduler from ..state.proactive import get_proactive_store + logger.info("[init_services] creating runtime services (scheduler, proactive, sandbox) ...") result.update( scheduler=get_scheduler(), proactive_store=get_proactive_store(), sandbox_executor=SandboxExecutor(result["sandbox_store"]), ) + logger.info("[init_services] initialization complete") return result diff --git a/app/runtime/services/cloud/azure.py b/app/runtime/services/cloud/azure.py index 0af9515..3901458 100644 --- a/app/runtime/services/cloud/azure.py +++ b/app/runtime/services/cloud/azure.py @@ -75,12 +75,15 @@ def _run(self, cmd: list[str], cmd_summary: str, timeout: int | None = None) -> stderr=proc.stderr.read() if proc.stderr else "", ) - def json(self, *args: str, quiet: bool = False) -> dict | list | None: + def _exec( + self, args: list[str], *, quiet: bool = False, + ) -> subprocess.CompletedProcess[str]: + """Run an ``az`` command with unified logging.""" cmd_summary = " ".join(args[:5]) _log = logger.debug if quiet else logger.info _log("[az] starting: az %s", cmd_summary) t0 = _time() - result = self._run(["az", *args, "--output", "json"], cmd_summary) + result = self._run(["az", *args], cmd_summary) elapsed = _time() - t0 self.last_stderr = result.stderr.strip() if result.returncode != 0: @@ -88,12 +91,18 @@ def json(self, *args: str, quiet: bool = False) -> dict | list | None: "[az] FAILED (%.1fs, rc=%d): az %s -- %s", elapsed, result.returncode, cmd_summary, self.last_stderr[:800], ) + else: + _log("[az] 
OK (%.1fs): az %s", elapsed, cmd_summary) + return result + + def json(self, *args: str, quiet: bool = False) -> dict | list | None: + result = self._exec([*args, "--output", "json"], quiet=quiet) + if result.returncode != 0: return None - _log("[az] OK (%.1fs): az %s", elapsed, cmd_summary) try: return json.loads(result.stdout) except (json.JSONDecodeError, ValueError): - logger.warning("[az] could not parse JSON output for: az %s", cmd_summary) + logger.warning("[az] could not parse JSON output for: az %s", " ".join(args[:5])) return None def json_cached(self, *args: str, ttl: int | None = None) -> dict | list | None: @@ -116,20 +125,8 @@ def invalidate_cache(self, *args: str) -> None: self._cache.clear() def ok(self, *args: str) -> Result: - cmd_summary = " ".join(args[:5]) - logger.info("[az] starting: az %s", cmd_summary) - t0 = _time() - result = self._run(["az", *args], cmd_summary) - elapsed = _time() - t0 - success = result.returncode == 0 - if success: - logger.info("[az] OK (%.1fs): az %s", elapsed, cmd_summary) - else: - logger.warning( - "[az] FAILED (%.1fs, rc=%d): az %s -- %s", - elapsed, result.returncode, cmd_summary, result.stderr.strip()[:300], - ) - return Result(success=success, message=result.stderr.strip()) + result = self._exec(list(args)) + return Result(success=result.returncode == 0, message=self.last_stderr) def account_info(self) -> dict[str, Any] | None: """Return the active subscription, or ``None`` if not logged in. diff --git a/app/runtime/services/cloud/runtime_identity.py b/app/runtime/services/cloud/runtime_identity.py index fd1270f..ea09386 100644 --- a/app/runtime/services/cloud/runtime_identity.py +++ b/app/runtime/services/cloud/runtime_identity.py @@ -132,21 +132,8 @@ def provision(self, resource_group: str) -> dict[str, Any]: tenant = cred.get("tenant", tenant) steps.append({"step": "rotate_creds", "status": "ok"}) - # 6. 
Assign RBAC roles on the resource group - rg_scope = f"/subscriptions/{sub_id}/resourceGroups/{resource_group}" - self._assign_role(app_id, _BOT_CONTRIBUTOR_ROLE, rg_scope, steps) - self._assign_role(app_id, _RG_READER_ROLE, rg_scope, steps) - - # Key Vault may live in a different RG (e.g. polyclaw-prereq-rg). - # Scope the secrets role to the vault resource itself so the SP - # can resolve @kv: references regardless of which RG the vault is in. - kv_scope = self._keyvault_scope(sub_id) - self._assign_role(app_id, _KV_SECRETS_ROLE, kv_scope or rg_scope, steps) - - # Session pool executor (needed for sandbox / code interpreter) - session_scope = self._session_pool_scope(sub_id) - if session_scope: - self._assign_role(app_id, _SESSION_EXECUTOR_ROLE, session_scope, steps) + # 6. Assign RBAC roles + self._assign_standard_roles(app_id, sub_id, resource_group, steps) # 7. Write the SP credentials to the shared .env from ...config.settings import cfg @@ -245,18 +232,7 @@ def provision_managed_identity( steps.append({"step": "create_mi", "status": "ok", "detail": _MI_NAME}) # Assign RBAC - rg_scope = f"/subscriptions/{sub_id}/resourceGroups/{resource_group}" - self._assign_role(principal_id, _BOT_CONTRIBUTOR_ROLE, rg_scope, steps) - self._assign_role(principal_id, _RG_READER_ROLE, rg_scope, steps) - - # Key Vault may live in a different RG -- scope to the vault resource. 
- kv_scope = self._keyvault_scope(sub_id) - self._assign_role(principal_id, _KV_SECRETS_ROLE, kv_scope or rg_scope, steps) - - # Session pool executor (needed for sandbox / code interpreter) - session_scope = self._session_pool_scope(sub_id) - if session_scope: - self._assign_role(principal_id, _SESSION_EXECUTOR_ROLE, session_scope, steps) + self._assign_standard_roles(principal_id, sub_id, resource_group, steps) # Write MI config to .env so the ACA deployer can reference it from ...config.settings import cfg @@ -334,6 +310,25 @@ def _keyvault_scope(self, subscription_id: str) -> str | None: ) return None + def _assign_standard_roles( + self, + assignee: str, + sub_id: str, + resource_group: str, + steps: list[dict[str, str]], + ) -> None: + """Assign the standard set of RBAC roles for the runtime identity.""" + rg_scope = f"/subscriptions/{sub_id}/resourceGroups/{resource_group}" + self._assign_role(assignee, _BOT_CONTRIBUTOR_ROLE, rg_scope, steps) + self._assign_role(assignee, _RG_READER_ROLE, rg_scope, steps) + + kv_scope = self._keyvault_scope(sub_id) + self._assign_role(assignee, _KV_SECRETS_ROLE, kv_scope or rg_scope, steps) + + session_scope = self._session_pool_scope(sub_id) + if session_scope: + self._assign_role(assignee, _SESSION_EXECUTOR_ROLE, session_scope, steps) + def _assign_role( self, app_id: str, role: str, scope: str, steps: list[dict[str, str]], ) -> None: diff --git a/app/runtime/services/deployment/__init__.py b/app/runtime/services/deployment/__init__.py index e67a4ba..6aa2b5f 100644 --- a/app/runtime/services/deployment/__init__.py +++ b/app/runtime/services/deployment/__init__.py @@ -2,6 +2,7 @@ from __future__ import annotations +from ._models import StepTracker from .aca_deployer import AcaDeployer from .bicep_deployer import BicepDeployer, BicepDeployRequest, BicepDeployResult from .deployer import BotDeployer @@ -9,5 +10,5 @@ __all__ = [ "AcaDeployer", "BicepDeployer", "BicepDeployRequest", "BicepDeployResult", - "BotDeployer", 
"Provisioner", + "BotDeployer", "Provisioner", "StepTracker", ] diff --git a/app/runtime/services/deployment/_models.py b/app/runtime/services/deployment/_models.py new file mode 100644 index 0000000..8bbd273 --- /dev/null +++ b/app/runtime/services/deployment/_models.py @@ -0,0 +1,94 @@ +"""Shared deployment data types and step-tracking helpers.""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass +from typing import Any + + +@dataclass +class DeployStep: + """Single step in a deployment pipeline.""" + + step: str + status: str # "ok" | "failed" | "warn" | "skipped" + detail: str = "" + name: str = "" + + def to_dict(self) -> dict[str, Any]: + d: dict[str, Any] = {"step": self.step, "status": self.status} + if self.detail: + d["detail"] = self.detail + if self.name: + d["name"] = self.name + return d + + +class StepTracker: + """Accumulates deployment steps with optional live callback. + + Usage:: + + tracker = StepTracker() + tracker.ok("resource_group", name="polyclaw-rg") + tracker.fail("bot_resource", detail="CLI error") + steps_list = tracker.to_list() + """ + + def __init__(self, callback: Callable[[dict[str, Any]], None] | None = None) -> None: + self._steps: list[dict[str, Any]] = [] + self._cb = callback + + def _append(self, d: dict[str, Any]) -> None: + self._steps.append(d) + if self._cb: + try: + self._cb(d) + except Exception: # noqa: BLE001 + pass + + def _build(self, step: str, status: str, **kw: Any) -> None: + d: dict[str, Any] = {"step": step, "status": status} + d.update(kw) + self._append(d) + + def ok(self, step: str, **kw: Any) -> None: + self._build(step, "ok", **kw) + + def fail(self, step: str, **kw: Any) -> None: + self._build(step, "failed", **kw) + + def warn(self, step: str, **kw: Any) -> None: + self._build(step, "warn", **kw) + + def skip(self, step: str, **kw: Any) -> None: + self._build(step, "skip", **kw) + + def warning(self, step: str, **kw: Any) -> None: + 
self._build(step, "warning", **kw) + + def record(self, step: str, *, ok: bool, **kw: Any) -> None: + self._build(step, "ok" if ok else "failed", **kw) + + def append(self, item: dict[str, Any]) -> None: + """Raw append for backward compatibility.""" + self._append(item) + + def extend(self, items: list[dict[str, Any]]) -> None: + """Append multiple raw step dicts.""" + for item in items: + self._append(item) + + @property + def has_failures(self) -> bool: + return any(s.get("status") == "failed" for s in self._steps) + + def to_list(self) -> list[dict[str, Any]]: + return list(self._steps) + + def __iter__(self): # noqa: ANN204 + return iter(self._steps) + + def __len__(self) -> int: + return len(self._steps) diff --git a/app/runtime/services/deployment/aca_deployer.py b/app/runtime/services/deployment/aca_deployer.py index d62c6b9..4e72d59 100644 --- a/app/runtime/services/deployment/aca_deployer.py +++ b/app/runtime/services/deployment/aca_deployer.py @@ -11,12 +11,12 @@ from ...config.settings import cfg from ...state.deploy_state import DeploymentRecord, DeployStateStore from ..cloud.azure import AzureCLI -from ..cloud._azure_rbac import IMAGE_NAME as _IMAGE_NAME +from ._models import StepTracker from .aca_provision import ( assign_rbac, configure_ip_whitelist, - ensure_acr, ensure_aca_environment, + ensure_acr, ensure_managed_identity, ensure_runtime_app, get_acr_credentials, @@ -60,8 +60,8 @@ def __init__(self, az: AzureCLI, deploy_store: DeployStateStore | None = None) - self._deploy_store = deploy_store def deploy(self, req: AcaDeployRequest) -> AcaDeployResult: - steps: list[dict[str, Any]] = [] - result = AcaDeployResult(steps=steps) + steps = StepTracker() + result = AcaDeployResult(steps=steps._steps) # noqa: SLF001 logger.info("[aca] Starting ACA deployment: rg=%s, location=%s", req.resource_group, req.location) @@ -136,7 +136,7 @@ def deploy(self, req: AcaDeployRequest) -> AcaDeployResult: ) os.environ["RUNTIME_URL"] = runtime_url logger.info("[aca] 
RUNTIME_URL set to %s", runtime_url) - steps.append({"step": "write_aca_config", "status": "ok"}) + steps.ok("write_aca_config") result.ok = True logger.info("[aca] Deployment complete: runtime=%s", runtime_fqdn) @@ -144,7 +144,7 @@ def deploy(self, req: AcaDeployRequest) -> AcaDeployResult: except Exception as exc: logger.error("[aca] Deployment failed: %s", exc, exc_info=True) result.error = str(exc) - steps.append({"step": "unexpected_error", "status": "failed", "detail": str(exc)}) + steps.fail("unexpected_error", detail=str(exc)) if self._deploy_store and rec: if result.ok: @@ -159,8 +159,8 @@ def deploy(self, req: AcaDeployRequest) -> AcaDeployResult: return result def destroy(self, deploy_id: str | None = None) -> AcaDeployResult: - steps: list[dict[str, Any]] = [] - result = AcaDeployResult(steps=steps) + steps = StepTracker() + result = AcaDeployResult(steps=steps._steps) # noqa: SLF001 rec = None if deploy_id and self._deploy_store: @@ -188,7 +188,7 @@ def destroy(self, deploy_id: str | None = None) -> AcaDeployResult: ACA_MI_CLIENT_ID="", RUNTIME_URL="", ) - steps.append({"step": "clear_aca_config", "status": "ok"}) + steps.ok("clear_aca_config") if rec and self._deploy_store: rec.mark_destroyed() @@ -262,7 +262,7 @@ def restart(self) -> dict[str, Any]: return {"ok": ok, "results": [result_detail]} def _delete_aca_resources( - self, rg: str, steps: list[dict], *, step_label: str = "cleanup", + self, rg: str, steps: StepTracker, *, step_label: str = "cleanup", ) -> list[str]: rg_exists = self._az.json("group", "show", "--name", rg, quiet=True) if not isinstance(rg_exists, dict): @@ -271,122 +271,58 @@ def _delete_aca_resources( cleaned: list[str] = [] - apps = self._az.json( - "containerapp", "list", - "--resource-group", rg, quiet=True, - ) - for app in (apps if isinstance(apps, list) else []): - name = app.get("name", "") - if not name: - continue - logger.info("[aca] Deleting container app: %s (waiting)", name) - ok, _ = self._az.ok( - "containerapp", 
"delete", "--name", name, - "--resource-group", rg, "--yes", - ) - if ok: - cleaned.append(f"containerapp/{name}") - steps.append({"step": f"{step_label}/containerapp/{name}", - "status": "ok" if ok else "failed"}) - - identities = self._az.json( - "identity", "list", - "--resource-group", rg, quiet=True, - ) - for mi in (identities if isinstance(identities, list) else []): - name = mi.get("name", "") - if not name: - continue - logger.info("[aca] Deleting managed identity: %s (waiting)", name) - ok, _ = self._az.ok( - "identity", "delete", "--name", name, - "--resource-group", rg, - ) - if ok: - cleaned.append(f"identity/{name}") - steps.append({"step": f"{step_label}/identity/{name}", - "status": "ok" if ok else "failed"}) - - envs = self._az.json( - "containerapp", "env", "list", - "--resource-group", rg, quiet=True, - ) - for env in (envs if isinstance(envs, list) else []): - name = env.get("name", "") - if not name: - continue - logger.info("[aca] Deleting ACA environment: %s (no-wait)", name) - ok, _ = self._az.ok( - "containerapp", "env", "delete", "--name", name, - "--resource-group", rg, "--yes", "--no-wait", - ) - if ok: - cleaned.append(f"aca-env/{name}") - steps.append({"step": f"{step_label}/aca-env/{name}", - "status": "ok" if ok else "failed"}) - - acrs = self._az.json( - "acr", "list", - "--resource-group", rg, quiet=True, - ) - for acr in (acrs if isinstance(acrs, list) else []): - name = acr.get("name", "") - if not name: - continue - logger.info("[aca] Deleting ACR: %s", name) - ok, _ = self._az.ok( - "acr", "delete", "--name", name, - "--resource-group", rg, "--yes", - ) - if ok: - cleaned.append(f"acr/{name}") - steps.append({"step": f"{step_label}/acr/{name}", - "status": "ok" if ok else "failed"}) - - workspaces = self._az.json( - "monitor", "log-analytics", "workspace", "list", - "--resource-group", rg, quiet=True, - ) - for ws in (workspaces if isinstance(workspaces, list) else []): - name = ws.get("name", "") - if not name: - continue - 
logger.info("[aca] Deleting Log Analytics workspace: %s", name) - ok, _ = self._az.ok( - "monitor", "log-analytics", "workspace", "delete", - "--workspace-name", name, - "--resource-group", rg, "--yes", "--force", - ) - if ok: - cleaned.append(f"log-analytics/{name}") - steps.append({"step": f"{step_label}/log-analytics/{name}", - "status": "ok" if ok else "failed"}) - - storage_accounts = self._az.json( - "storage", "account", "list", - "--resource-group", rg, quiet=True, - ) - for sa in (storage_accounts if isinstance(storage_accounts, list) else []): - name = sa.get("name", "") - if not name: - continue - tags = sa.get("tags", {}) or {} - kind = sa.get("kind", "") - if "polyclaw_deploy" in tags or kind == "StorageV2": - logger.info("[aca] Deleting storage account: %s", name) - ok, _ = self._az.ok( - "storage", "account", "delete", "--name", name, - "--resource-group", rg, "--yes", - ) + # (resource_kind, list_cmd, delete_cmd, name_field, extra_delete_args, filter_fn) + _RESOURCE_TYPES: list[tuple[str, list[str], list[str], str, list[str], + Any]] = [ + ("containerapp", + ["containerapp", "list", "--resource-group", rg], + ["containerapp", "delete", "--resource-group", rg, "--yes"], + "name", [], None), + ("identity", + ["identity", "list", "--resource-group", rg], + ["identity", "delete", "--resource-group", rg], + "name", [], None), + ("aca-env", + ["containerapp", "env", "list", "--resource-group", rg], + ["containerapp", "env", "delete", "--resource-group", rg, "--yes", "--no-wait"], + "name", [], None), + ("acr", + ["acr", "list", "--resource-group", rg], + ["acr", "delete", "--resource-group", rg, "--yes"], + "name", [], None), + ("log-analytics", + ["monitor", "log-analytics", "workspace", "list", "--resource-group", rg], + ["monitor", "log-analytics", "workspace", "delete", "--resource-group", rg, + "--yes", "--force"], + "name", ["--workspace-name"], None), + ("storage", + ["storage", "account", "list", "--resource-group", rg], + ["storage", "account", 
"delete", "--resource-group", rg, "--yes"], + "name", [], + lambda r: "polyclaw_deploy" in (r.get("tags") or {}) + or r.get("kind") == "StorageV2"), + ] + + for kind, list_cmd, delete_cmd, name_field, extra_del, filter_fn in _RESOURCE_TYPES: + resources = self._az.json(*list_cmd, quiet=True) + for res in (resources if isinstance(resources, list) else []): + name = res.get(name_field, "") + if not name: + continue + if filter_fn and not filter_fn(res): + continue + logger.info("[aca] Deleting %s: %s", kind, name) + # Some commands use --name, log-analytics uses --workspace-name + name_arg = extra_del + [name] if extra_del else ["--name", name] + ok, _ = self._az.ok(*delete_cmd, *name_arg) if ok: - cleaned.append(f"storage/{name}") - steps.append({"step": f"{step_label}/storage/{name}", - "status": "ok" if ok else "failed"}) + cleaned.append(f"{kind}/{name}") + steps.record(f"{step_label}/{kind}/{name}", ok=ok) return cleaned def _cleanup_stale_resources( - self, req: AcaDeployRequest, steps: list[dict], + self, req: AcaDeployRequest, steps: StepTracker, ) -> None: logger.info("[aca] Pre-flight: cleaning all ACA resources in %s ...", req.resource_group) cleaned = self._delete_aca_resources(req.resource_group, steps, step_label="cleanup") @@ -395,10 +331,10 @@ def _cleanup_stale_resources( logger.info("[aca] Cleaned %d resource(s): %s", len(cleaned), detail) else: logger.info("[aca] No resources to clean") - steps.append({"step": "cleanup", "status": "ok", "detail": "nothing to clean"}) + steps.ok("cleanup", detail="nothing to clean") def _ensure_resource_group( - self, req: AcaDeployRequest, steps: list[dict], rec: DeploymentRecord, + self, req: AcaDeployRequest, steps: StepTracker, rec: DeploymentRecord, ) -> bool: logger.info("[aca] Step 1/10: Ensuring resource group %s ...", req.resource_group) tag_args = ["--tags", f"polyclaw_deploy={rec.tag}"] @@ -407,14 +343,14 @@ def _ensure_resource_group( "--location", req.location, *tag_args, ) if result: - 
steps.append({"step": "resource_group", "status": "ok", "detail": req.resource_group}) + steps.ok("resource_group", detail=req.resource_group) if req.resource_group not in rec.resource_groups: rec.resource_groups.append(req.resource_group) return True - steps.append({"step": "resource_group", "status": "failed", "detail": self._az.last_stderr}) + steps.fail("resource_group", detail=self._az.last_stderr) return False - def _load_env_vars(self, steps: list[dict]) -> dict[str, str]: + def _load_env_vars(self, steps: StepTracker) -> dict[str, str]: from ..keyvault import is_kv_ref, kv env_map = cfg.env.read_all() @@ -452,6 +388,6 @@ def _load_env_vars(self, steps: list[dict]) -> dict[str, str]: "(%d @kv: references resolved)", count, resolved_count, ) - steps.append({"step": "load_env_vars", "status": "ok", - "detail": f"{count} variable(s), {resolved_count} @kv: resolved"}) + steps.ok("load_env_vars", + detail=f"{count} variable(s), {resolved_count} @kv: resolved") return filtered diff --git a/app/runtime/services/deployment/aca_provision.py b/app/runtime/services/deployment/aca_provision.py index 3ea2b61..b9c6a52 100644 --- a/app/runtime/services/deployment/aca_provision.py +++ b/app/runtime/services/deployment/aca_provision.py @@ -10,15 +10,26 @@ from ...config.settings import cfg from ...state.deploy_state import DeploymentRecord -from ..cloud.azure import AzureCLI from ..cloud._azure_rbac import ( BOT_CONTRIBUTOR_ROLE as _BOT_CONTRIBUTOR_ROLE, +) +from ..cloud._azure_rbac import ( IMAGE_NAME as _IMAGE_NAME, +) +from ..cloud._azure_rbac import ( MI_NAME as _MI_NAME, +) +from ..cloud._azure_rbac import ( RG_READER_ROLE as _RG_READER_ROLE, +) +from ..cloud._azure_rbac import ( SESSION_EXECUTOR_ROLE as _SESSION_EXECUTOR_ROLE, +) +from ..cloud._azure_rbac import ( session_pool_scope as _session_pool_scope, ) +from ..cloud.azure import AzureCLI +from ._models import StepTracker logger = logging.getLogger(__name__) @@ -29,7 +40,7 @@ def ensure_acr( az: AzureCLI, 
resource_group: str, location: str, - steps: list[dict], + steps: StepTracker, rec: DeploymentRecord, acr_name: str = "", ) -> str: @@ -48,12 +59,9 @@ def ensure_acr( "--location", location, ) if not result: - steps.append({ - "step": "acr_create", "status": "failed", - "detail": az.last_stderr, - }) + steps.fail("acr_create", detail=az.last_stderr) return "" - steps.append({"step": "acr_create", "status": "ok", "detail": acr_name}) + steps.ok("acr_create", detail=acr_name) rec.add_resource("acr", resource_group, acr_name, "Container registry") return acr_name @@ -73,7 +81,7 @@ def push_image( az: AzureCLI, acr_name: str, tag: str, - steps: list[dict], + steps: StepTracker, ) -> bool: """Build, tag, and push the local Docker image to ACR.""" logger.info("[aca] Step 4/10: Pushing pre-built image to ACR ...") @@ -91,7 +99,7 @@ def push_image( f"-t {local_image} ." ) logger.error("[aca] %s", detail) - steps.append({"step": "image_push", "status": "failed", "detail": detail}) + steps.fail("image_push", detail=detail) return False logger.info("[aca] Logging in to ACR %s ...", acr_name) @@ -99,7 +107,7 @@ def push_image( if not ok: detail = f"ACR login failed: {msg or az.last_stderr}" logger.error("[aca] %s", detail) - steps.append({"step": "image_push", "status": "failed", "detail": detail}) + steps.fail("image_push", detail=detail) return False logger.info("[aca] Tagging %s -> %s", local_image, remote_image) @@ -110,7 +118,7 @@ def push_image( if tag_result.returncode != 0: detail = f"docker tag failed: {tag_result.stderr.strip()}" logger.error("[aca] %s", detail) - steps.append({"step": "image_push", "status": "failed", "detail": detail}) + steps.fail("image_push", detail=detail) return False logger.info("[aca] Pushing %s (this may take 1-2 minutes) ...", remote_image) @@ -121,11 +129,11 @@ def push_image( if push_result.returncode != 0: detail = f"docker push failed: {push_result.stderr.strip()[:500]}" logger.error("[aca] %s", detail) - steps.append({"step": 
"image_push", "status": "failed", "detail": detail}) + steps.fail("image_push", detail=detail) return False logger.info("[aca] Image pushed: %s", remote_image) - steps.append({"step": "image_push", "status": "ok", "detail": remote_image}) + steps.ok("image_push", detail=remote_image) return True @@ -133,7 +141,7 @@ def ensure_managed_identity( az: AzureCLI, resource_group: str, location: str, - steps: list[dict], + steps: StepTracker, rec: DeploymentRecord, ) -> tuple[str, str]: """Create a user-assigned managed identity. Returns ``(id, client_id)``.""" @@ -145,23 +153,50 @@ def ensure_managed_identity( "--location", location, ) if not isinstance(result, dict): - steps.append({"step": "managed_identity", "status": "failed", - "detail": az.last_stderr}) + steps.fail("managed_identity", detail=az.last_stderr) return "", "" mi_id = result.get("id", "") client_id = result.get("clientId", "") - steps.append({"step": "managed_identity", "status": "ok", "detail": _MI_NAME}) + steps.ok("managed_identity", detail=_MI_NAME) rec.add_resource("managed_identity", resource_group, _MI_NAME, "Runtime scoped identity") return mi_id, client_id +def _assign_role_with_retry( + az: AzureCLI, + assignee: str, + role: str, + scope: str, + steps: StepTracker, +) -> None: + """Assign an RBAC role with retries for eventual consistency.""" + label = role.lower().replace(" ", "_") + assigned = False + for attempt in range(4): + if attempt: + delay = 10 * attempt + logger.info("[aca] RBAC retry %d/3 for %s in %ds ...", attempt, label, delay) + time.sleep(delay) + ok, _msg = az.ok( + "role", "assignment", "create", + "--assignee", assignee, + "--role", role, + "--scope", scope, + ) + if ok or "already exists" in (az.last_stderr or "").lower(): + assigned = True + break + detail = f"{role} on {scope.rsplit('/', 1)[-1]}" if assigned else az.last_stderr + steps.record(f"rbac_{label}", ok=assigned, detail=detail) + + def assign_rbac( az: AzureCLI, mi_principal_id: str, resource_group: str, - steps: 
list[dict], + steps: StepTracker, ) -> None: """Assign RBAC roles to the managed identity.""" logger.info("[aca] Step 6/10: Assigning RBAC ...") @@ -170,66 +205,20 @@ def assign_rbac( rg_scope = f"/subscriptions/{sub_id}/resourceGroups/{resource_group}" for role in (_BOT_CONTRIBUTOR_ROLE, _RG_READER_ROLE): - label = role.lower().replace(" ", "_") - assigned = False - for attempt in range(4): - if attempt: - delay = 10 * attempt - logger.info( - "[aca] RBAC retry %d/3 for %s in %ds ...", - attempt, label, delay, - ) - time.sleep(delay) - ok, _msg = az.ok( - "role", "assignment", "create", - "--assignee", mi_principal_id, - "--role", role, - "--scope", rg_scope, - ) - if ok or "already exists" in (az.last_stderr or "").lower(): - assigned = True - break - if assigned: - steps.append({"step": f"rbac_{label}", "status": "ok", - "detail": f"{role} on {resource_group}"}) - else: - steps.append({"step": f"rbac_{label}", "status": "failed", - "detail": az.last_stderr}) + _assign_role_with_retry(az, mi_principal_id, role, rg_scope, steps) session_scope = _session_pool_scope(sub_id) if session_scope: - label = _SESSION_EXECUTOR_ROLE.lower().replace(" ", "_") - assigned = False - for attempt in range(4): - if attempt: - delay = 10 * attempt - logger.info( - "[aca] RBAC retry %d/3 for %s in %ds ...", - attempt, label, delay, - ) - time.sleep(delay) - ok, _msg = az.ok( - "role", "assignment", "create", - "--assignee", mi_principal_id, - "--role", _SESSION_EXECUTOR_ROLE, - "--scope", session_scope, - ) - if ok or "already exists" in (az.last_stderr or "").lower(): - assigned = True - break - if assigned: - steps.append({"step": f"rbac_{label}", "status": "ok", - "detail": f"{_SESSION_EXECUTOR_ROLE} on session pool"}) - else: - steps.append({"step": f"rbac_{label}", "status": "failed", - "detail": az.last_stderr}) + _assign_role_with_retry( + az, mi_principal_id, _SESSION_EXECUTOR_ROLE, session_scope, steps, + ) def ensure_aca_environment( az: AzureCLI, resource_group: str, 
location: str, - steps: list[dict], + steps: StepTracker, rec: DeploymentRecord, env_name: str = "", ) -> tuple[str, str]: @@ -245,14 +234,11 @@ def ensure_aca_environment( "--location", location, ) if not isinstance(result, dict): - steps.append({ - "step": "aca_environment", "status": "failed", - "detail": az.last_stderr, - }) + steps.fail("aca_environment", detail=az.last_stderr) return "", "" env_id = result.get("id", "") - steps.append({"step": "aca_environment", "status": "ok", "detail": env_name}) + steps.ok("aca_environment", detail=env_name) rec.add_resource("aca_environment", resource_group, env_name, "Container Apps environment") return env_name, env_id @@ -270,7 +256,7 @@ def ensure_runtime_app( env_vars: dict[str, str], image_tag: str, runtime_port: int, - steps: list[dict], + steps: StepTracker, rec: DeploymentRecord, ) -> str: """Create the runtime container app. Returns the FQDN, or ``""`` on failure.""" @@ -342,10 +328,7 @@ def ensure_runtime_app( if not isinstance(result, dict): detail = az.last_stderr logger.error("[aca] containerapp create failed: %s", detail[:1000]) - steps.append({ - "step": "runtime_container_app", "status": "failed", - "detail": detail[:500], - }) + steps.fail("runtime_container_app", detail=detail[:500]) return "" logger.info("[aca] Assigning managed identity to container app ...") @@ -371,7 +354,7 @@ def ensure_runtime_app( "--set-env-vars", f"BOT_ENDPOINT={bot_endpoint}", ) - steps.append({"step": "runtime_container_app", "status": "ok", "detail": fqdn}) + steps.ok("runtime_container_app", detail=fqdn) rec.add_resource("container_app", resource_group, app_name, "Runtime data plane (MI-scoped)") return fqdn @@ -382,16 +365,13 @@ def configure_ip_whitelist( resource_group: str, ) -> list[dict[str, Any]]: """Restrict the runtime container's ingress to the deployer's IP.""" - ip_steps: list[dict[str, Any]] = [] + steps = StepTracker() public_ip = detect_public_ip() if not public_ip: - ip_steps.append({ - "step": 
"ip_whitelist", - "status": "skipped", - "detail": "Could not detect public IP -- runtime ingress unrestricted", - }) - return ip_steps + steps.skip("ip_whitelist", + detail="Could not detect public IP -- runtime ingress unrestricted") + return steps.to_list() ok, msg = az.ok( "containerapp", "ingress", "access-restriction", "set", @@ -403,19 +383,11 @@ def configure_ip_whitelist( "--description", "Allow deployer IP", ) if ok: - ip_steps.append({ - "step": "ip_whitelist", - "status": "ok", - "detail": f"Runtime restricted to {public_ip}/32", - }) + steps.ok("ip_whitelist", detail=f"Runtime restricted to {public_ip}/32") else: - ip_steps.append({ - "step": "ip_whitelist", - "status": "warning", - "detail": f"Could not set IP restriction: {msg}", - }) + steps.warning("ip_whitelist", detail=f"Could not set IP restriction: {msg}") - return ip_steps + return steps.to_list() def detect_public_ip() -> str: diff --git a/app/runtime/services/deployment/bicep_deployer.py b/app/runtime/services/deployment/bicep_deployer.py index b751e02..7296bee 100644 --- a/app/runtime/services/deployment/bicep_deployer.py +++ b/app/runtime/services/deployment/bicep_deployer.py @@ -16,8 +16,14 @@ from typing import Any from ...config.settings import cfg -from ...state.deploy_state import DeployStateStore, DeploymentRecord, ResourceEntry, generate_deploy_id +from ...state.deploy_state import ( + DeploymentRecord, + DeployStateStore, + ResourceEntry, + generate_deploy_id, +) from ..cloud.azure import AzureCLI +from ._models import StepTracker logger = logging.getLogger(__name__) @@ -132,22 +138,6 @@ class BicepDeployResult: session_pool_name: str = "" -class _ObservableSteps(list): - """A list subclass that fires a callback on every ``append``.""" - - def __init__(self, callback: Callable[[dict[str, str]], None] | None = None) -> None: - super().__init__() - self._cb = callback - - def append(self, item: Any) -> None: # type: ignore[override] - super().append(item) - if self._cb is not None: - 
try: - self._cb(item) - except Exception: - pass # never let callback errors abort the deploy - - class BicepDeployer: """Orchestrates infrastructure via a single Bicep template.""" @@ -177,8 +167,8 @@ def deploy( result.deploy_id = deploy_id # Wrap the steps list so that append() also fires the callback. - steps = _ObservableSteps(on_step) - result.steps = steps # type: ignore[assignment] + steps = StepTracker(on_step) + result.steps = steps._steps # noqa: SLF001 -- share underlying list # 1. Ensure resource group if not self._ensure_resource_group(req, steps): @@ -199,13 +189,25 @@ def deploy( runtime_sp = self._ensure_runtime_sp(req, steps) if needs_sp else None # 3. Run Bicep deployment + # When no new SP was created, reuse the existing one for RBAC + # assignments (e.g. Content Safety deploy after Foundry). runtime_sp_oid = runtime_sp["object_id"] if runtime_sp else "" + if not runtime_sp_oid: + existing_sp_id = cfg.env.read("RUNTIME_SP_APP_ID") or "" + if existing_sp_id: + sp_show = self._az.json( + "ad", "sp", "show", "--id", existing_sp_id, quiet=True, + ) + if isinstance(sp_show, dict) and sp_show.get("id"): + runtime_sp_oid = sp_show["id"] + steps.ok("reuse_runtime_sp", + detail="Existing SP object_id=%s" % runtime_sp_oid) outputs = self._run_bicep(req, principal_id, principal_type, runtime_sp_oid, steps) if outputs is None: result.error = "Bicep deployment failed" return result - # 4. Extract outputs + # 4. 
Extract outputs via mapping def _out(key: str) -> str: return outputs.get(key, {}).get("value", "") @@ -213,28 +215,32 @@ def _out_list(key: str) -> list[str]: val = outputs.get(key, {}).get("value", []) return val if isinstance(val, list) else [] - result.foundry_endpoint = _out("foundryEndpoint") - result.foundry_name = _out("foundryName") - result.foundry_resource_id = _out("foundryResourceId") + _OUTPUT_MAP: list[tuple[str, str]] = [ + ("foundry_endpoint", "foundryEndpoint"), + ("foundry_name", "foundryName"), + ("foundry_resource_id", "foundryResourceId"), + ("key_vault_url", "keyVaultUrl"), + ("key_vault_name", "keyVaultName"), + ("acs_name", "acsName"), + ("content_safety_endpoint", "contentSafetyEndpoint"), + ("content_safety_name", "contentSafetyName"), + ("content_safety_resource_id", "contentSafetyResourceId"), + ("search_endpoint", "searchEndpoint"), + ("search_name", "searchName"), + ("embedding_aoai_endpoint", "embeddingAoaiEndpoint"), + ("embedding_aoai_name", "embeddingAoaiName"), + ("embedding_deployment_name", "embeddingDeploymentName"), + ("app_insights_connection_string", "appInsightsConnectionString"), + ("app_insights_name", "appInsightsName"), + ("log_analytics_workspace_name", "logAnalyticsWorkspaceName"), + ("session_pool_endpoint", "sessionPoolEndpoint"), + ("session_pool_id", "sessionPoolId"), + ("session_pool_name", "sessionPoolName"), + ] + for attr, key in _OUTPUT_MAP: + setattr(result, attr, _out(key)) result.deployed_models = _out_list("deployedModels") - result.key_vault_url = _out("keyVaultUrl") - result.key_vault_name = _out("keyVaultName") - result.acs_name = _out("acsName") - result.content_safety_endpoint = _out("contentSafetyEndpoint") - result.content_safety_name = _out("contentSafetyName") - result.content_safety_resource_id = _out("contentSafetyResourceId") - result.search_endpoint = _out("searchEndpoint") - result.search_name = _out("searchName") - result.embedding_aoai_endpoint = _out("embeddingAoaiEndpoint") - 
result.embedding_aoai_name = _out("embeddingAoaiName") - result.embedding_deployment_name = _out("embeddingDeploymentName") - result.app_insights_connection_string = _out("appInsightsConnectionString") - result.app_insights_name = _out("appInsightsName") - result.log_analytics_workspace_name = _out("logAnalyticsWorkspaceName") - result.session_pool_endpoint = _out("sessionPoolEndpoint") - result.session_pool_id = _out("sessionPoolId") - result.session_pool_name = _out("sessionPoolName") - steps.append({"step": "extract_outputs", "status": "ok"}) + steps.ok("extract_outputs") # 5. Persist to .env and state store self._persist(req, result, deploy_id, steps, runtime_sp=runtime_sp) @@ -274,30 +280,24 @@ def status(self) -> dict[str, Any]: def decommission(self, resource_group: str = "") -> list[dict[str, Any]]: """Delete the resource group (cascade deletes everything).""" rg = resource_group or cfg.env.read("FOUNDRY_RESOURCE_GROUP") or "" - steps: list[dict[str, Any]] = [] + steps = StepTracker() if not rg: - steps.append({"step": "decommission", "status": "skip", "detail": "No RG configured"}) - return steps + steps.skip("decommission", detail="No RG configured") + return steps.to_list() ok, msg = self._az.ok( "group", "delete", "--name", rg, "--yes", "--no-wait", ) - steps.append({ - "step": "delete_resource_group", - "status": "ok" if ok else "failed", - "detail": rg if ok else msg, - }) + steps.record("delete_resource_group", ok=ok, detail=rg if ok else msg) if ok: # Clean up the runtime service principal sp_app_id = cfg.env.read("RUNTIME_SP_APP_ID") or "" if sp_app_id: del_ok, del_msg = self._az.ok("ad", "sp", "delete", "--id", sp_app_id) - steps.append({ - "step": "delete_runtime_sp", - "status": "ok" if del_ok else "warning", - "detail": sp_app_id if del_ok else del_msg, - }) + steps.append({"step": "delete_runtime_sp", + "status": "ok" if del_ok else "warning", + "detail": sp_app_id if del_ok else del_msg}) cfg.write_env( FOUNDRY_ENDPOINT="", @@ -310,19 +310,18 
@@ def decommission(self, resource_group: str = "") -> list[dict[str, Any]]: RUNTIME_SP_PASSWORD="", RUNTIME_SP_TENANT="", ) - steps.append({"step": "clear_env", "status": "ok"}) + steps.ok("clear_env") - return steps + return steps.to_list() # -- internal helpers -------------------------------------------------- def _ensure_resource_group( - self, req: BicepDeployRequest, steps: list[dict], + self, req: BicepDeployRequest, steps: StepTracker, ) -> bool: existing = self._az.json("group", "show", "--name", req.resource_group, quiet=True) if existing: - steps.append({"step": "resource_group", "status": "ok", - "detail": "%s (existing)" % req.resource_group}) + steps.ok("resource_group", detail="%s (existing)" % req.resource_group) return True result = self._az.json( @@ -331,28 +330,23 @@ def _ensure_resource_group( "--location", req.location, ) ok = bool(result) - steps.append({ - "step": "resource_group", - "status": "ok" if ok else "failed", - "detail": req.resource_group, - }) + steps.record("resource_group", ok=ok, detail=req.resource_group) if not ok: logger.error("RG creation failed: %s", self._az.last_stderr) return ok - def _resolve_principal(self, steps: list[dict]) -> tuple[str, str]: + def _resolve_principal(self, steps: StepTracker) -> tuple[str, str]: """Return ``(principal_id, principal_type)`` for the signed-in identity.""" account = self._az.account_info() if not account: - steps.append({"step": "resolve_principal", "status": "failed", - "detail": "Not logged in"}) + steps.fail("resolve_principal", detail="Not logged in") return "", "" # Try user principal first user_info = self._az.json("ad", "signed-in-user", "show", quiet=True) if isinstance(user_info, dict) and user_info.get("id"): - steps.append({"step": "resolve_principal", "status": "ok", - "detail": "User: %s" % user_info.get("userPrincipalName", "")}) + steps.ok("resolve_principal", + detail="User: %s" % user_info.get("userPrincipalName", "")) return user_info["id"], "User" # Fall back to 
service principal @@ -360,16 +354,45 @@ def _resolve_principal(self, steps: list[dict]) -> tuple[str, str]: if sp_name: sp_info = self._az.json("ad", "sp", "show", "--id", sp_name, quiet=True) if isinstance(sp_info, dict) and sp_info.get("id"): - steps.append({"step": "resolve_principal", "status": "ok", - "detail": "ServicePrincipal: %s" % sp_name}) + steps.ok("resolve_principal", detail="ServicePrincipal: %s" % sp_name) return sp_info["id"], "ServicePrincipal" - steps.append({"step": "resolve_principal", "status": "failed", - "detail": "Cannot determine principal"}) + # Last resort: extract oid from the ARM access token. + # This works even when Graph API calls fail due to CAE challenges + # (TokenCreatedWithOutdatedPolicies / InteractionRequired). + oid = self._oid_from_token() + if oid: + principal_type = "ServicePrincipal" if sp_name else "User" + steps.ok("resolve_principal", detail="From token oid: %s" % oid) + return oid, principal_type + + steps.fail("resolve_principal", detail="Cannot determine principal") return "", "" + def _oid_from_token(self) -> str: + """Extract the ``oid`` claim from the current ARM access token.""" + import base64 + + token_info = self._az.json("account", "get-access-token", quiet=True) + if not isinstance(token_info, dict): + return "" + token = token_info.get("accessToken", "") + if not token: + return "" + parts = token.split(".") + if len(parts) < 2: + return "" + try: + padded = parts[1] + "=" * (-len(parts[1]) % 4) + payload = base64.urlsafe_b64decode(padded) + claims = json.loads(payload) + return claims.get("oid", "") + except Exception: + logger.debug("[bicep._oid_from_token] failed to decode JWT", exc_info=True) + return "" + def _ensure_runtime_sp( - self, req: BicepDeployRequest, steps: list[dict], + self, req: BicepDeployRequest, steps: StepTracker, ) -> dict[str, str] | None: """Create or reuse a service principal for the runtime container. 
@@ -389,10 +412,7 @@ def _ensure_runtime_sp( if existing_id and existing_pw and existing_tenant: sp_info = self._az.json("ad", "sp", "show", "--id", existing_id, quiet=True) if isinstance(sp_info, dict) and sp_info.get("id"): - steps.append({ - "step": "runtime_sp", "status": "ok", - "detail": "Reusing existing SP: %s" % existing_id, - }) + steps.ok("runtime_sp", detail="Reusing existing SP: %s" % existing_id) return { "app_id": existing_id, "password": existing_pw, @@ -447,18 +467,14 @@ def _ensure_runtime_sp( sp["password"] = cred["password"] sp["tenant"] = cred.get("tenant", sp.get("tenant", "")) else: - steps.append({ - "step": "runtime_sp", "status": "failed", - "detail": "Short-lived credential creation failed: %s" - % self._az.last_stderr[:200], - }) + steps.fail("runtime_sp", + detail="Short-lived credential creation failed: %s" + % self._az.last_stderr[:200]) return None if not isinstance(sp, dict) or not sp.get("appId"): - steps.append({ - "step": "runtime_sp", "status": "failed", - "detail": "az ad sp create-for-rbac failed: %s" % self._az.last_stderr[:300], - }) + steps.fail("runtime_sp", + detail="az ad sp create-for-rbac failed: %s" % self._az.last_stderr[:300]) logger.error("[bicep.runtime_sp] SP creation failed: %s", self._az.last_stderr) return None @@ -466,16 +482,12 @@ def _ensure_runtime_sp( sp_show = self._az.json("ad", "sp", "show", "--id", sp["appId"], quiet=True) object_id = sp_show["id"] if isinstance(sp_show, dict) and sp_show.get("id") else "" if not object_id: - steps.append({ - "step": "runtime_sp", "status": "failed", - "detail": "Could not resolve SP object ID for %s" % sp["appId"], - }) + steps.fail("runtime_sp", + detail="Could not resolve SP object ID for %s" % sp["appId"]) return None - steps.append({ - "step": "runtime_sp", "status": "ok", - "detail": "Created SP: %s (object_id=%s)" % (sp_name, object_id), - }) + steps.ok("runtime_sp", + detail="Created SP: %s (object_id=%s)" % (sp_name, object_id)) logger.info( 
"[bicep.runtime_sp] created: name=%s app_id=%s object_id=%s", sp_name, sp["appId"], object_id, @@ -493,12 +505,11 @@ def _run_bicep( principal_id: str, principal_type: str, runtime_sp_object_id: str, - steps: list[dict], + steps: StepTracker, ) -> dict[str, Any] | None: """Execute ``az deployment group create`` with the Bicep template.""" if not _BICEP_TEMPLATE.exists(): - steps.append({"step": "bicep_deploy", "status": "failed", - "detail": "Template not found: %s" % _BICEP_TEMPLATE}) + steps.fail("bicep_deploy", detail="Template not found: %s" % _BICEP_TEMPLATE) logger.error("Bicep template not found at %s", _BICEP_TEMPLATE) return None @@ -544,11 +555,21 @@ def _run_bicep( # bug (e.g. "The content for this response was already consumed" in # az 2.77.0). Check if the deployment actually succeeded by querying it. if result is None: - stderr = self._az.last_stderr + stderr = self._az.last_stderr or "" logger.warning( "[bicep.deploy] create returned None; checking deployment status: %s", stderr[:200], ) + + # Auto-purge soft-deleted resources and retry once. 
+ if "FlagMustBeSetForRestore" in stderr: + purge_result = self._purge_soft_deleted_and_retry( + req, deploy_name, params_json, steps, + ) + if purge_result is not None: + steps.ok("bicep_deploy", detail="Deployment succeeded after purge + retry") + return purge_result if isinstance(purge_result, dict) else {} + result = self._az.json( "deployment", "group", "show", "--resource-group", req.resource_group, @@ -557,8 +578,7 @@ def _run_bicep( quiet=True, ) if result is None: - steps.append({"step": "bicep_deploy", "status": "failed", - "detail": stderr[:500]}) + steps.fail("bicep_deploy", detail=stderr[:500]) logger.error("Bicep deployment failed: %s", stderr) return None @@ -568,23 +588,107 @@ def _run_bicep( if isinstance(result, dict): result = result.get("properties", result).get("outputs", result) - steps.append({"step": "bicep_deploy", "status": "ok", - "detail": "Deployment succeeded"}) + steps.ok("bicep_deploy", detail="Deployment succeeded") return result if isinstance(result, dict) else {} + def _purge_soft_deleted_and_retry( + self, + req: BicepDeployRequest, + deploy_name: str, + params_json: str, + steps: StepTracker, + ) -> dict[str, Any] | None: + """Purge soft-deleted Cognitive Services resources and retry deploy.""" + import re + import time + + stderr = self._az.last_stderr or "" + # Extract resource names from the error message. + pattern = r"accounts/([^\s'\"]+)" + matches = re.findall(pattern, stderr) + if not matches: + logger.warning("[bicep.deploy] FlagMustBeSetForRestore but cannot parse resource name") + return None + + # List soft-deleted resources to get location info. 
+ deleted = self._az.json( + "cognitiveservices", "account", "list-deleted", quiet=True, + ) + if not isinstance(deleted, list): + return None + + purged = False + for item in deleted: + name = item.get("name", "") + if name not in matches: + continue + rg = item.get("resourceGroup") or self._rg_from_id(item.get("id", "")) + loc = item.get("location", "") + logger.info("[bicep.deploy] purging soft-deleted resource: %s (rg=%s)", name, rg) + self._az.ok( + "cognitiveservices", "account", "purge", + "--name", name, + "--resource-group", rg, + "--location", loc, + ) + purged = True + + if not purged: + return None + + steps.ok("purge_soft_deleted", detail="Purged resources: %s" % ", ".join(matches)) + + # Wait for purge to propagate (up to 90 s). + deadline = time.monotonic() + 90 + while time.monotonic() < deadline: + time.sleep(10) + still = self._az.json( + "cognitiveservices", "account", "list-deleted", quiet=True, + ) + if not isinstance(still, list): + break + remaining = [d.get("name", "") for d in still if d.get("name", "") in matches] + if not remaining: + logger.info("[bicep.deploy] purge confirmed, retrying deployment") + break + logger.info("[bicep.deploy] waiting for purge: %s", remaining) + + # Retry the deployment. 
+ result = self._az.json( + "deployment", "group", "create", + "--resource-group", req.resource_group, + "--name", deploy_name, + "--template-file", str(_BICEP_TEMPLATE), + "--parameters", params_json, + ) + if isinstance(result, dict): + result = result.get("properties", result).get("outputs", result) + return result + return None + + @staticmethod + def _rg_from_id(resource_id: str) -> str: + """Extract resource group from a soft-deleted resource's ID.""" + parts = resource_id.split("/") + for i, part in enumerate(parts): + if part.lower() == "resourcegroups" and i + 1 < len(parts): + return parts[i + 1] + return "" + def _persist( self, req: BicepDeployRequest, result: BicepDeployResult, deploy_id: str, - steps: list[dict], + steps: StepTracker, runtime_sp: dict[str, str] | None = None, ) -> None: """Write deployment outputs to .env and the deploy state store.""" env_vars: dict[str, str] = {} - if result.foundry_endpoint: - env_vars.update({ + # Mapping: (condition, env_key_to_value_pairs) + _ENV_BLOCKS: list[tuple[bool, dict[str, str]]] = [ + (bool(result.foundry_endpoint), { "FOUNDRY_ENDPOINT": result.foundry_endpoint, "FOUNDRY_NAME": result.foundry_name, "FOUNDRY_RESOURCE_GROUP": req.resource_group, @@ -592,53 +696,51 @@ def _persist( result.deployed_models[0] if result.deployed_models else "gpt-4.1" ), "DEPLOYED_MODELS": ",".join(result.deployed_models), - }) - if result.key_vault_url: - env_vars.update({ + }), + (bool(result.key_vault_url), { "KEY_VAULT_URL": result.key_vault_url, "KEY_VAULT_NAME": result.key_vault_name, "KEY_VAULT_RG": req.resource_group, - }) - if runtime_sp: - env_vars.update({ - "RUNTIME_SP_APP_ID": runtime_sp["app_id"], - "RUNTIME_SP_PASSWORD": runtime_sp["password"], - "RUNTIME_SP_TENANT": runtime_sp["tenant"], - }) - if result.content_safety_endpoint: - env_vars.update({ + }), + (bool(runtime_sp), { + "RUNTIME_SP_APP_ID": (runtime_sp or {}).get("app_id", ""), + "RUNTIME_SP_PASSWORD": (runtime_sp or {}).get("password", ""), + 
"RUNTIME_SP_TENANT": (runtime_sp or {}).get("tenant", ""), + }), + (bool(result.content_safety_endpoint), { "CONTENT_SAFETY_ENDPOINT": result.content_safety_endpoint, "CONTENT_SAFETY_NAME": result.content_safety_name, - }) - if result.search_endpoint: - env_vars.update({ + }), + (bool(result.search_endpoint), { "SEARCH_ENDPOINT": result.search_endpoint, "SEARCH_NAME": result.search_name, - }) - if result.embedding_aoai_endpoint: - env_vars.update({ + }), + (bool(result.embedding_aoai_endpoint), { "EMBEDDING_AOAI_ENDPOINT": result.embedding_aoai_endpoint, "EMBEDDING_AOAI_NAME": result.embedding_aoai_name, "EMBEDDING_DEPLOYMENT_NAME": result.embedding_deployment_name, - }) - if result.app_insights_connection_string: - env_vars.update({ + }), + (bool(result.app_insights_connection_string), { "APP_INSIGHTS_CONNECTION_STRING": result.app_insights_connection_string, "APP_INSIGHTS_NAME": result.app_insights_name, "LOG_ANALYTICS_WORKSPACE_NAME": result.log_analytics_workspace_name, - }) - if result.session_pool_endpoint: - env_vars.update({ + }), + (bool(result.session_pool_endpoint), { "SESSION_POOL_ENDPOINT": result.session_pool_endpoint, "SESSION_POOL_ID": result.session_pool_id, "SESSION_POOL_NAME": result.session_pool_name, - }) - if result.acs_name: - env_vars["ACS_RESOURCE_NAME"] = result.acs_name + }), + (bool(result.acs_name), { + "ACS_RESOURCE_NAME": result.acs_name, + }), + ] + for enabled, block in _ENV_BLOCKS: + if enabled: + env_vars.update(block) if env_vars: cfg.write_env(**env_vars) - steps.append({"step": "persist_env", "status": "ok"}) + steps.ok("persist_env") # Auto-configure feature stores from deployment outputs self._configure_stores(req, result, steps) @@ -690,135 +792,106 @@ def _persist( )) self._store.register(rec) - steps.append({"step": "persist_state", "status": "ok"}) + steps.ok("persist_state") def _configure_stores( self, req: BicepDeployRequest, result: BicepDeployResult, - steps: list[dict], + steps: StepTracker, ) -> None: - 
"""Auto-configure feature JSON stores from Bicep outputs. - - After a one-click deploy the features should be immediately usable - without any manual configuration steps in the admin GUI. - """ - # -- Content Safety / Prompt Shields ---------------------------------- - if result.content_safety_endpoint: - try: - from ...state.guardrails.config import get_guardrails_config - gs = get_guardrails_config() - gs.set_content_safety_endpoint(result.content_safety_endpoint) - gs.set_filter_mode("prompt_shields") - steps.append({ - "step": "configure_content_safety", "status": "ok", - "detail": result.content_safety_endpoint, - }) - except Exception as exc: - logger.warning("[bicep.configure] content safety: %s", exc, exc_info=True) - steps.append({ - "step": "configure_content_safety", "status": "failed", - "detail": str(exc)[:200], - }) - - # -- Foundry IQ (Azure AI Search + Embedding) ------------------------- - if result.search_endpoint and result.embedding_aoai_endpoint: - try: - self._configure_foundry_iq(req, result, steps) - except Exception as exc: - logger.warning("[bicep.configure] foundry_iq: %s", exc, exc_info=True) - steps.append({ - "step": "configure_foundry_iq", "status": "failed", - "detail": str(exc)[:200], - }) - - # -- Monitoring (App Insights + OTel) --------------------------------- - if result.app_insights_connection_string: - try: - from ...state.monitoring_config import get_monitoring_config - account = self._az.account_info() - sub_id = account.get("id", "") if account else "" - ms = get_monitoring_config() - ms.set_provisioned_metadata( - app_insights_name=result.app_insights_name, - workspace_name=result.log_analytics_workspace_name, - resource_group=req.resource_group, - location=req.location, - connection_string=result.app_insights_connection_string, - subscription_id=sub_id, - ) - steps.append({ - "step": "configure_monitoring", "status": "ok", - "detail": result.app_insights_name, - }) - except Exception as exc: - 
logger.warning("[bicep.configure] monitoring: %s", exc, exc_info=True) - steps.append({ - "step": "configure_monitoring", "status": "failed", - "detail": str(exc)[:200], - }) - - # -- Sandbox (Session Pool) ------------------------------------------- - if result.session_pool_endpoint: - try: - from ...state.sandbox_config import get_sandbox_config - ss = get_sandbox_config() - ss.set_pool_metadata( - resource_group=req.resource_group, - location=req.location, - pool_name=result.session_pool_name, - pool_id=result.session_pool_id, - endpoint=result.session_pool_endpoint, - ) - steps.append({ - "step": "configure_session_pool", "status": "ok", - "detail": result.session_pool_name, - }) - except Exception as exc: - logger.warning("[bicep.configure] session pool: %s", exc, exc_info=True) - steps.append({ - "step": "configure_session_pool", "status": "failed", - "detail": str(exc)[:200], - }) - - # -- Voice / ACS ------------------------------------------------------- - if result.acs_name: + """Auto-configure feature JSON stores from Bicep outputs.""" + configs: list[tuple[bool, str, str, Any]] = [ + (bool(result.content_safety_endpoint), + "configure_content_safety", result.content_safety_endpoint, + lambda: self._cfg_content_safety(result)), + (bool(result.search_endpoint and result.embedding_aoai_endpoint), + "configure_foundry_iq", "", + lambda: self._configure_foundry_iq(req, result, steps)), + (bool(result.app_insights_connection_string), + "configure_monitoring", result.app_insights_name, + lambda: self._cfg_monitoring(req, result)), + (bool(result.session_pool_endpoint), + "configure_session_pool", result.session_pool_name, + lambda: self._cfg_sandbox(req, result)), + (bool(result.acs_name), + "configure_acs", result.acs_name, + lambda: self._cfg_acs(req, result)), + ] + for enabled, step_name, detail, fn in configs: + if not enabled: + continue + # foundry_iq manages its own steps + if step_name == "configure_foundry_iq": + try: + fn() + except Exception as 
exc: + logger.warning("[bicep.configure] foundry_iq: %s", exc, exc_info=True) + steps.fail(step_name, detail=str(exc)[:200]) + continue try: - from ...state.infra_config import get_infra_config - # Fetch the ACS connection string for voice calling - keys = self._az.json( - "communication", "list-key", - "--name", result.acs_name, - "--resource-group", req.resource_group, - quiet=True, - ) - conn_string = ( - keys.get("primaryConnectionString", "") - if isinstance(keys, dict) else "" - ) - infra = get_infra_config() - infra.save_voice_call( - acs_resource_name=result.acs_name, - acs_connection_string=conn_string, - resource_group=req.resource_group, - location=req.location, - ) - steps.append({ - "step": "configure_acs", "status": "ok", - "detail": result.acs_name, - }) + fn() + steps.ok(step_name, detail=detail) except Exception as exc: - logger.warning("[bicep.configure] acs: %s", exc, exc_info=True) - steps.append({ - "step": "configure_acs", "status": "failed", - "detail": str(exc)[:200], - }) + logger.warning("[bicep.configure] %s: %s", step_name, exc, exc_info=True) + steps.fail(step_name, detail=str(exc)[:200]) + + def _cfg_content_safety(self, result: BicepDeployResult) -> None: + from ...state.guardrails.config import get_guardrails_config + gs = get_guardrails_config() + gs.set_content_safety_endpoint(result.content_safety_endpoint) + gs.set_filter_mode("prompt_shields") + + def _cfg_monitoring(self, req: BicepDeployRequest, result: BicepDeployResult) -> None: + from ...state.monitoring_config import get_monitoring_config + account = self._az.account_info() + sub_id = account.get("id", "") if account else "" + ms = get_monitoring_config() + ms.set_provisioned_metadata( + app_insights_name=result.app_insights_name, + workspace_name=result.log_analytics_workspace_name, + resource_group=req.resource_group, + location=req.location, + connection_string=result.app_insights_connection_string, + subscription_id=sub_id, + ) + + def _cfg_sandbox(self, req: 
BicepDeployRequest, result: BicepDeployResult) -> None: + from ...state.sandbox_config import get_sandbox_config + ss = get_sandbox_config() + ss.set_pool_metadata( + resource_group=req.resource_group, + location=req.location, + pool_name=result.session_pool_name, + pool_id=result.session_pool_id, + endpoint=result.session_pool_endpoint, + ) + + def _cfg_acs(self, req: BicepDeployRequest, result: BicepDeployResult) -> None: + from ...state.infra_config import get_infra_config + keys = self._az.json( + "communication", "list-key", + "--name", result.acs_name, + "--resource-group", req.resource_group, + quiet=True, + ) + conn_string = ( + keys.get("primaryConnectionString", "") + if isinstance(keys, dict) else "" + ) + infra = get_infra_config() + infra.save_voice_call( + acs_resource_name=result.acs_name, + acs_connection_string=conn_string, + resource_group=req.resource_group, + location=req.location, + ) def _configure_foundry_iq( self, req: BicepDeployRequest, result: BicepDeployResult, - steps: list[dict], + steps: StepTracker, ) -> None: """Wire up Azure AI Search + Embedding AOAI for Foundry IQ.""" from ...state.foundry_iq_config import get_foundry_iq_config @@ -845,23 +918,16 @@ def _configure_foundry_iq( provisioned=True, enabled=True, ) - steps.append({ - "step": "configure_foundry_iq", "status": "ok", - "detail": "search=%s aoai=%s" % (result.search_name, result.embedding_aoai_name), - }) + steps.ok("configure_foundry_iq", + detail="search=%s aoai=%s" % (result.search_name, result.embedding_aoai_name)) # Create the search index try: from ..foundry_iq import ensure_index idx_result = ensure_index(fiq) idx_ok = idx_result.get("status") == "ok" - steps.append({ - "step": "create_search_index", - "status": "ok" if idx_ok else "warning", - "detail": idx_result.get("detail", ""), - }) + steps.append({"step": "create_search_index", + "status": "ok" if idx_ok else "warning", + "detail": idx_result.get("detail", "")}) except Exception as exc: - steps.append({ - 
"step": "create_search_index", "status": "warning", - "detail": str(exc)[:200], - }) + steps.warning("create_search_index", detail=str(exc)[:200]) diff --git a/app/runtime/services/deployment/deployer.py b/app/runtime/services/deployment/deployer.py index 035f576..e89c76d 100644 --- a/app/runtime/services/deployment/deployer.py +++ b/app/runtime/services/deployment/deployer.py @@ -60,19 +60,12 @@ def deploy(self, req: DeployRequest) -> DeployResult: ) try: - logger.info("Step 1/4: Creating resource group '%s' in '%s'...", req.resource_group, req.location) - if not self._create_resource_group(req.resource_group, req.location, steps): - return DeployResult(ok=False, steps=steps, error=f"Resource group failed: {self._az.last_stderr}") - - logger.info("Step 2/4: Registering app '%s'...", req.display_name) - app_id = self._register_app(req.display_name, steps) - if not app_id: - return DeployResult(ok=False, steps=steps, error=f"App registration failed: {self._az.last_stderr}") - - logger.info("Step 3/4: Creating credentials for app %s...", app_id) - password, tenant_id = self._create_credentials(app_id, steps) - if not password: - return DeployResult(ok=False, steps=steps, error=f"Credential reset failed: {self._az.last_stderr}") + result = self._provision_app(req, steps) + if not result: + return result + app_id = result.app_id or "" + password = result._password + tenant_id = result._tenant_id logger.info("Step 4/4: Creating bot resource '%s'...", handle) actual_handle = self._create_bot_resource( @@ -127,33 +120,14 @@ def register_app(self, req: DeployRequest) -> DeployResult: ) try: - logger.info("Step 1/3: Creating resource group '%s' in '%s'...", - req.resource_group, req.location) - if not self._create_resource_group(req.resource_group, req.location, steps): - return DeployResult( - ok=False, steps=steps, - error=f"Resource group failed: {self._az.last_stderr}", - ) - - logger.info("Step 2/3: Registering app '%s'...", req.display_name) - app_id = 
self._register_app(req.display_name, steps) - if not app_id: - return DeployResult( - ok=False, steps=steps, - error=f"App registration failed: {self._az.last_stderr}", - ) - - logger.info("Step 3/3: Creating credentials for app %s...", app_id) - password, tenant_id = self._create_credentials(app_id, steps) - if not password: - return DeployResult( - ok=False, steps=steps, - error=f"Credential reset failed: {self._az.last_stderr}", - ) + result = self._provision_app(req, steps) + if not result: + return result + app_id = result.app_id or "" cfg.write_env( - BOT_APP_ID=app_id, BOT_APP_PASSWORD=password, - BOT_APP_TENANT_ID=tenant_id, + BOT_APP_ID=app_id, BOT_APP_PASSWORD=result._password, + BOT_APP_TENANT_ID=result._tenant_id, BOT_RESOURCE_GROUP=req.resource_group, ) @@ -179,6 +153,40 @@ def register_app(self, req: DeployRequest) -> DeployResult: self._az.ok("ad", "app", "delete", "--id", app_id) raise + def _provision_app( + self, req: DeployRequest, steps: list[dict[str, Any]], + ) -> DeployResult: + """Create resource group, register Entra app, and generate credentials. + + On success the returned ``DeployResult`` carries ``app_id`` and + internal ``_password`` / ``_tenant_id`` fields. On failure + ``result.ok`` is ``False``. 
+ """ + if not self._create_resource_group(req.resource_group, req.location, steps): + return DeployResult( + ok=False, steps=steps, + error=f"Resource group failed: {self._az.last_stderr}", + ) + + app_id = self._register_app(req.display_name, steps) + if not app_id: + return DeployResult( + ok=False, steps=steps, + error=f"App registration failed: {self._az.last_stderr}", + ) + + password, tenant_id = self._create_credentials(app_id, steps) + if not password: + return DeployResult( + ok=False, steps=steps, + error=f"Credential reset failed: {self._az.last_stderr}", + ) + + result = DeployResult(ok=True, steps=steps, app_id=app_id) + result._password = password # type: ignore[attr-defined] + result._tenant_id = tenant_id # type: ignore[attr-defined] + return result + def delete(self) -> DeployResult: rg = cfg.env.read("BOT_RESOURCE_GROUP") name = cfg.env.read("BOT_NAME") diff --git a/app/runtime/services/deployment/provisioner.py b/app/runtime/services/deployment/provisioner.py index d83fd30..6eefc16 100644 --- a/app/runtime/services/deployment/provisioner.py +++ b/app/runtime/services/deployment/provisioner.py @@ -9,8 +9,9 @@ from ...state.deploy_state import DeploymentRecord, DeployStateStore from ...state.infra_config import InfraConfigStore from ..cloud.azure import AzureCLI -from .deployer import BotDeployer, DeployRequest from ..cloud.runtime_identity import RuntimeIdentityProvisioner +from ._models import StepTracker +from .deployer import BotDeployer, DeployRequest logger = logging.getLogger(__name__) @@ -41,14 +42,14 @@ def provision(self) -> list[dict[str, Any]]: container creates it at startup when a messaging channel (e.g. Telegram) is configured. See :meth:`recreate_endpoint`. 
""" - steps: list[dict[str, Any]] = [] + steps = StepTracker() bc = self._store.bot logger.info("Provisioning started") if not self._store.bot_configured: logger.info("No bot configured -- skipping provisioning") - steps.append({"step": "bot_config", "status": "skip", "detail": "No bot configured"}) - return steps + steps.skip("bot_config", detail="No bot configured") + return steps.to_list() if self._deploy_store: rec = self._deploy_store.current_local() @@ -62,17 +63,17 @@ def provision(self) -> list[dict[str, Any]]: logger.info("Provision step 1/2: Registering Entra ID app...") if not self._ensure_app_registration(bc, steps): logger.error("Provisioning aborted: app registration failed") - return steps + return steps.to_list() # Step 2: Provision scoped identity for the agent container. logger.info("Provision step 2/2: Provisioning runtime identity...") self._ensure_runtime_identity(bc.resource_group, steps) logger.info("Provisioning completed: %d steps", len(steps)) - return steps + return steps.to_list() def _ensure_app_registration( - self, bc: Any, steps: list[dict], + self, bc: Any, steps: StepTracker, ) -> bool: """Create or re-use the Entra ID app registration (no bot service).""" req = DeployRequest( @@ -83,37 +84,27 @@ def _ensure_app_registration( ) result = self._deployer.register_app(req) steps.extend(result.steps) - if result.ok: - steps.append({ - "step": "app_registration", - "status": "ok", - "detail": result.app_id, - }) - else: - steps.append({ - "step": "app_registration", - "status": "failed", - "detail": result.error, - }) + steps.record("app_registration", ok=result.ok, + detail=result.app_id if result.ok else result.error) return result.ok - def _ensure_channels(self, steps: list[dict]) -> None: + def _ensure_channels(self, steps: StepTracker) -> None: tg = self._store.channels.telegram if tg.token: tok_ok, tok_detail = self._az.validate_telegram_token(tg.token) if not tok_ok: - steps.append({"step": "telegram_validate", "status": "failed", 
"detail": tok_detail}) + steps.fail("telegram_validate", detail=tok_detail) return - steps.append({"step": "telegram_validate", "status": "ok", "detail": tok_detail}) + steps.ok("telegram_validate", detail=tok_detail) # Pass validated_name so configure_telegram skips a redundant API call. ok, msg = self._az.configure_telegram(tg.token, validated_name=tok_detail) - steps.append({"step": "telegram_channel", "status": "ok" if ok else "failed", "detail": msg}) + steps.record("telegram_channel", ok=ok, detail=msg) if ok and tg.whitelist: cfg.write_env(TELEGRAM_WHITELIST=tg.whitelist) else: - steps.append({"step": "telegram", "status": "skip", "detail": "Not configured"}) + steps.skip("telegram", detail="Not configured") - def _ensure_runtime_identity(self, resource_group: str, steps: list[dict]) -> None: + def _ensure_runtime_identity(self, resource_group: str, steps: StepTracker) -> None: """Provision a scoped identity for the agent runtime container. Uses a service principal for Docker Compose deployments. 
ACA @@ -122,36 +113,20 @@ def _ensure_runtime_identity(self, resource_group: str, steps: list[dict]) -> No """ # Skip if a managed identity is already set (ACA deployment) if cfg.env.read("ACA_MI_CLIENT_ID"): - steps.append({ - "step": "runtime_identity", - "status": "skip", - "detail": "Managed identity already configured (ACA)", - }) + steps.skip("runtime_identity", + detail="Managed identity already configured (ACA)") return try: result = self._runtime_identity.provision(resource_group) sub_steps = result.get("steps", []) steps.extend(sub_steps) - if result.get("ok"): - steps.append({ - "step": "runtime_identity", - "status": "ok", - "detail": f"SP {result.get('app_id')} scoped to {resource_group}", - }) - else: - steps.append({ - "step": "runtime_identity", - "status": "failed", - "detail": result.get("error", "Unknown error"), - }) + steps.record("runtime_identity", ok=bool(result.get("ok")), + detail=f"SP {result.get('app_id')} scoped to {resource_group}" + if result.get("ok") else result.get("error", "Unknown error")) except Exception as exc: logger.warning("Runtime identity provisioning failed (non-fatal): %s", exc, exc_info=True) - steps.append({ - "step": "runtime_identity", - "status": "failed", - "detail": str(exc), - }) + steps.fail("runtime_identity", detail=str(exc)) def recreate_endpoint(self, endpoint_url: str) -> list[dict[str, Any]]: """Recreate the bot resource with a new messaging endpoint. @@ -160,28 +135,27 @@ def recreate_endpoint(self, endpoint_url: str) -> list[dict[str, Any]]: touches the Bot Service ARM resource and reconfigures channels. The Entra ID app registration and credentials are preserved. 
""" - steps: list[dict[str, Any]] = [] + steps = StepTracker() logger.info("recreate_endpoint: endpoint=%s", endpoint_url) if not self._store.bot_configured: - steps.append({"step": "bot_config", "status": "skip", - "detail": "No bot configured"}) - return steps + steps.skip("bot_config", detail="No bot configured") + return steps.to_list() result = self._deployer.recreate(endpoint_url) steps.extend(result.steps) if not result.ok: logger.error("recreate_endpoint: bot recreate failed: %s", result.error) - return steps + return steps.to_list() # Reconfigure channels (Telegram, etc.) on the fresh bot resource self._ensure_channels(steps) logger.info("recreate_endpoint: completed -- %d steps", len(steps)) - return steps + return steps.to_list() def decommission(self) -> list[dict[str, Any]]: - steps: list[dict[str, Any]] = [] + steps = StepTracker() logger.info("Decommissioning started") rg = cfg.env.read("BOT_RESOURCE_GROUP") @@ -196,28 +170,22 @@ def decommission(self) -> list[dict[str, Any]]: bot_exists = self._az.json("bot", "show", "--resource-group", rg, "--name", name) is not None if bot_exists and self._store.telegram_configured: ok, msg = self._az.remove_channel("telegram") - steps.append({"step": "telegram_remove", "status": "ok" if ok else "failed", "detail": msg}) + steps.record("telegram_remove", ok=ok, detail=msg) elif not bot_exists: - steps.append({"step": "telegram_remove", "status": "skip", "detail": "Bot resource not found"}) + steps.skip("telegram_remove", detail="Bot resource not found") if name: result = self._deployer.delete() steps.extend(result.steps) - steps.append({ - "step": "bot_delete", - "status": "ok" if result.ok else "failed", - "detail": "Bot deleted" if result.ok else (result.error or "Failed"), - }) + steps.record("bot_delete", ok=result.ok, + detail="Bot deleted" if result.ok else (result.error or "Failed")) elif app_id: # Entra app exists but agent hasn't created the bot service yet. 
ok, _ = self._az.ok("ad", "app", "delete", "--id", app_id) - steps.append({ - "step": "app_delete", - "status": "ok" if ok else "failed", - "detail": f"Deleted Entra app {app_id[:12]}..." if ok else "Delete failed", - }) + steps.record("app_delete", ok=ok, + detail=f"Deleted Entra app {app_id[:12]}..." if ok else "Delete failed") else: - steps.append({"step": "bot_delete", "status": "skip", "detail": "No bot deployed"}) + steps.skip("bot_delete", detail="No bot deployed") voice_rg = self._store.channels.voice_call.voice_resource_group or "" prereq_rg = cfg.env.read("KEY_VAULT_RG") or "" @@ -232,14 +200,16 @@ def decommission(self) -> list[dict[str, Any]]: reason.append("prerequisites") label = " & ".join(reason) logger.info("Skipping RG deletion: %s is the %s resource group", rg, label) - steps.append({"step": "resource_group_delete", "status": "skip", "detail": f"{rg} is the {label} RG -- not deleting"}) + steps.skip("resource_group_delete", + detail=f"{rg} is the {label} RG -- not deleting") else: rg_exists = self._az.json("group", "show", "--name", rg) is not None if rg_exists: ok, msg = self._az.ok("group", "delete", "--name", rg, "--yes", "--no-wait") - steps.append({"step": "resource_group_delete", "status": "ok" if ok else "failed", "detail": f"Deleting {rg}" if ok else msg}) + steps.record("resource_group_delete", ok=ok, + detail=f"Deleting {rg}" if ok else msg) else: - steps.append({"step": "resource_group_delete", "status": "skip", "detail": "RG not found"}) + steps.skip("resource_group_delete", detail="RG not found") cfg.write_env( BOT_APP_ID="", BOT_APP_PASSWORD="", BOT_APP_TENANT_ID="", @@ -252,7 +222,7 @@ def decommission(self) -> list[dict[str, Any]]: rec.mark_stopped() self._deploy_store.update(rec) - return steps + return steps.to_list() def status(self) -> dict[str, Any]: result: dict[str, Any] = { diff --git a/app/runtime/services/otel.py b/app/runtime/services/otel.py index cd29744..c594557 100644 --- a/app/runtime/services/otel.py +++ 
b/app/runtime/services/otel.py @@ -99,17 +99,13 @@ def shutdown_otel() -> None: from opentelemetry import trace, metrics from opentelemetry._logs import get_logger_provider - tp = trace.get_tracer_provider() - if hasattr(tp, "shutdown"): - tp.shutdown() - - mp = metrics.get_meter_provider() - if hasattr(mp, "shutdown"): - mp.shutdown() - - lp = get_logger_provider() - if hasattr(lp, "shutdown"): - lp.shutdown() + for provider in ( + trace.get_tracer_provider(), + metrics.get_meter_provider(), + get_logger_provider(), + ): + if hasattr(provider, "shutdown"): + provider.shutdown() _otel_active = False logger.info("[otel.shutdown] OpenTelemetry providers shut down") diff --git a/app/runtime/services/resource_tracker.py b/app/runtime/services/resource_tracker.py index aab7819..4e16b85 100644 --- a/app/runtime/services/resource_tracker.py +++ b/app/runtime/services/resource_tracker.py @@ -247,17 +247,16 @@ def cleanup_orphan_group(self, rg: str) -> tuple[bool, str]: return self.delete_resource_group(rg) def to_dict(self, audit_result: AuditResult) -> dict[str, Any]: + from dataclasses import asdict + + def _resource_fields(r: AzureResource) -> dict[str, Any]: + d = asdict(r) + d.pop("tags", None) + return d + return { - "tracked_resources": [ - {"id": r.id, "name": r.name, "resource_group": r.resource_group, - "resource_type": r.resource_type, "location": r.location, "deploy_tag": r.deploy_tag} - for r in audit_result.tracked_resources - ], - "orphaned_resources": [ - {"id": r.id, "name": r.name, "resource_group": r.resource_group, - "resource_type": r.resource_type, "location": r.location, "deploy_tag": r.deploy_tag} - for r in audit_result.orphaned_resources - ], + "tracked_resources": [_resource_fields(r) for r in audit_result.tracked_resources], + "orphaned_resources": [_resource_fields(r) for r in audit_result.orphaned_resources], "orphaned_groups": [ {"name": g.name, "location": g.location, "deploy_tag": g.deploy_tag} for g in audit_result.orphaned_groups diff 
--git a/app/runtime/services/security/misconfig_checker.py b/app/runtime/services/security/misconfig_checker.py index 9e7cdb7..a452092 100644 --- a/app/runtime/services/security/misconfig_checker.py +++ b/app/runtime/services/security/misconfig_checker.py @@ -63,138 +63,134 @@ def check_all(self, resource_groups: list[str]) -> CheckResult: self._check_acr(rg, rname, result) return result + @staticmethod + def _assert( + result: CheckResult, fail: bool, *, + severity: str, category: str, rtype: str, + rg: str, name: str, title: str, detail: str, + recommendation: str = "", + ) -> None: + """Record a pass or fail finding on *result*.""" + if fail: + result.findings.append(Finding( + severity=severity, category=category, resource_name=name, + resource_group=rg, resource_type=rtype, title=title, + detail=detail, recommendation=recommendation, + )) + result.checks_failed += 1 + else: + result.checks_passed += 1 + def _check_storage_account(self, rg: str, name: str, result: CheckResult) -> None: info = self._az.json("storage", "account", "show", "--name", name, "--resource-group", rg) if not isinstance(info, dict): return props = info.get("properties", info) + kw = dict(category="storage", rtype="Storage Account", rg=rg, name=name) - if props.get("allowBlobPublicAccess", True): - result.findings.append(Finding( - severity="high", category="storage", resource_name=name, resource_group=rg, - resource_type="Storage Account", title="Public blob access enabled", - detail=f"Storage account '{name}' allows public access to blobs.", - recommendation=f"az storage account update --name {name} --resource-group {rg} --allow-blob-public-access false", - )) - result.checks_failed += 1 - else: - result.checks_passed += 1 + self._assert( + result, props.get("allowBlobPublicAccess", True), + severity="high", title="Public blob access enabled", + detail=f"Storage account '{name}' allows public access to blobs.", + recommendation=f"az storage account update --name {name} --resource-group 
{rg} --allow-blob-public-access false", + **kw, + ) https_only = info.get("enableHttpsTrafficOnly", props.get("supportsHttpsTrafficOnly", True)) - if not https_only: - result.findings.append(Finding( - severity="high", category="storage", resource_name=name, resource_group=rg, - resource_type="Storage Account", title="HTTP traffic allowed (HTTPS not enforced)", - detail=f"Storage account '{name}' allows non-HTTPS traffic.", - recommendation=f"az storage account update --name {name} --resource-group {rg} --https-only true", - )) - result.checks_failed += 1 - else: - result.checks_passed += 1 + self._assert( + result, not https_only, + severity="high", title="HTTP traffic allowed (HTTPS not enforced)", + detail=f"Storage account '{name}' allows non-HTTPS traffic.", + recommendation=f"az storage account update --name {name} --resource-group {rg} --https-only true", + **kw, + ) net_rules = props.get("networkRuleSet", props.get("networkAcls", {})) default_action = (net_rules.get("defaultAction") or "Allow").lower() - if default_action == "allow": - result.findings.append(Finding( - severity="medium", category="storage", resource_name=name, resource_group=rg, - resource_type="Storage Account", title="Network access not restricted", - detail=f"Storage account '{name}' allows access from all networks.", - recommendation=f"az storage account update --name {name} --resource-group {rg} --default-action Deny", - )) - result.checks_failed += 1 - else: - result.checks_passed += 1 + self._assert( + result, default_action == "allow", + severity="medium", title="Network access not restricted", + detail=f"Storage account '{name}' allows access from all networks.", + recommendation=f"az storage account update --name {name} --resource-group {rg} --default-action Deny", + **kw, + ) min_tls = props.get("minimumTlsVersion", "TLS1_0") - if min_tls in ("TLS1_0", "TLS1_1"): - result.findings.append(Finding( - severity="medium", category="storage", resource_name=name, resource_group=rg, - 
resource_type="Storage Account", title=f"Weak minimum TLS version ({min_tls})", - detail=f"Storage account '{name}' allows {min_tls} connections.", - recommendation=f"az storage account update --name {name} --resource-group {rg} --min-tls-version TLS1_2", - )) - result.checks_failed += 1 - else: - result.checks_passed += 1 + self._assert( + result, min_tls in ("TLS1_0", "TLS1_1"), + severity="medium", title=f"Weak minimum TLS version ({min_tls})", + detail=f"Storage account '{name}' allows {min_tls} connections.", + recommendation=f"az storage account update --name {name} --resource-group {rg} --min-tls-version TLS1_2", + **kw, + ) def _check_keyvault(self, rg: str, name: str, result: CheckResult) -> None: info = self._az.json("keyvault", "show", "--name", name, "--resource-group", rg) if not isinstance(info, dict): return props = info.get("properties", info) + kw = dict(category="keyvault", rtype="Key Vault", rg=rg, name=name) - if not props.get("enableRbacAuthorization", False): - result.findings.append(Finding( - severity="high", category="keyvault", resource_name=name, resource_group=rg, - resource_type="Key Vault", title="RBAC authorization not enabled", - detail=f"Key Vault '{name}' uses access policies instead of RBAC.", - recommendation=f"az keyvault update --name {name} --resource-group {rg} --enable-rbac-authorization true", - )) - result.checks_failed += 1 - else: - result.checks_passed += 1 + self._assert( + result, not props.get("enableRbacAuthorization", False), + severity="high", title="RBAC authorization not enabled", + detail=f"Key Vault '{name}' uses access policies instead of RBAC.", + recommendation=f"az keyvault update --name {name} --resource-group {rg} --enable-rbac-authorization true", + **kw, + ) soft_delete = props.get("enableSoftDelete", False) purge_protect = props.get("enablePurgeProtection", False) if not soft_delete: - result.findings.append(Finding( - severity="medium", category="keyvault", resource_name=name, resource_group=rg, - 
resource_type="Key Vault", title="Soft delete not enabled", + self._assert( + result, True, severity="medium", + title="Soft delete not enabled", detail=f"Key Vault '{name}' does not have soft delete enabled.", recommendation="Enable soft delete (default for new vaults).", - )) - result.checks_failed += 1 + **kw, + ) elif not purge_protect: - result.findings.append(Finding( - severity="low", category="keyvault", resource_name=name, resource_group=rg, - resource_type="Key Vault", title="Purge protection not enabled", + self._assert( + result, True, severity="low", + title="Purge protection not enabled", detail=f"Key Vault '{name}' has soft delete but not purge protection.", recommendation=f"az keyvault update --name {name} --enable-purge-protection true", - )) - result.checks_failed += 1 + **kw, + ) else: result.checks_passed += 1 net_acls = props.get("networkAcls", {}) network_default = (net_acls.get("defaultAction") or "Allow").lower() public_access = props.get("publicNetworkAccess", "Enabled") - if network_default == "allow" and public_access != "Disabled": - result.findings.append(Finding( - severity="medium", category="keyvault", resource_name=name, resource_group=rg, - resource_type="Key Vault", title="Public network access not restricted", - detail=f"Key Vault '{name}' is accessible from all networks.", - recommendation="Restrict network access or use private endpoints.", - )) - result.checks_failed += 1 - else: - result.checks_passed += 1 + self._assert( + result, network_default == "allow" and public_access != "Disabled", + severity="medium", title="Public network access not restricted", + detail=f"Key Vault '{name}' is accessible from all networks.", + recommendation="Restrict network access or use private endpoints.", + **kw, + ) def _check_acr(self, rg: str, name: str, result: CheckResult) -> None: info = self._az.json("acr", "show", "--name", name, "--resource-group", rg) if not isinstance(info, dict): return - - if info.get("adminUserEnabled", False): 
- result.findings.append(Finding( - severity="medium", category="acr", resource_name=name, resource_group=rg, - resource_type="Container Registry", title="Admin user enabled", - detail=f"Container Registry '{name}' has the admin user enabled.", - recommendation=f"az acr update --name {name} --admin-enabled false", - )) - result.checks_failed += 1 - else: - result.checks_passed += 1 - - if info.get("publicNetworkAccess", "Enabled") == "Enabled": - result.findings.append(Finding( - severity="low", category="acr", resource_name=name, resource_group=rg, - resource_type="Container Registry", title="Public network access enabled", - detail=f"Container Registry '{name}' is accessible from the public internet.", - recommendation="Consider restricting via firewall rules or private endpoints.", - )) - result.checks_failed += 1 - else: - result.checks_passed += 1 + kw = dict(category="acr", rtype="Container Registry", rg=rg, name=name) + + self._assert( + result, info.get("adminUserEnabled", False), + severity="medium", title="Admin user enabled", + detail=f"Container Registry '{name}' has the admin user enabled.", + recommendation=f"az acr update --name {name} --admin-enabled false", + **kw, + ) + self._assert( + result, info.get("publicNetworkAccess", "Enabled") == "Enabled", + severity="low", title="Public network access enabled", + detail=f"Container Registry '{name}' is accessible from the public internet.", + recommendation="Consider restricting via firewall rules or private endpoints.", + **kw, + ) @staticmethod def to_dict(result: CheckResult) -> dict[str, Any]: diff --git a/app/runtime/services/security/preflight_identity.py b/app/runtime/services/security/preflight_identity.py index 4cb8c5a..f9e08ad 100644 --- a/app/runtime/services/security/preflight_identity.py +++ b/app/runtime/services/security/preflight_identity.py @@ -129,30 +129,16 @@ def check_identity_valid( if info["strategy"] == "sp": app_id = info["app_id"] cmd = f"az ad sp show --id {app_id}" - sp = 
az.json("ad", "sp", "show", "--id", app_id) - if isinstance(sp, dict) and sp.get("appId"): - display = sp.get("displayName", "?") - _add( - result, id="identity_valid", category="identity", - name="Service Principal Exists in Azure AD", - status="pass", - detail=f"{display} ({app_id})", - evidence=( - f"displayName={display}\n" - f"appId={app_id}\n" - f"objectId={sp.get('id', '?')}" - ), - command=cmd, - ) + data = az.json("ad", "sp", "show", "--id", app_id) + label = "Service Principal" + ok = isinstance(data, dict) and data.get("appId") + if ok: + display = data.get("displayName", "?") + detail = f"{display} ({app_id})" + evidence = f"displayName={display}\nappId={app_id}\nobjectId={data.get('id', '?')}" else: - _add( - result, id="identity_valid", category="identity", - name="Service Principal Exists in Azure AD", - status="fail", - detail=f"SP not found: {app_id}", - evidence=az.last_stderr or "No response", - command=cmd, - ) + detail = f"SP not found: {app_id}" + evidence = az.last_stderr or "No response" else: resource_id = info.get("resource_id", "") if not resource_id: @@ -164,29 +150,26 @@ def check_identity_valid( ) return cmd = f"az identity show --ids {resource_id}" - mi = az.json("identity", "show", "--ids", resource_id) - if isinstance(mi, dict) and mi.get("clientId"): - _add( - result, id="identity_valid", category="identity", - name="Managed Identity Exists", - status="pass", - detail=f"{mi.get('name', '?')} (client={mi.get('clientId', '?')})", - evidence=( - f"name={mi.get('name', '?')}\n" - f"clientId={mi.get('clientId', '?')}\n" - f"principalId={mi.get('principalId', '?')}" - ), - command=cmd, + data = az.json("identity", "show", "--ids", resource_id) + label = "Managed Identity" + ok = isinstance(data, dict) and data.get("clientId") + if ok: + detail = f"{data.get('name', '?')} (client={data.get('clientId', '?')})" + evidence = ( + f"name={data.get('name', '?')}\n" + f"clientId={data.get('clientId', '?')}\n" + 
f"principalId={data.get('principalId', '?')}" ) else: - _add( - result, id="identity_valid", category="identity", - name="Managed Identity Exists", - status="fail", - detail=f"MI not found: {resource_id}", - evidence=az.last_stderr or "No response", - command=cmd, - ) + detail = f"MI not found: {resource_id}" + evidence = az.last_stderr or "No response" + + _add( + result, id="identity_valid", category="identity", + name=f"{label} Exists in Azure AD", + status="pass" if ok else "fail", + detail=detail, evidence=evidence, command=cmd, + ) def check_credential_expiry( diff --git a/app/runtime/services/security/preflight_rbac.py b/app/runtime/services/security/preflight_rbac.py index e1f610a..97c1d27 100644 --- a/app/runtime/services/security/preflight_rbac.py +++ b/app/runtime/services/security/preflight_rbac.py @@ -13,6 +13,16 @@ add_check as _add, ) +_FILTERED = "Filtered from role assignment list" + + +def _rbac( + result: PreflightResult, *, id: str, name: str, + status: str, detail: str, evidence: str = "", command: str = _FILTERED, +) -> None: + _add(result, id=id, category="rbac", name=name, status=status, + detail=detail, evidence=evidence, command=command) + def check_rbac_list( az: AzureCLI, result: PreflightResult, info: IdentityInfo, @@ -26,13 +36,11 @@ def check_rbac_list( "role", "assignment", "list", "--assignee", assignee, "--all", ) if not isinstance(assignments, list): - _add( - result, id="rbac_assignments_list", category="rbac", - name="RBAC Assignments Retrieved", - status="fail", + _rbac( + result, id="rbac_assignments_list", + name="RBAC Assignments Retrieved", status="fail", detail="Could not list RBAC assignments", - evidence=az.last_stderr or "No response", - command=cmd, + evidence=az.last_stderr or "No response", command=cmd, ) return None @@ -41,16 +49,14 @@ def check_rbac_list( f"{a.get('scope', '?').rsplit('/', 1)[-1]}" for a in assignments ) - _add( - result, id="rbac_assignments_list", category="rbac", - name="RBAC Assignments 
Retrieved", - status="pass", + _rbac( + result, id="rbac_assignments_list", + name="RBAC Assignments Retrieved", status="pass", detail=f"{len(assignments)} assignment(s): {summary}", evidence="\n".join( f"- {a.get('roleDefinitionName', '?')} on {a.get('scope', '?')}" for a in assignments - ), - command=cmd, + ), command=cmd, ) return assignments @@ -72,26 +78,20 @@ def check_rbac_has_role( ] if matching: scopes = [a.get("scope", "") for a in matching] - _add( - result, id=check_id, category="rbac", - name=check_name, - status="pass", + _rbac( + result, id=check_id, name=check_name, status="pass", detail=f"{role_name} assigned ({len(matching)} assignment(s))", evidence="\n".join(f"scope={s}" for s in scopes), - command="Filtered from role assignment list", ) else: detail = missing_detail or f"{role_name} NOT found in assignments" - _add( - result, id=check_id, category="rbac", - name=check_name, - status=missing_severity, + _rbac( + result, id=check_id, name=check_name, status=missing_severity, detail=detail, evidence=( f"Expected '{role_name}' but not present " f"in {len(assignments)} assignment(s)" ), - command="Filtered from role assignment list", ) @@ -106,13 +106,10 @@ def check_rbac_kv_access( ] if not kv_roles: - _add( - result, id="rbac_kv_access", category="rbac", - name="Key Vault Access Role", - status="warn", - detail="No Key Vault role assignment found", + _rbac( + result, id="rbac_kv_access", name="Key Vault Access Role", + status="warn", detail="No Key Vault role assignment found", evidence=f"Checked {len(assignments)} assignments for 'Key Vault' roles", - command="Filtered from role assignment list", ) return @@ -137,16 +134,13 @@ def check_rbac_kv_access( status = "pass" detail = f"Key Vault role: {', '.join(role_names)}" - _add( - result, id="rbac_kv_access", category="rbac", - name="Key Vault Access Role", - status=status, - detail=detail, + _rbac( + result, id="rbac_kv_access", name="Key Vault Access Role", + status=status, detail=detail, 
evidence="\n".join( f"- {a.get('roleDefinitionName', '?')} on {a.get('scope', '?')}" for a in kv_roles ), - command="Filtered from role assignment list", ) @@ -169,36 +163,27 @@ def check_rbac_session_pool( ] if matching: names = [a.get("roleDefinitionName", "?") for a in matching] - _add( - result, id="rbac_session_pool", category="rbac", - name="Session Pool Executor", - status="pass", - detail=f"Session role: {', '.join(names)}", - evidence="\n".join( - f"scope={a.get('scope', '?')}" for a in matching - ), - command="Filtered from role assignment list", + _rbac( + result, id="rbac_session_pool", name="Session Pool Executor", + status="pass", detail=f"Session role: {', '.join(names)}", + evidence="\n".join(f"scope={a.get('scope', '?')}" for a in matching), ) elif sandbox_enabled or sandbox_configured: - _add( - result, id="rbac_session_pool", category="rbac", - name="Session Pool Executor", + _rbac( + result, id="rbac_session_pool", name="Session Pool Executor", status="fail", detail=( "Azure ContainerApps Session Executor NOT found -- " "required for sandbox (HTTP 403 on file upload/execute)" ), evidence=f"Not present in {len(assignments)} assignment(s)", - command="Filtered from role assignment list", ) else: - _add( - result, id="rbac_session_pool", category="rbac", - name="Session Pool Executor", + _rbac( + result, id="rbac_session_pool", name="Session Pool Executor", status="warn", detail="ContainerApps Session Executor NOT found (needed if sandbox is enabled)", evidence=f"Not present in {len(assignments)} assignment(s)", - command="Filtered from role assignment list", ) @@ -210,21 +195,18 @@ def check_rbac_no_elevated( if a.get("roleDefinitionName") in _ELEVATED_ROLES ] if not elevated: - _add( - result, id="rbac_no_elevated", category="rbac", - name="No Elevated Roles", + _rbac( + result, id="rbac_no_elevated", name="No Elevated Roles", status="pass", detail="No Owner, Contributor, or User Access Administrator roles", evidence=( f"Checked 
{len(assignments)} assignment(s) against: " f"{', '.join(sorted(_ELEVATED_ROLES))}" ), - command="Filtered from role assignment list", ) else: - _add( - result, id="rbac_no_elevated", category="rbac", - name="No Elevated Roles", + _rbac( + result, id="rbac_no_elevated", name="No Elevated Roles", status="fail", detail=( f"ELEVATED roles found: " @@ -234,7 +216,6 @@ def check_rbac_no_elevated( f"- {a.get('roleDefinitionName', '?')} on {a.get('scope', '?')}" for a in elevated ), - command="Filtered from role assignment list", ) @@ -246,10 +227,9 @@ def check_rbac_scope_contained( if "/resourcegroups/" not in (a.get("scope") or "").lower() ] if not out_of_scope: - _add( - result, id="rbac_scope_contained", category="rbac", - name="Scope Limited to Resource Group", - status="pass", + _rbac( + result, id="rbac_scope_contained", + name="Scope Limited to Resource Group", status="pass", detail=( f"All {len(assignments)} assignment(s) scoped to " f"resource group level or below" @@ -260,10 +240,9 @@ def check_rbac_scope_contained( command="Scope analysis from role assignment list", ) else: - _add( - result, id="rbac_scope_contained", category="rbac", - name="Scope Limited to Resource Group", - status="fail", + _rbac( + result, id="rbac_scope_contained", + name="Scope Limited to Resource Group", status="fail", detail=( f"{len(out_of_scope)} assignment(s) at subscription or management " f"group level" diff --git a/app/runtime/services/security/preflight_secrets.py b/app/runtime/services/security/preflight_secrets.py index 819f508..3d71697 100644 --- a/app/runtime/services/security/preflight_secrets.py +++ b/app/runtime/services/security/preflight_secrets.py @@ -9,6 +9,14 @@ from .security_preflight import PreflightCheck, PreflightResult, add_check as _add +def _sec( + result: PreflightResult, *, id: str, name: str, + status: str, detail: str, evidence: str = "", command: str = "", +) -> None: + _add(result, id=id, category="secrets", name=name, status=status, + detail=detail, 
evidence=evidence, command=command) + + def run_secret_checks(result: PreflightResult) -> None: """Execute all secret-isolation checks.""" check_admin_cli_isolated(result) @@ -24,53 +32,37 @@ def check_admin_cli_isolated(result: PreflightResult) -> None: admin_home = os.environ.get("POLYCLAW_ADMIN_HOME", "/admin-home") azure_dir = Path(admin_home) / ".azure" mode = cfg.server_mode.value + exists = azure_dir.exists() + env_info = ( + f"HOME={os.environ.get('HOME', '?')}\n" + f"AZURE_CONFIG_DIR={os.environ.get('AZURE_CONFIG_DIR', '?')}\n" + f"{azure_dir} exists={exists}" + ) if mode == "admin": - exists = azure_dir.exists() - _add( - result, id="secret_admin_cli_isolated", category="secrets", - name="Admin CLI Session Isolated", - status="pass" if exists else "warn", - detail=( - f"Azure CLI config at {azure_dir}: " - f"{'present' if exists else 'not found'}" - ), - evidence=( - f"HOME={os.environ.get('HOME', '?')}\n" - f"AZURE_CONFIG_DIR={os.environ.get('AZURE_CONFIG_DIR', '?')}\n" - f"exists={exists}" - ), - command=f"os.path.exists({azure_dir})", - ) + status = "pass" if exists else "warn" + detail = f"Azure CLI config at {azure_dir}: {'present' if exists else 'not found'}" elif mode == "runtime": - exists = azure_dir.exists() - _add( - result, id="secret_admin_cli_isolated", category="secrets", - name="Admin CLI Session Isolated", - status="pass" if not exists else "fail", - detail=( - "Admin CLI config not accessible from runtime" - if not exists - else f"RISK: Admin CLI config accessible at {azure_dir}" - ), - evidence=( - f"HOME={os.environ.get('HOME', '?')}\n" - f"{azure_dir} exists={exists}" - ), - command=f"os.path.exists({azure_dir})", + status = "pass" if not exists else "fail" + detail = ( + "Admin CLI config not accessible from runtime" + if not exists + else f"RISK: Admin CLI config accessible at {azure_dir}" ) else: - _add( - result, id="secret_admin_cli_isolated", category="secrets", - name="Admin CLI Session Isolated", - status="warn", - detail=( - 
"Combined mode -- admin and runtime share the same " - "container (no credential isolation)" - ), - evidence=f"POLYCLAW_SERVER_MODE={mode}", - command="cfg.server_mode", + status = "warn" + detail = ( + "Combined mode -- admin and runtime share the same " + "container (no credential isolation)" ) + env_info = f"POLYCLAW_SERVER_MODE={mode}" + + _sec( + result, id="secret_admin_cli_isolated", + name="Admin CLI Session Isolated", + status=status, detail=detail, evidence=env_info, + command=f"os.path.exists({azure_dir})" if mode != "combined" else "cfg.server_mode", + ) def check_bot_credentials(result: PreflightResult) -> None: @@ -79,8 +71,8 @@ def check_bot_credentials(result: PreflightResult) -> None: app_pw = env_data.get("BOT_APP_PASSWORD", "") both = bool(app_id and app_pw) - _add( - result, id="secret_bot_creds", category="secrets", + _sec( + result, id="secret_bot_creds", name="Bot Credentials Present", status="pass" if both else ("warn" if app_id else "skip"), detail=( @@ -97,8 +89,8 @@ def check_bot_credentials(result: PreflightResult) -> None: def check_admin_secret(result: PreflightResult) -> None: secret = cfg.admin_secret - _add( - result, id="secret_admin_secret", category="secrets", + _sec( + result, id="secret_admin_secret", name="Admin Secret Configured", status="pass" if secret else "fail", detail=( @@ -115,10 +107,9 @@ def check_kv_reachable(result: PreflightResult) -> None: from ..keyvault import kv as _kv if not _kv.enabled: - _add( - result, id="secret_kv_reachable", category="secrets", - name="Key Vault Reachable", - status="skip", + _sec( + result, id="secret_kv_reachable", + name="Key Vault Reachable", status="skip", detail="Key Vault not configured", evidence=f"KEY_VAULT_URL={cfg.env.read('KEY_VAULT_URL') or '(empty)'}", command="keyvault.enabled", @@ -127,19 +118,17 @@ def check_kv_reachable(result: PreflightResult) -> None: try: secrets_list = _kv.list_secrets() - _add( - result, id="secret_kv_reachable", category="secrets", - name="Key 
Vault Reachable", - status="pass", + _sec( + result, id="secret_kv_reachable", + name="Key Vault Reachable", status="pass", detail=f"Key Vault accessible, {len(secrets_list)} secret(s) readable", evidence=f"url={_kv.url}\nsecrets_count={len(secrets_list)}", command="keyvault.list_secrets()", ) except Exception as exc: - _add( - result, id="secret_kv_reachable", category="secrets", - name="Key Vault Reachable", - status="fail", + _sec( + result, id="secret_kv_reachable", + name="Key Vault Reachable", status="fail", detail=f"Key Vault NOT reachable: {exc}", evidence=f"url={_kv.url}\nerror={exc}", command="keyvault.list_secrets()", @@ -154,8 +143,8 @@ def check_acs_credential(result: PreflightResult) -> None: for k, _, v in (seg.partition("=") for seg in conn.split(";") if "=" in seg) } has_ep = bool(parts.get("endpoint")) - _add( - result, id="secret_acs_present", category="secrets", + _sec( + result, id="secret_acs_present", name="ACS Connection String", status="pass" if has_ep else "warn", detail=( @@ -166,10 +155,9 @@ def check_acs_credential(result: PreflightResult) -> None: command="env: ACS_CONNECTION_STRING", ) else: - _add( - result, id="secret_acs_present", category="secrets", - name="ACS Connection String", - status="skip", + _sec( + result, id="secret_acs_present", + name="ACS Connection String", status="skip", detail="ACS not configured", evidence="ACS_CONNECTION_STRING=(empty)", command="env: ACS_CONNECTION_STRING", @@ -181,10 +169,9 @@ def check_aoai_credential(result: PreflightResult) -> None: key = cfg.azure_openai_api_key if endpoint: - _add( - result, id="secret_aoai_present", category="secrets", - name="Azure OpenAI Configuration", - status="pass", + _sec( + result, id="secret_aoai_present", + name="Azure OpenAI Configuration", status="pass", detail=f"Endpoint configured, {'API key' if key else 'identity auth'} mode", evidence=( f"AZURE_OPENAI_ENDPOINT={endpoint}\n" @@ -193,10 +180,9 @@ def check_aoai_credential(result: PreflightResult) -> None: 
command="env: AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_KEY", ) else: - _add( - result, id="secret_aoai_present", category="secrets", - name="Azure OpenAI Configuration", - status="skip", + _sec( + result, id="secret_aoai_present", + name="Azure OpenAI Configuration", status="skip", detail="Azure OpenAI not configured", evidence="AZURE_OPENAI_ENDPOINT=(empty)", command="env: AZURE_OPENAI_ENDPOINT", @@ -212,10 +198,9 @@ def check_sp_creds_written(result: PreflightResult) -> None: if not sp_id: mi_id = env_data.get("ACA_MI_CLIENT_ID", "") if mi_id: - _add( - result, id="secret_identity_creds", category="secrets", - name="Runtime Identity Credentials in .env", - status="pass", + _sec( + result, id="secret_identity_creds", + name="Runtime Identity Credentials in .env", status="pass", detail="Managed identity credentials written to .env", evidence=( f"ACA_MI_CLIENT_ID={mi_id}\n" @@ -224,10 +209,9 @@ def check_sp_creds_written(result: PreflightResult) -> None: command="env: ACA_MI_CLIENT_ID, ACA_MI_RESOURCE_ID", ) else: - _add( - result, id="secret_identity_creds", category="secrets", - name="Runtime Identity Credentials in .env", - status="skip", + _sec( + result, id="secret_identity_creds", + name="Runtime Identity Credentials in .env", status="skip", detail="No runtime identity credentials in .env", evidence="RUNTIME_SP_APP_ID=(empty)\nACA_MI_CLIENT_ID=(empty)", command="env: RUNTIME_SP_APP_ID, ACA_MI_CLIENT_ID", @@ -235,8 +219,8 @@ def check_sp_creds_written(result: PreflightResult) -> None: return all_set = bool(sp_id and sp_pw and sp_tenant) - _add( - result, id="secret_identity_creds", category="secrets", + _sec( + result, id="secret_identity_creds", name="SP Credentials in .env", status="pass" if all_set else "fail", detail=( diff --git a/app/runtime/services/security/prompt_shield.py b/app/runtime/services/security/prompt_shield.py index b5494e7..e5d34e7 100644 --- a/app/runtime/services/security/prompt_shield.py +++ 
b/app/runtime/services/security/prompt_shield.py @@ -88,11 +88,7 @@ def check(self, text: str) -> ShieldResult: logger.warning( "[prompt_shield.check] no endpoint configured -- skipping check" ) - return ShieldResult( - attack_detected=False, - mode="prompt_shields", - detail="Content Safety endpoint not configured -- check skipped", - ) + return self._result(False, "Content Safety endpoint not configured -- check skipped") logger.info( "[prompt_shield.check] scanning %d chars via Content Safety API", len(text), @@ -107,8 +103,8 @@ def _get_auth_header(self) -> dict[str, str]: logger.debug("[prompt_shield] using Entra ID bearer token") return {"Authorization": f"Bearer {token}"} - def _api_check(self, text: str) -> ShieldResult: - """Call the Prompt Shields REST API.""" + def _build_request(self, text: str) -> urllib.request.Request: + """Build a Prompt Shields API request.""" url = ( f"{self._endpoint}/contentsafety/text:shieldPrompt" f"?api-version={_API_VERSION}" @@ -116,12 +112,14 @@ def _api_check(self, text: str) -> ShieldResult: body = json.dumps({"userPrompt": text, "documents": []}).encode("utf-8") headers = {"Content-Type": "application/json"} headers.update(self._get_auth_header()) - req = urllib.request.Request( - url, - data=body, - headers=headers, - method="POST", - ) + return urllib.request.Request(url, data=body, headers=headers, method="POST") + + def _result(self, attack: bool, detail: str) -> ShieldResult: + return ShieldResult(attack_detected=attack, mode="prompt_shields", detail=detail) + + def _api_check(self, text: str) -> ShieldResult: + """Call the Prompt Shields REST API.""" + req = self._build_request(text) t0 = time.monotonic() try: with urllib.request.urlopen(req, timeout=10) as resp: @@ -135,11 +133,7 @@ def _api_check(self, text: str) -> ShieldResult: "[prompt_shield.api] result=%s elapsed=%.0fms detail=%s", "ATTACK" if detected else "CLEAN", elapsed_ms, detail, ) - return ShieldResult( - attack_detected=detected, - 
mode="prompt_shields", - detail=detail, - ) + return self._result(detected, detail) except urllib.error.HTTPError as exc: elapsed_ms = (time.monotonic() - t0) * 1000 body_text = exc.read().decode("utf-8", errors="replace")[:500] @@ -147,81 +141,35 @@ def _api_check(self, text: str) -> ShieldResult: "[prompt_shield.api] HTTP %s elapsed=%.0fms body=%s", exc.code, elapsed_ms, body_text, ) - # Any API error blocks the call -- no silent fallback. - return ShieldResult( - attack_detected=True, - mode="prompt_shields", - detail=f"Content Safety API error (HTTP {exc.code}) -- blocking for safety", - ) + return self._result(True, f"Content Safety API error (HTTP {exc.code}) -- blocking for safety") except Exception as exc: elapsed_ms = (time.monotonic() - t0) * 1000 logger.error( "[prompt_shield.api] request failed elapsed=%.0fms error=%s", elapsed_ms, exc, exc_info=True, ) - return ShieldResult( - attack_detected=True, - mode="prompt_shields", - detail=f"Content Safety API unreachable -- blocking for safety: {exc}", - ) + return self._result(True, f"Content Safety API unreachable -- blocking for safety: {exc}") def dry_run(self) -> ShieldResult: - """Send a harmless probe to verify API connectivity and RBAC. - - Returns a ``ShieldResult`` whose ``attack_detected`` is ``False`` - when the API accepted the call (permissions OK) and ``True`` when - auth or connectivity failed. The ``detail`` field contains a - human-readable explanation. 
- """ + """Send a harmless probe to verify API connectivity and RBAC.""" if not self.configured: - return ShieldResult( - attack_detected=True, - mode="prompt_shields", - detail="No endpoint configured", - ) - - url = ( - f"{self._endpoint}/contentsafety/text:shieldPrompt" - f"?api-version={_API_VERSION}" - ) - body = json.dumps( - {"userPrompt": "Hello, this is a connectivity test.", "documents": []}, - ).encode("utf-8") - headers = {"Content-Type": "application/json"} + return self._result(True, "No endpoint configured") try: - headers.update(self._get_auth_header()) + req = self._build_request("Hello, this is a connectivity test.") except Exception as exc: - return ShieldResult( - attack_detected=True, - mode="prompt_shields", - detail=f"Token acquisition failed: {exc}", - ) - - req = urllib.request.Request(url, data=body, headers=headers, method="POST") + return self._result(True, f"Token acquisition failed: {exc}") try: with urllib.request.urlopen(req, timeout=10) as resp: resp.read() logger.info("[prompt_shield.dry_run] API reachable, auth OK") - return ShieldResult( - attack_detected=False, - mode="prompt_shields", - detail="API reachable, auth OK", - ) + return self._result(False, "API reachable, auth OK") except urllib.error.HTTPError as exc: body_text = exc.read().decode("utf-8", errors="replace")[:500] logger.error("[prompt_shield.dry_run] HTTP %s: %s", exc.code, body_text) - return ShieldResult( - attack_detected=True, - mode="prompt_shields", - detail=f"HTTP {exc.code}: {body_text}", - ) + return self._result(True, f"HTTP {exc.code}: {body_text}") except Exception as exc: logger.error("[prompt_shield.dry_run] connection failed: %s", exc, exc_info=True) - return ShieldResult( - attack_detected=True, - mode="prompt_shields", - detail=f"Connection failed: {exc}", - ) + return self._result(True, f"Connection failed: {exc}") class _BearerTokenProvider: diff --git a/app/runtime/services/security/security_preflight.py 
b/app/runtime/services/security/security_preflight.py index be6e82b..8ed6124 100644 --- a/app/runtime/services/security/security_preflight.py +++ b/app/runtime/services/security/security_preflight.py @@ -143,12 +143,11 @@ def to_dict(result: PreflightResult) -> dict[str, Any]: @staticmethod def _tally(result: PreflightResult) -> None: + counts = {"pass": 0, "fail": 0, "warn": 0, "skip": 0} for c in result.checks: - if c.status == "pass": - result.passed += 1 - elif c.status == "fail": - result.failed += 1 - elif c.status == "warn": - result.warnings += 1 - elif c.status == "skip": - result.skipped += 1 + if c.status in counts: + counts[c.status] += 1 + result.passed = counts["pass"] + result.failed = counts["fail"] + result.warnings = counts["warn"] + result.skipped = counts["skip"] diff --git a/app/runtime/state/deploy_state.py b/app/runtime/state/deploy_state.py index 2e53003..7a9c120 100644 --- a/app/runtime/state/deploy_state.py +++ b/app/runtime/state/deploy_state.py @@ -128,19 +128,15 @@ def active_deployments(self) -> list[DeploymentRecord]: def by_kind(self, kind: str) -> list[DeploymentRecord]: return [d for d in self._deployments.values() if d.kind == kind] + def _latest_active(self, kind: str) -> DeploymentRecord | None: + active = [d for d in self._deployments.values() if d.kind == kind and d.status == "active"] + return max(active, key=lambda d: d.updated_at) if active else None + def current_local(self) -> DeploymentRecord | None: - local = [ - d for d in self._deployments.values() - if d.kind == "local" and d.status == "active" - ] - return max(local, key=lambda d: d.updated_at) if local else None + return self._latest_active("local") def current_aca(self) -> DeploymentRecord | None: - aca = [ - d for d in self._deployments.values() - if d.kind == "aca" and d.status == "active" - ] - return max(aca, key=lambda d: d.updated_at) if aca else None + return self._latest_active("aca") def register(self, record: DeploymentRecord) -> None: 
self._deployments[record.deploy_id] = record @@ -174,18 +170,15 @@ def to_dict(self) -> dict[str, Any]: return {"deployments": {did: asdict(rec) for did, rec in self._deployments.items()}} def summary(self) -> list[dict[str, Any]]: - result = [] - for rec in self._deployments.values(): - result.append({ - "deploy_id": rec.deploy_id, - "tag": rec.tag, - "kind": rec.kind, - "status": rec.status, - "created_at": rec.created_at, - "updated_at": rec.updated_at, - "resource_groups": rec.resource_groups, - "resource_count": len(rec.resources), - }) + result = [ + { + "deploy_id": r.deploy_id, "tag": r.tag, "kind": r.kind, + "status": r.status, "created_at": r.created_at, + "updated_at": r.updated_at, "resource_groups": r.resource_groups, + "resource_count": len(r.resources), + } + for r in self._deployments.values() + ] return sorted(result, key=lambda r: r["updated_at"], reverse=True) def _load(self) -> None: diff --git a/app/runtime/state/foundry_iq_config.py b/app/runtime/state/foundry_iq_config.py index d3df172..1336a0a 100644 --- a/app/runtime/state/foundry_iq_config.py +++ b/app/runtime/state/foundry_iq_config.py @@ -109,20 +109,6 @@ def _save_data(self) -> dict[str, Any]: # -- singleton ------------------------------------------------------------- -_store: FoundryIQConfigStore | None = None +from ..util.singletons import Singleton # noqa: E402 - -def get_foundry_iq_config() -> FoundryIQConfigStore: - global _store - if _store is None: - _store = FoundryIQConfigStore() - return _store - - -def _reset_store() -> None: - global _store - _store = None - - -from ..util.singletons import register_singleton -register_singleton(_reset_store) +get_foundry_iq_config, _reset_foundry_iq_config = Singleton.create(FoundryIQConfigStore) diff --git a/app/runtime/state/guardrails/config.py b/app/runtime/state/guardrails/config.py index 69b1bc0..5a22678 100644 --- a/app/runtime/state/guardrails/config.py +++ b/app/runtime/state/guardrails/config.py @@ -44,8 +44,6 @@ logger = 
logging.getLogger(__name__) -_instance: GuardrailsConfigStore | None = None - class GuardrailsConfigStore: """JSON-file-backed guardrails configuration. @@ -108,31 +106,29 @@ def set_default_channel(self, channel: str) -> None: self._config.default_channel = channel self._save() - def set_phone_number(self, number: str) -> None: - self._config.phone_number = number + def _set_and_save(self, attr: str, value: Any) -> None: + setattr(self._config, attr, value) self._save() + def set_phone_number(self, number: str) -> None: + self._set_and_save("phone_number", number) + def set_aitl_model(self, model: str) -> None: - self._config.aitl_model = model - self._save() + self._set_and_save("aitl_model", model) def set_aitl_spotlighting(self, enabled: bool) -> None: - self._config.aitl_spotlighting = enabled - self._save() - - def set_filter_mode(self, mode: str) -> None: - if mode != "prompt_shields": - raise ValueError("filter_mode must be 'prompt_shields'") - self._config.filter_mode = mode - self._save() + self._set_and_save("aitl_spotlighting", enabled) def set_content_safety_endpoint(self, endpoint: str) -> None: - self._config.content_safety_endpoint = endpoint - self._save() + self._set_and_save("content_safety_endpoint", endpoint) def set_content_safety_key(self, key: str) -> None: - self._config.content_safety_key = key - self._save() + self._set_and_save("content_safety_key", key) + + def set_filter_mode(self, mode: str) -> None: + if mode != "prompt_shields": + raise ValueError("filter_mode must be 'prompt_shields'") + self._set_and_save("filter_mode", mode) def set_context_default(self, context: str, strategy: str) -> None: if strategy not in _VALID_STRATEGIES: @@ -343,34 +339,31 @@ def resolve_channel( return self._config.default_channel def to_dict(self) -> dict[str, Any]: + c = self._config + ctx_defaults = dict(c.context_defaults) + tool_policies = {ctx: dict(p) for ctx, p in c.tool_policies.items()} + model_cols = list(c.model_columns) + model_policies = { 
+ m: {ctx: dict(tm) for ctx, tm in cp.items()} + for m, cp in c.model_policies.items() + } return { - # Frontend-canonical fields - "enabled": self._config.hitl_enabled, - "default_strategy": self._config.default_action, - "hitl_channel": self._config.default_channel, - "context_defaults": dict(self._config.context_defaults), - "tool_policies": { - ctx: dict(policies) - for ctx, policies in self._config.tool_policies.items() - }, - "model_columns": list(self._config.model_columns), - "model_policies": { - model: { - ctx: dict(tool_map) - for ctx, tool_map in ctx_policies.items() - } - for model, ctx_policies in self._config.model_policies.items() - }, - # Backend / legacy fields - "hitl_enabled": self._config.hitl_enabled, - "default_action": self._config.default_action, - "default_channel": self._config.default_channel, - "phone_number": self._config.phone_number, - "aitl_model": self._config.aitl_model, - "aitl_spotlighting": self._config.aitl_spotlighting, - "filter_mode": self._config.filter_mode, - "content_safety_endpoint": self._config.content_safety_endpoint, - "rules": [asdict(r) for r in self._config.rules], + "enabled": c.hitl_enabled, + "default_strategy": c.default_action, + "hitl_channel": c.default_channel, + "context_defaults": ctx_defaults, + "tool_policies": tool_policies, + "model_columns": model_cols, + "model_policies": model_policies, + "hitl_enabled": c.hitl_enabled, + "default_action": c.default_action, + "default_channel": c.default_channel, + "phone_number": c.phone_number, + "aitl_model": c.aitl_model, + "aitl_spotlighting": c.aitl_spotlighting, + "filter_mode": c.filter_mode, + "content_safety_endpoint": c.content_safety_endpoint, + "rules": [asdict(r) for r in c.rules], } @staticmethod @@ -487,19 +480,6 @@ def _save(self) -> None: self._rebuild_engine() -def get_guardrails_config(path: Path | None = None) -> GuardrailsConfigStore: - """Module-level singleton accessor.""" - global _instance - if _instance is None: - _instance = 
GuardrailsConfigStore(path) - return _instance - - -def _reset_guardrails_config() -> None: - global _instance - _instance = None - - -from ...util.singletons import register_singleton # noqa: E402 +from ...util.singletons import Singleton # noqa: E402 -register_singleton(_reset_guardrails_config) +get_guardrails_config, _reset_guardrails_config = Singleton.create(GuardrailsConfigStore) diff --git a/app/runtime/state/guardrails/risk.py b/app/runtime/state/guardrails/risk.py index 9dd059e..addfd85 100644 --- a/app/runtime/state/guardrails/risk.py +++ b/app/runtime/state/guardrails/risk.py @@ -68,21 +68,16 @@ def _risk_of(tool_id: str) -> str: """Return the risk level for any tool/MCP/skill id.""" - if tool_id in _MCP_RISK: - return _MCP_RISK[tool_id] - if tool_id in _SKILL_RISK: - return _SKILL_RISK[tool_id] - if tool_id in _CUSTOM_TOOL_RISK: - return _CUSTOM_TOOL_RISK[tool_id] - # SDK tools + combined = {**_MCP_RISK, **_SKILL_RISK, **_CUSTOM_TOOL_RISK} + if tool_id in combined: + return combined[tool_id] if tool_id in ("view", "grep", "glob"): return "low" if tool_id in ("create", "edit"): return "medium" if tool_id in ("run", "bash"): return "high" - # Unknown MCP or skill -- default to high for safety - if tool_id.startswith("mcp:") or tool_id.startswith("skill:"): + if tool_id.startswith(("mcp:", "skill:")): return "high" return "medium" diff --git a/app/runtime/state/infra_config.py b/app/runtime/state/infra_config.py index 8c9d625..4af87f9 100644 --- a/app/runtime/state/infra_config.py +++ b/app/runtime/state/infra_config.py @@ -84,27 +84,18 @@ def voice_call_configured(self) -> bool: return bool(self.channels.voice_call.acs_connection_string) def _apply_raw(self, raw: dict[str, Any]) -> None: - bot_data = raw.get("bot", {}) - for k, v in bot_data.items(): - if hasattr(self._config.bot, k): - try: - setattr(self._config.bot, k, self._resolve_secret(v)) - except Exception: - logger.warning("Failed to resolve bot.%s -- skipping", k, exc_info=True) - tg_data = 
raw.get("channels", {}).get("telegram", {}) - for k, v in tg_data.items(): - if hasattr(self._config.channels.telegram, k): - try: - setattr(self._config.channels.telegram, k, self._resolve_secret(v)) - except Exception: - logger.warning("Failed to resolve telegram.%s -- skipping", k, exc_info=True) - vc_data = raw.get("channels", {}).get("voice_call", {}) - for k, v in vc_data.items(): - if hasattr(self._config.channels.voice_call, k): - try: - setattr(self._config.channels.voice_call, k, self._resolve_secret(v)) - except Exception: - logger.warning("Failed to resolve voice_call.%s -- skipping", k, exc_info=True) + sections = [ + (raw.get("bot", {}), self._config.bot, "bot"), + (raw.get("channels", {}).get("telegram", {}), self._config.channels.telegram, "telegram"), + (raw.get("channels", {}).get("voice_call", {}), self._config.channels.voice_call, "voice_call"), + ] + for data, target, label in sections: + for k, v in data.items(): + if hasattr(target, k): + try: + setattr(target, k, self._resolve_secret(v)) + except Exception: + logger.warning("Failed to resolve %s.%s -- skipping", label, k, exc_info=True) def _save_data(self) -> dict[str, Any]: return { @@ -159,21 +150,6 @@ def _mask_secrets(self, d: dict[str, Any]) -> dict[str, Any]: # -- singleton ------------------------------------------------------------- -_instance: InfraConfigStore | None = None - - -def get_infra_config() -> InfraConfigStore: - global _instance - if _instance is None: - _instance = InfraConfigStore() - return _instance - - -def _reset_infra_config() -> None: - global _instance - _instance = None - - -from ..util.singletons import register_singleton # noqa: E402 +from ..util.singletons import Singleton # noqa: E402 -register_singleton(_reset_infra_config) +get_infra_config, _reset_infra_config = Singleton.create(InfraConfigStore) diff --git a/app/runtime/state/memory.py b/app/runtime/state/memory.py index f74d05d..ead2438 100644 --- a/app/runtime/state/memory.py +++ 
b/app/runtime/state/memory.py @@ -371,20 +371,6 @@ def _process_proactive_reaction() -> None: # -- singleton ------------------------------------------------------------- -_memory: MemoryFormation | None = None +from ..util.singletons import Singleton # noqa: E402 - -def get_memory() -> MemoryFormation: - global _memory - if _memory is None: - _memory = MemoryFormation() - return _memory - - -def _reset_memory() -> None: - global _memory - _memory = None - - -from ..util.singletons import register_singleton -register_singleton(_reset_memory) +get_memory, _reset_memory = Singleton.create(MemoryFormation) diff --git a/app/runtime/state/monitoring_config.py b/app/runtime/state/monitoring_config.py index f898f16..c611d13 100644 --- a/app/runtime/state/monitoring_config.py +++ b/app/runtime/state/monitoring_config.py @@ -58,6 +58,11 @@ def update(self, **kwargs: Any) -> None: setattr(self._config, k, v) self._save() + _PROVISIONING_FIELDS = ( + "app_insights_name", "workspace_name", "resource_group", + "location", "connection_string", "subscription_id", + ) + def set_provisioned_metadata( self, *, @@ -69,25 +74,18 @@ def set_provisioned_metadata( subscription_id: str = "", ) -> None: """Persist provisioning metadata after a successful deploy.""" + kw = {k: v for k, v in locals().items() if k != "self"} + for k, v in kw.items(): + setattr(self._config, k, v) self._config.provisioned = True - self._config.app_insights_name = app_insights_name - self._config.workspace_name = workspace_name - self._config.resource_group = resource_group - self._config.location = location - self._config.connection_string = connection_string - self._config.subscription_id = subscription_id self._config.enabled = True self._save() def clear_provisioned_metadata(self) -> None: """Clear all provisioning metadata after decommission.""" + for f in self._PROVISIONING_FIELDS: + setattr(self._config, f, "") self._config.provisioned = False - self._config.app_insights_name = "" - 
self._config.workspace_name = "" - self._config.resource_group = "" - self._config.location = "" - self._config.connection_string = "" - self._config.subscription_id = "" self._config.enabled = False self._save() @@ -146,23 +144,8 @@ def to_dict_full(self) -> dict[str, Any]: # -- singleton ------------------------------------------------------------- -_instance: MonitoringConfigStore | None = None - - -def get_monitoring_config() -> MonitoringConfigStore: - global _instance - if _instance is None: - _instance = MonitoringConfigStore() - return _instance - - -def _reset_monitoring_config() -> None: - global _instance - _instance = None - - -from ..util.singletons import register_singleton # noqa: E402 +from ..util.singletons import Singleton # noqa: E402 -register_singleton(_reset_monitoring_config) +get_monitoring_config, _reset_monitoring_config = Singleton.create(MonitoringConfigStore) diff --git a/app/runtime/state/proactive.py b/app/runtime/state/proactive.py index 185dfcc..23462f1 100644 --- a/app/runtime/state/proactive.py +++ b/app/runtime/state/proactive.py @@ -244,20 +244,6 @@ def get_full_state(self) -> dict[str, Any]: # -- singleton ------------------------------------------------------------- -_store: ProactiveStore | None = None +from ..util.singletons import Singleton # noqa: E402 - -def get_proactive_store() -> ProactiveStore: - global _store - if _store is None: - _store = ProactiveStore() - return _store - - -def _reset_proactive_store() -> None: - global _store - _store = None - - -from ..util.singletons import register_singleton -register_singleton(_reset_proactive_store) +get_proactive_store, _reset_proactive_store = Singleton.create(ProactiveStore) diff --git a/app/runtime/state/profile.py b/app/runtime/state/profile.py index 355b85c..03bc38e 100644 --- a/app/runtime/state/profile.py +++ b/app/runtime/state/profile.py @@ -19,13 +19,26 @@ } -def _profile_path() -> Path: - return cfg.data_dir / "agent_profile.json" +def _load_json(path: Path, 
default: Any = None) -> Any: + """Load a JSON file, returning *default* on any error.""" + if default is None: + default = {} + if not path.exists(): + return default() if callable(default) else (dict(default) if isinstance(default, dict) else list(default) if isinstance(default, list) else default) + try: + return json.loads(path.read_text()) + except (json.JSONDecodeError, OSError): + return default() if callable(default) else (dict(default) if isinstance(default, dict) else list(default) if isinstance(default, list) else default) + + +def _write_json(path: Path, data: Any) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(data, indent=2) + "\n") def profile_path() -> Path: """Return the path to the agent profile JSON file.""" - return _profile_path() + return cfg.data_dir / "agent_profile.json" def _usage_path() -> Path: @@ -37,69 +50,40 @@ def _interactions_path() -> Path: def load_profile() -> dict[str, Any]: - path = _profile_path() - if not path.exists(): - return dict(_DEFAULT_PROFILE) - try: - data = json.loads(path.read_text()) - for key, default in _DEFAULT_PROFILE.items(): - data.setdefault(key, default) - return data - except (json.JSONDecodeError, OSError): - return dict(_DEFAULT_PROFILE) + data = _load_json(profile_path(), _DEFAULT_PROFILE) + for key, default in _DEFAULT_PROFILE.items(): + data.setdefault(key, default) + return data def save_profile(profile: dict[str, Any]) -> None: - path = _profile_path() - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(json.dumps(profile, indent=2) + "\n") + _write_json(profile_path(), profile) def load_skill_usage() -> dict[str, int]: - path = _usage_path() - if not path.exists(): - return {} - try: - return json.loads(path.read_text()) - except (json.JSONDecodeError, OSError): - return {} + return _load_json(_usage_path(), {}) def increment_skill_usage(skill_name: str) -> None: usage = load_skill_usage() usage[skill_name] = usage.get(skill_name, 0) + 1 - 
_usage_path().parent.mkdir(parents=True, exist_ok=True) - _usage_path().write_text(json.dumps(usage, indent=2) + "\n") + _write_json(_usage_path(), usage) def log_interaction(interaction_type: str, channel: str = "") -> None: path = _interactions_path() - path.parent.mkdir(parents=True, exist_ok=True) - interactions: list[dict[str, Any]] = [] - if path.exists(): - try: - interactions = json.loads(path.read_text()) - except (json.JSONDecodeError, OSError): - pass + interactions = _load_json(path, []) interactions.append({ "type": interaction_type, "channel": channel, "timestamp": time.time(), }) - # Keep only last 1000 interactions - interactions = interactions[-1000:] - path.write_text(json.dumps(interactions, indent=2) + "\n") + _write_json(path, interactions[-1000:]) def load_interactions() -> list[dict[str, Any]]: """Load the raw interaction log.""" - path = _interactions_path() - if not path.exists(): - return [] - try: - return json.loads(path.read_text()) - except (json.JSONDecodeError, OSError): - return [] + return _load_json(_interactions_path(), []) def get_contributions(days: int = 365) -> list[dict[str, Any]]: @@ -117,17 +101,8 @@ def get_contributions(days: int = 365) -> list[dict[str, Any]]: buckets: dict[str, dict[str, int]] = defaultdict(lambda: {"user": 0, "scheduled": 0}) for entry in interactions: - ts = entry.get("timestamp") - if ts is None: - continue - try: - if isinstance(ts, (int, float)): - d = datetime.fromtimestamp(ts, tz=timezone.utc).date() - else: - d = datetime.fromisoformat(str(ts)).date() - except (ValueError, OSError): - continue - if d < start: + d = _parse_interaction_date(entry) + if d is None or d < start: continue key = d.isoformat() itype = entry.get("type", "user") @@ -146,6 +121,21 @@ def get_contributions(days: int = 365) -> list[dict[str, Any]]: return result +def _parse_interaction_date(entry: dict[str, Any]) -> Any: + """Parse an interaction's timestamp to a date, or return None.""" + from datetime import datetime, 
timezone + + ts = entry.get("timestamp") + if ts is None: + return None + try: + if isinstance(ts, (int, float)): + return datetime.fromtimestamp(ts, tz=timezone.utc).date() + return datetime.fromisoformat(str(ts)).date() + except (ValueError, OSError): + return None + + def get_activity_stats() -> dict[str, Any]: """Compute summary activity statistics from interactions.""" from datetime import datetime, timedelta, timezone @@ -155,24 +145,14 @@ def get_activity_stats() -> dict[str, Any]: today = now.date() week_start = today - timedelta(days=today.weekday()) - total = len(interactions) today_count = 0 week_count = 0 month_count = 0 - streak = 0 - - # Build a set of active days for streak calculation active_days: set[str] = set() + for entry in interactions: - ts = entry.get("timestamp") - if ts is None: - continue - try: - if isinstance(ts, (int, float)): - d = datetime.fromtimestamp(ts, tz=timezone.utc).date() - else: - d = datetime.fromisoformat(str(ts)).date() - except (ValueError, OSError): + d = _parse_interaction_date(entry) + if d is None: continue active_days.add(d.isoformat()) if d == today: @@ -183,6 +163,7 @@ def get_activity_stats() -> dict[str, Any]: month_count += 1 # Calculate current streak (consecutive days ending today or yesterday) + streak = 0 check = today if check.isoformat() not in active_days: check = today - timedelta(days=1) @@ -191,7 +172,7 @@ def get_activity_stats() -> dict[str, Any]: check -= timedelta(days=1) return { - "total": total, + "total": len(interactions), "today": today_count, "this_week": week_count, "this_month": month_count, diff --git a/app/runtime/state/sandbox_config.py b/app/runtime/state/sandbox_config.py index 6d46b3a..b3cab01 100644 --- a/app/runtime/state/sandbox_config.py +++ b/app/runtime/state/sandbox_config.py @@ -144,23 +144,8 @@ def update(self, **kwargs: Any) -> None: # -- singleton ------------------------------------------------------------- -_instance: SandboxConfigStore | None = None +from 
..util.singletons import Singleton # noqa: E402 - -def get_sandbox_config() -> SandboxConfigStore: - global _instance - if _instance is None: - _instance = SandboxConfigStore() - return _instance - - -def _reset_sandbox_config() -> None: - global _instance - _instance = None - - -from ..util.singletons import register_singleton # noqa: E402 - -register_singleton(_reset_sandbox_config) +get_sandbox_config, _reset_sandbox_config = Singleton.create(SandboxConfigStore) diff --git a/app/runtime/state/tool_activity_csv.py b/app/runtime/state/tool_activity_csv.py new file mode 100644 index 0000000..98d3b8e --- /dev/null +++ b/app/runtime/state/tool_activity_csv.py @@ -0,0 +1,102 @@ +"""CSV export / session import helpers for tool activity.""" + +from __future__ import annotations + +import csv +import io +import time +from typing import TYPE_CHECKING, Any + +from .tool_activity_models import ToolActivityEntry, check_suspicious + +if TYPE_CHECKING: + from .tool_activity_store import ToolActivityStore + + +_CSV_COLUMNS = [ + "id", "timestamp", "session_id", "tool", "category", + "model", "status", "interaction_type", "duration_ms", "risk_score", "flagged", + "flag_reason", "shield_result", "shield_detail", "shield_elapsed_ms", + "arguments", "result", +] + + +def export_csv(store: ToolActivityStore, **filters: Any) -> str: + """Export filtered entries as CSV string.""" + data = store.query(**filters, limit=10000) + output = io.StringIO() + writer = csv.writer(output) + writer.writerow(_CSV_COLUMNS) + for e in data["entries"]: + writer.writerow([ + e["id"], + time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(e["timestamp"])), + e["session_id"], + e["tool"], + e["category"], + e.get("model", ""), + e["status"], + e.get("interaction_type", ""), + e.get("duration_ms") or "", + e.get("risk_score", 0), + "Yes" if e.get("flagged") else "No", + e.get("flag_reason", ""), + e.get("shield_result", ""), + e.get("shield_detail", ""), + e.get("shield_elapsed_ms") or "", + 
(e.get("arguments") or "")[:500], + (e.get("result") or "")[:500], + ]) + return output.getvalue() + + +def import_from_sessions(store: ToolActivityStore, session_store: object) -> int: + """Backfill tool activity from existing session data.""" + import logging + + from .session_store import SessionStore + + logger = logging.getLogger(__name__) + + if not isinstance(session_store, SessionStore): + return 0 + + existing_ids: set[str] = set() + with store._lock: # noqa: SLF001 + existing_ids = {f"{e.session_id}:{e.call_id}" for e in store._entries} # noqa: SLF001 + + count = 0 + for session_summary in session_store.list_sessions(): + sid = session_summary["id"] + session_data = session_store.get_session(sid) + if not session_data: + continue + for msg in session_data.get("messages", []): + for tc in msg.get("tool_calls", []): + key = f"{sid}:{tc.get('name', '')}:{msg.get('timestamp', 0)}" + if key in existing_ids: + continue + entry = ToolActivityEntry( + id=store._next_id(), # noqa: SLF001 + session_id=sid, + tool=tc.get("name", "unknown"), + call_id="", + category=store._infer_category(tc.get("name", "")), # noqa: SLF001 + arguments=tc.get("arguments", ""), + result=tc.get("result", "")[:2000], + status="completed", + timestamp=msg.get("timestamp", 0), + ) + flagged, reason, risk, factors = check_suspicious( + entry.arguments, entry.result, + ) + entry.flagged = flagged + entry.flag_reason = reason + entry.risk_score = risk + entry.risk_factors = factors + store._append(entry) # noqa: SLF001 + existing_ids.add(key) + count += 1 + + logger.info("[tool_activity] imported %d entries from sessions", count) + return count diff --git a/app/runtime/state/tool_activity_store.py b/app/runtime/state/tool_activity_store.py index 93f309d..b71e491 100644 --- a/app/runtime/state/tool_activity_store.py +++ b/app/runtime/state/tool_activity_store.py @@ -2,8 +2,6 @@ from __future__ import annotations -import csv -import io import json import logging import threading @@ -13,7 +11,7 
@@ from typing import Any from ..config.settings import cfg -from ..util.singletons import register_singleton +from ..util.singletons import Singleton from .tool_activity_models import ToolActivityEntry, check_suspicious logger = logging.getLogger(__name__) @@ -127,48 +125,27 @@ def record_complete( ) -> ToolActivityEntry | None: """Record the completion of a tool invocation.""" pending = self._pending_starts.pop(call_id, None) - if pending: - pending.result = result[:2000] if result else "" - pending.status = status - pending.duration_ms = (time.time() - pending.timestamp) * 1000 - flagged, reason, risk, factors = check_suspicious(pending.arguments, result) - if flagged and not pending.flagged: - pending.flagged = True - pending.flag_reason = reason - if risk > pending.risk_score: - pending.risk_score = risk - pending.risk_factors = list(set(pending.risk_factors + factors)) - # Update the in-memory entry (already appended) - # Append a completion record so the file has the full story - completion = ToolActivityEntry( - id=pending.id, - session_id=pending.session_id, - tool=pending.tool, - call_id=call_id, - category=pending.category, - arguments=pending.arguments, - result=pending.result, - status=status, - timestamp=pending.timestamp, - duration_ms=pending.duration_ms, - flagged=pending.flagged, - flag_reason=pending.flag_reason, - model=pending.model, - interaction_type=pending.interaction_type, - shield_result=pending.shield_result, - shield_detail=pending.shield_detail, - shield_elapsed_ms=pending.shield_elapsed_ms, - ) - # Replace the in-memory start entry with completed version - with self._lock: - for i, e in enumerate(self._entries): - if e.id == pending.id: - self._entries[i] = completion - break - with open(self._path, "a") as f: - f.write(json.dumps(asdict(completion), default=str) + "\n") - return completion - return None + if not pending: + return None + pending.result = result[:2000] if result else "" + pending.status = status + pending.duration_ms = 
(time.time() - pending.timestamp) * 1000 + flagged, reason, risk, factors = check_suspicious(pending.arguments, result) + if flagged and not pending.flagged: + pending.flagged = True + pending.flag_reason = reason + if risk > pending.risk_score: + pending.risk_score = risk + pending.risk_factors = list(set(pending.risk_factors + factors)) + # Replace the in-memory start entry with completed version + with self._lock: + for i, e in enumerate(self._entries): + if e.id == pending.id: + self._entries[i] = pending + break + with open(self._path, "a") as f: + f.write(json.dumps(asdict(pending), default=str) + "\n") + return pending def query( self, @@ -220,63 +197,54 @@ def query( def get_summary(self) -> dict[str, Any]: """Get aggregate statistics about tool activity.""" + from collections import Counter + with self._lock: entries = self._deduplicated() total = len(entries) flagged = sum(1 for e in entries if e.flagged) - by_tool: dict[str, int] = {} - by_category: dict[str, int] = {} - by_status: dict[str, int] = {} - by_session: dict[str, int] = {} - by_model: dict[str, int] = {} - by_interaction_type: dict[str, int] = {} + by_tool: Counter[str] = Counter() + by_category: Counter[str] = Counter() + by_status: Counter[str] = Counter() + by_session: Counter[str] = Counter() + by_model: Counter[str] = Counter() + by_interaction_type: Counter[str] = Counter() durations: list[float] = [] risk_scores: list[int] = [] for e in entries: - by_tool[e.tool] = by_tool.get(e.tool, 0) + 1 - by_category[e.category] = by_category.get(e.category, 0) + 1 - by_status[e.status] = by_status.get(e.status, 0) + 1 - by_session[e.session_id] = by_session.get(e.session_id, 0) + 1 + by_tool[e.tool] += 1 + by_category[e.category] += 1 + by_status[e.status] += 1 + by_session[e.session_id] += 1 if e.model: - by_model[e.model] = by_model.get(e.model, 0) + 1 + by_model[e.model] += 1 if e.interaction_type: - by_interaction_type[e.interaction_type] = ( - by_interaction_type.get(e.interaction_type, 0) + 
1 - ) + by_interaction_type[e.interaction_type] += 1 if e.duration_ms is not None: durations.append(e.duration_ms) if e.risk_score > 0: risk_scores.append(e.risk_score) - # Top tools sorted by count - top_tools = sorted(by_tool.items(), key=lambda x: x[1], reverse=True)[:20] - - # Duration stats avg_duration = sum(durations) / len(durations) if durations else 0 max_duration = max(durations) if durations else 0 p95_duration = sorted(durations)[int(len(durations) * 0.95)] if durations else 0 - # Risk distribution - high_risk = sum(1 for s in risk_scores if s >= 70) - medium_risk = sum(1 for s in risk_scores if 40 <= s < 70) - low_risk = sum(1 for s in risk_scores if 0 < s < 40) - return { "total": total, "flagged": flagged, - "by_tool": dict(top_tools), - "by_category": by_category, - "by_status": by_status, - "by_model": by_model, - "by_interaction_type": by_interaction_type, + "by_tool": dict(by_tool.most_common(20)), + "by_category": dict(by_category), + "by_status": dict(by_status), + "by_model": dict(by_model), + "by_interaction_type": dict(by_interaction_type), "sessions_with_activity": len(by_session), "avg_duration_ms": round(avg_duration, 1), "max_duration_ms": round(max_duration, 1), "p95_duration_ms": round(p95_duration, 1), - "risk_high": high_risk, - "risk_medium": medium_risk, - "risk_low": low_risk, + "risk_high": sum(1 for s in risk_scores if s >= 70), + "risk_medium": sum(1 for s in risk_scores if 40 <= s < 70), + "risk_low": sum(1 for s in risk_scores if 0 < s < 40), } def get_entry(self, entry_id: str) -> dict[str, Any] | None: @@ -410,36 +378,8 @@ def get_session_breakdown(self) -> list[dict[str, Any]]: def export_csv(self, **filters: Any) -> str: """Export filtered entries as CSV string.""" - data = self.query(**filters, limit=10000) - output = io.StringIO() - writer = csv.writer(output) - writer.writerow([ - "id", "timestamp", "session_id", "tool", "category", - "model", "status", "interaction_type", "duration_ms", "risk_score", "flagged", - 
"flag_reason", "shield_result", "shield_detail", "shield_elapsed_ms", - "arguments", "result", - ]) - for e in data["entries"]: - writer.writerow([ - e["id"], - time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(e["timestamp"])), - e["session_id"], - e["tool"], - e["category"], - e.get("model", ""), - e["status"], - e.get("interaction_type", ""), - e.get("duration_ms") or "", - e.get("risk_score", 0), - "Yes" if e.get("flagged") else "No", - e.get("flag_reason", ""), - e.get("shield_result", ""), - e.get("shield_detail", ""), - e.get("shield_elapsed_ms") or "", - (e.get("arguments") or "")[:500], - (e.get("result") or "")[:500], - ]) - return output.getvalue() + from .tool_activity_csv import export_csv + return export_csv(self, **filters) @staticmethod def _infer_category(tool: str) -> str: @@ -453,66 +393,10 @@ def _infer_category(tool: str) -> str: def import_from_sessions(self, session_store: object) -> int: """Backfill tool activity from existing session data.""" - from .session_store import SessionStore - - if not isinstance(session_store, SessionStore): - return 0 - - existing_ids: set[str] = set() - with self._lock: - existing_ids = {f"{e.session_id}:{e.call_id}" for e in self._entries} - - count = 0 - for session_summary in session_store.list_sessions(): - sid = session_summary["id"] - session_data = session_store.get_session(sid) - if not session_data: - continue - for msg in session_data.get("messages", []): - for tc in msg.get("tool_calls", []): - key = f"{sid}:{tc.get('name', '')}:{msg.get('timestamp', 0)}" - if key in existing_ids: - continue - entry = ToolActivityEntry( - id=self._next_id(), - session_id=sid, - tool=tc.get("name", "unknown"), - call_id="", - category=self._infer_category(tc.get("name", "")), - arguments=tc.get("arguments", ""), - result=tc.get("result", "")[:2000], - status="completed", - timestamp=msg.get("timestamp", 0), - ) - flagged, reason, risk, factors = check_suspicious(entry.arguments, entry.result) - entry.flagged = flagged - 
entry.flag_reason = reason - entry.risk_score = risk - entry.risk_factors = factors - self._append(entry) - existing_ids.add(key) - count += 1 - - logger.info("[tool_activity] imported %d entries from sessions", count) - return count + from .tool_activity_csv import import_from_sessions + return import_from_sessions(self, session_store) # -- Singleton access ------------------------------------------------------ -_instance: ToolActivityStore | None = None - - -def get_tool_activity_store() -> ToolActivityStore: - """Return the global ToolActivityStore singleton.""" - global _instance - if _instance is None: - _instance = ToolActivityStore() - return _instance - - -def _reset_tool_activity_store() -> None: - global _instance - _instance = None - - -register_singleton(_reset_tool_activity_store) +get_tool_activity_store, _reset_tool_activity_store = Singleton.create(ToolActivityStore) diff --git a/app/runtime/tests/test_bicep_deploy.py b/app/runtime/tests/test_bicep_deploy.py index 506ba63..8786b55 100644 --- a/app/runtime/tests/test_bicep_deploy.py +++ b/app/runtime/tests/test_bicep_deploy.py @@ -8,12 +8,12 @@ import pytest +from app.runtime.services.deployment import StepTracker from app.runtime.services.deployment.bicep_deployer import ( BicepDeployer, BicepDeployRequest, BicepDeployResult, _BICEP_TEMPLATE, - _ObservableSteps, ) from app.runtime.state.deploy_state import DeployStateStore @@ -190,7 +190,7 @@ def test_ensure_runtime_sp_creates_new(self) -> None: }) req = BicepDeployRequest(base_name="test", resource_group="rg") - result = deployer._ensure_runtime_sp(req, []) + result = deployer._ensure_runtime_sp(req, StepTracker()) assert result is not None assert result["app_id"] == "new-sp-id" @@ -214,7 +214,7 @@ def test_ensure_runtime_sp_reuses_existing(self) -> None: }) req = BicepDeployRequest(base_name="test", resource_group="rg") - result = deployer._ensure_runtime_sp(req, []) + result = deployer._ensure_runtime_sp(req, StepTracker()) assert result is not 
None assert result["app_id"] == "existing-id" @@ -392,12 +392,12 @@ def test_default_memory_model_is_gpt41(self) -> None: assert s.memory_model == "gpt-4.1" -class TestObservableSteps: - """Tests for the _ObservableSteps callback list.""" +class TestStepTracker: + """Tests for the StepTracker callback list.""" def test_callback_fires_on_append(self) -> None: received: list[dict] = [] - steps = _ObservableSteps(lambda s: received.append(s)) + steps = StepTracker(lambda s: received.append(s)) steps.append({"step": "a", "status": "ok"}) steps.append({"step": "b", "status": "failed"}) assert len(received) == 2 @@ -406,7 +406,7 @@ def test_callback_fires_on_append(self) -> None: assert list(steps) == received def test_no_callback(self) -> None: - steps = _ObservableSteps(None) + steps = StepTracker(None) steps.append({"step": "a", "status": "ok"}) assert len(steps) == 1 @@ -414,7 +414,7 @@ def test_callback_exception_does_not_abort(self) -> None: def bad_cb(_: dict) -> None: raise RuntimeError("boom") - steps = _ObservableSteps(bad_cb) + steps = StepTracker(bad_cb) steps.append({"step": "a", "status": "ok"}) assert len(steps) == 1 diff --git a/app/runtime/tests/test_e2e_aca_lifecycle.py b/app/runtime/tests/test_e2e_aca_lifecycle.py new file mode 100644 index 0000000..991d36f --- /dev/null +++ b/app/runtime/tests/test_e2e_aca_lifecycle.py @@ -0,0 +1,432 @@ +"""End-to-end lifecycle test for ACA deployment via the TUI headless mode. + +Uses the TUI's ``aca-setup``, ``aca-restart``, ``run``, ``health``, and +``aca-decommission`` CLI modes so the test exercises the same code path a +real user deploying to Azure Container Apps would follow. + +Architecture: local admin (permanent) + runtime on ACA. + + 1. ``bun run src/index.ts aca-setup`` -- build images, start admin, + Azure check, Foundry deploy, ACA deploy, chat probe. + 2. ``bun run src/index.ts aca-restart`` + ``run`` -- verify chat + survives an ACA revision restart. + 3. 
``docker restart polyclaw-admin`` + ``run`` -- verify chat survives + admin container restart (ACA runtime stays up). + 4. ``bun run src/index.ts aca-decommission`` -- tear down ACA + Foundry. + +Usage:: + + pytest app/runtime/tests/test_e2e_aca_lifecycle.py --run-e2e-setup -s -v + +Requirements: + - Docker + Bun running locally + - Active ``az login`` session (TUI bind-mounts ``~/.azure``) + - Sufficient Azure quota in the target region +""" + +from __future__ import annotations + +import json +import logging +import os +import random +import subprocess +import time +from pathlib import Path + +import pytest + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +_PROJECT_ROOT = Path(__file__).resolve().parents[3] +_TUI_DIR = _PROJECT_ROOT / "app" / "tui" +_ADMIN_CONTAINER = "polyclaw-admin" +_ADMIN_URL = "http://localhost:9090" +_HEALTH_URL = f"{_ADMIN_URL}/health" + +_BOOT_TIMEOUT = 120 +_HEALTH_POLL = 5 + +_RG = "polyclaw-e2e-aca-rg" +_LOCATION = "eastus" +_BASE_NAME = "ac" + os.urandom(3).hex() +_SUBSCRIPTION = os.environ.get( + "E2E_SUBSCRIPTION_ID", "546bf80c-9de8-4f7c-95db-43b72afbec60", +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _run( + cmd: list[str], + *, + timeout: int = 60, + check: bool = True, + cwd: Path | None = None, +) -> subprocess.CompletedProcess[str]: + return subprocess.run( + cmd, capture_output=True, text=True, + timeout=timeout, check=check, cwd=cwd or _PROJECT_ROOT, + ) + + +def _compose(*args: str, timeout: int = 60) -> subprocess.CompletedProcess[str]: + return _run(["docker", "compose", *args], timeout=timeout) + + +def _tui( + *args: str, + timeout: int = 60, + check: bool = True, + extra_env: dict[str, str] | None = None, +) -> 
subprocess.CompletedProcess[str]: + """Run a TUI CLI command via ``bun run src/index.ts ``.""" + env = {**os.environ, **(extra_env or {})} + return subprocess.run( + ["bun", "run", "src/index.ts", *args], + capture_output=True, text=True, + timeout=timeout, check=check, cwd=_TUI_DIR, env=env, + ) + + +def _aca_setup_env() -> dict[str, str]: + """Environment variables for the TUI ``aca-setup`` mode.""" + return { + "POLYCLAW_SETUP_RG": _RG, + "POLYCLAW_SETUP_LOCATION": _LOCATION, + "POLYCLAW_SETUP_BASE_NAME": _BASE_NAME, + "POLYCLAW_SETUP_SUBSCRIPTION_ID": _SUBSCRIPTION, + } + + +def _tui_run_chat(timeout: int = 180) -> tuple[str, int]: + """Send a chat probe via ``bun run src/index.ts run``.""" + r = _tui( + "run", "Reply with exactly: PROBE_OK", + timeout=timeout, check=False, + ) + return (r.stdout + r.stderr).strip(), r.returncode + + +def _poll_health(timeout: float = _BOOT_TIMEOUT) -> dict | None: + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + try: + r = _run(["curl", "-sf", "--max-time", "5", _HEALTH_URL], check=False, timeout=15) + if r.returncode == 0 and r.stdout.strip(): + return json.loads(r.stdout) + except Exception: + pass + time.sleep(_HEALTH_POLL) + return None + + +def _container_logs(container: str, tail: int = 200) -> str: + try: + r = _run(["docker", "logs", "--tail", str(tail), container], check=False, timeout=15) + return (r.stdout + r.stderr).strip() + except Exception as exc: + return f"" + + +def _diag(phase: str) -> str: + lines = [f"\n{'='*72}", f"DIAGNOSTICS -- {phase}", f"{'='*72}"] + lines.append(f"\n--- {_ADMIN_CONTAINER} ---") + lines.append(_container_logs(_ADMIN_CONTAINER, tail=100)) + lines.append("=" * 72) + return "\n".join(lines) + + +def _wait_for_tui_chat(deadline_seconds: int = 300) -> tuple[str | None, str]: + """Poll chat via ``bun run src/index.ts run`` until it succeeds.""" + deadline = time.monotonic() + deadline_seconds + while time.monotonic() < deadline: + output, rc = 
_tui_run_chat(timeout=120) + if rc == 0 and output: + return output, "ok" + logger.info("TUI chat probe: rc=%d -- retrying in 10s", rc) + time.sleep(10) + return None, "timeout" + + +def _purge_soft_deleted_resources() -> None: + try: + r = _run( + ["az", "cognitiveservices", "account", "list-deleted", "-o", "json"], + check=False, timeout=30, + ) + if r.returncode != 0: + return + deleted = json.loads(r.stdout) if r.stdout.strip() else [] + for item in deleted: + name = item.get("name", "") + loc = item.get("location", "") + res_id = item.get("id", "") + rg = "" + if "/resourceGroups/" in res_id: + rg = res_id.split("/resourceGroups/")[1].split("/")[0] + if _BASE_NAME in name or rg == _RG: + logger.info("Purging soft-deleted: %s (rg=%s)", name, rg) + _run( + ["az", "cognitiveservices", "account", "purge", + "--name", name, "--resource-group", rg, "--location", loc], + check=False, timeout=60, + ) + except Exception as exc: + logger.warning("Soft-delete purge failed: %s", exc) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture(scope="module") +def _ensure_bun(): + try: + _run(["bun", "--version"], timeout=10) + except Exception: + pytest.skip("Bun is not installed -- required for TUI headless tests") + + +@pytest.fixture(scope="module") +def _ensure_tui_deps(_ensure_bun): + if not (_TUI_DIR / "node_modules").exists(): + logger.info("Installing TUI dependencies ...") + _run(["bun", "install"], cwd=_TUI_DIR, timeout=60) + + +@pytest.fixture(scope="module") +def aca_setup(_ensure_tui_deps): + """Run ``bun run src/index.ts aca-setup`` -- full ACA provisioning. + + Builds images, starts local admin, deploys Foundry + ACA, + verifies the first chat probe works through the admin proxy. 
+ """ + try: + _run(["docker", "info"], timeout=15) + except Exception: + pytest.skip("Docker not available") + + # Clean slate -- stop any local containers + _compose("down", "-v", "--remove-orphans", timeout=60) + + logger.info("Running TUI ACA headless setup ...") + r = _tui( + "aca-setup", + timeout=2700, # 45 min max + check=False, + extra_env=_aca_setup_env(), + ) + + for line in (r.stdout + r.stderr).splitlines(): + if line.strip(): + logger.info("[tui:aca-setup] %s", line.rstrip()) + + if r.returncode != 0: + pytest.fail( + f"TUI ACA setup failed (exit {r.returncode}).\n" + f"stdout:\n{r.stdout[-3000:]}\n" + f"stderr:\n{r.stderr[-3000:]}\n" + f"{_diag('aca-setup')}" + ) + + # Parse structured JSON result + result = {} + for line in reversed(r.stdout.strip().splitlines()): + try: + result = json.loads(line) + break + except (json.JSONDecodeError, ValueError): + continue + + assert result.get("status") == "ok", f"ACA setup did not return ok: {result}" + logger.info("ACA setup complete: %s", json.dumps(result)) + + yield result + + # Teardown -- stop admin, ACA resources cleaned in _cleanup_azure + logger.info("Stopping admin container ...") + _compose("down", "-v", "--remove-orphans", timeout=60) + + +@pytest.fixture(scope="module", autouse=True) +def _cleanup_azure(): + _purge_soft_deleted_resources() + yield + logger.info("Cleaning up Azure RG %s ...", _RG) + try: + _run(["az", "group", "delete", "--name", _RG, "--yes", "--no-wait"], + check=False, timeout=30) + except Exception as exc: + logger.warning("RG cleanup failed: %s", exc) + + +# =================================================================== +# Tests +# =================================================================== + + +@pytest.mark.e2e_setup +class TestAcaLifecycle01Setup: + """ACA headless setup: build, Foundry + ACA deploy, first inference.""" + + def test_setup_completed(self, aca_setup) -> None: + assert aca_setup["status"] == "ok" + assert aca_setup.get("target") == "aca" + 
logger.info("ACA setup OK in %ss", aca_setup.get("elapsed_seconds", "?")) + + def test_first_chat_via_tui(self, aca_setup) -> None: + probe = aca_setup.get("probe_response", "") + assert probe, "ACA setup did not return a chat probe response" + logger.info("First inference (from ACA setup): %s", probe[:200]) + + def test_admin_running_locally(self, aca_setup) -> None: + """Admin container must be running locally.""" + r = _run( + ["docker", "inspect", "--format", "{{.State.Running}}", _ADMIN_CONTAINER], + check=False, timeout=10, + ) + assert r.stdout.strip() == "true", ( + f"Admin container not running: {r.stdout} {r.stderr}" + ) + + def test_aca_status(self, aca_setup) -> None: + """ACA status endpoint must show deployment info.""" + r = _run( + ["curl", "-sf", "--max-time", "10", f"{_ADMIN_URL}/api/setup/aca/status"], + check=False, timeout=15, + ) + if r.returncode == 0 and r.stdout.strip(): + data = json.loads(r.stdout) + assert data.get("deployed"), f"ACA not marked as deployed: {data}" + assert data.get("runtime_fqdn"), "No runtime FQDN in ACA status" + logger.info("ACA status: fqdn=%s acr=%s", data["runtime_fqdn"], data.get("acr_name")) + + def test_sp_written_to_env(self, aca_setup) -> None: + r = _run( + ["docker", "exec", _ADMIN_CONTAINER, "cat", "/data/.env"], + check=False, timeout=15, + ) + assert r.returncode == 0, f"Could not read .env: {r.stderr}" + env = {} + for line in r.stdout.strip().splitlines(): + if "=" in line: + k, v = line.split("=", 1) + env[k] = v + assert env.get("FOUNDRY_ENDPOINT"), "FOUNDRY_ENDPOINT not in .env" + assert env.get("ACA_RUNTIME_FQDN"), "ACA_RUNTIME_FQDN not in .env" + assert env.get("RUNTIME_URL"), "RUNTIME_URL not in .env" + + +@pytest.mark.e2e_setup +class TestAcaLifecycle02AcaRestart: + """Restart the ACA runtime revision -- chat must still work.""" + + def test_aca_restart(self, aca_setup) -> None: + """Trigger an ACA revision restart via the TUI.""" + r = _tui("aca-restart", timeout=120, check=False, 
extra_env=_aca_setup_env()) + for line in (r.stdout + r.stderr).splitlines(): + if line.strip(): + logger.info("[tui:aca-restart] %s", line.rstrip()) + assert r.returncode == 0, ( + f"TUI aca-restart failed: {r.stdout[-500:]} {r.stderr[-500:]}" + ) + + def test_chat_after_aca_restart(self, aca_setup) -> None: + """Chat must work after ACA restart (cold start may take a minute).""" + # ACA restart creates a new revision -- give it time + time.sleep(30) + text, status = _wait_for_tui_chat(deadline_seconds=300) + if status != "ok": + pytest.fail( + f"TUI chat failed after ACA restart. status={status}\n" + f"{_diag('aca-restart-chat')}" + ) + assert text, "Chat returned empty response" + logger.info("Post ACA-restart inference OK: %s", text[:200]) + + +@pytest.mark.e2e_setup +class TestAcaLifecycle03AdminRestart: + """Restart the local admin container -- ACA runtime stays up.""" + + def test_admin_restart(self, aca_setup) -> None: + r = _run(["docker", "restart", _ADMIN_CONTAINER], check=False, timeout=60) + assert r.returncode == 0, f"docker restart admin failed: {r.stderr}" + logger.info("Admin container restarted") + + def test_admin_healthy_after_restart(self, aca_setup) -> None: + health = _poll_health(timeout=90) + if health is None: + pytest.fail( + f"Admin not healthy after restart.\n{_diag('admin-restart')}" + ) + assert health["status"] == "ok" + + def test_chat_after_admin_restart(self, aca_setup) -> None: + """Chat must work -- admin reconnects to ACA runtime.""" + text, status = _wait_for_tui_chat(deadline_seconds=300) + if status != "ok": + pytest.fail( + f"TUI chat failed after admin restart. 
status={status}\n" + f"{_diag('admin-restart-chat')}" + ) + assert text, "Chat returned empty response" + logger.info("Post admin-restart inference OK: %s", text[:200]) + + +@pytest.mark.e2e_setup +class TestAcaLifecycle04RandomRestarts: + """Random restarts of admin container with pauses.""" + + def test_random_admin_restarts(self, aca_setup) -> None: + rounds = random.randint(2, 3) + logger.info("Running %d random admin restarts ...", rounds) + for i in range(rounds): + pause = random.uniform(3, 10) + logger.info("Admin restart %d/%d -- pausing %.1fs", i + 1, rounds, pause) + time.sleep(pause) + r = _run(["docker", "restart", _ADMIN_CONTAINER], check=False, timeout=60) + assert r.returncode == 0, f"docker restart #{i + 1} failed: {r.stderr}" + logger.info("All %d admin restarts issued", rounds) + + def test_admin_healthy_after_random_restarts(self, aca_setup) -> None: + health = _poll_health(timeout=120) + if health is None: + pytest.fail( + f"Admin not healthy after random restarts.\n{_diag('random-restart')}" + ) + assert health["status"] == "ok" + + def test_chat_after_random_restarts(self, aca_setup) -> None: + text, status = _wait_for_tui_chat(deadline_seconds=300) + if status != "ok": + pytest.fail( + f"TUI chat failed after random restarts. 
status={status}\n" + f"{_diag('random-restart-chat')}" + ) + assert text, "Chat returned empty response" + logger.info("Post random-restart inference OK: %s", text[:200]) + + +@pytest.mark.e2e_setup +class TestAcaLifecycle05Decommission: + """Tear down ACA + Foundry resources via the TUI.""" + + def test_decommission_via_tui(self, aca_setup) -> None: + r = _tui( + "aca-decommission", + timeout=600, + check=False, + extra_env={"POLYCLAW_SETUP_RG": _RG}, + ) + for line in (r.stdout + r.stderr).splitlines(): + if line.strip(): + logger.info("[tui:aca-decommission] %s", line.rstrip()) + # Best-effort -- don't assert exit code diff --git a/app/runtime/tests/test_e2e_lifecycle.py b/app/runtime/tests/test_e2e_lifecycle.py new file mode 100644 index 0000000..41a62bf --- /dev/null +++ b/app/runtime/tests/test_e2e_lifecycle.py @@ -0,0 +1,471 @@ +"""End-to-end lifecycle test via the TUI headless mode. + +Uses the TUI's ``setup``, ``run``, ``health``, and ``stop`` CLI modes +so the test exercises the same code path a real user would follow. + + 1. ``bun run src/index.ts setup`` -- build, start, Azure check, Foundry + deploy, wait for BYOK, chat probe. + 2. ``docker restart polyclaw-runtime`` + ``bun run src/index.ts run`` -- + verify chat survives a container restart. + 3. ``docker compose stop`` / ``up -d`` + ``bun run src/index.ts run`` -- + verify chat survives a full stop/start cycle. + 4. ``bun run src/index.ts decommission`` -- tear down Azure resources. 
+ +Usage:: + + pytest app/runtime/tests/test_e2e_lifecycle.py --run-e2e-setup -s -v + +Requirements: + - Docker + Bun running locally + - Active ``az login`` session (TUI bind-mounts ``~/.azure``) + - Sufficient Azure quota in the target region +""" + +from __future__ import annotations + +import json +import logging +import os +import random +import subprocess +import time +from pathlib import Path + +import pytest + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +_PROJECT_ROOT = Path(__file__).resolve().parents[3] +_TUI_DIR = _PROJECT_ROOT / "app" / "tui" +_ADMIN_CONTAINER = "polyclaw-admin" +_RUNTIME_CONTAINER = "polyclaw-runtime" +_ADMIN_URL = "http://localhost:9090" +_HEALTH_URL = f"{_ADMIN_URL}/health" + +_BOOT_TIMEOUT = 120 +_HEALTH_POLL = 3 + +_RG = "polyclaw-e2e-lifecycle-rg" +_LOCATION = "eastus" +_BASE_NAME = "lc" + os.urandom(3).hex() +_SUBSCRIPTION = os.environ.get( + "E2E_SUBSCRIPTION_ID", "546bf80c-9de8-4f7c-95db-43b72afbec60", +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _run( + cmd: list[str], + *, + timeout: int = 60, + check: bool = True, + cwd: Path | None = None, +) -> subprocess.CompletedProcess[str]: + return subprocess.run( + cmd, capture_output=True, text=True, + timeout=timeout, check=check, cwd=cwd or _PROJECT_ROOT, + ) + + +def _compose(*args: str, timeout: int = 60) -> subprocess.CompletedProcess[str]: + return _run(["docker", "compose", *args], timeout=timeout) + + +def _tui( + *args: str, + timeout: int = 60, + check: bool = True, + extra_env: dict[str, str] | None = None, +) -> subprocess.CompletedProcess[str]: + """Run a TUI CLI command via ``bun run src/index.ts <args>``.""" + env = {**os.environ, **(extra_env or {})} + return
subprocess.run( + ["bun", "run", "src/index.ts", *args], + capture_output=True, text=True, + timeout=timeout, check=check, cwd=_TUI_DIR, env=env, + ) + + +def _tui_setup_env() -> dict[str, str]: + """Environment variables for the TUI ``setup`` mode.""" + return { + "POLYCLAW_SETUP_RG": _RG, + "POLYCLAW_SETUP_LOCATION": _LOCATION, + "POLYCLAW_SETUP_BASE_NAME": _BASE_NAME, + "POLYCLAW_SETUP_SUBSCRIPTION_ID": _SUBSCRIPTION, + } + + +def _tui_run_chat(timeout: int = 180) -> tuple[str, int]: + """Send a chat probe via ``bun run src/index.ts run``.""" + r = _tui( + "run", "Reply with exactly: PROBE_OK", + timeout=timeout, check=False, + ) + return (r.stdout + r.stderr).strip(), r.returncode + + +def _poll_health(timeout: float = _BOOT_TIMEOUT) -> dict | None: + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + try: + r = _run(["curl", "-sf", "--max-time", "3", _HEALTH_URL], check=False, timeout=10) + if r.returncode == 0 and r.stdout.strip(): + return json.loads(r.stdout) + except Exception: + pass + time.sleep(_HEALTH_POLL) + return None + + +def _poll_runtime_health(timeout: float = _BOOT_TIMEOUT) -> dict | None: + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + try: + r = _run( + ["docker", "exec", _RUNTIME_CONTAINER, + "curl", "-sf", "--max-time", "3", "http://localhost:8080/health"], + check=False, timeout=10, + ) + if r.returncode == 0 and r.stdout.strip(): + return json.loads(r.stdout) + except Exception: + pass + time.sleep(_HEALTH_POLL) + return None + + +def _wait_for_runtime_ready(timeout: float = 120) -> bool: + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + logs = _container_logs(_RUNTIME_CONTAINER, tail=80) + if "BYOK provider injected" in logs or "BYOK mode" in logs: + logger.info("Runtime BYOK mode confirmed") + return True + time.sleep(5) + return False + + +def _container_logs(container: str, tail: int = 200) -> str: + try: + r = _run(["docker", "logs", "--tail", 
str(tail), container], check=False, timeout=15) + return (r.stdout + r.stderr).strip() + except Exception as exc: + return f"<failed to read logs: {exc}>" + + +def _diag(phase: str) -> str: + lines = [f"\n{'='*72}", f"DIAGNOSTICS -- {phase}", f"{'='*72}"] + for c in (_ADMIN_CONTAINER, _RUNTIME_CONTAINER): + lines.append(f"\n--- {c} ---") + lines.append(_container_logs(c, tail=100)) + lines.append("=" * 72) + return "\n".join(lines) + + +def _wait_for_tui_chat(deadline_seconds: int = 180) -> tuple[str | None, str]: + """Poll chat via ``bun run src/index.ts run`` until it succeeds.""" + deadline = time.monotonic() + deadline_seconds + while time.monotonic() < deadline: + output, rc = _tui_run_chat(timeout=90) + if rc == 0 and output: + return output, "ok" + logger.info("TUI chat probe: rc=%d -- retrying in 8s", rc) + time.sleep(8) + return None, "timeout" + + +def _purge_soft_deleted_resources() -> None: + try: + r = _run( + ["az", "cognitiveservices", "account", "list-deleted", "-o", "json"], + check=False, timeout=30, + ) + if r.returncode != 0: + return + deleted = json.loads(r.stdout) if r.stdout.strip() else [] + for item in deleted: + name = item.get("name", "") + loc = item.get("location", "") + res_id = item.get("id", "") + rg = "" + if "/resourceGroups/" in res_id: + rg = res_id.split("/resourceGroups/")[1].split("/")[0] + if _BASE_NAME in name or rg == _RG: + logger.info("Purging soft-deleted: %s (rg=%s)", name, rg) + _run( + ["az", "cognitiveservices", "account", "purge", + "--name", name, "--resource-group", rg, "--location", loc], + check=False, timeout=60, + ) + except Exception as exc: + logger.warning("Soft-delete purge failed: %s", exc) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture(scope="module") +def _ensure_bun(): + """Skip the entire module if Bun is not installed.""" + try: + _run(["bun", "--version"], timeout=10) + except
Exception: + pytest.skip("Bun is not installed -- required for TUI headless tests") + + +@pytest.fixture(scope="module") +def _ensure_tui_deps(_ensure_bun): + """Install TUI dependencies if needed.""" + if not (_TUI_DIR / "node_modules").exists(): + logger.info("Installing TUI dependencies ...") + _run(["bun", "install"], cwd=_TUI_DIR, timeout=60) + + +@pytest.fixture(scope="module") +def tui_setup(_ensure_tui_deps): + """Run ``bun run src/index.ts setup`` -- the full headless provisioning. + + This replaces the old ``stack`` fixture that manually called Docker + and curl. The TUI handles: Docker build, compose up, Azure cred + mount, subscription selection, Foundry deploy, runtime readiness + poll, and the first chat probe. + """ + try: + _run(["docker", "info"], timeout=15) + except Exception: + pytest.skip("Docker not available") + + # Clean slate + _compose("down", "-v", "--remove-orphans", timeout=60) + + logger.info("Running TUI headless setup (build + deploy + chat probe) ...") + r = _tui( + "setup", + timeout=900, + check=False, + extra_env=_tui_setup_env(), + ) + + # Log all TUI output for visibility + for line in (r.stdout + r.stderr).splitlines(): + if line.strip(): + logger.info("[tui:setup] %s", line.rstrip()) + + if r.returncode != 0: + pytest.fail( + f"TUI setup failed (exit {r.returncode}).\n" + f"stdout:\n{r.stdout[-2000:]}\n" + f"stderr:\n{r.stderr[-2000:]}\n" + f"{_diag('tui-setup')}" + ) + + # Parse structured result from the last line of stdout + result = {} + for line in reversed(r.stdout.strip().splitlines()): + try: + result = json.loads(line) + break + except (json.JSONDecodeError, ValueError): + continue + + assert result.get("status") == "ok", ( + f"TUI setup did not return ok: {result}" + ) + logger.info("TUI setup complete: %s", json.dumps(result)) + + yield result + + # Teardown + logger.info("Tearing down ...") + _compose("down", "-v", "--remove-orphans", timeout=60) + + +@pytest.fixture(scope="module", autouse=True) +def 
_cleanup_azure(): + _purge_soft_deleted_resources() + yield + logger.info("Cleaning up Azure RG %s ...", _RG) + try: + _run(["az", "group", "delete", "--name", _RG, "--yes", "--no-wait"], + check=False, timeout=30) + except Exception as exc: + logger.warning("RG cleanup failed: %s", exc) + + +# =================================================================== +# Tests -- ordered to run sequentially +# =================================================================== + + +@pytest.mark.e2e_setup +class TestLifecycle01TuiSetup: + """TUI headless setup: build, deploy, first inference.""" + + def test_setup_completed(self, tui_setup) -> None: + """The TUI ``setup`` command must exit 0 with status=ok.""" + assert tui_setup["status"] == "ok" + logger.info("TUI setup OK in %ss", tui_setup.get("elapsed_seconds", "?")) + + def test_first_chat_via_tui(self, tui_setup) -> None: + """The setup command already ran a chat probe -- verify it worked.""" + probe = tui_setup.get("probe_response", "") + assert probe, "TUI setup did not return a chat probe response" + logger.info("First inference (from TUI setup): %s", probe[:200]) + + def test_sp_written_to_env(self, tui_setup) -> None: + """SP creds must be in /data/.env after deploy.""" + r = _run( + ["docker", "exec", _ADMIN_CONTAINER, "cat", "/data/.env"], + check=False, timeout=15, + ) + assert r.returncode == 0, f"Could not read .env: {r.stderr}" + env = {} + for line in r.stdout.strip().splitlines(): + if "=" in line: + k, v = line.split("=", 1) + env[k] = v + assert env.get("RUNTIME_SP_APP_ID"), "RUNTIME_SP_APP_ID not in .env" + assert env.get("RUNTIME_SP_PASSWORD"), "RUNTIME_SP_PASSWORD not in .env" + assert env.get("RUNTIME_SP_TENANT"), "RUNTIME_SP_TENANT not in .env" + assert env.get("FOUNDRY_ENDPOINT"), "FOUNDRY_ENDPOINT not in .env" + + +@pytest.mark.e2e_setup +class TestLifecycle02RestartSurvival: + """After ``docker restart``, chat via TUI ``run`` must still work.""" + + def test_restart_runtime(self, tui_setup) -> 
None: + r = _run(["docker", "restart", _RUNTIME_CONTAINER], check=False, timeout=60) + assert r.returncode == 0, f"docker restart failed: {r.stderr}" + logger.info("Runtime container restarted") + + def test_runtime_healthy_after_restart(self, tui_setup) -> None: + health = _poll_runtime_health(timeout=90) + if health is None: + pytest.fail( + f"Runtime not healthy after restart.\n{_diag('restart-health')}" + ) + assert health["status"] == "ok" + + def test_runtime_identity_after_restart(self, tui_setup) -> None: + ready = _wait_for_runtime_ready(timeout=90) + if not ready: + logs = _container_logs(_RUNTIME_CONTAINER, tail=200) + pytest.fail( + f"Runtime BYOK mode not confirmed after restart.\n" + f"Logs (last 1000 chars):\n{logs[-1000:]}" + ) + + def test_chat_after_restart_via_tui(self, tui_setup) -> None: + """Send chat probe using ``bun run src/index.ts run``.""" + text, status = _wait_for_tui_chat(deadline_seconds=180) + if status != "ok": + pytest.fail( + f"TUI chat failed after restart. 
status={status}\n" + f"{_diag('restart-chat')}" + ) + assert text, "Chat returned empty response" + logger.info("Post-restart inference OK (via TUI run): %s", text[:200]) + + +@pytest.mark.e2e_setup +class TestLifecycle02bRandomRestarts: + """Restart the runtime 2-3 times with random pauses in between.""" + + def test_rapid_restarts(self, tui_setup) -> None: + """Restart runtime 2-3 times with random 3-15s pauses, then verify chat.""" + rounds = random.randint(2, 3) + logger.info("Running %d rapid restarts with random pauses ...", rounds) + for i in range(rounds): + pause = random.uniform(3, 15) + logger.info("Restart %d/%d -- pausing %.1fs before restart", i + 1, rounds, pause) + time.sleep(pause) + r = _run(["docker", "restart", _RUNTIME_CONTAINER], check=False, timeout=60) + assert r.returncode == 0, f"docker restart #{i + 1} failed: {r.stderr}" + logger.info("All %d restarts issued", rounds) + + def test_healthy_after_rapid_restarts(self, tui_setup) -> None: + health = _poll_runtime_health(timeout=120) + if health is None: + pytest.fail( + f"Runtime not healthy after rapid restarts.\n{_diag('rapid-restart')}" + ) + assert health["status"] == "ok" + + def test_byok_after_rapid_restarts(self, tui_setup) -> None: + ready = _wait_for_runtime_ready(timeout=120) + if not ready: + logs = _container_logs(_RUNTIME_CONTAINER, tail=200) + pytest.fail( + f"BYOK not confirmed after rapid restarts.\n" + f"Logs (last 1000 chars):\n{logs[-1000:]}" + ) + + def test_chat_after_rapid_restarts(self, tui_setup) -> None: + text, status = _wait_for_tui_chat(deadline_seconds=180) + if status != "ok": + pytest.fail( + f"TUI chat failed after rapid restarts. 
status={status}\n" + f"{_diag('rapid-restart-chat')}" + ) + assert text, "Chat returned empty response" + logger.info("Post rapid-restart inference OK: %s", text[:200]) + + +@pytest.mark.e2e_setup +class TestLifecycle03StopStartSurvival: + """Stop the stack and start it again -- TUI health + chat must work.""" + + def test_stop_start_cycle(self, tui_setup) -> None: + _compose("stop", timeout=30) + time.sleep(2) + _compose("up", "-d", timeout=60) + health = _poll_health(timeout=_BOOT_TIMEOUT) + assert health is not None, ( + f"Admin not healthy after stop/start.\n{_diag('stop-start')}" + ) + + def test_health_via_tui(self, tui_setup) -> None: + """TUI ``health`` command must succeed.""" + # Wait for stack to be fully up + time.sleep(5) + r = _tui("health", timeout=30, check=False) + assert r.returncode == 0, ( + f"TUI health failed: {r.stdout} {r.stderr}" + ) + logger.info("TUI health OK: %s", r.stdout[:200]) + + def test_chat_after_stop_start_via_tui(self, tui_setup) -> None: + text, status = _wait_for_tui_chat(deadline_seconds=180) + if status != "ok": + pytest.fail( + f"TUI chat failed after stop/start. 
status={status}\n" + f"{_diag('stop-start-chat')}" + ) + assert text, "Chat returned empty response" + logger.info("Post stop/start inference OK (via TUI run): %s", text[:200]) + + +@pytest.mark.e2e_setup +class TestLifecycle04Decommission: + """Tear down Azure resources via the TUI.""" + + def test_decommission_via_tui(self, tui_setup) -> None: + r = _tui( + "decommission", + timeout=480, + check=False, + extra_env={"POLYCLAW_SETUP_RG": _RG}, + ) + for line in (r.stdout + r.stderr).splitlines(): + if line.strip(): + logger.info("[tui:decommission] %s", line.rstrip()) + # Best-effort -- don't assert exit code diff --git a/app/runtime/tests/test_e2e_setup_process.py b/app/runtime/tests/test_e2e_setup_process.py index b7f9cd2..2f1f23b 100644 --- a/app/runtime/tests/test_e2e_setup_process.py +++ b/app/runtime/tests/test_e2e_setup_process.py @@ -78,7 +78,7 @@ _DEPLOY_TIMEOUT = 480 _HEALTH_POLL = 3 _API_TIMEOUT = 30 -_CHAT_TIMEOUT = 90 +_CHAT_TIMEOUT = 60 _RG = "polyclaw-e2e-setup-rg" _LOCATION = "eastus" @@ -93,14 +93,7 @@ import asyncio, json, sys, os, aiohttp async def main(): - secret = "" - try: - with open("/data/.env") as f: - for line in f: - if line.startswith("ADMIN_SECRET="): - secret = line.split("=", 1)[1].strip().strip('"') - except FileNotFoundError: - pass + secret = os.environ.get("_PROBE_SECRET", "") port = os.environ.get("ADMIN_PORT", "8080") url = f"http://localhost:{port}/api/chat/ws" @@ -108,7 +101,7 @@ async def main(): if secret: headers["Authorization"] = f"Bearer {secret}" - timeout = aiohttp.ClientTimeout(total=80) + timeout = aiohttp.ClientTimeout(total=45) async with aiohttp.ClientSession(timeout=timeout) as session: async with session.ws_connect(url, headers=headers) as ws: await ws.send_json({"action": "send", "text": "Reply with exactly: PROBE_OK"}) @@ -177,6 +170,33 @@ def _compose(*args: str, timeout: int = 60) -> subprocess.CompletedProcess[str]: return _run(["docker", "compose", *args], timeout=timeout) +def _start_stack(timeout: int 
= 60) -> None: + """Start containers, falling back to ``up -d`` if ``start`` fails.""" + r = _run(["docker", "compose", "start"], check=False, timeout=timeout) + if r.returncode == 0: + return + logger.warning( + "docker compose start failed (rc=%d), falling back to up -d: %s", + r.returncode, (r.stderr or r.stdout)[:300], + ) + _compose("up", "-d", timeout=timeout) + + +def _recover_stack() -> dict | None: + """Bring the stack back up and return health, or ``None`` on failure.""" + logger.warning("Recovering stack via docker compose up -d ...") + try: + _compose("up", "-d", timeout=120) + except Exception as exc: + logger.error("Stack recovery failed: %s", exc) + return None + health = _poll_health(timeout=_BOOT_TIMEOUT) + if health: + _copy_azure_creds() + time.sleep(5) + return health + + + def _api( path: str, *, @@ -299,11 +319,13 @@ def _copy_azure_creds() -> bool: return False -def _send_chat_probe() -> tuple[str | None, str]: +def _send_chat_probe(secret: str = "") -> tuple[str | None, str]: """Returns ``(text, status)`` where status is ok|error|not_authenticated|empty.""" try: r = _run( - ["docker", "exec", _RUNTIME_CONTAINER, "python", "-c", _CHAT_PROBE_SCRIPT], + ["docker", "exec"] + + (["-e", f"_PROBE_SECRET={secret}"] if secret else []) + + [_RUNTIME_CONTAINER, "python", "-c", _CHAT_PROBE_SCRIPT], check=False, timeout=_CHAT_TIMEOUT, ) if r.returncode == 2: @@ -330,8 +352,22 @@ def _diag(phase: str) -> str: return "\n".join(lines) +def _extract_rg_from_id(resource_id: str) -> str: + """Extract resource group from a soft-deleted resource's ID string.""" + # ID format: .../resourceGroups/<rg>/deletedAccounts/<name> + parts = resource_id.split("/") + for i, part in enumerate(parts): + if part.lower() == "resourcegroups" and i + 1 < len(parts): + return parts[i + 1] + return "" + + def _purge_soft_deleted_resources() -> None: - """Purge any soft-deleted Cognitive Services accounts matching _BASE_NAME.""" + """Purge any soft-deleted Cognitive Services accounts matching
_BASE_NAME. + + After issuing purge commands, polls ``list-deleted`` until Azure + confirms none of the matching resources remain (up to 120 s). + """ try: r = _run( ["az", "cognitiveservices", "account", "list-deleted", "-o", "json"], @@ -340,9 +376,10 @@ def _purge_soft_deleted_resources() -> None: if r.returncode != 0: return deleted = json.loads(r.stdout) if r.stdout.strip() else [] + purged_names: list[str] = [] for item in deleted: name = item.get("name", "") - rg = item.get("resourceGroup", "") + rg = item.get("resourceGroup") or _extract_rg_from_id(item.get("id", "")) loc = item.get("location", "") if _BASE_NAME in name or rg == _RG: logger.info("Purging soft-deleted resource: %s (rg=%s)", name, rg) @@ -355,10 +392,52 @@ def _purge_soft_deleted_resources() -> None: ], check=False, timeout=60, ) + purged_names.append(name) + + # Wait for Azure to confirm the purge propagated. + if purged_names: + deadline = time.monotonic() + 120 + while time.monotonic() < deadline: + r2 = _run( + ["az", "cognitiveservices", "account", "list-deleted", "-o", "json"], + check=False, timeout=30, + ) + if r2.returncode != 0: + break + still = json.loads(r2.stdout) if r2.stdout.strip() else [] + remaining = [ + d.get("name", "") for d in still + if d.get("name", "") in purged_names + ] + if not remaining: + logger.info("All purged resources confirmed gone") + break + logger.info("Waiting for purge propagation: %s", remaining) + time.sleep(10) except Exception as exc: logger.warning("Soft-delete purge failed: %s", exc) +def _cleanup_runtime_sps() -> None: + """Delete leftover runtime service principals from previous runs.""" + try: + r = _run( + ["az", "ad", "sp", "list", + "--display-name", f"polyclaw-runtime-{_BASE_NAME}", + "--query", "[].appId", "-o", "json"], + check=False, timeout=30, + ) + if r.returncode != 0: + return + sp_ids = json.loads(r.stdout) if r.stdout.strip() else [] + for sp_id in sp_ids: + logger.info("Deleting leftover SP: %s", sp_id) + _run(["az", "ad", 
"sp", "delete", "--id", sp_id], + check=False, timeout=30) + except Exception as exc: + logger.warning("SP cleanup failed: %s", exc) + + # --------------------------------------------------------------------------- # Fixtures (module-scoped -- one Docker stack for the whole file) # --------------------------------------------------------------------------- @@ -382,6 +461,11 @@ def stack(admin_secret): except Exception: pytest.skip("Docker not available") + # Clean stale containers and volumes from previous runs to avoid + # leftover .env / FOUNDRY_ENDPOINT pointing to deleted resources. + logger.info("Cleaning stale containers and volumes ...") + _compose("down", "-v", "--remove-orphans", timeout=60) + # Build logger.info("Building Docker image ...") try: @@ -450,6 +534,7 @@ def _cleanup_azure_rg(): """ # Pre-clean: purge any soft-deleted resources from previous runs _purge_soft_deleted_resources() + _cleanup_runtime_sps() yield logger.info("Initiating cleanup of %s ...", _RG) try: @@ -678,11 +763,13 @@ def test_chat_works_after_foundry_no_bot(self, stack, admin_secret) -> None: time.sleep(10) _poll_health(timeout=60) - # Chat must work -- Foundry is provisioned, bot is NOT required - deadline = time.monotonic() + 90 + # Chat must work -- Foundry is provisioned, bot is NOT required. + # Allow up to 3 minutes: after restart the Copilot CLI needs time to + # re-download its runtime, authenticate via BYOK, and start a session. 
+ deadline = time.monotonic() + 180 last_status = "" while time.monotonic() < deadline: - text, last_status = _send_chat_probe() + text, last_status = _send_chat_probe(admin_secret) if last_status == "ok" and text: logger.info("Chat works without bot: %r", text[:200]) return @@ -783,15 +870,15 @@ def test_status_shows_bot_configured(self, stack, admin_secret, telegram_config) else: logger.info("Telegram not configured (optional -- no secret file)") - def test_chat_works_with_bot_config_no_tunnel(self, stack) -> None: + def test_chat_works_with_bot_config_no_tunnel(self, stack, admin_secret) -> None: """Bot configured but no tunnel -- chat MUST still work. The bot service is optional and should not block core chat. """ - deadline = time.monotonic() + 60 + deadline = time.monotonic() + 120 last_status = "" while time.monotonic() < deadline: - text, last_status = _send_chat_probe() + text, last_status = _send_chat_probe(admin_secret) if last_status == "ok" and text: logger.info("Chat works with bot config, no tunnel: %s", text[:200]) return @@ -856,13 +943,13 @@ def test_status_full_stack(self, stack, admin_secret) -> None: if not data["foundry"]["deployed"]: pytest.xfail("Foundry not deployed (deploy may have failed earlier)") - def test_chat_full_stack(self, stack) -> None: + def test_chat_full_stack(self, stack, admin_secret) -> None: """With full stack running, chat MUST work end-to-end.""" time.sleep(8) - deadline = time.monotonic() + 90 + deadline = time.monotonic() + 120 last_status = "" while time.monotonic() < deadline: - text, last_status = _send_chat_probe() + text, last_status = _send_chat_probe(admin_secret) if last_status == "ok" and text: logger.info("Full stack chat response: %s", text[:300]) return @@ -1198,13 +1285,13 @@ def test_redeploy_content_safety(self, stack, admin_secret) -> None: ) assert data.get("status") == "ok" - def test_chat_still_works_after_redeploy(self, stack) -> None: + def test_chat_still_works_after_redeploy(self, stack, 
admin_secret) -> None: """Chat MUST survive an idempotent redeploy.""" time.sleep(5) - deadline = time.monotonic() + 90 + deadline = time.monotonic() + 120 last_status = "" while time.monotonic() < deadline: - text, last_status = _send_chat_probe() + text, last_status = _send_chat_probe(admin_secret) if last_status == "ok" and text: logger.info("Chat OK after redeploy: %s", text[:200]) return @@ -1244,6 +1331,15 @@ def test_save_configuration(self, stack, admin_secret, telegram_config) -> None: logger.info("Combined save steps: %s", data.get("steps")) def test_status_after_combined_save(self, stack, admin_secret, telegram_config) -> None: + # Combined save restarts the runtime container; the admin may also + # restart briefly (health-check churn, KV re-init). Poll to let + # the stack settle before asserting. + health = _poll_health(timeout=60) + if health is None: + health = _recover_stack() + assert health is not None, ( + f"Admin not healthy after combined save.\n{_diag('post-combined-save')}" + ) data = _api_ok("/api/setup/status", secret=admin_secret) assert data["bot_configured"] token, _ = telegram_config @@ -1277,8 +1373,10 @@ def test_stop_stack(self, stack, admin_secret) -> None: def test_start_stack_again(self, stack, admin_secret) -> None: """Start containers back up.""" - _compose("start", timeout=60) + _start_stack(timeout=60) health = _poll_health(timeout=_BOOT_TIMEOUT) + if health is None: + health = _recover_stack() assert health is not None, ( f"Admin not healthy after restart.\n{_diag('lifecycle-start-1')}" ) @@ -1323,12 +1421,12 @@ def test_config_survives_restart(self, stack, admin_secret) -> None: f"Profile name changed after restart: {profile['name']}" ) - def test_chat_works_after_restart(self, stack) -> None: + def test_chat_works_after_restart(self, stack, admin_secret) -> None: """Chat MUST work after a stop/start cycle.""" deadline = time.monotonic() + 120 last_status = "" while time.monotonic() < deadline: - text, last_status = 
_send_chat_probe() + text, last_status = _send_chat_probe(admin_secret) if last_status == "ok" and text: logger.info("Chat works after restart: %s", text[:200]) return @@ -1343,8 +1441,10 @@ def test_stop_and_start_again(self, stack, admin_secret) -> None: """Second stop/start cycle to verify repeated restarts work.""" _compose("stop", timeout=60) time.sleep(3) - _compose("start", timeout=60) + _start_stack(timeout=60) health = _poll_health(timeout=_BOOT_TIMEOUT) + if health is None: + health = _recover_stack() assert health is not None, ( f"Admin not healthy after second restart.\n{_diag('lifecycle-start-2')}" ) @@ -1353,12 +1453,12 @@ def test_stop_and_start_again(self, stack, admin_secret) -> None: time.sleep(5) logger.info("Stack healthy after second restart: %s", health) - def test_chat_works_after_second_restart(self, stack) -> None: + def test_chat_works_after_second_restart(self, stack, admin_secret) -> None: """Chat MUST still work after two stop/start cycles.""" deadline = time.monotonic() + 120 last_status = "" while time.monotonic() < deadline: - text, last_status = _send_chat_probe() + text, last_status = _send_chat_probe(admin_secret) if last_status == "ok" and text: logger.info("Chat OK after 2nd restart: %s", text[:200]) return @@ -1405,6 +1505,11 @@ class TestPhase17ConfigChange: def test_change_profile(self, stack, admin_secret) -> None: """Update the agent name and personality.""" + # Ensure the stack survived Phase 15-16 before proceeding. 
+ health = _poll_health(timeout=10) + if health is None: + health = _recover_stack() + assert health is not None, "Stack unrecoverable before Phase 17" data = _api_ok( "/api/profile", method="POST", @@ -1444,12 +1549,12 @@ def test_restart_after_config_change(self, stack, admin_secret) -> None: time.sleep(10) _poll_health(timeout=60) - def test_chat_works_after_config_change(self, stack) -> None: + def test_chat_works_after_config_change(self, stack, admin_secret) -> None: """Chat MUST still work after config changes + restart.""" deadline = time.monotonic() + 120 last_status = "" while time.monotonic() < deadline: - text, last_status = _send_chat_probe() + text, last_status = _send_chat_probe(admin_secret) if last_status == "ok" and text: logger.info("Chat OK after config change: %s", text[:200]) return @@ -1490,6 +1595,11 @@ class TestPhase18BotServiceToggle: def test_remove_telegram_config(self, stack, admin_secret, telegram_config) -> None: """Remove Telegram channel config.""" + # Ensure the stack survived previous phases. 
+ health = _poll_health(timeout=10) + if health is None: + health = _recover_stack() + assert health is not None, "Stack unrecoverable before Phase 18" token, _ = telegram_config if not token: pytest.skip("Telegram was never configured") @@ -1532,12 +1642,12 @@ def test_restart_after_bot_removal(self, stack, admin_secret) -> None: f"{_diag('bot-removal-restart')}" ) - def test_chat_works_without_bot_service(self, stack) -> None: + def test_chat_works_without_bot_service(self, stack, admin_secret) -> None: """Chat MUST work with no bot config at all -- only Foundry.""" deadline = time.monotonic() + 120 last_status = "" while time.monotonic() < deadline: - text, last_status = _send_chat_probe() + text, last_status = _send_chat_probe(admin_secret) if last_status == "ok" and text: logger.info("Chat works without bot service: %s", text[:200]) return @@ -1577,12 +1687,12 @@ def test_re_add_telegram_config(self, stack, admin_secret, telegram_config) -> N secret=admin_secret, ) - def test_chat_works_after_bot_re_add(self, stack) -> None: + def test_chat_works_after_bot_re_add(self, stack, admin_secret) -> None: """Chat MUST work after re-adding bot config.""" - deadline = time.monotonic() + 90 + deadline = time.monotonic() + 120 last_status = "" while time.monotonic() < deadline: - text, last_status = _send_chat_probe() + text, last_status = _send_chat_probe(admin_secret) if last_status == "ok" and text: logger.info("Chat works after bot re-add: %s", text[:200]) return @@ -1602,13 +1712,13 @@ def test_remove_bot_config_again(self, stack, admin_secret) -> None: secret=admin_secret, ) - def test_chat_still_works_after_second_removal(self, stack) -> None: + def test_chat_still_works_after_second_removal(self, stack, admin_secret) -> None: """Chat MUST work after the second bot removal.""" time.sleep(5) - deadline = time.monotonic() + 90 + deadline = time.monotonic() + 120 last_status = "" while time.monotonic() < deadline: - text, last_status = _send_chat_probe() + text, 
last_status = _send_chat_probe(admin_secret) if last_status == "ok" and text: logger.info("Chat after 2nd bot removal: %s", text[:200]) return @@ -1654,6 +1764,10 @@ class TestPhase19Voice: """Deploy ACS for voice calls.""" def test_voice_config(self, stack, admin_secret) -> None: + health = _poll_health(timeout=10) + if health is None: + health = _recover_stack() + assert health is not None, "Stack unrecoverable before Phase 19" data = _api_ok("/api/setup/voice/config", secret=admin_secret) logger.info("Voice config: %s", json.dumps(data, indent=2)[:500]) @@ -1685,6 +1799,10 @@ class TestPhase20Lockdown: """Test lockdown mode toggle.""" def test_lockdown_status(self, stack, admin_secret) -> None: + health = _poll_health(timeout=10) + if health is None: + health = _recover_stack() + assert health is not None, "Stack unrecoverable before Phase 20" data = _api_ok("/api/setup/lockdown", secret=admin_secret) logger.info("Lockdown: %s", data) @@ -1716,6 +1834,10 @@ class TestPhase21Decommission: """Tear down all Azure resources.""" def test_decommission_foundry(self, stack, admin_secret) -> None: + health = _poll_health(timeout=10) + if health is None: + health = _recover_stack() + assert health is not None, "Stack unrecoverable before Phase 21" body = {"resource_group": _RG} code, data = _api( "/api/setup/foundry/decommission", @@ -1809,6 +1931,10 @@ def test_tui_health(self, stack) -> None: pytest.skip("Bun not installed") if not _TUI_ENTRY.exists(): pytest.skip("TUI source not found") + health = _poll_health(timeout=10) + if health is None: + health = _recover_stack() + assert health is not None, "Stack unrecoverable before Phase 22" r = _run( ["bun", "run", str(_TUI_ENTRY), "health"], check=False, timeout=30, cwd=_TUI_DIR, diff --git a/app/runtime/tests/test_identity_routes.py b/app/runtime/tests/test_identity_routes.py index e003dbf..ff2f8a0 100644 --- a/app/runtime/tests/test_identity_routes.py +++ b/app/runtime/tests/test_identity_routes.py @@ -108,6 +108,7 @@ 
async def test_roles_with_assignments(self, mock_cfg) -> None: mock_cfg.runtime_sp_app_id = "app-id" mock_cfg.aca_mi_client_id = "" mock_cfg.runtime_sp_tenant = "" + mock_cfg.env.read.return_value = "polyclaw-rg" az = MagicMock() az.json.side_effect = [ @@ -124,6 +125,8 @@ async def test_roles_with_assignments(self, mock_cfg) -> None: "condition": "", }, ], + [], # _discover_session_pool (RG-scoped) + [], # _discover_session_pool (subscription-wide) ] routes = IdentityRoutes(az=az) @@ -133,14 +136,15 @@ async def test_roles_with_assignments(self, mock_cfg) -> None: assert resp.status == 200 data = await resp.json() assert len(data["assignments"]) == 2 - assert len(data["checks"]) == 5 + assert len(data["checks"]) == 8 - checks = {c["role"]: c["present"] for c in data["checks"]} - assert checks["Cognitive Services User"] is True - assert checks["Reader"] is True - assert checks["Azure Bot Service Contributor Role"] is False - assert checks["Key Vault Secrets Officer"] is False - assert checks["Azure ContainerApps Session Executor"] is False + checks = {c["feature"]: c["present"] for c in data["checks"]} + assert checks["Prompt Shields (Content Safety)"] is True + assert checks["Resource Group Visibility"] is True + assert checks["Bot Service Management"] is False + assert checks["Key Vault Secrets"] is False + assert checks["Sandbox / Code Interpreter"] is False + assert checks["Foundry BYOK (OpenAI Chat)"] is False # Session executor check should include detail about missing role se_check = next( @@ -309,11 +313,16 @@ async def test_fix_skips_without_endpoint(self, mock_cfg, tmp_path) -> None: mock_cfg.runtime_sp_app_id = "app-id" mock_cfg.aca_mi_client_id = "" mock_cfg.runtime_sp_tenant = "" + mock_cfg.foundry_endpoint = "" az = MagicMock() az.json.side_effect = [ {"id": "sp-oid", "objectId": "sp-oid"}, # resolve principal + [], # _discover_session_pool (RG-scoped) + [], # _discover_session_pool (subscription-wide) ] + az.ok.return_value = (True, "") + 
mock_cfg.env.read.return_value = "polyclaw-rg" store = GuardrailsConfigStore(tmp_path / "g.json") routes = IdentityRoutes(az=az, guardrails_store=store) @@ -322,7 +331,9 @@ async def test_fix_skips_without_endpoint(self, mock_cfg, tmp_path) -> None: resp = await client.post("/api/identity/fix-roles") assert resp.status == 200 data = await resp.json() - assert data["steps"][0]["status"] == "skipped" + # First step is content_safety_rbac (skipped: no endpoint) + cs_step = next(s for s in data["steps"] if "content_safety" in s["step"]) + assert cs_step["status"] == "skipped" @pytest.mark.asyncio @patch("app.runtime.server.routes.identity_routes.cfg") @@ -330,14 +341,16 @@ async def test_fix_assigns_role(self, mock_cfg, tmp_path) -> None: mock_cfg.runtime_sp_app_id = "app-id" mock_cfg.aca_mi_client_id = "" mock_cfg.runtime_sp_tenant = "" + mock_cfg.foundry_endpoint = "" + mock_cfg.env.read.return_value = "polyclaw-rg" az = MagicMock() az.last_stderr = "" az.json.side_effect = [ {"id": "sp-oid"}, # resolve principal - [{"id": "/sub/rg/cs", "properties": { - "endpoint": "https://my-cs.cognitiveservices.azure.com/", - }}], # account list (scoped to RG) + ["/sub/rg/cs"], # _resolve_cs_resource (name lookup) + [], # _discover_session_pool (RG-scoped) + [], # _discover_session_pool (subscription-wide) ] az.ok.return_value = (True, "") @@ -363,14 +376,16 @@ async def test_fix_falls_back_to_assignee_on_sp_failure(self, mock_cfg, tmp_path mock_cfg.runtime_sp_app_id = "app-id" mock_cfg.aca_mi_client_id = "" mock_cfg.runtime_sp_tenant = "" + mock_cfg.foundry_endpoint = "" + mock_cfg.env.read.return_value = "polyclaw-rg" az = MagicMock() az.last_stderr = "" az.json.side_effect = [ None, # resolve principal fails (CAE error) - [{"id": "/sub/rg/cs", "properties": { - "endpoint": "https://my-cs.cognitiveservices.azure.com/", - }}], # account list + ["/sub/rg/cs"], # _resolve_cs_resource (name lookup) + [], # _discover_session_pool (RG-scoped) + [], # _discover_session_pool 
(subscription-wide) ] az.ok.return_value = (True, "") diff --git a/app/runtime/tests/test_media_outgoing.py b/app/runtime/tests/test_media_outgoing.py index e72a439..87da8e1 100644 --- a/app/runtime/tests/test_media_outgoing.py +++ b/app/runtime/tests/test_media_outgoing.py @@ -13,24 +13,12 @@ from app.runtime.media.outgoing import ( MAX_OUTGOING_FILE_BYTES, _move_to_error, - _too_large_msg, collect_pending_outgoing, move_attachments_to_error, read_error_details, ) -class TestTooLargeMsg: - def test_basic(self): - msg = _too_large_msg(500_000) - assert "500,000" in msg - assert "190" in msg - - def test_with_extra(self): - msg = _too_large_msg(500_000, "Resize failed.") - assert "Resize failed." in msg - - class TestMoveToError: def test_moves_file(self, data_dir: Path): from app.runtime.config.settings import cfg diff --git a/app/runtime/tests/test_provisioner.py b/app/runtime/tests/test_provisioner.py index 68bbe5e..23582e0 100644 --- a/app/runtime/tests/test_provisioner.py +++ b/app/runtime/tests/test_provisioner.py @@ -7,6 +7,7 @@ import pytest from app.runtime.config.settings import cfg +from app.runtime.services.deployment._models import StepTracker from app.runtime.services.deployment.provisioner import Provisioner from app.runtime.state.deploy_state import DeployStateStore from app.runtime.state.infra_config import InfraConfigStore @@ -62,7 +63,7 @@ def test_calls_register_app(self, provisioner, deployer, data_dir): ) bc = MagicMock(resource_group="rg", location="eastus", display_name="polyclaw", bot_handle="") - steps: list[dict] = [] + steps = StepTracker() result = provisioner._ensure_app_registration(bc, steps) assert result is True @@ -75,7 +76,7 @@ def test_returns_false_on_failure(self, provisioner, deployer, data_dir): ) bc = MagicMock(resource_group="rg", location="eastus", display_name="polyclaw", bot_handle="") - steps: list[dict] = [] + steps = StepTracker() result = provisioner._ensure_app_registration(bc, steps) assert result is False diff 
--git a/app/runtime/util/__init__.py b/app/runtime/util/__init__.py index c37108d..b2d7a1f 100644 --- a/app/runtime/util/__init__.py +++ b/app/runtime/util/__init__.py @@ -3,12 +3,13 @@ from .async_helpers import run_sync from .env_file import EnvFile from .result import Result -from .singletons import register_singleton, reset_all_singletons +from .singletons import Singleton, register_singleton, reset_all_singletons from .spotlight import spotlight __all__ = [ "EnvFile", "Result", + "Singleton", "register_singleton", "reset_all_singletons", "run_sync", diff --git a/app/runtime/util/singletons.py b/app/runtime/util/singletons.py index 97ad4b9..c999095 100644 --- a/app/runtime/util/singletons.py +++ b/app/runtime/util/singletons.py @@ -3,9 +3,12 @@ from __future__ import annotations from collections.abc import Callable +from typing import Generic, TypeVar, overload _reset_fns: list[Callable[[], None]] = [] +T = TypeVar("T") + def register_singleton(reset_fn: Callable[[], None]) -> None: """Register a reset function to be called during test teardown.""" @@ -16,3 +19,40 @@ def reset_all_singletons() -> None: """Reset every registered singleton -- intended for test isolation.""" for fn in _reset_fns: fn() + + +class Singleton(Generic[T]): + """Descriptor that lazily creates a singleton and registers it for test reset. + + Usage at module level:: + + get_foo, _reset_foo = Singleton.create(FooClass) + + Or with a custom factory:: + + get_foo, _reset_foo = Singleton.create(FooClass, factory=lambda: FooClass(arg)) + """ + + @staticmethod + def create( + cls: type[T], + *, + factory: Callable[[], T] | None = None, + ) -> tuple[Callable[[], T], Callable[[T | None], None]]: + """Return a ``(getter, resetter)`` pair for *cls*. + + The *resetter* can be called with no args (or ``None``) to clear the + singleton, or with an instance to replace it (useful in tests). 
+ """ + instance: list[T | None] = [None] + + def get() -> T: + if instance[0] is None: + instance[0] = factory() if factory else cls() + return instance[0] # type: ignore[return-value] + + def reset(value: T | None = None) -> None: + instance[0] = value + + register_singleton(reset) + return get, reset diff --git a/app/runtime/util/spotlight.py b/app/runtime/util/spotlight.py index 8acf0e0..58aca6f 100644 --- a/app/runtime/util/spotlight.py +++ b/app/runtime/util/spotlight.py @@ -1,26 +1,9 @@ -"""Spotlighting helpers to defend against indirect prompt injection. - -Implements the *data marking* technique described in the Microsoft -research paper (arXiv:2403.14720). Untrusted text is transformed so -that LLMs can clearly distinguish it from trusted system instructions. - -Two strategies are provided: - -* **Data marking** -- replaces whitespace with a sentinel token so the - model sees a visually distinct block of text. -* **Delimiting** -- wraps the text in unique boundary tokens. - -Both approaches are lightweight (no external API calls) and can be -applied before feeding untrusted content into any LLM prompt. -""" +"""Spotlighting helpers to defend against indirect prompt injection.""" from __future__ import annotations import re -# Default sentinel used for data-marking. The caret (^) is recommended -# by Microsoft's documentation because it rarely appears in natural text -# and does not conflict with common markup languages. _DEFAULT_MARKER = "^" @@ -53,26 +36,7 @@ def spotlight( ) -> str: """Apply a spotlighting transformation to untrusted *text*. - Parameters - ---------- - text: - The untrusted content to transform. - method: - ``"datamark"`` (default) or ``"delimit"``. - marker: - Sentinel token for data-marking (default ``^``). - tag: - Boundary tag for delimiting. - - Returns - ------- - str - The transformed text. - - Raises - ------ - ValueError - If *method* is not recognized. + *method* is ``"datamark"`` (default) or ``"delimit"``. 
""" if method == "datamark": return datamark(text, marker=marker) diff --git a/app/tui/src/deploy/docker.ts b/app/tui/src/deploy/docker.ts index f79c7e4..51034d2 100644 --- a/app/tui/src/deploy/docker.ts +++ b/app/tui/src/deploy/docker.ts @@ -99,6 +99,9 @@ export async function buildAcaImage( [ "docker", "build", "--platform", "linux/amd64", + "--provenance=false", + "--sbom=false", + "--output", "type=docker", "-t", `polyclaw:${tag}`, ".", ], diff --git a/app/tui/src/headless/aca_setup.ts b/app/tui/src/headless/aca_setup.ts new file mode 100644 index 0000000..f875cfb --- /dev/null +++ b/app/tui/src/headless/aca_setup.ts @@ -0,0 +1,425 @@ +/** + * Headless ACA setup mode -- full day-one provisioning with ACA runtime. + * + * Orchestrates: + * 1. Docker build (compose + linux/amd64 for ACA) + * 2. Start admin container locally + * 3. Azure CLI check + subscription selection + * 4. Foundry deploy (Bicep) + * 5. ACA deploy (ACR push, ACA environment, container app) + * 6. Wait for runtime readiness via admin proxy + * 7. Chat probe via WebSocket + * + * Designed for CI and E2E tests -- all output goes to stdout/stderr. 
+ * + * Environment: + * POLYCLAW_SETUP_RG Resource group (default: polyclaw-e2e-aca-rg) + * POLYCLAW_SETUP_LOCATION Azure region (default: eastus) + * POLYCLAW_SETUP_BASE_NAME Cognitive Services base name (auto if empty) + * POLYCLAW_SETUP_SUBSCRIPTION_ID Target subscription ID (first if empty) + * POLYCLAW_SETUP_ACA_IMAGE_TAG ACA image tag (default: aca) + */ + +import { + buildImage, + buildAcaImage, + getAdminSecret, + waitForReady, + writeAzureOverride, + stopContainer, +} from "../deploy/docker.js"; +import { exec } from "../deploy/process.js"; +import { resolve } from "path"; + +const PROJECT_ROOT = resolve(import.meta.dir, "../../../.."); + +// --------------------------------------------------------------------------- +// Config from environment +// --------------------------------------------------------------------------- + +const RG = process.env.POLYCLAW_SETUP_RG || "polyclaw-e2e-aca-rg"; +const LOCATION = process.env.POLYCLAW_SETUP_LOCATION || "eastus"; +const BASE_NAME = process.env.POLYCLAW_SETUP_BASE_NAME || ""; +const SUBSCRIPTION_ID = process.env.POLYCLAW_SETUP_SUBSCRIPTION_ID || ""; +const DEPLOY_KV = process.env.POLYCLAW_SETUP_DEPLOY_KV !== "0"; +const IMAGE_TAG = process.env.POLYCLAW_SETUP_ACA_IMAGE_TAG || "aca"; +const COMPOSE_ADMIN_PORT = 9090; +const BASE_URL = `http://localhost:${COMPOSE_ADMIN_PORT}`; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function log(msg: string): void { + const ts = new Date().toISOString().slice(11, 19); + console.log(`[${ts}] ${msg}`); +} + +function fail(msg: string): never { + console.error(`FATAL: ${msg}`); + process.exit(1); +} + +async function api>( + path: string, + opts?: { method?: string; body?: unknown; timeoutMs?: number }, +): Promise<{ status: number; data: T }> { + const secret = await getAdminSecret(); + const headers: Record = { "Content-Type": "application/json" }; 
+ if (secret) headers["Authorization"] = `Bearer ${secret}`; + + const res = await fetch(`${BASE_URL}${path}`, { + method: opts?.method || "GET", + headers, + body: opts?.body ? JSON.stringify(opts.body) : undefined, + signal: AbortSignal.timeout(opts?.timeoutMs || 30_000), + }); + const data = await res.json().catch(() => null) as T; + return { status: res.status, data }; +} + +async function sleep(ms: number): Promise { + await Bun.sleep(ms); +} + +// --------------------------------------------------------------------------- +// Steps +// --------------------------------------------------------------------------- + +async function stepBuildImages(): Promise { + log("Building Docker image (compose) ..."); + writeAzureOverride(); + + const buildOk = await buildImage((line) => { + if (process.env.VERBOSE) console.log(line); + }); + if (!buildOk) fail("Docker compose build failed"); + + log(`Building linux/amd64 image for ACA (tag=${IMAGE_TAG}) ...`); + const acaOk = await buildAcaImage(IMAGE_TAG, (line) => { + if (process.env.VERBOSE) console.log(line); + }); + if (!acaOk) fail("Docker build (linux/amd64) failed"); +} + +async function stepStartAdminOnly(): Promise { + log("Stopping any existing stack ..."); + try { + await exec(["docker", "compose", "down", "--remove-orphans"], PROJECT_ROOT); + } catch { /* may not be running */ } + + writeAzureOverride(); + + log("Starting admin container only ..."); + const { exitCode, stderr } = await exec( + ["docker", "compose", "up", "-d", "admin"], + PROJECT_ROOT, + ); + if (exitCode !== 0) fail(`docker compose up admin failed (exit ${exitCode}): ${stderr}`); + + log("Waiting for admin health ..."); + const ready = await waitForReady(BASE_URL, 120_000); + if (!ready) fail("Admin did not become healthy within 120s"); + log("Admin is healthy"); +} + +async function stepAzureCheck(): Promise { + log("Checking Azure CLI status ..."); + const deadline = Date.now() + 120_000; + + while (Date.now() < deadline) { + try { + const { 
status, data } = await api>( + "/api/setup/azure/check", + { timeoutMs: 60_000 }, + ); + if (status === 200 && data) { + const st = data.status; + if (st === "logged_in") { + log(`Azure logged in: ${data.user || "?"} (${data.subscription || "?"})`); + return; + } + if (st === "needs_subscription") { + log("Azure needs subscription selection"); + await stepSetSubscription(); + const { data: d2 } = await api>( + "/api/setup/azure/check", + { timeoutMs: 60_000 }, + ); + if (d2?.status === "logged_in") { + log(`Azure logged in: ${d2.user || "?"} (${d2.subscription || "?"})`); + return; + } + } + } + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + log(`Azure check attempt failed: ${msg} -- retrying ...`); + } + await sleep(5_000); + } + fail("Azure CLI not logged in within 120s -- ensure ~/.azure exists"); +} + +async function stepSetSubscription(): Promise { + if (SUBSCRIPTION_ID) { + log(`Setting subscription: ${SUBSCRIPTION_ID}`); + await api("/api/setup/azure/subscription", { + method: "POST", + body: { subscription_id: SUBSCRIPTION_ID }, + }); + return; + } + + const { data } = await api>>("/api/setup/azure/subscriptions"); + const subs = Array.isArray(data) ? 
data : []; + if (subs.length === 0) fail("No Azure subscriptions available"); + + const sub = subs[0]; + log(`Auto-selecting subscription: ${sub.name} (${sub.id})`); + await api("/api/setup/azure/subscription", { + method: "POST", + body: { subscription_id: sub.id }, + }); +} + +async function stepDeployFoundry(): Promise> { + log(`Deploying Foundry: rg=${RG} location=${LOCATION} base_name=${BASE_NAME || "(auto)"}`); + const body: Record = { + resource_group: RG, + location: LOCATION, + deploy_key_vault: DEPLOY_KV, + }; + if (BASE_NAME) body.base_name = BASE_NAME; + + const { status, data } = await api>("/api/setup/foundry/deploy", { + method: "POST", + body, + timeoutMs: 480_000, + }); + + if (status !== 200 || data?.status !== "ok") { + fail(`Foundry deploy failed (${status}): ${JSON.stringify(data)}`); + } + + log(`Foundry deployed: endpoint=${data.foundry_endpoint}`); + log(` Models: ${JSON.stringify(data.deployed_models)}`); + if (data.key_vault_url) log(` Key Vault: ${data.key_vault_url}`); + return data; +} + +async function stepDeployAca(): Promise> { + log(`Deploying ACA: rg=${RG} location=${LOCATION} image_tag=${IMAGE_TAG}`); + + const { status, data } = await api>("/api/setup/aca/deploy", { + method: "POST", + body: { + resource_group: RG, + location: LOCATION, + runtime_port: 8080, + admin_port: 9090, + image_tag: IMAGE_TAG, + }, + timeoutMs: 2_700_000, // 45 min + }); + + if (status !== 200 || data?.status !== "ok") { + fail(`ACA deploy failed (${status}): ${JSON.stringify(data)}`); + } + + // Log each step + const steps = (data.steps || []) as Array<{ step: string; status: string; detail?: string }>; + for (const step of steps) { + const icon = step.status === "ok" ? "+" : step.status === "skipped" ? "-" : "!"; + log(` [${icon}] ${step.step}${step.detail ? 
`: ${step.detail}` : ""}`); + } + + log(`ACA runtime FQDN: ${data.runtime_fqdn}`); + return data; +} + +async function stepWaitForAcaRuntime(): Promise { + log("Waiting for ACA runtime to become ready via admin proxy ..."); + const deadline = Date.now() + 300_000; // 5 min -- ACA cold start can be slow + + while (Date.now() < deadline) { + try { + const res = await fetch(`${BASE_URL}/health`, { signal: AbortSignal.timeout(5_000) }); + if (res.ok) { + log("Admin health OK (runtime proxied)"); + break; + } + } catch { /* not ready */ } + await sleep(5_000); + } + + // ACA cold start -- give extra time + await sleep(10_000); + log("ACA runtime health check passed"); +} + +async function stepChatProbe(): Promise { + log("Sending chat probe via WebSocket ..."); + const secret = await getAdminSecret(); + const wsUrl = secret + ? `ws://localhost:${COMPOSE_ADMIN_PORT}/api/chat/ws?token=${secret}` + : `ws://localhost:${COMPOSE_ADMIN_PORT}/api/chat/ws`; + + const deadline = Date.now() + 300_000; // 5 min -- ACA can be slower + let lastError = ""; + + while (Date.now() < deadline) { + try { + const text = await chatOnce(wsUrl); + if (text) { + log(`Chat probe OK: ${text.slice(0, 100)}`); + return text; + } + } catch (err: unknown) { + lastError = err instanceof Error ? err.message : String(err); + log(`Chat probe failed: ${lastError} -- retrying in 10s`); + } + await sleep(10_000); + } + fail(`Chat probe did not succeed within 300s. 
Last error: ${lastError}`); +} + +function chatOnce(wsUrl: string): Promise { + return new Promise((resolve, reject) => { + const ws = new WebSocket(wsUrl); + const timeout = setTimeout(() => { + ws.close(); + reject(new Error("Chat response timed out after 90s")); + }, 90_000); + + const chunks: string[] = []; + + ws.onopen = () => { + ws.send(JSON.stringify({ + action: "send", + text: "Reply with exactly: PROBE_OK", + })); + }; + + ws.onmessage = (event) => { + try { + const data = JSON.parse(String(event.data)); + if (data.type === "delta" && data.content) { + chunks.push(data.content); + } else if (data.type === "done" || data.type === "end") { + clearTimeout(timeout); + ws.close(); + resolve(chunks.join("")); + } else if (data.type === "error") { + clearTimeout(timeout); + ws.close(); + reject(new Error(data.content || data.message || "Chat error")); + } + } catch { /* non-JSON */ } + }; + + ws.onerror = () => { + clearTimeout(timeout); + reject(new Error("WebSocket connection error")); + }; + + ws.onclose = () => { + clearTimeout(timeout); + if (chunks.length > 0) resolve(chunks.join("")); + else reject(new Error("WebSocket closed without response")); + }; + }); +} + +async function stepDestroyAca(): Promise { + log("Destroying ACA deployment ..."); + try { + const { status, data } = await api>("/api/setup/aca/destroy", { + method: "POST", + body: {}, + timeoutMs: 120_000, + }); + log(`ACA destroy: ${status} ${JSON.stringify(data)}`); + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + log(`ACA destroy failed (best-effort): ${msg}`); + } +} + +async function stepDecommissionFoundry(): Promise { + log(`Decommissioning Foundry: rg=${RG}`); + try { + const { status, data } = await api>("/api/setup/foundry/decommission", { + method: "POST", + body: { resource_group: RG }, + timeoutMs: 480_000, + }); + log(`Decommission: ${status} ${JSON.stringify(data)}`); + } catch (err: unknown) { + const msg = err instanceof Error ? 
err.message : String(err); + log(`Decommission failed (best-effort): ${msg}`); + } +} + +async function stepAcaRestart(): Promise { + log("Triggering ACA runtime restart ..."); + try { + const { status, data } = await api>("/api/setup/container/restart", { + method: "POST", + body: {}, + timeoutMs: 60_000, + }); + log(`ACA restart: ${status} ${JSON.stringify(data)}`); + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + log(`ACA restart failed: ${msg}`); + throw new Error(`ACA restart failed: ${msg}`); + } +} + +// --------------------------------------------------------------------------- +// Main exports +// --------------------------------------------------------------------------- + +export async function runAcaHeadlessSetup(): Promise { + const startTime = Date.now(); + + try { + await stepBuildImages(); + await stepStartAdminOnly(); + await stepAzureCheck(); + await stepDeployFoundry(); + await stepDeployAca(); + await stepWaitForAcaRuntime(); + const probeText = await stepChatProbe(); + + const elapsed = ((Date.now() - startTime) / 1000).toFixed(0); + log("========================================"); + log(`ACA SETUP COMPLETE in ${elapsed}s`); + log(` Chat probe: ${probeText.slice(0, 100)}`); + log("========================================"); + + console.log(JSON.stringify({ + status: "ok", + elapsed_seconds: parseInt(elapsed), + probe_response: probeText.slice(0, 200), + target: "aca", + })); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + console.error(`\nFATAL: ACA Setup failed: ${msg}`); + process.exit(1); + } +} + +export async function runAcaHeadlessDecommission(): Promise { + await stepDestroyAca(); + await stepDecommissionFoundry(); + // Stop local admin container + log("Stopping admin container ..."); + await stopContainer("polyclaw-admin"); +} + +export async function runAcaHeadlessRestart(): Promise { + await stepAcaRestart(); +} diff --git a/app/tui/src/headless/setup.ts b/app/tui/src/headless/setup.ts new file mode 100644 index 0000000..c73a42c --- /dev/null +++ b/app/tui/src/headless/setup.ts @@ -0,0 +1,332 @@ +/** + * Headless setup mode -- full day-one provisioning without interactive TUI. + * + * Orchestrates: + * 1. Docker build + start + * 2. Wait for admin health + * 3. Azure CLI check + subscription selection + * 4. Foundry deploy (Bicep) + * 5. Wait for runtime BYOK readiness + * 6. Chat probe via WebSocket + * + * Designed for CI and E2E tests -- all output goes to stdout/stderr. 
+ * + * Environment: + * POLYCLAW_SETUP_RG Resource group (default: polyclaw-e2e-rg) + * POLYCLAW_SETUP_LOCATION Azure region (default: eastus) + * POLYCLAW_SETUP_BASE_NAME Cognitive Services base name (auto-generated if empty) + * POLYCLAW_SETUP_SUBSCRIPTION_ID Target subscription ID (picks first if empty) + * ADMIN_SECRET Pre-set admin secret (auto-generated if empty) + */ + +import { + buildImage, + startContainer, + getAdminSecret, + waitForReady, + writeAzureOverride, +} from "../deploy/docker.js"; + +// --------------------------------------------------------------------------- +// Config from environment +// --------------------------------------------------------------------------- + +const RG = process.env.POLYCLAW_SETUP_RG || "polyclaw-e2e-rg"; +const LOCATION = process.env.POLYCLAW_SETUP_LOCATION || "eastus"; +const BASE_NAME = process.env.POLYCLAW_SETUP_BASE_NAME || ""; +const SUBSCRIPTION_ID = process.env.POLYCLAW_SETUP_SUBSCRIPTION_ID || ""; +const DEPLOY_KV = process.env.POLYCLAW_SETUP_DEPLOY_KV !== "0"; +const ADMIN_PORT = parseInt(process.env.ADMIN_PORT || "8080", 10); +const BOT_PORT = parseInt(process.env.BOT_PORT || "3978", 10); +const COMPOSE_ADMIN_PORT = 9090; +const BASE_URL = `http://localhost:${COMPOSE_ADMIN_PORT}`; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function log(msg: string): void { + const ts = new Date().toISOString().slice(11, 19); + console.log(`[${ts}] ${msg}`); +} + +function fail(msg: string): never { + console.error(`FATAL: ${msg}`); + process.exit(1); +} + +async function api>( + path: string, + opts?: { method?: string; body?: unknown; timeoutMs?: number }, +): Promise<{ status: number; data: T }> { + const secret = await getAdminSecret(); + const headers: Record = { "Content-Type": "application/json" }; + if (secret) headers["Authorization"] = `Bearer ${secret}`; + + const res = await 
fetch(`${BASE_URL}${path}`, { + method: opts?.method || "GET", + headers, + body: opts?.body ? JSON.stringify(opts.body) : undefined, + signal: AbortSignal.timeout(opts?.timeoutMs || 30_000), + }); + const data = await res.json().catch(() => null) as T; + return { status: res.status, data }; +} + +async function sleep(ms: number): Promise { + await Bun.sleep(ms); +} + +// --------------------------------------------------------------------------- +// Steps +// --------------------------------------------------------------------------- + +async function stepBuildAndStart(): Promise { + log("Building Docker image ..."); + writeAzureOverride(); + + const buildOk = await buildImage((line) => { + if (process.env.VERBOSE) console.log(line); + }); + if (!buildOk) fail("Docker build failed"); + + log("Starting Docker stack ..."); + const instanceId = await startContainer(ADMIN_PORT, BOT_PORT, "setup"); + + log("Waiting for admin health ..."); + const ready = await waitForReady(BASE_URL, 120_000); + if (!ready) fail("Admin did not become healthy within 120s"); + + log("Admin is healthy"); + return instanceId; +} + +async function stepAzureCheck(): Promise { + log("Checking Azure CLI status ..."); + const deadline = Date.now() + 120_000; + + while (Date.now() < deadline) { + try { + const { status, data } = await api>( + "/api/setup/azure/check", + { timeoutMs: 60_000 }, + ); + if (status === 200 && data) { + const st = data.status; + if (st === "logged_in") { + log(`Azure logged in: ${data.user || "?"} (${data.subscription || "?"})`); + return; + } + if (st === "needs_subscription") { + log("Azure needs subscription selection"); + await stepSetSubscription(); + // Re-check after setting subscription + const { data: d2 } = await api>( + "/api/setup/azure/check", + { timeoutMs: 60_000 }, + ); + if (d2?.status === "logged_in") { + log(`Azure logged in: ${d2.user || "?"} (${d2.subscription || "?"})`); + return; + } + } + } + } catch (err: unknown) { + const msg = err instanceof 
Error ? err.message : String(err); + log(`Azure check attempt failed: ${msg} -- retrying ...`); + } + await sleep(5_000); + } + fail("Azure CLI not logged in within 120s -- ensure ~/.azure exists"); +} + +async function stepSetSubscription(): Promise { + if (SUBSCRIPTION_ID) { + log(`Setting subscription: ${SUBSCRIPTION_ID}`); + await api("/api/setup/azure/subscription", { + method: "POST", + body: { subscription_id: SUBSCRIPTION_ID }, + }); + return; + } + + // Auto-pick first enabled subscription + const { data } = await api>>("/api/setup/azure/subscriptions"); + const subs = Array.isArray(data) ? data : []; + if (subs.length === 0) fail("No Azure subscriptions available"); + + const sub = subs[0]; + log(`Auto-selecting subscription: ${sub.name} (${sub.id})`); + await api("/api/setup/azure/subscription", { + method: "POST", + body: { subscription_id: sub.id }, + }); +} + +async function stepDeployFoundry(): Promise> { + log(`Deploying Foundry: rg=${RG} location=${LOCATION} base_name=${BASE_NAME || "(auto)"}`); + const body: Record = { + resource_group: RG, + location: LOCATION, + deploy_key_vault: DEPLOY_KV, + }; + if (BASE_NAME) body.base_name = BASE_NAME; + + const { status, data } = await api>("/api/setup/foundry/deploy", { + method: "POST", + body, + timeoutMs: 480_000, + }); + + if (status !== 200 || data?.status !== "ok") { + fail(`Foundry deploy failed (${status}): ${JSON.stringify(data)}`); + } + + log(`Foundry deployed: endpoint=${data.foundry_endpoint}`); + log(` Models: ${JSON.stringify(data.deployed_models)}`); + if (data.key_vault_url) log(` Key Vault: ${data.key_vault_url}`); + return data; +} + +async function stepWaitForRuntime(): Promise { + log("Waiting for runtime to become ready (BYOK mode) ..."); + const deadline = Date.now() + 180_000; + + while (Date.now() < deadline) { + try { + const res = await fetch(`${BASE_URL}/health`, { signal: AbortSignal.timeout(3_000) }); + if (res.ok) { + // Check runtime container logs for BYOK marker via admin 
+ // We can't access runtime directly, but the health endpoint works + break; + } + } catch { /* not ready */ } + await sleep(5_000); + } + + // Give RBAC a moment to propagate + await sleep(5_000); + log("Runtime health check passed"); +} + +async function stepChatProbe(): Promise { + log("Sending chat probe via WebSocket ..."); + const secret = await getAdminSecret(); + const wsUrl = secret + ? `ws://localhost:${COMPOSE_ADMIN_PORT}/api/chat/ws?token=${secret}` + : `ws://localhost:${COMPOSE_ADMIN_PORT}/api/chat/ws`; + + const deadline = Date.now() + 180_000; + let lastError = ""; + + while (Date.now() < deadline) { + try { + const text = await chatOnce(wsUrl); + if (text) { + log(`Chat probe OK: ${text.slice(0, 100)}`); + return text; + } + } catch (err: unknown) { + lastError = err instanceof Error ? err.message : String(err); + log(`Chat probe failed: ${lastError} -- retrying in 8s`); + } + await sleep(8_000); + } + fail(`Chat probe did not succeed within 180s. Last error: ${lastError}`); +} + +async function chatOnce(wsUrl: string): Promise { + return new Promise((resolve, reject) => { + const ws = new WebSocket(wsUrl); + const timeout = setTimeout(() => { + ws.close(); + reject(new Error("Chat response timed out after 60s")); + }, 60_000); + + const chunks: string[] = []; + + ws.onopen = () => { + ws.send(JSON.stringify({ + action: "send", + text: "Reply with exactly: PROBE_OK", + })); + }; + + ws.onmessage = (event) => { + try { + const data = JSON.parse(String(event.data)); + if (data.type === "delta" && data.content) { + chunks.push(data.content); + } else if (data.type === "done" || data.type === "end") { + clearTimeout(timeout); + ws.close(); + resolve(chunks.join("")); + } else if (data.type === "error") { + clearTimeout(timeout); + ws.close(); + reject(new Error(data.content || data.message || "Chat error")); + } + } catch { /* non-JSON */ } + }; + + ws.onerror = () => { + clearTimeout(timeout); + reject(new Error("WebSocket connection error")); + }; + 
+ ws.onclose = () => { + clearTimeout(timeout); + if (chunks.length > 0) resolve(chunks.join("")); + else reject(new Error("WebSocket closed without response")); + }; + }); +} + +async function stepDecommission(): Promise { + log(`Decommissioning: rg=${RG}`); + const { status, data } = await api>("/api/setup/foundry/decommission", { + method: "POST", + body: { resource_group: RG }, + timeoutMs: 480_000, + }); + log(`Decommission: ${status} ${JSON.stringify(data)}`); +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- + +export async function runHeadlessSetup(): Promise { + const startTime = Date.now(); + + await stepBuildAndStart(); + + try { + await stepAzureCheck(); + await stepDeployFoundry(); + await stepWaitForRuntime(); + const probeText = await stepChatProbe(); + + const elapsed = ((Date.now() - startTime) / 1000).toFixed(0); + log("========================================"); + log(`SETUP COMPLETE in ${elapsed}s`); + log(` Chat probe: ${probeText.slice(0, 100)}`); + log("========================================"); + + // Output structured result for test consumption + console.log(JSON.stringify({ + status: "ok", + elapsed_seconds: parseInt(elapsed), + probe_response: probeText.slice(0, 200), + })); + } catch (err) { + // On failure, log clearly but leave the stack running for diagnostics + const msg = err instanceof Error ? 
err.message : String(err);
+ console.error(`\nFATAL: Setup failed: ${msg}`);
+ process.exit(1);
+ }
+}
+
+export async function runHeadlessDecommission(): Promise<void> {
+ await stepDecommission();
+}
diff --git a/app/tui/src/index.ts b/app/tui/src/index.ts
index f41f36b..8e6b0fa 100644
--- a/app/tui/src/index.ts
+++ b/app/tui/src/index.ts
@@ -41,16 +41,25 @@ function usage(): void {
 console.log(" bot Bot Framework server only (headless)");
 console.log(" start Build, start, and print admin URL (scriptable)");
 console.log(" run Start stack, send prompt, print response, exit");
+ console.log(" setup Headless full setup: build, deploy Foundry, verify chat");
+ console.log(" decommission Tear down Azure resources provisioned by setup");
+ console.log(" aca-setup Headless ACA setup: build, Foundry + ACA deploy, verify chat");
+ console.log(" aca-decommission Tear down ACA + Foundry resources");
+ console.log(" aca-restart Restart the ACA runtime container");
 console.log(" health Check if the stack is running and healthy");
 console.log(" stop Stop the running stack");
 console.log("");
 console.log("Environment:");
 console.log(" ADMIN_PORT Admin server port (default: 8080)");
 console.log(" BOT_PORT Bot Framework port (default: 3978)");
+ console.log(" POLYCLAW_SETUP_RG Resource group for setup (default: polyclaw-e2e-rg)");
+ console.log(" POLYCLAW_SETUP_LOCATION Azure region (default: eastus)");
+ console.log(" POLYCLAW_SETUP_BASE_NAME Cognitive Services base name (auto if empty)");
+ console.log(" POLYCLAW_SETUP_SUBSCRIPTION_ID Target subscription ID (first if empty)");
 console.log("");
 }
-const VALID_MODES = ["admin", "bot", "start", "run", "health", "stop"];
+const VALID_MODES = ["admin", "bot", "start", "run", "setup", "decommission", "aca-setup",
"aca-decommission", "aca-restart", "health", "stop"]; // ----------------------------------------------------------------------- // CLI helpers @@ -141,6 +153,39 @@ async function main(): Promise { return; } + // ---- Headless setup mode ----------------------------------------------- + if (mode === "setup") { + const { runHeadlessSetup } = await import("./headless/setup.js"); + await runHeadlessSetup(); + return; + } + + // ---- Headless decommission mode ---------------------------------------- + if (mode === "decommission") { + const { runHeadlessDecommission } = await import("./headless/setup.js"); + await runHeadlessDecommission(); + return; + } + + // ---- ACA headless modes ------------------------------------------------- + if (mode === "aca-setup") { + const { runAcaHeadlessSetup } = await import("./headless/aca_setup.js"); + await runAcaHeadlessSetup(); + return; + } + + if (mode === "aca-decommission") { + const { runAcaHeadlessDecommission } = await import("./headless/aca_setup.js"); + await runAcaHeadlessDecommission(); + return; + } + + if (mode === "aca-restart") { + const { runAcaHeadlessRestart } = await import("./headless/aca_setup.js"); + await runAcaHeadlessRestart(); + return; + } + // ---- Health check (no build, no start) -------------------------------- if (mode === "health") { try { @@ -195,17 +240,29 @@ async function main(): Promise { process.exit(1); } - console.log("Building and starting polyclaw..."); - const instanceId = await ensureStack(adminPort, botPort, (line) => { - // Suppress build output in run mode unless verbose - if (process.env.VERBOSE) console.log(line); - }); - - const { secret } = await resolveAdminUrl(composeAdminPort); const baseUrl = `http://localhost:${composeAdminPort}`; - console.log("Waiting for server..."); - await waitOrDie(baseUrl, instanceId); + // Check if the stack is already running -- skip build/start if so. 
+ let instanceId = ""; + let alreadyRunning = false; + try { + const res = await fetch(`${baseUrl}/health`, { signal: AbortSignal.timeout(3_000) }); + alreadyRunning = res.ok; + } catch { /* not running */ } + + if (alreadyRunning) { + instanceId = "polyclaw-admin"; + } else { + console.log("Building and starting polyclaw..."); + instanceId = await ensureStack(adminPort, botPort, (line) => { + if (process.env.VERBOSE) console.log(line); + }); + + console.log("Waiting for server..."); + await waitOrDie(baseUrl, instanceId); + } + + const { secret } = await resolveAdminUrl(composeAdminPort); // Send the prompt via the chat WebSocket let response = ""; @@ -264,12 +321,12 @@ async function main(): Promise { } catch (err: unknown) { const msg = err instanceof Error ? err.message : String(err); console.error(`Chat failed: ${msg}`); - await stopContainer(instanceId); + if (!alreadyRunning) await stopContainer(instanceId); process.exit(1); } console.log(response); - await stopContainer(instanceId); + if (!alreadyRunning) await stopContainer(instanceId); process.exit(0); } diff --git a/entrypoint.sh b/entrypoint.sh index 7df7925..74d24aa 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -134,16 +134,26 @@ if [[ "$MODE" == "runtime" ]]; then fi elif [[ -n "${RUNTIME_SP_APP_ID:-}" && -n "${RUNTIME_SP_PASSWORD:-}" && -n "${RUNTIME_SP_TENANT:-}" ]]; then echo "Runtime (Docker): logging in with scoped service principal..." - if az login --service-principal \ - -u "$RUNTIME_SP_APP_ID" \ - -p "$RUNTIME_SP_PASSWORD" \ - --tenant "$RUNTIME_SP_TENANT" \ - --output none 2>/dev/null; then - echo "Runtime (Docker): Azure CLI authenticated (scoped SP)." - _RUNTIME_AUTH_OK=true - else - echo "Runtime (Docker): WARNING -- service principal login failed. Bot endpoint sync will be unavailable." 
- fi + _SP_ATTEMPTS=0 + _SP_MAX=3 + while (( _SP_ATTEMPTS < _SP_MAX )); do + (( _SP_ATTEMPTS++ )) || true + if az login --service-principal \ + -u "$RUNTIME_SP_APP_ID" \ + -p "$RUNTIME_SP_PASSWORD" \ + --tenant "$RUNTIME_SP_TENANT" \ + --output none 2>/dev/null; then + echo "Runtime (Docker): Azure CLI authenticated (scoped SP)." + _RUNTIME_AUTH_OK=true + break + fi + if (( _SP_ATTEMPTS < _SP_MAX )); then + echo "Runtime (Docker): SP login attempt $_SP_ATTEMPTS/$_SP_MAX failed -- retrying in 10s (credential propagation)..." + sleep 10 + else + echo "Runtime (Docker): WARNING -- service principal login failed after $_SP_MAX attempts. Bot endpoint sync will be unavailable." + fi + done else echo "Runtime: no identity credentials found. Running without Azure CLI access." echo " Bot endpoint updates will not work until admin provisions a runtime identity." From 1b1f173909c142c065054d16d2733624da1f8dbf Mon Sep 17 00:00:00 2001 From: Aymen Date: Tue, 7 Apr 2026 23:49:39 +0200 Subject: [PATCH 3/5] feat: enhance test cases for identity roles with additional mock responses --- app/runtime/tests/test_identity_routes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app/runtime/tests/test_identity_routes.py b/app/runtime/tests/test_identity_routes.py index ff2f8a0..d3e1b12 100644 --- a/app/runtime/tests/test_identity_routes.py +++ b/app/runtime/tests/test_identity_routes.py @@ -255,11 +255,14 @@ async def test_roles_sp_show_fails_uses_app_id(self, mock_cfg) -> None: mock_cfg.runtime_sp_app_id = "app-id" mock_cfg.aca_mi_client_id = "" mock_cfg.runtime_sp_tenant = "" + mock_cfg.env.read.return_value = "polyclaw-rg" az = MagicMock() az.json.side_effect = [ None, # _sp_show fails [{"roleDefinitionName": "Reader", "scope": "/sub/rg", "condition": ""}], + [], # _discover_session_pool (RG-scoped) + [], # _discover_session_pool (subscription-wide) ] routes = IdentityRoutes(az=az) @@ -318,6 +321,8 @@ async def test_fix_skips_without_endpoint(self, mock_cfg, tmp_path) -> None: 
az = MagicMock() az.json.side_effect = [ {"id": "sp-oid", "objectId": "sp-oid"}, # resolve principal + [], # _discover_cs_resource (RG-scoped) + [], # _discover_cs_resource (subscription-wide) [], # _discover_session_pool (RG-scoped) [], # _discover_session_pool (subscription-wide) ] From 489be2a4a90990076d89ec32afee4dd0797e8243 Mon Sep 17 00:00:00 2001 From: Aymen Date: Wed, 8 Apr 2026 09:12:30 +0200 Subject: [PATCH 4/5] feat: Enhance Polyclaw documentation and infrastructure setup --- docs/content/_index.md | 4 +- docs/content/api/rest.md | 20 +++++ docs/content/architecture/_index.md | 7 +- docs/content/architecture/agent-core.md | 19 ++++- docs/content/architecture/server.md | 1 + docs/content/architecture/services.md | 22 +++++ docs/content/configuration/_index.md | 19 +++-- docs/content/configuration/keyvault.md | 8 +- docs/content/deployment/_index.md | 1 + docs/content/deployment/azure.md | 2 + docs/content/deployment/bicep.md | 85 +++++++++++++++++++ docs/content/deployment/docker.md | 12 ++- docs/content/deployment/runtime-isolation.md | 9 +- docs/content/features/agent-identity.md | 24 +++--- docs/content/features/monitoring.md | 2 + docs/content/getting-started/prerequisites.md | 21 ++--- docs/content/getting-started/quickstart.md | 4 +- docs/content/getting-started/setup-wizard.md | 18 ++-- docs/content/responsible-ai/_index.md | 16 ++-- 19 files changed, 224 insertions(+), 70 deletions(-) create mode 100644 docs/content/deployment/bicep.md diff --git a/docs/content/_index.md b/docs/content/_index.md index fc0a7c1..3816f33 100644 --- a/docs/content/_index.md +++ b/docs/content/_index.md @@ -20,11 +20,13 @@ title: "Polyclaw Documentation" ![polyclaw web chat interface](/screenshots/web-newchat-try-asking.png) -polyclaw is an autonomous AI copilot built on the **GitHub Copilot SDK**. It messages you on Telegram, uses skills to get things done, and can even call your phone for a real-time voice conversation. 
Through the Copilot SDK it can write and execute code to solve problems on the spot. It comes with its own web browser that skills can drive to navigate websites and perform tasks. Under the hood the agent is powered by a cron-based scheduler, a plugin ecosystem, a self-extending skill system, and MCP servers. +polyclaw is an autonomous AI copilot built on the **Copilot SDK** with **Foundry BYOK** (Bring Your Own Key) support. It messages you on Telegram, uses skills to get things done, and can even call your phone for a real-time voice conversation. When `FOUNDRY_ENDPOINT` is configured, the agent uses your Azure AI Services resource directly for LLM inference with Entra ID authentication. It can write and execute code to solve problems on the spot. It comes with its own web browser that skills can drive to navigate websites and perform tasks. Under the hood the agent is powered by a cron-based scheduler, a plugin ecosystem, a self-extending skill system, and MCP servers. ### Key Capabilities - **Streaming AI responses** with tool execution and multi-model support +- **Foundry BYOK** -- bring your own Azure AI Services resource with Entra ID authentication +- **Bicep infrastructure** -- one-click provisioning of AI Services, Key Vault, Content Safety, and more - **Multi-channel bot** with adaptive cards, media handling, and proactive messaging - **Realtime voice** using Azure Communication Services and OpenAI Realtime API - **Persistent workspace** -- files, databases, and scripts the agent creates survive across sessions, like a personal drive for your agent diff --git a/docs/content/api/rest.md b/docs/content/api/rest.md index cdca641..1e4daf3 100644 --- a/docs/content/api/rest.md +++ b/docs/content/api/rest.md @@ -499,6 +499,26 @@ Get the current preflight security check status. Re-run all preflight security checks. +## Foundry Infrastructure + +Bicep-based infrastructure provisioning. See [Bicep Infrastructure](/deployment/bicep/) for details. 
+ +### `GET /api/setup/foundry/status` + +Get current Foundry infrastructure deployment status. + +### `POST /api/setup/foundry/deploy` + +Deploy Foundry infrastructure via Bicep. Body: `{ resource_group?, location?, base_name?, deploy_key_vault?, deploy_acs?, deploy_content_safety?, deploy_search?, deploy_embedding_aoai?, deploy_monitoring?, deploy_session_pool?, models? }`. + +### `GET /api/setup/foundry/deploy/stream` + +SSE stream for Bicep deployment progress. Returns step-by-step status updates in real time. + +### `POST /api/setup/foundry/decommission` + +Decommission Foundry infrastructure. Tears down resources deployed by the Bicep template. + ## Content Safety ### `GET /api/content-safety/status` diff --git a/docs/content/architecture/_index.md b/docs/content/architecture/_index.md index 9f45eba..8e9bd01 100644 --- a/docs/content/architecture/_index.md +++ b/docs/content/architecture/_index.md @@ -13,7 +13,8 @@ Polyclaw is a monorepo containing a Python backend, a React frontend, and a Node | Layer | Technology | Role | |---|---|---| -| **Agent Core** | Python, GitHub Copilot SDK | LLM sessions, tool execution, streaming | +| **Agent Core** | Python, Copilot SDK, Foundry BYOK | LLM sessions, tool execution, streaming | +| **Infrastructure** | Bicep, Azure CLI | AI Services, Key Vault, Content Safety provisioning | | **Web Admin** | React 19, Vite, TypeScript | SPA dashboard with WebSocket chat | | **Bot Endpoint** | aiohttp, Bot Framework SDK | Teams, Telegram, and other channels | | **Voice** | Azure Communication Services, OpenAI Realtime | Phone call routing and real-time speech | @@ -82,14 +83,14 @@ All persistent state is stored as JSON files under `~/.polyclaw/` (configurable The system is organized into these runtime modules: -- **`agent/`** -- Copilot SDK wrapper, tools, prompt builder +- **`agent/`** -- Copilot SDK wrapper, BYOK provider, tools, prompt builder - **`config/`** -- Settings singleton, environment loading - **`media/`** -- MIME 
classification, attachment handling - **`messaging/`** -- Bot, cards, commands, proactive delivery - **`realtime/`** -- Voice routes, ACS middleware, auth - **`registries/`** -- Plugin and skill registries - **`server/`** -- aiohttp app, routes, middleware, chat handler -- **`services/`** -- Tunnel, deployer, Key Vault, Azure CLI wrapper +- **`services/`** -- Tunnel, deployer, Bicep deployer, Key Vault, Azure CLI wrapper - **`state/`** -- Session, memory, profile, MCP config, proactive state Dive deeper: diff --git a/docs/content/architecture/agent-core.md b/docs/content/architecture/agent-core.md index 07930f3..bf65e90 100644 --- a/docs/content/architecture/agent-core.md +++ b/docs/content/architecture/agent-core.md @@ -5,7 +5,7 @@ weight: 1 # Agent Core -The agent core is the heart of Polyclaw. It wraps the GitHub Copilot SDK to provide streaming AI conversations with tool execution. +The agent core is the heart of Polyclaw. It wraps the GitHub Copilot SDK to provide streaming AI conversations with tool execution. When `FOUNDRY_ENDPOINT` is configured, the agent operates in BYOK (Bring Your Own Key) mode, routing LLM inference through your Azure AI Services resource with Entra ID bearer tokens. ## CopilotAgent @@ -27,12 +27,13 @@ Each session is configured with: | Parameter | Description | |---|---| -| `model` | LLM model identifier (default: `claude-sonnet-4-20250514`) | +| `model` | LLM model identifier (default: `gpt-4.1`) | | `streaming` | Enable token-by-token streaming | | `tools` | List of callable tool definitions | | `system_message` | System prompt assembled by the prompt builder | | `mcp_servers` | MCP server configurations to attach | | `skill_dirs` | Skill directories to load | +| `provider` | BYOK provider config (when `FOUNDRY_ENDPOINT` is set) | ### Timeouts and Retries @@ -81,12 +82,22 @@ Located in `app/runtime/agent/prompt.py`, `build_system_prompt()` assembles the Prompt templates are Markdown files stored in `app/runtime/templates/`. 
They support variable interpolation for agent profile fields and dynamic MCP server listings.
+## BYOK Mode
+
+When `FOUNDRY_ENDPOINT` is set in the configuration, the agent activates BYOK mode. Located in `app/runtime/agent/byok.py`, this module:
+
+1. Acquires an Entra ID bearer token via `az account get-access-token --resource https://cognitiveservices.azure.com`
+2. Configures the Copilot SDK session with a custom provider (`type: azure`, `base_url: <foundry-endpoint>`, `bearer_token: <entra-token>`)
+3. Overrides the session model and provider for every conversation
+
+The runtime service principal (or managed identity) must have the `Cognitive Services OpenAI User` role on the AI Services resource. The fix-roles endpoint (`POST /api/identity/fix-roles`) assigns this role automatically if missing.
+
 ## Multi-Model Support
-Polyclaw supports any model available through the Copilot SDK. The default model is configured via `COPILOT_MODEL`. Users can switch models at runtime:
+Polyclaw supports any model deployed on the configured Azure AI Services resource (BYOK mode) or available through the Copilot SDK. The default model is configured via `COPILOT_MODEL` (default: `gpt-4.1`). Users can switch models at runtime:
- Via slash command: `/model <model-name>`
- Via the web dashboard model selector
- Via API: `GET /api/models` lists available models
-The memory system uses a separate model (`MEMORY_MODEL`) for consolidation tasks.
+The memory system uses a separate model (`MEMORY_MODEL`, default: `gpt-4.1`) for consolidation tasks.
diff --git a/docs/content/architecture/server.md b/docs/content/architecture/server.md
index e6f99f6..53570ed 100644
--- a/docs/content/architecture/server.md
+++ b/docs/content/architecture/server.md
@@ -84,6 +84,7 @@ Routes are split between admin-only and runtime-only handlers.
| Handler | Prefix | Purpose | |---|---|---| | `SetupRoutes` | `/api/setup/` | Setup wizard, lockdown toggle | +| `FoundryDeployRoutes` | `/api/setup/foundry/` | Bicep-based Foundry infrastructure deployment | | `VoiceSetupRoutes` | `/api/voice/setup/` | Voice configuration | | `WorkspaceHandler` | `/api/workspace/` | Workspace files | | `EnvironmentRoutes` | `/api/environments/` | Deployment environments | diff --git a/docs/content/architecture/services.md b/docs/content/architecture/services.md index 65bbc85..27cddaf 100644 --- a/docs/content/architecture/services.md +++ b/docs/content/architecture/services.md @@ -80,6 +80,26 @@ Channel configuration (Telegram) is applied as part of `provision()` when a toke Key operations: `deploy(req)`, `destroy(deploy_id)`, `status()`, `restart()`. +## Bicep Deployer + +**Module**: `app/runtime/services/deployment/bicep_deployer.py` + +`BicepDeployer` replaces ad-hoc Azure CLI provisioning with a single `az deployment group create` driven by `infra/main.bicep`. All resource creation is parameterised from internal config state. + +| Feature | Description | +|---|---| +| Foundry (AI Services) | Deploys Azure AI Services with configurable model deployments (gpt-4.1, gpt-5, gpt-5-mini) | +| Key Vault | Provisions Key Vault with firewall rules and access policies | +| Content Safety | Optional Azure AI Content Safety resource | +| Session Pool | Optional Azure Container Apps session pool for sandbox execution | +| ACS | Optional Azure Communication Services for voice | +| Azure AI Search | Optional search index for Foundry IQ memory | +| Monitoring | Optional Application Insights and Log Analytics workspace | +| Idempotent | Tracks deployments in `DeployStateStore` for safe re-runs | +| Streaming | Provides step-by-step progress via SSE stream endpoint | + +Key operations: `deploy(req)`, `deploy_stream(req, callback)`, `decommission(deploy_id)`. 
+ ## Runtime Identity Provisioner **Module**: `app/runtime/services/runtime_identity.py` @@ -99,6 +119,8 @@ RBAC roles granted (scoped to the resource group): | Reader | Enumerate resources in the resource group | | Key Vault Secrets Officer | Read/write bot credentials stored in Key Vault | | Azure ContainerApps Session Executor | Invoke ACA Dynamic Sessions for code execution | +| Cognitive Services User | Call Content Safety APIs (Prompt Shields, content moderation) | +| Cognitive Services OpenAI User | BYOK inference on the Foundry AI Services resource | Key operations: `provision(resource_group)`, `revoke()`, `provision_managed_identity()`, `revoke_managed_identity()`, `status()`. diff --git a/docs/content/configuration/_index.md b/docs/content/configuration/_index.md index 8c0dc62..1706ef7 100644 --- a/docs/content/configuration/_index.md +++ b/docs/content/configuration/_index.md @@ -9,8 +9,7 @@ Polyclaw is configured through environment variables loaded from a `.env` file o | Variable | Default | Description | |---|---|---| -| `GITHUB_TOKEN` | -- | GitHub PAT with Copilot access. Supports `@kv:` prefix. | -| `COPILOT_MODEL` | `claude-sonnet-4.6` | Default LLM model for conversations | +| `COPILOT_MODEL` | `gpt-4.1` | Default LLM model for conversations | | `COPILOT_AGENT` | -- | Optional Copilot agent name | | `ADMIN_PORT` | `9090` | Admin server listen port | | `ADMIN_SECRET` | -- | Bearer token for API authentication. Supports `@kv:` prefix. | @@ -18,6 +17,16 @@ Polyclaw is configured through environment variables loaded from a `.env` file o | `DOTENV_PATH` | -- | Custom path to `.env` file | | `POLYCLAW_SERVER_MODE` | `combined` | Server mode: `combined`, `admin`, or `runtime` | +## Foundry (BYOK) + +When `FOUNDRY_ENDPOINT` is set, Polyclaw operates in Bring Your Own Key (BYOK) mode. The agent uses your Azure AI Services resource directly instead of the GitHub Copilot SDK backend. 
Authentication is handled via `az account get-access-token` with Entra ID bearer tokens.
+
+| Variable | Default | Description |
+|---|---|---|
+| `FOUNDRY_ENDPOINT` | -- | Azure AI Services endpoint (e.g. `https://<resource-name>.cognitiveservices.azure.com/`). Enables BYOK mode when set. |
+| `FOUNDRY_NAME` | -- | Display name of the Foundry resource |
+| `FOUNDRY_RESOURCE_GROUP` | -- | Resource group containing the Foundry resource |
+
 ## Bot Framework
 | Variable | Default | Description |
@@ -43,7 +52,7 @@ Polyclaw is configured through environment variables loaded from a `.env` file o
 | Variable | Default | Description |
 |---|---|---|
-| `MEMORY_MODEL` | `claude-sonnet-4.6` | Model used for memory consolidation |
+| `MEMORY_MODEL` | `gpt-4.1` | Model used for memory consolidation |
 | `MEMORY_IDLE_MINUTES` | `5` | Minutes of inactivity before memory formation triggers |
 ## Proactive Messaging
@@ -84,13 +93,13 @@ All paths are computed relative to `POLYCLAW_DATA_DIR`:
 ## Secret Resolution
-The following environment variables support `@kv:` prefix resolution from Azure Key Vault: `GITHUB_TOKEN`, `ADMIN_SECRET`, `BOT_APP_PASSWORD`, `ACS_CONNECTION_STRING`, `AZURE_OPENAI_API_KEY`. The Docker entrypoint additionally resolves all `@kv:` prefixed variables via a shell-level pass.
+The following environment variables support `@kv:` prefix resolution from Azure Key Vault: `ADMIN_SECRET`, `BOT_APP_PASSWORD`, `ACS_CONNECTION_STRING`, `AZURE_OPENAI_API_KEY`. The Docker entrypoint additionally resolves all `@kv:` prefixed variables via a shell-level pass.
 For example:
 ```bash
-GITHUB_TOKEN=@kv:polyclaw-github-token
 ADMIN_SECRET=@kv:polyclaw-admin-secret
+BOT_APP_PASSWORD=@kv:polyclaw-bot-password
 ```
 This requires `KEY_VAULT_URL` to be set and valid Azure credentials (via `az login` or managed identity).
diff --git a/docs/content/configuration/keyvault.md b/docs/content/configuration/keyvault.md index 244f793..77d4e9e 100644 --- a/docs/content/configuration/keyvault.md +++ b/docs/content/configuration/keyvault.md @@ -5,6 +5,8 @@ weight: 1 # Key Vault Integration +> **Warning:** Key Vault integration does not yet cover all secret variables reliably. This is actively being worked on. + Polyclaw integrates with Azure Key Vault to separate sensitive credentials from the agent's working data. The `.env` file still holds non-secret configuration, but secrets are stored in Key Vault instead. The agent can still read resolved secrets at runtime, so Key Vault does not hide them from the LLM. The value is in keeping secrets out of the workspace filesystem, which reduces the risk of accidentally copying, committing, or leaking them alongside regular configuration and data. ## Configuration @@ -24,8 +26,8 @@ KEY_VAULT_RG=my-rg ```bash az keyvault secret set \ --vault-name polyclaw-kv \ - --name github-token \ - --value "ghp_xxxxxxxxxxxx" + --name admin-secret \ + --value "your-admin-secret" ``` ### Reference Secrets @@ -33,7 +35,6 @@ az keyvault secret set \ In your `.env` file, use `@kv:` prefixed values: ```bash -GITHUB_TOKEN=@kv:github-token BOT_APP_PASSWORD=@kv:bot-app-password ADMIN_SECRET=@kv:admin-secret ACS_CONNECTION_STRING=@kv:acs-connection @@ -70,7 +71,6 @@ When saving settings through the admin API: The following variables support in-process `@kv:` resolution: -- `GITHUB_TOKEN` - `BOT_APP_PASSWORD` - `ADMIN_SECRET` - `ACS_CONNECTION_STRING` diff --git a/docs/content/deployment/_index.md b/docs/content/deployment/_index.md index b4bba77..182106f 100644 --- a/docs/content/deployment/_index.md +++ b/docs/content/deployment/_index.md @@ -11,6 +11,7 @@ Polyclaw supports two deployment targets, both managed through the TUI or CLI to |---|---| | [Local Docker](/deployment/docker/) | Builds the image locally and runs a container on your machine. 
The container lifecycle is tied to the TUI process. | | [Azure Container Apps](/deployment/azure/) | **Experimental.** Pushes the image to Azure Container Registry and deploys a persistent Container App with optional Bot Service, ACS, and Key Vault integration. | +| [Bicep Infrastructure](/deployment/bicep/) | Provisions Azure AI Services, Key Vault, Content Safety, session pools, and monitoring via Bicep templates from the web dashboard. | | [Runtime Isolation](/deployment/runtime-isolation/) | Separated admin and agent runtime architecture with least-privilege managed identity. | Both targets are selected through the TUI deployment picker when you run `./scripts/run-tui.sh`. The TUI handles the build, push, deploy, and health-check steps automatically. diff --git a/docs/content/deployment/azure.md b/docs/content/deployment/azure.md index 693641d..f018e9e 100644 --- a/docs/content/deployment/azure.md +++ b/docs/content/deployment/azure.md @@ -9,6 +9,8 @@ weight: 2 When you select **Azure Container Apps** in the TUI target picker, the TUI provisions all Azure infrastructure, pushes the image, and deploys a persistent Container App. Unlike Local Docker, the container keeps running after you exit the TUI. +For provisioning Azure AI Services, Key Vault, Content Safety, and other foundational infrastructure, see [Bicep Infrastructure](/deployment/bicep/). The ACA deployer provisions container hosting; the Bicep deployer provisions AI and supporting resources. + ## Prerequisites - **Azure CLI** (`az`) installed and logged in diff --git a/docs/content/deployment/bicep.md b/docs/content/deployment/bicep.md new file mode 100644 index 0000000..5a60869 --- /dev/null +++ b/docs/content/deployment/bicep.md @@ -0,0 +1,85 @@ +--- +title: "Bicep Infrastructure" +weight: 4 +--- + +# Bicep Infrastructure + +Polyclaw provisions Azure infrastructure through a single Bicep template (`infra/main.bicep`) driven by `az deployment group create`. 
This replaces ad-hoc Azure CLI provisioning with a declarative, parameterised approach. + +## How It Works + +The **Deploy Infrastructure** button in the Setup Wizard (or the `POST /api/setup/foundry/deploy` API) triggers a Bicep deployment. The `BicepDeployer` service assembles parameters from internal config state and runs `az deployment group create` against the template. + +Progress is streamed in real time via the `GET /api/setup/foundry/deploy/stream` SSE endpoint. + +## What Gets Provisioned + +Each resource block in the Bicep template is gated by a boolean flag. Callers enable only the subset they need. + +| Resource | Flag | Default | Description | +|---|---|---|---| +| Azure AI Services (Foundry) | `deploy_foundry` | enabled | AI Services account with model deployments (gpt-4.1, gpt-5, gpt-5-mini) | +| Key Vault | `deploy_key_vault` | enabled | Centralized secret management with firewall rules | +| Azure AI Content Safety | `deploy_content_safety` | disabled | Prompt Shields and content moderation | +| Azure Container Apps Session Pool | `deploy_session_pool` | disabled | Sandboxed code execution | +| Azure Communication Services | `deploy_acs` | disabled | Inbound and outbound voice calls | +| Azure AI Search | `deploy_search` | disabled | Search index for Foundry IQ memory | +| Azure OpenAI (Embedding) | `deploy_embedding_aoai` | disabled | Embedding model for Foundry IQ | +| Application Insights | `deploy_monitoring` | disabled | Distributed tracing and log analytics | + +## Model Deployments + +The Foundry AI Services resource deploys models as configured in the request. The default set is: + +| Model | Version | SKU | Capacity | +|---|---|---|---| +| gpt-4.1 | 2025-04-14 | GlobalStandard | 10 | +| gpt-5 | 2025-08-07 | GlobalStandard | 10 | +| gpt-5-mini | 2025-08-07 | GlobalStandard | 10 | + +Custom model lists can be passed in the deploy request body. 
+ +## Deployment Parameters + +| Parameter | Default | Description | +|---|---|---| +| `resource_group` | `polyclaw-rg` | Target resource group | +| `location` | `eastus` | Azure region | +| `base_name` | auto-generated | Base name for all resources (generates unique suffix) | + +## API Endpoints + +| Method | Path | Description | +|---|---|---| +| `GET` | `/api/setup/foundry/status` | Current deployment status | +| `POST` | `/api/setup/foundry/deploy` | Trigger a Bicep deployment | +| `GET` | `/api/setup/foundry/deploy/stream` | SSE stream for deployment progress | +| `POST` | `/api/setup/foundry/decommission` | Tear down deployed resources | + +## Post-Deployment + +After a successful Bicep deployment, the deployer writes the following to the `.env` file: + +- `FOUNDRY_ENDPOINT` -- the AI Services endpoint URL +- `FOUNDRY_NAME` -- the resource display name +- `FOUNDRY_RESOURCE_GROUP` -- the resource group name +- `KEY_VAULT_URL` -- the Key Vault URL (if deployed) + +The runtime container picks up these values on restart and activates BYOK mode when `FOUNDRY_ENDPOINT` is present. + +## RBAC + +The Bicep deployment itself runs under your personal Azure CLI session on the admin container. After deployment, the runtime service principal needs the `Cognitive Services OpenAI User` role on the AI Services resource to perform BYOK inference. Use the **Fix Roles** button on the Agent Identity page (or `POST /api/identity/fix-roles`) to assign missing roles automatically. + +## Idempotent Re-runs + +Deployments are tracked in `DeployStateStore`. Re-running a deployment updates existing resources rather than creating duplicates. The Bicep template uses Azure Resource Manager's built-in idempotency. + +## Decommissioning + +`POST /api/setup/foundry/decommission` tears down resources created by the Bicep template. The deployment record is removed from the local store. + +## Template Location + +The Bicep template is at `infra/main.bicep` in the repository root. 
The compiled ARM template (`infra/main.json`) is also committed for environments where `az bicep` is not available. diff --git a/docs/content/deployment/docker.md b/docs/content/deployment/docker.md index 6e6dd88..4781572 100644 --- a/docs/content/deployment/docker.md +++ b/docs/content/deployment/docker.md @@ -31,9 +31,7 @@ The Dockerfile uses a two-stage build: The image includes everything the agent needs to operate: -- **GitHub Copilot CLI** (`@github/copilot`) -- the agent engine -- **GitHub CLI** (`gh`) -- authentication -- **Azure CLI** (`az`) -- infrastructure provisioning and bot registration +- **Azure CLI** (`az`) -- infrastructure provisioning, Bicep deployments, BYOK token acquisition - **Cloudflare tunnel** (`cloudflared`) -- automatic public endpoint for webhooks - **Playwright MCP + Chromium** -- headless browser for web-based skills - **Python runtime** -- the Polyclaw server, agent, and all backend services @@ -53,7 +51,7 @@ The TUI creates two Docker named volumes that persist across restarts: | Volume | Mount | Container | Contents | |---|---|---|---| | `polyclaw-data` | `/data` | both | Agent config, `.env`, skills, plugins, memory, scheduler state | -| `polyclaw-admin-home` | `/admin-home` | admin only | GitHub and Azure CLI authentication state | +| `polyclaw-admin-home` | `/admin-home` | admin only | Azure CLI authentication state | Because these are named Docker volumes, your data survives even when the containers are stopped and recreated on the next TUI launch. @@ -61,11 +59,11 @@ Because these are named Docker volumes, your data survives even when the contain Each container runs the same entrypoint script, which branches on `POLYCLAW_MODE`: -1. Sets `HOME` based on container mode: `/admin-home` (admin container) or `/runtime-home` (runtime container) -2. Cleans stale Copilot CLI runtime cache (keeps only the matching version) +1. 
Sets `HOME` based on container mode: `/admin-home` (admin container), `/runtime-home` (runtime container), or `/data` (combined/legacy) +2. Symlinks the Bicep binary into `$AZURE_CONFIG_DIR/bin` so `az bicep` works regardless of HOME 3. Loads environment variables from the shared persisted `.env` file 4. Resolves any `@kv:` Key Vault secret references (if configured) -5. Authenticates the runtime container's Azure identity (service principal or managed identity) +5. Authenticates the runtime container's Azure identity (service principal with retries, or managed identity) 6. Starts the server: `polyclaw-admin --admin-only` (admin) or `polyclaw-admin --runtime-only` (runtime) ## What Happens on Exit diff --git a/docs/content/deployment/runtime-isolation.md b/docs/content/deployment/runtime-isolation.md index 897b852..f032824 100644 --- a/docs/content/deployment/runtime-isolation.md +++ b/docs/content/deployment/runtime-isolation.md @@ -13,10 +13,10 @@ Polyclaw separates the admin plane from the agent runtime into independent conta ## Container Split -| Container | Port | Purpose | GitHub Token | Admin Secret | -|-----------|------|---------|-------------|-------------| -| **Admin** | 9090 | UI, configuration, deployment, MCP management, identity provisioning | Yes | Yes | -| **Runtime** | 8080 (internal) / 3978 (Bot webhook) | Agent execution, tool invocation, chat, bot webhook | No | No | +| Container | Port | Purpose | Admin Secret | +|-----------|------|---------|-------------| +| **Admin** | 9090 | UI, configuration, deployment, MCP management, identity provisioning | Yes | +| **Runtime** | 8080 (internal) / 3978 (Bot webhook) | Agent execution, tool invocation, chat, bot webhook | No | Both containers share a `/data` volume for session data and configuration. Each has its own HOME directory (`/admin-home` and `/runtime-home` respectively). 
@@ -133,7 +133,6 @@ The admin container proxies unmatched `/api/*` requests to the runtime container The security preflight checker validates the separated runtime setup: - HOME directories are separated (`secret_admin_cli_isolated`) -- GitHub token is not present in the runtime environment (`secret_no_github_runtime`) - Runtime identity exists and has valid credentials - RBAC assignments are correct and scoped to resource group level - No elevated roles are assigned diff --git a/docs/content/features/agent-identity.md b/docs/content/features/agent-identity.md index 3a2e63c..f19495c 100644 --- a/docs/content/features/agent-identity.md +++ b/docs/content/features/agent-identity.md @@ -15,7 +15,7 @@ Polyclaw provisions a dedicated Azure identity for the agent runtime with least- In earlier versions, the agent shared your Azure CLI session. Every Azure API call the agent made carried your personal credentials. If your account could delete a resource group, so could the agent. -The agent identity model changes this. The runtime container authenticates as a service principal (Docker) or user-assigned managed identity (Azure Container Apps) with only the roles it needs. Your personal Azure session stays on the admin container, which handles configuration and deployment. The runtime never sees your GitHub token, admin secret, or personal Azure credentials. +The agent identity model changes this. The runtime container authenticates as a service principal (Docker) or user-assigned managed identity (Azure Container Apps) with only the roles it needs. In BYOK mode, the runtime uses Entra ID bearer tokens to call the Foundry AI Services endpoint. Your personal Azure session stays on the admin container, which handles configuration and deployment. The runtime never sees your admin secret or personal Azure credentials. 
--- @@ -40,8 +40,9 @@ The runtime identity is assigned the minimum roles required for agent operation: | Reader | Resource group | Enumerate resources in the group | | Key Vault Secrets Officer | Key Vault resource | Read and write secrets (bot credentials, env vars) | | Azure ContainerApps Session Executor | Session pool | Execute code in sandbox sessions (if sandbox is configured) | -| Cognitive Services User | Content Safety resource | Call Prompt Shields and content moderation APIs (if content safety is configured) | - +| Cognitive Services User | Content Safety resource | Call Prompt Shields and content moderation APIs (if content safety is configured) | +| Cognitive Services OpenAI User | AI Services resource | BYOK inference on the Foundry endpoint (if `FOUNDRY_ENDPOINT` is configured) | + No elevated roles (Owner, Contributor, User Access Administrator, Role Based Access Control Administrator) are assigned. The [security preflight checker](/features/guardrails/) verifies this and warns if any elevated roles are detected. --- @@ -50,10 +51,10 @@ The separated container architecture enforces credential separation at the filesystem level: -| Container | HOME Directory | GitHub Token | Admin Secret | Azure CLI Session | -|-----------|---------------|-------------|-------------|-------------------| -| **Admin** | `/admin-home` | Yes | Yes | Personal (your identity) | -| **Runtime** | `/runtime-home` | No | No | Service principal or managed identity | +| Container | HOME Directory | Admin Secret | Azure CLI Session | +|-----------|---------------|-------------|------------------| +| **Admin** | `/admin-home` | Yes | Personal (your identity) | +| **Runtime** | `/runtime-home` | No | Service principal or managed identity | The runtime container authenticates using its provisioned identity on startup. 
It tries managed identity first (when running on Azure Container Apps), falls back to service principal (Docker), and degrades gracefully if neither is available. @@ -83,7 +84,7 @@ The identity inspection API provides read-only and remediation operations: - `GET /api/identity/info` -- resolved identity details (strategy, app ID, display name, principal ID) - `GET /api/identity/roles` -- full RBAC assignment list with per-role compliance checks -- `POST /api/identity/fix-roles` -- assign missing required roles (Content Safety, Session Executor) +- `POST /api/identity/fix-roles` -- assign missing required roles (Content Safety, Foundry OpenAI User, Session Executor) --- @@ -99,13 +100,12 @@ The security preflight checker validates the identity configuration with evidenc **RBAC checks:** - Role assignments can be enumerated -- Required roles (Bot Contributor, Reader, KV access, Session Executor, Cognitive Services User) are present +- Required roles (Bot Contributor, Reader, KV access, Session Executor, Cognitive Services User, Cognitive Services OpenAI User) are present - No elevated roles are assigned - All assignments are scoped to the resource group level or below (no subscription or management group scope) **Secret isolation checks:** - HOME directories are separated between admin and runtime -- GitHub token is not present in the runtime environment - Bot credentials, admin secret, and ACS callback tokens are properly configured - Key Vault is reachable - SP credentials are persisted to the environment file @@ -114,8 +114,8 @@ Each check produces a pass, fail, warn, or skip status along with the raw eviden --- -## GitHub Authentication +## Foundry BYOK Authentication -GitHub authentication is still required for polyclaw to function. The Copilot SDK is the agent's reasoning engine, and it requires a valid GitHub token. This authentication is handled by the **admin container** and is not shared with the runtime. 
+When `FOUNDRY_ENDPOINT` is configured, the runtime authenticates with Azure AI Services using Entra ID bearer tokens. The service principal (or managed identity) must have the `Cognitive Services OpenAI User` role on the AI Services resource. The fix-roles endpoint (`POST /api/identity/fix-roles`) resolves the Foundry resource from the endpoint hostname and assigns this role automatically if missing. -The plan is to revisit GitHub authentication in a future release to explore alternative authentication flows. +The BYOK provider acquires tokens via `az account get-access-token --resource https://cognitiveservices.azure.com` and passes them to the Copilot SDK session configuration. diff --git a/docs/content/features/monitoring.md b/docs/content/features/monitoring.md index 4be0079..2b9ef52 100644 --- a/docs/content/features/monitoring.md +++ b/docs/content/features/monitoring.md @@ -34,6 +34,8 @@ From the Infrastructure Settings page, select the **Monitoring** tab and choose The connection string is automatically written to the agent configuration. No manual setup required. +Alternatively, monitoring resources can be provisioned as part of the Bicep infrastructure deployment by enabling the `deploy_monitoring` flag. See [Bicep Infrastructure](/deployment/bicep/) for details. + ### Connect Existing If you already have an Application Insights resource, paste its connection string in the **Connect Existing** section. The agent validates the connection string format before saving. diff --git a/docs/content/getting-started/prerequisites.md b/docs/content/getting-started/prerequisites.md index 0560efe..bbc45c2 100644 --- a/docs/content/getting-started/prerequisites.md +++ b/docs/content/getting-started/prerequisites.md @@ -13,10 +13,17 @@ These are needed regardless of which deployment target you choose. 
|---|---|---| | [Bun](https://bun.sh) | latest | Runs the TUI (`app/tui`) | | [Docker](https://www.docker.com/) | 20+ | Builds and runs the Polyclaw container | +| [Azure CLI](https://aka.ms/installazurecli) (`az`) | 2.60+ | Infrastructure provisioning, Foundry authentication, Bicep deployments | | Git | any | Cloning the repository | The TUI installs its own Node dependencies automatically via `bun install` on first run. +Log in to Azure before launching the TUI: + +```bash +az login +``` + > The container image includes Python, Node.js, the frontend build, and all runtime dependencies. You do not need to install them on your host machine. ## Optional -- Azure Container Apps Target @@ -25,17 +32,10 @@ If you want to deploy to Azure instead of running locally, you also need: | Dependency | Purpose | |---|---| -| [Azure CLI](https://aka.ms/installazurecli) (`az`) | Provisioning ACA, ACR, storage, and VNet resources | | Azure subscription | Hosting the Container App and associated resources | The TUI checks for `az` availability and login status automatically. If `az` is not found or you are not logged in, the ACA target is disabled in the picker with a descriptive message. -Log in before launching the TUI: - -```bash -az login -``` - ## Optional -- Extended Features These are not required for basic operation but enable additional capabilities once polyclaw is running. Items marked **auto-deployed** are set up automatically during the initial deployment; the rest require manual configuration. 
@@ -45,11 +45,12 @@ These are not required for basic operation but enable additional capabilities on | Cloudflare CLI (`cloudflared`) | Tunnel to expose bot endpoint | **auto-deployed** | | Playwright (`npx playwright install chromium`) | Browser automation MCP server | **auto-deployed** | | Azure Bot Service | Telegram channel messaging | **auto-deployed** | +| Azure AI Services (Foundry) | LLM inference in BYOK mode | Bicep deploy | | Azure Communication Services | Inbound and outbound voice calls | manual | -| Azure Key Vault | Centralized secret management | manual | -| Azure Container Apps Dynamic Sessions | Sandboxed code execution | manual | +| Azure Key Vault | Centralized secret management | Bicep deploy | +| Azure Container Apps Dynamic Sessions | Sandboxed code execution | Bicep deploy | +| Azure AI Content Safety | Prompt Shields and content moderation | Bicep deploy | | Azure OpenAI | Realtime voice model (gpt-4o-realtime) | manual | -| GitHub CLI (`gh`) | GitHub MCP server plugin | manual | These services are configured through the TUI setup screen or the web dashboard after initial deployment. diff --git a/docs/content/getting-started/quickstart.md b/docs/content/getting-started/quickstart.md index 70a398f..e44ecfd 100644 --- a/docs/content/getting-started/quickstart.md +++ b/docs/content/getting-started/quickstart.md @@ -76,7 +76,9 @@ The following services are deployed automatically during this step: - **Playwright browser** -- headless browser for web-based skills - **Bot Service** -- Bot Framework registration for Telegram and other channels -All other integrations (voice via ACS, Key Vault secrets, additional MCP servers) are optional and can be configured later through the [Setup Wizard](/getting-started/setup-wizard/) or [Configuration](/configuration/). 
+All other integrations (Foundry BYOK, voice via ACS, Key Vault secrets, additional MCP servers) are optional and can be configured later through the [Setup Wizard](/getting-started/setup-wizard/) or [Configuration](/configuration/). + +To deploy Azure AI Services infrastructure (Foundry, Key Vault, Content Safety, etc.) use the **Deploy Infrastructure** button in the Setup Wizard or Bicep deployment. See [Bicep Infrastructure](/deployment/bicep/) for details. ![TUI interactive chat](/screenshots/tui-chat.png) diff --git a/docs/content/getting-started/setup-wizard.md b/docs/content/getting-started/setup-wizard.md index 58d956b..0b6be54 100644 --- a/docs/content/getting-started/setup-wizard.md +++ b/docs/content/getting-started/setup-wizard.md @@ -15,11 +15,11 @@ Once polyclaw finishes building and passes its health check, the TUI automatical

Understand what these logins mean

-

Both Azure Login and GitHub Login are required during setup. You cannot skip either one at this stage.

+

Azure Login is required during setup.

-

GitHub Login authenticates with GitHub Copilot. The Copilot SDK is the agent’s reasoning engine—without it, polyclaw cannot function. This authentication must remain active for the lifetime of the agent. Your GitHub account determines which Copilot models and rate limits are available.

+

Azure Login signs you in with the Azure CLI. During setup, your Azure identity is used to provision infrastructure (Foundry AI Services, Bot Service, Key Vault, etc.) via Bicep templates. After setup, the agent runtime operates under its own Azure identity—a service principal (Docker) or user-assigned managed identity (Azure Container Apps) with least-privilege RBAC. See Agent Identity for details.

-

Azure Login signs you in with the Azure CLI. During setup, your Azure identity is used to provision infrastructure (Bot Service, Container Registry, Key Vault, etc.). After setup, the agent runtime operates under its own Azure identity—a service principal (Docker) or user-assigned managed identity (Azure Container Apps) with least-privilege RBAC. See Agent Identity for details.

+

Foundry BYOK (Bring Your Own Key) is the default authentication mode. When FOUNDRY_ENDPOINT is configured, the agent uses your Azure AI Services resource directly for LLM inference. The runtime authenticates via Entra ID bearer tokens (az account get-access-token) and requires the Cognitive Services OpenAI User role on the Foundry resource.

The runtime identity is scoped to:

    @@ -27,22 +27,22 @@ Once polyclaw finishes building and passes its health check, the TUI automatical
  • Reader on the resource group (enumerate resources)
  • Key Vault access (read/write secrets)
  • Session Executor (if sandbox is configured)
  • Cognitive Services User (Content Safety, if configured)
  • Cognitive Services OpenAI User (Foundry AI Services, for BYOK inference)

No elevated roles (Owner, Contributor, User Access Administrator) are assigned to the runtime. The security preflight checker verifies this. Your personal Azure CLI session remains on the admin container and is not shared with the runtime.

To further limit exposure, enable Guardrails to require human approval before the agent executes high-risk tools. Enable Sandbox Execution to redirect code execution to isolated Azure Container Apps sessions.

-Status indicators for Azure, GitHub, and tunnel connectivity. Each can be initiated directly from this page: +Status indicators for Azure and tunnel connectivity. Each can be initiated directly from this page: - **Azure Login** -- opens device-code flow for Azure CLI authentication - **Azure Logout** -- signs out of the current Azure CLI session -- **GitHub Login** -- authenticates with GitHub Copilot via device code -- **Set GitHub Token** -- manually configure a GitHub PAT - **Start Tunnel** -- starts a Cloudflare tunnel to expose the bot endpoint publicly

You can sign out of Azure after setup

-

Your personal Azure CLI session is used during setup for provisioning infrastructure and the runtime identity. Once the runtime identity is provisioned (service principal or managed identity), the agent authenticates independently. If you sign out of Azure on the admin container, core agent functionality (chat, skills, scheduling) continues to work. Operations that require your personal Azure CLI session (e.g., provisioning new infrastructure) will fail until you sign back in.

+

Your personal Azure CLI session is used during setup for provisioning infrastructure and the runtime identity. Once the runtime identity is provisioned (service principal or managed identity), the agent authenticates independently using Entra ID bearer tokens for BYOK inference and scoped RBAC for Azure resource management. If you sign out of Azure on the admin container, core agent functionality (chat, skills, scheduling) continues to work. Operations that require your personal Azure CLI session (e.g., provisioning new infrastructure) will fail until you sign back in.

### Bot Configuration @@ -63,13 +63,13 @@ A form for configuring the Bot Framework deployment: ### Infrastructure Actions - **Save Configuration** -- persists bot and channel settings -- **Deploy Infrastructure** -- provisions Azure Bot Service, channels, and related resources +- **Deploy Infrastructure** -- provisions Azure infrastructure via Bicep (AI Services, Key Vault, Content Safety, etc.). See [Bicep Infrastructure](/deployment/bicep/). - **Deploy Content Safety** -- provisions Azure AI Content Safety for Prompt Shields integration (recommended) - **Provision Agent Identity** -- creates the runtime service principal or managed identity with least-privilege RBAC - **Decommission Infrastructure** -- tears down deployed Azure resources - **Run Preflight Checks** -- validates bot credentials, JWT, tunnel, endpoint auth, channel security, identity, and RBAC - **Run Security Preflight** -- comprehensive evidence-based validation of identity, RBAC roles, secret isolation, and credential separation -- **Run Smoke Test** -- end-to-end connectivity test for Copilot +- **Run Smoke Test** -- end-to-end connectivity test ![Preflight checks](/screenshots/web-infra-preflight.png) diff --git a/docs/content/responsible-ai/_index.md b/docs/content/responsible-ai/_index.md index b849091..8498101 100644 --- a/docs/content/responsible-ai/_index.md +++ b/docs/content/responsible-ai/_index.md @@ -9,7 +9,7 @@ Polyclaw is in **early preview**. Treat it as experimental software and read thi ## Understand the Risks -Polyclaw is an autonomous agent. The agent runtime is architecturally separated from the admin plane and operates under its **own Azure managed identity** with least-privilege RBAC -- it does not share your personal Azure credentials. However, it can still execute code, deploy infrastructure, send messages, and make phone calls within the scope of its assigned roles. GitHub authentication remains a prerequisite for the Copilot SDK. +Polyclaw is an autonomous agent. 
The agent runtime is architecturally separated from the admin plane and operates under its **own Azure managed identity** with least-privilege RBAC -- it does not share your personal Azure credentials. However, it can still execute code, deploy infrastructure, send messages, and make phone calls within the scope of its assigned roles. **What can go wrong:** @@ -18,7 +18,7 @@ Polyclaw is an autonomous agent. The agent runtime is architecturally separated - **Cost overruns.** The agent can spin up Azure resources, make API calls, and schedule recurring tasks. Without monitoring, a runaway loop could generate unexpected cloud bills. - **Code execution.** The agent can execute arbitrary code in the runtime container or in a [sandbox](/features/sandbox/). [Guardrails](/features/guardrails/) can require human approval before code execution occurs. - **Data leakage.** Conversations, files, and tool outputs pass through the Copilot SDK and any configured channels. Sensitive data in your workspace could be included in agent context unintentionally. -- **Availability of external services.** The agent depends on the GitHub Copilot SDK, Azure services, and third-party APIs. Outages in any of these can cause failures or degraded behavior. +- **Availability of external services.** The agent depends on Azure AI Services (Foundry BYOK), Azure services, and third-party APIs. Outages in any of these can cause failures or degraded behavior. This is not a theoretical list. These are real failure modes of autonomous agents. You should be comfortable with these risks before deploying Polyclaw in any environment that matters. 
@@ -48,18 +48,18 @@ The agent runtime operates under its own Azure identity rather than your persona | Service Principal | Docker / Docker Compose | `polyclaw-runtime` SP with client secret | | Managed Identity | Azure Container Apps | `polyclaw-runtime-mi` user-assigned MI | -The runtime identity is assigned least-privilege RBAC roles (Bot Service Contributor, Reader, Key Vault access, Session Executor). No elevated roles (Owner, Contributor, User Access Administrator, Role Based Access Control Administrator) are assigned. The security preflight checker verifies this. +The runtime identity is assigned least-privilege RBAC roles (Bot Service Contributor, Reader, Key Vault access, Session Executor, Cognitive Services OpenAI User). No elevated roles (Owner, Contributor, User Access Administrator, Role Based Access Control Administrator) are assigned. The security preflight checker verifies this. ### Separated Admin and Agent Runtime The application is split into two containers to enforce credential isolation: -| Container | Purpose | GitHub Token | Admin Secret | Azure Identity | -|-----------|---------|-------------|-------------|----------------| -| **Admin** | UI, configuration, deployment | Yes | Yes | Your personal CLI session | -| **Runtime** | Agent execution, tool invocation | No | No | Service principal or managed identity | +| Container | Purpose | Admin Secret | Azure Identity | +|-----------|---------|-------------|----------------| +| **Admin** | UI, configuration, deployment | Yes | Your personal CLI session | +| **Runtime** | Agent execution, tool invocation | No | Service principal or managed identity | -Each container has its own HOME directory. The runtime container never sees the GitHub token, admin secret, or personal Azure credentials. +Each container has its own HOME directory. The runtime container never sees the admin secret or personal Azure credentials. 
### Guardrails From 4dfd43124d7b414951309e5bfd762737192a5efe Mon Sep 17 00:00:00 2001 From: Aymen Date: Wed, 8 Apr 2026 10:02:05 +0200 Subject: [PATCH 5/5] feat: Update README with Foundry BYOK support --- README.md | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4d8a007..bd0b2a0 100644 --- a/README.md +++ b/README.md @@ -19,9 +19,23 @@ --- -> **Warning:** Polyclaw is an autonomous agent. It can execute code, deploy infrastructure, send messages to real people, and make phone calls. The agent runtime is architecturally separated from the admin plane and operates under its **own Azure managed identity** with least-privilege RBAC -- it does **not** share your personal Azure credentials. GitHub authentication is still a prerequisite (the Copilot SDK is the agent's reasoning engine). Understand the [risks](https://aymenfurter.github.io/polyclaw/responsible-ai/) before running it. +## What's New -Polyclaw is an autonomous AI copilot built on the **GitHub Copilot SDK**. It gives you the full power of GitHub Copilot -- untethered from the IDE. It writes code, interacts with your repos via the GitHub CLI, authors its own skills at runtime, reaches out to you proactively when something matters, schedules tasks for the future, and can even call you on the phone for urgent matters. +**Foundry BYOK (Bring Your Own Key).** Polyclaw now supports using your own Azure AI Services resource for LLM inference. Set `FOUNDRY_ENDPOINT` and the agent authenticates via Entra ID bearer tokens -- no GitHub token required. The runtime service principal gets the `Cognitive Services OpenAI User` role automatically via the fix-roles endpoint. + +**Bicep infrastructure deployment.** A single `infra/main.bicep` template replaces scattered Azure CLI provisioning. Deploy AI Services (with model deployments like gpt-4.1), Key Vault, Content Safety, session pools, monitoring, and more from the Setup Wizard or API. 
All resource creation is parameterised and idempotent. + +**Default model changed to gpt-4.1.** Both `COPILOT_MODEL` and `MEMORY_MODEL` now default to `gpt-4.1`. + +**Improved container HOME isolation.** The entrypoint now sets HOME to `/admin-home`, `/runtime-home`, or `/data` depending on container mode, with Bicep binary symlinking so `az bicep` works in all modes. + +**Headless TUI setup.** New `app/tui/src/headless/` modules for non-interactive setup and ACA provisioning. + +--- + +> **Warning:** Polyclaw is an autonomous agent. It can execute code, deploy infrastructure, send messages to real people, and make phone calls. The agent runtime is architecturally separated from the admin plane and operates under its **own Azure managed identity** with least-privilege RBAC -- it does **not** share your personal Azure credentials. Understand the [risks](https://aymenfurter.github.io/polyclaw/responsible-ai/) before running it. + +Polyclaw is an autonomous AI copilot built on the **Copilot SDK** with **Foundry BYOK** support. It gives you the full power of AI inference through your own Azure AI Services resource -- untethered from the IDE. It writes code, authors its own skills at runtime, reaches out to you proactively when something matters, schedules tasks for the future, and can even call you on the phone for urgent matters. ## Why Polyclaw? 
@@ -93,9 +107,8 @@ For full setup instructions, configuration reference, and feature guides, see th ## Prerequisites - Docker -- A GitHub account with a Copilot subscription -- An Azure subscription (needed for voice, bot channels, and Foundry integration) -- [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) (if deploying to Azure) +- [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) (`az login` required) +- An Azure subscription (for Foundry BYOK inference, voice, bot channels, and infrastructure provisioning) ## Security, Governance & Responsible AI @@ -103,7 +116,7 @@ Polyclaw is in **early preview**. Treat it as experimental software and read thi ### Understand the Risks -Polyclaw is an autonomous agent. The agent runtime is architecturally separated from the admin plane and operates under its **own Azure managed identity** with least-privilege RBAC -- it does **not** share your personal Azure credentials. However, it can still execute code, deploy infrastructure, send messages, and make phone calls within the scope of its assigned roles. GitHub authentication remains a prerequisite for using the Copilot SDK. +Polyclaw is an autonomous agent. The agent runtime is architecturally separated from the admin plane and operates under its **own Azure managed identity** with least-privilege RBAC -- it does **not** share your personal Azure credentials. However, it can still execute code, deploy infrastructure, send messages, and make phone calls within the scope of its assigned roles. **What can go wrong:** unintended actions from misunderstood instructions, credential exposure via prompt injection or badly written skills, cost overruns from runaway loops provisioning Azure resources, arbitrary code execution without human review, and data leakage through conversations and tool outputs passing through configured channels.