diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/route.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/route.ts index 32e0f76..b37e33b 100644 --- a/src/app/api/v2/ai/agent/sessions/[sessionId]/route.ts +++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/route.ts @@ -178,7 +178,7 @@ import AgentSessionService from 'server/services/agentSession'; * properties: * stage: * type: string - * enum: [create_session, connect_runtime] + * enum: [create_session, connect_runtime, attach_services] * title: * type: string * message: diff --git a/src/app/api/v2/ai/agent/sessions/[sessionId]/services/route.ts b/src/app/api/v2/ai/agent/sessions/[sessionId]/services/route.ts index 2d2bed9..0ae4ba0 100644 --- a/src/app/api/v2/ai/agent/sessions/[sessionId]/services/route.ts +++ b/src/app/api/v2/ai/agent/sessions/[sessionId]/services/route.ts @@ -215,7 +215,7 @@ function isRequestedSessionServiceRef(value: unknown): value is RequestedAgentSe * properties: * stage: * type: string - * enum: [create_session, connect_runtime] + * enum: [create_session, connect_runtime, attach_services] * title: * type: string * message: diff --git a/src/app/api/v2/ai/agent/sessions/route.ts b/src/app/api/v2/ai/agent/sessions/route.ts index 86b75e5..c0f1f7b 100644 --- a/src/app/api/v2/ai/agent/sessions/route.ts +++ b/src/app/api/v2/ai/agent/sessions/route.ts @@ -325,7 +325,7 @@ async function resolveRequestedServices( * properties: * stage: * type: string - * enum: [create_session, connect_runtime] + * enum: [create_session, connect_runtime, attach_services] * title: * type: string * message: @@ -520,7 +520,7 @@ async function resolveRequestedServices( * properties: * stage: * type: string - * enum: [create_session, connect_runtime] + * enum: [create_session, connect_runtime, attach_services] * title: * type: string * message: diff --git a/src/server/lib/agentSession/__tests__/podFactory.test.ts b/src/server/lib/agentSession/__tests__/podFactory.test.ts index a01bd0a..22928b0 100644 --- a/src/server/lib/agentSession/__tests__/podFactory.test.ts +++ b/src/server/lib/agentSession/__tests__/podFactory.test.ts @@ -50,10 +50,13 @@ jest.mock('server/lib/logger', () => ({ import { buildSessionWorkspacePodSpec, + createSessionWorkspacePodWithoutWaiting, createSessionWorkspacePod, deleteSessionWorkspacePod, SessionWorkspacePodOptions, SESSION_WORKSPACE_GATEWAY_PORT_NAME, + waitForSessionWorkspacePodReady, + waitForSessionWorkspacePodScheduled, } from '../podFactory'; const baseOpts: SessionWorkspacePodOptions = { @@ -859,6 +862,15 @@ describe('podFactory', () => { }); describe('createSessionWorkspacePod', () => { + it('creates pod without waiting when requested explicitly', async () => { + mockCreatePod.mockResolvedValue({ body: { metadata: { name: 'agent-abc123' } } }); + + await createSessionWorkspacePodWithoutWaiting(baseOpts); + + expect(mockCreatePod).toHaveBeenCalledTimes(1); + expect(mockReadPod).not.toHaveBeenCalled(); + }); + it('creates pod via K8s API', async () => { mockCreatePod.mockResolvedValue({ body: { metadata: { name: 'agent-abc123' } } }); @@ -1001,6 +1013,63 @@ describe('podFactory', () => { }) ).rejects.toThrow('Session workspace pod did not become ready within 1ms'); }); + + it('returns once the pod is scheduled even before readiness succeeds', async () => { + mockReadPod + .mockResolvedValueOnce({ + body: { + spec: { + nodeName: 'worker-a', + }, + status: { + phase: 'Pending', + }, + }, + }) + .mockResolvedValue({ + body: { + status: { + phase: 'Running', + conditions: [{ type: 'Ready', status: 'True' }], + }, + }, + }); + + await expect(waitForSessionWorkspacePodScheduled('test-ns', 'agent-abc123')).resolves.toEqual( + expect.objectContaining({ + spec: expect.objectContaining({ + nodeName: 'worker-a', + }), + }) + ); + }); + + it('times out when the pod never gets a node assignment', async () => { + mockReadPod.mockResolvedValue({ + body: { + status: { + phase: 'Pending', + }, + }, + }); + + await expect( + waitForSessionWorkspacePodScheduled('test-ns', 'agent-abc123', { + timeoutMs: 1, + pollMs: 0, + }) + ).rejects.toThrow('Session workspace pod was not scheduled within 1ms'); + }); + + it('keeps the ready wait available as a standalone helper', async () => { + await expect(waitForSessionWorkspacePodReady('test-ns', 'agent-abc123')).resolves.toEqual( + expect.objectContaining({ + status: expect.objectContaining({ + conditions: [{ type: 'Ready', status: 'True' }], + }), + }) + ); + }); }); describe('deleteSessionWorkspacePod', () => { diff --git a/src/server/lib/agentSession/podFactory.ts b/src/server/lib/agentSession/podFactory.ts index 29c1d31..869010a 100644 --- a/src/server/lib/agentSession/podFactory.ts +++ b/src/server/lib/agentSession/podFactory.ts @@ -758,20 +758,77 @@ function summarizeLogLine(logs: string | null): string | null { return firstLine || null; } -async function waitForSessionWorkspacePodReady( +function resolveSessionWorkspacePodWaitConfig(readiness?: SessionWorkspacePodOptions['readiness']): { + timeoutMs: number; + pollMs: number; +} { + return { + timeoutMs: + normalizeNonNegativeInteger(readiness?.timeoutMs) ?? + normalizeNonNegativeInteger(process.env.AGENT_SESSION_WORKSPACE_READY_TIMEOUT_MS) ?? + 60000, + pollMs: + normalizeNonNegativeInteger(readiness?.pollMs) ?? + normalizeNonNegativeInteger(process.env.AGENT_SESSION_WORKSPACE_READY_POLL_MS) ?? + 1000, + }; +} + +function getFailingPodStatusContainerName(pod: k8s.V1Pod): string | null { + return ( + [...(pod.status?.initContainerStatuses || []), ...(pod.status?.containerStatuses || [])].find((status) => { + const waiting = status.state?.waiting; + if ( + waiting?.reason && + [ + 'ErrImagePull', + 'ImagePullBackOff', + 'CrashLoopBackOff', + 'CreateContainerConfigError', + 'RunContainerError', + ].includes(waiting.reason) + ) { + return true; + } + + const terminated = status.state?.terminated; + return !!(terminated?.reason && terminated.exitCode !== 0); + })?.name || null + ); +} + +async function throwIfSessionWorkspacePodFailedToStart( + coreApi: k8s.CoreV1Api, + namespace: string, + podName: string, + pod: k8s.V1Pod +): Promise { + const failure = getPodStartupFailure(pod); + if (!failure) { + return; + } + + const failingContainer = getFailingPodStatusContainerName(pod); + const containerLogs = failingContainer ? await getContainerLogs(coreApi, namespace, podName, failingContainer) : null; + + if (containerLogs) { + getLogger().error( + { namespace, podName, containerName: failingContainer, logs: containerLogs }, + `Session: startup logs captured containerName=${failingContainer} namespace=${namespace} podName=${podName}` + ); + } + + const logSummary = summarizeLogLine(containerLogs); + throw new Error(`Session workspace pod failed to start: ${failure}${logSummary ? ` - ${logSummary}` : ''}`); +} + +async function waitForSessionWorkspacePodReadyInternal( coreApi: k8s.CoreV1Api, namespace: string, podName: string, readiness?: SessionWorkspacePodOptions['readiness'] ): Promise { - const readyTimeoutMs = - normalizeNonNegativeInteger(readiness?.timeoutMs) ?? - normalizeNonNegativeInteger(process.env.AGENT_SESSION_WORKSPACE_READY_TIMEOUT_MS) ?? - 60000; - const readyPollMs = - normalizeNonNegativeInteger(readiness?.pollMs) ?? - normalizeNonNegativeInteger(process.env.AGENT_SESSION_WORKSPACE_READY_POLL_MS) ?? - 1000; + const { timeoutMs: readyTimeoutMs, pollMs: readyPollMs } = resolveSessionWorkspacePodWaitConfig(readiness); const deadline = Date.now() + readyTimeoutMs; let lastObservedState = 'pending'; let lastPod: k8s.V1Pod | null = null; @@ -779,41 +836,7 @@ async function waitForSessionWorkspacePodReady( while (Date.now() < deadline) { const { body: pod } = await coreApi.readNamespacedPod(podName, namespace); lastPod = pod; - const failure = getPodStartupFailure(pod); - if (failure) { - const failingContainer = - [...(pod.status?.initContainerStatuses || []), ...(pod.status?.containerStatuses || [])].find((status) => { - const waiting = status.state?.waiting; - if ( - waiting?.reason && - [ - 'ErrImagePull', - 'ImagePullBackOff', - 'CrashLoopBackOff', - 'CreateContainerConfigError', - 'RunContainerError', - ].includes(waiting.reason) - ) { - return true; - } - - const terminated = status.state?.terminated; - return !!(terminated?.reason && terminated.exitCode !== 0); - })?.name || null; - - const containerLogs = failingContainer - ? await getContainerLogs(coreApi, namespace, podName, failingContainer) - : null; - if (containerLogs) { - getLogger().error( - { namespace, podName, containerName: failingContainer, logs: containerLogs }, - `Session: startup logs captured containerName=${failingContainer} namespace=${namespace} podName=${podName}` - ); - } - - const logSummary = summarizeLogLine(containerLogs); - throw new Error(`Session workspace pod failed to start: ${failure}${logSummary ? ` - ${logSummary}` : ''}`); - } + await throwIfSessionWorkspacePodFailedToStart(coreApi, namespace, podName, pod); if (isPodReady(pod)) { return pod; @@ -842,7 +865,7 @@ async function waitForSessionWorkspacePodReady( ); } -export async function createSessionWorkspacePod(opts: SessionWorkspacePodOptions): Promise { +export async function createSessionWorkspacePodWithoutWaiting(opts: SessionWorkspacePodOptions): Promise { const logger = getLogger(); const coreApi = getCoreApi(); const pod = buildSessionWorkspacePodSpec(opts); @@ -864,8 +887,46 @@ export async function createSessionWorkspacePod(opts: SessionWorkspacePodOptions throw error; } +} + +export async function waitForSessionWorkspacePodScheduled( + namespace: string, + podName: string, + readiness?: SessionWorkspacePodOptions['readiness'] +): Promise { + const coreApi = getCoreApi(); + const { timeoutMs, pollMs } = resolveSessionWorkspacePodWaitConfig(readiness); + const deadline = Date.now() + timeoutMs; + let lastObservedState = 'pending'; + + while (Date.now() < deadline) { + const { body: pod } = await coreApi.readNamespacedPod(podName, namespace); + await throwIfSessionWorkspacePodFailedToStart(coreApi, namespace, podName, pod); + + if (pod.spec?.nodeName) { + return pod; + } + + lastObservedState = summarizePodState(pod); + await sleep(pollMs); + } + + throw new Error(`Session workspace pod was not scheduled within ${timeoutMs}ms: ${lastObservedState}`); +} + +export async function waitForSessionWorkspacePodReady( + namespace: string, + podName: string, + readiness?: SessionWorkspacePodOptions['readiness'] +): Promise { + return waitForSessionWorkspacePodReadyInternal(getCoreApi(), namespace, podName, readiness); +} + +export async function createSessionWorkspacePod(opts: SessionWorkspacePodOptions): Promise { + const logger = getLogger(); + await createSessionWorkspacePodWithoutWaiting(opts); - const result = await waitForSessionWorkspacePodReady(coreApi, opts.namespace, opts.podName, opts.readiness); + const result = await waitForSessionWorkspacePodReady(opts.namespace, opts.podName, opts.readiness); logger.info(`Session: workspace pod ready podName=${opts.podName} namespace=${opts.namespace}`); return result; } diff --git a/src/server/lib/agentSession/startupFailureState.ts b/src/server/lib/agentSession/startupFailureState.ts index 3a4f994..883f8f5 100644 --- a/src/server/lib/agentSession/startupFailureState.ts +++ b/src/server/lib/agentSession/startupFailureState.ts @@ -20,7 +20,7 @@ const AGENT_SESSION_STARTUP_FAILURE_REDIS_PREFIX = 'lifecycle:agent:session:star const AGENT_SESSION_STARTUP_FAILURE_TTL_SECONDS = 60 * 60; const AGENT_SESSION_STARTUP_FAILURE_MESSAGE_MAX_LENGTH = 4000; -export type AgentSessionStartupFailureStage = 'create_session' | 'connect_runtime'; +export type AgentSessionStartupFailureStage = 'create_session' | 'connect_runtime' | 'attach_services'; export interface AgentSessionStartupFailureState { sessionId: string; @@ -112,7 +112,12 @@ function classifyFailure( } return { - title: stage === 'create_session' ? 'Agent session failed to start' : 'Session workspace connection failed', + title: + stage === 'create_session' + ? 'Agent session failed to start' + : stage === 'attach_services' + ? 'Attached services failed to start' + : 'Session workspace connection failed', message, }; } diff --git a/src/server/services/__tests__/agentSession.test.ts b/src/server/services/__tests__/agentSession.test.ts index 7d727d9..1064a23 100644 --- a/src/server/services/__tests__/agentSession.test.ts +++ b/src/server/services/__tests__/agentSession.test.ts @@ -208,7 +208,13 @@ import Build from 'server/models/Build'; import Deploy from 'server/models/Deploy'; import { createAgentPvc, deleteAgentPvc } from 'server/lib/agentSession/pvcFactory'; import { createAgentApiKeySecret, deleteAgentApiKeySecret } from 'server/lib/agentSession/apiKeySecretFactory'; -import { createSessionWorkspacePod, deleteSessionWorkspacePod } from 'server/lib/agentSession/podFactory'; +import { + createSessionWorkspacePod, + createSessionWorkspacePodWithoutWaiting, + deleteSessionWorkspacePod, + waitForSessionWorkspacePodReady, + waitForSessionWorkspacePodScheduled, +} from 'server/lib/agentSession/podFactory'; import { createSessionWorkspaceService, deleteSessionWorkspaceService, @@ -288,6 +294,9 @@ const mockDisableDevMode = jest.fn().mockResolvedValue(undefined); (createAgentPvc as jest.Mock).mockResolvedValue({}); (createAgentApiKeySecret as jest.Mock).mockResolvedValue({}); (createSessionWorkspacePod as jest.Mock).mockResolvedValue({ spec: { nodeName: 'agent-node-a' } }); +(createSessionWorkspacePodWithoutWaiting as jest.Mock).mockResolvedValue(undefined); +(waitForSessionWorkspacePodReady as jest.Mock).mockResolvedValue({ spec: { nodeName: 'agent-node-a' } }); +(waitForSessionWorkspacePodScheduled as jest.Mock).mockResolvedValue({ spec: { nodeName: 'agent-node-a' } }); (createSessionWorkspaceService as jest.Mock).mockResolvedValue({}); (ensureAgentSessionServiceAccount as jest.Mock).mockResolvedValue('agent-sa'); (deleteSessionWorkspacePod as jest.Mock).mockResolvedValue(undefined); @@ -401,6 +410,9 @@ describe('AgentSessionService', () => { (createAgentPvc as jest.Mock).mockResolvedValue({}); (createAgentApiKeySecret as jest.Mock).mockResolvedValue({}); (createSessionWorkspacePod as jest.Mock).mockResolvedValue({ spec: { nodeName: 'agent-node-a' } }); + (createSessionWorkspacePodWithoutWaiting as jest.Mock).mockResolvedValue(undefined); + (waitForSessionWorkspacePodReady as jest.Mock).mockResolvedValue({ spec: { nodeName: 'agent-node-a' } }); + (waitForSessionWorkspacePodScheduled as jest.Mock).mockResolvedValue({ spec: { nodeName: 'agent-node-a' } }); (createSessionWorkspaceService as jest.Mock).mockResolvedValue({}); (ensureAgentSessionServiceAccount as jest.Mock).mockResolvedValue('agent-sa'); (deleteSessionWorkspacePod as jest.Mock).mockResolvedValue(undefined); @@ -615,12 +627,15 @@ describe('AgentSessionService', () => { }) ); expect(createAgentPvc).not.toHaveBeenCalled(); - expect(createSessionWorkspacePod).toHaveBeenCalledWith( + expect(createSessionWorkspacePod).not.toHaveBeenCalled(); + expect(createSessionWorkspacePodWithoutWaiting).toHaveBeenCalledWith( expect.objectContaining({ pvcName: 'agent-prewarm-pvc-1234', skipWorkspaceBootstrap: true, }) ); + expect(waitForSessionWorkspacePodScheduled).toHaveBeenCalledWith('test-ns', 'agent-build-123', undefined); + expect(waitForSessionWorkspacePodReady).toHaveBeenCalledWith('test-ns', 'agent-build-123', undefined); expect(mockSessionQuery.insertAndFetch).toHaveBeenCalledWith( expect.objectContaining({ pvcName: 'agent-prewarm-pvc-1234', @@ -739,7 +754,8 @@ describe('AgentSessionService', () => { }) ); expect(createAgentPvc).not.toHaveBeenCalled(); - expect(createSessionWorkspacePod).toHaveBeenCalledWith( + expect(createSessionWorkspacePod).not.toHaveBeenCalled(); + expect(createSessionWorkspacePodWithoutWaiting).toHaveBeenCalledWith( expect.objectContaining({ pvcName: 'agent-prewarm-pvc-5678', skipWorkspaceBootstrap: true, @@ -752,6 +768,8 @@ describe('AgentSessionService', () => { ]), }) ); + expect(waitForSessionWorkspacePodScheduled).toHaveBeenCalledWith('test-ns', 'agent-build-123', undefined); + expect(waitForSessionWorkspacePodReady).toHaveBeenCalledWith('test-ns', 'agent-build-123', undefined); }); it('passes resolved agent-session resources through to pod creation when provided', async () => { @@ -1007,6 +1025,97 @@ describe('AgentSessionService', () => { await expect(createPromise).resolves.toEqual(expect.objectContaining({ status: 'active' })); }); + it('starts attached services after scheduling but before the agent pod is ready for prewarmed same-node sessions', async () => { + const scheduled = createDeferred<{ spec: { nodeName: string } }>(); + const ready = createDeferred<{ spec: { nodeName: string } }>(); + + mockGetCompatibleReadyPrewarm.mockResolvedValue({ + uuid: 'prewarm-1', + pvcName: 'agent-prewarm-pvc-1234', + services: ['web'], + status: 'ready', + }); + (waitForSessionWorkspacePodScheduled as jest.Mock).mockImplementationOnce(() => scheduled.promise); + (waitForSessionWorkspacePodReady as jest.Mock).mockImplementationOnce(() => ready.promise); + + const optsWithServices: CreateSessionOptions = { + ...baseOpts, + buildUuid: 'build-123', + services: [ + { + name: 'web', + deployId: 1, + resourceName: 'web-build-uuid', + devConfig: { image: 'node:20', command: 'pnpm dev' }, + }, + ], + }; + + const createPromise = AgentSessionService.createSession(optsWithServices); + await new Promise((resolve) => setImmediate(resolve)); + + expect(createSessionWorkspacePodWithoutWaiting).toHaveBeenCalled(); + expect(mockEnableDevMode).not.toHaveBeenCalled(); + + scheduled.resolve({ spec: { nodeName: 'agent-node-a' } }); + await new Promise((resolve) => setImmediate(resolve)); + + expect(mockEnableDevMode).toHaveBeenCalledWith( + expect.objectContaining({ + deploymentName: 'web-build-uuid', + requiredNodeName: 'agent-node-a', + }) + ); + expect(mockSessionQuery.patch).not.toHaveBeenCalledWith(expect.objectContaining({ status: 'active' })); + + ready.resolve({ spec: { nodeName: 'agent-node-a' } }); + + await expect(createPromise).resolves.toEqual(expect.objectContaining({ status: 'active' })); + }); + + it('starts attached services immediately for prewarmed sessions when same-node placement is disabled', async () => { + const ready = createDeferred<{ spec: { nodeName: string } }>(); + + mockGetCompatibleReadyPrewarm.mockResolvedValue({ + uuid: 'prewarm-1', + pvcName: 'agent-prewarm-pvc-1234', + services: ['web'], + status: 'ready', + }); + (waitForSessionWorkspacePodReady as jest.Mock).mockImplementationOnce(() => ready.promise); + + const optsWithServices: CreateSessionOptions = { + ...baseOpts, + buildUuid: 'build-123', + keepAttachedServicesOnSessionNode: false, + services: [ + { + name: 'web', + deployId: 1, + resourceName: 'web-build-uuid', + devConfig: { image: 'node:20', command: 'pnpm dev' }, + }, + ], + }; + + const createPromise = AgentSessionService.createSession(optsWithServices); + await new Promise((resolve) => setImmediate(resolve)); + + expect(createSessionWorkspacePodWithoutWaiting).toHaveBeenCalled(); + expect(waitForSessionWorkspacePodScheduled).not.toHaveBeenCalled(); + expect(mockEnableDevMode).toHaveBeenCalledWith( + expect.objectContaining({ + deploymentName: 'web-build-uuid', + requiredNodeName: undefined, + }) + ); + expect(mockSessionQuery.patch).not.toHaveBeenCalledWith(expect.objectContaining({ status: 'active' })); + + ready.resolve({ spec: { nodeName: 'agent-node-a' } }); + + await expect(createPromise).resolves.toEqual(expect.objectContaining({ status: 'active' })); + }); + it('does not pin services to the session node when same-node placement is disabled', async () => { const optsWithServices: CreateSessionOptions = { ...baseOpts, @@ -1036,6 +1145,36 @@ describe('AgentSessionService', () => { ); }); + it('persists attach-service failures under the attach_services startup stage', async () => { + mockGetCompatibleReadyPrewarm.mockResolvedValue({ + uuid: 'prewarm-1', + pvcName: 'agent-prewarm-pvc-1234', + services: ['web'], + status: 'ready', + }); + mockEnableDevMode.mockRejectedValueOnce(new Error('service attach failed')); + + const optsWithServices: CreateSessionOptions = { + ...baseOpts, + buildUuid: 'build-123', + keepAttachedServicesOnSessionNode: false, + services: [ + { + name: 'web', + deployId: 1, + resourceName: 'web-build-uuid', + devConfig: { image: 'node:20', command: 'pnpm dev' }, + }, + ], + }; + + await expect(AgentSessionService.createSession(optsWithServices)).rejects.toThrow('service attach failed'); + + const startupFailurePayload = JSON.parse(mockRedis.setex.mock.calls[0][2]); + expect(startupFailurePayload.stage).toBe('attach_services'); + expect(startupFailurePayload.title).toBe('Attached services failed to start'); + }); + it('restores successful sibling services when one parallel dev-mode enable fails', async () => { const optsWithServices: CreateSessionOptions = { ...baseOpts, diff --git a/src/server/services/agentSession.ts b/src/server/services/agentSession.ts index 0e0f411..051024f 100644 --- a/src/server/services/agentSession.ts +++ b/src/server/services/agentSession.ts @@ -27,8 +27,11 @@ import { createAgentPvc, deleteAgentPvc } from 'server/lib/agentSession/pvcFacto import { createAgentApiKeySecret, deleteAgentApiKeySecret } from 'server/lib/agentSession/apiKeySecretFactory'; import { SESSION_WORKSPACE_GATEWAY_CONTAINER_NAME, + createSessionWorkspacePodWithoutWaiting, createSessionWorkspacePod, deleteSessionWorkspacePod, + waitForSessionWorkspacePodReady, + waitForSessionWorkspacePodScheduled, } from 'server/lib/agentSession/podFactory'; import { createSessionWorkspaceService, @@ -267,10 +270,43 @@ class DevModeBatchEnableError extends Error { } } +class AgentSessionStageError extends Error { + stage: AgentSessionStartupFailureStage; + causeError: unknown; + + constructor(stage: AgentSessionStartupFailureStage, error: unknown) { + super(error instanceof Error ? error.message : String(error ?? 'Agent session startup failed')); + this.name = 'AgentSessionStageError'; + this.stage = stage; + this.causeError = error; + } +} + function buildSnapshotMapFromEnabledServices(enabledServices: DevModeEnabledService[]): SessionSnapshotMap { return Object.fromEntries(enabledServices.map((service) => [String(service.deployId), service.snapshot])); } +function recordEnabledServicesFromResult( + result: DevModeEnabledService[], + enabledDevModeDeployIds: number[], + devModeSnapshots: SessionSnapshotMap +): void { + enabledDevModeDeployIds.push(...result.map((service) => service.deployId)); + Object.assign(devModeSnapshots, buildSnapshotMapFromEnabledServices(result)); +} + +function recordEnabledServicesFromError( + error: unknown, + enabledDevModeDeployIds: number[], + devModeSnapshots: SessionSnapshotMap +): void { + if (!(error instanceof DevModeBatchEnableError)) { + return; + } + + recordEnabledServicesFromResult(error.successfulServices, enabledDevModeDeployIds, devModeSnapshots); +} + async function enableServicesInDevModeParallel(opts: { namespace: string; pvcName: string; @@ -841,6 +877,8 @@ export default class AgentSessionService { const devModeSnapshots: SessionSnapshotMap = {}; const enabledDevModeDeployIds: number[] = []; const persistedDevModeDeployIds: number[] = []; + let pendingEnabledServicesPromise: Promise | null = null; + let pendingWorkspacePodReadyPromise: Promise | null = null; let failureStage: AgentSessionStartupFailureStage = 'create_session'; let sessionPersisted = false; let session: AgentSession | null = null; @@ -968,8 +1006,8 @@ export default class AgentSessionService { const infraSetupMs = elapsedMs(infraSetupStartedAt); failureStage = 'connect_runtime'; - const podStartupStartedAt = Date.now(); - const workspacePod = await createSessionWorkspacePod({ + const servicesToEnable = resolvedServices || []; + const workspacePodOptions = { podName, namespace: opts.namespace, pvcName, @@ -996,30 +1034,77 @@ export default class AgentSessionService { skipWorkspaceBootstrap: Boolean(compatiblePrewarm), serviceAccountName: agentServiceAccountName, resources: opts.resources, - }); - const podStartupMs = elapsedMs(podStartupStartedAt); - const agentNodeName = workspacePod.spec?.nodeName || null; + }; + const startEnabledServices = (requiredNodeName?: string): Promise => + enableServicesInDevModeParallel({ + namespace: opts.namespace, + pvcName, + services: servicesToEnable, + requiredNodeName, + }) + .then((enabledServices) => { + recordEnabledServicesFromResult(enabledServices, enabledDevModeDeployIds, devModeSnapshots); + return enabledServices; + }) + .catch((error) => { + recordEnabledServicesFromError(error, enabledDevModeDeployIds, devModeSnapshots); + throw new AgentSessionStageError('attach_services', error); + }); + const podStartupStartedAt = Date.now(); + let enabledServices: DevModeEnabledService[]; + const shouldOverlapPrewarmServiceAttach = Boolean(compatiblePrewarm && servicesToEnable.length > 0); + + if (shouldOverlapPrewarmServiceAttach) { + logger().info( + `Session: overlap start sessionId=${sessionUuid} namespace=${opts.namespace} podName=${podName} sameNode=${ + keepAttachedServicesOnSessionNode ? 'true' : 'false' + } services=${resolvedServiceNames.join(',')}` + ); - if ((resolvedServices || []).length > 0 && keepAttachedServicesOnSessionNode && !agentNodeName) { - throw new Error(`Session workspace pod ${podName} did not report a scheduled node`); - } + await createSessionWorkspacePodWithoutWaiting(workspacePodOptions); + pendingWorkspacePodReadyPromise = waitForSessionWorkspacePodReady( + opts.namespace, + podName, + opts.readiness + ).catch((error) => { + throw new AgentSessionStageError('connect_runtime', error); + }); - const enabledServices = await enableServicesInDevModeParallel({ - namespace: opts.namespace, - pvcName, - services: resolvedServices || [], - requiredNodeName: keepAttachedServicesOnSessionNode ? agentNodeName || undefined : undefined, - }).catch((error) => { - if (error instanceof DevModeBatchEnableError) { - enabledDevModeDeployIds.push(...error.successfulServices.map((service) => service.deployId)); - Object.assign(devModeSnapshots, buildSnapshotMapFromEnabledServices(error.successfulServices)); + if (keepAttachedServicesOnSessionNode) { + const scheduledPod = await waitForSessionWorkspacePodScheduled(opts.namespace, podName, opts.readiness).catch( + (error) => { + throw new AgentSessionStageError('connect_runtime', error); + } + ); + const agentNodeName = scheduledPod.spec?.nodeName || null; + + if (!agentNodeName) { + throw new AgentSessionStageError( + 'connect_runtime', + new Error(`Session workspace pod ${podName} did not report a scheduled node`) + ); + } + + pendingEnabledServicesPromise = startEnabledServices(agentNodeName); + } else { + pendingEnabledServicesPromise = startEnabledServices(); } - throw error; - }); + [, enabledServices] = await Promise.all([pendingWorkspacePodReadyPromise, pendingEnabledServicesPromise]); + } else { + const workspacePod = await createSessionWorkspacePod(workspacePodOptions); + const agentNodeName = workspacePod.spec?.nodeName || null; - enabledDevModeDeployIds.push(...enabledServices.map((service) => service.deployId)); - Object.assign(devModeSnapshots, buildSnapshotMapFromEnabledServices(enabledServices)); + if (servicesToEnable.length > 0 && keepAttachedServicesOnSessionNode && !agentNodeName) { + throw new Error(`Session workspace pod ${podName} did not report a scheduled node`); + } + + enabledServices = await startEnabledServices( + keepAttachedServicesOnSessionNode ? agentNodeName || undefined : undefined + ); + } + + const podStartupMs = elapsedMs(podStartupStartedAt); if (enabledServices.length > 0) { await AgentSession.query() @@ -1066,7 +1151,9 @@ export default class AgentSessionService { resolvedServiceNames.join(',') || 'none' } prewarm=${compatiblePrewarm ? 'reused' : 'new'} durationMs=${elapsedMs( sessionStartedAt - )} preflightMs=${preflightMs} infraMs=${infraSetupMs} podMs=${podStartupMs} finalizeMs=${finalizeMs}` + )} preflightMs=${preflightMs} infraMs=${infraSetupMs} podMs=${podStartupMs} finalizeMs=${finalizeMs} overlap=${ + shouldOverlapPrewarmServiceAttach ? 'true' : 'false' + }` ); const readySession = session; @@ -1085,16 +1172,26 @@ export default class AgentSessionService { return session!; } catch (err) { + if (pendingWorkspacePodReadyPromise) { + void pendingWorkspacePodReadyPromise.catch(() => {}); + } + + if (pendingEnabledServicesPromise) { + await pendingEnabledServicesPromise.catch(() => {}); + } + + const startupError = err instanceof AgentSessionStageError ? err.causeError : err; + failureStage = err instanceof AgentSessionStageError ? err.stage : failureStage; const startupFailure = buildAgentSessionStartupFailure({ sessionId: sessionUuid, - error: err, + error: startupError, stage: failureStage, }); if ( buildKind === BuildKind.ENVIRONMENT && opts.buildUuid && - isUniqueConstraintError(err, ACTIVE_ENVIRONMENT_SESSION_UNIQUE_INDEX) + isUniqueConstraintError(startupError, ACTIVE_ENVIRONMENT_SESSION_UNIQUE_INDEX) ) { const activeSession = await AgentSessionService.getEnvironmentActiveSession(opts.buildUuid, opts.userId); if (activeSession) { @@ -1103,7 +1200,7 @@ export default class AgentSessionService { } logger().error( - { error: err, sessionId: sessionUuid, failureStage }, + { error: startupError, sessionId: sessionUuid, failureStage }, `Session: startup failed sessionId=${sessionUuid} stage=${failureStage}` ); @@ -1176,7 +1273,7 @@ export default class AgentSessionService { .catch(() => {}); } - throw err; + throw startupError; } }