diff --git a/sdks/sandbox/kotlin/sandbox/Module.md b/sdks/sandbox/kotlin/sandbox/Module.md index 14e36358..07fdbd35 100644 --- a/sdks/sandbox/kotlin/sandbox/Module.md +++ b/sdks/sandbox/kotlin/sandbox/Module.md @@ -11,6 +11,7 @@ The Open Sandbox SDK provides a comprehensive interface for creating and managin - **🔄 Lifecycle Management**: Create, pause, resume, terminate operations - **💚 Health Monitoring**: Automatic readiness detection and status tracking - **🏗️ Fluent API**: Type-safe builder pattern with DSL support +- **📦 Client-Side Sandbox Pool**: Idle-buffer pool for predictable acquire latency (opt-in, see [Sandbox Pool](#sandbox-pool)) ## Quick Start @@ -100,6 +101,34 @@ sandbox.commands.executeStreaming("long-running-task").collect { event -> } ``` +## Sandbox Pool + +Optional client-side pool for acquiring ready sandboxes with lower latency. The pool maintains an idle buffer; use `SandboxPool.builder()` with `stateStore`, `connectionConfig`, and `creationSpec`, then `start()`, `acquire()`, and `shutdown()`. + +```kotlin +import com.alibaba.opensandbox.sandbox.pool.SandboxPool +import com.alibaba.opensandbox.sandbox.domain.pool.PoolCreationSpec +import com.alibaba.opensandbox.sandbox.infrastructure.pool.InMemoryPoolStateStore + +val pool = SandboxPool.builder() + .poolName("my-pool") + .ownerId("worker-1") + .maxIdle(5) + .stateStore(InMemoryPoolStateStore()) + .connectionConfig(connectionConfig) + .creationSpec(PoolCreationSpec.builder().image("ubuntu:22.04").build()) + .build() +pool.start() + +val sandbox = pool.acquire(sandboxTimeout = Duration.ofMinutes(30)) +try { + // use sandbox +} finally { + sandbox.kill() +} +pool.shutdown(graceful = true) +``` + ## Key Components ### Sandbox diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt index da033beb..d836752e 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt @@ -88,6 +88,42 @@ class InvalidArgumentException( error = SandboxError(SandboxError.INVALID_ARGUMENT, message), ) +/** + * Thrown when acquire is called with FAIL_FAST policy and no idle sandbox is available. + */ +class PoolEmptyException( + message: String? = "No idle sandbox available and policy is FAIL_FAST", + cause: Throwable? = null, +) : SandboxException( + message = message, + cause = cause, + error = SandboxError(SandboxError.POOL_EMPTY, message), + ) + +/** + * Thrown when the pool state store is unavailable during idle take/put/lock operations. + */ +class PoolStateStoreUnavailableException( + message: String? = null, + cause: Throwable? = null, +) : SandboxException( + message = message, + cause = cause, + error = SandboxError(SandboxError.POOL_STATE_STORE_UNAVAILABLE, message), + ) + +/** + * Thrown when atomic take or lock-update conflicts occur in the state store. + */ +class PoolStateStoreContentionException( + message: String? = null, + cause: Throwable? = null, +) : SandboxException( + message = message, + cause = cause, + error = SandboxError(SandboxError.POOL_STATE_STORE_CONTENTION, message), + ) + /** * Defines standardized common error codes and messages for the Sandbox SDK. */ @@ -101,5 +137,14 @@ data class SandboxError( const val UNHEALTHY = "UNHEALTHY" const val INVALID_ARGUMENT = "INVALID_ARGUMENT" const val UNEXPECTED_RESPONSE = "UNEXPECTED_RESPONSE" + + /** Pool-specific: no idle sandbox and policy is FAIL_FAST. */ + const val POOL_EMPTY = "POOL_EMPTY" + + /** Pool state store unavailable during operations. */ + const val POOL_STATE_STORE_UNAVAILABLE = "POOL_STATE_STORE_UNAVAILABLE" + + /** Pool state store contention (atomic take or lock conflicts). */ + const val POOL_STATE_STORE_CONTENTION = "POOL_STATE_STORE_CONTENTION" } } diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/AcquirePolicy.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/AcquirePolicy.kt new file mode 100644 index 00000000..3491610d --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/AcquirePolicy.kt @@ -0,0 +1,33 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +import com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException + +/** + * Policy for acquire when the idle buffer is empty. + * + * - FAIL_FAST: throw [PoolEmptyException] (POOL_EMPTY). + * - DIRECT_CREATE: attempt direct create via lifecycle API, then connect and return. + */ +enum class AcquirePolicy { + /** When no idle sandbox is available, fail immediately with POOL_EMPTY. */ + FAIL_FAST, + + /** When no idle sandbox is available, create a new sandbox via lifecycle API. */ + DIRECT_CREATE, +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/EmptyBehavior.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/EmptyBehavior.kt new file mode 100644 index 00000000..c4b9bde3 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/EmptyBehavior.kt @@ -0,0 +1,31 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +/** + * Behavior when the idle buffer is empty at acquire time. + * + * - FAIL_FAST: throw POOL_EMPTY. + * - DIRECT_CREATE: attempt direct create (default). + */ +enum class EmptyBehavior { + /** Throw POOL_EMPTY when no idle sandbox is available. */ + FAIL_FAST, + + /** Create a new sandbox via lifecycle API when no idle is available. */ + DIRECT_CREATE, +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolConfig.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolConfig.kt new file mode 100644 index 00000000..9ecde0bd --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolConfig.kt @@ -0,0 +1,173 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +import com.alibaba.opensandbox.sandbox.config.ConnectionConfig +import kotlin.math.ceil + +/** + * Configuration for a client-side sandbox pool. + * + * @property poolName User-defined name and namespace for this logical pool (required). + * @property ownerId Unique process identity for primary lock ownership (required in distributed mode). + * @property maxIdle Standby idle target/cap (required). + * @property warmupConcurrency Max concurrent creation workers during replenish (default: max(1, ceil(maxIdle * 0.2))). + * @property primaryLockTtl Lock TTL for distributed primary ownership (default: 60s). + * @property emptyBehavior Behavior when idle buffer is empty (default: DIRECT_CREATE). + * @property stateStore Injected [PoolStateStore] implementation (required). + * @property connectionConfig Connection config for lifecycle API (required). + * @property creationSpec Template for creating sandboxes (replenish and direct-create) (required). + * @property reconcileInterval Interval between reconcile ticks (default: 30s). + * @property degradedThreshold Consecutive create failures before transitioning to DEGRADED (default: 3). + * @property drainTimeout Max wait during graceful shutdown for in-flight ops (default: 30s). + */ +data class PoolConfig( + val poolName: String, + val ownerId: String, + val maxIdle: Int, + val warmupConcurrency: Int, + val primaryLockTtl: java.time.Duration, + val emptyBehavior: EmptyBehavior, + val stateStore: PoolStateStore, + val connectionConfig: ConnectionConfig, + val creationSpec: PoolCreationSpec, + val reconcileInterval: java.time.Duration, + val degradedThreshold: Int, + val drainTimeout: java.time.Duration, +) { + init { + require(poolName.isNotBlank()) { "poolName must not be blank" } + require(ownerId.isNotBlank()) { "ownerId must not be blank" } + require(maxIdle >= 0) { "maxIdle must be >= 0" } + require(warmupConcurrency > 0) { "warmupConcurrency must be positive" } + require(degradedThreshold > 0) { "degradedThreshold must be positive" } + require(!reconcileInterval.isNegative && !reconcileInterval.isZero) { "reconcileInterval must be positive" } + require(!primaryLockTtl.isNegative && !primaryLockTtl.isZero) { "primaryLockTtl must be positive" } + require(!drainTimeout.isNegative) { "drainTimeout must be non-negative" } + } + + companion object { + private val DEFAULT_RECONCILE_INTERVAL = java.time.Duration.ofSeconds(30) + private val DEFAULT_PRIMARY_LOCK_TTL = java.time.Duration.ofSeconds(60) + private const val DEFAULT_DEGRADED_THRESHOLD = 3 + private val DEFAULT_DRAIN_TIMEOUT = java.time.Duration.ofSeconds(30) + + @JvmStatic + fun builder(): Builder = Builder() + } + + class Builder { + private var poolName: String? = null + private var ownerId: String? = null + private var maxIdle: Int? = null + private var warmupConcurrency: Int? = null + private var primaryLockTtl: java.time.Duration = DEFAULT_PRIMARY_LOCK_TTL + private var emptyBehavior: EmptyBehavior = EmptyBehavior.DIRECT_CREATE + private var stateStore: PoolStateStore? = null + private var connectionConfig: ConnectionConfig? = null + private var creationSpec: PoolCreationSpec? = null + private var reconcileInterval: java.time.Duration = DEFAULT_RECONCILE_INTERVAL + private var degradedThreshold: Int = DEFAULT_DEGRADED_THRESHOLD + private var drainTimeout: java.time.Duration = DEFAULT_DRAIN_TIMEOUT + + fun poolName(poolName: String): Builder { + this.poolName = poolName + return this + } + + fun ownerId(ownerId: String): Builder { + this.ownerId = ownerId + return this + } + + fun maxIdle(maxIdle: Int): Builder { + this.maxIdle = maxIdle + return this + } + + fun warmupConcurrency(warmupConcurrency: Int): Builder { + this.warmupConcurrency = warmupConcurrency + return this + } + + fun primaryLockTtl(primaryLockTtl: java.time.Duration): Builder { + this.primaryLockTtl = primaryLockTtl + return this + } + + fun emptyBehavior(emptyBehavior: EmptyBehavior): Builder { + this.emptyBehavior = emptyBehavior + return this + } + + fun stateStore(stateStore: PoolStateStore): Builder { + this.stateStore = stateStore + return this + } + + fun connectionConfig(connectionConfig: ConnectionConfig): Builder { + this.connectionConfig = connectionConfig + return this + } + + fun creationSpec(creationSpec: PoolCreationSpec): Builder { + this.creationSpec = creationSpec + return this + } + + fun reconcileInterval(reconcileInterval: java.time.Duration): Builder { + this.reconcileInterval = reconcileInterval + return this + } + + fun degradedThreshold(degradedThreshold: Int): Builder { + this.degradedThreshold = degradedThreshold + return this + } + + fun drainTimeout(drainTimeout: java.time.Duration): Builder { + this.drainTimeout = drainTimeout + return this + } + + fun build(): PoolConfig { + val name = poolName ?: throw IllegalArgumentException("poolName is required") + val owner = ownerId ?: throw IllegalArgumentException("ownerId is required") + val max = maxIdle ?: throw IllegalArgumentException("maxIdle is required") + val store = stateStore ?: throw IllegalArgumentException("stateStore is required") + val conn = connectionConfig ?: throw IllegalArgumentException("connectionConfig is required") + val spec = creationSpec ?: throw IllegalArgumentException("creationSpec is required") + + val warmup = warmupConcurrency ?: ceil(max * 0.2).toInt().coerceAtLeast(1) + + return PoolConfig( + poolName = name, + ownerId = owner, + maxIdle = max, + warmupConcurrency = warmup, + primaryLockTtl = primaryLockTtl, + emptyBehavior = emptyBehavior, + stateStore = store, + connectionConfig = conn, + creationSpec = spec, + reconcileInterval = reconcileInterval, + degradedThreshold = degradedThreshold, + drainTimeout = drainTimeout, + ) + } + } +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolCreationSpec.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolCreationSpec.kt new file mode 100644 index 00000000..7fc2b604 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolCreationSpec.kt @@ -0,0 +1,136 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +import com.alibaba.opensandbox.sandbox.Sandbox +import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkPolicy +import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxImageSpec +import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.Volume + +/** + * Template for creating sandboxes in the pool (replenish and direct-create). + * + * Pool always uses a fixed 24h timeout for created sandboxes; other parameters + * are taken from this spec. Defaults align with [Sandbox.Builder]. + * + * @property imageSpec Container image specification (required). + * @property entrypoint Entrypoint command (default: tail -f /dev/null). + * @property resource Resource limits (default: cpu=1, memory=2Gi). + * @property env Environment variables. + * @property metadata User-defined metadata. + * @property networkPolicy Optional outbound network policy. + * @property volumes Optional volume mounts. + */ +data class PoolCreationSpec( + val imageSpec: SandboxImageSpec, + val entrypoint: List = DEFAULT_ENTRYPOINT, + val resource: Map = DEFAULT_RESOURCE, + val env: Map = emptyMap(), + val metadata: Map = emptyMap(), + val networkPolicy: NetworkPolicy? = null, + val volumes: List? = null, +) { + companion object { + /** Default entrypoint: keep container running. */ + val DEFAULT_ENTRYPOINT: List = listOf("tail", "-f", "/dev/null") + + /** Default resource limits. */ + val DEFAULT_RESOURCE: Map = + mapOf( + "cpu" to "1", + "memory" to "2Gi", + ) + + @JvmStatic + fun builder(): Builder = Builder() + } + + class Builder { + private var imageSpec: SandboxImageSpec? = null + private var entrypoint: List = DEFAULT_ENTRYPOINT + private var resource: Map = DEFAULT_RESOURCE + private var env: Map = emptyMap() + private var metadata: Map = emptyMap() + private var networkPolicy: NetworkPolicy? = null + private var volumes: List? = null + + fun imageSpec(imageSpec: SandboxImageSpec): Builder { + this.imageSpec = imageSpec + return this + } + + fun image(image: String): Builder { + this.imageSpec = SandboxImageSpec.builder().image(image).build() + return this + } + + fun entrypoint(entrypoint: List): Builder { + this.entrypoint = entrypoint + return this + } + + fun entrypoint(vararg entrypoint: String): Builder { + this.entrypoint = entrypoint.toList() + return this + } + + fun resource(resource: Map): Builder { + this.resource = resource + return this + } + + fun resource(configure: MutableMap.() -> Unit): Builder { + val map = DEFAULT_RESOURCE.toMutableMap() + map.configure() + this.resource = map + return this + } + + fun env(env: Map): Builder { + this.env = env + return this + } + + fun metadata(metadata: Map): Builder { + this.metadata = metadata + return this + } + + fun networkPolicy(networkPolicy: NetworkPolicy?): Builder { + this.networkPolicy = networkPolicy + return this + } + + fun volumes(volumes: List?): Builder { + this.volumes = volumes + return this + } + + fun build(): PoolCreationSpec { + val spec = imageSpec ?: throw IllegalArgumentException("PoolCreationSpec imageSpec (or image) must be specified") + return PoolCreationSpec( + imageSpec = spec, + entrypoint = entrypoint, + resource = resource, + env = env, + metadata = metadata, + networkPolicy = networkPolicy, + volumes = volumes, + ) + } + } +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolSnapshot.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolSnapshot.kt new file mode 100644 index 00000000..0bcccaf8 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolSnapshot.kt @@ -0,0 +1,30 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +/** + * Point-in-time snapshot of pool state for observability. + * + * @property state Current pool state (HEALTHY, DEGRADED, DRAINING, STOPPED). + * @property idleCount Number of idle sandboxes in the store. + * @property lastError Last error message if pool is DEGRADED or after failure; null otherwise. + */ +data class PoolSnapshot( + val state: PoolState, + val idleCount: Int, + val lastError: String? = null, +) diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolState.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolState.kt new file mode 100644 index 00000000..e23d5d1a --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolState.kt @@ -0,0 +1,40 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +/** + * High-level state of the sandbox pool. + * + * Transitions: + * - HEALTHY -> DEGRADED: consecutive create failures exceed threshold + * - DEGRADED -> HEALTHY: probe or create succeeds, failure counter resets + * - HEALTHY/DEGRADED -> DRAINING: shutdown(graceful=true) called + * - any -> STOPPED: shutdown(graceful=false) or drain completes + */ +enum class PoolState { + /** Pool is operating normally. */ + HEALTHY, + + /** Replenish is failing; backoff applied; acquire still served from existing idle. */ + DEGRADED, + + /** Graceful shutdown in progress; no new replenish, waiting for in-flight ops. */ + DRAINING, + + /** Pool is stopped; no acquire or replenish. */ + STOPPED, +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolStateStore.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolStateStore.kt new file mode 100644 index 00000000..803a462d --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolStateStore.kt @@ -0,0 +1,99 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +import java.time.Duration +import java.time.Instant + +/** + * Abstraction for storing pool coordination state and idle sandbox membership. + * + * All operations are namespaced by [poolName]. Implementations must ensure: + * - Atomic take: one idle sandbox can only be taken by one acquire. + * - Idempotent put/remove for idle membership. + * - tryTakeIdle should prefer FIFO (oldest idle first) as best-effort. + * + * In distributed mode, only the current primary lock holder may execute + * reconcile maintenance writes (putIdle, reapExpiredIdle). Foreground + * acquire-path writes (tryTakeIdle, removeIdle) are allowed on all nodes. + */ +interface PoolStateStore { + /** + * Atomically removes and returns one idle sandbox ID for the pool, or null if none. + * Best-effort FIFO (oldest first). + */ + fun tryTakeIdle(poolName: String): String? + + /** + * Adds a sandbox ID to the idle set for the pool. + * Idempotent: duplicate put for same sandboxId leaves membership single-copy. + */ + fun putIdle( + poolName: String, + sandboxId: String, + ) + + /** + * Removes a sandbox ID from the idle set. + * Idempotent: duplicate remove is no-op. + */ + fun removeIdle( + poolName: String, + sandboxId: String, + ) + + /** + * Tries to acquire the primary (leader) lock for this pool. + * Best-effort mutually exclusive by poolName. Returns true if this node is now primary. + */ + fun tryAcquirePrimaryLock( + poolName: String, + ownerId: String, + ttl: Duration, + ): Boolean + + /** + * Renews the primary lock for the current owner. Non-owner renew is rejected. + */ + fun renewPrimaryLock( + poolName: String, + ownerId: String, + ttl: Duration, + ): Boolean + + /** + * Releases the primary lock for the given owner. + */ + fun releasePrimaryLock( + poolName: String, + ownerId: String, + ) + + /** + * Removes expired idle entries. In-memory store performs sweep; TTL-backed stores may no-op. + */ + fun reapExpiredIdle( + poolName: String, + now: Instant, + ) + + /** + * Returns a snapshot of counters for the pool (at least idle count). + * Eventually consistent for distributed stores. + */ + fun snapshotCounters(poolName: String): StoreCounters +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/StoreCounters.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/StoreCounters.kt new file mode 100644 index 00000000..d3e3c338 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/StoreCounters.kt @@ -0,0 +1,26 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +/** + * Snapshot of pool store counters. + * + * @property idleCount Number of sandbox IDs currently in the idle set. + */ +data class StoreCounters( + val idleCount: Int, +) diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStore.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStore.kt new file mode 100644 index 00000000..a88611d7 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStore.kt @@ -0,0 +1,161 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.infrastructure.pool + +import com.alibaba.opensandbox.sandbox.domain.pool.PoolStateStore +import com.alibaba.opensandbox.sandbox.domain.pool.StoreCounters +import java.time.Duration +import java.time.Instant +import java.util.ArrayDeque +import java.util.concurrent.ConcurrentHashMap + +/** + * In-memory implementation of [PoolStateStore] for single-node use. + * + * - Idle entries use a fixed 24h TTL; expired entries are removed on take, put, reap, or snapshot. + * - tryTakeIdle returns oldest (FIFO) non-expired idle sandbox ID. + * - Primary lock is process-local and time-bounded; no distributed coordination. + */ +class InMemoryPoolStateStore : PoolStateStore { + /** Fixed idle TTL per OSEP (24h). */ + private val idleTtl: Duration = Duration.ofHours(24) + + private val lock = Any() + + /** Per pool: queue of (sandboxId, expiresAt) in insertion order for FIFO take. */ + private val idleQueues = ConcurrentHashMap>() + + /** Per pool: primary lock (ownerId, expiresAt). */ + private val primaryLocks = ConcurrentHashMap() + + override fun tryTakeIdle(poolName: String): String? { + synchronized(lock) { + val queue = idleQueues[poolName] ?: return null + evictExpired(poolName, queue, Instant.now()) + val entry = queue.pollFirst() ?: return null + if (queue.isEmpty()) idleQueues.remove(poolName) + return entry.sandboxId + } + } + + override fun putIdle( + poolName: String, + sandboxId: String, + ) { + val expiresAt = Instant.now().plus(idleTtl) + synchronized(lock) { + val queue = idleQueues.getOrPut(poolName) { ArrayDeque() } + evictExpired(poolName, queue, Instant.now()) + if (queue.any { it.sandboxId == sandboxId }) return + queue.addLast(IdleEntry(sandboxId, expiresAt)) + } + } + + override fun removeIdle( + poolName: String, + sandboxId: String, + ) { + synchronized(lock) { + val queue = idleQueues[poolName] ?: return + queue.removeIf { it.sandboxId == sandboxId } + if (queue.isEmpty()) idleQueues.remove(poolName) + } + } + + override fun tryAcquirePrimaryLock( + poolName: String, + ownerId: String, + ttl: Duration, + ): Boolean { + val now = Instant.now() + val expiresAt = now.plus(ttl) + synchronized(lock) { + val current = primaryLocks[poolName] + if (current == null || current.expiresAt.isBefore(now)) { + primaryLocks[poolName] = PrimaryLockHolder(ownerId, expiresAt) + return true + } + if (current.ownerId == ownerId) { + primaryLocks[poolName] = PrimaryLockHolder(ownerId, expiresAt) + return true + } + return false + } + } + + override fun renewPrimaryLock( + poolName: String, + ownerId: String, + ttl: Duration, + ): Boolean { + val now = Instant.now() + val expiresAt = now.plus(ttl) + synchronized(lock) { + val current = primaryLocks[poolName] ?: return false + if (current.ownerId != ownerId || current.expiresAt.isBefore(now)) return false + primaryLocks[poolName] = PrimaryLockHolder(ownerId, expiresAt) + return true + } + } + + override fun releasePrimaryLock( + poolName: String, + ownerId: String, + ) { + synchronized(lock) { + val current = primaryLocks[poolName] ?: return + if (current.ownerId == ownerId) primaryLocks.remove(poolName) + } + } + + override fun reapExpiredIdle( + poolName: String, + now: Instant, + ) { + synchronized(lock) { + val queue = idleQueues[poolName] ?: return + evictExpired(poolName, queue, now) + if (queue.isEmpty()) idleQueues.remove(poolName) + } + } + + override fun snapshotCounters(poolName: String): StoreCounters { + synchronized(lock) { + val queue = idleQueues[poolName] ?: return StoreCounters(idleCount = 0) + evictExpired(poolName, queue, Instant.now()) + return StoreCounters(idleCount = queue.size) + } + } + + private fun evictExpired( + poolName: String, + queue: ArrayDeque, + now: Instant, + ) { + while (queue.isNotEmpty()) { + val head = queue.peekFirst() ?: break + if (head.expiresAt.isAfter(now)) break + queue.pollFirst() + } + // Do not remove pool from map here when queue is empty: putIdle may have just + // getOrPut an empty queue and will add to it after this call. + } + + private data class IdleEntry(val sandboxId: String, val expiresAt: Instant) + + private data class PrimaryLockHolder(val ownerId: String, val expiresAt: Instant) +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/PoolReconciler.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/PoolReconciler.kt new file mode 100644 index 00000000..1b6f8eb0 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/PoolReconciler.kt @@ -0,0 +1,118 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.infrastructure.pool + +import com.alibaba.opensandbox.sandbox.domain.pool.PoolConfig +import com.alibaba.opensandbox.sandbox.domain.pool.PoolStateStore +import org.slf4j.LoggerFactory +import java.time.Instant + +/** + * Runs one reconcile tick: leader-gated replenish and TTL reap. + * + * Only the current primary lock holder performs create + putIdle. + * Call from a periodic scheduler; [createOne] should call lifecycle create and return the new sandbox ID or null on failure. + */ +internal object PoolReconciler { + private val logger = LoggerFactory.getLogger(PoolReconciler::class.java) + + /** + * Runs a single reconcile tick. If this node does not hold the primary lock, returns immediately. + * Otherwise: reaps expired idle, snapshots counters, then creates up to min(deficit, warmupConcurrency) + * sandboxes via [createOne], putting each success into the store and updating [reconcileState] on failure. + */ + fun runReconcileTick( + config: PoolConfig, + stateStore: PoolStateStore, + createOne: () -> String?, + reconcileState: ReconcileState, + ) { + val poolName = config.poolName + val ownerId = config.ownerId + val ttl = config.primaryLockTtl + + if (!stateStore.tryAcquirePrimaryLock(poolName, ownerId, ttl)) { + logger.trace("Reconcile skip (not primary): pool_name={}", poolName) + return + } + try { + runPrimaryReplenishOnce(config, stateStore, createOne, reconcileState) + } finally { + stateStore.releasePrimaryLock(poolName, ownerId) + } + } + + private fun runPrimaryReplenishOnce( + config: PoolConfig, + stateStore: PoolStateStore, + createOne: () -> String?, + reconcileState: ReconcileState, + ) { + val poolName = config.poolName + val ownerId = config.ownerId + val ttl = config.primaryLockTtl + val now = Instant.now() + + stateStore.reapExpiredIdle(poolName, now) + val counters = stateStore.snapshotCounters(poolName) + val deficit = (config.maxIdle - counters.idleCount).coerceAtLeast(0) + val toCreate = minOf(deficit, config.warmupConcurrency) + + if (toCreate == 0 || reconcileState.isBackoffActive(now)) { + stateStore.renewPrimaryLock(poolName, ownerId, ttl) + logger.debug( + "Reconcile tick: pool_name={} idle={} deficit={} toCreate=0 (backoff={})", + poolName, + counters.idleCount, + deficit, + reconcileState.isBackoffActive(now), + ) + return + } + + logger.debug( + "Reconcile tick: pool_name={} idle={} deficit={} toCreate={}", + poolName, + counters.idleCount, + deficit, + toCreate, + ) + var created = 0 + repeat(toCreate) { + if (!stateStore.renewPrimaryLock(poolName, ownerId, ttl)) { + return + } + val newId = + try { + createOne() + } catch (e: Exception) { + reconcileState.recordFailure(e.message) + null + } + if (newId != null) { + stateStore.putIdle(poolName, newId) + created++ + reconcileState.recordSuccess() + } else { + reconcileState.recordFailure(null) + } + } + if (created > 0) { + logger.debug("Reconcile created {} sandboxes: pool_name={}", created, poolName) + } + } +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/ReconcileState.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/ReconcileState.kt new file mode 100644 index 00000000..82b66f0b --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/ReconcileState.kt @@ -0,0 +1,82 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.infrastructure.pool + +import com.alibaba.opensandbox.sandbox.domain.pool.PoolState +import java.time.Duration +import java.time.Instant + +/** + * Mutable state for reconcile loop: failure count, pool state, and exponential backoff. + * + * Thread-safe for use from reconcile worker and from pool snapshot. + */ +internal class ReconcileState( + private val degradedThreshold: Int, + private val backoffBase: Duration = Duration.ofSeconds(1), + private val backoffMax: Duration = Duration.ofSeconds(60), +) { + @Volatile + var failureCount: Int = 0 + private set + + @Volatile + var state: PoolState = PoolState.HEALTHY + private set + + @Volatile + var lastError: String? = null + private set + + @Volatile + private var backoffUntil: Instant? = null + + private var backoffAttempts: Int = 0 + + @Synchronized + fun recordSuccess() { + failureCount = 0 + if (state == PoolState.DEGRADED) state = PoolState.HEALTHY + backoffUntil = null + backoffAttempts = 0 + lastError = null + } + + @Synchronized + fun recordFailure(errorMessage: String?) { + failureCount++ + lastError = errorMessage + if (failureCount > degradedThreshold) { + state = PoolState.DEGRADED + backoffAttempts++ + val exponent = backoffAttempts.coerceAtMost(10) + val delaySeconds = backoffBase.seconds * (1L shl exponent) + val delayMs = + minOf( + Duration.ofSeconds(delaySeconds).toMillis(), + backoffMax.toMillis(), + ) + backoffUntil = Instant.now().plusMillis(delayMs) + } + } + + /** True if reconciler should skip create attempts this tick (in backoff window). */ + fun isBackoffActive(now: Instant = Instant.now()): Boolean { + val until = backoffUntil ?: return false + return state == PoolState.DEGRADED && now.isBefore(until) + } +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPool.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPool.kt new file mode 100644 index 00000000..def33fa1 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPool.kt @@ -0,0 +1,416 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.pool + +import com.alibaba.opensandbox.sandbox.HttpClientProvider +import com.alibaba.opensandbox.sandbox.Sandbox +import com.alibaba.opensandbox.sandbox.config.ConnectionConfig +import com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException +import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException +import com.alibaba.opensandbox.sandbox.domain.pool.AcquirePolicy +import com.alibaba.opensandbox.sandbox.domain.pool.EmptyBehavior +import com.alibaba.opensandbox.sandbox.domain.pool.PoolConfig +import com.alibaba.opensandbox.sandbox.domain.pool.PoolCreationSpec +import com.alibaba.opensandbox.sandbox.domain.pool.PoolSnapshot +import com.alibaba.opensandbox.sandbox.domain.pool.PoolState +import com.alibaba.opensandbox.sandbox.domain.pool.PoolStateStore +import com.alibaba.opensandbox.sandbox.domain.services.Sandboxes +import com.alibaba.opensandbox.sandbox.infrastructure.factory.AdapterFactory +import com.alibaba.opensandbox.sandbox.infrastructure.pool.PoolReconciler +import com.alibaba.opensandbox.sandbox.infrastructure.pool.ReconcileState +import org.slf4j.LoggerFactory +import java.time.Duration +import java.util.concurrent.Executors +import java.util.concurrent.ScheduledExecutorService +import java.util.concurrent.ScheduledFuture +import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicReference + +/** + * Client-side sandbox pool for acquiring ready sandboxes with predictable latency. + * + * The pool maintains an idle buffer of clean, borrowable sandboxes. Callers [acquire] a sandbox, + * use it, and terminate it via [Sandbox.kill] when done. No return/finalize API; sandboxes are ephemeral. + * + * Uses [PoolStateStore] for idle membership and primary lock; runs a background reconcile loop + * when started. Replenish is leader-gated; acquire is allowed on all nodes. + * + * ## Usage + * + * ```kotlin + * val pool = SandboxPool.builder() + * .poolName("my-pool") + * .ownerId("worker-1") + * .maxIdle(5) + * .stateStore(InMemoryPoolStateStore()) + * .connectionConfig(connectionConfig) + * .creationSpec(PoolCreationSpec.builder().image("ubuntu:22.04").build()) + * .build() + * pool.start() + * + * val sandbox = pool.acquire(sandboxTimeout = Duration.ofMinutes(30), policy = AcquirePolicy.DIRECT_CREATE) + * try { + * // use sandbox + * } finally { + * sandbox.kill() + * } + * + * pool.shutdown(graceful = true) + * ``` + * + * @see PoolConfig + */ +class SandboxPool internal constructor( + config: PoolConfig, +) { + private val logger = LoggerFactory.getLogger(SandboxPool::class.java) + + /** Fixed idle TTL per OSEP (24h). */ + private val idleTtl = Duration.ofHours(24) + + private val config: PoolConfig = config + private val stateStore: PoolStateStore = config.stateStore + private val connectionConfig: ConnectionConfig = config.connectionConfig + private val creationSpec: PoolCreationSpec = config.creationSpec + private val reconcileState = ReconcileState(config.degradedThreshold) + + @Volatile + private var currentMaxIdle: Int = config.maxIdle + + private val lifecycleState = AtomicReference(LifecycleState.NOT_STARTED) + private var httpClientProvider: HttpClientProvider? = null + private var sandboxService: Sandboxes? = null + private var scheduler: ScheduledExecutorService? = null + private var reconcileTask: ScheduledFuture<*>? = null + + /** + * Starts the pool: begins the background reconcile loop and, if [PoolConfig.maxIdle] > 0, + * triggers an immediate warmup tick. + */ + fun start() { + if (lifecycleState.get() == LifecycleState.RUNNING || lifecycleState.get() == LifecycleState.STARTING) { + return + } + lifecycleState.set(LifecycleState.STARTING) + val provider = HttpClientProvider(connectionConfig) + httpClientProvider = provider + val factory = AdapterFactory(provider) + sandboxService = factory.createSandboxes() + val exec = + Executors.newSingleThreadScheduledExecutor { r -> + Thread(r, "sandbox-pool-reconcile-${config.poolName}").apply { isDaemon = true } + } + scheduler = exec + val reconcileIntervalMs = config.reconcileInterval.toMillis() + reconcileTask = + exec.scheduleAtFixedRate( + { runReconcileTick() }, + if (config.maxIdle > 0) 0 else reconcileIntervalMs, + reconcileIntervalMs, + TimeUnit.MILLISECONDS, + ) + lifecycleState.set(LifecycleState.RUNNING) + logger.info( + "Pool started: pool_name={} state={} maxIdle={}", + config.poolName, + LifecycleState.RUNNING, + currentMaxIdle, + ) + } + + /** + * Acquires a sandbox from the pool or creates one directly per policy. + * + * 1. Tries to take an idle sandbox ID from the store and connect. + * 2. If connect fails (stale ID), removes the ID, best-effort kill, then falls back to direct create. + * 3. If no idle and [policy] is [AcquirePolicy.FAIL_FAST], throws [PoolEmptyException]. + * 4. If no idle and [policy] is [AcquirePolicy.DIRECT_CREATE], creates a new sandbox via lifecycle API and returns it. + * + * @param sandboxTimeout Optional duration to set on the acquired sandbox (applied via renew after connect). + * @param policy Behavior when idle buffer is empty (default: [AcquirePolicy.DIRECT_CREATE]). + * @return A connected [Sandbox] instance. Caller must call [Sandbox.kill] when done. + * @throws PoolEmptyException when policy is FAIL_FAST and no idle is available. + * @throws SandboxException for lifecycle create/connect/renew errors. + */ + fun acquire( + sandboxTimeout: Duration? = null, + policy: AcquirePolicy = AcquirePolicy.DIRECT_CREATE, + ): Sandbox { + if (lifecycleState.get() != LifecycleState.RUNNING) { + throw IllegalStateException("Pool is not running: ${lifecycleState.get()}") + } + val poolName = config.poolName + val sandboxId = stateStore.tryTakeIdle(poolName) + if (sandboxId != null) { + try { + val sandbox = + Sandbox.connector() + .sandboxId(sandboxId) + .connectionConfig(connectionConfig) + .connect() + sandboxTimeout?.let { sandbox.renew(it) } + logger.debug( + "Acquire from idle: pool_name={} sandbox_id={} policy={}", + poolName, + sandboxId, + policy, + ) + return sandbox + } catch (e: Exception) { + logger.debug( + "Idle connect failed, falling back to direct create: pool_name={} sandbox_id={}", + poolName, + sandboxId, + ) + stateStore.removeIdle(poolName, sandboxId) + try { + sandboxService?.killSandbox(sandboxId) + } catch (_: Exception) { + // best-effort kill; do not replace original error + } + // fall through to direct create + } + } + if (policy == AcquirePolicy.FAIL_FAST) { + logger.debug("Acquire FAIL_FAST with empty idle: pool_name={}", poolName) + throw PoolEmptyException("No idle sandbox available and policy is FAIL_FAST") + } + logger.debug("Acquire direct create: pool_name={} policy={}", poolName, policy) + return directCreate(sandboxTimeout) + } + + /** + * Updates the maximum idle target. Triggers a reconcile tick without blocking on convergence. + */ + fun resize(maxIdle: Int) { + require(maxIdle >= 0) { "maxIdle must be >= 0" } + currentMaxIdle = maxIdle + scheduler?.execute { runReconcileTick() } + } + + /** + * Returns a point-in-time snapshot of pool state for observability. + */ + fun snapshot(): PoolSnapshot { + val state = + when (lifecycleState.get()) { + LifecycleState.NOT_STARTED, + LifecycleState.STOPPED, + -> PoolState.STOPPED + LifecycleState.DRAINING -> PoolState.DRAINING + else -> reconcileState.state + } + val counters = stateStore.snapshotCounters(config.poolName) + return PoolSnapshot( + state = state, + idleCount = counters.idleCount, + lastError = reconcileState.lastError, + ) + } + + /** + * Stops the pool. If [graceful] is true, stops accepting new acquires, stops the reconcile worker, + * and waits up to drainTimeout for in-flight operations. Otherwise stops immediately. + */ + fun shutdown(graceful: Boolean = true) { + if (lifecycleState.get() == LifecycleState.STOPPED) return + if (!graceful) { + stopReconcile() + lifecycleState.set(LifecycleState.STOPPED) + closeProvider() + logger.info("Pool stopped (non-graceful): pool_name={} state={}", config.poolName, LifecycleState.STOPPED) + return + } + lifecycleState.set(LifecycleState.DRAINING) + stopReconcile() + val drainMs = config.drainTimeout.toMillis() + if (drainMs > 0) { + Thread.sleep(drainMs) + } + lifecycleState.set(LifecycleState.STOPPED) + closeProvider() + logger.info("Pool stopped (graceful): pool_name={} state={}", config.poolName, LifecycleState.STOPPED) + } + + private fun runReconcileTick() { + if (lifecycleState.get() != LifecycleState.RUNNING) return + val service = sandboxService ?: return + val reconcileConfig = config.copy(maxIdle = currentMaxIdle) + PoolReconciler.runReconcileTick( + config = reconcileConfig, + stateStore = stateStore, + createOne = { createOneSandbox(service) }, + reconcileState = reconcileState, + ) + } + + private fun createOneSandbox(service: Sandboxes): String? { + return try { + val response = + service.createSandbox( + spec = creationSpec.imageSpec, + entrypoint = creationSpec.entrypoint, + env = creationSpec.env, + metadata = creationSpec.metadata, + timeout = idleTtl, + resource = creationSpec.resource, + networkPolicy = creationSpec.networkPolicy, + extensions = emptyMap(), + volumes = creationSpec.volumes, + ) + response.id + } catch (e: Exception) { + logger.warn("Pool create sandbox failed: poolName={}", config.poolName, e) + throw e + } + } + + private fun directCreate(sandboxTimeout: Duration?): Sandbox { + val service = sandboxService ?: throw IllegalStateException("Pool not started") + val response = + service.createSandbox( + spec = creationSpec.imageSpec, + entrypoint = creationSpec.entrypoint, + env = creationSpec.env, + metadata = creationSpec.metadata, + timeout = idleTtl, + resource = creationSpec.resource, + networkPolicy = creationSpec.networkPolicy, + extensions = emptyMap(), + volumes = creationSpec.volumes, + ) + val sandbox = + Sandbox.connector() + .sandboxId(response.id) + .connectionConfig(connectionConfig) + .connect() + sandboxTimeout?.let { sandbox.renew(it) } + return sandbox + } + + private fun stopReconcile() { + reconcileTask?.cancel(false) + reconcileTask = null + scheduler?.shutdown() + try { + scheduler?.awaitTermination(5, TimeUnit.SECONDS) + } catch (_: InterruptedException) { + Thread.currentThread().interrupt() + } + scheduler = null + } + + private fun closeProvider() { + try { + httpClientProvider?.close() + } catch (e: Exception) { + logger.warn("Error closing pool HTTP client", e) + } + httpClientProvider = null + sandboxService = null + } + + @Suppress("ktlint:standard:property-naming") + private enum class LifecycleState { + NOT_STARTED, + STARTING, + RUNNING, + DRAINING, + STOPPED, + } + + companion object { + @JvmStatic + fun builder(): Builder = Builder() + } + + class Builder internal constructor() { + private var config: PoolConfig? = null + + fun config(config: PoolConfig): Builder { + this.config = config + return this + } + + fun poolName(poolName: String): Builder { + configBuilder.poolName(poolName) + return this + } + + fun ownerId(ownerId: String): Builder { + configBuilder.ownerId(ownerId) + return this + } + + fun maxIdle(maxIdle: Int): Builder { + configBuilder.maxIdle(maxIdle) + return this + } + + fun stateStore(stateStore: PoolStateStore): Builder { + configBuilder.stateStore(stateStore) + return this + } + + fun connectionConfig(connectionConfig: ConnectionConfig): Builder { + configBuilder.connectionConfig(connectionConfig) + return this + } + + fun creationSpec(creationSpec: PoolCreationSpec): Builder { + configBuilder.creationSpec(creationSpec) + return this + } + + fun emptyBehavior(emptyBehavior: EmptyBehavior): Builder { + configBuilder.emptyBehavior(emptyBehavior) + return this + } + + fun warmupConcurrency(warmupConcurrency: Int): Builder { + configBuilder.warmupConcurrency(warmupConcurrency) + return this + } + + fun primaryLockTtl(primaryLockTtl: Duration): Builder { + configBuilder.primaryLockTtl(primaryLockTtl) + return this + } + + fun reconcileInterval(reconcileInterval: Duration): Builder { + configBuilder.reconcileInterval(reconcileInterval) + return this + } + + fun degradedThreshold(degradedThreshold: Int): Builder { + configBuilder.degradedThreshold(degradedThreshold) + return this + } + + fun drainTimeout(drainTimeout: Duration): Builder { + configBuilder.drainTimeout(drainTimeout) + return this + } + + private val configBuilder = PoolConfig.builder() + + fun build(): SandboxPool { + val cfg = config ?: configBuilder.build() + return SandboxPool(cfg) + } + } +} diff --git a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStoreTest.kt b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStoreTest.kt new file mode 100644 index 00000000..81d92623 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStoreTest.kt @@ -0,0 +1,150 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.infrastructure.pool + +import com.alibaba.opensandbox.sandbox.domain.pool.PoolStateStore +import com.alibaba.opensandbox.sandbox.domain.pool.StoreCounters +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertFalse +import org.junit.jupiter.api.Assertions.assertNull +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.Test +import java.time.Duration + +/** + * Contract and behavior tests for [InMemoryPoolStateStore]. + */ +class InMemoryPoolStateStoreTest { + private lateinit var store: PoolStateStore + private val poolName = "test-pool" + + @BeforeEach + fun setUp() { + store = InMemoryPoolStateStore() + } + + @Test + fun `tryTakeIdle returns null when empty`() { + assertNull(store.tryTakeIdle(poolName)) + } + + @Test + fun `putIdle and tryTakeIdle round-trip`() { + store.putIdle(poolName, "id-1") + assertEquals("id-1", store.tryTakeIdle(poolName)) + assertNull(store.tryTakeIdle(poolName)) + } + + @Test + fun `tryTakeIdle prefers FIFO`() { + store.putIdle(poolName, "id-1") + store.putIdle(poolName, "id-2") + store.putIdle(poolName, "id-3") + assertEquals("id-1", store.tryTakeIdle(poolName)) + assertEquals("id-2", store.tryTakeIdle(poolName)) + assertEquals("id-3", store.tryTakeIdle(poolName)) + assertNull(store.tryTakeIdle(poolName)) + } + + @Test + fun `removeIdle removes entry`() { + store.putIdle(poolName, "id-1") + store.removeIdle(poolName, "id-1") + assertNull(store.tryTakeIdle(poolName)) + } + + @Test + fun `removeIdle is idempotent`() { + store.putIdle(poolName, "id-1") + store.removeIdle(poolName, "id-1") + store.removeIdle(poolName, "id-1") + assertNull(store.tryTakeIdle(poolName)) + } + + @Test + fun `putIdle is idempotent - single copy`() { + store.putIdle(poolName, "id-1") + store.putIdle(poolName, "id-1") + assertEquals("id-1", store.tryTakeIdle(poolName)) + assertNull(store.tryTakeIdle(poolName)) + } + + @Test + fun `snapshotCounters returns idle count`() { + assertEquals(0, store.snapshotCounters(poolName).idleCount) + store.putIdle(poolName, "id-1") + store.putIdle(poolName, "id-2") + assertEquals(2, store.snapshotCounters(poolName).idleCount) + store.tryTakeIdle(poolName) + assertEquals(1, store.snapshotCounters(poolName).idleCount) + } + + @Test + fun `tryAcquirePrimaryLock acquires when free`() { + val ok = store.tryAcquirePrimaryLock(poolName, "owner-1", Duration.ofSeconds(60)) + assertTrue(ok) + } + + @Test + fun `tryAcquirePrimaryLock fails for different owner when held`() { + store.tryAcquirePrimaryLock(poolName, "owner-1", Duration.ofSeconds(60)) + val ok = store.tryAcquirePrimaryLock(poolName, "owner-2", Duration.ofSeconds(60)) + assertFalse(ok) + } + + @Test + fun `renewPrimaryLock succeeds for owner`() { + store.tryAcquirePrimaryLock(poolName, "owner-1", Duration.ofSeconds(60)) + assertTrue(store.renewPrimaryLock(poolName, "owner-1", Duration.ofSeconds(60))) + } + + @Test + fun `renewPrimaryLock fails for non-owner`() { + store.tryAcquirePrimaryLock(poolName, "owner-1", Duration.ofSeconds(60)) + assertFalse(store.renewPrimaryLock(poolName, "owner-2", Duration.ofSeconds(60))) + } + + @Test + fun `releasePrimaryLock allows new owner`() { + store.tryAcquirePrimaryLock(poolName, "owner-1", Duration.ofSeconds(60)) + store.releasePrimaryLock(poolName, "owner-1") + assertTrue(store.tryAcquirePrimaryLock(poolName, "owner-2", Duration.ofSeconds(60))) + } + + @Test + fun `pool isolation - different pools do not share idle`() { + store.putIdle("pool-a", "id-a") + store.putIdle("pool-b", "id-b") + assertEquals("id-a", store.tryTakeIdle("pool-a")) + assertEquals("id-b", store.tryTakeIdle("pool-b")) + assertNull(store.tryTakeIdle("pool-a")) + } + + @Test + fun `pool isolation - different pools do not share lock`() { + store.tryAcquirePrimaryLock("pool-a", "owner-a", Duration.ofSeconds(60)) + assertTrue(store.tryAcquirePrimaryLock("pool-b", "owner-b", Duration.ofSeconds(60))) + } + + @Test + fun `reapExpiredIdle removes expired entries`() { + store.putIdle(poolName, "id-1") + store.reapExpiredIdle(poolName, java.time.Instant.now().plus(java.time.Duration.ofHours(25))) + assertEquals(StoreCounters(0), store.snapshotCounters(poolName)) + } +} diff --git a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPoolTest.kt b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPoolTest.kt new file mode 100644 index 00000000..e98c68d4 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPoolTest.kt @@ -0,0 +1,117 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.pool + +import com.alibaba.opensandbox.sandbox.config.ConnectionConfig +import com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException +import com.alibaba.opensandbox.sandbox.domain.pool.AcquirePolicy +import com.alibaba.opensandbox.sandbox.domain.pool.PoolCreationSpec +import com.alibaba.opensandbox.sandbox.domain.pool.PoolState +import com.alibaba.opensandbox.sandbox.infrastructure.pool.InMemoryPoolStateStore +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertThrows +import org.junit.jupiter.api.Test +import java.time.Duration + +class SandboxPoolTest { + @Test + fun `snapshot before start returns STOPPED and zero idle`() { + val pool = buildPool() + val snap = pool.snapshot() + assertEquals(PoolState.STOPPED, snap.state) + assertEquals(0, snap.idleCount) + } + + @Test + fun `start then snapshot returns RUNNING`() { + val pool = buildPool() + pool.start() + try { + val snap = pool.snapshot() + assertEquals(PoolState.HEALTHY, snap.state) + } finally { + pool.shutdown(graceful = false) + } + } + + @Test + fun `resize updates maxIdle`() { + val pool = buildPool() + pool.start() + try { + pool.resize(10) + val snap = pool.snapshot() + assertEquals(PoolState.HEALTHY, snap.state) + } finally { + pool.shutdown(graceful = false) + } + } + + @Test + fun `shutdown graceful then snapshot returns STOPPED`() { + val pool = buildPool() + pool.start() + pool.shutdown(graceful = true) + val snap = pool.snapshot() + assertEquals(PoolState.STOPPED, snap.state) + } + + @Test + fun `shutdown non-graceful then snapshot returns STOPPED`() { + val pool = buildPool() + pool.start() + pool.shutdown(graceful = false) + val snap = pool.snapshot() + assertEquals(PoolState.STOPPED, snap.state) + } + + @Test + fun `acquire with FAIL_FAST and empty idle throws PoolEmptyException`() { + val pool = buildPool() + pool.start() + try { + assertThrows(PoolEmptyException::class.java) { + pool.acquire(policy = AcquirePolicy.FAIL_FAST) + } + } finally { + pool.shutdown(graceful = false) + } + } + + @Test + fun `acquire when not running throws IllegalStateException`() { + val pool = buildPool() + assertThrows(IllegalStateException::class.java) { + pool.acquire(policy = AcquirePolicy.DIRECT_CREATE) + } + } + + private fun buildPool(): SandboxPool { + val config = ConnectionConfig.builder().build() + val spec = PoolCreationSpec.builder().image("ubuntu:22.04").build() + return SandboxPool.builder() + .poolName("test-pool") + .ownerId("test-owner") + .maxIdle(2) + .stateStore(InMemoryPoolStateStore()) + .connectionConfig(config) + .creationSpec(spec) + .drainTimeout(Duration.ofMillis(50)) + .reconcileInterval(Duration.ofSeconds(30)) + .build() + } +}