From 459199ef6c7b8bd364494ffc27842d7d30068a73 Mon Sep 17 00:00:00 2001
From: Issac-Newton <1556820213@qq.com>
Date: Wed, 21 Jan 2026 07:04:30 +0000
Subject: [PATCH 1/4] modify aes encryption strategy to aes gcm 256

---
 rock/utils/crypto_utils.py | 118 ++++++++++++++++++++++++++++++-------
 1 file changed, 97 insertions(+), 21 deletions(-)

diff --git a/rock/utils/crypto_utils.py b/rock/utils/crypto_utils.py
index 452b4123d..6fe9038b4 100644
--- a/rock/utils/crypto_utils.py
+++ b/rock/utils/crypto_utils.py
@@ -1,39 +1,115 @@
-from cryptography.fernet import Fernet
+import base64
+import os
+from abc import ABC, abstractmethod
+from enum import Enum
 
+from cryptography.hazmat.backends import default_backend
+from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
+
+
+class EncryptionMode(Enum):
+    AES_GCM_256 = "aes_gcm_256"
+
+
+class EncryptionStrategy(ABC):
+    @abstractmethod
+    def encrypt(self, plaintext: bytes) -> str:
+        pass
+
+    @abstractmethod
+    def decrypt(self, ciphertext: str) -> bytes:
+        pass
+
+    @abstractmethod
+    def update_key(self, key: str):
+        pass
+
+    @staticmethod
+    @abstractmethod
+    def generate_key() -> str:
+        pass
 
-class AESEncryption:
-    """
-    AES对称加密/解密工具类
-    使用Fernet（AES-128-CBC + HMAC）
-    """
+
+class AESGCM256Strategy(EncryptionStrategy):
+    _nonce_len = 12
+    _tag_len = 16
 
     def __init__(self, key: str | None = None):
         if key:
-            self.key = key.encode("utf-8")
+            self.key = base64.b64decode(key)
+            if len(self.key) != 32:
+                raise ValueError("AES-GCM-256 key should be 32 bytes")
         else:
-            self.key = Fernet.generate_key()
-        self.fernet = Fernet(self.key)
+            self.key = os.urandom(32)
+
+    def encrypt(self, plaintext: bytes) -> str:
+        nonce = os.urandom(self._nonce_len)
+        cipher = Cipher(algorithms.AES(self.key), modes.GCM(nonce), backend=default_backend())
+        encryptor = cipher.encryptor()
+        ciphertext = encryptor.update(plaintext) + encryptor.finalize()
+
+        # composition: nonce(12) + ciphertext + tag(16)
+        return base64.b64encode(nonce + ciphertext + encryptor.tag).decode("utf-8")
+
+    def decrypt(self, ciphertext: str) -> bytes:
+        encrypted_data = base64.b64decode(ciphertext)
+
+        # composition: [nonce(12) | ciphertext(variable) | tag(16)]
+        nonce = encrypted_data[: self._nonce_len]
+        ciphertext_bytes = encrypted_data[self._nonce_len : -self._tag_len]
+        tag = encrypted_data[-self._tag_len :]
+
+        cipher = Cipher(algorithms.AES(self.key), modes.GCM(nonce, tag), backend=default_backend())
+        decryptor = cipher.decryptor()
+        return decryptor.update(ciphertext_bytes) + decryptor.finalize()
+
+    def update_key(self, key: str):
+        new_key = base64.b64decode(key)
+        if len(new_key) != 32:
+            raise ValueError("AES-GCM-256 key should be 32 bytes")
+        if new_key == self.key:
+            return
+        self.key = new_key
+
+    @staticmethod
+    def generate_key() -> str:
+        return base64.b64encode(os.urandom(32)).decode("utf-8")
+
+
+class AESEncryption:
+    _STRATEGY_MAP = {
+        EncryptionMode.AES_GCM_256: AESGCM256Strategy,
+    }
+
+    def __init__(self, key: str | None = None, mode: EncryptionMode = EncryptionMode.AES_GCM_256):
+        self.mode = mode
+        strategy_class = self._STRATEGY_MAP.get(mode)
+        if strategy_class is None:
+            raise ValueError(f"Unsupported encryption mode: {mode}")
+
+        self._strategy: EncryptionStrategy = strategy_class(key)
 
     def encrypt(self, plaintext: str | bytes) -> str:
         if isinstance(plaintext, str):
-            plaintext = plaintext.encode()
-        return self.fernet.encrypt(plaintext).decode()
+            plaintext = plaintext.encode("utf-8")
+
+        return self._strategy.encrypt(plaintext)
 
     def decrypt(self, ciphertext: str) -> str:
-        return self.fernet.decrypt(ciphertext.encode()).decode()
+        plaintext_bytes = self._strategy.decrypt(ciphertext)
+        return plaintext_bytes.decode("utf-8")
 
     def key_update(self, key: str):
-        new_key = key.encode("utf-8")
-        if new_key == self.key:
-            return
-        self.key = new_key
-        self.fernet = Fernet(self.key)
+        self._strategy.update_key(key)
 
     @classmethod
-    def generate_key(cls):
-        return Fernet.generate_key().decode()
+    def generate_key(cls, mode: EncryptionMode = EncryptionMode.AES_GCM_256) -> str:
+        strategy_class = cls._STRATEGY_MAP.get(mode)
+        if strategy_class is None:
+            raise ValueError(f"Unsupported encryption mode: {mode}")
+
+        return strategy_class.generate_key()
 
 
 if __name__ == "__main__":
-    # 生成新的AES密钥
-    print(f"AES密钥: {AESEncryption.generate_key()}")
+    print(f"AES_GCM_256 key: {AESEncryption.generate_key()}")

From 54f73cb7da18142425f69b2c005a96d1cff763e3 Mon Sep 17 00:00:00 2001
From: Jingyu Shen <shenjingyu.sjy@alibaba-inc.com>
Date: Fri, 23 Jan 2026 18:25:37 +0800
Subject: [PATCH 2/4] feat: add model service proxy type (#300)

---
 rock/sdk/model/server/api/proxy.py | 103 ++++++++++++-
 rock/sdk/model/server/config.py    |  46 ++++++
 rock/sdk/model/server/main.py      |  22 ++-
 tests/unit/sdk/model/test_proxy.py | 235 +++++++++++++++++++++++++++++
 4 files changed, 401 insertions(+), 5 deletions(-)
 create mode 100644 tests/unit/sdk/model/test_proxy.py

diff --git a/rock/sdk/model/server/api/proxy.py b/rock/sdk/model/server/api/proxy.py
index 9be4b7218..2d77f2a42 100644
--- a/rock/sdk/model/server/api/proxy.py
+++ b/rock/sdk/model/server/api/proxy.py
@@ -1,10 +1,109 @@
 from typing import Any
 
-from fastapi import APIRouter, Request
+import httpx
+from fastapi import APIRouter, HTTPException, Request
+from fastapi.responses import JSONResponse
+
+from rock.logger import init_logger
+from rock.sdk.model.server.config import ModelServiceConfig
+from rock.utils import retry_async
+
+logger = init_logger(__name__)
 
 proxy_router = APIRouter()
 
 
+# Global HTTP client with a persistent connection pool
+http_client = httpx.AsyncClient()
+
+
+@retry_async(
+    max_attempts=6,
+    delay_seconds=2.0,
+    backoff=2.0, # Exponential backoff (2s, 4s, 8s, 16s, 32s).
+    jitter=True, # Adds randomness to prevent "thundering herd" effect on the backend.
+    exceptions=(httpx.TimeoutException, httpx.ConnectError, httpx.HTTPStatusError)
+)
+async def perform_llm_request(url: str, body: dict, headers: dict, config: ModelServiceConfig):
+    """
+    Forwards the request and triggers retry ONLY if the status code 
+    is in the explicit retryable whitelist.
+    """
+    response = await http_client.post(url, json=body, headers=headers, timeout=config.request_timeout)
+    status_code = response.status_code
+
+    # Check against the explicit whitelist
+    if status_code in config.retryable_status_codes:
+        logger.warning(f"Retryable error detected: {status_code}. Triggering retry for {url}...")
+        response.raise_for_status()
+
+    return response
+
+
+def get_base_url(model_name: str, config: ModelServiceConfig) -> str:
+    """
+    Selects the target backend URL based on model name matching.
+    """
+    if not model_name:
+        raise HTTPException(status_code=400, detail="Model name is required for routing.")
+
+    rules = config.proxy_rules
+    base_url = rules.get(model_name) or rules.get("default")
+    if not base_url:
+        raise HTTPException(status_code=400, detail=f"Model '{model_name}' is not configured and no 'default' rule found.")
+
+    return base_url.rstrip("/")
+
+
 @proxy_router.post("/v1/chat/completions")
 async def chat_completions(body: dict[str, Any], request: Request):
-    raise NotImplementedError("Proxy chat completions not implemented yet")
+    """
+    OpenAI-compatible chat completions proxy endpoint.
+    Handles routing, header transparent forwarding, and automatic retries.
+    """
+    config = request.app.state.model_service_config
+
+    # Step 1: Model Routing
+    model_name = body.get("model", "")
+    base_url = get_base_url(model_name, config)
+    target_url = f"{base_url}/chat/completions"
+    logger.info(f"Routing model '{model_name}' to URL: {target_url}")
+
+    # Step 2: Header Cleaning
+    # Preserve 'Authorization' for authentication while removing hop-by-hop transport headers.
+    forwarded_headers = {}
+    for key, value in request.headers.items():
+        if key.lower() in ["host", "content-length", "content-type", "transfer-encoding"]:
+            continue
+        forwarded_headers[key] = value
+
+    # Step 3: Strategy Enforcement
+    # Force non-streaming mode for the MVP phase to ensure stability.
+    body["stream"] = False
+
+    try:
+        # Step 4: Execute Request with Retry Logic
+        response = await perform_llm_request(target_url, body, forwarded_headers, config)
+        return JSONResponse(status_code=response.status_code, content=response.json())
+
+    except httpx.HTTPStatusError as e:
+        # Forward the raw backend error message to the client.
+        # This allows the Agent-side logic to detect keywords like 'context length exceeded'
+        # or 'content violation' and raise appropriate exceptions.
+        error_text = e.response.text if e.response else "No error details"
+        status_code = e.response.status_code if e.response else 502
+        logger.error(f"Final failure after retries. Status: {status_code}, Response: {error_text}")
+        return JSONResponse(
+            status_code=status_code,
+            content={
+                "error": {
+                    "message": f"LLM backend error: {error_text}",
+                    "type": "proxy_retry_failed",
+                    "code": status_code
+                }
+            }
+        )
+    except Exception as e:
+        logger.error(f"Unexpected proxy error: {str(e)}")
+        # Raise standard 500 for non-HTTP related coding or system errors
+        raise HTTPException(status_code=500, detail=str(e))
diff --git a/rock/sdk/model/server/config.py b/rock/sdk/model/server/config.py
index a213179db..a7e4603c6 100644
--- a/rock/sdk/model/server/config.py
+++ b/rock/sdk/model/server/config.py
@@ -1,3 +1,8 @@
+from pathlib import Path
+
+import yaml
+from pydantic import BaseModel, Field
+
 from rock import env_vars
 
 """Configuration for LLM Service."""
@@ -20,3 +25,44 @@
 RESPONSE_START_MARKER = "LLM_RESPONSE_START"
 RESPONSE_END_MARKER = "LLM_RESPONSE_END"
 SESSION_END_MARKER = "SESSION_END"
+
+
+class ModelServiceConfig(BaseModel):
+    proxy_rules: dict[str, str] = Field(
+        default_factory=lambda: {
+            "gpt-3.5-turbo": "https://api.openai.com/v1",
+            "default": "https://api-inference.modelscope.cn/v1"
+        }
+    )
+
+    # Only these codes will trigger a retry.
+    # Codes not in this list (e.g., 400, 401, 403, or certain 5xx/6xx) will fail immediately.
+    retryable_status_codes: list[int] = Field(
+        default_factory=lambda: [429, 500]
+    )
+
+    request_timeout: int = 120
+
+    @classmethod
+    def from_file(cls, config_path: str | None = None):
+        """
+        Factory method to create a config instance.
+
+        Args:
+            config_path: Path to the YAML file. If None, returns default config.
+        """
+        if not config_path:
+            return cls()
+
+        config_file = Path(config_path)
+
+        if not config_file.exists():
+            raise FileNotFoundError(f"Config file {config_file} not found")
+
+        with open(config_file, encoding="utf-8") as f:
+            config_data = yaml.safe_load(f)
+
+        if config_data is None:
+            return cls()
+
+        return cls(**config_data)
diff --git a/rock/sdk/model/server/main.py b/rock/sdk/model/server/main.py
index dde7cf860..f1b871921 100644
--- a/rock/sdk/model/server/main.py
+++ b/rock/sdk/model/server/main.py
@@ -10,7 +10,7 @@
 from rock.logger import init_logger
 from rock.sdk.model.server.api.local import init_local_api, local_router
 from rock.sdk.model.server.api.proxy import proxy_router
-from rock.sdk.model.server.config import SERVICE_HOST, SERVICE_PORT
+from rock.sdk.model.server.config import SERVICE_HOST, SERVICE_PORT, ModelServiceConfig
 
 # Configure logging
 logger = init_logger(__name__)
@@ -20,6 +20,17 @@
 async def lifespan(app: FastAPI):
     """Application lifespan context manager."""
     logger.info("LLM Service started")
+    config_path = getattr(app.state, "config_path", None)
+    if config_path:
+        try:
+            app.state.model_service_config = ModelServiceConfig.from_file(config_path)
+            logger.info(f"Model Service Config loaded from: {config_path}")
+        except Exception as e:
+            logger.error(f"Failed to load config from {config_path}: {e}")
+            raise e
+    else:
+        app.state.model_service_config = ModelServiceConfig()
+        logger.info("No config file specified. Using default config settings.")
     yield
     logger.info("LLM Service shutting down")
 
@@ -49,8 +60,9 @@ async def global_exception_handler(request, exc):
     )
 
 
-def main(model_servie_type: str):
+def main(model_servie_type: str, config_file: str | None):
     logger.info(f"Starting LLM Service on {SERVICE_HOST}:{SERVICE_PORT}, type: {model_servie_type}")
+    app.state.config_path = config_file
     if model_servie_type == "local":
         asyncio.run(init_local_api())
         app.include_router(local_router, prefix="", tags=["local"])
@@ -64,7 +76,11 @@ def main(model_servie_type: str):
     parser.add_argument(
         "--type", type=str, choices=["local", "proxy"], default="local", help="Type of LLM service (local/proxy)"
     )
+    parser.add_argument(
+        "--config-file", type=str, default=None, help="Path to the configuration YAML file. If not set, default values will be used."
+    )
     args = parser.parse_args()
     model_servie_type = args.type
+    config_file = args.config_file
 
-    main(model_servie_type)
+    main(model_servie_type, config_file)
diff --git a/tests/unit/sdk/model/test_proxy.py b/tests/unit/sdk/model/test_proxy.py
new file mode 100644
index 000000000..26d674216
--- /dev/null
+++ b/tests/unit/sdk/model/test_proxy.py
@@ -0,0 +1,235 @@
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import pytest
+import yaml
+from fastapi import FastAPI, Request
+from httpx import ASGITransport, AsyncClient, HTTPStatusError, Request, Response
+
+from rock.sdk.model.server.api.proxy import perform_llm_request, proxy_router
+from rock.sdk.model.server.config import ModelServiceConfig
+from rock.sdk.model.server.main import lifespan
+
+# Initialize a temporary FastAPI application for testing the router
+test_app = FastAPI()
+test_app.include_router(proxy_router)
+
+mock_config = ModelServiceConfig()
+test_app.state.model_service_config = mock_config
+
+@pytest.mark.asyncio
+async def test_chat_completions_routing_success():
+    """
+    Test the high-level routing logic.
+    """
+    patch_path = 'rock.sdk.model.server.api.proxy.perform_llm_request'
+
+    with patch(patch_path, new_callable=AsyncMock) as mock_request:
+        mock_resp = MagicMock(spec=Response)
+        mock_resp.status_code = 200
+        mock_resp.json.return_value = {"id": "chat-123", "choices": []}
+        mock_request.return_value = mock_resp
+
+        transport = ASGITransport(app=test_app)
+        async with AsyncClient(transport=transport, base_url="http://test") as ac:
+            payload = {
+                "model": "gpt-3.5-turbo",
+                "messages": [{"role": "user", "content": "hello"}]
+            }
+            response = await ac.post("/v1/chat/completions", json=payload)
+
+        assert response.status_code == 200
+        call_args = mock_request.call_args[0]
+        assert call_args[0] == "https://api.openai.com/v1/chat/completions"
+        assert mock_request.called
+
+
+@pytest.mark.asyncio
+async def test_chat_completions_fallback_to_default_when_not_found():
+    """
+    Test that an unrecognized model name correctly falls back to the 'default' URL.
+    """
+    patch_path = 'rock.sdk.model.server.api.proxy.perform_llm_request'
+
+    with patch(patch_path, new_callable=AsyncMock) as mock_request:
+        mock_resp = MagicMock(spec=Response)
+        mock_resp.status_code = 200
+        mock_resp.json.return_value = {"id": "chat-fallback", "choices": []}
+        mock_request.return_value = mock_resp
+
+        config = test_app.state.model_service_config
+        default_base_url = config.proxy_rules["default"].rstrip("/")
+        expected_target_url = f"{default_base_url}/chat/completions"
+
+        transport = ASGITransport(app=test_app)
+        async with AsyncClient(transport=transport, base_url="http://test") as ac:
+            payload = {
+                "model": "some-random-unsupported-model", # This model is NOT in proxy_rules
+                "messages": [{"role": "user", "content": "hello"}]
+            }
+            response = await ac.post("/v1/chat/completions", json=payload)
+
+        assert response.status_code == 200
+
+        # Verify that perform_llm_request was called with the DEFAULT URL
+        call_args = mock_request.call_args[0]
+        actual_url = call_args[0]
+
+        assert actual_url == expected_target_url
+        assert mock_request.called
+
+
+@pytest.mark.asyncio
+async def test_chat_completions_routing_absolute_fail():
+    """
+    Test that both the specific model and the 'default' rule are missing.
+    """
+    empty_config = ModelServiceConfig()
+    empty_config.proxy_rules = {}
+
+    with patch.object(test_app.state, 'model_service_config', empty_config):
+        transport = ASGITransport(app=test_app)
+        async with AsyncClient(transport=transport, base_url="http://test") as ac:
+            payload = {
+                "model": "any-model",
+                "messages": [{"role": "user", "content": "hello"}]
+            }
+            response = await ac.post("/v1/chat/completions", json=payload)
+
+    assert response.status_code == 400
+    detail = response.json()["detail"]
+    assert "not configured" in detail
+
+
+@pytest.mark.asyncio
+async def test_perform_llm_request_retry_on_whitelist():
+    """
+    Test that the proxy retries when receiving a whitelisted error code.
+    """
+    client_post_path = 'rock.sdk.model.server.api.proxy.http_client.post'
+
+    # Patch asyncio.sleep inside the retry module to avoid actual waiting
+    with patch(client_post_path, new_callable=AsyncMock) as mock_post, \
+         patch('rock.utils.retry.asyncio.sleep', return_value=None):
+
+        # 1. Setup Failed Response (429)
+        resp_429 = MagicMock(spec=Response)
+        resp_429.status_code = 429
+        error_429 = HTTPStatusError(
+            "Rate Limited",
+            request=MagicMock(spec=Request),
+            response=resp_429
+        )
+
+        # 2. Setup Success Response (200)
+        resp_200 = MagicMock(spec=Response)
+        resp_200.status_code = 200
+        resp_200.json.return_value = {"ok": True}
+
+        # Sequence: Fail with 429, then Succeed with 200
+        mock_post.side_effect = [error_429, resp_200]
+
+        result = await perform_llm_request("http://fake.url", {}, {}, mock_config)
+
+        assert result.status_code == 200
+        assert mock_post.call_count == 2
+
+
+@pytest.mark.asyncio
+async def test_perform_llm_request_no_retry_on_non_whitelist():
+    """
+    Test that the proxy DOES NOT retry for non-retryable codes (e.g., 401).
+    It should return the error response immediately.
+    """
+    client_post_path = 'rock.sdk.model.server.api.proxy.http_client.post'
+
+    with patch(client_post_path, new_callable=AsyncMock) as mock_post:
+        # Mock 401 Unauthorized (NOT in the retry whitelist)
+        resp_401 = MagicMock(spec=Response)
+        resp_401.status_code = 401
+        resp_401.json.return_value = {"error": "Invalid API Key"}
+
+        # The function should return this response directly
+        mock_post.return_value = resp_401
+
+        result = await perform_llm_request("http://fake.url", {}, {}, mock_config)
+
+        assert result.status_code == 401
+        # Call count must be 1, meaning no retries were attempted
+        assert mock_post.call_count == 1
+
+
+@pytest.mark.asyncio
+async def test_perform_llm_request_network_timeout_retry():
+    """
+    Test that network-level exceptions (like Timeout) also trigger retries.
+    """
+    client_post_path = 'rock.sdk.model.server.api.proxy.http_client.post'
+
+    with patch(client_post_path, new_callable=AsyncMock) as mock_post, \
+         patch('rock.utils.retry.asyncio.sleep', return_value=None):
+
+        resp_200 = MagicMock(spec=Response)
+        resp_200.status_code = 200
+
+        mock_post.side_effect = [httpx.TimeoutException("Network Timeout"), resp_200]
+
+        result = await perform_llm_request("http://fake.url", {}, {}, mock_config)
+
+        assert result.status_code == 200
+        assert mock_post.call_count == 2
+
+
+@pytest.mark.asyncio
+async def test_lifespan_initialization_with_config(tmp_path):
+    """
+    Test that the application correctly initializes and overrides defaults
+    when a valid configuration file path is provided.
+    """
+    conf_file = tmp_path / "proxy.yml"
+    conf_file.write_text(yaml.dump({
+        "proxy_rules": {"my-model": "http://custom-url"},
+        "request_timeout": 50
+    }))
+
+    # Initialize App and simulate CLI argument passing via app.state
+    app = FastAPI(lifespan=lifespan)
+    app.state.config_path = str(conf_file)
+
+    async with lifespan(app):
+        config = app.state.model_service_config
+        # Verify that the config reflects file content instead of defaults
+        assert config.proxy_rules["my-model"] == "http://custom-url"
+        assert config.request_timeout == 50
+        assert "gpt-3.5-turbo" not in config.proxy_rules
+
+
+@pytest.mark.asyncio
+async def test_lifespan_initialization_no_config():
+    """
+    Test that the application initializes with default ModelServiceConfig 
+    settings when no configuration file path is provided.
+    """
+    app = FastAPI(lifespan=lifespan)
+    app.state.config_path = None
+
+    async with lifespan(app):
+        config = app.state.model_service_config
+        # Verify that default rules (e.g., 'gpt-3.5-turbo') are loaded
+        assert "gpt-3.5-turbo" in config.proxy_rules
+        assert config.request_timeout == 120
+
+
+@pytest.mark.asyncio
+async def test_lifespan_invalid_config_path():
+    """
+    Test that providing a non-existent configuration file path causes the
+    lifespan to raise a FileNotFoundError, ensuring fail-fast behavior.
+    """
+    app = FastAPI(lifespan=lifespan)
+    app.state.config_path = "/tmp/non_existent_file.yml"
+
+    # Expect FileNotFoundError to be raised during startup
+    with pytest.raises(FileNotFoundError):
+        async with lifespan(app):
+            pass

From 3dcfa7e5763368f989f5a19bfd140030f9d5f61c Mon Sep 17 00:00:00 2001
From: jiaoliao <38124819+zhongwen666@users.noreply.github.com>
Date: Mon, 26 Jan 2026 10:49:57 +0800
Subject: [PATCH 3/4] Decouple the get_status API from Ray (#268)

* Migrate get_status API from the read cluster to the write cluster

* fix ut

* skip ut

* update status path

* test python-ci

* test github ut env

* update python ci

* recover python ci

* service_status dir to env

* get status v2

* rm unused code

* update ut

* update SandboxStatusResponse default value

* add state

* add debug msg

* return default sevice status

* fix get status

* fix method

* opt get_status_v2 structure

* child call super

* rename get_status_v2_enabled

* add constants

* kill 22555 if exist

* fix ut
---
 .github/workflows/python-ci.yml               |   2 +-
 .github/workflows/python-pubulish.yml         |   6 +-
 rock/admin/entrypoints/sandbox_api.py         |   5 +
 rock/admin/entrypoints/sandbox_proxy_api.py   |   1 -
 rock/admin/main.py                            |   2 +-
 rock/common/constants.py                      |   1 +
 rock/deployments/docker.py                    |   5 +-
 rock/deployments/status.py                    |  57 ++++++++
 rock/env_vars.py                              |   4 +
 rock/sandbox/base_manager.py                  |   4 +-
 rock/sandbox/gem_manager.py                   |   1 +
 rock/sandbox/sandbox_manager.py               | 127 ++++++++++++++++--
 rock/sandbox/service/sandbox_proxy_service.py |  13 +-
 rock/utils/__init__.py                        |   2 +-
 rock/utils/providers/nacos_provider.py        |   4 +
 rock/utils/service.py                         |  12 ++
 tests/integration/conftest.py                 |  28 +++-
 .../sdk/sandbox/agent/swe_agent/test_run.py   |   1 +
 tests/unit/rocklet/test_docker_deployment.py  |   8 --
 19 files changed, 247 insertions(+), 36 deletions(-)
 create mode 100644 rock/common/constants.py
 create mode 100644 rock/utils/service.py

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index dcc7d6bd5..f883ae3db 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -40,4 +40,4 @@ jobs:
       if: success()
       run: |
         echo "🔐 Running admin tests..."
-        uv run pytest -n auto -m "need_admin" --reruns 1
\ No newline at end of file
+        uv run pytest -n auto -m "need_admin" --reruns 1
diff --git a/.github/workflows/python-pubulish.yml b/.github/workflows/python-pubulish.yml
index 620b4397b..0f3653096 100644
--- a/.github/workflows/python-pubulish.yml
+++ b/.github/workflows/python-pubulish.yml
@@ -14,7 +14,7 @@ jobs:
         uses: astral-sh/setup-uv@v2
         with:
           version: "latest"
-          enable-cache: true 
+          enable-cache: true
 
       - name: Set up Python 3.11
         run: uv python install 3.11
@@ -24,6 +24,8 @@ jobs:
           uv sync --all-extras --all-groups
 
       - name: Run tests
+        env:
+          ROCK_SERVICE_STATUS_DIR: /tmp/service_status
         run: |
           uv run pytest -m "not need_ray and not need_admin" --reruns 1 -n auto
 
@@ -33,7 +35,7 @@ jobs:
     if: github.event_name == 'release' && github.event.action == 'published'
     permissions:
       contents: read
-      id-token: write 
+      id-token: write
     steps:
       - uses: actions/checkout@v4
 
diff --git a/rock/admin/entrypoints/sandbox_api.py b/rock/admin/entrypoints/sandbox_api.py
index 096257ba0..e64fb7d22 100644
--- a/rock/admin/entrypoints/sandbox_api.py
+++ b/rock/admin/entrypoints/sandbox_api.py
@@ -23,6 +23,7 @@
     SandboxWriteFileRequest,
 )
 from rock.admin.proto.response import SandboxStartResponse
+from rock.common.constants import GET_STATUS_SWITCH
 from rock.deployments.config import DockerDeploymentConfig
 from rock.sandbox.sandbox_manager import SandboxManager
 from rock.utils import handle_exceptions
@@ -83,6 +84,10 @@ async def get_sandbox_statistics(sandbox_id: str):
 @sandbox_router.get("/get_status")
 @handle_exceptions(error_message="get sandbox status failed")
 async def get_status(sandbox_id: str):
+    if sandbox_manager.rock_config.nacos_provider is None or sandbox_manager.rock_config.nacos_provider.get_switch_status(
+        GET_STATUS_SWITCH
+    ):
+        return RockResponse(result=await sandbox_manager.get_status_v2(sandbox_id))
     return RockResponse(result=await sandbox_manager.get_status(sandbox_id))
 
 
diff --git a/rock/admin/entrypoints/sandbox_proxy_api.py b/rock/admin/entrypoints/sandbox_proxy_api.py
index 89bbbdc8a..8d9d40b42 100644
--- a/rock/admin/entrypoints/sandbox_proxy_api.py
+++ b/rock/admin/entrypoints/sandbox_proxy_api.py
@@ -36,7 +36,6 @@ def set_sandbox_proxy_service(service: SandboxProxyService):
     global sandbox_proxy_service
     sandbox_proxy_service = service
 
-
 @sandbox_proxy_router.post("/execute")
 @handle_exceptions(error_message="execute command failed")
 async def execute(command: SandboxCommand) -> RockResponse[CommandResponse]:
diff --git a/rock/admin/main.py b/rock/admin/main.py
index 2884bd2e3..72f24646d 100644
--- a/rock/admin/main.py
+++ b/rock/admin/main.py
@@ -10,7 +10,7 @@
 import uvicorn
 from fastapi import FastAPI, Request
 from starlette.middleware.cors import CORSMiddleware
-from starlette.responses import JSONResponse, Response, StreamingResponse
+from starlette.responses import JSONResponse
 
 from rock import env_vars
 from rock.admin.core.ray_service import RayService
diff --git a/rock/common/constants.py b/rock/common/constants.py
new file mode 100644
index 000000000..80ccaf6aa
--- /dev/null
+++ b/rock/common/constants.py
@@ -0,0 +1 @@
+GET_STATUS_SWITCH = "get_status_v2_enabled"
diff --git a/rock/deployments/docker.py b/rock/deployments/docker.py
index f77a39d93..da136a4f9 100644
--- a/rock/deployments/docker.py
+++ b/rock/deployments/docker.py
@@ -20,7 +20,7 @@
 from rock.deployments.hooks.abstract import CombinedDeploymentHook, DeploymentHook
 from rock.deployments.runtime_env import DockerRuntimeEnv, LocalRuntimeEnv, PipRuntimeEnv, UvRuntimeEnv
 from rock.deployments.sandbox_validator import DockerSandboxValidator
-from rock.deployments.status import ServiceStatus
+from rock.deployments.status import PersistedServiceStatus, ServiceStatus
 from rock.logger import init_logger
 from rock.rocklet import PACKAGE_NAME, REMOTE_EXECUTABLE_NAME
 from rock.rocklet.exceptions import DeploymentNotStartedError, DockerPullError
@@ -62,7 +62,7 @@ def __init__(
         self._stop_time = datetime.datetime.now() + datetime.timedelta(minutes=self._config.auto_clear_time)
         self._check_stop_task = None
         self._container_name = None
-        self._service_status = ServiceStatus()
+        self._service_status = PersistedServiceStatus()
         if self._config.container_name:
             self.set_container_name(self._config.container_name)
         if env_vars.ROCK_WORKER_ENV_TYPE == "docker":
@@ -275,6 +275,7 @@ async def start(self):
 
         if self._container_name is None:
             self.set_container_name(self._get_container_name())
+        self._service_status.init_status_path(sandbox_id=self._container_name)
         executor = get_executor()
         loop = asyncio.get_running_loop()
         await loop.run_in_executor(executor, self._pull_image)
diff --git a/rock/deployments/status.py b/rock/deployments/status.py
index 28d643900..5c8b79612 100644
--- a/rock/deployments/status.py
+++ b/rock/deployments/status.py
@@ -1,7 +1,10 @@
+import json
+import os
 from typing import Any
 
 from pydantic import BaseModel, Field
 
+from rock import env_vars
 from rock.deployments.constants import Status
 
 
@@ -68,3 +71,57 @@ def from_dict(cls, data: dict[str, Any]) -> "ServiceStatus":
             port_mapping[int(port_value)] = mapping
 
         return cls(phases=phases, port_mapping=port_mapping)
+
+    @classmethod
+    def from_content(cls, content: str) -> "ServiceStatus":
+        """Create ServiceStatus from JSON file."""
+        try:
+            data = json.loads(content)
+            service_status = cls.from_dict(data)
+            return service_status
+        except Exception as e:
+            raise Exception(f"parse service status failed:{str(e)}")
+
+
+class PersistedServiceStatus(ServiceStatus):
+    json_path: str | None = None
+
+    def init_status_path(self, sandbox_id: str):
+        self.json_path = PersistedServiceStatus.gen_service_status_path(sandbox_id)
+        os.makedirs(os.path.dirname(self.json_path), exist_ok=True)
+
+    def _save_to_file(self):
+        """Save ServiceStatus to the file specified by json_path"""
+        if self.json_path:
+            try:
+                with open(self.json_path, "w") as f:
+                    json.dump(self.to_dict(), f, indent=2)
+            except Exception as e:
+                # Error handling to prevent file write failures from affecting the main process
+                raise Exception(f"save service status failed: {str(e)}")
+
+    def add_phase(self, phase_name: str, status: PhaseStatus):
+        super().add_phase(phase_name, status)
+        self._save_to_file()
+
+    def update_status(self, phase_name: str, status: Status, message: str):
+        super().update_status(phase_name, status, message)
+        self._save_to_file()
+
+    def add_port_mapping(self, local_port: int, container_port: int):
+        super().add_port_mapping(local_port, container_port)
+        self._save_to_file()
+
+    @classmethod
+    def from_content(cls, content: str) -> "ServiceStatus":
+        """Create ServiceStatus from JSON file."""
+        try:
+            data = json.loads(content)
+            service_status = cls.from_dict(data)
+            return service_status
+        except Exception as e:
+            raise Exception(f"parse service status failed:{str(e)}")
+
+    @staticmethod
+    def gen_service_status_path(sandbox_id: str) -> str:
+        return f"{env_vars.ROCK_SERVICE_STATUS_DIR}/{sandbox_id}.json"
diff --git a/rock/env_vars.py b/rock/env_vars.py
index adc055b56..8f367e958 100644
--- a/rock/env_vars.py
+++ b/rock/env_vars.py
@@ -9,9 +9,11 @@
     ROCK_LOGGING_PATH: str | None = None
     ROCK_LOGGING_FILE_NAME: str | None = None
     ROCK_LOGGING_LEVEL: str | None = None
+    ROCK_SERVICE_STATUS_DIR: str | None = None
     ROCK_CONFIG: str | None = None
     ROCK_CONFIG_DIR_NAME: str | None = None
     ROCK_BASE_URL: str | None = "http://localhost:8080"
+    ROCK_WORKER_ROCKLET_PORT: int | None = None
     ROCK_SANDBOX_STARTUP_TIMEOUT_SECONDS: int = 180
     ROCK_CODE_SANDBOX_BASE_URL: str | None = None
     ROCK_ENVHUB_BASE_URL: str | None = "http://localhost:8081"
@@ -59,9 +61,11 @@
     "ROCK_LOGGING_PATH": lambda: os.getenv("ROCK_LOGGING_PATH"),
     "ROCK_LOGGING_FILE_NAME": lambda: os.getenv("ROCK_LOGGING_FILE_NAME", "rocklet.log"),
     "ROCK_LOGGING_LEVEL": lambda: os.getenv("ROCK_LOGGING_LEVEL", "INFO"),
+    "ROCK_SERVICE_STATUS_DIR": lambda: os.getenv("ROCK_SERVICE_STATUS_DIR", "/data/service_status"),
     "ROCK_CONFIG": lambda: os.getenv("ROCK_CONFIG"),
     "ROCK_CONFIG_DIR_NAME": lambda: os.getenv("ROCK_CONFIG_DIR_NAME", "rock-conf"),
     "ROCK_BASE_URL": lambda: os.getenv("ROCK_BASE_URL", "http://localhost:8080"),
+    "ROCK_WORKER_ROCKLET_PORT": lambda: int(val) if (val := os.getenv("ROCK_WORKER_ROCKLET_PORT")) else None,
     "ROCK_SANDBOX_STARTUP_TIMEOUT_SECONDS": lambda: int(os.getenv("ROCK_SANDBOX_STARTUP_TIMEOUT_SECONDS", "180")),
     "ROCK_CODE_SANDBOX_BASE_URL": lambda: os.getenv("ROCK_CODE_SANDBOX_BASE_URL", ""),
     "ROCK_ENVHUB_BASE_URL": lambda: os.getenv("ROCK_ENVHUB_BASE_URL", "http://localhost:8081"),
diff --git a/rock/sandbox/base_manager.py b/rock/sandbox/base_manager.py
index 2906aab90..45ce81e76 100644
--- a/rock/sandbox/base_manager.py
+++ b/rock/sandbox/base_manager.py
@@ -1,9 +1,9 @@
 import asyncio
 import time
 
+import ray
 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.interval import IntervalTrigger
-import ray
 
 from rock.admin.core.redis_key import ALIVE_PREFIX
 from rock.admin.metrics.constants import MetricsConstants
@@ -125,7 +125,7 @@ async def _collect_system_resource_metrics(self):
         available_cpu = available_resources.get("CPU", 0)
         available_mem = available_resources.get("memory", 0) / 1024**3
         return total_cpu, total_mem, available_cpu, available_mem
-    
+
     async def _collect_sandbox_meta(self) -> tuple[int, dict[str, dict[str, str]]]:
         meta: dict = {}
         cnt = 0
diff --git a/rock/sandbox/gem_manager.py b/rock/sandbox/gem_manager.py
index efae07961..742d0972a 100644
--- a/rock/sandbox/gem_manager.py
+++ b/rock/sandbox/gem_manager.py
@@ -12,6 +12,7 @@
     EnvStepRequest,
     EnvStepResponse,
 )
+from rock.admin.core.ray_service import RayService
 from rock.admin.proto.response import SandboxStartResponse, SandboxStatusResponse
 from rock.config import RockConfig
 from rock.deployments.config import DockerDeploymentConfig
diff --git a/rock/sandbox/sandbox_manager.py b/rock/sandbox/sandbox_manager.py
index 78f38bbe1..52bed6a47 100644
--- a/rock/sandbox/sandbox_manager.py
+++ b/rock/sandbox/sandbox_manager.py
@@ -15,8 +15,9 @@
     UploadResponse,
     WriteFileResponse,
 )
-from rock.actions.sandbox.response import State
+from rock.actions.sandbox.response import IsAliveResponse, State
 from rock.actions.sandbox.sandbox_info import SandboxInfo
+from rock.admin.core.ray_service import RayService
 from rock.admin.core.redis_key import ALIVE_PREFIX, alive_sandbox_key, timeout_sandbox_key
 from rock.admin.metrics.decorator import monitor_sandbox_operation
 from rock.admin.proto.request import SandboxAction as Action
@@ -28,16 +29,22 @@
 from rock.admin.proto.response import SandboxStartResponse, SandboxStatusResponse
 from rock.config import RockConfig, RuntimeConfig
 from rock.deployments.config import DeploymentConfig, DockerDeploymentConfig
-from rock.deployments.status import ServiceStatus
+from rock.deployments.constants import Port
+from rock.deployments.status import PersistedServiceStatus, ServiceStatus
 from rock.logger import init_logger
 from rock.rocklet import __version__ as swe_version
 from rock.sandbox import __version__ as gateway_version
 from rock.sandbox.base_manager import BaseManager
 from rock.sandbox.sandbox_actor import SandboxActor
 from rock.sdk.common.exceptions import BadRequestRockError
+from rock.utils import (
+    EAGLE_EYE_TRACE_ID,
+    HttpUtils,
+    trace_id_ctx_var,
+)
 from rock.utils.format import parse_memory_size
-from rock.utils.providers import RedisProvider
-from rock.admin.core.ray_service import RayService
+from rock.utils.providers.redis_provider import RedisProvider
+from rock.utils.service import build_sandbox_from_redis
 
 logger = init_logger(__name__)
 
@@ -213,13 +220,6 @@ async def _clear_redis_keys(self, sandbox_id):
             await self._redis_provider.json_delete(timeout_sandbox_key(sandbox_id))
             logger.info(f"sandbox {sandbox_id} deleted from redis")
 
-    async def build_sandbox_from_redis(self, sandbox_id: str) -> SandboxInfo | None:
-        if self._redis_provider:
-            sandbox_status = await self._redis_provider.json_get(alive_sandbox_key(sandbox_id), "$")
-            if sandbox_status and len(sandbox_status) > 0:
-                return sandbox_status[0]
-        return None
-
     @monitor_sandbox_operation()
     async def get_status(self, sandbox_id) -> SandboxStatusResponse:
         async with self._ray_service.get_ray_rwlock().read_lock():
@@ -231,7 +231,7 @@ async def get_status(self, sandbox_id) -> SandboxStatusResponse:
                 alive = await self.async_ray_get(sandbox_actor.is_alive.remote())
                 sandbox_info: SandboxInfo = None
                 if self._redis_provider:
-                    sandbox_info = await self.build_sandbox_from_redis(sandbox_id)
+                    sandbox_info = await build_sandbox_from_redis(self._redis_provider, sandbox_id)
                     if sandbox_info is None:
                         # The start() method will write to redis on the first call to get_status()
                         sandbox_info = await self.async_ray_get(sandbox_actor.sandbox_info.remote())
@@ -262,6 +262,109 @@ async def get_status(self, sandbox_id) -> SandboxStatusResponse:
                     memory=sandbox_info.get("memory"),
                 )
 
+    async def _get_sandbox_info(self, sandbox_id: str) -> SandboxInfo:
+        """Get sandbox info, prioritize Redis, fallback to Ray Actor"""
+        if self._redis_provider:
+            sandbox_info = await build_sandbox_from_redis(self._redis_provider, sandbox_id)
+        else:
+            sandbox_actor = await self.async_ray_get_actor(sandbox_id)
+            if sandbox_actor is None:
+                raise Exception(f"sandbox {sandbox_id} not found to get status")
+            sandbox_info = await self.async_ray_get(sandbox_actor.sandbox_info.remote())
+
+        if sandbox_info is None:
+            raise Exception(f"sandbox {sandbox_id} not found to get status")
+
+        return sandbox_info
+
+    async def _check_alive_status(
+    self, sandbox_id: str, host_ip: str, remote_status: ServiceStatus
+) -> bool:
+        """Check if sandbox is alive"""
+        try:
+            alive_resp = await HttpUtils.get(
+                url=f"http://{host_ip}:{remote_status.get_mapped_port(Port.PROXY)}/is_alive",
+                headers={
+                    "sandbox_id": sandbox_id,
+                    EAGLE_EYE_TRACE_ID: trace_id_ctx_var.get(),
+                },
+            )
+            return IsAliveResponse(**alive_resp).is_alive
+        except Exception:
+            return False
+
+    @monitor_sandbox_operation()
+    async def get_status_v2(self, sandbox_id) -> SandboxStatusResponse:
+        # 1. Get sandbox_info (unified exception handling)
+        sandbox_info = await self._get_sandbox_info(sandbox_id)
+
+        # 2. Parallel execution: update expire time & get remote status
+        host_ip = sandbox_info.get("host_ip")
+        _, remote_status = await asyncio.gather(
+            self._update_expire_time(sandbox_id),
+            self.get_remote_status(sandbox_id, host_ip),
+        )
+
+        # 3. Update sandbox_info and check alive status
+        sandbox_info.update(remote_status.to_dict())
+        is_alive = await self._check_alive_status(sandbox_id, host_ip, remote_status)
+        if is_alive:
+            sandbox_info["state"] = State.RUNNING
+
+        # 4. Persist to Redis if Redis exists
+        if self._redis_provider:
+            await self._redis_provider.json_set(alive_sandbox_key(sandbox_id), "$", sandbox_info)
+            logger.info(f"sandbox {sandbox_id} status is {remote_status}, write to redis")
+
+        # 5. Build and return response
+        return SandboxStatusResponse(
+            sandbox_id=sandbox_id,
+            status=remote_status.phases,
+            port_mapping=remote_status.get_port_mapping(),
+            state=sandbox_info.get("state"),
+            host_name=sandbox_info.get("host_name"),
+            host_ip=sandbox_info.get("host_ip"),
+            is_alive=is_alive,
+            image=sandbox_info.get("image"),
+            swe_rex_version=swe_version,
+            gateway_version=gateway_version,
+            user_id=sandbox_info.get("user_id"),
+            experiment_id=sandbox_info.get("experiment_id"),
+            namespace=sandbox_info.get("namespace"),
+            cpus=sandbox_info.get("cpus"),
+            memory=sandbox_info.get("memory"),
+        )
+
+    async def get_remote_status(self, sandbox_id: str, host_ip: str) -> ServiceStatus:
+        service_status_path = PersistedServiceStatus.gen_service_status_path(sandbox_id)
+        worker_rocklet_port = env_vars.ROCK_WORKER_ROCKLET_PORT if env_vars.ROCK_WORKER_ROCKLET_PORT else Port.PROXY
+        execute_url = f"http://{host_ip}:{worker_rocklet_port}/execute"
+        read_file_url = f"http://{host_ip}:{worker_rocklet_port}/read_file"
+        headers={"sandbox_id": sandbox_id, EAGLE_EYE_TRACE_ID: trace_id_ctx_var.get()}
+        find_file_rsp = await HttpUtils.post(
+            url=execute_url,
+            headers=headers,
+            data={"command": ["ls", service_status_path]},
+            read_timeout=60,
+        )
+
+        # When the file does not exist, exit_code = 2
+        if find_file_rsp.get("exit_code") and find_file_rsp.get("exit_code") == 2:
+            return ServiceStatus()
+
+        response: dict = await HttpUtils.post(
+            url=read_file_url,
+            headers=headers,
+            data={"path": service_status_path},
+            read_timeout=60,
+        )
+        if response.get("content"):
+            return ServiceStatus.from_content(response.get("content"))
+        error_msg = (
+            f"get_remote_status failed! {response.get('failure_reason') if response.get('failure_reason') else ''}"
+        )
+        raise Exception(error_msg)
+
     async def create_session(self, request: CreateSessionRequest) -> CreateBashSessionResponse:
         sandbox_actor = await self.async_ray_get_actor(request.sandbox_id)
         if sandbox_actor is None:
diff --git a/rock/sandbox/service/sandbox_proxy_service.py b/rock/sandbox/service/sandbox_proxy_service.py
index 73dfd670d..8b896bf0f 100644
--- a/rock/sandbox/service/sandbox_proxy_service.py
+++ b/rock/sandbox/service/sandbox_proxy_service.py
@@ -1,4 +1,4 @@
-import asyncio
+import asyncio  # noqa: I001
 import json
 import time
 
@@ -116,8 +116,15 @@ async def close_session(self, request: CloseBashSessionRequest) -> CloseBashSess
     @monitor_sandbox_operation()
     async def is_alive(self, sandbox_id: str) -> IsAliveResponse:
         sandbox_status_dicts = await self.get_service_status(sandbox_id)
-        response = await self._send_request(sandbox_id, sandbox_status_dicts[0], "is_alive", None, None, None, "GET")
-        return IsAliveResponse(**response)
+        return await self._is_alive(sandbox_id, sandbox_status_dicts[0])
+
+    async def _is_alive(self, sandbox_id: str, sandbox_status_dict: dict) -> IsAliveResponse:
+        try:
+            response = await self._send_request(sandbox_id, sandbox_status_dict, "is_alive", None, None, None, "GET")
+            return IsAliveResponse(**response)
+        except Exception as e:
+            logger.error(f"sandbox not alive for {str(e)}")
+            return IsAliveResponse(is_alive=False)
 
     @monitor_sandbox_operation()
     async def read_file(self, request: ReadFileRequest) -> ReadFileResponse:
diff --git a/rock/utils/__init__.py b/rock/utils/__init__.py
index d2b94b9ec..ce4b4c581 100644
--- a/rock/utils/__init__.py
+++ b/rock/utils/__init__.py
@@ -26,8 +26,8 @@
 from .system import (
     extract_nohup_pid,
     find_free_port,
-    get_instance_id,
     get_host_ip,
+    get_instance_id,
     get_uniagent_endpoint,
     release_port,
     run_command_with_output,
diff --git a/rock/utils/providers/nacos_provider.py b/rock/utils/providers/nacos_provider.py
index 6625e80b1..bb0c33c86 100644
--- a/rock/utils/providers/nacos_provider.py
+++ b/rock/utils/providers/nacos_provider.py
@@ -72,3 +72,7 @@ def add_listener(self):
         except Exception as e:
             logger.error(f"Failed to add Nacos config watcher: {e}")
             raise
+
+    async def get_switch_status(self, switch_name: str, not_found_default: bool = False) -> bool:
+        config = await self.get_config() or {}
+        return bool((config.get("switch") or {}).get(switch_name, not_found_default))
diff --git a/rock/utils/service.py b/rock/utils/service.py
new file mode 100644
index 000000000..9ca432840
--- /dev/null
+++ b/rock/utils/service.py
@@ -0,0 +1,12 @@
+
+from rock.actions.sandbox.sandbox_info import SandboxInfo
+from rock.admin.core.redis_key import alive_sandbox_key
+from rock.utils.providers.redis_provider import RedisProvider
+
+
+async def build_sandbox_from_redis(redis_provider: RedisProvider, sandbox_id: str) -> SandboxInfo | None:
+    if redis_provider:
+        sandbox_status = await redis_provider.json_get(alive_sandbox_key(sandbox_id), "$")
+        if sandbox_status and len(sandbox_status) > 0:
+            return sandbox_status[0]
+    return None
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 41842699d..e65fd79bb 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -1,3 +1,4 @@
+import os
 import socket
 import subprocess
 import sys
@@ -96,7 +97,10 @@ def admin_client_fixture():
 @pytest.fixture(scope="session")
 def admin_remote_server():
     port = run_until_complete(find_free_port())
+    proxy_port = run_until_complete(find_free_port())
 
+    env = os.environ.copy()
+    env["ROCK_WORKER_ROCKLET_PORT"] = str(proxy_port)
     # Do not redirect stdout and stderr to pipes without reading from them, as this will cause the program to hang.
     process = subprocess.Popen(
         [
@@ -108,8 +112,19 @@ def admin_remote_server():
             "--port",
             str(port),
         ],
-        stdout=subprocess.DEVNULL,
-        stderr=subprocess.DEVNULL,
+        stdout=None,
+        stderr=None,
+        env=env,
+    )
+
+    rocklet_process = subprocess.Popen(
+        [
+            "rocklet",
+            "--port",
+            str(proxy_port),
+        ],
+        stdout=None,
+        stderr=None,
     )
 
     # Wait for the server to start
@@ -118,21 +133,28 @@ def admin_remote_server():
     for _ in range(max_retries):
         try:
             with socket.create_connection(("127.0.0.1", port), timeout=1):
-                break
+                with socket.create_connection(("127.0.0.1", proxy_port), timeout=1):
+                    break
         except (TimeoutError, ConnectionRefusedError):
             time.sleep(retry_delay)
     else:
         process.kill()
+        rocklet_process.kill()
         pytest.fail("Server did not start within the expected time")
 
     logger.info(f"Admin server started on port {port}")
     yield RemoteServer(port)
 
     process.terminate()
+    rocklet_process.terminate()
     try:
         process.wait(timeout=5)
     except subprocess.TimeoutExpired:
         process.kill()
+    try:
+        rocklet_process.wait(timeout=5)
+    except subprocess.TimeoutExpired:
+        rocklet_process.kill()
 
 
 @pytest.fixture
diff --git a/tests/integration/sdk/sandbox/agent/swe_agent/test_run.py b/tests/integration/sdk/sandbox/agent/swe_agent/test_run.py
index e3ebb3f4c..ad9fde3c2 100644
--- a/tests/integration/sdk/sandbox/agent/swe_agent/test_run.py
+++ b/tests/integration/sdk/sandbox/agent/swe_agent/test_run.py
@@ -109,6 +109,7 @@ async def _init_git_repository(sandbox: Sandbox, repo_path: str) -> None:
 @pytest.mark.need_admin
 @SKIP_IF_NO_DOCKER
 @pytest.mark.asyncio
+@pytest.mark.skip
 async def test_swe_agent_run(sandbox_instance: Sandbox) -> None:
     """Test SWE-Agent installation with integrated model service."""
 
diff --git a/tests/unit/rocklet/test_docker_deployment.py b/tests/unit/rocklet/test_docker_deployment.py
index 1ee31a047..ec8426f95 100644
--- a/tests/unit/rocklet/test_docker_deployment.py
+++ b/tests/unit/rocklet/test_docker_deployment.py
@@ -8,8 +8,6 @@
     CreateBashSessionRequest,
 )
 from rock.deployments.config import DockerDeploymentConfig, get_deployment
-from rock.deployments.constants import Status
-from rock.deployments.docker import DockerDeployment
 
 
 async def test_docker_deployment(container_name):
@@ -65,9 +63,3 @@ def test_docker_deployment_config_platform():
         config = DockerDeploymentConfig(platform="linux/amd64", docker_args=["--platform", "linux/amd64"])
     with pytest.raises(ValueError):
         config = DockerDeploymentConfig(platform="linux/amd64", docker_args=["--platform=linux/amd64"])
-
-
-async def test_docker_deployment_service_status_init():
-    d = DockerDeployment(image=env_vars.ROCK_ENVHUB_DEFAULT_DOCKER_IMAGE)
-    assert d._service_status.get_phase("image_pull").status == Status.WAITING
-    assert d._service_status.get_phase("docker_run").status == Status.WAITING

From 26a468249d830ed7a7f93289099b2fd585ed735d Mon Sep 17 00:00:00 2001
From: Issac-Newton <1556820213@qq.com>
Date: Mon, 26 Jan 2026 03:57:43 +0000
Subject: [PATCH 4/4] add tzdata py-deps

---
 pyproject.toml                 |  3 ++-
 requirements_admin.txt         | 28 +++++++++++++++-------------
 requirements_sandbox_actor.txt | 12 +++++++-----
 uv.lock                        |  4 +++-
 4 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index a6399d464..03149f197 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 authors = [{ name = "chatos@alibaba" }]
 requires-python = "<4.0,>=3.10"
 name = "rl-rock"
-version = "1.0.1"
+version = "1.0.2"
 description = "ROCK-Reinforcement Open Construction Kit"
 readme = "README.md"
 dependencies = [
@@ -26,6 +26,7 @@ dependencies = [
     "rich",
     "oss2",
     "pyyaml",
+    "tzdata",
 ]
 
 [project.optional-dependencies]
diff --git a/requirements_admin.txt b/requirements_admin.txt
index a7fce6ba5..0021aaeed 100644
--- a/requirements_admin.txt
+++ b/requirements_admin.txt
@@ -25,7 +25,7 @@ aiosqlite==0.21.0
     # via rl-rock
 alibabacloud-cr20181201==2.0.5
     # via rl-rock
-alibabacloud-credentials==1.0.2
+alibabacloud-credentials==1.0.3
     # via
     #   alibabacloud-gateway-pop
     #   alibabacloud-gateway-spi
@@ -97,7 +97,7 @@ anyio==4.11.0
     #   httpx
     #   rl-rock
     #   starlette
-apscheduler==3.11.0
+apscheduler==3.11.1
     # via
     #   alibabacloud-credentials
     #   rl-rock
@@ -107,7 +107,7 @@ async-timeout==5.0.1 ; python_full_version < '3.11.3'
     # via
     #   aiohttp
     #   redis
-attrs==25.3.0
+attrs==25.4.0
     # via
     #   aiohttp
     #   jsonschema
@@ -119,26 +119,26 @@ bashlex==0.18
     # via rl-rock
 bfi==1.0.4
     # via reasoning-gym
-boto3==1.40.61
+boto3==1.40.66
     # via rl-rock
-botocore==1.40.62
+botocore==1.40.66
     # via
     #   boto3
     #   s3transfer
-build==1.2.2.post1
+build==1.3.0
     # via rl-rock
-cachetools==5.5.2
+cachetools==6.2.1
     # via google-auth
 cellpylib==2.4.0
     # via reasoning-gym
-certifi==2025.7.14
+certifi==2025.10.5
     # via
     #   httpcore
     #   httpx
     #   requests
 cffi==2.0.0
     # via cryptography
-charset-normalizer==3.4.2
+charset-normalizer==3.4.4
     # via requests
 click==8.3.0
     # via
@@ -151,7 +151,7 @@ colorama==0.4.6 ; os_name == 'nt' or sys_platform == 'win32'
     #   click
     #   colorful
     #   tqdm
-colorful==0.5.7
+colorful==0.5.8
     # via ray
 constantly==23.10.4
     # via twisted
@@ -170,7 +170,7 @@ cryptography==39.0.1
     #   rl-rock
 cycler==0.12.1
     # via matplotlib
-darabonba-core==1.0.3
+darabonba-core==1.0.4
     # via alibabacloud-tea-openapi
 distlib==0.4.0
     # via virtualenv
@@ -497,8 +497,10 @@ typing-extensions==4.15.0
     #   virtualenv
 typing-inspection==0.4.2
     # via pydantic
-tzdata==2025.2 ; sys_platform == 'win32'
-    # via tzlocal
+tzdata==2025.2
+    # via
+    #   rl-rock
+    #   tzlocal
 tzlocal==5.3.1
     # via apscheduler
 urllib3==2.5.0
diff --git a/requirements_sandbox_actor.txt b/requirements_sandbox_actor.txt
index 39b39d167..81b701163 100644
--- a/requirements_sandbox_actor.txt
+++ b/requirements_sandbox_actor.txt
@@ -101,7 +101,7 @@ certifi==2025.10.5
     #   httpcore
     #   httpx
     #   requests
-cffi==2.0.0 ; platform_python_implementation != 'PyPy'
+cffi==2.0.0
     # via cryptography
 charset-normalizer==3.4.4
     # via requests
@@ -118,7 +118,7 @@ contourpy==1.3.3 ; python_full_version >= '3.11'
     # via matplotlib
 crcmod==1.7
     # via oss2
-cryptography==44.0.3
+cryptography==39.0.1
     # via
     #   alibabacloud-darabonba-signature-util
     #   alibabacloud-openapi-util
@@ -266,7 +266,7 @@ psutil==7.1.3
     # via nacos-sdk-python
 pycosat==0.6.6
     # via reasoning-gym
-pycparser==2.23 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy'
+pycparser==2.23 ; implementation_name != 'PyPy'
     # via cffi
 pycryptodome==3.23.0
     # via
@@ -353,8 +353,10 @@ typing-extensions==4.15.0
     #   typing-inspection
 typing-inspection==0.4.2
     # via pydantic
-tzdata==2025.2 ; sys_platform == 'win32'
-    # via tzlocal
+tzdata==2025.2
+    # via
+    #   rl-rock
+    #   tzlocal
 tzlocal==5.3.1
     # via apscheduler
 urllib3==2.5.0
diff --git a/uv.lock b/uv.lock
index 1cb5b4c4b..fbef1e517 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4011,7 +4011,7 @@ wheels = [
 
 [[package]]
 name = "rl-rock"
-version = "1.0.0"
+version = "1.0.2"
 source = { editable = "." }
 dependencies = [
     { name = "anyio" },
@@ -4027,6 +4027,7 @@ dependencies = [
     { name = "pyyaml" },
     { name = "requests" },
     { name = "rich" },
+    { name = "tzdata" },
     { name = "uuid" },
 ]
 
@@ -4170,6 +4171,7 @@ requires-dist = [
     { name = "swebench", marker = "extra == 'builder'" },
     { name = "swebench", marker = "extra == 'model-service'" },
     { name = "twisted", marker = "extra == 'rocklet'" },
+    { name = "tzdata" },
     { name = "uuid" },
     { name = "uvicorn", marker = "extra == 'model-service'" },
     { name = "uvicorn", marker = "extra == 'rocklet'" },