From 310bce1613a5fe544fc285d1f10042ff71e675f1 Mon Sep 17 00:00:00 2001
From: akseljoonas
Date: Tue, 28 Apr 2026 21:30:44 +0300
Subject: [PATCH] Let agents use user-authenticated gh and hf CLIs

The sandbox already exposes bash as the escape hatch for live repository and Hub operations, so this change teaches the model when to use gh/hf there and installs the CLIs in the generated sandbox image.

GitHub credentials stay user-owned: HF_TOKEN is injected from the user's HF session, while GH_TOKEN/GITHUB_TOKEN are only added when the user explicitly supplies a GitHub token to sandbox_create.

Constraint: Issue #167 reports that models cannot access GitHub reliably through the current guidance.
Rejected: Reusing a server or maintainer GitHub token | GitHub access must be scoped to the user's own account.
Confidence: high
Scope-risk: narrow
Directive: Do not broaden GitHub token propagation without a user-owned auth path.
Tested: UV_CACHE_DIR=/tmp/uv-cache uv run --extra dev pytest tests/unit/test_sandbox_cli_support.py tests/unit/test_sandbox_api_auth.py tests/unit/test_sandbox_already_active_message.py
Tested: Live cpu-basic sandbox akseljoonas/ml-intern-cli-live-f54f2f70 verified gh path/version, hf path/version, hf auth whoami, HF_TOKEN present, then deleted.
Not-tested: Authenticated GitHub CLI operations inside a sandbox with a real user GitHub token; no user GitHub token was supplied for live testing.
--- agent/context_manager/manager.py | 7 ++++- agent/prompts/system_prompt_v3.yaml | 7 +++++ agent/tools/local_tools.py | 5 ++++ agent/tools/sandbox_client.py | 10 +++++-- agent/tools/sandbox_tool.py | 15 ++++++++++ tests/unit/test_sandbox_cli_support.py | 39 ++++++++++++++++++++++++++ 6 files changed, 80 insertions(+), 3 deletions(-) create mode 100644 tests/unit/test_sandbox_cli_support.py diff --git a/agent/context_manager/manager.py b/agent/context_manager/manager.py index c842c884..0198a13b 100644 --- a/agent/context_manager/manager.py +++ b/agent/context_manager/manager.py @@ -204,7 +204,12 @@ def _load_system_prompt( f"Working directory: {cwd}\n" f"Use absolute paths or paths relative to the working directory. " f"Do NOT use /app/ paths — that is a sandbox convention that does not apply here.\n" - f"The sandbox_create tool is NOT available. Run code directly with bash." + f"The sandbox_create tool is NOT available. Run code directly with bash.\n" + f"The gh and hf CLIs may be installed and authenticated on this machine. " + f"Use them through bash for live GitHub and Hugging Face operations. " + f"If authentication is missing, ask the user to authenticate their own " + f"account with gh auth login / hf auth login or set their own token; " + f"never ask for or use a maintainer/developer GitHub PAT." ) static_prompt += local_context diff --git a/agent/prompts/system_prompt_v3.yaml b/agent/prompts/system_prompt_v3.yaml index cb63c901..85ba7bd0 100644 --- a/agent/prompts/system_prompt_v3.yaml +++ b/agent/prompts/system_prompt_v3.yaml @@ -127,6 +127,13 @@ system_prompt: | Use GPU sandbox (t4-small minimum) when testing code that uses CUDA, bf16, or model loading. CPU sandboxes cannot test GPU code paths. + # GitHub and Hugging Face CLIs + + Use the `gh` and `hf` CLIs through bash when they are the most direct way to inspect repos, issues, PRs, releases, or Hub state. 
Prefer dedicated docs/research tools for API documentation and code examples; use the CLIs for live repository/HF Hub operations. + + In sandboxes, `gh` and `hf` are preinstalled. The sandbox receives the user's HF_TOKEN automatically, so `hf` can operate as that user. GitHub auth is available only if the user supplied their own GitHub token to sandbox_create, which is exposed as GH_TOKEN/GITHUB_TOKEN for `gh`. If GitHub auth is missing and private access or higher rate limits are required, ask the user to authenticate or provide their own GitHub token. Never ask for, use, or imply access to a maintainer/developer GitHub PAT. + + In CLI/local mode, bash runs on the user's machine. Use `gh ...` and `hf ...` directly if installed and authenticated there. If auth is missing, ask the user to run `gh auth login` / `hf auth login` or provide their own token in their local environment. # When a task has 3+ steps diff --git a/agent/tools/local_tools.py b/agent/tools/local_tools.py index fc456f68..9bd26bc7 100644 --- a/agent/tools/local_tools.py +++ b/agent/tools/local_tools.py @@ -255,6 +255,11 @@ async def _edit_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]: " kill -0 2>/dev/null && echo 'running' || echo 'done'\n" " tail -n 50 /tmp/output.log\n" "\n" + "Use the gh and hf CLIs through bash for live GitHub and Hugging Face " + "operations when they are installed/authenticated on the user's machine. " + "If auth is missing, ask the user to authenticate their own account or " + "set their own token; never ask for or use a maintainer/developer GitHub PAT.\n" + "\n" "Timeout default 120s, max 36000s." 
), "parameters": { diff --git a/agent/tools/sandbox_client.py b/agent/tools/sandbox_client.py index 967d946c..5d10dfd2 100644 --- a/agent/tools/sandbox_client.py +++ b/agent/tools/sandbox_client.py @@ -71,12 +71,12 @@ RUN apt-get update && \\ apt-get install -y \\ - bash git git-lfs wget curl procps \\ + bash git git-lfs gh wget curl procps \\ htop vim nano jq tmux \\ build-essential && \\ rm -rf /var/lib/apt/lists/* -RUN uv pip install --system fastapi uvicorn python-multipart +RUN uv pip install --system fastapi uvicorn python-multipart "huggingface_hub[cli]" RUN useradd -m -u 1000 user USER user @@ -920,6 +920,12 @@ def kill_all(self) -> ToolResult: " kill -0 2>/dev/null && echo 'running' || echo 'done'\n" " tail -n 50 /app/output.log\n" "\n" + "The gh and hf CLIs are preinstalled. Use them through bash for GitHub " + "and Hugging Face operations that are not covered by dedicated tools. " + "HF_TOKEN is available as the user's HF token. GH_TOKEN/GITHUB_TOKEN " + "are available only when the user supplied their own GitHub token to " + "sandbox_create.\n" + "\n" "Timeout default 240s, max 1200s." ), "parameters": { diff --git a/agent/tools/sandbox_tool.py b/agent/tools/sandbox_tool.py index a5c26aca..7013cb88 100644 --- a/agent/tools/sandbox_tool.py +++ b/agent/tools/sandbox_tool.py @@ -306,6 +306,9 @@ async def _watch_cancel(): "If you intend to run a training script in this sandbox that uses report_to='trackio', " "pass `trackio_space_id` (e.g. '/mlintern-<8char>') and `trackio_project` so they " "are set as TRACKIO_SPACE_ID/TRACKIO_PROJECT secrets in the sandbox and the UI can embed the live dashboard.\n\n" + "The sandbox has the `gh` and `hf` CLIs preinstalled. HF_TOKEN is injected automatically from the user's " + "Hugging Face session. 
To access private GitHub repos or higher GitHub API rate limits, pass `github_token` " + "with the user's own GitHub token; never use a maintainer/developer PAT.\n\n" "Hardware: " + ", ".join([e.value for e in SpaceHardware]) + ".\n" ), "parameters": { @@ -339,6 +342,14 @@ async def _watch_cancel(): "used by the UI to filter the embedded dashboard to this project." ), }, + "github_token": { + "type": "string", + "description": ( + "Optional. The user's own GitHub token for sandbox `gh` CLI and GitHub API access. " + "Injected as GH_TOKEN and GITHUB_TOKEN. Ask the user for their own token when needed; " + "do not use a maintainer or developer PAT." + ), + }, }, }, } @@ -351,6 +362,7 @@ async def sandbox_create_handler( hardware = args.get("hardware", "cpu-basic") trackio_space_id = args.get("trackio_space_id") or None trackio_project = args.get("trackio_project") or None + github_token = args.get("github_token") or None async def _emit_trackio_state(sb: Sandbox) -> None: """Tell the frontend which trackio dashboard to embed for this sandbox.""" @@ -395,6 +407,9 @@ async def _emit_trackio_state(sb: Sandbox) -> None: await _seed_trackio_dashboard_safe(session, trackio_space_id) if trackio_project: extra_secrets["TRACKIO_PROJECT"] = trackio_project + if github_token: + extra_secrets["GH_TOKEN"] = github_token + extra_secrets["GITHUB_TOKEN"] = github_token try: sb, error = await _ensure_sandbox( diff --git a/tests/unit/test_sandbox_cli_support.py b/tests/unit/test_sandbox_cli_support.py new file mode 100644 index 00000000..e53275db --- /dev/null +++ b/tests/unit/test_sandbox_cli_support.py @@ -0,0 +1,39 @@ +import asyncio +from types import SimpleNamespace + +import agent.tools.sandbox_tool as sandbox_tool +from agent.tools.sandbox_client import _DOCKERFILE, Sandbox + + +def test_sandbox_image_installs_gh_and_hf_clis(): + assert "git-lfs gh wget" in _DOCKERFILE + assert '"huggingface_hub[cli]"' in _DOCKERFILE + assert "gh and hf CLIs are preinstalled" in 
Sandbox.TOOLS["bash"]["description"] + + +def test_sandbox_create_forwards_user_github_token(monkeypatch): + captured = {} + + async def fake_ensure_sandbox(session, **kwargs): + captured.update(kwargs) + return ( + SimpleNamespace( + space_id="user/sandbox-abc123", + url="https://huggingface.co/spaces/user/sandbox-abc123", + ), + None, + ) + + monkeypatch.setattr(sandbox_tool, "_ensure_sandbox", fake_ensure_sandbox) + + session = SimpleNamespace(sandbox=None, hf_token="hf-token") + out, ok = asyncio.run( + sandbox_tool.sandbox_create_handler( + {"github_token": "github_pat_user_owned"}, session=session + ) + ) + + assert ok is True + assert "github_pat_user_owned" not in out + assert captured["extra_secrets"]["GH_TOKEN"] == "github_pat_user_owned" + assert captured["extra_secrets"]["GITHUB_TOKEN"] == "github_pat_user_owned"