Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion agent/context_manager/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,12 @@ def _load_system_prompt(
f"Working directory: {cwd}\n"
f"Use absolute paths or paths relative to the working directory. "
f"Do NOT use /app/ paths — that is a sandbox convention that does not apply here.\n"
f"The sandbox_create tool is NOT available. Run code directly with bash."
f"The sandbox_create tool is NOT available. Run code directly with bash.\n"
f"The gh and hf CLIs may be installed and authenticated on this machine. "
f"Use them through bash for live GitHub and Hugging Face operations. "
f"If authentication is missing, ask the user to authenticate their own "
f"account with gh auth login / hf auth login or set their own token; "
f"never ask for or use a maintainer/developer GitHub PAT."
)
static_prompt += local_context

Expand Down
7 changes: 7 additions & 0 deletions agent/prompts/system_prompt_v3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,13 @@ system_prompt: |

Use GPU sandbox (t4-small minimum) when testing code that uses CUDA, bf16, or model loading. CPU sandboxes cannot test GPU code paths.

# GitHub and Hugging Face CLIs

Use the `gh` and `hf` CLIs through bash when they are the most direct way to inspect repos, issues, PRs, releases, or Hub state. Prefer dedicated docs/research tools for API documentation and code examples; use the CLIs for live repository/HF Hub operations.

In sandboxes, `gh` and `hf` are preinstalled. The sandbox receives the user's HF_TOKEN automatically, so `hf` can operate as that user. GitHub auth is available only if the user supplied their own GitHub token to sandbox_create, which is exposed as GH_TOKEN/GITHUB_TOKEN for `gh`. If GitHub auth is missing and private access or higher rate limits are required, ask the user to authenticate or provide their own GitHub token. Never ask for, use, or imply access to a maintainer/developer GitHub PAT.

In CLI/local mode, bash runs on the user's machine. Use `gh ...` and `hf ...` directly if installed and authenticated there. If auth is missing, ask the user to run `gh auth login` / `hf auth login` or provide their own token in their local environment.

# When a task has 3+ steps

Expand Down
5 changes: 5 additions & 0 deletions agent/tools/local_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ async def _edit_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
" kill -0 <PID> 2>/dev/null && echo 'running' || echo 'done'\n"
" tail -n 50 /tmp/output.log\n"
"\n"
"Use the gh and hf CLIs through bash for live GitHub and Hugging Face "
"operations when they are installed/authenticated on the user's machine. "
"If auth is missing, ask the user to authenticate their own account or "
"set their own token; never ask for or use a maintainer/developer GitHub PAT.\n"
"\n"
"Timeout default 120s, max 36000s."
),
"parameters": {
Expand Down
10 changes: 8 additions & 2 deletions agent/tools/sandbox_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,12 @@

RUN apt-get update && \\
apt-get install -y \\
bash git git-lfs wget curl procps \\
bash git git-lfs gh wget curl procps \\
htop vim nano jq tmux \\
build-essential && \\
rm -rf /var/lib/apt/lists/*

RUN uv pip install --system fastapi uvicorn python-multipart
RUN uv pip install --system fastapi uvicorn python-multipart "huggingface_hub[cli]"

RUN useradd -m -u 1000 user
USER user
Expand Down Expand Up @@ -920,6 +920,12 @@ def kill_all(self) -> ToolResult:
" kill -0 <PID> 2>/dev/null && echo 'running' || echo 'done'\n"
" tail -n 50 /app/output.log\n"
"\n"
"The gh and hf CLIs are preinstalled. Use them through bash for GitHub "
"and Hugging Face operations that are not covered by dedicated tools. "
"HF_TOKEN is available as the user's HF token. GH_TOKEN/GITHUB_TOKEN "
"are available only when the user supplied their own GitHub token to "
"sandbox_create.\n"
"\n"
"Timeout default 240s, max 1200s."
),
"parameters": {
Expand Down
15 changes: 15 additions & 0 deletions agent/tools/sandbox_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,9 @@ async def _watch_cancel():
"If you intend to run a training script in this sandbox that uses report_to='trackio', "
"pass `trackio_space_id` (e.g. '<username>/mlintern-<8char>') and `trackio_project` so they "
"are set as TRACKIO_SPACE_ID/TRACKIO_PROJECT secrets in the sandbox and the UI can embed the live dashboard.\n\n"
"The sandbox has the `gh` and `hf` CLIs preinstalled. HF_TOKEN is injected automatically from the user's "
"Hugging Face session. To access private GitHub repos or higher GitHub API rate limits, pass `github_token` "
"with the user's own GitHub token; never use a maintainer/developer PAT.\n\n"
"Hardware: " + ", ".join([e.value for e in SpaceHardware]) + ".\n"
),
"parameters": {
Expand Down Expand Up @@ -339,6 +342,14 @@ async def _watch_cancel():
"used by the UI to filter the embedded dashboard to this project."
),
},
"github_token": {
"type": "string",
"description": (
"Optional. The user's own GitHub token for sandbox `gh` CLI and GitHub API access. "
"Injected as GH_TOKEN and GITHUB_TOKEN. Ask the user for their own token when needed; "
"do not use a maintainer or developer PAT."
),
},
},
},
}
Expand All @@ -351,6 +362,7 @@ async def sandbox_create_handler(
hardware = args.get("hardware", "cpu-basic")
trackio_space_id = args.get("trackio_space_id") or None
trackio_project = args.get("trackio_project") or None
github_token = args.get("github_token") or None

async def _emit_trackio_state(sb: Sandbox) -> None:
"""Tell the frontend which trackio dashboard to embed for this sandbox."""
Expand Down Expand Up @@ -395,6 +407,9 @@ async def _emit_trackio_state(sb: Sandbox) -> None:
await _seed_trackio_dashboard_safe(session, trackio_space_id)
if trackio_project:
extra_secrets["TRACKIO_PROJECT"] = trackio_project
if github_token:
extra_secrets["GH_TOKEN"] = github_token
extra_secrets["GITHUB_TOKEN"] = github_token

try:
sb, error = await _ensure_sandbox(
Expand Down
39 changes: 39 additions & 0 deletions tests/unit/test_sandbox_cli_support.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import asyncio
from types import SimpleNamespace

import agent.tools.sandbox_tool as sandbox_tool
from agent.tools.sandbox_client import _DOCKERFILE, Sandbox


def test_sandbox_image_installs_gh_and_hf_clis():
    """Check that the sandbox image installs both CLIs and the bash tool says so."""
    # Both the `gh` package and the Hugging Face CLI extra must be present in
    # the Dockerfile text used to build the sandbox image.
    for needle in ("git-lfs gh wget", '"huggingface_hub[cli]"'):
        assert needle in _DOCKERFILE

    bash_description = Sandbox.TOOLS["bash"]["description"]
    assert "gh and hf CLIs are preinstalled" in bash_description


def test_sandbox_create_forwards_user_github_token(monkeypatch):
    """Check that `github_token` is forwarded as sandbox secrets and not echoed.

    `_ensure_sandbox` is replaced with a stub that records the keyword
    arguments it receives, so no real sandbox is created.
    """
    user_token = "github_pat_user_owned"
    seen_kwargs = {}

    async def fake_ensure_sandbox(session, **kwargs):
        # Record what the handler requested and return a (sandbox, error) pair.
        seen_kwargs.update(kwargs)
        stub_sandbox = SimpleNamespace(
            space_id="user/sandbox-abc123",
            url="https://huggingface.co/spaces/user/sandbox-abc123",
        )
        return (stub_sandbox, None)

    monkeypatch.setattr(sandbox_tool, "_ensure_sandbox", fake_ensure_sandbox)

    fake_session = SimpleNamespace(sandbox=None, hf_token="hf-token")
    handler_call = sandbox_tool.sandbox_create_handler(
        {"github_token": user_token}, session=fake_session
    )
    out, ok = asyncio.run(handler_call)

    assert ok is True
    # The raw token must not appear in the text returned to the caller.
    assert user_token not in out
    # Both environment variable names must carry the same user token.
    forwarded = seen_kwargs["extra_secrets"]
    assert forwarded["GH_TOKEN"] == user_token
    assert forwarded["GITHUB_TOKEN"] == user_token
Loading