benchflow-ai · xdotli · Apr 25, 2026 · Apr 21, 2026 · Apr 21, 2026 · Apr 21, 2026
diff --git a/docs/api-reference.md b/docs/api-reference.md
@@ -125,6 +125,10 @@ Trial.run()
   ├─ setup()          — resolve config, create env object
   ├─ start()          — spin up sandbox, upload task files, start services
   ├─ install_agent()  — install agent binary, credentials, sandbox user
+  │                    (sandbox user setup: create non-root user, prepare
+  │                     small config/auth dirs, chown the workspace — no
+  │                     recursive copy of /root tool trees; agent binaries
+  │                     must live on shared prefixes like /usr/local/bin)
   ├─ for scene in scenes:
   │    └─ _run_scene(scene)
   │         ├─ setup /app/.outbox/ — (multi-role scenes only)

diff --git a/docs/task-authoring.md b/docs/task-authoring.md
@@ -50,6 +50,8 @@ env             = { OPENAI_API_KEY = "${OPENAI_API_KEY}" }  # host vars to injec
 
 **Built-in mock services** — if the Dockerfile references a service binary (`claw-gmail`, `claw-slack`, `claw-gcal`, `claw-gdoc`, `claw-gdrive`), BenchFlow starts it automatically. No `[services]` section needed.
 
+**Install tooling to shared prefixes, not `/root`** — when a task image ships Node.js, Python tools, or agent binaries that the sandbox user must execute, install them to `/usr/local/bin`, `/usr/local/lib`, or `/opt`, not `/root/.nvm` or `/root/.local/bin`. `setup_sandbox_user()` creates the non-root user, prepares small config/auth dirs, and chowns the workspace — it does not clone `/root` into the sandbox home. Legacy images that already install tools under `/root` still work via a narrow symlink fallback, but shared prefixes are the supported path. Pre-creating the sandbox user in the Dockerfile is an optional speedup, not a requirement.
+
 ---
 
 ## instruction.md

diff --git a/src/benchflow/_agent_setup.py b/src/benchflow/_agent_setup.py
@@ -30,6 +30,47 @@
 logger = logging.getLogger(__name__)
 
 
+def _skill_link_cmd(source: str, dest: str) -> str:
+    """Link a shared skills tree into an agent discovery path."""
+    if source == dest:
+        return f"mkdir -p {shlex.quote(dest)}"
+
+    parent = shlex.quote(str(Path(dest).parent))
+    q_source = shlex.quote(source)
+    q_dest = shlex.quote(dest)
+    return (
+        f"mkdir -p {parent} && "
+        f"rm -rf {q_dest} && "
+        f"ln -sfn {q_source} {q_dest}"
+    )
+
+
+async def _link_skill_paths(env, source: str, skill_paths: list[str], home: str, cwd: str) -> int:
+    """Link one shared skills tree into each configured discovery path."""
+    parts = []
+    for sp in skill_paths:
+        expanded = sp.replace("$HOME", home).replace("$WORKSPACE", cwd)
+        parts.append(_skill_link_cmd(source, expanded))
+    if parts:
+        cmd = " && ".join(parts)
+        result = await env.exec(cmd, timeout_sec=15)
+        if result.return_code != 0:
+            stdout = (getattr(result, "stdout", "") or "").strip()
+            stderr = (getattr(result, "stderr", "") or "").strip()
+            details = [
+                f"exit code {result.return_code}",
+                f"command: {cmd}",
+            ]
+            if stdout:
+                details.append(f"stdout: {stdout}")
+            if stderr:
+                details.append(f"stderr: {stderr}")
+            raise RuntimeError(
+                f"Failed to link skills from {source}: {'; '.join(details)}"
+            )
+    return len(parts)
+
+
 async def install_agent(env, agent: str, trial_dir: Path) -> AgentConfig | None:
     """Install agent in sandbox and return its config."""
     agent_base = agent.split()[0]
@@ -86,6 +127,9 @@ async def deploy_skills(
     task: "Task",
 ) -> None:
     """Deploy and distribute skills into sandbox."""
+    task_skills_dir = task.config.environment.skills_dir
+    effective_skills = task_skills_dir
+
     # Runtime upload (fallback if not baked into Dockerfile)
     if skills_dir:
         dockerfile = task_path / "environment" / "Dockerfile"
@@ -98,38 +142,24 @@ async def deploy_skills(
             if skills_path.is_dir():
                 logger.info(f"Deploying skills via runtime upload from {skills_path}")
                 await env.upload_dir(skills_path, "/skills")
-                if agent_cfg and agent_cfg.skill_paths:
-                    parts = []
-                    for sp in agent_cfg.skill_paths:
-                        expanded = sp.replace("$HOME", "/root").replace(
-                            "$WORKSPACE", "/app"
-                        )
-                        parent = str(Path(expanded).parent)
-                        parts.append(
-                            f"mkdir -p '{parent}' && ln -sf /skills '{expanded}'"
-                        )
-                    await env.exec(" && ".join(parts), timeout_sec=10)
-                logger.info("Skills deployed to /skills and symlinked")
+                logger.info("Skills deployed to /skills")
+                effective_skills = "/skills"
             else:
                 logger.warning(f"Skills dir not found: {skills_path}")
         else:
             logger.info("Skills already injected via Dockerfile")
-        else:
-            logger.info("Skills already injected via Dockerfile")
+        else:
+            logger.info("Skills already injected via Dockerfile")
+            effective_skills = "/skills"
-        else:
-            logger.info("Skills already injected via Dockerfile")
+        else:
+            logger.info("Skills already injected via Dockerfile")
+            effective_skills = "/skills"
 
     # Distribute to agent-specific discovery paths
-    task_skills_dir = task.config.environment.skills_dir
-    effective_skills = "/skills" if skills_dir else task_skills_dir
     if effective_skills and agent_cfg and agent_cfg.skill_paths:
         home = f"/home/{sandbox_user}" if sandbox_user else "/root"
-        parts = []
-        for sp in agent_cfg.skill_paths:
-            expanded = sp.replace("$HOME", home).replace("$WORKSPACE", agent_cwd)
-            q_expanded = shlex.quote(expanded)
-            q_skills = shlex.quote(effective_skills)
-            parts.append(
-                f"mkdir -p {q_expanded} && cp -r {q_skills}/. {q_expanded}/ 2>/dev/null"
-            )
-        if parts:
-            await env.exec("; ".join(parts), timeout_sec=15)
+        count = await _link_skill_paths(
+            env,
+            effective_skills,
+            agent_cfg.skill_paths,
+            home,
+            agent_cwd,
+        )
+        if count:
             logger.info(
-                f"Skills distributed to {len(parts)} paths for {agent_cfg.name}"
+                f"Skills distributed to {count} paths for {agent_cfg.name}"
             )
diff --git a/src/benchflow/_sandbox.py b/src/benchflow/_sandbox.py
@@ -1,7 +1,7 @@
 """Sandbox user setup, path lockdown, and verifier hardening.
 
 Owns the "agent runs as non-root" lifecycle:
-    - Creating the sandbox user and copying root's tooling into its home
+    - Creating the sandbox user and preparing minimal home state it needs
     - Building the privilege-drop wrapper (setpriv / su) for agent launch
     - Locking down solution/test paths so the sandbox user cannot read them
     - Hardening the environment before the verifier runs
@@ -98,6 +98,20 @@ def build_priv_drop_cmd(agent_launch: str, sandbox_user: str) -> str:
     )
 
 
+def _legacy_root_tool_link_cmd(source: str, dest: str) -> str:
+    """Link legacy root-only tool dirs into the sandbox home when needed."""
+    src = shlex.quote(source)
+    dst = shlex.quote(dest)
+    parent = shlex.quote(str(Path(dest).parent))
+    return (
+        f"if [ -e {src} ] && [ ! -L {dst} ]; then "
+        f"mkdir -p {parent} && "
+        f"rmdir {dst} 2>/dev/null || true; "
+        f"[ -e {dst} ] || ln -s {src} {dst}; "
+        "fi"
+    )
+
+
 async def setup_sandbox_user(
     env, sandbox_user: str, workspace: str, *, timeout_sec: int = 120
 ) -> str:
@@ -107,18 +121,17 @@ async def setup_sandbox_user(
             f"Invalid sandbox_user: {sandbox_user!r} (must be alphanumeric)"
         )
     logger.info(f"Setting up sandbox user: {sandbox_user}")
+    home = f"/home/{sandbox_user}"
+    home_dirs = sorted(d for d in get_sandbox_home_dirs() if d != ".local")
     await env.exec(
         f"id -u {sandbox_user} >/dev/null 2>&1 || "
         f"useradd -m -s /bin/bash {sandbox_user} && "
-        f"mkdir -p /home/{sandbox_user}/.local/bin && "
-        "if [ -d /root/.local/bin ]; then "
-        f"cp -aL /root/.local/bin/. /home/{sandbox_user}/.local/bin/ 2>/dev/null || true; fi && "
-        "if [ -d /root/.nvm ]; then "
-        f"cp -a /root/.nvm/. /home/{sandbox_user}/.nvm/ 2>/dev/null || true; fi && "
-        f"for d in {' '.join(sorted(get_sandbox_home_dirs()))}; do "
-        f"if [ -d /root/$d ]; then mkdir -p /home/{sandbox_user}/$d && "
-        f"cp -a /root/$d/. /home/{sandbox_user}/$d/ 2>/dev/null || true; fi; done && "
-        f"chown -R {sandbox_user}:{sandbox_user} /home/{sandbox_user} && "
+        f"{_legacy_root_tool_link_cmd('/root/.local/bin', f'{home}/.local/bin')} && "
+        f"{_legacy_root_tool_link_cmd('/root/.nvm', f'{home}/.nvm')} && "
+        f"for d in {' '.join(home_dirs)}; do "
+        f"if [ -d /root/$d ]; then mkdir -p {home}/$d && "
+        f"cp -a /root/$d/. {home}/$d/ 2>/dev/null || true; fi; done && "
+        f"chown -R {sandbox_user}:{sandbox_user} {home} && "
         f"chown -R {sandbox_user}:{sandbox_user} {shlex.quote(workspace)}",
         timeout_sec=timeout_sec,
     )

diff --git a/src/benchflow/agents/registry.py b/src/benchflow/agents/registry.py
@@ -374,21 +374,18 @@ class AgentConfig:
 
 
 def get_sandbox_home_dirs() -> set[str]:
-    """Collect all dot-dirs under $HOME that sandbox user setup should copy.
+    """Collect user home config/auth dirs BenchFlow may materialize for the sandbox user.
 
     Derives from three sources across all registered agents:
-    - skill_paths: $HOME/.foo/... → ".foo"
     - credential_files: {home}/.foo/... → ".foo"
+    - subscription_auth.files: {home}/.foo/... → ".foo"
     - home_dirs: explicit extras (e.g. ".openclaw")
 
-    Always includes ".local" (pip scripts, etc.).
+    Skill paths are excluded: deploy_skills() now links those paths directly to a
+    shared skills tree instead of relying on sandbox-home copies.
     """
-    dirs: set[str] = {".local"}
+    dirs: set[str] = set()
     for cfg in AGENTS.values():
-        for sp in cfg.skill_paths:
-            if sp.startswith("$HOME/."):
-                dirname = sp.removeprefix("$HOME/").split("/")[0]
-                dirs.add(dirname)
         for cf in cfg.credential_files:
             # path uses {home}/.foo/... placeholder
             path = cf.path

diff --git a/tests/test_agent_setup.py b/tests/test_agent_setup.py
@@ -1,13 +1,151 @@
+"""Tests for agent install and skill deployment setup helpers."""
+
 from pathlib import Path
 from types import SimpleNamespace
-from unittest.mock import AsyncMock
+from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
-from benchflow._agent_setup import install_agent
+from benchflow._agent_setup import deploy_skills, install_agent
+from benchflow.agents.registry import AgentConfig
 from benchflow.models import AgentInstallError
 
 
+def _make_task(skills_dir: str | None):
+    return SimpleNamespace(
+        config=SimpleNamespace(
+            environment=SimpleNamespace(
+                skills_dir=skills_dir,
+            )
+        )
+    )
+
+
+@pytest.mark.asyncio
+async def test_deploy_skills_symlinks_agent_skill_paths_instead_of_copying(tmp_path):
+    env = MagicMock()
+    env.exec = AsyncMock(return_value=MagicMock(return_code=0, stdout=""))
+    env.upload_dir = AsyncMock()
+    agent_cfg = AgentConfig(
+        name="test-agent",
+        install_cmd="true",
+        launch_cmd="true",
+        skill_paths=["$HOME/.agents/skills", "$WORKSPACE/skills"],
+    )
+
+    await deploy_skills(
+        env=env,
+        task_path=tmp_path,
+        skills_dir=None,
+        agent_cfg=agent_cfg,
+        sandbox_user="agent",
+        agent_cwd="/app",
+        task=_make_task("/opt/benchflow/skills"),
+    )
+
+    env.upload_dir.assert_not_called()
+    env.exec.assert_awaited_once()
+
+    cmd = env.exec.await_args.args[0]
+    assert "cp -r" not in cmd
+    assert " && " in cmd
+    assert ";" not in cmd
+    assert "ln -sfn /opt/benchflow/skills /home/agent/.agents/skills" in cmd
+    assert "ln -sfn /opt/benchflow/skills /app/skills" in cmd
+
+
+@pytest.mark.asyncio
+async def test_deploy_skills_uploads_runtime_skills_and_links_shared_tree(tmp_path):
+    env = MagicMock()
+    env.exec = AsyncMock(return_value=MagicMock(return_code=0, stdout=""))
+    env.upload_dir = AsyncMock()
+    agent_cfg = AgentConfig(
+        name="test-agent",
+        install_cmd="true",
+        launch_cmd="true",
+        skill_paths=["$HOME/.agents/skills", "$WORKSPACE/skills"],
+    )
+    skills_dir = tmp_path / "skills"
+    skills_dir.mkdir()
+
+    await deploy_skills(
+        env=env,
+        task_path=tmp_path,
+        skills_dir=skills_dir,
+        agent_cfg=agent_cfg,
+        sandbox_user="agent",
+        agent_cwd="/workspace",
+        task=_make_task("/opt/benchflow/skills"),
+    )
+
+    env.upload_dir.assert_awaited_once_with(skills_dir, "/skills")
+    env.exec.assert_awaited_once()
+
+    distributed_link_cmd = env.exec.await_args.args[0]
+    assert " && " in distributed_link_cmd
+    assert ";" not in distributed_link_cmd
+    assert "ln -sfn /skills /home/agent/.agents/skills" in distributed_link_cmd
+    assert "ln -sfn /skills /workspace/skills" in distributed_link_cmd
+    assert "/root/.agents/skills" not in distributed_link_cmd
+    assert "/app/skills" not in distributed_link_cmd
+
+
+@pytest.mark.asyncio
+async def test_deploy_skills_falls_back_when_local_skills_dir_is_missing(tmp_path):
+    env = MagicMock()
+    env.exec = AsyncMock(return_value=MagicMock(return_code=0, stdout=""))
+    env.upload_dir = AsyncMock()
+    agent_cfg = AgentConfig(
+        name="test-agent",
+        install_cmd="true",
+        launch_cmd="true",
+        skill_paths=["$HOME/.agents/skills", "$WORKSPACE/skills"],
+    )
+
+    await deploy_skills(
+        env=env,
+        task_path=tmp_path,
+        skills_dir=tmp_path / "missing-skills",
+        agent_cfg=agent_cfg,
+        sandbox_user="agent",
+        agent_cwd="/workspace",
+        task=_make_task("/opt/benchflow/skills"),
+    )
+
+    env.upload_dir.assert_not_called()
+    env.exec.assert_awaited_once()
+
+    distributed_link_cmd = env.exec.await_args.args[0]
+    assert "ln -sfn /opt/benchflow/skills /home/agent/.agents/skills" in distributed_link_cmd
+    assert "ln -sfn /opt/benchflow/skills /workspace/skills" in distributed_link_cmd
+    assert "ln -sfn /skills /home/agent/.agents/skills" not in distributed_link_cmd
+    assert "ln -sfn /skills /workspace/skills" not in distributed_link_cmd
+
+
+@pytest.mark.asyncio
+async def test_deploy_skills_raises_when_skill_linking_fails(tmp_path):
+    env = MagicMock()
+    env.exec = AsyncMock(return_value=MagicMock(return_code=17, stdout="link failed"))
+    env.upload_dir = AsyncMock()
+    agent_cfg = AgentConfig(
+        name="test-agent",
+        install_cmd="true",
+        launch_cmd="true",
+        skill_paths=["$HOME/.agents/skills"],
+    )
+
+    with pytest.raises(RuntimeError, match="Failed to link skills"):
+        await deploy_skills(
+            env=env,
+            task_path=tmp_path,
+            skills_dir=None,
+            agent_cfg=agent_cfg,
+            sandbox_user="agent",
+            agent_cwd="/app",
+            task=_make_task("/opt/benchflow/skills"),
+        )
+
+
 @pytest.mark.asyncio
 async def test_install_agent_writes_command_stdout_and_stderr_on_failure(tmp_path: Path):
     env = SimpleNamespace()

diff --git a/tests/test_registry_invariants.py b/tests/test_registry_invariants.py
@@ -76,6 +76,22 @@ def test_agent_collection_invariants(name, cfg):
         assert d.startswith("."), f"home_dirs entry {d!r} must start with '.'"
 
 
+@pytest.mark.parametrize("name,cfg", AGENTS.items(), ids=list(AGENTS.keys()))
+def test_agent_install_cmd_targets_shared_paths(name, cfg):
+    """Installed binaries must land in shared prefixes, not a root-only home.
+
+    setup_sandbox_user() no longer recursively copies /root/.nvm or
+    /root/.local/bin into the sandbox home. If an install_cmd placed its
+    binary there, the sandbox user would silently lose access to the agent.
+    """
+    forbidden_binary_prefixes = ("/root/.nvm/", "/root/.local/bin/", "$HOME/.nvm/")
+    for prefix in forbidden_binary_prefixes:
+        assert prefix not in cfg.install_cmd, (
+            f"{name!r} install_cmd writes under {prefix!r}; use /usr/local/bin "
+            f"or another shared prefix so the sandbox user inherits the tool"
+        )
+
+
 @pytest.mark.parametrize("name,cfg", AGENTS.items(), ids=list(AGENTS.keys()))
 def test_agent_credential_and_subscription_auth(name, cfg):
     """Optional credential_files and subscription_auth structures."""
-Original file line number
+Diff line change
@@ Expand Up @@
     **Built-in mock services** — if the Dockerfile references a service binary (`claw-gmail`, `claw-slack`, `claw-gcal`, `claw-gdoc`, `claw-gdrive`), BenchFlow starts it automatically. No `[services]` section needed.
+    **Install tooling to shared prefixes, not `/root`** — when a task image ships Node.js, Python tools, or agent binaries that the sandbox user must execute, install them to `/usr/local/bin`, `/usr/local/lib`, or `/opt`, not `/root/.nvm` or `/root/.local/bin`. `setup_sandbox_user()` creates the non-root user, prepares small config/auth dirs, and chowns the workspace — it does not clone `/root` into the sandbox home. Legacy images that already install tools under `/root` still work via a narrow symlink fallback, but shared prefixes are the supported path. Pre-creating the sandbox user in the Dockerfile is an optional speedup, not a requirement.
     ---
     ## instruction.md
@@ Expand Down @@