From 6d81ce655129daf013e66eb283952fa14f2ee092 Mon Sep 17 00:00:00 2001 From: Lewis Tunstall Date: Fri, 8 May 2026 15:53:06 +0200 Subject: [PATCH 1/2] Use hyphenated ml-intern Trackio prefixes Co-authored-by: OpenAI Codex --- agent/prompts/system_prompt_v3.yaml | 2 +- agent/tools/jobs_tool.py | 11 ++- agent/tools/sandbox_tool.py | 11 ++- agent/tools/trackio_seed.py | 19 +++++ tests/unit/test_trackio_space_ids.py | 114 +++++++++++++++++++++++++++ 5 files changed, 148 insertions(+), 9 deletions(-) create mode 100644 tests/unit/test_trackio_space_ids.py diff --git a/agent/prompts/system_prompt_v3.yaml b/agent/prompts/system_prompt_v3.yaml index 4543048f..ae668e85 100644 --- a/agent/prompts/system_prompt_v3.yaml +++ b/agent/prompts/system_prompt_v3.yaml @@ -66,7 +66,7 @@ system_prompt: | report_to="trackio" run_name="" # e.g. "sft_qwen3-4b_lr2e-5_bs128" project="" # keeps related runs grouped so you can compare them - trackio_space_id="/mlintern-<8-char-id>" # creates a public dashboard Space + trackio_space_id="/ml-intern-<8-char-id>" # creates a public dashboard Space `project` and `trackio_space_id` can also be set via TRACKIO_PROJECT / TRACKIO_SPACE_ID env vars. Alerts are how iterations decide what to change. Use trackio.alert(title, text, level) at every decision point in training. Levels: diff --git a/agent/tools/jobs_tool.py b/agent/tools/jobs_tool.py index 29d6b301..c3b32719 100644 --- a/agent/tools/jobs_tool.py +++ b/agent/tools/jobs_tool.py @@ -23,7 +23,10 @@ ) from agent.core.hub_artifacts import build_hub_artifact_sitecustomize from agent.core.session import Event -from agent.tools.trackio_seed import ensure_trackio_dashboard +from agent.tools.trackio_seed import ( + ensure_trackio_dashboard, + normalize_trackio_space_id, +) from agent.tools.types import ToolResult from agent.tools.utilities import ( format_job_details, @@ -592,7 +595,7 @@ async def _run_job(self, args: Dict[str, Any]) -> ToolResult: # so trackio.init() picks them up automatically. We also surface them # in tool_state_change so the frontend can embed the dashboard. env_dict = _add_default_env(args.get("env")) - trackio_space_id = args.get("trackio_space_id") + trackio_space_id = normalize_trackio_space_id(args.get("trackio_space_id")) trackio_project = args.get("trackio_project") if trackio_space_id: env_dict["TRACKIO_SPACE_ID"] = trackio_space_id @@ -1116,7 +1119,7 @@ async def _resume_scheduled_job(self, args: Dict[str, Any]) -> ToolResult: "Job storage is EPHEMERAL — all files are deleted when the job ends. Without push_to_hub, trained models are lost permanently.\n" "- Include trackio monitoring and provide the dashboard URL to the user. " "When the script uses report_to='trackio', also pass `trackio_space_id` " - "(e.g. '/mlintern-<8char>') and `trackio_project` as tool args — " + "(e.g. '/ml-intern-<8char>') and `trackio_project` as tool args — " "they are injected as TRACKIO_SPACE_ID/TRACKIO_PROJECT env vars and let the UI embed the live dashboard.\n\n" "BATCH/ABLATION JOBS: Submit ONE job first. Check logs to confirm it starts training successfully. " "Only then submit the remaining jobs. Never submit all at once — if there's a bug, all jobs fail.\n\n" @@ -1204,7 +1207,7 @@ async def _resume_scheduled_job(self, args: Dict[str, Any]) -> ToolResult: "type": "string", "description": ( "Optional. The HF Space hosting the trackio dashboard for this run " - "(e.g. '/mlintern-<8char>', under YOUR HF namespace). " + "(e.g. '/ml-intern-<8char>', under YOUR HF namespace). " "Injected as TRACKIO_SPACE_ID env var and used by the UI to embed " "the live dashboard. Set this whenever the script uses " "report_to='trackio'. The Space is auto-created and seeded with the " diff --git a/agent/tools/sandbox_tool.py b/agent/tools/sandbox_tool.py index fbc6a41f..deb74e08 100644 --- a/agent/tools/sandbox_tool.py +++ b/agent/tools/sandbox_tool.py @@ -24,7 +24,10 @@ from agent.core.hub_artifacts import wrap_shell_command_with_hub_artifact_bootstrap from agent.core.session import Event from agent.tools.sandbox_client import Sandbox -from agent.tools.trackio_seed import ensure_trackio_dashboard +from agent.tools.trackio_seed import ( + ensure_trackio_dashboard, + normalize_trackio_space_id, +) logger = logging.getLogger(__name__) @@ -542,7 +545,7 @@ async def teardown_session_sandbox(session: Any) -> None: "Common picks: t4-small (16GB VRAM, fits ≤1-3B), a10g-small (24GB, ≤7B), a100-large (80GB, ≤30B). " "If the model won't fit, pick larger hardware upfront — OOM on a sandbox wastes time.\n\n" "If you intend to run a training script in this sandbox that uses report_to='trackio', " - "pass `trackio_space_id` (e.g. '/mlintern-<8char>') and `trackio_project` so they " + "pass `trackio_space_id` (e.g. '/ml-intern-<8char>') and `trackio_project` so they " "are set as TRACKIO_SPACE_ID/TRACKIO_PROJECT secrets in the sandbox and the UI can embed the live dashboard.\n\n" "Hardware: " + ", ".join([e.value for e in SpaceHardware]) + ".\n" ), @@ -563,7 +566,7 @@ async def teardown_session_sandbox(session: Any) -> None: "type": "string", "description": ( "Optional. The HF Space hosting the trackio dashboard for runs in this sandbox " - "(e.g. '/mlintern-<8char>', under YOUR HF namespace). Injected as " + "(e.g. '/ml-intern-<8char>', under YOUR HF namespace). Injected as " "TRACKIO_SPACE_ID secret and surfaced to the UI. The Space is auto-created and " "seeded with the trackio dashboard — DO NOT pre-create it via hf_repo_git, " "that produces an empty Space that breaks the embed." @@ -586,7 +589,7 @@ async def sandbox_create_handler( ) -> tuple[str, bool]: """Handle sandbox_create tool calls.""" hardware = args.get("hardware", DEFAULT_CPU_SANDBOX_HARDWARE) - trackio_space_id = args.get("trackio_space_id") or None + trackio_space_id = normalize_trackio_space_id(args.get("trackio_space_id") or None) trackio_project = args.get("trackio_project") or None async def _emit_trackio_state(sb: Sandbox) -> None: diff --git a/agent/tools/trackio_seed.py b/agent/tools/trackio_seed.py index 1062e1b5..cb14ff1e 100644 --- a/agent/tools/trackio_seed.py +++ b/agent/tools/trackio_seed.py @@ -67,6 +67,25 @@ "app.py": _APP_PY, } +_LEGACY_TRACKIO_REPO_PREFIX = "mlintern-" +_TRACKIO_REPO_PREFIX = "ml-intern-" + + +def normalize_trackio_space_id(space_id: str | None) -> str | None: + """Rewrite the old dashboard repo prefix while preserving custom names.""" + if not space_id: + return space_id + + parts = space_id.rsplit("/", 1) + repo_name = parts[-1] + if not repo_name.startswith(_LEGACY_TRACKIO_REPO_PREFIX): + return space_id + + repo_name = _TRACKIO_REPO_PREFIX + repo_name[len(_LEGACY_TRACKIO_REPO_PREFIX) :] + if len(parts) == 1: + return repo_name + return f"{parts[0]}/{repo_name}" + def _already_seeded(api: HfApi, space_id: str) -> bool: """Cheap check: does the Space already have a trackio dashboard app.py? diff --git a/tests/unit/test_trackio_space_ids.py b/tests/unit/test_trackio_space_ids.py new file mode 100644 index 00000000..70f5e333 --- /dev/null +++ b/tests/unit/test_trackio_space_ids.py @@ -0,0 +1,114 @@ +import asyncio +import json +from pathlib import Path +from types import SimpleNamespace + +from agent.tools import sandbox_tool +from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool +from agent.tools.sandbox_tool import SANDBOX_CREATE_TOOL_SPEC, sandbox_create_handler +from agent.tools.trackio_seed import normalize_trackio_space_id + + +def _legacy_space_id(suffix: str = "abcd1234") -> str: + return "alice/" + "ml" + "intern" + f"-{suffix}" + + +def test_trackio_space_examples_use_hyphenated_ml_intern_prefix(): + prompt = Path("agent/prompts/system_prompt_v3.yaml").read_text() + tool_specs = json.dumps([HF_JOBS_TOOL_SPEC, SANDBOX_CREATE_TOOL_SPEC]) + legacy_prefix = "ml" + "intern" + + assert "/ml-intern-<8-char-id>" in prompt + assert "/ml-intern-<8char>" in tool_specs + assert legacy_prefix not in prompt + assert legacy_prefix not in tool_specs + + +def test_normalize_trackio_space_id_rewrites_legacy_prefix(): + assert normalize_trackio_space_id(_legacy_space_id()) == "alice/ml-intern-abcd1234" + assert ( + normalize_trackio_space_id("alice/custom-dashboard") == "alice/custom-dashboard" + ) + assert normalize_trackio_space_id(None) is None + + +def test_sandbox_create_normalizes_trackio_space_id(monkeypatch): + captured: dict[str, object] = {} + + async def fake_seed_trackio_dashboard(session, space_id): + captured["seeded_space_id"] = space_id + + async def fake_ensure_sandbox( + session, + hardware="cpu-basic", + extra_secrets=None, + **create_kwargs, + ): + captured["extra_secrets"] = extra_secrets + return ( + SimpleNamespace( + space_id="alice/sandbox-12345678", + url="https://huggingface.co/spaces/alice/sandbox-12345678", + ), + None, + ) + + monkeypatch.setattr( + sandbox_tool, "_seed_trackio_dashboard_safe", fake_seed_trackio_dashboard + ) + monkeypatch.setattr(sandbox_tool, "_ensure_sandbox", fake_ensure_sandbox) + + out, ok = asyncio.run( + sandbox_create_handler( + {"trackio_space_id": _legacy_space_id()}, + session=SimpleNamespace(sandbox=None), + ) + ) + + assert ok is True + assert "Visibility: private" in out + assert captured["seeded_space_id"] == "alice/ml-intern-abcd1234" + assert captured["extra_secrets"] == {"TRACKIO_SPACE_ID": "alice/ml-intern-abcd1234"} + + +def test_hf_jobs_normalizes_trackio_space_id(monkeypatch): + class FakeApi: + def __init__(self): + self.run_kwargs = None + + def run_job(self, **kwargs): + self.run_kwargs = kwargs + return SimpleNamespace( + id="job-123", + url="https://huggingface.co/jobs/job-123", + ) + + api = FakeApi() + tool = HfJobsTool(hf_token="hf-token", namespace="alice") + tool.api = api + seeded_space_ids: list[str] = [] + + async def fake_seed_trackio_dashboard(space_id): + seeded_space_ids.append(space_id) + + async def fake_wait_for_job_completion(job_id, namespace=None): + return "COMPLETED", ["done"] + + monkeypatch.setattr(tool, "_seed_trackio_dashboard", fake_seed_trackio_dashboard) + monkeypatch.setattr(tool, "_wait_for_job_completion", fake_wait_for_job_completion) + + result = asyncio.run( + tool.execute( + { + "operation": "run", + "command": ["python", "-c", "print('ok')"], + "trackio_space_id": _legacy_space_id(), + "trackio_project": "demo", + } + ) + ) + + assert result["totalResults"] == 1 + assert seeded_space_ids == ["alice/ml-intern-abcd1234"] + assert api.run_kwargs["env"]["TRACKIO_SPACE_ID"] == "alice/ml-intern-abcd1234" + assert api.run_kwargs["env"]["TRACKIO_PROJECT"] == "demo" From e4a2f56835d7d070c0e138d85f660f60de1f737c Mon Sep 17 00:00:00 2001 From: Lewis Tunstall Date: Fri, 8 May 2026 20:03:49 +0200 Subject: [PATCH 2/2] Drop legacy Trackio prefix handling Co-authored-by: OpenAI Codex --- agent/tools/jobs_tool.py | 7 +- agent/tools/sandbox_tool.py | 7 +- agent/tools/trackio_seed.py | 19 ----- tests/unit/test_trackio_space_ids.py | 102 +-------------------------- 4 files changed, 6 insertions(+), 129 deletions(-) diff --git a/agent/tools/jobs_tool.py b/agent/tools/jobs_tool.py index c3b32719..c0b793f5 100644 --- a/agent/tools/jobs_tool.py +++ b/agent/tools/jobs_tool.py @@ -23,10 +23,7 @@ ) from agent.core.hub_artifacts import build_hub_artifact_sitecustomize from agent.core.session import Event -from agent.tools.trackio_seed import ( - ensure_trackio_dashboard, - normalize_trackio_space_id, -) +from agent.tools.trackio_seed import ensure_trackio_dashboard from agent.tools.types import ToolResult from agent.tools.utilities import ( format_job_details, @@ -595,7 +592,7 @@ async def _run_job(self, args: Dict[str, Any]) -> ToolResult: # so trackio.init() picks them up automatically. We also surface them # in tool_state_change so the frontend can embed the dashboard. env_dict = _add_default_env(args.get("env")) - trackio_space_id = normalize_trackio_space_id(args.get("trackio_space_id")) + trackio_space_id = args.get("trackio_space_id") trackio_project = args.get("trackio_project") if trackio_space_id: env_dict["TRACKIO_SPACE_ID"] = trackio_space_id diff --git a/agent/tools/sandbox_tool.py b/agent/tools/sandbox_tool.py index deb74e08..0631194f 100644 --- a/agent/tools/sandbox_tool.py +++ b/agent/tools/sandbox_tool.py @@ -24,10 +24,7 @@ from agent.core.hub_artifacts import wrap_shell_command_with_hub_artifact_bootstrap from agent.core.session import Event from agent.tools.sandbox_client import Sandbox -from agent.tools.trackio_seed import ( - ensure_trackio_dashboard, - normalize_trackio_space_id, -) +from agent.tools.trackio_seed import ensure_trackio_dashboard logger = logging.getLogger(__name__) @@ -589,7 +586,7 @@ async def sandbox_create_handler( ) -> tuple[str, bool]: """Handle sandbox_create tool calls.""" hardware = args.get("hardware", DEFAULT_CPU_SANDBOX_HARDWARE) - trackio_space_id = normalize_trackio_space_id(args.get("trackio_space_id") or None) + trackio_space_id = args.get("trackio_space_id") or None trackio_project = args.get("trackio_project") or None async def _emit_trackio_state(sb: Sandbox) -> None: diff --git a/agent/tools/trackio_seed.py b/agent/tools/trackio_seed.py index cb14ff1e..1062e1b5 100644 --- a/agent/tools/trackio_seed.py +++ b/agent/tools/trackio_seed.py @@ -67,25 +67,6 @@ "app.py": _APP_PY, } -_LEGACY_TRACKIO_REPO_PREFIX = "mlintern-" -_TRACKIO_REPO_PREFIX = "ml-intern-" - - -def normalize_trackio_space_id(space_id: str | None) -> str | None: - """Rewrite the old dashboard repo prefix while preserving custom names.""" - if not space_id: - return space_id - - parts = space_id.rsplit("/", 1) - repo_name = parts[-1] - if not repo_name.startswith(_LEGACY_TRACKIO_REPO_PREFIX): - return space_id - - repo_name = _TRACKIO_REPO_PREFIX + repo_name[len(_LEGACY_TRACKIO_REPO_PREFIX) :] - if len(parts) == 1: - return repo_name - return f"{parts[0]}/{repo_name}" - def _already_seeded(api: HfApi, space_id: str) -> bool: """Cheap check: does the Space already have a trackio dashboard app.py? diff --git a/tests/unit/test_trackio_space_ids.py b/tests/unit/test_trackio_space_ids.py index 70f5e333..c73a2a05 100644 --- a/tests/unit/test_trackio_space_ids.py +++ b/tests/unit/test_trackio_space_ids.py @@ -1,16 +1,8 @@ -import asyncio import json from pathlib import Path -from types import SimpleNamespace -from agent.tools import sandbox_tool -from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool -from agent.tools.sandbox_tool import SANDBOX_CREATE_TOOL_SPEC, sandbox_create_handler -from agent.tools.trackio_seed import normalize_trackio_space_id - - -def _legacy_space_id(suffix: str = "abcd1234") -> str: - return "alice/" + "ml" + "intern" + f"-{suffix}" +from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC +from agent.tools.sandbox_tool import SANDBOX_CREATE_TOOL_SPEC def test_trackio_space_examples_use_hyphenated_ml_intern_prefix(): @@ -22,93 +14,3 @@ def test_trackio_space_examples_use_hyphenated_ml_intern_prefix(): assert "/ml-intern-<8char>" in tool_specs assert legacy_prefix not in prompt assert legacy_prefix not in tool_specs - - -def test_normalize_trackio_space_id_rewrites_legacy_prefix(): - assert normalize_trackio_space_id(_legacy_space_id()) == "alice/ml-intern-abcd1234" - assert ( - normalize_trackio_space_id("alice/custom-dashboard") == "alice/custom-dashboard" - ) - assert normalize_trackio_space_id(None) is None - - -def test_sandbox_create_normalizes_trackio_space_id(monkeypatch): - captured: dict[str, object] = {} - - async def fake_seed_trackio_dashboard(session, space_id): - captured["seeded_space_id"] = space_id - - async def fake_ensure_sandbox( - session, - hardware="cpu-basic", - extra_secrets=None, - **create_kwargs, - ): - captured["extra_secrets"] = extra_secrets - return ( - SimpleNamespace( - space_id="alice/sandbox-12345678", - url="https://huggingface.co/spaces/alice/sandbox-12345678", - ), - None, - ) - - monkeypatch.setattr( - sandbox_tool, "_seed_trackio_dashboard_safe", fake_seed_trackio_dashboard - ) - monkeypatch.setattr(sandbox_tool, "_ensure_sandbox", fake_ensure_sandbox) - - out, ok = asyncio.run( - sandbox_create_handler( - {"trackio_space_id": _legacy_space_id()}, - session=SimpleNamespace(sandbox=None), - ) - ) - - assert ok is True - assert "Visibility: private" in out - assert captured["seeded_space_id"] == "alice/ml-intern-abcd1234" - assert captured["extra_secrets"] == {"TRACKIO_SPACE_ID": "alice/ml-intern-abcd1234"} - - -def test_hf_jobs_normalizes_trackio_space_id(monkeypatch): - class FakeApi: - def __init__(self): - self.run_kwargs = None - - def run_job(self, **kwargs): - self.run_kwargs = kwargs - return SimpleNamespace( - id="job-123", - url="https://huggingface.co/jobs/job-123", - ) - - api = FakeApi() - tool = HfJobsTool(hf_token="hf-token", namespace="alice") - tool.api = api - seeded_space_ids: list[str] = [] - - async def fake_seed_trackio_dashboard(space_id): - seeded_space_ids.append(space_id) - - async def fake_wait_for_job_completion(job_id, namespace=None): - return "COMPLETED", ["done"] - - monkeypatch.setattr(tool, "_seed_trackio_dashboard", fake_seed_trackio_dashboard) - monkeypatch.setattr(tool, "_wait_for_job_completion", fake_wait_for_job_completion) - - result = asyncio.run( - tool.execute( - { - "operation": "run", - "command": ["python", "-c", "print('ok')"], - "trackio_space_id": _legacy_space_id(), - "trackio_project": "demo", - } - ) - ) - - assert result["totalResults"] == 1 - assert seeded_space_ids == ["alice/ml-intern-abcd1234"] - assert api.run_kwargs["env"]["TRACKIO_SPACE_ID"] == "alice/ml-intern-abcd1234" - assert api.run_kwargs["env"]["TRACKIO_PROJECT"] == "demo"