From 5fd5495cd9fdd06bb94cb25edb4fd97517c6ad7a Mon Sep 17 00:00:00 2001 From: Xiangyi Li Date: Sat, 25 Apr 2026 03:12:04 -0700 Subject: [PATCH 1/2] fix: env-file path mismatch in DinD compose mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Devin caught a real bug introduced by PR #193 (DinD compose ACP): src/benchflow/process.py:325 sets remote_env_path = "/tmp/benchflow_env_$$.env" expecting the remote shell to expand $$ to its PID. But shlex.join() at line 329 single-quotes the --env-file argument, so docker compose receives the literal string "/tmp/benchflow_env_$$.env" while the cat heredoc that writes the file (line 339, raw f-string) does expand $$. The file is written to /tmp/benchflow_env_.env and read from /tmp/benchflow_env_$$.env — silent mismatch, env vars (incl. API keys) silently dropped in DinD compose tasks. Fix: use uuid.uuid4().hex[:16] for the unique suffix instead of relying on shell-side $$ expansion. The path is then a literal that survives quoting. Apply the same fix to the direct (non-DinD) Daytona branch even though it was working — uniformity makes the path robust against future quoting changes. Also fix a pre-existing SIM103 lint error in _daytona_patches.py that ruff caught while validating the test changes. Tests: tests/test_process.py +2 regression tests pinning that no remote command contains a literal "$$" — would catch this exact regression. 8/8 process tests pass; ruff clean. --- src/benchflow/_daytona_patches.py | 4 +- src/benchflow/process.py | 13 ++++- tests/test_process.py | 93 +++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 5 deletions(-) diff --git a/src/benchflow/_daytona_patches.py b/src/benchflow/_daytona_patches.py index 2541bf06..56abe783 100644 --- a/src/benchflow/_daytona_patches.py +++ b/src/benchflow/_daytona_patches.py @@ -54,9 +54,7 @@ def apply() -> None: def _is_malformed_logs_error(exc: BaseException) -> bool: if isinstance(exc, ValidationError): return True - if isinstance(exc, DaytonaError) and _MALFORMED_MARKER in str(exc): - return True - return False + return isinstance(exc, DaytonaError) and _MALFORMED_MARKER in str(exc) async def _patched_get_session_command_logs( self: Any, session_id: str, command_id: str diff --git a/src/benchflow/process.py b/src/benchflow/process.py index f387c523..b9d2c212 100644 --- a/src/benchflow/process.py +++ b/src/benchflow/process.py @@ -12,6 +12,7 @@ import logging import os import shlex +import uuid from abc import ABC, abstractmethod from typing import Any @@ -320,9 +321,15 @@ async def start( inner_parts.extend(["-w", cwd]) # Write env vars to a temp file on the remote VM instead of passing # as -e K=V args (which are visible in ps aux on the remote host). + # Use a Python-generated unique suffix instead of a shell `$$` + # expansion: shlex.join() (below) single-quotes the --env-file arg, + # so `$$` would survive as a literal in the docker compose call + # while the cat > ... heredoc would expand it — the file would be + # written to one path and read from another. uuid.uuid4 sidesteps + # the entire shell-expansion-vs-quoting problem. remote_env_path = None if env: - remote_env_path = "/tmp/benchflow_env_$$.env" + remote_env_path = f"/tmp/benchflow_env_{uuid.uuid4().hex[:16]}.env" env_lines = "\n".join(f"{k}={v}" for k, v in env.items()) inner_parts.extend(["--env-file", remote_env_path]) inner_parts.extend(["main", "bash", "-c", command]) @@ -349,7 +356,9 @@ async def start( env_prefix = "" remote_env_path = None if env: - remote_env_path = "/tmp/benchflow_env_$$.env" + # Python-generated unique suffix; see DinD branch above for why + # $$ shell expansion is fragile across quoting boundaries. + remote_env_path = f"/tmp/benchflow_env_{uuid.uuid4().hex[:16]}.env" env_lines = "\n".join( f"export {k}={shlex.quote(v)}" for k, v in env.items() ) diff --git a/tests/test_process.py b/tests/test_process.py index c604a580..a170f7c2 100644 --- a/tests/test_process.py +++ b/tests/test_process.py @@ -202,3 +202,96 @@ async def capture_communicate(data=None): assert ( f"export SINGLE_QUOTE={shlex.quote('it' + chr(39) + 's a test')}" in content ) + + +class TestDaytonaProcessEnvFilePath: + """Regression: env-file path must be unique without relying on shell `$$` expansion. + + The DinD branch builds an inner `docker compose exec --env-file PATH ...` + command and runs it through `shlex.join()`, which single-quotes any `$$` + (preventing remote shell expansion). The `cat > PATH` heredoc that writes + the file uses raw f-string interpolation where `$$` IS expanded. If the + path contains `$$`, the file is written to one path and read from another + — env vars silently disappear. + + The direct (non-DinD) branch uses raw f-string in both write and read, so + `$$` would expand consistently — but uuid is robust against future quoting + changes. Pin both branches. + """ + + @pytest.mark.asyncio + async def test_dind_env_file_path_does_not_use_shell_pid_expansion(self): + """DinD path must not use $$ — shlex.join would quote it literally.""" + from unittest.mock import MagicMock + + from benchflow.process import DaytonaProcess + + sandbox = MagicMock() + sandbox.create_ssh_access = AsyncMock( + return_value=MagicMock(token="abc") + ) + proc = DaytonaProcess( + sandbox=sandbox, + is_dind=True, + compose_cmd_prefix="", + compose_cmd_base="docker compose -p test", + ) + + captured = [] + + async def fake_exec(*args, **kwargs): + captured.append(list(args)) + mock_proc = AsyncMock() + mock_proc.pid = 12345 + mock_proc.returncode = 0 + mock_proc.stdin = AsyncMock() + mock_proc.stdout = AsyncMock() + mock_proc.stderr = AsyncMock() + mock_proc.communicate = AsyncMock(return_value=(b"", b"")) + return mock_proc + + with patch("asyncio.create_subprocess_exec", side_effect=fake_exec): + await proc.start(command="echo hi", env={"FOO": "bar"}) + + # Last arg of ssh is the remote command. Search it for $$ + remote_cmd = captured[0][-1] + assert "$$" not in remote_cmd, ( + "$$ in remote command — shlex.join() will quote it, mismatching " + f"the cat heredoc that does expand it. Got: {remote_cmd[:200]!r}" + ) + # And: a real path was used (literal hex suffix, no shell variable) + assert "/tmp/benchflow_env_" in remote_cmd + assert "--env-file" in remote_cmd + + @pytest.mark.asyncio + async def test_direct_sandbox_env_file_path_does_not_use_shell_pid_expansion(self): + """Direct (non-DinD) path is currently safe with $$, but pin the uuid form for robustness.""" + from unittest.mock import MagicMock + + from benchflow.process import DaytonaProcess + + sandbox = MagicMock() + sandbox.create_ssh_access = AsyncMock( + return_value=MagicMock(token="abc") + ) + proc = DaytonaProcess(sandbox=sandbox, is_dind=False) + + captured = [] + + async def fake_exec(*args, **kwargs): + captured.append(list(args)) + mock_proc = AsyncMock() + mock_proc.pid = 12345 + mock_proc.returncode = 0 + mock_proc.stdin = AsyncMock() + mock_proc.stdout = AsyncMock() + mock_proc.stderr = AsyncMock() + mock_proc.communicate = AsyncMock(return_value=(b"", b"")) + return mock_proc + + with patch("asyncio.create_subprocess_exec", side_effect=fake_exec): + await proc.start(command="echo hi", env={"FOO": "bar"}) + + remote_cmd = captured[0][-1] + assert "$$" not in remote_cmd + assert "/tmp/benchflow_env_" in remote_cmd From eeb30d7ede695df6c3286c7d92f96f3b9e9a68a8 Mon Sep 17 00:00:00 2001 From: Xiangyi Li Date: Sat, 25 Apr 2026 03:32:00 -0700 Subject: [PATCH 2/2] test: reference PR #193 / #198 in regression test docstring Devin caught: CLAUDE.md mandates regression tests name the commit/PR they guard. Updated TestDaytonaProcessEnvFilePath docstring to cite PR #198 (the fix) and PR #193 / commit cdccac7 (the regression). --- tests/test_process.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_process.py b/tests/test_process.py index a170f7c2..68ea0a0c 100644 --- a/tests/test_process.py +++ b/tests/test_process.py @@ -207,6 +207,9 @@ async def capture_communicate(data=None): class TestDaytonaProcessEnvFilePath: """Regression: env-file path must be unique without relying on shell `$$` expansion. + Guards the fix from PR #198 against the regression introduced by PR #193 + (DinD compose ACP via Daytona PTY WebSocket, commit cdccac7). + The DinD branch builds an inner `docker compose exec --env-file PATH ...` command and runs it through `shlex.join()`, which single-quotes any `$$` (preventing remote shell expansion). The `cat > PATH` heredoc that writes