diff --git a/src/benchflow/_daytona_patches.py b/src/benchflow/_daytona_patches.py index 2541bf0..56abe78 100644 --- a/src/benchflow/_daytona_patches.py +++ b/src/benchflow/_daytona_patches.py @@ -54,9 +54,7 @@ def apply() -> None: def _is_malformed_logs_error(exc: BaseException) -> bool: if isinstance(exc, ValidationError): return True - if isinstance(exc, DaytonaError) and _MALFORMED_MARKER in str(exc): - return True - return False + return isinstance(exc, DaytonaError) and _MALFORMED_MARKER in str(exc) async def _patched_get_session_command_logs( self: Any, session_id: str, command_id: str diff --git a/src/benchflow/process.py b/src/benchflow/process.py index f387c52..b9d2c21 100644 --- a/src/benchflow/process.py +++ b/src/benchflow/process.py @@ -12,6 +12,7 @@ import logging import os import shlex +import uuid from abc import ABC, abstractmethod from typing import Any @@ -320,9 +321,15 @@ async def start( inner_parts.extend(["-w", cwd]) # Write env vars to a temp file on the remote VM instead of passing # as -e K=V args (which are visible in ps aux on the remote host). + # Use a Python-generated unique suffix instead of a shell `$$` + # expansion: shlex.join() (below) single-quotes the --env-file arg, + # so `$$` would survive as a literal in the docker compose call + # while the cat > ... heredoc would expand it — the file would be + # written to one path and read from another. uuid.uuid4 sidesteps + # the entire shell-expansion-vs-quoting problem. remote_env_path = None if env: - remote_env_path = "/tmp/benchflow_env_$$.env" + remote_env_path = f"/tmp/benchflow_env_{uuid.uuid4().hex[:16]}.env" env_lines = "\n".join(f"{k}={v}" for k, v in env.items()) inner_parts.extend(["--env-file", remote_env_path]) inner_parts.extend(["main", "bash", "-c", command]) @@ -349,7 +356,9 @@ async def start( env_prefix = "" remote_env_path = None if env: - remote_env_path = "/tmp/benchflow_env_$$.env" + # Python-generated unique suffix; see DinD branch above for why + # $$ shell expansion is fragile across quoting boundaries. + remote_env_path = f"/tmp/benchflow_env_{uuid.uuid4().hex[:16]}.env" env_lines = "\n".join( f"export {k}={shlex.quote(v)}" for k, v in env.items() ) diff --git a/tests/test_process.py b/tests/test_process.py index c604a58..68ea0a0 100644 --- a/tests/test_process.py +++ b/tests/test_process.py @@ -202,3 +202,99 @@ async def capture_communicate(data=None): assert ( f"export SINGLE_QUOTE={shlex.quote('it' + chr(39) + 's a test')}" in content ) + + +class TestDaytonaProcessEnvFilePath: + """Regression: env-file path must be unique without relying on shell `$$` expansion. + + Guards the fix from PR #198 against the regression introduced by PR #193 + (DinD compose ACP via Daytona PTY WebSocket, commit cdccac7). + + The DinD branch builds an inner `docker compose exec --env-file PATH ...` + command and runs it through `shlex.join()`, which single-quotes any `$$` + (preventing remote shell expansion). The `cat > PATH` heredoc that writes + the file uses raw f-string interpolation where `$$` IS expanded. If the + path contains `$$`, the file is written to one path and read from another + — env vars silently disappear. + + The direct (non-DinD) branch uses raw f-string in both write and read, so + `$$` would expand consistently — but uuid is robust against future quoting + changes. Pin both branches. + """ + + @pytest.mark.asyncio + async def test_dind_env_file_path_does_not_use_shell_pid_expansion(self): + """DinD path must not use $$ — shlex.join would quote it literally.""" + from unittest.mock import MagicMock + + from benchflow.process import DaytonaProcess + + sandbox = MagicMock() + sandbox.create_ssh_access = AsyncMock( + return_value=MagicMock(token="abc") + ) + proc = DaytonaProcess( + sandbox=sandbox, + is_dind=True, + compose_cmd_prefix="", + compose_cmd_base="docker compose -p test", + ) + + captured = [] + + async def fake_exec(*args, **kwargs): + captured.append(list(args)) + mock_proc = AsyncMock() + mock_proc.pid = 12345 + mock_proc.returncode = 0 + mock_proc.stdin = AsyncMock() + mock_proc.stdout = AsyncMock() + mock_proc.stderr = AsyncMock() + mock_proc.communicate = AsyncMock(return_value=(b"", b"")) + return mock_proc + + with patch("asyncio.create_subprocess_exec", side_effect=fake_exec): + await proc.start(command="echo hi", env={"FOO": "bar"}) + + # Last arg of ssh is the remote command. Search it for $$ + remote_cmd = captured[0][-1] + assert "$$" not in remote_cmd, ( + "$$ in remote command — shlex.join() will quote it, mismatching " + f"the cat heredoc that does expand it. Got: {remote_cmd[:200]!r}" + ) + # And: a real path was used (literal hex suffix, no shell variable) + assert "/tmp/benchflow_env_" in remote_cmd + assert "--env-file" in remote_cmd + + @pytest.mark.asyncio + async def test_direct_sandbox_env_file_path_does_not_use_shell_pid_expansion(self): + """Direct (non-DinD) path is currently safe with $$, but pin the uuid form for robustness.""" + from unittest.mock import MagicMock + + from benchflow.process import DaytonaProcess + + sandbox = MagicMock() + sandbox.create_ssh_access = AsyncMock( + return_value=MagicMock(token="abc") + ) + proc = DaytonaProcess(sandbox=sandbox, is_dind=False) + + captured = [] + + async def fake_exec(*args, **kwargs): + captured.append(list(args)) + mock_proc = AsyncMock() + mock_proc.pid = 12345 + mock_proc.returncode = 0 + mock_proc.stdin = AsyncMock() + mock_proc.stdout = AsyncMock() + mock_proc.stderr = AsyncMock() + mock_proc.communicate = AsyncMock(return_value=(b"", b"")) + return mock_proc + + with patch("asyncio.create_subprocess_exec", side_effect=fake_exec): + await proc.start(command="echo hi", env={"FOO": "bar"}) + + remote_cmd = captured[0][-1] + assert "$$" not in remote_cmd + assert "/tmp/benchflow_env_" in remote_cmd