From c559c9278dadf5cd4272f2a2eeb2937eead89a04 Mon Sep 17 00:00:00 2001 From: fverac Date: Mon, 29 Sep 2025 12:53:37 -0700 Subject: [PATCH] allow rewrite to create new files Signed-off-by: fverac --- debug_gym/gym/tools/rewrite.py | 35 ++++++++++++++++++++++++++------- debug_gym/gym/workspace.py | 24 +++++++++++++--------- tests/gym/tools/test_rewrite.py | 25 +++++++++++++++++++++++ 3 files changed, 68 insertions(+), 16 deletions(-) diff --git a/debug_gym/gym/tools/rewrite.py b/debug_gym/gym/tools/rewrite.py index 3df34aaa..d8bf1ae0 100644 --- a/debug_gym/gym/tools/rewrite.py +++ b/debug_gym/gym/tools/rewrite.py @@ -13,9 +13,10 @@ class RewriteTool(EnvironmentTool): """rewrite(path="code/utils.py", start=10, end=None, new_code=" print('bonjour')") will rewite line number 10 of the specified file 'code/utils.py' to be print('bonjour'), with the indents ahead (in this case, 4 spaces).""", """rewrite(path="code/utils.py", start=10, end=20, new_code=" print('hello')\\n print('hi again')") will replace the chunk of code between line number 10 and 20 in the specified file 'code/utils.py' by the two lines provided, both with indents ahead (in this case, 4 spaces).""", """rewrite(path='code/utils.py', start=4, end=6, new_code=" print('buongiorno')") will replace the chunk of code between line number 4 and 6 in the specified file 'code/utils.py' by the single line provided, with the indent ahead (in this case, 8 spaces).""", + """rewrite(path='code/utils.py', is_new_file=True, new_code="print('greetings')") will generate a new file at the specified path 'code/utils.py' with the content print('greetings').""", ] description = ( - "Rewrite the content of the specified file path, between lines [start, end], with the new code. Line numbers are 1-based. When start is provided and end is None, it's assumed to rewrite a single line (start). When both start and end are None, it's assumed to rewrite the whole file, this is not recommended because most of the time the expected edit is local. The new code should be valid python code include proper indentation (can be determined from context)." + "Rewrite the content of the specified file path, between lines [start, end], with the new code. Line numbers are 1-based. When start is provided and end is None, it's assumed to rewrite a single line (start). When both start and end are None, it's assumed to rewrite the whole file, this is not recommended because most of the time the expected edit is local. When is_new_file is True, a new file will be created at the specified path with the new code. The new code should be valid python code include proper indentation (can be determined from context)." + "\nExamples (for demonstration purposes only, you need to adjust the tool calling format according to your specific syntax):" + "\n".join(examples) ) @@ -32,6 +33,10 @@ class RewriteTool(EnvironmentTool): "type": ["number", "null"], "description": "The ending line number to be rewritten. If None, end is the same as start.", }, + "is_new_file": { + "type": ["boolean", "null"], + "description": "Whether the file to be modified is a new file. Default is False.", + }, "new_code": { "type": ["string"], "description": "The new code to be inserted. The new code should be valid python code include proper indentation (can be determined from context).", @@ -41,12 +46,17 @@ class RewriteTool(EnvironmentTool): def _overwrite_file(self, environment, filepath: str, content: str): environment.workspace.write_file(filepath, content) - def _rewrite_file(self, environment, file_path, start, end, new_code): - original_content = environment.workspace.read_file(file_path) + def _rewrite_file( + self, environment, file_path, start, end, new_code, is_new_file=False + ): + raise_on_nonexistent_file = not is_new_file + original_content = environment.workspace.read_file( + file_path, raises=raise_on_nonexistent_file + ) new_code_lines = new_code.split("\n") new_code_length = len(new_code_lines) - if start is None: + if start is None or is_new_file: # no line number is provided, rewrite the whole code self._overwrite_file(environment, filepath=file_path, content=new_code) else: @@ -91,13 +101,24 @@ def use( path: str = None, start: int = None, end: int = None, + is_new_file: bool = False, new_code: str = "", ) -> Observation: self.rewrite_success = False if path is None: return self.fail(environment, "File path is None.") - if not environment.workspace.is_editable(path): - return self.fail(environment, f"`{path}` is not editable.") + + # If creating a new file, just ensure the target directory is inside workspace and not ignored + if is_new_file: + # Resolve without requiring existence + try: + environment.workspace.resolve_path(path, raises="ignore") + except Exception as e: + return self.fail(environment, f"Invalid path `{path}`: {e}") + else: + if not environment.workspace.is_editable(path): + return self.fail(environment, f"`{path}` is not editable.") + if start is not None: end = end or start # only start is provided (rewrite that line) if start > end: @@ -112,7 +133,7 @@ def use( start, end = start - 1, end - 1 # 1-based to 0-based try: diff, new_code_length = self._rewrite_file( - environment, path, start, end, new_code + environment, path, start, end, new_code, is_new_file=is_new_file ) except Exception as e: return self.fail(environment, str(e)) diff --git a/debug_gym/gym/workspace.py b/debug_gym/gym/workspace.py index 9b4db0e4..0c509d53 100644 --- a/debug_gym/gym/workspace.py +++ b/debug_gym/gym/workspace.py @@ -92,11 +92,12 @@ def copy_content(self, src: str | Path, target: str | Path | None = None): target = Path(target or self.working_dir).resolve() self.terminal.copy_content(src, target) - def resolve_path(self, filepath: str | Path, raises=False) -> Path: + def resolve_path(self, filepath: str | Path, raises: str | bool = False) -> Path: """Convert a relative filepath to absolute based on the working_dir. If the path is already absolute, it is returned as is. If raises is True, raises FileNotFoundError if the file does not exist, - is not in the working directory or is ignored by the ignore patterns. + or is not in the working directory or is ignored by the ignore patterns. + If raises is "ignore", then raises FileNotFoundError only if the file is ignored. If raises is False, returns the absolute path regardless of the file existence. """ abs_filepath = Path(filepath) @@ -104,7 +105,7 @@ def resolve_path(self, filepath: str | Path, raises=False) -> Path: abs_filepath = Path(self.working_dir) / abs_filepath abs_filepath_str = str(abs_filepath) - if raises and abs_filepath != self.working_dir: + if raises in [True, "ignore"] and abs_filepath != self.working_dir: # Check if file exists, is within working_dir and is not ignored. check_cmd = ( f'abs_path=$(realpath "{abs_filepath_str}"); ' @@ -113,7 +114,9 @@ def resolve_path(self, filepath: str | Path, raises=False) -> Path: success, result = self.terminal.run( f"{check_cmd} && echo OK || echo MISSING" ) - if result.strip() != "OK" or self._is_ignored_func(abs_filepath): + if (result.strip() != "OK" and raises == True) or self._is_ignored_func( + abs_filepath + ): raise FileNotFoundError( f"`{filepath}` does not exist or is not in " f"the working directory `{self.working_dir}`." @@ -121,18 +124,21 @@ def resolve_path(self, filepath: str | Path, raises=False) -> Path: return Path(abs_filepath_str) - def read_file(self, filepath: str) -> str: + def read_file(self, filepath: str, raises: bool = True) -> str: """Reads a file from the working directory. - Raises value error if the file does not exist""" - abs_filepath = self.resolve_path(filepath, raises=True) + By default, raises value error if the file does not exist""" + abs_filepath = self.resolve_path(filepath, raises=raises) success, output = self.terminal.run( - f"cat {abs_filepath}", raises=True, strip_output=False + f"cat {abs_filepath}", raises=raises, strip_output=False ) return output def write_file(self, filepath: str, content: str): """Writes `content` to `filepath` exactly as-is, preserving any trailing newlines.""" - abs_filepath = self.resolve_path(filepath) + abs_filepath = self.resolve_path(filepath, raises="ignore") + + # create parent directories via the terminal if needed + self.terminal.run(f'mkdir -p "{str(abs_filepath.parent)}"', raises=True) # In the following command we: # - use a single-quoted heredoc (cat <<'nDEBUGGYM_EOF' ... nDEBUGGYM_EOF) so the heredoc body is taken literally (no shell expansion) diff --git a/tests/gym/tools/test_rewrite.py b/tests/gym/tools/test_rewrite.py index 698bd6d3..a1f3e662 100644 --- a/tests/gym/tools/test_rewrite.py +++ b/tests/gym/tools/test_rewrite.py @@ -244,3 +244,28 @@ def test_rewrite_with_newlines(env): " print(f'Hello #2!')\n" " print('Goodbye, world!')\n" ) + + +def test_rewrite_new_file(env): + """Ensure the rewrite tool can create a brand new file when it does not already exist.""" + rewrite_tool = env.get_tool("rewrite") + filename = "new_dir/nested/new_module.py" + assert not (env.working_dir / filename).exists() + + patch = { + "path": filename, + "start": None, # full file write + "end": None, + "is_new_file": True, + "new_code": "def added():\n return 'created'\n", + } + obs = rewrite_tool.use(env, **patch) + + assert rewrite_tool.rewrite_success, f"Rewrite failed: {obs.observation}" + # We don't assert the entire diff (more brittle); just key substrings. + assert f"The file `{filename}` has been updated successfully." in obs.observation + assert "def added():" in obs.observation + + with open(env.working_dir / filename, "r") as f: + content = f.read() + assert content == "def added():\n return 'created'\n"