From 5dbbb5b1016ceaea95cfa65e50d6b4c745c2f239 Mon Sep 17 00:00:00 2001 From: samwaseda Date: Sun, 3 Aug 2025 07:29:45 +0000 Subject: [PATCH 01/10] Implement compress --- pyiron_snippets/files.py | 21 +++++++++++++++++++++ tests/unit/test_files.py | 11 +++++++++++ 2 files changed, 32 insertions(+) diff --git a/pyiron_snippets/files.py b/pyiron_snippets/files.py index 4e28bd5..254bcc2 100644 --- a/pyiron_snippets/files.py +++ b/pyiron_snippets/files.py @@ -1,5 +1,6 @@ from __future__ import annotations +import tarfile from pathlib import Path @@ -95,3 +96,23 @@ def remove_files(self, *files: str): path = self.get_path(file) if path.is_file(): path.unlink() + + def compress( + self, exclude_files: list[str | Path] | None = None + ): + directory = self.path.resolve() + output_tar_path = directory.with_suffix(".tar.gz") + if output_tar_path.exists(): + return + if exclude_files is None: + exclude_files = [] + else: + exclude_files = [Path(f) for f in exclude_files] + exclude_set = {f.resolve() if f.is_absolute() else (directory / f).resolve() for f in exclude_files} + + with tarfile.open(output_tar_path, "w:gz") as tar: + for file in directory.rglob("*"): + if file.is_file(): + if file.resolve() not in exclude_set: + arcname = file.relative_to(directory) + tar.add(file, arcname=arcname) diff --git a/tests/unit/test_files.py b/tests/unit/test_files.py index 20ab917..1257ab6 100644 --- a/tests/unit/test_files.py +++ b/tests/unit/test_files.py @@ -92,6 +92,17 @@ def test_remove(self): msg="Should be able to remove just one file", ) + def test_compress(self): + self.directory.write(file_name="test1.txt", content="something") + self.directory.compress() + self.assertTrue( + Path("test.tar.gz").exists(), + msg="Compressed file should be created", + ) + # Test that compressing again does not overwrite the existing file + self.directory.compress() + self.assertTrue(Path("test.tar.gz").exists()) + if __name__ == "__main__": unittest.main() From f8dea6596d92ad965720a5f087c791cf5ec256e5 Mon Sep 17 00:00:00 2001 From: samwaseda Date: Sun, 3 Aug 2025 07:36:40 +0000 Subject: [PATCH 02/10] Add tests for exclude files --- tests/unit/test_files.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/unit/test_files.py b/tests/unit/test_files.py index 1257ab6..e5f9c9c 100644 --- a/tests/unit/test_files.py +++ b/tests/unit/test_files.py @@ -1,4 +1,5 @@ import pickle +import tarfile import unittest from pathlib import Path @@ -94,6 +95,7 @@ def test_remove(self): def test_compress(self): self.directory.write(file_name="test1.txt", content="something") + self.directory.write(file_name="test2.txt", content="something") self.directory.compress() self.assertTrue( Path("test.tar.gz").exists(), @@ -102,6 +104,12 @@ def test_compress(self): # Test that compressing again does not overwrite the existing file self.directory.compress() self.assertTrue(Path("test.tar.gz").exists()) + Path("test.tar.gz").unlink() + self.directory.compress(exclude_files=["test1.txt"]) + with tarfile.open("test.tar.gz", "r:*") as f: + content = [name for name in f.getnames()] + self.assertNotIn("test1.txt", content, msg="Excluded file should not be in archive") + self.assertIn("test2.txt", content, msg="Included file should be in archive") if __name__ == "__main__": From 3c4253fe355c31fb7a9213d3b2d85940fbef7694 Mon Sep 17 00:00:00 2001 From: samwaseda Date: Sun, 3 Aug 2025 07:37:20 +0000 Subject: [PATCH 03/10] black --- pyiron_snippets/files.py | 9 +++++---- tests/unit/test_files.py | 8 ++++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pyiron_snippets/files.py b/pyiron_snippets/files.py index 254bcc2..f124373 100644 --- a/pyiron_snippets/files.py +++ b/pyiron_snippets/files.py @@ -97,9 +97,7 @@ def remove_files(self, *files: str): if path.is_file(): path.unlink() - def compress( - self, exclude_files: list[str | Path] | None = None - ): + def compress(self, exclude_files: list[str | Path] | None = None): directory = self.path.resolve() output_tar_path = directory.with_suffix(".tar.gz") if output_tar_path.exists(): @@ -108,7 +106,10 @@ def compress( exclude_files = [] else: exclude_files = [Path(f) for f in exclude_files] - exclude_set = {f.resolve() if f.is_absolute() else (directory / f).resolve() for f in exclude_files} + exclude_set = { + f.resolve() if f.is_absolute() else (directory / f).resolve() + for f in exclude_files + } with tarfile.open(output_tar_path, "w:gz") as tar: for file in directory.rglob("*"): diff --git a/tests/unit/test_files.py b/tests/unit/test_files.py index e5f9c9c..6b601ad 100644 --- a/tests/unit/test_files.py +++ b/tests/unit/test_files.py @@ -108,8 +108,12 @@ def test_compress(self): self.directory.compress(exclude_files=["test1.txt"]) with tarfile.open("test.tar.gz", "r:*") as f: content = [name for name in f.getnames()] - self.assertNotIn("test1.txt", content, msg="Excluded file should not be in archive") - self.assertIn("test2.txt", content, msg="Included file should be in archive") + self.assertNotIn( + "test1.txt", content, msg="Excluded file should not be in archive" + ) + self.assertIn( + "test2.txt", content, msg="Included file should be in archive" + ) if __name__ == "__main__": From 656a738b813db0c70e9e425fc7b999cd140a35c2 Mon Sep 17 00:00:00 2001 From: samwaseda Date: Sun, 3 Aug 2025 07:44:18 +0000 Subject: [PATCH 04/10] ruff and mypy --- pyiron_snippets/files.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pyiron_snippets/files.py b/pyiron_snippets/files.py index f124373..c6bfa2e 100644 --- a/pyiron_snippets/files.py +++ b/pyiron_snippets/files.py @@ -43,11 +43,12 @@ def categorize_folder_items(folder_path): class DirectoryObject: def __init__(self, directory: str | Path | DirectoryObject): if isinstance(directory, str): - self.path = Path(directory) + path = Path(directory) elif isinstance(directory, Path): - self.path = directory + path = directory elif isinstance(directory, DirectoryObject): - self.path = directory.path + path = directory.path + self.path = cast(Path, path) self.create() self._protected = False @@ -113,7 +114,6 @@ def compress(self, exclude_files: list[str | Path] | None = None): with tarfile.open(output_tar_path, "w:gz") as tar: for file in directory.rglob("*"): - if file.is_file(): - if file.resolve() not in exclude_set: - arcname = file.relative_to(directory) - tar.add(file, arcname=arcname) + if file.is_file() and file.resolve() not in exclude_set: + arcname = file.relative_to(directory) + tar.add(file, arcname=arcname) From d3fafc7811ffcf45958836043f296b335589134c Mon Sep 17 00:00:00 2001 From: samwaseda Date: Sun, 3 Aug 2025 07:46:00 +0000 Subject: [PATCH 05/10] forgot to add cast --- pyiron_snippets/files.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyiron_snippets/files.py b/pyiron_snippets/files.py index c6bfa2e..38b5927 100644 --- a/pyiron_snippets/files.py +++ b/pyiron_snippets/files.py @@ -2,6 +2,7 @@ import tarfile from pathlib import Path +from typing import cast def delete_files_and_directories_recursively(path): From 92cbe4cd76aa3cbb61f19878a724a51822677d22 Mon Sep 17 00:00:00 2001 From: samwaseda Date: Sun, 3 Aug 2025 07:52:25 +0000 Subject: [PATCH 06/10] other tricks --- pyiron_snippets/files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiron_snippets/files.py b/pyiron_snippets/files.py index 38b5927..a89696d 100644 --- a/pyiron_snippets/files.py +++ b/pyiron_snippets/files.py @@ -49,7 +49,7 @@ def __init__(self, directory: str | Path | DirectoryObject): path = directory elif isinstance(directory, DirectoryObject): path = directory.path - self.path = cast(Path, path) + self.path: Path = path self.create() self._protected = False @@ -110,7 +110,7 @@ def compress(self, exclude_files: list[str | Path] | None = None): exclude_files = [Path(f) for f in exclude_files] exclude_set = { f.resolve() if f.is_absolute() else (directory / f).resolve() - for f in exclude_files + for f in cast(list[Path], exclude_files) } with tarfile.open(output_tar_path, "w:gz") as tar: From 38189fac60f45607f805191e1576e84ffcda8a58 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Mon, 4 Aug 2025 08:41:37 +0200 Subject: [PATCH 07/10] correct tests? --- pyiron_snippets/files.py | 4 ++++ tests/unit/test_files.py | 23 ++++++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/pyiron_snippets/files.py b/pyiron_snippets/files.py index a89696d..481394d 100644 --- a/pyiron_snippets/files.py +++ b/pyiron_snippets/files.py @@ -113,8 +113,12 @@ def compress(self, exclude_files: list[str | Path] | None = None): for f in cast(list[Path], exclude_files) } + files_to_delete = [] with tarfile.open(output_tar_path, "w:gz") as tar: for file in directory.rglob("*"): if file.is_file() and file.resolve() not in exclude_set: arcname = file.relative_to(directory) tar.add(file, arcname=arcname) + files_to_delete.append(file) + for file in files_to_delete: + file.unlink() diff --git a/tests/unit/test_files.py b/tests/unit/test_files.py index 6b601ad..a83d26d 100644 --- a/tests/unit/test_files.py +++ b/tests/unit/test_files.py @@ -96,16 +96,8 @@ def test_remove(self): def test_compress(self): self.directory.write(file_name="test1.txt", content="something") self.directory.write(file_name="test2.txt", content="something") - self.directory.compress() - self.assertTrue( - Path("test.tar.gz").exists(), - msg="Compressed file should be created", - ) - # Test that compressing again does not overwrite the existing file - self.directory.compress() - self.assertTrue(Path("test.tar.gz").exists()) - Path("test.tar.gz").unlink() self.directory.compress(exclude_files=["test1.txt"]) + self.assertTrue(Path("test.tar.gz").exists()) with tarfile.open("test.tar.gz", "r:*") as f: content = [name for name in f.getnames()] self.assertNotIn( @@ -114,6 +106,19 @@ def test_compress(self): self.assertIn( "test2.txt", content, msg="Included file should be in archive" ) + self.assertFalse( + self.directory.file_exists("test2.txt"), + msg="Compressed files should not be in the directory", + ) + self.assertTrue( + self.directory.file_exists("test1.txt"), + msg="Excluded file should still be in the directory", + ) + # Test that compressing again does not raise an error + self.directory.compress() + self.assertTrue(Path("test.tar.gz").exists()) + while Path("test.tar.gz").exists(): + Path("test.tar.gz").unlink() if __name__ == "__main__": From cc852840825e52cb54392dfddb29587f701c5a92 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Mon, 4 Aug 2025 08:43:49 +0200 Subject: [PATCH 08/10] delete archive first --- tests/unit/test_files.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/test_files.py b/tests/unit/test_files.py index a83d26d..599a137 100644 --- a/tests/unit/test_files.py +++ b/tests/unit/test_files.py @@ -94,6 +94,8 @@ def test_remove(self): ) def test_compress(self): + while Path("test.tar.gz").exists(): + Path("test.tar.gz").unlink() self.directory.write(file_name="test1.txt", content="something") self.directory.write(file_name="test2.txt", content="something") self.directory.compress(exclude_files=["test1.txt"]) From fa9a337493248b673a8e3052d6314cad4893e317 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Mon, 4 Aug 2025 08:56:13 +0200 Subject: [PATCH 09/10] add decompress --- pyiron_snippets/files.py | 10 +++++++++- tests/unit/test_files.py | 11 +++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/pyiron_snippets/files.py b/pyiron_snippets/files.py index 481394d..b0b4b4c 100644 --- a/pyiron_snippets/files.py +++ b/pyiron_snippets/files.py @@ -112,7 +112,6 @@ def compress(self, exclude_files: list[str | Path] | None = None): f.resolve() if f.is_absolute() else (directory / f).resolve() for f in cast(list[Path], exclude_files) } - files_to_delete = [] with tarfile.open(output_tar_path, "w:gz") as tar: for file in directory.rglob("*"): @@ -122,3 +121,12 @@ def compress(self, exclude_files: list[str | Path] | None = None): files_to_delete.append(file) for file in files_to_delete: file.unlink() + + def decompress(self): + directory = self.path.resolve() + tar_path = directory.with_suffix(".tar.gz") + if not tar_path.exists(): + return + with tarfile.open(tar_path, "r:gz") as tar: + tar.extractall(path=directory) + tar_path.unlink() diff --git a/tests/unit/test_files.py b/tests/unit/test_files.py index 599a137..cc75548 100644 --- a/tests/unit/test_files.py +++ b/tests/unit/test_files.py @@ -119,8 +119,15 @@ def test_compress(self): # Test that compressing again does not raise an error self.directory.compress() self.assertTrue(Path("test.tar.gz").exists()) - while Path("test.tar.gz").exists(): - Path("test.tar.gz").unlink() + self.directory.decompress() + self.assertTrue( + self.directory.file_exists("test2.txt"), + msg="Decompressed files should be back in the directory", + ) + self.assertFalse( + Path("test.tar.gz").exists(), + msg="Archive should be deleted after decompression", + ) if __name__ == "__main__": From 0e60d494c8d4f5ab4ce39c5d87a2c0c7ff2204bd Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Mon, 4 Aug 2025 08:58:48 +0200 Subject: [PATCH 10/10] Add filter because it issues a warning --- pyiron_snippets/files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiron_snippets/files.py b/pyiron_snippets/files.py index b0b4b4c..46cf8e6 100644 --- a/pyiron_snippets/files.py +++ b/pyiron_snippets/files.py @@ -128,5 +128,5 @@ def decompress(self): if not tar_path.exists(): return with tarfile.open(tar_path, "r:gz") as tar: - tar.extractall(path=directory) + tar.extractall(path=directory, filter="fully_trusted") tar_path.unlink()