diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5e31d9b..7cfe754 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,8 @@ +UNRELEASED +---------- + +* `#240 <https://github.com/ESSS/pytest-regressions/pull/240>`__: ``file_regression.check`` now short-circuits the pass path with an in-memory byte-exact comparison, skipping the ``.obtained`` write and ``difflib`` when contents already match the expected file. Suites with many ``file_regression`` checks see several-times-faster run times; behaviour on mismatch (and when a custom ``check_fn`` is supplied) is unchanged. + 2.10.0 ------ diff --git a/src/pytest_regressions/common.py b/src/pytest_regressions/common.py index de2f23d..740e7a1 100644 --- a/src/pytest_regressions/common.py +++ b/src/pytest_regressions/common.py @@ -99,6 +99,7 @@ def perform_regression_check( with_test_class_names: bool = False, obtained_filename: Optional["os.PathLike[str]"] = None, dump_aux_fn: Callable[[Path], list[str]] = lambda filename: [], + fast_equal_fn: Callable[[Path], bool] | None = None, ) -> None: """ First run of this check will generate a expected file. Following attempts will always try to @@ -124,6 +125,10 @@ def perform_regression_check( the basename. :param obtained_filename: complete path to use to write the obtained file. By default will prepend `.obtained` before the file extension. + :param fast_equal_fn: Optional function receiving the expected file path and returning + ``True`` when the in-memory contents already match the expected file byte-exact. When + provided and it returns ``True``, ``dump_fn`` and ``check_fn`` are skipped, avoiding the + ``.obtained`` write on the pass path. ..see: `data_regression.Check` for `basename` and `fullpath` arguments. 
""" import re @@ -171,6 +176,9 @@ def make_location_message(banner: str, filename: Path, aux_files: list[str]) -> ) pytest.fail(msg) else: + if fast_equal_fn is not None and not force_regen and fast_equal_fn(filename): + return + if obtained_filename is None: if fullpath: obtained_filename = (datadir / basename).with_suffix( diff --git a/src/pytest_regressions/file_regression.py b/src/pytest_regressions/file_regression.py index dcee8dc..1943928 100644 --- a/src/pytest_regressions/file_regression.py +++ b/src/pytest_regressions/file_regression.py @@ -14,6 +14,23 @@ from pytest_datadir.plugin import LazyDataDir +def _encode_as_dumped( + contents: str, + encoding: str | None, + newline: str | None, +) -> bytes: + """Return *contents* as the bytes ``open(..., "w", encoding=encoding, + newline=newline)`` would write, without touching disk. + """ + if newline is None: + translated = contents.replace("\n", os.linesep) + elif newline in ("", "\n"): + translated = contents + else: + translated = contents.replace("\n", newline) + return translated.encode(encoding or "utf-8") + + class FileRegressionFixture: """ Implementation of `file_regression` fixture. 
def test_skips_obtained_write_on_match(
    file_regression: FileRegressionFixture, tmp_path
):
    """A byte-exact match against the expected file must leave no
    ``.obtained`` sidecar on disk.
    """
    text = "hello\nworld\n"
    expected_path = tmp_path / "golden.txt"
    sidecar_path = tmp_path / "golden.obtained.txt"
    # newline="" keeps the "\n" characters untranslated on every platform.
    expected_path.write_text(text, newline="")

    file_regression.check(
        text,
        extension=".txt",
        newline="",
        fullpath=expected_path,
        obtained_filename=sidecar_path,
    )

    assert sidecar_path.exists() is False


def test_skips_obtained_write_on_match_binary(
    file_regression: FileRegressionFixture, tmp_path
):
    """Binary contents identical to the expected file also skip the
    ``.obtained`` write.
    """
    payload = b"\x00\x01\x02payload\xff"
    expected_path = tmp_path / "golden.bin"
    sidecar_path = tmp_path / "golden.obtained.bin"
    expected_path.write_bytes(payload)

    file_regression.check(
        payload,
        binary=True,
        extension=".bin",
        fullpath=expected_path,
        obtained_filename=sidecar_path,
    )

    assert sidecar_path.exists() is False


def test_writes_obtained_on_mismatch(file_regression: FileRegressionFixture, tmp_path):
    """Differing contents take the regular comparison path: the check
    fails and the ``.obtained`` file is produced.
    """
    expected_path = tmp_path / "golden.txt"
    sidecar_path = tmp_path / "golden.obtained.txt"
    expected_path.write_text("expected\n", newline="")

    check_kwargs = dict(
        extension=".txt",
        newline="",
        fullpath=expected_path,
        obtained_filename=sidecar_path,
    )
    with pytest.raises(AssertionError, match="FILES DIFFER"):
        file_regression.check("different\n", **check_kwargs)

    assert sidecar_path.exists()


def test_custom_check_fn_disables_fast_path(
    file_regression: FileRegressionFixture, tmp_path
):
    """With a caller-provided ``check_fn`` the obtained file is always
    written and handed to it, even for byte-identical contents.
    """
    text = "hello\n"
    expected_path = tmp_path / "golden.txt"
    sidecar_path = tmp_path / "golden.obtained.txt"
    expected_path.write_text(text, newline="")

    # Each invocation of the custom checker is recorded as a path pair.
    recorded: list[tuple[str, str]] = []

    def recording_check(obtained_fn, expected_fn):
        recorded.append((str(obtained_fn), str(expected_fn)))

    file_regression.check(
        text,
        extension=".txt",
        newline="",
        fullpath=expected_path,
        obtained_filename=sidecar_path,
        check_fn=recording_check,
    )

    assert len(recorded) == 1
    assert sidecar_path.exists()