Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
UNRELEASED
----------

* `#240 <https://github.com/ESSS/pytest-regressions/issues/240>`__: ``file_regression.check`` now short-circuits the pass path with an in-memory byte-exact comparison, skipping the ``.obtained`` write and ``difflib`` when contents already match the expected file. Suites with many ``file_regression`` checks run several times faster; behaviour on mismatch (and when a custom ``check_fn`` is supplied) is unchanged.

2.10.0
------

Expand Down
8 changes: 8 additions & 0 deletions src/pytest_regressions/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def perform_regression_check(
with_test_class_names: bool = False,
obtained_filename: Optional["os.PathLike[str]"] = None,
dump_aux_fn: Callable[[Path], list[str]] = lambda filename: [],
fast_equal_fn: Callable[[Path], bool] | None = None,
) -> None:
"""
First run of this check will generate an expected file. Following attempts will always try to
Expand All @@ -124,6 +125,10 @@ def perform_regression_check(
the basename.
:param obtained_filename: complete path to use to write the obtained file. By
default will prepend `.obtained` before the file extension.
:param fast_equal_fn: Optional function receiving the expected file path and returning
Copy link
Copy Markdown
Member

@nicoddemus nicoddemus Apr 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need this extra complexity?

Seems like the short-circuit could just convert the input into bytes, and compare directly with the written file (as bytes).

This is an optimization: if for some reason the user has changed the line ending or the encoding, then the short-circuit will fail and fall back to the standard path of doing the full comparison.

``True`` when the in-memory contents already match the expected file byte-exact. When
provided and it returns ``True``, ``dump_fn`` and ``check_fn`` are skipped, avoiding the
``.obtained`` write on the pass path.
..see: `data_regression.Check` for `basename` and `fullpath` arguments.
"""
import re
Expand Down Expand Up @@ -171,6 +176,9 @@ def make_location_message(banner: str, filename: Path, aux_files: list[str]) ->
)
pytest.fail(msg)
else:
if fast_equal_fn is not None and not force_regen and fast_equal_fn(filename):
return

if obtained_filename is None:
if fullpath:
obtained_filename = (datadir / basename).with_suffix(
Expand Down
33 changes: 33 additions & 0 deletions src/pytest_regressions/file_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,23 @@
from pytest_datadir.plugin import LazyDataDir


def _encode_as_dumped(
contents: str,
encoding: str | None,
newline: str | None,
) -> bytes:
"""Return *contents* as the bytes ``open(..., "w", encoding=encoding,
newline=newline)`` would write, without touching disk.
"""
if newline is None:
translated = contents.replace("\n", os.linesep)
elif newline in ("", "\n"):
translated = contents
else:
translated = contents.replace("\n", newline)
return translated.encode(encoding or "utf-8")


class FileRegressionFixture:
"""
Implementation of `file_regression` fixture.
Expand Down Expand Up @@ -78,6 +95,7 @@ def check(
type(contents).__name__
)

user_supplied_check_fn = check_fn is not None
if check_fn is None:
if binary:

Expand All @@ -97,6 +115,20 @@ def dump_fn(filename: Path) -> None:
with open(str(filename), mode, encoding=encoding, newline=newline) as f:
f.write(contents)

fast_equal_fn: Callable[[Path], bool] | None = None
if not user_supplied_check_fn:
if binary:
assert isinstance(contents, bytes)
expected_bytes = contents
else:
assert isinstance(contents, str)
expected_bytes = _encode_as_dumped(
contents=contents, encoding=encoding, newline=newline
)
fast_equal_fn = (
lambda expected: expected.read_bytes() == expected_bytes
) # noqa: E731

assert check_fn is not None
perform_regression_check(
datadir=self.datadir,
Expand All @@ -110,6 +142,7 @@ def dump_fn(filename: Path) -> None:
force_regen=self.force_regen,
with_test_class_names=self.with_test_class_names,
obtained_filename=obtained_filename,
fast_equal_fn=fast_equal_fn,
)

# non-PEP 8 alias used internally at ESSS
Expand Down
88 changes: 88 additions & 0 deletions tests/test_file_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,94 @@ def test_binary_and_text_error(file_regression: FileRegressionFixture):
file_regression.check("", encoding="UTF-8", binary=True)


def test_skips_obtained_write_on_match(
    file_regression: FileRegressionFixture, tmp_path
):
    """A check whose text equals the expected file's contents must not
    leave an ``.obtained`` file behind.
    """
    text = "hello\nworld\n"
    expected_path = tmp_path / "golden.txt"
    obtained_path = tmp_path / "golden.obtained.txt"
    # newline="" keeps "\n" untranslated so the file holds exactly ``text``.
    expected_path.write_text(text, newline="")

    file_regression.check(
        text,
        newline="",
        extension=".txt",
        obtained_filename=obtained_path,
        fullpath=expected_path,
    )

    assert not obtained_path.exists()


def test_skips_obtained_write_on_match_binary(
    file_regression: FileRegressionFixture, tmp_path
):
    """With ``binary=True``, matching bytes must not produce an
    ``.obtained`` file either.
    """
    payload = b"\x00\x01\x02payload\xff"
    expected_path = tmp_path / "golden.bin"
    obtained_path = tmp_path / "golden.obtained.bin"
    expected_path.write_bytes(payload)

    file_regression.check(
        payload,
        extension=".bin",
        binary=True,
        obtained_filename=obtained_path,
        fullpath=expected_path,
    )

    assert not obtained_path.exists()


def test_writes_obtained_on_mismatch(file_regression: FileRegressionFixture, tmp_path):
    """When contents differ from the expected file, the check fails with
    "FILES DIFFER" and the ``.obtained`` file is written.
    """
    expected_path = tmp_path / "golden.txt"
    obtained_path = tmp_path / "golden.obtained.txt"
    expected_path.write_text("expected\n", newline="")

    check_kwargs = dict(
        extension=".txt",
        newline="",
        fullpath=expected_path,
        obtained_filename=obtained_path,
    )
    with pytest.raises(AssertionError, match="FILES DIFFER"):
        file_regression.check("different\n", **check_kwargs)

    assert obtained_path.exists()


def test_custom_check_fn_disables_fast_path(
    file_regression: FileRegressionFixture, tmp_path
):
    """Passing ``check_fn`` must still write the obtained file and call the
    function once, even though the contents equal the expected file.
    """
    text = "hello\n"
    expected_path = tmp_path / "golden.txt"
    obtained_path = tmp_path / "golden.obtained.txt"
    expected_path.write_text(text, newline="")

    recorded: list[tuple[str, str]] = []

    def recording_check(obtained_fn, expected_fn):
        # Never raises: only records the pair of paths it was given.
        recorded.append((str(obtained_fn), str(expected_fn)))

    file_regression.check(
        text,
        check_fn=recording_check,
        extension=".txt",
        newline="",
        obtained_filename=obtained_path,
        fullpath=expected_path,
    )

    assert len(recorded) == 1
    assert obtained_path.exists()


def test_file_regression_workflow(pytester, monkeypatch):
monkeypatch.setattr(sys, "get_data", lambda: "foo", raising=False)
source = """
Expand Down