Fast path for `FileRegressionFixture.check` when contents already match

## Summary

On the pass path (the golden file exists and matches the obtained contents),
`FileRegressionFixture.check` still:

1. Writes an `.obtained` file to disk via `dump_fn`.
2. Reads both the `.obtained` file and the expected file back from disk.
3. Splits both into lines and compares them with `==`.
4. Sets up `difflib.unified_diff` / `difflib.HtmlDiff` machinery that is
   only needed on a mismatch.

For suites with thousands of fast, text-only regression checks the disk
round-trip dominates wall-clock time. Short-circuiting the match case by
encoding `contents` in memory and comparing it byte-exact against the
expected file — falling back to the current code path on mismatch or
when `--force-regen` / `--regen-all` is set — preserves the existing
error output while removing the `.obtained` write on the hot path.

## Reproducer

`test_bench.py`:

```python
"""Benchmark ``FileRegressionFixture.check`` pass path vs in-memory
fast paths (splitlines equality and byte-exact), both for the match
and the miss case.
"""

import time
from pathlib import Path

import pytest
from pytest_regressions.file_regression import FileRegressionFixture


# Number of calls timed by each match-case benchmark.
ITERATIONS = 1000
# Number of lines in the benchmark text.
LINES = 200
# Benchmark text: LINES lines, each terminated by "\n" (including the last).
CONTENTS = "\n".join(f"line {i} with some payload text" for i in range(LINES)) + "\n"


@pytest.fixture(name="golden")
def fixture_golden(tmp_path: Path) -> Path:
    r"""Write ``CONTENTS`` to ``golden.txt`` under ``tmp_path``; return its path.

    The text is written as exact UTF-8 bytes rather than through a
    text-mode write. A text-mode write uses the platform default encoding
    and translates ``"\n"`` to ``os.linesep``. The benchmarks call the
    checks with ``newline=""`` and the byte-exact fast path compares raw
    bytes, so a translated golden file (``"\r\n"`` on Windows) would never
    match and the benchmark would time the fall-through path instead.
    """
    path = tmp_path / "golden.txt"
    path.write_bytes(CONTENTS.encode("utf-8"))
    return path


def _fast_path_splitlines(
    *,
    file_regression: FileRegressionFixture,
    contents: str,
    golden_path: Path,
    extension: str,
    newline: str | None,
) -> None:
    """Return early when ``contents`` matches the golden file line by line.

    The golden file is read as text and both sides are compared after
    ``splitlines()``. When regeneration is requested, the golden file is
    missing, or the lines differ, the call is delegated to
    ``file_regression.check`` with ``fullpath=golden_path``.
    """
    options = file_regression.request.config
    regenerating = file_regression.force_regen or bool(
        options.getoption(name="regen_all")
        or options.getoption(name="force_regen"),
    )
    if not regenerating and golden_path.is_file():
        obtained_lines = contents.splitlines()
        if obtained_lines == golden_path.read_text().splitlines():
            # Match: skip the regular check entirely.
            return
    file_regression.check(
        contents=contents,
        extension=extension,
        newline=newline,
        fullpath=golden_path,
    )


def _fast_path_byte_exact(
    *,
    file_regression: FileRegressionFixture,
    contents: str,
    golden_path: Path,
    extension: str,
    encoding: str | None,
    newline: str | None,
) -> None:
    r"""Return early when ``contents`` encodes to exactly the golden file's bytes.

    ``contents`` is newline-translated and encoded in memory, then compared
    with ``golden_path.read_bytes()``. When regeneration is requested, the
    golden file is missing, or the bytes differ, the call is delegated to
    ``file_regression.check`` with ``fullpath=golden_path``.

    Newline translation follows what a text-mode write does:

    * ``newline=None`` translates ``"\n"`` to ``os.linesep``;
    * ``newline=""`` or ``"\n"`` performs no translation;
    * any other value replaces ``"\n"`` with that string.

    Translation is done on the ``str`` before encoding. Replacing on the
    encoded bytes is wrong for multi-byte encodings such as UTF-16, where
    ``"\n"`` is not the single byte ``b"\n"`` and encoding the replacement
    separately would inject a byte-order mark at every line ending.

    NOTE(review): ``encoding=None`` is treated as UTF-8 here, whereas a
    text-mode ``open()`` without an encoding uses the locale encoding.
    Confirm the two agree for non-ASCII contents.
    """
    import os

    config = file_regression.request.config
    regen = file_regression.force_regen or bool(
        config.getoption(name="regen_all")
        or config.getoption(name="force_regen"),
    )
    if not regen and golden_path.is_file():
        expected = golden_path.read_bytes()
        if newline is None:
            line_ending = os.linesep
        else:
            # An empty string means "no translation", same as "\n".
            line_ending = newline or "\n"
        text = contents
        if line_ending != "\n":
            text = contents.replace("\n", line_ending)
        if expected == text.encode(encoding or "utf-8"):
            return
    file_regression.check(
        contents=contents,
        extension=extension,
        encoding=encoding,
        newline=newline,
        fullpath=golden_path,
    )


def test_upstream(
    file_regression: FileRegressionFixture,
    golden: Path,
) -> None:
    """Time ``ITERATIONS`` matching calls to the stock ``file_regression.check``."""
    t0 = time.perf_counter()
    for _ in range(ITERATIONS):
        file_regression.check(
            contents=CONTENTS,
            extension=".txt",
            newline="",
            fullpath=golden,
        )
    t1 = time.perf_counter()
    elapsed = t1 - t0
    # Report the total and the per-call average in microseconds.
    report = (
        f"\nupstream file_regression.check: "
        f"{ITERATIONS} calls in {elapsed:.3f}s "
        f"({elapsed / ITERATIONS * 1e6:.1f} us/call)"
    )
    print(report)


def test_fast_path_splitlines(
    file_regression: FileRegressionFixture,
    golden: Path,
) -> None:
    """Time ``ITERATIONS`` matching calls through the splitlines fast path."""
    t0 = time.perf_counter()
    for _ in range(ITERATIONS):
        _fast_path_splitlines(
            file_regression=file_regression,
            contents=CONTENTS,
            golden_path=golden,
            extension=".txt",
            newline="",
        )
    t1 = time.perf_counter()
    elapsed = t1 - t0
    # Report the total and the per-call average in microseconds.
    report = (
        f"\nfast path (splitlines equality): "
        f"{ITERATIONS} calls in {elapsed:.3f}s "
        f"({elapsed / ITERATIONS * 1e6:.1f} us/call)"
    )
    print(report)


def test_fast_path_byte_exact(
    file_regression: FileRegressionFixture,
    golden: Path,
) -> None:
    """Time ``ITERATIONS`` matching calls through the byte-exact fast path."""
    t0 = time.perf_counter()
    for _ in range(ITERATIONS):
        _fast_path_byte_exact(
            file_regression=file_regression,
            contents=CONTENTS,
            golden_path=golden,
            extension=".txt",
            encoding=None,
            newline="",
        )
    t1 = time.perf_counter()
    elapsed = t1 - t0
    # Report the total and the per-call average in microseconds.
    report = (
        f"\nfast path (byte-exact, no .obtained write): "
        f"{ITERATIONS} calls in {elapsed:.3f}s "
        f"({elapsed / ITERATIONS * 1e6:.1f} us/call)"
    )
    print(report)


# CONTENTS plus one extra line.
MISS_CONTENTS = CONTENTS + "extra line\n"
# Number of calls timed by each miss-case benchmark.
MISS_ITERATIONS = 200


def test_miss_upstream(
    file_regression: FileRegressionFixture,
    golden: Path,
) -> None:
    """Time ``MISS_ITERATIONS`` failing calls to the stock ``file_regression.check``."""
    t0 = time.perf_counter()
    for _ in range(MISS_ITERATIONS):
        # Every call is expected to fail with an AssertionError.
        with pytest.raises(AssertionError):
            file_regression.check(
                contents=MISS_CONTENTS,
                extension=".txt",
                newline="",
                fullpath=golden,
            )
    t1 = time.perf_counter()
    elapsed = t1 - t0
    report = (
        f"\nmiss upstream file_regression.check: "
        f"{MISS_ITERATIONS} calls in {elapsed:.3f}s "
        f"({elapsed / MISS_ITERATIONS * 1e6:.1f} us/call)"
    )
    print(report)


def test_miss_fast_path_splitlines(
    file_regression: FileRegressionFixture,
    golden: Path,
) -> None:
    """Time ``MISS_ITERATIONS`` failing calls through the splitlines fast path."""
    t0 = time.perf_counter()
    for _ in range(MISS_ITERATIONS):
        # Every call is expected to fail with an AssertionError.
        with pytest.raises(AssertionError):
            _fast_path_splitlines(
                file_regression=file_regression,
                contents=MISS_CONTENTS,
                golden_path=golden,
                extension=".txt",
                newline="",
            )
    t1 = time.perf_counter()
    elapsed = t1 - t0
    report = (
        f"\nmiss fast path (splitlines equality, then fall through): "
        f"{MISS_ITERATIONS} calls in {elapsed:.3f}s "
        f"({elapsed / MISS_ITERATIONS * 1e6:.1f} us/call)"
    )
    print(report)


def test_miss_fast_path_byte_exact(
    file_regression: FileRegressionFixture,
    golden: Path,
) -> None:
    """Time ``MISS_ITERATIONS`` failing calls through the byte-exact fast path."""
    t0 = time.perf_counter()
    for _ in range(MISS_ITERATIONS):
        # Every call is expected to fail with an AssertionError.
        with pytest.raises(AssertionError):
            _fast_path_byte_exact(
                file_regression=file_regression,
                contents=MISS_CONTENTS,
                golden_path=golden,
                extension=".txt",
                encoding=None,
                newline="",
            )
    t1 = time.perf_counter()
    elapsed = t1 - t0
    report = (
        f"\nmiss fast path (byte-exact, then fall through): "
        f"{MISS_ITERATIONS} calls in {elapsed:.3f}s "
        f"({elapsed / MISS_ITERATIONS * 1e6:.1f} us/call)"
    )
    print(report)
```

Run with:

```
pytest test_bench.py -s
```

## Timings

- `pytest-regressions` 2.10.0
- Python 3.13.9
- macOS (Darwin 25.4.0, Apple Silicon), SSD
- 200-line text golden, 1000 iterations per run, 3 runs

### Match case (1000 iterations)

| Run | Upstream `check` | Byte-exact fast path | Speedup |
| --- | ---------------- | -------------------- | ------- |
| 1   | 86.8 us/call     | 11.9 us/call         | 7.3x    |
| 2   | 99.0 us/call     | 11.9 us/call         | 8.3x    |
| 3   | 93.9 us/call     | 12.9 us/call         | 7.3x    |

In a downstream suite with ~13k `file_regression.check` calls per run,
the fast path removed several seconds of wall-clock time end to end.

### Miss case (200 iterations)

When `contents` does not match the golden, the fast path falls through
to the existing `check` code path. The added overhead is one
`read_bytes()` + one `encode()` before the fallthrough; the ~800 us of
`.obtained` write and `difflib.HtmlDiff` rendering dominate either way.

| Run | Miss upstream | Miss byte-exact fast path | Overhead |
| --- | ------------- | ------------------------- | -------- |
| 1   | 882.4 us/call | 910.1 us/call             | +28 us   |
| 2   | 816.9 us/call | 861.1 us/call             | +44 us   |
| 3   | 864.4 us/call | 841.2 us/call             | −23 us   |

The measured overhead ranges from about −3% to +5% across the three runs,
which is within run-to-run noise, on a branch that only runs for failing
tests.

## Proposal

Add an in-memory byte-exact short-circuit to
`FileRegressionFixture.check` for the non-binary, default-`check_fn`
path. Encode `contents` using the same `encoding` and `newline`
semantics that `dump_fn` would apply, compare the result against
`expected_filename.read_bytes()`, and return without calling `dump_fn`
or `check_fn` on a match. On mismatch or when `--force-regen` /
`--regen-all` is set, fall through to the current code path so the
`.obtained` file and HTML diff are produced exactly as today.


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fast path for `FileRegressionFixture.check` when contents already match #240

Summary

Reproducer

Timings

Match case (1000 iterations)

Miss case (200 iterations)

Proposal

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Run	Upstream `check`	Byte-exact fast path	Speedup
1	86.8 us/call	11.9 us/call	7.3x
2	99.0 us/call	11.9 us/call	8.3x
3	93.9 us/call	12.9 us/call	7.3x

Run	Miss upstream	Miss byte-exact fast path	Overhead
1	882.4 us/call	910.1 us/call	+28 us
2	816.9 us/call	861.1 us/call	+44 us
3	864.4 us/call	841.2 us/call	−23 us

Fast path for FileRegressionFixture.check when contents already match #240

Description

Summary

Reproducer

Timings

Match case (1000 iterations)

Miss case (200 iterations)

Proposal

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions

Fast path for `FileRegressionFixture.check` when contents already match #240