From 689568269405af47765777b225433730e49083a2 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Tue, 23 Dec 2025 09:38:23 -0600 Subject: [PATCH] Add hypothesis harness, tarfile strategies --- .gitignore | 216 ++ Makefile | 4 +- hypothesis_strategies/__init__.py | 0 hypothesis_strategies/fspaths.py | 71 + hypothesis_strategies/tar.py | 71 + tarfile_hypothesis.py | 15 + vendored/__init__.py | 0 vendored/_hypothesis_ftz_detector.py | 159 + vendored/_hypothesis_globals.py | 28 + vendored/_hypothesis_pytestplugin.py | 480 +++ vendored/hypothesis/__init__.py | 63 + vendored/hypothesis/_settings.py | 1250 ++++++++ vendored/hypothesis/configuration.py | 107 + vendored/hypothesis/control.py | 354 +++ vendored/hypothesis/core.py | 2409 +++++++++++++++ vendored/hypothesis/database.py | 1287 ++++++++ vendored/hypothesis/entry_points.py | 39 + vendored/hypothesis/errors.py | 317 ++ vendored/hypothesis/extra/__init__.py | 9 + vendored/hypothesis/extra/_array_helpers.py | 700 +++++ vendored/hypothesis/extra/_patching.py | 341 +++ vendored/hypothesis/extra/array_api.py | 1156 +++++++ vendored/hypothesis/extra/cli.py | 339 +++ vendored/hypothesis/extra/codemods.py | 280 ++ vendored/hypothesis/extra/dateutil.py | 64 + vendored/hypothesis/extra/django/__init__.py | 32 + vendored/hypothesis/extra/django/_fields.py | 417 +++ vendored/hypothesis/extra/django/_impl.py | 230 ++ vendored/hypothesis/extra/dpcontracts.py | 49 + vendored/hypothesis/extra/ghostwriter.py | 1941 ++++++++++++ vendored/hypothesis/extra/lark.py | 247 ++ vendored/hypothesis/extra/numpy.py | 1441 +++++++++ vendored/hypothesis/extra/pandas/__init__.py | 20 + vendored/hypothesis/extra/pandas/impl.py | 761 +++++ vendored/hypothesis/extra/pytestplugin.py | 19 + vendored/hypothesis/extra/pytz.py | 61 + vendored/hypothesis/extra/redis.py | 149 + vendored/hypothesis/internal/__init__.py | 9 + vendored/hypothesis/internal/cache.py | 349 +++ vendored/hypothesis/internal/cathetus.py | 62 + vendored/hypothesis/internal/charmap.py | 337 ++ vendored/hypothesis/internal/compat.py | 308 ++ .../internal/conjecture/__init__.py | 9 + .../hypothesis/internal/conjecture/choice.py | 622 ++++ .../hypothesis/internal/conjecture/data.py | 1355 +++++++++ .../internal/conjecture/datatree.py | 1188 ++++++++ .../internal/conjecture/dfa/__init__.py | 674 ++++ .../internal/conjecture/dfa/lstar.py | 497 +++ .../hypothesis/internal/conjecture/engine.py | 1665 ++++++++++ .../hypothesis/internal/conjecture/floats.py | 219 ++ .../internal/conjecture/junkdrawer.py | 563 ++++ .../internal/conjecture/optimiser.py | 204 ++ .../hypothesis/internal/conjecture/pareto.py | 361 +++ .../conjecture/provider_conformance.py | 502 +++ .../internal/conjecture/providers.py | 1209 ++++++++ .../internal/conjecture/shrinker.py | 1764 +++++++++++ .../internal/conjecture/shrinking/__init__.py | 18 + .../internal/conjecture/shrinking/bytes.py | 23 + .../conjecture/shrinking/choicetree.py | 161 + .../conjecture/shrinking/collection.py | 82 + .../internal/conjecture/shrinking/common.py | 180 ++ .../internal/conjecture/shrinking/floats.py | 93 + .../internal/conjecture/shrinking/integer.py | 75 + .../internal/conjecture/shrinking/ordering.py | 96 + .../internal/conjecture/shrinking/string.py | 24 + .../hypothesis/internal/conjecture/utils.py | 397 +++ vendored/hypothesis/internal/constants_ast.py | 274 ++ vendored/hypothesis/internal/coverage.py | 109 + vendored/hypothesis/internal/detection.py | 41 + vendored/hypothesis/internal/entropy.py | 267 ++ vendored/hypothesis/internal/escalation.py | 
175 ++ vendored/hypothesis/internal/filtering.py | 364 +++ vendored/hypothesis/internal/floats.py | 209 ++ vendored/hypothesis/internal/healthcheck.py | 21 + vendored/hypothesis/internal/intervalsets.py | 311 ++ .../hypothesis/internal/lambda_sources.py | 430 +++ vendored/hypothesis/internal/observability.py | 564 ++++ vendored/hypothesis/internal/reflection.py | 529 ++++ vendored/hypothesis/internal/scrutineer.py | 328 ++ vendored/hypothesis/internal/validation.py | 127 + vendored/hypothesis/provisional.py | 204 ++ vendored/hypothesis/py.typed | 0 vendored/hypothesis/reporting.py | 61 + vendored/hypothesis/stateful.py | 1178 +++++++ vendored/hypothesis/statistics.py | 131 + vendored/hypothesis/strategies/__init__.py | 141 + .../strategies/_internal/__init__.py | 16 + .../hypothesis/strategies/_internal/attrs.py | 218 ++ .../strategies/_internal/collections.py | 390 +++ .../hypothesis/strategies/_internal/core.py | 2701 +++++++++++++++++ .../strategies/_internal/datetime.py | 474 +++ .../strategies/_internal/deferred.py | 93 + .../strategies/_internal/featureflags.py | 132 + .../strategies/_internal/flatmapped.py | 68 + .../strategies/_internal/functions.py | 62 + .../strategies/_internal/ipaddress.py | 118 + .../hypothesis/strategies/_internal/lazy.py | 176 ++ .../hypothesis/strategies/_internal/misc.py | 138 + .../strategies/_internal/numbers.py | 528 ++++ .../hypothesis/strategies/_internal/random.py | 442 +++ .../strategies/_internal/recursive.py | 118 + .../hypothesis/strategies/_internal/regex.py | 576 ++++ .../hypothesis/strategies/_internal/shared.py | 56 + .../strategies/_internal/strategies.py | 1230 ++++++++ .../strategies/_internal/strings.py | 380 +++ .../hypothesis/strategies/_internal/types.py | 1157 +++++++ .../hypothesis/strategies/_internal/utils.py | 224 ++ vendored/hypothesis/utils/__init__.py | 12 + vendored/hypothesis/utils/conventions.py | 23 + vendored/hypothesis/utils/dynamicvariables.py | 39 + vendored/hypothesis/utils/terminal.py | 38 + vendored/hypothesis/utils/threading.py | 52 + vendored/hypothesis/vendor/__init__.py | 9 + vendored/hypothesis/vendor/pretty.py | 984 ++++++ .../vendor/tlds-alpha-by-domain.txt | 1439 +++++++++ vendored/hypothesis/version.py | 12 + vendored/requirements.in | 1 + vendored/requirements.txt | 14 + vendored/sortedcontainers/__init__.py | 74 + vendored/sortedcontainers/sorteddict.py | 812 +++++ vendored/sortedcontainers/sortedlist.py | 2646 ++++++++++++++++ vendored/sortedcontainers/sortedset.py | 733 +++++ 122 files changed, 49820 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100644 hypothesis_strategies/__init__.py create mode 100644 hypothesis_strategies/fspaths.py create mode 100644 hypothesis_strategies/tar.py create mode 100644 tarfile_hypothesis.py create mode 100644 vendored/__init__.py create mode 100644 vendored/_hypothesis_ftz_detector.py create mode 100644 vendored/_hypothesis_globals.py create mode 100644 vendored/_hypothesis_pytestplugin.py create mode 100644 vendored/hypothesis/__init__.py create mode 100644 vendored/hypothesis/_settings.py create mode 100644 vendored/hypothesis/configuration.py create mode 100644 vendored/hypothesis/control.py create mode 100644 vendored/hypothesis/core.py create mode 100644 vendored/hypothesis/database.py create mode 100644 vendored/hypothesis/entry_points.py create mode 100644 vendored/hypothesis/errors.py create mode 100644 vendored/hypothesis/extra/__init__.py create mode 100644 vendored/hypothesis/extra/_array_helpers.py create mode 100644 
vendored/hypothesis/extra/_patching.py create mode 100644 vendored/hypothesis/extra/array_api.py create mode 100644 vendored/hypothesis/extra/cli.py create mode 100644 vendored/hypothesis/extra/codemods.py create mode 100644 vendored/hypothesis/extra/dateutil.py create mode 100644 vendored/hypothesis/extra/django/__init__.py create mode 100644 vendored/hypothesis/extra/django/_fields.py create mode 100644 vendored/hypothesis/extra/django/_impl.py create mode 100644 vendored/hypothesis/extra/dpcontracts.py create mode 100644 vendored/hypothesis/extra/ghostwriter.py create mode 100644 vendored/hypothesis/extra/lark.py create mode 100644 vendored/hypothesis/extra/numpy.py create mode 100644 vendored/hypothesis/extra/pandas/__init__.py create mode 100644 vendored/hypothesis/extra/pandas/impl.py create mode 100644 vendored/hypothesis/extra/pytestplugin.py create mode 100644 vendored/hypothesis/extra/pytz.py create mode 100644 vendored/hypothesis/extra/redis.py create mode 100644 vendored/hypothesis/internal/__init__.py create mode 100644 vendored/hypothesis/internal/cache.py create mode 100644 vendored/hypothesis/internal/cathetus.py create mode 100644 vendored/hypothesis/internal/charmap.py create mode 100644 vendored/hypothesis/internal/compat.py create mode 100644 vendored/hypothesis/internal/conjecture/__init__.py create mode 100644 vendored/hypothesis/internal/conjecture/choice.py create mode 100644 vendored/hypothesis/internal/conjecture/data.py create mode 100644 vendored/hypothesis/internal/conjecture/datatree.py create mode 100644 vendored/hypothesis/internal/conjecture/dfa/__init__.py create mode 100644 vendored/hypothesis/internal/conjecture/dfa/lstar.py create mode 100644 vendored/hypothesis/internal/conjecture/engine.py create mode 100644 vendored/hypothesis/internal/conjecture/floats.py create mode 100644 vendored/hypothesis/internal/conjecture/junkdrawer.py create mode 100644 vendored/hypothesis/internal/conjecture/optimiser.py create mode 100644 vendored/hypothesis/internal/conjecture/pareto.py create mode 100644 vendored/hypothesis/internal/conjecture/provider_conformance.py create mode 100644 vendored/hypothesis/internal/conjecture/providers.py create mode 100644 vendored/hypothesis/internal/conjecture/shrinker.py create mode 100644 vendored/hypothesis/internal/conjecture/shrinking/__init__.py create mode 100644 vendored/hypothesis/internal/conjecture/shrinking/bytes.py create mode 100644 vendored/hypothesis/internal/conjecture/shrinking/choicetree.py create mode 100644 vendored/hypothesis/internal/conjecture/shrinking/collection.py create mode 100644 vendored/hypothesis/internal/conjecture/shrinking/common.py create mode 100644 vendored/hypothesis/internal/conjecture/shrinking/floats.py create mode 100644 vendored/hypothesis/internal/conjecture/shrinking/integer.py create mode 100644 vendored/hypothesis/internal/conjecture/shrinking/ordering.py create mode 100644 vendored/hypothesis/internal/conjecture/shrinking/string.py create mode 100644 vendored/hypothesis/internal/conjecture/utils.py create mode 100644 vendored/hypothesis/internal/constants_ast.py create mode 100644 vendored/hypothesis/internal/coverage.py create mode 100644 vendored/hypothesis/internal/detection.py create mode 100644 vendored/hypothesis/internal/entropy.py create mode 100644 vendored/hypothesis/internal/escalation.py create mode 100644 vendored/hypothesis/internal/filtering.py create mode 100644 vendored/hypothesis/internal/floats.py create mode 100644 vendored/hypothesis/internal/healthcheck.py create 
mode 100644 vendored/hypothesis/internal/intervalsets.py create mode 100644 vendored/hypothesis/internal/lambda_sources.py create mode 100644 vendored/hypothesis/internal/observability.py create mode 100644 vendored/hypothesis/internal/reflection.py create mode 100644 vendored/hypothesis/internal/scrutineer.py create mode 100644 vendored/hypothesis/internal/validation.py create mode 100644 vendored/hypothesis/provisional.py create mode 100644 vendored/hypothesis/py.typed create mode 100644 vendored/hypothesis/reporting.py create mode 100644 vendored/hypothesis/stateful.py create mode 100644 vendored/hypothesis/statistics.py create mode 100644 vendored/hypothesis/strategies/__init__.py create mode 100644 vendored/hypothesis/strategies/_internal/__init__.py create mode 100644 vendored/hypothesis/strategies/_internal/attrs.py create mode 100644 vendored/hypothesis/strategies/_internal/collections.py create mode 100644 vendored/hypothesis/strategies/_internal/core.py create mode 100644 vendored/hypothesis/strategies/_internal/datetime.py create mode 100644 vendored/hypothesis/strategies/_internal/deferred.py create mode 100644 vendored/hypothesis/strategies/_internal/featureflags.py create mode 100644 vendored/hypothesis/strategies/_internal/flatmapped.py create mode 100644 vendored/hypothesis/strategies/_internal/functions.py create mode 100644 vendored/hypothesis/strategies/_internal/ipaddress.py create mode 100644 vendored/hypothesis/strategies/_internal/lazy.py create mode 100644 vendored/hypothesis/strategies/_internal/misc.py create mode 100644 vendored/hypothesis/strategies/_internal/numbers.py create mode 100644 vendored/hypothesis/strategies/_internal/random.py create mode 100644 vendored/hypothesis/strategies/_internal/recursive.py create mode 100644 vendored/hypothesis/strategies/_internal/regex.py create mode 100644 vendored/hypothesis/strategies/_internal/shared.py create mode 100644 vendored/hypothesis/strategies/_internal/strategies.py create mode 100644 vendored/hypothesis/strategies/_internal/strings.py create mode 100644 vendored/hypothesis/strategies/_internal/types.py create mode 100644 vendored/hypothesis/strategies/_internal/utils.py create mode 100644 vendored/hypothesis/utils/__init__.py create mode 100644 vendored/hypothesis/utils/conventions.py create mode 100644 vendored/hypothesis/utils/dynamicvariables.py create mode 100644 vendored/hypothesis/utils/terminal.py create mode 100644 vendored/hypothesis/utils/threading.py create mode 100644 vendored/hypothesis/vendor/__init__.py create mode 100644 vendored/hypothesis/vendor/pretty.py create mode 100644 vendored/hypothesis/vendor/tlds-alpha-by-domain.txt create mode 100644 vendored/hypothesis/version.py create mode 100644 vendored/requirements.in create mode 100644 vendored/requirements.txt create mode 100644 vendored/sortedcontainers/__init__.py create mode 100644 vendored/sortedcontainers/sorteddict.py create mode 100644 vendored/sortedcontainers/sortedlist.py create mode 100644 vendored/sortedcontainers/sortedset.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..42f20c5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,216 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python 
script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +# Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +# poetry.lock +# poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +# pdm.lock +# pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +# pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# Redis +*.rdb +*.aof +*.pid + +# RabbitMQ +mnesia/ +rabbitmq/ +rabbitmq-data/ + +# ActiveMQ +activemq-data/ + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. 
For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +# .idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml diff --git a/Makefile b/Makefile index a550011..f81c708 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-zipfile fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml +all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml PYTHON_CONFIG_PATH=$(CPYTHON_INSTALL_PATH)/bin/python3-config CXXFLAGS += $(shell $(PYTHON_CONFIG_PATH) --cflags) @@ -26,6 +26,8 @@ fuzzer-zipfile: clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"zipfile.py\"" -ldl $(LDFLAGS) -o fuzzer-zipfile fuzzer-tarfile: clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"tarfile.py\"" -ldl $(LDFLAGS) -o fuzzer-tarfile +fuzzer-tarfile-hypothesis: + clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"tarfile_hypothesis.py\"" -ldl $(LDFLAGS) -o fuzzer-tarfile-hypothesis fuzzer-configparser: clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"configparser.py\"" -ldl $(LDFLAGS) -o fuzzer-configparser fuzzer-tomllib: diff --git a/hypothesis_strategies/__init__.py b/hypothesis_strategies/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/hypothesis_strategies/fspaths.py b/hypothesis_strategies/fspaths.py new file mode 100644 index 0000000..6b868e6 --- /dev/null +++ b/hypothesis_strategies/fspaths.py @@ -0,0 +1,71 @@ +# Copyright 2017 Christoph Reiter +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+import os
+import sys
+
+from vendored.hypothesis.strategies import (
+    composite,
+    sampled_from,
+    lists,
+    integers,
+    binary,
+    randoms,
+)
+
+
+@composite
+def fspaths(draw, max_size: int | None = None) -> str:
+    """A hypothesis strategy which gives valid path values.
+
+    Valid path values are everything which when passed to open() will not raise
+    ValueError or TypeError (but might raise OSError due to file system or
+    operating system restrictions).
+    """
+
+    if os.name == "nt":
+        high_surrogate = integers(min_value=0xD800, max_value=0xDBFF).map(
+            lambda i: chr(i)
+        )
+        low_surrogate = integers(min_value=0xDC00, max_value=0xDFFF).map(
+            lambda i: chr(i)
+        )
+        uni_char = integers(min_value=1, max_value=sys.maxunicode).map(lambda i: chr(i))
+        any_char = sampled_from(
+            [draw(uni_char), draw(high_surrogate), draw(low_surrogate)]
+        )
+        any_text = lists(any_char, max_size=max_size).map(lambda l: "".join(l))
+        path_text = any_text
+    else:
+        unix_path_bytes = binary(max_size=max_size).map(lambda b: b.replace(b"\x00", b" "))
+        path_text = unix_path_bytes.map(
+            lambda b: b.decode(sys.getfilesystemencoding(), "surrogateescape")
+        )
+    r = draw(randoms())
+
+    def shuffle_text(t):
+        l = list(t)
+        r.shuffle(l)
+        return "".join(l)
+
+    path_text = path_text.map(shuffle_text)
+
+    return draw(path_text)
diff --git a/hypothesis_strategies/tar.py b/hypothesis_strategies/tar.py
new file mode 100644
index 0000000..f5f8bad
--- /dev/null
+++ b/hypothesis_strategies/tar.py
@@ -0,0 +1,71 @@
+import io
+import tarfile
+
+import vendored.hypothesis.strategies as st
+from .fspaths import fspaths
+
+
+def tar_integers(
+    format: int, digits: int = 1, allow_negative: bool = False
+) -> st.SearchStrategy[int]:
+    """tar has a unique way of encoding integers that is format-dependent
+    and based on the number of "digits" allowed for a value.
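+
+    Roughly speaking: the ustar and pax formats write numeric header fields as
+    NUL/space-terminated octal ASCII text, while the GNU format can fall back
+    to a binary base-256 encoding (leading byte 0x80, or 0xFF for negative
+    values such as pre-1970 mtimes) when a value does not fit in octal. The
+    bounds below stay within what each format can represent, so every drawn
+    value should be encodable in the requested format.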
+ """ + if digits <= 0: + raise ValueError("Digits must be greater than one.") + if format == tarfile.GNU_FORMAT: + min_value = -(256 ** (digits - 1)) if allow_negative else 0 + max_value = (256 ** (digits - 1)) - 1 + else: + min_value = 0 + max_value = (4**digits) - 1 + return st.integers(min_value=min_value, max_value=max_value) + + +@st.composite +def tar_archives(draw): + buf = io.BytesIO() + format = draw( + st.sampled_from((tarfile.GNU_FORMAT, tarfile.PAX_FORMAT, tarfile.USTAR_FORMAT)) + ) + tar = tarfile.TarFile(fileobj=buf, format=format, mode="w") + types = list(tarfile.REGULAR_TYPES) + + for _ in range(draw(st.integers(min_value=1, max_value=10))): + info = tarfile.TarInfo( + name=draw(fspaths(max_size=tarfile.LENGTH_NAME)) + ) + fileobj = None + + info.type = draw(st.sampled_from(types)) + info.mode = draw(tar_integers(format=format, digits=8)) + info.uid = draw(tar_integers(format=format, digits=8)) + info.gid = draw(tar_integers(format=format, digits=8)) + info.mtime = draw(tar_integers(format=format, digits=12)) + info.devmajor = draw(tar_integers(format=format, digits=8)) + info.devminor = draw(tar_integers(format=format, digits=8)) + + if draw(st.booleans()): + info.linkname = draw(fspaths(max_size=tarfile.LENGTH_LINK)) + + def maybe_set_pax_header(obj, name, value): + if draw(st.booleans()): + obj.pax_headers[name] = value + + if format == tarfile.PAX_FORMAT: + maybe_set_pax_header(info, "uname", draw(st.text(max_size=32))) + maybe_set_pax_header(info, "gname", draw(st.text(max_size=32))) + maybe_set_pax_header( + info, + "path", + draw(fspaths(max_size=tarfile.LENGTH_NAME)), + ) + maybe_set_pax_header( + info, + "linkpath", + draw(fspaths(max_size=tarfile.LENGTH_LINK)), + ) + + tar.addfile(info, fileobj=fileobj) + + return buf.getvalue() \ No newline at end of file diff --git a/tarfile_hypothesis.py b/tarfile_hypothesis.py new file mode 100644 index 0000000..82a7626 --- /dev/null +++ b/tarfile_hypothesis.py @@ -0,0 +1,15 @@ +import io +import tarfile + +from vendored.hypothesis import given +from hypothesis_strategies import tar + + + +@given(tar.tar_archives()) +def tar_archive_fuzz_target(tar_archive: bytes) -> None: + tarfile.TarFile(fileobj=io.BytesIO(tar_archive)) + + +# Exposes the Hypothesis fuzz target for integrating with OSS-Fuzz. +FuzzerRunOne = tar_archive_fuzz_target.hypothesis.fuzz_one_input diff --git a/vendored/__init__.py b/vendored/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vendored/_hypothesis_ftz_detector.py b/vendored/_hypothesis_ftz_detector.py new file mode 100644 index 0000000..5eaadad --- /dev/null +++ b/vendored/_hypothesis_ftz_detector.py @@ -0,0 +1,159 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +""" +This is a toolkit for determining which module set the "flush to zero" flag. + +For details, see the docstring and comments in `identify_ftz_culprit()`. This module +is defined outside the main Hypothesis namespace so that we can avoid triggering +import of Hypothesis itself from each subprocess which must import the worker function. 
+""" + +import importlib +import sys +from collections.abc import Callable +from typing import TYPE_CHECKING, TypeAlias + +if TYPE_CHECKING: + from multiprocessing import Queue + +FTZCulprits: TypeAlias = tuple[bool | None, set[str]] + + +KNOWN_EVER_CULPRITS = ( + # https://moyix.blogspot.com/2022/09/someones-been-messing-with-my-subnormals.html + # fmt: off + "archive-pdf-tools", "bgfx-python", "bicleaner-ai-glove", "BTrees", "cadbiom", + "ctranslate2", "dyNET", "dyNET38", "gevent", "glove-python-binary", "higra", + "hybridq", "ikomia", "ioh", "jij-cimod", "lavavu", "lavavu-osmesa", "MulticoreTSNE", + "neural-compressor", "nwhy", "openjij", "openturns", "perfmetrics", "pHashPy", + "pyace-lite", "pyapr", "pycompadre", "pycompadre-serial", "PyKEP", "pykep", + "pylimer-tools", "pyqubo", "pyscf", "PyTAT", "python-prtree", "qiskit-aer", + "qiskit-aer-gpu", "RelStorage", "sail-ml", "segmentation", "sente", "sinr", + "snapml", "superman", "symengine", "systran-align", "texture-tool", "tsne-mp", + "xcsf", + # fmt: on +) + + +def flush_to_zero() -> bool: + # If this subnormal number compares equal to zero we have a problem + return 2.0**-1073 == 0 + + +def run_in_process(fn: Callable[..., FTZCulprits], *args: object) -> FTZCulprits: + import multiprocessing as mp + + mp.set_start_method("spawn", force=True) + q: Queue[FTZCulprits] = mp.Queue() + p = mp.Process(target=target, args=(q, fn, *args)) + p.start() + retval = q.get() + p.join() + return retval + + +def target( + q: "Queue[FTZCulprits]", fn: Callable[..., FTZCulprits], *args: object +) -> None: + q.put(fn(*args)) + + +def always_imported_modules() -> FTZCulprits: + return flush_to_zero(), set(sys.modules) + + +def modules_imported_by(mod: str) -> FTZCulprits: + """Return the set of modules imported transitively by mod.""" + before = set(sys.modules) + try: + importlib.import_module(mod) + except Exception: + return None, set() + imports = set(sys.modules) - before + return flush_to_zero(), imports + + +# We don't want to redo all the expensive process-spawning checks when we've already +# done them, so we cache known-good packages and a known-FTZ result if we have one. +KNOWN_FTZ = None +CHECKED_CACHE = set() + + +def identify_ftz_culprits() -> str: + """Find the modules in sys.modules which cause "mod" to be imported.""" + # If we've run this function before, return the same result. + global KNOWN_FTZ + if KNOWN_FTZ: + return KNOWN_FTZ + # Start by determining our baseline: the FTZ and sys.modules state in a fresh + # process which has only imported this module and nothing else. + always_enables_ftz, always_imports = run_in_process(always_imported_modules) + if always_enables_ftz: + raise RuntimeError("Python is always in FTZ mode, even without imports!") + CHECKED_CACHE.update(always_imports) + + # Next, we'll search through sys.modules looking for a package (or packages) such + # that importing them in a new process sets the FTZ state. As a heuristic, we'll + # start with packages known to have ever enabled FTZ, then top-level packages as + # a way to eliminate large fractions of the search space relatively quickly. + def key(name: str) -> tuple[bool, int, str]: + """Prefer known-FTZ modules, then top-level packages, then alphabetical.""" + return (name not in KNOWN_EVER_CULPRITS, name.count("."), name) + + # We'll track the set of modules to be checked, and those which do trigger FTZ. 
+ candidates = set(sys.modules) - CHECKED_CACHE + triggering_modules = {} + while candidates: + mod = min(candidates, key=key) + candidates.discard(mod) + enables_ftz, imports = run_in_process(modules_imported_by, mod) + imports -= CHECKED_CACHE + if enables_ftz: + triggering_modules[mod] = imports + candidates &= imports + else: + candidates -= imports + CHECKED_CACHE.update(imports) + + # We only want to report the 'top level' packages which enable FTZ - for example, + # if the enabling code is in `a.b`, and `a` in turn imports `a.b`, we prefer to + # report `a`. On the other hand, if `a` does _not_ import `a.b`, as is the case + # for `hypothesis.extra.*` modules, then `a` will not be in `triggering_modules` + # and we'll report `a.b` here instead. + prefixes = tuple(n + "." for n in triggering_modules) + result = {k for k in triggering_modules if not k.startswith(prefixes)} + + # Suppose that `bar` enables FTZ, and `foo` imports `bar`. At this point we're + # tracking both, but only want to report the latter. + for a in sorted(result): + for b in sorted(result): + if a in triggering_modules[b] and b not in triggering_modules[a]: + result.discard(b) + + # There may be a cyclic dependency which that didn't handle, or simply two + # separate modules which both enable FTZ. We already gave up comprehensive + # reporting for speed above (`candidates &= imports`), so we'll also buy + # simpler reporting by arbitrarily selecting the alphabetically first package. + KNOWN_FTZ = min(result) # Cache the result - it's likely this will trigger again! + return KNOWN_FTZ + + +if __name__ == "__main__": + # This would be really really annoying to write automated tests for, so I've + # done some manual exploratory testing: `pip install grequests gevent==21.12.0`, + # and call print() as desired to observe behavior. + import grequests # noqa + + # To test without skipping to a known answer, uncomment the following line and + # change the last element of key from `name` to `-len(name)` so that we check + # grequests before gevent. + # KNOWN_EVER_CULPRITS = [c for c in KNOWN_EVER_CULPRITS if c != "gevent"] + print(identify_ftz_culprits()) diff --git a/vendored/_hypothesis_globals.py b/vendored/_hypothesis_globals.py new file mode 100644 index 0000000..1f3eb6f --- /dev/null +++ b/vendored/_hypothesis_globals.py @@ -0,0 +1,28 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +""" +Module for globals shared between plugin(s) and the main hypothesis module, without +depending on either. This file should have no imports outside of stdlib. +""" + +import os + +in_initialization = 1 +"""If >0, indicates that hypothesis is still initializing (importing or loading +the test environment). `import hypothesis` will cause this number to be decremented, +and the pytest plugin increments at load time, then decrements it just before each test +session starts. However, this leads to a hole in coverage if another pytest plugin +imports hypothesis before our plugin is loaded. 
HYPOTHESIS_EXTEND_INITIALIZATION may +be set to pre-increment the value on behalf of _hypothesis_pytestplugin, plugging the +hole.""" + +if os.environ.get("HYPOTHESIS_EXTEND_INITIALIZATION"): + in_initialization += 1 diff --git a/vendored/_hypothesis_pytestplugin.py b/vendored/_hypothesis_pytestplugin.py new file mode 100644 index 0000000..340bf90 --- /dev/null +++ b/vendored/_hypothesis_pytestplugin.py @@ -0,0 +1,480 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +""" +The pytest plugin for Hypothesis. + +We move this from the old location at `hypothesis.extra.pytestplugin` so that it +can be loaded by Pytest without importing Hypothesis. In turn, this means that +Hypothesis will not load our own third-party plugins (with associated side-effects) +unless and until the user explicitly runs `import hypothesis`. + +See https://github.com/HypothesisWorks/hypothesis/issues/3140 for details. +""" + +import base64 +import json +import os +import sys +import warnings +from fnmatch import fnmatch +from inspect import signature + +import _hypothesis_globals +import pytest + +try: + from _pytest.junitxml import xml_key +except ImportError: + xml_key = "_xml" # type: ignore + +LOAD_PROFILE_OPTION = "--hypothesis-profile" +VERBOSITY_OPTION = "--hypothesis-verbosity" +PRINT_STATISTICS_OPTION = "--hypothesis-show-statistics" +SEED_OPTION = "--hypothesis-seed" +EXPLAIN_OPTION = "--hypothesis-explain" + +_VERBOSITY_NAMES = ["quiet", "normal", "verbose", "debug"] +_ALL_OPTIONS = [ + LOAD_PROFILE_OPTION, + VERBOSITY_OPTION, + PRINT_STATISTICS_OPTION, + SEED_OPTION, + EXPLAIN_OPTION, +] +STATS_KEY = "_hypothesis_stats" +FAILING_EXAMPLES_KEY = "_hypothesis_failing_examples" + + +class StoringReporter: + def __init__(self, config): + assert "hypothesis" in sys.modules + from hypothesis.reporting import default + + self.report = default + self.config = config + self.results = [] + + def __call__(self, msg): + if self.config.getoption("capture", "fd") == "no": + self.report(msg) + if not isinstance(msg, str): + msg = repr(msg) + self.results.append(msg) + + +# Avoiding distutils.version.LooseVersion due to +# https://github.com/HypothesisWorks/hypothesis/issues/2490 +if tuple(map(int, pytest.__version__.split(".")[:2])) < (4, 6): # pragma: no cover + import warnings + + PYTEST_TOO_OLD_MESSAGE = """ + You are using pytest version %s. Hypothesis tests work with any test + runner, but our pytest plugin requires pytest 4.6 or newer. + Note that the pytest developers no longer support your version either! + Disabling the Hypothesis pytest plugin... + """ + warnings.warn(PYTEST_TOO_OLD_MESSAGE % (pytest.__version__,), stacklevel=1) + +else: + # Restart side-effect detection as early as possible, to maximize coverage. We + # need balanced increment/decrement in configure/sessionstart to support nested + # pytest (e.g. runpytest_inprocess), so this early increment in effect replaces + # the first one in pytest_configure. 
+ if not os.environ.get("HYPOTHESIS_EXTEND_INITIALIZATION"): + _hypothesis_globals.in_initialization += 1 + if "hypothesis" in sys.modules: + # Some other plugin has imported hypothesis, so we'll check if there + # have been undetected side-effects and warn if so. + from hypothesis.configuration import notice_initialization_restarted + + notice_initialization_restarted() + + def pytest_addoption(parser): + group = parser.getgroup("hypothesis", "Hypothesis") + group.addoption( + LOAD_PROFILE_OPTION, + action="store", + help="Load in a registered hypothesis.settings profile", + ) + group.addoption( + VERBOSITY_OPTION, + action="store", + choices=_VERBOSITY_NAMES, + help="Override profile with verbosity setting specified", + ) + group.addoption( + PRINT_STATISTICS_OPTION, + action="store_true", + help="Configure when statistics are printed", + default=False, + ) + group.addoption( + SEED_OPTION, + action="store", + help="Set a seed to use for all Hypothesis tests", + ) + group.addoption( + EXPLAIN_OPTION, + action="store_true", + help="Enable the `explain` phase for failing Hypothesis tests", + default=False, + ) + + def _any_hypothesis_option(config): + return bool(any(config.getoption(opt) for opt in _ALL_OPTIONS)) + + def pytest_report_header(config): + if not ( + config.option.verbose >= 1 + or "hypothesis" in sys.modules + or _any_hypothesis_option(config) + ): + return None + + from hypothesis import Verbosity, settings + + if config.option.verbose < 1 and settings.default.verbosity < Verbosity.verbose: + return None + settings_str = settings.default.show_changed() + if settings_str != "": + settings_str = f" -> {settings_str}" + return ( + f"hypothesis profile {settings.get_current_profile_name()!r}{settings_str}" + ) + + def pytest_configure(config): + config.addinivalue_line("markers", "hypothesis: Tests which use hypothesis.") + if not _any_hypothesis_option(config): + return + from hypothesis import Phase, Verbosity, core, settings + + profile = config.getoption(LOAD_PROFILE_OPTION) + if profile: + settings.load_profile(profile) + verbosity_name = config.getoption(VERBOSITY_OPTION) + if verbosity_name and verbosity_name != settings.default.verbosity.name: + verbosity_value = Verbosity[verbosity_name] + name = ( + f"{settings.get_current_profile_name()}-with-{verbosity_name}-verbosity" + ) + # register_profile creates a new profile, exactly like the current one, + # with the extra values given (in this case 'verbosity') + settings.register_profile(name, verbosity=verbosity_value) + settings.load_profile(name) + if ( + config.getoption(EXPLAIN_OPTION) + and Phase.explain not in settings.default.phases + ): + name = f"{settings.get_current_profile_name()}-with-explain-phase" + phases = (*settings.default.phases, Phase.explain) + settings.register_profile(name, phases=phases) + settings.load_profile(name) + + seed = config.getoption(SEED_OPTION) + if seed is not None: + try: + seed = int(seed) + except ValueError: + pass + core.global_force_seed = seed + + @pytest.hookimpl(hookwrapper=True) + def pytest_runtest_call(item): + __tracebackhide__ = True + if not (hasattr(item, "obj") and "hypothesis" in sys.modules): + yield + return + + from hypothesis import core, is_hypothesis_test + + # See https://github.com/pytest-dev/pytest/issues/9159 + core.pytest_shows_exceptiongroups = ( + getattr(pytest, "version_tuple", ())[:2] >= (7, 2) + or item.config.getoption("tbstyle", "auto") == "native" + ) + core.running_under_pytest = True + + if not is_hypothesis_test(item.obj): + # If @given was 
not applied, check whether other hypothesis + # decorators were applied, and raise an error if they were. + # We add this frame of indirection to enable __tracebackhide__. + def raise_hypothesis_usage_error(msg): + raise InvalidArgument(msg) + + if getattr(item.obj, "is_hypothesis_strategy_function", False): + from hypothesis.errors import InvalidArgument + + raise_hypothesis_usage_error( + f"{item.nodeid} is a function that returns a Hypothesis strategy, " + "but pytest has collected it as a test function. This is useless " + "as the function body will never be executed. To define a test " + "function, use @given instead of @composite." + ) + message = "Using `@%s` on a test without `@given` is completely pointless." + for name, attribute in [ + ("example", "hypothesis_explicit_examples"), + ("seed", "_hypothesis_internal_use_seed"), + ("settings", "_hypothesis_internal_settings_applied"), + ("reproduce_example", "_hypothesis_internal_use_reproduce_failure"), + ]: + if hasattr(item.obj, attribute): + from hypothesis.errors import InvalidArgument + + raise_hypothesis_usage_error(message % (name,)) + yield + return + + from hypothesis import HealthCheck, settings as Settings + from hypothesis.internal.escalation import current_pytest_item + from hypothesis.internal.healthcheck import fail_health_check + from hypothesis.reporting import with_reporter + from hypothesis.statistics import collector, describe_statistics + + # Retrieve the settings for this test from the test object, which + # is normally a Hypothesis wrapped_test wrapper. If this doesn't + # work, the test object is probably something weird + # (e.g a stateful test wrapper), so we skip the function-scoped + # fixture check. + settings = getattr( + item.obj, "_hypothesis_internal_use_settings", Settings.default + ) + + # Check for suspicious use of function-scoped fixtures, but only + # if the corresponding health check is not suppressed. + fixture_params = False + if not set(settings.suppress_health_check).issuperset( + {HealthCheck.function_scoped_fixture, HealthCheck.differing_executors} + ): + # Warn about function-scoped fixtures, excluding autouse fixtures because + # the advice is probably not actionable and the status quo seems OK... + # See https://github.com/HypothesisWorks/hypothesis/issues/377 for detail. + argnames = None + for fx_defs in item._request._fixturemanager.getfixtureinfo( + node=item, func=item.function, cls=None + ).name2fixturedefs.values(): + if argnames is None: + argnames = frozenset(signature(item.function).parameters) + for fx in fx_defs: + fixture_params |= bool(fx.params) + if fx.argname not in argnames: + continue + active_fx = item._request._get_active_fixturedef(fx.argname) + if active_fx.scope == "function": + fail_health_check( + settings, + f"{item.nodeid!r} uses a function-scoped fixture {fx.argname!r}." + "\n\n" + "Function-scoped fixtures are not reset between inputs " + "generated by `@given(...)`, which is often surprising and " + "can cause subtle test bugs." + "\n\n" + "If you were expecting the fixture to run separately " + "for each generated input, then unfortunately you " + "will need to find a different way to achieve your " + "goal (for example, replacing the fixture with a similar " + "context manager inside of the test)." + "\n\n" + "If you are confident that your test will work correctly " + "even though the fixture is not reset between generated " + "inputs, you can suppress this health check with " + "@settings(suppress_health_check=[HealthCheck.function_scoped_fixture]). 
" + "See " + "https://hypothesis.readthedocs.io/en/latest/reference/api.html#hypothesis.HealthCheck " + "for details.", + HealthCheck.function_scoped_fixture, + ) + + if fixture_params or (item.get_closest_marker("parametrize") is not None): + # Disable the differing_executors health check due to false alarms: + # see https://github.com/HypothesisWorks/hypothesis/issues/3733 + fn = getattr(item.obj, "__func__", item.obj) + fn._hypothesis_internal_use_settings = Settings( + parent=settings, + suppress_health_check={HealthCheck.differing_executors} + | set(settings.suppress_health_check), + ) + + # Give every parametrized test invocation a unique database key + key = item.nodeid.encode() + item.obj.hypothesis.inner_test._hypothesis_internal_add_digest = key + + store = StoringReporter(item.config) + + def note_statistics(stats): + stats["nodeid"] = item.nodeid + item.hypothesis_statistics = describe_statistics(stats) + + with ( + collector.with_value(note_statistics), + with_reporter(store), + current_pytest_item.with_value(item), + ): + yield + + if store.results: + item.hypothesis_report_information = "\n".join(store.results) + + def _stash_get(config, key, default): + if hasattr(config, "stash"): + # pytest 7 + return config.stash.get(key, default) + elif hasattr(config, "_store"): + # pytest 5.4 + return config._store.get(key, default) + else: + return getattr(config, key, default) + + @pytest.hookimpl(hookwrapper=True) + def pytest_runtest_makereport(item, call): + report = (yield).get_result() + if hasattr(item, "hypothesis_report_information"): + report.sections.append(("Hypothesis", item.hypothesis_report_information)) + if report.when != "teardown": + return + + terminalreporter = item.config.pluginmanager.getplugin("terminalreporter") + + if hasattr(item, "hypothesis_statistics"): + stats = item.hypothesis_statistics + stats_base64 = base64.b64encode(stats.encode()).decode() + + name = "hypothesis-statistics-" + item.nodeid + + # Include hypothesis information to the junit XML report. + # + # Note that when `pytest-xdist` is enabled, `xml_key` is not present in the + # stash, so we don't add anything to the junit XML report in that scenario. + # https://github.com/pytest-dev/pytest/issues/7767#issuecomment-1082436256 + xml = _stash_get(item.config, xml_key, None) + if xml: + xml.add_global_property(name, stats_base64) + + # If there's a terminal report, include our summary stats for each test + if terminalreporter is not None: + report.__dict__[STATS_KEY] = stats + + # If there's an HTML report, include our summary stats for each test + pytest_html = item.config.pluginmanager.getplugin("html") + if pytest_html is not None: # pragma: no cover + report.extra = [ + *getattr(report, "extra", []), + pytest_html.extras.text(stats, name="Hypothesis stats"), + ] + + # This doesn't intrinsically have anything to do with the terminalreporter; + # we're just cargo-culting a way to get strings back to a single function + # even if the test were distributed with pytest-xdist. + failing_examples = getattr(item, FAILING_EXAMPLES_KEY, None) + if failing_examples and terminalreporter is not None: + try: + from hypothesis.extra._patching import FAIL_MSG, get_patch_for + except ImportError: + return + # We'll save this as a triple of [filename, hunk_before, hunk_after]. 
+ triple = get_patch_for(item.obj, [(x, FAIL_MSG) for x in failing_examples]) + if triple is not None: + report.__dict__[FAILING_EXAMPLES_KEY] = json.dumps(triple) + + def pytest_terminal_summary(terminalreporter): + failing_examples = [] + print_stats = terminalreporter.config.getoption(PRINT_STATISTICS_OPTION) + if print_stats: + terminalreporter.section("Hypothesis Statistics") + for reports in terminalreporter.stats.values(): + for report in reports: + stats = report.__dict__.get(STATS_KEY) + if stats and print_stats: + terminalreporter.write_line(stats + "\n\n") + examples = report.__dict__.get(FAILING_EXAMPLES_KEY) + if examples: + failing_examples.append(json.loads(examples)) + + from hypothesis.internal.observability import _WROTE_TO + + if _WROTE_TO: + terminalreporter.section("Hypothesis") + for fname in sorted(_WROTE_TO): + terminalreporter.write_line(f"observations written to {fname}") + + if failing_examples: + # This must have been imported already to write the failing examples + from hypothesis.extra._patching import gc_patches, make_patch, save_patch + + patch = make_patch(failing_examples) + try: + gc_patches() + fname = save_patch(patch) + except Exception: + # fail gracefully if we hit any filesystem or permissions problems + return + if not _WROTE_TO: + terminalreporter.section("Hypothesis") + terminalreporter.write_line( + f"`git apply {fname}` to add failing examples to your code." + ) + + def pytest_collection_modifyitems(items): + if "hypothesis" not in sys.modules: + return + + from hypothesis import is_hypothesis_test + + for item in items: + if isinstance(item, pytest.Function) and is_hypothesis_test(item.obj): + item.add_marker("hypothesis") + + def pytest_sessionstart(session): + # Note: may be called multiple times, so we can go negative + _hypothesis_globals.in_initialization -= 1 + + # Monkeypatch some internals to prevent applying @pytest.fixture() to a + # function which has already been decorated with @hypothesis.given(). + # (the reverse case is already an explicit error in Hypothesis) + # We do this here so that it catches people on old Pytest versions too. + from _pytest import fixtures + + def _ban_given_call(self, function): + if "hypothesis" in sys.modules: + from hypothesis import is_hypothesis_test + + if is_hypothesis_test(function): + raise RuntimeError( + f"Can't apply @pytest.fixture() to {function.__name__} because " + "it is already decorated with @hypothesis.given()" + ) + return _orig_call(self, function) + + _orig_call = fixtures.FixtureFunctionMarker.__call__ + fixtures.FixtureFunctionMarker.__call__ = _ban_given_call # type: ignore + + if int(pytest.__version__.split(".")[0]) >= 7: # pragma: no branch + # Hook has had this signature since Pytest 7.0, so skip on older versions + + def pytest_ignore_collect(collection_path, config): + # Detect, warn about, and mititgate certain misconfigurations; + # this is mostly educational but can also speed up collection. 
+ if ( + (name := collection_path.name) == ".hypothesis" + and collection_path.is_dir() + and not any(fnmatch(name, p) for p in config.getini("norecursedirs")) + ): + warnings.warn( + "Skipping collection of '.hypothesis' directory - this usually " + "means you've explicitly set the `norecursedirs` pytest config " + "option, replacing rather than extending the default ignores.", + stacklevel=1, + ) + return True + return None # let other hooks decide + + +def load(): + """Required for `pluggy` to load a plugin from setuptools entrypoints.""" diff --git a/vendored/hypothesis/__init__.py b/vendored/hypothesis/__init__.py new file mode 100644 index 0000000..0efe439 --- /dev/null +++ b/vendored/hypothesis/__init__.py @@ -0,0 +1,63 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +"""Hypothesis is a library for writing unit tests which are parametrized by +some source of data. + +It verifies your code against a wide range of input and minimizes any +failing examples it finds. +""" + +import _hypothesis_globals + +from hypothesis._settings import HealthCheck, Phase, Verbosity, settings +from hypothesis.control import ( + assume, + currently_in_test_context, + event, + note, + reject, + target, +) +from hypothesis.core import example, find, given, reproduce_failure, seed +from hypothesis.entry_points import run +from hypothesis.internal.detection import is_hypothesis_test +from hypothesis.internal.entropy import register_random +from hypothesis.utils.conventions import infer +from hypothesis.version import __version__, __version_info__ + +__all__ = [ + "HealthCheck", + "Phase", + "Verbosity", + "__version__", + "__version_info__", + "assume", + "currently_in_test_context", + "event", + "example", + "find", + "given", + "infer", + "is_hypothesis_test", + "note", + "register_random", + "reject", + "reproduce_failure", + "seed", + "settings", + "target", +] + +run() +del run + +_hypothesis_globals.in_initialization -= 1 +del _hypothesis_globals diff --git a/vendored/hypothesis/_settings.py b/vendored/hypothesis/_settings.py new file mode 100644 index 0000000..13c9037 --- /dev/null +++ b/vendored/hypothesis/_settings.py @@ -0,0 +1,1250 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +"""The settings module configures runtime options for Hypothesis. + +Either an explicit settings object can be used or the default object on +this module can be modified. 
+""" + +import contextlib +import datetime +import inspect +import os +import warnings +from collections.abc import Collection, Generator, Sequence +from enum import Enum, EnumMeta, unique +from functools import total_ordering +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Optional, + TypeVar, +) + +from hypothesis.errors import ( + HypothesisDeprecationWarning, + InvalidArgument, +) +from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS +from hypothesis.internal.reflection import get_pretty_function_description +from hypothesis.internal.validation import check_type, try_convert +from hypothesis.utils.conventions import not_set +from hypothesis.utils.dynamicvariables import DynamicVariable + +if TYPE_CHECKING: + from hypothesis.database import ExampleDatabase + +__all__ = ["settings"] + +T = TypeVar("T") +all_settings: list[str] = [ + "max_examples", + "derandomize", + "database", + "verbosity", + "phases", + "stateful_step_count", + "report_multiple_bugs", + "suppress_health_check", + "deadline", + "print_blob", + "backend", +] + + +@unique +@total_ordering +class Verbosity(Enum): + """Options for the |settings.verbosity| argument to |@settings|.""" + + quiet = "quiet" + """ + Hypothesis will not print any output, not even the final falsifying example. + """ + + normal = "normal" + """ + Standard verbosity. Hypothesis will print the falsifying example, alongside + any notes made with |note| (only for the falsfying example). + """ + + verbose = "verbose" + """ + Increased verbosity. In addition to everything in |Verbosity.normal|, Hypothesis + will: + + * Print each test case as it tries it + * Print any notes made with |note| for each test case + * Print each shrinking attempt + * Print all explicit failing examples when using |@example|, instead of only + the simplest one + """ + + debug = "debug" + """ + Even more verbosity. Useful for debugging Hypothesis internals. You probably + don't want this. + """ + + @classmethod + def _missing_(cls, value): + # deprecation pathway for integer values. Can be removed in Hypothesis 7. + if isinstance(value, int) and not isinstance(value, bool): + int_to_name = {0: "quiet", 1: "normal", 2: "verbose", 3: "debug"} + if value in int_to_name: + note_deprecation( + f"Passing Verbosity({value}) as an integer is deprecated. " + "Hypothesis now treats Verbosity values as strings, not integers. 
" + f"Use Verbosity.{int_to_name[value]} instead.", + since="2025-11-05", + has_codemod=False, + stacklevel=2, + ) + return cls(int_to_name[value]) + return None + + def __repr__(self) -> str: + return f"Verbosity.{self.name}" + + @staticmethod + def _int_value(value: "Verbosity") -> int: + # we would just map Verbosity keys, except it's not hashable + mapping = { + Verbosity.quiet.name: 0, + Verbosity.normal.name: 1, + Verbosity.verbose.name: 2, + Verbosity.debug.name: 3, + } + # make sure we don't forget any new verbosity members + assert list(mapping.keys()) == [verbosity.name for verbosity in Verbosity] + return mapping[value.name] + + def __eq__(self, other: Any) -> bool: + if isinstance(other, Verbosity): + return super().__eq__(other) + return Verbosity._int_value(self) == other + + def __gt__(self, other: Any) -> bool: + value1 = Verbosity._int_value(self) + value2 = Verbosity._int_value(other) if isinstance(other, Verbosity) else other + return value1 > value2 + + +@unique +class Phase(Enum): + """Options for the |settings.phases| argument to |@settings|.""" + + explicit = "explicit" + """ + Controls whether explicit examples are run. + """ + + reuse = "reuse" + """ + Controls whether previous examples will be reused. + """ + + generate = "generate" + """ + Controls whether new examples will be generated. + """ + + target = "target" + """ + Controls whether examples will be mutated for targeting. + """ + + shrink = "shrink" + """ + Controls whether examples will be shrunk. + """ + + explain = "explain" + """ + Controls whether Hypothesis attempts to explain test failures. + + The explain phase has two parts, each of which is best-effort - if Hypothesis + can't find a useful explanation, we'll just print the minimal failing example. + """ + + @classmethod + def _missing_(cls, value): + # deprecation pathway for integer values. Can be removed in Hypothesis 7. + if isinstance(value, int) and not isinstance(value, bool): + int_to_name = { + 0: "explicit", + 1: "reuse", + 2: "generate", + 3: "target", + 4: "shrink", + 5: "explain", + } + if value in int_to_name: + note_deprecation( + f"Passing Phase({value}) as an integer is deprecated. " + "Hypothesis now treats Phase values as strings, not integers. " + f"Use Phase.{int_to_name[value]} instead.", + since="2025-11-05", + has_codemod=False, + stacklevel=2, + ) + return cls(int_to_name[value]) + return None + + def __repr__(self) -> str: + return f"Phase.{self.name}" + + +class HealthCheckMeta(EnumMeta): + def __iter__(self): + deprecated = (HealthCheck.return_value, HealthCheck.not_a_test_method) + return iter(x for x in super().__iter__() if x not in deprecated) + + +@unique +class HealthCheck(Enum, metaclass=HealthCheckMeta): + """ + A |HealthCheck| is proactively raised by Hypothesis when Hypothesis detects + that your test has performance problems, which may result in less rigorous + testing than you expect. For example, if your test takes a long time to generate + inputs, or filters away too many inputs using |assume| or |filter|, Hypothesis + will raise a corresponding health check. + + A health check is a proactive warning, not an error. We encourage suppressing + health checks where you have evaluated they will not pose a problem, or where + you have evaluated that fixing the underlying issue is not worthwhile. + + With the exception of |HealthCheck.function_scoped_fixture| and + |HealthCheck.differing_executors|, all health checks warn about performance + problems, not correctness errors. 
+ + Disabling health checks + ----------------------- + + Health checks can be disabled by |settings.suppress_health_check|. To suppress + all health checks, you can pass ``suppress_health_check=list(HealthCheck)``. + + .. seealso:: + + See also the :doc:`/how-to/suppress-healthchecks` how-to. + + Correctness health checks + ------------------------- + + Some health checks report potential correctness errors, rather than performance + problems. + + * |HealthCheck.function_scoped_fixture| indicates that a function-scoped + pytest fixture is used by an |@given| test. Many Hypothesis users expect + function-scoped fixtures to reset once per input, but they actually reset once + per test. We proactively raise |HealthCheck.function_scoped_fixture| to + ensure you have considered this case. + * |HealthCheck.differing_executors| indicates that the same |@given| test has + been executed multiple times with multiple distinct executors. + + We recommend treating these particular health checks with more care, as + suppressing them may result in an unsound test. + """ + + @classmethod + def _missing_(cls, value): + # deprecation pathway for integer values. Can be removed in Hypothesis 7. + if isinstance(value, int) and not isinstance(value, bool): + int_to_name = { + 1: "data_too_large", + 2: "filter_too_much", + 3: "too_slow", + 5: "return_value", + 7: "large_base_example", + 8: "not_a_test_method", + 9: "function_scoped_fixture", + 10: "differing_executors", + 11: "nested_given", + } + if value in int_to_name: + note_deprecation( + f"Passing HealthCheck({value}) as an integer is deprecated. " + "Hypothesis now treats HealthCheck values as strings, not integers. " + f"Use HealthCheck.{int_to_name[value]} instead.", + since="2025-11-05", + has_codemod=False, + stacklevel=2, + ) + return cls(int_to_name[value]) + return None + + def __repr__(self) -> str: + return f"{self.__class__.__name__}.{self.name}" + + @classmethod + def all(cls) -> list["HealthCheck"]: + # Skipping of deprecated attributes is handled in HealthCheckMeta.__iter__ + note_deprecation( + "`HealthCheck.all()` is deprecated; use `list(HealthCheck)` instead.", + since="2023-04-16", + has_codemod=True, + stacklevel=1, + ) + return list(HealthCheck) + + data_too_large = "data_too_large" + """Checks if too many examples are aborted for being too large. + + This is measured by the number of random choices that Hypothesis makes + in order to generate something, not the size of the generated object. + For example, choosing a 100MB object from a predefined list would take + only a few bits, while generating 10KB of JSON from scratch might trigger + this health check. + """ + + filter_too_much = "filter_too_much" + """Check for when the test is filtering out too many examples, either + through use of |assume| or |.filter|, or occasionally for Hypothesis + internal reasons.""" + + too_slow = "too_slow" + """ + Check for when input generation is very slow. Since Hypothesis generates 100 + (by default) inputs per test execution, a slowdown in generating each input + can result in very slow tests overall. + """ + + return_value = "return_value" + """Deprecated; we always error if a test returns a non-None value.""" + + large_base_example = "large_base_example" + """ + Checks if the smallest natural input to your test is very large. This makes + it difficult for Hypothesis to generate good inputs, especially when trying to + shrink failing inputs. 
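+
+    As an illustrative sketch (the exact size threshold is an internal detail,
+    and this strategy is purely hypothetical), a test whose smallest possible
+    input is already enormous can trigger this health check:
+
+    .. code-block:: python
+
+        # even the minimal, fully-shrunk example must contain 10,000 elements
+        @given(st.lists(st.integers(), min_size=10_000))
+        def test_every_input_is_huge(xs):
+            ...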
+ """ + + not_a_test_method = "not_a_test_method" + """Deprecated; we always error if |@given| is applied + to a method defined by :class:`python:unittest.TestCase` (i.e. not a test).""" + + function_scoped_fixture = "function_scoped_fixture" + """Checks if |@given| has been applied to a test + with a pytest function-scoped fixture. Function-scoped fixtures run once + for the whole function, not once per example, and this is usually not what + you want. + + Because of this limitation, tests that need to set up or reset + state for every example need to do so manually within the test itself, + typically using an appropriate context manager. + + Suppress this health check only in the rare case that you are using a + function-scoped fixture that does not need to be reset between individual + examples, but for some reason you cannot use a wider fixture scope + (e.g. session scope, module scope, class scope). + + This check requires the :ref:`Hypothesis pytest plugin`, + which is enabled by default when running Hypothesis inside pytest.""" + + differing_executors = "differing_executors" + """Checks if |@given| has been applied to a test + which is executed by different :ref:`executors`. + If your test function is defined as a method on a class, that class will be + your executor, and subclasses executing an inherited test is a common way + for things to go wrong. + + The correct fix is often to bring the executor instance under the control + of hypothesis by explicit parametrization over, or sampling from, + subclasses, or to refactor so that |@given| is + specified on leaf subclasses.""" + + nested_given = "nested_given" + """Checks if |@given| is used inside another + |@given|. This results in quadratic generation and + shrinking behavior, and can usually be expressed more cleanly by using + :func:`~hypothesis.strategies.data` to replace the inner + |@given|. + + Nesting @given can be appropriate if you set appropriate limits for the + quadratic behavior and cannot easily reexpress the inner function with + :func:`~hypothesis.strategies.data`. To suppress this health check, set + ``suppress_health_check=[HealthCheck.nested_given]`` on the outer + |@given|. Setting it on the inner + |@given| has no effect. If you have more than one + level of nesting, add a suppression for this health check to every + |@given| except the innermost one. 
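+
+    As an illustrative sketch (assuming you have decided the quadratic cost is
+    acceptable for this test), the suppression belongs on the outer test:
+
+    .. code-block:: python
+
+        @settings(suppress_health_check=[HealthCheck.nested_given])
+        @given(st.integers())
+        def test_outer(x):
+            # the inner test re-runs in full for every outer input
+            @given(st.integers())
+            def inner(y):
+                assert x + y == y + x
+
+            inner()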
+ """ + + +class duration(datetime.timedelta): + """A timedelta specifically measured in milliseconds.""" + + def __repr__(self) -> str: + ms = self.total_seconds() * 1000 + return f"timedelta(milliseconds={int(ms) if ms == int(ms) else ms!r})" + + +# see https://adamj.eu/tech/2020/03/09/detect-if-your-tests-are-running-on-ci +# initially from https://github.com/tox-dev/tox/blob/e911788a/src/tox/util/ci.py +_CI_VARS = { + "CI": None, # various, including GitHub Actions, Travis CI, and AppVeyor + # see https://github.com/tox-dev/tox/issues/3442 + "__TOX_ENVIRONMENT_VARIABLE_ORIGINAL_CI": None, + "TF_BUILD": "true", # Azure Pipelines + "bamboo.buildKey": None, # Bamboo + "BUILDKITE": "true", # Buildkite + "CIRCLECI": "true", # Circle CI + "CIRRUS_CI": "true", # Cirrus CI + "CODEBUILD_BUILD_ID": None, # CodeBuild + "GITHUB_ACTIONS": "true", # GitHub Actions + "GITLAB_CI": None, # GitLab CI + "HEROKU_TEST_RUN_ID": None, # Heroku CI + "TEAMCITY_VERSION": None, # TeamCity +} + + +def is_in_ci() -> bool: + return any( + key in os.environ and (value is None or os.environ[key] == value) + for key, value in _CI_VARS.items() + ) + + +default_variable = DynamicVariable[Optional["settings"]](None) + + +def _validate_choices(name: str, value: T, *, choices: Sequence[object]) -> T: + if value not in choices: + msg = f"Invalid {name}, {value!r}. Valid choices: {choices!r}" + raise InvalidArgument(msg) + return value + + +def _validate_enum_value(cls: Any, value: object, *, name: str) -> Any: + try: + return cls(value) + except ValueError: + raise InvalidArgument( + f"{name}={value} is not a valid value. The options " + f"are: {', '.join(repr(m.name) for m in cls)}" + ) from None + + +def _validate_max_examples(max_examples: int) -> int: + check_type(int, max_examples, name="max_examples") + if max_examples < 1: + raise InvalidArgument( + f"max_examples={max_examples!r} must be at least one. If you want " + "to disable generation entirely, use phases=[Phase.explicit] instead." + ) + return max_examples + + +def _validate_database( + database: Optional["ExampleDatabase"], +) -> Optional["ExampleDatabase"]: + from hypothesis.database import ExampleDatabase + + if database is None or isinstance(database, ExampleDatabase): + return database + raise InvalidArgument( + "Arguments to the database setting must be None or an instance of " + "ExampleDatabase. Use one of the database classes in " + "hypothesis.database" + ) + + +def _validate_phases(phases: Collection[Phase]) -> Sequence[Phase]: + phases = try_convert(tuple, phases, "phases") + phases = tuple( + _validate_enum_value(Phase, phase, name="phases") for phase in phases + ) + # sort by definition order + return tuple(phase for phase in list(Phase) if phase in phases) + + +def _validate_stateful_step_count(stateful_step_count: int) -> int: + check_type(int, stateful_step_count, name="stateful_step_count") + if stateful_step_count < 1: + raise InvalidArgument( + f"stateful_step_count={stateful_step_count!r} must be at least one." 
+ ) + return stateful_step_count + + +def _validate_suppress_health_check(suppressions: object) -> tuple[HealthCheck, ...]: + suppressions = try_convert(tuple, suppressions, "suppress_health_check") + for health_check in suppressions: + if health_check in (HealthCheck.return_value, HealthCheck.not_a_test_method): + note_deprecation( + f"The {health_check.name} health check is deprecated, because this is always an error.", + since="2023-03-15", + has_codemod=False, + stacklevel=2, + ) + return tuple( + _validate_enum_value(HealthCheck, health_check, name="suppress_health_check") + for health_check in suppressions + ) + + +def _validate_deadline( + deadline: int | float | datetime.timedelta | None, +) -> duration | None: + if deadline is None: + return deadline + invalid_deadline_error = InvalidArgument( + f"deadline={deadline!r} (type {type(deadline).__name__}) must be a timedelta object, " + "an integer or float number of milliseconds, or None to disable the " + "per-test-case deadline." + ) + if isinstance(deadline, (int, float)): + if isinstance(deadline, bool): + raise invalid_deadline_error + try: + deadline = duration(milliseconds=deadline) + except OverflowError: + raise InvalidArgument( + f"deadline={deadline!r} is invalid, because it is too large to represent " + "as a timedelta. Use deadline=None to disable deadlines." + ) from None + if isinstance(deadline, datetime.timedelta): + if deadline <= datetime.timedelta(0): + raise InvalidArgument( + f"deadline={deadline!r} is invalid, because it is impossible to meet a " + "deadline <= 0. Use deadline=None to disable deadlines." + ) + return duration(seconds=deadline.total_seconds()) + raise invalid_deadline_error + + +def _validate_backend(backend: str) -> str: + if backend not in AVAILABLE_PROVIDERS: + if backend == "crosshair": # pragma: no cover + install = '`pip install "hypothesis[crosshair]"` and try again.' + raise InvalidArgument(f"backend={backend!r} is not available. {install}") + raise InvalidArgument( + f"backend={backend!r} is not available - maybe you need to install a plugin?" + f"\n Installed backends: {sorted(AVAILABLE_PROVIDERS)!r}" + ) + return backend + + +class settingsMeta(type): + def __init__(cls, *args, **kwargs): + super().__init__(*args, **kwargs) + + @property + def default(cls) -> Optional["settings"]: + v = default_variable.value + if v is not None: + return v + if getattr(settings, "_current_profile", None) is not None: + assert settings._current_profile is not None + settings.load_profile(settings._current_profile) + assert default_variable.value is not None + return default_variable.value + + def __setattr__(cls, name: str, value: object) -> None: + if name == "default": + raise AttributeError( + "Cannot assign to the property settings.default - " + "consider using settings.load_profile instead." + ) + elif not name.startswith("_"): + raise AttributeError( + f"Cannot assign hypothesis.settings.{name}={value!r} - the settings " + "class is immutable. You can change the global default " + "settings with settings.load_profile, or use @settings(...) " + "to decorate your test instead." 
+ ) + super().__setattr__(name, value) + + def __repr__(cls): + return "hypothesis.settings" + + +class settings(metaclass=settingsMeta): + """ + A settings object controls the following aspects of test behavior: + |~settings.max_examples|, |~settings.derandomize|, |~settings.database|, + |~settings.verbosity|, |~settings.phases|, |~settings.stateful_step_count|, + |~settings.report_multiple_bugs|, |~settings.suppress_health_check|, + |~settings.deadline|, |~settings.print_blob|, and |~settings.backend|. + + A settings object can be applied as a decorator to a test function, in which + case that test function will use those settings. A test may only have one + settings object applied to it. A settings object can also be passed to + |settings.register_profile| or as a parent to another |settings|. + + Attribute inheritance + --------------------- + + Settings objects are immutable once created. When a settings object is created, + it uses the value specified for each attribute. Any attribute which is + not specified will inherit from its value in the ``parent`` settings object. + If ``parent`` is not passed, any attributes which are not specified will inherit + from the current settings profile instead. + + For instance, ``settings(max_examples=10)`` will have a ``max_examples`` of ``10``, + and the value of all other attributes will be equal to its value in the + current settings profile. + + Changes made from activating a new settings profile with |settings.load_profile| + will be reflected in settings objects created after the profile was loaded, + but not in existing settings objects. + + .. _builtin-profiles: + + Built-in profiles + ----------------- + + While you can register additional profiles with |settings.register_profile|, + Hypothesis comes with two built-in profiles: ``default`` and ``ci``. + + By default, the ``default`` profile is active. If the ``CI`` environment + variable is set to any value, the ``ci`` profile is active by default. Hypothesis + also automatically detects various vendor-specific CI environment variables. + + The attributes of the currently active settings profile can be retrieved with + ``settings()`` (so ``settings().max_examples`` is the currently active default + for |settings.max_examples|). + + The settings attributes for the built-in profiles are as follows: + + .. code-block:: python + + default = settings.register_profile( + "default", + max_examples=100, + derandomize=False, + database=not_set, # see settings.database for the default database + verbosity=Verbosity.normal, + phases=tuple(Phase), + stateful_step_count=50, + report_multiple_bugs=True, + suppress_health_check=(), + deadline=duration(milliseconds=200), + print_blob=False, + backend="hypothesis", + ) + + ci = settings.register_profile( + "ci", + parent=default, + derandomize=True, + deadline=None, + database=None, + print_blob=True, + suppress_health_check=[HealthCheck.too_slow], + ) + + You can replace either of the built-in profiles with |settings.register_profile|: + + .. 
code-block:: python + + # run more examples in CI + settings.register_profile( + "ci", + settings.get_profile("ci"), + max_examples=1000, + ) + """ + + _profiles: ClassVar[dict[str, "settings"]] = {} + _current_profile: ClassVar[str | None] = None + + def __init__( + self, + parent: Optional["settings"] = None, + *, + # This looks pretty strange, but there's good reason: we want Mypy to detect + # bad calls downstream, but not to freak out about the `= not_set` part even + # though it's not semantically valid to pass that as an argument value. + # The intended use is "like **kwargs, but more tractable for tooling". + max_examples: int = not_set, # type: ignore + derandomize: bool = not_set, # type: ignore + database: Optional["ExampleDatabase"] = not_set, # type: ignore + verbosity: "Verbosity" = not_set, # type: ignore + phases: Collection["Phase"] = not_set, # type: ignore + stateful_step_count: int = not_set, # type: ignore + report_multiple_bugs: bool = not_set, # type: ignore + suppress_health_check: Collection["HealthCheck"] = not_set, # type: ignore + deadline: int | float | datetime.timedelta | None = not_set, # type: ignore + print_blob: bool = not_set, # type: ignore + backend: str = not_set, # type: ignore + ) -> None: + self._in_definition = True + + if parent is not None: + check_type(settings, parent, "parent") + if derandomize not in (not_set, False): + if database not in (not_set, None): # type: ignore + raise InvalidArgument( + "derandomize=True implies database=None, so passing " + f"{database=} too is invalid." + ) + database = None + + # fallback is None if we're creating the default settings object, and + # the parent (or default settings object) otherwise + self._fallback = parent or settings.default + self._max_examples = ( + self._fallback.max_examples # type: ignore + if max_examples is not_set # type: ignore + else _validate_max_examples(max_examples) + ) + self._derandomize = ( + self._fallback.derandomize # type: ignore + if derandomize is not_set # type: ignore + else _validate_choices("derandomize", derandomize, choices=[True, False]) + ) + if database is not not_set: # type: ignore + database = _validate_database(database) + self._database = database + self._cached_database = None + self._verbosity = ( + self._fallback.verbosity # type: ignore + if verbosity is not_set # type: ignore + else _validate_enum_value(Verbosity, verbosity, name="verbosity") + ) + self._phases = ( + self._fallback.phases # type: ignore + if phases is not_set # type: ignore + else _validate_phases(phases) + ) + self._stateful_step_count = ( + self._fallback.stateful_step_count # type: ignore + if stateful_step_count is not_set # type: ignore + else _validate_stateful_step_count(stateful_step_count) + ) + self._report_multiple_bugs = ( + self._fallback.report_multiple_bugs # type: ignore + if report_multiple_bugs is not_set # type: ignore + else _validate_choices( + "report_multiple_bugs", report_multiple_bugs, choices=[True, False] + ) + ) + self._suppress_health_check = ( + self._fallback.suppress_health_check # type: ignore + if suppress_health_check is not_set # type: ignore + else _validate_suppress_health_check(suppress_health_check) + ) + self._deadline = ( + self._fallback.deadline # type: ignore + if deadline is not_set # type: ignore + else _validate_deadline(deadline) + ) + self._print_blob = ( + self._fallback.print_blob # type: ignore + if print_blob is not_set # type: ignore + else _validate_choices("print_blob", print_blob, choices=[True, False]) + ) + self._backend = ( 
+ self._fallback.backend # type: ignore + if backend is not_set # type: ignore + else _validate_backend(backend) + ) + + self._in_definition = False + + @property + def max_examples(self): + """ + Once this many satisfying examples have been considered without finding any + counter-example, Hypothesis will stop looking. + + Note that we might call your test function fewer times if we find a bug early + or can tell that we've exhausted the search space; or more if we discard some + examples due to use of .filter(), assume(), or a few other things that can + prevent the test case from completing successfully. + + The default value is chosen to suit a workflow where the test will be part of + a suite that is regularly executed locally or on a CI server, balancing total + running time against the chance of missing a bug. + + If you are writing one-off tests, running tens of thousands of examples is + quite reasonable as Hypothesis may miss uncommon bugs with default settings. + For very complex code, we have observed Hypothesis finding novel bugs after + *several million* examples while testing :pypi:`SymPy `. + If you are running more than 100k examples for a test, consider using our + :ref:`integration for coverage-guided fuzzing ` - it really + shines when given minutes or hours to run. + + The default max examples is ``100``. + """ + return self._max_examples + + @property + def derandomize(self): + """ + If True, seed Hypothesis' random number generator using a hash of the test + function, so that every run will test the same set of examples until you + update Hypothesis, Python, or the test function. + + This allows you to `check for regressions and look for bugs + `__ using separate + settings profiles - for example running + quick deterministic tests on every commit, and a longer non-deterministic + nightly testing run. + + The default is ``False``. If running on CI, the default is ``True`` instead. + """ + return self._derandomize + + @property + def database(self): + """ + An instance of |ExampleDatabase| that will be used to save examples to + and load previous examples from. + + If not set, a |DirectoryBasedExampleDatabase| is created in the current + working directory under ``.hypothesis/examples``. If this location is + unusable, e.g. due to the lack of read or write permissions, Hypothesis + will emit a warning and fall back to an |InMemoryExampleDatabase|. + + If ``None``, no storage will be used. + + See the :ref:`database documentation ` for a list of database + classes, and how to define custom database classes. + """ + from hypothesis.database import _db_for_path + + # settings.database has two conflicting requirements: + # * The default settings should respect changes to set_hypothesis_home_dir + # in-between accesses + # * `s.database is s.database` should be true, except for the default settings + # + # We therefore cache s.database for everything except the default settings, + # which always recomputes dynamically. + if self._fallback is None: + # if self._fallback is None, we are the default settings, at which point + # we should recompute the database dynamically + assert self._database is not_set + return _db_for_path(not_set) + + # otherwise, we cache the database + if self._cached_database is None: + self._cached_database = ( + self._fallback.database if self._database is not_set else self._database + ) + return self._cached_database + + @property + def verbosity(self): + """ + Control the verbosity level of Hypothesis messages. 
+ + To see what's going on while Hypothesis runs your tests, you can turn + up the verbosity setting. + + .. code-block:: pycon + + >>> from hypothesis import settings, Verbosity + >>> from hypothesis.strategies import lists, integers + >>> @given(lists(integers())) + ... @settings(verbosity=Verbosity.verbose) + ... def f(x): + ... assert not any(x) + ... f() + Trying example: [] + Falsifying example: [-1198601713, -67, 116, -29578] + Shrunk example to [-1198601713] + Shrunk example to [-128] + Shrunk example to [32] + Shrunk example to [1] + [1] + + The four levels are |Verbosity.quiet|, |Verbosity.normal|, + |Verbosity.verbose|, and |Verbosity.debug|. |Verbosity.normal| is the + default. For |Verbosity.quiet|, Hypothesis will not print anything out, + not even the final falsifying example. |Verbosity.debug| is basically + |Verbosity.verbose| but a bit more so. You probably don't want it. + + Verbosity can be passed either as a |Verbosity| enum value, or as the + corresponding string value, or as the corresponding integer value. For + example: + + .. code-block:: python + + # these three are equivalent + settings(verbosity=Verbosity.verbose) + settings(verbosity="verbose") + + If you are using :pypi:`pytest`, you may also need to :doc:`disable + output capturing for passing tests ` + to see verbose output as tests run. + """ + return self._verbosity + + @property + def phases(self): + """ + Control which phases should be run. + + Hypothesis divides tests into logically distinct phases. + + - |Phase.explicit|: Running explicit examples from |@example|. + - |Phase.reuse|: Running examples from the database which previously failed. + - |Phase.generate|: Generating new random examples. + - |Phase.target|: Mutating examples for :ref:`targeted property-based + testing `. Requires |Phase.generate|. + - |Phase.shrink|: Shrinking failing examples. + - |Phase.explain|: Attempting to explain why a failure occurred. + Requires |Phase.shrink|. + + The phases argument accepts a collection with any subset of these. E.g. + ``settings(phases=[Phase.generate, Phase.shrink])`` will generate new examples + and shrink them, but will not run explicit examples or reuse previous failures, + while ``settings(phases=[Phase.explicit])`` will only run explicit examples + from |@example|. + + Phases can be passed either as a |Phase| enum value, or as the corresponding + string value. For example: + + .. code-block:: python + + # these two are equivalent + settings(phases=[Phase.explicit]) + settings(phases=["explicit"]) + + Following the first failure, Hypothesis will (usually, depending on + which |Phase| is enabled) track which lines of code are always run on + failing but never on passing inputs. On 3.12+, this uses + :mod:`sys.monitoring`, while 3.11 and earlier uses :func:`python:sys.settrace`. + For python 3.11 and earlier, we therefore automatically disable the explain + phase on PyPy, or if you are using :pypi:`coverage` or a debugger. If + there are no clearly suspicious lines of code, :pep:`we refuse the + temptation to guess <20>`. + + After shrinking to a minimal failing example, Hypothesis will try to find + parts of the example -- e.g. separate args to |@given| + -- which can vary freely without changing the result + of that minimal failing example. If the automated experiments run without + finding a passing variation, we leave a comment in the final report: + + .. 
code-block:: python + + test_x_divided_by_y( + x=0, # or any other generated value + y=0, + ) + + Just remember that the *lack* of an explanation sometimes just means that + Hypothesis couldn't efficiently find one, not that no explanation (or + simpler failing example) exists. + """ + + return self._phases + + @property + def stateful_step_count(self): + """ + The maximum number of times to call an additional |@rule| method in + :ref:`stateful testing ` before we give up on finding a bug. + + Note that this setting is effectively multiplicative with max_examples, + as each example will run for a maximum of ``stateful_step_count`` steps. + + The default stateful step count is ``50``. + """ + return self._stateful_step_count + + @property + def report_multiple_bugs(self): + """ + Because Hypothesis runs the test many times, it can sometimes find multiple + bugs in a single run. Reporting all of them at once is usually very useful, + but replacing the exceptions can occasionally clash with debuggers. + If disabled, only the exception with the smallest minimal example is raised. + + The default value is ``True``. + """ + return self._report_multiple_bugs + + @property + def suppress_health_check(self): + """ + Suppress the given |HealthCheck| exceptions. Those health checks will not + be raised by Hypothesis. To suppress all health checks, you can pass + ``suppress_health_check=list(HealthCheck)``. + + Health checks can be passed either as a |HealthCheck| enum value, or as + the corresponding string value. For example: + + .. code-block:: python + + # these two are equivalent + settings(suppress_health_check=[HealthCheck.filter_too_much]) + settings(suppress_health_check=["filter_too_much"]) + + Health checks are proactive warnings, not correctness errors, so we + encourage suppressing health checks where you have evaluated they will + not pose a problem, or where you have evaluated that fixing the underlying + issue is not worthwhile. + + .. seealso:: + + See also the :doc:`/how-to/suppress-healthchecks` how-to. + """ + return self._suppress_health_check + + @property + def deadline(self): + """ + The maximum allowed duration of an individual test case, in milliseconds. + You can pass an integer, float, or timedelta. If ``None``, the deadline + is disabled entirely. + + We treat the deadline as a soft limit in some cases, where that would + avoid flakiness due to timing variability. + + The default deadline is 200 milliseconds. If running on CI, the default is + ``None`` instead. + """ + return self._deadline + + @property + def print_blob(self): + """ + If set to ``True``, Hypothesis will print code for failing examples that + can be used with |@reproduce_failure| to reproduce the failing example. + + The default value is ``False``. If running on CI, the default is ``True`` instead. + """ + return self._print_blob + + @property + def backend(self): + """ + .. warning:: + + EXPERIMENTAL AND UNSTABLE - see :ref:`alternative-backends`. + + The importable name of a backend which Hypothesis should use to generate + primitive types. We support heuristic-random, solver-based, and fuzzing-based + backends. + """ + return self._backend + + def __call__(self, test: T) -> T: + """Make the settings object (self) an attribute of the test. + + The settings are later discovered by looking them up on the test itself. + """ + # Aliasing as Any avoids mypy errors (attr-defined) when accessing and + # setting custom attributes on the decorated function or class. 
+ _test: Any = test + + # Using the alias here avoids a mypy error (return-value) later when + # ``test`` is returned, because this check results in type refinement. + if not callable(_test): + raise InvalidArgument( + "settings objects can be called as a decorator with @given, " + f"but decorated {test=} is not callable." + ) + if inspect.isclass(test): + from hypothesis.stateful import RuleBasedStateMachine + + if issubclass(_test, RuleBasedStateMachine): + attr_name = "_hypothesis_internal_settings_applied" + if getattr(test, attr_name, False): + raise InvalidArgument( + "Applying the @settings decorator twice would " + "overwrite the first version; merge their arguments " + "instead." + ) + setattr(test, attr_name, True) + _test.TestCase.settings = self + return test + else: + raise InvalidArgument( + "@settings(...) can only be used as a decorator on " + "functions, or on subclasses of RuleBasedStateMachine." + ) + if hasattr(_test, "_hypothesis_internal_settings_applied"): + # Can't use _hypothesis_internal_use_settings as an indicator that + # @settings was applied, because @given also assigns that attribute. + descr = get_pretty_function_description(test) + raise InvalidArgument( + f"{descr} has already been decorated with a settings object.\n" + f" Previous: {_test._hypothesis_internal_use_settings!r}\n" + f" This: {self!r}" + ) + + _test._hypothesis_internal_use_settings = self + _test._hypothesis_internal_settings_applied = True + return test + + def __setattr__(self, name: str, value: object) -> None: + if not name.startswith("_") and not self._in_definition: + raise AttributeError("settings objects are immutable") + return super().__setattr__(name, value) + + def __repr__(self) -> str: + bits = sorted( + f"{name}={getattr(self, name)!r}" + for name in all_settings + if (name != "backend" or len(AVAILABLE_PROVIDERS) > 1) # experimental + ) + return "settings({})".format(", ".join(bits)) + + def show_changed(self) -> str: + bits = [] + for name in all_settings: + value = getattr(self, name) + if value != getattr(default, name): + bits.append(f"{name}={value!r}") + return ", ".join(sorted(bits, key=len)) + + @staticmethod + def register_profile( + name: str, + parent: Optional["settings"] = None, + **kwargs: Any, + ) -> None: + """ + Register a settings object as a settings profile, under the name ``name``. + The ``parent`` and ``kwargs`` arguments to this method are as for + |settings|. + + If a settings profile already exists under ``name``, it will be overwritten. + Registering a profile with the same name as the currently active profile + will cause those changes to take effect in the active profile immediately, + and do not require reloading the profile. + + Registered settings profiles can be retrieved later by name with + |settings.get_profile|. + """ + check_type(str, name, "name") + + if ( + default_variable.value + and settings._current_profile + and default_variable.value != settings._profiles[settings._current_profile] + ): + note_deprecation( + "Cannot register a settings profile when the current settings differ " + "from the current profile (usually due to an @settings decorator). " + "Register profiles at module level instead.", + since="2025-11-15", + has_codemod=False, + ) + + # if we just pass the parent and no kwargs, like + # settings.register_profile(settings(max_examples=10)) + # then optimize out the pointless intermediate settings object which + # would just forward everything to the parent. 
+ settings._profiles[name] = ( + parent + if parent is not None and not kwargs + else settings(parent=parent, **kwargs) + ) + if settings._current_profile == name: + settings.load_profile(name) + + @staticmethod + def get_profile(name: str) -> "settings": + """ + Returns the settings profile registered under ``name``. If no settings + profile is registered under ``name``, raises |InvalidArgument|. + """ + check_type(str, name, "name") + try: + return settings._profiles[name] + except KeyError: + raise InvalidArgument(f"Profile {name!r} is not registered") from None + + @staticmethod + def load_profile(name: str) -> None: + """ + Makes the settings profile registered under ``name`` the active profile. + + If no settings profile is registered under ``name``, raises |InvalidArgument|. + """ + check_type(str, name, "name") + settings._current_profile = name + default_variable.value = settings.get_profile(name) + + @staticmethod + def get_current_profile_name() -> str: + """ + The name of the current settings profile. For example: + + .. code-block:: python + + >>> settings.load_profile("myprofile") + >>> settings.get_current_profile_name() + 'myprofile' + """ + assert settings._current_profile is not None + return settings._current_profile + + +@contextlib.contextmanager +def local_settings(s: settings) -> Generator[settings, None, None]: + with default_variable.with_value(s): + yield s + + +def note_deprecation( + message: str, *, since: str, has_codemod: bool, stacklevel: int = 0 +) -> None: + if since != "RELEASEDAY": + date = datetime.date.fromisoformat(since) + assert datetime.date(2021, 1, 1) <= date + if has_codemod: + message += ( + "\n The `hypothesis codemod` command-line tool can automatically " + "refactor your code to fix this warning." + ) + warnings.warn(HypothesisDeprecationWarning(message), stacklevel=2 + stacklevel) + + +default = settings( + max_examples=100, + derandomize=False, + database=not_set, # type: ignore + verbosity=Verbosity.normal, + phases=tuple(Phase), + stateful_step_count=50, + report_multiple_bugs=True, + suppress_health_check=(), + deadline=duration(milliseconds=200), + print_blob=False, + backend="hypothesis", +) +settings.register_profile("default", default) +settings.load_profile("default") + +assert settings.default is not None + +CI = settings( + derandomize=True, + deadline=None, + database=None, + print_blob=True, + suppress_health_check=[HealthCheck.too_slow], +) + +settings.register_profile("ci", CI) + + +if is_in_ci(): # pragma: no cover # covered in ci, but not locally + settings.load_profile("ci") + +assert settings.default is not None + + +# Check that the kwonly args to settings.__init__ is the same as the set of +# defined settings - in case we've added or remove something from one but +# not the other. +assert set(all_settings) == { + p.name + for p in inspect.signature(settings.__init__).parameters.values() + if p.kind == inspect.Parameter.KEYWORD_ONLY +} diff --git a/vendored/hypothesis/configuration.py b/vendored/hypothesis/configuration.py new file mode 100644 index 0000000..205f7eb --- /dev/null +++ b/vendored/hypothesis/configuration.py @@ -0,0 +1,107 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. 
If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import os +import sys +import warnings +from pathlib import Path + +import _hypothesis_globals + +from hypothesis.errors import HypothesisSideeffectWarning + +__hypothesis_home_directory_default = Path.cwd() / ".hypothesis" +__hypothesis_home_directory = None + + +def set_hypothesis_home_dir(directory: str | Path | None) -> None: + global __hypothesis_home_directory + __hypothesis_home_directory = None if directory is None else Path(directory) + + +def storage_directory(*names: str, intent_to_write: bool = True) -> Path: + if intent_to_write: + check_sideeffect_during_initialization( + "accessing storage for {}", "/".join(names) + ) + + global __hypothesis_home_directory + if not __hypothesis_home_directory: + if where := os.getenv("HYPOTHESIS_STORAGE_DIRECTORY"): + __hypothesis_home_directory = Path(where) + if not __hypothesis_home_directory: + __hypothesis_home_directory = __hypothesis_home_directory_default + return __hypothesis_home_directory.joinpath(*names) + + +_first_postinit_what = None + + +def check_sideeffect_during_initialization( + what: str, *fmt_args: object, is_restart: bool = False +) -> None: + """Called from locations that should not be executed during initialization, for example + touching disk or materializing lazy/deferred strategies from plugins. If initialization + is in progress, a warning is emitted. + + Note that computing the repr can take nontrivial time or memory, so we avoid doing so + unless (and until) we're actually emitting the warning. + """ + global _first_postinit_what + # This is not a particularly hot path, but neither is it doing productive work, so we want to + # minimize the cost by returning immediately. The drawback is that we require + # notice_initialization_restarted() to be called if in_initialization changes away from zero. + if _first_postinit_what is not None: + return + elif _hypothesis_globals.in_initialization > 0: + msg = what.format(*fmt_args) + if is_restart: + when = "between importing hypothesis and loading the hypothesis plugin" + elif "_hypothesis_pytestplugin" in sys.modules or os.getenv( + "HYPOTHESIS_EXTEND_INITIALIZATION" + ): + when = "during pytest plugin or conftest initialization" + else: # pragma: no cover + # This can be triggered by Hypothesis plugins, but is really annoying + # to test automatically - drop st.text().example() in hypothesis.run() + # to manually confirm that we get the warning. + when = "at import time" + # Note: -Werror is insufficient under pytest, as doesn't take effect until + # test session start. + text = ( + f"Slow code in plugin: avoid {msg} {when}! Set PYTHONWARNINGS=error " + "to get a traceback and show which plugin is responsible." + ) + if is_restart: + text += " Additionally, set HYPOTHESIS_EXTEND_INITIALIZATION=1 to pinpoint the exact location." + warnings.warn( + text, + HypothesisSideeffectWarning, + stacklevel=3, + ) + else: + _first_postinit_what = (what, fmt_args) + + +def notice_initialization_restarted(*, warn: bool = True) -> None: + """Reset _first_postinit_what, so that we don't think we're in post-init. Additionally, if it + was set that means that there has been a sideeffect that we haven't warned about, so do that + now (the warning text will be correct, and we also hint that the stacktrace can be improved). 
+ """ + global _first_postinit_what + if _first_postinit_what is not None: + what, *fmt_args = _first_postinit_what + _first_postinit_what = None + if warn: + check_sideeffect_during_initialization( + what, + *fmt_args, + is_restart=True, + ) diff --git a/vendored/hypothesis/control.py b/vendored/hypothesis/control.py new file mode 100644 index 0000000..11f8223 --- /dev/null +++ b/vendored/hypothesis/control.py @@ -0,0 +1,354 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import inspect +import math +import random +from collections import defaultdict +from collections.abc import Callable, Sequence +from contextlib import contextmanager +from typing import Any, Literal, NoReturn, Optional, overload +from weakref import WeakKeyDictionary + +from hypothesis import Verbosity, settings +from hypothesis._settings import note_deprecation +from hypothesis.errors import InvalidArgument, UnsatisfiedAssumption +from hypothesis.internal.compat import BaseExceptionGroup +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.observability import observability_enabled +from hypothesis.internal.reflection import get_pretty_function_description +from hypothesis.internal.validation import check_type +from hypothesis.reporting import report, verbose_report +from hypothesis.utils.dynamicvariables import DynamicVariable +from hypothesis.vendor.pretty import IDKey, PrettyPrintFunction, pretty + + +def _calling_function_location(what: str, frame: Any) -> str: + where = frame.f_back + return f"{what}() in {where.f_code.co_name} (line {where.f_lineno})" + + +def reject() -> NoReturn: + if _current_build_context.value is None: + note_deprecation( + "Using `reject` outside a property-based test is deprecated", + since="2023-09-25", + has_codemod=False, + ) + where = _calling_function_location("reject", inspect.currentframe()) + if currently_in_test_context(): + counts = current_build_context().data._observability_predicates[where] + counts.update_count(condition=False) + raise UnsatisfiedAssumption(where) + + +@overload +def assume(condition: Literal[False] | None) -> NoReturn: ... +@overload +def assume(condition: object) -> Literal[True]: ... + + +def assume(condition: object) -> Literal[True]: + """Calling ``assume`` is like an :ref:`assert ` that marks + the example as bad, rather than failing the test. + + This allows you to specify properties that you *assume* will be + true, and let Hypothesis try to avoid similar examples in future. 
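+
+    For example (an illustrative sketch; almost every generated integer
+    satisfies the assumption, so very little data is discarded):
+
+    .. code-block:: python
+
+        @given(st.integers())
+        def test_divmod_invariant(n):
+            assume(n != 0)  # reject only the single bad input
+            assert (100 // n) * n + (100 % n) == 100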
+ """ + if _current_build_context.value is None: + note_deprecation( + "Using `assume` outside a property-based test is deprecated", + since="2023-09-25", + has_codemod=False, + ) + if observability_enabled() or not condition: + where = _calling_function_location("assume", inspect.currentframe()) + if observability_enabled() and currently_in_test_context(): + counts = current_build_context().data._observability_predicates[where] + counts.update_count(condition=bool(condition)) + if not condition: + raise UnsatisfiedAssumption(f"failed to satisfy {where}") + return True + + +_current_build_context = DynamicVariable[Optional["BuildContext"]](None) + + +def currently_in_test_context() -> bool: + """Return ``True`` if the calling code is currently running inside an + |@given| or :ref:`stateful ` test, and ``False`` otherwise. + + This is useful for third-party integrations and assertion helpers which + may be called from either traditional or property-based tests, and can only + use e.g. |assume| or |target| in the latter case. + """ + return _current_build_context.value is not None + + +def current_build_context() -> "BuildContext": + context = _current_build_context.value + if context is None: + raise InvalidArgument("No build context registered") + return context + + +@contextmanager +def deprecate_random_in_strategy(fmt, *args): + from hypothesis.internal import entropy + + state_before = random.getstate() + yield + state_after = random.getstate() + if ( + # there is a threading race condition here with deterministic_PRNG. Say + # we have two threads 1 and 2. We start in global random state A, and + # deterministic_PRNG sets to global random state B (which is constant across + # threads since we seed to 0 unconditionally). Then we might have state + # transitions: + # + # [1] [2] + # A -> B deterministic_PRNG().__enter__ + # B ->B deterministic_PRNG().__enter__ + # state_before = B deprecate_random_in_strategy.__enter__ + # B -> A deterministic_PRNG().__exit__ + # state_after = A deprecate_random_in_strategy.__exit__ + # + # where state_before != state_after because a different thread has reset + # the global random state. + # + # To fix this, we track the known random states set by deterministic_PRNG, + # and will not note a deprecation if it matches one of those. + state_after != state_before + and hash(state_after) not in entropy._known_random_state_hashes + ): + note_deprecation( + "Do not use the `random` module inside strategies; instead " + "consider `st.randoms()`, `st.sampled_from()`, etc. " + fmt.format(*args), + since="2024-02-05", + has_codemod=False, + stacklevel=1, + ) + + +class BuildContext: + def __init__( + self, + data: ConjectureData, + *, + is_final: bool = False, + wrapped_test: Callable, + ) -> None: + self.data = data + self.tasks: list[Callable[[], Any]] = [] + self.is_final = is_final + self.wrapped_test = wrapped_test + + # Use defaultdict(list) here to handle the possibility of having multiple + # functions registered for the same object (due to caching, small ints, etc). + # The printer will discard duplicates which return different representations. + self.known_object_printers: dict[IDKey, list[PrettyPrintFunction]] = ( + defaultdict(list) + ) + + def record_call( + self, + obj: object, + func: object, + *, + args: Sequence[object], + kwargs: dict[str, object], + ) -> None: + self.known_object_printers[IDKey(obj)].append( + # _func=func prevents mypy from inferring lambda type. Would need + # paramspec I think - not worth it. 
+ lambda obj, p, cycle, *, _func=func: p.maybe_repr_known_object_as_call( # type: ignore + obj, cycle, get_pretty_function_description(_func), args, kwargs + ) + ) + + def prep_args_kwargs_from_strategies(self, kwarg_strategies): + arg_labels = {} + kwargs = {} + for k, s in kwarg_strategies.items(): + start_idx = len(self.data.nodes) + with deprecate_random_in_strategy("from {}={!r}", k, s): + obj = self.data.draw(s, observe_as=f"generate:{k}") + end_idx = len(self.data.nodes) + kwargs[k] = obj + + # This high up the stack, we can't see or really do much with the conjecture + # Example objects - not least because they're only materialized after the + # test case is completed. Instead, we'll stash the (start_idx, end_idx) + # pair on our data object for the ConjectureRunner engine to deal with, and + # pass a dict of such out so that the pretty-printer knows where to place + # the which-parts-matter comments later. + if start_idx != end_idx: + arg_labels[k] = (start_idx, end_idx) + self.data.arg_slices.add((start_idx, end_idx)) + + return kwargs, arg_labels + + def __enter__(self): + self.assign_variable = _current_build_context.with_value(self) + self.assign_variable.__enter__() + return self + + def __exit__(self, exc_type, exc_value, tb): + self.assign_variable.__exit__(exc_type, exc_value, tb) + errors = [] + for task in self.tasks: + try: + task() + except BaseException as err: + errors.append(err) + if errors: + if len(errors) == 1: + raise errors[0] from exc_value + raise BaseExceptionGroup("Cleanup failed", errors) from exc_value + + +def cleanup(teardown): + """Register a function to be called when the current test has finished + executing. Any exceptions thrown in teardown will be printed but not + rethrown. + + Inside a test this isn't very interesting, because you can just use + a finally block, but note that you can use this inside map, flatmap, + etc. in order to e.g. insist that a value is closed at the end. + """ + context = _current_build_context.value + if context is None: + raise InvalidArgument("Cannot register cleanup outside of build context") + context.tasks.append(teardown) + + +def should_note(): + context = _current_build_context.value + if context is None: + raise InvalidArgument("Cannot make notes outside of a test") + return context.is_final or settings.default.verbosity >= Verbosity.verbose + + +def note(value: object) -> None: + """Report this value for the minimal failing example.""" + if should_note(): + if not isinstance(value, str): + value = pretty(value) + report(value) + + +def event(value: str, payload: str | int | float = "") -> None: + """Record an event that occurred during this test. Statistics on the number of test + runs with each event will be reported at the end if you run Hypothesis in + statistics reporting mode. + + Event values should be strings or convertible to them. If an optional + payload is given, it will be included in the string for :ref:`statistics`. 
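+
+    For example (an illustrative sketch, recording the list length as a
+    numeric payload):
+
+    .. code-block:: python
+
+        @given(st.lists(st.integers()))
+        def test_sorting_is_idempotent(xs):
+            event("input length", payload=len(xs))
+            assert sorted(sorted(xs)) == sorted(xs)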
+ """ + context = _current_build_context.value + if context is None: + raise InvalidArgument("Cannot record events outside of a test") + + avoid_realization = context.data.provider.avoid_realization + payload = _event_to_string( + payload, allowed_types=(str, int, float), avoid_realization=avoid_realization + ) + value = _event_to_string(value, avoid_realization=avoid_realization) + context.data.events[value] = payload + + +_events_to_strings: WeakKeyDictionary = WeakKeyDictionary() + + +def _event_to_string(event, *, allowed_types=str, avoid_realization): + if isinstance(event, allowed_types): + return event + + # _events_to_strings is a cache which persists across iterations, causing + # problems for symbolic backends. see + # https://github.com/pschanely/hypothesis-crosshair/issues/41 + if avoid_realization: + return str(event) + + try: + return _events_to_strings[event] + except (KeyError, TypeError): + pass + + result = str(event) + try: + _events_to_strings[event] = result + except TypeError: + pass + return result + + +def target(observation: int | float, *, label: str = "") -> int | float: + """Calling this function with an ``int`` or ``float`` observation gives it feedback + with which to guide our search for inputs that will cause an error, in + addition to all the usual heuristics. Observations must always be finite. + + Hypothesis will try to maximize the observed value over several examples; + almost any metric will work so long as it makes sense to increase it. + For example, ``-abs(error)`` is a metric that increases as ``error`` + approaches zero. + + Example metrics: + + - Number of elements in a collection, or tasks in a queue + - Mean or maximum runtime of a task (or both, if you use ``label``) + - Compression ratio for data (perhaps per-algorithm or per-level) + - Number of steps taken by a state machine + + The optional ``label`` argument can be used to distinguish between + and therefore separately optimise distinct observations, such as the + mean and standard deviation of a dataset. It is an error to call + ``target()`` with any label more than once per test case. + + .. note:: + The more examples you run, the better this technique works. + + As a rule of thumb, the targeting effect is noticeable above + :obj:`max_examples=1000 `, + and immediately obvious by around ten thousand examples + *per label* used by your test. + + :ref:`statistics` include the best score seen for each label, + which can help avoid `the threshold problem + `__ when the minimal + example shrinks right down to the threshold of failure (:issue:`2180`). + """ + check_type((int, float), observation, "observation") + if not math.isfinite(observation): + raise InvalidArgument(f"{observation=} must be a finite float.") + check_type(str, label, "label") + + context = _current_build_context.value + if context is None: + raise InvalidArgument( + "Calling target() outside of a test is invalid. " + "Consider guarding this call with `if currently_in_test_context(): ...`" + ) + elif context.data.provider.avoid_realization: + # We could in principle realize this in the engine, but it seems more + # efficient to have our alternative backend optimize it for us. + # See e.g. 
https://github.com/pschanely/hypothesis-crosshair/issues/3 + return observation # pragma: no cover + verbose_report(f"Saw target({observation!r}, {label=})") + + if label in context.data.target_observations: + raise InvalidArgument( + f"Calling target({observation!r}, {label=}) would overwrite " + f"target({context.data.target_observations[label]!r}, {label=})" + ) + else: + context.data.target_observations[label] = observation + + return observation diff --git a/vendored/hypothesis/core.py b/vendored/hypothesis/core.py new file mode 100644 index 0000000..66fb9fc --- /dev/null +++ b/vendored/hypothesis/core.py @@ -0,0 +1,2409 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +"""This module provides the core primitives of Hypothesis, such as given.""" + +import base64 +import contextlib +import dataclasses +import datetime +import inspect +import io +import math +import os +import sys +import threading +import time +import traceback +import types +import unittest +import warnings +import zlib +from collections import defaultdict +from collections.abc import Callable, Coroutine, Generator, Hashable, Iterable, Sequence +from dataclasses import dataclass, field +from functools import partial +from inspect import Parameter +from random import Random +from threading import Lock +from types import EllipsisType +from typing import ( + Any, + BinaryIO, + TypeVar, + overload, +) +from unittest import TestCase + +from hypothesis import strategies as st +from hypothesis._settings import ( + HealthCheck, + Phase, + Verbosity, + all_settings, + local_settings, + settings as Settings, +) +from hypothesis.control import BuildContext, currently_in_test_context +from hypothesis.database import choices_from_bytes, choices_to_bytes +from hypothesis.errors import ( + BackendCannotProceed, + DeadlineExceeded, + DidNotReproduce, + FailedHealthCheck, + FlakyFailure, + FlakyReplay, + Found, + Frozen, + HypothesisException, + HypothesisWarning, + InvalidArgument, + NoSuchExample, + StopTest, + Unsatisfiable, + UnsatisfiedAssumption, +) +from hypothesis.internal import observability +from hypothesis.internal.compat import ( + PYPY, + BaseExceptionGroup, + add_note, + bad_django_TestCase, + get_type_hints, + int_from_bytes, +) +from hypothesis.internal.conjecture.choice import ChoiceT +from hypothesis.internal.conjecture.data import ConjectureData, Status +from hypothesis.internal.conjecture.engine import BUFFER_SIZE, ConjectureRunner +from hypothesis.internal.conjecture.junkdrawer import ( + ensure_free_stackframes, + gc_cumulative_time, +) +from hypothesis.internal.conjecture.providers import ( + BytestringProvider, + PrimitiveProvider, +) +from hypothesis.internal.conjecture.shrinker import sort_key +from hypothesis.internal.entropy import deterministic_PRNG +from hypothesis.internal.escalation import ( + InterestingOrigin, + current_pytest_item, + format_exception, + get_trimmed_traceback, + is_hypothesis_file, +) +from hypothesis.internal.healthcheck import fail_health_check +from hypothesis.internal.observability import ( + InfoObservation, + InfoObservationType, + deliver_observation, + make_testcase, + observability_enabled, +) +from 
hypothesis.internal.reflection import ( + convert_positional_arguments, + define_function_signature, + function_digest, + get_pretty_function_description, + get_signature, + impersonate, + is_mock, + nicerepr, + proxies, + repr_call, +) +from hypothesis.internal.scrutineer import ( + MONITORING_TOOL_ID, + Trace, + Tracer, + explanatory_lines, + tractable_coverage_report, +) +from hypothesis.internal.validation import check_type +from hypothesis.reporting import ( + current_verbosity, + report, + verbose_report, + with_reporter, +) +from hypothesis.statistics import describe_statistics, describe_targets, note_statistics +from hypothesis.strategies._internal.misc import NOTHING +from hypothesis.strategies._internal.strategies import ( + Ex, + SearchStrategy, + check_strategy, +) +from hypothesis.utils.conventions import not_set +from hypothesis.utils.threading import ThreadLocal +from hypothesis.vendor.pretty import RepresentationPrinter +from hypothesis.version import __version__ + +TestFunc = TypeVar("TestFunc", bound=Callable) + + +running_under_pytest = False +pytest_shows_exceptiongroups = True +global_force_seed = None +# `threadlocal` stores "engine-global" constants, which are global relative to a +# ConjectureRunner instance (roughly speaking). Since only one conjecture runner +# instance can be active per thread, making engine constants thread-local prevents +# the ConjectureRunner instances of concurrent threads from treading on each other. +threadlocal = ThreadLocal(_hypothesis_global_random=lambda: None) + + +@dataclass(slots=True, frozen=False) +class Example: + args: Any + kwargs: Any + # Plus two optional arguments for .xfail() + raises: Any = field(default=None) + reason: Any = field(default=None) + + +@dataclass(slots=True, frozen=True) +class ReportableError: + fragments: list[str] + exception: BaseException + + +# TODO_DOCS link to not-yet-existent patch-dumping docs + + +class example: + """ + Add an explicit input to a Hypothesis test, which Hypothesis will always + try before generating random inputs. This combines the randomized nature of + Hypothesis generation with a traditional parametrized test. + + For example: + + .. code-block:: python + + @example("Hello world") + @example("some string with special significance") + @given(st.text()) + def test_strings(s): + pass + + will call ``test_strings("Hello World")`` and + ``test_strings("some string with special significance")`` before generating + any random inputs. |@example| may be placed in any order relative to |@given| + and |@settings|. + + Explicit inputs from |@example| are run in the |Phase.explicit| phase. + Explicit inputs do not count towards |settings.max_examples|. Note that + explicit inputs added by |@example| do not shrink. If an explicit input + fails, Hypothesis will stop and report the failure without generating any + random inputs. + + |@example| can also be used to easily reproduce a failure. For instance, if + Hypothesis reports that ``f(n=[0, math.nan])`` fails, you can add + ``@example(n=[0, math.nan])`` to your test to quickly reproduce that failure. + + Arguments to ``@example`` + ------------------------- + + Arguments to |@example| have the same behavior and restrictions as arguments + to |@given|. This means they may be either positional or keyword arguments + (but not both in the same |@example|): + + .. 
code-block:: python + + @example(1, 2) + @example(x=1, y=2) + @given(st.integers(), st.integers()) + def test(x, y): + pass + + Noting that while arguments to |@given| are strategies (like |st.integers|), + arguments to |@example| are values instead (like ``1``). + + See the :ref:`given-arguments` section for full details. + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + if args and kwargs: + raise InvalidArgument( + "Cannot mix positional and keyword arguments for examples" + ) + if not (args or kwargs): + raise InvalidArgument("An example must provide at least one argument") + + self.hypothesis_explicit_examples: list[Example] = [] + self._this_example = Example(tuple(args), kwargs) + + def __call__(self, test: TestFunc) -> TestFunc: + if not hasattr(test, "hypothesis_explicit_examples"): + test.hypothesis_explicit_examples = self.hypothesis_explicit_examples # type: ignore + test.hypothesis_explicit_examples.append(self._this_example) # type: ignore + return test + + def xfail( + self, + condition: bool = True, # noqa: FBT002 + *, + reason: str = "", + raises: type[BaseException] | tuple[type[BaseException], ...] = BaseException, + ) -> "example": + """Mark this example as an expected failure, similarly to + :obj:`pytest.mark.xfail(strict=True) `. + + Expected-failing examples allow you to check that your test does fail on + some examples, and therefore build confidence that *passing* tests are + because your code is working, not because the test is missing something. + + .. code-block:: python + + @example(...).xfail() + @example(...).xfail(reason="Prices must be non-negative") + @example(...).xfail(raises=(KeyError, ValueError)) + @example(...).xfail(sys.version_info[:2] >= (3, 12), reason="needs py 3.12") + @example(...).xfail(condition=sys.platform != "linux", raises=OSError) + def test(x): + pass + + .. note:: + + Expected-failing examples are handled separately from those generated + by strategies, so you should usually ensure that there is no overlap. + + .. code-block:: python + + @example(x=1, y=0).xfail(raises=ZeroDivisionError) + @given(x=st.just(1), y=st.integers()) # Missing `.filter(bool)`! + def test_fraction(x, y): + # This test will try the explicit example and see it fail as + # expected, then go on to generate more examples from the + # strategy. If we happen to generate y=0, the test will fail + # because only the explicit example is treated as xfailing. + x / y + """ + check_type(bool, condition, "condition") + check_type(str, reason, "reason") + if not ( + isinstance(raises, type) and issubclass(raises, BaseException) + ) and not ( + isinstance(raises, tuple) + and raises # () -> expected to fail with no error, which is impossible + and all( + isinstance(r, type) and issubclass(r, BaseException) for r in raises + ) + ): + raise InvalidArgument( + f"{raises=} must be an exception type or tuple of exception types" + ) + if condition: + self._this_example = dataclasses.replace( + self._this_example, raises=raises, reason=reason + ) + return self + + def via(self, whence: str, /) -> "example": + """Attach a machine-readable label noting what the origin of this example + was. |example.via| is completely optional and does not change runtime + behavior. + + |example.via| is intended to support self-documenting behavior, as well as + tooling which might add (or remove) |@example| decorators automatically. + For example: + + .. code-block:: python + + # Annotating examples is optional and does not change runtime behavior + @example(...) 
+ @example(...).via("regression test for issue #42") + @example(...).via("discovered failure") + def test(x): + pass + + .. note:: + + `HypoFuzz `_ uses |example.via| to tag examples + in the patch of its high-coverage set of explicit inputs, on + `the patches page `_. + """ + if not isinstance(whence, str): + raise InvalidArgument(".via() must be passed a string") + # This is deliberately a no-op at runtime; the tools operate on source code. + return self + + +def seed(seed: Hashable) -> Callable[[TestFunc], TestFunc]: + """ + Seed the randomness for this test. + + ``seed`` may be any hashable object. No exact meaning for ``seed`` is provided + other than that for a fixed seed value Hypothesis will produce the same + examples (assuming that there are no other sources of nondeterminisim, such + as timing, hash randomization, or external state). + + For example, the following test function and |RuleBasedStateMachine| will + each generate the same series of examples each time they are executed: + + .. code-block:: python + + @seed(1234) + @given(st.integers()) + def test(n): ... + + @seed(6789) + class MyMachine(RuleBasedStateMachine): ... + + If using pytest, you can alternatively pass ``--hypothesis-seed`` on the + command line. + + Setting a seed overrides |settings.derandomize|, which is designed to enable + deterministic CI tests rather than reproducing observed failures. + + Hypothesis will only print the seed which would reproduce a failure if a test + fails in an unexpected way, for instance inside Hypothesis internals. + """ + + def accept(test): + test._hypothesis_internal_use_seed = seed + current_settings = getattr(test, "_hypothesis_internal_use_settings", None) + test._hypothesis_internal_use_settings = Settings( + current_settings, database=None + ) + return test + + return accept + + +# TODO_DOCS: link to /explanation/choice-sequence + + +def reproduce_failure(version: str, blob: bytes) -> Callable[[TestFunc], TestFunc]: + """ + Run the example corresponding to the binary ``blob`` in order to reproduce a + failure. ``blob`` is a serialized version of the internal input representation + of Hypothesis. + + A test decorated with |@reproduce_failure| always runs exactly one example, + which is expected to cause a failure. If the provided ``blob`` does not + cause a failure, Hypothesis will raise |DidNotReproduce|. + + Hypothesis will print an |@reproduce_failure| decorator if + |settings.print_blob| is ``True`` (which is the default in CI). + + |@reproduce_failure| is intended to be temporarily added to your test suite in + order to reproduce a failure. It is not intended to be a permanent addition to + your test suite. Because of this, no compatibility guarantees are made across + Hypothesis versions, and |@reproduce_failure| will error if used on a different + Hypothesis version than it was created for. + + .. seealso:: + + See also the :doc:`/tutorial/replaying-failures` tutorial. 
+ """ + + def accept(test): + test._hypothesis_internal_use_reproduce_failure = (version, blob) + return test + + return accept + + +def reproduction_decorator(choices: Iterable[ChoiceT]) -> str: + return f"@reproduce_failure({__version__!r}, {encode_failure(choices)!r})" + + +def encode_failure(choices: Iterable[ChoiceT]) -> bytes: + blob = choices_to_bytes(choices) + compressed = zlib.compress(blob) + if len(compressed) < len(blob): + blob = b"\1" + compressed + else: + blob = b"\0" + blob + return base64.b64encode(blob) + + +def decode_failure(blob: bytes) -> Sequence[ChoiceT]: + try: + decoded = base64.b64decode(blob) + except Exception: + raise InvalidArgument(f"Invalid base64 encoded string: {blob!r}") from None + + prefix = decoded[:1] + if prefix == b"\0": + decoded = decoded[1:] + elif prefix == b"\1": + try: + decoded = zlib.decompress(decoded[1:]) + except zlib.error as err: + raise InvalidArgument( + f"Invalid zlib compression for blob {blob!r}" + ) from err + else: + raise InvalidArgument( + f"Could not decode blob {blob!r}: Invalid start byte {prefix!r}" + ) + + choices = choices_from_bytes(decoded) + if choices is None: + raise InvalidArgument(f"Invalid serialized choice sequence for blob {blob!r}") + + return choices + + +def _invalid(message, *, exc=InvalidArgument, test, given_kwargs): + @impersonate(test) + def wrapped_test(*arguments, **kwargs): # pragma: no cover # coverage limitation + raise exc(message) + + wrapped_test.is_hypothesis_test = True + wrapped_test.hypothesis = HypothesisHandle( + inner_test=test, + _get_fuzz_target=wrapped_test, + _given_kwargs=given_kwargs, + ) + return wrapped_test + + +def is_invalid_test(test, original_sig, given_arguments, given_kwargs): + """Check the arguments to ``@given`` for basic usage constraints. + + Most errors are not raised immediately; instead we return a dummy test + function that will raise the appropriate error if it is actually called. + When the user runs a subset of tests (e.g via ``pytest -k``), errors will + only be reported for tests that actually ran. + """ + invalid = partial(_invalid, test=test, given_kwargs=given_kwargs) + + if not (given_arguments or given_kwargs): + return invalid("given must be called with at least one argument") + + params = list(original_sig.parameters.values()) + pos_params = [p for p in params if p.kind is p.POSITIONAL_OR_KEYWORD] + kwonly_params = [p for p in params if p.kind is p.KEYWORD_ONLY] + if given_arguments and params != pos_params: + return invalid( + "positional arguments to @given are not supported with varargs, " + "varkeywords, positional-only, or keyword-only arguments" + ) + + if len(given_arguments) > len(pos_params): + return invalid( + f"Too many positional arguments for {test.__name__}() were passed to " + f"@given - expected at most {len(pos_params)} " + f"arguments, but got {len(given_arguments)} {given_arguments!r}" + ) + + if ... in given_arguments: + return invalid( + "... was passed as a positional argument to @given, but may only be " + "passed as a keyword argument or as the sole argument of @given" + ) + + if given_arguments and given_kwargs: + return invalid("cannot mix positional and keyword arguments to @given") + extra_kwargs = [ + k for k in given_kwargs if k not in {p.name for p in pos_params + kwonly_params} + ] + if extra_kwargs and (params == [] or params[-1].kind is not params[-1].VAR_KEYWORD): + arg = extra_kwargs[0] + extra = "" + if arg in all_settings: + extra = f". Did you mean @settings({arg}={given_kwargs[arg]!r})?" 
+ return invalid( + f"{test.__name__}() got an unexpected keyword argument {arg!r}, " + f"from `{arg}={given_kwargs[arg]!r}` in @given{extra}" + ) + if any(p.default is not p.empty for p in params): + return invalid("Cannot apply @given to a function with defaults.") + + # This case would raise Unsatisfiable *anyway*, but by detecting it here we can + # provide a much more helpful error message for people e.g. using the Ghostwriter. + empty = [ + f"{s!r} (arg {idx})" for idx, s in enumerate(given_arguments) if s is NOTHING + ] + [f"{name}={s!r}" for name, s in given_kwargs.items() if s is NOTHING] + if empty: + strats = "strategies" if len(empty) > 1 else "strategy" + return invalid( + f"Cannot generate examples from empty {strats}: " + ", ".join(empty), + exc=Unsatisfiable, + ) + + +def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_sig): + assert isinstance(state, StateForActualGivenExecution) + posargs = [ + p.name + for p in original_sig.parameters.values() + if p.kind is p.POSITIONAL_OR_KEYWORD + ] + + for example in reversed(getattr(wrapped_test, "hypothesis_explicit_examples", ())): + assert isinstance(example, Example) + # All of this validation is to check that @example() got "the same" arguments + # as @given, i.e. corresponding to the same parameters, even though they might + # be any mixture of positional and keyword arguments. + if example.args: + assert not example.kwargs + if any( + p.kind is p.POSITIONAL_ONLY for p in original_sig.parameters.values() + ): + raise InvalidArgument( + "Cannot pass positional arguments to @example() when decorating " + "a test function which has positional-only parameters." + ) + if len(example.args) > len(posargs): + raise InvalidArgument( + "example has too many arguments for test. Expected at most " + f"{len(posargs)} but got {len(example.args)}" + ) + example_kwargs = dict( + zip(posargs[-len(example.args) :], example.args, strict=True) + ) + else: + example_kwargs = dict(example.kwargs) + given_kws = ", ".join( + repr(k) for k in sorted(wrapped_test.hypothesis._given_kwargs) + ) + example_kws = ", ".join(repr(k) for k in sorted(example_kwargs)) + if given_kws != example_kws: + raise InvalidArgument( + f"Inconsistent args: @given() got strategies for {given_kws}, " + f"but @example() got arguments for {example_kws}" + ) from None + + # This is certainly true because the example_kwargs exactly match the params + # reserved by @given(), which are then remove from the function signature. + assert set(example_kwargs).isdisjoint(kwargs) + example_kwargs.update(kwargs) + + if Phase.explicit not in state.settings.phases: + continue + + with local_settings(state.settings): + fragments_reported = [] + empty_data = ConjectureData.for_choices([]) + try: + execute_example = partial( + state.execute_once, + empty_data, + is_final=True, + print_example=True, + example_kwargs=example_kwargs, + ) + with with_reporter(fragments_reported.append): + if example.raises is None: + execute_example() + else: + # @example(...).xfail(...) 
+ bits = ", ".join(nicerepr(x) for x in arguments) + ", ".join( + f"{k}={nicerepr(v)}" for k, v in example_kwargs.items() + ) + try: + execute_example() + except failure_exceptions_to_catch() as err: + if not isinstance(err, example.raises): + raise + # Save a string form of this example; we'll warn if it's + # ever generated by the strategy (which can't be xfailed) + state.xfail_example_reprs.add( + repr_call(state.test, arguments, example_kwargs) + ) + except example.raises as err: + # We'd usually check this as early as possible, but it's + # possible for failure_exceptions_to_catch() to grow when + # e.g. pytest is imported between import- and test-time. + raise InvalidArgument( + f"@example({bits}) raised an expected {err!r}, " + "but Hypothesis does not treat this as a test failure" + ) from err + else: + # Unexpectedly passing; always raise an error in this case. + reason = f" because {example.reason}" * bool(example.reason) + if example.raises is BaseException: + name = "exception" # special-case no raises= arg + elif not isinstance(example.raises, tuple): + name = example.raises.__name__ + elif len(example.raises) == 1: + name = example.raises[0].__name__ + else: + name = ( + ", ".join(ex.__name__ for ex in example.raises[:-1]) + + f", or {example.raises[-1].__name__}" + ) + vowel = name.upper()[0] in "AEIOU" + raise AssertionError( + f"Expected a{'n' * vowel} {name} from @example({bits})" + f"{reason}, but no exception was raised." + ) + except UnsatisfiedAssumption: + # Odd though it seems, we deliberately support explicit examples that + # are then rejected by a call to `assume()`. As well as iterative + # development, this is rather useful to replay Hypothesis' part of + # a saved failure when other arguments are supplied by e.g. pytest. + # See https://github.com/HypothesisWorks/hypothesis/issues/2125 + with contextlib.suppress(StopTest): + empty_data.conclude_test(Status.INVALID) + except BaseException as err: + # In order to support reporting of multiple failing examples, we yield + # each of the (report text, error) pairs we find back to the top-level + # runner. This also ensures that user-facing stack traces have as few + # frames of Hypothesis internals as possible. + err = err.with_traceback(get_trimmed_traceback()) + + # One user error - whether misunderstanding or typo - we've seen a few + # times is to pass strategies to @example() where values are expected. + # Checking is easy, and false-positives not much of a problem, so: + if isinstance(err, failure_exceptions_to_catch()) and any( + isinstance(arg, SearchStrategy) + for arg in example.args + tuple(example.kwargs.values()) + ): + new = HypothesisWarning( + "The @example() decorator expects to be passed values, but " + "you passed strategies instead. See https://hypothesis." + "readthedocs.io/en/latest/reference/api.html#hypothesis" + ".example for details." 
+ ) + new.__cause__ = err + err = new + + with contextlib.suppress(StopTest): + empty_data.conclude_test(Status.INVALID) + yield ReportableError(fragments_reported, err) + if ( + state.settings.report_multiple_bugs + and pytest_shows_exceptiongroups + and isinstance(err, failure_exceptions_to_catch()) + and not isinstance(err, skip_exceptions_to_reraise()) + ): + continue + break + finally: + if fragments_reported: + assert fragments_reported[0].startswith("Falsifying example") + fragments_reported[0] = fragments_reported[0].replace( + "Falsifying example", "Falsifying explicit example", 1 + ) + + empty_data.freeze() + if observability_enabled(): + tc = make_testcase( + run_start=state._start_timestamp, + property=state.test_identifier, + data=empty_data, + how_generated="explicit example", + representation=state._string_repr, + timing=state._timing_features, + ) + deliver_observation(tc) + + if fragments_reported: + verbose_report(fragments_reported[0].replace("Falsifying", "Trying", 1)) + for f in fragments_reported[1:]: + verbose_report(f) + + +def get_random_for_wrapped_test(test, wrapped_test): + settings = wrapped_test._hypothesis_internal_use_settings + wrapped_test._hypothesis_internal_use_generated_seed = None + + if wrapped_test._hypothesis_internal_use_seed is not None: + return Random(wrapped_test._hypothesis_internal_use_seed) + + if settings.derandomize: + return Random(int_from_bytes(function_digest(test))) + + if global_force_seed is not None: + return Random(global_force_seed) + + if threadlocal._hypothesis_global_random is None: # pragma: no cover + threadlocal._hypothesis_global_random = Random() + seed = threadlocal._hypothesis_global_random.getrandbits(128) + wrapped_test._hypothesis_internal_use_generated_seed = seed + return Random(seed) + + +@dataclass(slots=True, frozen=False) +class Stuff: + selfy: Any + args: tuple + kwargs: dict + given_kwargs: dict + + +def process_arguments_to_given( + wrapped_test: Any, + arguments: Sequence[object], + kwargs: dict[str, object], + given_kwargs: dict[str, SearchStrategy], + params: dict[str, Parameter], +) -> tuple[Sequence[object], dict[str, object], Stuff]: + selfy = None + arguments, kwargs = convert_positional_arguments(wrapped_test, arguments, kwargs) + + # If the test function is a method of some kind, the bound object + # will be the first named argument if there are any, otherwise the + # first vararg (if any). + posargs = [p.name for p in params.values() if p.kind is p.POSITIONAL_OR_KEYWORD] + if posargs: + selfy = kwargs.get(posargs[0]) + elif arguments: + selfy = arguments[0] + + # Ensure that we don't mistake mocks for self here. + # This can cause the mock to be used as the test runner. + if is_mock(selfy): + selfy = None + + arguments = tuple(arguments) + + with ensure_free_stackframes(): + for k, s in given_kwargs.items(): + check_strategy(s, name=k) + s.validate() + + stuff = Stuff(selfy=selfy, args=arguments, kwargs=kwargs, given_kwargs=given_kwargs) + + return arguments, kwargs, stuff + + +def skip_exceptions_to_reraise(): + """Return a tuple of exceptions meaning 'skip this test', to re-raise. 
+ + This is intended to cover most common test runners; if you would + like another to be added please open an issue or pull request adding + it to this function and to tests/cover/test_lazy_import.py + """ + # This is a set in case any library simply re-exports another's Skip exception + exceptions = set() + # We use this sys.modules trick to avoid importing libraries - + # you can't be an instance of a type from an unimported module! + # This is fast enough that we don't need to cache the result, + # and more importantly it avoids possible side-effects :-) + if "unittest" in sys.modules: + exceptions.add(sys.modules["unittest"].SkipTest) + if "_pytest.outcomes" in sys.modules: + exceptions.add(sys.modules["_pytest.outcomes"].Skipped) + return tuple(sorted(exceptions, key=str)) + + +def failure_exceptions_to_catch() -> tuple[type[BaseException], ...]: + """Return a tuple of exceptions meaning 'this test has failed', to catch. + + This is intended to cover most common test runners; if you would + like another to be added please open an issue or pull request. + """ + # While SystemExit and GeneratorExit are instances of BaseException, we also + # expect them to be deterministic - unlike KeyboardInterrupt - and so we treat + # them as standard exceptions, check for flakiness, etc. + # See https://github.com/HypothesisWorks/hypothesis/issues/2223 for details. + exceptions = [Exception, SystemExit, GeneratorExit] + if "_pytest.outcomes" in sys.modules: + exceptions.append(sys.modules["_pytest.outcomes"].Failed) + return tuple(exceptions) + + +def new_given_signature(original_sig, given_kwargs): + """Make an updated signature for the wrapped test.""" + return original_sig.replace( + parameters=[ + p + for p in original_sig.parameters.values() + if not ( + p.name in given_kwargs + and p.kind in (p.POSITIONAL_OR_KEYWORD, p.KEYWORD_ONLY) + ) + ], + return_annotation=None, + ) + + +def default_executor(data, function): + return function(data) + + +def get_executor(runner): + try: + execute_example = runner.execute_example + except AttributeError: + pass + else: + return lambda data, function: execute_example(partial(function, data)) + + if hasattr(runner, "setup_example") or hasattr(runner, "teardown_example"): + setup = getattr(runner, "setup_example", None) or (lambda: None) + teardown = getattr(runner, "teardown_example", None) or (lambda ex: None) + + def execute(data, function): + token = None + try: + token = setup() + return function(data) + finally: + teardown(token) + + return execute + + return default_executor + + +# This function is a crude solution, a better way of resolving it would probably +# be to rewrite a bunch of exception handlers to use except*. +T = TypeVar("T", bound=BaseException) + + +def _flatten_group(excgroup: BaseExceptionGroup[T]) -> list[T]: + found_exceptions: list[T] = [] + for exc in excgroup.exceptions: + if isinstance(exc, BaseExceptionGroup): + found_exceptions.extend(_flatten_group(exc)) + else: + found_exceptions.append(exc) + return found_exceptions + + +@contextlib.contextmanager +def unwrap_markers_from_group() -> Generator[None, None, None]: + try: + yield + except BaseExceptionGroup as excgroup: + _frozen_exceptions, non_frozen_exceptions = excgroup.split(Frozen) + + # group only contains Frozen, reraise the group + # it doesn't matter what we raise, since any exceptions get disregarded + # and reraised as StopTest if data got frozen. 
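A standalone illustration of the BaseExceptionGroup.split() call used just above (runnable on Python 3.11+, where BaseExceptionGroup is a builtin): split() partitions a group into a (matching, rest) pair of sub-groups, returning None for a side that would be empty.

eg = BaseExceptionGroup("demo", [ValueError("v"), TypeError("t")])
matching, rest = eg.split(ValueError)
assert [type(e) for e in matching.exceptions] == [ValueError]
assert [type(e) for e in rest.exceptions] == [TypeError]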
+ if non_frozen_exceptions is None: + raise + # in all other cases they are discarded + + # Can RewindRecursive end up in this group? + _, user_exceptions = non_frozen_exceptions.split( + lambda e: isinstance(e, (StopTest, HypothesisException)) + ) + + # this might contain marker exceptions, or internal errors, but not frozen. + if user_exceptions is not None: + raise + + # single marker exception - reraise it + flattened_non_frozen_exceptions: list[BaseException] = _flatten_group( + non_frozen_exceptions + ) + if len(flattened_non_frozen_exceptions) == 1: + e = flattened_non_frozen_exceptions[0] + # preserve the cause of the original exception to not hinder debugging + # note that __context__ is still lost though + raise e from e.__cause__ + + # multiple marker exceptions. If we re-raise the whole group we break + # a bunch of logic so ....? + stoptests, non_stoptests = non_frozen_exceptions.split(StopTest) + + # TODO: stoptest+hypothesisexception ...? Is it possible? If so, what do? + + if non_stoptests: + # TODO: multiple marker exceptions is easy to produce, but the logic in the + # engine does not handle it... so we just reraise the first one for now. + e = _flatten_group(non_stoptests)[0] + raise e from e.__cause__ + assert stoptests is not None + + # multiple stoptests: raising the one with the lowest testcounter + raise min(_flatten_group(stoptests), key=lambda s_e: s_e.testcounter) + + +class StateForActualGivenExecution: + def __init__( + self, stuff, test, settings, random, wrapped_test, *, thread_overlap=None + ): + self.stuff = stuff + self.test = test + self.settings = settings + self.random = random + self.wrapped_test = wrapped_test + self.thread_overlap = {} if thread_overlap is None else thread_overlap + + self.test_runner = get_executor(stuff.selfy) + self.print_given_args = getattr( + wrapped_test, "_hypothesis_internal_print_given_args", True + ) + + self.last_exception = None + self.falsifying_examples = () + self.ever_executed = False + self.xfail_example_reprs = set() + self.files_to_propagate = set() + self.failed_normally = False + self.failed_due_to_deadline = False + + self.explain_traces = defaultdict(set) + self._start_timestamp = time.time() + self._string_repr = "" + self._timing_features = {} + + @property + def test_identifier(self) -> str: + return getattr( + current_pytest_item.value, "nodeid", None + ) or get_pretty_function_description(self.wrapped_test) + + def _should_trace(self): + # NOTE: we explicitly support monkeypatching this. Keep the namespace + # access intact. + _trace_obs = ( + observability_enabled() and observability.OBSERVABILITY_COLLECT_COVERAGE + ) + _trace_failure = ( + self.failed_normally + and not self.failed_due_to_deadline + and {Phase.shrink, Phase.explain}.issubset(self.settings.phases) + ) + return _trace_obs or _trace_failure + + def execute_once( + self, + data, + *, + print_example=False, + is_final=False, + expected_failure=None, + example_kwargs=None, + ): + """Run the test function once, using ``data`` as input. + + If the test raises an exception, it will propagate through to the + caller of this method. Depending on its type, this could represent + an ordinary test failure, or a fatal error, or a control exception. + + If this method returns normally, the test might have passed, or + it might have placed ``data`` in an unsuccessful state and then + swallowed the corresponding control exception. 
+ """ + + self.ever_executed = True + + self._string_repr = "" + text_repr = None + if self.settings.deadline is None and not observability_enabled(): + + @proxies(self.test) + def test(*args, **kwargs): + with unwrap_markers_from_group(), ensure_free_stackframes(): + return self.test(*args, **kwargs) + + else: + + @proxies(self.test) + def test(*args, **kwargs): + arg_drawtime = math.fsum(data.draw_times.values()) + arg_stateful = math.fsum(data._stateful_run_times.values()) + arg_gctime = gc_cumulative_time() + with unwrap_markers_from_group(), ensure_free_stackframes(): + start = time.perf_counter() + try: + result = self.test(*args, **kwargs) + finally: + finish = time.perf_counter() + in_drawtime = math.fsum(data.draw_times.values()) - arg_drawtime + in_stateful = ( + math.fsum(data._stateful_run_times.values()) - arg_stateful + ) + in_gctime = gc_cumulative_time() - arg_gctime + runtime = finish - start - in_drawtime - in_stateful - in_gctime + self._timing_features = { + "execute:test": runtime, + "overall:gc": in_gctime, + **data.draw_times, + **data._stateful_run_times, + } + + if ( + (current_deadline := self.settings.deadline) is not None + # we disable the deadline check under concurrent threads, since + # cpython may switch away from a thread for arbitrarily long. + and not self.thread_overlap.get(threading.get_ident(), False) + ): + if not is_final: + current_deadline = (current_deadline // 4) * 5 + if runtime >= current_deadline.total_seconds(): + raise DeadlineExceeded( + datetime.timedelta(seconds=runtime), self.settings.deadline + ) + return result + + def run(data: ConjectureData) -> None: + # Set up dynamic context needed by a single test run. + if self.stuff.selfy is not None: + data.hypothesis_runner = self.stuff.selfy + # Generate all arguments to the test function. 
+ args = self.stuff.args + kwargs = dict(self.stuff.kwargs) + if example_kwargs is None: + kw, argslices = context.prep_args_kwargs_from_strategies( + self.stuff.given_kwargs + ) + else: + kw = example_kwargs + argslices = {} + kwargs.update(kw) + if expected_failure is not None: + nonlocal text_repr + text_repr = repr_call(test, args, kwargs) + + if print_example or current_verbosity() >= Verbosity.verbose: + printer = RepresentationPrinter(context=context) + if print_example: + printer.text("Falsifying example:") + else: + printer.text("Trying example:") + + if self.print_given_args: + printer.text(" ") + printer.repr_call( + test.__name__, + args, + kwargs, + force_split=True, + arg_slices=argslices, + leading_comment=( + "# " + context.data.slice_comments[(0, 0)] + if (0, 0) in context.data.slice_comments + else None + ), + avoid_realization=data.provider.avoid_realization, + ) + report(printer.getvalue()) + + if observability_enabled(): + printer = RepresentationPrinter(context=context) + printer.repr_call( + test.__name__, + args, + kwargs, + force_split=True, + arg_slices=argslices, + leading_comment=( + "# " + context.data.slice_comments[(0, 0)] + if (0, 0) in context.data.slice_comments + else None + ), + avoid_realization=data.provider.avoid_realization, + ) + self._string_repr = printer.getvalue() + + try: + return test(*args, **kwargs) + except TypeError as e: + # If we sampled from a sequence of strategies, AND failed with a + # TypeError, *AND that exception mentions SearchStrategy*, add a note: + if ( + "SearchStrategy" in str(e) + and data._sampled_from_all_strategies_elements_message is not None + ): + msg, format_arg = data._sampled_from_all_strategies_elements_message + add_note(e, msg.format(format_arg)) + raise + finally: + if data._stateful_repr_parts is not None: + self._string_repr = "\n".join(data._stateful_repr_parts) + + if observability_enabled(): + printer = RepresentationPrinter(context=context) + for name, value in data._observability_args.items(): + if name.startswith("generate:Draw "): + try: + value = data.provider.realize(value) + except BackendCannotProceed: # pragma: no cover + value = "" + printer.text(f"\n{name.removeprefix('generate:')}: ") + printer.pretty(value) + + self._string_repr += printer.getvalue() + + # self.test_runner can include the execute_example method, or setup/teardown + # _example, so it's important to get the PRNG and build context in place first. + with ( + local_settings(self.settings), + deterministic_PRNG(), + BuildContext( + data, is_final=is_final, wrapped_test=self.wrapped_test + ) as context, + ): + # providers may throw in per_case_context_fn, and we'd like + # `result` to still be set in these cases. + result = None + with data.provider.per_test_case_context_manager(): + # Run the test function once, via the executor hook. + # In most cases this will delegate straight to `run(data)`. + result = self.test_runner(data, run) + + # If a failure was expected, it should have been raised already, so + # instead raise an appropriate diagnostic error. + if expected_failure is not None: + exception, traceback = expected_failure + if isinstance(exception, DeadlineExceeded) and ( + runtime_secs := math.fsum( + v + for k, v in self._timing_features.items() + if k.startswith("execute:") + ) + ): + report( + "Unreliable test timings! 
On an initial run, this " + f"test took {exception.runtime.total_seconds() * 1000:.2f}ms, " + "which exceeded the deadline of " + f"{self.settings.deadline.total_seconds() * 1000:.2f}ms, but " + f"on a subsequent run it took {runtime_secs * 1000:.2f} ms, " + "which did not. If you expect this sort of " + "variability in your test timings, consider turning " + "deadlines off for this test by setting deadline=None." + ) + else: + report("Failed to reproduce exception. Expected: \n" + traceback) + raise FlakyFailure( + f"Hypothesis {text_repr} produces unreliable results: " + "Falsified on the first call but did not on a subsequent one", + [exception], + ) + return result + + def _flaky_replay_to_failure( + self, err: FlakyReplay, context: BaseException + ) -> FlakyFailure: + # Note that in the mark_interesting case, _context_ itself + # is part of err._interesting_examples - but it's not in + # _runner.interesting_examples - this is fine, as the context + # (i.e., immediate exception) is appended. + interesting_examples = [ + self._runner.interesting_examples[origin] + for origin in err._interesting_origins + if origin in self._runner.interesting_examples + ] + exceptions = [result.expected_exception for result in interesting_examples] + exceptions.append(context) # the immediate exception + return FlakyFailure(err.reason, exceptions) + + def _execute_once_for_engine(self, data: ConjectureData) -> None: + """Wrapper around ``execute_once`` that intercepts test failure + exceptions and single-test control exceptions, and turns them into + appropriate method calls to `data` instead. + + This allows the engine to assume that any exception other than + ``StopTest`` must be a fatal error, and should stop the entire engine. + """ + trace: Trace = set() + try: + with Tracer(should_trace=self._should_trace()) as tracer: + try: + result = self.execute_once(data) + if ( + data.status == Status.VALID and tracer.branches + ): # pragma: no cover + # This is in fact covered by our *non-coverage* tests, but due + # to the settrace() contention *not* by our coverage tests. + self.explain_traces[None].add(frozenset(tracer.branches)) + finally: + trace = tracer.branches + if result is not None: + fail_health_check( + self.settings, + "Tests run under @given should return None, but " + f"{self.test.__name__} returned {result!r} instead.", + HealthCheck.return_value, + ) + except UnsatisfiedAssumption as e: + # An "assume" check failed, so instead we inform the engine that + # this test run was invalid. + try: + data.mark_invalid(e.reason) + except FlakyReplay as err: + # This was unexpected, meaning that the assume was flaky. + # Report it as such. + raise self._flaky_replay_to_failure(err, e) from None + except (StopTest, BackendCannotProceed): + # The engine knows how to handle this control exception, so it's + # OK to re-raise it. + raise + except ( + FailedHealthCheck, + *skip_exceptions_to_reraise(), + ): + # These are fatal errors or control exceptions that should stop the + # engine, so we re-raise them. + raise + except failure_exceptions_to_catch() as e: + # If an unhandled (i.e., non-Hypothesis) error was raised by + # Hypothesis-internal code, re-raise it as a fatal error instead + # of treating it as a test failure. + if isinstance(e, BaseExceptionGroup) and len(e.exceptions) == 1: + # When a naked exception is implicitly wrapped in an ExceptionGroup + # due to a re-raising "except*", the ExceptionGroup is constructed in + # the caller's stack frame (see #4183). 
This workaround is specifically + # for implicit wrapping of naked exceptions by "except*", since explicit + # raising of ExceptionGroup gets the proper traceback in the first place + # - there's no need to handle hierarchical groups here, at least if no + # such implicit wrapping happens inside hypothesis code (we only care + # about the hypothesis-or-not distinction). + # + # 01-25-2025: this was patched to give the correct + # stacktrace in cpython https://github.com/python/cpython/issues/128799. + # can remove once python3.11 is EOL. + tb = e.exceptions[0].__traceback__ or e.__traceback__ + else: + tb = e.__traceback__ + filepath = traceback.extract_tb(tb)[-1][0] + if ( + is_hypothesis_file(filepath) + and not isinstance(e, HypothesisException) + # We expect backend authors to use the provider_conformance test + # to test their backends. If an error occurs there, it is probably + # from their backend, and we would like to treat it as a standard + # error, not a hypothesis-internal error. + and not filepath.endswith( + f"internal{os.sep}conjecture{os.sep}provider_conformance.py" + ) + ): + raise + + if data.frozen: + # This can happen if an error occurred in a finally + # block somewhere, suppressing our original StopTest. + # We raise a new one here to resume normal operation. + raise StopTest(data.testcounter) from e + else: + # The test failed by raising an exception, so we inform the + # engine that this test run was interesting. This is the normal + # path for test runs that fail. + tb = get_trimmed_traceback() + data.expected_traceback = format_exception(e, tb) + data.expected_exception = e + assert data.expected_traceback is not None # for mypy + verbose_report(data.expected_traceback) + + self.failed_normally = True + + interesting_origin = InterestingOrigin.from_exception(e) + if trace: # pragma: no cover + # Trace collection is explicitly disabled under coverage. + self.explain_traces[interesting_origin].add(frozenset(trace)) + if interesting_origin.exc_type == DeadlineExceeded: + self.failed_due_to_deadline = True + self.explain_traces.clear() + try: + data.mark_interesting(interesting_origin) + except FlakyReplay as err: + raise self._flaky_replay_to_failure(err, e) from None + + finally: + # Conditional here so we can save some time constructing the payload; in + # other cases (without coverage) it's cheap enough to do that regardless. + # + # Note that we have to unconditionally realize data.events, because + # the statistics reported by the pytest plugin use a different flow + # than observability, but still access symbolic events. 
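# (Illustrative note on the realize() calls below: with the default Hypothesis
# provider, realize() returns its argument unchanged, while alternative backends
# such as hypothesis-crosshair may be holding symbolic values here and must
# convert them to concrete Python objects, raising BackendCannotProceed when
# they cannot.)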
+ + try: + data.events = data.provider.realize(data.events) + except BackendCannotProceed: + data.events = {} + + if observability_enabled(): + if runner := getattr(self, "_runner", None): + phase = runner._current_phase + else: # pragma: no cover # in case of messing with internals + if self.failed_normally or self.failed_due_to_deadline: + phase = "shrink" + else: + phase = "unknown" + backend_desc = f", using backend={self.settings.backend!r}" * ( + self.settings.backend != "hypothesis" + and not getattr(runner, "_switch_to_hypothesis_provider", False) + ) + try: + data._observability_args = data.provider.realize( + data._observability_args + ) + except BackendCannotProceed: + data._observability_args = {} + + try: + self._string_repr = data.provider.realize(self._string_repr) + except BackendCannotProceed: + self._string_repr = "" + + data.freeze() + tc = make_testcase( + run_start=self._start_timestamp, + property=self.test_identifier, + data=data, + how_generated=f"during {phase} phase{backend_desc}", + representation=self._string_repr, + arguments=data._observability_args, + timing=self._timing_features, + coverage=tractable_coverage_report(trace) or None, + phase=phase, + backend_metadata=data.provider.observe_test_case(), + ) + deliver_observation(tc) + + for msg in data.provider.observe_information_messages( + lifetime="test_case" + ): + self._deliver_information_message(**msg) + self._timing_features = {} + + def _deliver_information_message( + self, *, type: InfoObservationType, title: str, content: str | dict + ) -> None: + deliver_observation( + InfoObservation( + type=type, + run_start=self._start_timestamp, + property=self.test_identifier, + title=title, + content=content, + ) + ) + + def run_engine(self): + """Run the test function many times, on database input and generated + input, using the Conjecture engine. + """ + # Tell pytest to omit the body of this function from tracebacks + __tracebackhide__ = True + try: + database_key = self.wrapped_test._hypothesis_internal_database_key + except AttributeError: + if global_force_seed is None: + database_key = function_digest(self.test) + else: + database_key = None + + runner = self._runner = ConjectureRunner( + self._execute_once_for_engine, + settings=self.settings, + random=self.random, + database_key=database_key, + thread_overlap=self.thread_overlap, + ) + # Use the Conjecture engine to run the test function many times + # on different inputs. + runner.run() + note_statistics(runner.statistics) + if observability_enabled(): + self._deliver_information_message( + type="info", + title="Hypothesis Statistics", + content=describe_statistics(runner.statistics), + ) + for msg in ( + p if isinstance(p := runner.provider, PrimitiveProvider) else p(None) + ).observe_information_messages(lifetime="test_function"): + self._deliver_information_message(**msg) + + if runner.call_count == 0: + return + if runner.interesting_examples: + self.falsifying_examples = sorted( + runner.interesting_examples.values(), + key=lambda d: sort_key(d.nodes), + reverse=True, + ) + else: + if runner.valid_examples == 0: + explanations = [] + # use a somewhat arbitrary cutoff to avoid recommending spurious + # fixes. + # eg, a few invalid examples from internal filters when the + # problem is the user generating large inputs, or a + # few overruns during internal mutation when the problem is + # impossible user filters/assumes. 
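# (Illustration of the cutoff below: with runner.call_count == 50 the threshold
# is min(20, 50 // 5) == 10, so more than ten filtered-out examples trigger the
# first hint; once call_count reaches 100 or more, the threshold is capped at 20.)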
+ if runner.invalid_examples > min(20, runner.call_count // 5): + explanations.append( + f"{runner.invalid_examples} of {runner.call_count} " + "examples failed a .filter() or assume() condition. Try " + "making your filters or assumes less strict, or rewrite " + "using strategy parameters: " + "st.integers().filter(lambda x: x > 0) fails less often " + "(that is, never) when rewritten as st.integers(min_value=1)." + ) + if runner.overrun_examples > min(20, runner.call_count // 5): + explanations.append( + f"{runner.overrun_examples} of {runner.call_count} " + "examples were too large to finish generating; try " + "reducing the typical size of your inputs?" + ) + rep = get_pretty_function_description(self.test) + raise Unsatisfiable( + f"Unable to satisfy assumptions of {rep}. " + f"{' Also, '.join(explanations)}" + ) + + # If we have not traced executions, warn about that now (but only when + # we'd expect to do so reliably, i.e. on CPython>=3.12) + if ( + hasattr(sys, "monitoring") + and not PYPY + and self._should_trace() + and not Tracer.can_trace() + ): # pragma: no cover + # actually covered by our tests, but only on >= 3.12 + warnings.warn( + "avoiding tracing test function because tool id " + f"{MONITORING_TOOL_ID} is already taken by tool " + f"{sys.monitoring.get_tool(MONITORING_TOOL_ID)}.", + HypothesisWarning, + stacklevel=3, + ) + + if not self.falsifying_examples: + return + elif not (self.settings.report_multiple_bugs and pytest_shows_exceptiongroups): + # Pretend that we only found one failure, by discarding the others. + del self.falsifying_examples[:-1] + + # The engine found one or more failures, so we need to reproduce and + # report them. + + errors_to_report = [] + + report_lines = describe_targets(runner.best_observed_targets) + if report_lines: + report_lines.append("") + + explanations = explanatory_lines(self.explain_traces, self.settings) + for falsifying_example in self.falsifying_examples: + fragments = [] + + ran_example = runner.new_conjecture_data( + falsifying_example.choices, max_choices=len(falsifying_example.choices) + ) + ran_example.slice_comments = falsifying_example.slice_comments + tb = None + origin = None + assert falsifying_example.expected_exception is not None + assert falsifying_example.expected_traceback is not None + try: + with with_reporter(fragments.append): + self.execute_once( + ran_example, + print_example=True, + is_final=True, + expected_failure=( + falsifying_example.expected_exception, + falsifying_example.expected_traceback, + ), + ) + except StopTest as e: + # Link the expected exception from the first run. Not sure + # how to access the current exception, if it failed + # differently on this run. In fact, in the only known + # reproducer, the StopTest is caused by OVERRUN before the + # test is even executed. Possibly because all initial examples + # failed until the final non-traced replay, and something was + # exhausted? Possibly a FIXME, but sufficiently weird to + # ignore for now. 
+ err = FlakyFailure( + "Inconsistent results: An example failed on the " + "first run but now succeeds (or fails with another " + "error, or is for some reason not runnable).", + # (note: e is a BaseException) + [falsifying_example.expected_exception or e], + ) + errors_to_report.append(ReportableError(fragments, err)) + except UnsatisfiedAssumption as e: # pragma: no cover # ironically flaky + err = FlakyFailure( + "Unreliable assumption: An example which satisfied " + "assumptions on the first run now fails it.", + [e], + ) + errors_to_report.append(ReportableError(fragments, err)) + except BaseException as e: + # If we have anything for explain-mode, this is the time to report. + fragments.extend(explanations[falsifying_example.interesting_origin]) + error_with_tb = e.with_traceback(get_trimmed_traceback()) + errors_to_report.append(ReportableError(fragments, error_with_tb)) + tb = format_exception(e, get_trimmed_traceback(e)) + origin = InterestingOrigin.from_exception(e) + else: + # execute_once() will always raise either the expected error, or Flaky. + raise NotImplementedError("This should be unreachable") + finally: + ran_example.freeze() + if observability_enabled(): + # log our observability line for the final failing example + tc = make_testcase( + run_start=self._start_timestamp, + property=self.test_identifier, + data=ran_example, + how_generated="minimal failing example", + representation=self._string_repr, + arguments=ran_example._observability_args, + timing=self._timing_features, + coverage=None, # Not recorded when we're replaying the MFE + status="passed" if sys.exc_info()[0] else "failed", + status_reason=str(origin or "unexpected/flaky pass"), + metadata={"traceback": tb}, + ) + deliver_observation(tc) + + # Whether or not replay actually raised the exception again, we want + # to print the reproduce_failure decorator for the failing example. + if self.settings.print_blob: + fragments.append( + "\nYou can reproduce this example by temporarily adding " + f"{reproduction_decorator(falsifying_example.choices)} " + "as a decorator on your test case" + ) + + _raise_to_user( + errors_to_report, + self.settings, + report_lines, + # A backend might report a failure and then report verified afterwards, + # which is to be interpreted as "there are no more failures *other + # than what we already reported*". Do not report this as unsound. + unsound_backend=( + runner._verified_by + if runner._verified_by and not runner._backend_found_failure + else None + ), + ) + + +def _simplify_explicit_errors(errors: list[ReportableError]) -> list[ReportableError]: + """ + Group explicit example errors by their InterestingOrigin, keeping only the + simplest one, and adding a note of how many other examples failed with the same + error. 
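A one-line illustration of the "shortlex" ordering used below to pick the simplest representative (shorter representations win, with ties broken lexicographically):

assert sorted(["aa", "z", "ab"], key=lambda s: (len(s), s)) == ["z", "aa", "ab"]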
+ """ + by_origin: dict[InterestingOrigin, list[ReportableError]] = defaultdict(list) + for error in errors: + origin = InterestingOrigin.from_exception(error.exception) + by_origin[origin].append(error) + + result = [] + for group in by_origin.values(): + if len(group) == 1: + result.append(group[0]) + else: + # Sort by shortlex of representation (first fragment) + def shortlex_key(error): + repr_str = error.fragments[0] if error.fragments else "" + return (len(repr_str), repr_str) + + sorted_group = sorted(group, key=shortlex_key) + simplest = sorted_group[0] + other_count = len(group) - 1 + add_note( + simplest.exception, + f"(note: {other_count} other explicit example{'s' * (other_count > 1)} " + "also failed with this error; use Verbosity.verbose to view)", + ) + result.append(simplest) + + return result + + +def _raise_to_user( + errors_to_report, settings, target_lines, trailer="", *, unsound_backend=None +): + """Helper function for attaching notes and grouping multiple errors.""" + failing_prefix = "Falsifying example: " + ls = [] + for error in errors_to_report: + for note in error.fragments: + add_note(error.exception, note) + if note.startswith(failing_prefix): + ls.append(note.removeprefix(failing_prefix)) + if current_pytest_item.value: + current_pytest_item.value._hypothesis_failing_examples = ls + + if len(errors_to_report) == 1: + the_error_hypothesis_found = errors_to_report[0].exception + else: + assert errors_to_report + the_error_hypothesis_found = BaseExceptionGroup( + f"Hypothesis found {len(errors_to_report)} distinct failures{trailer}.", + [error.exception for error in errors_to_report], + ) + + if settings.verbosity >= Verbosity.normal: + for line in target_lines: + add_note(the_error_hypothesis_found, line) + + if unsound_backend: + add_note( + the_error_hypothesis_found, + f"backend={unsound_backend!r} claimed to verify this test passes - " + "please send them a bug report!", + ) + + raise the_error_hypothesis_found + + +@contextlib.contextmanager +def fake_subTest(self, msg=None, **__): + """Monkeypatch for `unittest.TestCase.subTest` during `@given`. + + If we don't patch this out, each failing example is reported as a + separate failing test by the unittest test runner, which is + obviously incorrect. We therefore replace it for the duration with + this version. + """ + warnings.warn( + "subTest per-example reporting interacts badly with Hypothesis " + "trying hundreds of examples, so we disable it for the duration of " + "any test that uses `@given`.", + HypothesisWarning, + stacklevel=2, + ) + yield + + +@dataclass(slots=False, frozen=False) +class HypothesisHandle: + """This object is provided as the .hypothesis attribute on @given tests. + + Downstream users can reassign its attributes to insert custom logic into + the execution of each case, for example by converting an async into a + sync function. + + This must be an attribute of an attribute, because reassignment of a + first-level attribute would not be visible to Hypothesis if the function + had been decorated before the assignment. + + See https://github.com/HypothesisWorks/hypothesis/issues/1257 for more + information. + """ + + inner_test: Any + _get_fuzz_target: Any + _given_kwargs: Any + + @property + def fuzz_one_input( + self, + ) -> Callable[[bytes | bytearray | memoryview | BinaryIO], bytes | None]: + """ + Run the test as a fuzz target, driven with the ``buffer`` of bytes. 
+ + Depending on the passed ``buffer`` one of three things will happen: + + * If the bytestring was invalid, for example because it was too short or was + filtered out by |assume| or |.filter|, |fuzz_one_input| returns ``None``. + * If the bytestring was valid and the test passed, |fuzz_one_input| returns + a canonicalised and pruned bytestring which will replay that test case. + This is provided as an option to improve the performance of mutating + fuzzers, but can safely be ignored. + * If the test *failed*, i.e. raised an exception, |fuzz_one_input| will + add the pruned buffer to :ref:`the Hypothesis example database ` + and then re-raise that exception. All you need to do to reproduce, + minimize, and de-duplicate all the failures found via fuzzing is run + your test suite! + + To reduce the performance impact of database writes, |fuzz_one_input| only + records failing inputs which would be valid shrinks for a known failure - + meaning writes are somewhere between constant and log(N) rather than linear + in runtime. However, this tracking only works within a persistent fuzzing + process; for forkserver fuzzers we recommend ``database=None`` for the main + run, and then replaying with a database enabled if you need to analyse + failures. + + Note that the interpretation of both input and output bytestrings is + specific to the exact version of Hypothesis you are using and the strategies + given to the test, just like the :ref:`database ` and + |@reproduce_failure|. + + Interaction with |@settings| + ---------------------------- + + |fuzz_one_input| uses just enough of Hypothesis' internals to drive your + test function with a bytestring, and most settings therefore have no effect + in this mode. We recommend running your tests the usual way before fuzzing + to get the benefits of health checks, as well as afterwards to replay, + shrink, deduplicate, and report whatever errors were discovered. + + * |settings.database| *is* used by |fuzz_one_input| - adding failures to + the database to be replayed when + you next run your tests is our preferred reporting mechanism and response + to `the 'fuzzer taming' problem `__. + * |settings.verbosity| and |settings.stateful_step_count| work as usual. + * The |~settings.deadline|, |~settings.derandomize|, |~settings.max_examples|, + |~settings.phases|, |~settings.print_blob|, |~settings.report_multiple_bugs|, + and |~settings.suppress_health_check| settings do not affect |fuzz_one_input|. + + Example Usage + ------------- + + .. code-block:: python + + @given(st.text()) + def test_foo(s): ... + + # This is a traditional fuzz target - call it with a bytestring, + # or a binary IO object, and it runs the test once. + fuzz_target = test_foo.hypothesis.fuzz_one_input + + # For example: + fuzz_target(b"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00") + fuzz_target(io.BytesIO(b"\\x01")) + + .. tip:: + + If you expect to discover many failures while using |fuzz_one_input|, + consider wrapping your database with |BackgroundWriteDatabase|, for + low-overhead writes of failures. + + .. tip:: + + | Want an integrated workflow for your team's local tests, CI, and continuous fuzzing? + | Use `HypoFuzz `__ to fuzz your whole test suite, and find more bugs with the same tests! + + .. seealso:: + + See also the :doc:`/how-to/external-fuzzers` how-to. + """ + # Note: most users, if they care about fuzzer performance, will access the + # property and assign it to a local variable to move the attribute lookup + # outside their fuzzing loop / before the fork point. 
We cache it anyway, + # so that naive or unusual use-cases get the best possible performance too. + try: + return self.__cached_target # type: ignore + except AttributeError: + self.__cached_target = self._get_fuzz_target() + return self.__cached_target + + +@overload +def given( + _: EllipsisType, / +) -> Callable[ + [Callable[..., Coroutine[Any, Any, None] | None]], Callable[[], None] +]: # pragma: no cover + ... + + +@overload +def given( + *_given_arguments: SearchStrategy[Any], +) -> Callable[ + [Callable[..., Coroutine[Any, Any, None] | None]], Callable[..., None] +]: # pragma: no cover + ... + + +@overload +def given( + **_given_kwargs: SearchStrategy[Any] | EllipsisType, +) -> Callable[ + [Callable[..., Coroutine[Any, Any, None] | None]], Callable[..., None] +]: # pragma: no cover + ... + + +def given( + *_given_arguments: SearchStrategy[Any] | EllipsisType, + **_given_kwargs: SearchStrategy[Any] | EllipsisType, +) -> Callable[[Callable[..., Coroutine[Any, Any, None] | None]], Callable[..., None]]: + """ + The |@given| decorator turns a function into a Hypothesis test. This is the + main entry point to Hypothesis. + + .. seealso:: + + See also the :doc:`/tutorial/introduction` tutorial, which introduces + defining Hypothesis tests with |@given|. + + .. _given-arguments: + + Arguments to ``@given`` + ----------------------- + + Arguments to |@given| may be either positional or keyword arguments: + + .. code-block:: python + + @given(st.integers(), st.floats()) + def test_one(x, y): + pass + + @given(x=st.integers(), y=st.floats()) + def test_two(x, y): + pass + + If using keyword arguments, the arguments may appear in any order, as with + standard Python functions: + + .. code-block:: python + + # different order, but still equivalent to before + @given(y=st.floats(), x=st.integers()) + def test(x, y): + assert isinstance(x, int) + assert isinstance(y, float) + + If |@given| is provided fewer positional arguments than the decorated test, + the test arguments are filled in on the right side, leaving the leftmost + positional arguments unfilled: + + .. code-block:: python + + @given(st.integers(), st.floats()) + def test(manual_string, y, z): + assert manual_string == "x" + assert isinstance(y, int) + assert isinstance(z, float) + + # `test` is now a callable which takes one argument `manual_string` + + test("x") + # or equivalently: + test(manual_string="x") + + The reason for this "from the right" behavior is to support using |@given| + with instance methods, by automatically passing through ``self``: + + .. code-block:: python + + class MyTest(TestCase): + @given(st.integers()) + def test(self, x): + assert isinstance(self, MyTest) + assert isinstance(x, int) + + If (and only if) using keyword arguments, |@given| may be combined with + ``**kwargs`` or ``*args``: + + .. code-block:: python + + @given(x=integers(), y=integers()) + def test(x, **kwargs): + assert "y" in kwargs + + @given(x=integers(), y=integers()) + def test(x, *args, **kwargs): + assert args == () + assert "x" not in kwargs + assert "y" in kwargs + + It is an error to: + + * Mix positional and keyword arguments to |@given|. + * Use |@given| with a function that has a default value for an argument. + * Use |@given| with positional arguments with a function that uses ``*args``, + ``**kwargs``, or keyword-only arguments. + + The function returned by given has all the same arguments as the original + test, minus those that are filled in by |@given|. 
See the :ref:`notes on + framework compatibility ` for how this interacts + with features of other testing libraries, such as :pypi:`pytest` fixtures. + """ + + if currently_in_test_context(): + fail_health_check( + Settings(), + "Nesting @given tests results in quadratic generation and shrinking " + "behavior, and can usually be more cleanly expressed by replacing the " + "inner function with an st.data() parameter on the outer @given." + "\n\n" + "If it is difficult or impossible to refactor this test to remove the " + "nested @given, you can disable this health check with " + "@settings(suppress_health_check=[HealthCheck.nested_given]) on the " + "outer @given. See " + "https://hypothesis.readthedocs.io/en/latest/reference/api.html#hypothesis.HealthCheck " + "for details.", + HealthCheck.nested_given, + ) + + def run_test_as_given(test): + if inspect.isclass(test): + # Provide a meaningful error to users, instead of exceptions from + # internals that assume we're dealing with a function. + raise InvalidArgument("@given cannot be applied to a class") + + if ( + "_pytest" in sys.modules + and "_pytest.fixtures" in sys.modules + and ( + tuple(map(int, sys.modules["_pytest"].__version__.split(".")[:2])) + >= (8, 4) + ) + and isinstance( + test, sys.modules["_pytest.fixtures"].FixtureFunctionDefinition + ) + ): # pragma: no cover # covered by pytest/test_fixtures, but not by cover/ + raise InvalidArgument("@given cannot be applied to a pytest fixture") + + given_arguments = tuple(_given_arguments) + given_kwargs = dict(_given_kwargs) + + original_sig = get_signature(test) + if given_arguments == (Ellipsis,) and not given_kwargs: + # user indicated that they want to infer all arguments + given_kwargs = { + p.name: Ellipsis + for p in original_sig.parameters.values() + if p.kind in (p.POSITIONAL_OR_KEYWORD, p.KEYWORD_ONLY) + } + given_arguments = () + + check_invalid = is_invalid_test( + test, original_sig, given_arguments, given_kwargs + ) + + # If the argument check found problems, return a dummy test function + # that will raise an error if it is actually called. + if check_invalid is not None: + return check_invalid + + # Because the argument check succeeded, we can convert @given's + # positional arguments into keyword arguments for simplicity. + if given_arguments: + assert not given_kwargs + posargs = [ + p.name + for p in original_sig.parameters.values() + if p.kind is p.POSITIONAL_OR_KEYWORD + ] + given_kwargs = dict( + list(zip(posargs[::-1], given_arguments[::-1], strict=False))[::-1] + ) + # These have been converted, so delete them to prevent accidental use. + del given_arguments + + new_signature = new_given_signature(original_sig, given_kwargs) + + # Use type information to convert "infer" arguments into appropriate strategies. + if ... in given_kwargs.values(): + hints = get_type_hints(test) + for name in [name for name, value in given_kwargs.items() if value is ...]: + if name not in hints: + return _invalid( + f"passed {name}=... for {test.__name__}, but {name} has " + "no type annotation", + test=test, + given_kwargs=given_kwargs, + ) + given_kwargs[name] = st.from_type(hints[name]) + + # only raise if the same thread uses two different executors, not if two + # different threads use different executors. + thread_local = ThreadLocal(prev_self=lambda: not_set) + # maps thread_id to whether that thread overlaps in execution with any + # other thread in this @given. 
We use this to detect whether an @given is + # being run from multiple different threads at once, which informs + # decisions like whether to raise DeadlineExceeded or HealthCheck.too_slow. + thread_overlap: dict[int, bool] = {} + thread_overlap_lock = Lock() + + @impersonate(test) + @define_function_signature(test.__name__, test.__doc__, new_signature) + def wrapped_test(*arguments, **kwargs): + # Tell pytest to omit the body of this function from tracebacks + __tracebackhide__ = True + with thread_overlap_lock: + for overlap_thread_id in thread_overlap: + thread_overlap[overlap_thread_id] = True + + threadid = threading.get_ident() + # if there are existing threads when this thread starts, then + # this thread starts at an overlapped state. + has_existing_threads = len(thread_overlap) > 0 + thread_overlap[threadid] = has_existing_threads + + try: + test = wrapped_test.hypothesis.inner_test + if getattr(test, "is_hypothesis_test", False): + raise InvalidArgument( + f"You have applied @given to the test {test.__name__} more than " + "once, which wraps the test several times and is extremely slow. " + "A similar effect can be gained by combining the arguments " + "of the two calls to given. For example, instead of " + "@given(booleans()) @given(integers()), you could write " + "@given(booleans(), integers())" + ) + + settings = wrapped_test._hypothesis_internal_use_settings + random = get_random_for_wrapped_test(test, wrapped_test) + arguments, kwargs, stuff = process_arguments_to_given( + wrapped_test, + arguments, + kwargs, + given_kwargs, + new_signature.parameters, + ) + + if ( + inspect.iscoroutinefunction(test) + and get_executor(stuff.selfy) is default_executor + ): + # See https://github.com/HypothesisWorks/hypothesis/issues/3054 + # If our custom executor doesn't handle coroutines, or we return an + # awaitable from a non-async-def function, we just rely on the + # return_value health check. This catches most user errors though. + raise InvalidArgument( + "Hypothesis doesn't know how to run async test functions like " + f"{test.__name__}. You'll need to write a custom executor, " + "or use a library like pytest-asyncio or pytest-trio which can " + "handle the translation for you.\n See https://hypothesis." + "readthedocs.io/en/latest/details.html#custom-function-execution" + ) + + runner = stuff.selfy + if isinstance(stuff.selfy, TestCase) and test.__name__ in dir(TestCase): + fail_health_check( + settings, + f"You have applied @given to the method {test.__name__}, which is " + "used by the unittest runner but is not itself a test. " + "This is not useful in any way.", + HealthCheck.not_a_test_method, + ) + if bad_django_TestCase(runner): # pragma: no cover + # Covered by the Django tests, but not the pytest coverage task + raise InvalidArgument( + "You have applied @given to a method on " + f"{type(runner).__qualname__}, but this " + "class does not inherit from the supported versions in " + "`hypothesis.extra.django`. Use the Hypothesis variants " + "to ensure that each example is run in a separate " + "database transaction." + ) + + nonlocal thread_local + # Check selfy really is self (not e.g. a mock) before we health-check + cur_self = ( + stuff.selfy + if getattr(type(stuff.selfy), test.__name__, None) is wrapped_test + else None + ) + if thread_local.prev_self is not_set: + thread_local.prev_self = cur_self + elif cur_self is not thread_local.prev_self: + fail_health_check( + settings, + f"The method {test.__qualname__} was called from multiple " + "different executors. 
This may lead to flaky tests and " + "nonreproducible errors when replaying from database." + "\n\n" + "Unlike most health checks, HealthCheck.differing_executors " + "warns about a correctness issue with your test. We " + "therefore recommend fixing the underlying issue, rather " + "than suppressing this health check. However, if you are " + "confident this health check can be safely disabled, you can " + "do so with " + "@settings(suppress_health_check=[HealthCheck.differing_executors]). " + "See " + "https://hypothesis.readthedocs.io/en/latest/reference/api.html#hypothesis.HealthCheck " + "for details.", + HealthCheck.differing_executors, + ) + + state = StateForActualGivenExecution( + stuff, + test, + settings, + random, + wrapped_test, + thread_overlap=thread_overlap, + ) + + # If there was a @reproduce_failure decorator, use it to reproduce + # the error (or complain that we couldn't). Either way, this will + # always raise some kind of error. + if ( + reproduce_failure := wrapped_test._hypothesis_internal_use_reproduce_failure + ) is not None: + expected_version, failure = reproduce_failure + if expected_version != __version__: + raise InvalidArgument( + "Attempting to reproduce a failure from a different " + f"version of Hypothesis. This failure is from {expected_version}, but " + f"you are currently running {__version__!r}. Please change your " + "Hypothesis version to a matching one." + ) + try: + state.execute_once( + ConjectureData.for_choices(decode_failure(failure)), + print_example=True, + is_final=True, + ) + raise DidNotReproduce( + "Expected the test to raise an error, but it " + "completed successfully." + ) + except StopTest: + raise DidNotReproduce( + "The shape of the test data has changed in some way " + "from where this blob was defined. Are you sure " + "you're running the same test?" + ) from None + except UnsatisfiedAssumption: + raise DidNotReproduce( + "The test data failed to satisfy an assumption in the " + "test. Have you added it since this blob was generated?" + ) from None + + # There was no @reproduce_failure, so start by running any explicit + # examples from @example decorators. + if errors := list( + execute_explicit_examples( + state, wrapped_test, arguments, kwargs, original_sig + ) + ): + # If we're not going to report multiple bugs, we would have + # stopped running explicit examples at the first failure. + assert len(errors) == 1 or state.settings.report_multiple_bugs + + # If an explicit example raised a 'skip' exception, ensure it's never + # wrapped up in an exception group. Because we break out of the loop + # immediately on finding a skip, if present it's always the last error. + if isinstance(errors[-1].exception, skip_exceptions_to_reraise()): + # Covered by `test_issue_3453_regression`, just in a subprocess. + del errors[:-1] # pragma: no cover + + if state.settings.verbosity < Verbosity.verbose: + # keep only one error per interesting origin, unless + # verbosity is high + errors = _simplify_explicit_errors(errors) + + _raise_to_user(errors, state.settings, [], " in explicit examples") + + # If there were any explicit examples, they all ran successfully. + # The next step is to use the Conjecture engine to run the test on + # many different inputs. + ran_explicit_examples = ( + Phase.explicit in state.settings.phases + and getattr(wrapped_test, "hypothesis_explicit_examples", ()) + ) + SKIP_BECAUSE_NO_EXAMPLES = unittest.SkipTest( + "Hypothesis has been told to run no examples for this test." 
+ ) + if not ( + Phase.reuse in settings.phases or Phase.generate in settings.phases + ): + if not ran_explicit_examples: + raise SKIP_BECAUSE_NO_EXAMPLES + return + + try: + if isinstance(runner, TestCase) and hasattr(runner, "subTest"): + subTest = runner.subTest + try: + runner.subTest = types.MethodType(fake_subTest, runner) + state.run_engine() + finally: + runner.subTest = subTest + else: + state.run_engine() + except BaseException as e: + # The exception caught here should either be an actual test + # failure (or BaseExceptionGroup), or some kind of fatal error + # that caused the engine to stop. + generated_seed = ( + wrapped_test._hypothesis_internal_use_generated_seed + ) + with local_settings(settings): + if not (state.failed_normally or generated_seed is None): + if running_under_pytest: + report( + f"You can add @seed({generated_seed}) to this test or " + f"run pytest with --hypothesis-seed={generated_seed} " + "to reproduce this failure." + ) + else: + report( + f"You can add @seed({generated_seed}) to this test to " + "reproduce this failure." + ) + # The dance here is to avoid showing users long tracebacks + # full of Hypothesis internals they don't care about. + # We have to do this inline, to avoid adding another + # internal stack frame just when we've removed the rest. + # + # Using a variable for our trimmed error ensures that the line + # which will actually appear in tracebacks is as clear as + # possible - "raise the_error_hypothesis_found". + the_error_hypothesis_found = e.with_traceback( + None + if isinstance(e, BaseExceptionGroup) + else get_trimmed_traceback() + ) + raise the_error_hypothesis_found + + if not (ran_explicit_examples or state.ever_executed): + raise SKIP_BECAUSE_NO_EXAMPLES + finally: + with thread_overlap_lock: + del thread_overlap[threadid] + + def _get_fuzz_target() -> ( + Callable[[bytes | bytearray | memoryview | BinaryIO], bytes | None] + ): + # Because fuzzing interfaces are very performance-sensitive, we use a + # somewhat more complicated structure here. `_get_fuzz_target()` is + # called by the `HypothesisHandle.fuzz_one_input` property, allowing + # us to defer our collection of the settings, random instance, and + # reassignable `inner_test` (etc) until `fuzz_one_input` is accessed. + # + # We then share the performance cost of setting up `state` between + # many invocations of the target. We explicitly force `deadline=None` + # for performance reasons, saving ~40% the runtime of an empty test. + test = wrapped_test.hypothesis.inner_test + settings = Settings( + parent=wrapped_test._hypothesis_internal_use_settings, deadline=None + ) + random = get_random_for_wrapped_test(test, wrapped_test) + _args, _kwargs, stuff = process_arguments_to_given( + wrapped_test, (), {}, given_kwargs, new_signature.parameters + ) + assert not _args + assert not _kwargs + state = StateForActualGivenExecution( + stuff, + test, + settings, + random, + wrapped_test, + thread_overlap=thread_overlap, + ) + database_key = function_digest(test) + b".secondary" + # We track the minimal-so-far example for each distinct origin, so + # that we track log-n instead of n examples for long runs. In particular + # it means that we saturate for common errors in long runs instead of + # storing huge volumes of low-value data. + minimal_failures: dict = {} + + def fuzz_one_input( + buffer: bytes | bytearray | memoryview | BinaryIO, + ) -> bytes | None: + # This inner part is all that the fuzzer will actually run, + # so we keep it as small and as fast as possible. 
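+ # Accept binary file-like objects as well as raw bytes: read at most
+ # BUFFER_SIZE bytes from the stream before driving the test with them.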
+ if isinstance(buffer, io.IOBase): + buffer = buffer.read(BUFFER_SIZE) + assert isinstance(buffer, (bytes, bytearray, memoryview)) + data = ConjectureData( + random=None, + provider=BytestringProvider, + provider_kw={"bytestring": buffer}, + ) + try: + state.execute_once(data) + status = Status.VALID + except StopTest: + status = data.status + return None + except UnsatisfiedAssumption: + status = Status.INVALID + return None + except BaseException: + known = minimal_failures.get(data.interesting_origin) + if settings.database is not None and ( + known is None or sort_key(data.nodes) <= sort_key(known) + ): + settings.database.save( + database_key, choices_to_bytes(data.choices) + ) + minimal_failures[data.interesting_origin] = data.nodes + status = Status.INTERESTING + raise + finally: + if observability_enabled(): + data.freeze() + tc = make_testcase( + run_start=state._start_timestamp, + property=state.test_identifier, + data=data, + how_generated="fuzz_one_input", + representation=state._string_repr, + arguments=data._observability_args, + timing=state._timing_features, + coverage=None, + status=status, + backend_metadata=data.provider.observe_test_case(), + ) + deliver_observation(tc) + state._timing_features = {} + + assert isinstance(data.provider, BytestringProvider) + return bytes(data.provider.drawn) + + fuzz_one_input.__doc__ = HypothesisHandle.fuzz_one_input.__doc__ + return fuzz_one_input + + # After having created the decorated test function, we need to copy + # over some attributes to make the switch as seamless as possible. + + for attrib in dir(test): + if not (attrib.startswith("_") or hasattr(wrapped_test, attrib)): + setattr(wrapped_test, attrib, getattr(test, attrib)) + wrapped_test.is_hypothesis_test = True + if hasattr(test, "_hypothesis_internal_settings_applied"): + # Used to check if @settings is applied twice. + wrapped_test._hypothesis_internal_settings_applied = True + wrapped_test._hypothesis_internal_use_seed = getattr( + test, "_hypothesis_internal_use_seed", None + ) + wrapped_test._hypothesis_internal_use_settings = ( + getattr(test, "_hypothesis_internal_use_settings", None) or Settings.default + ) + wrapped_test._hypothesis_internal_use_reproduce_failure = getattr( + test, "_hypothesis_internal_use_reproduce_failure", None + ) + wrapped_test.hypothesis = HypothesisHandle(test, _get_fuzz_target, given_kwargs) + return wrapped_test + + return run_test_as_given + + +def find( + specifier: SearchStrategy[Ex], + condition: Callable[[Any], bool], + *, + settings: Settings | None = None, + random: Random | None = None, + database_key: bytes | None = None, +) -> Ex: + """Returns the minimal example from the given strategy ``specifier`` that + matches the predicate function ``condition``.""" + if settings is None: + settings = Settings(max_examples=2000) + settings = Settings( + settings, suppress_health_check=list(HealthCheck), report_multiple_bugs=False + ) + + if database_key is None and settings.database is not None: + # Note: The database key is not guaranteed to be unique. If not, replaying + # of database examples may fail to reproduce due to being replayed on the + # wrong condition. 
+ database_key = function_digest(condition) + + if not isinstance(specifier, SearchStrategy): + raise InvalidArgument( + f"Expected SearchStrategy but got {specifier!r} of " + f"type {type(specifier).__name__}" + ) + specifier.validate() + + last: list[Ex] = [] + + @settings + @given(specifier) + def test(v): + if condition(v): + last[:] = [v] + raise Found + + if random is not None: + test = seed(random.getrandbits(64))(test) + + test._hypothesis_internal_database_key = database_key # type: ignore + + try: + test() + except Found: + return last[0] + + raise NoSuchExample(get_pretty_function_description(condition)) diff --git a/vendored/hypothesis/database.py b/vendored/hypothesis/database.py new file mode 100644 index 0000000..9106a2d --- /dev/null +++ b/vendored/hypothesis/database.py @@ -0,0 +1,1287 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import abc +import errno +import json +import os +import struct +import sys +import tempfile +import warnings +import weakref +from collections.abc import Callable, Iterable +from datetime import datetime, timedelta, timezone +from functools import lru_cache +from hashlib import sha384 +from os import PathLike, getenv +from pathlib import Path, PurePath +from queue import Queue +from threading import Thread +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Literal, + TypeAlias, + cast, +) +from urllib.error import HTTPError, URLError +from urllib.request import Request, urlopen +from zipfile import BadZipFile, ZipFile + +from hypothesis._settings import note_deprecation +from hypothesis.configuration import storage_directory +from hypothesis.errors import HypothesisException, HypothesisWarning +from hypothesis.internal.conjecture.choice import ChoiceT +from hypothesis.utils.conventions import UniqueIdentifier, not_set + +__all__ = [ + "DirectoryBasedExampleDatabase", + "ExampleDatabase", + "GitHubArtifactDatabase", + "InMemoryExampleDatabase", + "MultiplexedDatabase", + "ReadOnlyDatabase", +] + +if TYPE_CHECKING: + from watchdog.observers.api import BaseObserver + +StrPathT: TypeAlias = str | PathLike[str] +SaveDataT: TypeAlias = tuple[bytes, bytes] # key, value +DeleteDataT: TypeAlias = tuple[bytes, bytes | None] # key, value +ListenerEventT: TypeAlias = ( + tuple[Literal["save"], SaveDataT] | tuple[Literal["delete"], DeleteDataT] +) +ListenerT: TypeAlias = Callable[[ListenerEventT], Any] + + +def _usable_dir(path: StrPathT) -> bool: + """ + Returns True if the desired path can be used as database path because + either the directory exists and can be used, or its root directory can + be used and we can make the directory as needed. + """ + path = Path(path) + try: + while not path.exists(): + # Loop terminates because the root dir ('/' on unix) always exists. 
+ path = path.parent + return path.is_dir() and os.access(path, os.R_OK | os.W_OK | os.X_OK) + except PermissionError: + return False + + +def _db_for_path( + path: StrPathT | UniqueIdentifier | Literal[":memory:"] | None = None, +) -> "ExampleDatabase": + if path is not_set: + if os.getenv("HYPOTHESIS_DATABASE_FILE") is not None: # pragma: no cover + raise HypothesisException( + "The $HYPOTHESIS_DATABASE_FILE environment variable no longer has any " + "effect. Configure your database location via a settings profile instead.\n" + "https://hypothesis.readthedocs.io/en/latest/settings.html#settings-profiles" + ) + + path = storage_directory("examples", intent_to_write=False) + if not _usable_dir(path): # pragma: no cover + warnings.warn( + "The database setting is not configured, and the default " + "location is unusable - falling back to an in-memory " + f"database for this session. {path=}", + HypothesisWarning, + stacklevel=3, + ) + return InMemoryExampleDatabase() + if path in (None, ":memory:"): + return InMemoryExampleDatabase() + path = cast(StrPathT, path) + return DirectoryBasedExampleDatabase(path) + + +class _EDMeta(abc.ABCMeta): + def __call__(self, *args: Any, **kwargs: Any) -> "ExampleDatabase": + if self is ExampleDatabase: + note_deprecation( + "Creating a database using the abstract ExampleDatabase() class " + "is deprecated. Prefer using a concrete subclass, like " + "InMemoryExampleDatabase() or DirectoryBasedExampleDatabase(path). " + 'In particular, the special string ExampleDatabase(":memory:") ' + "should be replaced by InMemoryExampleDatabase().", + since="2025-04-07", + has_codemod=False, + ) + return _db_for_path(*args, **kwargs) + return super().__call__(*args, **kwargs) + + +# This __call__ method is picked up by Sphinx as the signature of all ExampleDatabase +# subclasses, which is accurate, reasonable, and unhelpful. Fortunately Sphinx +# maintains a list of metaclass-call-methods to ignore, and while they would prefer +# not to maintain it upstream (https://github.com/sphinx-doc/sphinx/pull/8262) we +# can insert ourselves here. +# +# This code only runs if Sphinx has already been imported; and it would live in our +# docs/conf.py except that we would also like it to work for anyone documenting +# downstream ExampleDatabase subclasses too. +if "sphinx" in sys.modules: + try: + import sphinx.ext.autodoc + + signature = "hypothesis.database._EDMeta.__call__" + # _METACLASS_CALL_BLACKLIST is a frozenset in later sphinx versions + if isinstance(sphinx.ext.autodoc._METACLASS_CALL_BLACKLIST, frozenset): + sphinx.ext.autodoc._METACLASS_CALL_BLACKLIST = ( + sphinx.ext.autodoc._METACLASS_CALL_BLACKLIST | {signature} + ) + else: + sphinx.ext.autodoc._METACLASS_CALL_BLACKLIST.append(signature) + except Exception: + pass + + +class ExampleDatabase(metaclass=_EDMeta): + """ + A Hypothesis database, for use in |settings.database|. + + Hypothesis automatically saves failures to the database set in + |settings.database|. The next time the test is run, Hypothesis will replay + any failures from the database in |settings.database| for that test (in + |Phase.reuse|). + + The database is best thought of as a cache that you never need to invalidate. + Entries may be transparently dropped when upgrading your Hypothesis version + or changing your test. Do not rely on the database for correctness; to ensure + Hypothesis always tries an input, use |@example|. + + A Hypothesis database is a simple mapping of bytes to sets of bytes. 
Hypothesis + provides several concrete database subclasses. To write your own database class, + see :doc:`/how-to/custom-database`. + + Change listening + ---------------- + + An optional extension to |ExampleDatabase| is change listening. On databases + which support change listening, calling |ExampleDatabase.add_listener| adds + a function as a change listener, which will be called whenever a value is + added, deleted, or moved inside the database. See |ExampleDatabase.add_listener| + for details. + + All databases in Hypothesis support change listening. Custom database classes + are not required to support change listening, though they will not be compatible + with features that require change listening until they do so. + + .. note:: + + While no Hypothesis features currently require change listening, change + listening is required by `HypoFuzz `_. + + Database methods + ---------------- + + Required methods: + + * |ExampleDatabase.save| + * |ExampleDatabase.fetch| + * |ExampleDatabase.delete| + + Optional methods: + + * |ExampleDatabase.move| + + Change listening methods: + + * |ExampleDatabase.add_listener| + * |ExampleDatabase.remove_listener| + * |ExampleDatabase.clear_listeners| + * |ExampleDatabase._start_listening| + * |ExampleDatabase._stop_listening| + * |ExampleDatabase._broadcast_change| + """ + + def __init__(self) -> None: + self._listeners: list[ListenerT] = [] + + @abc.abstractmethod + def save(self, key: bytes, value: bytes) -> None: + """Save ``value`` under ``key``. + + If ``value`` is already present in ``key``, silently do nothing. + """ + raise NotImplementedError(f"{type(self).__name__}.save") + + @abc.abstractmethod + def fetch(self, key: bytes) -> Iterable[bytes]: + """Return an iterable over all values matching this key.""" + raise NotImplementedError(f"{type(self).__name__}.fetch") + + @abc.abstractmethod + def delete(self, key: bytes, value: bytes) -> None: + """Remove ``value`` from ``key``. + + If ``value`` is not present in ``key``, silently do nothing. + """ + raise NotImplementedError(f"{type(self).__name__}.delete") + + def move(self, src: bytes, dest: bytes, value: bytes) -> None: + """ + Move ``value`` from key ``src`` to key ``dest``. + + Equivalent to ``delete(src, value)`` followed by ``save(src, value)``, + but may have a more efficient implementation. + + Note that ``value`` will be inserted at ``dest`` regardless of whether + it is currently present at ``src``. + """ + if src == dest: + self.save(src, value) + return + self.delete(src, value) + self.save(dest, value) + + def add_listener(self, f: ListenerT, /) -> None: + """ + Add a change listener. ``f`` will be called whenever a value is saved, + deleted, or moved in the database. + + ``f`` can be called with two different event values: + + * ``("save", (key, value))`` + * ``("delete", (key, value))`` + + where ``key`` and ``value`` are both ``bytes``. + + There is no ``move`` event. Instead, a move is broadcasted as a + ``delete`` event followed by a ``save`` event. + + For the ``delete`` event, ``value`` may be ``None``. This might occur if + the database knows that a deletion has occurred in ``key``, but does not + know what value was deleted. + """ + had_listeners = bool(self._listeners) + self._listeners.append(f) + if not had_listeners: + self._start_listening() + + def remove_listener(self, f: ListenerT, /) -> None: + """ + Removes ``f`` from the list of change listeners. + + If ``f`` is not in the list of change listeners, silently do nothing. 
+ """ + if f not in self._listeners: + return + self._listeners.remove(f) + if not self._listeners: + self._stop_listening() + + def clear_listeners(self) -> None: + """Remove all change listeners.""" + had_listeners = bool(self._listeners) + self._listeners.clear() + if had_listeners: + self._stop_listening() + + def _broadcast_change(self, event: ListenerEventT) -> None: + """ + Called when a value has been either added to or deleted from a key in + the underlying database store. The possible values for ``event`` are: + + * ``("save", (key, value))`` + * ``("delete", (key, value))`` + + ``value`` may be ``None`` for the ``delete`` event, indicating we know + that some value was deleted under this key, but not its exact value. + + Note that you should not assume your instance is the only reference to + the underlying database store. For example, if two instances of + |DirectoryBasedExampleDatabase| reference the same directory, + _broadcast_change should be called whenever a file is added or removed + from the directory, even if that database was not responsible for + changing the file. + """ + for listener in self._listeners: + listener(event) + + def _start_listening(self) -> None: + """ + Called when the database adds a change listener, and did not previously + have any change listeners. Intended to allow databases to wait to start + expensive listening operations until necessary. + + ``_start_listening`` and ``_stop_listening`` are guaranteed to alternate, + so you do not need to handle the case of multiple consecutive + ``_start_listening`` calls without an intermediate ``_stop_listening`` + call. + """ + warnings.warn( + f"{self.__class__} does not support listening for changes", + HypothesisWarning, + stacklevel=4, + ) + + def _stop_listening(self) -> None: + """ + Called whenever no change listeners remain on the database. + + ``_stop_listening`` and ``_start_listening`` are guaranteed to alternate, + so you do not need to handle the case of multiple consecutive + ``_stop_listening`` calls without an intermediate ``_start_listening`` + call. + """ + warnings.warn( + f"{self.__class__} does not support stopping listening for changes", + HypothesisWarning, + stacklevel=4, + ) + + +class InMemoryExampleDatabase(ExampleDatabase): + """A non-persistent example database, implemented in terms of an in-memory + dictionary. + + This can be useful if you call a test function several times in a single + session, or for testing other database implementations, but because it + does not persist between runs we do not recommend it for general use. 
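+
+     For instance, a throwaway per-session database can be configured through the
+     usual settings mechanism (a sketch; imports shown for the upstream
+     ``hypothesis`` package, and any test body would do):
+
+     .. code-block:: python
+
+         from hypothesis import given, settings, strategies as st
+         from hypothesis.database import InMemoryExampleDatabase
+
+         @settings(database=InMemoryExampleDatabase())
+         @given(st.integers())
+         def test_round_trip(x):
+             assert x == int(str(x))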
+ """ + + def __init__(self) -> None: + super().__init__() + self.data: dict[bytes, set[bytes]] = {} + + def __repr__(self) -> str: + return f"InMemoryExampleDatabase({self.data!r})" + + def __eq__(self, other: object) -> bool: + return isinstance(other, InMemoryExampleDatabase) and self.data is other.data + + def fetch(self, key: bytes) -> Iterable[bytes]: + yield from self.data.get(key, ()) + + def save(self, key: bytes, value: bytes) -> None: + value = bytes(value) + values = self.data.setdefault(key, set()) + changed = value not in values + values.add(value) + + if changed: + self._broadcast_change(("save", (key, value))) + + def delete(self, key: bytes, value: bytes) -> None: + value = bytes(value) + values = self.data.get(key, set()) + changed = value in values + values.discard(value) + + if changed: + self._broadcast_change(("delete", (key, value))) + + def _start_listening(self) -> None: + # declare compatibility with the listener api, but do the actual + # implementation in .delete and .save, since we know we are the only + # writer to .data. + pass + + def _stop_listening(self) -> None: + pass + + +def _hash(key: bytes) -> str: + return sha384(key).hexdigest()[:16] + + +class DirectoryBasedExampleDatabase(ExampleDatabase): + """Use a directory to store Hypothesis examples as files. + + Each test corresponds to a directory, and each example to a file within that + directory. While the contents are fairly opaque, a + |DirectoryBasedExampleDatabase| can be shared by checking the directory + into version control, for example with the following ``.gitignore``:: + + # Ignore files cached by Hypothesis... + .hypothesis/* + # except for the examples directory + !.hypothesis/examples/ + + Note however that this only makes sense if you also pin to an exact version of + Hypothesis, and we would usually recommend implementing a shared database with + a network datastore - see |ExampleDatabase|, and the |MultiplexedDatabase| helper. + """ + + # we keep a database entry of the full values of all the database keys. + # currently only used for inverse mapping of hash -> key in change listening. + _metakeys_name: ClassVar[bytes] = b".hypothesis-keys" + _metakeys_hash: ClassVar[str] = _hash(_metakeys_name) + + def __init__(self, path: StrPathT) -> None: + super().__init__() + self.path = Path(path) + self.keypaths: dict[bytes, Path] = {} + self._observer: BaseObserver | None = None + + def __repr__(self) -> str: + return f"DirectoryBasedExampleDatabase({self.path!r})" + + def __eq__(self, other: object) -> bool: + return ( + isinstance(other, DirectoryBasedExampleDatabase) and self.path == other.path + ) + + def _key_path(self, key: bytes) -> Path: + try: + return self.keypaths[key] + except KeyError: + pass + self.keypaths[key] = self.path / _hash(key) + return self.keypaths[key] + + def _value_path(self, key: bytes, value: bytes) -> Path: + return self._key_path(key) / _hash(value) + + def fetch(self, key: bytes) -> Iterable[bytes]: + kp = self._key_path(key) + if not kp.is_dir(): + return + + try: + for path in os.listdir(kp): + try: + yield (kp / path).read_bytes() + except OSError: + pass + except OSError: # pragma: no cover + # the `kp` directory might have been deleted in the meantime + pass + + def save(self, key: bytes, value: bytes) -> None: + key_path = self._key_path(key) + if key_path.name != self._metakeys_hash: + # add this key to our meta entry of all keys - taking care to avoid + # infinite recursion. 
+ self.save(self._metakeys_name, key) + + # Note: we attempt to create the dir in question now. We + # already checked for permissions, but there can still be other issues, + # e.g. the disk is full, or permissions might have been changed. + try: + key_path.mkdir(exist_ok=True, parents=True) + path = self._value_path(key, value) + if not path.exists(): + # to mimic an atomic write, create and write in a temporary + # directory, and only move to the final path after. This avoids + # any intermediate state where the file is created (and empty) + # but not yet written to. + fd, tmpname = tempfile.mkstemp() + tmppath = Path(tmpname) + os.write(fd, value) + os.close(fd) + try: + tmppath.rename(path) + except OSError as err: # pragma: no cover + if err.errno == errno.EXDEV: + # Can't rename across filesystem boundaries, see e.g. + # https://github.com/HypothesisWorks/hypothesis/issues/4335 + try: + path.write_bytes(tmppath.read_bytes()) + except OSError: + pass + tmppath.unlink() + assert not tmppath.exists() + except OSError: # pragma: no cover + pass + + def move(self, src: bytes, dest: bytes, value: bytes) -> None: + if src == dest: + self.save(src, value) + return + + src_path = self._value_path(src, value) + dest_path = self._value_path(dest, value) + # if the dest key path does not exist, os.renames will create it for us, + # and we will never track its creation in the meta keys entry. Do so now. + if not self._key_path(dest).exists(): + self.save(self._metakeys_name, dest) + + try: + os.renames(src_path, dest_path) + except OSError: + self.delete(src, value) + self.save(dest, value) + + def delete(self, key: bytes, value: bytes) -> None: + try: + self._value_path(key, value).unlink() + except OSError: + return + + # try deleting the key dir, which will only succeed if the dir is empty + # (i.e. ``value`` was the last value in this key). + try: + self._key_path(key).rmdir() + except OSError: + pass + else: + # if the deletion succeeded, also delete this key entry from metakeys. + # (if this key happens to be the metakey itself, this deletion will + # fail; that's ok and faster than checking for this rare case.) + self.delete(self._metakeys_name, key) + + def _start_listening(self) -> None: + try: + from watchdog.events import ( + DirCreatedEvent, + DirDeletedEvent, + DirMovedEvent, + FileCreatedEvent, + FileDeletedEvent, + FileMovedEvent, + FileSystemEventHandler, + ) + from watchdog.observers import Observer + except ImportError: + warnings.warn( + f"listening for changes in a {self.__class__.__name__} " + "requires the watchdog library. 
To install, run " + "`pip install hypothesis[watchdog]`", + HypothesisWarning, + stacklevel=4, + ) + return + + hash_to_key = {_hash(key): key for key in self.fetch(self._metakeys_name)} + _metakeys_hash = self._metakeys_hash + _broadcast_change = self._broadcast_change + + class Handler(FileSystemEventHandler): + def on_created(_self, event: FileCreatedEvent | DirCreatedEvent) -> None: + # we only registered for the file creation event + assert not isinstance(event, DirCreatedEvent) + # watchdog events are only bytes if we passed a byte path to + # .schedule + assert isinstance(event.src_path, str) + + value_path = Path(event.src_path) + # the parent dir represents the key, and its name is the key hash + key_hash = value_path.parent.name + + if key_hash == _metakeys_hash: + try: + hash_to_key[value_path.name] = value_path.read_bytes() + except OSError: # pragma: no cover + # this might occur if all the values in a key have been + # deleted and DirectoryBasedExampleDatabase removes its + # metakeys entry (which is `value_path` here`). + pass + return + + key = hash_to_key.get(key_hash) + if key is None: # pragma: no cover + # we didn't recognize this key. This shouldn't ever happen, + # but some race condition trickery might cause this. + return + + try: + value = value_path.read_bytes() + except OSError: # pragma: no cover + return + + _broadcast_change(("save", (key, value))) + + def on_deleted(self, event: FileDeletedEvent | DirDeletedEvent) -> None: + assert not isinstance(event, DirDeletedEvent) + assert isinstance(event.src_path, str) + + value_path = Path(event.src_path) + key = hash_to_key.get(value_path.parent.name) + if key is None: # pragma: no cover + return + + _broadcast_change(("delete", (key, None))) + + def on_moved(self, event: FileMovedEvent | DirMovedEvent) -> None: + assert not isinstance(event, DirMovedEvent) + assert isinstance(event.src_path, str) + assert isinstance(event.dest_path, str) + + src_path = Path(event.src_path) + dest_path = Path(event.dest_path) + k1 = hash_to_key.get(src_path.parent.name) + k2 = hash_to_key.get(dest_path.parent.name) + + if k1 is None or k2 is None: # pragma: no cover + return + + try: + value = dest_path.read_bytes() + except OSError: # pragma: no cover + return + + _broadcast_change(("delete", (k1, value))) + _broadcast_change(("save", (k2, value))) + + # If we add a listener to a DirectoryBasedExampleDatabase whose database + # directory doesn't yet exist, the watchdog observer will not fire any + # events, even after the directory gets created. + # + # Ensure the directory exists before starting the observer. + self.path.mkdir(exist_ok=True, parents=True) + self._observer = Observer() + self._observer.schedule( + Handler(), + # remove type: ignore when released + # https://github.com/gorakhargosh/watchdog/pull/1096 + self.path, # type: ignore + recursive=True, + event_filter=[FileCreatedEvent, FileDeletedEvent, FileMovedEvent], + ) + self._observer.start() + + def _stop_listening(self) -> None: + assert self._observer is not None + self._observer.stop() + self._observer.join() + self._observer = None + + +class ReadOnlyDatabase(ExampleDatabase): + """A wrapper to make the given database read-only. + + The implementation passes through ``fetch``, and turns ``save``, ``delete``, and + ``move`` into silent no-ops. + + Note that this disables Hypothesis' automatic discarding of stale examples. + It is designed to allow local machines to access a shared database (e.g. 
from CI + servers), without propagating changes back from a local or in-development branch. + """ + + def __init__(self, db: ExampleDatabase) -> None: + super().__init__() + assert isinstance(db, ExampleDatabase) + self._wrapped = db + + def __repr__(self) -> str: + return f"ReadOnlyDatabase({self._wrapped!r})" + + def __eq__(self, other: object) -> bool: + return isinstance(other, ReadOnlyDatabase) and self._wrapped == other._wrapped + + def fetch(self, key: bytes) -> Iterable[bytes]: + yield from self._wrapped.fetch(key) + + def save(self, key: bytes, value: bytes) -> None: + pass + + def delete(self, key: bytes, value: bytes) -> None: + pass + + def _start_listening(self) -> None: + # we're read only, so there are no changes to broadcast. + pass + + def _stop_listening(self) -> None: + pass + + +class MultiplexedDatabase(ExampleDatabase): + """A wrapper around multiple databases. + + Each ``save``, ``fetch``, ``move``, or ``delete`` operation will be run against + all of the wrapped databases. ``fetch`` does not yield duplicate values, even + if the same value is present in two or more of the wrapped databases. + + This combines well with a :class:`ReadOnlyDatabase`, as follows: + + .. code-block:: python + + local = DirectoryBasedExampleDatabase("/tmp/hypothesis/examples/") + shared = CustomNetworkDatabase() + + settings.register_profile("ci", database=shared) + settings.register_profile( + "dev", database=MultiplexedDatabase(local, ReadOnlyDatabase(shared)) + ) + settings.load_profile("ci" if os.environ.get("CI") else "dev") + + So your CI system or fuzzing runs can populate a central shared database; + while local runs on development machines can reproduce any failures from CI + but will only cache their own failures locally and cannot remove examples + from the shared database. + """ + + def __init__(self, *dbs: ExampleDatabase) -> None: + super().__init__() + assert all(isinstance(db, ExampleDatabase) for db in dbs) + self._wrapped = dbs + + def __repr__(self) -> str: + return "MultiplexedDatabase({})".format(", ".join(map(repr, self._wrapped))) + + def __eq__(self, other: object) -> bool: + return ( + isinstance(other, MultiplexedDatabase) and self._wrapped == other._wrapped + ) + + def fetch(self, key: bytes) -> Iterable[bytes]: + seen = set() + for db in self._wrapped: + for value in db.fetch(key): + if value not in seen: + yield value + seen.add(value) + + def save(self, key: bytes, value: bytes) -> None: + for db in self._wrapped: + db.save(key, value) + + def delete(self, key: bytes, value: bytes) -> None: + for db in self._wrapped: + db.delete(key, value) + + def move(self, src: bytes, dest: bytes, value: bytes) -> None: + for db in self._wrapped: + db.move(src, dest, value) + + def _start_listening(self) -> None: + for db in self._wrapped: + db.add_listener(self._broadcast_change) + + def _stop_listening(self) -> None: + for db in self._wrapped: + db.remove_listener(self._broadcast_change) + + +class GitHubArtifactDatabase(ExampleDatabase): + """ + A file-based database loaded from a `GitHub Actions `_ artifact. + + You can use this for sharing example databases between CI runs and developers, allowing + the latter to get read-only access to the former. This is particularly useful for + continuous fuzzing (i.e. with `HypoFuzz `_), + where the CI system can help find new failing examples through fuzzing, + and developers can reproduce them locally without any manual effort. + + .. note:: + You must provide ``GITHUB_TOKEN`` as an environment variable. 
In CI, Github Actions provides + this automatically, but it needs to be set manually for local usage. In a developer machine, + this would usually be a `Personal Access Token `_. + If the repository is private, it's necessary for the token to have ``repo`` scope + in the case of a classic token, or ``actions:read`` in the case of a fine-grained token. + + + In most cases, this will be used + through the :class:`~hypothesis.database.MultiplexedDatabase`, + by combining a local directory-based database with this one. For example: + + .. code-block:: python + + local = DirectoryBasedExampleDatabase(".hypothesis/examples") + shared = ReadOnlyDatabase(GitHubArtifactDatabase("user", "repo")) + + settings.register_profile("ci", database=local) + settings.register_profile("dev", database=MultiplexedDatabase(local, shared)) + # We don't want to use the shared database in CI, only to populate its local one. + # which the workflow should then upload as an artifact. + settings.load_profile("ci" if os.environ.get("CI") else "dev") + + .. note:: + Because this database is read-only, you always need to wrap it with the + :class:`ReadOnlyDatabase`. + + A setup like this can be paired with a GitHub Actions workflow including + something like the following: + + .. code-block:: yaml + + - name: Download example database + uses: dawidd6/action-download-artifact@v9 + with: + name: hypothesis-example-db + path: .hypothesis/examples + if_no_artifact_found: warn + workflow_conclusion: completed + + - name: Run tests + run: pytest + + - name: Upload example database + uses: actions/upload-artifact@v3 + if: always() + with: + name: hypothesis-example-db + path: .hypothesis/examples + + In this workflow, we use `dawidd6/action-download-artifact `_ + to download the latest artifact given that the official `actions/download-artifact `_ + does not support downloading artifacts from previous workflow runs. + + The database automatically implements a simple file-based cache with a default expiration period + of 1 day. You can adjust this through the ``cache_timeout`` property. + + For mono-repo support, you can provide a unique ``artifact_name`` (e.g. ``hypofuzz-example-db-frontend``). + """ + + def __init__( + self, + owner: str, + repo: str, + artifact_name: str = "hypothesis-example-db", + cache_timeout: timedelta = timedelta(days=1), + path: StrPathT | None = None, + ): + super().__init__() + self.owner = owner + self.repo = repo + self.artifact_name = artifact_name + self.cache_timeout = cache_timeout + + # Get the GitHub token from the environment + # It's unnecessary to use a token if the repo is public + self.token: str | None = getenv("GITHUB_TOKEN") + + if path is None: + self.path: Path = Path( + storage_directory(f"github-artifacts/{self.artifact_name}/") + ) + else: + self.path = Path(path) + + # We don't want to initialize the cache until we need to + self._initialized: bool = False + self._disabled: bool = False + + # This is the path to the artifact in usage + # .hypothesis/github-artifacts//.zip + self._artifact: Path | None = None + # This caches the artifact structure + self._access_cache: dict[PurePath, set[PurePath]] | None = None + + # Message to display if user doesn't wrap around ReadOnlyDatabase + self._read_only_message = ( + "This database is read-only. " + "Please wrap this class with ReadOnlyDatabase" + "i.e. ReadOnlyDatabase(GitHubArtifactDatabase(...))." 
+ ) + + def __repr__(self) -> str: + return ( + f"GitHubArtifactDatabase(owner={self.owner!r}, " + f"repo={self.repo!r}, artifact_name={self.artifact_name!r})" + ) + + def __eq__(self, other: object) -> bool: + return ( + isinstance(other, GitHubArtifactDatabase) + and self.owner == other.owner + and self.repo == other.repo + and self.artifact_name == other.artifact_name + and self.path == other.path + ) + + def _prepare_for_io(self) -> None: + assert self._artifact is not None, "Artifact not loaded." + + if self._initialized: # pragma: no cover + return + + # Test that the artifact is valid + try: + with ZipFile(self._artifact) as f: + if f.testzip(): # pragma: no cover + raise BadZipFile + + # Turns out that testzip() doesn't work quite well + # doing the cache initialization here instead + # will give us more coverage of the artifact. + + # Cache the files inside each keypath + self._access_cache = {} + with ZipFile(self._artifact) as zf: + namelist = zf.namelist() + # Iterate over files in the artifact + for filename in namelist: + fileinfo = zf.getinfo(filename) + if fileinfo.is_dir(): + self._access_cache[PurePath(filename)] = set() + else: + # Get the keypath from the filename + keypath = PurePath(filename).parent + # Add the file to the keypath + self._access_cache[keypath].add(PurePath(filename)) + except BadZipFile: + warnings.warn( + "The downloaded artifact from GitHub is invalid. " + "This could be because the artifact was corrupted, " + "or because the artifact was not created by Hypothesis. ", + HypothesisWarning, + stacklevel=3, + ) + self._disabled = True + + self._initialized = True + + def _initialize_db(self) -> None: + # Trigger warning that we suppressed earlier by intent_to_write=False + storage_directory(self.path.name) + # Create the cache directory if it doesn't exist + self.path.mkdir(exist_ok=True, parents=True) + + # Get all artifacts + cached_artifacts = sorted( + self.path.glob("*.zip"), + key=lambda a: datetime.fromisoformat(a.stem.replace("_", ":")), + ) + + # Remove all but the latest artifact + for artifact in cached_artifacts[:-1]: + artifact.unlink() + + try: + found_artifact = cached_artifacts[-1] + except IndexError: + found_artifact = None + + # Check if the latest artifact is a cache hit + if found_artifact is not None and ( + datetime.now(timezone.utc) + - datetime.fromisoformat(found_artifact.stem.replace("_", ":")) + < self.cache_timeout + ): + self._artifact = found_artifact + else: + # Download the latest artifact from GitHub + new_artifact = self._fetch_artifact() + + if new_artifact: + if found_artifact is not None: + found_artifact.unlink() + self._artifact = new_artifact + elif found_artifact is not None: + warnings.warn( + "Using an expired artifact as a fallback for the database: " + f"{found_artifact}", + HypothesisWarning, + stacklevel=2, + ) + self._artifact = found_artifact + else: + warnings.warn( + "Couldn't acquire a new or existing artifact. 
Disabling database.", + HypothesisWarning, + stacklevel=2, + ) + self._disabled = True + return + + self._prepare_for_io() + + def _get_bytes(self, url: str) -> bytes | None: # pragma: no cover + request = Request( + url, + headers={ + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28 ", + "Authorization": f"Bearer {self.token}", + }, + ) + warning_message = None + response_bytes: bytes | None = None + try: + with urlopen(request) as response: + response_bytes = response.read() + except HTTPError as e: + if e.code == 401: + warning_message = ( + "Authorization failed when trying to download artifact from GitHub. " + "Check that you have a valid GITHUB_TOKEN set in your environment." + ) + else: + warning_message = ( + "Could not get the latest artifact from GitHub. " + "This could be because because the repository " + "or artifact does not exist. " + ) + # see https://github.com/python/cpython/issues/128734 + e.close() + except URLError: + warning_message = "Could not connect to GitHub to get the latest artifact. " + except TimeoutError: + warning_message = ( + "Could not connect to GitHub to get the latest artifact " + "(connection timed out)." + ) + + if warning_message is not None: + warnings.warn(warning_message, HypothesisWarning, stacklevel=4) + return None + + return response_bytes + + def _fetch_artifact(self) -> Path | None: # pragma: no cover + # Get the list of artifacts from GitHub + url = f"https://api.github.com/repos/{self.owner}/{self.repo}/actions/artifacts" + response_bytes = self._get_bytes(url) + if response_bytes is None: + return None + + artifacts = json.loads(response_bytes)["artifacts"] + artifacts = [a for a in artifacts if a["name"] == self.artifact_name] + + if not artifacts: + return None + + # Get the latest artifact from the list + artifact = max(artifacts, key=lambda a: a["created_at"]) + url = artifact["archive_download_url"] + + # Download the artifact + artifact_bytes = self._get_bytes(url) + if artifact_bytes is None: + return None + + # Save the artifact to the cache + # We replace ":" with "_" to ensure the filenames are compatible + # with Windows filesystems + timestamp = datetime.now(timezone.utc).isoformat().replace(":", "_") + artifact_path = self.path / f"{timestamp}.zip" + try: + artifact_path.write_bytes(artifact_bytes) + except OSError: + warnings.warn( + "Could not save the latest artifact from GitHub. 
", + HypothesisWarning, + stacklevel=3, + ) + return None + + return artifact_path + + @staticmethod + @lru_cache + def _key_path(key: bytes) -> PurePath: + return PurePath(_hash(key) + "/") + + def fetch(self, key: bytes) -> Iterable[bytes]: + if self._disabled: + return + + if not self._initialized: + self._initialize_db() + if self._disabled: + return + + assert self._artifact is not None + assert self._access_cache is not None + + kp = self._key_path(key) + + with ZipFile(self._artifact) as zf: + # Get the all files in the the kp from the cache + filenames = self._access_cache.get(kp, ()) + for filename in filenames: + with zf.open(filename.as_posix()) as f: + yield f.read() + + # Read-only interface + def save(self, key: bytes, value: bytes) -> None: + raise RuntimeError(self._read_only_message) + + def move(self, src: bytes, dest: bytes, value: bytes) -> None: + raise RuntimeError(self._read_only_message) + + def delete(self, key: bytes, value: bytes) -> None: + raise RuntimeError(self._read_only_message) + + +class BackgroundWriteDatabase(ExampleDatabase): + """A wrapper which defers writes on the given database to a background thread. + + Calls to :meth:`~hypothesis.database.ExampleDatabase.fetch` wait for any + enqueued writes to finish before fetching from the database. + """ + + def __init__(self, db: ExampleDatabase) -> None: + super().__init__() + self._db = db + self._queue: Queue[tuple[str, tuple[bytes, ...]]] = Queue() + self._thread: Thread | None = None + + def _ensure_thread(self): + if self._thread is None: + self._thread = Thread(target=self._worker, daemon=True) + self._thread.start() + # avoid an unbounded timeout during gc. 0.1 should be plenty for most + # use cases. + weakref.finalize(self, self._join, 0.1) + + def __repr__(self) -> str: + return f"BackgroundWriteDatabase({self._db!r})" + + def __eq__(self, other: object) -> bool: + return isinstance(other, BackgroundWriteDatabase) and self._db == other._db + + def _worker(self) -> None: + while True: + method, args = self._queue.get() + getattr(self._db, method)(*args) + self._queue.task_done() + + def _join(self, timeout: float | None = None) -> None: + # copy of Queue.join with a timeout. https://bugs.python.org/issue9634 + with self._queue.all_tasks_done: + while self._queue.unfinished_tasks: + self._queue.all_tasks_done.wait(timeout) + + def fetch(self, key: bytes) -> Iterable[bytes]: + self._join() + return self._db.fetch(key) + + def save(self, key: bytes, value: bytes) -> None: + self._ensure_thread() + self._queue.put(("save", (key, value))) + + def delete(self, key: bytes, value: bytes) -> None: + self._ensure_thread() + self._queue.put(("delete", (key, value))) + + def move(self, src: bytes, dest: bytes, value: bytes) -> None: + self._ensure_thread() + self._queue.put(("move", (src, dest, value))) + + def _start_listening(self) -> None: + self._db.add_listener(self._broadcast_change) + + def _stop_listening(self) -> None: + self._db.remove_listener(self._broadcast_change) + + +def _pack_uleb128(value: int) -> bytes: + """ + Serialize an integer into variable-length bytes. For each byte, the first 7 + bits represent (part of) the integer, while the last bit indicates whether the + integer continues into the next byte. 
+ + https://en.wikipedia.org/wiki/LEB128 + """ + parts = bytearray() + assert value >= 0 + while True: + # chop off 7 bits + byte = value & ((1 << 7) - 1) + value >>= 7 + # set the continuation bit if we have more left + if value: + byte |= 1 << 7 + + parts.append(byte) + if not value: + break + return bytes(parts) + + +def _unpack_uleb128(buffer: bytes) -> tuple[int, int]: + """ + Inverts _pack_uleb128, and also returns the index at which at which we stopped + reading. + """ + value = 0 + for i, byte in enumerate(buffer): + n = byte & ((1 << 7) - 1) + value |= n << (i * 7) + + if not byte >> 7: + break + return (i + 1, value) + + +def choices_to_bytes(choices: Iterable[ChoiceT], /) -> bytes: + """Serialize a list of choices to a bytestring. Inverts choices_from_bytes.""" + # We use a custom serialization format for this, which might seem crazy - but our + # data is a flat sequence of elements, and standard tools like protobuf or msgpack + # don't deal well with e.g. nonstandard bit-pattern-NaNs, or invalid-utf8 unicode. + # + # We simply encode each element with a metadata byte, if needed a uint16 size, and + # then the payload bytes. For booleans, the payload is inlined into the metadata. + parts = [] + for choice in choices: + if isinstance(choice, bool): + # `000_0000v` - tag zero, low bit payload. + parts.append(b"\1" if choice else b"\0") + continue + + # `tag_ssss [uint16 size?] [payload]` + if isinstance(choice, float): + tag = 1 << 5 + choice = struct.pack("!d", choice) + elif isinstance(choice, int): + tag = 2 << 5 + choice = choice.to_bytes(1 + choice.bit_length() // 8, "big", signed=True) + elif isinstance(choice, bytes): + tag = 3 << 5 + else: + assert isinstance(choice, str) + tag = 4 << 5 + choice = choice.encode(errors="surrogatepass") + + size = len(choice) + if size < 0b11111: + parts.append((tag | size).to_bytes(1, "big")) + else: + parts.append((tag | 0b11111).to_bytes(1, "big")) + parts.append(_pack_uleb128(size)) + parts.append(choice) + + return b"".join(parts) + + +def _choices_from_bytes(buffer: bytes, /) -> tuple[ChoiceT, ...]: + # See above for an explanation of the format. + parts: list[ChoiceT] = [] + idx = 0 + while idx < len(buffer): + tag = buffer[idx] >> 5 + size = buffer[idx] & 0b11111 + idx += 1 + + if tag == 0: + parts.append(bool(size)) + continue + if size == 0b11111: + (offset, size) = _unpack_uleb128(buffer[idx:]) + idx += offset + chunk = buffer[idx : idx + size] + idx += size + + if tag == 1: + assert size == 8, "expected float64" + parts.extend(struct.unpack("!d", chunk)) + elif tag == 2: + parts.append(int.from_bytes(chunk, "big", signed=True)) + elif tag == 3: + parts.append(chunk) + else: + assert tag == 4 + parts.append(chunk.decode(errors="surrogatepass")) + return tuple(parts) + + +def choices_from_bytes(buffer: bytes, /) -> tuple[ChoiceT, ...] | None: + """ + Deserialize a bytestring to a tuple of choices. Inverts choices_to_bytes. + + Returns None if the given bytestring is not a valid serialization of choice + sequences. + """ + try: + return _choices_from_bytes(buffer) + except Exception: + # deserialization error, eg because our format changed or someone put junk + # data in the db. 
+ return None diff --git a/vendored/hypothesis/entry_points.py b/vendored/hypothesis/entry_points.py new file mode 100644 index 0000000..4a68af7 --- /dev/null +++ b/vendored/hypothesis/entry_points.py @@ -0,0 +1,39 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +"""Run all functions registered for the "hypothesis" entry point. + +This can be used with `st.register_type_strategy` to register strategies for your +custom types, running the relevant code when *hypothesis* is imported instead of +your package. +""" + +import importlib.metadata +import os +from collections.abc import Generator, Sequence +from importlib.metadata import EntryPoint + + +def get_entry_points() -> Generator[EntryPoint, None, None]: + try: + eps: Sequence[EntryPoint] = importlib.metadata.entry_points(group="hypothesis") + except TypeError: # pragma: no cover + # Load-time selection requires Python >= 3.10. See also + # https://importlib-metadata.readthedocs.io/en/latest/using.html + eps = importlib.metadata.entry_points().get("hypothesis", []) + yield from eps + + +def run() -> None: + if not os.environ.get("HYPOTHESIS_NO_PLUGINS"): + for entry in get_entry_points(): # pragma: no cover + hook = entry.load() + if callable(hook): + hook() diff --git a/vendored/hypothesis/errors.py b/vendored/hypothesis/errors.py new file mode 100644 index 0000000..aed4468 --- /dev/null +++ b/vendored/hypothesis/errors.py @@ -0,0 +1,317 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from datetime import timedelta +from typing import Any, Literal + +from hypothesis.internal.compat import ExceptionGroup + + +class HypothesisException(Exception): + """Generic parent class for exceptions thrown by Hypothesis.""" + + +class _Trimmable(HypothesisException): + """Hypothesis can trim these tracebacks even if they're raised internally.""" + + +class UnsatisfiedAssumption(HypothesisException): + """An internal error raised by assume. + + If you're seeing this error something has gone wrong. + """ + + def __init__(self, reason: str | None = None) -> None: + self.reason = reason + + +class NoSuchExample(HypothesisException): + """The condition we have been asked to satisfy appears to be always false. + + This does not guarantee that no example exists, only that we were + unable to find one. + """ + + def __init__(self, condition_string: str, extra: str = "") -> None: + super().__init__(f"No examples found of condition {condition_string}{extra}") + + +class Unsatisfiable(_Trimmable): + """We ran out of time or examples before we could find enough examples + which satisfy the assumptions of this hypothesis. + + This could be because the function is too slow. If so, try upping + the timeout. It could also be because the function is using assume + in a way that is too hard to satisfy. 
If so, try writing a custom
+    strategy or using a better starting point (e.g. if you are requiring
+    that a list has unique values, you could instead filter out all duplicate
+    values from the list).
+    """
+
+
+class ChoiceTooLarge(HypothesisException):
+    """An internal error raised by choice_from_index."""
+
+
+class Flaky(_Trimmable):
+    """
+    Base class for nondeterministic failures. Usually one of the more
+    specific subclasses (|FlakyFailure| or |FlakyStrategyDefinition|) is raised.
+
+    .. seealso::
+
+        See also the :doc:`flaky failures tutorial `.
+    """
+
+
+class FlakyReplay(Flaky):
+    """Internal error raised by the conjecture engine if flaky failures are
+    detected during replay.
+
+    Carries information allowing the runner to reconstruct the flakiness as
+    a FlakyFailure exception group for final presentation.
+    """
+
+    def __init__(self, reason, interesting_origins=None):
+        super().__init__(reason)
+        self.reason = reason
+        self._interesting_origins = interesting_origins
+
+
+class FlakyStrategyDefinition(Flaky):
+    """
+    This function appears to cause inconsistent data generation.
+
+    Common causes for this problem are:
+    1. The strategy depends on external state. e.g. it uses an external
+       random number generator. Try to make a version that passes all the
+       relevant state in from Hypothesis.
+
+    .. seealso::
+
+        See also the :doc:`flaky failures tutorial `.
+    """
+
+
+class _WrappedBaseException(Exception):
+    """Used internally for wrapping BaseExceptions as components of FlakyFailure."""
+
+
+class FlakyFailure(ExceptionGroup, Flaky):
+    """
+    This function appears to fail non-deterministically: we have seen it
+    fail when passed this example at least once, but a subsequent invocation
+    did not fail, or caused a distinct error.
+
+    Common causes for this problem are:
+    1. The function depends on external state. e.g. it uses an external
+       random number generator. Try to make a version that passes all the
+       relevant state in from Hypothesis.
+    2. The function is suffering from too much recursion and its failure
+       depends sensitively on where it's been called from.
+    3. The function is timing sensitive and can fail or pass depending on
+       how long it takes. Try breaking it up into smaller functions which
+       don't do that and testing those instead.
+
+    .. seealso::
+
+        See also the :doc:`flaky failures tutorial `.
+    """
+
+    def __new__(cls, msg, group):
+        # The Exception mixin forces this to be an ExceptionGroup (only accepting
+        # Exceptions, not BaseException). Usually BaseException is raised
+        # directly and will hence not be part of a FlakyFailure, but I'm not
+        # sure this assumption holds everywhere. So wrap any BaseExceptions.
+        group = list(group)
+        for i, exc in enumerate(group):
+            if not isinstance(exc, Exception):
+                err = _WrappedBaseException()
+                err.__cause__ = err.__context__ = exc
+                group[i] = err
+        return ExceptionGroup.__new__(cls, msg, group)
+
+    # defining `derive` is required for `split` to return an instance of FlakyFailure
+    # instead of ExceptionGroup. See https://github.com/python/cpython/issues/119287
+    # and https://docs.python.org/3/library/exceptions.html#BaseExceptionGroup.derive
+    def derive(self, excs):
+        return type(self)(self.message, excs)
+
+
+class FlakyBackendFailure(FlakyFailure):
+    """
+    A failure was reported by an |alternative backend|,
+    but this failure did not reproduce when replayed under the Hypothesis backend.
+
+    When an alternative backend reports a failure, Hypothesis first replays it
+    under the standard Hypothesis backend to check for flakiness.
If the failure + does not reproduce, Hypothesis raises this exception. + """ + + +class InvalidArgument(_Trimmable, TypeError): + """Used to indicate that the arguments to a Hypothesis function were in + some manner incorrect.""" + + +class ResolutionFailed(InvalidArgument): + """Hypothesis had to resolve a type to a strategy, but this failed. + + Type inference is best-effort, so this only happens when an + annotation exists but could not be resolved for a required argument + to the target of ``builds()``, or where the user passed ``...``. + """ + + +class InvalidState(HypothesisException): + """The system is not in a state where you were allowed to do that.""" + + +class InvalidDefinition(_Trimmable, TypeError): + """Used to indicate that a class definition was not well put together and + has something wrong with it.""" + + +class HypothesisWarning(HypothesisException, Warning): + """A generic warning issued by Hypothesis.""" + + +class FailedHealthCheck(_Trimmable): + """Raised when a test fails a health check. See |HealthCheck|.""" + + +class NonInteractiveExampleWarning(HypothesisWarning): + """SearchStrategy.example() is designed for interactive use, + but should never be used in the body of a test. + """ + + +class HypothesisDeprecationWarning(HypothesisWarning, FutureWarning): + """A deprecation warning issued by Hypothesis. + + Actually inherits from FutureWarning, because DeprecationWarning is + hidden by the default warnings filter. + + You can configure the :mod:`python:warnings` module to handle these + warnings differently to others, either turning them into errors or + suppressing them entirely. Obviously we would prefer the former! + """ + + +class HypothesisSideeffectWarning(HypothesisWarning): + """A warning issued by Hypothesis when it sees actions that are + discouraged at import or initialization time because they are + slow or have user-visible side effects. + """ + + +class Frozen(HypothesisException): + """Raised when a mutation method has been called on a ConjectureData object + after freeze() has been called.""" + + +def __getattr__(name: str) -> Any: + if name == "MultipleFailures": + from hypothesis._settings import note_deprecation + from hypothesis.internal.compat import BaseExceptionGroup + + note_deprecation( + "MultipleFailures is deprecated; use the builtin `BaseExceptionGroup` type " + "instead, or `exceptiongroup.BaseExceptionGroup` before Python 3.11", + since="2022-08-02", + has_codemod=False, # This would be a great PR though! + stacklevel=1, + ) + return BaseExceptionGroup + + raise AttributeError(f"Module 'hypothesis.errors' has no attribute {name}") + + +class DeadlineExceeded(_Trimmable): + """ + Raised when an input takes too long to run, relative to the |settings.deadline| + setting. + """ + + def __init__(self, runtime: timedelta, deadline: timedelta) -> None: + super().__init__( + f"Test took {runtime.total_seconds() * 1000:.2f}ms, which exceeds " + f"the deadline of {deadline.total_seconds() * 1000:.2f}ms. If you " + "expect test cases to take this long, you can use @settings(deadline=...) " + "to either set a higher deadline, or to disable it with deadline=None." + ) + self.runtime = runtime + self.deadline = deadline + + def __reduce__( + self, + ) -> tuple[type["DeadlineExceeded"], tuple[timedelta, timedelta]]: + return (type(self), (self.runtime, self.deadline)) + + +class StopTest(BaseException): + """Raised when a test should stop running and return control to + the Hypothesis engine, which should then continue normally. 
+ """ + + def __init__(self, testcounter: int) -> None: + super().__init__(repr(testcounter)) + self.testcounter = testcounter + + +class DidNotReproduce(HypothesisException): + pass + + +class Found(HypothesisException): + """Signal that the example matches condition. Internal use only.""" + + +class RewindRecursive(Exception): + """Signal that the type inference should be rewound due to recursive types. Internal use only.""" + + def __init__(self, target: object) -> None: + self.target = target + + +class SmallSearchSpaceWarning(HypothesisWarning): + """Indicates that an inferred strategy does not span the search space + in a meaningful way, for example by only creating default instances.""" + + +CannotProceedScopeT = Literal["verified", "exhausted", "discard_test_case", "other"] + + +class BackendCannotProceed(HypothesisException): + """ + Raised by alternative backends when a |PrimitiveProvider| cannot proceed. + This is expected to occur inside one of the ``.draw_*()`` methods, or for + symbolic execution perhaps in |PrimitiveProvider.realize|. + + The optional ``scope`` argument can enable smarter integration: + + verified: + Do not request further test cases from this backend. We *may* + generate more test cases with other backends; if one fails then + Hypothesis will report unsound verification in the backend too. + + exhausted: + Do not request further test cases from this backend; finish testing + with test cases generated with the default backend. Common if e.g. + native code blocks symbolic reasoning very early. + + discard_test_case: + This particular test case could not be converted to concrete values; + skip any further processing and continue with another test case from + this backend. + """ + + def __init__(self, scope: CannotProceedScopeT = "other", /) -> None: + self.scope = scope diff --git a/vendored/hypothesis/extra/__init__.py b/vendored/hypothesis/extra/__init__.py new file mode 100644 index 0000000..fcb1ac6 --- /dev/null +++ b/vendored/hypothesis/extra/__init__.py @@ -0,0 +1,9 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. diff --git a/vendored/hypothesis/extra/_array_helpers.py b/vendored/hypothesis/extra/_array_helpers.py new file mode 100644 index 0000000..c688bc5 --- /dev/null +++ b/vendored/hypothesis/extra/_array_helpers.py @@ -0,0 +1,700 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import re +from types import EllipsisType +from typing import NamedTuple + +from hypothesis import assume, strategies as st +from hypothesis.errors import InvalidArgument +from hypothesis.internal.conjecture.utils import _calc_p_continue +from hypothesis.internal.coverage import check_function +from hypothesis.internal.validation import check_type, check_valid_interval +from hypothesis.strategies._internal.utils import defines_strategy +from hypothesis.utils.conventions import UniqueIdentifier, not_set + +__all__ = [ + "NDIM_MAX", + "_BIE", + "BasicIndex", + "BasicIndexStrategy", + "BroadcastableShapes", + "MutuallyBroadcastableShapesStrategy", + "Shape", + "_BIENoEllipsis", + "_BIENoEllipsisNoNewaxis", + "_BIENoNewaxis", + "array_shapes", + "broadcastable_shapes", + "check_argument", + "check_valid_dims", + "mutually_broadcastable_shapes", + "order_check", + "valid_tuple_axes", +] + + +Shape = tuple[int, ...] + +# Type aliases for basic array index elements. Variants exist to accurately +# type the return value of basic_indices() based on allow_ellipsis/allow_newaxis. +_BIE = int | slice | None | EllipsisType +_BIENoEllipsis = int | slice | None +_BIENoNewaxis = int | slice | EllipsisType +_BIENoEllipsisNoNewaxis = int | slice + +BasicIndex = _BIE | tuple[_BIE, ...] + + +class BroadcastableShapes(NamedTuple): + input_shapes: tuple[Shape, ...] + result_shape: Shape + + +@check_function +def check_argument(condition, fail_message, *f_args, **f_kwargs): + if not condition: + raise InvalidArgument(fail_message.format(*f_args, **f_kwargs)) + + +@check_function +def order_check(name, floor, min_, max_): + if floor > min_: + raise InvalidArgument(f"min_{name} must be at least {floor} but was {min_}") + if min_ > max_: + raise InvalidArgument(f"min_{name}={min_} is larger than max_{name}={max_}") + + +# 32 is a dimension limit specific to NumPy, and does not necessarily apply to +# other array/tensor libraries. Historically these strategies were built for the +# NumPy extra, so it's nice to keep these limits, and it's seemingly unlikely +# someone would want to generate >32 dim arrays anyway. +# See https://github.com/HypothesisWorks/hypothesis/pull/3067. +NDIM_MAX = 32 + + +@check_function +def check_valid_dims(dims, name): + if dims > NDIM_MAX: + raise InvalidArgument( + f"{name}={dims}, but Hypothesis does not support arrays with " + f"more than {NDIM_MAX} dimensions" + ) + + +@defines_strategy() +def array_shapes( + *, + min_dims: int = 1, + max_dims: int | None = None, + min_side: int = 1, + max_side: int | None = None, +) -> st.SearchStrategy[Shape]: + """Return a strategy for array shapes (tuples of int >= 1). + + * ``min_dims`` is the smallest length that the generated shape can possess. + * ``max_dims`` is the largest length that the generated shape can possess, + defaulting to ``min_dims + 2``. + * ``min_side`` is the smallest size that a dimension can possess. + * ``max_side`` is the largest size that a dimension can possess, + defaulting to ``min_side + 5``. 
+ """ + check_type(int, min_dims, "min_dims") + check_type(int, min_side, "min_side") + check_valid_dims(min_dims, "min_dims") + + if max_dims is None: + max_dims = min(min_dims + 2, NDIM_MAX) + check_type(int, max_dims, "max_dims") + check_valid_dims(max_dims, "max_dims") + + if max_side is None: + max_side = min_side + 5 + check_type(int, max_side, "max_side") + + order_check("dims", 0, min_dims, max_dims) + order_check("side", 0, min_side, max_side) + + return st.lists( + st.integers(min_side, max_side), min_size=min_dims, max_size=max_dims + ).map(tuple) + + +@defines_strategy() +def valid_tuple_axes( + ndim: int, + *, + min_size: int = 0, + max_size: int | None = None, +) -> st.SearchStrategy[tuple[int, ...]]: + """All tuples will have a length >= ``min_size`` and <= ``max_size``. The default + value for ``max_size`` is ``ndim``. + + Examples from this strategy shrink towards an empty tuple, which render most + sequential functions as no-ops. + + The following are some examples drawn from this strategy. + + .. code-block:: pycon + + >>> [valid_tuple_axes(3).example() for i in range(4)] + [(-3, 1), (0, 1, -1), (0, 2), (0, -2, 2)] + + ``valid_tuple_axes`` can be joined with other strategies to generate + any type of valid axis object, i.e. integers, tuples, and ``None``: + + .. code-block:: python + + any_axis_strategy = none() | integers(-ndim, ndim - 1) | valid_tuple_axes(ndim) + + """ + check_type(int, ndim, "ndim") + check_type(int, min_size, "min_size") + if max_size is None: + max_size = ndim + check_type(int, max_size, "max_size") + order_check("size", 0, min_size, max_size) + check_valid_interval(max_size, ndim, "max_size", "ndim") + + axes = st.integers(0, max(0, 2 * ndim - 1)).map( + lambda x: x if x < ndim else x - 2 * ndim + ) + + return st.lists( + axes, min_size=min_size, max_size=max_size, unique_by=lambda x: x % ndim + ).map(tuple) + + +@defines_strategy() +def broadcastable_shapes( + shape: Shape, + *, + min_dims: int = 0, + max_dims: int | None = None, + min_side: int = 1, + max_side: int | None = None, +) -> st.SearchStrategy[Shape]: + """Return a strategy for shapes that are broadcast-compatible with the + provided shape. + + Examples from this strategy shrink towards a shape with length ``min_dims``. + The size of an aligned dimension shrinks towards size ``1``. The size of an + unaligned dimension shrink towards ``min_side``. + + * ``shape`` is a tuple of integers. + * ``min_dims`` is the smallest length that the generated shape can possess. + * ``max_dims`` is the largest length that the generated shape can possess, + defaulting to ``max(len(shape), min_dims) + 2``. + * ``min_side`` is the smallest size that an unaligned dimension can possess. + * ``max_side`` is the largest size that an unaligned dimension can possess, + defaulting to 2 plus the size of the largest aligned dimension. + + The following are some examples drawn from this strategy. + + .. 
code-block:: pycon + + >>> [broadcastable_shapes(shape=(2, 3)).example() for i in range(5)] + [(1, 3), (), (2, 3), (2, 1), (4, 1, 3), (3, )] + + """ + check_type(tuple, shape, "shape") + check_type(int, min_side, "min_side") + check_type(int, min_dims, "min_dims") + check_valid_dims(min_dims, "min_dims") + + strict_check = max_side is None or max_dims is None + + if max_dims is None: + max_dims = min(max(len(shape), min_dims) + 2, NDIM_MAX) + check_type(int, max_dims, "max_dims") + check_valid_dims(max_dims, "max_dims") + + if max_side is None: + max_side = max(shape[-max_dims:] + (min_side,)) + 2 + check_type(int, max_side, "max_side") + + order_check("dims", 0, min_dims, max_dims) + order_check("side", 0, min_side, max_side) + + if strict_check: + dims = max_dims + bound_name = "max_dims" + else: + dims = min_dims + bound_name = "min_dims" + + # check for unsatisfiable min_side + if not all(min_side <= s for s in shape[::-1][:dims] if s != 1): + raise InvalidArgument( + f"Given shape={shape}, there are no broadcast-compatible " + f"shapes that satisfy: {bound_name}={dims} and min_side={min_side}" + ) + + # check for unsatisfiable [min_side, max_side] + if not ( + min_side <= 1 <= max_side or all(s <= max_side for s in shape[::-1][:dims]) + ): + raise InvalidArgument( + f"Given base_shape={shape}, there are no broadcast-compatible " + f"shapes that satisfy all of {bound_name}={dims}, " + f"min_side={min_side}, and max_side={max_side}" + ) + + if not strict_check: + # reduce max_dims to exclude unsatisfiable dimensions + for n, s in zip(range(max_dims), shape[::-1], strict=False): + if s < min_side and s != 1: + max_dims = n + break + if not (min_side <= 1 <= max_side or s <= max_side): + max_dims = n + break + + return MutuallyBroadcastableShapesStrategy( + num_shapes=1, + base_shape=shape, + min_dims=min_dims, + max_dims=max_dims, + min_side=min_side, + max_side=max_side, + ).map(lambda x: x.input_shapes[0]) + + +# See https://numpy.org/doc/stable/reference/c-api/generalized-ufuncs.html +# Implementation based on numpy.lib.function_base._parse_gufunc_signature +# with minor upgrades to handle numeric and optional dimensions. Examples: +# +# add (),()->() binary ufunc +# sum1d (i)->() reduction +# inner1d (i),(i)->() vector-vector multiplication +# matmat (m,n),(n,p)->(m,p) matrix multiplication +# vecmat (n),(n,p)->(p) vector-matrix multiplication +# matvec (m,n),(n)->(m) matrix-vector multiplication +# matmul (m?,n),(n,p?)->(m?,p?) combination of the four above +# cross1d (3),(3)->(3) cross product with frozen dimensions +# +# Note that while no examples of such usage are given, Numpy does allow +# generalised ufuncs that have *multiple output arrays*. This is not +# currently supported by Hypothesis - please contact us if you would use it! +# +# We are unsure if gufuncs allow frozen dimensions to be optional, but it's +# easy enough to support here - and so we will unless we learn otherwise. +_DIMENSION = r"\w+\??" # Note that \w permits digits too! +_SHAPE = rf"\((?:{_DIMENSION}(?:,{_DIMENSION}){{0,31}})?\)" +_ARGUMENT_LIST = f"{_SHAPE}(?:,{_SHAPE})*" +_SIGNATURE = rf"^{_ARGUMENT_LIST}->{_SHAPE}$" +_SIGNATURE_MULTIPLE_OUTPUT = rf"^{_ARGUMENT_LIST}->{_ARGUMENT_LIST}$" + + +class _GUfuncSig(NamedTuple): + input_shapes: tuple[Shape, ...] 
result_shape: Shape
+
+
+def _hypothesis_parse_gufunc_signature(signature):
+    # Disable all_checks to better match the Numpy version, for testing
+    if not re.match(_SIGNATURE, signature):
+        if re.match(_SIGNATURE_MULTIPLE_OUTPUT, signature):
+            raise InvalidArgument(
+                "Hypothesis does not yet support generalised ufunc signatures "
+                "with multiple output arrays - mostly because we don't know of "
+                "anyone who uses them! Please get in touch with us to fix that."
+                f"\n ({signature=})"
+            )
+        if re.match(
+            (
+                # Taken from np.lib.function_base._SIGNATURE
+                r"^\((?:\w+(?:,\w+)*)?\)(?:,\((?:\w+(?:,\w+)*)?\))*->"
+                r"\((?:\w+(?:,\w+)*)?\)(?:,\((?:\w+(?:,\w+)*)?\))*$"
+            ),
+            signature,
+        ):
+            raise InvalidArgument(
+                f"{signature=} matches Numpy's regex for gufunc signatures, "
+                f"but contains shapes with more than {NDIM_MAX} dimensions and is thus invalid."
+            )
+        raise InvalidArgument(f"{signature!r} is not a valid gufunc signature")
+    input_shapes, output_shapes = (
+        tuple(tuple(re.findall(_DIMENSION, a)) for a in re.findall(_SHAPE, arg_list))
+        for arg_list in signature.split("->")
+    )
+    assert len(output_shapes) == 1
+    result_shape = output_shapes[0]
+    # Check that there are no names in output shape that do not appear in inputs.
+    # (kept out of parser function for easier generation of test values)
+    # We also disallow frozen optional dimensions - this is ambiguous as there is
+    # no way to share an un-named dimension between shapes. Maybe just padding?
+    # Anyway, we disallow it pending clarification from upstream.
+    for shape in (*input_shapes, result_shape):
+        for name in shape:
+            try:
+                int(name.strip("?"))
+                if "?" in name:
+                    raise InvalidArgument(
+                        f"Got dimension {name!r}, but handling of frozen optional dimensions "
+                        "is ambiguous. If you know how this should work, please "
+                        f"contact us to get this fixed and documented ({signature=})."
+                    )
+            except ValueError:
+                names_in = {n.strip("?") for shp in input_shapes for n in shp}
+                names_out = {n.strip("?") for n in result_shape}
+                if name.strip("?") in (names_out - names_in):
+                    raise InvalidArgument(
+                        f"The {name!r} dimension only appears in the output shape, and is "
+                        f"not frozen, so the size is not determined ({signature=})."
+                    ) from None
+    return _GUfuncSig(input_shapes=input_shapes, result_shape=result_shape)
+
+
+@defines_strategy()
+def mutually_broadcastable_shapes(
+    *,
+    num_shapes: UniqueIdentifier | int = not_set,
+    signature: UniqueIdentifier | str = not_set,
+    base_shape: Shape = (),
+    min_dims: int = 0,
+    max_dims: int | None = None,
+    min_side: int = 1,
+    max_side: int | None = None,
+) -> st.SearchStrategy[BroadcastableShapes]:
+    """Return a strategy for a specified number of shapes N that are
+    mutually-broadcastable with one another and with the provided base shape.
+
+    * ``num_shapes`` is the number of mutually broadcast-compatible shapes to generate.
+    * ``base_shape`` is the shape against which all generated shapes can broadcast.
+      The default shape is empty, which corresponds to a scalar and thus does
+      not constrain broadcasting at all.
+    * ``min_dims`` is the smallest length that the generated shape can possess.
+    * ``max_dims`` is the largest length that the generated shape can possess,
+      defaulting to ``max(len(shape), min_dims) + 2``.
+    * ``min_side`` is the smallest size that an unaligned dimension can possess.
+    * ``max_side`` is the largest size that an unaligned dimension can possess,
+      defaulting to 2 plus the size of the largest aligned dimension.
+ + The strategy will generate a :obj:`python:typing.NamedTuple` containing: + + * ``input_shapes`` as a tuple of the N generated shapes. + * ``result_shape`` as the resulting shape produced by broadcasting the N shapes + with the base shape. + + The following are some examples drawn from this strategy. + + .. code-block:: pycon + + >>> # Draw three shapes where each shape is broadcast-compatible with (2, 3) + ... strat = mutually_broadcastable_shapes(num_shapes=3, base_shape=(2, 3)) + >>> for _ in range(5): + ... print(strat.example()) + BroadcastableShapes(input_shapes=((4, 1, 3), (4, 2, 3), ()), result_shape=(4, 2, 3)) + BroadcastableShapes(input_shapes=((3,), (1, 3), (2, 3)), result_shape=(2, 3)) + BroadcastableShapes(input_shapes=((), (), ()), result_shape=()) + BroadcastableShapes(input_shapes=((3,), (), (3,)), result_shape=(3,)) + BroadcastableShapes(input_shapes=((1, 2, 3), (3,), ()), result_shape=(1, 2, 3)) + + """ + arg_msg = "Pass either the `num_shapes` or the `signature` argument, but not both." + if num_shapes is not not_set: + check_argument(signature is not_set, arg_msg) + check_type(int, num_shapes, "num_shapes") + assert isinstance(num_shapes, int) # for mypy + parsed_signature = None + sig_dims = 0 + else: + check_argument(signature is not not_set, arg_msg) + if signature is None: + raise InvalidArgument( + "Expected a string, but got invalid signature=None. " + "(maybe .signature attribute of an element-wise ufunc?)" + ) + check_type(str, signature, "signature") + parsed_signature = _hypothesis_parse_gufunc_signature(signature) + all_shapes = (*parsed_signature.input_shapes, parsed_signature.result_shape) + sig_dims = min(len(s) for s in all_shapes) + num_shapes = len(parsed_signature.input_shapes) + + if num_shapes < 1: + raise InvalidArgument(f"num_shapes={num_shapes} must be at least 1") + + check_type(tuple, base_shape, "base_shape") + check_type(int, min_side, "min_side") + check_type(int, min_dims, "min_dims") + check_valid_dims(min_dims, "min_dims") + + strict_check = max_dims is not None + + if max_dims is None: + max_dims = min(max(len(base_shape), min_dims) + 2, NDIM_MAX - sig_dims) + check_type(int, max_dims, "max_dims") + check_valid_dims(max_dims, "max_dims") + + if max_side is None: + max_side = max(base_shape[-max_dims:] + (min_side,)) + 2 + check_type(int, max_side, "max_side") + + order_check("dims", 0, min_dims, max_dims) + order_check("side", 0, min_side, max_side) + + if signature is not None and max_dims > NDIM_MAX - sig_dims: + raise InvalidArgument( + f"max_dims={signature!r} would exceed the {NDIM_MAX}-dimension" + "limit Hypothesis imposes on array shapes, " + f"given signature={parsed_signature!r}" + ) + + if strict_check: + dims = max_dims + bound_name = "max_dims" + else: + dims = min_dims + bound_name = "min_dims" + + # check for unsatisfiable min_side + if not all(min_side <= s for s in base_shape[::-1][:dims] if s != 1): + raise InvalidArgument( + f"Given base_shape={base_shape}, there are no broadcast-compatible " + f"shapes that satisfy: {bound_name}={dims} and min_side={min_side}" + ) + + # check for unsatisfiable [min_side, max_side] + if not ( + min_side <= 1 <= max_side or all(s <= max_side for s in base_shape[::-1][:dims]) + ): + raise InvalidArgument( + f"Given base_shape={base_shape}, there are no broadcast-compatible " + f"shapes that satisfy all of {bound_name}={dims}, " + f"min_side={min_side}, and max_side={max_side}" + ) + + if not strict_check: + # reduce max_dims to exclude unsatisfiable dimensions + for n, s in 
zip(range(max_dims), base_shape[::-1], strict=False): + if s < min_side and s != 1: + max_dims = n + break + if not (min_side <= 1 <= max_side or s <= max_side): + max_dims = n + break + + return MutuallyBroadcastableShapesStrategy( + num_shapes=num_shapes, + signature=parsed_signature, + base_shape=base_shape, + min_dims=min_dims, + max_dims=max_dims, + min_side=min_side, + max_side=max_side, + ) + + +class MutuallyBroadcastableShapesStrategy(st.SearchStrategy): + def __init__( + self, + num_shapes, + signature=None, + base_shape=(), + min_dims=0, + max_dims=None, + min_side=1, + max_side=None, + ): + super().__init__() + self.base_shape = base_shape + self.side_strat = st.integers(min_side, max_side) + self.num_shapes = num_shapes + self.signature = signature + self.min_dims = min_dims + self.max_dims = max_dims + self.min_side = min_side + self.max_side = max_side + + self.size_one_allowed = self.min_side <= 1 <= self.max_side + + def do_draw(self, data): + # We don't usually have a gufunc signature; do the common case first & fast. + if self.signature is None: + return self._draw_loop_dimensions(data) + + # When we *do*, draw the core dims, then draw loop dims, and finally combine. + core_in, core_res = self._draw_core_dimensions(data) + + # If some core shape has omitted optional dimensions, it's an error to add + # loop dimensions to it. We never omit core dims if min_dims >= 1. + # This ensures that we respect Numpy's gufunc broadcasting semantics and user + # constraints without needing to check whether the loop dims will be + # interpreted as an invalid substitute for the omitted core dims. + # We may implement this check later! + use = [None not in shp for shp in core_in] + loop_in, loop_res = self._draw_loop_dimensions(data, use=use) + + def add_shape(loop, core): + return tuple(x for x in (loop + core)[-NDIM_MAX:] if x is not None) + + return BroadcastableShapes( + input_shapes=tuple( + add_shape(l_in, c) for l_in, c in zip(loop_in, core_in, strict=True) + ), + result_shape=add_shape(loop_res, core_res), + ) + + def _draw_core_dimensions(self, data): + # Draw gufunc core dimensions, with None standing for optional dimensions + # that will not be present in the final shape. We track omitted dims so + # that we can do an accurate per-shape length cap. + dims = {} + shapes = [] + for shape in (*self.signature.input_shapes, self.signature.result_shape): + shapes.append([]) + for name in shape: + if name.isdigit(): + shapes[-1].append(int(name)) + continue + if name not in dims: + dim = name.strip("?") + dims[dim] = data.draw(self.side_strat) + if self.min_dims == 0 and not data.draw_boolean(7 / 8): + dims[dim + "?"] = None + else: + dims[dim + "?"] = dims[dim] + shapes[-1].append(dims[name]) + return tuple(tuple(s) for s in shapes[:-1]), tuple(shapes[-1]) + + def _draw_loop_dimensions(self, data, use=None): + # All shapes are handled in column-major order; i.e. they are reversed + base_shape = self.base_shape[::-1] + result_shape = list(base_shape) + shapes = [[] for _ in range(self.num_shapes)] + if use is None: + use = [True for _ in range(self.num_shapes)] + else: + assert len(use) == self.num_shapes + assert all(isinstance(x, bool) for x in use) + + _gap = self.max_dims - self.min_dims + p_keep_extending_shape = _calc_p_continue(desired_avg=_gap / 2, max_size=_gap) + + for dim_count in range(1, self.max_dims + 1): + dim = dim_count - 1 + + # We begin by drawing a valid dimension-size for the given + # dimension. 
This restricts the variability across the shapes
+            # at this dimension such that they can only choose between
+            # this size and a singleton dimension.
+            if len(base_shape) < dim_count or base_shape[dim] == 1:
+                # dim is unrestricted by the base-shape: shrink to min_side
+                dim_side = data.draw(self.side_strat)
+            elif base_shape[dim] <= self.max_side:
+                # dim is aligned with non-singleton base-dim
+                dim_side = base_shape[dim]
+            else:
+                # only a singleton is valid in alignment with the base-dim
+                dim_side = 1
+
+            allowed_sides = sorted([1, dim_side])  # shrink to 0 when available
+            for shape_id, shape in enumerate(shapes):
+                # Populating this dimension-size for each shape, either
+                # the drawn size is used or, if permitted, a singleton
+                # dimension.
+                if dim <= len(result_shape) and self.size_one_allowed:
+                    # aligned: shrink towards size 1
+                    side = data.draw(st.sampled_from(allowed_sides))
+                else:
+                    side = dim_side
+
+                # Use a trick where a biased coin is queried to see
+                # if the given shape-tuple will continue to be grown. All
+                # of the relevant draws will still be made for the given
+                # shape-tuple even if it is no longer being added to.
+                # This helps to ensure more stable shrinking behavior.
+                if self.min_dims < dim_count:
+                    use[shape_id] &= data.draw_boolean(p_keep_extending_shape)
+
+                if use[shape_id]:
+                    shape.append(side)
+                    if len(result_shape) < len(shape):
+                        result_shape.append(shape[-1])
+                    elif shape[-1] != 1 and result_shape[dim] == 1:
+                        result_shape[dim] = shape[-1]
+            if not any(use):
+                break
+
+        result_shape = result_shape[: max(map(len, [self.base_shape, *shapes]))]
+
+        assert len(shapes) == self.num_shapes
+        assert all(self.min_dims <= len(s) <= self.max_dims for s in shapes)
+        assert all(self.min_side <= s <= self.max_side for side in shapes for s in side)
+
+        return BroadcastableShapes(
+            input_shapes=tuple(tuple(reversed(shape)) for shape in shapes),
+            result_shape=tuple(reversed(result_shape)),
+        )
+
+
+class BasicIndexStrategy(st.SearchStrategy):
+    def __init__(
+        self,
+        shape,
+        min_dims,
+        max_dims,
+        allow_ellipsis,
+        allow_newaxis,
+        allow_fewer_indices_than_dims,
+    ):
+        super().__init__()
+        self.shape = shape
+        self.min_dims = min_dims
+        self.max_dims = max_dims
+        self.allow_ellipsis = allow_ellipsis
+        self.allow_newaxis = allow_newaxis
+        # allow_fewer_indices_than_dims=False will disable generating indices
+        # that don't cover all axes, i.e. indices that will flat index arrays.
+        # This is necessary for the Array API as such indices are not supported.
+        self.allow_fewer_indices_than_dims = allow_fewer_indices_than_dims
+
+    def do_draw(self, data):
+        # General plan: determine the actual selection up front with a straightforward
+        # approach that shrinks well, then complicate it by inserting other things.
+        result = []
+        for dim_size in self.shape:
+            if dim_size == 0:
+                result.append(slice(None))
+                continue
+            strategy = st.integers(-dim_size, dim_size - 1) | st.slices(dim_size)
+            result.append(data.draw(strategy))
+        # Insert some number of new size-one dimensions if allowed
+        result_dims = sum(isinstance(idx, slice) for idx in result)
+        while (
+            self.allow_newaxis
+            and result_dims < self.max_dims
+            and (result_dims < self.min_dims or data.draw(st.booleans()))
+        ):
+            i = data.draw(st.integers(0, len(result)))
+            result.insert(i, None)  # Note that `np.newaxis is None`
+            result_dims += 1
+        # Check that we'll have the right number of dimensions; reject if not.
+ # It's easy to do this by construction if you don't care about shrinking, + # which is really important for array shapes. So we filter instead. + assume(self.min_dims <= result_dims <= self.max_dims) + # This is a quick-and-dirty way to insert ..., xor shorten the indexer, + # but it means we don't have to do any structural analysis. + if self.allow_ellipsis and data.draw(st.booleans()): + # Choose an index; then replace all adjacent whole-dimension slices. + i = j = data.draw(st.integers(0, len(result))) + while i > 0 and result[i - 1] == slice(None): + i -= 1 + while j < len(result) and result[j] == slice(None): + j += 1 + result[i:j] = [Ellipsis] + elif self.allow_fewer_indices_than_dims: # pragma: no cover + while result[-1:] == [slice(None, None)] and data.draw(st.integers(0, 7)): + result.pop() + if len(result) == 1 and data.draw(st.booleans()): + # Sometimes generate bare element equivalent to a length-one tuple + return result[0] + return tuple(result) diff --git a/vendored/hypothesis/extra/_patching.py b/vendored/hypothesis/extra/_patching.py new file mode 100644 index 0000000..3dba3f7 --- /dev/null +++ b/vendored/hypothesis/extra/_patching.py @@ -0,0 +1,341 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +""" +Write patches which add @example() decorators for discovered test cases. + +Requires `hypothesis[codemods,ghostwriter]` installed, i.e. black and libcst. + +This module is used by Hypothesis' builtin pytest plugin for failing examples +discovered during testing, and by HypoFuzz for _covering_ examples discovered +during fuzzing. +""" + +import ast +import difflib +import hashlib +import inspect +import re +import sys +import types +from ast import literal_eval +from collections.abc import Sequence +from contextlib import suppress +from datetime import date, datetime, timedelta, timezone +from pathlib import Path +from typing import Any + +import libcst as cst +from libcst import matchers as m +from libcst.codemod import CodemodContext, VisitorBasedCodemodCommand +from libcst.metadata import ExpressionContext, ExpressionContextProvider + +from hypothesis.configuration import storage_directory +from hypothesis.version import __version__ + +try: + import black +except ImportError: + black = None # type: ignore + +HEADER = """\ +From HEAD Mon Sep 17 00:00:00 2001 +From: {author} +Date: {when:%a, %d %b %Y %H:%M:%S} +Subject: [PATCH] {msg} + +--- +""" +FAIL_MSG = "discovered failure" +_space_only_re = re.compile("^ +$", re.MULTILINE) +_leading_space_re = re.compile("(^[ ]*)(?:[^ \n])", re.MULTILINE) + + +def dedent(text: str) -> tuple[str, str]: + # Simplified textwrap.dedent, for valid Python source code only + text = _space_only_re.sub("", text) + prefix = min(_leading_space_re.findall(text), key=len) + return re.sub(r"(?m)^" + prefix, "", text), prefix + + +def indent(text: str, prefix: str) -> str: + return "".join(prefix + line for line in text.splitlines(keepends=True)) + + +class AddExamplesCodemod(VisitorBasedCodemodCommand): + DESCRIPTION = "Add explicit examples to failing tests." 
+ + def __init__( + self, + context: CodemodContext, + fn_examples: dict[str, list[tuple[cst.Call, str]]], + strip_via: tuple[str, ...] = (), + decorator: str = "example", + width: int = 88, + ): + """Add @example() decorator(s) for failing test(s). + + `code` is the source code of the module where the test functions are defined. + `fn_examples` is a dict of function name to list-of-failing-examples. + """ + assert fn_examples, "This codemod does nothing without fn_examples." + super().__init__(context) + + self.decorator_func = cst.parse_expression(decorator) + self.line_length = width + value_in_strip_via: Any = m.MatchIfTrue( + lambda x: literal_eval(x.value) in strip_via + ) + self.strip_matching = m.Call( + m.Attribute(m.Call(), m.Name("via")), + [m.Arg(m.SimpleString() & value_in_strip_via)], + ) + + # Codemod the failing examples to Call nodes usable as decorators + self.fn_examples = { + k: tuple( + d + for (node, via) in nodes + if (d := self.__call_node_to_example_dec(node, via)) + ) + for k, nodes in fn_examples.items() + } + + def __call_node_to_example_dec( + self, node: cst.Call, via: str + ) -> cst.Decorator | None: + # If we have black installed, remove trailing comma, _unless_ there's a comment + node = node.with_changes( + func=self.decorator_func, + args=( + [ + a.with_changes( + comma=( + a.comma + if m.findall(a.comma, m.Comment()) + else cst.MaybeSentinel.DEFAULT + ) + ) + for a in node.args + ] + if black + else node.args + ), + ) + via: cst.BaseExpression = cst.Call( + func=cst.Attribute(node, cst.Name("via")), + args=[cst.Arg(cst.SimpleString(repr(via)))], + ) + if black: # pragma: no branch + try: + pretty = black.format_str( + cst.Module([]).code_for_node(via), + mode=black.Mode(line_length=self.line_length), + ) + except (ImportError, AttributeError): # pragma: no cover + return None # See https://github.com/psf/black/pull/4224 + via = cst.parse_expression(pretty.strip()) + return cst.Decorator(via) + + def leave_FunctionDef( + self, _original_node: cst.FunctionDef, updated_node: cst.FunctionDef + ) -> cst.FunctionDef: + return updated_node.with_changes( + # TODO: improve logic for where in the list to insert this decorator + decorators=tuple( + d + for d in updated_node.decorators + # `findall()` to see through the identity function workaround on py38 + if not m.findall(d, self.strip_matching) + ) + + self.fn_examples.get(updated_node.name.value, ()) + ) + + +def get_patch_for( + func: Any, + examples: Sequence[tuple[str, str]], + *, + strip_via: tuple[str, ...] = (), +) -> tuple[str, str, str] | None: + # Skip this if we're unable to find the location of this function. + try: + module = sys.modules[func.__module__] + file_path = Path(module.__file__) # type: ignore + except Exception: + return None + + fname = ( + file_path.relative_to(Path.cwd()) + if file_path.is_relative_to(Path.cwd()) + else file_path + ) + patch = _get_patch_for( + func, examples, strip_via=strip_via, namespace=module.__dict__ + ) + if patch is None: + return None + + (before, after) = patch + return (str(fname), before, after) + + +# split out for easier testing of patches in hypofuzz, where the function to +# apply the patch to may not be loaded in sys.modules. +def _get_patch_for( + func: Any, + examples: Sequence[tuple[str, str]], + *, + strip_via: tuple[str, ...] 
= (), + namespace: dict[str, Any], +) -> tuple[str, str] | None: + try: + before = inspect.getsource(func) + except Exception: # pragma: no cover + return None + + modules_in_test_scope = sorted( + ((k, v) for (k, v) in namespace.items() if isinstance(v, types.ModuleType)), + key=lambda kv: len(kv[1].__name__), + ) + + # The printed examples might include object reprs which are invalid syntax, + # so we parse here and skip over those. If _none_ are valid, there's no patch. + call_nodes: list[tuple[cst.Call, str]] = [] + + # we want to preserve order, but remove duplicates. + seen_examples = set() + for ex, via in examples: + if (ex, via) in seen_examples: + continue + seen_examples.add((ex, via)) + + with suppress(Exception): + node: Any = cst.parse_module(ex) + the_call = node.body[0].body[0].value + assert isinstance(the_call, cst.Call), the_call + # Check for st.data(), which doesn't support explicit examples + data = m.Arg(m.Call(m.Name("data"), args=[m.Arg(m.Ellipsis())])) + if m.matches(the_call, m.Call(args=[m.ZeroOrMore(), data, m.ZeroOrMore()])): + return None + + # Many reprs use the unqualified name of the type, e.g. np.array() + # -> array([...]), so here we find undefined names and look them up + # on each module which was in the test's global scope. + names = {} + for anode in ast.walk(ast.parse(ex, "eval")): + if ( + isinstance(anode, ast.Name) + and isinstance(anode.ctx, ast.Load) + and anode.id not in names + and anode.id not in namespace + ): + for k, v in modules_in_test_scope: + if anode.id in v.__dict__: + names[anode.id] = cst.parse_expression(f"{k}.{anode.id}") + break + + # LibCST doesn't track Load()/Store() state of names by default, so we have + # to do a bit of a dance here, *and* explicitly handle keyword arguments + # which are treated as Load() context - but even if that's fixed later + # we'll still want to support older versions. + with suppress(Exception): + wrapper = cst.metadata.MetadataWrapper(node) + kwarg_names = { + node.keyword # type: ignore + for node in m.findall(wrapper, m.Arg(keyword=m.Name())) + } + node = m.replace( + wrapper, + m.Name(value=m.MatchIfTrue(names.__contains__)) + & m.MatchMetadata(ExpressionContextProvider, ExpressionContext.LOAD) + & m.MatchIfTrue(lambda n, k=kwarg_names: n not in k), # type: ignore + replacement=lambda node, _, ns=names: ns[node.value], # type: ignore + ) + node = node.body[0].body[0].value + assert isinstance(node, cst.Call), node + call_nodes.append((node, via)) + + if not call_nodes: + return None + + if ( + namespace.get("hypothesis") is sys.modules["hypothesis"] + and "given" not in namespace # more reliably present than `example` + ): + decorator_func = "hypothesis.example" + else: + decorator_func = "example" + + # Do the codemod and return a triple containing location and replacement info. 
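# ---------------------------------------------------------------------------
# Editor's aside (illustrative only, not part of the vendored module): a
# sketch of the transformation performed below, assuming a failing example
# rendered as "test_foo(x=0)" and the FAIL_MSG defined earlier in this file.
# Given a test such as
#
#     @given(st.integers())
#     def test_foo(x):
#         ...
#
# the codemod prepends a decorator roughly of the form
#
#     @example(x=0).via('discovered failure')
#
# and make_patch() further down renders the before/after sources as a
# unified diff under the patch HEADER.
# ---------------------------------------------------------------------------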
+ dedented, prefix = dedent(before) + try: + node = cst.parse_module(dedented) + except Exception: # pragma: no cover + # inspect.getsource() sometimes returns a decorator alone, which is invalid + return None + after = AddExamplesCodemod( + CodemodContext(), + fn_examples={func.__name__: call_nodes}, + strip_via=strip_via, + decorator=decorator_func, + width=88 - len(prefix), # to match Black's default formatting + ).transform_module(node) + return (before, indent(after.code, prefix=prefix)) + + +def make_patch( + triples: Sequence[tuple[str, str, str]], + *, + msg: str = "Hypothesis: add explicit examples", + when: datetime | None = None, + author: str = f"Hypothesis {__version__} ", +) -> str: + """Create a patch for (fname, before, after) triples.""" + assert triples, "attempted to create empty patch" + when = when or datetime.now(tz=timezone.utc) + + by_fname: dict[Path, list[tuple[str, str]]] = {} + for fname, before, after in triples: + by_fname.setdefault(Path(fname), []).append((before, after)) + + diffs = [HEADER.format(msg=msg, when=when, author=author)] + for fname, changes in sorted(by_fname.items()): + source_before = source_after = fname.read_text(encoding="utf-8") + for before, after in changes: + source_after = source_after.replace(before.rstrip(), after.rstrip(), 1) + ud = difflib.unified_diff( + source_before.splitlines(keepends=True), + source_after.splitlines(keepends=True), + fromfile=f"./{fname}", # git strips the first part of the path by default + tofile=f"./{fname}", + ) + diffs.append("".join(ud)) + return "".join(diffs) + + +def save_patch(patch: str, *, slug: str = "") -> Path: # pragma: no cover + assert re.fullmatch(r"|[a-z]+-", slug), f"malformed {slug=}" + now = date.today().isoformat() + cleaned = re.sub(r"^Date: .+?$", "", patch, count=1, flags=re.MULTILINE) + hash8 = hashlib.sha1(cleaned.encode()).hexdigest()[:8] + fname = Path(storage_directory("patches", f"{now}--{slug}{hash8}.patch")) + fname.parent.mkdir(parents=True, exist_ok=True) + fname.write_text(patch, encoding="utf-8") + return fname.relative_to(Path.cwd()) + + +def gc_patches(slug: str = "") -> None: # pragma: no cover + cutoff = date.today() - timedelta(days=7) + for fname in Path(storage_directory("patches")).glob( + f"????-??-??--{slug}????????.patch" + ): + if date.fromisoformat(fname.stem.split("--")[0]) < cutoff: + fname.unlink() diff --git a/vendored/hypothesis/extra/array_api.py b/vendored/hypothesis/extra/array_api.py new file mode 100644 index 0000000..bba7974 --- /dev/null +++ b/vendored/hypothesis/extra/array_api.py @@ -0,0 +1,1156 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import math +import sys +from collections.abc import Iterable, Iterator, Mapping, Sequence +from numbers import Real +from types import SimpleNamespace +from typing import ( + Any, + Literal, + NamedTuple, + TypeAlias, + TypeVar, + get_args, +) +from warnings import warn +from weakref import WeakValueDictionary + +from hypothesis import strategies as st +from hypothesis.errors import HypothesisWarning, InvalidArgument +from hypothesis.extra._array_helpers import ( + NDIM_MAX, + BasicIndex, + BasicIndexStrategy, + BroadcastableShapes, + Shape, + array_shapes, + broadcastable_shapes, + check_argument, + check_valid_dims, + mutually_broadcastable_shapes as _mutually_broadcastable_shapes, + order_check, + valid_tuple_axes as _valid_tuple_axes, +) +from hypothesis.internal.conjecture import utils as cu +from hypothesis.internal.coverage import check_function +from hypothesis.internal.floats import next_down +from hypothesis.internal.reflection import proxies +from hypothesis.internal.validation import ( + check_type, + check_valid_bound, + check_valid_integer, + check_valid_interval, +) +from hypothesis.strategies._internal.strategies import check_strategy +from hypothesis.strategies._internal.utils import defines_strategy + +__all__ = [ + "make_strategies_namespace", +] + + +RELEASED_VERSIONS = ("2021.12", "2022.12", "2023.12", "2024.12") +NOMINAL_VERSIONS = (*RELEASED_VERSIONS, "draft") +assert sorted(NOMINAL_VERSIONS) == list(NOMINAL_VERSIONS) # sanity check +NominalVersion = Literal["2021.12", "2022.12", "2023.12", "2024.12", "draft"] +assert get_args(NominalVersion) == NOMINAL_VERSIONS # sanity check + + +INT_NAMES = ("int8", "int16", "int32", "int64") +UINT_NAMES = ("uint8", "uint16", "uint32", "uint64") +ALL_INT_NAMES = INT_NAMES + UINT_NAMES +FLOAT_NAMES = ("float32", "float64") +REAL_NAMES = ALL_INT_NAMES + FLOAT_NAMES +COMPLEX_NAMES = ("complex64", "complex128") +NUMERIC_NAMES = REAL_NAMES + COMPLEX_NAMES +DTYPE_NAMES = ("bool", *NUMERIC_NAMES) + +DataType = TypeVar("DataType") + + +@check_function +def check_xp_attributes(xp: Any, attributes: list[str]) -> None: + missing_attrs = [attr for attr in attributes if not hasattr(xp, attr)] + if len(missing_attrs) > 0: + f_attrs = ", ".join(missing_attrs) + raise InvalidArgument( + f"Array module {xp.__name__} does not have required attributes: {f_attrs}" + ) + + +def partition_attributes_and_stubs( + xp: Any, attributes: Iterable[str] +) -> tuple[list[Any], list[str]]: + non_stubs = [] + stubs = [] + for attr in attributes: + try: + non_stubs.append(getattr(xp, attr)) + except AttributeError: + stubs.append(attr) + + return non_stubs, stubs + + +def warn_on_missing_dtypes(xp: Any, stubs: list[str]) -> None: + f_stubs = ", ".join(stubs) + warn( + f"Array module {xp.__name__} does not have the following " + f"dtypes in its namespace: {f_stubs}", + HypothesisWarning, + stacklevel=3, + ) + + +def find_castable_builtin_for_dtype( + xp: Any, api_version: NominalVersion, dtype: DataType +) -> type[bool | int | float | complex]: + """Returns builtin type which can have values that are castable to the given + dtype, according to :xp-ref:`type promotion rules `. + + For floating dtypes we always return ``float``, even though ``int`` is also castable. 
+ """ + stubs = [] + + try: + bool_dtype = xp.bool + if dtype == bool_dtype: + return bool + except AttributeError: + stubs.append("bool") + + int_dtypes, int_stubs = partition_attributes_and_stubs(xp, ALL_INT_NAMES) + if dtype in int_dtypes: + return int + + float_dtypes, float_stubs = partition_attributes_and_stubs(xp, FLOAT_NAMES) + # None equals NumPy's xp.float64 object, so we specifically skip it here to + # ensure that InvalidArgument is still raised. xp.float64 is in fact an + # alias of np.dtype('float64'), and its equality with None is meant to be + # deprecated at some point. See https://github.com/numpy/numpy/issues/18434 + if dtype is not None and dtype in float_dtypes: + return float + + stubs.extend(int_stubs) + stubs.extend(float_stubs) + + if api_version > "2021.12": + complex_dtypes, complex_stubs = partition_attributes_and_stubs( + xp, COMPLEX_NAMES + ) + if dtype in complex_dtypes: + return complex + stubs.extend(complex_stubs) + + if len(stubs) > 0: + warn_on_missing_dtypes(xp, stubs) + raise InvalidArgument(f"dtype={dtype} not recognised in {xp.__name__}") + + +@check_function +def dtype_from_name(xp: Any, name: str) -> Any: + if name in DTYPE_NAMES: + try: + return getattr(xp, name) + except AttributeError as e: + raise InvalidArgument( + f"Array module {xp.__name__} does not have dtype {name} in its namespace" + ) from e + else: + f_valid_dtypes = ", ".join(DTYPE_NAMES) + raise InvalidArgument( + f"{name} is not a valid Array API data type (pick from: {f_valid_dtypes})" + ) + + +def _from_dtype( + xp: Any, + api_version: NominalVersion, + dtype: DataType | str, + *, + min_value: int | float | None = None, + max_value: int | float | None = None, + allow_nan: bool | None = None, + allow_infinity: bool | None = None, + allow_subnormal: bool | None = None, + exclude_min: bool | None = None, + exclude_max: bool | None = None, +) -> st.SearchStrategy[bool | int | float | complex]: + """Return a strategy for any value of the given dtype. + + Values generated are of the Python scalar which is + :xp-ref:`promotable ` to ``dtype``, where the values do + not exceed its bounds. + + * ``dtype`` may be a dtype object or the string name of a + :xp-ref:`valid dtype `. + + Compatible ``**kwargs`` are passed to the inferred strategy function for + integers and floats. This allows you to customise the min and max values, + and exclude non-finite numbers. This is particularly useful when kwargs are + passed through from :func:`arrays()`, as it seamlessly handles the ``width`` + or other representable bounds for you. 
+ """ + # TODO: for next released xp version, add note for complex dtype support + check_xp_attributes(xp, ["iinfo", "finfo"]) + + if isinstance(dtype, str): + dtype = dtype_from_name(xp, dtype) + builtin = find_castable_builtin_for_dtype(xp, api_version, dtype) + + def check_valid_minmax(prefix, val, info_obj): + name = f"{prefix}_value" + check_valid_bound(val, name) + check_argument( + val >= info_obj.min, + f"dtype={dtype} requires {name}={val} to be at least {info_obj.min}", + ) + check_argument( + val <= info_obj.max, + f"dtype={dtype} requires {name}={val} to be at most {info_obj.max}", + ) + + if builtin is bool: + return st.booleans() + elif builtin is int: + iinfo = xp.iinfo(dtype) + if min_value is None: + min_value = iinfo.min + if max_value is None: + max_value = iinfo.max + check_valid_integer(min_value, "min_value") + check_valid_integer(max_value, "max_value") + assert isinstance(min_value, int) + assert isinstance(max_value, int) + check_valid_minmax("min", min_value, iinfo) + check_valid_minmax("max", max_value, iinfo) + check_valid_interval(min_value, max_value, "min_value", "max_value") + return st.integers(min_value=min_value, max_value=max_value) + elif builtin is float: + finfo = xp.finfo(dtype) + kw = {} + + # Whilst we know the boundary values of float dtypes from finfo, we do + # not assign them to the floats() strategy by default - passing min/max + # values will modify test case reduction behaviour so that simple bugs + # may become harder for users to identify. We plan to improve floats() + # behaviour in https://github.com/HypothesisWorks/hypothesis/issues/2907. + # Setting width should manage boundary values for us anyway. + if min_value is not None: + check_valid_bound(min_value, "min_value") + assert isinstance(min_value, Real) + check_valid_minmax("min", min_value, finfo) + kw["min_value"] = min_value + if max_value is not None: + check_valid_bound(max_value, "max_value") + assert isinstance(max_value, Real) + check_valid_minmax("max", max_value, finfo) + if min_value is not None: + check_valid_interval(min_value, max_value, "min_value", "max_value") + kw["max_value"] = max_value + + # We infer whether an array module will flush subnormals to zero, as may + # be the case when libraries are built with compiler options that + # violate IEEE-754 (e.g. -ffast-math and -ftz=true). Note we do this for + # the specific dtype, as compilers may end up flushing subnormals for + # one float but supporting subnormals for the other. + # + # By default, floats() will generate subnormals if they are in the + # inferred values range. If we have detected that xp flushes to zero for + # the passed dtype, we ensure from_dtype() will not generate subnormals + # by default. + if allow_subnormal is not None: + kw["allow_subnormal"] = allow_subnormal + else: + subnormal = next_down(float(finfo.smallest_normal), width=finfo.bits) + ftz = bool(xp.asarray(subnormal, dtype=dtype) == 0) + if ftz: + kw["allow_subnormal"] = False + + if allow_nan is not None: + kw["allow_nan"] = allow_nan + if allow_infinity is not None: + kw["allow_infinity"] = allow_infinity + if exclude_min is not None: + kw["exclude_min"] = exclude_min + if exclude_max is not None: + kw["exclude_max"] = exclude_max + + return st.floats(width=finfo.bits, **kw) + else: + finfo = xp.finfo(dtype) + # See above comment on FTZ inference. We explicitly infer with a + # complex array, in case complex arrays have different FTZ behaviour + # than arrays of the respective composite float. 
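# ---------------------------------------------------------------------------
# Editor's aside (illustrative only, not part of the vendored module): for
# float32 the smallest normal is 2**-126 (about 1.18e-38), and next_down() of
# that value is the largest subnormal. If the array module was built with
# flush-to-zero semantics (e.g. -ffast-math), then
# xp.asarray(subnormal, dtype=xp.float32) compares equal to 0, and the
# strategy stops generating subnormals for that dtype by default.
# ---------------------------------------------------------------------------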
+ if allow_subnormal is None: + subnormal = next_down(float(finfo.smallest_normal), width=finfo.bits) + x = xp.asarray(complex(subnormal, subnormal), dtype=dtype) + builtin_x = complex(x) + allow_subnormal = builtin_x.real != 0 and builtin_x.imag != 0 + return st.complex_numbers( + allow_nan=allow_nan, + allow_infinity=allow_infinity, + allow_subnormal=allow_subnormal, + width=finfo.bits * 2, + ) + + +class ArrayStrategy(st.SearchStrategy): + def __init__( + self, *, xp, api_version, elements_strategy, dtype, shape, fill, unique + ): + super().__init__() + self.xp = xp + self.elements_strategy = elements_strategy + self.dtype = dtype + self.shape = shape + self.fill = fill + self.unique = unique + self.array_size = math.prod(shape) + self.builtin = find_castable_builtin_for_dtype(xp, api_version, dtype) + self.finfo = None if self.builtin is not float else xp.finfo(self.dtype) + + def check_set_value(self, val, val_0d, strategy): + if val == val and self.builtin(val_0d) != val: + if self.builtin is float: + assert self.finfo is not None # for mypy + try: + is_subnormal = 0 < abs(val) < self.finfo.smallest_normal + except Exception: + # val may be a non-float that does not support the + # operations __lt__ and __abs__ + is_subnormal = False + if is_subnormal: + raise InvalidArgument( + f"Generated subnormal float {val} from strategy " + f"{strategy} resulted in {val_0d!r}, probably " + f"as a result of array module {self.xp.__name__} " + "being built with flush-to-zero compiler options. " + "Consider passing allow_subnormal=False." + ) + raise InvalidArgument( + f"Generated array element {val!r} from strategy {strategy} " + f"cannot be represented with dtype {self.dtype}. " + f"Array module {self.xp.__name__} instead " + f"represents the element as {val_0d}. " + "Consider using a more precise elements strategy, " + "for example passing the width argument to floats()." + ) + + def do_draw(self, data): + if 0 in self.shape: + return self.xp.zeros(self.shape, dtype=self.dtype) + + if self.fill.is_empty: + # We have no fill value (either because the user explicitly + # disabled it or because the default behaviour was used and our + # elements strategy does not produce reusable values), so we must + # generate a fully dense array with a freshly drawn value for each + # entry. + elems = data.draw( + st.lists( + self.elements_strategy, + min_size=self.array_size, + max_size=self.array_size, + unique=self.unique, + ) + ) + try: + result = self.xp.asarray(elems, dtype=self.dtype) + except Exception as e: + if len(elems) <= 6: + f_elems = str(elems) + else: + f_elems = f"[{elems[0]}, {elems[1]}, ..., {elems[-2]}, {elems[-1]}]" + types = tuple( + sorted({type(e) for e in elems}, key=lambda t: t.__name__) + ) + f_types = f"type {types[0]}" if len(types) == 1 else f"types {types}" + raise InvalidArgument( + f"Generated elements {f_elems} from strategy " + f"{self.elements_strategy} could not be converted " + f"to array of dtype {self.dtype}. " + f"Consider if elements of {f_types} " + f"are compatible with {self.dtype}." + ) from e + for i in range(self.array_size): + self.check_set_value(elems[i], result[i], self.elements_strategy) + else: + # We draw arrays as "sparse with an offset". We assume not every + # element will be assigned and so first draw a single value from our + # fill strategy to create a full array. We then draw a collection of + # index assignments within the array and assign fresh values from + # our elements strategy to those indices. 
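The comment above, together with the drawing code that follows, implements the "sparse with an offset" scheme. As a standalone illustration (editorial addition; plain Python with hypothetical ``draw_fill``/``draw_element`` callables, not the vendored strategy machinery):

.. code-block:: python

    import math
    import random

    def sparse_with_offset(size, draw_fill, draw_element):
        # One background value fills the whole array...
        result = [draw_fill()] * size
        # ...then roughly sqrt(size) randomly chosen positions are
        # overwritten with freshly drawn elements, mirroring the
        # average_size heuristic used in the drawing code below.
        n_assigned = min(size, max(10, int(math.sqrt(size))))
        for i in random.sample(range(size), k=n_assigned):
            result[i] = draw_element()
        return result

    print(sparse_with_offset(25, lambda: 0, lambda: random.randint(1, 9)))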
+ + fill_val = data.draw(self.fill) + result_obj = [fill_val for _ in range(self.array_size)] + fill_mask = [True for _ in range(self.array_size)] + + elements = cu.many( + data, + min_size=0, + max_size=self.array_size, + # sqrt isn't chosen for any particularly principled reason. It + # just grows reasonably quickly but sublinearly, and for small + # arrays it represents a decent fraction of the array size. + average_size=min( + 0.9 * self.array_size, # ensure small arrays sometimes use fill + max(10, math.sqrt(self.array_size)), # ...but *only* sometimes + ), + ) + + assigned = set() + seen = set() + + while elements.more(): + i = data.draw_integer(0, self.array_size - 1) + if i in assigned: + elements.reject("chose an array index we've already used") + continue + val = data.draw(self.elements_strategy) + if self.unique: + if val in seen: + elements.reject("chose an element we've already used") + continue + seen.add(val) + + result_obj[i] = val + assigned.add(i) + fill_mask[i] = False + + try: + result = self.xp.asarray(result_obj, dtype=self.dtype) + except Exception as e: + f_expr = f"xp.asarray({result_obj}, dtype={self.dtype})" + raise InvalidArgument(f"Could not create array via {f_expr}") from e + + for i, val in enumerate(result_obj): + val_0d = result[i] + if fill_mask[i] and self.unique: + if not self.xp.isnan(val_0d): + raise InvalidArgument( + f"Array module {self.xp.__name__} did not recognise fill " + f"value {fill_val!r} as NaN - instead got {val_0d!r}. " + "Cannot fill unique array with non-NaN values." + ) + else: + self.check_set_value(val, val_0d, self.elements_strategy) + + return self.xp.reshape(result, self.shape) + + +def _arrays( + xp: Any, + api_version: NominalVersion, + dtype: DataType | str | st.SearchStrategy[DataType] | st.SearchStrategy[str], + shape: int | Shape | st.SearchStrategy[Shape], + *, + elements: Mapping[str, Any] | st.SearchStrategy | None = None, + fill: st.SearchStrategy[Any] | None = None, + unique: bool = False, +) -> st.SearchStrategy: + """Returns a strategy for :xp-ref:`arrays `. + + * ``dtype`` may be a :xp-ref:`valid dtype ` object or name, + or a strategy that generates such values. + * ``shape`` may be an integer >= 0, a tuple of such integers, or a strategy + that generates such values. + * ``elements`` is a strategy for values to put in the array. If ``None`` + then a suitable value will be inferred based on the dtype, which may give + any legal value (including e.g. NaN for floats). If a mapping, it will be + passed as ``**kwargs`` to :func:`from_dtype()` when inferring based on the dtype. + * ``fill`` is a strategy that may be used to generate a single background + value for the array. If ``None``, a suitable default will be inferred + based on the other arguments. If set to + :func:`~hypothesis.strategies.nothing` then filling behaviour will be + disabled entirely and every element will be generated independently. + * ``unique`` specifies if the elements of the array should all be distinct + from one another; if fill is also set, the only valid values for fill to + return are NaN values. + + Arrays of specified ``dtype`` and ``shape`` are generated for example + like this: + + .. code-block:: pycon + + >>> from numpy import array_api as xp + >>> xps.arrays(xp, xp.int8, (2, 3)).example() + Array([[-8, 6, 3], + [-6, 4, 6]], dtype=int8) + + Specifying element boundaries by a :obj:`python:dict` of the kwargs to pass + to :func:`from_dtype` will ensure ``dtype`` bounds will be respected. + + .. 
code-block:: pycon + + >>> xps.arrays(xp, xp.int8, 3, elements={"min_value": 10}).example() + Array([125, 13, 79], dtype=int8) + + .. code-block:: pycon + + >>> xps.arrays(xp, xp.float32, 3, elements=floats(0, 1, width=32)).example() + Array([ 0.88974794, 0.77387938, 0.1977879 ], dtype=float32) + + Array values are generated in two parts: + + 1. A single value is drawn from the fill strategy and is used to create a + filled array. + 2. Some subset of the coordinates of the array are populated with a value + drawn from the elements strategy (or its inferred form). + + You can set ``fill`` to :func:`~hypothesis.strategies.nothing` if you want + to disable this behaviour and draw a value for every element. + + By default ``arrays`` will attempt to infer the correct fill behaviour: if + ``unique`` is also ``True``, no filling will occur. Otherwise, if it looks + safe to reuse the values of elements across multiple coordinates (this will + be the case for any inferred strategy, and for most of the builtins, but is + not the case for mutable values or strategies built with flatmap, map, + composite, etc.) then it will use the elements strategy as the fill, else it + will default to having no fill. + + Having a fill helps Hypothesis craft high quality examples, but its + main importance is when the array generated is large: Hypothesis is + primarily designed around testing small examples. If you have arrays with + hundreds or more elements, having a fill value is essential if you want + your tests to run in reasonable time. + """ + check_xp_attributes( + xp, ["finfo", "asarray", "zeros", "all", "isnan", "isfinite", "reshape"] + ) + + if isinstance(dtype, st.SearchStrategy): + return dtype.flatmap( + lambda d: _arrays( + xp, api_version, d, shape, elements=elements, fill=fill, unique=unique + ) + ) + elif isinstance(dtype, str): + dtype = dtype_from_name(xp, dtype) + + if isinstance(shape, st.SearchStrategy): + return shape.flatmap( + lambda s: _arrays( + xp, api_version, dtype, s, elements=elements, fill=fill, unique=unique + ) + ) + elif isinstance(shape, int): + shape = (shape,) + elif not isinstance(shape, tuple): + raise InvalidArgument(f"shape={shape} is not a valid shape or strategy") + check_argument( + all(isinstance(x, int) and x >= 0 for x in shape), + f"{shape=}, but all dimensions must be non-negative integers.", + ) + + if elements is None: + elements = _from_dtype(xp, api_version, dtype) + elif isinstance(elements, Mapping): + elements = _from_dtype(xp, api_version, dtype, **elements) + check_strategy(elements, "elements") + + if fill is None: + assert isinstance(elements, st.SearchStrategy) # for mypy + if unique or not elements.has_reusable_values: + fill = st.nothing() + else: + fill = elements + check_strategy(fill, "fill") + + return ArrayStrategy( + xp=xp, + api_version=api_version, + elements_strategy=elements, + dtype=dtype, + shape=shape, + fill=fill, + unique=unique, + ) + + +@check_function +def check_dtypes(xp: Any, dtypes: list[DataType], stubs: list[str]) -> None: + if len(dtypes) == 0: + assert len(stubs) > 0, "No dtypes passed but stubs is empty" + f_stubs = ", ".join(stubs) + raise InvalidArgument( + f"Array module {xp.__name__} does not have the following " + f"required dtypes in its namespace: {f_stubs}" + ) + elif len(stubs) > 0: + warn_on_missing_dtypes(xp, stubs) + + +def _scalar_dtypes(xp: Any, api_version: NominalVersion) -> st.SearchStrategy[DataType]: + """Return a strategy for all :xp-ref:`valid dtype ` objects.""" + return st.one_of(_boolean_dtypes(xp), 
_numeric_dtypes(xp, api_version)) + + +def _boolean_dtypes(xp: Any) -> st.SearchStrategy[DataType]: + """Return a strategy for just the boolean dtype object.""" + try: + return st.just(xp.bool) + except AttributeError: + raise InvalidArgument( + f"Array module {xp.__name__} does not have a bool dtype in its namespace" + ) from None + + +def _real_dtypes(xp: Any) -> st.SearchStrategy[DataType]: + """Return a strategy for all real-valued dtype objects.""" + return st.one_of( + _integer_dtypes(xp), + _unsigned_integer_dtypes(xp), + _floating_dtypes(xp), + ) + + +def _numeric_dtypes( + xp: Any, api_version: NominalVersion +) -> st.SearchStrategy[DataType]: + """Return a strategy for all numeric dtype objects.""" + strat: st.SearchStrategy[DataType] = _real_dtypes(xp) + if api_version > "2021.12": + strat |= _complex_dtypes(xp) + return strat + + +@check_function +def check_valid_sizes( + category: str, sizes: Sequence[int], valid_sizes: Sequence[int] +) -> None: + check_argument(len(sizes) > 0, "No sizes passed") + + invalid_sizes = [s for s in sizes if s not in valid_sizes] + f_valid_sizes = ", ".join(str(s) for s in valid_sizes) + f_invalid_sizes = ", ".join(str(s) for s in invalid_sizes) + check_argument( + len(invalid_sizes) == 0, + f"The following sizes are not valid for {category} dtypes: " + f"{f_invalid_sizes} (valid sizes: {f_valid_sizes})", + ) + + +def numeric_dtype_names(base_name: str, sizes: Sequence[int]) -> Iterator[str]: + for size in sizes: + yield f"{base_name}{size}" + + +IntSize: TypeAlias = Literal[8, 16, 32, 64] +FltSize: TypeAlias = Literal[32, 64] +CpxSize: TypeAlias = Literal[64, 128] + + +def _integer_dtypes( + xp: Any, *, sizes: IntSize | Sequence[IntSize] = (8, 16, 32, 64) +) -> st.SearchStrategy[DataType]: + """Return a strategy for signed integer dtype objects. + + ``sizes`` contains the signed integer sizes in bits, defaulting to + ``(8, 16, 32, 64)`` which covers all valid sizes. + """ + if isinstance(sizes, int): + sizes = (sizes,) + check_valid_sizes("int", sizes, (8, 16, 32, 64)) + dtypes, stubs = partition_attributes_and_stubs( + xp, numeric_dtype_names("int", sizes) + ) + check_dtypes(xp, dtypes, stubs) + return st.sampled_from(dtypes) + + +def _unsigned_integer_dtypes( + xp: Any, *, sizes: IntSize | Sequence[IntSize] = (8, 16, 32, 64) +) -> st.SearchStrategy[DataType]: + """Return a strategy for unsigned integer dtype objects. + + ``sizes`` contains the unsigned integer sizes in bits, defaulting to + ``(8, 16, 32, 64)`` which covers all valid sizes. + """ + if isinstance(sizes, int): + sizes = (sizes,) + check_valid_sizes("int", sizes, (8, 16, 32, 64)) + + dtypes, stubs = partition_attributes_and_stubs( + xp, numeric_dtype_names("uint", sizes) + ) + check_dtypes(xp, dtypes, stubs) + + return st.sampled_from(dtypes) + + +def _floating_dtypes( + xp: Any, *, sizes: FltSize | Sequence[FltSize] = (32, 64) +) -> st.SearchStrategy[DataType]: + """Return a strategy for real-valued floating-point dtype objects. + + ``sizes`` contains the floating-point sizes in bits, defaulting to + ``(32, 64)`` which covers all valid sizes. + """ + if isinstance(sizes, int): + sizes = (sizes,) + check_valid_sizes("int", sizes, (32, 64)) + dtypes, stubs = partition_attributes_and_stubs( + xp, numeric_dtype_names("float", sizes) + ) + check_dtypes(xp, dtypes, stubs) + return st.sampled_from(dtypes) + + +def _complex_dtypes( + xp: Any, *, sizes: CpxSize | Sequence[CpxSize] = (64, 128) +) -> st.SearchStrategy[DataType]: + """Return a strategy for complex dtype objects. 
+ + ``sizes`` contains the complex sizes in bits, defaulting to ``(64, 128)`` + which covers all valid sizes. + """ + if isinstance(sizes, int): + sizes = (sizes,) + check_valid_sizes("complex", sizes, (64, 128)) + dtypes, stubs = partition_attributes_and_stubs( + xp, numeric_dtype_names("complex", sizes) + ) + check_dtypes(xp, dtypes, stubs) + return st.sampled_from(dtypes) + + +@proxies(_valid_tuple_axes) +def valid_tuple_axes(*args, **kwargs): + return _valid_tuple_axes(*args, **kwargs) + + +valid_tuple_axes.__doc__ = f""" + Return a strategy for permissible tuple-values for the ``axis`` + argument in Array API sequential methods e.g. ``sum``, given the specified + dimensionality. + + {_valid_tuple_axes.__doc__} + """ + + +@defines_strategy() +def mutually_broadcastable_shapes( + num_shapes: int, + *, + base_shape: Shape = (), + min_dims: int = 0, + max_dims: int | None = None, + min_side: int = 1, + max_side: int | None = None, +) -> st.SearchStrategy[BroadcastableShapes]: + return _mutually_broadcastable_shapes( + num_shapes=num_shapes, + base_shape=base_shape, + min_dims=min_dims, + max_dims=max_dims, + min_side=min_side, + max_side=max_side, + ) + + +mutually_broadcastable_shapes.__doc__ = _mutually_broadcastable_shapes.__doc__ + + +@defines_strategy() +def indices( + shape: Shape, + *, + min_dims: int = 0, + max_dims: int | None = None, + allow_newaxis: bool = False, + allow_ellipsis: bool = True, +) -> st.SearchStrategy[BasicIndex]: + """Return a strategy for :xp-ref:`valid indices ` of + arrays with the specified shape, which may include dimensions of size zero. + + It generates tuples containing some mix of integers, :obj:`python:slice` + objects, ``...`` (an ``Ellipsis``), and ``None``. When a length-one tuple + would be generated, this strategy may instead return the element which will + index the first axis, e.g. ``5`` instead of ``(5,)``. + + * ``shape`` is the shape of the array that will be indexed, as a tuple of + integers >= 0. This must be at least two-dimensional for a tuple to be a + valid index; for one-dimensional arrays use + :func:`~hypothesis.strategies.slices` instead. + * ``min_dims`` is the minimum dimensionality of the resulting array from use + of the generated index. + * ``max_dims`` is the maximum dimensionality of the resulting array, + defaulting to ``len(shape) if not allow_newaxis else + max(len(shape), min_dims) + 2``. + * ``allow_newaxis`` specifies whether ``None`` is allowed in the index. + * ``allow_ellipsis`` specifies whether ``...`` is allowed in the index.
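Since ``indices()`` is easiest to understand in action, here is a hedged usage sketch (editorial addition; upstream import path shown, and the optional ``array_api_strict`` package stands in for any Array API module):

.. code-block:: python

    import array_api_strict as xp

    from hypothesis import given
    from hypothesis.extra.array_api import make_strategies_namespace

    xps = make_strategies_namespace(xp, api_version="2022.12")

    @given(xps.indices(shape=(3, 4), max_dims=2))
    def test_generated_index_is_usable(ix):
        x = xp.reshape(xp.arange(12), (3, 4))
        x[ix]  # generated indices are always valid for the declared shape

    test_generated_index_is_usable()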
+ """ + check_type(tuple, shape, "shape") + check_argument( + all(isinstance(x, int) and x >= 0 for x in shape), + f"{shape=}, but all dimensions must be non-negative integers.", + ) + check_type(bool, allow_newaxis, "allow_newaxis") + check_type(bool, allow_ellipsis, "allow_ellipsis") + check_type(int, min_dims, "min_dims") + if not allow_newaxis: + check_argument( + min_dims <= len(shape), + f"min_dims={min_dims} is larger than len(shape)={len(shape)}, " + "but it is impossible for an indexing operation to add dimensions ", + "when allow_newaxis=False.", + ) + check_valid_dims(min_dims, "min_dims") + + if max_dims is None: + if allow_newaxis: + max_dims = min(max(len(shape), min_dims) + 2, NDIM_MAX) + else: + max_dims = min(len(shape), NDIM_MAX) + check_type(int, max_dims, "max_dims") + assert isinstance(max_dims, int) + if not allow_newaxis: + check_argument( + max_dims <= len(shape), + f"max_dims={max_dims} is larger than len(shape)={len(shape)}, " + "but it is impossible for an indexing operation to add dimensions ", + "when allow_newaxis=False.", + ) + check_valid_dims(max_dims, "max_dims") + + order_check("dims", 0, min_dims, max_dims) + + return BasicIndexStrategy( + shape, + min_dims=min_dims, + max_dims=max_dims, + allow_ellipsis=allow_ellipsis, + allow_newaxis=allow_newaxis, + allow_fewer_indices_than_dims=False, + ) + + +# Cache for make_strategies_namespace() +_args_to_xps: WeakValueDictionary = WeakValueDictionary() + + +def make_strategies_namespace( + xp: Any, *, api_version: NominalVersion | None = None +) -> SimpleNamespace: + """Creates a strategies namespace for the given array module. + + * ``xp`` is the Array API library to automatically pass to the namespaced methods. + * ``api_version`` is the version of the Array API which the returned + strategies namespace should conform to. If ``None``, the latest API + version which ``xp`` supports will be inferred from ``xp.__array_api_version__``. + If a version string in the ``YYYY.MM`` format, the strategies namespace + will conform to that version if supported. + + A :obj:`python:types.SimpleNamespace` is returned which contains all the + strategy methods in this module but without requiring the ``xp`` argument. + Creating and using a strategies namespace for NumPy's Array API + implementation would go like this: + + .. code-block:: pycon + + >>> xp.__array_api_version__ # xp is your desired array library + '2021.12' + >>> xps = make_strategies_namespace(xp) + >>> xps.api_version + '2021.12' + >>> x = xps.arrays(xp.int8, (2, 3)).example() + >>> x + Array([[-8, 6, 3], + [-6, 4, 6]], dtype=int8) + >>> x.__array_namespace__() is xp + True + + """ + not_available_msg = ( + "If the standard version you want is not available, please ensure " + "you're using the latest version of Hypothesis, then open an issue if " + "one doesn't already exist." + ) + if api_version is None: + check_argument( + hasattr(xp, "__array_api_version__"), + f"Array module {xp.__name__} has no attribute __array_api_version__, " + "which is required when inferring api_version. If you believe " + f"{xp.__name__} is indeed an Array API module, try explicitly " + "passing an api_version.", + ) + check_argument( + isinstance(xp.__array_api_version__, str) + and xp.__array_api_version__ in RELEASED_VERSIONS, + f"{xp.__array_api_version__=}, but it must " + f"be a valid version string {RELEASED_VERSIONS}. 
{not_available_msg}", + ) + api_version = xp.__array_api_version__ + inferred_version = True + else: + check_argument( + isinstance(api_version, str) and api_version in NOMINAL_VERSIONS, + f"{api_version=}, but it must be None, or a valid version " + f"string in {RELEASED_VERSIONS}. {not_available_msg}", + ) + inferred_version = False + try: + array = xp.zeros(1) + array.__array_namespace__() + except Exception: + warn( + f"Could not determine whether module {xp.__name__} is an Array API library", + HypothesisWarning, + stacklevel=2, + ) + + try: + namespace = _args_to_xps[(xp, api_version)] + except (KeyError, TypeError): + pass + else: + return namespace + + @defines_strategy(force_reusable_values=True) + def from_dtype( + dtype: DataType | str, + *, + min_value: int | float | None = None, + max_value: int | float | None = None, + allow_nan: bool | None = None, + allow_infinity: bool | None = None, + allow_subnormal: bool | None = None, + exclude_min: bool | None = None, + exclude_max: bool | None = None, + ) -> st.SearchStrategy[bool | int | float | complex]: + return _from_dtype( + xp, + api_version, + dtype, + min_value=min_value, + max_value=max_value, + allow_nan=allow_nan, + allow_infinity=allow_infinity, + allow_subnormal=allow_subnormal, + exclude_min=exclude_min, + exclude_max=exclude_max, + ) + + @defines_strategy(force_reusable_values=True) + def arrays( + dtype: DataType | str | st.SearchStrategy[DataType] | st.SearchStrategy[str], + shape: int | Shape | st.SearchStrategy[Shape], + *, + elements: Mapping[str, Any] | st.SearchStrategy | None = None, + fill: st.SearchStrategy[Any] | None = None, + unique: bool = False, + ) -> st.SearchStrategy: + return _arrays( + xp, + api_version, + dtype, + shape, + elements=elements, + fill=fill, + unique=unique, + ) + + @defines_strategy() + def scalar_dtypes() -> st.SearchStrategy[DataType]: + return _scalar_dtypes(xp, api_version) + + @defines_strategy() + def boolean_dtypes() -> st.SearchStrategy[DataType]: + return _boolean_dtypes(xp) + + @defines_strategy() + def real_dtypes() -> st.SearchStrategy[DataType]: + return _real_dtypes(xp) + + @defines_strategy() + def numeric_dtypes() -> st.SearchStrategy[DataType]: + return _numeric_dtypes(xp, api_version) + + @defines_strategy() + def integer_dtypes( + *, sizes: IntSize | Sequence[IntSize] = (8, 16, 32, 64) + ) -> st.SearchStrategy[DataType]: + return _integer_dtypes(xp, sizes=sizes) + + @defines_strategy() + def unsigned_integer_dtypes( + *, sizes: IntSize | Sequence[IntSize] = (8, 16, 32, 64) + ) -> st.SearchStrategy[DataType]: + return _unsigned_integer_dtypes(xp, sizes=sizes) + + @defines_strategy() + def floating_dtypes( + *, sizes: FltSize | Sequence[FltSize] = (32, 64) + ) -> st.SearchStrategy[DataType]: + return _floating_dtypes(xp, sizes=sizes) + + from_dtype.__doc__ = _from_dtype.__doc__ + arrays.__doc__ = _arrays.__doc__ + scalar_dtypes.__doc__ = _scalar_dtypes.__doc__ + boolean_dtypes.__doc__ = _boolean_dtypes.__doc__ + real_dtypes.__doc__ = _real_dtypes.__doc__ + numeric_dtypes.__doc__ = _numeric_dtypes.__doc__ + integer_dtypes.__doc__ = _integer_dtypes.__doc__ + unsigned_integer_dtypes.__doc__ = _unsigned_integer_dtypes.__doc__ + floating_dtypes.__doc__ = _floating_dtypes.__doc__ + + class StrategiesNamespace(SimpleNamespace): + def __init__(self, **kwargs): + for attr in ["name", "api_version"]: + if attr not in kwargs: + raise ValueError(f"'{attr}' kwarg required") + super().__init__(**kwargs) + + @property + def complex_dtypes(self): + try: + return 
self.__dict__["complex_dtypes"] + except KeyError as e: + raise AttributeError( + "You attempted to access 'complex_dtypes', but it is not " + f"available for api_version='{self.api_version}' of " + f"xp={self.name}." + ) from e + + def __repr__(self): + f_args = self.name + if not inferred_version: + f_args += f", api_version='{self.api_version}'" + return f"make_strategies_namespace({f_args})" + + kwargs = { + "name": xp.__name__, + "api_version": api_version, + "from_dtype": from_dtype, + "arrays": arrays, + "array_shapes": array_shapes, + "scalar_dtypes": scalar_dtypes, + "boolean_dtypes": boolean_dtypes, + "real_dtypes": real_dtypes, + "numeric_dtypes": numeric_dtypes, + "integer_dtypes": integer_dtypes, + "unsigned_integer_dtypes": unsigned_integer_dtypes, + "floating_dtypes": floating_dtypes, + "valid_tuple_axes": valid_tuple_axes, + "broadcastable_shapes": broadcastable_shapes, + "mutually_broadcastable_shapes": mutually_broadcastable_shapes, + "indices": indices, + } + + if api_version > "2021.12": + + @defines_strategy() + def complex_dtypes( + *, sizes: CpxSize | Sequence[CpxSize] = (64, 128) + ) -> st.SearchStrategy[DataType]: + return _complex_dtypes(xp, sizes=sizes) + + complex_dtypes.__doc__ = _complex_dtypes.__doc__ + kwargs["complex_dtypes"] = complex_dtypes + + namespace = StrategiesNamespace(**kwargs) + try: + _args_to_xps[(xp, api_version)] = namespace + except TypeError: + pass + + return namespace + + +try: + import numpy as np +except ImportError: + if "sphinx" in sys.modules: + # This is pretty awkward, but also the best way available + from unittest.mock import Mock + + np = Mock() + else: + np = None # type: ignore[assignment] +if np is not None: + + class FloatInfo(NamedTuple): + bits: int + eps: float + max: float + min: float + smallest_normal: float + + def mock_finfo(dtype: DataType) -> FloatInfo: + """Returns a finfo object compliant with the Array API + + Ensures all attributes are Python scalars and not NumPy scalars. This + lets us ignore corner cases with how NumPy scalars operate, such as + NumPy floats breaking our next_down() util. + + Also ensures the finfo obj has the smallest_normal attribute. NumPy only + introduced it in v1.21.1, so we just use the equivalent tiny attribute + to keep mocking with older versions working. 
+ """ + _finfo = np.finfo(dtype) # type: ignore[call-overload] + return FloatInfo( + int(_finfo.bits), + float(_finfo.eps), + float(_finfo.max), + float(_finfo.min), + float(_finfo.tiny), + ) + + mock_xp = SimpleNamespace( + __name__="mock", + __array_api_version__="2022.12", + # Data types + int8=np.int8, + int16=np.int16, + int32=np.int32, + int64=np.int64, + uint8=np.uint8, + uint16=np.uint16, + uint32=np.uint32, + uint64=np.uint64, + float32=np.float32, + float64=np.float64, + complex64=np.complex64, + complex128=np.complex128, + bool=np.bool_, + # Constants + nan=np.nan, + # Data type functions + astype=lambda x, d: x.astype(d), + iinfo=np.iinfo, + finfo=mock_finfo, + broadcast_arrays=np.broadcast_arrays, + # Creation functions + arange=np.arange, + asarray=np.asarray, + empty=np.empty, + zeros=np.zeros, + ones=np.ones, + # Manipulation functions + reshape=np.reshape, + # Element-wise functions + isnan=np.isnan, + isfinite=np.isfinite, + logical_or=np.logical_or, + # Statistical functions + sum=np.sum, + # Searching functions + nonzero=np.nonzero, + # Sorting functions + sort=np.sort, + # Set functions + unique_values=np.unique, + # Utility functions + any=np.any, + all=np.all, + ) diff --git a/vendored/hypothesis/extra/cli.py b/vendored/hypothesis/extra/cli.py new file mode 100644 index 0000000..20c7a79 --- /dev/null +++ b/vendored/hypothesis/extra/cli.py @@ -0,0 +1,339 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +""" +:: + + $ hypothesis --help + Usage: hypothesis [OPTIONS] COMMAND [ARGS]... + + Options: + --version Show the version and exit. + -h, --help Show this message and exit. + + Commands: + codemod `hypothesis codemod` refactors deprecated or inefficient code. + fuzz [hypofuzz] runs tests with an adaptive coverage-guided fuzzer. + write `hypothesis write` writes property-based tests for you! + +This module requires the :pypi:`click` package, and provides Hypothesis' command-line +interface, for e.g. :ref:`'ghostwriting' tests ` via the terminal. +It's also where `HypoFuzz `__ adds the :command:`hypothesis fuzz` +command (`learn more about that here `__). +""" + +import builtins +import importlib +import inspect +import sys +import types +from difflib import get_close_matches +from functools import partial +from multiprocessing import Pool +from pathlib import Path + +try: + import pytest +except ImportError: + pytest = None # type: ignore + +MESSAGE = """ +The Hypothesis command-line interface requires the `{}` package, +which you do not have installed. Run: + + python -m pip install --upgrade 'hypothesis[cli]' + +and try again. +""" + +try: + import click +except ImportError: + + def main(): + """If `click` is not installed, tell the user to install it then exit.""" + sys.stderr.write(MESSAGE.format("click")) + sys.exit(1) + +else: + # Ensure that Python scripts in the current working directory are importable, + # on the principle that Ghostwriter should 'just work' for novice users. Note + # that we append rather than prepend to the module search path, so this will + # never shadow the stdlib or installed packages. 
+ sys.path.append(".") + + @click.group(context_settings={"help_option_names": ("-h", "--help")}) + @click.version_option() + def main(): + pass + + def obj_name(s: str) -> object: + """This "type" imports whatever object is named by a dotted string.""" + s = s.strip() + if "/" in s or "\\" in s: + raise click.UsageError( + "Remember that the ghostwriter should be passed the name of a module, not a path." + ) from None + try: + return importlib.import_module(s) + except ImportError: + pass + classname = None + if "." not in s: + modulename, module, funcname = "builtins", builtins, s + else: + modulename, funcname = s.rsplit(".", 1) + try: + module = importlib.import_module(modulename) + except ImportError as err: + try: + modulename, classname = modulename.rsplit(".", 1) + module = importlib.import_module(modulename) + except (ImportError, ValueError): + if s.endswith(".py"): + raise click.UsageError( + "Remember that the ghostwriter should be passed the name of a module, not a file." + ) from None + raise click.UsageError( + f"Failed to import the {modulename} module for introspection. " + "Check spelling and your Python import path, or use the Python API?" + ) from err + + def describe_close_matches( + module_or_class: types.ModuleType, objname: str + ) -> str: + public_names = [ + name for name in vars(module_or_class) if not name.startswith("_") + ] + matches = get_close_matches(objname, public_names) + if matches: + return f" Closest matches: {matches!r}" + else: + return "" + + if classname is None: + try: + return getattr(module, funcname) + except AttributeError as err: + if funcname == "py": + # Likely attempted to pass a local file (Eg., "myscript.py") instead of a module name + raise click.UsageError( + "Remember that the ghostwriter should be passed the name of a module, not a file." + f"\n\tTry: hypothesis write {s[:-3]}" + ) from None + raise click.UsageError( + f"Found the {modulename!r} module, but it doesn't have a " + f"{funcname!r} attribute." + + describe_close_matches(module, funcname) + ) from err + else: + try: + func_class = getattr(module, classname) + except AttributeError as err: + raise click.UsageError( + f"Found the {modulename!r} module, but it doesn't have a " + f"{classname!r} class." + describe_close_matches(module, classname) + ) from err + try: + return getattr(func_class, funcname) + except AttributeError as err: + if inspect.isclass(func_class): + func_class_is = "class" + else: + func_class_is = "attribute" + raise click.UsageError( + f"Found the {modulename!r} module and {classname!r} {func_class_is}, " + f"but it doesn't have a {funcname!r} attribute." + + describe_close_matches(func_class, funcname) + ) from err + + def _refactor(func, fname): + try: + oldcode = Path(fname).read_text(encoding="utf-8") + except (OSError, UnicodeError) as err: + # Permissions or encoding issue, or file deleted, etc. 
+ return f"skipping {fname!r} due to {err}" + + if "hypothesis" not in oldcode: + return # This is a fast way to avoid running slow no-op codemods + + try: + newcode = func(oldcode) + except Exception as err: + from libcst import ParserSyntaxError + + if isinstance(err, ParserSyntaxError): + from hypothesis.extra._patching import indent + + msg = indent(str(err).replace("\n\n", "\n"), " ").strip() + return f"skipping {fname!r} due to {msg}" + raise + + if newcode != oldcode: + Path(fname).write_text(newcode, encoding="utf-8") + + @main.command() # type: ignore # Click adds the .command attribute + @click.argument("path", type=str, required=True, nargs=-1) + def codemod(path): + """`hypothesis codemod` refactors deprecated or inefficient code. + + It adapts `python -m libcst.tool`, removing many features and config options + which are rarely relevant for this purpose. If you need more control, we + encourage you to use the libcst CLI directly; if not this one is easier. + + PATH is the file(s) or directories of files to format in place, or + "-" to read from stdin and write to stdout. + """ + try: + from libcst.codemod import gather_files + + from hypothesis.extra import codemods + except ImportError: + sys.stderr.write( + "You are missing required dependencies for this option. Run:\n\n" + " python -m pip install --upgrade hypothesis[codemods]\n\n" + "and try again." + ) + sys.exit(1) + + # Special case for stdin/stdout usage + if "-" in path: + if len(path) > 1: + raise Exception( + "Cannot specify multiple paths when reading from stdin!" + ) + print("Codemodding from stdin", file=sys.stderr) + print(codemods.refactor(sys.stdin.read())) + return 0 + + # Find all the files to refactor, and then codemod them + files = gather_files(path) + errors = set() + if len(files) <= 1: + errors.add(_refactor(codemods.refactor, *files)) + else: + with Pool() as pool: + for msg in pool.imap_unordered( + partial(_refactor, codemods.refactor), files + ): + errors.add(msg) + errors.discard(None) + for msg in errors: + print(msg, file=sys.stderr) + return 1 if errors else 0 + + @main.command() # type: ignore # Click adds the .command attribute + @click.argument("func", type=obj_name, required=True, nargs=-1) + @click.option( + "--roundtrip", + "writer", + flag_value="roundtrip", + help="start by testing write/read or encode/decode!", + ) + @click.option( + "--equivalent", + "writer", + flag_value="equivalent", + help="very useful when optimising or refactoring code", + ) + @click.option( + "--errors-equivalent", + "writer", + flag_value="errors-equivalent", + help="--equivalent, but also allows consistent errors", + ) + @click.option( + "--idempotent", + "writer", + flag_value="idempotent", + help="check that f(x) == f(f(x))", + ) + @click.option( + "--binary-op", + "writer", + flag_value="binary_operation", + help="associativity, commutativity, identity element", + ) + # Note: we deliberately omit a --ufunc flag, because the magic() + # detection of ufuncs is both precise and complete. + @click.option( + "--style", + type=click.Choice(["pytest", "unittest"]), + default="pytest" if pytest else "unittest", + help="pytest-style function, or unittest-style method?", + ) + @click.option( + "-e", + "--except", + "except_", + type=obj_name, + multiple=True, + help="dotted name of exception(s) to ignore", + ) + @click.option( + "--annotate/--no-annotate", + default=None, + help="force ghostwritten tests to be type-annotated (or not). 
" + "By default, match the code to test.", + ) + def write(func, writer, except_, style, annotate): # \b disables autowrap + """`hypothesis write` writes property-based tests for you! + + Type annotations are helpful but not required for our advanced introspection + and templating logic. Try running the examples below to see how it works: + + \b + hypothesis write gzip + hypothesis write numpy.matmul + hypothesis write pandas.from_dummies + hypothesis write re.compile --except re.error + hypothesis write --equivalent ast.literal_eval eval + hypothesis write --roundtrip json.dumps json.loads + hypothesis write --style=unittest --idempotent sorted + hypothesis write --binary-op operator.add + """ + # NOTE: if you want to call this function from Python, look instead at the + # ``hypothesis.extra.ghostwriter`` module. Click-decorated functions have + # a different calling convention, and raise SystemExit instead of returning. + kwargs = {"except_": except_ or (), "style": style, "annotate": annotate} + if writer is None: + writer = "magic" + elif writer == "idempotent" and len(func) > 1: + raise click.UsageError("Test functions for idempotence one at a time.") + elif writer == "roundtrip" and len(func) == 1: + writer = "idempotent" + elif "equivalent" in writer and len(func) == 1: + writer = "fuzz" + if writer == "errors-equivalent": + writer = "equivalent" + kwargs["allow_same_errors"] = True + + try: + from hypothesis.extra import ghostwriter + except ImportError: + sys.stderr.write(MESSAGE.format("black")) + sys.exit(1) + + code = getattr(ghostwriter, writer)(*func, **kwargs) + try: + from rich.console import Console + from rich.syntax import Syntax + + from hypothesis.utils.terminal import guess_background_color + except ImportError: + print(code) + else: + try: + theme = "default" if guess_background_color() == "light" else "monokai" + code = Syntax(code, "python", background_color="default", theme=theme) + Console().print(code, soft_wrap=True) + except Exception: + print("# Error while syntax-highlighting code", file=sys.stderr) + print(code) diff --git a/vendored/hypothesis/extra/codemods.py b/vendored/hypothesis/extra/codemods.py new file mode 100644 index 0000000..d2e6711 --- /dev/null +++ b/vendored/hypothesis/extra/codemods.py @@ -0,0 +1,280 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +""" +This module provides codemods based on the :pypi:`LibCST` library, which can +both detect *and automatically fix* issues with code that uses Hypothesis, +including upgrading from deprecated features to our recommended style. + +You can run the codemods via our CLI:: + + $ hypothesis codemod --help + Usage: hypothesis codemod [OPTIONS] PATH... + + `hypothesis codemod` refactors deprecated or inefficient code. + + It adapts `python -m libcst.tool`, removing many features and config + options which are rarely relevant for this purpose. If you need more + control, we encourage you to use the libcst CLI directly; if not this one + is easier. + + PATH is the file(s) or directories of files to format in place, or "-" to + read from stdin and write to stdout. + + Options: + -h, --help Show this message and exit. 
+ +Alternatively you can use ``python -m libcst.tool``, which offers more control +at the cost of additional configuration (adding ``'hypothesis.extra'`` to the +``modules`` list in ``.libcst.codemod.yaml``) and `some issues on Windows +`__. + +.. autofunction:: refactor +""" + +import functools +import importlib +from inspect import Parameter, signature +from typing import ClassVar + +import libcst as cst +import libcst.matchers as m +from libcst.codemod import VisitorBasedCodemodCommand + + +def refactor(code: str) -> str: + """Update a source code string from deprecated to modern Hypothesis APIs. + + This may not fix *all* the deprecation warnings in your code, but we're + confident that it will be easier than doing it all by hand. + + We recommend using the CLI, but if you want a Python function here it is. + """ + context = cst.codemod.CodemodContext() + mod = cst.parse_module(code) + transforms: list[VisitorBasedCodemodCommand] = [ + HypothesisFixPositionalKeywonlyArgs(context), + HypothesisFixComplexMinMagnitude(context), + HypothesisFixHealthCheckAll(context), + HypothesisFixCharactersArguments(context), + ] + for transform in transforms: + mod = transform.transform_module(mod) + return mod.code + + +def match_qualname(name): + # We use the metadata to get qualname instead of matching directly on function + # name, because this handles some scope and "from x import y as z" issues. + return m.MatchMetadataIfTrue( + cst.metadata.QualifiedNameProvider, + # If there are multiple possible qualnames, e.g. due to conditional imports, + # be conservative. Better to leave the user to fix a few things by hand than + # to break their code while attempting to refactor it! + lambda qualnames: all(n.name == name for n in qualnames), + ) + + +class HypothesisFixComplexMinMagnitude(VisitorBasedCodemodCommand): + """Fix a deprecated min_magnitude=None argument for complex numbers:: + + st.complex_numbers(min_magnitude=None) -> st.complex_numbers(min_magnitude=0) + + Note that this should be run *after* ``HypothesisFixPositionalKeywonlyArgs``, + in order to handle ``st.complex_numbers(None)``. + """ + + DESCRIPTION = "Fix a deprecated min_magnitude=None argument for complex numbers." + METADATA_DEPENDENCIES = (cst.metadata.QualifiedNameProvider,) + + @m.call_if_inside( + m.Call(metadata=match_qualname("hypothesis.strategies.complex_numbers")) + ) + def leave_Arg(self, original_node, updated_node): + if m.matches( + updated_node, m.Arg(keyword=m.Name("min_magnitude"), value=m.Name("None")) + ): + return updated_node.with_changes(value=cst.Integer("0")) + return updated_node + + +@functools.lru_cache +def get_fn(import_path): + mod, fn = import_path.rsplit(".", 1) + return getattr(importlib.import_module(mod), fn) + + +class HypothesisFixPositionalKeywonlyArgs(VisitorBasedCodemodCommand): + """Fix positional arguments for newly keyword-only parameters, e.g.:: + + st.fractions(0, 1, 9) -> st.fractions(0, 1, max_denominator=9) + + Applies to a majority of our public API, since keyword-only parameters are + great but we couldn't use them until after we dropped support for Python 2. + """ + + DESCRIPTION = "Fix positional arguments for newly keyword-only parameters." 
+ METADATA_DEPENDENCIES = (cst.metadata.QualifiedNameProvider,) + + kwonly_functions = ( + "hypothesis.target", + "hypothesis.find", + "hypothesis.extra.lark.from_lark", + "hypothesis.extra.numpy.arrays", + "hypothesis.extra.numpy.array_shapes", + "hypothesis.extra.numpy.unsigned_integer_dtypes", + "hypothesis.extra.numpy.integer_dtypes", + "hypothesis.extra.numpy.floating_dtypes", + "hypothesis.extra.numpy.complex_number_dtypes", + "hypothesis.extra.numpy.datetime64_dtypes", + "hypothesis.extra.numpy.timedelta64_dtypes", + "hypothesis.extra.numpy.byte_string_dtypes", + "hypothesis.extra.numpy.unicode_string_dtypes", + "hypothesis.extra.numpy.array_dtypes", + "hypothesis.extra.numpy.nested_dtypes", + "hypothesis.extra.numpy.valid_tuple_axes", + "hypothesis.extra.numpy.broadcastable_shapes", + "hypothesis.extra.pandas.indexes", + "hypothesis.extra.pandas.series", + "hypothesis.extra.pandas.columns", + "hypothesis.extra.pandas.data_frames", + "hypothesis.provisional.domains", + "hypothesis.stateful.run_state_machine_as_test", + "hypothesis.stateful.rule", + "hypothesis.stateful.initialize", + "hypothesis.strategies.floats", + "hypothesis.strategies.lists", + "hypothesis.strategies.sets", + "hypothesis.strategies.frozensets", + "hypothesis.strategies.iterables", + "hypothesis.strategies.dictionaries", + "hypothesis.strategies.characters", + "hypothesis.strategies.text", + "hypothesis.strategies.from_regex", + "hypothesis.strategies.binary", + "hypothesis.strategies.fractions", + "hypothesis.strategies.decimals", + "hypothesis.strategies.recursive", + "hypothesis.strategies.complex_numbers", + "hypothesis.strategies.shared", + "hypothesis.strategies.uuids", + "hypothesis.strategies.runner", + "hypothesis.strategies.functions", + "hypothesis.strategies.datetimes", + "hypothesis.strategies.times", + ) + + def leave_Call(self, original_node, updated_node): + """Convert positional to keyword arguments.""" + metadata = self.get_metadata(cst.metadata.QualifiedNameProvider, original_node) + qualnames = {qn.name for qn in metadata} + + # If this isn't one of our known functions, or it has no posargs, stop there. + if ( + len(qualnames) != 1 + or not qualnames.intersection(self.kwonly_functions) + or not m.matches( + updated_node, + m.Call( + func=m.DoesNotMatch(m.Call()), + args=[m.Arg(keyword=None), m.ZeroOrMore()], + ), + ) + ): + return updated_node + + # Get the actual function object so that we can inspect the signature. + # This does e.g. incur a dependency on Numpy to fix Numpy-dependent code, + # but having a single source of truth about the signatures is worth it. + try: + params = signature(get_fn(*qualnames)).parameters.values() + except ModuleNotFoundError: + return updated_node + + # st.floats() has a new allow_subnormal kwonly argument not at the end, + # so we do a bit more of a dance here. 
+ if qualnames == {"hypothesis.strategies.floats"}: + params = [p for p in params if p.name != "allow_subnormal"] + + if len(updated_node.args) > len(params): + return updated_node + + # Create new arg nodes with the newly required keywords + assign_nospace = cst.AssignEqual( + whitespace_before=cst.SimpleWhitespace(""), + whitespace_after=cst.SimpleWhitespace(""), + ) + newargs = [ + ( + arg + if arg.keyword or arg.star or p.kind is not Parameter.KEYWORD_ONLY + else arg.with_changes(keyword=cst.Name(p.name), equal=assign_nospace) + ) + for p, arg in zip(params, updated_node.args, strict=False) + ] + return updated_node.with_changes(args=newargs) + + +class HypothesisFixHealthCheckAll(VisitorBasedCodemodCommand): + """Replace HealthCheck.all() with list(HealthCheck)""" + + DESCRIPTION = "Replace HealthCheck.all() with list(HealthCheck)" + + @m.leave(m.Call(func=m.Attribute(m.Name("HealthCheck"), m.Name("all")), args=[])) + def replace_healthcheck(self, original_node, updated_node): + return updated_node.with_changes( + func=cst.Name("list"), + args=[cst.Arg(value=cst.Name("HealthCheck"))], + ) + + +class HypothesisFixCharactersArguments(VisitorBasedCodemodCommand): + """Fix deprecated white/blacklist arguments to characters:: + + st.characters(whitelist_categories=...) -> st.characters(categories=...) + st.characters(blacklist_categories=...) -> st.characters(exclude_categories=...) + st.characters(whitelist_characters=...) -> st.characters(include_characters=...) + st.characters(blacklist_characters=...) -> st.characters(exclude_characters=...) + + Additionally, we drop `exclude_categories=` if `categories=` is present, + because this argument is always redundant (or an error). + """ + + DESCRIPTION = "Fix deprecated white/blacklist arguments to characters." + METADATA_DEPENDENCIES = (cst.metadata.QualifiedNameProvider,) + + _replacements: ClassVar = { + "whitelist_categories": "categories", + "blacklist_categories": "exclude_categories", + "whitelist_characters": "include_characters", + "blacklist_characters": "exclude_characters", + } + + @m.leave( + m.Call( + metadata=match_qualname("hypothesis.strategies.characters"), + args=[ + m.ZeroOrMore(), + m.Arg(keyword=m.OneOf(*map(m.Name, _replacements))), + m.ZeroOrMore(), + ], + ), + ) + def fn(self, original_node, updated_node): + # Update to the new names + newargs = [] + for arg in updated_node.args: + kw = self._replacements.get(arg.keyword.value, arg.keyword.value) + newargs.append(arg.with_changes(keyword=cst.Name(kw))) + # Drop redundant exclude_categories, which is now an error + if any(m.matches(arg, m.Arg(keyword=m.Name("categories"))) for arg in newargs): + ex = m.Arg(keyword=m.Name("exclude_categories")) + newargs = [a for a in newargs if m.matches(a, ~ex)] + return updated_node.with_changes(args=newargs) diff --git a/vendored/hypothesis/extra/dateutil.py b/vendored/hypothesis/extra/dateutil.py new file mode 100644 index 0000000..a41f7f4 --- /dev/null +++ b/vendored/hypothesis/extra/dateutil.py @@ -0,0 +1,64 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +""" +This module provides :pypi:`dateutil ` timezones. 
+ +You can use this strategy to make :func:`~hypothesis.strategies.datetimes` +and :func:`~hypothesis.strategies.times` produce timezone-aware values. + +.. tip:: + Consider using the stdlib :mod:`zoneinfo` module, via + :func:`st.timezones() `. +""" + +import datetime as dt + +from dateutil import tz, zoneinfo # type: ignore + +from hypothesis import strategies as st +from hypothesis.strategies._internal.utils import cacheable, defines_strategy + +__all__ = ["timezones"] + + +def __zone_sort_key(zone): + """Sort by absolute UTC offset at reference date, + positive first, with ties broken by name. + """ + assert zone is not None + offset = zone.utcoffset(dt.datetime(2000, 1, 1)) + offset = 999 if offset is None else offset + return (abs(offset), -offset, str(zone)) + + +@cacheable +@defines_strategy() +def timezones() -> st.SearchStrategy[dt.tzinfo]: + """Any timezone from :pypi:`dateutil `. + + This strategy minimises to UTC, or the timezone with the smallest offset + from UTC as of 2000-01-01, and is designed for use with + :py:func:`~hypothesis.strategies.datetimes`. + + Note that the timezones generated by the strategy may vary depending on the + configuration of your machine. See the dateutil documentation for more + information. + """ + all_timezones = sorted( + (tz.gettz(t) for t in zoneinfo.get_zonefile_instance().zones), + key=__zone_sort_key, + ) + all_timezones.insert(0, tz.UTC) + # We discard Nones in the list comprehension because Mypy knows that + # tz.gettz may return None. However this should never happen for known + # zone names, so we assert that it's impossible first. + assert None not in all_timezones + return st.sampled_from([z for z in all_timezones if z is not None]) diff --git a/vendored/hypothesis/extra/django/__init__.py b/vendored/hypothesis/extra/django/__init__.py new file mode 100644 index 0000000..465eaf3 --- /dev/null +++ b/vendored/hypothesis/extra/django/__init__.py @@ -0,0 +1,32 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from hypothesis.extra.django._fields import from_field, register_field_strategy +from hypothesis.extra.django._impl import ( + LiveServerTestCase, + SimpleTestCase, + StaticLiveServerTestCase, + TestCase, + TransactionTestCase, + from_form, + from_model, +) + +__all__ = [ + "LiveServerTestCase", + "SimpleTestCase", + "StaticLiveServerTestCase", + "TestCase", + "TransactionTestCase", + "from_field", + "from_form", + "from_model", + "register_field_strategy", +] diff --git a/vendored/hypothesis/extra/django/_fields.py b/vendored/hypothesis/extra/django/_fields.py new file mode 100644 index 0000000..960b52b --- /dev/null +++ b/vendored/hypothesis/extra/django/_fields.py @@ -0,0 +1,417 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
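Before the Django field mappings that follow, here is a minimal usage sketch for the ``timezones()`` strategy defined in the dateutil extra above (editorial addition; assumes :pypi:`python-dateutil` is installed and uses the upstream import path):

.. code-block:: python

    import datetime as dt

    from hypothesis import given, strategies as st
    from hypothesis.extra.dateutil import timezones

    @given(st.datetimes(timezones=timezones()))
    def test_generated_datetimes_are_aware(value):
        assert isinstance(value, dt.datetime)
        assert value.tzinfo is not None

    test_generated_datetimes_are_aware()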
+ +import re +import string +from collections.abc import Callable +from datetime import datetime, timedelta +from decimal import Decimal +from functools import lru_cache +from typing import Any, TypeAlias, TypeVar, Union + +import django +from django import forms as df +from django.conf import settings +from django.core.files.base import ContentFile +from django.core.validators import ( + validate_ipv4_address, + validate_ipv6_address, + validate_ipv46_address, +) +from django.db import models as dm + +from hypothesis import strategies as st +from hypothesis.errors import InvalidArgument, ResolutionFailed +from hypothesis.internal.validation import check_type +from hypothesis.provisional import urls +from hypothesis.strategies import emails + +# Use old-style union to avoid hitting +# https://github.com/sphinx-doc/sphinx/issues/11211 +AnyField: TypeAlias = Union[dm.Field, df.Field] # noqa: UP007 +F = TypeVar("F", bound=AnyField) + + +def numeric_bounds_from_validators( + field, min_value=float("-inf"), max_value=float("inf") +): + for v in field.validators: + if isinstance(v, django.core.validators.MinValueValidator): + min_value = max(min_value, v.limit_value) + elif isinstance(v, django.core.validators.MaxValueValidator): + max_value = min(max_value, v.limit_value) + return min_value, max_value + + +def integers_for_field(min_value, max_value): + def inner(field): + return st.integers(*numeric_bounds_from_validators(field, min_value, max_value)) + + return inner + + +@lru_cache +def timezones(): + # From Django 4.0, the default is to use zoneinfo instead of pytz. + assert getattr(django.conf.settings, "USE_TZ", False) + if django.VERSION < (5, 0, 0) and getattr( + django.conf.settings, "USE_DEPRECATED_PYTZ", True + ): + from hypothesis.extra.pytz import timezones + else: + from hypothesis.strategies import timezones + + return timezones() + + +# Mapping of field types, to strategy objects or functions of (type) -> strategy +_FieldLookUpType = dict[ + type[AnyField], + st.SearchStrategy | Callable[[Any], st.SearchStrategy], +] +_global_field_lookup: _FieldLookUpType = { + dm.SmallIntegerField: integers_for_field(-32768, 32767), + dm.IntegerField: integers_for_field(-2147483648, 2147483647), + dm.BigIntegerField: integers_for_field(-9223372036854775808, 9223372036854775807), + dm.PositiveIntegerField: integers_for_field(0, 2147483647), + dm.PositiveSmallIntegerField: integers_for_field(0, 32767), + dm.BooleanField: st.booleans(), + dm.DateField: st.dates(), + dm.EmailField: emails(), + dm.FloatField: st.floats(), + dm.NullBooleanField: st.one_of(st.none(), st.booleans()), + dm.URLField: urls(), + dm.UUIDField: st.uuids(), + df.DateField: st.dates(), + df.DurationField: st.timedeltas(), + df.EmailField: emails(), + df.FloatField: lambda field: st.floats( + *numeric_bounds_from_validators(field), allow_nan=False, allow_infinity=False + ), + df.IntegerField: integers_for_field(-2147483648, 2147483647), + df.NullBooleanField: st.one_of(st.none(), st.booleans()), + df.URLField: urls(), + df.UUIDField: st.uuids(), + df.FileField: st.builds( + ContentFile, st.binary(min_size=1), name=st.text(min_size=1, max_size=100) + ), +} + +_ipv6_strings = st.one_of( + st.ip_addresses(v=6).map(str), + st.ip_addresses(v=6).map(lambda addr: addr.exploded), +) + + +def register_for(field_type): + def inner(func): + _global_field_lookup[field_type] = func + return func + + return inner + + +@register_for(dm.DateTimeField) +@register_for(df.DateTimeField) +def _for_datetime(field): + if 
getattr(django.conf.settings, "USE_TZ", False): + # avoid https://code.djangoproject.com/ticket/35683 + return st.datetimes( + min_value=datetime.min + timedelta(days=1), + max_value=datetime.max - timedelta(days=1), + timezones=timezones(), + ) + return st.datetimes() + + +def using_sqlite(): + try: + return ( + getattr(django.conf.settings, "DATABASES", {}) + .get("default", {}) + .get("ENGINE", "") + .endswith(".sqlite3") + ) + except django.core.exceptions.ImproperlyConfigured: + return None + + +@register_for(dm.TimeField) +def _for_model_time(field): + # SQLITE supports TZ-aware datetimes, but not TZ-aware times. + if getattr(django.conf.settings, "USE_TZ", False) and not using_sqlite(): + return st.times(timezones=timezones()) + return st.times() + + +@register_for(df.TimeField) +def _for_form_time(field): + if getattr(django.conf.settings, "USE_TZ", False): + return st.times(timezones=timezones()) + return st.times() + + +@register_for(dm.DurationField) +def _for_duration(field): + # SQLite stores timedeltas as six bytes of microseconds + if using_sqlite(): + delta = timedelta(microseconds=2**47 - 1) + return st.timedeltas(-delta, delta) + return st.timedeltas() + + +@register_for(dm.SlugField) +@register_for(df.SlugField) +def _for_slug(field): + min_size = 1 + if getattr(field, "blank", False) or not getattr(field, "required", True): + min_size = 0 + return st.text( + alphabet=string.ascii_letters + string.digits, + min_size=min_size, + max_size=field.max_length, + ) + + +@register_for(dm.GenericIPAddressField) +def _for_model_ip(field): + return { + "ipv4": st.ip_addresses(v=4).map(str), + "ipv6": _ipv6_strings, + "both": st.ip_addresses(v=4).map(str) | _ipv6_strings, + }[field.protocol.lower()] + + +@register_for(df.GenericIPAddressField) +def _for_form_ip(field): + # the IP address form fields have no direct indication of which type + # of address they want, so direct comparison with the validator + # function has to be used instead. 
Sorry for the potato logic here + if validate_ipv46_address in field.default_validators: + return st.ip_addresses(v=4).map(str) | _ipv6_strings + if validate_ipv4_address in field.default_validators: + return st.ip_addresses(v=4).map(str) + if validate_ipv6_address in field.default_validators: + return _ipv6_strings + raise ResolutionFailed(f"No IP version validator on {field=}") + + +@register_for(dm.DecimalField) +@register_for(df.DecimalField) +def _for_decimal(field): + min_value, max_value = numeric_bounds_from_validators(field) + bound = Decimal(10**field.max_digits - 1) / (10**field.decimal_places) + return st.decimals( + min_value=max(min_value, -bound), + max_value=min(max_value, bound), + places=field.decimal_places, + ) + + +def length_bounds_from_validators(field): + min_size = 1 + max_size = field.max_length + for v in field.validators: + if isinstance(v, django.core.validators.MinLengthValidator): + min_size = max(min_size, v.limit_value) + elif isinstance(v, django.core.validators.MaxLengthValidator): + max_size = min(max_size or v.limit_value, v.limit_value) + return min_size, max_size + + +@register_for(dm.BinaryField) +def _for_binary(field): + min_size, max_size = length_bounds_from_validators(field) + if getattr(field, "blank", False) or not getattr(field, "required", True): + return st.just(b"") | st.binary(min_size=min_size, max_size=max_size) + return st.binary(min_size=min_size, max_size=max_size) + + +@register_for(dm.CharField) +@register_for(dm.TextField) +@register_for(df.CharField) +@register_for(df.RegexField) +def _for_text(field): + # We can infer a vastly more precise strategy by considering the + # validators as well as the field type. This is a minimal proof of + # concept, but we intend to leverage the idea much more heavily soon. + # See https://github.com/HypothesisWorks/hypothesis-python/issues/1116 + regexes = [ + re.compile(v.regex, v.flags) if isinstance(v.regex, str) else v.regex + for v in field.validators + if isinstance(v, django.core.validators.RegexValidator) and not v.inverse_match + ] + if regexes: + # This strategy generates according to one of the regexes, and + # filters using the others. It can therefore learn to generate + # from the most restrictive and filter with permissive patterns. + # Not maximally efficient, but it makes pathological cases rarer. + # If you want a challenge: extend https://qntm.org/greenery to + # compute intersections of the full Python regex language. + return st.one_of(*(st.from_regex(r) for r in regexes)) + # If there are no (usable) regexes, we use a standard text strategy. + min_size, max_size = length_bounds_from_validators(field) + strategy = st.text( + alphabet=st.characters(exclude_characters="\x00", exclude_categories=("Cs",)), + min_size=min_size, + max_size=max_size, + ).filter(lambda s: min_size <= len(s.strip())) + if getattr(field, "blank", False) or not getattr(field, "required", True): + return st.just("") | strategy + return strategy + + +if "django.contrib.auth" in settings.INSTALLED_APPS: + from django.contrib.auth.forms import UsernameField + + register_for(UsernameField)(_for_text) + + +@register_for(df.BooleanField) +def _for_form_boolean(field): + if field.required: + return st.just(True) + return st.booleans() + + +def _model_choice_strategy(field): + def _strategy(): + if field.choices is None: + # The field was instantiated with queryset=None, and not + # subsequently updated. 
+ raise InvalidArgument( + "Cannot create strategy for ModelChoicesField with no choices" + ) + elif hasattr(field, "_choices"): + # The choices property was set manually. + choices = field._choices + else: + # choices is not None, and was not set manually, so we + # must have a QuerySet. + choices = field.queryset + + if not choices.ordered: + raise InvalidArgument( + f"Cannot create strategy for {field.__class__.__name__} with a choices " + "attribute derived from a QuerySet without an explicit ordering - this may " + "cause Hypothesis to produce unstable results between runs." + ) + + return st.sampled_from( + [ + ( + choice.value + if isinstance(choice, df.models.ModelChoiceIteratorValue) + else choice # Empty value, if included. + ) + for choice, _ in field.choices + ] + ) + + # Accessing field.choices causes database access, so defer the strategy. + return st.deferred(_strategy) + + +@register_for(df.ModelChoiceField) +def _for_model_choice(field): + return _model_choice_strategy(field) + + +@register_for(df.ModelMultipleChoiceField) +def _for_model_multiple_choice(field): + min_size = 1 if field.required else 0 + return st.lists(_model_choice_strategy(field), min_size=min_size, unique=True) + + +def register_field_strategy( + field_type: type[AnyField], strategy: st.SearchStrategy +) -> None: + """Add an entry to the global field-to-strategy lookup used by + :func:`~hypothesis.extra.django.from_field`. + + ``field_type`` must be a subtype of :class:`django.db.models.Field` or + :class:`django.forms.Field`, which must not already be registered. + ``strategy`` must be a :class:`~hypothesis.strategies.SearchStrategy`. + """ + if not issubclass(field_type, (dm.Field, df.Field)): + raise InvalidArgument(f"{field_type=} must be a subtype of Field") + check_type(st.SearchStrategy, strategy, "strategy") + if field_type in _global_field_lookup: + raise InvalidArgument( + f"{field_type=} already has a registered " + f"strategy ({_global_field_lookup[field_type]!r})" + ) + if issubclass(field_type, dm.AutoField): + raise InvalidArgument("Cannot register a strategy for an AutoField") + _global_field_lookup[field_type] = strategy + + +def from_field(field: F) -> st.SearchStrategy[F | None]: + """Return a strategy for values that fit the given field. + + This function is used by :func:`~hypothesis.extra.django.from_form` and + :func:`~hypothesis.extra.django.from_model` for any fields that require + a value, or for which you passed ``...`` (:obj:`python:Ellipsis`) to infer + a strategy from an annotation. + + It's pretty similar to the core :func:`~hypothesis.strategies.from_type` + function, with a subtle but important difference: ``from_field`` takes a + Field *instance*, rather than a Field *subtype*, so that it has access to + instance attributes such as string length and validators. + """ + check_type((dm.Field, df.Field), field, "field") + + # The following isinstance check must occur *before* the getattr + # check. In the case of ModelChoicesField, evaluating + # field.choices causes database access, which we want to avoid if + # we don't have a connection (the generated strategies for + # ModelChoicesField defer evaluation of `choices'). 
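+    # Illustrative example: a model CharField(choices=[("a", "A"), ("b", "B")])
+    # takes the sampled_from() branch below, while a plain CharField falls
+    # through to the _global_field_lookup branch instead.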
+ if not isinstance(field, df.ModelChoiceField) and getattr(field, "choices", False): + choices: list = [] + for value, name_or_optgroup in field.choices: + if isinstance(name_or_optgroup, (list, tuple)): + choices.extend(key for key, _ in name_or_optgroup) + else: + choices.append(value) + # form fields automatically include an empty choice, strip it out + if "" in choices: + choices.remove("") + min_size = 1 + if isinstance(field, (dm.CharField, dm.TextField)) and field.blank: + choices.insert(0, "") + elif isinstance(field, (df.Field)) and not field.required: + choices.insert(0, "") + min_size = 0 + strategy = st.sampled_from(choices) + if isinstance(field, (df.MultipleChoiceField, df.TypedMultipleChoiceField)): + strategy = st.lists(st.sampled_from(choices), min_size=min_size) + else: + if type(field) not in _global_field_lookup: + if getattr(field, "null", False): + return st.none() + raise ResolutionFailed(f"Could not infer a strategy for {field!r}") + strategy = _global_field_lookup[type(field)] # type: ignore + if not isinstance(strategy, st.SearchStrategy): + strategy = strategy(field) + assert isinstance(strategy, st.SearchStrategy) + if field.validators: + + def validate(value): + try: + field.run_validators(value) + return True + except django.core.exceptions.ValidationError: + return False + + strategy = strategy.filter(validate) + + if getattr(field, "null", False): + return st.none() | strategy + return strategy diff --git a/vendored/hypothesis/extra/django/_impl.py b/vendored/hypothesis/extra/django/_impl.py new file mode 100644 index 0000000..7023f8a --- /dev/null +++ b/vendored/hypothesis/extra/django/_impl.py @@ -0,0 +1,230 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
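+
+# Illustrative usage sketch: the field strategies defined in _fields.py above
+# combine with the test-case classes and from_model() defined below.  ``Shop``
+# is a hypothetical model and ``MyField`` a hypothetical custom field:
+#
+#     from hypothesis import given, strategies as st
+#     from hypothesis.extra.django import TestCase, from_model, register_field_strategy
+#
+#     register_field_strategy(MyField, st.text(max_size=10))
+#
+#     class ShopTests(TestCase):
+#         @given(from_model(Shop))
+#         def test_saved_shop_has_primary_key(self, shop):
+#             self.assertIsNotNone(shop.pk)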
+ +import unittest +from functools import partial +from types import EllipsisType +from typing import Any, TypeVar + +from django import forms as df, test as dt +from django.contrib.staticfiles import testing as dst +from django.core.exceptions import ValidationError +from django.db import IntegrityError, models as dm + +from hypothesis import reject, strategies as st +from hypothesis.errors import InvalidArgument +from hypothesis.extra.django._fields import from_field +from hypothesis.strategies._internal.utils import defines_strategy + +ModelT = TypeVar("ModelT", bound=dm.Model) + + +class HypothesisTestCase: + def setup_example(self): + self._pre_setup() + + def teardown_example(self, example): + self._post_teardown() + + def __call__(self, result=None): + testMethod = getattr(self, self._testMethodName) + if getattr(testMethod, "is_hypothesis_test", False): + return unittest.TestCase.__call__(self, result) + else: + return dt.SimpleTestCase.__call__(self, result) + + +class SimpleTestCase(HypothesisTestCase, dt.SimpleTestCase): + pass + + +class TestCase(HypothesisTestCase, dt.TestCase): + pass + + +class TransactionTestCase(HypothesisTestCase, dt.TransactionTestCase): + pass + + +class LiveServerTestCase(HypothesisTestCase, dt.LiveServerTestCase): + pass + + +class StaticLiveServerTestCase(HypothesisTestCase, dst.StaticLiveServerTestCase): + pass + + +@defines_strategy() +def from_model( + model: type[ModelT], /, **field_strategies: st.SearchStrategy | EllipsisType +) -> st.SearchStrategy[ModelT]: + """Return a strategy for examples of ``model``. + + .. warning:: + Hypothesis creates saved models. This will run inside your testing + transaction when using the test runner, but if you use the dev console + this will leave debris in your database. + + ``model`` must be an subclass of :class:`~django:django.db.models.Model`. + Strategies for fields may be passed as keyword arguments, for example + ``is_staff=st.just(False)``. In order to support models with fields named + "model", this is a positional-only parameter. + + Hypothesis can often infer a strategy based the field type and validators, + and will attempt to do so for any required fields. No strategy will be + inferred for an :class:`~django:django.db.models.AutoField`, nullable field, + foreign key, or field for which a keyword + argument is passed to ``from_model()``. For example, + a Shop type with a foreign key to Company could be generated with:: + + shop_strategy = from_model(Shop, company=from_model(Company)) + + Like for :func:`~hypothesis.strategies.builds`, you can pass + ``...`` (:obj:`python:Ellipsis`) as a keyword argument to infer a strategy for + a field which has a default value instead of using the default. + """ + if not issubclass(model, dm.Model): + raise InvalidArgument(f"{model=} must be a subtype of Model") + + fields_by_name = {f.name: f for f in model._meta.concrete_fields} + for name, value in sorted(field_strategies.items()): + if value is ...: + field_strategies[name] = from_field(fields_by_name[name]) + for name, field in sorted(fields_by_name.items()): + if ( + name not in field_strategies + and not field.auto_created + and not isinstance(field, dm.AutoField) + and not isinstance(field, getattr(dm, "GeneratedField", ())) + and field.default is dm.fields.NOT_PROVIDED + ): + field_strategies[name] = from_field(field) + + for field in field_strategies: + if model._meta.get_field(field).primary_key: + # The primary key is generated as part of the strategy. 
We + # want to find any existing row with this primary key and + # overwrite its contents. + kwargs = {field: field_strategies.pop(field)} + kwargs["defaults"] = st.fixed_dictionaries(field_strategies) # type: ignore + return _models_impl(st.builds(model.objects.update_or_create, **kwargs)) + + # The primary key is not generated as part of the strategy, so we + # just match against any row that has the same value for all + # fields. + return _models_impl(st.builds(model.objects.get_or_create, **field_strategies)) + + +@st.composite +def _models_impl(draw, strat): + """Handle the nasty part of drawing a value for models()""" + try: + return draw(strat)[0] + except IntegrityError: + reject() + + +@defines_strategy() +def from_form( + form: type[df.Form], + form_kwargs: dict | None = None, + **field_strategies: st.SearchStrategy | EllipsisType, +) -> st.SearchStrategy[df.Form]: + """Return a strategy for examples of ``form``. + + ``form`` must be an subclass of :class:`~django:django.forms.Form`. + Strategies for fields may be passed as keyword arguments, for example + ``is_staff=st.just(False)``. + + Hypothesis can often infer a strategy based the field type and validators, + and will attempt to do so for any required fields. No strategy will be + inferred for a disabled field or field for which a keyword argument + is passed to ``from_form()``. + + This function uses the fields of an unbound ``form`` instance to determine + field strategies, any keyword arguments needed to instantiate the unbound + ``form`` instance can be passed into ``from_form()`` as a dict with the + keyword ``form_kwargs``. E.g.:: + + shop_strategy = from_form(Shop, form_kwargs={"company_id": 5}) + + Like for :func:`~hypothesis.strategies.builds`, you can pass + ``...`` (:obj:`python:Ellipsis`) as a keyword argument to infer a strategy for + a field which has a default value instead of using the default. + """ + # currently unsupported: + # ComboField + # FilePathField + # ImageField + form_kwargs = form_kwargs or {} + if not issubclass(form, df.BaseForm): + raise InvalidArgument(f"{form=} must be a subtype of Form") + + # Forms are a little bit different from models. Model classes have + # all their fields defined, whereas forms may have different fields + # per-instance. So, we ought to instantiate the form and get the + # fields from the instance, thus we need to accept the kwargs for + # instantiation as well as the explicitly defined strategies + + unbound_form = form(**form_kwargs) + fields_by_name = {} + for name, field in unbound_form.fields.items(): + if isinstance(field, df.MultiValueField): + # PS: So this is a little strange, but MultiValueFields must + # have their form data encoded in a particular way for the + # values to actually be picked up by the widget instances' + # ``value_from_datadict``. + # E.g. if a MultiValueField named 'mv_field' has 3 + # sub-fields then the ``value_from_datadict`` will look for + # 'mv_field_0', 'mv_field_1', and 'mv_field_2'. 
Here I'm + # decomposing the individual sub-fields into the names that + # the form validation process expects + for i, _field in enumerate(field.fields): + fields_by_name[f"{name}_{i}"] = _field + else: + fields_by_name[name] = field + + for name, value in sorted(field_strategies.items()): + if value is ...: + field_strategies[name] = from_field(fields_by_name[name]) + + for name, field in sorted(fields_by_name.items()): + if name not in field_strategies and not field.disabled: + field_strategies[name] = from_field(field) + + # files are handled a bit specially in forms. A Form accepts two arguments: + # `data` and `files`. The former is for normal fields, and the latter is for + # file fields. + # see https://docs.djangoproject.com/en/5.1/ref/forms/api/#binding-uploaded-files. + data_strategies: dict[str, Any] = {} + file_strategies: dict[str, Any] = {} + for name, field in field_strategies.items(): + form_field = fields_by_name[name] + dictionary = ( + file_strategies if isinstance(form_field, df.FileField) else data_strategies + ) + dictionary[name] = field + + return _forms_impl( + st.builds( + partial(form, **form_kwargs), # type: ignore + data=st.fixed_dictionaries(data_strategies), + files=st.fixed_dictionaries(file_strategies), + ) + ) + + +@st.composite +def _forms_impl(draw, strat): + """Handle the nasty part of drawing a value for from_form()""" + try: + return draw(strat) + except ValidationError: + reject() diff --git a/vendored/hypothesis/extra/dpcontracts.py b/vendored/hypothesis/extra/dpcontracts.py new file mode 100644 index 0000000..7c2f084 --- /dev/null +++ b/vendored/hypothesis/extra/dpcontracts.py @@ -0,0 +1,49 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +""" +This module provides tools for working with the :pypi:`dpcontracts` library, +because `combining contracts and property-based testing works really well +`_. + +It requires ``dpcontracts >= 0.4``. +""" + +from dpcontracts import PreconditionError + +from hypothesis import reject +from hypothesis.errors import InvalidArgument +from hypothesis.internal.reflection import proxies + + +def fulfill(contract_func): + """Decorate ``contract_func`` to reject calls which violate preconditions, + and retry them with different arguments. + + This is a convenience function for testing internal code that uses + :pypi:`dpcontracts`, to automatically filter out arguments that would be + rejected by the public interface before triggering a contract error. + + This can be used as ``builds(fulfill(func), ...)`` or in the body of the + test e.g. ``assert fulfill(func)(*args)``. 
+ """ + if not hasattr(contract_func, "__contract_wrapped_func__"): + raise InvalidArgument( + f"{contract_func.__name__} has no dpcontracts preconditions" + ) + + @proxies(contract_func) + def inner(*args, **kwargs): + try: + return contract_func(*args, **kwargs) + except PreconditionError: + reject() + + return inner diff --git a/vendored/hypothesis/extra/ghostwriter.py b/vendored/hypothesis/extra/ghostwriter.py new file mode 100644 index 0000000..e9aeb8c --- /dev/null +++ b/vendored/hypothesis/extra/ghostwriter.py @@ -0,0 +1,1941 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +""" +Writing tests with Hypothesis frees you from the tedium of deciding on and +writing out specific inputs to test. Now, the ``hypothesis.extra.ghostwriter`` +module can write your test functions for you too! + +The idea is to provide **an easy way to start** property-based testing, +**and a seamless transition** to more complex test code - because ghostwritten +tests are source code that you could have written for yourself. + +So just pick a function you'd like tested, and feed it to one of the functions +below. They follow imports, use but do not require type annotations, and +generally do their best to write you a useful test. You can also use +:ref:`our command-line interface `:: + + $ hypothesis write --help + Usage: hypothesis write [OPTIONS] FUNC... + + `hypothesis write` writes property-based tests for you! + + Type annotations are helpful but not required for our advanced + introspection and templating logic. Try running the examples below to see + how it works: + + hypothesis write gzip + hypothesis write numpy.matmul + hypothesis write pandas.from_dummies + hypothesis write re.compile --except re.error + hypothesis write --equivalent ast.literal_eval eval + hypothesis write --roundtrip json.dumps json.loads + hypothesis write --style=unittest --idempotent sorted + hypothesis write --binary-op operator.add + + Options: + --roundtrip start by testing write/read or encode/decode! + --equivalent very useful when optimising or refactoring code + --errors-equivalent --equivalent, but also allows consistent errors + --idempotent check that f(x) == f(f(x)) + --binary-op associativity, commutativity, identity element + --style [pytest|unittest] pytest-style function, or unittest-style method? + -e, --except OBJ_NAME dotted name of exception(s) to ignore + --annotate / --no-annotate force ghostwritten tests to be type-annotated + (or not). By default, match the code to test. + -h, --help Show this message and exit. + +.. tip:: + + Using a light theme? Hypothesis respects `NO_COLOR `__ + and ``DJANGO_COLORS=light``. + +.. note:: + + The ghostwriter requires :pypi:`black`, but the generated code only + requires Hypothesis itself. + +.. note:: + + Legal questions? While the ghostwriter fragments and logic is under the + MPL-2.0 license like the rest of Hypothesis, the *output* from the ghostwriter + is made available under the `Creative Commons Zero (CC0) + `__ + public domain dedication, so you can use it without any restrictions. 
+""" + +import ast +import builtins +import contextlib +import enum +import inspect +import os +import re +import sys +import types +import warnings +from collections import OrderedDict, defaultdict +from collections.abc import Callable, Iterable, Mapping +from itertools import permutations, zip_longest +from keyword import iskeyword as _iskeyword +from string import ascii_lowercase +from textwrap import dedent, indent +from types import EllipsisType +from typing import ( + Any, + ForwardRef, + NamedTuple, + TypeVar, + get_args, + get_origin, +) + +import black + +from hypothesis import Verbosity, find, settings, strategies as st +from hypothesis.errors import InvalidArgument, SmallSearchSpaceWarning +from hypothesis.internal.compat import get_type_hints +from hypothesis.internal.reflection import get_signature, is_mock +from hypothesis.internal.validation import check_type +from hypothesis.provisional import domains +from hypothesis.strategies._internal.collections import ListStrategy +from hypothesis.strategies._internal.core import BuildsStrategy +from hypothesis.strategies._internal.deferred import DeferredStrategy +from hypothesis.strategies._internal.flatmapped import FlatMapStrategy +from hypothesis.strategies._internal.lazy import LazyStrategy, unwrap_strategies +from hypothesis.strategies._internal.strategies import ( + FilteredStrategy, + MappedStrategy, + OneOfStrategy, + SampledFromStrategy, +) +from hypothesis.strategies._internal.types import _global_type_lookup, is_generic_type + +IMPORT_SECTION = """ +# This test code was written by the `hypothesis.extra.ghostwriter` module +# and is provided under the Creative Commons Zero public domain dedication. + +{imports} +""" + +TEMPLATE = """ +@given({given_args}) +def test_{test_kind}_{func_name}({arg_names}){return_annotation}: +{test_body} +""" + +SUPPRESS_BLOCK = """ +try: +{test_body} +except {exceptions}: + reject() +""".strip() + +Except = type[Exception] | tuple[type[Exception], ...] +ImportSet = set[str | tuple[str, str]] +_quietly_settings = settings( + database=None, + deadline=None, + derandomize=True, + verbosity=Verbosity.quiet, +) + + +def _dedupe_exceptions(exc: tuple[type[Exception], ...]) -> tuple[type[Exception], ...]: + # This is reminiscent of de-duplication logic I wrote for flake8-bugbear, + # but with access to the actual objects we can just check for subclasses. + # This lets us print e.g. `Exception` instead of `(Exception, OSError)`. 
+ uniques = list(exc) + for a, b in permutations(exc, 2): + if a in uniques and issubclass(a, b): + uniques.remove(a) + return tuple(sorted(uniques, key=lambda e: e.__name__)) + + +def _check_except(except_: Except) -> tuple[type[Exception], ...]: + if isinstance(except_, tuple): + for i, e in enumerate(except_): + if not isinstance(e, type) or not issubclass(e, Exception): + raise InvalidArgument( + f"Expected an Exception but got except_[{i}]={e!r}" + f" (type={_get_qualname(type(e))})" + ) + return except_ + if not isinstance(except_, type) or not issubclass(except_, Exception): + raise InvalidArgument( + "Expected an Exception or tuple of exceptions, but got except_=" + f"{except_!r} (type={_get_qualname(type(except_))})" + ) + return (except_,) + + +def _exception_string(except_: tuple[type[Exception], ...]) -> tuple[ImportSet, str]: + if not except_: + return set(), "" + exceptions = [] + imports: ImportSet = set() + for ex in _dedupe_exceptions(except_): + if ex.__qualname__ in dir(builtins): + exceptions.append(ex.__qualname__) + else: + imports.add(ex.__module__) + exceptions.append(_get_qualname(ex, include_module=True)) + return imports, ( + "(" + ", ".join(exceptions) + ")" if len(exceptions) > 1 else exceptions[0] + ) + + +def _check_style(style: str) -> None: + if style not in ("pytest", "unittest"): + raise InvalidArgument(f"Valid styles are 'pytest' or 'unittest', got {style!r}") + + +def _exceptions_from_docstring(doc: str) -> tuple[type[Exception], ...]: + """Return a tuple of exceptions that the docstring says may be raised. + + Note that we ignore non-builtin exception types for simplicity, as this is + used directly in _write_call() and passing import sets around would be really + really annoying. + """ + # TODO: it would be great to handle Google- and Numpy-style docstrings + # (e.g. by using the Napoleon Sphinx extension) + assert isinstance(doc, str), doc + raises = [] + for excname in re.compile(r"\:raises\s+(\w+)\:", re.MULTILINE).findall(doc): + exc_type = getattr(builtins, excname, None) + if isinstance(exc_type, type) and issubclass(exc_type, Exception): + raises.append(exc_type) + return tuple(_dedupe_exceptions(tuple(raises))) + + +def _type_from_doc_fragment(token: str) -> type | None: + # Special cases for "integer" and for numpy array-like and dtype + if token == "integer": + return int + if "numpy" in sys.modules: + if re.fullmatch(r"[Aa]rray[-_ ]?like", token): + return sys.modules["numpy"].ndarray + elif token == "dtype": + return sys.modules["numpy"].dtype + # Natural-language syntax, e.g. "sequence of integers" + coll_match = re.fullmatch(r"(\w+) of (\w+)", token) + if coll_match is not None: + coll_token, elem_token = coll_match.groups() + elems = _type_from_doc_fragment(elem_token) + if elems is None and elem_token.endswith("s"): + elems = _type_from_doc_fragment(elem_token[:-1]) + if elems is not None and coll_token in ("list", "sequence", "collection"): + return list[elems] # type: ignore + # This might be e.g. "array-like of float"; arrays is better than nothing + # even if we can't conveniently pass a generic type around. + return _type_from_doc_fragment(coll_token) + # Check either builtins, or the module for a dotted name + if "." 
not in token: + return getattr(builtins, token, None) + mod, name = token.rsplit(".", maxsplit=1) + return getattr(sys.modules.get(mod, None), name, None) + + +def _strip_typevars(type_): + with contextlib.suppress(Exception): + if {type(a) for a in get_args(type_)} == {TypeVar}: + return get_origin(type_) + return type_ + + +def _strategy_for(param: inspect.Parameter, docstring: str) -> st.SearchStrategy: + # Example types in docstrings: + # - `:type a: sequence of integers` + # - `b (list, tuple, or None): ...` + # - `c : {"foo", "bar", or None}` + for pattern in ( + rf"^\s*\:type\s+{param.name}\:\s+(.+)", # RST-style + rf"^\s*{param.name} \((.+)\):", # Google-style + rf"^\s*{param.name} \: (.+)", # Numpy-style + ): + match = re.search(pattern, docstring, flags=re.MULTILINE) + if match is None: + continue + doc_type = match.group(1) + doc_type = doc_type.removesuffix(", optional").strip("}{") + elements = [] + types = [] + for token in re.split(r",? +or +| *, *", doc_type): + for prefix in ("default ", "python "): + # e.g. `str or None, default "auto"` or `python int or numpy.int64` + token = token.removeprefix(prefix) + if not token: + continue + try: + # Elements of `{"inner", "outer"}` etc. + elements.append(ast.literal_eval(token)) + continue + except (ValueError, SyntaxError): + t = _type_from_doc_fragment(token) + if isinstance(t, type) or is_generic_type(t): + assert t is not None + types.append(_strip_typevars(t)) + if ( + param.default is not inspect.Parameter.empty + and param.default not in elements + and not isinstance( + param.default, tuple(t for t in types if isinstance(t, type)) + ) + ): + with contextlib.suppress(SyntaxError): + compile(repr(st.just(param.default)), "", "eval") + elements.insert(0, param.default) + if elements or types: + return (st.sampled_from(elements) if elements else st.nothing()) | ( + st.one_of(*map(st.from_type, types)) if types else st.nothing() + ) + + # If our default value is an Enum or a boolean, we assume that any value + # of that type is acceptable. Otherwise, we only generate the default. + if isinstance(param.default, bool): + return st.booleans() + if isinstance(param.default, enum.Enum): + return st.sampled_from(type(param.default)) + if param.default is not inspect.Parameter.empty: + # Using `st.from_type(type(param.default))` would introduce spurious + # failures in cases like the `flags` argument to regex functions. + # Better in to keep it simple, and let the user elaborate if desired. 
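+    # Illustrative example: for re.sub(pattern, repl, string, count=0, flags=0)
+    # this returns just(0) for both ``count`` and ``flags``, rather than the
+    # spurious-failure-prone integers() guess discussed above.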
+ return st.just(param.default) + return _guess_strategy_by_argname(name=param.name.lower()) + + +# fmt: off +BOOL_NAMES = ( + "keepdims", "verbose", "debug", "force", "train", "training", "trainable", "bias", + "shuffle", "show", "load", "pretrained", "save", "overwrite", "normalize", + "reverse", "success", "enabled", "strict", "copy", "quiet", "required", "inplace", + "recursive", "enable", "active", "create", "validate", "refresh", "use_bias", +) +POSITIVE_INTEGER_NAMES = ( + "width", "size", "length", "limit", "idx", "stride", "epoch", "epochs", "depth", + "pid", "steps", "iteration", "iterations", "vocab_size", "ttl", "count", +) +FLOAT_NAMES = ( + "real", "imag", "alpha", "theta", "beta", "sigma", "gamma", "angle", "reward", + "tau", "temperature", +) +STRING_NAMES = ( + "text", "txt", "password", "label", "prefix", "suffix", "desc", "description", + "str", "pattern", "subject", "reason", "comment", "prompt", "sentence", "sep", +) +# fmt: on + + +def _guess_strategy_by_argname(name: str) -> st.SearchStrategy: + """ + If all else fails, we try guessing a strategy based on common argument names. + + We wouldn't do this in builds() where strict correctness is required, but for + the ghostwriter we accept "good guesses" since the user would otherwise have + to change the strategy anyway - from `nothing()` - if we refused to guess. + + A "good guess" is _usually correct_, and _a reasonable mistake_ if not. + The logic below is therefore based on a manual reading of the builtins and + some standard-library docs, plus the analysis of about three hundred million + arguments in https://github.com/HypothesisWorks/hypothesis/issues/3311 + """ + # Special-cased names + if name in ("function", "func", "f"): + return st.functions() + if name in ("pred", "predicate"): + return st.functions(returns=st.booleans(), pure=True) + if name in ("iterable",): + return st.iterables(st.integers()) | st.iterables(st.text()) + if name in ("list", "lst", "ls"): + return st.lists(st.nothing()) + if name in ("object",): + return st.builds(object) + if "uuid" in name: + return st.uuids().map(str) + + # Names which imply the value is a boolean + if name.startswith("is_") or name in BOOL_NAMES: + return st.booleans() + + # Names which imply that the value is a number, perhaps in a particular range + if name in ("amount", "threshold", "number", "num"): + return st.integers() | st.floats() + + if name in ("port",): + return st.integers(0, 2**16 - 1) + if ( + name.endswith("_size") + or (name.endswith("size") and "_" not in name) + or re.fullmatch(r"n(um)?_[a-z_]*s", name) + or name in POSITIVE_INTEGER_NAMES + ): + return st.integers(min_value=0) + if name in ("offset", "seed", "dim", "total", "priority"): + return st.integers() + + if name in ("learning_rate", "dropout", "dropout_rate", "epsilon", "eps", "prob"): + return st.floats(0, 1) + if name in ("lat", "latitude"): + return st.floats(-90, 90) + if name in ("lon", "longitude"): + return st.floats(-180, 180) + if name in ("radius", "tol", "tolerance", "rate"): + return st.floats(min_value=0) + if name in FLOAT_NAMES: + return st.floats() + + # Names which imply that the value is a string + if name in ("host", "hostname"): + return domains() + if name in ("email",): + return st.emails() + if name in ("word", "slug", "api_key"): + return st.from_regex(r"\w+", fullmatch=True) + if name in ("char", "character"): + return st.characters() + + if ( + "file" in name + or "path" in name + or name.endswith("_dir") + or name in ("fname", "dir", "dirname", "directory", 
"folder") + ): + # Common names for filesystem paths: these are usually strings, but we + # don't want to make strings more convenient than pathlib.Path. + return st.nothing() + + if ( + name.endswith(("_name", "label")) + or (name.endswith("name") and "_" not in name) + or ("string" in name and "as" not in name) + or name in STRING_NAMES + ): + return st.text() + + # Last clever idea: maybe we're looking a plural, and know the singular: + if re.fullmatch(r"\w*[^s]s", name): + elems = _guess_strategy_by_argname(name[:-1]) + if not elems.is_empty: + return st.lists(elems) + + # And if all that failed, we'll return nothing() - the user will have to + # fill this in by hand, and we'll leave a comment to that effect later. + return st.nothing() + + +def _get_params_builtin_fn(func: Callable) -> list[inspect.Parameter]: + if ( + isinstance(func, (types.BuiltinFunctionType, types.BuiltinMethodType)) + and hasattr(func, "__doc__") + and isinstance(func.__doc__, str) + ): + # inspect.signature doesn't work on all builtin functions or methods. + # In such cases, we can try to reconstruct simple signatures from the docstring. + match = re.match(rf"^{func.__name__}\((.+?)\)", func.__doc__) + if match is None: + return [] + args = match.group(1).replace("[", "").replace("]", "") + params = [] + # Even if the signature doesn't contain a /, we assume that arguments + # are positional-only until shown otherwise - the / is often omitted. + kind: inspect._ParameterKind = inspect.Parameter.POSITIONAL_ONLY + for arg in args.split(", "): + arg, *_ = arg.partition("=") + arg = arg.strip() + if arg == "/": + kind = inspect.Parameter.POSITIONAL_OR_KEYWORD + continue + if arg.startswith("*") or arg == "...": + kind = inspect.Parameter.KEYWORD_ONLY + continue # we omit *varargs, if there are any + if _iskeyword(arg.lstrip("*")) or not arg.lstrip("*").isidentifier(): + break # skip all subsequent params if this name is invalid + params.append(inspect.Parameter(name=arg, kind=kind)) + return params + return [] + + +def _get_params_ufunc(func: Callable) -> list[inspect.Parameter]: + if _is_probably_ufunc(func): + # `inspect.signature` results vary for ufunc objects, but we can work out + # what the required parameters would look like if it was reliable. + # Note that we use args named a, b, c... to match the `operator` module, + # rather than x1, x2, x3... like the Numpy docs. Because they're pos-only + # this doesn't make a runtime difference, and it's much nicer for use-cases + # like `equivalent(numpy.add, operator.add)`. + return [ + inspect.Parameter(name=name, kind=inspect.Parameter.POSITIONAL_ONLY) + for name in ascii_lowercase[: func.nin] # type: ignore + ] + return [] + + +def _get_params(func: Callable) -> dict[str, inspect.Parameter]: + """Get non-vararg parameters of `func` as an ordered dict.""" + try: + params = list(get_signature(func).parameters.values()) + except Exception: + if params := _get_params_ufunc(func): + pass + elif params := _get_params_builtin_fn(func): + pass + else: + # If we haven't managed to recover a signature through the tricks above, + # we're out of ideas and should just re-raise the exception. + raise + else: + # If the params we got look like an uninformative placeholder, try fallbacks. 
+ P = inspect.Parameter + placeholder = [("args", P.VAR_POSITIONAL), ("kwargs", P.VAR_KEYWORD)] + if [(p.name, p.kind) for p in params] == placeholder: + params = _get_params_ufunc(func) or _get_params_builtin_fn(func) or params + return _params_to_dict(params) + + +def _params_to_dict( + params: Iterable[inspect.Parameter], +) -> dict[str, inspect.Parameter]: + var_param_kinds = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD) + return OrderedDict((p.name, p) for p in params if p.kind not in var_param_kinds) + + +@contextlib.contextmanager +def _with_any_registered(): + # If the user has registered their own strategy for Any, leave it alone + if Any in _global_type_lookup: + yield + # We usually want to force from_type(Any) to raise an error because we don't + # have enough information to accurately resolve user intent, but in this case + # we can treat it as a synonym for object - this is probably wrong, but you'll + # get at least _some_ output to edit later. We then reset everything in order + # to avoid polluting the resolution logic in case you run tests later. + else: + try: + _global_type_lookup[Any] = st.builds(object) + yield + finally: + del _global_type_lookup[Any] + st.from_type.__clear_cache() + + +def _get_strategies( + *funcs: Callable, pass_result_to_next_func: bool = False +) -> dict[str, st.SearchStrategy]: + """Return a dict of strategies for the union of arguments to `funcs`. + + If `pass_result_to_next_func` is True, assume that the result of each function + is passed to the next, and therefore skip the first argument of all but the + first function. + + This dict is used to construct our call to the `@given(...)` decorator. + """ + assert funcs, "Must pass at least one function" + given_strategies: dict[str, st.SearchStrategy] = {} + for i, f in enumerate(funcs): + params = _get_params(f) + if pass_result_to_next_func and i >= 1: + del params[next(iter(params))] + hints = get_type_hints(f) + docstring = getattr(f, "__doc__", None) or "" + builder_args = { + k: ... if k in hints else _strategy_for(v, docstring) + for k, v in params.items() + } + with _with_any_registered(): + strat = st.builds(f, **builder_args).wrapped_strategy # type: ignore + + if strat.args: + raise NotImplementedError("Expected to pass everything as kwargs") + + for k, v in strat.kwargs.items(): + if _valid_syntax_repr(v)[1] == "nothing()" and k in hints: + # e.g. from_type(Hashable) is OK but the unwrapped repr is not + v = LazyStrategy(st.from_type, (hints[k],), {}) + if k in given_strategies: + given_strategies[k] |= v + else: + given_strategies[k] = v + + # If there is only one function, we pass arguments to @given in the order of + # that function's signature. Otherwise, we use alphabetical order. + if len(funcs) == 1: + return {name: given_strategies[name] for name in _get_params(f)} + return dict(sorted(given_strategies.items())) + + +def _assert_eq(style: str, a: str, b: str) -> str: + if style == "unittest": + return f"self.assertEqual({a}, {b})" + assert style == "pytest" + if a.isidentifier() and b.isidentifier(): + return f"assert {a} == {b}, ({a}, {b})" + return f"assert {a} == {b}" + + +def _imports_for_object(obj): + """Return the imports for `obj`, which may be empty for e.g. 
lambdas""" + if type(obj) is getattr(types, "UnionType", object()): + return {mod for mod, _ in set().union(*map(_imports_for_object, obj.__args__))} + if isinstance(obj, (re.Pattern, re.Match)): + return {"re"} + if isinstance(obj, st.SearchStrategy): + return _imports_for_strategy(obj) + if isinstance(obj, getattr(sys.modules.get("numpy"), "dtype", ())): + return {("numpy", "dtype")} + try: + if is_generic_type(obj): + if isinstance(obj, TypeVar): + return {(obj.__module__, obj.__name__)} + with contextlib.suppress(Exception): + return set().union(*map(_imports_for_object, obj.__args__)) + if (not callable(obj)) or obj.__name__ == "": + return set() + name = _get_qualname(obj).split(".")[0] + return {(_get_module(obj), name)} + except Exception: + return set() + + +def _imports_for_strategy(strategy): + # If we have a lazy from_type strategy, because unwrapping it gives us an + # error or invalid syntax, import that type and we're done. + if isinstance(strategy, LazyStrategy): + imports = { + imp + for arg in set(strategy._LazyStrategy__args) + | set(strategy._LazyStrategy__kwargs.values()) + for imp in _imports_for_object(_strip_typevars(arg)) + } + if re.match(r"from_(type|regex)\(", repr(strategy)): + return imports + elif _get_module(strategy.function).startswith("hypothesis.extra."): + module = _get_module(strategy.function).replace("._array_helpers", ".numpy") + return {(module, strategy.function.__name__)} | imports + + imports = set() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", SmallSearchSpaceWarning) + strategy = unwrap_strategies(strategy) + + # Get imports for s.map(f), s.filter(f), s.flatmap(f), including both s and f + if isinstance(strategy, MappedStrategy): + imports |= _imports_for_strategy(strategy.mapped_strategy) + imports |= _imports_for_object(strategy.pack) + if isinstance(strategy, FilteredStrategy): + imports |= _imports_for_strategy(strategy.filtered_strategy) + for f in strategy.flat_conditions: + imports |= _imports_for_object(f) + if isinstance(strategy, FlatMapStrategy): + imports |= _imports_for_strategy(strategy.base) + imports |= _imports_for_object(strategy.expand) + + # recurse through one_of to handle e.g. from_type(Optional[Foo]) + if isinstance(strategy, OneOfStrategy): + for s in strategy.element_strategies: + imports |= _imports_for_strategy(s) + + # get imports for the target of builds(), and recurse into the argument strategies + if isinstance(strategy, BuildsStrategy): + imports |= _imports_for_object(strategy.target) + for s in strategy.args: + imports |= _imports_for_strategy(s) + for s in strategy.kwargs.values(): + imports |= _imports_for_strategy(s) + + if isinstance(strategy, SampledFromStrategy): + for obj in strategy.elements: + imports |= _imports_for_object(obj) + + if isinstance(strategy, ListStrategy): + imports |= _imports_for_strategy(strategy.element_strategy) + + return imports + + +def _valid_syntax_repr(strategy): + # For binary_op, we pass a variable name - so pass it right back again. + if isinstance(strategy, str): + return set(), strategy + # Flatten and de-duplicate any one_of strategies, whether that's from resolving + # a Union type or combining inputs to multiple functions. 
+ try: + if isinstance(strategy, DeferredStrategy): + strategy = strategy.wrapped_strategy + if isinstance(strategy, OneOfStrategy): + seen = set() + elems = [] + with warnings.catch_warnings(): + warnings.simplefilter("ignore", SmallSearchSpaceWarning) + strategy.element_strategies # might warn on first access + for s in strategy.element_strategies: + if isinstance(s, SampledFromStrategy) and s.elements == (os.environ,): + continue + if repr(s) not in seen: + elems.append(s) + seen.add(repr(s)) + strategy = st.one_of(elems or st.nothing()) + # hardcode some special cases for nicer reprs + if strategy == st.text().wrapped_strategy: + return set(), "text()" + if strategy == st.from_type(type): + return set(), "from_type(type)" + # Remove any typevars; we don't exploit them so they're just clutter here + if ( + isinstance(strategy, LazyStrategy) + and strategy.function.__name__ == st.from_type.__name__ + and strategy._LazyStrategy__representation is None + ): + strategy._LazyStrategy__args = tuple( + _strip_typevars(a) for a in strategy._LazyStrategy__args + ) + # Return a syntactically-valid strategy repr, including fixing some + # strategy reprs and replacing invalid syntax reprs with `"nothing()"`. + # String-replace to hide the special case in from_type() for Decimal('snan') + r = ( + repr(strategy) + .replace(".filter(_can_hash)", "") + .replace("hypothesis.strategies.", "") + ) + # Replace with ... in confusing lambdas + r = re.sub(r"(lambda.*?: )()([,)])", r"\1...\3", r) + compile(r, "", "eval") + # Finally, try to work out the imports we need for builds(), .map(), + # .filter(), and .flatmap() to work without NameError + imports = {i for i in _imports_for_strategy(strategy) if i[1] in r} + return imports, r + except (SyntaxError, RecursionError, InvalidArgument): + return set(), "nothing()" + + +# When we ghostwrite for a module, we want to treat that as the __module__ for +# each function, rather than whichever internal file it was actually defined in. +KNOWN_FUNCTION_LOCATIONS: dict[object, str] = {} + + +def _get_module_helper(obj): + # Get the __module__ attribute of the object, and return the first ancestor module + # which contains the object; falling back to the literal __module__ if none do. + # The goal is to show location from which obj should usually be accessed, rather + # than what we assume is an internal submodule which defined it. 
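+    # Illustrative example: json.JSONDecoder is defined in json.decoder, but the
+    # json package re-exports it, so this helper reports "json" rather than
+    # "json.decoder".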
+ module_name = obj.__module__ + + # if "collections.abc" is used don't use the deprecated aliases in "collections" + if module_name == "collections.abc": + return module_name + + dots = [i for i, c in enumerate(module_name) if c == "."] + [None] + for idx in dots: + for candidate in (module_name[:idx].lstrip("_"), module_name[:idx]): + if getattr(sys.modules.get(candidate), obj.__name__, None) is obj: + KNOWN_FUNCTION_LOCATIONS[obj] = candidate + return candidate + return module_name + + +def _get_module(obj): + if obj in KNOWN_FUNCTION_LOCATIONS: + return KNOWN_FUNCTION_LOCATIONS[obj] + try: + return _get_module_helper(obj) + except AttributeError: + if not _is_probably_ufunc(obj): + raise + for module_name in sorted(sys.modules, key=lambda n: tuple(n.split("."))): + if obj is getattr(sys.modules[module_name], obj.__name__, None): + KNOWN_FUNCTION_LOCATIONS[obj] = module_name + return module_name + raise RuntimeError(f"Could not find module for ufunc {obj.__name__} ({obj!r}") + + +def _get_qualname(obj: Any, *, include_module: bool = False) -> str: + # Replacing angle-brackets for objects defined in `..` + qname = getattr(obj, "__qualname__", obj.__name__) + qname = qname.replace("<", "_").replace(">", "_").replace(" ", "") + if include_module: + return _get_module(obj) + "." + qname + return qname + + +def _write_call( + func: Callable, *pass_variables: str, except_: Except = Exception, assign: str = "" +) -> str: + """Write a call to `func` with explicit and implicit arguments. + + >>> _write_call(sorted, "my_seq", "func") + "builtins.sorted(my_seq, key=func, reverse=reverse)" + + >>> write_call(f, assign="var1") + "var1 = f()" + + The fancy part is that we'll check the docstring for any known exceptions + which `func` might raise, and catch-and-reject on them... *unless* they're + subtypes of `except_`, which will be handled in an outer try-except block. + """ + args = ", ".join( + ( + (v or p.name) + if p.kind is inspect.Parameter.POSITIONAL_ONLY + else f"{p.name}={v or p.name}" + ) + for v, p in zip_longest(pass_variables, _get_params(func).values()) + ) + call = f"{_get_qualname(func, include_module=True)}({args})" + if assign: + call = f"{assign} = {call}" + raises = _exceptions_from_docstring(getattr(func, "__doc__", "") or "") + exnames = [ex.__name__ for ex in raises if not issubclass(ex, except_)] + if not exnames: + return call + return SUPPRESS_BLOCK.format( + test_body=indent(call, prefix=" "), + exceptions="(" + ", ".join(exnames) + ")" if len(exnames) > 1 else exnames[0], + ) + + +def _st_strategy_names(s: str) -> str: + """Replace strategy name() with st.name(). + + Uses a tricky re.sub() to avoid problems with frozensets() matching + sets() too. + """ + names = "|".join(sorted(st.__all__, key=len, reverse=True)) + return re.sub(pattern=rf"\b(?:{names})\b[^= ]", repl=r"st.\g<0>", string=s) + + +def _make_test_body( + *funcs: Callable, + ghost: str, + test_body: str, + except_: tuple[type[Exception], ...], + assertions: str = "", + style: str, + given_strategies: Mapping[str, str | st.SearchStrategy] | None = None, + imports: ImportSet | None = None, + annotate: bool, +) -> tuple[ImportSet, str]: + # A set of modules to import - we might add to this later. The import code + # is written later, so we can have one import section for multiple magic() + # test functions. + imports = (imports or set()) | {_get_module(f) for f in funcs} + + # Get strategies for all the arguments to each function we're testing. 
+ with _with_any_registered(): + given_strategies = given_strategies or _get_strategies( + *funcs, pass_result_to_next_func=ghost in ("idempotent", "roundtrip") + ) + reprs = [((k, *_valid_syntax_repr(v))) for k, v in given_strategies.items()] + imports = imports.union(*(imp for _, imp, _ in reprs)) + given_args = ", ".join(f"{k}={v}" for k, _, v in reprs) + given_args = _st_strategy_names(given_args) + + if except_: + # Convert to strings, either builtin names or qualified names. + imp, exc_string = _exception_string(except_) + imports.update(imp) + # And finally indent the existing test body into a try-except block + # which catches these exceptions and calls `hypothesis.reject()`. + test_body = SUPPRESS_BLOCK.format( + test_body=indent(test_body, prefix=" "), + exceptions=exc_string, + ) + + if assertions: + test_body = f"{test_body}\n{assertions}" + + # Indent our test code to form the body of a function or method. + argnames = ["self"] if style == "unittest" else [] + if annotate: + argnames.extend(_annotate_args(given_strategies, funcs, imports)) + else: + argnames.extend(given_strategies) + + body = TEMPLATE.format( + given_args=given_args, + test_kind=ghost, + func_name="_".join(_get_qualname(f).replace(".", "_") for f in funcs), + arg_names=", ".join(argnames), + return_annotation=" -> None" if annotate else "", + test_body=indent(test_body, prefix=" "), + ) + + # For unittest-style, indent method further into a class body + if style == "unittest": + imports.add("unittest") + body = "class Test{}{}(unittest.TestCase):\n{}".format( + ghost.title(), + "".join(_get_qualname(f).replace(".", "").title() for f in funcs), + indent(body, " "), + ) + + return imports, body + + +def _annotate_args( + argnames: Iterable[str], funcs: Iterable[Callable], imports: ImportSet +) -> Iterable[str]: + arg_parameters: defaultdict[str, set[Any]] = defaultdict(set) + for func in funcs: + try: + params = tuple(get_signature(func, eval_str=True).parameters.values()) + except Exception: + # don't add parameters if the annotations could not be evaluated + pass + else: + for key, param in _params_to_dict(params).items(): + if param.annotation != inspect.Parameter.empty: + arg_parameters[key].add(param.annotation) + + for argname in argnames: + parameters = arg_parameters.get(argname) + annotation = _parameters_to_annotation_name(parameters, imports) + if annotation is None: + yield argname + else: + yield f"{argname}: {annotation}" + + +class _AnnotationData(NamedTuple): + type_name: str + imports: set[str] + + +def _parameters_to_annotation_name( + parameters: Iterable[Any] | None, imports: ImportSet +) -> str | None: + if parameters is None: + return None + annotations = tuple( + annotation + for annotation in map(_parameter_to_annotation, parameters) + if annotation is not None + ) + if not annotations: + return None + if len(annotations) == 1: + type_name, new_imports = annotations[0] + imports.update(new_imports) + return type_name + joined = _join_generics(("typing.Union", {"typing"}), annotations) + if joined is None: + return None + imports.update(joined.imports) + return joined.type_name + + +def _join_generics( + origin_type_data: tuple[str, set[str]] | None, + annotations: Iterable[_AnnotationData | None], +) -> _AnnotationData | None: + if origin_type_data is None: + return None + + # because typing.Optional is converted to a Union, it also contains None + # since typing.Optional only accepts one type variable, we need to remove it + if origin_type_data is not None and origin_type_data[0] == 
"typing.Optional": + annotations = ( + annotation + for annotation in annotations + if annotation is None or annotation.type_name != "None" + ) + + origin_type, imports = origin_type_data + joined = _join_argument_annotations(annotations) + if joined is None or not joined[0]: + return None + + arg_types, new_imports = joined + imports.update(new_imports) + return _AnnotationData("{}[{}]".format(origin_type, ", ".join(arg_types)), imports) + + +def _join_argument_annotations( + annotations: Iterable[_AnnotationData | None], +) -> tuple[list[str], set[str]] | None: + imports: set[str] = set() + arg_types: list[str] = [] + + for annotation in annotations: + if annotation is None: + return None + arg_types.append(annotation.type_name) + imports.update(annotation.imports) + + return arg_types, imports + + +def _parameter_to_annotation(parameter: Any) -> _AnnotationData | None: + # if a ForwardRef could not be resolved + if isinstance(parameter, str): + return None + + if isinstance(parameter, ForwardRef): + if sys.version_info[:2] < (3, 14): + forwarded_value = parameter.__forward_value__ + if forwarded_value is None: + return None + else: + # ForwardRef.__forward_value__ was removed in 3.14 in favor of + # ForwardRef.evaluate(). See also PEP 649, PEP 749, and + # typing.evaluate_forward_ref. + # + # .evaluate() with Format.VALUE (the default) throws if the name + # could not be resolved. + # https://docs.python.org/3.14/library/annotationlib.html#annotationlib.ForwardRef.evaluate + try: + forwarded_value = parameter.evaluate() + except Exception: + return None + return _parameter_to_annotation(forwarded_value) + + # the arguments of Callable are in a list + if isinstance(parameter, list): + joined = _join_argument_annotations( + _parameter_to_annotation(param) for param in parameter + ) + if joined is None: + return None + arg_type_names, new_imports = joined + return _AnnotationData("[{}]".format(", ".join(arg_type_names)), new_imports) + + if isinstance(parameter, type): + if parameter.__module__ == "builtins": + return _AnnotationData( + "None" if parameter.__name__ == "NoneType" else parameter.__name__, + set(), + ) + + type_name = _get_qualname(parameter, include_module=True) + + # the types.UnionType does not support type arguments and needs to be translated + if type_name == "types.UnionType": + return _AnnotationData("typing.Union", {"typing"}) + else: + if hasattr(parameter, "__module__") and hasattr(parameter, "__name__"): + type_name = _get_qualname(parameter, include_module=True) + else: + type_name = str(parameter) + + if type_name.startswith("hypothesis.strategies."): + return _AnnotationData(type_name.replace("hypothesis.strategies", "st"), set()) + + origin_type = get_origin(parameter) + + # if not generic or no generic arguments + if origin_type is None or origin_type == parameter: + return _AnnotationData(type_name, set(type_name.rsplit(".", maxsplit=1)[:-1])) + + arg_types = get_args(parameter) + if {type(a) for a in arg_types} == {TypeVar}: + arg_types = () + + # typing types get translated to classes that don't support generics + origin_annotation: _AnnotationData | None + if type_name.startswith("typing."): + try: + new_type_name = type_name[: type_name.index("[")] + except ValueError: + new_type_name = type_name + origin_annotation = _AnnotationData(new_type_name, {"typing"}) + else: + origin_annotation = _parameter_to_annotation(origin_type) + + if arg_types: + return _join_generics( + origin_annotation, + (_parameter_to_annotation(arg_type) for arg_type in arg_types), 
+ ) + return origin_annotation + + +def _are_annotations_used(*functions: Callable) -> bool: + for function in functions: + try: + params = get_signature(function).parameters.values() + except Exception: + pass + else: + if any(param.annotation != inspect.Parameter.empty for param in params): + return True + return False + + +def _make_test(imports: ImportSet, body: str) -> str: + # Discarding "builtins." and "__main__" probably isn't particularly useful + # for user code, but important for making a good impression in demos. + body = body.replace("builtins.", "").replace("__main__.", "") + imports |= {("hypothesis", "given"), ("hypothesis", "strategies as st")} + if " reject()\n" in body: + imports.add(("hypothesis", "reject")) + + do_not_import = {"builtins", "__main__", "hypothesis.strategies"} + direct = {f"import {i}" for i in imports - do_not_import if isinstance(i, str)} + from_imports = defaultdict(set) + for module, name in {i for i in imports if isinstance(i, tuple)}: + if not (module.startswith("hypothesis.strategies") and name in st.__all__): + from_imports[module].add(name) + from_ = { + "from {} import {}".format(module, ", ".join(sorted(names))) + for module, names in from_imports.items() + if isinstance(module, str) and module not in do_not_import + } + header = IMPORT_SECTION.format(imports="\n".join(sorted(direct) + sorted(from_))) + nothings = body.count("st.nothing()") + if nothings == 1: + header += "# TODO: replace st.nothing() with an appropriate strategy\n\n" + elif nothings >= 1: + header += "# TODO: replace st.nothing() with appropriate strategies\n\n" + return black.format_str(header + body, mode=black.Mode()) + + +def _is_probably_ufunc(obj): + # See https://numpy.org/doc/stable/reference/ufuncs.html - there doesn't seem + # to be an upstream function to detect this, so we just guess. + has_attributes = [ + "nin", + "nout", + "nargs", + "ntypes", + "types", + "identity", + "signature", + ] + return callable(obj) and all(hasattr(obj, name) for name in has_attributes) + + +# If we have a pair of functions where one name matches the regex and the second +# is the result of formatting the template with matched groups, our magic() +# ghostwriter will write a roundtrip test for them. Additional patterns welcome. +ROUNDTRIP_PAIRS = ( + # Defined prefix, shared postfix. The easy cases. + (r"write(.+)", "read{}"), + (r"save(.+)", "load{}"), + (r"dump(.+)", "load{}"), + (r"to(.+)", "from{}"), + # Known stem, maybe matching prefixes, maybe matching postfixes. + (r"(.*)en(.+)", "{}de{}"), + # Shared postfix, prefix only on "inverse" function + (r"(.+)", "de{}"), + (r"(?!safe)(.+)", "un{}"), # safe_load / unsafe_load isn't a roundtrip + # a2b_postfix and b2a_postfix. Not a fan of this pattern, but it's pretty + # common in code imitating an C API - see e.g. the stdlib binascii module. + (r"(.+)2(.+?)(_.+)?", "{1}2{0}{2}"), + # Common in e.g. 
the colorsys module + (r"(.+)_to_(.+)", "{1}_to_{0}"), + # Sockets patterns + (r"(inet|if)_(.+)to(.+)", "{0}_{2}to{1}"), + (r"(\w)to(\w)(.+)", "{1}to{0}{2}"), + (r"send(.+)", "recv{}"), + (r"send(.+)", "receive{}"), +) + + +def _get_testable_functions(thing: object) -> dict[str, Callable]: + by_name = {} + if callable(thing): + funcs: list[Any | None] = [thing] + elif isinstance(thing, types.ModuleType): + if hasattr(thing, "__all__"): + funcs = [getattr(thing, name, None) for name in thing.__all__] + elif hasattr(thing, "__package__"): + pkg = thing.__package__ + funcs = [ + v + for k, v in vars(thing).items() + if callable(v) + and not is_mock(v) + and ((not pkg) or getattr(v, "__module__", pkg).startswith(pkg)) + and not k.startswith("_") + ] + if pkg and any(getattr(f, "__module__", pkg) == pkg for f in funcs): + funcs = [f for f in funcs if getattr(f, "__module__", pkg) == pkg] + else: + raise InvalidArgument(f"Can't test non-module non-callable {thing!r}") + + for f in list(funcs): + if inspect.isclass(f): + funcs += [ + v.__get__(f) + for k, v in vars(f).items() + if hasattr(v, "__func__") and not is_mock(v) and not k.startswith("_") + ] + for f in funcs: + try: + if ( + (not is_mock(f)) + and callable(f) + and _get_params(f) + and not isinstance(f, enum.EnumMeta) + ): + if getattr(thing, "__name__", None): + if inspect.isclass(thing): + KNOWN_FUNCTION_LOCATIONS[f] = _get_module_helper(thing) + elif isinstance(thing, types.ModuleType): + KNOWN_FUNCTION_LOCATIONS[f] = thing.__name__ + try: + _get_params(f) + by_name[_get_qualname(f, include_module=True)] = f + except Exception: + # usually inspect.signature on C code such as socket.inet_aton, + # or Pandas 'CallableDynamicDoc' object has no attr. '__name__' + pass + except (TypeError, ValueError): + pass + return by_name + + +def magic( + *modules_or_functions: Callable | types.ModuleType, + except_: Except = (), + style: str = "pytest", + annotate: bool | None = None, +) -> str: + """Guess which ghostwriters to use, for a module or collection of functions. + + As for all ghostwriters, the ``except_`` argument should be an + :class:`python:Exception` or tuple of exceptions, and ``style`` may be either + ``"pytest"`` to write test functions or ``"unittest"`` to write test methods + and :class:`~python:unittest.TestCase`. + + After finding the public functions attached to any modules, the ``magic`` + ghostwriter looks for pairs of functions to pass to :func:`~roundtrip`, + then checks for :func:`~binary_operation` and :func:`~ufunc` functions, + and any others are passed to :func:`~fuzz`. + + For example, try :command:`hypothesis write gzip` on the command line! + """ + except_ = _check_except(except_) + _check_style(style) + if not modules_or_functions: + raise InvalidArgument("Must pass at least one function or module to test.") + + parts = [] + by_name = {} + imports = set() + + for thing in modules_or_functions: + by_name.update(found := _get_testable_functions(thing)) + if (not found) and isinstance(thing, types.ModuleType): + msg = f"# Found no testable functions in {thing.__name__} (from {thing.__file__!r})" + mods: list = [] + for k in sorted(sys.modules, key=len): + if ( + k.startswith(f"{thing.__name__}.") + and "._" not in k.removeprefix(thing.__name__) + and not k.startswith(tuple(f"{m}." for m in mods)) + and _get_testable_functions(sys.modules[k]) + ): + mods.append(k) + if mods: + msg += ( + f"\n# Try writing tests for submodules, e.g. 
by using:\n" + f"# hypothesis write {' '.join(sorted(mods))}" + ) + parts.append(msg) + + if not by_name: + return "\n\n".join(parts) + + if annotate is None: + annotate = _are_annotations_used(*by_name.values()) + + def make_(how, *args, **kwargs): + imp, body = how(*args, **kwargs, except_=except_, style=style) + imports.update(imp) + parts.append(body) + + # Look for pairs of functions that roundtrip, based on known naming patterns. + for writename, readname in ROUNDTRIP_PAIRS: + for name in sorted(by_name): + match = re.fullmatch(writename, name.split(".")[-1]) + if match: + inverse_name = readname.format(*match.groups()) + for other in sorted( + n for n in by_name if n.split(".")[-1] == inverse_name + ): + make_( + _make_roundtrip_body, + (by_name.pop(name), by_name.pop(other)), + annotate=annotate, + ) + break + else: + try: + other_func = getattr( + sys.modules[_get_module(by_name[name])], + inverse_name, + ) + _get_params(other_func) # we want to skip if this fails + except Exception: + pass + else: + make_( + _make_roundtrip_body, + (by_name.pop(name), other_func), + annotate=annotate, + ) + + # Look for equivalent functions: same name, all required arguments of any can + # be found in all signatures, and if all have return-type annotations they match. + names = defaultdict(list) + for _, f in sorted(by_name.items()): + names[_get_qualname(f)].append(f) + for group in names.values(): + if len(group) >= 2 and len({frozenset(_get_params(f)) for f in group}) == 1: + sentinel = object() + returns = {get_type_hints(f).get("return", sentinel) for f in group} + if len(returns - {sentinel}) <= 1: + make_(_make_equiv_body, group, annotate=annotate) + for f in group: + by_name.pop(_get_qualname(f, include_module=True)) + + # Look for binary operators - functions with two identically-typed arguments, + # and the same return type. The latter restriction might be lifted later. + for name, func in sorted(by_name.items()): + hints = get_type_hints(func) + hints.pop("return", None) + params = _get_params(func) + if (len(hints) == len(params) == 2) or ( + _get_module(func) == "operator" + and "item" not in func.__name__ + and tuple(params) in [("a", "b"), ("x", "y")] + ): + a, b = hints.values() or [Any, Any] + arg1, arg2 = params + if a == b and len(arg1) == len(arg2) <= 3: + # https://en.wikipedia.org/wiki/Distributive_property#Other_examples + known = { + "mul": "add", + "matmul": "add", + "or_": "and_", + "and_": "or_", + }.get(func.__name__, "") + distributes_over = getattr(sys.modules[_get_module(func)], known, None) + make_( + _make_binop_body, + func, + commutative=func.__name__ != "matmul", + distributes_over=distributes_over, + annotate=annotate, + ) + del by_name[name] + + # Look for Numpy ufuncs or gufuncs, and write array-oriented tests for them. + if "numpy" in sys.modules: + for name, func in sorted(by_name.items()): + if _is_probably_ufunc(func): + make_(_make_ufunc_body, func, annotate=annotate) + del by_name[name] + + # For all remaining callables, just write a fuzz-test. In principle we could + # guess at equivalence or idempotence; but it doesn't seem accurate enough to + # be worth the trouble when it's so easy for the user to specify themselves. 
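# Sketch (not part of the vendored file): how the ``magic`` ghostwriter being
# assembled in this function is typically driven from Python; the CLI form is
# ``hypothesis write gzip``.  Assumes this vendored tree is importable as
# ``hypothesis`` and that ``black`` (used by ``_make_test`` above) is installed.
import gzip

from hypothesis.extra import ghostwriter

test_source = ghostwriter.magic(gzip, style="pytest")
with open("test_gzip_properties.py", "w", encoding="utf-8") as handle:
    handle.write(test_source)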
+ for _, f in sorted(by_name.items()): + make_( + _make_test_body, + f, + test_body=_write_call(f, except_=except_), + ghost="fuzz", + annotate=annotate, + ) + return _make_test(imports, "\n".join(parts)) + + +def fuzz( + func: Callable, + *, + except_: Except = (), + style: str = "pytest", + annotate: bool | None = None, +) -> str: + """Write source code for a property-based test of ``func``. + + The resulting test checks that valid input only leads to expected exceptions. + For example: + + .. code-block:: python + + from re import compile, error + + from hypothesis.extra import ghostwriter + + ghostwriter.fuzz(compile, except_=error) + + Gives: + + .. code-block:: python + + # This test code was written by the `hypothesis.extra.ghostwriter` module + # and is provided under the Creative Commons Zero public domain dedication. + import re + + from hypothesis import given, reject, strategies as st + + # TODO: replace st.nothing() with an appropriate strategy + + + @given(pattern=st.nothing(), flags=st.just(0)) + def test_fuzz_compile(pattern, flags): + try: + re.compile(pattern=pattern, flags=flags) + except re.error: + reject() + + Note that it includes all the required imports. + Because the ``pattern`` parameter doesn't have annotations or a default argument, + you'll need to specify a strategy - for example :func:`~hypothesis.strategies.text` + or :func:`~hypothesis.strategies.binary`. After that, you have a test! + """ + if not callable(func): + raise InvalidArgument(f"Got non-callable {func=}") + except_ = _check_except(except_) + _check_style(style) + + if annotate is None: + annotate = _are_annotations_used(func) + + imports, body = _make_test_body( + func, + test_body=_write_call(func, except_=except_), + except_=except_, + ghost="fuzz", + style=style, + annotate=annotate, + ) + return _make_test(imports, body) + + +def idempotent( + func: Callable, + *, + except_: Except = (), + style: str = "pytest", + annotate: bool | None = None, +) -> str: + """Write source code for a property-based test of ``func``. + + The resulting test checks that if you call ``func`` on it's own output, + the result does not change. For example: + + .. code-block:: python + + from typing import Sequence + + from hypothesis.extra import ghostwriter + + + def timsort(seq: Sequence[int]) -> Sequence[int]: + return sorted(seq) + + + ghostwriter.idempotent(timsort) + + Gives: + + .. code-block:: python + + # This test code was written by the `hypothesis.extra.ghostwriter` module + # and is provided under the Creative Commons Zero public domain dedication. 
+ + from hypothesis import given, strategies as st + + + @given(seq=st.one_of(st.binary(), st.binary().map(bytearray), st.lists(st.integers()))) + def test_idempotent_timsort(seq): + result = timsort(seq=seq) + repeat = timsort(seq=result) + assert result == repeat, (result, repeat) + """ + if not callable(func): + raise InvalidArgument(f"Got non-callable {func=}") + except_ = _check_except(except_) + _check_style(style) + + if annotate is None: + annotate = _are_annotations_used(func) + + imports, body = _make_test_body( + func, + test_body="result = {}\nrepeat = {}".format( + _write_call(func, except_=except_), + _write_call(func, "result", except_=except_), + ), + except_=except_, + assertions=_assert_eq(style, "result", "repeat"), + ghost="idempotent", + style=style, + annotate=annotate, + ) + return _make_test(imports, body) + + +def _make_roundtrip_body(funcs, except_, style, annotate): + first_param = next(iter(_get_params(funcs[0]))) + test_lines = [ + _write_call(funcs[0], assign="value0", except_=except_), + *( + _write_call(f, f"value{i}", assign=f"value{i + 1}", except_=except_) + for i, f in enumerate(funcs[1:]) + ), + ] + return _make_test_body( + *funcs, + test_body="\n".join(test_lines), + except_=except_, + assertions=_assert_eq(style, first_param, f"value{len(funcs) - 1}"), + ghost="roundtrip", + style=style, + annotate=annotate, + ) + + +def roundtrip( + *funcs: Callable, + except_: Except = (), + style: str = "pytest", + annotate: bool | None = None, +) -> str: + """Write source code for a property-based test of ``funcs``. + + The resulting test checks that if you call the first function, pass the result + to the second (and so on), the final result is equal to the first input argument. + + This is a *very* powerful property to test, especially when the config options + are varied along with the object to round-trip. For example, try ghostwriting + a test for :func:`python:json.dumps` - would you have thought of all that? + + .. 
code-block:: shell + + hypothesis write --roundtrip json.dumps json.loads + """ + if not funcs: + raise InvalidArgument("Round-trip of zero functions is meaningless.") + for i, f in enumerate(funcs): + if not callable(f): + raise InvalidArgument(f"Got non-callable funcs[{i}]={f!r}") + except_ = _check_except(except_) + _check_style(style) + + if annotate is None: + annotate = _are_annotations_used(*funcs) + + return _make_test(*_make_roundtrip_body(funcs, except_, style, annotate)) + + +def _get_varnames(funcs): + var_names = [f"result_{f.__name__}" for f in funcs] + if len(set(var_names)) < len(var_names): + var_names = [f"result_{f.__name__}_{_get_module(f)}" for f in funcs] + if len(set(var_names)) < len(var_names): + var_names = [f"result_{i}_{f.__name__}" for i, f in enumerate(funcs)] + return var_names + + +def _make_equiv_body(funcs, except_, style, annotate): + var_names = _get_varnames(funcs) + test_lines = [ + _write_call(f, assign=vname, except_=except_) + for vname, f in zip(var_names, funcs, strict=True) + ] + assertions = "\n".join( + _assert_eq(style, var_names[0], vname) for vname in var_names[1:] + ) + + return _make_test_body( + *funcs, + test_body="\n".join(test_lines), + except_=except_, + assertions=assertions, + ghost="equivalent", + style=style, + annotate=annotate, + ) + + +EQUIV_FIRST_BLOCK = """ +try: +{} + exc_type = None + target(1, label="input was valid") +{}except Exception as exc: + exc_type = type(exc) +""".strip() + +EQUIV_CHECK_BLOCK = """ +if exc_type: + with {ctx}(exc_type): +{check_raises} +else: +{call} +{compare} +""".rstrip() + + +def _make_equiv_errors_body(funcs, except_, style, annotate): + var_names = _get_varnames(funcs) + first, *rest = funcs + first_call = _write_call(first, assign=var_names[0], except_=except_) + extra_imports, suppress = _exception_string(except_) + extra_imports.add(("hypothesis", "target")) + catch = f"except {suppress}:\n reject()\n" if suppress else "" + test_lines = [EQUIV_FIRST_BLOCK.format(indent(first_call, prefix=" "), catch)] + + for vname, f in zip(var_names[1:], rest, strict=True): + if style == "pytest": + ctx = "pytest.raises" + extra_imports.add("pytest") + else: + assert style == "unittest" + ctx = "self.assertRaises" + block = EQUIV_CHECK_BLOCK.format( + ctx=ctx, + check_raises=indent(_write_call(f, except_=()), " "), + call=indent(_write_call(f, assign=vname, except_=()), " "), + compare=indent(_assert_eq(style, var_names[0], vname), " "), + ) + test_lines.append(block) + + imports, source_code = _make_test_body( + *funcs, + test_body="\n".join(test_lines), + except_=(), + ghost="equivalent", + style=style, + annotate=annotate, + ) + return imports | extra_imports, source_code + + +def equivalent( + *funcs: Callable, + allow_same_errors: bool = False, + except_: Except = (), + style: str = "pytest", + annotate: bool | None = None, +) -> str: + """Write source code for a property-based test of ``funcs``. + + The resulting test checks that calling each of the functions returns + an equal value. This can be used as a classic 'oracle', such as testing + a fast sorting algorithm against the :func:`python:sorted` builtin, or + for differential testing where none of the compared functions are fully + trusted but any difference indicates a bug (e.g. running a function on + different numbers of threads, or simply multiple times). + + The functions should have reasonably similar signatures, as only the + common parameters will be passed the same arguments - any other parameters + will be allowed to vary. 
+ + If allow_same_errors is True, then the test will pass if calling each of + the functions returns an equal value, *or* if the first function raises an + exception and each of the others raises an exception of the same type. + This relaxed mode can be useful for code synthesis projects. + """ + if len(funcs) < 2: + raise InvalidArgument("Need at least two functions to compare.") + for i, f in enumerate(funcs): + if not callable(f): + raise InvalidArgument(f"Got non-callable funcs[{i}]={f!r}") + check_type(bool, allow_same_errors, "allow_same_errors") + except_ = _check_except(except_) + _check_style(style) + + if annotate is None: + annotate = _are_annotations_used(*funcs) + + if allow_same_errors and not any(issubclass(Exception, ex) for ex in except_): + imports, source_code = _make_equiv_errors_body(funcs, except_, style, annotate) + else: + imports, source_code = _make_equiv_body(funcs, except_, style, annotate) + return _make_test(imports, source_code) + + +X = TypeVar("X") +Y = TypeVar("Y") + + +def binary_operation( + func: Callable[[X, X], Y], + *, + associative: bool = True, + commutative: bool = True, + identity: X | EllipsisType | None = ..., + distributes_over: Callable[[X, X], X] | None = None, + except_: Except = (), + style: str = "pytest", + annotate: bool | None = None, +) -> str: + """Write property tests for the binary operation ``func``. + + While :wikipedia:`binary operations ` are not particularly + common, they have such nice properties to test that it seems a shame not to + demonstrate them with a ghostwriter. For an operator ``f``, test that: + + - if :wikipedia:`associative `, + ``f(a, f(b, c)) == f(f(a, b), c)`` + - if :wikipedia:`commutative `, ``f(a, b) == f(b, a)`` + - if :wikipedia:`identity ` is not None, ``f(a, identity) == a`` + - if :wikipedia:`distributes_over ` is ``+``, + ``f(a, b) + f(a, c) == f(a, b+c)`` + + For example: + + .. code-block:: python + + ghostwriter.binary_operation( + operator.mul, + identity=1, + distributes_over=operator.add, + style="unittest", + ) + """ + if not callable(func): + raise InvalidArgument(f"Got non-callable {func=}") + except_ = _check_except(except_) + _check_style(style) + check_type(bool, associative, "associative") + check_type(bool, commutative, "commutative") + if distributes_over is not None and not callable(distributes_over): + raise InvalidArgument( + f"{distributes_over=} must be an operation which " + f"distributes over {func.__name__}" + ) + if not any([associative, commutative, identity, distributes_over]): + raise InvalidArgument( + "You must select at least one property of the binary operation to test." 
+ ) + + if annotate is None: + annotate = _are_annotations_used(func) + + imports, body = _make_binop_body( + func, + associative=associative, + commutative=commutative, + identity=identity, + distributes_over=distributes_over, + except_=except_, + style=style, + annotate=annotate, + ) + return _make_test(imports, body) + + +def _make_binop_body( + func: Callable[[X, X], Y], + *, + associative: bool = True, + commutative: bool = True, + identity: X | EllipsisType | None = ..., + distributes_over: Callable[[X, X], X] | None = None, + except_: tuple[type[Exception], ...], + style: str, + annotate: bool, +) -> tuple[ImportSet, str]: + strategies = _get_strategies(func) + operands, b = (strategies.pop(p) for p in list(_get_params(func))[:2]) + if repr(operands) != repr(b): + operands |= b + operands_name = func.__name__ + "_operands" + + all_imports = set() + parts = [] + + def maker( + sub_property: str, + args: str, + body: str, + right: str | None = None, + ) -> None: + if right is None: + assertions = "" + else: + body = f"{body}\n{right}" + assertions = _assert_eq(style, "left", "right") + imports, body = _make_test_body( + func, + test_body=body, + ghost=sub_property + "_binary_operation", + except_=except_, + assertions=assertions, + style=style, + given_strategies={**strategies, **{n: operands_name for n in args}}, + annotate=annotate, + ) + all_imports.update(imports) + if style == "unittest": + endline = "(unittest.TestCase):\n" + body = body[body.index(endline) + len(endline) + 1 :] + parts.append(body) + + if associative: + maker( + "associative", + "abc", + _write_call(func, "a", _write_call(func, "b", "c"), assign="left"), + _write_call( + func, + _write_call(func, "a", "b"), + "c", + assign="right", + ), + ) + if commutative: + maker( + "commutative", + "ab", + _write_call(func, "a", "b", assign="left"), + _write_call(func, "b", "a", assign="right"), + ) + if identity is not None: + # Guess that the identity element is the minimal example from our operands + # strategy. This is correct often enough to be worthwhile, and close enough + # that it's a good starting point to edit much of the rest. + if identity is ...: + try: + identity = find(operands, lambda x: True, settings=_quietly_settings) + except Exception: + identity = "identity element here" # type: ignore + # If the repr of this element is invalid Python, stringify it - this + # can't be executed as-is, but at least makes it clear what should + # happen. E.g. type(None) -> -> quoted. + try: + # We don't actually execute this code object; we're just compiling + # to check that the repr is syntactically valid. HOWEVER, we're + # going to output that code string into test code which will be + # executed; so you still shouldn't ghostwrite for hostile code. 
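# Sketch (not part of the vendored file) of the repr-validity check performed
# just below: compiling ``repr(value)`` tells us whether the guessed identity
# element can be pasted into the generated test verbatim, or needs quoting.
for candidate in (0, 1.0, "", type(None)):
    try:
        compile(repr(candidate), "<identity-check>", "exec")
        print(f"{candidate!r} can be written into the test as-is")
    except SyntaxError:
        print(f"{candidate!r} has a non-Python repr and would be quoted instead")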
+ compile(repr(identity), "", "exec") + except SyntaxError: + identity = repr(identity) # type: ignore + identity_parts = [ + f"{identity = }", + _assert_eq( + style, + "a", + _write_call(func, "a", "identity"), + ), + _assert_eq( + style, + "a", + _write_call(func, "identity", "a"), + ), + ] + maker("identity", "a", "\n".join(identity_parts)) + if distributes_over: + do = distributes_over + dist_parts = [ + _write_call(func, "a", _write_call(do, "b", "c"), assign="left"), + _write_call( + do, + _write_call(func, "a", "b"), + _write_call(func, "a", "c"), + assign="ldist", + ), + _assert_eq(style, "ldist", "left"), + "\n", + _write_call(func, _write_call(do, "a", "b"), "c", assign="right"), + _write_call( + do, + _write_call(func, "a", "c"), + _write_call(func, "b", "c"), + assign="rdist", + ), + _assert_eq(style, "rdist", "right"), + ] + maker(do.__name__ + "_distributes_over", "abc", "\n".join(dist_parts)) + + operands_imports, operands_repr = _valid_syntax_repr(operands) + all_imports.update(operands_imports) + operands_repr = _st_strategy_names(operands_repr) + classdef = "" + if style == "unittest": + classdef = f"class TestBinaryOperation{func.__name__}(unittest.TestCase):\n " + return ( + all_imports, + classdef + f"{operands_name} = {operands_repr}\n" + "\n".join(parts), + ) + + +def ufunc( + func: Callable, + *, + except_: Except = (), + style: str = "pytest", + annotate: bool | None = None, +) -> str: + """Write a property-based test for the :doc:`array ufunc ` ``func``. + + The resulting test checks that your ufunc or :doc:`gufunc + ` has the expected broadcasting and dtype casting + behaviour. You will probably want to add extra assertions, but as with the other + ghostwriters this gives you a great place to start. + + .. code-block:: shell + + hypothesis write numpy.matmul + """ + if not _is_probably_ufunc(func): + raise InvalidArgument(f"{func=} does not seem to be a ufunc") + except_ = _check_except(except_) + _check_style(style) + + if annotate is None: + annotate = _are_annotations_used(func) + + return _make_test( + *_make_ufunc_body(func, except_=except_, style=style, annotate=annotate) + ) + + +def _make_ufunc_body(func, *, except_, style, annotate): + import hypothesis.extra.numpy as npst + + if func.signature is None: + shapes = npst.mutually_broadcastable_shapes(num_shapes=func.nin) + else: + shapes = npst.mutually_broadcastable_shapes(signature=func.signature) + shapes.function.__module__ = npst.__name__ + + body = """ + input_shapes, expected_shape = shapes + input_dtypes, expected_dtype = types.split("->") + array_strats = [ + arrays(dtype=dtp, shape=shp, elements={{"allow_nan": True}}) + for dtp, shp in zip(input_dtypes, input_shapes) + ] + + {array_names} = data.draw(st.tuples(*array_strats)) + result = {call} + """.format( + array_names=", ".join(ascii_lowercase[: func.nin]), + call=_write_call(func, *ascii_lowercase[: func.nin], except_=except_), + ) + assertions = "\n{shape_assert}\n{type_assert}".format( + shape_assert=_assert_eq(style, "result.shape", "expected_shape"), + type_assert=_assert_eq(style, "result.dtype.char", "expected_dtype"), + ) + + qname = _get_qualname(func, include_module=True) + obj_sigs = ["O" in sig for sig in func.types] + if all(obj_sigs) or not any(obj_sigs): + types = f"sampled_from({qname}.types)" + else: + types = f"sampled_from([sig for sig in {qname}.types if 'O' not in sig])" + + return _make_test_body( + func, + test_body=dedent(body).strip(), + except_=except_, + assertions=assertions, + ghost="ufunc" if func.signature is 
None else "gufunc", + style=style, + given_strategies={"data": st.data(), "shapes": shapes, "types": types}, + imports={("hypothesis.extra.numpy", "arrays")}, + annotate=annotate, + ) diff --git a/vendored/hypothesis/extra/lark.py b/vendored/hypothesis/extra/lark.py new file mode 100644 index 0000000..ad554e2 --- /dev/null +++ b/vendored/hypothesis/extra/lark.py @@ -0,0 +1,247 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +""" +This extra can be used to generate strings matching any context-free grammar, +using the `Lark parser library `_. + +It currently only supports Lark's native EBNF syntax, but we plan to extend +this to support other common syntaxes such as ANTLR and :rfc:`5234` ABNF. +Lark already `supports loading grammars +`_ +from `nearley.js `_, so you may not have to write +your own at all. +""" + +from inspect import signature + +import lark +from lark.grammar import NonTerminal, Rule, Symbol, Terminal +from lark.lark import Lark +from lark.lexer import TerminalDef + +from hypothesis import strategies as st +from hypothesis.errors import InvalidArgument +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.conjecture.utils import calc_label_from_name +from hypothesis.internal.validation import check_type +from hypothesis.strategies._internal.regex import IncompatibleWithAlphabet +from hypothesis.strategies._internal.utils import cacheable, defines_strategy + +__all__ = ["from_lark"] + + +def get_terminal_names( + terminals: list[TerminalDef], rules: list[Rule], ignore_names: list[str] +) -> set[str]: + """Get names of all terminals in the grammar. + + The arguments are the results of calling ``Lark.grammar.compile()``, + so you would think that the ``terminals`` and ``ignore_names`` would + have it all... but they omit terminals created with ``@declare``, + which appear only in the expansion(s) of nonterminals. + """ + names = {t.name for t in terminals} | set(ignore_names) + for rule in rules: + names |= {t.name for t in rule.expansion if isinstance(t, Terminal)} + return names + + +class LarkStrategy(st.SearchStrategy): + """Low-level strategy implementation wrapping a Lark grammar. + + See ``from_lark`` for details. + """ + + def __init__( + self, + grammar: Lark, + start: str | None, + explicit: dict[str, st.SearchStrategy[str]], + alphabet: st.SearchStrategy[str], + ) -> None: + super().__init__() + assert isinstance(grammar, lark.lark.Lark) + start: list[str] = grammar.options.start if start is None else [start] + + # This is a total hack, but working around the changes is a nicer user + # experience than breaking for anyone who doesn't instantly update their + # installation of Lark alongside Hypothesis. + compile_args = signature(grammar.grammar.compile).parameters + if "terminals_to_keep" in compile_args: + terminals, rules, ignore_names = grammar.grammar.compile(start, ()) + elif "start" in compile_args: # pragma: no cover + # Support lark <= 0.10.0, without the terminals_to_keep argument. 
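# Sketch (not part of the vendored file): typical use of this extra's public
# entry point, ``from_lark`` (defined at the bottom of this module), with a
# tiny self-contained grammar.  Requires the ``lark`` package.
from lark import Lark

from hypothesis import given
from hypothesis.extra.lark import from_lark

SUM_GRAMMAR = r"""
start: NUMBER ("+" NUMBER)*
NUMBER: /[0-9]+/
"""


@given(from_lark(Lark(SUM_GRAMMAR)))
def test_generated_sums_parse_back(source):
    Lark(SUM_GRAMMAR).parse(source)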
+ terminals, rules, ignore_names = grammar.grammar.compile(start) # type: ignore + else: # pragma: no cover + # This branch is to support lark <= 0.7.1, without the start argument. + terminals, rules, ignore_names = grammar.grammar.compile() # type: ignore + + self.names_to_symbols: dict[str, Symbol] = {} + + for r in rules: + self.names_to_symbols[r.origin.name] = r.origin + + disallowed = set() + self.terminal_strategies: dict[str, st.SearchStrategy[str]] = {} + for t in terminals: + self.names_to_symbols[t.name] = Terminal(t.name) + s = st.from_regex(t.pattern.to_regexp(), fullmatch=True, alphabet=alphabet) + try: + s.validate() + except IncompatibleWithAlphabet: + disallowed.add(t.name) + else: + self.terminal_strategies[t.name] = s + + self.ignored_symbols = tuple(self.names_to_symbols[n] for n in ignore_names) + + all_terminals = get_terminal_names(terminals, rules, ignore_names) + if unknown_explicit := sorted(set(explicit) - all_terminals): + raise InvalidArgument( + "The following arguments were passed as explicit_strategies, but " + f"there is no {unknown_explicit} terminal production in this grammar." + ) + if missing_declared := sorted( + all_terminals - {t.name for t in terminals} - set(explicit) + ): + raise InvalidArgument( + f"Undefined terminal{'s' * (len(missing_declared) > 1)} " + f"{sorted(missing_declared)!r}. Generation does not currently " + "support use of %declare unless you pass `explicit`, a dict of " + f"names-to-strategies, such as `{{{missing_declared[0]!r}: " + 'st.just("")}}`' + ) + self.terminal_strategies.update(explicit) + + # can in fact contain any symbol, despite its name. + nonterminals: dict[str, list[tuple[Symbol, ...]]] = {} + + for rule in rules: + if disallowed.isdisjoint(r.name for r in rule.expansion): + nonterminals.setdefault(rule.origin.name, []).append( + tuple(rule.expansion) + ) + + allowed_rules = {*self.terminal_strategies, *nonterminals} + while dict(nonterminals) != ( + nonterminals := { + k: clean + for k, v in nonterminals.items() + if (clean := [x for x in v if all(r.name in allowed_rules for r in x)]) + } + ): + allowed_rules = {*self.terminal_strategies, *nonterminals} + + if set(start).isdisjoint(allowed_rules): + raise InvalidArgument( + f"No start rule {tuple(start)} is allowed by {alphabet=}" + ) + self.start = st.sampled_from( + [self.names_to_symbols[s] for s in start if s in allowed_rules] + ) + + self.nonterminal_strategies = { + k: st.sampled_from(sorted(v, key=len)) for k, v in nonterminals.items() + } + + self.__rule_labels: dict[str, int] = {} + + def do_draw(self, data: ConjectureData) -> str: + state: list[str] = [] + start = data.draw(self.start) + self.draw_symbol(data, start, state) + return "".join(state) + + def rule_label(self, name: str) -> int: + try: + return self.__rule_labels[name] + except KeyError: + return self.__rule_labels.setdefault( + name, calc_label_from_name(f"LARK:{name}") + ) + + def draw_symbol( + self, + data: ConjectureData, + symbol: Symbol, + draw_state: list[str], + ) -> None: + if isinstance(symbol, Terminal): + strategy = self.terminal_strategies[symbol.name] + draw_state.append(data.draw(strategy)) + else: + assert isinstance(symbol, NonTerminal) + data.start_span(self.rule_label(symbol.name)) + expansion = data.draw(self.nonterminal_strategies[symbol.name]) + for e in expansion: + self.draw_symbol(data, e, draw_state) + self.gen_ignore(data, draw_state) + data.stop_span() + + def gen_ignore(self, data: ConjectureData, draw_state: list[str]) -> None: + if self.ignored_symbols and 
data.draw_boolean(1 / 4): + emit = data.draw(st.sampled_from(self.ignored_symbols)) + self.draw_symbol(data, emit, draw_state) + + def calc_has_reusable_values(self, recur): + return True + + +def check_explicit(name): + def inner(value): + check_type(str, value, "value drawn from " + name) + return value + + return inner + + +@cacheable +@defines_strategy(force_reusable_values=True) +def from_lark( + grammar: lark.lark.Lark, + *, + start: str | None = None, + explicit: dict[str, st.SearchStrategy[str]] | None = None, + alphabet: st.SearchStrategy[str] = st.characters(codec="utf-8"), +) -> st.SearchStrategy[str]: + """A strategy for strings accepted by the given context-free grammar. + + ``grammar`` must be a ``Lark`` object, which wraps an EBNF specification. + The Lark EBNF grammar reference can be found + `here `_. + + ``from_lark`` will automatically generate strings matching the + nonterminal ``start`` symbol in the grammar, which was supplied as an + argument to the Lark class. To generate strings matching a different + symbol, including terminals, you can override this by passing the + ``start`` argument to ``from_lark``. Note that Lark may remove unreachable + productions when the grammar is compiled, so you should probably pass the + same value for ``start`` to both. + + Currently ``from_lark`` does not support grammars that need custom lexing. + Any lexers will be ignored, and any undefined terminals from the use of + ``%declare`` will result in generation errors. To define strategies for + such terminals, pass a dictionary mapping their name to a corresponding + strategy as the ``explicit`` argument. + + The :pypi:`hypothesmith` project includes a strategy for Python source, + based on a grammar and careful post-processing. + """ + check_type(lark.lark.Lark, grammar, "grammar") + if explicit is None: + explicit = {} + else: + check_type(dict, explicit, "explicit") + explicit = { + k: v.map(check_explicit(f"explicit[{k!r}]={v!r}")) + for k, v in explicit.items() + } + return LarkStrategy(grammar, start, explicit, alphabet) diff --git a/vendored/hypothesis/extra/numpy.py b/vendored/hypothesis/extra/numpy.py new file mode 100644 index 0000000..64f381b --- /dev/null +++ b/vendored/hypothesis/extra/numpy.py @@ -0,0 +1,1441 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import importlib +import math +import types +from collections.abc import Mapping, Sequence +from typing import ( + TYPE_CHECKING, + Any, + Literal, + TypeVar, + Union, + cast, + get_args, + get_origin, + overload, +) + +import numpy as np + +from hypothesis import strategies as st +from hypothesis._settings import note_deprecation +from hypothesis.errors import HypothesisException, InvalidArgument +from hypothesis.extra._array_helpers import ( + _BIE, + NDIM_MAX, + BasicIndex, + BasicIndexStrategy, + BroadcastableShapes, + Shape, + _BIENoEllipsis, + _BIENoEllipsisNoNewaxis, + _BIENoNewaxis, + array_shapes, + broadcastable_shapes, + check_argument, + check_valid_dims, + mutually_broadcastable_shapes as _mutually_broadcastable_shapes, + order_check, + valid_tuple_axes as _valid_tuple_axes, +) +from hypothesis.internal.conjecture import utils as cu +from hypothesis.internal.coverage import check_function +from hypothesis.internal.reflection import proxies +from hypothesis.internal.validation import check_type +from hypothesis.strategies._internal.lazy import unwrap_strategies +from hypothesis.strategies._internal.numbers import Real +from hypothesis.strategies._internal.strategies import ( + Ex, + MappedStrategy, + T, + check_strategy, +) +from hypothesis.strategies._internal.utils import defines_strategy + + +def _try_import(mod_name: str, attr_name: str) -> Any: + assert "." not in attr_name + try: + mod = importlib.import_module(mod_name) + return getattr(mod, attr_name, None) + except ImportError: + return None + + +if TYPE_CHECKING: + from numpy.typing import DTypeLike, NDArray +else: + NDArray = _try_import("numpy.typing", "NDArray") + +ArrayLike = _try_import("numpy.typing", "ArrayLike") +_NestedSequence = _try_import("numpy._typing._nested_sequence", "_NestedSequence") +_SupportsArray = _try_import("numpy._typing._array_like", "_SupportsArray") + +__all__ = [ + "BroadcastableShapes", + "array_dtypes", + "array_shapes", + "arrays", + "basic_indices", + "boolean_dtypes", + "broadcastable_shapes", + "byte_string_dtypes", + "complex_number_dtypes", + "datetime64_dtypes", + "floating_dtypes", + "from_dtype", + "integer_array_indices", + "integer_dtypes", + "mutually_broadcastable_shapes", + "nested_dtypes", + "scalar_dtypes", + "timedelta64_dtypes", + "unicode_string_dtypes", + "unsigned_integer_dtypes", + "valid_tuple_axes", +] + +TIME_RESOLUTIONS = ("Y", "M", "D", "h", "m", "s", "ms", "us", "ns", "ps", "fs", "as") + +# See https://github.com/HypothesisWorks/hypothesis/pull/3394 and linked discussion. +NP_FIXED_UNICODE = tuple(int(x) for x in np.__version__.split(".")[:2]) >= (1, 19) + + +@defines_strategy(force_reusable_values=True) +def from_dtype( + dtype: np.dtype, + *, + alphabet: st.SearchStrategy[str] | None = None, + min_size: int = 0, + max_size: int | None = None, + min_value: int | float | None = None, + max_value: int | float | None = None, + allow_nan: bool | None = None, + allow_infinity: bool | None = None, + allow_subnormal: bool | None = None, + exclude_min: bool | None = None, + exclude_max: bool | None = None, + min_magnitude: Real = 0, + max_magnitude: Real | None = None, +) -> st.SearchStrategy[Any]: + """Creates a strategy which can generate any value of the given dtype. + + Compatible parameters are passed to the inferred strategy function while + inapplicable ones are ignored. + This allows you, for example, to customise the min and max values, + control the length or contents of strings, or exclude non-finite + numbers. 
This is particularly useful when kwargs are passed through from + :func:`arrays` which allow a variety of numeric dtypes, as it seamlessly + handles the ``width`` or representable bounds for you. + """ + check_type(np.dtype, dtype, "dtype") + kwargs = {k: v for k, v in locals().items() if k != "dtype" and v is not None} + + # Compound datatypes, eg 'f4,f4,f4' + if dtype.names is not None and dtype.fields is not None: + # mapping np.void.type over a strategy is nonsense, so return now. + subs = [from_dtype(dtype.fields[name][0], **kwargs) for name in dtype.names] + return st.tuples(*subs) + + # Subarray datatypes, eg '(2, 3)i4' + if dtype.subdtype is not None: + subtype, shape = dtype.subdtype + return arrays(subtype, shape, elements=kwargs) + + def compat_kw(*args, **kw): + """Update default args to the strategy with user-supplied keyword args.""" + assert {"min_value", "max_value", "max_size"}.issuperset(kw) + for key in set(kwargs).intersection(kw): + msg = f"dtype {dtype!r} requires {key}={kwargs[key]!r} to be %s {kw[key]!r}" + if kw[key] is not None: + if key.startswith("min_") and kw[key] > kwargs[key]: + raise InvalidArgument(msg % ("at least",)) + elif key.startswith("max_") and kw[key] < kwargs[key]: + raise InvalidArgument(msg % ("at most",)) + kw.update({k: v for k, v in kwargs.items() if k in args or k in kw}) + return kw + + # Scalar datatypes + if dtype.kind == "b": + result: st.SearchStrategy[Any] = st.booleans() + elif dtype.kind == "f": + result = st.floats( + width=cast(Literal[16, 32, 64], min(8 * dtype.itemsize, 64)), + **compat_kw( + "min_value", + "max_value", + "allow_nan", + "allow_infinity", + "allow_subnormal", + "exclude_min", + "exclude_max", + ), + ) + elif dtype.kind == "c": + result = st.complex_numbers( + width=cast( + Literal[32, 64, 128], min(8 * dtype.itemsize, 128) + ), # convert from bytes to bits + **compat_kw( + "min_magnitude", + "max_magnitude", + "allow_nan", + "allow_infinity", + "allow_subnormal", + ), + ) + elif dtype.kind in ("S", "a"): + # Numpy strings are null-terminated; only allow round-trippable values. + # `itemsize == 0` means 'fixed length determined at array creation' + max_size = dtype.itemsize or None + result = st.binary(**compat_kw("min_size", max_size=max_size)).filter( + lambda b: b[-1:] != b"\0" + ) + elif dtype.kind == "u": + kw = compat_kw(min_value=0, max_value=2 ** (8 * dtype.itemsize) - 1) + result = st.integers(**kw) + elif dtype.kind == "i": + overflow = 2 ** (8 * dtype.itemsize - 1) + result = st.integers(**compat_kw(min_value=-overflow, max_value=overflow - 1)) + elif dtype.kind == "U": + # Encoded in UTF-32 (four bytes/codepoint) and null-terminated + max_size = (dtype.itemsize or 0) // 4 or None + if NP_FIXED_UNICODE and "alphabet" not in kwargs: + kwargs["alphabet"] = st.characters() + result = st.text(**compat_kw("alphabet", "min_size", max_size=max_size)).filter( + lambda b: b[-1:] != "\0" + ) + elif dtype.kind in ("m", "M"): + if "[" in dtype.str: + res = st.just(dtype.str.split("[")[-1][:-1]) + else: + # Note that this case isn't valid to pass to arrays(), but we support + # it here because we'd have to guard against equivalents in arrays() + # regardless and drawing scalars is a valid use-case. 
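# Sketch (not part of the vendored file): typical use of the ``from_dtype``
# strategy defined in this function.  Keyword arguments that do not apply to
# the requested dtype are ignored, so one call site can serve many dtypes.
import numpy as np

from hypothesis import given
from hypothesis.extra.numpy import from_dtype

finite_float32 = from_dtype(
    np.dtype("float32"), allow_nan=False, allow_infinity=False
)


@given(finite_float32)
def test_values_fit_the_requested_dtype(value):
    assert isinstance(value, np.float32)
    assert np.isfinite(value)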
+ res = st.sampled_from(TIME_RESOLUTIONS) + if allow_nan is not False: + elems = st.integers(-(2**63), 2**63 - 1) | st.just("NaT") + else: # NEP-7 defines the NaT value as integer -(2**63) + elems = st.integers(-(2**63) + 1, 2**63 - 1) + result = st.builds(dtype.type, elems, res) + elif dtype.kind == "O": + return st.from_type(object) + else: + raise InvalidArgument(f"No strategy inference for {dtype}") + return result.map(dtype.type) + + +class ArrayStrategy(st.SearchStrategy): + def __init__(self, element_strategy, shape, dtype, fill, unique): + super().__init__() + self.shape = tuple(shape) + self.fill = fill + self.array_size = int(np.prod(shape)) + self.dtype = dtype + self.element_strategy = element_strategy + self.unique = unique + self._check_elements = dtype.kind not in ("O", "V") + + def __repr__(self): + return ( + f"ArrayStrategy({self.element_strategy!r}, shape={self.shape}, " + f"dtype={self.dtype!r}, fill={self.fill!r}, unique={self.unique!r})" + ) + + def set_element(self, val, result, idx, *, fill=False): + # `val` is either an arbitrary object (for dtype="O"), or otherwise an + # instance of a numpy dtype. This means we can *usually* expect e.g. + # val.dtype to be present, but can only guarantee it if + # `self.dtype != "O"`. + + try: + result[idx] = val + except TypeError as err: + raise InvalidArgument( + f"Could not add element={val!r} of " + f"{getattr(val, 'dtype', type(val))} to array of " + f"{result.dtype!r} - possible mismatch of time units in dtypes?" + ) from err + + try: + elem_changed = self._check_elements and val != result[idx] and val == val + except Exception as err: # pragma: no cover + # This branch only exists to help debug weird behaviour in Numpy, + # such as the string problems we had a while back. + raise HypothesisException( + f"Internal error when checking element={val!r} of " + f"{getattr(val, 'dtype', type(val))!r} to array of " + f"{result.dtype!r}" + ) from err + + if elem_changed: + strategy = self.fill if fill else self.element_strategy + if self.dtype.kind == "f": # pragma: no cover + # This logic doesn't trigger in our coverage tests under Numpy 1.24+, + # with built-in checks for overflow, but we keep it for good error + # messages and compatibility with older versions of Numpy. + try: + is_subnormal = 0 < abs(val) < np.finfo(self.dtype).tiny + except Exception: + # val may be a non-float that does not support the + # operations __lt__ and __abs__ + is_subnormal = False + if is_subnormal: + raise InvalidArgument( + f"Generated subnormal float {val} from strategy " + f"{strategy} resulted in {result[idx]!r}, probably " + "as a result of NumPy being built with flush-to-zero " + "compiler options. Consider passing " + "allow_subnormal=False." + ) + raise InvalidArgument( + f"Generated array element {val!r} from {strategy!r} cannot be " + f"represented as dtype {self.dtype!r} - instead it becomes " + f"{result[idx]!r} (type {type(result[idx])!r}). Consider using " + "a more precise strategy, for example passing the `width` argument " + "to `floats()`." + ) + + def do_draw(self, data): + if 0 in self.shape: + return np.zeros(dtype=self.dtype, shape=self.shape) + + # Because Numpy allocates memory for strings at array creation, if we have + # an unsized string dtype we'll fill an object array and then cast it back. 
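# Sketch (not part of the vendored file) of the "unsized string dtype" case
# handled just below: dtypes such as "U" or "S" have itemsize 0, meaning the
# length is decided at array creation, so values are first collected in an
# object array and only cast to the string dtype at the end.
import numpy as np

assert np.dtype("U").itemsize == 0 and np.dtype("S").itemsize == 0

collected = np.array(["a", "bcd", "ef"], dtype=object)
sized = collected.astype(np.dtype("U"))  # numpy picks a wide-enough length
assert sized.dtype.kind == "U" and sized.dtype.itemsize == 3 * 4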
+ unsized_string_dtype = ( + self.dtype.kind in ("S", "a", "U") and self.dtype.itemsize == 0 + ) + + # This could legitimately be a np.empty, but the performance gains for + # that would be so marginal that there's really not much point risking + # undefined behaviour shenanigans. + result = np.zeros( + shape=self.array_size, dtype=object if unsized_string_dtype else self.dtype + ) + + if self.fill.is_empty: + # We have no fill value (either because the user explicitly + # disabled it or because the default behaviour was used and our + # elements strategy does not produce reusable values), so we must + # generate a fully dense array with a freshly drawn value for each + # entry. + if self.unique: + elems = st.lists( + self.element_strategy, + min_size=self.array_size, + max_size=self.array_size, + unique=True, + ) + for i, v in enumerate(data.draw(elems)): + self.set_element(v, result, i) + else: + for i in range(len(result)): + self.set_element(data.draw(self.element_strategy), result, i) + else: + # We draw numpy arrays as "sparse with an offset". We draw a + # collection of index assignments within the array and assign + # fresh values from our elements strategy to those indices. If at + # the end we have not assigned every element then we draw a single + # value from our fill strategy and use that to populate the + # remaining positions with that strategy. + + elements = cu.many( + data, + min_size=0, + max_size=self.array_size, + # sqrt isn't chosen for any particularly principled reason. It + # just grows reasonably quickly but sublinearly, and for small + # arrays it represents a decent fraction of the array size. + average_size=min( + 0.9 * self.array_size, # ensure small arrays sometimes use fill + max(10, math.sqrt(self.array_size)), # ...but *only* sometimes + ), + ) + + needs_fill = np.full(self.array_size, True) + seen = set() + + while elements.more(): + i = data.draw_integer(0, self.array_size - 1) + if not needs_fill[i]: + elements.reject() + continue + self.set_element(data.draw(self.element_strategy), result, i) + if self.unique: + if result[i] in seen: + elements.reject() + continue + seen.add(result[i]) + + needs_fill[i] = False + if needs_fill.any(): + # We didn't fill all of the indices in the early loop, so we + # put a fill value into the rest. + + # We have to do this hilarious little song and dance to work + # around numpy's special handling of iterable values. If the + # value here were e.g. a tuple then neither array creation + # nor putmask would do the right thing. But by creating an + # array of size one and then assigning the fill value as a + # single element, we both get an array with the right value in + # it and putmask will do the right thing by repeating the + # values of the array across the mask. 
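# Sketch (not part of the vendored file) of the one-element trick described in
# the comment above: assigning the fill value into a size-1 object array keeps
# it as a single element, which np.putmask then repeats across the mask,
# whereas handing a tuple straight to putmask would be treated as a sequence
# of separate fill values.
import numpy as np

result = np.zeros(4, dtype=object)
needs_fill = np.array([True, False, True, True])

one_element = np.zeros(1, dtype=object)
one_element[0] = ("a", "tuple")  # stored as one object, not two elements
np.putmask(result, needs_fill, one_element)
assert result[0] == ("a", "tuple") and result[1] == 0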
+ one_element = np.zeros( + shape=1, dtype=object if unsized_string_dtype else self.dtype + ) + self.set_element(data.draw(self.fill), one_element, 0, fill=True) + if unsized_string_dtype: + one_element = one_element.astype(self.dtype) + fill_value = one_element[0] + if self.unique: + try: + is_nan = np.isnan(fill_value) + except TypeError: + is_nan = False + + if not is_nan: + raise InvalidArgument( + f"Cannot fill unique array with non-NaN value {fill_value!r}" + ) + + np.putmask(result, needs_fill, one_element) + + if unsized_string_dtype: + out = result.astype(self.dtype) + mismatch = out != result + if mismatch.any(): + raise InvalidArgument( + f"Array elements {result[mismatch]!r} cannot be represented " + f"as dtype {self.dtype!r} - instead they become " + f"{out[mismatch]!r}. Use a more precise strategy, e.g. without " + "trailing null bytes, as this will be an error future versions." + ) + result = out + + result = result.reshape(self.shape).copy() + + assert result.base is None + + return result + + +def fill_for(elements, unique, fill, name=""): + if fill is None: + if unique or not elements.has_reusable_values: + fill = st.nothing() + else: + fill = elements + else: + check_strategy(fill, f"{name}.fill" if name else "fill") + return fill + + +D = TypeVar("D", bound="DTypeLike") +G = TypeVar("G", bound="np.generic") + + +@overload +def arrays( + dtype: Union["np.dtype[G]", st.SearchStrategy["np.dtype[G]"]], + shape: int | st.SearchStrategy[int] | Shape | st.SearchStrategy[Shape], + *, + elements: st.SearchStrategy[Any] | Mapping[str, Any] | None = None, + fill: st.SearchStrategy[Any] | None = None, + unique: bool = False, +) -> "st.SearchStrategy[NDArray[G]]": ... + + +@overload +def arrays( + dtype: D | st.SearchStrategy[D], + shape: int | st.SearchStrategy[int] | Shape | st.SearchStrategy[Shape], + *, + elements: st.SearchStrategy[Any] | Mapping[str, Any] | None = None, + fill: st.SearchStrategy[Any] | None = None, + unique: bool = False, +) -> "st.SearchStrategy[NDArray[Any]]": ... + + +@defines_strategy(force_reusable_values=True) +def arrays( + dtype: D | st.SearchStrategy[D], + shape: int | st.SearchStrategy[int] | Shape | st.SearchStrategy[Shape], + *, + elements: st.SearchStrategy[Any] | Mapping[str, Any] | None = None, + fill: st.SearchStrategy[Any] | None = None, + unique: bool = False, +) -> "st.SearchStrategy[NDArray[Any]]": + r"""Returns a strategy for generating :class:`numpy:numpy.ndarray`\ s. + + * ``dtype`` may be any valid input to :class:`~numpy:numpy.dtype` + (this includes :class:`~numpy:numpy.dtype` objects), or a strategy that + generates such values. + * ``shape`` may be an integer >= 0, a tuple of such integers, or a + strategy that generates such values. + * ``elements`` is a strategy for generating values to put in the array. + If it is None a suitable value will be inferred based on the dtype, + which may give any legal value (including eg NaN for floats). + If a mapping, it will be passed as ``**kwargs`` to ``from_dtype()`` + * ``fill`` is a strategy that may be used to generate a single background + value for the array. If None, a suitable default will be inferred + based on the other arguments. If set to + :func:`~hypothesis.strategies.nothing` then filling + behaviour will be disabled entirely and every element will be generated + independently. + * ``unique`` specifies if the elements of the array should all be + distinct from one another. Note that in this case multiple NaN values + may still be allowed. 
If fill is also set, the only valid values for + it to return are NaN values (anything for which :obj:`numpy:numpy.isnan` + returns True. So e.g. for complex numbers ``nan+1j`` is also a valid fill). + Note that if ``unique`` is set to ``True`` the generated values must be + hashable. + + Arrays of specified ``dtype`` and ``shape`` are generated for example + like this: + + .. code-block:: pycon + + >>> import numpy as np + >>> arrays(np.int8, (2, 3)).example() + array([[-8, 6, 3], + [-6, 4, 6]], dtype=int8) + >>> arrays(np.float, 3, elements=st.floats(0, 1)).example() + array([ 0.88974794, 0.77387938, 0.1977879 ]) + + Array values are generated in two parts: + + 1. Some subset of the coordinates of the array are populated with a value + drawn from the elements strategy (or its inferred form). + 2. If any coordinates were not assigned in the previous step, a single + value is drawn from the ``fill`` strategy and is assigned to all remaining + places. + + You can set :func:`fill=nothing() ` to + disable this behaviour and draw a value for every element. + + If ``fill=None``, then it will attempt to infer the correct behaviour + automatically. If ``unique`` is ``True``, no filling will occur by default. + Otherwise, if it looks safe to reuse the values of elements across + multiple coordinates (this will be the case for any inferred strategy, and + for most of the builtins, but is not the case for mutable values or + strategies built with flatmap, map, composite, etc) then it will use the + elements strategy as the fill, else it will default to having no fill. + + Having a fill helps Hypothesis craft high quality examples, but its + main importance is when the array generated is large: Hypothesis is + primarily designed around testing small examples. If you have arrays with + hundreds or more elements, having a fill value is essential if you want + your tests to run in reasonable time. + """ + # Our dtype argument might be a union, e.g. `np.float64 | np.complex64`; we handle + # that by turning it into a strategy up-front. + if type(dtype) in (getattr(types, "UnionType", object()), Union): + dtype = st.one_of(*(from_dtype(np.dtype(d)) for d in dtype.__args__)) # type: ignore + + # We support passing strategies as arguments for convenience, or at least + # for legacy reasons, but don't want to pay the perf cost of a composite + # strategy (i.e. repeated argument handling and validation) when it's not + # needed. So we get the best of both worlds by recursing with flatmap, + # but only when it's actually needed. + if isinstance(dtype, st.SearchStrategy): + return dtype.flatmap( + lambda d: arrays(d, shape, elements=elements, fill=fill, unique=unique) + ) + if isinstance(shape, st.SearchStrategy): + return shape.flatmap( + lambda s: arrays(dtype, s, elements=elements, fill=fill, unique=unique) + ) + # From here on, we're only dealing with values and it's relatively simple. + dtype = np.dtype(dtype) # type: ignore[arg-type] + assert isinstance(dtype, np.dtype) # help mypy out a bit... + if elements is None or isinstance(elements, Mapping): + if dtype.kind in ("m", "M") and "[" not in dtype.str: + # For datetime and timedelta dtypes, we have a tricky situation - + # because they *may or may not* specify a unit as part of the dtype. + # If not, we flatmap over the various resolutions so that array + # elements have consistent units but units may vary between arrays. 
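# Sketch (not part of the vendored file): a property-based test built on the
# ``arrays`` strategy documented above, with an explicit elements strategy and
# fill disabled so every entry is drawn independently.
import numpy as np

from hypothesis import given, strategies as st
from hypothesis.extra.numpy import arrays


@given(
    arrays(
        np.int16,
        shape=(3, 4),
        elements=st.integers(-100, 100),
        fill=st.nothing(),
    )
)
def test_row_sums_stay_bounded(matrix):
    assert matrix.shape == (3, 4)
    assert (matrix.sum(axis=1) <= 400).all()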
+ return ( + st.sampled_from(TIME_RESOLUTIONS) + .map((dtype.str + "[{}]").format) + .flatmap(lambda d: arrays(d, shape=shape, fill=fill, unique=unique)) + ) + elements = from_dtype(dtype, **(elements or {})) + check_strategy(elements, "elements") + # If there's a redundant cast to the requested dtype, remove it. This unlocks + # optimizations such as fast unique sampled_from, and saves some time directly too. + unwrapped = unwrap_strategies(elements) + if isinstance(unwrapped, MappedStrategy) and unwrapped.pack == dtype.type: + elements = unwrapped.mapped_strategy + if getattr(unwrapped, "force_has_reusable_values", False): + elements.force_has_reusable_values = True # type: ignore + if isinstance(shape, int): + shape = (shape,) + shape = tuple(shape) + check_argument( + all(isinstance(s, int) for s in shape), + "Array shape must be integer in each dimension, provided shape was {}", + shape, + ) + fill = fill_for(elements=elements, unique=unique, fill=fill) + return ArrayStrategy(elements, shape, dtype, fill, unique) + + +@defines_strategy() +def scalar_dtypes() -> st.SearchStrategy[np.dtype]: + """Return a strategy that can return any non-flexible scalar dtype.""" + return st.one_of( + boolean_dtypes(), + integer_dtypes(), + unsigned_integer_dtypes(), + floating_dtypes(), + complex_number_dtypes(), + datetime64_dtypes(), + timedelta64_dtypes(), + ) + + +def defines_dtype_strategy(strat: T) -> T: + @defines_strategy() + @proxies(strat) + def inner(*args, **kwargs): + return strat(*args, **kwargs).map(np.dtype) + + return inner + + +@defines_dtype_strategy +def boolean_dtypes() -> st.SearchStrategy["np.dtype[np.bool_]"]: + """Return a strategy for boolean dtypes.""" + return st.just("?") # type: ignore[arg-type] + + +def dtype_factory(kind, sizes, valid_sizes, endianness): + # Utility function, shared logic for most integer and string types + valid_endian = ("?", "<", "=", ">") + check_argument( + endianness in valid_endian, + "Unknown endianness: was {}, must be in {}", + endianness, + valid_endian, + ) + if valid_sizes is not None: + if isinstance(sizes, int): + sizes = (sizes,) + check_argument(sizes, "Dtype must have at least one possible size.") + check_argument( + all(s in valid_sizes for s in sizes), + "Invalid sizes: was {} must be an item or sequence in {}", + sizes, + valid_sizes, + ) + if all(isinstance(s, int) for s in sizes): + sizes = sorted({s // 8 for s in sizes}) + strat = st.sampled_from(sizes) + if "{}" not in kind: + kind += "{}" + if endianness == "?": + return strat.map(("<" + kind).format) | strat.map((">" + kind).format) + return strat.map((endianness + kind).format) + + +@overload +def unsigned_integer_dtypes( + *, + endianness: str = "?", + sizes: Literal[8], +) -> st.SearchStrategy["np.dtype[np.uint8]"]: ... + + +@overload +def unsigned_integer_dtypes( + *, + endianness: str = "?", + sizes: Literal[16], +) -> st.SearchStrategy["np.dtype[np.uint16]"]: ... + + +@overload +def unsigned_integer_dtypes( + *, + endianness: str = "?", + sizes: Literal[32], +) -> st.SearchStrategy["np.dtype[np.uint32]"]: ... + + +@overload +def unsigned_integer_dtypes( + *, + endianness: str = "?", + sizes: Literal[64], +) -> st.SearchStrategy["np.dtype[np.uint64]"]: ... + + +@overload +def unsigned_integer_dtypes( + *, + endianness: str = "?", + sizes: Sequence[Literal[8, 16, 32, 64]] = (8, 16, 32, 64), +) -> st.SearchStrategy["np.dtype[np.unsignedinteger[Any]]"]: ... 
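# Sketch (not part of the vendored file): the dtype strategies defined in this
# module compose directly with ``arrays``, so both the dtype and the contents
# can vary from example to example.
import numpy as np

from hypothesis import given
from hypothesis.extra.numpy import arrays, integer_dtypes, unsigned_integer_dtypes


@given(arrays(dtype=integer_dtypes() | unsigned_integer_dtypes(), shape=(2, 2)))
def test_integer_arrays_survive_a_tolist_round_trip(arr):
    rebuilt = np.array(arr.tolist(), dtype=arr.dtype)
    assert np.array_equal(rebuilt, arr)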
+ + +@defines_dtype_strategy +def unsigned_integer_dtypes( + *, + endianness: str = "?", + sizes: Literal[8, 16, 32, 64] | Sequence[Literal[8, 16, 32, 64]] = ( + 8, + 16, + 32, + 64, + ), +) -> st.SearchStrategy["np.dtype[np.unsignedinteger[Any]]"]: + """Return a strategy for unsigned integer dtypes. + + endianness may be ``<`` for little-endian, ``>`` for big-endian, + ``=`` for native byte order, or ``?`` to allow either byte order. + This argument only applies to dtypes of more than one byte. + + sizes must be a collection of integer sizes in bits. The default + (8, 16, 32, 64) covers the full range of sizes. + """ + return dtype_factory("u", sizes, (8, 16, 32, 64), endianness) + + +@overload +def integer_dtypes( + *, + endianness: str = "?", + sizes: Literal[8], +) -> st.SearchStrategy["np.dtype[np.int8]"]: ... + + +@overload +def integer_dtypes( + *, + endianness: str = "?", + sizes: Literal[16], +) -> st.SearchStrategy["np.dtype[np.int16]"]: ... + + +@overload +def integer_dtypes( + *, + endianness: str = "?", + sizes: Literal[32], +) -> st.SearchStrategy["np.dtype[np.int32]"]: ... + + +@overload +def integer_dtypes( + *, + endianness: str = "?", + sizes: Literal[64], +) -> st.SearchStrategy["np.dtype[np.int64]"]: ... + + +@overload +def integer_dtypes( + *, + endianness: str = "?", + sizes: Sequence[Literal[8, 16, 32, 64]] = (8, 16, 32, 64), +) -> st.SearchStrategy["np.dtype[np.signedinteger[Any]]"]: ... + + +@defines_dtype_strategy +def integer_dtypes( + *, + endianness: str = "?", + sizes: Literal[8, 16, 32, 64] | Sequence[Literal[8, 16, 32, 64]] = ( + 8, + 16, + 32, + 64, + ), +) -> st.SearchStrategy["np.dtype[np.signedinteger[Any]]"]: + """Return a strategy for signed integer dtypes. + + endianness and sizes are treated as for + :func:`unsigned_integer_dtypes`. + """ + return dtype_factory("i", sizes, (8, 16, 32, 64), endianness) + + +@overload +def floating_dtypes( + *, + endianness: str = "?", + sizes: Literal[16], +) -> st.SearchStrategy["np.dtype[np.float16]"]: ... + + +@overload +def floating_dtypes( + *, + endianness: str = "?", + sizes: Literal[32], +) -> st.SearchStrategy["np.dtype[np.float32]"]: ... + + +@overload +def floating_dtypes( + *, + endianness: str = "?", + sizes: Literal[64], +) -> st.SearchStrategy["np.dtype[np.float64]"]: ... + + +@overload +def floating_dtypes( + *, + endianness: str = "?", + sizes: Literal[128], +) -> st.SearchStrategy["np.dtype[np.float128]"]: ... + + +@overload +def floating_dtypes( + *, + endianness: str = "?", + sizes: Sequence[Literal[16, 32, 64, 96, 128]] = (16, 32, 64), +) -> st.SearchStrategy["np.dtype[np.floating[Any]]"]: ... + + +@defines_dtype_strategy +def floating_dtypes( + *, + endianness: str = "?", + sizes: Literal[16, 32, 64, 96, 128] | Sequence[Literal[16, 32, 64, 96, 128]] = ( + 16, + 32, + 64, + ), +) -> st.SearchStrategy["np.dtype[np.floating[Any]]"]: + """Return a strategy for floating-point dtypes. + + sizes is the size in bits of floating-point number. Some machines support + 96- or 128-bit floats, but these are not generated by default. + + Larger floats (96 and 128 bit real parts) are not supported on all + platforms and therefore disabled by default. To generate these dtypes, + include these values in the sizes argument. + """ + return dtype_factory("f", sizes, (16, 32, 64, 96, 128), endianness) + + +@overload +def complex_number_dtypes( + *, + endianness: str = "?", + sizes: Literal[64], +) -> st.SearchStrategy["np.dtype[np.complex64]"]: ... 
+ + +@overload +def complex_number_dtypes( + *, + endianness: str = "?", + sizes: Literal[128], +) -> st.SearchStrategy["np.dtype[np.complex128]"]: ... + + +@overload +def complex_number_dtypes( + *, + endianness: str = "?", + sizes: Literal[256], +) -> st.SearchStrategy["np.dtype[np.complex256]"]: ... + + +@overload +def complex_number_dtypes( + *, + endianness: str = "?", + sizes: Sequence[Literal[64, 128, 192, 256]] = (64, 128), +) -> st.SearchStrategy["np.dtype[np.complexfloating[Any, Any]]"]: ... + + +@defines_dtype_strategy +def complex_number_dtypes( + *, + endianness: str = "?", + sizes: Literal[64, 128, 192, 256] | Sequence[Literal[64, 128, 192, 256]] = ( + 64, + 128, + ), +) -> st.SearchStrategy["np.dtype[np.complexfloating[Any, Any]]"]: + """Return a strategy for complex-number dtypes. + + sizes is the total size in bits of a complex number, which consists + of two floats. Complex halves (a 16-bit real part) are not supported + by numpy and will not be generated by this strategy. + """ + return dtype_factory("c", sizes, (64, 128, 192, 256), endianness) + + +@check_function +def validate_time_slice(max_period, min_period): + check_argument( + max_period in TIME_RESOLUTIONS, + "max_period {} must be a valid resolution in {}", + max_period, + TIME_RESOLUTIONS, + ) + check_argument( + min_period in TIME_RESOLUTIONS, + "min_period {} must be a valid resolution in {}", + min_period, + TIME_RESOLUTIONS, + ) + start = TIME_RESOLUTIONS.index(max_period) + end = TIME_RESOLUTIONS.index(min_period) + 1 + check_argument( + start < end, + "max_period {} must be earlier in sequence {} than min_period {}", + max_period, + TIME_RESOLUTIONS, + min_period, + ) + return TIME_RESOLUTIONS[start:end] + + +@defines_dtype_strategy +def datetime64_dtypes( + *, max_period: str = "Y", min_period: str = "ns", endianness: str = "?" +) -> st.SearchStrategy["np.dtype[np.datetime64]"]: + """Return a strategy for datetime64 dtypes, with various precisions from + year to attosecond.""" + return dtype_factory( + "datetime64[{}]", + validate_time_slice(max_period, min_period), + TIME_RESOLUTIONS, + endianness, + ) + + +@defines_dtype_strategy +def timedelta64_dtypes( + *, max_period: str = "Y", min_period: str = "ns", endianness: str = "?" +) -> st.SearchStrategy["np.dtype[np.timedelta64]"]: + """Return a strategy for timedelta64 dtypes, with various precisions from + year to attosecond.""" + return dtype_factory( + "timedelta64[{}]", + validate_time_slice(max_period, min_period), + TIME_RESOLUTIONS, + endianness, + ) + + +@defines_dtype_strategy +def byte_string_dtypes( + *, endianness: str = "?", min_len: int = 1, max_len: int = 16 +) -> st.SearchStrategy["np.dtype[np.bytes_]"]: + """Return a strategy for generating bytestring dtypes, of various lengths + and byteorder. + + While Hypothesis' string strategies can generate empty strings, string + dtypes with length 0 indicate that size is still to be determined, so + the minimum length for string dtypes is 1. + """ + order_check("len", 1, min_len, max_len) + return dtype_factory("S", list(range(min_len, max_len + 1)), None, endianness) + + +@defines_dtype_strategy +def unicode_string_dtypes( + *, endianness: str = "?", min_len: int = 1, max_len: int = 16 +) -> st.SearchStrategy["np.dtype[np.str_]"]: + """Return a strategy for generating unicode string dtypes, of various + lengths and byteorder. 
+ + While Hypothesis' string strategies can generate empty strings, string + dtypes with length 0 indicate that size is still to be determined, so + the minimum length for string dtypes is 1. + """ + order_check("len", 1, min_len, max_len) + return dtype_factory("U", list(range(min_len, max_len + 1)), None, endianness) + + +def _no_title_is_name_of_a_titled_field(ls): + seen = set() + for title_and_name, *_ in ls: + if isinstance(title_and_name, tuple): + if seen.intersection(title_and_name): # pragma: no cover + # Our per-element filters below make this as rare as possible, + # so it's not always covered. + return False + seen.update(title_and_name) + return True + + +@defines_dtype_strategy +def array_dtypes( + subtype_strategy: st.SearchStrategy[np.dtype] = scalar_dtypes(), + *, + min_size: int = 1, + max_size: int = 5, + allow_subarrays: bool = False, +) -> st.SearchStrategy[np.dtype]: + """Return a strategy for generating array (compound) dtypes, with members + drawn from the given subtype strategy.""" + order_check("size", 0, min_size, max_size) + # The empty string is replaced by f{idx}; see #1963 for details. Much easier to + # insist that field names be unique and just boost f{idx} strings manually. + field_names = st.integers(0, 127).map("f{}".format) | st.text(min_size=1) + name_titles = st.one_of( + field_names, + st.tuples(field_names, field_names).filter(lambda ns: ns[0] != ns[1]), + ) + elements: st.SearchStrategy[tuple] = st.tuples(name_titles, subtype_strategy) + if allow_subarrays: + elements |= st.tuples( + name_titles, subtype_strategy, array_shapes(max_dims=2, max_side=2) + ) + return st.lists( # type: ignore[return-value] + elements=elements, + min_size=min_size, + max_size=max_size, + unique_by=( + # Deduplicate by both name and title for efficiency before filtering. + # (Field names must be unique, as must titles, and no intersections) + lambda d: d[0] if isinstance(d[0], str) else d[0][0], + lambda d: d[0] if isinstance(d[0], str) else d[0][1], + ), + ).filter(_no_title_is_name_of_a_titled_field) + + +@defines_strategy() +def nested_dtypes( + subtype_strategy: st.SearchStrategy[np.dtype] = scalar_dtypes(), + *, + max_leaves: int = 10, + max_itemsize: int | None = None, +) -> st.SearchStrategy[np.dtype]: + """Return the most-general dtype strategy. + + Elements drawn from this strategy may be simple (from the + subtype_strategy), or several such values drawn from + :func:`array_dtypes` with ``allow_subarrays=True``. Subdtypes in an + array dtype may be nested to any depth, subject to the max_leaves + argument. + """ + return st.recursive( + subtype_strategy, + lambda x: array_dtypes(x, allow_subarrays=True), + max_leaves=max_leaves, + ).filter(lambda d: max_itemsize is None or d.itemsize <= max_itemsize) + + +@proxies(_valid_tuple_axes) +def valid_tuple_axes(*args, **kwargs): + return _valid_tuple_axes(*args, **kwargs) + + +valid_tuple_axes.__doc__ = f""" + Return a strategy for generating permissible tuple-values for the + ``axis`` argument for a numpy sequential function (e.g. + :func:`numpy:numpy.sum`), given an array of the specified + dimensionality. 
+ + {_valid_tuple_axes.__doc__} + """ + + +@proxies(_mutually_broadcastable_shapes) +def mutually_broadcastable_shapes(*args, **kwargs): + return _mutually_broadcastable_shapes(*args, **kwargs) + + +mutually_broadcastable_shapes.__doc__ = f""" + {_mutually_broadcastable_shapes.__doc__} + + **Use with Generalised Universal Function signatures** + + A :doc:`universal function ` (or ufunc for short) is a function + that operates on ndarrays in an element-by-element fashion, supporting array + broadcasting, type casting, and several other standard features. + A :doc:`generalised ufunc ` operates on + sub-arrays rather than elements, based on the "signature" of the function. + Compare e.g. :obj:`numpy.add() ` (ufunc) to + :obj:`numpy.matmul() ` (gufunc). + + To generate shapes for a gufunc, you can pass the ``signature`` argument instead of + ``num_shapes``. This must be a gufunc signature string; which you can write by + hand or access as e.g. ``np.matmul.signature`` on generalised ufuncs. + + In this case, the ``side`` arguments are applied to the 'core dimensions' as well, + ignoring any frozen dimensions. ``base_shape`` and the ``dims`` arguments are + applied to the 'loop dimensions', and if necessary, the dimensionality of each + shape is silently capped to respect the 32-dimension limit. + + The generated ``result_shape`` is the real result shape of applying the gufunc + to arrays of the generated ``input_shapes``, even where this is different to + broadcasting the loop dimensions. + + gufunc-compatible shapes shrink their loop dimensions as above, towards omitting + optional core dimensions, and smaller-size core dimensions. + + .. code-block:: pycon + + >>> # np.matmul.signature == "(m?,n),(n,p?)->(m?,p?)" + >>> for _ in range(3): + ... mutually_broadcastable_shapes(signature=np.matmul.signature).example() + BroadcastableShapes(input_shapes=((2,), (2,)), result_shape=()) + BroadcastableShapes(input_shapes=((3, 4, 2), (1, 2)), result_shape=(3, 4)) + BroadcastableShapes(input_shapes=((4, 2), (1, 2, 3)), result_shape=(4, 3)) + + """ + + +@overload +def basic_indices( + shape: Shape, + *, + min_dims: int = 0, + max_dims: int | None = None, + allow_newaxis: Literal[False] = ..., + allow_ellipsis: Literal[False], +) -> st.SearchStrategy[ + _BIENoEllipsisNoNewaxis | tuple[_BIENoEllipsisNoNewaxis, ...] +]: ... + + +@overload +def basic_indices( + shape: Shape, + *, + min_dims: int = 0, + max_dims: int | None = None, + allow_newaxis: Literal[False] = ..., + allow_ellipsis: Literal[True] = ..., +) -> st.SearchStrategy[_BIENoNewaxis | tuple[_BIENoNewaxis, ...]]: ... + + +@overload +def basic_indices( + shape: Shape, + *, + min_dims: int = 0, + max_dims: int | None = None, + allow_newaxis: Literal[True], + allow_ellipsis: Literal[False], +) -> st.SearchStrategy[_BIENoEllipsis | tuple[_BIENoEllipsis, ...]]: ... + + +@overload +def basic_indices( + shape: Shape, + *, + min_dims: int = 0, + max_dims: int | None = None, + allow_newaxis: Literal[True], + allow_ellipsis: Literal[True] = ..., +) -> st.SearchStrategy[_BIE | tuple[_BIE, ...]]: ... + + +@defines_strategy() +def basic_indices( + shape: Shape, + *, + min_dims: int = 0, + max_dims: int | None = None, + allow_newaxis: bool = False, + allow_ellipsis: bool = True, +) -> st.SearchStrategy[BasicIndex]: + """Return a strategy for :doc:`basic indexes ` of + arrays with the specified shape, which may include dimensions of size zero. 
+ + It generates tuples containing some mix of integers, :obj:`python:slice` + objects, ``...`` (an ``Ellipsis``), and ``None``. When a length-one tuple + would be generated, this strategy may instead return the element which will + index the first axis, e.g. ``5`` instead of ``(5,)``. + + * ``shape`` is the shape of the array that will be indexed, as a tuple of + positive integers. This must be at least two-dimensional for a tuple to be + a valid index; for one-dimensional arrays use + :func:`~hypothesis.strategies.slices` instead. + * ``min_dims`` is the minimum dimensionality of the resulting array from use + of the generated index. When ``min_dims == 0``, scalars and zero-dimensional + arrays are both allowed. + * ``max_dims`` is the the maximum dimensionality of the resulting array, + defaulting to ``len(shape) if not allow_newaxis else + max(len(shape), min_dims) + 2``. + * ``allow_newaxis`` specifies whether ``None`` is allowed in the index. + * ``allow_ellipsis`` specifies whether ``...`` is allowed in the index. + """ + # Arguments to exclude scalars, zero-dim arrays, and dims of size zero were + # all considered and rejected. We want users to explicitly consider those + # cases if they're dealing in general indexers, and while it's fiddly we can + # back-compatibly add them later (hence using kwonlyargs). + check_type(tuple, shape, "shape") + check_argument( + all(isinstance(x, int) and x >= 0 for x in shape), + f"{shape=}, but all dimensions must be non-negative integers.", + ) + check_type(bool, allow_ellipsis, "allow_ellipsis") + check_type(bool, allow_newaxis, "allow_newaxis") + check_type(int, min_dims, "min_dims") + if min_dims > len(shape) and not allow_newaxis: + note_deprecation( + f"min_dims={min_dims} is larger than len(shape)={len(shape)}, " + "but allow_newaxis=False makes it impossible for an indexing " + "operation to add dimensions.", + since="2021-09-15", + has_codemod=False, + ) + check_valid_dims(min_dims, "min_dims") + + if max_dims is None: + if allow_newaxis: + max_dims = min(max(len(shape), min_dims) + 2, NDIM_MAX) + else: + max_dims = min(len(shape), NDIM_MAX) + else: + check_type(int, max_dims, "max_dims") + if max_dims > len(shape) and not allow_newaxis: + note_deprecation( + f"max_dims={max_dims} is larger than len(shape)={len(shape)}, " + "but allow_newaxis=False makes it impossible for an indexing " + "operation to add dimensions.", + since="2021-09-15", + has_codemod=False, + ) + check_valid_dims(max_dims, "max_dims") + + order_check("dims", 0, min_dims, max_dims) + + return BasicIndexStrategy( + shape, + min_dims=min_dims, + max_dims=max_dims, + allow_ellipsis=allow_ellipsis, + allow_newaxis=allow_newaxis, + allow_fewer_indices_than_dims=True, + ) + + +I = TypeVar("I", bound=np.integer) + + +@overload +def integer_array_indices( + shape: Shape, + *, + result_shape: st.SearchStrategy[Shape] = array_shapes(), +) -> "st.SearchStrategy[tuple[NDArray[np.signedinteger[Any]], ...]]": ... + + +@overload +def integer_array_indices( + shape: Shape, + *, + result_shape: st.SearchStrategy[Shape] = array_shapes(), + dtype: "np.dtype[I]", +) -> "st.SearchStrategy[tuple[NDArray[I], ...]]": ... 
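[Editor's note, not part of the patch: a hedged sketch of the min_dims/max_dims contract of basic_indices; the SHAPE constant, bounds, and test name are arbitrary illustration choices.]

import numpy as np
from hypothesis import given
from hypothesis.extra import numpy as npst

SHAPE = (3, 4, 5)

@given(npst.basic_indices(SHAPE, min_dims=1, max_dims=3))
def test_basic_index_applies_cleanly(idx):
    # Any generated index must be accepted by an array of the stated shape, and
    # the dimensionality of the result stays within the requested bounds.
    result = np.zeros(SHAPE)[idx]
    assert 1 <= result.ndim <= 3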
+ + +@defines_strategy() +def integer_array_indices( + shape: Shape, + *, + result_shape: st.SearchStrategy[Shape] = array_shapes(), + dtype: "np.dtype[I] | np.dtype[np.signedinteger[Any] | np.bool[bool]]" = np.dtype( + int + ), +) -> "st.SearchStrategy[tuple[NDArray[I], ...]]": + """Return a search strategy for tuples of integer-arrays that, when used + to index into an array of shape ``shape``, give an array whose shape + was drawn from ``result_shape``. + + Examples from this strategy shrink towards the tuple of index-arrays:: + + len(shape) * (np.zeros(drawn_result_shape, dtype), ) + + * ``shape`` a tuple of integers that indicates the shape of the array, + whose indices are being generated. + * ``result_shape`` a strategy for generating tuples of integers, which + describe the shape of the resulting index arrays. The default is + :func:`~hypothesis.extra.numpy.array_shapes`. The shape drawn from + this strategy determines the shape of the array that will be produced + when the corresponding example from ``integer_array_indices`` is used + as an index. + * ``dtype`` the integer data type of the generated index-arrays. Negative + integer indices can be generated if a signed integer type is specified. + + Recall that an array can be indexed using a tuple of integer-arrays to + access its members in an arbitrary order, producing an array with an + arbitrary shape. For example: + + .. code-block:: pycon + + >>> from numpy import array + >>> x = array([-0, -1, -2, -3, -4]) + >>> ind = (array([[4, 0], [0, 1]]),) # a tuple containing a 2D integer-array + >>> x[ind] # the resulting array is commensurate with the indexing array(s) + array([[-4, 0], + [0, -1]]) + + Note that this strategy does not accommodate all variations of so-called + 'advanced indexing', as prescribed by NumPy's nomenclature. Combinations + of basic and advanced indexes are too complex to usefully define in a + standard strategy; we leave application-specific strategies to the user. + Advanced-boolean indexing can be defined as ``arrays(shape=..., dtype=bool)``, + and is similarly left to the user.
+ """ + check_type(tuple, shape, "shape") + check_argument( + shape and all(isinstance(x, int) and x > 0 for x in shape), + f"{shape=} must be a non-empty tuple of integers > 0", + ) + check_strategy(result_shape, "result_shape") + check_argument( + np.issubdtype(dtype, np.integer), f"{dtype=} must be an integer dtype" + ) + signed = np.issubdtype(dtype, np.signedinteger) + + def array_for(index_shape, size): + return arrays( + dtype=dtype, + shape=index_shape, + elements=st.integers(-size if signed else 0, size - 1), + ) + + return result_shape.flatmap( + lambda index_shape: st.tuples(*(array_for(index_shape, size) for size in shape)) + ) + + +def _unpack_dtype(dtype): + dtype_args = getattr(dtype, "__args__", ()) + if dtype_args and type(dtype) not in (getattr(types, "UnionType", object()), Union): + assert len(dtype_args) == 1 + if isinstance(dtype_args[0], TypeVar): + # numpy.dtype[+ScalarType] + assert dtype_args[0].__bound__ == np.generic + dtype = Any + else: + # plain dtype + dtype = dtype_args[0] + return dtype + + +def _dtype_from_args(args): + if len(args) <= 1: + # Zero args: ndarray, _SupportsArray + # One arg: ndarray[type], _SupportsArray[type] + dtype = _unpack_dtype(args[0]) if args else Any + else: + # Two args: ndarray[shape, type], NDArray[*] + assert len(args) == 2 + dtype = _unpack_dtype(args[1]) + + if dtype is Any: + return scalar_dtypes() + elif type(dtype) in (getattr(types, "UnionType", object()), Union): + return dtype + return np.dtype(dtype) + + +def _from_type(thing: type[Ex]) -> st.SearchStrategy[Ex] | None: + """Called by st.from_type to try to infer a strategy for thing using numpy. + + If we can infer a numpy-specific strategy for thing, we return that; otherwise, + we return None. + """ + + base_strats = st.one_of( + [ + st.booleans(), + st.integers(), + st.floats(), + st.complex_numbers(), + st.text(), + st.binary(), + ] + ) + # don't mix strings and non-ascii bytestrings (ex: ['', b'\x80']). See + # https://github.com/numpy/numpy/issues/23899. + base_strats_ascii = st.one_of( + [ + st.booleans(), + st.integers(), + st.floats(), + st.complex_numbers(), + st.text(), + st.binary().filter(bytes.isascii), + ] + ) + + if thing == np.dtype: + # Note: Parameterized dtypes and DTypeLike are not supported. + return st.one_of( + scalar_dtypes(), + byte_string_dtypes(), + unicode_string_dtypes(), + array_dtypes(), + nested_dtypes(), + ) + + if thing == ArrayLike: + # We override the default type resolution to ensure the "coercible to + # array" contract is honoured. See + # https://github.com/HypothesisWorks/hypothesis/pull/3670#issuecomment-1578140422. + # The actual type is (as of np 1.24), with + # scalars:=[bool, int, float, complex, str, bytes]: + # Union[ + # _SupportsArray, + # _NestedSequence[_SupportsArray], + # *scalars, + # _NestedSequence[Union[*scalars]] + # ] + return st.one_of( + # *scalars + base_strats, + # The two recursive strategies below cover the following cases: + # - _SupportsArray (using plain ndarrays) + # - _NestedSequence[Union[*scalars]] (but excluding non-ascii binary) + # - _NestedSequence[_SupportsArray] (but with a single leaf element + # . 
to avoid the issue of unequally sized leaves) + st.recursive(st.lists(base_strats_ascii), extend=st.tuples), + st.recursive(st.from_type(np.ndarray), extend=st.tuples), + ) + + if isinstance(thing, type) and issubclass(thing, np.generic): + dtype = np.dtype(thing) + return from_dtype(dtype) if dtype.kind not in "OV" else None + + origin = get_origin(thing) + # if origin is not generic-like, get_origin returns None. Fall back to thing. + if origin is None: + origin = thing + args = get_args(thing) + + if origin == _NestedSequence: + # We have to override the default resolution to ensure sequences are of + # equal length. Actually they are still not, if the arg specialization + # returns arbitrary-shaped sequences or arrays - hence the even more special + # resolution of ArrayLike, above. + assert len(args) <= 1 + base_strat = st.from_type(args[0]) if args else base_strats + return st.one_of( + st.lists(base_strat), + st.recursive(st.tuples(), st.tuples), + st.recursive(st.tuples(base_strat), st.tuples), + st.recursive(st.tuples(base_strat, base_strat), st.tuples), + ) + + if origin in [np.ndarray, _SupportsArray]: + dtype = _dtype_from_args(args) + return arrays(dtype, array_shapes(max_dims=2)) # type: ignore[return-value] + + # We didn't find a type to resolve, continue + return None diff --git a/vendored/hypothesis/extra/pandas/__init__.py b/vendored/hypothesis/extra/pandas/__init__.py new file mode 100644 index 0000000..5d8a436 --- /dev/null +++ b/vendored/hypothesis/extra/pandas/__init__.py @@ -0,0 +1,20 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from hypothesis.extra.pandas.impl import ( + column, + columns, + data_frames, + indexes, + range_indexes, + series, +) + +__all__ = ["column", "columns", "data_frames", "indexes", "range_indexes", "series"] diff --git a/vendored/hypothesis/extra/pandas/impl.py b/vendored/hypothesis/extra/pandas/impl.py new file mode 100644 index 0000000..4d4c00f --- /dev/null +++ b/vendored/hypothesis/extra/pandas/impl.py @@ -0,0 +1,761 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
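[Editor's note, not part of the patch: a hedged usage sketch for the strategies re-exported above (their implementations follow in impl.py); it assumes pandas is installed and the vendored package is importable as ``hypothesis``, and the column names, sizes, and test name are illustrative only.]

from hypothesis import given
from hypothesis.extra.pandas import column, data_frames, range_indexes

@given(
    data_frames(
        columns=[column("a", dtype=int), column("b", dtype=float)],
        index=range_indexes(min_size=1, max_size=5),
    )
)
def test_generated_frame_shape(df):
    # Column names come from the column() specs; the index length follows range_indexes.
    assert list(df.columns) == ["a", "b"]
    assert 1 <= len(df) <= 5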
+ +from collections import OrderedDict, abc +from collections.abc import Sequence +from copy import copy +from dataclasses import dataclass +from datetime import datetime, timedelta +from typing import Any, Generic, Union + +import numpy as np +import pandas + +from hypothesis import strategies as st +from hypothesis._settings import note_deprecation +from hypothesis.control import reject +from hypothesis.errors import InvalidArgument +from hypothesis.extra import numpy as npst +from hypothesis.internal.conjecture import utils as cu +from hypothesis.internal.coverage import check, check_function +from hypothesis.internal.validation import ( + check_type, + check_valid_interval, + check_valid_size, + try_convert, +) +from hypothesis.strategies._internal.strategies import Ex, check_strategy +from hypothesis.strategies._internal.utils import cacheable, defines_strategy + +try: + from pandas.core.arrays.integer import IntegerDtype +except ImportError: + IntegerDtype = () + + +def dtype_for_elements_strategy(s): + return st.shared( + s.map(lambda x: pandas.Series([x]).dtype), + key=("hypothesis.extra.pandas.dtype_for_elements_strategy", s), + ) + + +def infer_dtype_if_necessary(dtype, values, elements, draw): + if dtype is None and not values: + return draw(dtype_for_elements_strategy(elements)) + return dtype + + +@check_function +def elements_and_dtype(elements, dtype, source=None): + if source is None: + prefix = "" + else: + prefix = f"{source}." + + if elements is not None: + check_strategy(elements, f"{prefix}elements") + else: + with check("dtype is not None"): + if dtype is None: + raise InvalidArgument( + f"At least one of {prefix}elements or {prefix}dtype must be provided." + ) + + with check("isinstance(dtype, CategoricalDtype)"): + if pandas.api.types.CategoricalDtype.is_dtype(dtype): + raise InvalidArgument( + f"{prefix}dtype is categorical, which is currently unsupported" + ) + + if isinstance(dtype, type) and issubclass(dtype, IntegerDtype): + raise InvalidArgument( + f"Passed {dtype=} is a dtype class, please pass in an instance of this class." + "Otherwise it would be treated as dtype=object" + ) + + if isinstance(dtype, type) and np.dtype(dtype).kind == "O" and dtype is not object: + err_msg = f"Passed {dtype=} is not a valid Pandas dtype." + if issubclass(dtype, datetime): + err_msg += ' To generate valid datetimes, pass `dtype="datetime64[ns]"`' + raise InvalidArgument(err_msg) + elif issubclass(dtype, timedelta): + err_msg += ' To generate valid timedeltas, pass `dtype="timedelta64[ns]"`' + raise InvalidArgument(err_msg) + note_deprecation( + f"{err_msg} We'll treat it as " + "dtype=object for now, but this will be an error in a future version.", + since="2021-12-31", + has_codemod=False, + stacklevel=1, + ) + + if isinstance(dtype, st.SearchStrategy): + raise InvalidArgument( + f"Passed {dtype=} is a strategy, but we require a concrete dtype " + "here. See https://stackoverflow.com/q/74355937 for workaround patterns." + ) + + _get_subclasses = getattr(IntegerDtype, "__subclasses__", list) + dtype = {t.name: t() for t in _get_subclasses()}.get(dtype, dtype) + + is_na_dtype = False + if isinstance(dtype, IntegerDtype): + is_na_dtype = True + dtype = np.dtype(dtype.name.lower()) + elif dtype is not None: + dtype = try_convert(np.dtype, dtype, "dtype") + + if elements is None: + elements = npst.from_dtype(dtype) + if is_na_dtype: + elements = st.none() | elements + # as an optimization, avoid converting object dtypes, which will always + # remain unchanged. 
+ elif dtype is not None and dtype.kind != "O": + + def convert_element(value): + if is_na_dtype and value is None: + return None + + try: + return np.array([value], dtype=dtype)[0] + except (TypeError, ValueError, OverflowError): + name = f"draw({prefix}elements)" + raise InvalidArgument( + f"Cannot convert {name}={value!r} of type " + f"{type(value).__name__} to dtype {dtype.str}" + ) from None + + elements = elements.map(convert_element) + assert elements is not None + + return elements, dtype + + +class ValueIndexStrategy(st.SearchStrategy): + def __init__(self, elements, dtype, min_size, max_size, unique, name): + super().__init__() + self.elements = elements + self.dtype = dtype + self.min_size = min_size + self.max_size = max_size + self.unique = unique + self.name = name + + def do_draw(self, data): + result = [] + seen = set() + + iterator = cu.many( + data, + min_size=self.min_size, + max_size=self.max_size, + average_size=(self.min_size + self.max_size) / 2, + ) + + while iterator.more(): + elt = data.draw(self.elements) + + if self.unique: + if elt in seen: + iterator.reject() + continue + seen.add(elt) + result.append(elt) + + dtype = infer_dtype_if_necessary( + dtype=self.dtype, values=result, elements=self.elements, draw=data.draw + ) + return pandas.Index( + result, dtype=dtype, tupleize_cols=False, name=data.draw(self.name) + ) + + +DEFAULT_MAX_SIZE = 10 + + +@cacheable +@defines_strategy() +def range_indexes( + min_size: int = 0, + max_size: int | None = None, + name: st.SearchStrategy[str | None] = st.none(), +) -> st.SearchStrategy[pandas.RangeIndex]: + """Provides a strategy which generates an :class:`~pandas.Index` whose + values are 0, 1, ..., n for some n. + + Arguments: + + * min_size is the smallest number of elements the index can have. + * max_size is the largest number of elements the index can have. If None + it will default to some suitable value based on min_size. + * name is the name of the index. If st.none(), the index will have no name. + """ + check_valid_size(min_size, "min_size") + check_valid_size(max_size, "max_size") + if max_size is None: + max_size = min([min_size + DEFAULT_MAX_SIZE, 2**63 - 1]) + check_valid_interval(min_size, max_size, "min_size", "max_size") + check_strategy(name) + + return st.builds(pandas.RangeIndex, st.integers(min_size, max_size), name=name) + + +@cacheable +@defines_strategy() +def indexes( + *, + elements: st.SearchStrategy[Ex] | None = None, + dtype: Any = None, + min_size: int = 0, + max_size: int | None = None, + unique: bool = True, + name: st.SearchStrategy[str | None] = st.none(), +) -> st.SearchStrategy[pandas.Index]: + """Provides a strategy for producing a :class:`pandas.Index`. + + Arguments: + + * elements is a strategy which will be used to generate the individual + values of the index. If None, it will be inferred from the dtype. Note: + even if the elements strategy produces tuples, the generated value + will not be a MultiIndex, but instead be a normal index whose elements + are tuples. + * dtype is the dtype of the resulting index. If None, it will be inferred + from the elements strategy. At least one of dtype or elements must be + provided. + * min_size is the minimum number of elements in the index. + * max_size is the maximum number of elements in the index. If None then it + will default to a suitable small size. If you want larger indexes you + should pass a max_size explicitly. + * unique specifies whether all of the elements in the resulting index + should be distinct. 
+ * name is a strategy for strings or ``None``, which will be passed to + the :class:`pandas.Index` constructor. + """ + check_valid_size(min_size, "min_size") + check_valid_size(max_size, "max_size") + check_valid_interval(min_size, max_size, "min_size", "max_size") + check_type(bool, unique, "unique") + + elements, dtype = elements_and_dtype(elements, dtype) + + if max_size is None: + max_size = min_size + DEFAULT_MAX_SIZE + return ValueIndexStrategy(elements, dtype, min_size, max_size, unique, name) + + +@defines_strategy() +def series( + *, + elements: st.SearchStrategy[Ex] | None = None, + dtype: Any = None, + # new-style unions hit https://github.com/sphinx-doc/sphinx/issues/11211 during + # doc builds. See related comment in django/_fields.py. Quote to prevent + # shed/pyupgrade from changing it. + index: ( + st.SearchStrategy["Union[Sequence, pandas.Index]"] | None # noqa: UP007 + ) = None, + fill: st.SearchStrategy[Ex] | None = None, + unique: bool = False, + name: st.SearchStrategy[str | None] = st.none(), +) -> st.SearchStrategy[pandas.Series]: + """Provides a strategy for producing a :class:`pandas.Series`. + + Arguments: + + * elements: a strategy that will be used to generate the individual + values in the series. If None, we will attempt to infer a suitable + default from the dtype. + + * dtype: the dtype of the resulting series and may be any value + that can be passed to :class:`numpy.dtype`. If None, will use + pandas's standard behaviour to infer it from the type of the elements + values. Note that if the type of values that comes out of your + elements strategy varies, then so will the resulting dtype of the + series. + + * index: If not None, a strategy for generating indexes for the + resulting Series. This can generate either :class:`pandas.Index` + objects or any sequence of values (which will be passed to the + Index constructor). + + You will probably find it most convenient to use the + :func:`~hypothesis.extra.pandas.indexes` or + :func:`~hypothesis.extra.pandas.range_indexes` function to produce + values for this argument. + + * name: is a strategy for strings or ``None``, which will be passed to + the :class:`pandas.Series` constructor. + + Usage: + + .. code-block:: pycon + + >>> series(dtype=int).example() + 0 -2001747478 + 1 1153062837 + """ + if index is None: + index = range_indexes() + else: + check_strategy(index, "index") + + elements, np_dtype = elements_and_dtype(elements, dtype) + index_strategy = index + + # if it is converted to an object, use object for series type + if ( + np_dtype is not None + and np_dtype.kind == "O" + and not isinstance(dtype, IntegerDtype) + ): + dtype = np_dtype + + @st.composite + def result(draw): + index = draw(index_strategy) + + if len(index) > 0: + if dtype is not None: + result_data = draw( + npst.arrays( + dtype=object, + elements=elements, + shape=len(index), + fill=fill, + unique=unique, + ) + ).tolist() + else: + result_data = list( + draw( + npst.arrays( + dtype=object, + elements=elements, + shape=len(index), + fill=fill, + unique=unique, + ) + ).tolist() + ) + return pandas.Series(result_data, index=index, dtype=dtype, name=draw(name)) + else: + return pandas.Series( + (), + index=index, + dtype=( + dtype + if dtype is not None + else draw(dtype_for_elements_strategy(elements)) + ), + name=draw(name), + ) + + return result() + + +@dataclass(slots=True, frozen=False) +class column(Generic[Ex]): + """Data object for describing a column in a DataFrame. 
+ + Arguments: + + * name: the column name, or None to default to the column position. Must + be hashable, but can otherwise be any value supported as a pandas column + name. + * elements: the strategy for generating values in this column, or None + to infer it from the dtype. + * dtype: the dtype of the column, or None to infer it from the element + strategy. At least one of dtype or elements must be provided. + * fill: A default value for elements of the column. See + :func:`~hypothesis.extra.numpy.arrays` for a full explanation. + * unique: If all values in this column should be distinct. + """ + + name: str | int | None = None + elements: st.SearchStrategy[Ex] | None = None + dtype: Any = None + fill: st.SearchStrategy[Ex] | None = None + unique: bool = False + + +def columns( + names_or_number: int | Sequence[str], + *, + dtype: Any = None, + elements: st.SearchStrategy[Ex] | None = None, + fill: st.SearchStrategy[Ex] | None = None, + unique: bool = False, +) -> list[column[Ex]]: + """A convenience function for producing a list of :class:`column` objects + of the same general shape. + + The names_or_number argument is either a sequence of values, the + elements of which will be used as the name for individual column + objects, or a number, in which case that many unnamed columns will + be created. All other arguments are passed through verbatim to + create the columns. + """ + if isinstance(names_or_number, (int, float)): + names: list[int | str | None] = [None] * names_or_number + else: + names = list(names_or_number) + return [ + column(name=n, dtype=dtype, elements=elements, fill=fill, unique=unique) + for n in names + ] + + +@defines_strategy() +def data_frames( + columns: Sequence[column] | None = None, + *, + rows: st.SearchStrategy[dict | Sequence[Any]] | None = None, + index: st.SearchStrategy[Ex] | None = None, +) -> st.SearchStrategy[pandas.DataFrame]: + """Provides a strategy for producing a :class:`pandas.DataFrame`. + + Arguments: + + * columns: An iterable of :class:`column` objects describing the shape + of the generated DataFrame. + + * rows: A strategy for generating a row object. Should generate + either dicts mapping column names to values or a sequence mapping + column position to the value in that position (note that unlike the + :class:`pandas.DataFrame` constructor, single values are not allowed + here. Passing e.g. an integer is an error, even if there is only one + column). + + At least one of rows and columns must be provided. If both are + provided then the generated rows will be validated against the + columns and an error will be raised if they don't match. + + Caveats on using rows: + + * In general you should prefer using columns to rows, and only use + rows if the columns interface is insufficiently flexible to + describe what you need - you will get better performance and + example quality that way. + * If you provide rows and not columns, then the shape and dtype of + the resulting DataFrame may vary. e.g. if you have a mix of int + and float in the values for one column in your row entries, the + column will sometimes have an integral dtype and sometimes a float. + + * index: If not None, a strategy for generating indexes for the + resulting DataFrame. This can generate either :class:`pandas.Index` + objects or any sequence of values (which will be passed to the + Index constructor). 
+ + You will probably find it most convenient to use the + :func:`~hypothesis.extra.pandas.indexes` or + :func:`~hypothesis.extra.pandas.range_indexes` function to produce + values for this argument. + + Usage: + + The expected usage pattern is that you use :class:`column` and + :func:`columns` to specify a fixed shape of the DataFrame you want as + follows. For example the following gives a two column data frame: + + .. code-block:: pycon + + >>> from hypothesis.extra.pandas import column, data_frames + >>> data_frames([ + ... column('A', dtype=int), column('B', dtype=float)]).example() + A B + 0 2021915903 1.793898e+232 + 1 1146643993 inf + 2 -2096165693 1.000000e+07 + + If you want the values in different columns to interact in some way you + can use the rows argument. For example the following gives a two column + DataFrame where the value in the first column is always at most the value + in the second: + + .. code-block:: pycon + + >>> from hypothesis.extra.pandas import column, data_frames + >>> import hypothesis.strategies as st + >>> data_frames( + ... rows=st.tuples(st.floats(allow_nan=False), + ... st.floats(allow_nan=False)).map(sorted) + ... ).example() + 0 1 + 0 -3.402823e+38 9.007199e+15 + 1 -1.562796e-298 5.000000e-01 + + You can also combine the two: + + .. code-block:: pycon + + >>> from hypothesis.extra.pandas import columns, data_frames + >>> import hypothesis.strategies as st + >>> data_frames( + ... columns=columns(["lo", "hi"], dtype=float), + ... rows=st.tuples(st.floats(allow_nan=False), + ... st.floats(allow_nan=False)).map(sorted) + ... ).example() + lo hi + 0 9.314723e-49 4.353037e+45 + 1 -9.999900e-01 1.000000e+07 + 2 -2.152861e+134 -1.069317e-73 + + (Note that the column dtype must still be specified and will not be + inferred from the rows. This restriction may be lifted in future). + + Combining rows and columns has the following behaviour: + + * The column names and dtypes will be used. + * If the column is required to be unique, this will be enforced. + * Any values missing from the generated rows will be provided using the + column's fill. + * Any values in the row not present in the column specification (if + dicts are passed, if there are keys with no corresponding column name, + if sequences are passed if there are too many items) will result in + InvalidArgument being raised. + """ + if index is None: + index = range_indexes() + else: + check_strategy(index, "index") + + index_strategy = index + + if columns is None: + if rows is None: + raise InvalidArgument("At least one of rows and columns must be provided") + else: + + @st.composite + def rows_only(draw): + index = draw(index_strategy) + + def row(): + result = draw(rows) + check_type(abc.Iterable, result, "draw(row)") + return result + + if len(index) > 0: + return pandas.DataFrame([row() for _ in index], index=index) + else: + # If we haven't drawn any rows we need to draw one row and + # then discard it so that we get a consistent shape for the + # DataFrame. 
+ base = pandas.DataFrame([row()]) + return base.drop(0) + + return rows_only() + + assert columns is not None + cols = try_convert(tuple, columns, "columns") + + rewritten_columns = [] + column_names: set[str] = set() + + for i, c in enumerate(cols): + check_type(column, c, f"columns[{i}]") + + c = copy(c) + if c.name is None: + label = f"columns[{i}]" + c.name = i + else: + label = c.name + try: + hash(c.name) + except TypeError: + raise InvalidArgument( + f"Column names must be hashable, but columns[{i}].name was " + f"{c.name!r} of type {type(c.name).__name__}, which cannot be hashed." + ) from None + + if c.name in column_names: + raise InvalidArgument(f"duplicate definition of column name {c.name!r}") + + column_names.add(c.name) + c.elements, _ = elements_and_dtype(c.elements, c.dtype, label) + + if c.dtype is None and rows is not None: + raise InvalidArgument( + "Must specify a dtype for all columns when combining rows with columns." + ) + + c.fill = npst.fill_for( + fill=c.fill, elements=c.elements, unique=c.unique, name=label + ) + rewritten_columns.append(c) + + if rows is None: + + @st.composite + def just_draw_columns(draw): + index = draw(index_strategy) + local_index_strategy = st.just(index) + + data = OrderedDict((c.name, None) for c in rewritten_columns) + + # Depending on how the columns are going to be generated we group + # them differently to get better shrinking. For columns with fill + # enabled, the elements can be shrunk independently of the size, + # so we can just shrink by shrinking the index then shrinking the + # length and are generally much more free to move data around. + + # For columns with no filling the problem is harder, and drawing + # them like that would result in rows being very far apart from + # each other in the choice sequence, which gets in the way + # of shrinking. So what we do is reorder and draw those columns + # row wise, so that the values of each row are next to each other. + # This makes life easier for the shrinker when deleting choices. + + columns_without_fill = [c for c in rewritten_columns if c.fill.is_empty] + if columns_without_fill: + for c in columns_without_fill: + data[c.name] = pandas.Series( + np.zeros(shape=len(index), dtype=object), + index=index, + dtype=c.dtype, + ) + seen = {c.name: set() for c in columns_without_fill if c.unique} + + for i in range(len(index)): + for c in columns_without_fill: + if c.unique: + for _ in range(5): + value = draw(c.elements) + if value not in seen[c.name]: + seen[c.name].add(value) + break + else: + reject() + else: + value = draw(c.elements) + + try: + data[c.name].iloc[i] = value + except ValueError as err: # pragma: no cover + # This just works in Pandas 1.4 and later, but gives + # a confusing error on previous versions. + if c.dtype is None and not isinstance( + value, (float, int, str, bool, datetime, timedelta) + ): + raise ValueError( + f"Failed to add {value=} to column " + f"{c.name} with dtype=None. Maybe passing " + "dtype=object would help?" + ) from err + # Unclear how this could happen, but users find a way... 
+ raise + + for c in rewritten_columns: + if not c.fill.is_empty: + data[c.name] = draw( + series( + index=local_index_strategy, + dtype=c.dtype, + elements=c.elements, + fill=c.fill, + unique=c.unique, + ) + ) + + return pandas.DataFrame(data, index=index) + + return just_draw_columns() + else: + + @st.composite + def assign_rows(draw): + index = draw(index_strategy) + + result = pandas.DataFrame( + OrderedDict( + ( + c.name, + pandas.Series( + np.zeros(dtype=c.dtype, shape=len(index)), dtype=c.dtype + ), + ) + for c in rewritten_columns + ), + index=index, + ) + + fills = {} + + any_unique = any(c.unique for c in rewritten_columns) + + if any_unique: + all_seen = [set() if c.unique else None for c in rewritten_columns] + while all_seen[-1] is None: + all_seen.pop() + + for row_index in range(len(index)): + for _ in range(5): + original_row = draw(rows) + row = original_row + if isinstance(row, dict): + as_list = [None] * len(rewritten_columns) + for i, c in enumerate(rewritten_columns): + try: + as_list[i] = row[c.name] + except KeyError: + try: + as_list[i] = fills[i] + except KeyError: + if c.fill.is_empty: + raise InvalidArgument( + f"Empty fill strategy in {c!r} cannot " + f"complete row {original_row!r}" + ) from None + fills[i] = draw(c.fill) + as_list[i] = fills[i] + for k in row: + if k not in column_names: + raise InvalidArgument( + f"Row {row!r} contains column {k!r} not in " + f"columns {[c.name for c in rewritten_columns]!r})" + ) + row = as_list + if any_unique: + has_duplicate = False + for seen, value in zip(all_seen, row, strict=False): + if seen is None: + continue + if value in seen: + has_duplicate = True + break + seen.add(value) + if has_duplicate: + continue + row = list(try_convert(tuple, row, "draw(rows)")) + + if len(row) > len(rewritten_columns): + raise InvalidArgument( + f"Row {original_row!r} contains too many entries. Has " + f"{len(row)} but expected at most {len(rewritten_columns)}" + ) + while len(row) < len(rewritten_columns): + c = rewritten_columns[len(row)] + if c.fill.is_empty: + raise InvalidArgument( + f"Empty fill strategy in {c!r} cannot " + f"complete row {original_row!r}" + ) + row.append(draw(c.fill)) + result.iloc[row_index] = row + break + else: + reject() + return result + + return assign_rows() diff --git a/vendored/hypothesis/extra/pytestplugin.py b/vendored/hypothesis/extra/pytestplugin.py new file mode 100644 index 0000000..d21b209 --- /dev/null +++ b/vendored/hypothesis/extra/pytestplugin.py @@ -0,0 +1,19 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +""" +Stub for users who manually load our pytest plugin. + +The plugin implementation is now located in a top-level module outside the main +hypothesis tree, so that Pytest can load the plugin without thereby triggering +the import of Hypothesis itself (and thus loading our own plugins). 
+""" + +from _hypothesis_pytestplugin import * # noqa diff --git a/vendored/hypothesis/extra/pytz.py b/vendored/hypothesis/extra/pytz.py new file mode 100644 index 0000000..3b902a5 --- /dev/null +++ b/vendored/hypothesis/extra/pytz.py @@ -0,0 +1,61 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +""" +This module provides :pypi:`pytz` timezones. + +If you are unable to use the stdlib :mod:`zoneinfo` module, e.g. via the +:func:`hypothesis.strategies.timezones` strategy, you can use this +strategy with :py:func:`hypothesis.strategies.datetimes` and +:py:func:`hypothesis.strategies.times` to produce timezone-aware values. + +.. warning:: + + Since :mod:`zoneinfo` was added in Python 3.9, this extra + is deprecated. We intend to remove it after libraries + such as Pandas and Django complete their own migrations. +""" + +import datetime as dt + +import pytz +from pytz.tzfile import StaticTzInfo # type: ignore # considered private by typeshed + +from hypothesis import strategies as st +from hypothesis.strategies._internal.utils import cacheable, defines_strategy + +__all__ = ["timezones"] + + +@cacheable +@defines_strategy() +def timezones() -> st.SearchStrategy[dt.tzinfo]: + """Any timezone in the Olsen database, as a pytz tzinfo object. + + This strategy minimises to UTC, or the smallest possible fixed + offset, and is designed for use with :func:`hypothesis.strategies.datetimes`. + + .. tip:: + Prefer the :func:`hypothesis.strategies.timezones` strategy, which uses + the stdlib :mod:`zoneinfo` module and avoids `the many footguns in pytz + `__. + """ + all_timezones = [pytz.timezone(tz) for tz in pytz.all_timezones] + # Some timezones have always had a constant offset from UTC. This makes + # them simpler than timezones with daylight savings, and the smaller the + # absolute offset the simpler they are. Of course, UTC is even simpler! + static: list = [pytz.UTC] + static += sorted( + (t for t in all_timezones if isinstance(t, StaticTzInfo)), + key=lambda tz: abs(tz.utcoffset(dt.datetime(2000, 1, 1))), + ) + # Timezones which have changed UTC offset; best ordered by name. + dynamic = [tz for tz in all_timezones if tz not in static] + return st.sampled_from(static + dynamic) diff --git a/vendored/hypothesis/extra/redis.py b/vendored/hypothesis/extra/redis.py new file mode 100644 index 0000000..87a4e69 --- /dev/null +++ b/vendored/hypothesis/extra/redis.py @@ -0,0 +1,149 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import base64 +import json +from collections.abc import Iterable +from contextlib import contextmanager +from datetime import timedelta +from typing import Any + +from redis import Redis + +from hypothesis.database import ExampleDatabase +from hypothesis.internal.validation import check_type + + +class RedisExampleDatabase(ExampleDatabase): + """Store Hypothesis examples as sets in the given :class:`~redis.Redis` datastore. + + This is particularly useful for shared databases, as per the recipe + for a :class:`~hypothesis.database.MultiplexedDatabase`. + + .. note:: + + If a test has not been run for ``expire_after``, those examples will be allowed + to expire. The default time-to-live persists examples between weekly runs. + """ + + def __init__( + self, + redis: Redis, + *, + expire_after: timedelta = timedelta(days=8), + key_prefix: bytes = b"hypothesis-example:", + listener_channel: str = "hypothesis-changes", + ): + super().__init__() + check_type(Redis, redis, "redis") + check_type(timedelta, expire_after, "expire_after") + check_type(bytes, key_prefix, "key_prefix") + check_type(str, listener_channel, "listener_channel") + self.redis = redis + self._expire_after = expire_after + self._prefix = key_prefix + self.listener_channel = listener_channel + self._pubsub: Any = None + + def __repr__(self) -> str: + return ( + f"RedisExampleDatabase({self.redis!r}, expire_after={self._expire_after!r})" + ) + + def __eq__(self, other: object) -> bool: + return ( + isinstance(other, RedisExampleDatabase) + and self.redis == other.redis + and self._prefix == other._prefix + and self.listener_channel == other.listener_channel + ) + + @contextmanager + def _pipeline( + self, + *reset_expire_keys, + execute_and_publish=True, + event_type=None, + to_publish=None, + ): + # Context manager to batch updates and expiry reset, reducing TCP roundtrips + pipe = self.redis.pipeline() + yield pipe + for key in reset_expire_keys: + pipe.expire(self._prefix + key, self._expire_after) + if execute_and_publish: + changed = pipe.execute() + # pipe.execute returns the rows modified for each operation, which includes + # the operations performed during the yield, followed by the n operations + # from pipe.expire. Look at just the operations from during the yield.
+ changed = changed[: -len(reset_expire_keys)] + if any(count > 0 for count in changed): + assert to_publish is not None + assert event_type is not None + self._publish((event_type, to_publish)) + + def _publish(self, event): + event = (event[0], tuple(self._encode(v) for v in event[1])) + self.redis.publish(self.listener_channel, json.dumps(event)) + + def _encode(self, value: bytes) -> str: + return base64.b64encode(value).decode("ascii") + + def _decode(self, value: str) -> bytes: + return base64.b64decode(value) + + def fetch(self, key: bytes) -> Iterable[bytes]: + with self._pipeline(key, execute_and_publish=False) as pipe: + pipe.smembers(self._prefix + key) + yield from pipe.execute()[0] + + def save(self, key: bytes, value: bytes) -> None: + with self._pipeline(key, event_type="save", to_publish=(key, value)) as pipe: + pipe.sadd(self._prefix + key, value) + + def delete(self, key: bytes, value: bytes) -> None: + with self._pipeline(key, event_type="delete", to_publish=(key, value)) as pipe: + pipe.srem(self._prefix + key, value) + + def move(self, src: bytes, dest: bytes, value: bytes) -> None: + if src == dest: + self.save(dest, value) + return + + with self._pipeline(src, dest, execute_and_publish=False) as pipe: + pipe.srem(self._prefix + src, value) + pipe.sadd(self._prefix + dest, value) + + changed = pipe.execute() + if changed[0] > 0: + # did the value set of the first key change? + self._publish(("delete", (src, value))) + if changed[1] > 0: + # did the value set of the second key change? + self._publish(("save", (dest, value))) + + def _handle_message(self, message: dict) -> None: + # other message types include "subscribe" and "unsubscribe". these are + # sent to the client, but not to the pubsub channel. + assert message["type"] == "message" + data = json.loads(message["data"]) + event_type = data[0] + self._broadcast_change( + (event_type, tuple(self._decode(v) for v in data[1])) # type: ignore + ) + + def _start_listening(self) -> None: + self._pubsub = self.redis.pubsub() + self._pubsub.subscribe(**{self.listener_channel: self._handle_message}) + + def _stop_listening(self) -> None: + self._pubsub.unsubscribe() + self._pubsub.close() + self._pubsub = None diff --git a/vendored/hypothesis/internal/__init__.py b/vendored/hypothesis/internal/__init__.py new file mode 100644 index 0000000..fcb1ac6 --- /dev/null +++ b/vendored/hypothesis/internal/__init__.py @@ -0,0 +1,9 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. diff --git a/vendored/hypothesis/internal/cache.py b/vendored/hypothesis/internal/cache.py new file mode 100644 index 0000000..899576f --- /dev/null +++ b/vendored/hypothesis/internal/cache.py @@ -0,0 +1,349 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import threading +from collections import OrderedDict +from dataclasses import dataclass +from typing import Any, Generic, TypeVar + +from hypothesis.errors import InvalidArgument + +K = TypeVar("K") +V = TypeVar("V") + + +@dataclass(slots=True, frozen=False) +class Entry(Generic[K, V]): + key: K + value: V + score: int + pins: int = 0 + + @property + def sort_key(self) -> tuple[int, ...]: + if self.pins == 0: + # Unpinned entries are sorted by score. + return (0, self.score) + else: + # Pinned entries sort after unpinned ones. Beyond that, we don't + # worry about their relative order. + return (1,) + + +class GenericCache(Generic[K, V]): + """Generic supertype for cache implementations. + + Defines a dict-like mapping with a maximum size, where as well as mapping + to a value, each key also maps to a score. When a write would cause the + dict to exceed its maximum size, it first evicts the existing key with + the smallest score, then adds the new key to the map. If due to pinning + no key can be evicted, ValueError is raised. + + A key has the following lifecycle: + + 1. key is written for the first time, the key is given the score + self.new_entry(key, value) + 2. whenever an existing key is read or written, self.on_access(key, value, + score) is called. This returns a new score for the key. + 3. After a key is evicted, self.on_evict(key, value, score) is called. + + The cache will be in a valid state in all of these cases. + + Implementations are expected to implement new_entry and optionally + on_access and on_evict to implement a specific scoring strategy. + """ + + __slots__ = ("_threadlocal", "max_size") + + def __init__(self, max_size: int): + if max_size <= 0: + raise InvalidArgument("Cache size must be at least one.") + + self.max_size = max_size + + # Implementation: We store a binary heap of Entry objects in self.data, + # with the heap property requiring that a parent's score is <= that of + # its children. keys_to_index then maps keys to their index in the + # heap. We keep these two in sync automatically - the heap is never + # reordered without updating the index. + self._threadlocal = threading.local() + + @property + def keys_to_indices(self) -> dict[K, int]: + try: + return self._threadlocal.keys_to_indices + except AttributeError: + self._threadlocal.keys_to_indices = {} + return self._threadlocal.keys_to_indices + + @property + def data(self) -> list[Entry[K, V]]: + try: + return self._threadlocal.data + except AttributeError: + self._threadlocal.data = [] + return self._threadlocal.data + + def __len__(self) -> int: + assert len(self.keys_to_indices) == len(self.data) + return len(self.data) + + def __contains__(self, key: K) -> bool: + return key in self.keys_to_indices + + def __getitem__(self, key: K) -> V: + i = self.keys_to_indices[key] + result = self.data[i] + self.__entry_was_accessed(i) + return result.value + + def __setitem__(self, key: K, value: V) -> None: + evicted = None + try: + i = self.keys_to_indices[key] + except KeyError: + entry = Entry(key, value, self.new_entry(key, value)) + if len(self.data) >= self.max_size: + evicted = self.data[0] + if evicted.pins > 0: + raise ValueError( + "Cannot increase size of cache where all keys have been pinned." 
+ ) from None + del self.keys_to_indices[evicted.key] + i = 0 + self.data[0] = entry + else: + i = len(self.data) + self.data.append(entry) + self.keys_to_indices[key] = i + self.__balance(i) + else: + entry = self.data[i] + assert entry.key == key + entry.value = value + self.__entry_was_accessed(i) + + if evicted is not None: + if self.data[0] is not entry: + assert evicted.sort_key <= self.data[0].sort_key + self.on_evict(evicted.key, evicted.value, evicted.score) + + def __iter__(self): + return iter(self.keys_to_indices) + + def pin(self, key: K, value: V) -> None: + """Mark ``key`` as pinned (with the given value). That is, it may not + be evicted until ``unpin(key)`` has been called. The same key may be + pinned multiple times, possibly changing its value, and will not be + unpinned until the same number of calls to unpin have been made. + """ + self[key] = value + + i = self.keys_to_indices[key] + entry = self.data[i] + entry.pins += 1 + if entry.pins == 1: + self.__balance(i) + + def unpin(self, key: K) -> None: + """Undo one previous call to ``pin(key)``. The value stays the same. + Once all calls are undone this key may be evicted as normal.""" + i = self.keys_to_indices[key] + entry = self.data[i] + if entry.pins == 0: + raise ValueError(f"Key {key!r} has not been pinned") + entry.pins -= 1 + if entry.pins == 0: + self.__balance(i) + + def is_pinned(self, key: K) -> bool: + """Returns True if the key is currently pinned.""" + i = self.keys_to_indices[key] + return self.data[i].pins > 0 + + def clear(self) -> None: + """Remove all keys, regardless of their pinned status.""" + del self.data[:] + self.keys_to_indices.clear() + + def __repr__(self) -> str: + return "{" + ", ".join(f"{e.key!r}: {e.value!r}" for e in self.data) + "}" + + def new_entry(self, key: K, value: V) -> int: + """Called when a key is written that does not currently appear in the + map. + + Returns the score to associate with the key. + """ + raise NotImplementedError + + def on_access(self, key: K, value: V, score: Any) -> Any: + """Called every time a key that is already in the map is read or + written. + + Returns the new score for the key. + """ + return score + + def on_evict(self, key: K, value: V, score: Any) -> Any: + """Called after a key has been evicted, with the score it had had at + the point of eviction.""" + + def check_valid(self) -> None: + """Debugging method for use in tests. + + Asserts that all of the cache's invariants hold. When everything + is working correctly this should be an expensive no-op. + """ + assert len(self.keys_to_indices) == len(self.data) + for i, e in enumerate(self.data): + assert self.keys_to_indices[e.key] == i + for j in [i * 2 + 1, i * 2 + 2]: + if j < len(self.data): + assert e.sort_key <= self.data[j].sort_key, self.data + + def __entry_was_accessed(self, i: int) -> None: + entry = self.data[i] + new_score = self.on_access(entry.key, entry.value, entry.score) + if new_score != entry.score: + entry.score = new_score + # changing the score of a pinned entry cannot unbalance the heap, as + # we place all pinned entries after unpinned ones, regardless of score. 
+ if entry.pins == 0: + self.__balance(i) + + def __swap(self, i: int, j: int) -> None: + assert i < j + assert self.data[j].sort_key < self.data[i].sort_key + self.data[i], self.data[j] = self.data[j], self.data[i] + self.keys_to_indices[self.data[i].key] = i + self.keys_to_indices[self.data[j].key] = j + + def __balance(self, i: int) -> None: + """When we have made a modification to the heap such that + the heap property has been violated locally around i but previously + held for all other indexes (and no other values have been modified), + this fixes the heap so that the heap property holds everywhere.""" + # bubble up (if score is too low for current position) + while (parent := (i - 1) // 2) >= 0: + if self.__out_of_order(parent, i): + self.__swap(parent, i) + i = parent + else: + break + # or bubble down (if score is too high for current position) + while children := [j for j in (2 * i + 1, 2 * i + 2) if j < len(self.data)]: + smallest_child = min(children, key=lambda j: self.data[j].sort_key) + if self.__out_of_order(i, smallest_child): + self.__swap(i, smallest_child) + i = smallest_child + else: + break + + def __out_of_order(self, i: int, j: int) -> bool: + """Returns True if the indices i, j are in the wrong order. + + i must be the parent of j. + """ + assert i == (j - 1) // 2 + return self.data[j].sort_key < self.data[i].sort_key + + +class LRUReusedCache(GenericCache[K, V]): + """The only concrete implementation of GenericCache we use outside of tests + currently. + + Adopts a modified least-recently used eviction policy: It evicts the key + that has been used least recently, but it will always preferentially evict + keys that have never been accessed after insertion. Among keys that have been + accessed, it ignores the number of accesses. + + This retains most of the benefits of an LRU cache, but adds an element of + scan-resistance to the process: If we end up scanning through a large + number of keys without reusing them, this does not evict the existing + entries in preference for the new ones. + """ + + __slots__ = ("__tick",) + + def __init__(self, max_size: int): + super().__init__(max_size) + self.__tick: int = 0 + + def tick(self) -> int: + self.__tick += 1 + return self.__tick + + def new_entry(self, key: K, value: V) -> Any: + return (1, self.tick()) + + def on_access(self, key: K, value: V, score: Any) -> Any: + return (2, self.tick()) + + +class LRUCache(Generic[K, V]): + """ + This is a drop-in replacement for a GenericCache (despite the lack of inheritance) + in performance critical environments. It turns out that GenericCache's heap + balancing for arbitrary scores can be quite expensive compared to the doubly + linked list approach of lru_cache or OrderedDict. + + This class is a pure LRU and does not provide any sort of affininty towards + the number of accesses beyond recency. If soft-pinning entries which have been + accessed at least once is important, use LRUReusedCache. + """ + + # Here are some nice performance references for lru_cache vs OrderedDict: + # https://github.com/python/cpython/issues/72426#issuecomment-1093727671 + # https://discuss.python.org/t/simplify-lru-cache/18192/6 + # + # We use OrderedDict here because it is unclear to me we can provide the same + # api as GenericCache using @lru_cache without messing with lru_cache internals. + # + # Anecdotally, OrderedDict seems quite competitive with lru_cache, but perhaps + # that is localized to our access patterns. 
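+    #
+    # A short usage sketch for intuition (made-up values; behaviour is plain
+    # LRU eviction):
+    #
+    #     cache = LRUCache(max_size=2)
+    #     cache["a"] = 1
+    #     cache["b"] = 2
+    #     cache["a"]         # touching "a" makes "b" least recently used
+    #     cache["c"] = 3     # over max_size, so "b" is evicted
+    #     assert "a" in cache and "b" not in cache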
+ + def __init__(self, max_size: int) -> None: + assert max_size > 0 + self.max_size = max_size + self._threadlocal = threading.local() + + @property + def cache(self) -> OrderedDict[K, V]: + try: + return self._threadlocal.cache + except AttributeError: + self._threadlocal.cache = OrderedDict() + return self._threadlocal.cache + + def __setitem__(self, key: K, value: V) -> None: + self.cache[key] = value + self.cache.move_to_end(key) + + while len(self.cache) > self.max_size: + self.cache.popitem(last=False) + + def __getitem__(self, key: K) -> V: + val = self.cache[key] + self.cache.move_to_end(key) + return val + + def __iter__(self): + return iter(self.cache) + + def __len__(self) -> int: + return len(self.cache) + + def __contains__(self, key: K) -> bool: + return key in self.cache + + # implement GenericCache interface, for tests + def check_valid(self) -> None: + pass diff --git a/vendored/hypothesis/internal/cathetus.py b/vendored/hypothesis/internal/cathetus.py new file mode 100644 index 0000000..2012df4 --- /dev/null +++ b/vendored/hypothesis/internal/cathetus.py @@ -0,0 +1,62 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from math import fabs, inf, isinf, isnan, nan, sqrt +from sys import float_info + + +def cathetus(h: float, a: float) -> float: + """Given the lengths of the hypotenuse and a side of a right triangle, + return the length of the other side. + + A companion to the C99 hypot() function. Some care is needed to avoid + underflow in the case of small arguments, and overflow in the case of + large arguments as would occur for the naive implementation as + sqrt(h*h - a*a). The behaviour with respect the non-finite arguments + (NaNs and infinities) is designed to be as consistent as possible with + the C99 hypot() specifications. + + This function relies on the system ``sqrt`` function and so, like it, + may be inaccurate up to a relative error of (around) floating-point + epsilon. + + Based on the C99 implementation https://gitlab.com/jjg/cathetus + """ + if isnan(h): + return nan + + if isinf(h): + if isinf(a): + return nan + else: + # Deliberately includes the case when isnan(a), because the + # C99 standard mandates that hypot(inf, nan) == inf + return inf + + h = fabs(h) + a = fabs(a) + + if h < a: + return nan + + # Thanks to floating-point precision issues when performing multiple + # operations on extremely large or small values, we may rarely calculate + # a side length that is longer than the hypotenuse. This is clearly an + # error, so we clip to the hypotenuse as the best available estimate. 
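+    # Sketch of the idea behind the factored forms below: sqrt((h - a) * (h + a))
+    # equals sqrt(h*h - a*a) without ever forming h*h, which would overflow as
+    # soon as h exceeds sqrt(float_info.max). For ordinary inputs, e.g.
+    # cathetus(5.0, 3.0), this evaluates sqrt(2.0 * 8.0) == 4.0.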
+ if h > sqrt(float_info.max): + if h > float_info.max / 2: + b = sqrt(h - a) * sqrt(h / 2 + a / 2) * sqrt(2) + else: + b = sqrt(h - a) * sqrt(h + a) + elif h < sqrt(float_info.min): + b = sqrt(h - a) * sqrt(h + a) + else: + b = sqrt((h - a) * (h + a)) + return min(b, h) diff --git a/vendored/hypothesis/internal/charmap.py b/vendored/hypothesis/internal/charmap.py new file mode 100644 index 0000000..6b17f40 --- /dev/null +++ b/vendored/hypothesis/internal/charmap.py @@ -0,0 +1,337 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import codecs +import gzip +import json +import os +import sys +import tempfile +import unicodedata +from collections.abc import Collection, Iterable +from functools import cache +from pathlib import Path +from typing import Literal, TypeAlias + +from hypothesis.configuration import storage_directory +from hypothesis.control import _current_build_context +from hypothesis.errors import InvalidArgument +from hypothesis.internal.intervalsets import IntervalSet, IntervalsT + +# See https://en.wikipedia.org/wiki/Unicode_character_property#General_Category +CategoryName: TypeAlias = Literal[ + "L", # Letter + "Lu", # Letter, uppercase + "Ll", # Letter, lowercase + "Lt", # Letter, titlecase + "Lm", # Letter, modifier + "Lo", # Letter, other + "M", # Mark + "Mn", # Mark, nonspacing + "Mc", # Mark, spacing combining + "Me", # Mark, enclosing + "N", # Number + "Nd", # Number, decimal digit + "Nl", # Number, letter + "No", # Number, other + "P", # Punctuation + "Pc", # Punctuation, connector + "Pd", # Punctuation, dash + "Ps", # Punctuation, open + "Pe", # Punctuation, close + "Pi", # Punctuation, initial quote + "Pf", # Punctuation, final quote + "Po", # Punctuation, other + "S", # Symbol + "Sm", # Symbol, math + "Sc", # Symbol, currency + "Sk", # Symbol, modifier + "So", # Symbol, other + "Z", # Separator + "Zs", # Separator, space + "Zl", # Separator, line + "Zp", # Separator, paragraph + "C", # Other + "Cc", # Other, control + "Cf", # Other, format + "Cs", # Other, surrogate + "Co", # Other, private use + "Cn", # Other, not assigned +] +Categories: TypeAlias = Iterable[CategoryName] +CategoriesTuple: TypeAlias = tuple[CategoryName, ...] + + +def charmap_file(fname: str = "charmap") -> Path: + return storage_directory( + "unicode_data", unicodedata.unidata_version, f"{fname}.json.gz" + ) + + +_charmap: dict[CategoryName, IntervalsT] | None = None + + +def charmap() -> dict[CategoryName, IntervalsT]: + """Return a dict that maps a Unicode category, to a tuple of 2-tuples + covering the codepoint intervals for characters in that category. + + >>> charmap()['Co'] + ((57344, 63743), (983040, 1048573), (1048576, 1114109)) + """ + global _charmap + # Best-effort caching in the face of missing files and/or unwritable + # filesystems is fairly simple: check if loaded, else try loading, + # else calculate and try writing the cache. + if _charmap is None: + f = charmap_file() + try: + with gzip.GzipFile(f, "rb") as d: + tmp_charmap = dict(json.load(d)) + + except Exception: + # This loop is reduced to using only local variables for performance; + # indexing and updating containers is a ~3x slowdown. 
This doesn't fix + # https://github.com/HypothesisWorks/hypothesis/issues/2108 but it helps. + category = unicodedata.category # Local variable -> ~20% speedup! + tmp_charmap = {} + last_cat = category(chr(0)) + last_start = 0 + for i in range(1, sys.maxunicode + 1): + cat = category(chr(i)) + if cat != last_cat: + tmp_charmap.setdefault(last_cat, []).append((last_start, i - 1)) + last_cat, last_start = cat, i + tmp_charmap.setdefault(last_cat, []).append((last_start, sys.maxunicode)) + + try: + # Write the Unicode table atomically + tmpdir = storage_directory("tmp") + tmpdir.mkdir(exist_ok=True, parents=True) + fd, tmpfile = tempfile.mkstemp(dir=tmpdir) + os.close(fd) + # Explicitly set the mtime to get reproducible output + with gzip.GzipFile(tmpfile, "wb", mtime=1) as fp: + result = json.dumps(sorted(tmp_charmap.items())) + fp.write(result.encode()) + + os.renames(tmpfile, f) + except Exception: + pass + + # convert between lists and tuples + _charmap = { + k: tuple(tuple(pair) for pair in pairs) for k, pairs in tmp_charmap.items() + } + # each value is a tuple of 2-tuples (that is, tuples of length 2) + # and both elements of that tuple are integers. + for vs in _charmap.values(): + ints = list(sum(vs, ())) + assert all(isinstance(x, int) for x in ints) + assert ints == sorted(ints) + assert all(len(tup) == 2 for tup in vs) + + assert _charmap is not None + return _charmap + + +@cache +def intervals_from_codec(codec_name: str) -> IntervalSet: # pragma: no cover + """Return an IntervalSet of characters which are part of this codec.""" + assert codec_name == codecs.lookup(codec_name).name + fname = charmap_file(f"codec-{codec_name}") + try: + with gzip.GzipFile(fname) as gzf: + encodable_intervals = json.load(gzf) + + except Exception: + # This loop is kinda slow, but hopefully we don't need to do it very often! + encodable_intervals = [] + for i in range(sys.maxunicode + 1): + try: + chr(i).encode(codec_name) + except Exception: # usually _but not always_ UnicodeEncodeError + pass + else: + encodable_intervals.append((i, i)) + + res = IntervalSet(encodable_intervals) + res = res.union(res) + try: + # Write the Unicode table atomically + tmpdir = storage_directory("tmp") + tmpdir.mkdir(exist_ok=True, parents=True) + fd, tmpfile = tempfile.mkstemp(dir=tmpdir) + os.close(fd) + # Explicitly set the mtime to get reproducible output + with gzip.GzipFile(tmpfile, "wb", mtime=1) as f: + f.write(json.dumps(res.intervals).encode()) + os.renames(tmpfile, fname) + except Exception: + pass + return res + + +_categories: Categories | None = None + + +def categories() -> Categories: + """Return a tuple of Unicode categories in a normalised order. + + >>> categories() # doctest: +ELLIPSIS + ('Zl', 'Zp', 'Co', 'Me', 'Pc', ..., 'Cc', 'Cs') + """ + global _categories + if _categories is None: + cm = charmap() + categories = sorted(cm.keys(), key=lambda c: len(cm[c])) + categories.remove("Cc") # Other, Control + categories.remove("Cs") # Other, Surrogate + categories.append("Cc") + categories.append("Cs") + _categories = tuple(categories) + return _categories + + +def as_general_categories(cats: Categories, name: str = "cats") -> CategoriesTuple: + """Return a tuple of Unicode categories in a normalised order. 
+ + This function expands one-letter designations of a major class to include + all subclasses: + + >>> as_general_categories(['N']) + ('Nd', 'Nl', 'No') + + See section 4.5 of the Unicode standard for more on classes: + https://www.unicode.org/versions/Unicode10.0.0/ch04.pdf + + If the collection ``cats`` includes any elements that do not represent a + major class or a class with subclass, a deprecation warning is raised. + """ + major_classes = ("L", "M", "N", "P", "S", "Z", "C") + cs = categories() + out = set(cats) + for c in cats: + if c in major_classes: + out.discard(c) + out.update(x for x in cs if x.startswith(c)) + elif c not in cs: + raise InvalidArgument( + f"In {name}={cats!r}, {c!r} is not a valid Unicode category." + ) + return tuple(c for c in cs if c in out) + + +category_index_cache: dict[frozenset[CategoryName], IntervalsT] = {frozenset(): ()} + + +def _category_key(cats: Iterable[str] | None) -> CategoriesTuple: + """Return a normalised tuple of all Unicode categories that are in + `include`, but not in `exclude`. + + If include is None then default to including all categories. + Any item in include that is not a unicode character will be excluded. + + >>> _category_key(exclude=['So'], include=['Lu', 'Me', 'Cs', 'So']) + ('Me', 'Lu', 'Cs') + """ + cs = categories() + if cats is None: + cats = set(cs) + return tuple(c for c in cs if c in cats) + + +def _query_for_key(key: Categories) -> IntervalsT: + """Return a tuple of codepoint intervals covering characters that match one + or more categories in the tuple of categories `key`. + + >>> _query_for_key(categories()) + ((0, 1114111),) + >>> _query_for_key(('Zl', 'Zp', 'Co')) + ((8232, 8233), (57344, 63743), (983040, 1048573), (1048576, 1114109)) + """ + key = tuple(key) + # ignore ordering on the cache key to increase potential cache hits. + cache_key = frozenset(key) + context = _current_build_context.value + if context is None or not context.data.provider.avoid_realization: + try: + return category_index_cache[cache_key] + except KeyError: + pass + elif not key: # pragma: no cover # only on alternative backends + return () + assert key + if set(key) == set(categories()): + result = IntervalSet([(0, sys.maxunicode)]) + else: + result = IntervalSet(_query_for_key(key[:-1])).union( + IntervalSet(charmap()[key[-1]]) + ) + assert isinstance(result, IntervalSet) + if context is None or not context.data.provider.avoid_realization: + category_index_cache[cache_key] = result.intervals + return result.intervals + + +limited_category_index_cache: dict[ + tuple[CategoriesTuple, int, int, IntervalsT, IntervalsT], IntervalSet +] = {} + + +def query( + *, + categories: Categories | None = None, + min_codepoint: int | None = None, + max_codepoint: int | None = None, + include_characters: Collection[str] = "", + exclude_characters: Collection[str] = "", +) -> IntervalSet: + """Return a tuple of intervals covering the codepoints for all characters + that meet the criteria. + + >>> query() + ((0, 1114111),) + >>> query(min_codepoint=0, max_codepoint=128) + ((0, 128),) + >>> query(min_codepoint=0, max_codepoint=128, categories=['Lu']) + ((65, 90),) + >>> query(min_codepoint=0, max_codepoint=128, categories=['Lu'], + ... 
include_characters='☃') + ((65, 90), (9731, 9731)) + """ + if min_codepoint is None: + min_codepoint = 0 + if max_codepoint is None: + max_codepoint = sys.maxunicode + catkey = _category_key(categories) + character_intervals = IntervalSet.from_string("".join(include_characters)) + exclude_intervals = IntervalSet.from_string("".join(exclude_characters)) + qkey = ( + catkey, + min_codepoint, + max_codepoint, + character_intervals.intervals, + exclude_intervals.intervals, + ) + context = _current_build_context.value + if context is None or not context.data.provider.avoid_realization: + try: + return limited_category_index_cache[qkey] + except KeyError: + pass + base = _query_for_key(catkey) + result = [] + for u, v in base: + if v >= min_codepoint and u <= max_codepoint: + result.append((max(u, min_codepoint), min(v, max_codepoint))) + result = (IntervalSet(result) | character_intervals) - exclude_intervals + if context is None or not context.data.provider.avoid_realization: + limited_category_index_cache[qkey] = result + return result diff --git a/vendored/hypothesis/internal/compat.py b/vendored/hypothesis/internal/compat.py new file mode 100644 index 0000000..0c82a53 --- /dev/null +++ b/vendored/hypothesis/internal/compat.py @@ -0,0 +1,308 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import codecs +import copy +import dataclasses +import inspect +import itertools +import platform +import sys +import sysconfig +import typing +from functools import partial +from typing import ( + TYPE_CHECKING, + Any, + ForwardRef, + Optional, + TypedDict as TypedDict, + get_args, +) + +try: + BaseExceptionGroup = BaseExceptionGroup + ExceptionGroup = ExceptionGroup # pragma: no cover +except NameError: + from exceptiongroup import ( + BaseExceptionGroup as BaseExceptionGroup, + ExceptionGroup as ExceptionGroup, + ) +if TYPE_CHECKING: + from typing_extensions import ( + NotRequired as NotRequired, + TypedDict as TypedDict, + override as override, + ) + + from hypothesis.internal.conjecture.engine import ConjectureRunner +else: + # In order to use NotRequired, we need the version of TypedDict included in Python 3.11+. + if sys.version_info[:2] >= (3, 11): + from typing import NotRequired as NotRequired, TypedDict as TypedDict + else: + try: + from typing_extensions import ( + NotRequired as NotRequired, + TypedDict as TypedDict, + ) + except ImportError: + # We can use the old TypedDict from Python 3.8+ at runtime. 
+ class NotRequired: + """A runtime placeholder for the NotRequired type, which is not available in Python <3.11.""" + + def __class_getitem__(cls, item): + return cls + + try: + from typing import ( + override as override, + ) + except ImportError: + try: + from typing_extensions import ( + override as override, + ) + except ImportError: + override = lambda f: f + +PYPY = platform.python_implementation() == "PyPy" +GRAALPY = platform.python_implementation() == "GraalVM" +WINDOWS = platform.system() == "Windows" +# First defined in CPython 3.13, defaults to False +FREE_THREADED_CPYTHON = bool(sysconfig.get_config_var("Py_GIL_DISABLED")) + + +def add_note(exc, note): + try: + exc.add_note(note) + except AttributeError: + if not hasattr(exc, "__notes__"): + try: + exc.__notes__ = [] + except AttributeError: + return # give up, might be e.g. a frozen dataclass + exc.__notes__.append(note) + + +def escape_unicode_characters(s: str) -> str: + return codecs.encode(s, "unicode_escape").decode("ascii") + + +def int_from_bytes(data: bytes | bytearray) -> int: + return int.from_bytes(data, "big") + + +def int_to_bytes(i: int, size: int) -> bytes: + return i.to_bytes(size, "big") + + +def int_to_byte(i: int) -> bytes: + return bytes([i]) + + +def is_typed_named_tuple(cls: type) -> bool: + """Return True if cls is probably a subtype of `typing.NamedTuple`. + + Unfortunately types created with `class T(NamedTuple):` actually + subclass `tuple` directly rather than NamedTuple. This is annoying, + and means we just have to hope that nobody defines a different tuple + subclass with similar attributes. + """ + return ( + issubclass(cls, tuple) + and hasattr(cls, "_fields") + and (hasattr(cls, "_field_types") or hasattr(cls, "__annotations__")) + ) + + +def _hint_and_args(x): + return (x, *get_args(x)) + + +def get_type_hints(thing: object) -> dict[str, Any]: + """Like the typing version, but tries harder and never errors. + + Tries harder: if the thing to inspect is a class but typing.get_type_hints + raises an error or returns no hints, then this function will try calling it + on the __init__ method. This second step often helps with user-defined + classes on older versions of Python. The third step we take is trying + to fetch types from the __signature__ property. + They override any other ones we found earlier. + + Never errors: instead of raising TypeError for uninspectable objects, or + NameError for unresolvable forward references, just return an empty dict. + """ + if isinstance(thing, partial): + from hypothesis.internal.reflection import get_signature + + bound = set(get_signature(thing.func).parameters).difference( + get_signature(thing).parameters + ) + return {k: v for k, v in get_type_hints(thing.func).items() if k not in bound} + + try: + hints = typing.get_type_hints(thing, include_extras=True) + except (AttributeError, TypeError, NameError): # pragma: no cover + hints = {} + + if inspect.isclass(thing): + try: + hints.update(typing.get_type_hints(thing.__init__, include_extras=True)) + except (TypeError, NameError, AttributeError): + pass + + try: + if hasattr(thing, "__signature__"): + # It is possible for the signature and annotations attributes to + # differ on an object due to renamed arguments. 
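+            # A made-up example of the kind of case the loop below handles:
+            #
+            #     def f(x: int = None): ...
+            #     f.__signature__ = inspect.signature(f)
+            #
+            # Reading the signature yields the hint `x: int`, and because the
+            # default is None it is recorded as `int | None`.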
+ from hypothesis.internal.reflection import get_signature + from hypothesis.strategies._internal.types import is_a_type + + vkinds = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD) + for p in get_signature(thing).parameters.values(): + if ( + p.kind not in vkinds + and is_a_type(p.annotation) + and p.annotation is not p.empty + ): + p_hint = p.annotation + + # Defer to `get_type_hints` if signature annotation is, or + # contains, a forward reference that is otherwise resolved. + if any( + isinstance(sig_hint, ForwardRef) + and not isinstance(hint, ForwardRef) + for sig_hint, hint in zip( + _hint_and_args(p.annotation), + _hint_and_args(hints.get(p.name, Any)), + strict=False, + ) + ): + p_hint = hints[p.name] + if p.default is None: + hints[p.name] = p_hint | None + else: + hints[p.name] = p_hint + except (AttributeError, TypeError, NameError): # pragma: no cover + pass + + return hints + + +# Under Python 2, math.floor and math.ceil returned floats, which cannot +# represent large integers - eg `float(2**53) == float(2**53 + 1)`. +# We therefore implement them entirely in (long) integer operations. +# We still use the same trick on Python 3, because Numpy values and other +# custom __floor__ or __ceil__ methods may convert via floats. +# See issue #1667, Numpy issue 9068. +def floor(x): + y = int(x) + if y != x and x < 0: + return y - 1 + return y + + +def ceil(x): + y = int(x) + if y != x and x > 0: + return y + 1 + return y + + +def extract_bits(x: int, /, width: int | None = None) -> list[int]: + assert x >= 0 + result = [] + while x: + result.append(x & 1) + x >>= 1 + if width is not None: + result = (result + [0] * width)[:width] + result.reverse() + return result + + +# int.bit_count was added in python 3.10 +try: + bit_count = int.bit_count +except AttributeError: # pragma: no cover + bit_count = lambda self: sum(extract_bits(abs(self))) + + +def bad_django_TestCase(runner: Optional["ConjectureRunner"]) -> bool: + if runner is None or "django.test" not in sys.modules: + return False + else: # pragma: no cover + if not isinstance(runner, sys.modules["django.test"].TransactionTestCase): + return False + + from hypothesis.extra.django._impl import HypothesisTestCase + + return not isinstance(runner, HypothesisTestCase) + + +# see issue #3812 +if sys.version_info[:2] < (3, 12): + + def dataclass_asdict(obj, *, dict_factory=dict): + """ + A vendored variant of dataclasses.asdict. Includes the bugfix for + defaultdicts (cpython/32056) for all versions. See also issues/3812. + + This should be removed whenever we drop support for 3.11. We can use the + standard dataclasses.asdict after that point. 
+ """ + if not dataclasses._is_dataclass_instance(obj): # pragma: no cover + raise TypeError("asdict() should be called on dataclass instances") + return _asdict_inner(obj, dict_factory) + +else: # pragma: no cover + dataclass_asdict = dataclasses.asdict + + +def _asdict_inner(obj, dict_factory): + if dataclasses._is_dataclass_instance(obj): + return dict_factory( + (f.name, _asdict_inner(getattr(obj, f.name), dict_factory)) + for f in dataclasses.fields(obj) + ) + elif isinstance(obj, tuple) and hasattr(obj, "_fields"): + return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj]) + elif isinstance(obj, (list, tuple)): + return type(obj)(_asdict_inner(v, dict_factory) for v in obj) + elif isinstance(obj, dict): + if hasattr(type(obj), "default_factory"): + result = type(obj)(obj.default_factory) + for k, v in obj.items(): + result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory) + return result + return type(obj)( + (_asdict_inner(k, dict_factory), _asdict_inner(v, dict_factory)) + for k, v in obj.items() + ) + else: + return copy.deepcopy(obj) + + +if sys.version_info[:2] < (3, 13): + # batched was added in 3.12, strict flag in 3.13 + # copied from 3.13 docs reference implementation + + def batched(iterable, n, *, strict=False): + if n < 1: + raise ValueError("n must be at least one") + iterator = iter(iterable) + while batch := tuple(itertools.islice(iterator, n)): + if strict and len(batch) != n: # pragma: no cover + raise ValueError("batched(): incomplete batch") + yield batch + +else: # pragma: no cover + batched = itertools.batched diff --git a/vendored/hypothesis/internal/conjecture/__init__.py b/vendored/hypothesis/internal/conjecture/__init__.py new file mode 100644 index 0000000..fcb1ac6 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/__init__.py @@ -0,0 +1,9 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. diff --git a/vendored/hypothesis/internal/conjecture/choice.py b/vendored/hypothesis/internal/conjecture/choice.py new file mode 100644 index 0000000..b66f7e2 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/choice.py @@ -0,0 +1,622 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import math +from collections.abc import Callable, Hashable, Iterable, Sequence +from dataclasses import dataclass +from typing import ( + Literal, + TypeAlias, + TypedDict, + TypeVar, + cast, +) + +from hypothesis.errors import ChoiceTooLarge +from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float +from hypothesis.internal.conjecture.utils import identity +from hypothesis.internal.floats import float_to_int, make_float_clamper, sign_aware_lte +from hypothesis.internal.intervalsets import IntervalSet + +T = TypeVar("T") + + +class IntegerConstraints(TypedDict): + min_value: int | None + max_value: int | None + weights: dict[int, float] | None + shrink_towards: int + + +class FloatConstraints(TypedDict): + min_value: float + max_value: float + allow_nan: bool + smallest_nonzero_magnitude: float + + +class StringConstraints(TypedDict): + intervals: IntervalSet + min_size: int + max_size: int + + +class BytesConstraints(TypedDict): + min_size: int + max_size: int + + +class BooleanConstraints(TypedDict): + p: float + + +ChoiceT: TypeAlias = int | str | bool | float | bytes +ChoiceConstraintsT: TypeAlias = ( + IntegerConstraints + | FloatConstraints + | StringConstraints + | BytesConstraints + | BooleanConstraints +) +ChoiceTypeT: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"] +ChoiceKeyT: TypeAlias = ( + int | str | bytes | tuple[Literal["bool"], bool] | tuple[Literal["float"], int] +) + + +@dataclass(slots=True, frozen=False) +class ChoiceTemplate: + type: Literal["simplest"] + count: int | None + + def __post_init__(self) -> None: + if self.count is not None: + assert self.count > 0 + + +@dataclass(slots=True, frozen=False) +class ChoiceNode: + type: ChoiceTypeT + value: ChoiceT + constraints: ChoiceConstraintsT + was_forced: bool + index: int | None = None + + def copy( + self, + *, + with_value: ChoiceT | None = None, + with_constraints: ChoiceConstraintsT | None = None, + ) -> "ChoiceNode": + # we may want to allow this combination in the future, but for now it's + # a footgun. + if self.was_forced: + assert with_value is None, "modifying a forced node doesn't make sense" + # explicitly not copying index. node indices are only assigned via + # ExampleRecord. This prevents footguns with relying on stale indices + # after copying. + return ChoiceNode( + type=self.type, + value=self.value if with_value is None else with_value, + constraints=( + self.constraints if with_constraints is None else with_constraints + ), + was_forced=self.was_forced, + ) + + @property + def trivial(self) -> bool: + """ + A node is trivial if it cannot be simplified any further. This does not + mean that modifying a trivial node can't produce simpler test cases when + viewing the tree as a whole. Just that when viewing this node in + isolation, this is the simplest the node can get. + """ + if self.was_forced: + return True + + if self.type != "float": + zero_value = choice_from_index(0, self.type, self.constraints) + return choice_equal(self.value, zero_value) + else: + constraints = cast(FloatConstraints, self.constraints) + min_value = constraints["min_value"] + max_value = constraints["max_value"] + shrink_towards = 0.0 + + if min_value == -math.inf and max_value == math.inf: + return choice_equal(self.value, shrink_towards) + + if ( + not math.isinf(min_value) + and not math.isinf(max_value) + and math.ceil(min_value) <= math.floor(max_value) + ): + # the interval contains an integer. 
the simplest integer is the + # one closest to shrink_towards + shrink_towards = max(math.ceil(min_value), shrink_towards) + shrink_towards = min(math.floor(max_value), shrink_towards) + return choice_equal(self.value, float(shrink_towards)) + + # the real answer here is "the value in [min_value, max_value] with + # the lowest denominator when represented as a fraction". + # It would be good to compute this correctly in the future, but it's + # also not incorrect to be conservative here. + return False + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ChoiceNode): + return NotImplemented + + return ( + self.type == other.type + and choice_equal(self.value, other.value) + and choice_constraints_equal(self.type, self.constraints, other.constraints) + and self.was_forced == other.was_forced + ) + + def __hash__(self) -> int: + return hash( + ( + self.type, + choice_key(self.value), + choice_constraints_key(self.type, self.constraints), + self.was_forced, + ) + ) + + def __repr__(self) -> str: + forced_marker = " [forced]" if self.was_forced else "" + return f"{self.type} {self.value!r}{forced_marker} {self.constraints!r}" + + +def _size_to_index(size: int, *, alphabet_size: int) -> int: + # this is the closed form of this geometric series: + # for i in range(size): + # index += alphabet_size**i + if alphabet_size <= 0: + assert size == 0 + return 0 + if alphabet_size == 1: + return size + v = (alphabet_size**size - 1) // (alphabet_size - 1) + # mypy thinks (m: int) // (n: int) -> Any. assert it back to int. + return cast(int, v) + + +def _index_to_size(index: int, alphabet_size: int) -> int: + if alphabet_size == 0: + return 0 + elif alphabet_size == 1: + # there is only one string of each size, so the size is equal to its + # ordering. + return index + + # the closed-form inverse of _size_to_index is + # size = math.floor(math.log(index * (alphabet_size - 1) + 1, alphabet_size)) + # which is fast, but suffers from float precision errors. As performance is + # relatively critical here, we'll use this formula by default, but fall back to + # a much slower integer-only logarithm when the calculation is too close for + # comfort. + total = index * (alphabet_size - 1) + 1 + size = math.log(total, alphabet_size) + + # if this computation is close enough that it could have been affected by + # floating point errors, use a much slower integer-only logarithm instead, + # which is guaranteed to be precise. + if 0 < math.ceil(size) - size < 1e-7: + s = 0 + while total >= alphabet_size: + total //= alphabet_size + s += 1 + return s + return math.floor(size) + + +def collection_index( + choice: Sequence[T], + *, + min_size: int, + alphabet_size: int, + to_order: Callable[[T], int], +) -> int: + # Collections are ordered by counting the number of values of each size, + # starting with min_size. alphabet_size indicates how many options there + # are for a single element. to_order orders an element by returning an n ≥ 0. + + # we start by adding the size to the index, relative to min_size. + index = _size_to_index(len(choice), alphabet_size=alphabet_size) - _size_to_index( + min_size, alphabet_size=alphabet_size + ) + # We then add each element c to the index, starting from the end (so "ab" is + # simpler than "ba"). Each loop takes c at position i in the sequence and + # computes the number of sequences of size i which come before it in the ordering. 
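+    # As an illustrative sketch, with min_size=0, alphabet_size=2 and
+    # to_order=identity this produces the order
+    #     []     -> 0    [0]    -> 1    [1]    -> 2
+    #     [0, 0] -> 3    [0, 1] -> 4    [1, 0] -> 5    [1, 1] -> 6
+    # i.e. shorter collections first, then element-wise by to_order.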
+ + # this running_exp computation is equivalent to doing + # index += (alphabet_size**i) * n + # but reuses intermediate exponentiation steps for efficiency. + running_exp = 1 + for c in reversed(choice): + index += running_exp * to_order(c) + running_exp *= alphabet_size + return index + + +def collection_value( + index: int, + *, + min_size: int, + alphabet_size: int, + from_order: Callable[[int], T], +) -> list[T]: + from hypothesis.internal.conjecture.engine import BUFFER_SIZE + + # this function is probably easiest to make sense of as an inverse of + # collection_index, tracking ~corresponding lines of code between the two. + + index += _size_to_index(min_size, alphabet_size=alphabet_size) + size = _index_to_size(index, alphabet_size=alphabet_size) + # index -> value computation can be arbitrarily expensive for arbitrarily + # large min_size collections. short-circuit if the resulting size would be + # obviously-too-large. callers will generally turn this into a .mark_overrun(). + if size >= BUFFER_SIZE: + raise ChoiceTooLarge + + # subtract out the amount responsible for the size + index -= _size_to_index(size, alphabet_size=alphabet_size) + vals: list[T] = [] + for i in reversed(range(size)): + # optimization for common case when we hit index 0. Exponentiation + # on large integers is expensive! + if index == 0: + n = 0 + else: + n = index // (alphabet_size**i) + # subtract out the nearest multiple of alphabet_size**i + index -= n * (alphabet_size**i) + vals.append(from_order(n)) + return vals + + +def zigzag_index(value: int, *, shrink_towards: int) -> int: + # value | 0 1 -1 2 -2 3 -3 4 + # index | 0 1 2 3 4 5 6 7 + index = 2 * abs(shrink_towards - value) + if value > shrink_towards: + index -= 1 + return index + + +def zigzag_value(index: int, *, shrink_towards: int) -> int: + assert index >= 0 + # count how many "steps" away from shrink_towards we are. + n = (index + 1) // 2 + # now check if we're stepping up or down from shrink_towards. + if (index % 2) == 0: + n *= -1 + return shrink_towards + n + + +def choice_to_index(choice: ChoiceT, constraints: ChoiceConstraintsT) -> int: + # This function takes a choice in the choice sequence and returns the + # complexity index of that choice from among its possible values, where 0 + # is the simplest. + # + # Note that the index of a choice depends on its constraints. The simplest value + # (at index 0) for {"min_value": None, "max_value": None} is 0, while for + # {"min_value": 1, "max_value": None} the simplest value is 1. + # + # choice_from_index inverts this function. An invariant on both functions is + # that they must be injective. Unfortunately, floats do not currently respect + # this. That's not *good*, but nothing has blown up - yet. And ordering + # floats in a sane manner is quite hard, so I've left it for another day. + + if isinstance(choice, int) and not isinstance(choice, bool): + # Let a = shrink_towards. + # * Unbounded: Ordered by (|a - x|, sgn(a - x)). Think of a zigzag. + # [a, a + 1, a - 1, a + 2, a - 2, ...] + # * Semi-bounded: Same as unbounded, except stop on one side when you hit + # {min, max}_value. so min_value=-1 a=0 has order + # [0, 1, -1, 2, 3, 4, ...] + # * Bounded: Same as unbounded and semibounded, except stop on each side + # when you hit {min, max}_value. + # + # To simplify and gain intuition about this ordering, you can think about + # the most common case where 0 is first (a = 0). We deviate from this only + # rarely, e.g. 
for datetimes, where we generally want year 2000 to be + # simpler than year 0. + constraints = cast(IntegerConstraints, constraints) + shrink_towards = constraints["shrink_towards"] + min_value = constraints["min_value"] + max_value = constraints["max_value"] + + if min_value is not None: + shrink_towards = max(min_value, shrink_towards) + if max_value is not None: + shrink_towards = min(max_value, shrink_towards) + + if min_value is None and max_value is None: + # case: unbounded + return zigzag_index(choice, shrink_towards=shrink_towards) + elif min_value is not None and max_value is None: + # case: semibounded below + + # min_value = -2 + # index | 0 1 2 3 4 5 6 7 + # v | 0 1 -1 2 -2 3 4 5 + if abs(choice - shrink_towards) <= (shrink_towards - min_value): + return zigzag_index(choice, shrink_towards=shrink_towards) + return choice - min_value + elif max_value is not None and min_value is None: + # case: semibounded above + if abs(choice - shrink_towards) <= (max_value - shrink_towards): + return zigzag_index(choice, shrink_towards=shrink_towards) + return max_value - choice + else: + # case: bounded + + # range = [-2, 5] + # shrink_towards = 2 + # index | 0 1 2 3 4 5 6 7 + # v | 2 3 1 4 0 5 -1 -2 + # + # ^ with zero weights at index = [0, 2, 6] + # index | 0 1 2 3 4 + # v | 3 4 0 5 -2 + + assert min_value is not None + assert max_value is not None + assert constraints["weights"] is None or all( + w > 0 for w in constraints["weights"].values() + ), "technically possible but really annoying to support zero weights" + + # check which side gets exhausted first + if (shrink_towards - min_value) < (max_value - shrink_towards): + # Below shrink_towards gets exhausted first. Equivalent to + # semibounded below + if abs(choice - shrink_towards) <= (shrink_towards - min_value): + return zigzag_index(choice, shrink_towards=shrink_towards) + return choice - min_value + else: + # Above shrink_towards gets exhausted first. Equivalent to semibounded + # above + if abs(choice - shrink_towards) <= (max_value - shrink_towards): + return zigzag_index(choice, shrink_towards=shrink_towards) + return max_value - choice + elif isinstance(choice, bool): + constraints = cast(BooleanConstraints, constraints) + # Ordered by [False, True]. + p = constraints["p"] + if not (2 ** (-64) < p < (1 - 2 ** (-64))): + # only one option is possible, so whatever it is is first. 
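+            # (e.g. p <= 2**-64 can only ever produce False and
+            # p >= 1 - 2**-64 can only ever produce True, so the single
+            # permitted value sits at index 0.)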
+ return 0 + return int(choice) + elif isinstance(choice, bytes): + constraints = cast(BytesConstraints, constraints) + return collection_index( + list(choice), + min_size=constraints["min_size"], + alphabet_size=2**8, + to_order=identity, + ) + elif isinstance(choice, str): + constraints = cast(StringConstraints, constraints) + intervals = constraints["intervals"] + return collection_index( + choice, + min_size=constraints["min_size"], + alphabet_size=len(intervals), + to_order=intervals.index_from_char_in_shrink_order, + ) + elif isinstance(choice, float): + sign = int(math.copysign(1.0, choice) < 0) + return (sign << 64) | float_to_lex(abs(choice)) + else: + raise NotImplementedError + + +def choice_from_index( + index: int, choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT +) -> ChoiceT: + assert index >= 0 + if choice_type == "integer": + constraints = cast(IntegerConstraints, constraints) + shrink_towards = constraints["shrink_towards"] + min_value = constraints["min_value"] + max_value = constraints["max_value"] + + if min_value is not None: + shrink_towards = max(min_value, shrink_towards) + if max_value is not None: + shrink_towards = min(max_value, shrink_towards) + + if min_value is None and max_value is None: + # case: unbounded + return zigzag_value(index, shrink_towards=shrink_towards) + elif min_value is not None and max_value is None: + # case: semibounded below + if index <= zigzag_index(min_value, shrink_towards=shrink_towards): + return zigzag_value(index, shrink_towards=shrink_towards) + return index + min_value + elif max_value is not None and min_value is None: + # case: semibounded above + if index <= zigzag_index(max_value, shrink_towards=shrink_towards): + return zigzag_value(index, shrink_towards=shrink_towards) + return max_value - index + else: + # case: bounded + assert min_value is not None + assert max_value is not None + assert constraints["weights"] is None or all( + w > 0 for w in constraints["weights"].values() + ), "possible but really annoying to support zero weights" + + if (shrink_towards - min_value) < (max_value - shrink_towards): + # equivalent to semibounded below case + if index <= zigzag_index(min_value, shrink_towards=shrink_towards): + return zigzag_value(index, shrink_towards=shrink_towards) + return index + min_value + else: + # equivalent to semibounded above case + if index <= zigzag_index(max_value, shrink_towards=shrink_towards): + return zigzag_value(index, shrink_towards=shrink_towards) + return max_value - index + elif choice_type == "boolean": + constraints = cast(BooleanConstraints, constraints) + # Ordered by [False, True]. + p = constraints["p"] + only = None + if p <= 2 ** (-64): + only = False + elif p >= (1 - 2 ** (-64)): + only = True + + assert index in {0, 1} + if only is not None: + # only one choice + assert index == 0 + return only + return bool(index) + elif choice_type == "bytes": + constraints = cast(BytesConstraints, constraints) + value_b = collection_value( + index, + min_size=constraints["min_size"], + alphabet_size=2**8, + from_order=identity, + ) + return bytes(value_b) + elif choice_type == "string": + constraints = cast(StringConstraints, constraints) + intervals = constraints["intervals"] + # _s because mypy is unhappy with reusing different-typed names in branches, + # even if the branches are disjoint. 
+ value_s = collection_value( + index, + min_size=constraints["min_size"], + alphabet_size=len(intervals), + from_order=intervals.char_in_shrink_order, + ) + return "".join(value_s) + elif choice_type == "float": + constraints = cast(FloatConstraints, constraints) + sign = -1 if index >> 64 else 1 + result = sign * lex_to_float(index & ((1 << 64) - 1)) + + clamper = make_float_clamper( + min_value=constraints["min_value"], + max_value=constraints["max_value"], + smallest_nonzero_magnitude=constraints["smallest_nonzero_magnitude"], + allow_nan=constraints["allow_nan"], + ) + return clamper(result) + else: + raise NotImplementedError + + +def choice_permitted(choice: ChoiceT, constraints: ChoiceConstraintsT) -> bool: + if isinstance(choice, int) and not isinstance(choice, bool): + constraints = cast(IntegerConstraints, constraints) + min_value = constraints["min_value"] + max_value = constraints["max_value"] + if min_value is not None and choice < min_value: + return False + return not (max_value is not None and choice > max_value) + elif isinstance(choice, float): + constraints = cast(FloatConstraints, constraints) + if math.isnan(choice): + return constraints["allow_nan"] + if 0 < abs(choice) < constraints["smallest_nonzero_magnitude"]: + return False + return sign_aware_lte(constraints["min_value"], choice) and sign_aware_lte( + choice, constraints["max_value"] + ) + elif isinstance(choice, str): + constraints = cast(StringConstraints, constraints) + if len(choice) < constraints["min_size"]: + return False + if ( + constraints["max_size"] is not None + and len(choice) > constraints["max_size"] + ): + return False + return all(ord(c) in constraints["intervals"] for c in choice) + elif isinstance(choice, bytes): + constraints = cast(BytesConstraints, constraints) + if len(choice) < constraints["min_size"]: + return False + return constraints["max_size"] is None or len(choice) <= constraints["max_size"] + elif isinstance(choice, bool): + constraints = cast(BooleanConstraints, constraints) + if constraints["p"] <= 0: + return choice is False + if constraints["p"] >= 1: + return choice is True + return True + else: + raise NotImplementedError(f"unhandled type {type(choice)} with value {choice}") + + +def choices_key(choices: Sequence[ChoiceT]) -> tuple[ChoiceKeyT, ...]: + return tuple(choice_key(choice) for choice in choices) + + +def choice_key(choice: ChoiceT) -> ChoiceKeyT: + if isinstance(choice, float): + # float_to_int to distinguish -0.0/0.0, signaling/nonsignaling nans, etc, + # and then add a "float" key to avoid colliding with actual integers. 
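+        # e.g. choice_key(0.0) and choice_key(-0.0) map to distinct keys, and
+        # neither collides with choice_key(0), which stays a plain int below.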
+ return ("float", float_to_int(choice)) + if isinstance(choice, bool): + # avoid choice_key(0) == choice_key(False) + return ("bool", choice) + return choice + + +def choice_equal(choice1: ChoiceT, choice2: ChoiceT) -> bool: + assert type(choice1) is type(choice2), (choice1, choice2) + return choice_key(choice1) == choice_key(choice2) + + +def choice_constraints_equal( + choice_type: ChoiceTypeT, + constraints1: ChoiceConstraintsT, + constraints2: ChoiceConstraintsT, +) -> bool: + return choice_constraints_key(choice_type, constraints1) == choice_constraints_key( + choice_type, constraints2 + ) + + +def choice_constraints_key( + choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT +) -> tuple[Hashable, ...]: + if choice_type == "float": + constraints = cast(FloatConstraints, constraints) + return ( + float_to_int(constraints["min_value"]), + float_to_int(constraints["max_value"]), + constraints["allow_nan"], + constraints["smallest_nonzero_magnitude"], + ) + if choice_type == "integer": + constraints = cast(IntegerConstraints, constraints) + return ( + constraints["min_value"], + constraints["max_value"], + None if constraints["weights"] is None else tuple(constraints["weights"]), + constraints["shrink_towards"], + ) + return tuple(constraints[key] for key in sorted(constraints)) # type: ignore + + +def choices_size(choices: Iterable[ChoiceT]) -> int: + from hypothesis.database import choices_to_bytes + + return len(choices_to_bytes(choices)) diff --git a/vendored/hypothesis/internal/conjecture/data.py b/vendored/hypothesis/internal/conjecture/data.py new file mode 100644 index 0000000..4ac53e9 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/data.py @@ -0,0 +1,1355 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import math +import time +from collections import defaultdict +from collections.abc import Hashable, Iterable, Iterator, Sequence +from dataclasses import dataclass, field +from enum import IntEnum +from functools import cached_property +from random import Random +from typing import ( + TYPE_CHECKING, + Any, + Literal, + NoReturn, + TypeAlias, + TypeVar, + cast, + overload, +) + +from hypothesis.errors import ( + CannotProceedScopeT, + ChoiceTooLarge, + Frozen, + InvalidArgument, + StopTest, +) +from hypothesis.internal.cache import LRUCache +from hypothesis.internal.compat import add_note +from hypothesis.internal.conjecture.choice import ( + BooleanConstraints, + BytesConstraints, + ChoiceConstraintsT, + ChoiceNode, + ChoiceT, + ChoiceTemplate, + ChoiceTypeT, + FloatConstraints, + IntegerConstraints, + StringConstraints, + choice_constraints_key, + choice_from_index, + choice_permitted, + choices_size, +) +from hypothesis.internal.conjecture.junkdrawer import IntList, gc_cumulative_time +from hypothesis.internal.conjecture.providers import ( + COLLECTION_DEFAULT_MAX_SIZE, + HypothesisProvider, + PrimitiveProvider, +) +from hypothesis.internal.conjecture.utils import calc_label_from_name +from hypothesis.internal.escalation import InterestingOrigin +from hypothesis.internal.floats import ( + SMALLEST_SUBNORMAL, + float_to_int, + int_to_float, + sign_aware_lte, +) +from hypothesis.internal.intervalsets import IntervalSet +from hypothesis.internal.observability import PredicateCounts +from hypothesis.reporting import debug_report +from hypothesis.utils.conventions import not_set +from hypothesis.utils.threading import ThreadLocal + +if TYPE_CHECKING: + from hypothesis.strategies import SearchStrategy + from hypothesis.strategies._internal.core import DataObject + from hypothesis.strategies._internal.random import RandomState + from hypothesis.strategies._internal.strategies import Ex + + +def __getattr__(name: str) -> Any: + if name == "AVAILABLE_PROVIDERS": + from hypothesis._settings import note_deprecation + from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS + + note_deprecation( + "hypothesis.internal.conjecture.data.AVAILABLE_PROVIDERS has been moved to " + "hypothesis.internal.conjecture.providers.AVAILABLE_PROVIDERS.", + since="2025-01-25", + has_codemod=False, + stacklevel=1, + ) + return AVAILABLE_PROVIDERS + + raise AttributeError( + f"Module 'hypothesis.internal.conjecture.data' has no attribute {name}" + ) + + +T = TypeVar("T") +TargetObservations = dict[str, int | float] +# index, choice_type, constraints, forced value +MisalignedAt: TypeAlias = tuple[int, ChoiceTypeT, ChoiceConstraintsT, ChoiceT | None] + +TOP_LABEL = calc_label_from_name("top") +MAX_DEPTH = 100 + +threadlocal = ThreadLocal(global_test_counter=int) + + +class Status(IntEnum): + OVERRUN = 0 + INVALID = 1 + VALID = 2 + INTERESTING = 3 + + def __repr__(self) -> str: + return f"Status.{self.name}" + + +@dataclass(slots=True, frozen=True) +class StructuralCoverageTag: + label: int + + +STRUCTURAL_COVERAGE_CACHE: dict[int, StructuralCoverageTag] = {} + + +def structural_coverage(label: int) -> StructuralCoverageTag: + try: + return STRUCTURAL_COVERAGE_CACHE[label] + except KeyError: + return STRUCTURAL_COVERAGE_CACHE.setdefault(label, StructuralCoverageTag(label)) + + +# This cache can be quite hot and so we prefer LRUCache over LRUReusedCache for +# performance. We lose scan resistance, but that's probably fine here. 
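+# ("Scan resistance" above refers to LRUReusedCache preferring to keep keys
+# that have been reused at least once when a long run of never-reused keys
+# passes through; plain LRUCache offers no such protection, which we accept
+# here in exchange for cheaper accesses.)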
+POOLED_CONSTRAINTS_CACHE: LRUCache[tuple[Any, ...], ChoiceConstraintsT] = LRUCache(4096) + + +class Span: + """A span tracks the hierarchical structure of choices within a single test run. + + Spans are created to mark regions of the choice sequence that that are + logically related to each other. For instance, Hypothesis tracks: + - A single top-level span for the entire choice sequence + - A span for the choices made by each strategy + - Some strategies define additional spans within their choices. For instance, + st.lists() tracks the "should add another element" choice and the "add + another element" choices as separate spans. + + Spans provide useful information to the shrinker, mutator, targeted PBT, + and other subsystems of Hypothesis. + + Rather than store each ``Span`` as a rich object, it is actually + just an index into the ``Spans`` class defined below. This has two + purposes: Firstly, for most properties of spans we will never need + to allocate storage at all, because most properties are not used on + most spans. Secondly, by storing the spans as compact lists + of integers, we save a considerable amount of space compared to + Python's normal object size. + + This does have the downside that it increases the amount of allocation + we do, and slows things down as a result, in some usage patterns because + we repeatedly allocate the same Span or int objects, but it will + often dramatically reduce our memory usage, so is worth it. + """ + + __slots__ = ("index", "owner") + + def __init__(self, owner: "Spans", index: int) -> None: + self.owner = owner + self.index = index + + def __eq__(self, other: object) -> bool: + if self is other: + return True + if not isinstance(other, Span): + return NotImplemented + return (self.owner is other.owner) and (self.index == other.index) + + def __ne__(self, other: object) -> bool: + if self is other: + return False + if not isinstance(other, Span): + return NotImplemented + return (self.owner is not other.owner) or (self.index != other.index) + + def __repr__(self) -> str: + return f"spans[{self.index}]" + + @property + def label(self) -> int: + """A label is an opaque value that associates each span with its + approximate origin, such as a particular strategy class or a particular + kind of draw.""" + return self.owner.labels[self.owner.label_indices[self.index]] + + @property + def parent(self) -> int | None: + """The index of the span that this one is nested directly within.""" + if self.index == 0: + return None + return self.owner.parentage[self.index] + + @property + def start(self) -> int: + return self.owner.starts[self.index] + + @property + def end(self) -> int: + return self.owner.ends[self.index] + + @property + def depth(self) -> int: + """ + Depth of this span in the span tree. The top-level span has a depth of 0. + """ + return self.owner.depths[self.index] + + @property + def discarded(self) -> bool: + """True if this is span's ``stop_span`` call had ``discard`` set to + ``True``. This means we believe that the shrinker should be able to delete + this span completely, without affecting the value produced by its enclosing + strategy. 
Typically set when a rejection sampler decides to reject a + generated value and try again.""" + return self.index in self.owner.discarded + + @property + def choice_count(self) -> int: + """The number of choices in this span.""" + return self.end - self.start + + @property + def children(self) -> "list[Span]": + """The list of all spans with this as a parent, in increasing index + order.""" + return [self.owner[i] for i in self.owner.children[self.index]] + + +class SpanProperty: + """There are many properties of spans that we calculate by + essentially rerunning the test case multiple times based on the + calls which we record in SpanProperty. + + This class defines a visitor, subclasses of which can be used + to calculate these properties. + """ + + def __init__(self, spans: "Spans"): + self.span_stack: list[int] = [] + self.spans = spans + self.span_count = 0 + self.choice_count = 0 + + def run(self) -> Any: + """Rerun the test case with this visitor and return the + results of ``self.finish()``.""" + for record in self.spans.trail: + if record == TrailType.STOP_SPAN_DISCARD: + self.__pop(discarded=True) + elif record == TrailType.STOP_SPAN_NO_DISCARD: + self.__pop(discarded=False) + elif record == TrailType.CHOICE: + self.choice_count += 1 + else: + # everything after TrailType.CHOICE is the label of a span start. + self.__push(record - TrailType.CHOICE - 1) + + return self.finish() + + def __push(self, label_index: int) -> None: + i = self.span_count + assert i < len(self.spans) + self.start_span(i, label_index=label_index) + self.span_count += 1 + self.span_stack.append(i) + + def __pop(self, *, discarded: bool) -> None: + i = self.span_stack.pop() + self.stop_span(i, discarded=discarded) + + def start_span(self, i: int, label_index: int) -> None: + """Called at the start of each span, with ``i`` the + index of the span and ``label_index`` the index of + its label in ``self.spans.labels``.""" + + def stop_span(self, i: int, *, discarded: bool) -> None: + """Called at the end of each span, with ``i`` the + index of the span and ``discarded`` being ``True`` if ``stop_span`` + was called with ``discard=True``.""" + + def finish(self) -> Any: + raise NotImplementedError + + +class TrailType(IntEnum): + STOP_SPAN_DISCARD = 1 + STOP_SPAN_NO_DISCARD = 2 + CHOICE = 3 + # every trail element larger than TrailType.CHOICE is the label of a span + # start, offset by its index. So the first span label is stored as 4, the + # second as 5, etc, regardless of its actual integer label. + + +class SpanRecord: + """Records the series of ``start_span``, ``stop_span``, and + ``draw_bits`` calls so that these may be stored in ``Spans`` and + replayed when we need to know about the structure of individual + ``Span`` objects. + + Note that there is significant similarity between this class and + ``DataObserver``, and the plan is to eventually unify them, but + they currently have slightly different functions and implementations. 
+ """ + + def __init__(self) -> None: + self.labels: list[int] = [] + self.__index_of_labels: dict[int, int] | None = {} + self.trail = IntList() + self.nodes: list[ChoiceNode] = [] + + def freeze(self) -> None: + self.__index_of_labels = None + + def record_choice(self) -> None: + self.trail.append(TrailType.CHOICE) + + def start_span(self, label: int) -> None: + assert self.__index_of_labels is not None + try: + i = self.__index_of_labels[label] + except KeyError: + i = self.__index_of_labels.setdefault(label, len(self.labels)) + self.labels.append(label) + self.trail.append(TrailType.CHOICE + 1 + i) + + def stop_span(self, *, discard: bool) -> None: + if discard: + self.trail.append(TrailType.STOP_SPAN_DISCARD) + else: + self.trail.append(TrailType.STOP_SPAN_NO_DISCARD) + + +class _starts_and_ends(SpanProperty): + def __init__(self, spans: "Spans") -> None: + super().__init__(spans) + self.starts = IntList.of_length(len(self.spans)) + self.ends = IntList.of_length(len(self.spans)) + + def start_span(self, i: int, label_index: int) -> None: + self.starts[i] = self.choice_count + + def stop_span(self, i: int, *, discarded: bool) -> None: + self.ends[i] = self.choice_count + + def finish(self) -> tuple[IntList, IntList]: + return (self.starts, self.ends) + + +class _discarded(SpanProperty): + def __init__(self, spans: "Spans") -> None: + super().__init__(spans) + self.result: set[int] = set() + + def finish(self) -> frozenset[int]: + return frozenset(self.result) + + def stop_span(self, i: int, *, discarded: bool) -> None: + if discarded: + self.result.add(i) + + +class _parentage(SpanProperty): + def __init__(self, spans: "Spans") -> None: + super().__init__(spans) + self.result = IntList.of_length(len(self.spans)) + + def stop_span(self, i: int, *, discarded: bool) -> None: + if i > 0: + self.result[i] = self.span_stack[-1] + + def finish(self) -> IntList: + return self.result + + +class _depths(SpanProperty): + def __init__(self, spans: "Spans") -> None: + super().__init__(spans) + self.result = IntList.of_length(len(self.spans)) + + def start_span(self, i: int, label_index: int) -> None: + self.result[i] = len(self.span_stack) + + def finish(self) -> IntList: + return self.result + + +class _label_indices(SpanProperty): + def __init__(self, spans: "Spans") -> None: + super().__init__(spans) + self.result = IntList.of_length(len(self.spans)) + + def start_span(self, i: int, label_index: int) -> None: + self.result[i] = label_index + + def finish(self) -> IntList: + return self.result + + +class _mutator_groups(SpanProperty): + def __init__(self, spans: "Spans") -> None: + super().__init__(spans) + self.groups: dict[int, set[tuple[int, int]]] = defaultdict(set) + + def start_span(self, i: int, label_index: int) -> None: + # TODO should we discard start == end cases? occurs for eg st.data() + # which is conditionally or never drawn from. arguably swapping + # nodes with the empty list is a useful mutation enabled by start == end? + key = (self.spans[i].start, self.spans[i].end) + self.groups[label_index].add(key) + + def finish(self) -> Iterable[set[tuple[int, int]]]: + # Discard groups with only one span, since the mutator can't + # do anything useful with them. + return [g for g in self.groups.values() if len(g) >= 2] + + +class Spans: + """A lazy collection of ``Span`` objects, derived from + the record of recorded behaviour in ``SpanRecord``. 
+ + Behaves logically as if it were a list of ``Span`` objects, + but actually mostly exists as a compact store of information + for them to reference into. All properties on here are best + understood as the backing storage for ``Span`` and are + described there. + """ + + def __init__(self, record: SpanRecord) -> None: + self.trail = record.trail + self.labels = record.labels + self.__length = self.trail.count( + TrailType.STOP_SPAN_DISCARD + ) + record.trail.count(TrailType.STOP_SPAN_NO_DISCARD) + self.__children: list[Sequence[int]] | None = None + + @cached_property + def starts_and_ends(self) -> tuple[IntList, IntList]: + return _starts_and_ends(self).run() + + @property + def starts(self) -> IntList: + return self.starts_and_ends[0] + + @property + def ends(self) -> IntList: + return self.starts_and_ends[1] + + @cached_property + def discarded(self) -> frozenset[int]: + return _discarded(self).run() + + @cached_property + def parentage(self) -> IntList: + return _parentage(self).run() + + @cached_property + def depths(self) -> IntList: + return _depths(self).run() + + @cached_property + def label_indices(self) -> IntList: + return _label_indices(self).run() + + @cached_property + def mutator_groups(self) -> list[set[tuple[int, int]]]: + return _mutator_groups(self).run() + + @property + def children(self) -> list[Sequence[int]]: + if self.__children is None: + children = [IntList() for _ in range(len(self))] + for i, p in enumerate(self.parentage): + if i > 0: + children[p].append(i) + # Replace empty children lists with a tuple to reduce + # memory usage. + for i, c in enumerate(children): + if not c: + children[i] = () # type: ignore + self.__children = children # type: ignore + return self.__children # type: ignore + + def __len__(self) -> int: + return self.__length + + def __getitem__(self, i: int) -> Span: + n = self.__length + if i < -n or i >= n: + raise IndexError(f"Index {i} out of range [-{n}, {n})") + if i < 0: + i += n + return Span(self, i) + + # not strictly necessary as we have len/getitem, but required for mypy. + # https://github.com/python/mypy/issues/9737 + def __iter__(self) -> Iterator[Span]: + for i in range(len(self)): + yield self[i] + + +class _Overrun: + status: Status = Status.OVERRUN + + def __repr__(self) -> str: + return "Overrun" + + +Overrun = _Overrun() + + +class DataObserver: + """Observer class for recording the behaviour of a + ConjectureData object, primarily used for tracking + the behaviour in the tree cache.""" + + def conclude_test( + self, + status: Status, + interesting_origin: InterestingOrigin | None, + ) -> None: + """Called when ``conclude_test`` is called on the + observed ``ConjectureData``, with the same arguments. + + Note that this is called after ``freeze`` has completed. 
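# Sketch of a user-defined observer built on the draw_* hooks that
# DataObserver declares; `CountingObserver` is an illustrative name that
# relies on DataObserver from this module and is not part of the vendored
# file.
from collections import Counter

class CountingObserver(DataObserver):
    """Tally how many choices of each type a ConjectureData run recorded."""

    def __init__(self) -> None:
        self.counts: Counter[str] = Counter()

    def draw_integer(self, value, *, constraints, was_forced) -> None:
        self.counts["integer"] += 1

    def draw_boolean(self, value, *, constraints, was_forced) -> None:
        self.counts["boolean"] += 1

    def draw_float(self, value, *, constraints, was_forced) -> None:
        self.counts["float"] += 1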
+ """ + + def kill_branch(self) -> None: + """Mark this part of the tree as not worth re-exploring.""" + + def draw_integer( + self, value: int, *, constraints: IntegerConstraints, was_forced: bool + ) -> None: + pass + + def draw_float( + self, value: float, *, constraints: FloatConstraints, was_forced: bool + ) -> None: + pass + + def draw_string( + self, value: str, *, constraints: StringConstraints, was_forced: bool + ) -> None: + pass + + def draw_bytes( + self, value: bytes, *, constraints: BytesConstraints, was_forced: bool + ) -> None: + pass + + def draw_boolean( + self, value: bool, *, constraints: BooleanConstraints, was_forced: bool + ) -> None: + pass + + +@dataclass(slots=True, frozen=True) +class ConjectureResult: + """Result class storing the parts of ConjectureData that we + will care about after the original ConjectureData has outlived its + usefulness.""" + + status: Status + interesting_origin: InterestingOrigin | None + nodes: tuple[ChoiceNode, ...] = field(repr=False, compare=False) + length: int + output: str + expected_exception: BaseException | None + expected_traceback: str | None + has_discards: bool + target_observations: TargetObservations + tags: frozenset[StructuralCoverageTag] + spans: Spans = field(repr=False, compare=False) + arg_slices: set[tuple[int, int]] = field(repr=False) + slice_comments: dict[tuple[int, int], str] = field(repr=False) + misaligned_at: MisalignedAt | None = field(repr=False) + cannot_proceed_scope: CannotProceedScopeT | None = field(repr=False) + + def as_result(self) -> "ConjectureResult": + return self + + @property + def choices(self) -> tuple[ChoiceT, ...]: + return tuple(node.value for node in self.nodes) + + +class ConjectureData: + @classmethod + def for_choices( + cls, + choices: Sequence[ChoiceTemplate | ChoiceT], + *, + observer: DataObserver | None = None, + provider: PrimitiveProvider | type[PrimitiveProvider] = HypothesisProvider, + random: Random | None = None, + ) -> "ConjectureData": + from hypothesis.internal.conjecture.engine import choice_count + + return cls( + max_choices=choice_count(choices), + random=random, + prefix=choices, + observer=observer, + provider=provider, + ) + + def __init__( + self, + *, + random: Random | None, + observer: DataObserver | None = None, + provider: PrimitiveProvider | type[PrimitiveProvider] = HypothesisProvider, + prefix: Sequence[ChoiceTemplate | ChoiceT] | None = None, + max_choices: int | None = None, + provider_kw: dict[str, Any] | None = None, + ) -> None: + from hypothesis.internal.conjecture.engine import BUFFER_SIZE + + if observer is None: + observer = DataObserver() + if provider_kw is None: + provider_kw = {} + elif not isinstance(provider, type): + raise InvalidArgument( + f"Expected {provider=} to be a class since {provider_kw=} was " + "passed, but got an instance instead." 
+ ) + + assert isinstance(observer, DataObserver) + self.observer = observer + self.max_choices = max_choices + self.max_length = BUFFER_SIZE + self.overdraw = 0 + self._random = random + + self.length = 0 + self.index = 0 + self.output = "" + self.status = Status.VALID + self.frozen = False + self.testcounter = threadlocal.global_test_counter + threadlocal.global_test_counter += 1 + self.start_time = time.perf_counter() + self.gc_start_time = gc_cumulative_time() + self.events: dict[str, str | int | float] = {} + self.interesting_origin: InterestingOrigin | None = None + self.draw_times: dict[str, float] = {} + self._stateful_run_times: dict[str, float] = defaultdict(float) + self.max_depth = 0 + self.has_discards = False + + self.provider: PrimitiveProvider = ( + provider(self, **provider_kw) if isinstance(provider, type) else provider + ) + assert isinstance(self.provider, PrimitiveProvider) + + self.__result: ConjectureResult | None = None + + # Observations used for targeted search. They'll be aggregated in + # ConjectureRunner.generate_new_examples and fed to TargetSelector. + self.target_observations: TargetObservations = {} + + # Tags which indicate something about which part of the search space + # this example is in. These are used to guide generation. + self.tags: set[StructuralCoverageTag] = set() + self.labels_for_structure_stack: list[set[int]] = [] + + # Normally unpopulated but we need this in the niche case + # that self.as_result() is Overrun but we still want the + # examples for reporting purposes. + self.__spans: Spans | None = None + + # We want the top level span to have depth 0, so we start + # at -1. + self.depth = -1 + self.__span_record = SpanRecord() + + # Slice indices for discrete reportable parts that which-parts-matter can + # try varying, to report if the minimal example always fails anyway. + self.arg_slices: set[tuple[int, int]] = set() + self.slice_comments: dict[tuple[int, int], str] = {} + self._observability_args: dict[str, Any] = {} + self._observability_predicates: defaultdict[str, PredicateCounts] = defaultdict( + PredicateCounts + ) + + self._sampled_from_all_strategies_elements_message: ( + tuple[str, object] | None + ) = None + self._shared_strategy_draws: dict[Hashable, tuple[Any, SearchStrategy]] = {} + self._shared_data_strategy: DataObject | None = None + self._stateful_repr_parts: list[Any] | None = None + self.states_for_ids: dict[int, RandomState] | None = None + self.seeds_to_states: dict[Any, RandomState] | None = None + self.hypothesis_runner: Any = not_set + + self.expected_exception: BaseException | None = None + self.expected_traceback: str | None = None + + self.prefix = prefix + self.nodes: tuple[ChoiceNode, ...] = () + self.misaligned_at: MisalignedAt | None = None + self.cannot_proceed_scope: CannotProceedScopeT | None = None + self.start_span(TOP_LABEL) + + def __repr__(self) -> str: + return "ConjectureData(%s, %d choices%s)" % ( + self.status.name, + len(self.nodes), + ", frozen" if self.frozen else "", + ) + + @property + def choices(self) -> tuple[ChoiceT, ...]: + return tuple(node.value for node in self.nodes) + + # draw_* functions might be called in one of two contexts: either "above" or + # "below" the choice sequence. For instance, draw_string calls draw_boolean + # from ``many`` when calculating the number of characters to return. We do + # not want these choices to get written to the choice sequence, because they + # are not true choices themselves. + # + # `observe` formalizes this. 
The choice will only be written to the choice + # sequence if observe is True. + + @overload + def _draw( + self, + choice_type: Literal["integer"], + constraints: IntegerConstraints, + *, + observe: bool, + forced: int | None, + ) -> int: ... + + @overload + def _draw( + self, + choice_type: Literal["float"], + constraints: FloatConstraints, + *, + observe: bool, + forced: float | None, + ) -> float: ... + + @overload + def _draw( + self, + choice_type: Literal["string"], + constraints: StringConstraints, + *, + observe: bool, + forced: str | None, + ) -> str: ... + + @overload + def _draw( + self, + choice_type: Literal["bytes"], + constraints: BytesConstraints, + *, + observe: bool, + forced: bytes | None, + ) -> bytes: ... + + @overload + def _draw( + self, + choice_type: Literal["boolean"], + constraints: BooleanConstraints, + *, + observe: bool, + forced: bool | None, + ) -> bool: ... + + def _draw( + self, + choice_type: ChoiceTypeT, + constraints: ChoiceConstraintsT, + *, + observe: bool, + forced: ChoiceT | None, + ) -> ChoiceT: + # this is somewhat redundant with the length > max_length check at the + # end of the function, but avoids trying to use a null self.random when + # drawing past the node of a ConjectureData.for_choices data. + if self.length == self.max_length: + debug_report(f"overrun because hit {self.max_length=}") + self.mark_overrun() + if len(self.nodes) == self.max_choices: + debug_report(f"overrun because hit {self.max_choices=}") + self.mark_overrun() + + if observe and self.prefix is not None and self.index < len(self.prefix): + value = self._pop_choice(choice_type, constraints, forced=forced) + elif forced is None: + value = getattr(self.provider, f"draw_{choice_type}")(**constraints) + + if forced is not None: + value = forced + + # nan values generated via int_to_float break list membership: + # + # >>> n = 18444492273895866368 + # >>> assert math.isnan(int_to_float(n)) + # >>> assert int_to_float(n) not in [int_to_float(n)] + # + # because int_to_float nans are not equal in the sense of either + # `a == b` or `a is b`. + # + # This can lead to flaky errors when collections require unique + # floats. What was happening is that in some places we provided math.nan + # provide math.nan, and in others we provided + # int_to_float(float_to_int(math.nan)), and which one gets used + # was not deterministic across test iterations. + # + # To fix this, *never* provide a nan value which is equal (via `is`) to + # another provided nan value. This sacrifices some test power; we should + # bring that back (ABOVE the choice sequence layer) in the future. + # + # See https://github.com/HypothesisWorks/hypothesis/issues/3926. 
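# Standalone illustration of the nan behaviour described above, using struct
# in place of the vendored int_to_float/float_to_int helpers; the bit pattern
# is the one quoted in the comment.
import math
import struct

def bits_to_float(n: int) -> float:
    return struct.unpack("<d", struct.pack("<Q", n))[0]

def float_to_bits(x: float) -> int:
    return struct.unpack("<Q", struct.pack("<d", x))[0]

n = 18444492273895866368
x = bits_to_float(n)
assert math.isnan(x)
# `in` falls back to == once `is` fails, and nan != nan, so a separately
# constructed copy of the same nan bit pattern does not count as a member:
assert x not in [bits_to_float(n)]
# Re-deriving every nan from its bits, as the float branch just below does,
# keeps the object placed in the choice sequence consistent across runs:
assert math.isnan(bits_to_float(float_to_bits(x)))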
+ if choice_type == "float": + assert isinstance(value, float) + if math.isnan(value): + value = int_to_float(float_to_int(value)) + + if observe: + was_forced = forced is not None + getattr(self.observer, f"draw_{choice_type}")( + value, constraints=constraints, was_forced=was_forced + ) + size = 0 if self.provider.avoid_realization else choices_size([value]) + if self.length + size > self.max_length: + debug_report( + f"overrun because {self.length=} + {size=} > {self.max_length=}" + ) + self.mark_overrun() + + node = ChoiceNode( + type=choice_type, + value=value, + constraints=constraints, + was_forced=was_forced, + index=len(self.nodes), + ) + self.__span_record.record_choice() + self.nodes += (node,) + self.length += size + + return value + + def draw_integer( + self, + min_value: int | None = None, + max_value: int | None = None, + *, + weights: dict[int, float] | None = None, + shrink_towards: int = 0, + forced: int | None = None, + observe: bool = True, + ) -> int: + # Validate arguments + if weights is not None: + assert min_value is not None + assert max_value is not None + assert len(weights) <= 255 # arbitrary practical limit + # We can and should eventually support total weights. But this + # complicates shrinking as we can no longer assume we can force + # a value to the unmapped probability mass if that mass might be 0. + assert sum(weights.values()) < 1 + # similarly, things get simpler if we assume every value is possible. + # we'll want to drop this restriction eventually. + assert all(w != 0 for w in weights.values()) + + if forced is not None and min_value is not None: + assert min_value <= forced + if forced is not None and max_value is not None: + assert forced <= max_value + + constraints: IntegerConstraints = self._pooled_constraints( + "integer", + { + "min_value": min_value, + "max_value": max_value, + "weights": weights, + "shrink_towards": shrink_towards, + }, + ) + return self._draw("integer", constraints, observe=observe, forced=forced) + + def draw_float( + self, + min_value: float = -math.inf, + max_value: float = math.inf, + *, + allow_nan: bool = True, + smallest_nonzero_magnitude: float = SMALLEST_SUBNORMAL, + # TODO: consider supporting these float widths at the choice sequence + # level in the future. + # width: Literal[16, 32, 64] = 64, + forced: float | None = None, + observe: bool = True, + ) -> float: + assert smallest_nonzero_magnitude > 0 + assert not math.isnan(min_value) + assert not math.isnan(max_value) + + if smallest_nonzero_magnitude == 0.0: # pragma: no cover + raise FloatingPointError( + "Got allow_subnormal=True, but we can't represent subnormal floats " + "right now, in violation of the IEEE-754 floating-point " + "specification. This is usually because something was compiled with " + "-ffast-math or a similar option, which sets global processor state. " + "See https://simonbyrne.github.io/notes/fastmath/ for a more detailed " + "writeup - and good luck!" 
+ ) + + if forced is not None: + assert allow_nan or not math.isnan(forced) + assert math.isnan(forced) or ( + sign_aware_lte(min_value, forced) and sign_aware_lte(forced, max_value) + ) + + constraints: FloatConstraints = self._pooled_constraints( + "float", + { + "min_value": min_value, + "max_value": max_value, + "allow_nan": allow_nan, + "smallest_nonzero_magnitude": smallest_nonzero_magnitude, + }, + ) + return self._draw("float", constraints, observe=observe, forced=forced) + + def draw_string( + self, + intervals: IntervalSet, + *, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + forced: str | None = None, + observe: bool = True, + ) -> str: + assert forced is None or min_size <= len(forced) <= max_size + assert min_size >= 0 + if len(intervals) == 0: + assert min_size == 0 + + constraints: StringConstraints = self._pooled_constraints( + "string", + { + "intervals": intervals, + "min_size": min_size, + "max_size": max_size, + }, + ) + return self._draw("string", constraints, observe=observe, forced=forced) + + def draw_bytes( + self, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + *, + forced: bytes | None = None, + observe: bool = True, + ) -> bytes: + assert forced is None or min_size <= len(forced) <= max_size + assert min_size >= 0 + + constraints: BytesConstraints = self._pooled_constraints( + "bytes", {"min_size": min_size, "max_size": max_size} + ) + return self._draw("bytes", constraints, observe=observe, forced=forced) + + def draw_boolean( + self, + p: float = 0.5, + *, + forced: bool | None = None, + observe: bool = True, + ) -> bool: + assert (forced is not True) or p > 0 + assert (forced is not False) or p < 1 + + constraints: BooleanConstraints = self._pooled_constraints("boolean", {"p": p}) + return self._draw("boolean", constraints, observe=observe, forced=forced) + + @overload + def _pooled_constraints( + self, choice_type: Literal["integer"], constraints: IntegerConstraints + ) -> IntegerConstraints: ... + + @overload + def _pooled_constraints( + self, choice_type: Literal["float"], constraints: FloatConstraints + ) -> FloatConstraints: ... + + @overload + def _pooled_constraints( + self, choice_type: Literal["string"], constraints: StringConstraints + ) -> StringConstraints: ... + + @overload + def _pooled_constraints( + self, choice_type: Literal["bytes"], constraints: BytesConstraints + ) -> BytesConstraints: ... + + @overload + def _pooled_constraints( + self, choice_type: Literal["boolean"], constraints: BooleanConstraints + ) -> BooleanConstraints: ... 
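# Standalone sketch of the constraint pooling implemented just below:
# identical constraint dicts are deduplicated through a cache keyed on their
# contents, so repeated draws share a single object. The real code goes
# through choice_constraints_key because values such as IntervalSet or the
# weights dict are not directly hashable; `pooled` and `_pool` are
# illustrative names only.
_pool: dict[tuple, dict] = {}

def pooled(constraints: dict) -> dict:
    key = tuple(sorted(constraints.items()))
    return _pool.setdefault(key, constraints)

a = pooled({"min_value": 0, "max_value": 100})
b = pooled({"min_value": 0, "max_value": 100})
assert a is b  # one shared dict rather than a fresh one per draw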
+ + def _pooled_constraints( + self, choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT + ) -> ChoiceConstraintsT: + """Memoize common dictionary objects to reduce memory pressure.""" + # caching runs afoul of nondeterminism checks + if self.provider.avoid_realization: + return constraints + + key = (choice_type, *choice_constraints_key(choice_type, constraints)) + try: + return POOLED_CONSTRAINTS_CACHE[key] + except KeyError: + POOLED_CONSTRAINTS_CACHE[key] = constraints + return constraints + + def _pop_choice( + self, + choice_type: ChoiceTypeT, + constraints: ChoiceConstraintsT, + *, + forced: ChoiceT | None, + ) -> ChoiceT: + assert self.prefix is not None + # checked in _draw + assert self.index < len(self.prefix) + + value = self.prefix[self.index] + if isinstance(value, ChoiceTemplate): + node: ChoiceTemplate = value + if node.count is not None: + assert node.count >= 0 + # node templates have to be at the end for now, since it's not immediately + # apparent how to handle overruning a node template while generating a single + # node if the alternative is not "the entire data is an overrun". + assert self.index == len(self.prefix) - 1 + if node.type == "simplest": + if forced is not None: + choice = forced + try: + choice = choice_from_index(0, choice_type, constraints) + except ChoiceTooLarge: + self.mark_overrun() + else: + raise NotImplementedError + + if node.count is not None: + node.count -= 1 + if node.count < 0: + self.mark_overrun() + return choice + + choice = value + node_choice_type = { + str: "string", + float: "float", + int: "integer", + bool: "boolean", + bytes: "bytes", + }[type(choice)] + # If we're trying to: + # * draw a different choice type at the same location + # * draw the same choice type with a different constraints, which does not permit + # the current value + # + # then we call this a misalignment, because the choice sequence has + # changed from what we expected at some point. An easy misalignment is + # + # one_of(integers(0, 100), integers(101, 200)) + # + # where the choice sequence [0, 100] has constraints {min_value: 0, max_value: 100} + # at index 1, but [0, 101] has constraints {min_value: 101, max_value: 200} at + # index 1 (which does not permit any of the values 0-100). + # + # When the choice sequence becomes misaligned, we generate a new value of the + # type and constraints the strategy expects. + if node_choice_type != choice_type or not choice_permitted(choice, constraints): + # only track first misalignment for now. + if self.misaligned_at is None: + self.misaligned_at = (self.index, choice_type, constraints, forced) + try: + # Fill in any misalignments with index 0 choices. An alternative to + # this is using the index of the misaligned choice instead + # of index 0, which may be useful for maintaining + # "similarly-complex choices" in the shrinker. This requires + # attaching an index to every choice in ConjectureData.for_choices, + # which we don't always have (e.g. when reading from db). + # + # If we really wanted this in the future we could make this complexity + # optional, use it if present, and default to index 0 otherwise. + # This complicates our internal api and so I'd like to avoid it + # if possible. + # + # Additionally, I don't think slips which require + # slipping to high-complexity values are common. Though arguably + # we may want to expand a bit beyond *just* the simplest choice. + # (we could for example consider sampling choices from index 0-10). 
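# Usage sketch of the misalignment handling described above, assuming the
# vendored tree from this patch is importable as the regular `hypothesis`
# package (e.g. with vendored/ on sys.path).
from hypothesis.internal.conjecture.data import ConjectureData

# A choice recorded under integers(101, 200) is replayed against a strategy
# that now expects integers(0, 100), so the stored value 150 is not permitted.
data = ConjectureData.for_choices([150])
value = data.draw_integer(0, 100)
assert value == 0                     # the misaligned draw falls back to the index-0 choice
assert data.misaligned_at is not None and data.misaligned_at[0] == 0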
+ choice = choice_from_index(0, choice_type, constraints) + except ChoiceTooLarge: + # should really never happen with a 0-index choice, but let's be safe. + self.mark_overrun() + + self.index += 1 + return choice + + def as_result(self) -> ConjectureResult | _Overrun: + """Convert the result of running this test into + either an Overrun object or a ConjectureResult.""" + + assert self.frozen + if self.status == Status.OVERRUN: + return Overrun + if self.__result is None: + self.__result = ConjectureResult( + status=self.status, + interesting_origin=self.interesting_origin, + spans=self.spans, + nodes=self.nodes, + length=self.length, + output=self.output, + expected_traceback=self.expected_traceback, + expected_exception=self.expected_exception, + has_discards=self.has_discards, + target_observations=self.target_observations, + tags=frozenset(self.tags), + arg_slices=self.arg_slices, + slice_comments=self.slice_comments, + misaligned_at=self.misaligned_at, + cannot_proceed_scope=self.cannot_proceed_scope, + ) + assert self.__result is not None + return self.__result + + def __assert_not_frozen(self, name: str) -> None: + if self.frozen: + raise Frozen(f"Cannot call {name} on frozen ConjectureData") + + def note(self, value: Any) -> None: + self.__assert_not_frozen("note") + if not isinstance(value, str): + value = repr(value) + self.output += value + + def draw( + self, + strategy: "SearchStrategy[Ex]", + label: int | None = None, + observe_as: str | None = None, + ) -> "Ex": + from hypothesis.internal.observability import observability_enabled + from hypothesis.strategies._internal.lazy import unwrap_strategies + from hypothesis.strategies._internal.utils import to_jsonable + + at_top_level = self.depth == 0 + start_time = None + if at_top_level: + # We start this timer early, because accessing attributes on a LazyStrategy + # can be almost arbitrarily slow. In cases like characters() and text() + # where we cache something expensive, this led to Flaky deadline errors! + # See https://github.com/HypothesisWorks/hypothesis/issues/2108 + start_time = time.perf_counter() + gc_start_time = gc_cumulative_time() + + strategy.validate() + + if strategy.is_empty: + self.mark_invalid(f"empty strategy {self!r}") + + if self.depth >= MAX_DEPTH: + self.mark_invalid("max depth exceeded") + + # Jump directly to the unwrapped strategy for the label and for do_draw. + # This avoids adding an extra span to all lazy strategies. + unwrapped = unwrap_strategies(strategy) + if label is None: + label = unwrapped.label + assert isinstance(label, int) + + self.start_span(label=label) + try: + if not at_top_level: + return unwrapped.do_draw(self) + assert start_time is not None + key = observe_as or f"generate:unlabeled_{len(self.draw_times)}" + try: + try: + v = unwrapped.do_draw(self) + finally: + # Subtract the time spent in GC to avoid overcounting, as it is + # accounted for at the overall example level. 
+ in_gctime = gc_cumulative_time() - gc_start_time + self.draw_times[key] = time.perf_counter() - start_time - in_gctime + except Exception as err: + add_note( + err, + f"while generating {key.removeprefix('generate:')!r} from {strategy!r}", + ) + raise + if observability_enabled(): + avoid = self.provider.avoid_realization + self._observability_args[key] = to_jsonable(v, avoid_realization=avoid) + return v + finally: + self.stop_span() + + def start_span(self, label: int) -> None: + self.provider.span_start(label) + self.__assert_not_frozen("start_span") + self.depth += 1 + # Logically it would make sense for this to just be + # ``self.depth = max(self.depth, self.max_depth)``, which is what it used to + # be until we ran the code under tracemalloc and found a rather significant + # chunk of allocation was happening here. This was presumably due to varargs + # or the like, but we didn't investigate further given that it was easy + # to fix with this check. + if self.depth > self.max_depth: + self.max_depth = self.depth + self.__span_record.start_span(label) + self.labels_for_structure_stack.append({label}) + + def stop_span(self, *, discard: bool = False) -> None: + self.provider.span_end(discard) + if self.frozen: + return + if discard: + self.has_discards = True + self.depth -= 1 + assert self.depth >= -1 + self.__span_record.stop_span(discard=discard) + + labels_for_structure = self.labels_for_structure_stack.pop() + + if not discard: + if self.labels_for_structure_stack: + self.labels_for_structure_stack[-1].update(labels_for_structure) + else: + self.tags.update([structural_coverage(l) for l in labels_for_structure]) + + if discard: + # Once we've discarded a span, every test case starting with + # this prefix contains discards. We prune the tree at that point so + # as to avoid future test cases bothering with this region, on the + # assumption that some span that you could have used instead + # there would *not* trigger the discard. This greatly speeds up + # test case generation in some cases, because it allows us to + # ignore large swathes of the search space that are effectively + # redundant. + # + # A scenario that can cause us problems but which we deliberately + # have decided not to support is that if there are side effects + # during data generation then you may end up with a scenario where + # every good test case generates a discard because the discarded + # section sets up important things for later. This is not terribly + # likely and all that you see in this case is some degradation in + # quality of testing, so we don't worry about it. + # + # Note that killing the branch does *not* mean we will never + # explore below this point, and in particular we may do so during + # shrinking. Any explicit request for a data object that starts + # with the branch here will work just fine, but novel prefix + # generation will avoid it, and we can use it to detect when we + # have explored the entire tree (up to redundancy). + + self.observer.kill_branch() + + @property + def spans(self) -> Spans: + assert self.frozen + if self.__spans is None: + self.__spans = Spans(record=self.__span_record) + return self.__spans + + def freeze(self) -> None: + if self.frozen: + return + self.finish_time = time.perf_counter() + self.gc_finish_time = gc_cumulative_time() + + # Always finish by closing all remaining spans so that we have a valid tree. 
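# Usage sketch of start_span/stop_span/freeze and the resulting span tree,
# assuming the vendored tree from this patch is importable as the regular
# `hypothesis` package; the label value 1 is arbitrary.
from random import Random
from hypothesis.internal.conjecture.data import ConjectureData

data = ConjectureData(random=Random(0))
data.start_span(label=1)
data.draw_boolean()
data.draw_integer(0, 10)
data.stop_span()
data.freeze()

top, inner = data.spans[0], data.spans[1]
assert top.depth == 0                  # the implicit top-level span
assert inner.parent == 0 and inner.depth == 1
assert inner.choice_count == 2         # both draws landed inside the labelled span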
+ while self.depth >= 0: + self.stop_span() + + self.__span_record.freeze() + self.frozen = True + self.observer.conclude_test(self.status, self.interesting_origin) + + def choice( + self, + values: Sequence[T], + *, + forced: T | None = None, + observe: bool = True, + ) -> T: + forced_i = None if forced is None else values.index(forced) + i = self.draw_integer( + 0, + len(values) - 1, + forced=forced_i, + observe=observe, + ) + return values[i] + + def conclude_test( + self, + status: Status, + interesting_origin: InterestingOrigin | None = None, + ) -> NoReturn: + assert (interesting_origin is None) or (status == Status.INTERESTING) + self.__assert_not_frozen("conclude_test") + self.interesting_origin = interesting_origin + self.status = status + self.freeze() + raise StopTest(self.testcounter) + + def mark_interesting(self, interesting_origin: InterestingOrigin) -> NoReturn: + self.conclude_test(Status.INTERESTING, interesting_origin) + + def mark_invalid(self, why: str | None = None) -> NoReturn: + if why is not None: + self.events["invalid because"] = why + self.conclude_test(Status.INVALID) + + def mark_overrun(self) -> NoReturn: + self.conclude_test(Status.OVERRUN) + + +def draw_choice( + choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT, *, random: Random +) -> ChoiceT: + cd = ConjectureData(random=random) + return cast(ChoiceT, getattr(cd.provider, f"draw_{choice_type}")(**constraints)) diff --git a/vendored/hypothesis/internal/conjecture/datatree.py b/vendored/hypothesis/internal/conjecture/datatree.py new file mode 100644 index 0000000..df02449 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/datatree.py @@ -0,0 +1,1188 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import math +from collections.abc import Generator, Set +from dataclasses import dataclass, field +from random import Random +from typing import TYPE_CHECKING, Final, TypeAlias, cast + +from hypothesis.errors import ( + FlakyReplay, + FlakyStrategyDefinition, + HypothesisException, + StopTest, +) +from hypothesis.internal import floats as flt +from hypothesis.internal.conjecture.choice import ( + BooleanConstraints, + BytesConstraints, + ChoiceConstraintsT, + ChoiceT, + ChoiceTypeT, + FloatConstraints, + IntegerConstraints, + StringConstraints, + choice_from_index, +) +from hypothesis.internal.conjecture.data import ConjectureData, DataObserver, Status +from hypothesis.internal.escalation import InterestingOrigin +from hypothesis.internal.floats import ( + count_between_floats, + float_to_int, + int_to_float, + sign_aware_lte, +) + +if TYPE_CHECKING: + from hypothesis.vendor.pretty import RepresentationPrinter + +ChildrenCacheValueT: TypeAlias = tuple[ + Generator[ChoiceT, None, None], list[ChoiceT], set[ChoiceT] +] + + +class PreviouslyUnseenBehaviour(HypothesisException): + pass + + +_FLAKY_STRAT_MSG = ( + "Inconsistent data generation! Data generation behaved differently " + "between different runs. Is your data generation depending on external " + "state?" 
+) + + +EMPTY: frozenset[int] = frozenset() + + +@dataclass(slots=True, frozen=True) +class Killed: + """Represents a transition to part of the tree which has been marked as + "killed", meaning we want to treat it as not worth exploring, so it will + be treated as if it were completely explored for the purposes of + exhaustion.""" + + next_node: "TreeNode" + + def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: + assert cycle is False + p.text("Killed") + + +def _node_pretty( + choice_type: ChoiceTypeT, + value: ChoiceT, + constraints: ChoiceConstraintsT, + *, + forced: bool, +) -> str: + forced_marker = " [forced]" if forced else "" + return f"{choice_type} {value!r}{forced_marker} {constraints}" + + +@dataclass(slots=True, frozen=False) +class Branch: + """Represents a transition where multiple choices can be made as to what + to drawn.""" + + constraints: ChoiceConstraintsT + choice_type: ChoiceTypeT + children: dict[ChoiceT, "TreeNode"] = field(repr=False) + + @property + def max_children(self) -> int: + max_children = compute_max_children(self.choice_type, self.constraints) + assert max_children > 0 + return max_children + + def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: + assert cycle is False + for i, (value, child) in enumerate(self.children.items()): + if i > 0: + p.break_() + p.text( + _node_pretty(self.choice_type, value, self.constraints, forced=False) + ) + with p.indent(2): + p.break_() + p.pretty(child) + + +@dataclass(slots=True, frozen=True) +class Conclusion: + """Represents a transition to a finished state.""" + + status: Status + interesting_origin: InterestingOrigin | None + + def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: + assert cycle is False + o = self.interesting_origin + # avoid str(o), which can include multiple lines of context + origin = ( + "" if o is None else f", {o.exc_type.__name__} at {o.filename}:{o.lineno}" + ) + p.text(f"Conclusion ({self.status!r}{origin})") + + +# The number of max children where, beyond this, it is practically impossible +# for hypothesis to saturate / explore all children nodes in a reasonable time +# frame. We use this to bail out of expensive max children computations early, +# where the numbers involved are so large that we know they will be larger than +# this number. +# +# Note that it's ok for us to underestimate the number of max children of a node +# by using this. We just may think the node is exhausted when in fact it has more +# possible children to be explored. This has the potential to finish generation +# early due to exhausting the entire tree, but that is quite unlikely: (1) the +# number of examples would have to be quite high, and (2) the tree would have to +# contain only one or two nodes, or generate_novel_prefix would simply switch to +# exploring another non-exhausted node. +# +# Also note that we may sometimes compute max children above this value. In other +# words, this is *not* a hard maximum on the computed max children. It's the point +# where further computation is not beneficial - but sometimes doing that computation +# unconditionally is cheaper than estimating against this value. +# +# The one case where this may be detrimental is fuzzing, where the throughput of +# examples is so high that it really may saturate important nodes. We'll cross +# that bridge when we come to it. 
+MAX_CHILDREN_EFFECTIVELY_INFINITE: Final[int] = 10_000_000 + + +def _count_distinct_strings(*, alphabet_size: int, min_size: int, max_size: int) -> int: + # We want to estimate if we're going to have more children than + # MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially + # extremely expensive pow. We'll check the two extreme cases - if the + # number of strings in the largest string size alone is enough to put us + # over this limit (at alphabet_size >= 2), and if the variation in sizes + # (at alphabet_size == 1) is enough. If neither result in an early return, + # the exact result should be reasonably cheap to compute. + if alphabet_size == 0: + # Special-case the empty string, avoid error in math.log(0). + return 1 + elif alphabet_size == 1: + # Special-case the constant alphabet, invalid in the geom-series sum. + return max_size - min_size + 1 + else: + # Estimate against log, which is cheaper than computing a pow. + # + # m = max_size + # a = alphabet_size + # N = MAX_CHILDREN_EFFECTIVELY_INFINITE + # + # a**m > N + # <=> m * log(a) > log(N) + log_max_sized_children = max_size * math.log(alphabet_size) + if log_max_sized_children > math.log(MAX_CHILDREN_EFFECTIVELY_INFINITE): + return MAX_CHILDREN_EFFECTIVELY_INFINITE + + # The sum of a geometric series is given by (ref: wikipedia): + # ᵐ∑ₖ₌₀ aᵏ = (aᵐ⁺¹ - 1) / (a - 1) + # = S(m) / S(0) + # assuming a != 1 and using the definition + # S(m) := aᵐ⁺¹ - 1. + # The sum we want, starting from a number n [0 <= n <= m] rather than zero, is + # ᵐ∑ₖ₌ₙ aᵏ = ᵐ∑ₖ₌₀ aᵏ - ⁿ⁻¹∑ₖ₌₀ aᵏ = S(m) / S(0) - S(n - 1) / S(0) + def S(n): + return alphabet_size ** (n + 1) - 1 + + return (S(max_size) - S(min_size - 1)) // S(0) + + +def compute_max_children( + choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT +) -> int: + if choice_type == "integer": + constraints = cast(IntegerConstraints, constraints) + min_value = constraints["min_value"] + max_value = constraints["max_value"] + + if min_value is None and max_value is None: + # full 128 bit range. + return 2**128 - 1 + if min_value is not None and max_value is not None: + # count between min/max value. + return max_value - min_value + 1 + + # hard case: only one bound was specified. Here we probe either upwards + # or downwards with our full 128 bit generation, but only half of these + # (plus one for the case of generating zero) result in a probe in the + # direction we want. ((2**128 - 1) // 2) + 1 == 2 ** 127 + assert (min_value is None) != (max_value is None) + return 2**127 + elif choice_type == "boolean": + constraints = cast(BooleanConstraints, constraints) + p = constraints["p"] + # probabilities of 0 or 1 (or effectively 0 or 1) only have one choice. 
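# Quick numerical check of the geometric-series identity that
# _count_distinct_strings above relies on, with S(n) := a**(n + 1) - 1:
#   sum(a**k for k in range(n, m + 1)) == (S(m) - S(n - 1)) // S(0)   for a >= 2
def S(a: int, n: int) -> int:
    return a ** (n + 1) - 1

for a in (2, 3, 17):
    for n, m in [(0, 5), (2, 2), (1, 7)]:
        naive = sum(a ** k for k in range(n, m + 1))
        assert naive == (S(a, m) - S(a, n - 1)) // S(a, 0)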
+ if p <= 2 ** (-64) or p >= (1 - 2 ** (-64)): + return 1 + return 2 + elif choice_type == "bytes": + constraints = cast(BytesConstraints, constraints) + return _count_distinct_strings( + alphabet_size=2**8, + min_size=constraints["min_size"], + max_size=constraints["max_size"], + ) + elif choice_type == "string": + constraints = cast(StringConstraints, constraints) + min_size = constraints["min_size"] + max_size = constraints["max_size"] + intervals = constraints["intervals"] + + return _count_distinct_strings( + alphabet_size=len(intervals), min_size=min_size, max_size=max_size + ) + elif choice_type == "float": + constraints = cast(FloatConstraints, constraints) + min_value_f = constraints["min_value"] + max_value_f = constraints["max_value"] + smallest_nonzero_magnitude = constraints["smallest_nonzero_magnitude"] + + count = count_between_floats(min_value_f, max_value_f) + + # we have two intervals: + # a. [min_value, max_value] + # b. [-smallest_nonzero_magnitude, smallest_nonzero_magnitude] + # + # which could be subsets (in either order), overlapping, or disjoint. We + # want the interval difference a - b. + + # next_down because endpoints are ok with smallest_nonzero_magnitude + min_point = max(min_value_f, -flt.next_down(smallest_nonzero_magnitude)) + max_point = min(max_value_f, flt.next_down(smallest_nonzero_magnitude)) + + if min_point > max_point: + # case: disjoint intervals. + return count + + count -= count_between_floats(min_point, max_point) + if sign_aware_lte(min_value_f, -0.0) and sign_aware_lte(-0.0, max_value_f): + # account for -0.0 + count += 1 + if sign_aware_lte(min_value_f, 0.0) and sign_aware_lte(0.0, max_value_f): + # account for 0.0 + count += 1 + return count + + raise NotImplementedError(f"unhandled choice_type {choice_type}") + + +# In theory, this is a strict superset of the functionality of compute_max_children; +# +# assert len(all_children(choice_type, constraints)) == compute_max_children(choice_type, constraints) +# +# In practice, we maintain two distinct implementations for efficiency and space +# reasons. If you just need the number of children, it is cheaper to use +# compute_max_children than to reify the list of children (only to immediately +# throw it away). +def _floats_between(a: float, b: float) -> Generator[float, None, None]: + for n in range(float_to_int(a), float_to_int(b) + 1): + yield int_to_float(n) + + +def all_children( + choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT +) -> Generator[ChoiceT, None, None]: + if choice_type != "float": + for index in range(compute_max_children(choice_type, constraints)): + yield choice_from_index(index, choice_type, constraints) + else: + constraints = cast(FloatConstraints, constraints) + # the float ordering is not injective (because of resampling + # out-of-bounds values), so using choice_from_index would result in + # duplicates. This violates invariants in datatree about being able + # to draw unique new children using all_children. + # + # We instead maintain a separate implementation for floats. + # TODO_IR write a better (bijective) ordering for floats and remove this! + min_value = constraints["min_value"] + max_value = constraints["max_value"] + smallest_nonzero_magnitude = constraints["smallest_nonzero_magnitude"] + + # handle zeroes separately so smallest_nonzero_magnitude can think of + # itself as a complete interval (instead of a hole at ±0). 
+ if sign_aware_lte(min_value, -0.0) and sign_aware_lte(-0.0, max_value): + yield -0.0 + if sign_aware_lte(min_value, 0.0) and sign_aware_lte(0.0, max_value): + yield 0.0 + + if flt.is_negative(min_value): + if flt.is_negative(max_value): + # case: both negative. + max_point = min(max_value, -smallest_nonzero_magnitude) + # float_to_int increases as negative magnitude increases, so + # invert order. + yield from _floats_between(max_point, min_value) + else: + # case: straddles midpoint (which is between -0.0 and 0.0). + yield from _floats_between(-smallest_nonzero_magnitude, min_value) + yield from _floats_between(smallest_nonzero_magnitude, max_value) + else: + # case: both positive. + min_point = max(min_value, smallest_nonzero_magnitude) + yield from _floats_between(min_point, max_value) + + +@dataclass(slots=True, frozen=False) +class TreeNode: + """ + A node, or collection of directly descended nodes, in a DataTree. + + We store the DataTree as a radix tree (https://en.wikipedia.org/wiki/Radix_tree), + which means that nodes that are the only child of their parent are collapsed + into their parent to save space. + + Conceptually, you can unfold a single TreeNode storing n values in its lists + into a sequence of n nodes, each a child of the last. In other words, + (constraints[i], values[i], choice_types[i]) corresponds to the single node at index + i. + + Note that if a TreeNode represents a choice (i.e. the nodes cannot be compacted + via the radix tree definition), then its lists will be empty and it will + store a `Branch` representing that choce in its `transition`. + + Examples + -------- + + Consider sequentially drawing a boolean, then an integer. + + data.draw_boolean() + data.draw_integer(1, 3) + + If we draw True and then 2, the tree may conceptually look like this. + + ┌──────┐ + │ root │ + └──┬───┘ + ┌──┴───┐ + │ True │ + └──┬───┘ + ┌──┴───┐ + │ 2 │ + └──────┘ + + But since 2 is the only child of True, we will compact these nodes and store + them as a single TreeNode. + + ┌──────┐ + │ root │ + └──┬───┘ + ┌────┴──────┐ + │ [True, 2] │ + └───────────┘ + + If we then draw True and then 3, True will have multiple children and we + can no longer store this compacted representation. We would call split_at(0) + on the [True, 2] node to indicate that we need to add a choice at 0-index + node (True). + + ┌──────┐ + │ root │ + └──┬───┘ + ┌──┴───┐ + ┌─┤ True ├─┐ + │ └──────┘ │ + ┌─┴─┐ ┌─┴─┐ + │ 2 │ │ 3 │ + └───┘ └───┘ + """ + + # The constraints, value, and choice_types of the nodes stored here. These always + # have the same length. The values at index i belong to node i. + constraints: list[ChoiceConstraintsT] = field(default_factory=list) + values: list[ChoiceT] = field(default_factory=list) + choice_types: list[ChoiceTypeT] = field(default_factory=list) + + # The indices of nodes which had forced values. + # + # Stored as None if no indices have been forced, purely for space saving + # reasons (we force quite rarely). + __forced: set[int] | None = field(default=None, init=False) + + # What happens next after drawing these nodes. (conceptually, "what is the + # child/children of the last node stored here"). 
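# Sketch of the compaction and splitting behaviour described in the docstring
# above, using split_at (defined further down in this class); assumes the
# vendored tree from this patch is importable as the regular `hypothesis`
# package. The constraint dicts are only stored, not validated, here.
from hypothesis.internal.conjecture.datatree import Branch, TreeNode

node = TreeNode(
    choice_types=["boolean", "integer"],
    constraints=[
        {"p": 0.5},
        {"min_value": 1, "max_value": 3, "weights": None, "shrink_towards": 0},
    ],
    values=[True, 2],
)
node.split_at(0)                         # a second boolean value was seen, so branch here
assert node.values == []                 # the compacted [True, 2] run was cut at index 0
assert isinstance(node.transition, Branch)
assert list(node.transition.children) == [True]       # the old suffix lives under True
assert node.transition.children[True].values == [2]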
+ # + # One of: + # - None (we don't know yet) + # - Branch (we have seen multiple possible outcomes here) + # - Conclusion (ConjectureData.conclude_test was called here) + # - Killed (this branch is valid and may even have children, but should not + # be explored when generating novel prefixes) + transition: None | Branch | Conclusion | Killed = None + + # A tree node is exhausted if every possible sequence of draws below it has + # been explored. We only update this when performing operations that could + # change the answer. + # + # See also TreeNode.check_exhausted. + is_exhausted: bool = field(default=False, init=False) + + @property + def forced(self) -> Set[int]: + if not self.__forced: + return EMPTY + return self.__forced + + def mark_forced(self, i: int) -> None: + """ + Note that the draw at node i was forced. + """ + assert 0 <= i < len(self.values) + if self.__forced is None: + self.__forced = set() + self.__forced.add(i) + + def split_at(self, i: int) -> None: + """ + Splits the tree so that it can incorporate a decision at the draw call + corresponding to the node at position i. + + Raises FlakyStrategyDefinition if node i was forced. + """ + + if i in self.forced: + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) + + assert not self.is_exhausted + + key = self.values[i] + + child = TreeNode( + choice_types=self.choice_types[i + 1 :], + constraints=self.constraints[i + 1 :], + values=self.values[i + 1 :], + transition=self.transition, + ) + self.transition = Branch( + constraints=self.constraints[i], + choice_type=self.choice_types[i], + children={key: child}, + ) + if self.__forced is not None: + child.__forced = {j - i - 1 for j in self.__forced if j > i} + self.__forced = {j for j in self.__forced if j < i} + child.check_exhausted() + del self.choice_types[i:] + del self.values[i:] + del self.constraints[i:] + assert len(self.values) == len(self.constraints) == len(self.choice_types) == i + + def check_exhausted(self) -> bool: + """ + Recalculates is_exhausted if necessary, and then returns it. + + A node is exhausted if: + - Its transition is Conclusion or Killed + - It has the maximum number of children (i.e. we have found all of its + possible children), and all its children are exhausted + + Therefore, we only need to compute this for a node when: + - We first create it in split_at + - We set its transition to either Conclusion or Killed + (TreeRecordingObserver.conclude_test or TreeRecordingObserver.kill_branch) + - We exhaust any of its children + """ + + if ( + # a node cannot go from is_exhausted -> not is_exhausted. + not self.is_exhausted + # if we don't know what happens after this node, we don't have + # enough information to tell if it's exhausted. + and self.transition is not None + # if there are still any nodes left which are the only child of their + # parent (len(self.values) > 0), then this TreeNode must be not + # exhausted, unless all of those nodes were forced. + # + # This is because we maintain an invariant of only adding nodes to + # DataTree which have at least 2 possible values, so we know that if + # they do not have any siblings that we still have more choices to + # discover. + # + # (We actually *do* currently add single-valued nodes to the tree, + # but immediately split them into a transition to avoid falsifying + # this check. this is a bit of a hack.) 
+ and len(self.forced) == len(self.values) + ): + if isinstance(self.transition, (Conclusion, Killed)): + self.is_exhausted = True + elif len(self.transition.children) == self.transition.max_children: + self.is_exhausted = all( + v.is_exhausted for v in self.transition.children.values() + ) + return self.is_exhausted + + def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: + assert cycle is False + indent = 0 + for i, (choice_type, constraints, value) in enumerate( + zip(self.choice_types, self.constraints, self.values, strict=True) + ): + with p.indent(indent): + if i > 0: + p.break_() + p.text( + _node_pretty( + choice_type, value, constraints, forced=i in self.forced + ) + ) + indent += 2 + + with p.indent(indent): + if len(self.values) > 0: + p.break_() + if self.transition is not None: + p.pretty(self.transition) + else: + p.text("unknown") + + +class DataTree: + """ + A DataTree tracks the structured history of draws in some test function, + across multiple ConjectureData objects. + + This information is used by ConjectureRunner to generate novel prefixes of + this tree (see generate_novel_prefix). A novel prefix is a sequence of draws + which the tree has not seen before, and therefore the ConjectureRunner has + not generated as an input to the test function before. + + DataTree tracks the following: + + - Drawn choices in the choice sequence + - ConjectureData.draw_integer() + - ConjectureData.draw_float() + - ConjectureData.draw_string() + - ConjectureData.draw_boolean() + - ConjectureData.draw_bytes() + - Test conclusions (with some Status, e.g. Status.VALID) + - ConjectureData.conclude_test() + + A DataTree is — surprise — a *tree*. A node in this tree is either a choice draw + with some value, a test conclusion with some Status, or a special `Killed` value, + which denotes that further draws may exist beyond this node but should not be + considered worth exploring when generating novel prefixes. A node is a leaf + iff it is a conclusion or Killed. + + A branch from node A to node B indicates that we have previously seen some + sequence (a, b) of draws, where a and b are the values in nodes A and B. + Similar intuition holds for conclusion and Killed nodes. + + Examples + -------- + + To see how a DataTree gets built through successive sets of draws, consider + the following code that calls through to some ConjecutreData object `data`. + The first call can be either True or False, and the second call can be any + integer in the range [1, 3]. + + data.draw_boolean() + data.draw_integer(1, 3) + + To start, the corresponding DataTree object is completely empty. + + ┌──────┐ + │ root │ + └──────┘ + + We happen to draw True and then 2 in the above code. The tree tracks this. + (2 also connects to a child Conclusion node with Status.VALID since it's the + final draw in the code. I'll omit Conclusion nodes in diagrams for brevity.) + + ┌──────┐ + │ root │ + └──┬───┘ + ┌──┴───┐ + │ True │ + └──┬───┘ + ┌──┴───┐ + │ 2 │ + └──────┘ + + This is a very boring tree so far! But now we happen to draw False and + then 1. This causes a split in the tree. Remember, DataTree tracks history + over all invocations of a function, not just one. The end goal is to know + what invocations haven't been tried yet, after all. 
+ + ┌──────┐ + ┌───┤ root ├───┐ + │ └──────┘ │ + ┌──┴───┐ ┌─┴─────┐ + │ True │ │ False │ + └──┬───┘ └──┬────┘ + ┌─┴─┐ ┌─┴─┐ + │ 2 │ │ 1 │ + └───┘ └───┘ + + If we were to ask DataTree for a novel prefix at this point, it might + generate any of (True, 1), (True, 3), (False, 2), or (False, 3). + + Note that the novel prefix stops as soon as it generates a novel node. For + instance, if we had generated a novel prefix back when the tree was only + root -> True -> 2, we could have gotten any of (True, 1), (True, 3), or + (False). But we could *not* have gotten (False, n), because both False and + n were novel at that point, and we stop at the first novel node — False. + + I won't belabor this example. Here's what the tree looks like when fully + explored: + + ┌──────┐ + ┌──────┤ root ├──────┐ + │ └──────┘ │ + ┌──┴───┐ ┌─┴─────┐ + ┌──┤ True ├──┐ ┌───┤ False ├──┐ + │ └──┬───┘ │ │ └──┬────┘ │ + ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ + │ 1 │ │ 2 │ │ 3 │ │ 1 │ │ 2 │ │ 3 │ + └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ + + You could imagine much more complicated trees than this arising in practice, + and indeed they do. In particular, the tree need not be balanced or 'nice' + like the tree above. For instance, + + b = data.draw_boolean() + if b: + data.draw_integer(1, 3) + + results in a tree with the entire right part lopped off, and False leading + straight to a conclusion node with Status.VALID. As another example, + + n = data.draw_integers() + assume(n >= 3) + data.draw_string() + + results in a tree with the 0, 1, and 2 nodes leading straight to a + conclusion node with Status.INVALID, and the rest branching off into all + the possibilities of draw_string. + + Notes + ----- + + The above examples are slightly simplified and are intended to convey + intuition. In practice, there are some implementation details to be aware + of. + + - In draw nodes, we store the constraints used in addition to the value drawn. + E.g. the node corresponding to data.draw_float(min_value=1.0, max_value=1.5) + would store {"min_value": 1.0, "max_value": 1.5, ...} (default values for + other constraints omitted). + + The constraints parameters have the potential to change both the range of + possible outputs of a node, and the probability distribution within that + range, so we need to use these when drawing in DataTree as well. We draw + values using these constraints when (1) generating a novel value for a node + and (2) choosing a random child when traversing the tree. + + - For space efficiency, rather than tracking the full tree structure, we + store DataTree as a radix tree. This is conceptually equivalent (radix + trees can always be "unfolded" to the full tree) but it means the internal + representation may differ in practice. + + See TreeNode for more information. + """ + + def __init__(self) -> None: + self.root: TreeNode = TreeNode() + self._children_cache: dict[ChoiceT, ChildrenCacheValueT] = {} + + @property + def is_exhausted(self) -> bool: + """ + Returns True if every node is exhausted, and therefore the tree has + been fully explored. + """ + return self.root.is_exhausted + + def generate_novel_prefix(self, random: Random) -> tuple[ChoiceT, ...]: + """Generate a short random string that (after rewriting) is not + a prefix of any choice sequence previously added to the tree. + + The resulting prefix is essentially arbitrary - it would be nice + for it to be uniform at random, but previous attempts to do that + have proven too expensive. 
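# Usage sketch: record one run of a small test function into the tree via its
# observer, then ask for a prefix that has not been tried yet; assumes the
# vendored tree from this patch is importable as the regular `hypothesis`
# package.
from random import Random
from hypothesis.internal.conjecture.data import ConjectureData
from hypothesis.internal.conjecture.datatree import DataTree

tree = DataTree()

def run(choices):
    data = ConjectureData.for_choices(choices, observer=tree.new_observer())
    data.draw_boolean()
    data.draw_integer(1, 3)
    data.freeze()          # records a Status.VALID conclusion for this path

run([True, 2])
# The only recorded run chose True first, so the boolean draw is the first
# unexplored branch point and the novel prefix stops there.
assert tree.generate_novel_prefix(Random(0)) == (False,)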
+ """ + assert not self.is_exhausted + prefix = [] + + def append_choice(choice_type: ChoiceTypeT, choice: ChoiceT) -> None: + if choice_type == "float": + assert isinstance(choice, int) + choice = int_to_float(choice) + prefix.append(choice) + + current_node = self.root + while True: + assert not current_node.is_exhausted + for i, (choice_type, constraints, value) in enumerate( + zip( + current_node.choice_types, + current_node.constraints, + current_node.values, + strict=True, + ) + ): + if i in current_node.forced: + append_choice(choice_type, value) + else: + attempts = 0 + while True: + if attempts <= 10: + try: + node_value = self._draw( + choice_type, constraints, random=random + ) + except StopTest: # pragma: no cover + # it is possible that drawing from a fresh data can + # overrun BUFFER_SIZE, due to eg unlucky rejection sampling + # of integer probes. Retry these cases. + attempts += 1 + continue + else: + node_value = self._draw_from_cache( + choice_type, + constraints, + key=id(current_node), + random=random, + ) + + if node_value != value: + append_choice(choice_type, node_value) + break + attempts += 1 + self._reject_child( + choice_type, + constraints, + child=node_value, + key=id(current_node), + ) + # We've now found a value that is allowed to + # vary, so what follows is not fixed. + return tuple(prefix) + + assert not isinstance(current_node.transition, (Conclusion, Killed)) + if current_node.transition is None: + return tuple(prefix) + branch = current_node.transition + assert isinstance(branch, Branch) + + attempts = 0 + while True: + if attempts <= 10: + try: + node_value = self._draw( + branch.choice_type, branch.constraints, random=random + ) + except StopTest: # pragma: no cover + attempts += 1 + continue + else: + node_value = self._draw_from_cache( + branch.choice_type, + branch.constraints, + key=id(branch), + random=random, + ) + try: + child = branch.children[node_value] + except KeyError: + append_choice(branch.choice_type, node_value) + return tuple(prefix) + if not child.is_exhausted: + append_choice(branch.choice_type, node_value) + current_node = child + break + attempts += 1 + self._reject_child( + branch.choice_type, + branch.constraints, + child=node_value, + key=id(branch), + ) + + # We don't expect this assertion to ever fire, but coverage + # wants the loop inside to run if you have branch checking + # on, hence the pragma. + assert ( # pragma: no cover + attempts != 1000 + or len(branch.children) < branch.max_children + or any(not v.is_exhausted for v in branch.children.values()) + ) + + def rewrite(self, choices): + """Use previously seen ConjectureData objects to return a tuple of + the rewritten choice sequence and the status we would get from running + that with the test function. If the status cannot be predicted + from the existing values it will be None.""" + data = ConjectureData.for_choices(choices) + try: + self.simulate_test_function(data) + return (data.choices, data.status) + except PreviouslyUnseenBehaviour: + return (choices, None) + + def simulate_test_function(self, data: ConjectureData) -> None: + """Run a simulated version of the test function recorded by + this tree. Note that this does not currently call ``stop_span`` + or ``start_span`` as these are not currently recorded in the + tree. 
This will likely change in future.""" + node = self.root + + def draw(choice_type, constraints, *, forced=None, convert_forced=True): + if choice_type == "float" and forced is not None and convert_forced: + forced = int_to_float(forced) + + draw_func = getattr(data, f"draw_{choice_type}") + value = draw_func(**constraints, forced=forced) + + if choice_type == "float": + value = float_to_int(value) + return value + + try: + while True: + for i, (choice_type, constraints, previous) in enumerate( + zip(node.choice_types, node.constraints, node.values, strict=True) + ): + v = draw( + choice_type, + constraints, + forced=previous if i in node.forced else None, + ) + if v != previous: + raise PreviouslyUnseenBehaviour + if isinstance(node.transition, Conclusion): + t = node.transition + data.conclude_test(t.status, t.interesting_origin) + elif node.transition is None: + raise PreviouslyUnseenBehaviour + elif isinstance(node.transition, Branch): + v = draw(node.transition.choice_type, node.transition.constraints) + try: + node = node.transition.children[v] + except KeyError as err: + raise PreviouslyUnseenBehaviour from err + else: + assert isinstance(node.transition, Killed) + data.observer.kill_branch() + node = node.transition.next_node + except StopTest: + pass + + def new_observer(self): + return TreeRecordingObserver(self) + + def _draw( + self, + choice_type: ChoiceTypeT, + constraints: ChoiceConstraintsT, + *, + random: Random, + ) -> ChoiceT: + from hypothesis.internal.conjecture.data import draw_choice + + value = draw_choice(choice_type, constraints, random=random) + # using floats as keys into branch.children breaks things, because + # e.g. hash(0.0) == hash(-0.0) would collide as keys when they are + # in fact distinct child branches. + # To distinguish floats here we'll use their bits representation. This + # entails some bookkeeping such that we're careful about when the + # float key is in its bits form (as a key into branch.children) and + # when it is in its float form (as a value we want to write to the + # choice sequence), and converting between the two forms as appropriate. + if choice_type == "float": + assert isinstance(value, float) + value = float_to_int(value) + return value + + def _get_children_cache( + self, choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT, *, key: ChoiceT + ) -> ChildrenCacheValueT: + # cache the state of the children generator per node/branch (passed as + # `key` here), such that we track which children we've already tried + # for this branch across draws. + # We take advantage of python generators here as one-way iterables, + # so each time we iterate we implicitly store our position in the + # children generator and don't re-draw children. `children` is the + # concrete list of children draw from the generator that we will work + # with. Whenever we need to top up this list, we will draw a new value + # from the generator. + if key not in self._children_cache: + generator = all_children(choice_type, constraints) + children: list[ChoiceT] = [] + rejected: set[ChoiceT] = set() + self._children_cache[key] = (generator, children, rejected) + + return self._children_cache[key] + + def _draw_from_cache( + self, + choice_type: ChoiceTypeT, + constraints: ChoiceConstraintsT, + *, + key: ChoiceT, + random: Random, + ) -> ChoiceT: + (generator, children, rejected) = self._get_children_cache( + choice_type, constraints, key=key + ) + # Keep a stock of 100 potentially-valid children at all times. 
+ # This number is chosen to balance memory/speed vs randomness. Ideally + # we would sample uniformly from all not-yet-rejected children, but + # computing and storing said children is not free. + # no-branch because coverage of the fall-through case here is a bit + # annoying. + if len(children) < 100: # pragma: no branch + for v in generator: + if choice_type == "float": + assert isinstance(v, float) + v = float_to_int(v) + if v in rejected: + continue + children.append(v) + if len(children) >= 100: + break + + return random.choice(children) + + def _reject_child( + self, + choice_type: ChoiceTypeT, + constraints: ChoiceConstraintsT, + *, + child: ChoiceT, + key: ChoiceT, + ) -> None: + (_generator, children, rejected) = self._get_children_cache( + choice_type, constraints, key=key + ) + rejected.add(child) + # we remove a child from the list of possible children *only* when it is + # rejected, and not when it is initially drawn in _draw_from_cache. The + # reason is that a child being drawn does not guarantee that child will + # be used in a way such that it is written back to the tree, so it needs + # to be available for future draws until we are certain it has been + # used. + # + # For instance, if we generated novel prefixes in a loop (but never used + # those prefixes to generate new values!) then we don't want to remove + # the drawn children from the available pool until they are actually + # used. + # + # This does result in a small inefficiency: we may draw a child, + # immediately use it (so we know it cannot be drawn again), but still + # wait to draw and reject it here, because DataTree cannot guarantee + # the drawn child has been used. + if child in children: + children.remove(child) + + def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: + assert cycle is False + p.pretty(self.root) + + +class TreeRecordingObserver(DataObserver): + def __init__(self, tree: DataTree): + # this attr isn't read, but is very useful for local debugging flaky + # errors, with + # `from hypothesis.vendor import pretty; print(pretty.pretty(self._root))` + self._root = tree.root + self._current_node: TreeNode = tree.root + self._index_in_current_node: int = 0 + self._trail: list[TreeNode] = [self._current_node] + self.killed: bool = False + + def draw_integer( + self, value: int, *, was_forced: bool, constraints: IntegerConstraints + ) -> None: + self.draw_value( + "integer", value, was_forced=was_forced, constraints=constraints + ) + + def draw_float( + self, value: float, *, was_forced: bool, constraints: FloatConstraints + ) -> None: + self.draw_value("float", value, was_forced=was_forced, constraints=constraints) + + def draw_string( + self, value: str, *, was_forced: bool, constraints: StringConstraints + ) -> None: + self.draw_value("string", value, was_forced=was_forced, constraints=constraints) + + def draw_bytes( + self, value: bytes, *, was_forced: bool, constraints: BytesConstraints + ) -> None: + self.draw_value("bytes", value, was_forced=was_forced, constraints=constraints) + + def draw_boolean( + self, value: bool, *, was_forced: bool, constraints: BooleanConstraints + ) -> None: + self.draw_value( + "boolean", value, was_forced=was_forced, constraints=constraints + ) + + def draw_value( + self, + choice_type: ChoiceTypeT, + value: ChoiceT, + *, + was_forced: bool, + constraints: ChoiceConstraintsT, + ) -> None: + i = self._index_in_current_node + self._index_in_current_node += 1 + node = self._current_node + + if isinstance(value, float): + value = 
float_to_int(value) + + assert len(node.constraints) == len(node.values) == len(node.choice_types) + if i < len(node.values): + if ( + choice_type != node.choice_types[i] + or constraints != node.constraints[i] + ): + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) + # Note that we don't check whether a previously + # forced value is now free. That will be caught + # if we ever split the node there, but otherwise + # may pass silently. This is acceptable because it + # means we skip a hash set lookup on every + # draw and that's a pretty niche failure mode. + if was_forced and i not in node.forced: + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) + if value != node.values[i]: + node.split_at(i) + assert i == len(node.values) + new_node = TreeNode() + assert isinstance(node.transition, Branch) + node.transition.children[value] = new_node + self._current_node = new_node + self._index_in_current_node = 0 + else: + trans = node.transition + if trans is None: + node.choice_types.append(choice_type) + node.constraints.append(constraints) + node.values.append(value) + if was_forced: + node.mark_forced(i) + # generate_novel_prefix assumes the following invariant: any one + # of the series of draws in a particular node can vary, i.e. the + # max number of children is at least 2. However, some draws are + # pseudo-choices and only have a single value, such as + # integers(0, 0). + # + # Currently, we address this by forcefully splitting such + # single-valued nodes into a transition when we see them. An + # exception to this is if it was forced: forced pseudo-choices + # do not cause the above issue because they inherently cannot + # vary, and moreover they trip other invariants about never + # splitting forced nodes. + # + # An alternative is not writing such choices to the tree at + # all, and thus guaranteeing that each node has at least 2 max + # children. 
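+                # (compute_max_children(choice_type, constraints) counts how
+                # many distinct values these constraints allow, so a result of
+                # 1 identifies exactly these single-valued pseudo-choices.)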
+ if ( + compute_max_children(choice_type, constraints) == 1 + and not was_forced + ): + node.split_at(i) + assert isinstance(node.transition, Branch) + self._current_node = node.transition.children[value] + self._index_in_current_node = 0 + elif isinstance(trans, Conclusion): + assert trans.status != Status.OVERRUN + # We tried to draw where history says we should have + # stopped + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) + else: + assert isinstance(trans, Branch), trans + if choice_type != trans.choice_type or constraints != trans.constraints: + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) + try: + self._current_node = trans.children[value] + except KeyError: + self._current_node = trans.children.setdefault(value, TreeNode()) + self._index_in_current_node = 0 + if self._trail[-1] is not self._current_node: + self._trail.append(self._current_node) + + def kill_branch(self) -> None: + """Mark this part of the tree as not worth re-exploring.""" + if self.killed: + return + + self.killed = True + + if self._index_in_current_node < len(self._current_node.values) or ( + self._current_node.transition is not None + and not isinstance(self._current_node.transition, Killed) + ): + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) + + if self._current_node.transition is None: + self._current_node.transition = Killed(TreeNode()) + self.__update_exhausted() + + self._current_node = self._current_node.transition.next_node + self._index_in_current_node = 0 + self._trail.append(self._current_node) + + def conclude_test( + self, status: Status, interesting_origin: InterestingOrigin | None + ) -> None: + """Says that ``status`` occurred at node ``node``. This updates the + node if necessary and checks for consistency.""" + if status == Status.OVERRUN: + return + i = self._index_in_current_node + node = self._current_node + + if i < len(node.values) or isinstance(node.transition, Branch): + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) + + new_transition = Conclusion(status, interesting_origin) + + if node.transition is not None and node.transition != new_transition: + # As an, I'm afraid, horrible bodge, we deliberately ignore flakiness + # where tests go from interesting to valid, because it's much easier + # to produce good error messages for these further up the stack. + if isinstance(node.transition, Conclusion) and ( + node.transition.status != Status.INTERESTING + or new_transition.status != Status.VALID + ): + old_origin = node.transition.interesting_origin + new_origin = new_transition.interesting_origin + raise FlakyReplay( + f"Inconsistent results from replaying a test case!\n" + f" last: {node.transition.status.name} from {old_origin}\n" + f" this: {new_transition.status.name} from {new_origin}", + (old_origin, new_origin), + ) + else: + node.transition = new_transition + + assert node is self._trail[-1] + node.check_exhausted() + assert len(node.values) > 0 or node.check_exhausted() + + if not self.killed: + self.__update_exhausted() + + def __update_exhausted(self) -> None: + for t in reversed(self._trail): + # Any node we've traversed might have now become exhausted. + # We check from the right. As soon as we hit a node that + # isn't exhausted, this automatically implies that all of + # its parents are not exhausted, so we stop. 
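+            # (check_exhausted() recomputes the node's exhaustion status and
+            # returns it, so this one call both refreshes and tests each node.)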
+ if not t.check_exhausted(): + break diff --git a/vendored/hypothesis/internal/conjecture/dfa/__init__.py b/vendored/hypothesis/internal/conjecture/dfa/__init__.py new file mode 100644 index 0000000..f30602c --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/dfa/__init__.py @@ -0,0 +1,674 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import threading +from collections import Counter, defaultdict, deque +from math import inf + +from hypothesis.internal.reflection import proxies + + +def cached(fn): + @proxies(fn) + def wrapped(self, *args): + cache = self._DFA__cache(fn.__name__) + try: + return cache[args] + except KeyError: + return cache.setdefault(args, fn(self, *args)) + + return wrapped + + +class DFA: + """Base class for implementations of deterministic finite + automata. + + This is abstract to allow for the possibility of states + being calculated lazily as we traverse the DFA (which + we make heavy use of in our L* implementation - see + lstar.py for details). + + States can be of any hashable type. + """ + + def __init__(self): + self.__caches = threading.local() + + def __cache(self, name): + try: + cache = getattr(self.__caches, name) + except AttributeError: + cache = {} + setattr(self.__caches, name, cache) + return cache + + @property + def start(self): + """Returns the starting state.""" + raise NotImplementedError + + def is_accepting(self, i): + """Returns if state ``i`` is an accepting one.""" + raise NotImplementedError + + def transition(self, i, c): + """Returns the state that i transitions to on reading + character c from a string.""" + raise NotImplementedError + + @property + def alphabet(self): + return range(256) + + def transitions(self, i): + """Iterates over all pairs (byte, state) of transitions + which do not lead to dead states.""" + for c, j in self.raw_transitions(i): + if not self.is_dead(j): + yield c, j + + @cached + def transition_counts(self, state): + counts = Counter() + for _, j in self.transitions(state): + counts[j] += 1 + return list(counts.items()) + + def matches(self, s): + """Returns whether the string ``s`` is accepted + by this automaton.""" + i = self.start + for c in s: + i = self.transition(i, c) + return self.is_accepting(i) + + def all_matching_regions(self, string): + """Return all pairs ``(u, v)`` such that ``self.matches(string[u:v])``.""" + + # Stack format: (k, state, indices). After reading ``k`` characters + # starting from any i in ``indices`` the DFA would be at ``state``. + stack = [(0, self.start, range(len(string)))] + + results = [] + + while stack: + k, state, indices = stack.pop() + + # If the state is dead, abort early - no point continuing on + # from here where there will be no more matches. + if self.is_dead(state): + continue + + # If the state is accepting, then every one of these indices + # has a matching region of length ``k`` starting from it. 
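+            # (For example, when k == 0 and the start state is accepting,
+            # every starting index i contributes the empty region (i, i).)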
+ if self.is_accepting(state): + results.extend([(i, i + k) for i in indices]) + + next_by_state = defaultdict(list) + + for i in indices: + if i + k < len(string): + c = string[i + k] + next_by_state[self.transition(state, c)].append(i) + for next_state, next_indices in next_by_state.items(): + stack.append((k + 1, next_state, next_indices)) + return results + + def max_length(self, i): + """Returns the maximum length of a string that is + accepted when starting from i.""" + if self.is_dead(i): + return 0 + + cache = self.__cache("max_length") + + try: + return cache[i] + except KeyError: + pass + + # Naively we can calculate this as 1 longer than the + # max length of the non-dead states this can immediately + # transition to, but a) We don't want unbounded recursion + # because that's how you get RecursionErrors and b) This + # makes it hard to look for cycles. So we basically do + # the recursion explicitly with a stack, but we maintain + # a parallel set that tracks what's already on the stack + # so that when we encounter a loop we can immediately + # determine that the max length here is infinite. + + stack = [i] + stack_set = {i} + + def pop(): + """Remove the top element from the stack, maintaining + the stack set appropriately.""" + assert len(stack) == len(stack_set) + j = stack.pop() + stack_set.remove(j) + assert len(stack) == len(stack_set) + + while stack: + j = stack[-1] + assert not self.is_dead(j) + # If any of the children have infinite max_length we don't + # need to check all of them to know that this state does + # too. + if any(cache.get(k) == inf for k in self.successor_states(j)): + cache[j] = inf + pop() + continue + + # Recurse to the first child node that we have not yet + # calculated max_length for. + for k in self.successor_states(j): + if k in stack_set: + # k is part of a loop and is known to be live + # (since we never push dead states on the stack), + # so it can reach strings of unbounded length. + assert not self.is_dead(k) + cache[k] = inf + break + if k not in cache and not self.is_dead(k): + stack.append(k) + stack_set.add(k) + break + else: + # All of j's successors have a known max_length or are dead, + # so we can now compute a max_length for j itself. + cache[j] = max( + ( + 1 + cache[k] + for k in self.successor_states(j) + if not self.is_dead(k) + ), + default=0, + ) + + # j is live so it must either be accepting or have a live child. 
+ assert self.is_accepting(j) or cache[j] > 0 + pop() + return cache[i] + + @cached + def has_strings(self, state, length): + """Returns if any strings of length ``length`` are accepted when + starting from state ``state``.""" + assert length >= 0 + + cache = self.__cache("has_strings") + + try: + return cache[state, length] + except KeyError: + pass + + pending = [(state, length)] + seen = set() + i = 0 + + while i < len(pending): + s, n = pending[i] + i += 1 + if n > 0: + for t in self.successor_states(s): + key = (t, n - 1) + if key not in cache and key not in seen: + pending.append(key) + seen.add(key) + + while pending: + s, n = pending.pop() + if n == 0: + cache[s, n] = self.is_accepting(s) + else: + cache[s, n] = any( + cache.get((t, n - 1)) for t in self.successor_states(s) + ) + + return cache[state, length] + + def count_strings(self, state, length): + """Returns the number of strings of length ``length`` + that are accepted when starting from state ``state``.""" + assert length >= 0 + cache = self.__cache("count_strings") + + try: + return cache[state, length] + except KeyError: + pass + + pending = [(state, length)] + seen = set() + i = 0 + + while i < len(pending): + s, n = pending[i] + i += 1 + if n > 0: + for t in self.successor_states(s): + key = (t, n - 1) + if key not in cache and key not in seen: + pending.append(key) + seen.add(key) + + while pending: + s, n = pending.pop() + if n == 0: + cache[s, n] = int(self.is_accepting(s)) + else: + cache[s, n] = sum( + cache[t, n - 1] * k for t, k in self.transition_counts(s) + ) + + return cache[state, length] + + @cached + def successor_states(self, state): + """Returns all of the distinct states that can be reached via one + transition from ``state``, in the lexicographic order of the + smallest character that reaches them.""" + seen = set() + result = [] + for _, j in self.raw_transitions(state): + if j not in seen: + seen.add(j) + result.append(j) + return tuple(result) + + def is_dead(self, state): + """Returns True if no strings can be accepted + when starting from ``state``.""" + return not self.is_live(state) + + def is_live(self, state): + """Returns True if any strings can be accepted + when starting from ``state``.""" + if self.is_accepting(state): + return True + + # We work this out by calculating is_live for all nodes + # reachable from state which have not already had it calculated. + cache = self.__cache("is_live") + try: + return cache[state] + except KeyError: + pass + + # roots are states that we know already must be live, + # either because we have previously calculated them to + # be or because they are an accepting state. + roots = set() + + # We maintain a backwards graph where ``j in backwards_graph[k]`` + # if there is a transition from j to k. Thus if a key in this + # graph is live, so must all its values be. + backwards_graph = defaultdict(set) + + # First we find all reachable nodes from i which have not + # already been cached, noting any which are roots and + # populating the backwards graph. + + explored = set() + queue = deque([state]) + while queue: + j = queue.popleft() + if cache.get(j, self.is_accepting(j)): + # If j can be immediately determined to be live + # then there is no point in exploring beneath it, + # because any effect of states below it is screened + # off by the known answer for j. + roots.add(j) + continue + + if j in cache: + # Likewise if j is known to be dead then there is + # no point exploring beneath it because we know + # that all nodes reachable from it must be dead. 
+ continue + + if j in explored: + continue + explored.add(j) + + for k in self.successor_states(j): + backwards_graph[k].add(j) + queue.append(k) + + marked_live = set() + queue = deque(roots) + while queue: + j = queue.popleft() + if j in marked_live: + continue + marked_live.add(j) + for k in backwards_graph[j]: + queue.append(k) + for j in explored: + cache[j] = j in marked_live + + return cache[state] + + def all_matching_strings_of_length(self, k): + """Yields all matching strings whose length is ``k``, in ascending + lexicographic order.""" + if k == 0: + if self.is_accepting(self.start): + yield b"" + return + + if not self.has_strings(self.start, k): + return + + # This tracks a path through the DFA. We alternate between growing + # it until it has length ``k`` and is in an accepting state, then + # yielding that as a result, then modifying it so that the next + # time we do that it will yield the lexicographically next matching + # string. + path = bytearray() + + # Tracks the states that are visited by following ``path`` from the + # starting point. + states = [self.start] + + while True: + # First we build up our current best prefix to the lexicographically + # first string starting with it. + while len(path) < k: + state = states[-1] + for c, j in self.transitions(state): + if self.has_strings(j, k - len(path) - 1): + states.append(j) + path.append(c) + break + else: + raise NotImplementedError("Should be unreachable") + assert self.is_accepting(states[-1]) + assert len(states) == len(path) + 1 + yield bytes(path) + + # Now we want to replace this string with the prefix that will + # cause us to extend to its lexicographic successor. This can + # be thought of as just repeatedly moving to the next lexicographic + # successor until we find a matching string, but we're able to + # use our length counts to jump over long sequences where there + # cannot be a match. + while True: + # As long as we are in this loop we are trying to move to + # the successor of the current string. + + # If we've removed the entire prefix then we're done - no + # successor is possible. + if not path: + return + + if path[-1] == 255: + # If our last element is maximal then the we have to "carry + # the one" - our lexicographic successor must be incremented + # earlier than this. + path.pop() + states.pop() + else: + # Otherwise increment by one. + path[-1] += 1 + states[-1] = self.transition(states[-2], path[-1]) + + # If there are no strings of the right length starting from + # this prefix we need to keep going. Otherwise, this is + # the right place to be and we break out of our loop of + # trying to find the successor because it starts here. + if self.count_strings(states[-1], k - len(path)) > 0: + break + + def all_matching_strings(self, min_length=0): + """Iterate over all strings matched by this automaton + in shortlex-ascending order.""" + # max_length might be infinite, hence the while loop + max_length = self.max_length(self.start) + length = min_length + while length <= max_length: + yield from self.all_matching_strings_of_length(length) + length += 1 + + def raw_transitions(self, i): + for c in self.alphabet: + j = self.transition(i, c) + yield c, j + + def canonicalise(self): + """Return a canonical version of ``self`` as a ConcreteDFA. + + The DFA is not minimized, but nodes are sorted and relabelled + and dead nodes are pruned, so two minimized DFAs for the same + language will end up with identical canonical representatives. 
+ This is mildly important because it means that the output of + L* should produce the same canonical DFA regardless of what + order we happen to have run it in. + """ + # We map all states to their index of appearance in depth + # first search. This both is useful for canonicalising and + # also allows for states that aren't integers. + state_map = {} + reverse_state_map = [] + accepting = set() + + seen = set() + + queue = deque([self.start]) + while queue: + state = queue.popleft() + if state in state_map: + continue + i = len(reverse_state_map) + if self.is_accepting(state): + accepting.add(i) + reverse_state_map.append(state) + state_map[state] = i + for _, j in self.transitions(state): + if j in seen: + continue + seen.add(j) + queue.append(j) + + transitions = [ + {c: state_map[s] for c, s in self.transitions(t)} for t in reverse_state_map + ] + + result = ConcreteDFA(transitions, accepting) + assert self.equivalent(result) + return result + + def equivalent(self, other): + """Checks whether this DFA and other match precisely the same + language. + + Uses the classic algorithm of Hopcroft and Karp (more or less): + Hopcroft, John E. A linear algorithm for testing equivalence + of finite automata. Vol. 114. Defense Technical Information Center, 1971. + """ + + # The basic idea of this algorithm is that we repeatedly + # merge states that would be equivalent if the two start + # states were. This starts by merging the two start states, + # and whenever we merge two states merging all pairs of + # states that are reachable by following the same character + # from that point. + # + # Whenever we merge two states, we check if one of them + # is accepting and the other non-accepting. If so, we have + # obtained a contradiction and have made a bad merge, so + # the two start states must not have been equivalent in the + # first place and we return False. + # + # If the languages matched are different then some string + # is contained in one but not the other. By looking at + # the pairs of states visited by traversing the string in + # each automaton in parallel, we eventually come to a pair + # of states that would have to be merged by this algorithm + # where one is accepting and the other is not. Thus this + # algorithm always returns False as a result of a bad merge + # if the two languages are not the same. + # + # If we successfully complete all merges without a contradiction + # we can thus safely return True. + + # We maintain a union/find table for tracking merges of states. + table = {} + + def find(s): + trail = [s] + while trail[-1] in table and table[trail[-1]] != trail[-1]: + trail.append(table[trail[-1]]) + + for t in trail: + table[t] = trail[-1] + + return trail[-1] + + def union(s, t): + s = find(s) + t = find(t) + table[s] = t + + alphabet = sorted(set(self.alphabet) | set(other.alphabet)) + + queue = deque([(self.start, other.start)]) + while queue: + self_state, other_state = queue.popleft() + + # We use a DFA/state pair for keys because the same value + # may represent a different state in each DFA. + self_key = (self, self_state) + other_key = (other, other_state) + + # We have already merged these, no need to remerge. + if find(self_key) == find(other_key): + continue + + # We have found a contradiction, therefore the two DFAs must + # not be equivalent. 
+ if self.is_accepting(self_state) != other.is_accepting(other_state): + return False + + # Merge the two states + union(self_key, other_key) + + # And also queue any logical consequences of merging those + # two states for merging. + for c in alphabet: + queue.append( + (self.transition(self_state, c), other.transition(other_state, c)) + ) + return True + + +DEAD = "DEAD" + + +class ConcreteDFA(DFA): + """A concrete representation of a DFA in terms of an explicit list + of states.""" + + def __init__(self, transitions, accepting, start=0): + """ + * ``transitions`` is a list where transitions[i] represents the + valid transitions out of state ``i``. Elements may be either dicts + (in which case they map characters to other states) or lists. If they + are a list they may contain tuples of length 2 or 3. A tuple ``(c, j)`` + indicates that this state transitions to state ``j`` given ``c``. A + tuple ``(u, v, j)`` indicates this state transitions to state ``j`` + given any ``c`` with ``u <= c <= v``. + * ``accepting`` is a set containing the integer labels of accepting + states. + * ``start`` is the integer label of the starting state. + """ + super().__init__() + self.__start = start + self.__accepting = accepting + self.__transitions = list(transitions) + + def __repr__(self): + transitions = [] + # Particularly for including in source code it's nice to have the more + # compact repr, so where possible we convert to the tuple based representation + # which can represent ranges more compactly. + for i in range(len(self.__transitions)): + table = [] + for c, j in self.transitions(i): + if not table or j != table[-1][-1] or c != table[-1][1] + 1: + table.append([c, c, j]) + else: + table[-1][1] = c + transitions.append([(u, j) if u == v else (u, v, j) for u, v, j in table]) + + start = "" if self.__start == 0 else f", start={self.__start!r}" + return f"ConcreteDFA({transitions!r}, {self.__accepting!r}{start})" + + @property + def start(self): + return self.__start + + def is_accepting(self, i): + return i in self.__accepting + + def transition(self, state, char): + """Returns the state that i transitions to on reading + character c from a string.""" + if state == DEAD: + return DEAD + + table = self.__transitions[state] + + # Given long transition tables we convert them to + # dictionaries for more efficient lookup. 
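+        # (The threshold of 5 is a heuristic cut-off: for very short tables a
+        # linear scan of the tuple entries is cheap enough that building a
+        # dict is not worth the cost.)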
+ if not isinstance(table, dict) and len(table) >= 5: + new_table = {} + for t in table: + if len(t) == 2: + new_table[t[0]] = t[1] + else: + u, v, j = t + for c in range(u, v + 1): + new_table[c] = j + self.__transitions[state] = new_table + table = new_table + + if isinstance(table, dict): + try: + return self.__transitions[state][char] + except KeyError: + return DEAD + else: + for t in table: + if len(t) == 2: + if t[0] == char: + return t[1] + else: + u, v, j = t + if u <= char <= v: + return j + return DEAD + + def raw_transitions(self, i): + if i == DEAD: + return + transitions = self.__transitions[i] + if isinstance(transitions, dict): + yield from sorted(transitions.items()) + else: + for t in transitions: + if len(t) == 2: + yield t + else: + u, v, j = t + for c in range(u, v + 1): + yield c, j diff --git a/vendored/hypothesis/internal/conjecture/dfa/lstar.py b/vendored/hypothesis/internal/conjecture/dfa/lstar.py new file mode 100644 index 0000000..25e6386 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/dfa/lstar.py @@ -0,0 +1,497 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from bisect import bisect_right, insort +from collections import Counter +from dataclasses import dataclass, field + +from hypothesis.errors import InvalidState +from hypothesis.internal.conjecture.dfa import DFA, cached +from hypothesis.internal.conjecture.junkdrawer import ( + IntList, + NotFound, + SelfOrganisingList, + find_integer, +) + +""" +This module contains an implementation of the L* algorithm +for learning a deterministic finite automaton based on an +unknown membership function and a series of examples of +strings that may or may not satisfy it. + +The two relevant papers for understanding this are: + +* Angluin, Dana. "Learning regular sets from queries and counterexamples." + Information and computation 75.2 (1987): 87-106. +* Rivest, Ronald L., and Robert E. Schapire. "Inference of finite automata + using homing sequences." Information and Computation 103.2 (1993): 299-347. + Note that we only use the material from section 4.5 "Improving Angluin's L* + algorithm" (page 318), and all of the rest of the material on homing + sequences can be skipped. + +The former explains the core algorithm, the latter a modification +we use (which we have further modified) which allows it to +be implemented more efficiently. + +Although we continue to call this L*, we in fact depart heavily from it to the +point where honestly this is an entirely different algorithm and we should come +up with a better name. + +We have several major departures from the papers: + +1. We learn the automaton lazily as we traverse it. This is particularly + valuable because if we make many corrections on the same string we only + have to learn the transitions that correspond to the string we are + correcting on. +2. We make use of our ``find_integer`` method rather than a binary search + as proposed in the Rivest and Schapire paper, as we expect that + usually most strings will be mispredicted near the beginning. +3. We try to learn a smaller alphabet of "interestingly distinct" + values. e.g. 
if all bytes larger than two result in an invalid + string, there is no point in distinguishing those bytes. In aid + of this we learn a single canonicalisation table which maps integers + to smaller integers that we currently think are equivalent, and learn + their inequivalence where necessary. This may require more learning + steps, as at each stage in the process we might learn either an + inequivalent pair of integers or a new experiment, but it may greatly + reduce the number of membership queries we have to make. + + +In addition, we have a totally different approach for mapping a string to its +canonical representative, which will be explained below inline. The general gist +is that our implementation is much more willing to make mistakes: It will often +create a DFA that is demonstrably wrong, based on information that it already +has, but where it is too expensive to discover that before it causes us to +make a mistake. + +A note on performance: This code is not really fast enough for +us to ever want to run in production on large strings, and this +is somewhat intrinsic. We should only use it in testing or for +learning languages offline that we can record for later use. + +""" + + +@dataclass(slots=True, frozen=False) +class DistinguishedState: + """Relevant information for a state that we have witnessed as definitely + distinct from ones we have previously seen so far.""" + + # Index of this state in the learner's list of states + index: int + + # A string that witnesses this state (i.e. when starting from the origin + # and following this string you will end up in this state). + label: str + + # A boolean as to whether this is an accepting state. + accepting: bool + + # A list of experiments that it is necessary to run to determine whether + # a string is in this state. This is stored as a dict mapping experiments + # to their expected result. A string is only considered to lead to this + # state if ``all(learner.member(s + experiment) == result for experiment, + # result in self.experiments.items())``. + experiments: dict + + # A cache of transitions out of this state, mapping bytes to the states + # that they lead to. + transitions: dict = field(default_factory=dict) + + +class LStar: + """This class holds the state for learning a DFA. The current DFA can be + accessed as the ``dfa`` member of this class. Such a DFA becomes invalid + as soon as ``learn`` has been called, and should only be used until the + next call to ``learn``. + + Note that many of the DFA methods are on this class, but it is not itself + a DFA. The reason for this is that it stores mutable state which can cause + the structure of the learned DFA to change in potentially arbitrary ways, + making all cached properties become nonsense. + """ + + def __init__(self, member): + self.experiments = [] + self.__experiment_set = set() + self.normalizer = IntegerNormalizer() + + self.__member_cache = {} + self.__member = member + self.__generation = 0 + + # A list of all state objects that correspond to strings we have + # seen and can demonstrate map to unique states. + self.__states = [ + DistinguishedState( + index=0, + label=b"", + accepting=self.member(b""), + experiments={b"": self.member(b"")}, + ) + ] + + # When we're trying to figure out what state a string leads to we will + # end up searching to find a suitable candidate. By putting states in + # a self-organising list we ideally minimise the number of lookups. 
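+        # (A self-organising list promotes entries that satisfy a lookup, so
+        # states we land in frequently tend to be checked early on subsequent
+        # searches.)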
+ self.__self_organising_states = SelfOrganisingList(self.__states) + + self.start = 0 + + self.__dfa_changed() + + def __dfa_changed(self): + """Note that something has changed, updating the generation + and resetting any cached state.""" + self.__generation += 1 + self.dfa = LearnedDFA(self) + + def is_accepting(self, i): + """Equivalent to ``self.dfa.is_accepting(i)``""" + return self.__states[i].accepting + + def label(self, i): + """Returns the string label for state ``i``.""" + return self.__states[i].label + + def transition(self, i, c): + """Equivalent to ``self.dfa.transition(i, c)```""" + c = self.normalizer.normalize(c) + state = self.__states[i] + try: + return state.transitions[c] + except KeyError: + pass + + # The state that we transition to when reading ``c`` is reached by + # this string, because this state is reached by state.label. We thus + # want our candidate for the transition to be some state with a label + # equivalent to this string. + # + # We find such a state by looking for one such that all of its listed + # experiments agree on the result for its state label and this string. + string = state.label + bytes([c]) + + # We keep track of some useful experiments for distinguishing this + # string from other states, as this both allows us to more accurately + # select the state to map to and, if necessary, create the new state + # that this string corresponds to with a decent set of starting + # experiments. + accumulated = {} + counts = Counter() + + def equivalent(t): + """Checks if ``string`` could possibly lead to state ``t``.""" + for e, expected in accumulated.items(): + if self.member(t.label + e) != expected: + counts[e] += 1 + return False + + for e, expected in t.experiments.items(): + result = self.member(string + e) + if result != expected: + # We expect most experiments to return False so if we add + # only True ones to our collection of essential experiments + # we keep the size way down and select only ones that are + # likely to provide useful information in future. + if result: + accumulated[e] = result + return False + return True + + try: + destination = self.__self_organising_states.find(equivalent) + except NotFound: + i = len(self.__states) + destination = DistinguishedState( + index=i, + label=string, + experiments=accumulated, + accepting=self.member(string), + ) + self.__states.append(destination) + self.__self_organising_states.add(destination) + state.transitions[c] = destination.index + return destination.index + + def member(self, s): + """Check whether this string is a member of the language + to be learned.""" + try: + return self.__member_cache[s] + except KeyError: + result = self.__member(s) + self.__member_cache[s] = result + return result + + @property + def generation(self): + """Return an integer value that will be incremented + every time the DFA we predict changes.""" + return self.__generation + + def learn(self, string): + """Learn to give the correct answer on this string. + That is, after this method completes we will have + ``self.dfa.matches(s) == self.member(s)``. + + Note that we do not guarantee that this will remain + true in the event that learn is called again with + a different string. It is in principle possible that + future learning will cause us to make a mistake on + this string. However, repeatedly calling learn on + each of a set of strings until the generation stops + changing is guaranteed to terminate. 
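+
+        A sketch of that outer loop, assuming ``corpus`` is a collection of
+        byte strings and ``member`` is the membership function this learner
+        was constructed with (both names are placeholders):
+
+            learner = LStar(member)
+            prev = -1
+            while prev != learner.generation:
+                prev = learner.generation
+                for s in corpus:
+                    learner.learn(s)
+            dfa = learner.dfa.canonicalise()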
+ """ + string = bytes(string) + correct_outcome = self.member(string) + + # We don't want to check this inside the loop because it potentially + # causes us to evaluate more of the states than we actually need to, + # but if our model is mostly correct then this will be faster because + # we only need to evaluate strings that are of the form + # ``state + experiment``, which will generally be cached and/or needed + # later. + if self.dfa.matches(string) == correct_outcome: + return + + # In the papers they assume that we only run this process + # once, but this is silly - often when you've got a messy + # string it will be wrong for many different reasons. + # + # Thus we iterate this to a fixed point where we repair + # the DFA by repeatedly adding experiments until the DFA + # agrees with the membership function on this string. + + # First we make sure that normalization is not the source of the + # failure to match. + while True: + normalized = bytes(self.normalizer.normalize(c) for c in string) + # We can correctly replace the string with its normalized version + # so normalization is not the problem here. + if self.member(normalized) == correct_outcome: + string = normalized + break + alphabet = sorted(set(string), reverse=True) + target = string + for a in alphabet: + + def replace(b): + if a == b: + return target + return bytes(b if c == a else c for c in target) + + self.normalizer.distinguish(a, lambda x: self.member(replace(x))) + target = replace(self.normalizer.normalize(a)) + assert self.member(target) == correct_outcome + assert target != normalized + self.__dfa_changed() + + if self.dfa.matches(string) == correct_outcome: + return + + # Now we know normalization is correct we can attempt to determine if + # any of our transitions are wrong. + while True: + dfa = self.dfa + + states = [dfa.start] + + def seems_right(n): + """After reading n characters from s, do we seem to be + in the right state? + + We determine this by replacing the first n characters + of s with the label of the state we expect to be in. + If we are in the right state, that will replace a substring + with an equivalent one so must produce the same answer. + """ + if n > len(string): + return False + + # Populate enough of the states list to know where we are. + while n >= len(states): + states.append(dfa.transition(states[-1], string[len(states) - 1])) + + return self.member(dfa.label(states[n]) + string[n:]) == correct_outcome + + assert seems_right(0) + + n = find_integer(seems_right) + + # We got to the end without ever finding ourself in a bad + # state, so we must correctly match this string. + if n == len(string): + assert dfa.matches(string) == correct_outcome + break + + # Reading n characters does not put us in a bad state but + # reading n + 1 does. This means that the remainder of + # the string that we have not read yet is an experiment + # that allows us to distinguish the state that we ended + # up in from the state that we should have ended up in. + + source = states[n] + character = string[n] + wrong_destination = states[n + 1] + + # We've made an error in transitioning from ``source`` to + # ``wrong_destination`` via ``character``. We now need to update + # the DFA so that this transition no longer occurs. Note that we + # do not guarantee that the transition is *correct* after this, + # only that we don't make this particular error. 
+ assert self.transition(source, character) == wrong_destination + + labels_wrong_destination = self.dfa.label(wrong_destination) + labels_correct_destination = self.dfa.label(source) + bytes([character]) + + ex = string[n + 1 :] + + assert self.member(labels_wrong_destination + ex) != self.member( + labels_correct_destination + ex + ) + + # Adding this experiment causes us to distinguish the wrong + # destination from the correct one. + self.__states[wrong_destination].experiments[ex] = self.member( + labels_wrong_destination + ex + ) + + # We now clear the cached details that caused us to make this error + # so that when we recalculate this transition we get to a + # (hopefully now correct) different state. + del self.__states[source].transitions[character] + self.__dfa_changed() + + # We immediately recalculate the transition so that we can check + # that it has changed as we expect it to have. + new_destination = self.transition(source, string[n]) + assert new_destination != wrong_destination + + +class LearnedDFA(DFA): + """This implements a lazily calculated DFA where states + are labelled by some string that reaches them, and are + distinguished by a membership test and a set of experiments.""" + + def __init__(self, lstar): + super().__init__() + self.__lstar = lstar + self.__generation = lstar.generation + + def __check_changed(self): + if self.__generation != self.__lstar.generation: + raise InvalidState( + "The underlying L* model has changed, so this DFA is no longer valid. " + "If you want to preserve a previously learned DFA for posterity, call " + "canonicalise() on it first." + ) + + def label(self, i): + self.__check_changed() + return self.__lstar.label(i) + + @property + def start(self): + self.__check_changed() + return self.__lstar.start + + def is_accepting(self, i): + self.__check_changed() + return self.__lstar.is_accepting(i) + + def transition(self, i, c): + self.__check_changed() + + return self.__lstar.transition(i, c) + + @cached + def successor_states(self, state): + """Returns all of the distinct states that can be reached via one + transition from ``state``, in the lexicographic order of the + smallest character that reaches them.""" + seen = set() + result = [] + for c in self.__lstar.normalizer.representatives(): + j = self.transition(state, c) + if j not in seen: + seen.add(j) + result.append(j) + return tuple(result) + + +class IntegerNormalizer: + """A class for replacing non-negative integers with a + "canonical" value that is equivalent for all relevant + purposes.""" + + def __init__(self): + # We store canonical values as a sorted list of integers + # with each value being treated as equivalent to the largest + # integer in the list that is below it. + self.__values = IntList([0]) + self.__cache = {} + + def __repr__(self): + return f"IntegerNormalizer({list(self.__values)!r})" + + def __copy__(self): + result = IntegerNormalizer() + result.__values = IntList(self.__values) + return result + + def representatives(self): + yield from self.__values + + def normalize(self, value): + """Return the canonical integer considered equivalent + to ``value``.""" + try: + return self.__cache[value] + except KeyError: + pass + i = bisect_right(self.__values, value) - 1 + assert i >= 0 + return self.__cache.setdefault(value, self.__values[i]) + + def distinguish(self, value, test): + """Checks whether ``test`` gives the same answer for + ``value`` and ``self.normalize(value)``. If it does + not, updates the list of canonical values so that + it does. 
+ + Returns True if and only if this makes a change to + the underlying canonical values.""" + canonical = self.normalize(value) + if canonical == value: + return False + + value_test = test(value) + + if test(canonical) == value_test: + return False + + self.__cache.clear() + + def can_lower(k): + new_canon = value - k + if new_canon <= canonical: + return False + return test(new_canon) == value_test + + new_canon = value - find_integer(can_lower) + + assert new_canon not in self.__values + + insort(self.__values, new_canon) + + assert self.normalize(value) == new_canon + return True diff --git a/vendored/hypothesis/internal/conjecture/engine.py b/vendored/hypothesis/internal/conjecture/engine.py new file mode 100644 index 0000000..21ea97c --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/engine.py @@ -0,0 +1,1665 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import importlib +import inspect +import math +import threading +import time +from collections import defaultdict +from collections.abc import Callable, Generator, Sequence +from contextlib import AbstractContextManager, contextmanager, nullcontext, suppress +from dataclasses import dataclass, field +from datetime import timedelta +from enum import Enum +from random import Random +from typing import Literal, NoReturn, cast + +from hypothesis import HealthCheck, Phase, Verbosity, settings as Settings +from hypothesis._settings import local_settings, note_deprecation +from hypothesis.database import ExampleDatabase, choices_from_bytes, choices_to_bytes +from hypothesis.errors import ( + BackendCannotProceed, + FlakyBackendFailure, + HypothesisException, + InvalidArgument, + StopTest, +) +from hypothesis.internal.cache import LRUReusedCache +from hypothesis.internal.compat import NotRequired, TypedDict, ceil, override +from hypothesis.internal.conjecture.choice import ( + ChoiceConstraintsT, + ChoiceKeyT, + ChoiceNode, + ChoiceT, + ChoiceTemplate, + choices_key, +) +from hypothesis.internal.conjecture.data import ( + ConjectureData, + ConjectureResult, + DataObserver, + Overrun, + Status, + _Overrun, +) +from hypothesis.internal.conjecture.datatree import ( + DataTree, + PreviouslyUnseenBehaviour, + TreeRecordingObserver, +) +from hypothesis.internal.conjecture.junkdrawer import ( + ensure_free_stackframes, + startswith, +) +from hypothesis.internal.conjecture.pareto import NO_SCORE, ParetoFront, ParetoOptimiser +from hypothesis.internal.conjecture.providers import ( + AVAILABLE_PROVIDERS, + HypothesisProvider, + PrimitiveProvider, +) +from hypothesis.internal.conjecture.shrinker import Shrinker, ShrinkPredicateT, sort_key +from hypothesis.internal.escalation import InterestingOrigin +from hypothesis.internal.healthcheck import fail_health_check +from hypothesis.internal.observability import Observation, with_observability_callback +from hypothesis.reporting import base_report, report + +# In most cases, the following constants are all Final. However, we do allow users +# to monkeypatch all of these variables, which means we cannot annotate them as +# Final or mypyc will inline them and render monkeypatching useless. 
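+#
+# A sketch of what such monkeypatching looks like in practice (these are
+# internal knobs, not a supported public API):
+#
+#   from hypothesis.internal.conjecture import engine
+#   engine.MAX_SHRINKS = 1000
+#   engine.MAX_SHRINKING_SECONDS = 600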
+
+#: The maximum number of times the shrinker will reduce the complexity of a failing
+#: input before giving up. This avoids falling down a trap of exponential (or worse)
+#: complexity, where the shrinker appears to be making progress but will take a
+#: substantially long time to finish completely.
+MAX_SHRINKS: int = 500
+
+# If the shrinking phase takes more than five minutes, abort it early and print
+# a warning. Many CI systems will kill a build after around ten minutes with
+# no output, and appearing to hang isn't great for interactive use either -
+# showing partially-shrunk examples is better than quitting with no examples!
+# (but make it monkeypatchable, for the rare users who need to keep on shrinking)
+
+#: The maximum total time in seconds that the shrinker will try to shrink a failure
+#: for before giving up. This is across all shrinks for the same failure, so even
+#: if the shrinker successfully reduces the complexity of a single failure several
+#: times, it will stop when it hits |MAX_SHRINKING_SECONDS| of total time taken.
+MAX_SHRINKING_SECONDS: int = 300
+
+#: The maximum amount of entropy a single test case can use before giving up
+#: while making random choices during input generation.
+#:
+#: The "unit" of one |BUFFER_SIZE| does not have any defined semantics, and you
+#: should not rely on it, except that a linear increase in |BUFFER_SIZE| will
+#: linearly increase the amount of entropy a test case can use during generation.
+BUFFER_SIZE: int = 8 * 1024
+CACHE_SIZE: int = 10000
+MIN_TEST_CALLS: int = 10
+
+# we use this to isolate Hypothesis from interacting with the global random,
+# to make it easier to reason about our global random warning logic (see
+# deprecate_random_in_strategy).
+_random = Random()
+
+
+def shortlex(s):
+    return (len(s), s)
+
+
+@dataclass(slots=True, frozen=False)
+class HealthCheckState:
+    valid_examples: int = field(default=0)
+    invalid_examples: int = field(default=0)
+    overrun_examples: int = field(default=0)
+    draw_times: defaultdict[str, list[float]] = field(
+        default_factory=lambda: defaultdict(list)
+    )
+
+    @property
+    def total_draw_time(self) -> float:
+        return math.fsum(sum(self.draw_times.values(), start=[]))
+
+    def timing_report(self) -> str:
+        """Return a terminal report describing what was slow."""
+        if not self.draw_times:
+            return ""
+        width = max(
+            len(k.removeprefix("generate:").removesuffix(": ")) for k in self.draw_times
+        )
+        out = [f"\n {'':^{width}} count | fraction | slowest draws (seconds)"]
+        args_in_order = sorted(self.draw_times.items(), key=lambda kv: -sum(kv[1]))
+        for i, (argname, times) in enumerate(args_in_order):  # pragma: no branch
+            # If we have very many unique keys, which can happen due to interactive
+            # draws with computed labels, we'll skip uninformative rows.
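+            # (Concretely: once at least five rows have been shown and the
+            # next argument accounts for under 5% of the total draw time, the
+            # remaining rows are collapsed into a single "skipped" line.)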
+ if ( + 5 <= i < (len(self.draw_times) - 2) + and math.fsum(times) * 20 < self.total_draw_time + ): + out.append(f" (skipped {len(self.draw_times) - i} rows of fast draws)") + break + # Compute the row to report, omitting times <1ms to focus on slow draws + reprs = [f"{t:>6.3f}," for t in sorted(times)[-5:] if t > 5e-4] + desc = " ".join(([" -- "] * 5 + reprs)[-5:]).rstrip(",") + arg = argname.removeprefix("generate:").removesuffix(": ") + out.append( + f" {arg:^{width}} | {len(times):>4} | " + f"{math.fsum(times)/self.total_draw_time:>7.0%} | {desc}" + ) + return "\n".join(out) + + +class ExitReason(Enum): + max_examples = "settings.max_examples={s.max_examples}" + max_iterations = ( + "settings.max_examples={s.max_examples}, " + "but < 10% of examples satisfied assumptions" + ) + max_shrinks = f"shrunk example {MAX_SHRINKS} times" + finished = "nothing left to do" + flaky = "test was flaky" + very_slow_shrinking = "shrinking was very slow" + + def describe(self, settings: Settings) -> str: + return self.value.format(s=settings) + + +class RunIsComplete(Exception): + pass + + +def _get_provider(backend: str) -> PrimitiveProvider | type[PrimitiveProvider]: + provider_cls = AVAILABLE_PROVIDERS[backend] + if isinstance(provider_cls, str): + module_name, class_name = provider_cls.rsplit(".", 1) + provider_cls = getattr(importlib.import_module(module_name), class_name) + + if provider_cls.lifetime == "test_function": + return provider_cls(None) + elif provider_cls.lifetime == "test_case": + return provider_cls + else: + raise InvalidArgument( + f"invalid lifetime {provider_cls.lifetime} for provider {provider_cls.__name__}. " + "Expected one of 'test_function', 'test_case'." + ) + + +class CallStats(TypedDict): + status: str + runtime: float + drawtime: float + gctime: float + events: list[str] + + +PhaseStatistics = TypedDict( + "PhaseStatistics", + { + "duration-seconds": float, + "test-cases": list[CallStats], + "distinct-failures": int, + "shrinks-successful": int, + }, +) +StatisticsDict = TypedDict( + "StatisticsDict", + { + "generate-phase": NotRequired[PhaseStatistics], + "reuse-phase": NotRequired[PhaseStatistics], + "shrink-phase": NotRequired[PhaseStatistics], + "stopped-because": NotRequired[str], + "targets": NotRequired[dict[str, float]], + "nodeid": NotRequired[str], + }, +) + + +def choice_count(choices: Sequence[ChoiceT | ChoiceTemplate]) -> int | None: + count = 0 + for choice in choices: + if isinstance(choice, ChoiceTemplate): + if choice.count is None: + return None + count += choice.count + else: + count += 1 + return count + + +class DiscardObserver(DataObserver): + @override + def kill_branch(self) -> NoReturn: + raise ContainsDiscard + + +def realize_choices(data: ConjectureData, *, for_failure: bool) -> None: + # backwards-compatibility with backends without for_failure, can remove + # in a few months + kwargs = {} + if for_failure: + if "for_failure" in inspect.signature(data.provider.realize).parameters: + kwargs["for_failure"] = True + else: + note_deprecation( + f"{type(data.provider).__qualname__}.realize does not have the " + "for_failure parameter. This will be an error in future versions " + "of Hypothesis. 
(If you installed this backend from a separate " + "package, upgrading that package may help).", + has_codemod=False, + since="2025-05-07", + ) + + for node in data.nodes: + value = data.provider.realize(node.value, **kwargs) + expected_type = { + "string": str, + "float": float, + "integer": int, + "boolean": bool, + "bytes": bytes, + }[node.type] + if type(value) is not expected_type: + raise HypothesisException( + f"expected {expected_type} from " + f"{data.provider.realize.__qualname__}, got {type(value)}" + ) + + constraints = cast( + ChoiceConstraintsT, + { + k: data.provider.realize(v, **kwargs) + for k, v in node.constraints.items() + }, + ) + node.value = value + node.constraints = constraints + + +class ConjectureRunner: + def __init__( + self, + test_function: Callable[[ConjectureData], None], + *, + settings: Settings | None = None, + random: Random | None = None, + database_key: bytes | None = None, + ignore_limits: bool = False, + thread_overlap: dict[int, bool] | None = None, + ) -> None: + self._test_function: Callable[[ConjectureData], None] = test_function + self.settings: Settings = settings or Settings() + self.shrinks: int = 0 + self.finish_shrinking_deadline: float | None = None + self.call_count: int = 0 + self.misaligned_count: int = 0 + self.valid_examples: int = 0 + self.invalid_examples: int = 0 + self.overrun_examples: int = 0 + self.random: Random = random or Random(_random.getrandbits(128)) + self.database_key: bytes | None = database_key + self.ignore_limits: bool = ignore_limits + self.thread_overlap = {} if thread_overlap is None else thread_overlap + + # Global dict of per-phase statistics, and a list of per-call stats + # which transfer to the global dict at the end of each phase. + self._current_phase: str = "(not a phase)" + self.statistics: StatisticsDict = {} + self.stats_per_test_case: list[CallStats] = [] + + self.interesting_examples: dict[InterestingOrigin, ConjectureResult] = {} + # We use call_count because there may be few possible valid_examples. + self.first_bug_found_at: int | None = None + self.last_bug_found_at: int | None = None + self.first_bug_found_time: float = math.inf + + self.shrunk_examples: set[InterestingOrigin] = set() + self.health_check_state: HealthCheckState | None = None + self.tree: DataTree = DataTree() + self.provider: PrimitiveProvider | type[PrimitiveProvider] = _get_provider( + self.settings.backend + ) + + self.best_observed_targets: defaultdict[str, float] = defaultdict( + lambda: NO_SCORE + ) + self.best_examples_of_observed_targets: dict[str, ConjectureResult] = {} + + # We keep the pareto front in the example database if we have one. This + # is only marginally useful at present, but speeds up local development + # because it means that large targets will be quickly surfaced in your + # testing. + self.pareto_front: ParetoFront | None = None + if self.database_key is not None and self.settings.database is not None: + self.pareto_front = ParetoFront(self.random) + self.pareto_front.on_evict(self.on_pareto_evict) + + # We want to be able to get the ConjectureData object that results + # from running a choice sequence without recalculating, especially during + # shrinking where we need to know about the structure of the + # executed test case. 
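+        # The cache is keyed on the choice sequence (via choices_key) and maps to
+        # the resulting ConjectureResult or Overrun, holding at most CACHE_SIZE
+        # entries.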
+ self.__data_cache = LRUReusedCache[ + tuple[ChoiceKeyT, ...], ConjectureResult | _Overrun + ](CACHE_SIZE) + + self.reused_previously_shrunk_test_case: bool = False + + self.__pending_call_explanation: str | None = None + self._backend_found_failure: bool = False + self._backend_exceeded_deadline: bool = False + self._switch_to_hypothesis_provider: bool = False + + self.__failed_realize_count: int = 0 + # note unsound verification by alt backends + self._verified_by: str | None = None + + @contextmanager + def _with_switch_to_hypothesis_provider( + self, value: bool + ) -> Generator[None, None, None]: + previous = self._switch_to_hypothesis_provider + try: + self._switch_to_hypothesis_provider = value + yield + finally: + self._switch_to_hypothesis_provider = previous + + @property + def using_hypothesis_backend(self) -> bool: + return ( + self.settings.backend == "hypothesis" or self._switch_to_hypothesis_provider + ) + + def explain_next_call_as(self, explanation: str) -> None: + self.__pending_call_explanation = explanation + + def clear_call_explanation(self) -> None: + self.__pending_call_explanation = None + + @contextmanager + def _log_phase_statistics( + self, phase: Literal["reuse", "generate", "shrink"] + ) -> Generator[None, None, None]: + self.stats_per_test_case.clear() + start_time = time.perf_counter() + try: + self._current_phase = phase + yield + finally: + self.statistics[phase + "-phase"] = { # type: ignore + "duration-seconds": time.perf_counter() - start_time, + "test-cases": list(self.stats_per_test_case), + "distinct-failures": len(self.interesting_examples), + "shrinks-successful": self.shrinks, + } + + @property + def should_optimise(self) -> bool: + return Phase.target in self.settings.phases + + def __tree_is_exhausted(self) -> bool: + return self.tree.is_exhausted and self.using_hypothesis_backend + + def __stoppable_test_function(self, data: ConjectureData) -> None: + """Run ``self._test_function``, but convert a ``StopTest`` exception + into a normal return and avoid raising anything flaky for RecursionErrors. + """ + # We ensure that the test has this much stack space remaining, no + # matter the size of the stack when called, to de-flake RecursionErrors + # (#2494, #3671). Note, this covers the data generation part of the test; + # the actual test execution is additionally protected at the call site + # in hypothesis.core.execute_once. + with ensure_free_stackframes(): + try: + self._test_function(data) + except StopTest as e: + if e.testcounter == data.testcounter: + # This StopTest has successfully stopped its test, and can now + # be discarded. + pass + else: + # This StopTest was raised by a different ConjectureData. We + # need to re-raise it so that it will eventually reach the + # correct engine. + raise + + def _cache_key(self, choices: Sequence[ChoiceT]) -> tuple[ChoiceKeyT, ...]: + return choices_key(choices) + + def _cache(self, data: ConjectureData) -> None: + result = data.as_result() + key = self._cache_key(data.choices) + self.__data_cache[key] = result + + def cached_test_function( + self, + choices: Sequence[ChoiceT | ChoiceTemplate], + *, + error_on_discard: bool = False, + extend: int | Literal["full"] = 0, + ) -> ConjectureResult | _Overrun: + """ + If ``error_on_discard`` is set to True this will raise ``ContainsDiscard`` + in preference to running the actual test function. This is to allow us + to skip test cases we expect to be redundant in some cases. 
Note that + it may be the case that we don't raise ``ContainsDiscard`` even if the + result has discards if we cannot determine from previous runs whether + it will have a discard. + """ + # node templates represent a not-yet-filled hole and therefore cannot + # be cached or retrieved from the cache. + if not any(isinstance(choice, ChoiceTemplate) for choice in choices): + # this type cast is validated by the isinstance check above (ie, there + # are no ChoiceTemplate elements). + choices = cast(Sequence[ChoiceT], choices) + key = self._cache_key(choices) + try: + cached = self.__data_cache[key] + # if we have a cached overrun for this key, but we're allowing extensions + # of the nodes, it could in fact run to a valid data if we try. + if extend == 0 or cached.status is not Status.OVERRUN: + return cached + except KeyError: + pass + + if extend == "full": + max_length = None + elif (count := choice_count(choices)) is None: + max_length = None + else: + max_length = count + extend + + # explicitly use a no-op DataObserver here instead of a TreeRecordingObserver. + # The reason is we don't expect simulate_test_function to explore new choices + # and write back to the tree, so we don't want the overhead of the + # TreeRecordingObserver tracking those calls. + trial_observer: DataObserver | None = DataObserver() + if error_on_discard: + trial_observer = DiscardObserver() + + try: + trial_data = self.new_conjecture_data( + choices, observer=trial_observer, max_choices=max_length + ) + self.tree.simulate_test_function(trial_data) + except PreviouslyUnseenBehaviour: + pass + else: + trial_data.freeze() + key = self._cache_key(trial_data.choices) + if trial_data.status > Status.OVERRUN: + try: + return self.__data_cache[key] + except KeyError: + pass + else: + # if we simulated to an overrun, then we our result is certainly + # an overrun; no need to consult the cache. (and we store this result + # for simulation-less lookup later). + self.__data_cache[key] = Overrun + return Overrun + try: + return self.__data_cache[key] + except KeyError: + pass + + data = self.new_conjecture_data(choices, max_choices=max_length) + # note that calling test_function caches `data` for us. + self.test_function(data) + return data.as_result() + + def test_function(self, data: ConjectureData) -> None: + if self.__pending_call_explanation is not None: + self.debug(self.__pending_call_explanation) + self.__pending_call_explanation = None + + self.call_count += 1 + interrupted = False + + try: + self.__stoppable_test_function(data) + except KeyboardInterrupt: + interrupted = True + raise + except BackendCannotProceed as exc: + if exc.scope in ("verified", "exhausted"): + self._switch_to_hypothesis_provider = True + if exc.scope == "verified": + self._verified_by = self.settings.backend + elif exc.scope == "discard_test_case": + self.__failed_realize_count += 1 + if ( + self.__failed_realize_count > 10 + and (self.__failed_realize_count / self.call_count) > 0.2 + ): + self._switch_to_hypothesis_provider = True + + # treat all BackendCannotProceed exceptions as invalid. This isn't + # great; "verified" should really be counted as self.valid_examples += 1. + # But we check self.valid_examples == 0 to determine whether to raise + # Unsatisfiable, and that would throw this check off. 
+ self.invalid_examples += 1 + + # skip the post-test-case tracking; we're pretending this never happened + interrupted = True + data.cannot_proceed_scope = exc.scope + data.freeze() + return + except BaseException: + data.freeze() + if self.settings.backend != "hypothesis": + realize_choices(data, for_failure=True) + self.save_choices(data.choices) + raise + finally: + # No branch, because if we're interrupted we always raise + # the KeyboardInterrupt, never continue to the code below. + if not interrupted: # pragma: no branch + assert data.cannot_proceed_scope is None + data.freeze() + + if self.settings.backend != "hypothesis": + realize_choices(data, for_failure=data.status is Status.INTERESTING) + + call_stats: CallStats = { + "status": data.status.name.lower(), + "runtime": data.finish_time - data.start_time, + "drawtime": math.fsum(data.draw_times.values()), + "gctime": data.gc_finish_time - data.gc_start_time, + "events": sorted( + k if v == "" else f"{k}: {v}" for k, v in data.events.items() + ), + } + self.stats_per_test_case.append(call_stats) + + self._cache(data) + if data.misaligned_at is not None: # pragma: no branch # coverage bug? + self.misaligned_count += 1 + + self.debug_data(data) + + if ( + data.target_observations + and self.pareto_front is not None + and self.pareto_front.add(data.as_result()) + ): + self.save_choices(data.choices, sub_key=b"pareto") + + if data.status >= Status.VALID: + for k, v in data.target_observations.items(): + self.best_observed_targets[k] = max(self.best_observed_targets[k], v) + + if k not in self.best_examples_of_observed_targets: + data_as_result = data.as_result() + assert not isinstance(data_as_result, _Overrun) + self.best_examples_of_observed_targets[k] = data_as_result + continue + + existing_example = self.best_examples_of_observed_targets[k] + existing_score = existing_example.target_observations[k] + + if v < existing_score: + continue + + if v > existing_score or sort_key(data.nodes) < sort_key( + existing_example.nodes + ): + data_as_result = data.as_result() + assert not isinstance(data_as_result, _Overrun) + self.best_examples_of_observed_targets[k] = data_as_result + + if data.status is Status.VALID: + self.valid_examples += 1 + if data.status is Status.INVALID: + self.invalid_examples += 1 + if data.status is Status.OVERRUN: + self.overrun_examples += 1 + + if data.status == Status.INTERESTING: + if not self.using_hypothesis_backend: + # replay this failure on the hypothesis backend to ensure it still + # finds a failure. otherwise, it is flaky. + initial_exception = data.expected_exception + data = ConjectureData.for_choices(data.choices) + # we've already going to use the hypothesis provider for this + # data, so the verb "switch" is a bit misleading here. We're really + # setting this to inform our on_observation logic that the observation + # generated here was from a hypothesis backend, and shouldn't be + # sent to the on_observation of any alternative backend. + with self._with_switch_to_hypothesis_provider(True): + self.__stoppable_test_function(data) + data.freeze() + # TODO: Should same-origin also be checked? (discussion in + # https://github.com/HypothesisWorks/hypothesis/pull/4470#discussion_r2217055487) + if data.status != Status.INTERESTING: + desc_new_status = { + data.status.VALID: "passed", + data.status.INVALID: "failed filters", + data.status.OVERRUN: "overran", + }[data.status] + raise FlakyBackendFailure( + f"Inconsistent results from replaying a failing test case! 
" + f"Raised {type(initial_exception).__name__} on " + f"backend={self.settings.backend!r}, but " + f"{desc_new_status} under backend='hypothesis'.", + [initial_exception], + ) + + self._cache(data) + + assert data.interesting_origin is not None + key = data.interesting_origin + changed = False + try: + existing = self.interesting_examples[key] + except KeyError: + changed = True + self.last_bug_found_at = self.call_count + if self.first_bug_found_at is None: + self.first_bug_found_at = self.call_count + self.first_bug_found_time = time.monotonic() + else: + if sort_key(data.nodes) < sort_key(existing.nodes): + self.shrinks += 1 + self.downgrade_choices(existing.choices) + self.__data_cache.unpin(self._cache_key(existing.choices)) + changed = True + + if changed: + self.save_choices(data.choices) + self.interesting_examples[key] = data.as_result() # type: ignore + if not self.using_hypothesis_backend: + self._backend_found_failure = True + self.__data_cache.pin(self._cache_key(data.choices), data.as_result()) + self.shrunk_examples.discard(key) + + if self.shrinks >= MAX_SHRINKS: + self.exit_with(ExitReason.max_shrinks) + + if ( + not self.ignore_limits + and self.finish_shrinking_deadline is not None + and self.finish_shrinking_deadline < time.perf_counter() + ): + # See https://github.com/HypothesisWorks/hypothesis/issues/2340 + report( + "WARNING: Hypothesis has spent more than five minutes working to shrink" + " a failing example, and stopped because it is making very slow" + " progress. When you re-run your tests, shrinking will resume and may" + " take this long before aborting again.\nPLEASE REPORT THIS if you can" + " provide a reproducing example, so that we can improve shrinking" + " performance for everyone." + ) + self.exit_with(ExitReason.very_slow_shrinking) + + if not self.interesting_examples: + # Note that this logic is reproduced to end the generation phase when + # we have interesting examples. Update that too if you change this! + # (The doubled implementation is because here we exit the engine entirely, + # while in the other case below we just want to move on to shrinking.) + if self.valid_examples >= self.settings.max_examples: + self.exit_with(ExitReason.max_examples) + if self.call_count >= max( + self.settings.max_examples * 10, + # We have a high-ish default max iterations, so that tests + # don't become flaky when max_examples is too low. + 1000, + ): + self.exit_with(ExitReason.max_iterations) + + if self.__tree_is_exhausted(): + self.exit_with(ExitReason.finished) + + self.record_for_health_check(data) + + def on_pareto_evict(self, data: ConjectureResult) -> None: + self.settings.database.delete(self.pareto_key, choices_to_bytes(data.choices)) + + def generate_novel_prefix(self) -> tuple[ChoiceT, ...]: + """Uses the tree to proactively generate a starting choice sequence + that we haven't explored yet for this test. + + When this method is called, we assume that there must be at + least one novel prefix left to find. If there were not, then the + test run should have already stopped due to tree exhaustion. + """ + return self.tree.generate_novel_prefix(self.random) + + def record_for_health_check(self, data: ConjectureData) -> None: + # Once we've actually found a bug, there's no point in trying to run + # health checks - they'll just mask the actually important information. 
+ if data.status == Status.INTERESTING: + self.health_check_state = None + + state = self.health_check_state + + if state is None: + return + + for k, v in data.draw_times.items(): + state.draw_times[k].append(v) + + if data.status == Status.VALID: + state.valid_examples += 1 + elif data.status == Status.INVALID: + state.invalid_examples += 1 + else: + assert data.status == Status.OVERRUN + state.overrun_examples += 1 + + max_valid_draws = 10 + max_invalid_draws = 50 + max_overrun_draws = 20 + + assert state.valid_examples <= max_valid_draws + + if state.valid_examples == max_valid_draws: + self.health_check_state = None + return + + if state.overrun_examples == max_overrun_draws: + fail_health_check( + self.settings, + "Generated inputs routinely consumed more than the maximum " + f"allowed entropy: {state.valid_examples} inputs were generated " + f"successfully, while {state.overrun_examples} inputs exceeded the " + f"maximum allowed entropy during generation." + "\n\n" + f"Testing with inputs this large tends to be slow, and to produce " + "failures that are both difficult to shrink and difficult to understand. " + "Try decreasing the amount of data generated, for example by " + "decreasing the minimum size of collection strategies like " + "st.lists()." + "\n\n" + "If you expect the average size of your input to be this large, " + "you can disable this health check with " + "@settings(suppress_health_check=[HealthCheck.data_too_large]). " + "See " + "https://hypothesis.readthedocs.io/en/latest/reference/api.html#hypothesis.HealthCheck " + "for details.", + HealthCheck.data_too_large, + ) + if state.invalid_examples == max_invalid_draws: + fail_health_check( + self.settings, + "It looks like this test is filtering out a lot of inputs. " + f"{state.valid_examples} inputs were generated successfully, " + f"while {state.invalid_examples} inputs were filtered out. " + "\n\n" + "An input might be filtered out by calls to assume(), " + "strategy.filter(...), or occasionally by Hypothesis internals." + "\n\n" + "Applying this much filtering makes input generation slow, since " + "Hypothesis must discard inputs which are filtered out and try " + "generating it again. It is also possible that applying this much " + "filtering will distort the domain and/or distribution of the test, " + "leaving your testing less rigorous than expected." + "\n\n" + "If you expect this many inputs to be filtered out during generation, " + "you can disable this health check with " + "@settings(suppress_health_check=[HealthCheck.filter_too_much]). See " + "https://hypothesis.readthedocs.io/en/latest/reference/api.html#hypothesis.HealthCheck " + "for details.", + HealthCheck.filter_too_much, + ) + + # Allow at least the greater of one second or 5x the deadline. If deadline + # is None, allow 30s - the user can disable the healthcheck too if desired. + draw_time = state.total_draw_time + draw_time_limit = 5 * (self.settings.deadline or timedelta(seconds=6)) + if ( + draw_time > max(1.0, draw_time_limit.total_seconds()) + # we disable HealthCheck.too_slow under concurrent threads, since + # cpython may switch away from a thread for arbitrarily long. 
+ and not self.thread_overlap.get(threading.get_ident(), False) + ): + extra_str = [] + if state.invalid_examples: + extra_str.append(f"{state.invalid_examples} invalid inputs") + if state.overrun_examples: + extra_str.append( + f"{state.overrun_examples} inputs which exceeded the " + "maximum allowed entropy" + ) + extra_str = ", and ".join(extra_str) + extra_str = f" ({extra_str})" if extra_str else "" + + fail_health_check( + self.settings, + "Input generation is slow: Hypothesis only generated " + f"{state.valid_examples} valid inputs after {draw_time:.2f} " + f"seconds{extra_str}." + "\n" + state.timing_report() + "\n\n" + "This could be for a few reasons:" + "\n" + "1. This strategy could be generating too much data per input. " + "Try decreasing the amount of data generated, for example by " + "decreasing the minimum size of collection strategies like " + "st.lists()." + "\n" + "2. Some other expensive computation could be running during input " + "generation. For example, " + "if @st.composite or st.data() is interspersed with an expensive " + "computation, HealthCheck.too_slow is likely to trigger. If this " + "computation is unrelated to input generation, move it elsewhere. " + "Otherwise, try making it more efficient, or disable this health " + "check if that is not possible." + "\n\n" + "If you expect input generation to take this long, you can disable " + "this health check with " + "@settings(suppress_health_check=[HealthCheck.too_slow]). See " + "https://hypothesis.readthedocs.io/en/latest/reference/api.html#hypothesis.HealthCheck " + "for details.", + HealthCheck.too_slow, + ) + + def save_choices( + self, choices: Sequence[ChoiceT], sub_key: bytes | None = None + ) -> None: + if self.settings.database is not None: + key = self.sub_key(sub_key) + if key is None: + return + self.settings.database.save(key, choices_to_bytes(choices)) + + def downgrade_choices(self, choices: Sequence[ChoiceT]) -> None: + buffer = choices_to_bytes(choices) + if self.settings.database is not None and self.database_key is not None: + self.settings.database.move(self.database_key, self.secondary_key, buffer) + + def sub_key(self, sub_key: bytes | None) -> bytes | None: + if self.database_key is None: + return None + if sub_key is None: + return self.database_key + return b".".join((self.database_key, sub_key)) + + @property + def secondary_key(self) -> bytes | None: + return self.sub_key(b"secondary") + + @property + def pareto_key(self) -> bytes | None: + return self.sub_key(b"pareto") + + def debug(self, message: str) -> None: + if self.settings.verbosity >= Verbosity.debug: + base_report(message) + + @property + def report_debug_info(self) -> bool: + return self.settings.verbosity >= Verbosity.debug + + def debug_data(self, data: ConjectureData | ConjectureResult) -> None: + if not self.report_debug_info: + return + + status = repr(data.status) + if data.status == Status.INTERESTING: + status = f"{status} ({data.interesting_origin!r})" + + self.debug( + f"{len(data.choices)} choices {data.choices} -> {status}" + f"{', ' + data.output if data.output else ''}" + ) + + def observe_for_provider(self) -> AbstractContextManager: + def on_observation(observation: Observation) -> None: + assert observation.type == "test_case" + # because lifetime == "test_function" + assert isinstance(self.provider, PrimitiveProvider) + # only fire if we actually used that provider to generate this observation + if not self._switch_to_hypothesis_provider: + self.provider.on_observation(observation) + + if ( + 
self.settings.backend != "hypothesis" + # only for lifetime = "test_function" providers (guaranteed + # by this isinstance check) + and isinstance(self.provider, PrimitiveProvider) + # and the provider opted-in to observations + and self.provider.add_observability_callback + ): + return with_observability_callback(on_observation) + return nullcontext() + + def run(self) -> None: + with local_settings(self.settings), self.observe_for_provider(): + try: + self._run() + except RunIsComplete: + pass + for v in self.interesting_examples.values(): + self.debug_data(v) + self.debug( + f"Run complete after {self.call_count} examples " + f"({self.valid_examples} valid) and {self.shrinks} shrinks" + ) + + @property + def database(self) -> ExampleDatabase | None: + if self.database_key is None: + return None + return self.settings.database + + def has_existing_examples(self) -> bool: + return self.database is not None and Phase.reuse in self.settings.phases + + def reuse_existing_examples(self) -> None: + """If appropriate (we have a database and have been told to use it), + try to reload existing examples from the database. + + If there are a lot we don't try all of them. We always try the + smallest example in the database (which is guaranteed to be the + last failure) and the largest (which is usually the seed example + which the last failure came from but we don't enforce that). We + then take a random sampling of the remainder and try those. Any + examples that are no longer interesting are cleared out. + """ + if self.has_existing_examples(): + self.debug("Reusing examples from database") + # We have to do some careful juggling here. We have two database + # corpora: The primary and secondary. The primary corpus is a + # small set of minimized examples each of which has at one point + # demonstrated a distinct bug. We want to retry all of these. + + # We also have a secondary corpus of examples that have at some + # point demonstrated interestingness (currently only ones that + # were previously non-minimal examples of a bug, but this will + # likely expand in future). These are a good source of potentially + # interesting examples, but there are a lot of them, so we down + # sample the secondary corpus to a more manageable size. + + corpus = sorted( + self.settings.database.fetch(self.database_key), key=shortlex + ) + factor = 0.1 if (Phase.generate in self.settings.phases) else 1 + desired_size = max(2, ceil(factor * self.settings.max_examples)) + primary_corpus_size = len(corpus) + + if len(corpus) < desired_size: + extra_corpus = list(self.settings.database.fetch(self.secondary_key)) + + shortfall = desired_size - len(corpus) + + if len(extra_corpus) <= shortfall: + extra = extra_corpus + else: + extra = self.random.sample(extra_corpus, shortfall) + extra.sort(key=shortlex) + corpus.extend(extra) + + # We want a fast path where every primary entry in the database was + # interesting. 
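+            # If that is the case, and every such entry replayed to exactly the
+            # stored choices, reused_previously_shrunk_test_case is set below and
+            # the engine can skip re-shrinking these failures.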
+ found_interesting_in_primary = False + all_interesting_in_primary_were_exact = True + + for i, existing in enumerate(corpus): + if i >= primary_corpus_size and found_interesting_in_primary: + break + choices = choices_from_bytes(existing) + if choices is None: + # clear out any keys which fail deserialization + self.settings.database.delete(self.database_key, existing) + continue + data = self.cached_test_function(choices, extend="full") + if data.status != Status.INTERESTING: + self.settings.database.delete(self.database_key, existing) + self.settings.database.delete(self.secondary_key, existing) + else: + if i < primary_corpus_size: + found_interesting_in_primary = True + assert not isinstance(data, _Overrun) + if choices_key(choices) != choices_key(data.choices): + all_interesting_in_primary_were_exact = False + if not self.settings.report_multiple_bugs: + break + if found_interesting_in_primary: + if all_interesting_in_primary_were_exact: + self.reused_previously_shrunk_test_case = True + + # Because self.database is not None (because self.has_existing_examples()) + # and self.database_key is not None (because we fetched using it above), + # we can guarantee self.pareto_front is not None + assert self.pareto_front is not None + + # If we've not found any interesting examples so far we try some of + # the pareto front from the last run. + if len(corpus) < desired_size and not self.interesting_examples: + desired_extra = desired_size - len(corpus) + pareto_corpus = list(self.settings.database.fetch(self.pareto_key)) + if len(pareto_corpus) > desired_extra: + pareto_corpus = self.random.sample(pareto_corpus, desired_extra) + pareto_corpus.sort(key=shortlex) + + for existing in pareto_corpus: + choices = choices_from_bytes(existing) + if choices is None: + self.settings.database.delete(self.pareto_key, existing) + continue + data = self.cached_test_function(choices, extend="full") + if data not in self.pareto_front: + self.settings.database.delete(self.pareto_key, existing) + if data.status == Status.INTERESTING: + break + + def exit_with(self, reason: ExitReason) -> None: + if self.ignore_limits: + return + self.statistics["stopped-because"] = reason.describe(self.settings) + if self.best_observed_targets: + self.statistics["targets"] = dict(self.best_observed_targets) + self.debug(f"exit_with({reason.name})") + self.exit_reason = reason + raise RunIsComplete + + def should_generate_more(self) -> bool: + # End the generation phase where we would have ended it if no bugs had + # been found. This reproduces the exit logic in `self.test_function`, + # but with the important distinction that this clause will move on to + # the shrinking phase having found one or more bugs, while the other + # will exit having found zero bugs. + if self.valid_examples >= self.settings.max_examples or self.call_count >= max( + self.settings.max_examples * 10, 1000 + ): # pragma: no cover + return False + + # If we haven't found a bug, keep looking - if we hit any limits on + # the number of tests to run that will raise an exception and stop + # the run. + if not self.interesting_examples: + return True + # Users who disable shrinking probably want to exit as fast as possible. + # If we've found a bug and won't report more than one, stop looking. + # If we first saw a bug more than 10 seconds ago, stop looking. 
+ elif ( + Phase.shrink not in self.settings.phases + or not self.settings.report_multiple_bugs + or time.monotonic() - self.first_bug_found_time > 10 + ): + return False + assert isinstance(self.first_bug_found_at, int) + assert isinstance(self.last_bug_found_at, int) + assert self.first_bug_found_at <= self.last_bug_found_at <= self.call_count + # Otherwise, keep searching for between ten and 'a heuristic' calls. + # We cap 'calls after first bug' so errors are reported reasonably + # soon even for tests that are allowed to run for a very long time, + # or sooner if the latest half of our test effort has been fruitless. + return self.call_count < MIN_TEST_CALLS or self.call_count < min( + self.first_bug_found_at + 1000, self.last_bug_found_at * 2 + ) + + def generate_new_examples(self) -> None: + if Phase.generate not in self.settings.phases: + return + if self.interesting_examples: + # The example database has failing examples from a previous run, + # so we'd rather report that they're still failing ASAP than take + # the time to look for additional failures. + return + + self.debug("Generating new examples") + + assert self.should_generate_more() + self._switch_to_hypothesis_provider = True + zero_data = self.cached_test_function((ChoiceTemplate("simplest", count=None),)) + if zero_data.status > Status.OVERRUN: + assert isinstance(zero_data, ConjectureResult) + # if the crosshair backend cannot proceed, it does not (and cannot) + # realize the symbolic values, with the intent that Hypothesis will + # throw away this test case. We usually do, but if it's the zero data + # then we try to pin it here, which requires realizing the symbolics. + # + # We don't (yet) rely on the zero data being pinned, and so + # it's simply a very slight performance loss to simply not pin it + # if doing so would error. + if zero_data.cannot_proceed_scope is None: # pragma: no branch + self.__data_cache.pin( + self._cache_key(zero_data.choices), zero_data.as_result() + ) # Pin forever + + if zero_data.status == Status.OVERRUN or ( + zero_data.status == Status.VALID + and isinstance(zero_data, ConjectureResult) + and zero_data.length * 2 > BUFFER_SIZE + ): + fail_health_check( + self.settings, + "The smallest natural input for this test is very " + "large. This makes it difficult for Hypothesis to generate " + "good inputs, especially when trying to shrink failing inputs." + "\n\n" + "Consider reducing the amount of data generated by the strategy. " + "Also consider introducing small alternative values for some " + "strategies. For example, could you " + "mark some arguments as optional by replacing `some_complex_strategy`" + "with `st.none() | some_complex_strategy`?" + "\n\n" + "If you are confident that the size of the smallest natural input " + "to your test cannot be reduced, you can suppress this health check " + "with @settings(suppress_health_check=[HealthCheck.large_base_example]). " + "See " + "https://hypothesis.readthedocs.io/en/latest/reference/api.html#hypothesis.HealthCheck " + "for details.", + HealthCheck.large_base_example, + ) + + self.health_check_state = HealthCheckState() + + # We attempt to use the size of the minimal generated test case starting + # from a given novel prefix as a guideline to generate smaller test + # cases for an initial period, by restriscting ourselves to test cases + # that are not much larger than it. 
+ # + # Calculating the actual minimal generated test case is hard, so we + # take a best guess that zero extending a prefix produces the minimal + # test case starting with that prefix (this is true for our built in + # strategies). This is only a reasonable thing to do if the resulting + # test case is valid. If we regularly run into situations where it is + # not valid then this strategy is a waste of time, so we want to + # abandon it early. In order to do this we track how many times in a + # row it has failed to work, and abort small test case generation when + # it has failed too many times in a row. + consecutive_zero_extend_is_invalid = 0 + + # We control growth during initial example generation, for two + # reasons: + # + # * It gives us an opportunity to find small examples early, which + # gives us a fast path for easy to find bugs. + # * It avoids low probability events where we might end up + # generating very large examples during health checks, which + # on slower machines can trigger HealthCheck.too_slow. + # + # The heuristic we use is that we attempt to estimate the smallest + # extension of this prefix, and limit the size to no more than + # an order of magnitude larger than that. If we fail to estimate + # the size accurately, we skip over this prefix and try again. + # + # We need to tune the example size based on the initial prefix, + # because any fixed size might be too small, and any size based + # on the strategy in general can fall afoul of strategies that + # have very different sizes for different prefixes. + # + # We previously set a minimum value of 10 on small_example_cap, with the + # reasoning of avoiding flaky health checks. However, some users set a + # low max_examples for performance. A hard lower bound in this case biases + # the distribution towards small (and less powerful) examples. Flaky + # and loud health checks are better than silent performance degradation. + small_example_cap = min(self.settings.max_examples // 10, 50) + optimise_at = max(self.settings.max_examples // 2, small_example_cap + 1, 10) + ran_optimisations = False + self._switch_to_hypothesis_provider = False + + while self.should_generate_more(): + # we don't yet integrate DataTree with backends. Instead of generating + # a novel prefix, ask the backend for an input. + if not self.using_hypothesis_backend: + data = self.new_conjecture_data([]) + with suppress(BackendCannotProceed): + self.test_function(data) + continue + + self._current_phase = "generate" + prefix = self.generate_novel_prefix() + if ( + self.valid_examples <= small_example_cap + and self.call_count <= 5 * small_example_cap + and not self.interesting_examples + and consecutive_zero_extend_is_invalid < 5 + ): + minimal_example = self.cached_test_function( + prefix + (ChoiceTemplate("simplest", count=None),) + ) + + if minimal_example.status < Status.VALID: + consecutive_zero_extend_is_invalid += 1 + continue + # Because the Status code is greater than Status.VALID, it cannot be + # Status.OVERRUN, which guarantees that the minimal_example is a + # ConjectureResult object. + assert isinstance(minimal_example, ConjectureResult) + consecutive_zero_extend_is_invalid = 0 + minimal_extension = len(minimal_example.choices) - len(prefix) + max_length = len(prefix) + minimal_extension * 5 + + # We could end up in a situation where even though the prefix was + # novel when we generated it, because we've now tried zero extending + # it not all possible continuations of it will be novel. 
In order to + # avoid making redundant test calls, we rerun it in simulation mode + # first. If this has a predictable result, then we don't bother + # running the test function for real here. If however we encounter + # some novel behaviour, we try again with the real test function, + # starting from the new novel prefix that has discovered. + trial_data = self.new_conjecture_data(prefix, max_choices=max_length) + try: + self.tree.simulate_test_function(trial_data) + continue + except PreviouslyUnseenBehaviour: + pass + + # If the simulation entered part of the tree that has been killed, + # we don't want to run this. + assert isinstance(trial_data.observer, TreeRecordingObserver) + if trial_data.observer.killed: + continue + + # We might have hit the cap on number of examples we should + # run when calculating the minimal example. + if not self.should_generate_more(): + break + + prefix = trial_data.choices + else: + max_length = None + + data = self.new_conjecture_data(prefix, max_choices=max_length) + self.test_function(data) + + if ( + data.status is Status.OVERRUN + and max_length is not None + and "invalid because" not in data.events + ): + data.events["invalid because"] = ( + "reduced max size for early examples (avoids flaky health checks)" + ) + + self.generate_mutations_from(data) + + # Although the optimisations are logically a distinct phase, we + # actually normally run them as part of example generation. The + # reason for this is that we cannot guarantee that optimisation + # actually exhausts our budget: It might finish running and we + # discover that actually we still could run a bunch more test cases + # if we want. + if ( + self.valid_examples >= max(small_example_cap, optimise_at) + and not ran_optimisations + ): + ran_optimisations = True + self._current_phase = "target" + self.optimise_targets() + + def generate_mutations_from(self, data: ConjectureData | ConjectureResult) -> None: + # A thing that is often useful but rarely happens by accident is + # to generate the same value at multiple different points in the + # test case. + # + # Rather than make this the responsibility of individual strategies + # we implement a small mutator that just takes parts of the test + # case with the same label and tries replacing one of them with a + # copy of the other and tries running it. If we've made a good + # guess about what to put where, this will run a similar generated + # test case with more duplication. + if ( + # An OVERRUN doesn't have enough information about the test + # case to mutate, so we just skip those. + data.status >= Status.INVALID + # This has a tendency to trigger some weird edge cases during + # generation so we don't let it run until we're done with the + # health checks. + and self.health_check_state is None + ): + initial_calls = self.call_count + failed_mutations = 0 + + while ( + self.should_generate_more() + # We implement fairly conservative checks for how long we + # we should run mutation for, as it's generally not obvious + # how helpful it is for any given test case. + and self.call_count <= initial_calls + 5 + and failed_mutations <= 5 + ): + groups = data.spans.mutator_groups + if not groups: + break + + group = self.random.choice(groups) + (start1, end1), (start2, end2) = self.random.sample(sorted(group), 2) + if start1 > start2: + (start1, end1), (start2, end2) = (start2, end2), (start1, end1) + + if ( + start1 <= start2 <= end2 <= end1 + ): # pragma: no cover # flaky on conjecture-cover tests + # One span entirely contains the other. 
The strategy is very + # likely some kind of tree. e.g. we might have + # + # ┌─────┐ + # ┌─────┤ a ├──────┐ + # │ └─────┘ │ + # ┌──┴──┐ ┌──┴──┐ + # ┌──┤ b ├──┐ ┌──┤ c ├──┐ + # │ └──┬──┘ │ │ └──┬──┘ │ + # ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ + # │ d │ │ e │ │ f │ │ g │ │ h │ │ i │ + # └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ + # + # where each node is drawn from the same strategy and so + # has the same span label. We might have selected the spans + # corresponding to the a and c nodes, which is the entire + # tree and the subtree of (and including) c respectively. + # + # There are two possible mutations we could apply in this case: + # 1. replace a with c (replace child with parent) + # 2. replace c with a (replace parent with child) + # + # (1) results in multiple partial copies of the + # parent: + # ┌─────┐ + # ┌─────┤ a ├────────────┐ + # │ └─────┘ │ + # ┌──┴──┐ ┌─┴───┐ + # ┌──┤ b ├──┐ ┌─────┤ a ├──────┐ + # │ └──┬──┘ │ │ └─────┘ │ + # ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌──┴──┐ ┌──┴──┐ + # │ d │ │ e │ │ f │ ┌──┤ b ├──┐ ┌──┤ c ├──┐ + # └───┘ └───┘ └───┘ │ └──┬──┘ │ │ └──┬──┘ │ + # ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ + # │ d │ │ e │ │ f │ │ g │ │ h │ │ i │ + # └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ + # + # While (2) results in truncating part of the parent: + # + # ┌─────┐ + # ┌──┤ c ├──┐ + # │ └──┬──┘ │ + # ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ + # │ g │ │ h │ │ i │ + # └───┘ └───┘ └───┘ + # + # (1) is the same as Example IV.4. in Nautilus (NDSS '19) + # (https://wcventure.github.io/FuzzingPaper/Paper/NDSS19_Nautilus.pdf), + # except we do not repeat the replacement additional times + # (the paper repeats it once for a total of two copies). + # + # We currently only apply mutation (1), and ignore mutation + # (2). The reason is that the attempt generated from (2) is + # always something that Hypothesis could easily have generated + # itself, by simply not making various choices. Whereas + # duplicating the exact value + structure of particular choices + # in (1) would have been hard for Hypothesis to generate by + # chance. + # + # TODO: an extension of this mutation might repeat (1) on + # a geometric distribution between 0 and ~10 times. We would + # need to find the corresponding span to recurse on in the new + # choices, probably just by using the choices index. + + # case (1): duplicate the choices in start1:start2. + attempt = data.choices[:start2] + data.choices[start1:] + else: + (start, end) = self.random.choice([(start1, end1), (start2, end2)]) + replacement = data.choices[start:end] + # We attempt to replace both the examples with + # whichever choice we made. Note that this might end + # up messing up and getting the example boundaries + # wrong - labels matching are only a best guess as to + # whether the two are equivalent - but it doesn't + # really matter. It may not achieve the desired result, + # but it's still a perfectly acceptable choice sequence + # to try. + attempt = ( + data.choices[:start1] + + replacement + + data.choices[end1:start2] + + replacement + + data.choices[end2:] + ) + + try: + new_data = self.cached_test_function( + attempt, + # We set error_on_discard so that we don't end up + # entering parts of the tree we consider redundant + # and not worth exploring. 
+ error_on_discard=True, + ) + except ContainsDiscard: + failed_mutations += 1 + continue + + if new_data is Overrun: + failed_mutations += 1 # pragma: no cover # annoying case + else: + assert isinstance(new_data, ConjectureResult) + if ( + new_data.status >= data.status + and choices_key(data.choices) != choices_key(new_data.choices) + and all( + k in new_data.target_observations + and new_data.target_observations[k] >= v + for k, v in data.target_observations.items() + ) + ): + data = new_data + failed_mutations = 0 + else: + failed_mutations += 1 + + def optimise_targets(self) -> None: + """If any target observations have been made, attempt to optimise them + all.""" + if not self.should_optimise: + return + from hypothesis.internal.conjecture.optimiser import Optimiser + + # We want to avoid running the optimiser for too long in case we hit + # an unbounded target score. We start this off fairly conservatively + # in case interesting examples are easy to find and then ramp it up + # on an exponential schedule so we don't hamper the optimiser too much + # if it needs a long time to find good enough improvements. + max_improvements = 10 + while True: + prev_calls = self.call_count + + any_improvements = False + + for target, data in list(self.best_examples_of_observed_targets.items()): + optimiser = Optimiser( + self, data, target, max_improvements=max_improvements + ) + optimiser.run() + if optimiser.improvements > 0: + any_improvements = True + + if self.interesting_examples: + break + + max_improvements *= 2 + + if any_improvements: + continue + + if self.best_observed_targets: + self.pareto_optimise() + + if prev_calls == self.call_count: + break + + def pareto_optimise(self) -> None: + if self.pareto_front is not None: + ParetoOptimiser(self).run() + + def _run(self) -> None: + # have to use the primitive provider to interpret database bits... + self._switch_to_hypothesis_provider = True + with self._log_phase_statistics("reuse"): + self.reuse_existing_examples() + # Fast path for development: If the database gave us interesting + # examples from the previously stored primary key, don't try + # shrinking it again as it's unlikely to work. + if self.reused_previously_shrunk_test_case: + self.exit_with(ExitReason.finished) + # ...but we should use the supplied provider when generating... + self._switch_to_hypothesis_provider = False + with self._log_phase_statistics("generate"): + self.generate_new_examples() + # We normally run the targeting phase mixed in with the generate phase, + # but if we've been asked to run it but not generation then we have to + # run it explicitly on its own here. + if Phase.generate not in self.settings.phases: + self._current_phase = "target" + self.optimise_targets() + # ...and back to the primitive provider when shrinking. 
+ self._switch_to_hypothesis_provider = True + with self._log_phase_statistics("shrink"): + self.shrink_interesting_examples() + self.exit_with(ExitReason.finished) + + def new_conjecture_data( + self, + prefix: Sequence[ChoiceT | ChoiceTemplate], + *, + observer: DataObserver | None = None, + max_choices: int | None = None, + ) -> ConjectureData: + provider = ( + HypothesisProvider if self._switch_to_hypothesis_provider else self.provider + ) + observer = observer or self.tree.new_observer() + if not self.using_hypothesis_backend: + observer = DataObserver() + + return ConjectureData( + prefix=prefix, + observer=observer, + provider=provider, + max_choices=max_choices, + random=self.random, + ) + + def shrink_interesting_examples(self) -> None: + """If we've found interesting examples, try to replace each of them + with a minimal interesting example with the same interesting_origin. + + We may find one or more examples with a new interesting_origin + during the shrink process. If so we shrink these too. + """ + if Phase.shrink not in self.settings.phases or not self.interesting_examples: + return + + self.debug("Shrinking interesting examples") + self.finish_shrinking_deadline = time.perf_counter() + MAX_SHRINKING_SECONDS + + for prev_data in sorted( + self.interesting_examples.values(), key=lambda d: sort_key(d.nodes) + ): + assert prev_data.status == Status.INTERESTING + data = self.new_conjecture_data(prev_data.choices) + self.test_function(data) + if data.status != Status.INTERESTING: + self.exit_with(ExitReason.flaky) + + self.clear_secondary_key() + + while len(self.shrunk_examples) < len(self.interesting_examples): + target, example = min( + ( + (k, v) + for k, v in self.interesting_examples.items() + if k not in self.shrunk_examples + ), + key=lambda kv: (sort_key(kv[1].nodes), shortlex(repr(kv[0]))), + ) + self.debug(f"Shrinking {target!r}: {example.choices}") + + if not self.settings.report_multiple_bugs: + # If multi-bug reporting is disabled, we shrink our currently-minimal + # failure, allowing 'slips' to any bug with a smaller minimal example. + self.shrink(example, lambda d: d.status == Status.INTERESTING) + return + + def predicate(d: ConjectureResult | _Overrun) -> bool: + if d.status < Status.INTERESTING: + return False + d = cast(ConjectureResult, d) + return d.interesting_origin == target + + self.shrink(example, predicate) + + self.shrunk_examples.add(target) + + def clear_secondary_key(self) -> None: + if self.has_existing_examples(): + # If we have any smaller examples in the secondary corpus, now is + # a good time to try them to see if they work as shrinks. They + # probably won't, but it's worth a shot and gives us a good + # opportunity to clear out the database. + + # It's not worth trying the primary corpus because we already + # tried all of those in the initial phase. + corpus = sorted( + self.settings.database.fetch(self.secondary_key), key=shortlex + ) + for c in corpus: + choices = choices_from_bytes(c) + if choices is None: + self.settings.database.delete(self.secondary_key, c) + continue + primary = { + choices_to_bytes(v.choices) + for v in self.interesting_examples.values() + } + if shortlex(c) > max(map(shortlex, primary)): + break + + self.cached_test_function(choices) + # We unconditionally remove c from the secondary key as it + # is either now primary or worse than our primary example + # of this reason for interestingness. 
+ self.settings.database.delete(self.secondary_key, c) + + def shrink( + self, + example: ConjectureData | ConjectureResult, + predicate: ShrinkPredicateT | None = None, + allow_transition: ( + Callable[[ConjectureData | ConjectureResult, ConjectureData], bool] | None + ) = None, + ) -> ConjectureData | ConjectureResult: + s = self.new_shrinker(example, predicate, allow_transition) + s.shrink() + return s.shrink_target + + def new_shrinker( + self, + example: ConjectureData | ConjectureResult, + predicate: ShrinkPredicateT | None = None, + allow_transition: ( + Callable[[ConjectureData | ConjectureResult, ConjectureData], bool] | None + ) = None, + ) -> Shrinker: + return Shrinker( + self, + example, + predicate, + allow_transition=allow_transition, + explain=Phase.explain in self.settings.phases, + in_target_phase=self._current_phase == "target", + ) + + def passing_choice_sequences( + self, prefix: Sequence[ChoiceNode] = () + ) -> frozenset[tuple[ChoiceNode, ...]]: + """Return a collection of choice sequence nodes which cause the test to pass. + Optionally restrict this by a certain prefix, which is useful for explain mode. + """ + return frozenset( + cast(ConjectureResult, result).nodes + for key in self.__data_cache + if (result := self.__data_cache[key]).status is Status.VALID + and startswith(cast(ConjectureResult, result).nodes, prefix) + ) + + +class ContainsDiscard(Exception): + pass diff --git a/vendored/hypothesis/internal/conjecture/floats.py b/vendored/hypothesis/internal/conjecture/floats.py new file mode 100644 index 0000000..407686a --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/floats.py @@ -0,0 +1,219 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from array import array + +from hypothesis.internal.floats import float_to_int, int_to_float + +""" +This module implements support for arbitrary floating point numbers in +Conjecture. It doesn't make any attempt to get a good distribution, only to +get a format that will shrink well. + +It works by defining an encoding of non-negative floating point numbers +(including NaN values with a zero sign bit) that has good lexical shrinking +properties. + +This encoding is a tagged union of two separate encodings for floating point +numbers, with the tag being the first bit of 64 and the remaining 63-bits being +the payload. + +If the tag bit is 0, the next 7 bits are ignored, and the remaining 7 bytes are +interpreted as a 7 byte integer in big-endian order and then converted to a +float (there is some redundancy here, as 7 * 8 = 56, which is larger than the +largest integer that floating point numbers can represent exactly, so multiple +encodings may map to the same float). + +If the tag bit is 1, we instead use something that is closer to the normal +representation of floats (and can represent every non-negative float exactly) +but has a better ordering: + +1. NaNs are ordered after everything else. +2. Infinity is ordered after every finite number. +3. The sign is ignored unless two floating point numbers are identical in + absolute magnitude. In that case, the positive is ordered before the + negative. +4. 
Positive floating point numbers are ordered first by int(x) where + encoding(x) < encoding(y) if int(x) < int(y). +5. If int(x) == int(y) then x and y are sorted towards lower denominators of + their fractional parts. + +The format of this encoding of floating point goes as follows: + + [exponent] [mantissa] + +Each of these is the same size their equivalent in IEEE floating point, but are +in a different format. + +We translate exponents as follows: + + 1. The maximum exponent (2 ** 11 - 1) is left unchanged. + 2. We reorder the remaining exponents so that all of the positive exponents + are first, in increasing order, followed by all of the negative + exponents in decreasing order (where positive/negative is done by the + unbiased exponent e - 1023). + +We translate the mantissa as follows: + + 1. If the unbiased exponent is <= 0 we reverse it bitwise. + 2. If the unbiased exponent is >= 52 we leave it alone. + 3. If the unbiased exponent is in the range [1, 51] then we reverse the + low k bits, where k is 52 - unbiased exponent. + +The low bits correspond to the fractional part of the floating point number. +Reversing it bitwise means that we try to minimize the low bits, which kills +off the higher powers of 2 in the fraction first. +""" + + +MAX_EXPONENT = 0x7FF + +BIAS = 1023 +MAX_POSITIVE_EXPONENT = MAX_EXPONENT - 1 - BIAS + + +def exponent_key(e: int) -> float: + if e == MAX_EXPONENT: + return float("inf") + unbiased = e - BIAS + if unbiased < 0: + return 10000 - unbiased + else: + return unbiased + + +ENCODING_TABLE = array("H", sorted(range(MAX_EXPONENT + 1), key=exponent_key)) +DECODING_TABLE = array("H", [0]) * len(ENCODING_TABLE) + +for i, b in enumerate(ENCODING_TABLE): + DECODING_TABLE[b] = i + +del i, b + + +def decode_exponent(e: int) -> int: + """Take an integer and turn it into a suitable floating point exponent + such that lexicographically simpler leads to simpler floats.""" + assert 0 <= e <= MAX_EXPONENT + return ENCODING_TABLE[e] + + +def encode_exponent(e: int) -> int: + """Take a floating point exponent and turn it back into the equivalent + result from conjecture.""" + assert 0 <= e <= MAX_EXPONENT + return DECODING_TABLE[e] + + +def reverse_byte(b: int) -> int: + result = 0 + for _ in range(8): + result <<= 1 + result |= b & 1 + b >>= 1 + return result + + +# Table mapping individual bytes to the equivalent byte with the bits of the +# byte reversed. e.g. 1=0b1 is mapped to 0xb10000000=0x80=128. We use this +# precalculated table to simplify calculating the bitwise reversal of a longer +# integer. +REVERSE_BITS_TABLE = bytearray(map(reverse_byte, range(256))) + + +def reverse64(v: int) -> int: + """Reverse a 64-bit integer bitwise. + + We do this by breaking it up into 8 bytes. The 64-bit integer is then the + concatenation of each of these bytes. We reverse it by reversing each byte + on its own using the REVERSE_BITS_TABLE above, and then concatenating the + reversed bytes. + + In this case concatenating consists of shifting them into the right + position for the word and then oring the bits together. 
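+
+    For example, the lowest bit swaps places with the highest one:
+    reverse64(1) == 1 << 63, and reverse64(1 << 63) == 1.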
+ """ + assert v.bit_length() <= 64 + return ( + (REVERSE_BITS_TABLE[(v >> 0) & 0xFF] << 56) + | (REVERSE_BITS_TABLE[(v >> 8) & 0xFF] << 48) + | (REVERSE_BITS_TABLE[(v >> 16) & 0xFF] << 40) + | (REVERSE_BITS_TABLE[(v >> 24) & 0xFF] << 32) + | (REVERSE_BITS_TABLE[(v >> 32) & 0xFF] << 24) + | (REVERSE_BITS_TABLE[(v >> 40) & 0xFF] << 16) + | (REVERSE_BITS_TABLE[(v >> 48) & 0xFF] << 8) + | (REVERSE_BITS_TABLE[(v >> 56) & 0xFF] << 0) + ) + + +MANTISSA_MASK = (1 << 52) - 1 + + +def reverse_bits(x: int, n: int) -> int: + assert x.bit_length() <= n <= 64 + x = reverse64(x) + x >>= 64 - n + return x + + +def update_mantissa(unbiased_exponent: int, mantissa: int) -> int: + if unbiased_exponent <= 0: + mantissa = reverse_bits(mantissa, 52) + elif unbiased_exponent <= 51: + n_fractional_bits = 52 - unbiased_exponent + fractional_part = mantissa & ((1 << n_fractional_bits) - 1) + mantissa ^= fractional_part + mantissa |= reverse_bits(fractional_part, n_fractional_bits) + return mantissa + + +def lex_to_float(i: int) -> float: + assert i.bit_length() <= 64 + has_fractional_part = i >> 63 + if has_fractional_part: + exponent = (i >> 52) & ((1 << 11) - 1) + exponent = decode_exponent(exponent) + mantissa = i & MANTISSA_MASK + mantissa = update_mantissa(exponent - BIAS, mantissa) + + assert mantissa.bit_length() <= 52 + + return int_to_float((exponent << 52) | mantissa) + else: + integral_part = i & ((1 << 56) - 1) + return float(integral_part) + + +def float_to_lex(f: float) -> int: + if is_simple(f): + assert f >= 0 + return int(f) + return base_float_to_lex(f) + + +def base_float_to_lex(f: float) -> int: + i = float_to_int(f) + i &= (1 << 63) - 1 + exponent = i >> 52 + mantissa = i & MANTISSA_MASK + mantissa = update_mantissa(exponent - BIAS, mantissa) + exponent = encode_exponent(exponent) + + assert mantissa.bit_length() <= 52 + return (1 << 63) | (exponent << 52) | mantissa + + +def is_simple(f: float) -> int: + try: + i = int(f) + except (ValueError, OverflowError): + return False + if i != f: + return False + return i.bit_length() <= 56 diff --git a/vendored/hypothesis/internal/conjecture/junkdrawer.py b/vendored/hypothesis/internal/conjecture/junkdrawer.py new file mode 100644 index 0000000..ef81176 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/junkdrawer.py @@ -0,0 +1,563 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +"""A module for miscellaneous useful bits and bobs that don't +obviously belong anywhere else. If you spot a better home for +anything that lives here, please move it.""" + +import array +import gc +import itertools +import sys +import time +import warnings +from array import ArrayType +from collections.abc import Callable, Iterable, Iterator, Sequence +from threading import Lock +from typing import ( + Any, + ClassVar, + Generic, + Literal, + TypeVar, + Union, + overload, +) + +from sortedcontainers import SortedList + +from hypothesis.errors import HypothesisWarning + +T = TypeVar("T") + + +def replace_all( + ls: Sequence[T], + replacements: Iterable[tuple[int, int, Sequence[T]]], +) -> list[T]: + """Substitute multiple replacement values into a list. 
+ + Replacements is a list of (start, end, value) triples. + """ + + result: list[T] = [] + prev = 0 + offset = 0 + for u, v, r in replacements: + result.extend(ls[prev:u]) + result.extend(r) + prev = v + offset += len(r) - (v - u) + result.extend(ls[prev:]) + assert len(result) == len(ls) + offset + return result + + +class IntList(Sequence[int]): + """Class for storing a list of non-negative integers compactly. + + We store them as the smallest size integer array we can get + away with. When we try to add an integer that is too large, + we upgrade the array to the smallest word size needed to store + the new value.""" + + ARRAY_CODES: ClassVar[list[str]] = ["B", "H", "I", "L", "Q", "O"] + NEXT_ARRAY_CODE: ClassVar[dict[str, str]] = dict(itertools.pairwise(ARRAY_CODES)) + + __slots__ = ("__underlying",) + + def __init__(self, values: Sequence[int] = ()): + for code in self.ARRAY_CODES: + try: + underlying = self._array_or_list(code, values) + break + except OverflowError: + pass + else: # pragma: no cover + raise AssertionError(f"Could not create storage for {values!r}") + if isinstance(underlying, list): + for v in underlying: + if not isinstance(v, int) or v < 0: + raise ValueError(f"Could not create IntList for {values!r}") + self.__underlying: list[int] | ArrayType[int] = underlying + + @classmethod + def of_length(cls, n: int) -> "IntList": + return cls(array.array("B", [0]) * n) + + @staticmethod + def _array_or_list( + code: str, contents: Iterable[int] + ) -> Union[list[int], "ArrayType[int]"]: + if code == "O": + return list(contents) + return array.array(code, contents) + + def count(self, value: int) -> int: + return self.__underlying.count(value) + + def __repr__(self) -> str: + return f"IntList({list(self.__underlying)!r})" + + def __len__(self) -> int: + return len(self.__underlying) + + @overload + def __getitem__(self, i: int) -> int: ... # pragma: no cover + + @overload + def __getitem__( + self, i: slice + ) -> "list[int] | ArrayType[int]": ... # pragma: no cover + + def __getitem__(self, i: int | slice) -> "int | list[int] | ArrayType[int]": + return self.__underlying[i] + + def __delitem__(self, i: int | slice) -> None: + del self.__underlying[i] + + def insert(self, i: int, v: int) -> None: + self.__underlying.insert(i, v) + + def __iter__(self) -> Iterator[int]: + return iter(self.__underlying) + + def __eq__(self, other: object) -> bool: + if self is other: + return True + if not isinstance(other, IntList): + return NotImplemented + return self.__underlying == other.__underlying + + def __ne__(self, other: object) -> bool: + if self is other: + return False + if not isinstance(other, IntList): + return NotImplemented + return self.__underlying != other.__underlying + + def append(self, n: int) -> None: + # try the fast path of appending n first. If this overflows, use the + # __setitem__ path, which will upgrade the underlying array. 
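+        # For example, appending 256 to an array with typecode "B" (which holds
+        # only 0..255) raises OverflowError; the __setitem__ path then upgrades
+        # the storage to the next typecode, "H".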
+ try: + self.__underlying.append(n) + except OverflowError: + i = len(self.__underlying) + self.__underlying.append(0) + self[i] = n + + def __setitem__(self, i: int, n: int) -> None: + while True: + try: + self.__underlying[i] = n + return + except OverflowError: + assert n > 0 + self.__upgrade() + + def extend(self, ls: Iterable[int]) -> None: + for n in ls: + self.append(n) + + def __upgrade(self) -> None: + assert isinstance(self.__underlying, array.array) + code = self.NEXT_ARRAY_CODE[self.__underlying.typecode] + self.__underlying = self._array_or_list(code, self.__underlying) + + +def binary_search(lo: int, hi: int, f: Callable[[int], bool]) -> int: + """Binary searches in [lo , hi) to find + n such that f(n) == f(lo) but f(n + 1) != f(lo). + It is implicitly assumed and will not be checked + that f(hi) != f(lo). + """ + + reference = f(lo) + + while lo + 1 < hi: + mid = (lo + hi) // 2 + if f(mid) == reference: + lo = mid + else: + hi = mid + return lo + + +class LazySequenceCopy(Generic[T]): + """A "copy" of a sequence that works by inserting a mask in front + of the underlying sequence, so that you can mutate it without changing + the underlying sequence. Effectively behaves as if you could do list(x) + in O(1) time. The full list API is not supported yet but there's no reason + in principle it couldn't be.""" + + def __init__(self, values: Sequence[T]): + self.__values = values + self.__len = len(values) + self.__mask: dict[int, T] | None = None + self.__popped_indices: SortedList[int] | None = None + + def __len__(self) -> int: + if self.__popped_indices is None: + return self.__len + return self.__len - len(self.__popped_indices) + + def pop(self, i: int = -1) -> T: + if len(self) == 0: + raise IndexError("Cannot pop from empty list") + i = self.__underlying_index(i) + + v = None + if self.__mask is not None: + v = self.__mask.pop(i, None) + if v is None: + v = self.__values[i] + + if self.__popped_indices is None: + self.__popped_indices = SortedList() + self.__popped_indices.add(i) + return v + + def swap(self, i: int, j: int) -> None: + """Swap the elements ls[i], ls[j].""" + if i == j: + return + self[i], self[j] = self[j], self[i] + + def __getitem__(self, i: int) -> T: + i = self.__underlying_index(i) + + default = self.__values[i] + if self.__mask is None: + return default + else: + return self.__mask.get(i, default) + + def __setitem__(self, i: int, v: T) -> None: + i = self.__underlying_index(i) + if self.__mask is None: + self.__mask = {} + self.__mask[i] = v + + def __underlying_index(self, i: int) -> int: + n = len(self) + if i < -n or i >= n: + raise IndexError(f"Index {i} out of range [0, {n})") + if i < 0: + i += n + assert 0 <= i < n + + if self.__popped_indices is not None: + # given an index i in the popped representation of the list, compute + # its corresponding index in the underlying list. given + # l = [1, 4, 2, 10, 188] + # l.pop(3) + # l.pop(1) + # assert l == [1, 2, 188] + # + # we want l[i] == self.__values[f(i)], where f is this function. + assert len(self.__popped_indices) <= len(self.__values) + + for idx in self.__popped_indices: + if idx > i: + break + i += 1 + return i + + # even though we have len + getitem, mypyc requires iter. + def __iter__(self) -> Iterable[T]: + for i in range(len(self)): + yield self[i] + + +def stack_depth_of_caller() -> int: + """Get stack size for caller's frame. + + From https://stackoverflow.com/a/47956089/9297601 , this is a simple + but much faster alternative to `len(inspect.stack(0))`. 
We use it + with get/set recursionlimit to make stack overflows non-flaky; see + https://github.com/HypothesisWorks/hypothesis/issues/2494 for details. + """ + frame = sys._getframe(2) + size = 1 + while frame: + frame = frame.f_back # type: ignore[assignment] + size += 1 + return size + + +class StackframeLimiter: + # StackframeLimiter is used to make the recursion limit warning issued via + # ensure_free_stackframes thread-safe. We track the known values we have + # passed to sys.setrecursionlimit in _known_limits, and only issue a warning + # if sys.getrecursionlimit is not in _known_limits. + # + # This will always be an under-approximation of when we would ideally issue + # this warning, since a non-hypothesis caller could coincidentaly set the + # recursion limit to one of our known limits. Currently, StackframeLimiter + # resets _known_limits whenever all of the ensure_free_stackframes contexts + # have exited. We could increase the power of the warning by tracking a + # refcount for each limit, and removing it as soon as the refcount hits zero. + # I didn't think this extra complexity is worth the minor power increase for + # what is already only a "nice to have" warning. + + def __init__(self): + self._active_contexts = 0 + self._known_limits: set[int] = set() + self._original_limit: int | None = None + + def _setrecursionlimit(self, new_limit: int, *, check: bool = True) -> None: + if ( + check + and (current_limit := sys.getrecursionlimit()) not in self._known_limits + ): + warnings.warn( + "The recursion limit will not be reset, since it was changed " + f"during test execution (from {self._original_limit} to {current_limit}).", + HypothesisWarning, + stacklevel=4, + ) + return + + self._known_limits.add(new_limit) + sys.setrecursionlimit(new_limit) + + def enter_context(self, new_limit: int, *, current_limit: int) -> None: + if self._active_contexts == 0: + # this is the first context on the stack. Record the true original + # limit, to restore later. + assert self._original_limit is None + self._original_limit = current_limit + self._known_limits.add(self._original_limit) + + self._active_contexts += 1 + self._setrecursionlimit(new_limit) + + def exit_context(self, new_limit: int, *, check: bool = True) -> None: + assert self._active_contexts > 0 + self._active_contexts -= 1 + + if self._active_contexts == 0: + # this is the last context to exit. Restore the true original + # limit and clear our known limits. + original_limit = self._original_limit + assert original_limit is not None + try: + self._setrecursionlimit(original_limit, check=check) + finally: + self._original_limit = None + # we want to clear the known limits, but preserve the limit + # we just set it to as known. + self._known_limits = {original_limit} + else: + self._setrecursionlimit(new_limit, check=check) + + +_stackframe_limiter = StackframeLimiter() +_stackframe_limiter_lock = Lock() + + +class ensure_free_stackframes: + """Context manager that ensures there are at least N free stackframes (for + a reasonable value of N). + """ + + def __enter__(self) -> None: + cur_depth = stack_depth_of_caller() + with _stackframe_limiter_lock: + self.old_limit = sys.getrecursionlimit() + # The default CPython recursionlimit is 1000, but pytest seems to bump + # it to 3000 during test execution. Let's make it something reasonable: + self.new_limit = cur_depth + 2000 + # Because we add to the recursion limit, to be good citizens we also + # add a check for unbounded recursion. 
The default limit is typically + # 1000/3000, so this can only ever trigger if something really strange + # is happening and it's hard to imagine an + # intentionally-deeply-recursive use of this code. + assert cur_depth <= 1000, ( + "Hypothesis would usually add %d to the stack depth of %d here, " + "but we are already much deeper than expected. Aborting now, to " + "avoid extending the stack limit in an infinite loop..." + % (self.new_limit - self.old_limit, self.old_limit) + ) + try: + _stackframe_limiter.enter_context( + self.new_limit, current_limit=self.old_limit + ) + except Exception: + # if the stackframe limiter raises a HypothesisWarning (under eg + # -Werror), __exit__ is not called, since we errored in __enter__. + # Preserve the state of the stackframe limiter by exiting, and + # avoid showing a duplicate warning with check=False. + _stackframe_limiter.exit_context(self.old_limit, check=False) + raise + + def __exit__(self, *args, **kwargs): + with _stackframe_limiter_lock: + _stackframe_limiter.exit_context(self.old_limit) + + +def find_integer(f: Callable[[int], bool]) -> int: + """Finds a (hopefully large) integer such that f(n) is True and f(n + 1) is + False. + + f(0) is assumed to be True and will not be checked. + """ + # We first do a linear scan over the small numbers and only start to do + # anything intelligent if f(4) is true. This is because it's very hard to + # win big when the result is small. If the result is 0 and we try 2 first + # then we've done twice as much work as we needed to! + for i in range(1, 5): + if not f(i): + return i - 1 + + # We now know that f(4) is true. We want to find some number for which + # f(n) is *not* true. + # lo is the largest number for which we know that f(lo) is true. + lo = 4 + + # Exponential probe upwards until we find some value hi such that f(hi) + # is not true. Subsequently we maintain the invariant that hi is the + # smallest number for which we know that f(hi) is not true. + hi = 5 + while f(hi): + lo = hi + hi *= 2 + + # Now binary search until lo + 1 = hi. At that point we have f(lo) and not + # f(lo + 1), as desired.. + while lo + 1 < hi: + mid = (lo + hi) // 2 + if f(mid): + lo = mid + else: + hi = mid + return lo + + +class NotFound(Exception): + pass + + +class SelfOrganisingList(Generic[T]): + """A self-organising list with the move-to-front heuristic. + + A self-organising list is a collection which we want to retrieve items + that satisfy some predicate from. There is no faster way to do this than + a linear scan (as the predicates may be arbitrary), but the performance + of a linear scan can vary dramatically - if we happen to find a good item + on the first try it's O(1) after all. The idea of a self-organising list is + to reorder the list to try to get lucky this way as often as possible. + + There are various heuristics we could use for this, and it's not clear + which are best. We use the simplest, which is that every time we find + an item we move it to the "front" (actually the back in our implementation + because we iterate in reverse) of the list. + + """ + + def __init__(self, values: Iterable[T] = ()) -> None: + self.__values = list(values) + + def __repr__(self) -> str: + return f"SelfOrganisingList({self.__values!r})" + + def add(self, value: T) -> None: + """Add a value to this list.""" + self.__values.append(value) + + def find(self, condition: Callable[[T], bool]) -> T: + """Returns some value in this list such that ``condition(value)`` is + True. 
If no such value exists raises ``NotFound``.""" + for i in range(len(self.__values) - 1, -1, -1): + value = self.__values[i] + if condition(value): + del self.__values[i] + self.__values.append(value) + return value + raise NotFound("No values satisfying condition") + + +_gc_initialized = False +_gc_start: float = 0 +_gc_cumulative_time: float = 0 + +# Since gc_callback potentially runs in test context, and perf_counter +# might be monkeypatched, we store a reference to the real one. +_perf_counter = time.perf_counter + + +def gc_cumulative_time() -> float: + global _gc_initialized + + # I don't believe we need a lock for the _gc_cumulative_time increment here, + # since afaik each gc callback is only executed once when the garbage collector + # runs, by the thread which initiated the gc. + + if not _gc_initialized: + if hasattr(gc, "callbacks"): + # CPython + def gc_callback( + phase: Literal["start", "stop"], info: dict[str, int] + ) -> None: + global _gc_start, _gc_cumulative_time + try: + now = _perf_counter() + if phase == "start": + _gc_start = now + elif phase == "stop" and _gc_start > 0: + _gc_cumulative_time += now - _gc_start # pragma: no cover # ?? + except RecursionError: # pragma: no cover + # Avoid flakiness via UnraisableException, which is caught and + # warned by pytest. The actual callback (this function) is + # validated to never trigger a RecursionError itself when + # when called by gc.collect. + # Anyway, we should hit the same error on "start" + # and "stop", but to ensure we don't get out of sync we just + # signal that there is no matching start. + _gc_start = 0 + return + + gc.callbacks.insert(0, gc_callback) + elif hasattr(gc, "hooks"): # pragma: no cover # pypy only + # PyPy + def hook(stats: Any) -> None: + global _gc_cumulative_time + try: + _gc_cumulative_time += stats.duration + except RecursionError: + pass + + if gc.hooks.on_gc_minor is None: + gc.hooks.on_gc_minor = hook + if gc.hooks.on_gc_collect_step is None: + gc.hooks.on_gc_collect_step = hook + + _gc_initialized = True + + return _gc_cumulative_time + + +def startswith(l1: Sequence[T], l2: Sequence[T]) -> bool: + if len(l1) < len(l2): + return False + return all(v1 == v2 for v1, v2 in zip(l1[: len(l2)], l2, strict=False)) + + +def endswith(l1: Sequence[T], l2: Sequence[T]) -> bool: + if len(l1) < len(l2): + return False + return all(v1 == v2 for v1, v2 in zip(l1[-len(l2) :], l2, strict=False)) + + +def bits_to_bytes(n: int) -> int: + """The number of bytes required to represent an n-bit number. + Equivalent to (n + 7) // 8, but slightly faster. This really is + called enough times that that matters.""" + return (n + 7) >> 3 diff --git a/vendored/hypothesis/internal/conjecture/optimiser.py b/vendored/hypothesis/internal/conjecture/optimiser.py new file mode 100644 index 0000000..2d0b738 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/optimiser.py @@ -0,0 +1,204 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +from hypothesis.internal.compat import int_from_bytes, int_to_bytes +from hypothesis.internal.conjecture.choice import ChoiceT, choice_permitted +from hypothesis.internal.conjecture.data import ConjectureResult, Status, _Overrun +from hypothesis.internal.conjecture.engine import ConjectureRunner +from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes, find_integer +from hypothesis.internal.conjecture.pareto import NO_SCORE + + +class Optimiser: + """A fairly basic optimiser designed to increase the value of scores for + targeted property-based testing. + + This implements a fairly naive hill climbing algorithm based on randomly + regenerating parts of the test case to attempt to improve the result. It is + not expected to produce amazing results, because it is designed to be run + in a fairly small testing budget, so it prioritises finding easy wins and + bailing out quickly if that doesn't work. + + For more information about targeted property-based testing, see + Löscher, Andreas, and Konstantinos Sagonas. "Targeted property-based + testing." Proceedings of the 26th ACM SIGSOFT International Symposium on + Software Testing and Analysis. ACM, 2017. + """ + + def __init__( + self, + engine: ConjectureRunner, + data: ConjectureResult, + target: str, + max_improvements: int = 100, + ) -> None: + """Optimise ``target`` starting from ``data``. Will stop either when + we seem to have found a local maximum or when the target score has + been improved ``max_improvements`` times. This limit is in place to + deal with the fact that the target score may not be bounded above.""" + self.engine = engine + self.current_data = data + self.target = target + self.max_improvements = max_improvements + self.improvements = 0 + + def run(self) -> None: + self.hill_climb() + + def score_function(self, data: ConjectureResult) -> float: + return data.target_observations.get(self.target, NO_SCORE) + + @property + def current_score(self) -> float: + return self.score_function(self.current_data) + + def consider_new_data(self, data: ConjectureResult | _Overrun) -> bool: + """Consider a new data object as a candidate target. If it is better + than the current one, return True.""" + if data.status < Status.VALID: + return False + assert isinstance(data, ConjectureResult) + score = self.score_function(data) + if score < self.current_score: + return False + if score > self.current_score: + self.improvements += 1 + self.current_data = data + return True + assert score == self.current_score + # We allow transitions that leave the score unchanged as long as they + # don't increase the number of nodes. This gives us a certain amount of + # freedom for lateral moves that will take us out of local maxima. + if len(data.nodes) <= len(self.current_data.nodes): + self.current_data = data + return True + return False + + def hill_climb(self) -> None: + """The main hill climbing loop where we actually do the work: Take + data, and attempt to improve its score for target. 
select_example takes + a data object and returns an index to an example where we should focus + our efforts.""" + + nodes_examined = set() + + prev: ConjectureResult | None = None + i = len(self.current_data.nodes) - 1 + while i >= 0 and self.improvements <= self.max_improvements: + if prev is not self.current_data: + i = len(self.current_data.nodes) - 1 + prev = self.current_data + + if i in nodes_examined: + i -= 1 + continue + + nodes_examined.add(i) + node = self.current_data.nodes[i] + assert node.index is not None + # we can only (sensibly & easily) define hill climbing for + # numeric-style nodes. It's not clear hill-climbing a string is + # useful, for instance. + if node.type not in {"integer", "float", "bytes", "boolean"}: + continue + + def attempt_replace(k: int) -> bool: + """ + Try replacing the current node in the current best test case + with a value which is "k times larger", where the exact notion + of "larger" depends on the choice_type. + + Note that we use the *current* best and not the one we started with. + This helps ensure that if we luck into a good draw when making + random choices we get to keep the good bits. + """ + # we don't want to infinitely drive up an unbounded score. + if abs(k) > 2**20: + return False + + node = self.current_data.nodes[i] + assert node.index is not None + if node.was_forced: + return False # pragma: no cover + + new_choice: ChoiceT + if node.type in {"integer", "float"}: + assert isinstance(node.value, (int, float)) + new_choice = node.value + k + elif node.type == "boolean": + assert isinstance(node.value, bool) + if abs(k) > 1: + return False + if k == -1: + new_choice = False + if k == 1: + new_choice = True + if k == 0: # pragma: no cover + new_choice = node.value + else: + assert node.type == "bytes" + assert isinstance(node.value, bytes) + v = int_from_bytes(node.value) + # can't go below zero for bytes + if v + k < 0: + return False + v += k + # allow adding k to increase the number of bytes. we don't want + # to decrease so that b"01" doesn't turn into b"1". + size = max(len(node.value), bits_to_bytes(v.bit_length())) + new_choice = int_to_bytes(v, size) + + if not choice_permitted(new_choice, node.constraints): + return False + + for _ in range(3): + choices = self.current_data.choices + attempt_choices = ( + choices[: node.index] + + (new_choice,) + + choices[node.index + 1 :] + ) + attempt = self.engine.cached_test_function( + attempt_choices, extend="full" + ) + + if self.consider_new_data(attempt): + return True + + if attempt.status is Status.OVERRUN: + return False + + assert isinstance(attempt, ConjectureResult) + if len(attempt.nodes) == len(self.current_data.nodes): + return False + + for j, ex in enumerate(self.current_data.spans): + if ex.start >= node.index + 1: + break # pragma: no cover + if ex.end <= node.index: + continue + ex_attempt = attempt.spans[j] + if ex.choice_count == ex_attempt.choice_count: + continue # pragma: no cover + replacement = attempt.choices[ex_attempt.start : ex_attempt.end] + if self.consider_new_data( + self.engine.cached_test_function( + choices[: node.index] + + replacement + + self.current_data.choices[ex.end :] + ) + ): + return True + return False + + # we don't know whether a target score increases or decreases with + # respect to the value of some node, so try both directions. 
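+            # find_integer doubles k while attempt_replace keeps succeeding and
+            # then binary searches, so a large improvement of size k is found in
+            # roughly O(log k) calls to attempt_replace in each direction.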
+ find_integer(lambda k: attempt_replace(k)) + find_integer(lambda k: attempt_replace(-k)) diff --git a/vendored/hypothesis/internal/conjecture/pareto.py b/vendored/hypothesis/internal/conjecture/pareto.py new file mode 100644 index 0000000..7c39d9f --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/pareto.py @@ -0,0 +1,361 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from collections.abc import Callable, Iterator +from enum import Enum +from random import Random +from typing import TYPE_CHECKING + +from sortedcontainers import SortedList + +from hypothesis.internal.conjecture.choice import choices_key +from hypothesis.internal.conjecture.data import ( + ConjectureData, + ConjectureResult, + Status, + _Overrun, +) +from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy +from hypothesis.internal.conjecture.shrinker import sort_key + +NO_SCORE = float("-inf") + +if TYPE_CHECKING: + from hypothesis.internal.conjecture.engine import ConjectureRunner + + +class DominanceRelation(Enum): + NO_DOMINANCE = 0 + EQUAL = 1 + LEFT_DOMINATES = 2 + RIGHT_DOMINATES = 3 + + +def dominance(left: ConjectureResult, right: ConjectureResult) -> DominanceRelation: + """Returns the dominance relation between ``left`` and ``right``, according + to the rules that one ConjectureResult dominates another if and only if it + is better in every way. + + The things we currently consider to be "better" are: + + * Something that is smaller in shrinking order is better. + * Something that has higher status is better. + * Each ``interesting_origin`` is treated as its own score, so if two + interesting examples have different origins then neither dominates + the other. + * For each target observation, a higher score is better. + + In "normal" operation where there are no bugs or target observations, the + pareto front only has one element (the smallest valid test case), but for + more structured or failing tests it can be useful to track, and future work + will depend on it more.""" + + left_key = sort_key(left.nodes) + right_key = sort_key(right.nodes) + if left_key == right_key: + return DominanceRelation.EQUAL + + if right_key < left_key: + result = dominance(left=right, right=left) + if result == DominanceRelation.LEFT_DOMINATES: + return DominanceRelation.RIGHT_DOMINATES + else: + # Because we have sort_key(left) < sort_key(right) the only options + # are that right is better than left or that the two are + # incomparable. + assert result == DominanceRelation.NO_DOMINANCE + return result + + # Either left is better or there is no dominance relationship. + assert left_key < right_key + + # The right is more interesting + if left.status < right.status: + return DominanceRelation.NO_DOMINANCE + + if not right.tags.issubset(left.tags): + return DominanceRelation.NO_DOMINANCE + + # Things that are interesting for different reasons are incomparable in + # the dominance relationship. 
+ if ( + left.status == Status.INTERESTING + and right.interesting_origin is not None + and left.interesting_origin != right.interesting_origin + ): + return DominanceRelation.NO_DOMINANCE + + for target in set(left.target_observations) | set(right.target_observations): + left_score = left.target_observations.get(target, NO_SCORE) + right_score = right.target_observations.get(target, NO_SCORE) + if right_score > left_score: + return DominanceRelation.NO_DOMINANCE + + return DominanceRelation.LEFT_DOMINATES + + +class ParetoFront: + """Maintains an approximate pareto front of ConjectureData objects. That + is, we try to maintain a collection of objects such that no element of the + collection is pareto dominated by any other. In practice we don't quite + manage that, because doing so is computationally very expensive. Instead + we maintain a random sample of data objects that are "rarely" dominated by + any other element of the collection (roughly, no more than about 10%). + + Only valid test cases are considered to belong to the pareto front - any + test case with a status less than valid is discarded. + + Note that the pareto front is potentially quite large, and currently this + will store the entire front in memory. This is bounded by the number of + valid examples we run, which is max_examples in normal execution, and + currently we do not support workflows with large max_examples which have + large values of max_examples very well anyway, so this isn't a major issue. + In future we may weish to implement some sort of paging out to disk so that + we can work with larger fronts. + + Additionally, because this is only an approximate pareto front, there are + scenarios where it can be much larger than the actual pareto front. There + isn't a huge amount we can do about this - checking an exact pareto front + is intrinsically quadratic. + + "Most" of the time we should be relatively close to the true pareto front, + say within an order of magnitude, but it's not hard to construct scenarios + where this is not the case. e.g. suppose we enumerate all valid test cases + in increasing shortlex order as s_1, ..., s_n, ... and have scores f and + g such that f(s_i) = min(i, N) and g(s_i) = 1 if i >= N, then the pareto + front is the set {s_1, ..., S_N}, but the only element of the front that + will dominate s_i when i > N is S_N, which we select with probability + 1 / N. A better data structure could solve this, but at the cost of more + expensive operations and higher per element memory use, so we'll wait to + see how much of a problem this is in practice before we try that. + """ + + def __init__(self, random: Random) -> None: + self.__random = random + self.__eviction_listeners: list[Callable[[ConjectureResult], None]] = [] + + self.front: SortedList[ConjectureResult] = SortedList( + key=lambda d: sort_key(d.nodes) + ) + self.__pending: ConjectureResult | None = None + + def add(self, data: ConjectureData | ConjectureResult | _Overrun) -> bool: + """Attempts to add ``data`` to the pareto front. 
Returns True if + ``data`` is now in the front, including if data is already in the + collection, and False otherwise""" + if data.status < Status.VALID: + return False + + assert not isinstance(data, _Overrun) + data = data.as_result() + assert not isinstance(data, _Overrun) + + if not self.front: + self.front.add(data) + return True + + if data in self.front: + return True + + # We add data to the pareto front by adding it unconditionally and then + # doing a certain amount of randomized "clear down" - testing a random + # set of elements (currently 10) to see if they are dominated by + # something else in the collection. If they are, we remove them. + self.front.add(data) + assert self.__pending is None + try: + self.__pending = data + + # We maintain a set of the current exact pareto front of the + # values we've sampled so far. When we sample a new element we + # either add it to this exact pareto front or remove it from the + # collection entirely. + front = LazySequenceCopy(self.front) + + # We track which values we are going to remove and remove them all + # at the end so the shape of the front doesn't change while we're + # using it. + to_remove: list[ConjectureResult] = [] + + # We now iteratively sample elements from the approximate pareto + # front to check whether they should be retained. When the set of + # dominators gets too large we have sampled at least 10 elements + # and it gets too expensive to continue, so we consider that enough + # due diligence. + i = self.front.index(data) + + # First we attempt to look for values that must be removed by the + # addition of the data. These are necessarily to the right of it + # in the list. + + failures = 0 + while i + 1 < len(front) and failures < 10: + j = self.__random.randrange(i + 1, len(front)) + candidate = front.pop(j) + dom = dominance(data, candidate) + assert dom != DominanceRelation.RIGHT_DOMINATES + if dom == DominanceRelation.LEFT_DOMINATES: + to_remove.append(candidate) + failures = 0 + else: + failures += 1 + + # Now we look at the points up to where we put data in to see if + # it is dominated. While we're here we spend some time looking for + # anything else that might be dominated too, compacting down parts + # of the list. 
+ + dominators = [data] + + while i >= 0 and len(dominators) < 10: + front.swap(i, self.__random.randint(0, i)) + + candidate = front[i] + + already_replaced = False + j = 0 + while j < len(dominators): + v = dominators[j] + + dom = dominance(candidate, v) + if dom == DominanceRelation.LEFT_DOMINATES: + if not already_replaced: + already_replaced = True + dominators[j] = candidate + j += 1 + else: # pragma: no cover # flaky, by test_database_contains_only_pareto_front + dominators[j], dominators[-1] = ( + dominators[-1], + dominators[j], + ) + dominators.pop() + to_remove.append(v) + elif dom == DominanceRelation.RIGHT_DOMINATES: + to_remove.append(candidate) + break + elif dom == DominanceRelation.EQUAL: + break + else: + j += 1 + else: + dominators.append(candidate) + i -= 1 + + for v in to_remove: + self._remove(v) + return data in self.front + finally: + self.__pending = None + + def on_evict(self, f: Callable[[ConjectureResult], None]) -> None: + """Register a listener function that will be called with data when it + gets removed from the front because something else dominates it.""" + self.__eviction_listeners.append(f) + + def __contains__(self, data: object) -> bool: + if not isinstance(data, (ConjectureData, ConjectureResult)): + return False + + result = data.as_result() + if isinstance(result, _Overrun): + return False + + return result in self.front + + def __iter__(self) -> Iterator[ConjectureResult]: + return iter(self.front) + + def __getitem__(self, i: int) -> ConjectureResult: + return self.front[i] + + def __len__(self) -> int: + return len(self.front) + + def _remove(self, data: ConjectureResult) -> None: + try: + self.front.remove(data) + except ValueError: + return + if data is not self.__pending: + for f in self.__eviction_listeners: + f(data) + + +class ParetoOptimiser: + """Class for managing optimisation of the pareto front. That is, given the + current best known pareto front, this class runs an optimisation process + that attempts to bring it closer to the actual pareto front. + + Currently this is fairly basic and only handles pareto optimisation that + works by reducing the test case in the shortlex order. We expect it will + grow more powerful over time. + """ + + def __init__(self, engine: "ConjectureRunner") -> None: + self.__engine = engine + assert self.__engine.pareto_front is not None + self.front: ParetoFront = self.__engine.pareto_front + + def run(self) -> None: + seen = set() + + # We iterate backwards through the pareto front, using the shrinker to + # (hopefully) replace each example with a smaller one. Note that it's + # important that we start from the end for two reasons: Firstly, by + # doing it this way we ensure that any new front members we discover + # during optimisation will also get optimised (because they will be + # inserted into the part of the front that we haven't visited yet), + # and secondly we generally expect that we will not finish this process + # in a single run, because it's relatively expensive in terms of our + # example budget, and by starting from the end we ensure that each time + # we run the tests we improve the pareto front because we work on the + # bits that we haven't covered yet. 
+ i = len(self.front) - 1 + prev = None + while i >= 0 and not self.__engine.interesting_examples: + assert self.front + i = min(i, len(self.front) - 1) + target = self.front[i] + if choices_key(target.choices) in seen: + i -= 1 + continue + assert target is not prev + prev = target + + def allow_transition(source, destination): + """Shrink to data that strictly pareto dominates the current + best value we've seen, which is the current target of the + shrinker. + + Note that during shrinking we may discover other smaller + examples that this function will reject and will get added to + the front. This is fine, because they will be processed on + later iterations of this loop.""" + if dominance(destination, source) == DominanceRelation.LEFT_DOMINATES: + # If ``destination`` dominates ``source`` then ``source`` + # must be dominated in the front - either ``destination`` is in + # the front, or it was not added to it because it was + # dominated by something in it. + self.front._remove(source) + return True + return False + + shrunk = self.__engine.shrink(target, allow_transition=allow_transition) + seen.add(choices_key(shrunk.choices)) + + # Note that the front may have changed shape arbitrarily when + # we ran the shrinker. If it didn't change shape then this is + # i - 1. If it did change shape then this is the largest value + # in the front which is smaller than the previous target, so + # is the correct place to resume from. In particular note that the + # size of the front might have grown because of slippage during the + # shrink, but all of the newly introduced elements will be smaller + # than `target`, so will be covered by this iteration. + i = self.front.front.bisect_left(target) diff --git a/vendored/hypothesis/internal/conjecture/provider_conformance.py b/vendored/hypothesis/internal/conjecture/provider_conformance.py new file mode 100644 index 0000000..558bb68 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/provider_conformance.py @@ -0,0 +1,502 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import math +import sys +from collections.abc import Collection, Iterable, Sequence +from typing import Any + +from hypothesis import ( + HealthCheck, + assume, + note, + settings as Settings, + strategies as st, +) +from hypothesis.errors import BackendCannotProceed +from hypothesis.internal.compat import batched +from hypothesis.internal.conjecture.choice import ( + ChoiceTypeT, + choice_permitted, +) +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.conjecture.providers import ( + COLLECTION_DEFAULT_MAX_SIZE, + HypothesisProvider, + PrimitiveProvider, + with_register_backend, +) +from hypothesis.internal.floats import SMALLEST_SUBNORMAL, sign_aware_lte +from hypothesis.internal.intervalsets import IntervalSet +from hypothesis.stateful import RuleBasedStateMachine, initialize, precondition, rule +from hypothesis.strategies import DrawFn, SearchStrategy +from hypothesis.strategies._internal.strings import OneCharStringStrategy, TextStrategy + + +def build_intervals(intervals: list[int]) -> list[tuple[int, int]]: + if len(intervals) % 2: + intervals = intervals[:-1] + intervals.sort() + return list(batched(intervals, 2, strict=True)) + + +def interval_lists( + *, min_codepoint: int = 0, max_codepoint: int = sys.maxunicode, min_size: int = 0 +) -> SearchStrategy[Iterable[Sequence[int]]]: + return ( + st.lists( + st.integers(min_codepoint, max_codepoint), + unique=True, + min_size=min_size * 2, + ) + .map(sorted) + .map(build_intervals) + ) + + +def intervals( + *, min_codepoint: int = 0, max_codepoint: int = sys.maxunicode, min_size: int = 0 +) -> SearchStrategy[IntervalSet]: + return st.builds( + IntervalSet, + interval_lists( + min_codepoint=min_codepoint, max_codepoint=max_codepoint, min_size=min_size + ), + ) + + +@st.composite +def integer_weights( + draw: DrawFn, min_value: int | None = None, max_value: int | None = None +) -> dict[int, float]: + # Sampler doesn't play well with super small floats, so exclude them + weights = draw( + st.dictionaries( + st.integers(min_value=min_value, max_value=max_value), + st.floats(0.001, 1), + min_size=1, + max_size=255, + ) + ) + # invalid to have a weighting that disallows all possibilities + assume(sum(weights.values()) != 0) + # re-normalize probabilities to sum to some arbitrary target < 1 + target = draw(st.floats(0.001, 0.999)) + factor = target / sum(weights.values()) + weights = {k: v * factor for k, v in weights.items()} + # float rounding error can cause this to fail. 
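+    # (for instance, a target very close to 0.001 can rescale the weights to a
+    # sum fractionally below it)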
+ assume(0.001 <= sum(weights.values()) <= 0.999) + return weights + + +@st.composite +def integer_constraints( + draw, + *, + use_min_value=None, + use_max_value=None, + use_shrink_towards=None, + use_weights=None, + use_forced=False, +): + min_value = None + max_value = None + shrink_towards = 0 + weights = None + + if use_min_value is None: + use_min_value = draw(st.booleans()) + if use_max_value is None: + use_max_value = draw(st.booleans()) + use_shrink_towards = draw(st.booleans()) + if use_weights is None: + use_weights = ( + draw(st.booleans()) if (use_min_value and use_max_value) else False + ) + + # Invariants: + # (1) min_value <= forced <= max_value + # (2) sum(weights.values()) < 1 + # (3) len(weights) <= 255 + + if use_shrink_towards: + shrink_towards = draw(st.integers()) + + forced = draw(st.integers()) if use_forced else None + if use_weights: + assert use_max_value + assert use_min_value + + min_value = draw(st.integers(max_value=forced)) + min_val = max(min_value, forced) if forced is not None else min_value + max_value = draw(st.integers(min_value=min_val)) + + weights = draw(integer_weights(min_value, max_value)) + else: + if use_min_value: + min_value = draw(st.integers(max_value=forced)) + if use_max_value: + min_vals = [] + if min_value is not None: + min_vals.append(min_value) + if forced is not None: + min_vals.append(forced) + min_val = max(min_vals) if min_vals else None + max_value = draw(st.integers(min_value=min_val)) + + if forced is not None: + assume((forced - shrink_towards).bit_length() < 128) + + return { + "min_value": min_value, + "max_value": max_value, + "shrink_towards": shrink_towards, + "weights": weights, + "forced": forced, + } + + +@st.composite +def _collection_constraints( + draw: DrawFn, + *, + forced: Any | None, + use_min_size: bool | None = None, + use_max_size: bool | None = None, +) -> dict[str, int]: + min_size = 0 + max_size = COLLECTION_DEFAULT_MAX_SIZE + # collections are quite expensive in entropy. cap to avoid overruns. + cap = 50 + + if use_min_size is None: + use_min_size = draw(st.booleans()) + if use_max_size is None: + use_max_size = draw(st.booleans()) + + if use_min_size: + min_size = draw( + st.integers(0, min(len(forced), cap) if forced is not None else cap) + ) + + if use_max_size: + max_size = draw( + st.integers( + min_value=min_size if forced is None else max(min_size, len(forced)) + ) + ) + if forced is None: + # cap to some reasonable max size to avoid overruns. + max_size = min(max_size, min_size + 100) + + return {"min_size": min_size, "max_size": max_size} + + +@st.composite +def string_constraints( + draw: DrawFn, + *, + use_min_size: bool | None = None, + use_max_size: bool | None = None, + use_forced: bool = False, +) -> Any: + interval_set = draw(intervals()) + forced = ( + draw(TextStrategy(OneCharStringStrategy(interval_set))) if use_forced else None + ) + constraints = draw( + _collection_constraints( + forced=forced, use_min_size=use_min_size, use_max_size=use_max_size + ) + ) + # if the intervalset is empty, then the min size must be zero, because the + # only valid value is the empty string. 
+ if len(interval_set) == 0: + constraints["min_size"] = 0 + + return {"intervals": interval_set, "forced": forced, **constraints} + + +@st.composite +def bytes_constraints( + draw: DrawFn, + *, + use_min_size: bool | None = None, + use_max_size: bool | None = None, + use_forced: bool = False, +) -> Any: + forced = draw(st.binary()) if use_forced else None + + constraints = draw( + _collection_constraints( + forced=forced, use_min_size=use_min_size, use_max_size=use_max_size + ) + ) + return {"forced": forced, **constraints} + + +@st.composite +def float_constraints( + draw, + *, + use_min_value=None, + use_max_value=None, + use_forced=False, +): + if use_min_value is None: + use_min_value = draw(st.booleans()) + if use_max_value is None: + use_max_value = draw(st.booleans()) + + forced = draw(st.floats()) if use_forced else None + pivot = forced if (use_forced and not math.isnan(forced)) else None + min_value = -math.inf + max_value = math.inf + smallest_nonzero_magnitude = SMALLEST_SUBNORMAL + allow_nan = True if (use_forced and math.isnan(forced)) else draw(st.booleans()) + + if use_min_value: + min_value = draw(st.floats(max_value=pivot, allow_nan=False)) + + if use_max_value: + if pivot is None: + min_val = min_value + else: + min_val = pivot if sign_aware_lte(min_value, pivot) else min_value + max_value = draw(st.floats(min_value=min_val, allow_nan=False)) + + largest_magnitude = max(abs(min_value), abs(max_value)) + # can't force something smaller than our smallest magnitude. + if pivot is not None and pivot != 0.0: + largest_magnitude = min(largest_magnitude, pivot) + + # avoid drawing from an empty range + if largest_magnitude > 0: + smallest_nonzero_magnitude = draw( + st.floats( + min_value=0, + # smallest_nonzero_magnitude breaks internal clamper invariants if + # it is allowed to be larger than the magnitude of {min, max}_value. + # + # Let's also be reasonable here; smallest_nonzero_magnitude is used + # for subnormals, so we will never provide a number above 1 in practice. 
+ max_value=min(largest_magnitude, 1.0), + exclude_min=True, + ) + ) + + assert sign_aware_lte(min_value, max_value) + return { + "min_value": min_value, + "max_value": max_value, + "forced": forced, + "allow_nan": allow_nan, + "smallest_nonzero_magnitude": smallest_nonzero_magnitude, + } + + +@st.composite +def boolean_constraints(draw: DrawFn, *, use_forced: bool = False) -> Any: + forced = draw(st.booleans()) if use_forced else None + # avoid invalid forced combinations + p = draw(st.floats(0, 1, exclude_min=forced is True, exclude_max=forced is False)) + + return {"p": p, "forced": forced} + + +def constraints_strategy(choice_type, strategy_constraints=None, *, use_forced=False): + strategy = { + "boolean": boolean_constraints, + "integer": integer_constraints, + "float": float_constraints, + "bytes": bytes_constraints, + "string": string_constraints, + }[choice_type] + if strategy_constraints is None: + strategy_constraints = {} + return strategy(**strategy_constraints.get(choice_type, {}), use_forced=use_forced) + + +def choice_types_constraints(strategy_constraints=None, *, use_forced=False): + options: list[ChoiceTypeT] = ["boolean", "integer", "float", "bytes", "string"] + return st.one_of( + st.tuples( + st.just(name), + constraints_strategy(name, strategy_constraints, use_forced=use_forced), + ) + for name in options + ) + + +def run_conformance_test( + Provider: type[PrimitiveProvider], + *, + context_manager_exceptions: Collection[type[BaseException]] = (), + settings: Settings | None = None, + _realize_objects: SearchStrategy[Any] = ( + st.from_type(object) | st.from_type(type).flatmap(st.from_type) + ), +) -> None: + """ + Test that the given ``Provider`` class conforms to the |PrimitiveProvider| + interface. + + For instance, this tests that ``Provider`` does not return out of bounds + choices from any of the ``draw_*`` methods, or violate other invariants + which Hypothesis depends on. + + This function is intended to be called at test-time, not at runtime. It is + provided by Hypothesis to make it easy for third-party backend authors to + test their provider. Backend authors wishing to test their provider should + include a test similar to the following in their test suite: + + .. code-block:: python + + from hypothesis.internal.conjecture.provider_conformance import run_conformance_test + + def test_conformance(): + run_conformance_test(MyProvider) + + If your provider can raise control flow exceptions inside one of the five + ``draw_*`` methods that are handled by your provider's + ``per_test_case_context_manager``, pass a list of these exceptions types to + ``context_manager_exceptions``. Otherwise, ``run_conformance_test`` will + treat those exceptions as fatal errors. 
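+
+    For example (an illustrative sketch; ``MyBackendError`` stands in for
+    whatever control-flow exception your backend raises from its draw methods):
+
+    .. code-block:: python
+
+        def test_conformance():
+            run_conformance_test(
+                MyProvider, context_manager_exceptions=(MyBackendError,)
+            )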
+ """ + + class CopiesRealizationProvider(HypothesisProvider): + avoid_realization = Provider.avoid_realization + + with with_register_backend("copies_realization", CopiesRealizationProvider): + + @Settings( + settings, + suppress_health_check=[HealthCheck.too_slow], + backend="copies_realization", + ) + class ProviderConformanceTest(RuleBasedStateMachine): + def __init__(self): + super().__init__() + + @initialize(random=st.randoms()) + def setup(self, random): + if Provider.lifetime == "test_case": + data = ConjectureData(random=random, provider=Provider) + self.provider = data.provider + else: + self.provider = Provider(None) + + self.context_manager = self.provider.per_test_case_context_manager() + self.context_manager.__enter__() + self.frozen = False + + def _draw(self, choice_type, constraints): + del constraints["forced"] + draw_func = getattr(self.provider, f"draw_{choice_type}") + + try: + choice = draw_func(**constraints) + note(f"drew {choice_type} {choice}") + expected_type = { + "integer": int, + "float": float, + "bytes": bytes, + "string": str, + "boolean": bool, + }[choice_type] + assert isinstance(choice, expected_type) + assert choice_permitted(choice, constraints) + except context_manager_exceptions as e: + note( + f"caught exception {type(e)} in context_manager_exceptions: {e}" + ) + try: + self.context_manager.__exit__(type(e), e, None) + except BackendCannotProceed: + self.frozen = True + return None + + return choice + + @precondition(lambda self: not self.frozen) + @rule(constraints=integer_constraints()) + def draw_integer(self, constraints): + self._draw("integer", constraints) + + @precondition(lambda self: not self.frozen) + @rule(constraints=float_constraints()) + def draw_float(self, constraints): + self._draw("float", constraints) + + @precondition(lambda self: not self.frozen) + @rule(constraints=bytes_constraints()) + def draw_bytes(self, constraints): + self._draw("bytes", constraints) + + @precondition(lambda self: not self.frozen) + @rule(constraints=string_constraints()) + def draw_string(self, constraints): + self._draw("string", constraints) + + @precondition(lambda self: not self.frozen) + @rule(constraints=boolean_constraints()) + def draw_boolean(self, constraints): + self._draw("boolean", constraints) + + @precondition(lambda self: not self.frozen) + @rule(label=st.integers()) + def span_start(self, label): + self.provider.span_start(label) + + @precondition(lambda self: not self.frozen) + @rule(discard=st.booleans()) + def span_end(self, discard): + self.provider.span_end(discard) + + @precondition(lambda self: not self.frozen) + @rule() + def freeze(self): + # phase-transition, mimicking data.freeze() at the end of a test case. + self.frozen = True + self.context_manager.__exit__(None, None, None) + + @precondition(lambda self: self.frozen) + @rule(value=_realize_objects) + def realize(self, value): + # filter out nans and weirder things + try: + assume(value == value) + except Exception: + # e.g. value = Decimal('-sNaN') + assume(False) + + # if `value` is non-symbolic, the provider should return it as-is. 
+ assert self.provider.realize(value) == value + + @precondition(lambda self: self.frozen) + @rule() + def observe_test_case(self): + observations = self.provider.observe_test_case() + assert isinstance(observations, dict) + + @precondition(lambda self: self.frozen) + @rule(lifetime=st.sampled_from(["test_function", "test_case"])) + def observe_information_messages(self, lifetime): + observations = self.provider.observe_information_messages( + lifetime=lifetime + ) + for observation in observations: + assert isinstance(observation, dict) + + def teardown(self): + if not self.frozen: + self.context_manager.__exit__(None, None, None) + + ProviderConformanceTest.TestCase().runTest() diff --git a/vendored/hypothesis/internal/conjecture/providers.py b/vendored/hypothesis/internal/conjecture/providers.py new file mode 100644 index 0000000..f1feae0 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/providers.py @@ -0,0 +1,1209 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import abc +import contextlib +import math +import sys +import warnings +from collections.abc import Iterable +from contextlib import AbstractContextManager, contextmanager +from functools import cached_property +from random import Random +from sys import float_info +from types import ModuleType +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Literal, + Optional, + TypeAlias, + TypedDict, + TypeVar, +) + +from sortedcontainers import SortedSet + +from hypothesis.errors import HypothesisWarning +from hypothesis.internal.cache import LRUCache +from hypothesis.internal.compat import WINDOWS, int_from_bytes +from hypothesis.internal.conjecture.choice import ( + ChoiceConstraintsT, + ChoiceT, + ChoiceTypeT, + FloatConstraints, + choice_constraints_key, + choice_permitted, +) +from hypothesis.internal.conjecture.floats import lex_to_float +from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes +from hypothesis.internal.conjecture.utils import ( + INT_SIZES, + INT_SIZES_SAMPLER, + Sampler, + many, +) +from hypothesis.internal.constants_ast import ( + Constants, + constants_from_module, + is_local_module_file, +) +from hypothesis.internal.floats import ( + SIGNALING_NAN, + float_to_int, + make_float_clamper, + next_down, + next_up, +) +from hypothesis.internal.intervalsets import IntervalSet +from hypothesis.internal.observability import InfoObservationType, TestCaseObservation + +if TYPE_CHECKING: + from hypothesis.internal.conjecture.data import ConjectureData + from hypothesis.internal.constants_ast import ConstantT + +T = TypeVar("T") +LifetimeT: TypeAlias = Literal["test_case", "test_function"] +COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large" + + +#: Registered Hypothesis backends. This is a dictionary where keys are the name +#: to be used in |settings.backend|. 
The value of a key can be either: +#: +#: * A string corresponding to an importable absolute path of a +#: |PrimitiveProvider| subclass +#: * A |PrimitiveProvider| subclass (the class itself, not an instance of the +#: class) +#: +#: Hypothesis will instantiate the corresponding |PrimitiveProvider| subclass +#: when the backend is requested by a test's |settings.backend| value. +#: +#: For example, the default Hypothesis backend is registered as: +#: +#: .. code-block:: python +#: +#: from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS +#: +#: AVAILABLE_PROVIDERS["hypothesis"] = "hypothesis.internal.conjecture.providers.HypothesisProvider" +#: # or +#: AVAILABLE_PROVIDERS["hypothesis"] = HypothesisProvider +#: +#: And can be used with: +#: +#: .. code-block:: python +#: +#: from hypothesis import given, settings, strategies as st +#: +#: @given(st.integers()) +#: @settings(backend="hypothesis") +#: def f(n): +#: pass +#: +#: Though, as ``backend="hypothesis"`` is the default setting, the above would +#: typically not have any effect. +#: +#: For third-party backend authors, we strongly encourage ensuring that +#: ``import hypothesis`` does not automatically import the expensive parts of +#: your package, by: +#: +#: - setting a string path here, instead of a provider class +#: - ensuring the registered hypothesis plugin path references a path which just +#: sets AVAILABLE_PROVIDERS and does not import your package +AVAILABLE_PROVIDERS: dict[str, str | type["PrimitiveProvider"]] = { + "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider", + "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider", +} +# cache the choice_permitted constants for a particular set of constraints. +CacheKeyT: TypeAlias = tuple[ChoiceTypeT, tuple[Any, ...]] +CacheValueT: TypeAlias = tuple[tuple["ConstantT", ...], tuple["ConstantT", ...]] +CONSTANTS_CACHE: LRUCache[CacheKeyT, CacheValueT] = LRUCache(1024) + +_constant_floats = ( + [ + 0.5, + 1.1, + 1.5, + 1.9, + 1.0 / 3, + 10e6, + 10e-6, + 1.175494351e-38, + next_up(0.0), + float_info.min, + float_info.max, + 3.402823466e38, + 9007199254740992.0, + 1 - 10e-6, + 2 + 10e-6, + 1.192092896e-07, + 2.2204460492503131e-016, + ] + + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32 + + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64 +) +_constant_floats.extend([-x for x in _constant_floats]) +assert all(isinstance(f, float) for f in _constant_floats) + +_constant_strings = { + # strings which can be interpreted as code / logic + "undefined", + "null", + "NULL", + "nil", + "NIL", + "true", + "false", + "True", + "False", + "TRUE", + "FALSE", + "None", + "none", + "if", + "then", + "else", + "__dict__", + "__proto__", # javascript + # strings which can be interpreted as a number + "0", + "1e100", + "0..0", + "0/0", + "1/0", + "+0.0", + "Infinity", + "-Infinity", + "Inf", + "INF", + "NaN", + "9" * 30, + # common ascii characters + ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~", + # common unicode characters + "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±", + # characters which increase in length when lowercased + "Ⱥ", + "Ⱦ", + # ligatures + "æœÆŒffʤʨß" + # emoticons + "(╯°□°)╯︵ ┻━┻)", + # emojis + "😍", + "🇺🇸", + # emoji modifiers + "🏻" # U+1F3FB Light Skin Tone, + "👍🏻", # 👍 followed by U+1F3FB + # RTL text + "الكل في المجمو عة", + # Ogham text, which contains the only character in the Space Separators + # unicode 
category (Zs) that isn't visually blank:  . # noqa: RUF003 + "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜", + # readable variations on text (bolt/italic/script) + "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠", + "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌", + "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈", + "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰", + "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘", + # upsidown text + "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥", + # reserved strings in windows + "NUL", + "COM1", + "LPT1", + # scunthorpe problem + "Scunthorpe", + # zalgo text + "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣", + # + # examples from https://faultlore.com/blah/text-hates-you/ + "मनीष منش", + "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله", + "lorem لا بسم الله ipsum 你好1234你好", +} + + +# we don't actually care what order the constants are sorted in, just that the +# ordering is deterministic. +GLOBAL_CONSTANTS = Constants( + integers=SortedSet(), + floats=SortedSet(_constant_floats, key=float_to_int), + bytes=SortedSet(), + strings=SortedSet(_constant_strings), +) + +_local_constants = Constants( + integers=SortedSet(), + floats=SortedSet(key=float_to_int), + bytes=SortedSet(), + strings=SortedSet(), +) +# modules that we've already seen and processed for local constants. These are +# are all modules, not necessarily local ones. This lets us quickly see which +# modules are new without an expensive path.resolve() or is_local_module_file +# cache lookup. +_seen_modules: set[ModuleType] = set() +_sys_modules_len: int | None = None + + +def _get_local_constants() -> Constants: + global _sys_modules_len, _local_constants + + if sys.platform == "emscripten": # pragma: no cover + # pyodide builds bundle the stdlib in a nonstandard location, like + # `/lib/python312.zip/heapq.py`. To avoid identifying the entirety of + # the stdlib as local code and slowing down on emscripten, instead return + # that nothing is local. + # + # pyodide may provide some way to distinguish stdlib/third-party/local + # code. I haven't looked into it. If they do, we should correctly implement + # ModuleLocation for pyodide instead of this. + return _local_constants + + count_constants = len(_local_constants) + # We call this function once per HypothesisProvider instance, i.e. once per + # input, so it needs to be performant. The logic here is more complicated + # than necessary because of this. + # + # First, we check whether there are any new modules with a very cheap length + # check. This check can be fooled if a module is added while another module is + # removed, but the more correct check against tuple(sys.modules.keys()) is + # substantially more expensive. Such a new module would eventually be discovered + # if / when the length changes again in the future. + # + # If the length has changed, we find just modules we haven't seen before. Of + # those, we find the ones which correspond to local modules, and extract their + # constants. + + # careful: store sys.modules length when we first check to avoid race conditions + # with other threads loading a module before we set _sys_modules_len. + if (sys_modules_len := len(sys.modules)) != _sys_modules_len: + # set(_seen_modules) shouldn't typically be required, but I have run into + # a "set changed size during iteration" error here when running + # test_provider_conformance_crosshair. 
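+        # Copying both containers into fresh sets gives us a stable snapshot
+        # to subtract, so a module imported concurrently by another thread
+        # cannot change their size mid-operation.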
+ new_modules = set(sys.modules.values()) - set(_seen_modules) + # Repeated SortedSet unions are expensive. Do the initial unions on a + # set(), then do a one-time union with _local_constants after. + new_constants = Constants() + for module in new_modules: + if ( + module_file := getattr(module, "__file__", None) + ) is not None and is_local_module_file(module_file): + new_constants |= constants_from_module(module) + _local_constants |= new_constants + _seen_modules.update(new_modules) + _sys_modules_len = sys_modules_len + + # if we add any new constant, invalidate the constant cache for permitted values. + # A more efficient approach would be invalidating just the keys with this + # choice_type. + if len(_local_constants) > count_constants: + CONSTANTS_CACHE.cache.clear() + + return _local_constants + + +@contextmanager +def with_register_backend(name, provider_cls): + try: + AVAILABLE_PROVIDERS[name] = provider_cls + yield + finally: + del AVAILABLE_PROVIDERS[name] + + +class _BackendInfoMsg(TypedDict): + type: InfoObservationType + title: str + content: str | dict[str, Any] + + +# TODO_DOCS: link to choice sequence explanation page + + +class PrimitiveProvider(abc.ABC): + """ + |PrimitiveProvider| is the implementation interface of a + :ref:`Hypothesis backend `. + + A |PrimitiveProvider| is required to implement the following five + ``draw_*`` methods: + + * |PrimitiveProvider.draw_integer| + * |PrimitiveProvider.draw_boolean| + * |PrimitiveProvider.draw_float| + * |PrimitiveProvider.draw_string| + * |PrimitiveProvider.draw_bytes| + + Each strategy in Hypothesis generates values by drawing a series of choices + from these five methods. By overriding them, a |PrimitiveProvider| can control + the distribution of inputs generated by Hypothesis. + + For example, :pypi:`hypothesis-crosshair` implements a |PrimitiveProvider| + which uses an SMT solver to generate inputs that uncover new branches. + + Once you implement a |PrimitiveProvider|, you can make it available for use + through |AVAILABLE_PROVIDERS|. + """ + + #: The lifetime of a |PrimitiveProvider| instance. Either ``test_function`` + #: or ``test_case``. + #: + #: If ``test_function`` (the default), a single provider instance will be + #: instantiated and used for the entirety of each test function (i.e., roughly + #: one provider per |@given| annotation). This can be useful for tracking state + #: over the entirety of a test function. + #: + #: If ``test_case``, a new provider instance will be instantiated and used for + #: each input Hypothesis generates. + #: + #: The ``conjecturedata`` argument to ``PrimitiveProvider.__init__`` will + #: be ``None`` for a lifetime of ``test_function``, and an instance of + #: ``ConjectureData`` for a lifetime of ``test_case``. + #: + #: Third-party providers likely want to set a lifetime of ``test_function``. + lifetime: ClassVar[LifetimeT] = "test_function" + + #: Solver-based backends such as ``hypothesis-crosshair`` use symbolic values + #: which record operations performed on them in order to discover new paths. + #: If ``avoid_realization`` is set to ``True``, hypothesis will avoid interacting + #: with symbolic choices returned by the provider in any way that would force + #: the solver to narrow the range of possible values for that symbolic. + #: + #: Setting this to ``True`` disables some hypothesis features and optimizations. + #: Only set this to ``True`` if it is necessary for your backend. 
+ avoid_realization: ClassVar[bool] = False + + #: If ``True``, |PrimitiveProvider.on_observation| will be added as a + #: callback via |add_observability_callback|, enabling observability during + # the lifetime of this provider. If ``False``, |PrimitiveProvider.on_observation| + #: will never be called by Hypothesis. + #: + #: The opt-in behavior of observability is because enabling observability + #: might increase runtime or memory usage. + add_observability_callback: ClassVar[bool] = False + + def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None: + self._cd = conjecturedata + + @abc.abstractmethod + def draw_boolean( + self, + p: float = 0.5, + ) -> bool: + """ + Draw a boolean choice. + + Parameters + ---------- + p: float + The probability of returning ``True``. Between 0 and 1 inclusive. + + Except for ``0`` and ``1``, the value of ``p`` is a hint provided by + Hypothesis, and may be ignored by the backend. + + If ``0``, the provider must return ``False``. If ``1``, the provider + must return ``True``. + """ + raise NotImplementedError + + @abc.abstractmethod + def draw_integer( + self, + min_value: int | None = None, + max_value: int | None = None, + *, + weights: dict[int, float] | None = None, + shrink_towards: int = 0, + ) -> int: + """ + Draw an integer choice. + + Parameters + ---------- + min_value : int | None + (Inclusive) lower bound on the integer value. If ``None``, there is + no lower bound. + max_value : int | None + (Inclusive) upper bound on the integer value. If ``None``, there is + no upper bound. + weights: dict[int, float] | None + Maps keys in the range [``min_value``, ``max_value``] to the probability + of returning that key. + shrink_towards: int + The integer to shrink towards. This is not used during generation and + can be ignored by backends. + """ + raise NotImplementedError + + @abc.abstractmethod + def draw_float( + self, + *, + min_value: float = -math.inf, + max_value: float = math.inf, + allow_nan: bool = True, + smallest_nonzero_magnitude: float, + ) -> float: + """ + Draw a float choice. + + Parameters + ---------- + min_value : float + (Inclusive) lower bound on the float value. + max_value : float + (Inclusive) upper bound on the float value. + allow_nan : bool + If ``False``, it is invalid to return ``math.nan``. + smallest_nonzero_magnitude : float + The smallest allowed nonzero magnitude. ``draw_float`` should not + return a float ``f`` if ``abs(f) < smallest_nonzero_magnitude``. + """ + raise NotImplementedError + + @abc.abstractmethod + def draw_string( + self, + intervals: IntervalSet, + *, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + ) -> str: + """ + Draw a string choice. + + Parameters + ---------- + intervals : IntervalSet + The set of codepoints to sample from. + min_size : int + (Inclusive) lower bound on the string length. + max_size : int + (Inclusive) upper bound on the string length. + """ + raise NotImplementedError + + @abc.abstractmethod + def draw_bytes( + self, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + ) -> bytes: + """ + Draw a bytes choice. + + Parameters + ---------- + min_size : int + (Inclusive) lower bound on the bytes length. + max_size : int + (Inclusive) upper bound on the bytes length. 
+ """ + raise NotImplementedError + + def per_test_case_context_manager(self) -> AbstractContextManager: + """ + Returns a context manager which will be entered each time Hypothesis + starts generating and executing one test case, and exited when that test + case finishes generating and executing, including if any exception is + thrown. + + In the lifecycle of a Hypothesis test, this is called before + generating strategy values for each test case. This is just before any + :ref:`custom executor ` is called. + + Even if not returning a custom context manager, |PrimitiveProvider| + subclasses are welcome to override this method to know when Hypothesis + starts and ends the execution of a single test case. + """ + return contextlib.nullcontext() + + def realize(self, value: T, *, for_failure: bool = False) -> T: + """ + Called whenever hypothesis requires a concrete (non-symbolic) value from + a potentially symbolic value. Hypothesis will not check that ``value`` is + symbolic before calling ``realize``, so you should handle the case where + ``value`` is non-symbolic. + + The returned value should be non-symbolic. If you cannot provide a value, + raise |BackendCannotProceed| with a value of ``"discard_test_case"``. + + If ``for_failure`` is ``True``, the value is associated with a failing example. + In this case, the backend should spend substantially more effort when + attempting to realize the value, since it is important to avoid discarding + failing examples. Backends may still raise |BackendCannotProceed| when + ``for_failure`` is ``True``, if realization is truly impossible or if + realization takes significantly longer than expected (say, 5 minutes). + """ + return value + + def replay_choices(self, choices: tuple[ChoiceT, ...]) -> None: + """ + Called when Hypothesis has discovered a choice sequence which the provider + may wish to enqueue to replay under its own instrumentation when we next + ask to generate a test case, rather than generating one from scratch. + + This is used to e.g. warm-start :pypi:`hypothesis-crosshair` with a corpus + of high-code-coverage inputs discovered by + `HypoFuzz `_. + """ + return None + + def observe_test_case(self) -> dict[str, Any]: + """Called at the end of the test case when :ref:`observability + ` is enabled. + + The return value should be a non-symbolic json-encodable dictionary, + and will be included in observations as ``observation["metadata"]["backend"]``. + """ + return {} + + def observe_information_messages( + self, *, lifetime: LifetimeT + ) -> Iterable[_BackendInfoMsg]: + """Called at the end of each test case and again at end of the test function. + + Return an iterable of ``{type: info/alert/error, title: str, content: str | dict}`` + dictionaries to be delivered as individual information messages. Hypothesis + adds the ``run_start`` timestamp and ``property`` name for you. + """ + assert lifetime in ("test_case", "test_function") + yield from [] + + def on_observation(self, observation: TestCaseObservation) -> None: # noqa: B027 + """ + Called at the end of each test case which uses this provider, with the same + ``observation["type"] == "test_case"`` observation that is passed to + other callbacks added via |add_observability_callback|. This method is not + called with ``observation["type"] in {"info", "alert", "error"}`` + observations. + + .. important:: + + For |PrimitiveProvider.on_observation| to be called by Hypothesis, + |PrimitiveProvider.add_observability_callback| must be set to ``True``. 
+ + |PrimitiveProvider.on_observation| is explicitly opt-in, as enabling + observability might increase runtime or memory usage. + + Calls to this method are guaranteed to alternate with calls to + |PrimitiveProvider.per_test_case_context_manager|. For example: + + .. code-block:: python + + # test function starts + per_test_case_context_manager() + on_observation() + per_test_case_context_manager() + on_observation() + ... + # test function ends + + Note that |PrimitiveProvider.on_observation| will not be called for test + cases which did not use this provider during generation, for example + during |Phase.reuse| or |Phase.shrink|, or because Hypothesis switched + to the standard Hypothesis backend after this backend raised too many + |BackendCannotProceed| exceptions. + """ + + def span_start(self, label: int, /) -> None: # noqa: B027 # non-abstract noop + """Marks the beginning of a semantically meaningful span of choices. + + Spans are a depth-first tree structure. A span is opened by a call to + |PrimitiveProvider.span_start|, and a call to |PrimitiveProvider.span_end| + closes the most recently opened span. So the following sequence of calls: + + .. code-block:: python + + span_start(label=1) + n1 = draw_integer() + span_start(label=2) + b1 = draw_boolean() + n2 = draw_integer() + span_end() + f1 = draw_float() + span_end() + + produces the following two spans of choices: + + .. code-block:: + + 1: [n1, b1, n2, f1] + 2: [b1, n2] + + Hypothesis uses spans to denote "semantically meaningful" sequences of + choices. For instance, Hypothesis opens a span for the sequence of choices + made while drawing from each strategy. Not every span corresponds to a + strategy; the generation of e.g. each element in |st.lists| is also marked + with a span, among others. + + ``label`` is an opaque integer, which has no defined semantics. + The only guarantee made by Hypothesis is that all spans with the same + "meaning" will share the same ``label``. So all spans from the same + strategy will share the same label, as will e.g. the spans for |st.lists| + elements. + + Providers can track calls to |PrimitiveProvider.span_start| and + |PrimitiveProvider.span_end| to learn something about the semantics of + the test's choice sequence. For instance, a provider could track the depth + of the span tree, or the number of unique labels, which says something about + the complexity of the choices being generated. Or a provider could track + the span tree across test cases in order to determine what strategies are + being used in what contexts. + + It is possible for Hypothesis to start and immediately stop a span, + without calling a ``draw_*`` method in between. These spans contain zero + choices. + + Hypothesis will always balance the number of calls to + |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|. A call + to |PrimitiveProvider.span_start| will always be followed by a call to + |PrimitiveProvider.span_end| before the end of the test case. + + |PrimitiveProvider.span_start| is called from ``ConjectureData.start_span()`` + internally. + """ + + def span_end(self, discard: bool, /) -> None: # noqa: B027 + """Marks the end of a semantically meaningful span of choices. + + ``discard`` is ``True`` when the draw was filtered out or otherwise marked + as unlikely to contribute to the input data as seen by the user's test. + Note however that side effects can make this determination unsound. + + |PrimitiveProvider.span_end| is called from ``ConjectureData.stop_span()`` + internally. 
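+
+        For example, a provider interested in the depth of the span tree (as
+        suggested under |PrimitiveProvider.span_start|) could track it with a
+        pair of overrides. A minimal illustrative sketch, not a complete
+        provider:
+
+        .. code-block:: python
+
+            class DepthTrackingMixin:
+                depth = 0
+
+                def span_start(self, label, /):
+                    self.depth += 1
+
+                def span_end(self, discard, /):
+                    self.depth -= 1
+
+            # combine with a concrete provider, e.g.
+            # class MyProvider(DepthTrackingMixin, HypothesisProvider): ...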
+ """ + + +class HypothesisProvider(PrimitiveProvider): + lifetime = "test_case" + + def __init__(self, conjecturedata: Optional["ConjectureData"], /): + super().__init__(conjecturedata) + self._random = None if self._cd is None else self._cd._random + + @cached_property + def _local_constants(self): + # defer computation of local constants until/if we need it + return _get_local_constants() + + def _maybe_draw_constant( + self, + choice_type: ChoiceTypeT, + constraints: ChoiceConstraintsT, + *, + p: float = 0.05, + ) -> Optional["ConstantT"]: + assert self._random is not None + assert choice_type != "boolean" + # check whether we even want a constant before spending time computing + # and caching the allowed constants. + if self._random.random() > p: + return None + + # note: this property access results in computation being done + assert self._local_constants is not None + + key = (choice_type, choice_constraints_key(choice_type, constraints)) + if key not in CONSTANTS_CACHE: + CONSTANTS_CACHE[key] = ( + tuple( + choice + for choice in GLOBAL_CONSTANTS.set_for_type(choice_type) + if choice_permitted(choice, constraints) + ), + tuple( + choice + for choice in self._local_constants.set_for_type(choice_type) + if choice_permitted(choice, constraints) + ), + ) + + # split constants into two pools, so we still have a good chance to draw + # global constants even if there are many local constants. + (global_constants, local_constants) = CONSTANTS_CACHE[key] + constants_lists = ([global_constants] if global_constants else []) + ( + [local_constants] if local_constants else [] + ) + if not constants_lists: + return None + + # At this point, we've decided to use a constant. Now we select which pool + # to draw that constant from. + # + # Note that this approach has a different probability distribution than + # attempting a random.random for both global_constants and local_constants. + constants = self._random.choice(constants_lists) + return self._random.choice(constants) + + def draw_boolean( + self, + p: float = 0.5, + ) -> bool: + assert self._random is not None + + if p <= 0: + return False + if p >= 1: + return True + + return self._random.random() < p + + def draw_integer( + self, + min_value: int | None = None, + max_value: int | None = None, + *, + weights: dict[int, float] | None = None, + shrink_towards: int = 0, + ) -> int: + assert self._cd is not None + if ( + constant := self._maybe_draw_constant( + "integer", + { + "min_value": min_value, + "max_value": max_value, + "weights": weights, + "shrink_towards": shrink_towards, + }, + ) + ) is not None: + assert isinstance(constant, int) + return constant + + center = 0 + if min_value is not None: + center = max(min_value, center) + if max_value is not None: + center = min(max_value, center) + + if weights is not None: + assert min_value is not None + assert max_value is not None + + # format of weights is a mapping of ints to p, where sum(p) < 1. + # The remaining probability mass is uniformly distributed over + # *all* ints (not just the unmapped ones; this is somewhat undesirable, + # but simplifies things). + # + # We assert that sum(p) is strictly less than 1 because it simplifies + # handling forced values when we can force into the unmapped probability + # mass. We should eventually remove this restriction. + sampler = Sampler( + [1 - sum(weights.values()), *weights.values()], observe=False + ) + # if we're forcing, it's easiest to force into the unmapped probability + # mass and then force the drawn value after. 
+ idx = sampler.sample(self._cd) + + if idx == 0: + return self._draw_bounded_integer(min_value, max_value) + # implicit reliance on dicts being sorted for determinism + return list(weights)[idx - 1] + + if min_value is None and max_value is None: + return self._draw_unbounded_integer() + + if min_value is None: + assert max_value is not None + probe = max_value + 1 + while max_value < probe: + probe = center + self._draw_unbounded_integer() + return probe + + if max_value is None: + assert min_value is not None + probe = min_value - 1 + while probe < min_value: + probe = center + self._draw_unbounded_integer() + return probe + + return self._draw_bounded_integer(min_value, max_value) + + def draw_float( + self, + *, + min_value: float = -math.inf, + max_value: float = math.inf, + allow_nan: bool = True, + smallest_nonzero_magnitude: float, + ) -> float: + assert self._random is not None + + constraints: FloatConstraints = { + "min_value": min_value, + "max_value": max_value, + "allow_nan": allow_nan, + "smallest_nonzero_magnitude": smallest_nonzero_magnitude, + } + if ( + constant := self._maybe_draw_constant("float", constraints, p=0.15) + ) is not None: + assert isinstance(constant, float) + return constant + + # on top of the probability to draw a constant float, we independently + # upweight 0.0/-0.0, math.inf, -math.inf, nans, and boundary values. + weird_floats = [ + f + for f in [ + 0.0, + -0.0, + math.inf, + -math.inf, + math.nan, + -math.nan, + SIGNALING_NAN, + -SIGNALING_NAN, + min_value, + next_up(min_value), + min_value + 1, + max_value - 1, + next_down(max_value), + max_value, + ] + if choice_permitted(f, constraints) + ] + + if weird_floats and self._random.random() < 0.05: + return self._random.choice(weird_floats) + + clamper = make_float_clamper( + min_value, + max_value, + smallest_nonzero_magnitude=smallest_nonzero_magnitude, + allow_nan=allow_nan, + ) + + result = self._draw_float() + if allow_nan and math.isnan(result): + clamped = result # pragma: no cover + else: + clamped = clamper(result) + if float_to_int(clamped) != float_to_int(result) and not ( + math.isnan(result) and allow_nan + ): + result = clamped + return result + + def draw_string( + self, + intervals: IntervalSet, + *, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + ) -> str: + assert self._cd is not None + assert self._random is not None + + if len(intervals) == 0: + return "" + + if ( + constant := self._maybe_draw_constant( + "string", + {"intervals": intervals, "min_size": min_size, "max_size": max_size}, + ) + ) is not None: + assert isinstance(constant, str) + return constant + + average_size = min( + max(min_size * 2, min_size + 5), + 0.5 * (min_size + max_size), + ) + + chars = [] + elements = many( + self._cd, + min_size=min_size, + max_size=max_size, + average_size=average_size, + observe=False, + ) + while elements.more(): + if len(intervals) > 256: + if self.draw_boolean(0.2): + i = self._random.randint(256, len(intervals) - 1) + else: + i = self._random.randint(0, 255) + else: + i = self._random.randint(0, len(intervals) - 1) + + chars.append(intervals.char_in_shrink_order(i)) + + return "".join(chars) + + def draw_bytes( + self, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + ) -> bytes: + assert self._cd is not None + assert self._random is not None + + if ( + constant := self._maybe_draw_constant( + "bytes", {"min_size": min_size, "max_size": max_size} + ) + ) is not None: + assert isinstance(constant, bytes) + return constant + + buf = 
bytearray() + average_size = min( + max(min_size * 2, min_size + 5), + 0.5 * (min_size + max_size), + ) + elements = many( + self._cd, + min_size=min_size, + max_size=max_size, + average_size=average_size, + observe=False, + ) + while elements.more(): + buf += self._random.randbytes(1) + + return bytes(buf) + + def _draw_float(self) -> float: + assert self._random is not None + + f = lex_to_float(self._random.getrandbits(64)) + sign = 1 if self._random.getrandbits(1) else -1 + return sign * f + + def _draw_unbounded_integer(self) -> int: + assert self._cd is not None + assert self._random is not None + + size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd)] + + r = self._random.getrandbits(size) + sign = r & 1 + r >>= 1 + if sign: + r = -r + return r + + def _draw_bounded_integer( + self, + lower: int, + upper: int, + *, + vary_size: bool = True, + ) -> int: + assert lower <= upper + assert self._cd is not None + assert self._random is not None + + if lower == upper: + return lower + + bits = (upper - lower).bit_length() + if bits > 24 and vary_size and self._random.random() < 7 / 8: + # For large ranges, we combine the uniform random distribution + # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our + # choice of unicode characters is uniform but the 32bit distribution is not. + idx = INT_SIZES_SAMPLER.sample(self._cd) + cap_bits = min(bits, INT_SIZES[idx]) + upper = min(upper, lower + 2**cap_bits - 1) + return self._random.randint(lower, upper) + + return self._random.randint(lower, upper) + + +# Masks for masking off the first byte of an n-bit buffer. +# The appropriate mask is stored at position n % 8. +BYTE_MASKS = [(1 << n) - 1 for n in range(8)] +BYTE_MASKS[0] = 255 + + +class BytestringProvider(PrimitiveProvider): + lifetime = "test_case" + + def __init__( + self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes + ): + super().__init__(conjecturedata) + self.bytestring = bytestring + self.index = 0 + self.drawn = bytearray() + + def _draw_bits(self, n): + if n == 0: # pragma: no cover + return 0 + n_bytes = bits_to_bytes(n) + if self.index + n_bytes > len(self.bytestring): + self._cd.mark_overrun() + buf = bytearray(self.bytestring[self.index : self.index + n_bytes]) + self.index += n_bytes + + buf[0] &= BYTE_MASKS[n % 8] + buf = bytes(buf) + self.drawn += buf + return int_from_bytes(buf) + + def draw_boolean( + self, + p: float = 0.5, + ) -> bool: + if p <= 0: + return False + if p >= 1: + return True + + # always use one byte for booleans to maintain constant draw size. + # If a probability requires more than 8 bits to represent precisely, + # the result will be slightly biased, but not badly. + bits = 8 + size = 2**bits + # always leave at least one value that can be true, even for very small + # p. + falsey = max(1, math.floor(size * (1 - p))) + n = self._draw_bits(bits) + return n >= falsey + + def draw_integer( + self, + min_value: int | None = None, + max_value: int | None = None, + *, + weights: dict[int, float] | None = None, + shrink_towards: int = 0, + ) -> int: + assert self._cd is not None + + # we explicitly ignore integer weights for now, as they are likely net + # negative on fuzzer performance. 
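+        # When bounds are missing we substitute wide defaults below: a 128-bit
+        # signed range if both are absent, or a 2**64-wide window anchored at
+        # the given bound if only one is present.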
+ + if min_value is None and max_value is None: + min_value = -(2**127) + max_value = 2**127 - 1 + elif min_value is None: + assert max_value is not None + min_value = max_value - 2**64 + elif max_value is None: + assert min_value is not None + max_value = min_value + 2**64 + + if min_value == max_value: + return min_value + + bits = (max_value - min_value).bit_length() + value = self._draw_bits(bits) + while not (min_value <= value <= max_value): + value = self._draw_bits(bits) + return value + + def draw_float( + self, + *, + min_value: float = -math.inf, + max_value: float = math.inf, + allow_nan: bool = True, + smallest_nonzero_magnitude: float, + ) -> float: + n = self._draw_bits(64) + sign = -1 if n >> 64 else 1 + f = sign * lex_to_float(n & ((1 << 64) - 1)) + clamper = make_float_clamper( + min_value, + max_value, + smallest_nonzero_magnitude=smallest_nonzero_magnitude, + allow_nan=allow_nan, + ) + return clamper(f) + + def _draw_collection(self, min_size, max_size, *, alphabet_size): + average_size = min( + max(min_size * 2, min_size + 5), + 0.5 * (min_size + max_size), + ) + elements = many( + self._cd, + min_size=min_size, + max_size=max_size, + average_size=average_size, + observe=False, + ) + values = [] + while elements.more(): + values.append(self.draw_integer(0, alphabet_size - 1)) + return values + + def draw_string( + self, + intervals: IntervalSet, + *, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + ) -> str: + values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals)) + return "".join(chr(intervals[v]) for v in values) + + def draw_bytes( + self, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + ) -> bytes: + values = self._draw_collection(min_size, max_size, alphabet_size=2**8) + return bytes(values) + + +class URandom(Random): + # we reimplement a Random instance instead of using SystemRandom, because + # os.urandom is not guaranteed to read from /dev/urandom. + + @staticmethod + def _urandom(size: int) -> bytes: + with open("/dev/urandom", "rb") as f: + return f.read(size) + + def getrandbits(self, k: int) -> int: + assert k >= 0 + size = bits_to_bytes(k) + n = int_from_bytes(self._urandom(size)) + # trim excess bits + return n >> (size * 8 - k) + + def random(self) -> float: + # adapted from random.SystemRandom.random + return (int_from_bytes(self._urandom(7)) >> 3) * (2**-53) + + +class URandomProvider(HypothesisProvider): + # A provider which reads directly from /dev/urandom as its source of randomness. + # This provider exists to provide better Hypothesis integration with Antithesis + # (https://antithesis.com/), which interprets calls to /dev/urandom as the + # randomness to mutate. This effectively gives Antithesis control over + # the choices made by the URandomProvider. + # + # If you are not using Antithesis, you probably don't want to use this + # provider. + + def __init__(self, conjecturedata: Optional["ConjectureData"], /): + super().__init__(conjecturedata) + if WINDOWS: # pragma: no cover + warnings.warn( + "/dev/urandom is not available on windows. 
Falling back to " + 'standard PRNG generation (equivalent to backend="hypothesis").', + HypothesisWarning, + stacklevel=1, + ) + # don't overwrite the HypothesisProvider self._random attribute in + # this case + else: + self._random = URandom() diff --git a/vendored/hypothesis/internal/conjecture/shrinker.py b/vendored/hypothesis/internal/conjecture/shrinker.py new file mode 100644 index 0000000..8b5af7f --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/shrinker.py @@ -0,0 +1,1764 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import math +from collections import defaultdict +from collections.abc import Callable, Sequence +from dataclasses import dataclass +from typing import ( + TYPE_CHECKING, + Any, + Literal, + TypeAlias, + cast, +) + +from hypothesis.internal.conjecture.choice import ( + ChoiceNode, + ChoiceT, + choice_equal, + choice_from_index, + choice_key, + choice_permitted, + choice_to_index, +) +from hypothesis.internal.conjecture.data import ( + ConjectureData, + ConjectureResult, + Spans, + Status, + _Overrun, + draw_choice, +) +from hypothesis.internal.conjecture.junkdrawer import ( + endswith, + find_integer, + replace_all, + startswith, +) +from hypothesis.internal.conjecture.shrinking import ( + Bytes, + Float, + Integer, + Ordering, + String, +) +from hypothesis.internal.conjecture.shrinking.choicetree import ( + ChoiceTree, + prefix_selection_order, + random_selection_order, +) +from hypothesis.internal.floats import MAX_PRECISE_INTEGER + +if TYPE_CHECKING: + from random import Random + + from hypothesis.internal.conjecture.engine import ConjectureRunner + +ShrinkPredicateT: TypeAlias = Callable[[ConjectureResult | _Overrun], bool] + + +def sort_key(nodes: Sequence[ChoiceNode]) -> tuple[int, tuple[int, ...]]: + """Returns a sort key such that "simpler" choice sequences are smaller than + "more complicated" ones. + + We define sort_key so that x is simpler than y if x is shorter than y or if + they have the same length and map(choice_to_index, x) < map(choice_to_index, y). + + The reason for using this ordering is: + + 1. If x is shorter than y then that means we had to make fewer decisions + in constructing the test case when we ran x than we did when we ran y. + 2. If x is the same length as y then replacing a choice with a lower index + choice corresponds to replacing it with a simpler/smaller choice. + 3. Because choices drawn early in generation potentially get used in more + places they potentially have a more significant impact on the final + result, so it makes sense to prioritise reducing earlier choices over + later ones. 
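+
+    As a concrete illustration (with made-up choice indices), Python's tuple
+    ordering then gives:
+
+    .. code-block:: python
+
+        # shorter sequences always sort first...
+        assert (2, (7, 7)) < (3, (0, 0, 0))
+        # ...and equal lengths are compared index by index.
+        assert (2, (0, 3)) < (2, (0, 7))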
+ """ + return ( + len(nodes), + tuple(choice_to_index(node.value, node.constraints) for node in nodes), + ) + + +@dataclass(slots=True, frozen=False) +class ShrinkPass: + function: Any + name: str | None = None + last_prefix: Any = () + + # some execution statistics + calls: int = 0 + misaligned: int = 0 + shrinks: int = 0 + deletions: int = 0 + + def __post_init__(self): + if self.name is None: + self.name = self.function.__name__ + + def __hash__(self): + return hash(self.name) + + +class StopShrinking(Exception): + pass + + +class Shrinker: + """A shrinker is a child object of a ConjectureRunner which is designed to + manage the associated state of a particular shrink problem. That is, we + have some initial ConjectureData object and some property of interest + that it satisfies, and we want to find a ConjectureData object with a + shortlex (see sort_key above) smaller choice sequence that exhibits the same + property. + + Currently the only property of interest we use is that the status is + INTERESTING and the interesting_origin takes on some fixed value, but we + may potentially be interested in other use cases later. + However we assume that data with a status < VALID never satisfies the predicate. + + The shrinker keeps track of a value shrink_target which represents the + current best known ConjectureData object satisfying the predicate. + It refines this value by repeatedly running *shrink passes*, which are + methods that perform a series of transformations to the current shrink_target + and evaluate the underlying test function to find new ConjectureData + objects. If any of these satisfy the predicate, the shrink_target + is updated automatically. Shrinking runs until no shrink pass can + improve the shrink_target, at which point it stops. It may also be + terminated if the underlying engine throws RunIsComplete, but that + is handled by the calling code rather than the Shrinker. + + ======================= + Designing Shrink Passes + ======================= + + Generally a shrink pass is just any function that calls + cached_test_function and/or consider_new_nodes a number of times, + but there are a couple of useful things to bear in mind. + + A shrink pass *makes progress* if running it changes self.shrink_target + (i.e. it tries a shortlex smaller ConjectureData object satisfying + the predicate). The desired end state of shrinking is to find a + value such that no shrink pass can make progress, i.e. that we + are at a local minimum for each shrink pass. + + In aid of this goal, the main invariant that a shrink pass much + satisfy is that whether it makes progress must be deterministic. + It is fine (encouraged even) for the specific progress it makes + to be non-deterministic, but if you run a shrink pass, it makes + no progress, and then you immediately run it again, it should + never succeed on the second time. This allows us to stop as soon + as we have run each shrink pass and seen no progress on any of + them. + + This means that e.g. it's fine to try each of N deletions + or replacements in a random order, but it's not OK to try N random + deletions (unless you have already shrunk at least once, though we + don't currently take advantage of this loophole). + + Shrink passes need to be written so as to be robust against + change in the underlying shrink target. It is generally safe + to assume that the shrink target does not change prior to the + point of first modification - e.g. 
if you change no bytes at + index ``i``, all spans whose start is ``<= i`` still exist, + as do all blocks, and the data object is still of length + ``>= i + 1``. This can only be violated by bad user code which + relies on an external source of non-determinism. + + When the underlying shrink_target changes, shrink + passes should not run substantially more test_function calls + on success than they do on failure. Say, no more than a constant + factor more. In particular shrink passes should not iterate to a + fixed point. + + This means that shrink passes are often written with loops that + are carefully designed to do the right thing in the case that no + shrinks occurred and try to adapt to any changes to do a reasonable + job. e.g. say we wanted to write a shrink pass that tried deleting + each individual choice (this isn't an especially good pass, + but it leads to a simple illustrative example), we might do it + by iterating over the choice sequence like so: + + .. code-block:: python + + i = 0 + while i < len(self.shrink_target.nodes): + if not self.consider_new_nodes( + self.shrink_target.nodes[:i] + self.shrink_target.nodes[i + 1 :] + ): + i += 1 + + The reason for writing the loop this way is that i is always a + valid index into the current choice sequence, even if the current sequence + changes as a result of our actions. When the choice sequence changes, + we leave the index where it is rather than restarting from the + beginning, and carry on. This means that the number of steps we + run in this case is always bounded above by the number of steps + we would run if nothing works. + + Another thing to bear in mind about shrink pass design is that + they should prioritise *progress*. If you have N operations that + you need to run, you should try to order them in such a way as + to avoid stalling, where you have long periods of test function + invocations where no shrinks happen. This is bad because whenever + we shrink we reduce the amount of work the shrinker has to do + in future, and often speed up the test function, so we ideally + wanted those shrinks to happen much earlier in the process. + + Sometimes stalls are inevitable of course - e.g. if the pass + makes no progress, then the entire thing is just one long stall, + but it's helpful to design it so that stalls are less likely + in typical behaviour. + + The two easiest ways to do this are: + + * Just run the N steps in random order. As long as a + reasonably large proportion of the operations succeed, this + guarantees the expected stall length is quite short. The + book keeping for making sure this does the right thing when + it succeeds can be quite annoying. + * When you have any sort of nested loop, loop in such a way + that both loop variables change each time. This prevents + stalls which occur when one particular value for the outer + loop is impossible to make progress on, rendering the entire + inner loop into a stall. + + However, although progress is good, too much progress can be + a bad sign! If you're *only* seeing successful reductions, + that's probably a sign that you are making changes that are + too timid. Two useful things to offset this: + + * It's worth writing shrink passes which are *adaptive*, in + the sense that when operations seem to be working really + well we try to bundle multiple of them together. This can + often be used to turn what would be O(m) successful calls + into O(log(m)). 
+ * It's often worth trying one or two special minimal values + before trying anything more fine grained (e.g. replacing + the whole thing with zero). + + """ + + def derived_value(fn): + """It's useful during shrinking to have access to derived values of + the current shrink target. + + This decorator allows you to define these as cached properties. They + are calculated once, then cached until the shrink target changes, then + recalculated the next time they are used.""" + + def accept(self): + try: + return self.__derived_values[fn.__name__] + except KeyError: + return self.__derived_values.setdefault(fn.__name__, fn(self)) + + accept.__name__ = fn.__name__ + return property(accept) + + def __init__( + self, + engine: "ConjectureRunner", + initial: ConjectureData | ConjectureResult, + predicate: ShrinkPredicateT | None, + *, + allow_transition: ( + Callable[[ConjectureData | ConjectureResult, ConjectureData], bool] | None + ), + explain: bool, + in_target_phase: bool = False, + ): + """Create a shrinker for a particular engine, with a given starting + point and predicate. When shrink() is called it will attempt to find an + example for which predicate is True and which is strictly smaller than + initial. + + Note that initial is a ConjectureData object, and predicate + takes ConjectureData objects. + """ + assert predicate is not None or allow_transition is not None + self.engine = engine + self.__predicate = predicate or (lambda data: True) + self.__allow_transition = allow_transition or (lambda source, destination: True) + self.__derived_values: dict = {} + + self.initial_size = len(initial.choices) + # We keep track of the current best example on the shrink_target + # attribute. + self.shrink_target = initial + self.clear_change_tracking() + self.shrinks = 0 + + # We terminate shrinks that seem to have reached their logical + # conclusion: If we've called the underlying test function at + # least self.max_stall times since the last time we shrunk, + # it's time to stop shrinking. + self.max_stall = 200 + self.initial_calls = self.engine.call_count + self.initial_misaligned = self.engine.misaligned_count + self.calls_at_last_shrink = self.initial_calls + + self.shrink_passes: list[ShrinkPass] = [ + ShrinkPass(self.try_trivial_spans), + self.node_program("X" * 5), + self.node_program("X" * 4), + self.node_program("X" * 3), + self.node_program("X" * 2), + self.node_program("X" * 1), + ShrinkPass(self.pass_to_descendant), + ShrinkPass(self.reorder_spans), + ShrinkPass(self.minimize_duplicated_choices), + ShrinkPass(self.minimize_individual_choices), + ShrinkPass(self.redistribute_numeric_pairs), + ShrinkPass(self.lower_integers_together), + ShrinkPass(self.lower_duplicated_characters), + ] + + # Because the shrinker is also used to `pareto_optimise` in the target phase, + # we sometimes want to allow extending buffers instead of aborting at the end. 
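+        # ("full" lets cached_test_function draw arbitrarily many additional
+        # choices past the end of the supplied prefix instead of overrunning.)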
+ self.__extend: Literal["full"] | int = "full" if in_target_phase else 0 + self.should_explain = explain + + @derived_value # type: ignore + def cached_calculations(self): + return {} + + def cached(self, *keys): + def accept(f): + cache_key = (f.__name__, *keys) + try: + return self.cached_calculations[cache_key] + except KeyError: + return self.cached_calculations.setdefault(cache_key, f()) + + return accept + + @property + def calls(self) -> int: + """Return the number of calls that have been made to the underlying + test function.""" + return self.engine.call_count + + @property + def misaligned(self) -> int: + return self.engine.misaligned_count + + def check_calls(self) -> None: + if self.calls - self.calls_at_last_shrink >= self.max_stall: + raise StopShrinking + + def cached_test_function( + self, nodes: Sequence[ChoiceNode] + ) -> tuple[bool, ConjectureResult | _Overrun | None]: + nodes = nodes[: len(self.nodes)] + + if startswith(nodes, self.nodes): + return (True, None) + + if sort_key(self.nodes) < sort_key(nodes): + return (False, None) + + # sometimes our shrinking passes try obviously invalid things. We handle + # discarding them in one place here. + if any(not choice_permitted(node.value, node.constraints) for node in nodes): + return (False, None) + + result = self.engine.cached_test_function( + [n.value for n in nodes], extend=self.__extend + ) + previous = self.shrink_target + self.incorporate_test_data(result) + self.check_calls() + return (previous is not self.shrink_target, result) + + def consider_new_nodes(self, nodes: Sequence[ChoiceNode]) -> bool: + return self.cached_test_function(nodes)[0] + + def incorporate_test_data(self, data): + """Takes a ConjectureData or Overrun object updates the current + shrink_target if this data represents an improvement over it.""" + if data.status < Status.VALID or data is self.shrink_target: + return + if ( + self.__predicate(data) + and sort_key(data.nodes) < sort_key(self.shrink_target.nodes) + and self.__allow_transition(self.shrink_target, data) + ): + self.update_shrink_target(data) + + def debug(self, msg: str) -> None: + self.engine.debug(msg) + + @property + def random(self) -> "Random": + return self.engine.random + + def shrink(self) -> None: + """Run the full set of shrinks and update shrink_target. + + This method is "mostly idempotent" - calling it twice is unlikely to + have any effect, though it has a non-zero probability of doing so. + """ + + try: + self.initial_coarse_reduction() + self.greedy_shrink() + except StopShrinking: + # If we stopped shrinking because we're making slow progress (instead of + # reaching a local optimum), don't run the explain-phase logic. + self.should_explain = False + finally: + if self.engine.report_debug_info: + + def s(n): + return "s" if n != 1 else "" + + total_deleted = self.initial_size - len(self.shrink_target.choices) + calls = self.engine.call_count - self.initial_calls + misaligned = self.engine.misaligned_count - self.initial_misaligned + + self.debug( + "---------------------\n" + "Shrink pass profiling\n" + "---------------------\n\n" + f"Shrinking made a total of {calls} call{s(calls)} of which " + f"{self.shrinks} shrank and {misaligned} were misaligned. This " + f"deleted {total_deleted} choices out of {self.initial_size}." 
+ ) + for useful in [True, False]: + self.debug("") + if useful: + self.debug("Useful passes:") + else: + self.debug("Useless passes:") + self.debug("") + for pass_ in sorted( + self.shrink_passes, + key=lambda t: (-t.calls, t.deletions, t.shrinks), + ): + if pass_.calls == 0: + continue + if (pass_.shrinks != 0) != useful: + continue + + self.debug( + f" * {pass_.name} made {pass_.calls} call{s(pass_.calls)} of which " + f"{pass_.shrinks} shrank and {pass_.misaligned} were misaligned, " + f"deleting {pass_.deletions} choice{s(pass_.deletions)}." + ) + self.debug("") + self.explain() + + def explain(self) -> None: + + if not self.should_explain or not self.shrink_target.arg_slices: + return + + self.max_stall = 2**100 + shrink_target = self.shrink_target + nodes = self.nodes + choices = self.choices + chunks: dict[tuple[int, int], list[tuple[ChoiceT, ...]]] = defaultdict(list) + + # Before we start running experiments, let's check for known inputs which would + # make them redundant. The shrinking process means that we've already tried many + # variations on the minimal example, so this can save a lot of time. + seen_passing_seq = self.engine.passing_choice_sequences( + prefix=self.nodes[: min(self.shrink_target.arg_slices)[0]] + ) + + # Now that we've shrunk to a minimal failing example, it's time to try + # varying each part that we've noted will go in the final report. Consider + # slices in largest-first order + for start, end in sorted( + self.shrink_target.arg_slices, key=lambda x: (-(x[1] - x[0]), x) + ): + # Check for any previous examples that match the prefix and suffix, + # so we can skip if we found a passing example while shrinking. + if any( + startswith(seen, nodes[:start]) and endswith(seen, nodes[end:]) + for seen in seen_passing_seq + ): + continue + + # Run our experiments + n_same_failures = 0 + note = "or any other generated value" + # TODO: is 100 same-failures out of 500 attempts a good heuristic? + for n_attempt in range(500): # pragma: no branch + # no-branch here because we don't coverage-test the abort-at-500 logic. + + if n_attempt - 10 > n_same_failures * 5: + # stop early if we're seeing mostly invalid examples + break # pragma: no cover + + # replace start:end with random values + replacement = [] + for i in range(start, end): + node = nodes[i] + if not node.was_forced: + value = draw_choice( + node.type, node.constraints, random=self.random + ) + node = node.copy(with_value=value) + replacement.append(node.value) + + attempt = choices[:start] + tuple(replacement) + choices[end:] + result = self.engine.cached_test_function(attempt, extend="full") + + if result.status is Status.OVERRUN: + continue # pragma: no cover # flakily covered + result = cast(ConjectureResult, result) + if not ( + len(attempt) == len(result.choices) + and endswith(result.nodes, nodes[end:]) + ): + # Turns out this was a variable-length part, so grab the infix... 
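+                    # Walk the spans of the old and new results in parallel to
+                    # find the span that covered exactly [start, end) in the
+                    # shrink target; the end of its counterpart in the new
+                    # result marks where the replacement infix ends.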
+ for span1, span2 in zip( + shrink_target.spans, result.spans, strict=False + ): + assert span1.start == span2.start + assert span1.start <= start + assert span1.label == span2.label + if span1.start == start and span1.end == end: + result_end = span2.end + break + else: + raise NotImplementedError("Expected matching prefixes") + + attempt = ( + choices[:start] + + result.choices[start:result_end] + + choices[end:] + ) + chunks[(start, end)].append(result.choices[start:result_end]) + result = self.engine.cached_test_function(attempt) + + if result.status is Status.OVERRUN: + continue # pragma: no cover # flakily covered + result = cast(ConjectureResult, result) + else: + chunks[(start, end)].append(result.choices[start:end]) + + if shrink_target is not self.shrink_target: # pragma: no cover + # If we've shrunk further without meaning to, bail out. + self.shrink_target.slice_comments.clear() + return + if result.status is Status.VALID: + # The test passed, indicating that this param can't vary freely. + # However, it's really hard to write a simple and reliable covering + # test, because of our `seen_passing_buffers` check above. + break # pragma: no cover + if self.__predicate(result): # pragma: no branch + n_same_failures += 1 + if n_same_failures >= 100: + self.shrink_target.slice_comments[(start, end)] = note + break + + # Finally, if we've found multiple independently-variable parts, check whether + # they can all be varied together. + if len(self.shrink_target.slice_comments) <= 1: + return + n_same_failures_together = 0 + chunks_by_start_index = sorted(chunks.items()) + for _ in range(500): # pragma: no branch + # no-branch here because we don't coverage-test the abort-at-500 logic. + new_choices: list[ChoiceT] = [] + prev_end = 0 + for (start, end), ls in chunks_by_start_index: + assert prev_end <= start < end, "these chunks must be nonoverlapping" + new_choices.extend(choices[prev_end:start]) + new_choices.extend(self.random.choice(ls)) + prev_end = end + + result = self.engine.cached_test_function(new_choices) + + # This *can't* be a shrink because none of the components were. + assert shrink_target is self.shrink_target + if result.status == Status.VALID: + self.shrink_target.slice_comments[(0, 0)] = ( + "The test sometimes passed when commented parts were varied together." + ) + break # Test passed, this param can't vary freely. + if self.__predicate(result): # pragma: no branch + n_same_failures_together += 1 + if n_same_failures_together >= 100: + self.shrink_target.slice_comments[(0, 0)] = ( + "The test always failed when commented parts were varied together." + ) + break + + def greedy_shrink(self) -> None: + """Run a full set of greedy shrinks (that is, ones that will only ever + move to a better target) and update shrink_target appropriately. + + This method iterates to a fixed point and so is idempontent - calling + it twice will have exactly the same effect as calling it once. + """ + self.fixate_shrink_passes(self.shrink_passes) + + def initial_coarse_reduction(self): + """Performs some preliminary reductions that should not be + repeated as part of the main shrink passes. + + The main reason why these can't be included as part of shrink + passes is that they have much more ability to make the test + case "worse". e.g. they might rerandomise part of it, significantly + increasing the value of individual nodes, which works in direct + opposition to the lexical shrinking and will frequently undo + its work. 
+ """ + self.reduce_each_alternative() + + @derived_value # type: ignore + def spans_starting_at(self): + result = [[] for _ in self.shrink_target.nodes] + for i, ex in enumerate(self.spans): + # We can have zero-length spans that start at the end + if ex.start < len(result): + result[ex.start].append(i) + return tuple(map(tuple, result)) + + def reduce_each_alternative(self): + """This is a pass that is designed to rerandomise use of the + one_of strategy or things that look like it, in order to try + to move from later strategies to earlier ones in the branch + order. + + It does this by trying to systematically lower each value it + finds that looks like it might be the branch decision for + one_of, and then attempts to repair any changes in shape that + this causes. + """ + i = 0 + while i < len(self.shrink_target.nodes): + nodes = self.shrink_target.nodes + node = nodes[i] + if ( + node.type == "integer" + and not node.was_forced + and node.value <= 10 + and node.constraints["min_value"] == 0 + ): + assert isinstance(node.value, int) + + # We've found a plausible candidate for a ``one_of`` choice. + # We now want to see if the shape of the test case actually depends + # on it. If it doesn't, then we don't need to do this (comparatively + # costly) pass, and can let much simpler lexicographic reduction + # handle it later. + # + # We test this by trying to set the value to zero and seeing if the + # shape changes, as measured by either changing the number of subsequent + # nodes, or changing the nodes in such a way as to cause one of the + # previous values to no longer be valid in its position. + zero_attempt = self.cached_test_function( + nodes[:i] + (nodes[i].copy(with_value=0),) + nodes[i + 1 :] + )[1] + if ( + zero_attempt is not self.shrink_target + and zero_attempt is not None + and zero_attempt.status >= Status.VALID + ): + changed_shape = len(zero_attempt.nodes) != len(nodes) + + if not changed_shape: + for j in range(i + 1, len(nodes)): + zero_node = zero_attempt.nodes[j] + orig_node = nodes[j] + if ( + zero_node.type != orig_node.type + or not choice_permitted( + orig_node.value, zero_node.constraints + ) + ): + changed_shape = True + break + if changed_shape: + for v in range(node.value): + if self.try_lower_node_as_alternative(i, v): + break + i += 1 + + def try_lower_node_as_alternative(self, i, v): + """Attempt to lower `self.shrink_target.nodes[i]` to `v`, + while rerandomising and attempting to repair any subsequent + changes to the shape of the test case that this causes.""" + nodes = self.shrink_target.nodes + if self.consider_new_nodes( + nodes[:i] + (nodes[i].copy(with_value=v),) + nodes[i + 1 :] + ): + return True + + prefix = nodes[:i] + (nodes[i].copy(with_value=v),) + initial = self.shrink_target + spans = self.spans_starting_at[i] + for _ in range(3): + random_attempt = self.engine.cached_test_function( + [n.value for n in prefix], extend=len(nodes) + ) + if random_attempt.status < Status.VALID: + continue + self.incorporate_test_data(random_attempt) + for j in spans: + initial_span = initial.spans[j] + attempt_span = random_attempt.spans[j] + contents = random_attempt.nodes[attempt_span.start : attempt_span.end] + self.consider_new_nodes( + nodes[:i] + contents + nodes[initial_span.end :] + ) + if initial is not self.shrink_target: + return True + return False + + @derived_value # type: ignore + def shrink_pass_choice_trees(self) -> dict[Any, ChoiceTree]: + return defaultdict(ChoiceTree) + + def step(self, shrink_pass: ShrinkPass, *, random_order: bool = 
False) -> bool: + tree = self.shrink_pass_choice_trees[shrink_pass] + if tree.exhausted: + return False + + initial_shrinks = self.shrinks + initial_calls = self.calls + initial_misaligned = self.misaligned + size = len(self.shrink_target.choices) + assert shrink_pass.name is not None + self.engine.explain_next_call_as(shrink_pass.name) + + if random_order: + selection_order = random_selection_order(self.random) + else: + selection_order = prefix_selection_order(shrink_pass.last_prefix) + + try: + shrink_pass.last_prefix = tree.step( + selection_order, + lambda chooser: shrink_pass.function(chooser), + ) + finally: + shrink_pass.calls += self.calls - initial_calls + shrink_pass.misaligned += self.misaligned - initial_misaligned + shrink_pass.shrinks += self.shrinks - initial_shrinks + shrink_pass.deletions += size - len(self.shrink_target.choices) + self.engine.clear_call_explanation() + return True + + def fixate_shrink_passes(self, passes: list[ShrinkPass]) -> None: + """Run steps from each pass in ``passes`` until the current shrink target + is a fixed point of all of them.""" + any_ran = True + while any_ran: + any_ran = False + + reordering = {} + + # We run remove_discarded after every pass to do cleanup + # keeping track of whether that actually works. Either there is + # no discarded data and it is basically free, or it reliably works + # and deletes data, or it doesn't work. In that latter case we turn + # it off for the rest of this loop through the passes, but will + # try again once all of the passes have been run. + can_discard = self.remove_discarded() + + calls_at_loop_start = self.calls + + # We keep track of how many calls can be made by a single step + # without making progress and use this to test how much to pad + # out self.max_stall by as we go along. + max_calls_per_failing_step = 1 + + for sp in passes: + if can_discard: + can_discard = self.remove_discarded() + + before_sp = self.shrink_target + + # Run the shrink pass until it fails to make any progress + # max_failures times in a row. This implicitly boosts shrink + # passes that are more likely to work. + failures = 0 + max_failures = 20 + while failures < max_failures: + # We don't allow more than max_stall consecutive failures + # to shrink, but this means that if we're unlucky and the + # shrink passes are in a bad order where only the ones at + # the end are useful, if we're not careful this heuristic + # might stop us before we've tried everything. In order to + # avoid that happening, we make sure that there's always + # plenty of breathing room to make it through a single + # iteration of the fixate_shrink_passes loop. + self.max_stall = max( + self.max_stall, + 2 * max_calls_per_failing_step + + (self.calls - calls_at_loop_start), + ) + + prev = self.shrink_target + initial_calls = self.calls + # It's better for us to run shrink passes in a deterministic + # order, to avoid repeat work, but this can cause us to create + # long stalls when there are a lot of steps which fail to do + # anything useful. In order to avoid this, once we've noticed + # we're in a stall (i.e. half of max_failures calls have failed + # to do anything) we switch to randomly jumping around. If we + # find a success then we'll resume deterministic order from + # there which, with any luck, is in a new good region. + if not self.step(sp, random_order=failures >= max_failures // 2): + # step returns False when there is nothing to do because + # the entire choice tree is exhausted. 
If this happens + # we break because we literally can't run this pass any + # more than we already have until something else makes + # progress. + break + any_ran = True + + # Don't count steps that didn't actually try to do + # anything as failures. Otherwise, this call is a failure + # if it failed to make any changes to the shrink target. + if initial_calls != self.calls: + if prev is not self.shrink_target: + failures = 0 + else: + max_calls_per_failing_step = max( + max_calls_per_failing_step, self.calls - initial_calls + ) + failures += 1 + + # We reorder the shrink passes so that on our next run through + # we try good ones first. The rule is that shrink passes that + # did nothing useful are the worst, shrink passes that reduced + # the length are the best. + if self.shrink_target is before_sp: + reordering[sp] = 1 + elif len(self.choices) < len(before_sp.choices): + reordering[sp] = -1 + else: + reordering[sp] = 0 + + passes.sort(key=reordering.__getitem__) + + @property + def nodes(self) -> tuple[ChoiceNode, ...]: + return self.shrink_target.nodes + + @property + def choices(self) -> tuple[ChoiceT, ...]: + return self.shrink_target.choices + + @property + def spans(self) -> Spans: + return self.shrink_target.spans + + @derived_value # type: ignore + def spans_by_label(self): + """ + A mapping of labels to a list of spans with that label. Spans in the list + are ordered by their normal index order. + """ + + spans_by_label = defaultdict(list) + for ex in self.spans: + spans_by_label[ex.label].append(ex) + return dict(spans_by_label) + + @derived_value # type: ignore + def distinct_labels(self): + return sorted(self.spans_by_label, key=str) + + def pass_to_descendant(self, chooser): + """Attempt to replace each span with a descendant span. + + This is designed to deal with strategies that call themselves + recursively. For example, suppose we had: + + binary_tree = st.deferred( + lambda: st.one_of( + st.integers(), st.tuples(binary_tree, binary_tree))) + + This pass guarantees that we can replace any binary tree with one of + its subtrees - each of those will create an interval that the parent + could validly be replaced with, and this pass will try doing that. + + This is pretty expensive - it takes O(len(intervals)^2) - so we run it + late in the process when we've got the number of intervals as far down + as possible. + """ + + label = chooser.choose( + self.distinct_labels, lambda l: len(self.spans_by_label[l]) >= 2 + ) + + spans = self.spans_by_label[label] + i = chooser.choose(range(len(spans) - 1)) + ancestor = spans[i] + + if i + 1 == len(spans) or spans[i + 1].start >= ancestor.end: + return + + @self.cached(label, i) + def descendants(): + lo = i + 1 + hi = len(spans) + while lo + 1 < hi: + mid = (lo + hi) // 2 + if spans[mid].start >= ancestor.end: + hi = mid + else: + lo = mid + return [ + span + for span in spans[i + 1 : hi] + if span.choice_count < ancestor.choice_count + ] + + descendant = chooser.choose(descendants, lambda ex: ex.choice_count > 0) + + assert ancestor.start <= descendant.start + assert ancestor.end >= descendant.end + assert descendant.choice_count < ancestor.choice_count + + self.consider_new_nodes( + self.nodes[: ancestor.start] + + self.nodes[descendant.start : descendant.end] + + self.nodes[ancestor.end :] + ) + + def lower_common_node_offset(self): + """Sometimes we find ourselves in a situation where changes to one part + of the choice sequence unlock changes to other parts. 
Sometimes this is + good, but sometimes this can cause us to exhibit exponential slow + downs! + + e.g. suppose we had the following: + + m = draw(integers(min_value=0)) + n = draw(integers(min_value=0)) + assert abs(m - n) > 1 + + If this fails then we'll end up with a loop where on each iteration we + reduce each of m and n by 2 - m can't go lower because of n, then n + can't go lower because of m. + + This will take us O(m) iterations to complete, which is exponential in + the data size, as we gradually zig zag our way towards zero. + + This can only happen if we're failing to reduce the size of the choice + sequence: The number of iterations that reduce the length of the choice + sequence is bounded by that length. + + So what we do is this: We keep track of which nodes are changing, and + then if there's some non-zero common offset to them we try and minimize + them all at once by lowering that offset. + + This may not work, and it definitely won't get us out of all possible + exponential slow downs (an example of where it doesn't is where the + shape of the nodes changes as a result of this bouncing behaviour), + but it fails fast when it doesn't work and gets us out of a really + nastily slow case when it does. + """ + if len(self.__changed_nodes) <= 1: + return + + changed = [] + for i in sorted(self.__changed_nodes): + node = self.nodes[i] + if node.trivial or node.type != "integer": + continue + changed.append(node) + + if not changed: + return + + ints = [ + abs(node.value - node.constraints["shrink_towards"]) for node in changed + ] + offset = min(ints) + assert offset > 0 + + for i in range(len(ints)): + ints[i] -= offset + + st = self.shrink_target + + def offset_node(node, n): + return ( + node.index, + node.index + 1, + [node.copy(with_value=node.constraints["shrink_towards"] + n)], + ) + + def consider(n, sign): + return self.consider_new_nodes( + replace_all( + st.nodes, + [ + offset_node(node, sign * (n + v)) + for node, v in zip(changed, ints, strict=False) + ], + ) + ) + + # shrink from both sides + Integer.shrink(offset, lambda n: consider(n, 1)) + Integer.shrink(offset, lambda n: consider(n, -1)) + self.clear_change_tracking() + + def clear_change_tracking(self): + self.__last_checked_changed_at = self.shrink_target + self.__all_changed_nodes = set() + + def mark_changed(self, i): + self.__changed_nodes.add(i) + + @property + def __changed_nodes(self) -> set[int]: + if self.__last_checked_changed_at is self.shrink_target: + return self.__all_changed_nodes + + prev_target = self.__last_checked_changed_at + new_target = self.shrink_target + assert prev_target is not new_target + prev_nodes = prev_target.nodes + new_nodes = new_target.nodes + assert sort_key(new_target.nodes) < sort_key(prev_target.nodes) + + if len(prev_nodes) != len(new_nodes) or any( + n1.type != n2.type for n1, n2 in zip(prev_nodes, new_nodes, strict=True) + ): + # should we check constraints are equal as well? 
+ self.__all_changed_nodes = set() + else: + assert len(prev_nodes) == len(new_nodes) + for i, (n1, n2) in enumerate(zip(prev_nodes, new_nodes, strict=True)): + assert n1.type == n2.type + if not choice_equal(n1.value, n2.value): + self.__all_changed_nodes.add(i) + + return self.__all_changed_nodes + + def update_shrink_target(self, new_target): + assert isinstance(new_target, ConjectureResult) + self.shrinks += 1 + # If we are just taking a long time to shrink we don't want to + # trigger this heuristic, so whenever we shrink successfully + # we give ourselves a bit of breathing room to make sure we + # would find a shrink that took that long to find the next time. + # The case where we're taking a long time but making steady + # progress is handled by `finish_shrinking_deadline` in engine.py + self.max_stall = max( + self.max_stall, (self.calls - self.calls_at_last_shrink) * 2 + ) + self.calls_at_last_shrink = self.calls + self.shrink_target = new_target + self.__derived_values = {} + + def try_shrinking_nodes(self, nodes, n): + """Attempts to replace each node in the nodes list with n. Returns + True if it succeeded (which may include some additional modifications + to shrink_target). + + In current usage it is expected that each of the nodes currently have + the same value and choice_type, although this is not essential. Note that + n must be < the node at min(nodes) or this is not a valid shrink. + + This method will attempt to do some small amount of work to delete data + that occurs after the end of the nodes. This is useful for cases where + there is some size dependency on the value of a node. + """ + # If the length of the shrink target has changed from under us such that + # the indices are out of bounds, give up on the replacement. + # TODO_BETTER_SHRINK: we probably want to narrow down the root cause here at some point. + if any(node.index >= len(self.nodes) for node in nodes): + return # pragma: no cover + + initial_attempt = replace_all( + self.nodes, + [(node.index, node.index + 1, [node.copy(with_value=n)]) for node in nodes], + ) + + attempt = self.cached_test_function(initial_attempt)[1] + + if attempt is None: + return False + + if attempt is self.shrink_target: + # if the initial shrink was a success, try lowering offsets. + self.lower_common_node_offset() + return True + + # If this produced something completely invalid we ditch it + # here rather than trying to persevere. + if attempt.status is Status.OVERRUN: + return False + + if attempt.status is Status.INVALID: + return False + + if attempt.misaligned_at is not None: + # we're invalid due to a misalignment in the tree. We'll try to fix + # a very specific type of misalignment here: where we have a node of + # {"size": n} and tried to draw the same node, but with {"size": m < n}. + # This can occur with eg + # + # n = data.draw_integer() + # s = data.draw_string(min_size=n) + # + # where we try lowering n, resulting in the test_function drawing a lower + # min_size than our attempt had for the draw_string node. + # + # We'll now try realigning this tree by: + # * replacing the constraints in our attempt with what test_function tried + # to draw in practice + # * truncating the value of that node to match min_size + # + # This helps in the specific case of drawing a value and then drawing + # a collection of that size...and not much else. In practice this + # helps because this antipattern is fairly common. 
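The antipattern in the comment above can also be written against the public API. The test below is an illustrative sketch (the test name is invented, and it assumes only an ordinary hypothesis installation): lowering the length prefix changes the constraints of the draw that follows it, which is the misalignment this repair logic targets.

from hypothesis import given, strategies as st

@given(st.data())
def test_string_tracks_its_length_prefix(data):
    # Lowering n shrinks the min_size constraint of the next draw, so a naive
    # replay of the old choices no longer lines up with the new constraints.
    n = data.draw(st.integers(min_value=0, max_value=20))
    s = data.draw(st.text(min_size=n))
    assert len(s) <= 10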
+ + # TODO we'll probably want to apply the same trick as in the valid + # case of this function of preserving from the right instead of + # preserving from the left. see test_can_shrink_variable_string_draws. + + (index, attempt_choice_type, attempt_constraints, _attempt_forced) = ( + attempt.misaligned_at + ) + node = self.nodes[index] + if node.type != attempt_choice_type: + return False # pragma: no cover + if node.was_forced: + return False # pragma: no cover + + if node.type in {"string", "bytes"}: + # if the size *increased*, we would have to guess what to pad with + # in order to try fixing up this attempt. Just give up. + if node.constraints["min_size"] <= attempt_constraints["min_size"]: + # attempts which increase min_size tend to overrun rather than + # be misaligned, making a covering case difficult. + return False # pragma: no cover + # the size decreased in our attempt. Try again, but truncate the value + # to that size by removing any elements past min_size. + return self.consider_new_nodes( + initial_attempt[: node.index] + + [ + initial_attempt[node.index].copy( + with_constraints=attempt_constraints, + with_value=initial_attempt[node.index].value[ + : attempt_constraints["min_size"] + ], + ) + ] + + initial_attempt[node.index :] + ) + + lost_nodes = len(self.nodes) - len(attempt.nodes) + if lost_nodes <= 0: + return False + + start = nodes[0].index + end = nodes[-1].index + 1 + # We now look for contiguous regions to delete that might help fix up + # this failed shrink. We only look for contiguous regions of the right + # lengths because doing anything more than that starts to get very + # expensive. See minimize_individual_choices for where we + # try to be more aggressive. + regions_to_delete = {(end, end + lost_nodes)} + + for ex in self.spans: + if ex.start > start: + continue + if ex.end <= end: + continue + + if ex.index >= len(attempt.spans): + continue # pragma: no cover + + replacement = attempt.spans[ex.index] + in_original = [c for c in ex.children if c.start >= end] + in_replaced = [c for c in replacement.children if c.start >= end] + + if len(in_replaced) >= len(in_original) or not in_replaced: + continue + + # We've found a span where some of the children went missing + # as a result of this change, and just replacing it with the data + # it would have had and removing the spillover didn't work. This + # means that some of its children towards the right must be + # important, so we try to arrange it so that it retains its + # rightmost children instead of its leftmost. + regions_to_delete.add( + (in_original[0].start, in_original[-len(in_replaced)].start) + ) + + for u, v in sorted(regions_to_delete, key=lambda x: x[1] - x[0], reverse=True): + try_with_deleted = initial_attempt[:u] + initial_attempt[v:] + if self.consider_new_nodes(try_with_deleted): + return True + + return False + + def remove_discarded(self): + """Try removing all bytes marked as discarded. + + This is primarily to deal with data that has been ignored while + doing rejection sampling - e.g. as a result of an integer range, or a + filtered strategy. + + Such data will also be handled by the adaptive_example_deletion pass, + but that pass is necessarily more conservative and will try deleting + each interval individually. The common case is that all data drawn and + rejected can just be thrown away immediately in one block, so this pass + will be much faster than trying each one individually when it works. 
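A concrete source of discarded data, sketched with the public API (the test name is invented): every element rejected by the filter is drawn and then marked as discarded, and remove_discarded throws all of those draws away in a single attempt rather than one interval at a time.

from hypothesis import given, strategies as st

@given(st.lists(st.integers().filter(lambda n: n % 3 == 0)))
def test_sum_of_multiples_of_three(xs):
    assert sum(xs) < 100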
+ + returns False if there is discarded data and removing it does not work, + otherwise returns True. + """ + while self.shrink_target.has_discards: + discarded = [] + + for ex in self.shrink_target.spans: + if ( + ex.choice_count > 0 + and ex.discarded + and (not discarded or ex.start >= discarded[-1][-1]) + ): + discarded.append((ex.start, ex.end)) + + # This can happen if we have discards but they are all of + # zero length. This shouldn't happen very often so it's + # faster to check for it here than at the point of example + # generation. + if not discarded: + break + + attempt = list(self.nodes) + for u, v in reversed(discarded): + del attempt[u:v] + + if not self.consider_new_nodes(tuple(attempt)): + return False + return True + + @derived_value # type: ignore + def duplicated_nodes(self): + """Returns a list of nodes grouped (choice_type, value).""" + duplicates = defaultdict(list) + for node in self.nodes: + duplicates[(node.type, choice_key(node.value))].append(node) + return list(duplicates.values()) + + def node_program(self, program: str) -> ShrinkPass: + return ShrinkPass( + lambda chooser: self._node_program(chooser, program), + name=f"node_program_{program}", + ) + + def _node_program(self, chooser, program): + n = len(program) + # Adaptively attempt to run the node program at the current + # index. If this successfully applies the node program ``k`` times + # then this runs in ``O(log(k))`` test function calls. + i = chooser.choose(range(len(self.nodes) - n + 1)) + + # First, run the node program at the chosen index. If this fails, + # don't do any extra work, so that failure is as cheap as possible. + if not self.run_node_program(i, program, original=self.shrink_target): + return + + # Because we run in a random order we will often find ourselves in the middle + # of a region where we could run the node program. We thus start by moving + # left to the beginning of that region if possible in order to to start from + # the beginning of that region. + def offset_left(k): + return i - k * n + + i = offset_left( + find_integer( + lambda k: self.run_node_program( + offset_left(k), program, original=self.shrink_target + ) + ) + ) + + original = self.shrink_target + # Now try to run the node program multiple times here. + find_integer( + lambda k: self.run_node_program(i, program, original=original, repeats=k) + ) + + def minimize_duplicated_choices(self, chooser): + """Find choices that have been duplicated in multiple places and attempt + to minimize all of the duplicates simultaneously. + + This lets us handle cases where two values can't be shrunk + independently of each other but can easily be shrunk together. + For example if we had something like: + + ls = data.draw(lists(integers())) + y = data.draw(integers()) + assert y not in ls + + Suppose we drew y = 3 and after shrinking we have ls = [3]. If we were + to replace both 3s with 0, this would be a valid shrink, but if we were + to replace either 3 with 0 on its own the test would start passing. + + It is also useful for when that duplication is accidental and the value + of the choices don't matter very much because it allows us to replace + more values at once. + """ + nodes = chooser.choose(self.duplicated_nodes) + # we can't lower any nodes which are trivial. try proceeding with the + # remaining nodes. 
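The docstring example above, written as a runnable property (illustrative; assumes a normal hypothesis installation): any counterexample needs y to appear inside ls, so the shrunk failure can only reach ls=[0], y=0 by lowering the duplicated value in both places at once, which is exactly what this pass does.

from hypothesis import given, strategies as st

@given(st.lists(st.integers()), st.integers())
def test_value_is_not_in_list(ls, y):
    assert y not in ls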
+ nodes = [node for node in nodes if not node.trivial] + if len(nodes) <= 1: + return + + self.minimize_nodes(nodes) + + def redistribute_numeric_pairs(self, chooser): + """If there is a sum of generated numbers that we need their sum + to exceed some bound, lowering one of them requires raising the + other. This pass enables that.""" + + # look for a pair of nodes (node1, node2) which are both numeric + # and aren't separated by too many other nodes. We'll decrease node1 and + # increase node2 (note that the other way around doesn't make sense as + # it's strictly worse in the ordering). + def can_choose_node(node): + # don't choose nan, inf, or floats above the threshold where f + 1 > f + # (which is not necessarily true for floats above MAX_PRECISE_INTEGER). + # The motivation for the last condition is to avoid trying weird + # non-shrinks where we raise one node and think we lowered another + # (but didn't). + return node.type in {"integer", "float"} and not ( + node.type == "float" + and (math.isnan(node.value) or abs(node.value) >= MAX_PRECISE_INTEGER) + ) + + node1 = chooser.choose( + self.nodes, + lambda node: can_choose_node(node) and not node.trivial, + ) + node2 = chooser.choose( + self.nodes, + lambda node: can_choose_node(node) + # Note that it's fine for node2 to be trivial, because we're going to + # explicitly make it *not* trivial by adding to its value. + and not node.was_forced + # to avoid quadratic behavior, scan ahead only a small amount for + # the related node. + and node1.index < node.index <= node1.index + 4, + ) + + m: int | float = node1.value + n: int | float = node2.value + + def boost(k: int) -> bool: + # floats always shrink towards 0 + shrink_towards = ( + node1.constraints["shrink_towards"] if node1.type == "integer" else 0 + ) + if k > abs(m - shrink_towards): + return False + + # We are trying to move node1 (m) closer to shrink_towards, and node2 + # (n) farther away from shrink_towards. If m is below shrink_towards, + # we want to add to m and subtract from n, and vice versa if above + # shrink_towards. + if m < shrink_towards: + k = -k + + try: + v1 = m - k + v2 = n + k + except OverflowError: # pragma: no cover + # if n or m is a float and k is over sys.float_info.max, coercing + # k to a float will overflow. + return False + + # if we've increased node2 to the point that we're past max precision, + # give up - things have become too unstable. + if node1.type == "float" and abs(v2) >= MAX_PRECISE_INTEGER: + return False + + return self.consider_new_nodes( + self.nodes[: node1.index] + + (node1.copy(with_value=v1),) + + self.nodes[node1.index + 1 : node2.index] + + (node2.copy(with_value=v2),) + + self.nodes[node2.index + 1 :] + ) + + find_integer(boost) + + def lower_integers_together(self, chooser): + node1 = chooser.choose( + self.nodes, lambda n: n.type == "integer" and not n.trivial + ) + # Search up to 3 nodes ahead, to avoid quadratic time. + node2 = self.nodes[ + chooser.choose( + range(node1.index + 1, min(len(self.nodes), node1.index + 3 + 1)), + lambda i: self.nodes[i].type == "integer" + and not self.nodes[i].was_forced, + ) + ] + + # one might expect us to require node2 to be nontrivial, and to minimize + # the node which is closer to its shrink_towards, rather than node1 + # unconditionally. In reality, it's acceptable for us to transition node2 + # from trivial to nontrivial, because the shrink ordering is dominated by + # the complexity of the earlier node1. What matters is minimizing node1. 
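A property where the redistribution described in redistribute_numeric_pairs matters, sketched with the public API (test name invented): the failing condition fixes a lower bound on m + n, so m can only be lowered if n is raised by the same amount, and the shrunk example ends up as m=0, n=100 rather than two mid-sized values.

from hypothesis import given, strategies as st

@given(st.integers(min_value=0), st.integers(min_value=0))
def test_sum_is_small(m, n):
    assert m + n < 100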
+ shrink_towards = node1.constraints["shrink_towards"] + + def consider(n): + return self.consider_new_nodes( + self.nodes[: node1.index] + + (node1.copy(with_value=node1.value - n),) + + self.nodes[node1.index + 1 : node2.index] + + (node2.copy(with_value=node2.value - n),) + + self.nodes[node2.index + 1 :] + ) + + find_integer(lambda n: consider(shrink_towards - n)) + find_integer(lambda n: consider(n - shrink_towards)) + + def lower_duplicated_characters(self, chooser): + """ + Select two string choices no more than 4 choices apart and simultaneously + lower characters which appear in both strings. This helps cases where the + same character must appear in two strings, but the actual value of the + character is not relevant. + + This shrinking pass currently only tries lowering *all* instances of the + duplicated character in both strings. So for instance, given two choices: + + "bbac" + "abbb" + + we would try lowering all five of the b characters simultaneously. This + may fail to shrink some cases where only certain character indices are + correlated, for instance if only the b at index 1 could be lowered + simultaneously and the rest did in fact actually have to be a `b`. + + It would be nice to try shrinking that case as well, but we would need good + safeguards because it could get very expensive to try all combinations. + I expect lowering all duplicates to handle most cases in the meantime. + """ + node1 = chooser.choose( + self.nodes, lambda n: n.type == "string" and not n.trivial + ) + + # limit search to up to 4 choices ahead, to avoid quadratic behavior + node2 = self.nodes[ + chooser.choose( + range(node1.index + 1, min(len(self.nodes), node1.index + 1 + 4)), + lambda i: self.nodes[i].type == "string" and not self.nodes[i].trivial + # select nodes which have at least one of the same character present + and set(node1.value) & set(self.nodes[i].value), + ) + ] + + duplicated_characters = set(node1.value) & set(node2.value) + # deterministic ordering + char = chooser.choose(sorted(duplicated_characters)) + intervals = node1.constraints["intervals"] + + def copy_node(node, n): + # replace all duplicate characters in each string. This might miss + # some shrinks compared to only replacing some, but trying all possible + # combinations of indices could get expensive if done without some + # thought. + return node.copy( + with_value=node.value.replace(char, intervals.char_in_shrink_order(n)) + ) + + Integer.shrink( + intervals.index_from_char_in_shrink_order(char), + lambda n: self.consider_new_nodes( + self.nodes[: node1.index] + + (copy_node(node1, n),) + + self.nodes[node1.index + 1 : node2.index] + + (copy_node(node2, n),) + + self.nodes[node2.index + 1 :] + ), + ) + + def minimize_nodes(self, nodes): + choice_type = nodes[0].type + value = nodes[0].value + # unlike choice_type and value, constraints are *not* guaranteed to be equal among all + # passed nodes. We arbitrarily use the constraints of the first node. I think + # this is unsound (= leads to us trying shrinks that could not have been + # generated), but those get discarded at test-time, and this enables useful + # slips where constraints are not equal but are close enough that doing the + # same operation on both basically just works. + constraints = nodes[0].constraints + assert all( + node.type == choice_type and choice_equal(node.value, value) + for node in nodes + ) + + if choice_type == "integer": + shrink_towards = constraints["shrink_towards"] + # try shrinking from both sides towards shrink_towards. 
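A property that exercises lower_duplicated_characters, sketched with the public API (test name invented): any counterexample must share at least one character between the two strings, and lowering every occurrence of that character in both strings simultaneously is what lets the failure shrink to something like s="0", t="0".

from hypothesis import given, strategies as st

@given(st.text(min_size=1), st.text(min_size=1))
def test_strings_share_no_characters(s, t):
    assert not (set(s) & set(t))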
+ # we're starting from n = abs(shrink_towards - value). Because the + # shrinker will not check its starting value, we need to try + # shrinking to n first. + self.try_shrinking_nodes(nodes, abs(shrink_towards - value)) + Integer.shrink( + abs(shrink_towards - value), + lambda n: self.try_shrinking_nodes(nodes, shrink_towards + n), + ) + Integer.shrink( + abs(shrink_towards - value), + lambda n: self.try_shrinking_nodes(nodes, shrink_towards - n), + ) + elif choice_type == "float": + self.try_shrinking_nodes(nodes, abs(value)) + Float.shrink( + abs(value), + lambda val: self.try_shrinking_nodes(nodes, val), + ) + Float.shrink( + abs(value), + lambda val: self.try_shrinking_nodes(nodes, -val), + ) + elif choice_type == "boolean": + # must be True, otherwise would be trivial and not selected. + assert value is True + # only one thing to try: false! + self.try_shrinking_nodes(nodes, False) + elif choice_type == "bytes": + Bytes.shrink( + value, + lambda val: self.try_shrinking_nodes(nodes, val), + min_size=constraints["min_size"], + ) + elif choice_type == "string": + String.shrink( + value, + lambda val: self.try_shrinking_nodes(nodes, val), + intervals=constraints["intervals"], + min_size=constraints["min_size"], + ) + else: + raise NotImplementedError + + def try_trivial_spans(self, chooser): + i = chooser.choose(range(len(self.spans))) + + prev = self.shrink_target + nodes = self.shrink_target.nodes + span = self.spans[i] + prefix = nodes[: span.start] + replacement = tuple( + [ + ( + node + if node.was_forced + else node.copy( + with_value=choice_from_index(0, node.type, node.constraints) + ) + ) + for node in nodes[span.start : span.end] + ] + ) + suffix = nodes[span.end :] + attempt = self.cached_test_function(prefix + replacement + suffix)[1] + + if self.shrink_target is not prev: + return + + if isinstance(attempt, ConjectureResult): + new_span = attempt.spans[i] + new_replacement = attempt.nodes[new_span.start : new_span.end] + self.consider_new_nodes(prefix + new_replacement + suffix) + + def minimize_individual_choices(self, chooser): + """Attempt to minimize each choice in sequence. + + This is the pass that ensures that e.g. each integer we draw is a + minimum value. So it's the part that guarantees that if we e.g. do + + x = data.draw(integers()) + assert x < 10 + + then in our shrunk example, x = 10 rather than say 97. + + If we are unsuccessful at minimizing a choice of interest we then + check if that's because it's changing the size of the test case and, + if so, we also make an attempt to delete parts of the test case to + see if that fixes it. + + We handle most of the common cases in try_shrinking_nodes which is + pretty good at clearing out large contiguous blocks of dead space, + but it fails when there is data that has to stay in particular places + in the list. + """ + node = chooser.choose(self.nodes, lambda node: not node.trivial) + initial_target = self.shrink_target + + self.minimize_nodes([node]) + if self.shrink_target is not initial_target: + # the shrink target changed, so our shrink worked. Defer doing + # anything more intelligent until this shrink fails. + return + + # the shrink failed. One particularly common case where minimizing a + # node can fail is the antipattern of drawing a size and then drawing a + # collection of that size, or more generally when there is a size + # dependency on some single node. 
We'll explicitly try and fix up this + # common case here: if decreasing an integer node by one would reduce + # the size of the generated input, we'll try deleting things after that + # node and see if the resulting attempt works. + + if node.type != "integer": + # Only try this fixup logic on integer draws. Almost all size + # dependencies are on integer draws, and if it's not, it's doing + # something convoluted enough that it is unlikely to shrink well anyway. + # TODO: extent to floats? we probably currently fail on the following, + # albeit convoluted example: + # n = int(data.draw(st.floats())) + # s = data.draw(st.lists(st.integers(), min_size=n, max_size=n)) + return + + lowered = ( + self.nodes[: node.index] + + (node.copy(with_value=node.value - 1),) + + self.nodes[node.index + 1 :] + ) + attempt = self.cached_test_function(lowered)[1] + if ( + attempt is None + or attempt.status < Status.VALID + or len(attempt.nodes) == len(self.nodes) + or len(attempt.nodes) == node.index + 1 + ): + # no point in trying our size-dependency-logic if our attempt at + # lowering the node resulted in: + # * an invalid conjecture data + # * the same number of nodes as before + # * no nodes beyond the lowered node (nothing to try to delete afterwards) + return + + # If it were then the original shrink should have worked and we could + # never have got here. + assert attempt is not self.shrink_target + + @self.cached(node.index) + def first_span_after_node(): + lo = 0 + hi = len(self.spans) + while lo + 1 < hi: + mid = (lo + hi) // 2 + span = self.spans[mid] + if span.start >= node.index: + hi = mid + else: + lo = mid + return hi + + # we try deleting both entire spans, and single nodes. + # If we wanted to get more aggressive, we could try deleting n + # consecutive nodes (that don't cross a span boundary) for say + # n <= 2 or n <= 3. + if chooser.choose([True, False]): + span = self.spans[ + chooser.choose( + range(first_span_after_node, len(self.spans)), + lambda i: self.spans[i].choice_count > 0, + ) + ] + self.consider_new_nodes(lowered[: span.start] + lowered[span.end :]) + else: + node = self.nodes[chooser.choose(range(node.index + 1, len(self.nodes)))] + self.consider_new_nodes(lowered[: node.index] + lowered[node.index + 1 :]) + + def reorder_spans(self, chooser): + """This pass allows us to reorder the children of each span. + + For example, consider the following: + + .. code-block:: python + + import hypothesis.strategies as st + from hypothesis import given + + + @given(st.text(), st.text()) + def test_not_equal(x, y): + assert x != y + + Without the ability to reorder x and y this could fail either with + ``x=""``, ``y="0"``, or the other way around. With reordering it will + reliably fail with ``x=""``, ``y="0"``. 
+ """ + span = chooser.choose(self.spans) + + label = chooser.choose(span.children).label + spans = [c for c in span.children if c.label == label] + if len(spans) <= 1: + return + + endpoints = [(span.start, span.end) for span in spans] + st = self.shrink_target + + Ordering.shrink( + range(len(spans)), + lambda indices: self.consider_new_nodes( + replace_all( + st.nodes, + [ + ( + u, + v, + st.nodes[spans[i].start : spans[i].end], + ) + for (u, v), i in zip(endpoints, indices, strict=True) + ], + ) + ), + key=lambda i: sort_key(st.nodes[spans[i].start : spans[i].end]), + ) + + def run_node_program(self, i, program, original, repeats=1): + """Node programs are a mini-DSL for node rewriting, defined as a sequence + of commands that can be run at some index into the nodes + + Commands are: + + * "X", delete this node + + This method runs the node program in ``program`` at node index + ``i`` on the ConjectureData ``original``. If ``repeats > 1`` then it + will attempt to approximate the results of running it that many times. + + Returns True if this successfully changes the underlying shrink target, + else False. + """ + if i + len(program) > len(original.nodes) or i < 0: + return False + attempt = list(original.nodes) + for _ in range(repeats): + for k, command in reversed(list(enumerate(program))): + j = i + k + if j >= len(attempt): + return False + + if command == "X": + del attempt[j] + else: + raise NotImplementedError(f"Unrecognised command {command!r}") + + return self.consider_new_nodes(attempt) diff --git a/vendored/hypothesis/internal/conjecture/shrinking/__init__.py b/vendored/hypothesis/internal/conjecture/shrinking/__init__.py new file mode 100644 index 0000000..0e12c67 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/shrinking/__init__.py @@ -0,0 +1,18 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from hypothesis.internal.conjecture.shrinking.bytes import Bytes +from hypothesis.internal.conjecture.shrinking.collection import Collection +from hypothesis.internal.conjecture.shrinking.floats import Float +from hypothesis.internal.conjecture.shrinking.integer import Integer +from hypothesis.internal.conjecture.shrinking.ordering import Ordering +from hypothesis.internal.conjecture.shrinking.string import String + +__all__ = ["Bytes", "Collection", "Float", "Integer", "Ordering", "String"] diff --git a/vendored/hypothesis/internal/conjecture/shrinking/bytes.py b/vendored/hypothesis/internal/conjecture/shrinking/bytes.py new file mode 100644 index 0000000..7fbc26f --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/shrinking/bytes.py @@ -0,0 +1,23 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +from hypothesis.internal.conjecture.shrinking.collection import Collection +from hypothesis.internal.conjecture.shrinking.integer import Integer + + +class Bytes(Collection): + def __init__(self, initial, predicate, **kwargs): + super().__init__( + # implicit conversion from bytes to list of integers here + list(initial), + lambda val: predicate(bytes(val)), + ElementShrinker=Integer, + **kwargs, + ) diff --git a/vendored/hypothesis/internal/conjecture/shrinking/choicetree.py b/vendored/hypothesis/internal/conjecture/shrinking/choicetree.py new file mode 100644 index 0000000..7fd60bc --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/shrinking/choicetree.py @@ -0,0 +1,161 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from collections import defaultdict +from collections.abc import Callable, Iterable, Sequence +from random import Random + +from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy + + +def prefix_selection_order( + prefix: Sequence[int], +) -> Callable[[int, int], Iterable[int]]: + """Select choices starting from ``prefix```, + preferring to move left then wrapping around + to the right.""" + + def selection_order(depth: int, n: int) -> Iterable[int]: + if depth < len(prefix): + i = prefix[depth] + if i >= n: + i = n - 1 + yield from range(i, -1, -1) + yield from range(n - 1, i, -1) + else: + yield from range(n - 1, -1, -1) + + return selection_order + + +def random_selection_order(random: Random) -> Callable[[int, int], Iterable[int]]: + """Select choices uniformly at random.""" + + def selection_order(depth: int, n: int) -> Iterable[int]: + pending = LazySequenceCopy(range(n)) + while pending: + i = random.randrange(0, len(pending)) + yield pending.pop(i) + + return selection_order + + +class Chooser: + """A source of nondeterminism for use in shrink passes.""" + + def __init__( + self, + tree: "ChoiceTree", + selection_order: Callable[[int, int], Iterable[int]], + ): + self.__selection_order = selection_order + self.__node_trail = [tree.root] + self.__choices: list[int] = [] + self.__finished = False + + def choose( + self, + values: Sequence[int], + condition: Callable[[int], bool] = lambda x: True, + ) -> int: + """Return some element of values satisfying the condition + that will not lead to an exhausted branch, or raise DeadBranch + if no such element exist". 
+ """ + assert not self.__finished + node = self.__node_trail[-1] + if node.live_child_count is None: + node.live_child_count = len(values) + node.n = len(values) + + assert node.live_child_count > 0 or len(values) == 0 + + for i in self.__selection_order(len(self.__choices), len(values)): + if node.live_child_count == 0: + break + if not node.children[i].exhausted: + v = values[i] + if condition(v): + self.__choices.append(i) + self.__node_trail.append(node.children[i]) + return v + else: + node.children[i] = DeadNode + node.live_child_count -= 1 + assert node.live_child_count == 0 + raise DeadBranch + + def finish(self) -> Sequence[int]: + """Record the decisions made in the underlying tree and return + a prefix that can be used for the next Chooser to be used.""" + self.__finished = True + assert len(self.__node_trail) == len(self.__choices) + 1 + + result = tuple(self.__choices) + + self.__node_trail[-1].live_child_count = 0 + while len(self.__node_trail) > 1 and self.__node_trail[-1].exhausted: + self.__node_trail.pop() + assert len(self.__node_trail) == len(self.__choices) + i = self.__choices.pop() + target = self.__node_trail[-1] + target.children[i] = DeadNode + assert target.live_child_count is not None + target.live_child_count -= 1 + + return result + + +class ChoiceTree: + """Records sequences of choices made during shrinking so that we + can track what parts of a pass has run. Used to create Chooser + objects that are the main interface that a pass uses to make + decisions about what to do. + """ + + def __init__(self) -> None: + self.root = TreeNode() + + @property + def exhausted(self) -> bool: + return self.root.exhausted + + def step( + self, + selection_order: Callable[[int, int], Iterable[int]], + f: Callable[[Chooser], None], + ) -> Sequence[int]: + assert not self.exhausted + + chooser = Chooser(self, selection_order) + try: + f(chooser) + except DeadBranch: + pass + return chooser.finish() + + +class TreeNode: + def __init__(self) -> None: + self.children: dict[int, TreeNode] = defaultdict(TreeNode) + self.live_child_count: int | None = None + self.n: int | None = None + + @property + def exhausted(self) -> bool: + return self.live_child_count == 0 + + +DeadNode = TreeNode() +DeadNode.live_child_count = 0 + + +class DeadBranch(Exception): + pass diff --git a/vendored/hypothesis/internal/conjecture/shrinking/collection.py b/vendored/hypothesis/internal/conjecture/shrinking/collection.py new file mode 100644 index 0000000..cd51eed --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/shrinking/collection.py @@ -0,0 +1,82 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +from collections import Counter + +from hypothesis.internal.conjecture.shrinking.common import Shrinker +from hypothesis.internal.conjecture.shrinking.ordering import Ordering +from hypothesis.internal.conjecture.utils import identity + + +class Collection(Shrinker): + def setup( + self, *, ElementShrinker, min_size, to_order=identity, from_order=identity + ): + self.ElementShrinker = ElementShrinker + self.to_order = to_order + self.from_order = from_order + self.min_size = min_size + + def make_immutable(self, value): + return tuple(value) + + def short_circuit(self): + zero = self.from_order(0) + return self.consider([zero] * self.min_size) + + def left_is_better(self, left, right): + if len(left) < len(right): + return True + + # examine elements one by one from the left until an element differs. + for v1, v2 in zip(left, right, strict=False): + if self.to_order(v1) == self.to_order(v2): + continue + return self.to_order(v1) < self.to_order(v2) + + # equal length and all values were equal by our ordering, so must be equal + # by our ordering. + assert list(map(self.to_order, left)) == list(map(self.to_order, right)) + return False + + def run_step(self): + # try all-zero first; we already considered all-zero-and-smallest in + # short_circuit. + zero = self.from_order(0) + self.consider([zero] * len(self.current)) + + # try deleting each element in turn, starting from the back + # TODO_BETTER_SHRINK: adaptively delete here by deleting larger chunks at once + # if early deletes succeed. use find_integer. turns O(n) into O(log(n)) + for i in reversed(range(len(self.current))): + self.consider(self.current[:i] + self.current[i + 1 :]) + + # then try reordering + Ordering.shrink(self.current, self.consider, key=self.to_order) + + # then try minimizing all duplicated elements together simultaneously. This + # helps in cases like https://github.com/HypothesisWorks/hypothesis/issues/4286 + duplicated = {val for val, count in Counter(self.current).items() if count > 1} + for val in duplicated: + self.ElementShrinker.shrink( + self.to_order(val), + lambda v: self.consider( + tuple(self.from_order(v) if x == val else x for x in self.current) + ), + ) + + # then try minimizing each element in turn + for i, val in enumerate(self.current): + self.ElementShrinker.shrink( + self.to_order(val), + lambda v: self.consider( + self.current[:i] + (self.from_order(v),) + self.current[i + 1 :] + ), + ) diff --git a/vendored/hypothesis/internal/conjecture/shrinking/common.py b/vendored/hypothesis/internal/conjecture/shrinking/common.py new file mode 100644 index 0000000..8290ec6 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/shrinking/common.py @@ -0,0 +1,180 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
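For a concrete sense of the ordering the Collection shrinker above works towards (shorter first, then elementwise by to_order), an illustrative call against the internal API (assuming the vendored tree is importable as hypothesis; the example values are invented):

from hypothesis.internal.conjecture.shrinking import Collection, Integer

# The smallest sequence, by that ordering, which still contains a 5:
assert Collection.shrink(
    [9, 5, 9, 5], lambda xs: 5 in xs, ElementShrinker=Integer, min_size=0
) == (5,)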
+ +"""This module implements various useful common functions for shrinking tasks.""" + + +class Shrinker: + """A Shrinker object manages a single value and a predicate it should + satisfy, and attempts to improve it in some direction, making it smaller + and simpler.""" + + def __init__( + self, + initial, + predicate, + *, + full=False, + debug=False, + name=None, + **kwargs, + ): + self.setup(**kwargs) + self.current = self.make_immutable(initial) + self.initial = self.current + self.full = full + self.changes = 0 + self.name = name + + self.__predicate = predicate + self.__seen = {self.make_canonical(self.current)} + self.debugging_enabled = debug + + @property + def calls(self) -> int: + return len(self.__seen) + + def __repr__(self) -> str: + return "{}({}initial={!r}, current={!r})".format( + type(self).__name__, + "" if self.name is None else f"{self.name!r}, ", + self.initial, + self.current, + ) + + def setup(self, **kwargs): + """Runs initial setup code. + + Convenience function for children that doesn't require messing + with the signature of init. + """ + + def delegate(self, other_class, convert_to, convert_from, **kwargs): + """Delegates shrinking to another shrinker class, by converting the + current value to and from it with provided functions.""" + self.call_shrinker( + other_class, + convert_to(self.current), + lambda v: self.consider(convert_from(v)), + **kwargs, + ) + + def call_shrinker(self, other_class, initial, predicate, **kwargs): + """Calls another shrinker class, passing through the relevant context + variables. + + Note we explicitly do not pass through full. + """ + + return other_class.shrink(initial, predicate, **kwargs) + + def debug(self, *args: object) -> None: + if self.debugging_enabled: + print("DEBUG", self, *args) + + @classmethod + def shrink(cls, initial, predicate, **kwargs): + """Shrink the value ``initial`` subject to the constraint that it + satisfies ``predicate``. + + Returns the shrunk value. + """ + shrinker = cls(initial, predicate, **kwargs) + shrinker.run() + return shrinker.current + + def run(self): + """Run for an appropriate number of steps to improve the current value. + + If self.full is True, will run until no further improvements can + be found. + """ + if self.short_circuit(): + return + if self.full: + prev = -1 + while self.changes != prev: + prev = self.changes + self.run_step() + else: + self.run_step() + self.debug("COMPLETE") + + def consider(self, value): + """Try using ``value`` as a possible candidate improvement. + + Return True if self.current is canonically equal to value after the call, either because + the value was incorporated as an improvement or because it had that value already. + """ + value = self.make_immutable(value) + self.debug(f"considering {value!r}") + canonical = self.make_canonical(value) + if canonical == self.make_canonical(self.current): + return True + if canonical in self.__seen: + return False + self.__seen.add(canonical) + self.check_invariants(value) + if not self.left_is_better(value, self.current): + self.debug(f"Rejected {value!r} as no better than {self.current=}") + return False + if self.__predicate(value): + self.debug(f"shrinking to {value!r}") + self.changes += 1 + self.current = value + return True + else: + self.debug(f"Rejected {value!r} not satisfying predicate") + return False + + def make_canonical(self, value): + """Convert immutable value into a canonical and hashable, but not necessarily equal, + representation of itself. 
+ + This representation is used only for tracking already-seen values, not passed to the + shrinker. + + Defaults to just returning the (immutable) input value. + """ + return value + + def make_immutable(self, value): + """Convert value into an immutable representation of itself. + + It is these immutable versions that the shrinker will work on. + + Defaults to just returning the value. + """ + return value + + def check_invariants(self, value): + """Make appropriate assertions about the value to ensure that it is + valid for this shrinker. + + Does nothing by default. + """ + + def short_circuit(self): + """Possibly attempt to do some shrinking. + + If this returns True, the ``run`` method will terminate early + without doing any more work. + """ + return False + + def left_is_better(self, left, right): + """Returns True if the left is strictly simpler than the right + according to the standards of this shrinker.""" + raise NotImplementedError + + def run_step(self): + """Run a single step of the main shrink loop, attempting to improve the + current value.""" + raise NotImplementedError diff --git a/vendored/hypothesis/internal/conjecture/shrinking/floats.py b/vendored/hypothesis/internal/conjecture/shrinking/floats.py new file mode 100644 index 0000000..f55d3dd --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/shrinking/floats.py @@ -0,0 +1,93 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import math +import sys + +from hypothesis.internal.conjecture.floats import float_to_lex +from hypothesis.internal.conjecture.shrinking.common import Shrinker +from hypothesis.internal.conjecture.shrinking.integer import Integer +from hypothesis.internal.floats import MAX_PRECISE_INTEGER, float_to_int + + +class Float(Shrinker): + def setup(self): + self.debugging_enabled = True + + def make_canonical(self, f): + if math.isnan(f): + # Distinguish different NaN bit patterns, while making each equal to itself. + # Wrap in tuple to avoid potential collision with (huge) finite floats. + return ("nan", float_to_int(f)) + return f + + def check_invariants(self, value): + # We only handle positive floats (including NaN) because we encode the sign + # separately anyway. + assert not (value < 0) + + def left_is_better(self, left, right): + lex1 = float_to_lex(left) + lex2 = float_to_lex(right) + return lex1 < lex2 + + def short_circuit(self): + # We check for a bunch of standard "large" floats. If we're currently + # worse than them and the shrink downwards doesn't help, abort early + # because there's not much useful we can do here. + + for g in [sys.float_info.max, math.inf, math.nan]: + self.consider(g) + + # If we're stuck at a nasty float don't try to shrink it further. + if not math.isfinite(self.current): + return True + + def run_step(self): + # above MAX_PRECISE_INTEGER, all floats are integers. Shrink like one. + # TODO_BETTER_SHRINK: at 2 * MAX_PRECISE_INTEGER, n - 1 == n - 2, and + # Integer.shrink will likely perform badly. We should have a specialized + # big-float shrinker, which mostly follows Integer.shrink but replaces + # n - 1 with next_down(n). 
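To make the hook contract of the Shrinker base class in common.py above concrete, here is a toy subclass; everything in it is invented for illustration and is not part of the vendored code (Float above and Integer below are the real subclasses).

from hypothesis.internal.conjecture.shrinking.common import Shrinker

class ShrinkDown(Shrinker):
    # A toy shrinker over non-negative integers: "simpler" just means smaller.
    def left_is_better(self, left, right):
        return left < right

    def run_step(self):
        # Each step proposes a couple of candidate improvements; consider()
        # only accepts them if they are better and satisfy the predicate.
        self.consider(self.current // 2)
        self.consider(self.current - 1)

# Smallest value still satisfying the predicate, run to a fixed point:
assert ShrinkDown.shrink(100, lambda n: n >= 10, full=True) == 10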
+ if self.current > MAX_PRECISE_INTEGER: + self.delegate(Integer, convert_to=int, convert_from=float) + return + + # Finally we get to the important bit: Each of these is a small change + # to the floating point number that corresponds to a large change in + # the lexical representation. Trying these ensures that our floating + # point shrink can always move past these obstacles. In particular it + # ensures we can always move to integer boundaries and shrink past a + # change that would require shifting the exponent while not changing + # the float value much. + + # First, try dropping precision bits by rounding the scaled value. We + # try values ordered from least-precise (integer) to more precise, ie. + # approximate lexicographical order. Once we find an acceptable shrink, + # self.consider discards the remaining attempts early and skips test + # invocation. The loop count sets max fractional bits to keep, and is a + # compromise between completeness and performance. + + for p in range(10): + scaled = self.current * 2**p # note: self.current may change in loop + for truncate in [math.floor, math.ceil]: + self.consider(truncate(scaled) / 2**p) + + if self.consider(int(self.current)): + self.debug("Just an integer now") + self.delegate(Integer, convert_to=int, convert_from=float) + return + + # Now try to minimize the top part of the fraction as an integer. This + # basically splits the float as k + x with 0 <= x < 1 and minimizes + # k as an integer, but without the precision issues that would have. + m, n = self.current.as_integer_ratio() + i, r = divmod(m, n) + self.call_shrinker(Integer, i, lambda k: self.consider((k * n + r) / n)) diff --git a/vendored/hypothesis/internal/conjecture/shrinking/integer.py b/vendored/hypothesis/internal/conjecture/shrinking/integer.py new file mode 100644 index 0000000..815d6e5 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/shrinking/integer.py @@ -0,0 +1,75 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from hypothesis.internal.conjecture.junkdrawer import find_integer +from hypothesis.internal.conjecture.shrinking.common import Shrinker + +""" +This module implements a shrinker for non-negative integers. +""" + + +class Integer(Shrinker): + """Attempts to find a smaller integer. Guaranteed things to try ``0``, + + ``1``, ``initial - 1``, ``initial - 2``. Plenty of optimisations beyond + that but those are the guaranteed ones. + """ + + def short_circuit(self): + for i in range(2): + if self.consider(i): + return True + self.mask_high_bits() + if self.size > 8: + # see if we can squeeze the integer into a single byte. 
+ self.consider(self.current >> (self.size - 8)) + self.consider(self.current & 0xFF) + return self.current == 2 + + def check_invariants(self, value): + assert value >= 0 + + def left_is_better(self, left, right): + return left < right + + def run_step(self): + self.shift_right() + self.shrink_by_multiples(2) + self.shrink_by_multiples(1) + + def shift_right(self): + base = self.current + find_integer(lambda k: k <= self.size and self.consider(base >> k)) + + def mask_high_bits(self): + base = self.current + n = base.bit_length() + + @find_integer + def try_mask(k): + if k >= n: + return False + mask = (1 << (n - k)) - 1 + return self.consider(mask & base) + + @property + def size(self) -> int: + return self.current.bit_length() + + def shrink_by_multiples(self, k): + base = self.current + + @find_integer + def shrunk(n): + attempt = base - n * k + return attempt >= 0 and self.consider(attempt) + + return shrunk > 0 diff --git a/vendored/hypothesis/internal/conjecture/shrinking/ordering.py b/vendored/hypothesis/internal/conjecture/shrinking/ordering.py new file mode 100644 index 0000000..4e48bdf --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/shrinking/ordering.py @@ -0,0 +1,96 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from hypothesis.internal.conjecture.junkdrawer import find_integer +from hypothesis.internal.conjecture.shrinking.common import Shrinker +from hypothesis.internal.conjecture.utils import identity + + +class Ordering(Shrinker): + """A shrinker that tries to make a sequence more sorted. + + Will not change the length or the contents, only tries to reorder + the elements of the sequence. + """ + + def setup(self, key=identity): + self.key = key + + def make_immutable(self, value): + return tuple(value) + + def short_circuit(self): + # If we can flat out sort the target then there's nothing more to do. + return self.consider(sorted(self.current, key=self.key)) + + def left_is_better(self, left, right): + return tuple(map(self.key, left)) < tuple(map(self.key, right)) + + def check_invariants(self, value): + assert len(value) == len(self.current) + assert sorted(value) == sorted(self.current) + + def run_step(self): + self.sort_regions() + self.sort_regions_with_gaps() + + def sort_regions(self): + """Guarantees that for each i we have tried to swap index i with + index i + 1. + + This uses an adaptive algorithm that works by sorting contiguous + regions starting from each element. + """ + i = 0 + while i + 1 < len(self.current): + prefix = list(self.current[:i]) + k = find_integer( + lambda k: i + k <= len(self.current) + and self.consider( + prefix + + sorted(self.current[i : i + k], key=self.key) + + list(self.current[i + k :]) + ) + ) + i += k + + def sort_regions_with_gaps(self): + """Guarantees that for each i we have tried to swap index i with + index i + 2. + + This uses an adaptive algorithm that works by sorting contiguous + regions centered on each element, where that element is treated as + fixed and the elements around it are sorted.. 
+ """ + for i in range(1, len(self.current) - 1): + if self.current[i - 1] <= self.current[i] <= self.current[i + 1]: + # The `continue` line is optimised out of the bytecode on + # CPython >= 3.7 (https://bugs.python.org/issue2506) and on + # PyPy, and so coverage cannot tell that it has been taken. + continue # pragma: no cover + + def can_sort(a, b): + if a < 0 or b > len(self.current): + return False + assert a <= i < b + split = i - a + values = sorted(self.current[a:i] + self.current[i + 1 : b]) + return self.consider( + list(self.current[:a]) + + values[:split] + + [self.current[i]] + + values[split:] + + list(self.current[b:]) + ) + + left = i + right = i + 1 + right += find_integer(lambda k: can_sort(left, right + k)) + find_integer(lambda k: can_sort(left - k, right)) diff --git a/vendored/hypothesis/internal/conjecture/shrinking/string.py b/vendored/hypothesis/internal/conjecture/shrinking/string.py new file mode 100644 index 0000000..bbb8252 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/shrinking/string.py @@ -0,0 +1,24 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from hypothesis.internal.conjecture.shrinking.collection import Collection +from hypothesis.internal.conjecture.shrinking.integer import Integer + + +class String(Collection): + def __init__(self, initial, predicate, *, intervals, **kwargs): + super().__init__( + list(initial), + lambda val: predicate("".join(val)), + to_order=intervals.index_from_char_in_shrink_order, + from_order=intervals.char_in_shrink_order, + ElementShrinker=Integer, + **kwargs, + ) diff --git a/vendored/hypothesis/internal/conjecture/utils.py b/vendored/hypothesis/internal/conjecture/utils.py new file mode 100644 index 0000000..d27df88 --- /dev/null +++ b/vendored/hypothesis/internal/conjecture/utils.py @@ -0,0 +1,397 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import enum +import hashlib +import heapq +import math +import sys +from collections import OrderedDict, abc +from collections.abc import Callable, Sequence +from functools import lru_cache +from types import FunctionType +from typing import TYPE_CHECKING, TypeVar + +from hypothesis.errors import InvalidArgument +from hypothesis.internal.compat import int_from_bytes +from hypothesis.internal.floats import next_up +from hypothesis.internal.lambda_sources import _function_key + +if TYPE_CHECKING: + from hypothesis.internal.conjecture.data import ConjectureData + + +LABEL_MASK = 2**64 - 1 + + +def calc_label_from_name(name: str) -> int: + hashed = hashlib.sha384(name.encode()).digest() + return int_from_bytes(hashed[:8]) + + +def calc_label_from_callable(f: Callable) -> int: + if isinstance(f, FunctionType): + return calc_label_from_hash(_function_key(f, ignore_name=True)) + elif isinstance(f, type): + return calc_label_from_cls(f) + else: + # probably an instance defining __call__ + try: + return calc_label_from_hash(f) + except Exception: + # not hashable + return calc_label_from_cls(type(f)) + + +def calc_label_from_cls(cls: type) -> int: + return calc_label_from_name(cls.__qualname__) + + +def calc_label_from_hash(obj: object) -> int: + return calc_label_from_name(str(hash(obj))) + + +def combine_labels(*labels: int) -> int: + label = 0 + for l in labels: + label = (label << 1) & LABEL_MASK + label ^= l + return label + + +SAMPLE_IN_SAMPLER_LABEL = calc_label_from_name("a sample() in Sampler") +ONE_FROM_MANY_LABEL = calc_label_from_name("one more from many()") + + +T = TypeVar("T") + + +def identity(v: T) -> T: + return v + + +def check_sample( + values: type[enum.Enum] | Sequence[T], strategy_name: str +) -> Sequence[T]: + if "numpy" in sys.modules and isinstance(values, sys.modules["numpy"].ndarray): + if values.ndim != 1: + raise InvalidArgument( + "Only one-dimensional arrays are supported for sampling, " + f"and the given value has {values.ndim} dimensions (shape " + f"{values.shape}). This array would give samples of array slices " + "instead of elements! Use np.ravel(values) to convert " + "to a one-dimensional array, or tuple(values) if you " + "want to sample slices." + ) + elif not isinstance(values, (OrderedDict, abc.Sequence, enum.EnumMeta)): + raise InvalidArgument( + f"Cannot sample from {values!r} because it is not an ordered collection. " + f"Hypothesis goes to some length to ensure that the {strategy_name} " + "strategy has stable results between runs. To replay a saved " + "example, the sampled values must have the same iteration order " + "on every run - ruling out sets, dicts, etc due to hash " + "randomization. Most cases can simply use `sorted(values)`, but " + "mixed types or special values such as math.nan require careful " + "handling - and note that when simplifying an example, " + "Hypothesis treats earlier values as simpler." + ) + if isinstance(values, range): + # Pyright is unhappy with every way I've tried to type-annotate this + # function, so fine, we'll just ignore the analysis error. 
+ return values # type: ignore + return tuple(values) + + +@lru_cache(64) +def compute_sampler_table(weights: tuple[float, ...]) -> list[tuple[int, int, float]]: + n = len(weights) + table: list[list[int | float | None]] = [[i, None, None] for i in range(n)] + total = sum(weights) + num_type = type(total) + + zero = num_type(0) # type: ignore + one = num_type(1) # type: ignore + + small: list[int] = [] + large: list[int] = [] + + probabilities = [w / total for w in weights] + scaled_probabilities: list[float] = [] + + for i, alternate_chance in enumerate(probabilities): + scaled = alternate_chance * n + scaled_probabilities.append(scaled) + if scaled == 1: + table[i][2] = zero + elif scaled < 1: + small.append(i) + else: + large.append(i) + heapq.heapify(small) + heapq.heapify(large) + + while small and large: + lo = heapq.heappop(small) + hi = heapq.heappop(large) + + assert lo != hi + assert scaled_probabilities[hi] > one + assert table[lo][1] is None + table[lo][1] = hi + table[lo][2] = one - scaled_probabilities[lo] + scaled_probabilities[hi] = ( + scaled_probabilities[hi] + scaled_probabilities[lo] + ) - one + + if scaled_probabilities[hi] < 1: + heapq.heappush(small, hi) + elif scaled_probabilities[hi] == 1: + table[hi][2] = zero + else: + heapq.heappush(large, hi) + while large: + table[large.pop()][2] = zero + while small: + table[small.pop()][2] = zero + + new_table: list[tuple[int, int, float]] = [] + for base, alternate, alternate_chance in table: + assert isinstance(base, int) + assert isinstance(alternate, int) or alternate is None + assert alternate_chance is not None + if alternate is None: + new_table.append((base, base, alternate_chance)) + elif alternate < base: + new_table.append((alternate, base, one - alternate_chance)) + else: + new_table.append((base, alternate, alternate_chance)) + new_table.sort() + return new_table + + +class Sampler: + """Sampler based on Vose's algorithm for the alias method. See + http://www.keithschwarz.com/darts-dice-coins/ for a good explanation. + + The general idea is that we store a table of triples (base, alternate, p). + base. We then pick a triple uniformly at random, and choose its alternate + value with probability p and else choose its base value. The triples are + chosen so that the resulting mixture has the right distribution. + + We maintain the following invariants to try to produce good shrinks: + + 1. The table is in lexicographic (base, alternate) order, so that choosing + an earlier value in the list always lowers (or at least leaves + unchanged) the value. + 2. base[i] < alternate[i], so that shrinking the draw always results in + shrinking the chosen element. 
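# --- Editor's note: illustrative sketch, not part of the vendored file. ---
# Worked example of the alias table described in the Sampler docstring above,
# for weights (1, 3). Each row is (base, alternate, alternate_chance); the
# values below are the editor's hand calculation of what the algorithm should
# produce for this input, not a quote from upstream.
alias_table = [(0, 1, 0.5), (1, 1, 0.0)]
mixture = [0.0, 0.0]
for base, alternate, alternate_chance in alias_table:
    row_p = 1 / len(alias_table)  # each row is picked uniformly at random
    mixture[alternate] += row_p * alternate_chance
    mixture[base] += row_p * (1 - alternate_chance)
assert mixture == [0.25, 0.75]  # i.e. the 1:3 weighting, normalised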
+ """ + + table: list[tuple[int, int, float]] # (base_idx, alt_idx, alt_chance) + + def __init__(self, weights: Sequence[float], *, observe: bool = True): + self.observe = observe + self.table = compute_sampler_table(tuple(weights)) + + def sample( + self, + data: "ConjectureData", + *, + forced: int | None = None, + ) -> int: + if self.observe: + data.start_span(SAMPLE_IN_SAMPLER_LABEL) + forced_choice = ( # pragma: no branch # https://github.com/nedbat/coveragepy/issues/1617 + None + if forced is None + else next( + (base, alternate, alternate_chance) + for (base, alternate, alternate_chance) in self.table + if forced == base or (forced == alternate and alternate_chance > 0) + ) + ) + base, alternate, alternate_chance = data.choice( + self.table, + forced=forced_choice, + observe=self.observe, + ) + forced_use_alternate = None + if forced is not None: + # we maintain this invariant when picking forced_choice above. + # This song and dance about alternate_chance > 0 is to avoid forcing + # e.g. draw_boolean(p=0, forced=True), which is an error. + forced_use_alternate = forced == alternate and alternate_chance > 0 + assert forced == base or forced_use_alternate + + use_alternate = data.draw_boolean( + alternate_chance, + forced=forced_use_alternate, + observe=self.observe, + ) + if self.observe: + data.stop_span() + if use_alternate: + assert forced is None or alternate == forced, (forced, alternate) + return alternate + else: + assert forced is None or base == forced, (forced, base) + return base + + +INT_SIZES = (8, 16, 32, 64, 128) +INT_SIZES_SAMPLER = Sampler((4.0, 8.0, 1.0, 1.0, 0.5), observe=False) + + +class many: + """Utility class for collections. Bundles up the logic we use for "should I + keep drawing more values?" and handles starting and stopping examples in + the right place. + + Intended usage is something like: + + elements = many(data, ...) + while elements.more(): + add_stuff_to_result() + """ + + def __init__( + self, + data: "ConjectureData", + min_size: int, + max_size: int | float, + average_size: int | float, + *, + forced: int | None = None, + observe: bool = True, + ) -> None: + assert 0 <= min_size <= average_size <= max_size + assert forced is None or min_size <= forced <= max_size + self.min_size = min_size + self.max_size = max_size + self.data = data + self.forced_size = forced + self.p_continue = _calc_p_continue(average_size - min_size, max_size - min_size) + self.count = 0 + self.rejections = 0 + self.drawn = False + self.force_stop = False + self.rejected = False + self.observe = observe + + def stop_span(self): + if self.observe: + self.data.stop_span() + + def start_span(self, label): + if self.observe: + self.data.start_span(label) + + def more(self) -> bool: + """Should I draw another element to add to the collection?""" + if self.drawn: + self.stop_span() + + self.drawn = True + self.rejected = False + + self.start_span(ONE_FROM_MANY_LABEL) + if self.min_size == self.max_size: + # if we have to hit an exact size, draw unconditionally until that + # point, and no further. + should_continue = self.count < self.min_size + else: + forced_result = None + if self.force_stop: + # if our size is forced, we can't reject in a way that would + # cause us to differ from the forced size. 
+ assert self.forced_size is None or self.count == self.forced_size + forced_result = False + elif self.count < self.min_size: + forced_result = True + elif self.count >= self.max_size: + forced_result = False + elif self.forced_size is not None: + forced_result = self.count < self.forced_size + should_continue = self.data.draw_boolean( + self.p_continue, + forced=forced_result, + observe=self.observe, + ) + + if should_continue: + self.count += 1 + return True + else: + self.stop_span() + return False + + def reject(self, why: str | None = None) -> None: + """Reject the last example (i.e. don't count it towards our budget of + elements because it's not going to go in the final collection).""" + assert self.count > 0 + self.count -= 1 + self.rejections += 1 + self.rejected = True + # We set a minimum number of rejections before we give up to avoid + # failing too fast when we reject the first draw. + if self.rejections > max(3, 2 * self.count): + if self.count < self.min_size: + self.data.mark_invalid(why) + else: + self.force_stop = True + + +SMALLEST_POSITIVE_FLOAT: float = next_up(0.0) or sys.float_info.min + + +@lru_cache +def _calc_p_continue(desired_avg: float, max_size: int | float) -> float: + """Return the p_continue which will generate the desired average size.""" + assert desired_avg <= max_size, (desired_avg, max_size) + if desired_avg == max_size: + return 1.0 + p_continue = 1 - 1.0 / (1 + desired_avg) + if p_continue == 0 or max_size == math.inf: + assert 0 <= p_continue < 1, p_continue + return p_continue + assert 0 < p_continue < 1, p_continue + # For small max_size, the infinite-series p_continue is a poor approximation, + # and while we can't solve the polynomial a few rounds of iteration quickly + # gets us a good approximate solution in almost all cases (sometimes exact!). + while _p_continue_to_avg(p_continue, max_size) > desired_avg: + # This is impossible over the reals, but *can* happen with floats. + p_continue -= 0.0001 + # If we've reached zero or gone negative, we want to break out of this loop, + # and do so even if we're on a system with the unsafe denormals-are-zero flag. + # We make that an explicit error in st.floats(), but here we'd prefer to + # just get somewhat worse precision on collection lengths. + if p_continue < SMALLEST_POSITIVE_FLOAT: + p_continue = SMALLEST_POSITIVE_FLOAT + break + # Let's binary-search our way to a better estimate! We tried fancier options + # like gradient descent, but this is numerically stable and works better. + hi = 1.0 + while desired_avg - _p_continue_to_avg(p_continue, max_size) > 0.01: + assert 0 < p_continue < hi, (p_continue, hi) + mid = (p_continue + hi) / 2 + if _p_continue_to_avg(mid, max_size) <= desired_avg: + p_continue = mid + else: + hi = mid + assert 0 < p_continue < 1, p_continue + assert _p_continue_to_avg(p_continue, max_size) <= desired_avg + return p_continue + + +def _p_continue_to_avg(p_continue: float, max_size: int | float) -> float: + """Return the average_size generated by this p_continue and max_size.""" + if p_continue >= 1: + return max_size + return (1.0 / (1 - p_continue) - 1) * (1 - p_continue**max_size) diff --git a/vendored/hypothesis/internal/constants_ast.py b/vendored/hypothesis/internal/constants_ast.py new file mode 100644 index 0000000..bcc1959 --- /dev/null +++ b/vendored/hypothesis/internal/constants_ast.py @@ -0,0 +1,274 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. 
+# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import ast +import hashlib +import inspect +import math +import sys +from ast import Constant, Expr, NodeVisitor, UnaryOp, USub +from collections.abc import Iterator, MutableSet +from functools import lru_cache +from itertools import chain +from pathlib import Path +from types import ModuleType +from typing import TypeAlias + +import hypothesis +from hypothesis.configuration import storage_directory +from hypothesis.internal.conjecture.choice import ChoiceTypeT +from hypothesis.internal.escalation import is_hypothesis_file + +ConstantT: TypeAlias = int | float | bytes | str + +# unfortunate collision with builtin. I don't want to name the init arg bytes_. +bytesT = bytes + + +class Constants: + def __init__( + self, + *, + integers: MutableSet[int] | None = None, + floats: MutableSet[float] | None = None, + bytes: MutableSet[bytes] | None = None, + strings: MutableSet[str] | None = None, + ): + self.integers: MutableSet[int] = set() if integers is None else integers + self.floats: MutableSet[float] = set() if floats is None else floats + self.bytes: MutableSet[bytesT] = set() if bytes is None else bytes + self.strings: MutableSet[str] = set() if strings is None else strings + + def set_for_type( + self, constant_type: type[ConstantT] | ChoiceTypeT + ) -> MutableSet[int] | MutableSet[float] | MutableSet[bytes] | MutableSet[str]: + if constant_type is int or constant_type == "integer": + return self.integers + elif constant_type is float or constant_type == "float": + return self.floats + elif constant_type is bytes or constant_type == "bytes": + return self.bytes + elif constant_type is str or constant_type == "string": + return self.strings + raise ValueError(f"unknown constant_type {constant_type}") + + def add(self, constant: ConstantT) -> None: + self.set_for_type(type(constant)).add(constant) # type: ignore + + def __contains__(self, constant: ConstantT) -> bool: + return constant in self.set_for_type(type(constant)) + + def __or__(self, other: "Constants") -> "Constants": + return Constants( + integers=self.integers | other.integers, # type: ignore + floats=self.floats | other.floats, # type: ignore + bytes=self.bytes | other.bytes, # type: ignore + strings=self.strings | other.strings, # type: ignore + ) + + def __iter__(self) -> Iterator[ConstantT]: + return iter(chain(self.integers, self.floats, self.bytes, self.strings)) + + def __len__(self) -> int: + return ( + len(self.integers) + len(self.floats) + len(self.bytes) + len(self.strings) + ) + + def __repr__(self) -> str: + return f"Constants({self.integers=}, {self.floats=}, {self.bytes=}, {self.strings=})" + + def __eq__(self, other: object) -> bool: + if not isinstance(other, Constants): + return False + return ( + self.integers == other.integers + and self.floats == other.floats + and self.bytes == other.bytes + and self.strings == other.strings + ) + + +class TooManyConstants(Exception): + # a control flow exception which we raise in ConstantsVisitor when the + # number of constants in a module gets too large. 
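# --- Editor's note: illustrative sketch, not part of the vendored file. ---
# The Constants container defined above groups literals by type and supports
# membership, union and iteration. Assuming the vendored module is importable
# as hypothesis.internal.constants_ast:
from hypothesis.internal.constants_ast import Constants

c = Constants()
c.add(12345)
c.add("hello world")
assert 12345 in c and "hello world" in c
assert len(c) == 2 and set(c) == {12345, "hello world"}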
+ pass + + +class ConstantVisitor(NodeVisitor): + CONSTANTS_LIMIT: int = 1024 + + def __init__(self, *, limit: bool): + super().__init__() + self.constants = Constants() + self.limit = limit + + def _add_constant(self, value: object) -> None: + if self.limit and len(self.constants) >= self.CONSTANTS_LIMIT: + raise TooManyConstants + + if isinstance(value, str) and ( + value.isspace() + or value == "" + # long strings are unlikely to be useful. + or len(value) > 20 + ): + return + if isinstance(value, bytes) and ( + value == b"" + # long bytes seem plausibly more likely to be useful than long strings + # (e.g. AES-256 has a 32 byte key), but we still want to cap at some + # point to avoid performance issues. + or len(value) > 50 + ): + return + if isinstance(value, bool): + return + if isinstance(value, float) and math.isinf(value): + # we already upweight inf. + return + if isinstance(value, int) and -100 < value < 100: + # we already upweight small integers. + return + + if isinstance(value, (int, float, bytes, str)): + self.constants.add(value) + return + + # I don't kow what case could go here, but am also not confident there + # isn't one. + return # pragma: no cover + + def visit_UnaryOp(self, node: UnaryOp) -> None: + # `a = -1` is actually a combination of a USub and the constant 1. + if ( + isinstance(node.op, USub) + and isinstance(node.operand, Constant) + and isinstance(node.operand.value, (int, float)) + and not isinstance(node.operand.value, bool) + ): + self._add_constant(-node.operand.value) + # don't recurse on this node to avoid adding the positive variant + return + + self.generic_visit(node) + + def visit_Expr(self, node: Expr) -> None: + if isinstance(node.value, Constant) and isinstance(node.value.value, str): + return + + self.generic_visit(node) + + def visit_JoinedStr(self, node): + # dont recurse on JoinedStr, i.e. f strings. Constants that appear *only* + # in f strings are unlikely to be helpful. + return + + def visit_Constant(self, node): + self._add_constant(node.value) + self.generic_visit(node) + + +def _constants_from_source(source: str | bytes, *, limit: bool) -> Constants: + tree = ast.parse(source) + visitor = ConstantVisitor(limit=limit) + + try: + visitor.visit(tree) + except TooManyConstants: + # in the case of an incomplete collection, return nothing, to avoid + # muddying caches etc. + return Constants() + + return visitor.constants + + +def _constants_file_str(constants: Constants) -> str: + return str(sorted(constants, key=lambda v: (str(type(v)), v))) + + +@lru_cache(4096) +def constants_from_module(module: ModuleType, *, limit: bool = True) -> Constants: + try: + module_file = inspect.getsourcefile(module) + # use type: ignore because we know this might error + source_bytes = Path(module_file).read_bytes() # type: ignore + except Exception: + return Constants() + + if limit and len(source_bytes) > 512 * 1024: + # Skip files over 512kb. For reference, the largest source file + # in Hypothesis is strategies/_internal/core.py at 107kb at time + # of writing. + return Constants() + + source_hash = hashlib.sha1(source_bytes).hexdigest()[:16] + # separate cache files for each limit param. 
see discussion in pull/4398 + cache_p = storage_directory("constants") / ( + source_hash + ("" if limit else "_nolimit") + ) + try: + return _constants_from_source(cache_p.read_bytes(), limit=limit) + except Exception: + # if the cached location doesn't exist, or it does exist but there was + # a problem reading it, fall back to standard computation of the constants + pass + + try: + constants = _constants_from_source(source_bytes, limit=limit) + except Exception: + # A bunch of things can go wrong here. + # * ast.parse may fail on the source code + # * NodeVisitor may hit a RecursionError (see many related issues on + # e.g. libcst https://github.com/Instagram/LibCST/issues?q=recursion), + # or a MemoryError (`"[1, " * 200 + "]" * 200`) + return Constants() + + try: + cache_p.parent.mkdir(parents=True, exist_ok=True) + cache_p.write_text( + f"# file: {module_file}\n# hypothesis_version: {hypothesis.__version__}\n\n" + # somewhat arbitrary sort order. The cache file doesn't *have* to be + # stable... but it is aesthetically pleasing, and means we could rely + # on it in the future! + + _constants_file_str(constants), + encoding="utf-8", + ) + except Exception: # pragma: no cover + pass + + return constants + + +@lru_cache(4096) +def is_local_module_file(path: str) -> bool: + from hypothesis.internal.scrutineer import ModuleLocation + + return ( + # Skip expensive path lookup for stdlib modules. + # This will cause false negatives if a user names their module the + # same as a stdlib module. + path not in sys.stdlib_module_names + # A path containing site-packages is extremely likely to be + # ModuleLocation.SITE_PACKAGES. Skip the expensive path lookup here. + and "/site-packages/" not in path + and ModuleLocation.from_path(path) is ModuleLocation.LOCAL + # normally, hypothesis is a third-party library and is not returned + # by local_modules. However, if it is installed as an editable package + # with pip install -e, then we will pick up on it. Just hardcode an + # ignore here. + and not is_hypothesis_file(path) + # avoid collecting constants from test files + and not ( + "test" in (p := Path(path)).parts + or "tests" in p.parts + or p.stem.startswith("test_") + or p.stem.endswith("_test") + ) + ) diff --git a/vendored/hypothesis/internal/coverage.py b/vendored/hypothesis/internal/coverage.py new file mode 100644 index 0000000..98cffed --- /dev/null +++ b/vendored/hypothesis/internal/coverage.py @@ -0,0 +1,109 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import json +import os +import sys +from collections.abc import Callable +from contextlib import contextmanager +from typing import TypeVar + +from hypothesis.internal.reflection import proxies + +""" +This module implements a custom coverage system that records conditions and +then validates that every condition has been seen to be both True and False +during the execution of our tests. + +The only thing we use it for at present is our argument validation functions, +where we assert that every validation function has been seen to both pass and +fail in the course of testing. 
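# --- Editor's note: illustrative sketch, not part of the vendored file. ---
# Intended usage of the coverage helpers defined below, with a hypothetical
# validator. Outside HYPOTHESIS_INTERNAL_COVERAGE=true the decorator returns
# the function unchanged, so this adds essentially no overhead in normal runs.
from hypothesis.internal.coverage import check_function

@check_function
def check_positive(value):
    if value <= 0:
        raise ValueError("value must be positive")

check_positive(3)  # with coverage enabled, records that check_positive passed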
+ +When not running with a magic environment variable set, this module disables +itself and has essentially no overhead. +""" + +Func = TypeVar("Func", bound=Callable) +pretty_file_name_cache: dict[str, str] = {} + + +def pretty_file_name(f): + try: + return pretty_file_name_cache[f] + except KeyError: + pass + + parts = f.split(os.path.sep) + if "hypothesis" in parts: # pragma: no branch + parts = parts[-parts[::-1].index("hypothesis") :] + result = os.path.sep.join(parts) + pretty_file_name_cache[f] = result + return result + + +IN_COVERAGE_TESTS = os.getenv("HYPOTHESIS_INTERNAL_COVERAGE") == "true" +description_stack = [] + + +if IN_COVERAGE_TESTS: + # By this point, "branch-check" should have already been deleted by the + # tox config. We can't delete it here because of #1718. + + written: set[tuple[str, bool]] = set() + + def record_branch(name, value): + key = (name, value) + if key in written: + return + written.add(key) + with open(f"branch-check-{os.getpid()}", mode="a", encoding="utf-8") as log: + log.write(json.dumps({"name": name, "value": value}) + "\n") + + @contextmanager + def check_block(name, depth): + # We add an extra two callers to the stack: One for the contextmanager + # function, one for our actual caller, so we want to go two extra + # stack frames up. + caller = sys._getframe(depth + 2) + fname = pretty_file_name(caller.f_code.co_filename) + local_description = f"{name} at {fname}:{caller.f_lineno}" + try: + description_stack.append(local_description) + description = " in ".join(reversed(description_stack)) + " passed" + yield + record_branch(description, True) + except BaseException: + record_branch(description, False) + raise + finally: + description_stack.pop() + + @contextmanager + def check(name): + with check_block(name, 2): + yield + + def check_function(f: Func) -> Func: + @proxies(f) + def accept(*args, **kwargs): + # depth of 2 because of the proxy function calling us. + with check_block(f.__name__, 2): + return f(*args, **kwargs) + + return accept + +else: # pragma: no cover + + def check_function(f: Func) -> Func: + return f + + @contextmanager + def check(name): + yield diff --git a/vendored/hypothesis/internal/detection.py b/vendored/hypothesis/internal/detection.py new file mode 100644 index 0000000..6fa01a8 --- /dev/null +++ b/vendored/hypothesis/internal/detection.py @@ -0,0 +1,41 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from types import MethodType + + +def is_hypothesis_test(f: object) -> bool: + """ + Returns ``True`` if ``f`` represents a test function that has been defined + with Hypothesis. This is true for: + + * Functions decorated with |@given| + * The ``runTest`` method of stateful tests + + For example: + + .. code-block:: python + + @given(st.integers()) + def f(n): ... + + class MyStateMachine(RuleBasedStateMachine): ... + + assert is_hypothesis_test(f) + assert is_hypothesis_test(MyStateMachine.TestCase().runTest) + + .. seealso:: + + See also the :doc:`Detect Hypothesis tests + ` how-to. 
+ """ + if isinstance(f, MethodType): + return is_hypothesis_test(f.__func__) + return getattr(f, "is_hypothesis_test", False) diff --git a/vendored/hypothesis/internal/entropy.py b/vendored/hypothesis/internal/entropy.py new file mode 100644 index 0000000..0082eb2 --- /dev/null +++ b/vendored/hypothesis/internal/entropy.py @@ -0,0 +1,267 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import contextlib +import gc +import random +import sys +import warnings +from collections.abc import Callable, Generator, Hashable +from itertools import count +from random import Random +from typing import TYPE_CHECKING, Any +from weakref import WeakValueDictionary + +import hypothesis.core +from hypothesis.errors import HypothesisWarning, InvalidArgument +from hypothesis.internal.compat import FREE_THREADED_CPYTHON, GRAALPY, PYPY + +if TYPE_CHECKING: + from typing import Protocol + + # we can't use this at runtime until from_type supports + # protocols -- breaks ghostwriter tests + class RandomLike(Protocol): + def seed(self, *args: Any, **kwargs: Any) -> Any: ... + def getstate(self, *args: Any, **kwargs: Any) -> Any: ... + def setstate(self, *args: Any, **kwargs: Any) -> Any: ... + +else: # pragma: no cover + RandomLike = random.Random + +_RKEY = count() +_global_random_rkey = next(_RKEY) +# This is effectively a WeakSet, which allows us to associate the saved states +# with their respective Random instances even as new ones are registered and old +# ones go out of scope and get garbage collected. Keys are ascending integers. +RANDOMS_TO_MANAGE: WeakValueDictionary[int, RandomLike] = WeakValueDictionary( + {_global_random_rkey: random} +) + + +class NumpyRandomWrapper: + def __init__(self) -> None: + assert "numpy" in sys.modules + # This class provides a shim that matches the numpy to stdlib random, + # and lets us avoid importing Numpy until it's already in use. + import numpy.random + + self.seed = numpy.random.seed + self.getstate = numpy.random.get_state + self.setstate = numpy.random.set_state + + +NP_RANDOM: RandomLike | None = None + + +if not (PYPY or GRAALPY): + + def _get_platform_base_refcount(r: Any) -> int: + return sys.getrefcount(r) + + # Determine the number of refcounts created by function scope for + # the given platform / version of Python. + _PLATFORM_REF_COUNT = _get_platform_base_refcount(object()) +else: # pragma: no cover + # PYPY and GRAALPY don't have `sys.getrefcount` + _PLATFORM_REF_COUNT = -1 + + +def register_random(r: RandomLike) -> None: + """Register (a weakref to) the given Random-like instance for management by + Hypothesis. + + You can pass instances of structural subtypes of ``random.Random`` + (i.e., objects with seed, getstate, and setstate methods) to + ``register_random(r)`` to have their states seeded and restored in the same + way as the global PRNGs from the ``random`` and ``numpy.random`` modules. + + All global PRNGs, from e.g. simulation or scheduling frameworks, should + be registered to prevent flaky tests. 
Hypothesis will ensure that the + PRNG state is consistent for all test runs, always seeding them to zero and + restoring the previous state after the test, or, reproducibly varied if you + choose to use the :func:`~hypothesis.strategies.random_module` strategy. + + ``register_random`` only makes `weakrefs + `_ to ``r``, + thus ``r`` will only be managed by Hypothesis as long as it has active + references elsewhere at runtime. The pattern ``register_random(MyRandom())`` + will raise a ``ReferenceError`` to help protect users from this issue. + This check does not occur for the PyPy interpreter. See the following example for + an illustration of this issue + + .. code-block:: python + + + def my_BROKEN_hook(): + r = MyRandomLike() + + # `r` will be garbage collected after the hook resolved + # and Hypothesis will 'forget' that it was registered + register_random(r) # Hypothesis will emit a warning + + + rng = MyRandomLike() + + + def my_WORKING_hook(): + register_random(rng) + """ + if not (hasattr(r, "seed") and hasattr(r, "getstate") and hasattr(r, "setstate")): + raise InvalidArgument(f"{r=} does not have all the required methods") + + if r in [ + random + for ref in RANDOMS_TO_MANAGE.data.copy().values() # type: ignore + if (random := ref()) is not None + ]: + return + + if not (PYPY or GRAALPY): # pragma: no branch + # PYPY and GRAALPY do not have `sys.getrefcount`. + gc.collect() + if not gc.get_referrers(r): + if sys.getrefcount(r) <= _PLATFORM_REF_COUNT: + raise ReferenceError( + f"`register_random` was passed `r={r}` which will be " + "garbage collected immediately after `register_random` creates a " + "weakref to it. This will prevent Hypothesis from managing this " + "PRNG. See the docs for `register_random` for more " + "details." + ) + elif not FREE_THREADED_CPYTHON: # pragma: no branch + # On CPython, check for the free-threaded build because + # gc.get_referrers() ignores objects with immortal refcounts + # and objects are immortalized in the Python 3.13 + # free-threading implementation at runtime. + + warnings.warn( + "It looks like `register_random` was passed an object that could " + "be garbage collected immediately after `register_random` creates " + "a weakref to it. This will prevent Hypothesis from managing this " + "PRNG. See the docs for `register_random` for more details.", + HypothesisWarning, + stacklevel=2, + ) + + RANDOMS_TO_MANAGE[next(_RKEY)] = r + + +# Used to make the warning issued by `deprecate_random_in_strategy` thread-safe, +# as well as to avoid warning on uses of st.randoms(). +# Store just the hash to reduce memory consumption. This is an underapproximation +# of membership (distinct items might have the same hash), which is fine for the +# warning, as it results in missed alarms, not false alarms. +_known_random_state_hashes: set[Any] = set() + + +def get_seeder_and_restorer( + seed: Hashable = 0, +) -> tuple[Callable[[], None], Callable[[], None]]: + """Return a pair of functions which respectively seed all and restore + the state of all registered PRNGs. + + This is used by the core engine via `deterministic_PRNG`, and by users + via `register_random`. We support registration of additional random.Random + instances (or other objects with seed, getstate, and setstate methods) + to force determinism on simulation or scheduling frameworks which avoid + using the global random state. See e.g. #1709. 
+ """ + assert isinstance(seed, int) + assert 0 <= seed < 2**32 + states: dict[int, object] = {} + + if "numpy" in sys.modules: + global NP_RANDOM + if NP_RANDOM is None: + # Protect this from garbage-collection by adding it to global scope + NP_RANDOM = RANDOMS_TO_MANAGE[next(_RKEY)] = NumpyRandomWrapper() + + def seed_all() -> None: + assert not states + # access .data.copy().items() instead of .items() to avoid a "dictionary + # changed size during iteration" error under multithreading. + # + # I initially expected this to be fixed by + # https://github.com/python/cpython/commit/96d37dbcd23e65a7a57819aeced9034296ef747e, + # but I believe that is addressing the size change from weakrefs expiring + # during gc, not from the user adding new elements to the dict. + # + # Since we're accessing .data, we have to manually handle checking for + # expired ref instances during iteration. Normally WeakValueDictionary + # handles this for us. + # + # This command reproduces at time of writing: + # pytest hypothesis-python/tests/ -k test_intervals_are_equivalent_to_their_lists + # --parallel-threads 2 + for k, ref in RANDOMS_TO_MANAGE.data.copy().items(): # type: ignore + r = ref() + if r is None: + # ie the random instance has been gc'd + continue # pragma: no cover + states[k] = r.getstate() + if k == _global_random_rkey: + # r.seed sets the random's state. We want to add that state to + # _known_random_states before calling r.seed, in case a thread + # switch occurs between the two. To figure out the seed -> state + # mapping, set the seed on a dummy random and add that state to + # _known_random_state. + # + # we could use a global dummy random here, but then we'd have to + # put a lock around it, and it's not clear to me if that's more + # efficient than constructing a new instance each time. + dummy_random = Random() + dummy_random.seed(seed) + _known_random_state_hashes.add(hash(dummy_random.getstate())) + # we expect `assert r.getstate() == dummy_random.getstate()` to + # hold here, but thread switches means it might not. + + r.seed(seed) + + def restore_all() -> None: + for k, state in states.items(): + r = RANDOMS_TO_MANAGE.get(k) + if r is None: # i.e., has been garbage-collected + continue + + if k == _global_random_rkey: + _known_random_state_hashes.add(hash(state)) + r.setstate(state) + + states.clear() + + return seed_all, restore_all + + +@contextlib.contextmanager +def deterministic_PRNG(seed: int = 0) -> Generator[None, None, None]: + """Context manager that handles random.seed without polluting global state. + + See issue #1255 and PR #1295 for details and motivation - in short, + leaving the global pseudo-random number generator (PRNG) seeded is a very + bad idea in principle, and breaks all kinds of independence assumptions + in practice. + """ + if ( + hypothesis.core.threadlocal._hypothesis_global_random is None + ): # pragma: no cover + hypothesis.core.threadlocal._hypothesis_global_random = Random() + register_random(hypothesis.core.threadlocal._hypothesis_global_random) + + seed_all, restore_all = get_seeder_and_restorer(seed) + seed_all() + try: + yield + finally: + restore_all() + # TODO it would be nice to clean up _known_random_state_hashes when no + # active deterministic_PRNG contexts remain, to free memory (see similar + # logic in StackframeLimiter). But it's a bit annoying to get right, and + # likely not a big deal. 
diff --git a/vendored/hypothesis/internal/escalation.py b/vendored/hypothesis/internal/escalation.py new file mode 100644 index 0000000..031a990 --- /dev/null +++ b/vendored/hypothesis/internal/escalation.py @@ -0,0 +1,175 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import contextlib +import os +import sys +import textwrap +import traceback +from collections.abc import Callable +from dataclasses import dataclass +from functools import partial +from inspect import getfile, getsourcefile +from pathlib import Path +from types import ModuleType, TracebackType + +import hypothesis +from hypothesis.errors import _Trimmable +from hypothesis.internal.compat import BaseExceptionGroup +from hypothesis.utils.dynamicvariables import DynamicVariable + +FILE_CACHE: dict[ModuleType, dict[str, bool]] = {} + + +def belongs_to(package: ModuleType) -> Callable[[str], bool]: + if getattr(package, "__file__", None) is None: # pragma: no cover + return lambda filepath: False + + assert package.__file__ is not None + FILE_CACHE.setdefault(package, {}) + cache = FILE_CACHE[package] + root = Path(package.__file__).resolve().parent + + def accept(filepath: str) -> bool: + try: + return cache[filepath] + except KeyError: + pass + try: + Path(filepath).resolve().relative_to(root) + result = True + except Exception: + result = False + cache[filepath] = result + return result + + accept.__name__ = f"is_{package.__name__}_file" + return accept + + +is_hypothesis_file = belongs_to(hypothesis) + + +def get_trimmed_traceback( + exception: BaseException | None = None, +) -> TracebackType | None: + """Return the current traceback, minus any frames added by Hypothesis.""" + if exception is None: + _, exception, tb = sys.exc_info() + else: + tb = exception.__traceback__ + # Avoid trimming the traceback if we're in verbose mode, or the error + # was raised inside Hypothesis. Additionally, the environment variable + # HYPOTHESIS_NO_TRACEBACK_TRIM is respected if nonempty, because verbose + # mode is prohibitively slow when debugging strategy recursion errors. + assert hypothesis.settings.default is not None + if ( + tb is None + or os.environ.get("HYPOTHESIS_NO_TRACEBACK_TRIM") + or hypothesis.settings.default.verbosity >= hypothesis.Verbosity.debug + or ( + is_hypothesis_file(traceback.extract_tb(tb)[-1][0]) + and not isinstance(exception, _Trimmable) + ) + ): + return tb + while tb.tb_next is not None and ( + # If the frame is from one of our files, it's been added by Hypothesis. + is_hypothesis_file(getsourcefile(tb.tb_frame) or getfile(tb.tb_frame)) + # But our `@proxies` decorator overrides the source location, + # so we check for an attribute it injects into the frame too. + or tb.tb_frame.f_globals.get("__hypothesistracebackhide__") is True + ): + tb = tb.tb_next + return tb + + +@dataclass(slots=True, frozen=True) +class InterestingOrigin: + # The `interesting_origin` is how Hypothesis distinguishes between multiple + # failures, for reporting and also to replay from the example database (even + # if report_multiple_bugs=False). 
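# --- Editor's note: illustrative sketch, not part of the vendored file. ---
# How a failure gets its identity, per the comment above: the exception type
# plus the trimmed traceback's final location.
from hypothesis.internal.escalation import InterestingOrigin

try:
    raise ValueError("boom")
except ValueError as err:
    origin = InterestingOrigin.from_exception(err)
print(origin)  # e.g. "ValueError at <path to this file>:<line number>"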
We traditionally use the exception type and + # location, but have extracted this logic in order to see through `except ...:` + # blocks and understand the __cause__ (`raise x from y`) or __context__ that + # first raised an exception as well as PEP-654 exception groups. + exc_type: type[BaseException] + filename: str | None + lineno: int | None + context: "InterestingOrigin | tuple[()]" + group_elems: "tuple[InterestingOrigin, ...]" + + def __str__(self) -> str: + ctx = "" + if self.context: + ctx = textwrap.indent(f"\ncontext: {self.context}", prefix=" ") + group = "" + if self.group_elems: + chunks = "\n ".join(str(x) for x in self.group_elems) + group = textwrap.indent(f"\nchild exceptions:\n {chunks}", prefix=" ") + return f"{self.exc_type.__name__} at {self.filename}:{self.lineno}{ctx}{group}" + + @classmethod + def from_exception( + cls, exception: BaseException, /, seen: tuple[BaseException, ...] = () + ) -> "InterestingOrigin": + filename, lineno = None, None + if tb := get_trimmed_traceback(exception): + filename, lineno, *_ = traceback.extract_tb(tb)[-1] + seen = (*seen, exception) + make = partial(cls.from_exception, seen=seen) + context: InterestingOrigin | tuple[()] = () + if exception.__context__ is not None and exception.__context__ not in seen: + context = make(exception.__context__) + return cls( + type(exception), + filename, + lineno, + # Note that if __cause__ is set it is always equal to __context__, explicitly + # to support introspection when debugging, so we can use that unconditionally. + context, + # We distinguish exception groups by the inner exceptions, as for __context__ + ( + tuple(make(exc) for exc in exception.exceptions if exc not in seen) + if isinstance(exception, BaseExceptionGroup) + else () + ), + ) + + +current_pytest_item = DynamicVariable(None) + + +def _get_exceptioninfo(): + # ExceptionInfo was moved to the top-level namespace in Pytest 7.0 + if "pytest" in sys.modules: + with contextlib.suppress(Exception): + # From Pytest 7, __init__ warns on direct calls. + return sys.modules["pytest"].ExceptionInfo.from_exc_info + if "_pytest._code" in sys.modules: # old versions only + with contextlib.suppress(Exception): + return sys.modules["_pytest._code"].ExceptionInfo + return None # pragma: no cover # coverage tests always use pytest + + +def format_exception(err, tb): + # Try using Pytest to match the currently configured traceback style + ExceptionInfo = _get_exceptioninfo() + if current_pytest_item.value is not None and ExceptionInfo is not None: + item = current_pytest_item.value + return str(item.repr_failure(ExceptionInfo((type(err), err, tb)))) + "\n" + + # Or use better_exceptions, if that's installed and enabled + if "better_exceptions" in sys.modules: + better_exceptions = sys.modules["better_exceptions"] + if sys.excepthook is better_exceptions.excepthook: + return "".join(better_exceptions.format_exception(type(err), err, tb)) + + # If all else fails, use the standard-library formatting tools + return "".join(traceback.format_exception(type(err), err, tb)) diff --git a/vendored/hypothesis/internal/filtering.py b/vendored/hypothesis/internal/filtering.py new file mode 100644 index 0000000..155e67e --- /dev/null +++ b/vendored/hypothesis/internal/filtering.py @@ -0,0 +1,364 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. 
+# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +"""Tools for understanding predicates, to satisfy them by construction. + +For example:: + + integers().filter(lambda x: x >= 0) -> integers(min_value=0) + +This is intractable in general, but reasonably easy for simple cases involving +numeric bounds, strings with length or regex constraints, and collection lengths - +and those are precisely the most common cases. When they arise in e.g. Pandas +dataframes, it's also pretty painful to do the constructive version by hand in +a library; so we prefer to share all the implementation effort here. +See https://github.com/HypothesisWorks/hypothesis/issues/2701 for details. +""" + +import ast +import inspect +import math +import operator +from collections.abc import Callable, Collection +from decimal import Decimal +from fractions import Fraction +from functools import partial +from typing import Any, NamedTuple, TypeVar + +from hypothesis.internal.compat import ceil, floor +from hypothesis.internal.floats import next_down, next_up +from hypothesis.internal.lambda_sources import lambda_description +from hypothesis.internal.reflection import get_pretty_function_description + +try: + # new in 3.14 + from functools import Placeholder # type: ignore +except ImportError: + Placeholder = object() + +Ex = TypeVar("Ex") +Predicate = Callable[[Ex], bool] + + +class ConstructivePredicate(NamedTuple): + """Return constraints to the appropriate strategy, and the predicate if needed. + + For example:: + + integers().filter(lambda x: x >= 0) + -> {"min_value": 0"}, None + + integers().filter(lambda x: x >= 0 and x % 7) + -> {"min_value": 0}, lambda x: x % 7 + + At least in principle - for now we usually return the predicate unchanged + if needed. + + We have a separate get-predicate frontend for each "group" of strategies; e.g. + for each numeric type, for strings, for bytes, for collection sizes, etc. + """ + + constraints: dict[str, Any] + predicate: Predicate | None + + @classmethod + def unchanged(cls, predicate: Predicate) -> "ConstructivePredicate": + return cls({}, predicate) + + def __repr__(self) -> str: + fn = get_pretty_function_description(self.predicate) + return f"{self.__class__.__name__}(constraints={self.constraints!r}, predicate={fn})" + + +ARG = object() + + +def convert(node: ast.AST, argname: str) -> object: + if isinstance(node, ast.Name): + if node.id != argname: + raise ValueError("Non-local variable") + return ARG + if isinstance(node, ast.Call): + if ( + isinstance(node.func, ast.Name) + and node.func.id == "len" + and len(node.args) == 1 + ): + # error unless comparison is to the len *of the lambda arg* + return convert(node.args[0], argname) + return ast.literal_eval(node) + + +def comp_to_constraints(x: ast.AST, op: ast.AST, y: ast.AST, *, argname: str) -> dict: + a = convert(x, argname) + b = convert(y, argname) + num = (int, float) + if not (a is ARG and isinstance(b, num)) and not (isinstance(a, num) and b is ARG): + # It would be possible to work out if comparisons between two literals + # are always true or false, but it's too rare to be worth the complexity. + # (and we can't even do `arg == arg`, because what if it's NaN?) 
+ raise ValueError("Can't analyse this comparison") + + of_len = {"len": True} if isinstance(x, ast.Call) or isinstance(y, ast.Call) else {} + + if isinstance(op, ast.Lt): + if a is ARG: + return {"max_value": b, "exclude_max": True, **of_len} + return {"min_value": a, "exclude_min": True, **of_len} + elif isinstance(op, ast.LtE): + if a is ARG: + return {"max_value": b, **of_len} + return {"min_value": a, **of_len} + elif isinstance(op, ast.Eq): + if a is ARG: + return {"min_value": b, "max_value": b, **of_len} + return {"min_value": a, "max_value": a, **of_len} + elif isinstance(op, ast.GtE): + if a is ARG: + return {"min_value": b, **of_len} + return {"max_value": a, **of_len} + elif isinstance(op, ast.Gt): + if a is ARG: + return {"min_value": b, "exclude_min": True, **of_len} + return {"max_value": a, "exclude_max": True, **of_len} + raise ValueError("Unhandled comparison operator") # e.g. ast.Ne + + +def merge_preds(*con_predicates: ConstructivePredicate) -> ConstructivePredicate: + # This function is just kinda messy. Unfortunately the neatest way + # to do this is just to roll out each case and handle them in turn. + base = { + "min_value": -math.inf, + "max_value": math.inf, + "exclude_min": False, + "exclude_max": False, + } + predicate = None + for kw, p in con_predicates: + assert ( + not p or not predicate or p is predicate + ), "Can't merge two partially-constructive preds" + predicate = p or predicate + if "min_value" in kw: + if kw["min_value"] > base["min_value"]: + base["exclude_min"] = kw.get("exclude_min", False) + base["min_value"] = kw["min_value"] + elif kw["min_value"] == base["min_value"]: + base["exclude_min"] |= kw.get("exclude_min", False) + if "max_value" in kw: + if kw["max_value"] < base["max_value"]: + base["exclude_max"] = kw.get("exclude_max", False) + base["max_value"] = kw["max_value"] + elif kw["max_value"] == base["max_value"]: + base["exclude_max"] |= kw.get("exclude_max", False) + + has_len = {"len" in kw for kw, _ in con_predicates if kw} + assert len(has_len) <= 1, "can't mix numeric with length constraints" + if has_len == {True}: + base["len"] = True + + if not base["exclude_min"]: + del base["exclude_min"] + if base["min_value"] == -math.inf: + del base["min_value"] + if not base["exclude_max"]: + del base["exclude_max"] + if base["max_value"] == math.inf: + del base["max_value"] + return ConstructivePredicate(base, predicate) + + +def numeric_bounds_from_ast( + tree: ast.AST, argname: str, fallback: ConstructivePredicate +) -> ConstructivePredicate: + """Take an AST; return a ConstructivePredicate. + + >>> lambda x: x >= 0 + {"min_value": 0}, None + >>> lambda x: x < 10 + {"max_value": 10, "exclude_max": True}, None + >>> lambda x: len(x) >= 5 + {"min_value": 5, "len": True}, None + >>> lambda x: x >= y + {}, lambda x: x >= y + + See also https://greentreesnakes.readthedocs.io/en/latest/ + """ + if isinstance(tree, ast.Compare): + ops = tree.ops + vals = tree.comparators + comparisons = [(tree.left, ops[0], vals[0])] + for i, (op, val) in enumerate(zip(ops[1:], vals[1:], strict=True), start=1): + comparisons.append((vals[i - 1], op, val)) + bounds = [] + for comp in comparisons: + try: + constraints = comp_to_constraints(*comp, argname=argname) + # Because `len` could be redefined in the enclosing scope, we *always* + # have to apply the condition as a filter, in addition to rewriting. 
+ pred = fallback.predicate if "len" in constraints else None + bounds.append(ConstructivePredicate(constraints, pred)) + except ValueError: + bounds.append(fallback) + return merge_preds(*bounds) + + if isinstance(tree, ast.BoolOp) and isinstance(tree.op, ast.And): + return merge_preds( + *(numeric_bounds_from_ast(node, argname, fallback) for node in tree.values) + ) + + return fallback + + +def get_numeric_predicate_bounds(predicate: Predicate) -> ConstructivePredicate: + """Shared logic for understanding numeric bounds. + + We then specialise this in the other functions below, to ensure that e.g. + all the values are representable in the types that we're planning to generate + so that the strategy validation doesn't complain. + """ + unchanged = ConstructivePredicate.unchanged(predicate) + if ( + isinstance(predicate, partial) + and not predicate.keywords + and ( + len(predicate.args) == 1 + or (predicate.args[0] is Placeholder and len(predicate.args) == 2) + ) + ): + if len(predicate.args) == 1: + arg = predicate.args[0] + func = predicate.func + else: # pragma: no cover # Python 3.14+ only + assert predicate.args[0] is Placeholder + arg = predicate.args[1] + func = { # reverses the table below; eq is unchanged + operator.lt: operator.gt, + operator.le: operator.ge, + operator.ge: operator.le, + operator.gt: operator.lt, + }.get(predicate.func, predicate.func) + assert func not in (min_len, max_len) # sanity-check; these are private + + if ( + (isinstance(arg, Decimal) and Decimal.is_snan(arg)) + or not isinstance(arg, (int, float, Fraction, Decimal)) + or math.isnan(arg) + ): + return unchanged + options = { + # We're talking about op(arg, x) - the reverse of our usual intuition! + operator.lt: {"min_value": arg, "exclude_min": True}, # lambda x: arg < x + operator.le: {"min_value": arg}, # lambda x: arg <= x + operator.eq: {"min_value": arg, "max_value": arg}, # lambda x: arg == x + operator.ge: {"max_value": arg}, # lambda x: arg >= x + operator.gt: {"max_value": arg, "exclude_max": True}, # lambda x: arg > x + # Special-case our default predicates for length bounds + min_len: {"min_value": arg, "len": True}, + max_len: {"max_value": arg, "len": True}, + } + if func in options: + return ConstructivePredicate(options[func], None) + + # This section is a little complicated, but stepping through with comments should + # help to clarify it. We start by finding the source code for our predicate and + # parsing it to an abstract syntax tree; if this fails for any reason we bail out + # and fall back to standard rejection sampling (a running theme). + try: + if predicate.__name__ == "": + source = lambda_description(predicate) + else: + source = inspect.getsource(predicate) + tree: ast.AST = ast.parse(source) + except Exception: + return unchanged + + # Dig down to the relevant subtree - our tree is probably a Module containing + # either a FunctionDef, or an Expr which in turn contains a lambda definition. + while isinstance(tree, ast.Module) and len(tree.body) == 1: + tree = tree.body[0] + while isinstance(tree, ast.Expr): + tree = tree.value + + if isinstance(tree, ast.Lambda) and len(tree.args.args) == 1: + return numeric_bounds_from_ast(tree.body, tree.args.args[0].arg, unchanged) + elif isinstance(tree, ast.FunctionDef) and len(tree.args.args) == 1: + if len(tree.body) != 1 or not isinstance(tree.body[0], ast.Return): + # If the body of the function is anything but `return `, + # i.e. as simple as a lambda, we can't process it (yet). 
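# --- Editor's note: illustrative sketch, not part of the vendored file. ---
# The source-parsing path above in action, matching the module docstring's
# integers().filter(lambda x: x >= 0) example. The expected output assumes the
# lambda's source can be recovered (e.g. this snippet lives in a .py file);
# otherwise the predicate is returned unchanged as a residual filter.
from hypothesis.internal.filtering import get_integer_predicate_bounds

constraints, residual = get_integer_predicate_bounds(lambda x: x >= 0)
print(constraints, residual)  # expected: {'min_value': 0} None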
+ return unchanged + argname = tree.args.args[0].arg + body = tree.body[0].value + assert isinstance(body, ast.AST) + return numeric_bounds_from_ast(body, argname, unchanged) + return unchanged + + +def get_integer_predicate_bounds(predicate: Predicate) -> ConstructivePredicate: + constraints, predicate = get_numeric_predicate_bounds(predicate) + + if "min_value" in constraints: + if constraints["min_value"] == -math.inf: + del constraints["min_value"] + elif math.isinf(constraints["min_value"]): + return ConstructivePredicate({"min_value": 1, "max_value": -1}, None) + elif constraints["min_value"] != int(constraints["min_value"]): + constraints["min_value"] = ceil(constraints["min_value"]) + elif constraints.get("exclude_min", False): + constraints["min_value"] = int(constraints["min_value"]) + 1 + + if "max_value" in constraints: + if constraints["max_value"] == math.inf: + del constraints["max_value"] + elif math.isinf(constraints["max_value"]): + return ConstructivePredicate({"min_value": 1, "max_value": -1}, None) + elif constraints["max_value"] != int(constraints["max_value"]): + constraints["max_value"] = floor(constraints["max_value"]) + elif constraints.get("exclude_max", False): + constraints["max_value"] = int(constraints["max_value"]) - 1 + + kw_categories = {"min_value", "max_value", "len"} + constraints = {k: v for k, v in constraints.items() if k in kw_categories} + return ConstructivePredicate(constraints, predicate) + + +def get_float_predicate_bounds(predicate: Predicate) -> ConstructivePredicate: + constraints, predicate = get_numeric_predicate_bounds(predicate) + + if "min_value" in constraints: + min_value = constraints["min_value"] + constraints["min_value"] = float(constraints["min_value"]) + if min_value < constraints["min_value"] or ( + min_value == constraints["min_value"] + and constraints.get("exclude_min", False) + ): + constraints["min_value"] = next_up(constraints["min_value"]) + + if "max_value" in constraints: + max_value = constraints["max_value"] + constraints["max_value"] = float(constraints["max_value"]) + if max_value > constraints["max_value"] or ( + max_value == constraints["max_value"] + and constraints.get("exclude_max", False) + ): + constraints["max_value"] = next_down(constraints["max_value"]) + + constraints = { + k: v for k, v in constraints.items() if k in {"min_value", "max_value"} + } + return ConstructivePredicate(constraints, predicate) + + +def max_len(size: int, element: Collection[object]) -> bool: + return len(element) <= size + + +def min_len(size: int, element: Collection[object]) -> bool: + return size <= len(element) diff --git a/vendored/hypothesis/internal/floats.py b/vendored/hypothesis/internal/floats.py new file mode 100644 index 0000000..93f09de --- /dev/null +++ b/vendored/hypothesis/internal/floats.py @@ -0,0 +1,209 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import math +import struct +from collections.abc import Callable +from sys import float_info +from typing import Literal, SupportsFloat, TypeAlias + +SignedIntFormat: TypeAlias = Literal["!h", "!i", "!q"] +UnsignedIntFormat: TypeAlias = Literal["!H", "!I", "!Q"] +IntFormat: TypeAlias = SignedIntFormat | UnsignedIntFormat +FloatFormat: TypeAlias = Literal["!e", "!f", "!d"] +Width: TypeAlias = Literal[16, 32, 64] + +# Format codes for (int, float) sized types, used for byte-wise casts. +# See https://docs.python.org/3/library/struct.html#format-characters +STRUCT_FORMATS: dict[int, tuple[UnsignedIntFormat, FloatFormat]] = { + 16: ("!H", "!e"), + 32: ("!I", "!f"), + 64: ("!Q", "!d"), +} + +TO_SIGNED_FORMAT: dict[UnsignedIntFormat, SignedIntFormat] = { + "!H": "!h", + "!I": "!i", + "!Q": "!q", +} + + +def reinterpret_bits(x: float | int, from_: str, to: str) -> float | int: + x = struct.unpack(to, struct.pack(from_, x))[0] + assert isinstance(x, (float, int)) + return x + + +def float_of(x: SupportsFloat, width: Width) -> float: + assert width in (16, 32, 64) + if width == 64: + return float(x) + elif width == 32: + return reinterpret_bits(float(x), "!f", "!f") + else: + return reinterpret_bits(float(x), "!e", "!e") + + +def is_negative(x: SupportsFloat) -> bool: + try: + return math.copysign(1.0, x) < 0 + except TypeError: + raise TypeError( + f"Expected float but got {x!r} of type {type(x).__name__}" + ) from None + + +def count_between_floats(x: float, y: float, width: int = 64) -> int: + assert x <= y + if is_negative(x): + if is_negative(y): + return float_to_int(x, width) - float_to_int(y, width) + 1 + else: + return count_between_floats(x, -0.0, width) + count_between_floats( + 0.0, y, width + ) + else: + assert not is_negative(y) + return float_to_int(y, width) - float_to_int(x, width) + 1 + + +def float_to_int(value: float, width: int = 64) -> int: + fmt_int, fmt_flt = STRUCT_FORMATS[width] + x = reinterpret_bits(value, fmt_flt, fmt_int) + assert isinstance(x, int) + return x + + +def int_to_float(value: int, width: int = 64) -> float: + fmt_int, fmt_flt = STRUCT_FORMATS[width] + return reinterpret_bits(value, fmt_int, fmt_flt) + + +def next_up(value: float, width: int = 64) -> float: + """Return the first float larger than finite `val` - IEEE 754's `nextUp`. + + From https://stackoverflow.com/a/10426033, with thanks to Mark Dickinson. 
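# --- Editor's note: illustrative sketch, not part of the vendored file. ---
# next_up/next_down step by exactly one representable value, via the signed
# reinterpretation of the bit pattern referenced in the docstring above:
import math
from hypothesis.internal.floats import next_down, next_up

assert next_up(1.0) == 1.0 + 2**-52   # one ulp above 1.0 at 64-bit width
assert next_down(next_up(1.0)) == 1.0
assert next_up(0.0) == 5e-324         # smallest positive subnormal
assert math.isinf(next_up(math.inf))  # +inf maps to itself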
+ """ + assert isinstance(value, float), f"{value!r} of type {type(value)}" + if math.isnan(value) or (math.isinf(value) and value > 0): + return value + if value == 0.0 and is_negative(value): + return 0.0 + fmt_int, fmt_flt = STRUCT_FORMATS[width] + # Note: n is signed; float_to_int returns unsigned + fmt_int_signed = TO_SIGNED_FORMAT[fmt_int] + n = reinterpret_bits(value, fmt_flt, fmt_int_signed) + if n >= 0: + n += 1 + else: + n -= 1 + return reinterpret_bits(n, fmt_int_signed, fmt_flt) + + +def next_down(value: float, width: int = 64) -> float: + return -next_up(-value, width) + + +def next_down_normal(value: float, width: int, *, allow_subnormal: bool) -> float: + value = next_down(value, width) + if (not allow_subnormal) and 0 < abs(value) < width_smallest_normals[width]: + return 0.0 if value > 0 else -width_smallest_normals[width] + return value + + +def next_up_normal(value: float, width: int, *, allow_subnormal: bool) -> float: + return -next_down_normal(-value, width, allow_subnormal=allow_subnormal) + + +# Smallest positive non-zero numbers that is fully representable by an +# IEEE-754 float, calculated with the width's associated minimum exponent. +# Values from https://en.wikipedia.org/wiki/IEEE_754#Basic_and_interchange_formats +width_smallest_normals: dict[int, float] = { + 16: 2 ** -(2 ** (5 - 1) - 2), + 32: 2 ** -(2 ** (8 - 1) - 2), + 64: 2 ** -(2 ** (11 - 1) - 2), +} +assert width_smallest_normals[64] == float_info.min + +mantissa_mask = (1 << 52) - 1 + + +def make_float_clamper( + min_value: float, + max_value: float, + *, + allow_nan: bool, + smallest_nonzero_magnitude: float, +) -> Callable[[float], float]: + """ + Return a function that clamps positive floats into the given bounds. + """ + from hypothesis.internal.conjecture.choice import choice_permitted + + assert sign_aware_lte(min_value, max_value) + range_size = min(max_value - min_value, float_info.max) + + def float_clamper(f: float) -> float: + if choice_permitted( + f, + { + "min_value": min_value, + "max_value": max_value, + "allow_nan": allow_nan, + "smallest_nonzero_magnitude": smallest_nonzero_magnitude, + }, + ): + return f + # Outside bounds; pick a new value, sampled from the allowed range, + # using the mantissa bits. + mant = float_to_int(abs(f)) & mantissa_mask + f = min_value + range_size * (mant / mantissa_mask) + + # if we resampled into the space disallowed by smallest_nonzero_magnitude, + # default to smallest_nonzero_magnitude. + if 0 < abs(f) < smallest_nonzero_magnitude: + f = smallest_nonzero_magnitude + # we must have either -smallest_nonzero_magnitude <= min_value or + # smallest_nonzero_magnitude >= max_value, or no values would be + # possible. If smallest_nonzero_magnitude is not valid (because it's + # larger than max_value), then -smallest_nonzero_magnitude must be valid. + if smallest_nonzero_magnitude > max_value: + f *= -1 + + # Re-enforce the bounds (just in case of floating point arithmetic error) + return clamp(min_value, f, max_value) + + return float_clamper + + +def sign_aware_lte(x: float | int, y: float | int) -> bool: + """Less-than-or-equals, but strictly orders -0.0 and 0.0""" + if x == 0.0 == y: + return math.copysign(1.0, x) <= math.copysign(1.0, y) + else: + return x <= y + + +def clamp(lower: float | int, value: float | int, upper: float | int) -> float | int: + """Given a value and lower/upper bounds, 'clamp' the value so that + it satisfies lower <= value <= upper. 
NaN is mapped to lower.""" + # this seems pointless (and is for integers), but handles the -0.0/0.0 case. + if not sign_aware_lte(lower, value): + return lower + if not sign_aware_lte(value, upper): + return upper + return value + + +SMALLEST_SUBNORMAL = next_up(0.0) +SIGNALING_NAN = int_to_float(0x7FF8_0000_0000_0001) # nonzero mantissa +MAX_PRECISE_INTEGER = 2**53 +assert math.isnan(SIGNALING_NAN) +assert math.copysign(1, SIGNALING_NAN) == 1 diff --git a/vendored/hypothesis/internal/healthcheck.py b/vendored/hypothesis/internal/healthcheck.py new file mode 100644 index 0000000..356abc4 --- /dev/null +++ b/vendored/hypothesis/internal/healthcheck.py @@ -0,0 +1,21 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from hypothesis.errors import FailedHealthCheck + + +def fail_health_check(settings, message, label): + # Tell pytest to omit the body of this function from tracebacks + # https://docs.pytest.org/en/latest/example/simple.html#writing-well-integrated-assertion-helpers + __tracebackhide__ = True + + if label in settings.suppress_health_check: + return + raise FailedHealthCheck(message) diff --git a/vendored/hypothesis/internal/intervalsets.py b/vendored/hypothesis/internal/intervalsets.py new file mode 100644 index 0000000..ec2f3eb --- /dev/null +++ b/vendored/hypothesis/internal/intervalsets.py @@ -0,0 +1,311 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from collections.abc import Iterable, Sequence +from typing import TYPE_CHECKING, TypeAlias, cast, final + +if TYPE_CHECKING: + from typing_extensions import Self + +IntervalsT: TypeAlias = tuple[tuple[int, int], ...] + + +# @final makes mypy happy with the Self return annotations. We otherwise run +# afoul of: +# > You should not use Self as the return annotation if the method is not +# > guaranteed to return an instance of a subclass when the class is subclassed +# > https://docs.python.org/3/library/typing.html#typing.Self + + +@final +class IntervalSet: + """ + A compact and efficient representation of a set of ``(a, b)`` intervals. Can + be treated like a set of integers, in that ``n in intervals`` will return + ``True`` if ``n`` is contained in any of the ``(a, b)`` intervals, and + ``False`` otherwise. + """ + + @classmethod + def from_string(cls, s: str) -> "Self": + """Return a tuple of intervals, covering the codepoints of characters in `s`. + + >>> IntervalSet.from_string('abcdef0123456789') + ((48, 57), (97, 102)) + """ + x = cls([(ord(c), ord(c)) for c in sorted(s)]) + return x.union(x) + + def __init__(self, intervals: Iterable[Sequence[int]] = ()) -> None: + self.intervals: IntervalsT = cast( + IntervalsT, tuple(tuple(v) for v in intervals) + ) + # cast above is validated by this length assertion. 
check here instead of + # before to not exhaust generators before we create intervals from it + assert all(len(v) == 2 for v in self.intervals) + + self.offsets: list[int] = [0] + for u, v in self.intervals: + self.offsets.append(self.offsets[-1] + v - u + 1) + self.size = self.offsets.pop() + self._idx_of_zero = self.index_above(ord("0")) + self._idx_of_Z = min(self.index_above(ord("Z")), len(self) - 1) + + def __len__(self) -> int: + return self.size + + def __iter__(self) -> Iterable[int]: + for u, v in self.intervals: + yield from range(u, v + 1) + + def __getitem__(self, i: int) -> int: + if i < 0: + i = self.size + i + if i < 0 or i >= self.size: + raise IndexError(f"Invalid index {i} for [0, {self.size})") + # Want j = maximal such that offsets[j] <= i + + j = len(self.intervals) - 1 + if self.offsets[j] > i: + hi = j + lo = 0 + # Invariant: offsets[lo] <= i < offsets[hi] + while lo + 1 < hi: + mid = (lo + hi) // 2 + if self.offsets[mid] <= i: + lo = mid + else: + hi = mid + j = lo + t = i - self.offsets[j] + u, v = self.intervals[j] + r = u + t + assert r <= v + return r + + def __contains__(self, elem: str | int) -> bool: + if isinstance(elem, str): + elem = ord(elem) + assert 0 <= elem <= 0x10FFFF + return any(start <= elem <= end for start, end in self.intervals) + + def __repr__(self) -> str: + return f"IntervalSet({self.intervals!r})" + + def index(self, value: int) -> int: + for offset, (u, v) in zip(self.offsets, self.intervals, strict=True): + if u == value: + return offset + elif u > value: + raise ValueError(f"{value} is not in list") + if value <= v: + return offset + (value - u) + raise ValueError(f"{value} is not in list") + + def index_above(self, value: int) -> int: + for offset, (u, v) in zip(self.offsets, self.intervals, strict=True): + if u >= value: + return offset + if value <= v: + return offset + (value - u) + return self.size + + def __or__(self, other: "Self") -> "Self": + return self.union(other) + + def __sub__(self, other: "Self") -> "Self": + return self.difference(other) + + def __and__(self, other: "Self") -> "Self": + return self.intersection(other) + + def __eq__(self, other: object) -> bool: + return isinstance(other, IntervalSet) and (other.intervals == self.intervals) + + def __hash__(self) -> int: + return hash(self.intervals) + + def union(self, other: "Self") -> "Self": + """Merge two sequences of intervals into a single tuple of intervals. + + Any integer bounded by `x` or `y` is also bounded by the result. + + >>> union([(3, 10)], [(1, 2), (5, 17)]) + ((1, 17),) + """ + assert isinstance(other, type(self)) + x = self.intervals + y = other.intervals + if not x: + return IntervalSet(y) + if not y: + return IntervalSet(x) + intervals = sorted(x + y, reverse=True) + result = [intervals.pop()] + while intervals: + # 1. intervals is in descending order + # 2. pop() takes from the RHS. + # 3. (a, b) was popped 1st, then (u, v) was popped 2nd + # 4. Therefore: a <= u + # 5. We assume that u <= v and a <= b + # 6. So we need to handle 2 cases of overlap, and one disjoint case + # | u--v | u----v | u--v | + # | a----b | a--b | a--b | + u, v = intervals.pop() + a, b = result[-1] + if u <= b + 1: + # Overlap cases + result[-1] = (a, max(v, b)) + else: + # Disjoint case + result.append((u, v)) + return IntervalSet(result) + + def difference(self, other: "Self") -> "Self": + """Set difference for lists of intervals. That is, returns a list of + intervals that bounds all values bounded by x that are not also bounded by + y. 
x and y are expected to be in sorted order. + + For example difference([(1, 10)], [(2, 3), (9, 15)]) would + return [(1, 1), (4, 8)], removing the values 2, 3, 9 and 10 from the + interval. + """ + assert isinstance(other, type(self)) + x = self.intervals + y = other.intervals + if not y: + return IntervalSet(x) + x = list(map(list, x)) + i = 0 + j = 0 + result: list[Iterable[int]] = [] + while i < len(x) and j < len(y): + # Iterate in parallel over x and y. j stays pointing at the smallest + # interval in the left hand side that could still overlap with some + # element of x at index >= i. + # Similarly, i is not incremented until we know that it does not + # overlap with any element of y at index >= j. + + xl, xr = x[i] + assert xl <= xr + yl, yr = y[j] + assert yl <= yr + + if yr < xl: + # The interval at y[j] is strictly to the left of the interval at + # x[i], so will not overlap with it or any later interval of x. + j += 1 + elif yl > xr: + # The interval at y[j] is strictly to the right of the interval at + # x[i], so all of x[i] goes into the result as no further intervals + # in y will intersect it. + result.append(x[i]) + i += 1 + elif yl <= xl: + if yr >= xr: + # x[i] is contained entirely in y[j], so we just skip over it + # without adding it to the result. + i += 1 + else: + # The beginning of x[i] is contained in y[j], so we update the + # left endpoint of x[i] to remove this, and increment j as we + # now have moved past it. Note that this is not added to the + # result as is, as more intervals from y may intersect it so it + # may need updating further. + x[i][0] = yr + 1 + j += 1 + else: + # yl > xl, so the left hand part of x[i] is not contained in y[j], + # so there are some values we should add to the result. + result.append((xl, yl - 1)) + + if yr + 1 <= xr: + # If y[j] finishes before x[i] does, there may be some values + # in x[i] left that should go in the result (or they may be + # removed by a later interval in y), so we update x[i] to + # reflect that and increment j because it no longer overlaps + # with any remaining element of x. + x[i][0] = yr + 1 + j += 1 + else: + # Every element of x[i] other than the initial part we have + # already added is contained in y[j], so we move to the next + # interval. + i += 1 + # Any remaining intervals in x do not overlap with any of y, as if they did + # we would not have incremented j to the end, so can be added to the result + # as they are. + result.extend(x[i:]) + return IntervalSet(map(tuple, result)) + + def intersection(self, other: "Self") -> "Self": + """Set intersection for lists of intervals.""" + assert isinstance(other, type(self)), other + intervals = [] + i = j = 0 + while i < len(self.intervals) and j < len(other.intervals): + u, v = self.intervals[i] + U, V = other.intervals[j] + if u > V: + j += 1 + elif U > v: + i += 1 + else: + intervals.append((max(u, U), min(v, V))) + if v < V: + i += 1 + else: + j += 1 + return IntervalSet(intervals) + + def char_in_shrink_order(self, i: int) -> str: + # We would like it so that, where possible, shrinking replaces + # characters with simple ascii characters, so we rejig this + # bit so that the smallest values are 0, 1, 2, ..., Z. + # + # Imagine that numbers are laid out as abc0yyyZ... + # this rearranges them so that they are laid out as + # 0yyyZcba..., which gives a better shrinking order. + if i <= self._idx_of_Z: + # We want to rewrite the integers [0, n] inclusive + # to [zero_point, Z_point]. 
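+            # For example, if _idx_of_zero were 3 and _idx_of_Z were 7, then
+            # n would be 4: shrink-order indices 0..4 map to 3..7, while
+            # indices 5..7 map back down to 2..0.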
+ n = self._idx_of_Z - self._idx_of_zero + if i <= n: + i += self._idx_of_zero + else: + # We want to rewrite the integers [n + 1, Z_point] to + # [zero_point, 0] (reversing the order so that codepoints below + # zero_point shrink upwards). + i = self._idx_of_zero - (i - n) + assert i < self._idx_of_zero + assert 0 <= i <= self._idx_of_Z + + return chr(self[i]) + + def index_from_char_in_shrink_order(self, c: str) -> int: + """ + Inverse of char_in_shrink_order. + """ + assert len(c) == 1 + i = self.index(ord(c)) + + if i <= self._idx_of_Z: + n = self._idx_of_Z - self._idx_of_zero + # Rewrite [zero_point, Z_point] to [0, n]. + if self._idx_of_zero <= i <= self._idx_of_Z: + i -= self._idx_of_zero + assert 0 <= i <= n + # Rewrite [zero_point, 0] to [n + 1, Z_point]. + else: + i = self._idx_of_zero - i + n + assert n + 1 <= i <= self._idx_of_Z + assert 0 <= i <= self._idx_of_Z + + return i diff --git a/vendored/hypothesis/internal/lambda_sources.py b/vendored/hypothesis/internal/lambda_sources.py new file mode 100644 index 0000000..596b4c8 --- /dev/null +++ b/vendored/hypothesis/internal/lambda_sources.py @@ -0,0 +1,430 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import ast +import hashlib +import inspect +import linecache +import sys +import textwrap +from collections.abc import Callable, MutableMapping +from inspect import Parameter +from typing import Any +from weakref import WeakKeyDictionary + +from hypothesis.internal import reflection +from hypothesis.internal.cache import LRUCache + +# we have several levels of caching for lambda descriptions. +# * LAMBDA_DESCRIPTION_CACHE maps a lambda f to its description _lambda_description(f). +# Note that _lambda_description(f) may not be identical to f as it appears in the +# source code file. +# * LAMBDA_DIGEST_DESCRIPTION_CACHE maps _function_key(f) to _lambda_description(f). +# _function_key implements something close to "ast equality": +# two syntactically identical (minus whitespace etc) lambdas appearing in +# different files have the same key. Cache hits here provide a fast path which +# avoids ast-parsing syntactic lambdas we've seen before. Two lambdas with the +# same _function_key will not have different _lambda_descriptions - if +# they do, that's a bug here. +# * AST_LAMBDAS_CACHE maps source code lines to a list of the lambdas found in +# that source code. A cache hit here avoids reparsing the ast. 
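+#
+# Lookups try LAMBDA_DESCRIPTION_CACHE first, then LAMBDA_DIGEST_DESCRIPTION_CACHE,
+# and only fall back to a full _lambda_description() computation (which itself
+# consults AST_LAMBDAS_CACHE) when neither yields a usable description.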
+LAMBDA_DESCRIPTION_CACHE: MutableMapping[Callable, str] = WeakKeyDictionary() +LAMBDA_DIGEST_DESCRIPTION_CACHE: LRUCache[tuple[Any], str] = LRUCache(max_size=1000) +AST_LAMBDAS_CACHE: LRUCache[tuple[str], list[ast.Lambda]] = LRUCache(max_size=100) + + +def extract_all_lambdas(tree): + lambdas = [] + + class Visitor(ast.NodeVisitor): + + def visit_Lambda(self, node): + lambdas.append(node) + self.visit(node.body) + + Visitor().visit(tree) + return lambdas + + +def extract_all_attributes(tree): + attributes = [] + + class Visitor(ast.NodeVisitor): + def visit_Attribute(self, node): + attributes.append(node) + self.visit(node.value) + + Visitor().visit(tree) + return attributes + + +def _function_key(f, *, bounded_size=False, ignore_name=False): + """Returns a digest that differentiates functions that have different sources. + + Either a function or a code object may be passed. If code object, default + arg/kwarg values are not recoverable - this is the best we can do, and is + sufficient for the use case of comparing nested lambdas. + """ + try: + code = f.__code__ + defaults_repr = repr((f.__defaults__, f.__kwdefaults__)) + except AttributeError: + code = f + defaults_repr = () + consts_repr = repr(code.co_consts) + if bounded_size: + # Compress repr to avoid keeping arbitrarily large strings pinned as cache + # keys. We don't do this unconditionally because hashing takes time, and is + # not necessary if the key is used just for comparison (and is not stored). + if len(consts_repr) > 48: + consts_repr = hashlib.sha384(consts_repr.encode()).digest() + if len(defaults_repr) > 48: + defaults_repr = hashlib.sha384(defaults_repr.encode()).digest() + return ( + consts_repr, + defaults_repr, + code.co_argcount, + code.co_kwonlyargcount, + code.co_code, + code.co_names, + code.co_varnames, + code.co_freevars, + ignore_name or code.co_name, + ) + + +class _op: + # Opcodes, from dis.opmap. These may change between major versions. + NOP = 9 + LOAD_FAST = 85 + LOAD_FAST_LOAD_FAST = 88 + LOAD_FAST_BORROW = 86 + LOAD_FAST_BORROW_LOAD_FAST_BORROW = 87 + + +def _normalize_code(f, l): + # A small selection of possible peephole code transformations, based on what + # is actually seen to differ between compilations in our test suite. Each + # entry contains two equivalent opcode sequences, plus a condition + # function called with their respective oparg sequences, which must return + # true for the transformation to be valid. + Checker = Callable[[list[int], list[int]], bool] + transforms: tuple[list[int], list[int], Checker | None] = [ + ([_op.NOP], [], lambda a, b: True), + ( + [_op.LOAD_FAST, _op.LOAD_FAST], + [_op.LOAD_FAST_LOAD_FAST], + lambda a, b: a == [b[0] >> 4, b[0] & 15], + ), + ( + [_op.LOAD_FAST_BORROW, _op.LOAD_FAST_BORROW], + [_op.LOAD_FAST_BORROW_LOAD_FAST_BORROW], + lambda a, b: a == [b[0] >> 4, b[0] & 15], + ), + ] + # augment with converse + transforms += [ + ( + ops_b, + ops_a, + condition and (lambda a, b, condition=condition: condition(b, a)), + ) + for ops_a, ops_b, condition in transforms + ] + + # Normalize equivalent code. We assume that each bytecode op is 2 bytes, + # which is the case since Python 3.6. Since the opcodes values may change + # between version, there is a risk that a transform may not be equivalent + # -- even so, the risk of a bad transform producing a false positive is + # minuscule. 
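+    # For example, two consecutive LOAD_FAST ops with opargs 1 and 2 are
+    # equivalent to a single LOAD_FAST_LOAD_FAST with oparg (1 << 4) | 2,
+    # which is exactly what the condition functions above check.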
+ co_code = list(l.__code__.co_code) + f_code = list(f.__code__.co_code) + + def alternating(code, i, n): + return code[i : i + 2 * n : 2] + + i = 2 + while i < max(len(co_code), len(f_code)): + # note that co_code is mutated in loop + if i < min(len(co_code), len(f_code)) and f_code[i] == co_code[i]: + i += 2 + else: + for op1, op2, condition in transforms: + if ( + op1 == alternating(f_code, i, len(op1)) + and op2 == alternating(co_code, i, len(op2)) + and condition( + alternating(f_code, i + 1, len(op1)), + alternating(co_code, i + 1, len(op2)), + ) + ): + break + else: + # no point in continuing since the bytecodes are different anyway + break + # Splice in the transform and continue + co_code = ( + co_code[:i] + f_code[i : i + 2 * len(op1)] + co_code[i + 2 * len(op2) :] + ) + i += 2 * len(op1) + + # Normalize consts, in particular replace any lambda consts with the + # corresponding const from the template function, IFF they have the same + # source key. + + f_consts = f.__code__.co_consts + l_consts = l.__code__.co_consts + if len(f_consts) == len(l_consts) and any( + inspect.iscode(l_const) for l_const in l_consts + ): + normalized_consts = [] + for f_const, l_const in zip(f_consts, l_consts, strict=True): + if ( + inspect.iscode(l_const) + and inspect.iscode(f_const) + and _function_key(f_const) == _function_key(l_const) + ): + # If the lambdas are compiled from the same source, make them be the + # same object so that the toplevel lambdas end up equal. Note that + # default arguments are not available on the code objects. But if the + # default arguments differ then the lambdas must also differ in other + # ways, since default arguments are set up from bytecode and constants. + # I.e., this appears to be safe wrt false positives. + normalized_consts.append(f_const) + else: + normalized_consts.append(l_const) + else: + normalized_consts = l_consts + + return l.__code__.replace( + co_code=bytes(co_code), + co_consts=tuple(normalized_consts), + ) + + +_module_map: dict[int, str] = {} + + +def _mimic_lambda_from_node(f, node): + # Compile the source (represented by an ast.Lambda node) in a context that + # as far as possible mimics the context that f was compiled in. If - and + # only if - this was the source of f then the result is indistinguishable + # from f itself (to a casual observer such as _function_key). + f_globals = f.__globals__.copy() + f_code = f.__code__ + source = ast.unparse(node) + + # Install values for non-literal argument defaults. Thankfully, these are + # always captured by value - so there is no interaction with the closure. 
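+    # For example, for ``lambda x=n: x`` the recompiled default must be the value
+    # ``n`` had when the lambda was defined, so we install that value under the
+    # name ``n`` in the globals used for re-evaluation.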
+ if f.__defaults__: + for f_default, l_default in zip( + f.__defaults__, node.args.defaults, strict=True + ): + if isinstance(l_default, ast.Name): + f_globals[l_default.id] = f_default + if f.__kwdefaults__: # pragma: no cover + for l_default, l_varname in zip( + node.args.kw_defaults, node.args.kwonlyargs, strict=True + ): + if isinstance(l_default, ast.Name): + f_globals[l_default.id] = f.__kwdefaults__[l_varname.arg] + + # CPython's compiler treats known imports differently than normal globals, + # so check if we use attributes from globals that are modules (if so, we + # import them explicitly and redundantly in the exec below) + referenced_modules = [ + (local_name, module) + for attr in extract_all_attributes(node) + if ( + isinstance(attr.value, ast.Name) + and (local_name := attr.value.id) + and inspect.ismodule(module := f_globals.get(local_name)) + ) + ] + + if not f_code.co_freevars and not referenced_modules: + compiled = eval(source, f_globals) + else: + if f_code.co_freevars: + # We have to reconstruct a local closure. The closure will have + # the same values as the original function, although this is not + # required for source/bytecode equality. + f_globals |= { + f"__lc{i}": c.cell_contents for i, c in enumerate(f.__closure__) + } + captures = [f"{name}=__lc{i}" for i, name in enumerate(f_code.co_freevars)] + capture_str = ";".join(captures) + ";" + else: + capture_str = "" + if referenced_modules: + # We add import statements for all referenced modules, since that + # influences the compiled code. The assumption is that these modules + # were explicitly imported, not assigned, in the source - if not, + # this may/will give a different compilation result. + global _module_map + if len(_module_map) != len(sys.modules): # pragma: no branch + _module_map = {id(module): name for name, module in sys.modules.items()} + imports = [ + (module_name, local_name) + for local_name, module in referenced_modules + if (module_name := _module_map.get(id(module))) is not None + ] + import_fragments = [f"{name} as {asname}" for name, asname in set(imports)] + import_str = f"import {','.join(import_fragments)}\n" + else: + import_str = "" + exec_str = ( + f"{import_str}def __construct_lambda(): {capture_str} return ({source})" + ) + exec(exec_str, f_globals) + compiled = f_globals["__construct_lambda"]() + + return compiled + + +def _lambda_code_matches_node(f, node): + try: + compiled = _mimic_lambda_from_node(f, node) + except (NameError, SyntaxError): # pragma: no cover # source is generated from ast + return False + if _function_key(f) == _function_key(compiled): + return True + # Try harder + compiled.__code__ = _normalize_code(f, compiled) + return _function_key(f) == _function_key(compiled) + + +def _check_unknown_perfectly_aligned_lambda(candidate): + # This is a monkeypatch point for our self-tests, to make unknown + # lambdas raise. + pass + + +def _lambda_description(f, leeway=50, *, fail_if_confused_with_perfect_candidate=False): + if hasattr(f, "__wrapped_target"): + f = f.__wrapped_target + + # You might be wondering how a lambda can have a return-type annotation? + # The answer is that we add this at runtime, in new_given_signature(), + # and we do support strange choices as applying @given() to a lambda. + sig = inspect.signature(f) + assert sig.return_annotation in (Parameter.empty, None), sig + + # Using pytest-xdist on Python 3.13, there's an entry in the linecache for + # file "", which then returns nonsense to getsource. Discard it. 
+ linecache.cache.pop("", None) + + def format_lambda(body): + # The signature is more informative than the corresponding ast.unparse + # output in the case of default argument values, so add the signature + # to the unparsed body + return ( + f"lambda {str(sig)[1:-1]}: {body}" if sig.parameters else f"lambda: {body}" + ) + + if_confused = format_lambda("") + + try: + source_lines, lineno0 = inspect.findsource(f) + source_lines = tuple(source_lines) # make it hashable + except OSError: + return if_confused + + try: + all_lambdas = AST_LAMBDAS_CACHE[source_lines] + except KeyError: + # The source isn't already parsed, so we try to shortcut by parsing just + # the local block. If that fails to produce a code-identical lambda, + # fall through to the full parse. + local_lines = inspect.getblock(source_lines[lineno0:]) + local_block = textwrap.dedent("".join(local_lines)) + # The fairly common ".map(lambda x: ...)" case. This partial block + # isn't valid syntax, but it might be if we remove the leading ".". + local_block = local_block.removeprefix(".") + + try: + local_tree = ast.parse(local_block) + except SyntaxError: + pass + else: + local_lambdas = extract_all_lambdas(local_tree) + for candidate in local_lambdas: + if reflection.ast_arguments_matches_signature( + candidate.args, sig + ) and _lambda_code_matches_node(f, candidate): + return format_lambda(ast.unparse(candidate.body)) + + # Local parse failed or didn't produce a match, go ahead with the full parse + try: + tree = ast.parse("".join(source_lines)) + except SyntaxError: + all_lambdas = [] + else: + all_lambdas = extract_all_lambdas(tree) + AST_LAMBDAS_CACHE[source_lines] = all_lambdas + + aligned_lambdas = [] + for candidate in all_lambdas: + if ( + candidate.lineno - leeway <= lineno0 + 1 <= candidate.lineno + leeway + and reflection.ast_arguments_matches_signature(candidate.args, sig) + ): + aligned_lambdas.append(candidate) + + aligned_lambdas.sort(key=lambda c: abs(lineno0 + 1 - c.lineno)) + for candidate in aligned_lambdas: + if _lambda_code_matches_node(f, candidate): + return format_lambda(ast.unparse(candidate.body)) + + # None of the aligned lambdas match perfectly in generated code. + if aligned_lambdas and aligned_lambdas[0].lineno == lineno0 + 1: + _check_unknown_perfectly_aligned_lambda(aligned_lambdas[0]) + + return if_confused + + +def lambda_description(f): + """ + Returns a syntactically-valid expression describing `f`. This is often, but + not always, the exact lambda definition string which appears in the source code. + The difference comes from parsing the lambda ast into `tree` and then returning + the result of `ast.unparse(tree)`, which may differ in whitespace, double vs + single quotes, etc. + + Returns a string indicating an unknown body if the parsing gets confused in any way. + """ + try: + return LAMBDA_DESCRIPTION_CACHE[f] + except KeyError: + pass + + key = _function_key(f, bounded_size=True) + location = (f.__code__.co_filename, f.__code__.co_firstlineno) + try: + description, failed_locations = LAMBDA_DIGEST_DESCRIPTION_CACHE[key] + except KeyError: + failed_locations = set() + else: + # We got a hit in the digests cache, but only use it if either it has + # a good (known) description, or if it is unknown but we already tried + # to parse its exact source location before. 
+ if "" not in description or location in failed_locations: + # use the cached result + LAMBDA_DESCRIPTION_CACHE[f] = description + return description + + description = _lambda_description(f) + LAMBDA_DESCRIPTION_CACHE[f] = description + if "" in description: + failed_locations.add(location) + else: + failed_locations.clear() # we have a good description now + LAMBDA_DIGEST_DESCRIPTION_CACHE[key] = description, failed_locations + return description diff --git a/vendored/hypothesis/internal/observability.py b/vendored/hypothesis/internal/observability.py new file mode 100644 index 0000000..84d5b51 --- /dev/null +++ b/vendored/hypothesis/internal/observability.py @@ -0,0 +1,564 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +"""Observability tools to spit out analysis-ready tables, one row per test case.""" + +import base64 +import dataclasses +import json +import math +import os +import sys +import threading +import time +import warnings +from collections.abc import Callable, Generator +from contextlib import contextmanager +from dataclasses import dataclass +from datetime import date, timedelta +from functools import lru_cache +from threading import Lock +from typing import ( + TYPE_CHECKING, + Any, + Literal, + Optional, + TypeAlias, + Union, + cast, +) + +from hypothesis.configuration import storage_directory +from hypothesis.errors import HypothesisWarning +from hypothesis.internal.conjecture.choice import ( + BooleanConstraints, + BytesConstraints, + ChoiceConstraintsT, + ChoiceNode, + ChoiceT, + ChoiceTypeT, + FloatConstraints, + IntegerConstraints, + StringConstraints, +) +from hypothesis.internal.escalation import InterestingOrigin +from hypothesis.internal.floats import float_to_int +from hypothesis.internal.intervalsets import IntervalSet + +if TYPE_CHECKING: + from hypothesis.internal.conjecture.data import ConjectureData, Spans, Status + + +Observation: TypeAlias = Union["InfoObservation", "TestCaseObservation"] +CallbackThreadT: TypeAlias = Callable[[Observation], None] +# for all_threads=True, we pass the thread id as well. +CallbackAllThreadsT: TypeAlias = Callable[[Observation, int], None] +CallbackT: TypeAlias = CallbackThreadT | CallbackAllThreadsT + +# thread_id: list[callback] +_callbacks: dict[int | None, list[CallbackThreadT]] = {} +# callbacks where all_threads=True was set +_callbacks_all_threads: list[CallbackAllThreadsT] = [] + + +@dataclass(slots=True, frozen=False) +class PredicateCounts: + satisfied: int = 0 + unsatisfied: int = 0 + + def update_count(self, *, condition: bool) -> None: + if condition: + self.satisfied += 1 + else: + self.unsatisfied += 1 + + +def _choice_to_json(choice: ChoiceT | None) -> Any: + if choice is None: + return None + # see the note on the same check in to_jsonable for why we cast large + # integers to floats. + if ( + isinstance(choice, int) + and not isinstance(choice, bool) + and abs(choice) >= 2**63 + ): + return ["integer", str(choice)] + elif isinstance(choice, bytes): + return ["bytes", base64.b64encode(choice).decode()] + elif isinstance(choice, float) and math.isnan(choice): + # handle nonstandard nan bit patterns. 
We don't need to do this for -0.0 + # vs 0.0 since json doesn't normalize -0.0 to 0.0. + return ["float", float_to_int(choice)] + return choice + + +def choices_to_json(choices: tuple[ChoiceT, ...]) -> list[Any]: + return [_choice_to_json(choice) for choice in choices] + + +def _constraints_to_json( + choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT +) -> dict[str, Any]: + constraints = constraints.copy() + if choice_type == "integer": + constraints = cast(IntegerConstraints, constraints) + return { + "min_value": _choice_to_json(constraints["min_value"]), + "max_value": _choice_to_json(constraints["max_value"]), + "weights": ( + None + if constraints["weights"] is None + # wrap up in a list, instead of a dict, because json dicts + # require string keys + else [ + (_choice_to_json(k), v) for k, v in constraints["weights"].items() + ] + ), + "shrink_towards": _choice_to_json(constraints["shrink_towards"]), + } + elif choice_type == "float": + constraints = cast(FloatConstraints, constraints) + return { + "min_value": _choice_to_json(constraints["min_value"]), + "max_value": _choice_to_json(constraints["max_value"]), + "allow_nan": constraints["allow_nan"], + "smallest_nonzero_magnitude": constraints["smallest_nonzero_magnitude"], + } + elif choice_type == "string": + constraints = cast(StringConstraints, constraints) + assert isinstance(constraints["intervals"], IntervalSet) + return { + "intervals": constraints["intervals"].intervals, + "min_size": _choice_to_json(constraints["min_size"]), + "max_size": _choice_to_json(constraints["max_size"]), + } + elif choice_type == "bytes": + constraints = cast(BytesConstraints, constraints) + return { + "min_size": _choice_to_json(constraints["min_size"]), + "max_size": _choice_to_json(constraints["max_size"]), + } + elif choice_type == "boolean": + constraints = cast(BooleanConstraints, constraints) + return { + "p": constraints["p"], + } + else: + raise NotImplementedError(f"unknown choice type {choice_type}") + + +def nodes_to_json(nodes: tuple[ChoiceNode, ...]) -> list[dict[str, Any]]: + return [ + { + "type": node.type, + "value": _choice_to_json(node.value), + "constraints": _constraints_to_json(node.type, node.constraints), + "was_forced": node.was_forced, + } + for node in nodes + ] + + +@dataclass(slots=True, frozen=True) +class ObservationMetadata: + traceback: str | None + reproduction_decorator: str | None + predicates: dict[str, PredicateCounts] + backend: dict[str, Any] + sys_argv: list[str] + os_getpid: int + imported_at: float + data_status: "Status" + phase: str + interesting_origin: InterestingOrigin | None + choice_nodes: tuple[ChoiceNode, ...] | None + choice_spans: Optional["Spans"] + + def to_json(self) -> dict[str, Any]: + data = { + "traceback": self.traceback, + "reproduction_decorator": self.reproduction_decorator, + "predicates": self.predicates, + "backend": self.backend, + "sys.argv": self.sys_argv, + "os.getpid()": self.os_getpid, + "imported_at": self.imported_at, + "data_status": self.data_status, + "phase": self.phase, + "interesting_origin": self.interesting_origin, + "choice_nodes": ( + None if self.choice_nodes is None else nodes_to_json(self.choice_nodes) + ), + "choice_spans": ( + None + if self.choice_spans is None + else [ + ( + # span.label is an int, but cast to string to avoid conversion + # to float (and loss of precision) for large label values. + # + # The value of this label is opaque to consumers anyway, so its + # type shouldn't matter as long as it's consistent. 
+ str(span.label), + span.start, + span.end, + span.discarded, + ) + for span in self.choice_spans + ] + ), + } + # check that we didn't forget one + assert len(data) == len(dataclasses.fields(self)) + return data + + +@dataclass(slots=True, frozen=True) +class BaseObservation: + type: Literal["test_case", "info", "alert", "error"] + property: str + run_start: float + + +InfoObservationType = Literal["info", "alert", "error"] +TestCaseStatus = Literal["gave_up", "passed", "failed"] + + +@dataclass(slots=True, frozen=True) +class InfoObservation(BaseObservation): + type: InfoObservationType + title: str + content: str | dict + + +@dataclass(slots=True, frozen=True) +class TestCaseObservation(BaseObservation): + __test__ = False # no! bad pytest! + + type: Literal["test_case"] + status: TestCaseStatus + status_reason: str + representation: str + arguments: dict + how_generated: str + features: dict + coverage: dict[str, list[int]] | None + timing: dict[str, float] + metadata: ObservationMetadata + + +def add_observability_callback(f: CallbackT, /, *, all_threads: bool = False) -> None: + """ + Adds ``f`` as a callback for :ref:`observability `. ``f`` + should accept one argument, which is an observation. Whenever Hypothesis + produces a new observation, it calls each callback with that observation. + + If Hypothesis tests are being run from multiple threads, callbacks are tracked + per-thread. In other words, ``add_observability_callback(f)`` only adds ``f`` + as an observability callback for observations produced on that thread. + + If ``all_threads=True`` is passed, ``f`` will instead be registered as a + callback for all threads. This means it will be called for observations + generated by all threads, not just the thread which registered ``f`` as a + callback. In this case, ``f`` will be passed two arguments: the first is the + observation, and the second is the integer thread id from + :func:`python:threading.get_ident` where that observation was generated. + + We recommend against registering ``f`` as a callback for both ``all_threads=True`` + and the default ``all_threads=False``, due to unclear semantics with + |remove_observability_callback|. + """ + if all_threads: + _callbacks_all_threads.append(cast(CallbackAllThreadsT, f)) + return + + thread_id = threading.get_ident() + if thread_id not in _callbacks: + _callbacks[thread_id] = [] + + _callbacks[thread_id].append(cast(CallbackThreadT, f)) + + +def remove_observability_callback(f: CallbackT, /) -> None: + """ + Removes ``f`` from the :ref:`observability ` callbacks. + + If ``f`` is not in the list of observability callbacks, silently do nothing. + + If running under multiple threads, ``f`` will only be removed from the + callbacks for this thread. + """ + if f in _callbacks_all_threads: + _callbacks_all_threads.remove(cast(CallbackAllThreadsT, f)) + + thread_id = threading.get_ident() + if thread_id not in _callbacks: + return + + callbacks = _callbacks[thread_id] + if f in callbacks: + callbacks.remove(cast(CallbackThreadT, f)) + + if not callbacks: + del _callbacks[thread_id] + + +def observability_enabled() -> bool: + """ + Returns whether or not Hypothesis considers :ref:`observability ` + to be enabled. Observability is enabled if there is at least one observability + callback present. + + Callers might use this method to determine whether they should compute an + expensive representation that is only used under observability, for instance + by |alternative backends|. 
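+
+    An illustrative (not prescriptive) sketch of such a guard::
+
+        if observability_enabled():
+            representation = make_detailed_repr(value)  # hypothetical helper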
+ """ + return bool(_callbacks) or bool(_callbacks_all_threads) + + +@contextmanager +def with_observability_callback( + f: Callable[[Observation], None], /, *, all_threads: bool = False +) -> Generator[None, None, None]: + """ + A simple context manager which calls |add_observability_callback| on ``f`` + when it enters and |remove_observability_callback| on ``f`` when it exits. + """ + add_observability_callback(f, all_threads=all_threads) + try: + yield + finally: + remove_observability_callback(f) + + +def deliver_observation(observation: Observation) -> None: + thread_id = threading.get_ident() + + for callback in _callbacks.get(thread_id, []): + callback(observation) + + for callback in _callbacks_all_threads: + callback(observation, thread_id) + + +class _TestcaseCallbacks: + def __bool__(self): + self._note_deprecation() + return bool(_callbacks) + + def _note_deprecation(self): + from hypothesis._settings import note_deprecation + + note_deprecation( + "hypothesis.internal.observability.TESTCASE_CALLBACKS is deprecated. " + "Replace TESTCASE_CALLBACKS.append with add_observability_callback, " + "TESTCASE_CALLBACKS.remove with remove_observability_callback, and " + "bool(TESTCASE_CALLBACKS) with observability_enabled().", + since="2025-08-01", + has_codemod=False, + ) + + def append(self, f): + self._note_deprecation() + add_observability_callback(f) + + def remove(self, f): + self._note_deprecation() + remove_observability_callback(f) + + +#: .. warning:: +#: +#: Deprecated in favor of |add_observability_callback|, +#: |remove_observability_callback|, and |observability_enabled|. +#: +#: |TESTCASE_CALLBACKS| remains a thin compatibility +#: shim which forwards ``.append``, ``.remove``, and ``bool()`` to those +#: three methods. It is not an attempt to be fully compatible with the previous +#: ``TESTCASE_CALLBACKS = []``, so iteration or other usages will not work +#: anymore. Please update to using the new methods instead. +#: +#: |TESTCASE_CALLBACKS| will eventually be removed. +TESTCASE_CALLBACKS = _TestcaseCallbacks() + + +def make_testcase( + *, + run_start: float, + property: str, + data: "ConjectureData", + how_generated: str, + representation: str = "", + timing: dict[str, float], + arguments: dict | None = None, + coverage: dict[str, list[int]] | None = None, + phase: str | None = None, + backend_metadata: dict[str, Any] | None = None, + status: ( + Union[TestCaseStatus, "Status"] | None + ) = None, # overrides automatic calculation + status_reason: str | None = None, # overrides automatic calculation + # added to calculated metadata. If keys overlap, the value from this `metadata` + # is used + metadata: dict[str, Any] | None = None, +) -> TestCaseObservation: + from hypothesis.core import reproduction_decorator + from hypothesis.internal.conjecture.data import Status + + # We should only be sending observability reports for datas that have finished + # being modified. 
+ assert data.frozen + + if status_reason is not None: + pass + elif data.interesting_origin: + status_reason = str(data.interesting_origin) + elif phase == "shrink" and data.status == Status.OVERRUN: + status_reason = "exceeded size of current best example" + else: + status_reason = str(data.events.pop("invalid because", "")) + + status_map: dict[Status, TestCaseStatus] = { + Status.OVERRUN: "gave_up", + Status.INVALID: "gave_up", + Status.VALID: "passed", + Status.INTERESTING: "failed", + } + + if status is not None and isinstance(status, Status): + status = status_map[status] + if status is None: + status = status_map[data.status] + + return TestCaseObservation( + type="test_case", + status=status, + status_reason=status_reason, + representation=representation, + arguments={ + k.removeprefix("generate:"): v for k, v in (arguments or {}).items() + }, + how_generated=how_generated, # iid, mutation, etc. + features={ + **{ + f"target:{k}".strip(":"): v for k, v in data.target_observations.items() + }, + **data.events, + }, + coverage=coverage, + timing=timing, + metadata=ObservationMetadata( + **{ + "traceback": data.expected_traceback, + "reproduction_decorator": ( + reproduction_decorator(data.choices) if status == "failed" else None + ), + "predicates": dict(data._observability_predicates), + "backend": backend_metadata or {}, + "data_status": data.status, + "phase": phase, + "interesting_origin": data.interesting_origin, + "choice_nodes": data.nodes if OBSERVABILITY_CHOICES else None, + "choice_spans": data.spans if OBSERVABILITY_CHOICES else None, + **_system_metadata(), + # unpack last so it takes precedence for duplicate keys + **(metadata or {}), + } + ), + run_start=run_start, + property=property, + ) + + +_WROTE_TO = set() +_deliver_to_file_lock = Lock() + + +def _deliver_to_file( + observation: Observation, thread_id: int +) -> None: # pragma: no cover + from hypothesis.strategies._internal.utils import to_jsonable + + kind = "testcases" if observation.type == "test_case" else "info" + fname = storage_directory("observed", f"{date.today().isoformat()}_{kind}.jsonl") + fname.parent.mkdir(exist_ok=True, parents=True) + + observation_bytes = ( + json.dumps(to_jsonable(observation, avoid_realization=False)) + "\n" + ) + # only allow one conccurent file write to avoid write races. This is likely to make + # HYPOTHESIS_EXPERIMENTAL_OBSERVABILITY quite slow under threading. A queue + # would be an improvement, but that requires a background thread, and I + # would prefer to avoid a thread in the single-threaded case. We could + # switch over to a queue if we detect multithreading, but it's tricky to get + # right. + with _deliver_to_file_lock: + _WROTE_TO.add(fname) + with fname.open(mode="a") as f: + f.write(observation_bytes) + + +_imported_at = time.time() + + +@lru_cache +def _system_metadata() -> dict[str, Any]: + return { + "sys_argv": sys.argv, + "os_getpid": os.getpid(), + "imported_at": _imported_at, + } + + +#: If ``False``, do not collect coverage information when observability is enabled. +#: +#: This is exposed both for performance (as coverage collection can be slow on +#: Python 3.11 and earlier) and size (if you do not use coverage information, +#: you may not want to store it in-memory). +OBSERVABILITY_COLLECT_COVERAGE = ( + "HYPOTHESIS_EXPERIMENTAL_OBSERVABILITY_NOCOVER" not in os.environ +) +#: If ``True``, include the ``metadata.choice_nodes`` and ``metadata.spans`` keys +#: in test case observations. +#: +#: ``False`` by default. 
``metadata.choice_nodes`` and ``metadata.spans`` can be +#: a substantial amount of data, and so must be opted-in to, even when +#: observability is enabled. +#: +#: .. warning:: +#: +#: EXPERIMENTAL AND UNSTABLE. We are actively working towards a better +#: interface for this as of June 2025, and this attribute may disappear or +#: be renamed without notice. +#: +OBSERVABILITY_CHOICES = "HYPOTHESIS_EXPERIMENTAL_OBSERVABILITY_CHOICES" in os.environ + +if OBSERVABILITY_COLLECT_COVERAGE is False and ( + sys.version_info[:2] >= (3, 12) +): # pragma: no cover + warnings.warn( + "Coverage data collection should be quite fast in Python 3.12 or later " + "so there should be no need to turn coverage reporting off.", + HypothesisWarning, + stacklevel=2, + ) + +if ( + "HYPOTHESIS_EXPERIMENTAL_OBSERVABILITY" in os.environ + or OBSERVABILITY_COLLECT_COVERAGE is False +): # pragma: no cover + add_observability_callback(_deliver_to_file, all_threads=True) + + # Remove files more than a week old, to cap the size on disk + max_age = (date.today() - timedelta(days=8)).isoformat() + for f in storage_directory("observed", intent_to_write=False).glob("*.jsonl"): + if f.stem < max_age: # pragma: no branch + f.unlink(missing_ok=True) diff --git a/vendored/hypothesis/internal/reflection.py b/vendored/hypothesis/internal/reflection.py new file mode 100644 index 0000000..87bf67a --- /dev/null +++ b/vendored/hypothesis/internal/reflection.py @@ -0,0 +1,529 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +"""This file can approximately be considered the collection of hypothesis going +to really unreasonable lengths to produce pretty output.""" + +import ast +import hashlib +import inspect +import re +import textwrap +import types +import warnings +from collections.abc import Callable, Sequence +from functools import partial, wraps +from inspect import Parameter, Signature +from io import StringIO +from keyword import iskeyword +from random import _inst as global_random_instance +from tokenize import COMMENT, generate_tokens, untokenize +from types import EllipsisType, ModuleType +from typing import TYPE_CHECKING, Any, TypeVar, Union +from unittest.mock import _patch as PatchType + +from hypothesis.errors import HypothesisWarning +from hypothesis.internal import lambda_sources +from hypothesis.internal.compat import is_typed_named_tuple +from hypothesis.utils.conventions import not_set +from hypothesis.vendor.pretty import pretty + +if TYPE_CHECKING: + from hypothesis.strategies._internal.strategies import SearchStrategy + +T = TypeVar("T") + + +def is_mock(obj: object) -> bool: + """Determine if the given argument is a mock type.""" + + # We want to be able to detect these when dealing with various test + # args. As they are sneaky and can look like almost anything else, + # we'll check this by looking for an attribute with a name that it's really + # unlikely to implement accidentally, and that anyone who implements it + # deliberately should know what they're doing. This is more robust than + # looking for types. 
+ return hasattr(obj, "hypothesis_internal_is_this_a_mock_check") + + +def _clean_source(src: str) -> bytes: + """Return the source code as bytes, without decorators or comments. + + Because this is part of our database key, we reduce the cache invalidation + rate by ignoring decorators, comments, trailing whitespace, and empty lines. + We can't just use the (dumped) AST directly because it changes between Python + versions (e.g. ast.Constant) + """ + # Get the (one-indexed) line number of the function definition, and drop preceding + # lines - i.e. any decorators, so that adding `@example()`s keeps the same key. + try: + funcdef = ast.parse(src).body[0] + src = "".join(src.splitlines(keepends=True)[funcdef.lineno - 1 :]) + except Exception: + pass + # Remove blank lines and use the tokenize module to strip out comments, + # so that those can be changed without changing the database key. + try: + src = untokenize( + t for t in generate_tokens(StringIO(src).readline) if t.type != COMMENT + ) + except Exception: + pass + # Finally, remove any trailing whitespace and empty lines as a last cleanup. + return "\n".join(x.rstrip() for x in src.splitlines() if x.rstrip()).encode() + + +def function_digest(function: Any) -> bytes: + """Returns a string that is stable across multiple invocations across + multiple processes and is prone to changing significantly in response to + minor changes to the function. + + No guarantee of uniqueness though it usually will be. Digest collisions + lead to unfortunate but not fatal problems during database replay. + """ + hasher = hashlib.sha384() + try: + src = inspect.getsource(function) + except (OSError, TypeError): + # If we can't actually get the source code, try for the name as a fallback. + # NOTE: We might want to change this to always adding function.__qualname__, + # to differentiate f.x. two classes having the same function implementation + # with class-dependent behaviour. + try: + hasher.update(function.__name__.encode()) + except AttributeError: + pass + else: + hasher.update(_clean_source(src)) + try: + # This is additional to the source code because it can include the effects + # of decorators, or of post-hoc assignment to the .__signature__ attribute. + hasher.update(repr(get_signature(function)).encode()) + except Exception: + pass + try: + # We set this in order to distinguish e.g. @pytest.mark.parametrize cases. + hasher.update(function._hypothesis_internal_add_digest) + except AttributeError: + pass + return hasher.digest() + + +def check_signature(sig: Signature) -> None: + # Backport from Python 3.11; see https://github.com/python/cpython/pull/92065 + for p in sig.parameters.values(): + if iskeyword(p.name) and p.kind is not p.POSITIONAL_ONLY: + raise ValueError( + f"Signature {sig!r} contains a parameter named {p.name!r}, " + f"but this is a SyntaxError because `{p.name}` is a keyword. " + "You, or a library you use, must have manually created an " + "invalid signature - this will be an error in Python 3.11+" + ) + + +def get_signature( + target: Any, *, follow_wrapped: bool = True, eval_str: bool = False +) -> Signature: + # Special case for use of `@unittest.mock.patch` decorator, mimicking the + # behaviour of getfullargspec instead of reporting unusable arguments. 
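+    # Tests decorated with @mock.patch receive the created mocks as extra
+    # positional arguments, so a generic (*args, **keywargs) signature is the
+    # most useful thing we can report here.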
+ patches = getattr(target, "patchings", None) + if isinstance(patches, list) and all(isinstance(p, PatchType) for p in patches): + return Signature( + [ + Parameter("args", Parameter.VAR_POSITIONAL), + Parameter("keywargs", Parameter.VAR_KEYWORD), + ] + ) + + if isinstance(getattr(target, "__signature__", None), Signature): + # This special case covers unusual codegen like Pydantic models + sig = target.__signature__ + check_signature(sig) + # And *this* much more complicated block ignores the `self` argument + # if that's been (incorrectly) included in the custom signature. + if sig.parameters and (inspect.isclass(target) or inspect.ismethod(target)): + selfy = next(iter(sig.parameters.values())) + if ( + selfy.name == "self" + and selfy.default is Parameter.empty + and selfy.kind.name.startswith("POSITIONAL_") + ): + return sig.replace( + parameters=[v for k, v in sig.parameters.items() if k != "self"] + ) + return sig + sig = inspect.signature(target, follow_wrapped=follow_wrapped, eval_str=eval_str) + check_signature(sig) + return sig + + +def arg_is_required(param: Parameter) -> bool: + return param.default is Parameter.empty and param.kind in ( + Parameter.POSITIONAL_OR_KEYWORD, + Parameter.KEYWORD_ONLY, + ) + + +def required_args( + target: Callable[..., Any], + args: tuple["SearchStrategy[Any]", ...] = (), + kwargs: dict[str, Union["SearchStrategy[Any]", EllipsisType]] | None = None, +) -> set[str]: + """Return a set of names of required args to target that were not supplied + in args or kwargs. + + This is used in builds() to determine which arguments to attempt to + fill from type hints. target may be any callable (including classes + and bound methods). args and kwargs should be as they are passed to + builds() - that is, a tuple of values and a dict of names: values. + """ + kwargs = {} if kwargs is None else kwargs + # We start with a workaround for NamedTuples, which don't have nice inits + if inspect.isclass(target) and is_typed_named_tuple(target): + provided = set(kwargs) | set(target._fields[: len(args)]) + return set(target._fields) - provided + # Then we try to do the right thing with inspect.signature + try: + sig = get_signature(target) + except (ValueError, TypeError): + return set() + return { + name + for name, param in list(sig.parameters.items())[len(args) :] + if arg_is_required(param) and name not in kwargs + } + + +def convert_keyword_arguments( + function: Any, args: Sequence[object], kwargs: dict[str, object] +) -> tuple[tuple[object, ...], dict[str, object]]: + """Returns a pair of a tuple and a dictionary which would be equivalent + passed as positional and keyword args to the function. Unless function has + kwonlyargs or **kwargs the dictionary will always be empty. + """ + sig = inspect.signature(function, follow_wrapped=False) + bound = sig.bind(*args, **kwargs) + return bound.args, bound.kwargs + + +def convert_positional_arguments( + function: Any, args: Sequence[object], kwargs: dict[str, object] +) -> tuple[tuple[object, ...], dict[str, object]]: + """Return a tuple (new_args, new_kwargs) where all possible arguments have + been moved to kwargs. + + new_args will only be non-empty if function has pos-only args or *args. 
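+
+    For example, given ``def f(a, /, b, *args): ...``, converting the call
+    ``f(1, 2, 3)`` returns ``((1, 3), {"b": 2})``.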
+ """ + sig = inspect.signature(function, follow_wrapped=False) + bound = sig.bind(*args, **kwargs) + new_args = [] + new_kwargs = dict(bound.arguments) + for p in sig.parameters.values(): + if p.name in new_kwargs: + if p.kind is p.POSITIONAL_ONLY: + new_args.append(new_kwargs.pop(p.name)) + elif p.kind is p.VAR_POSITIONAL: + new_args.extend(new_kwargs.pop(p.name)) + elif p.kind is p.VAR_KEYWORD: + assert set(new_kwargs[p.name]).isdisjoint(set(new_kwargs) - {p.name}) + new_kwargs.update(new_kwargs.pop(p.name)) + return tuple(new_args), new_kwargs + + +def ast_arguments_matches_signature(args: ast.arguments, sig: Signature) -> bool: + expected: list[tuple[str, int]] = [] + for node in args.posonlyargs: + expected.append((node.arg, Parameter.POSITIONAL_ONLY)) + for node in args.args: + expected.append((node.arg, Parameter.POSITIONAL_OR_KEYWORD)) + if args.vararg is not None: + expected.append((args.vararg.arg, Parameter.VAR_POSITIONAL)) + for node in args.kwonlyargs: + expected.append((node.arg, Parameter.KEYWORD_ONLY)) + if args.kwarg is not None: + expected.append((args.kwarg.arg, Parameter.VAR_KEYWORD)) + return expected == [(p.name, p.kind) for p in sig.parameters.values()] + + +def is_first_param_referenced_in_function(f: Any) -> bool: + """Is the given name referenced within f?""" + try: + tree = ast.parse(textwrap.dedent(inspect.getsource(f))) + except Exception: + return True # Assume it's OK unless we know otherwise + name = next(iter(get_signature(f).parameters)) + return any( + isinstance(node, ast.Name) + and node.id == name + and isinstance(node.ctx, ast.Load) + for node in ast.walk(tree) + ) + + +def get_pretty_function_description(f: object) -> str: + if isinstance(f, partial): + return pretty(f) + if not hasattr(f, "__name__"): + return repr(f) + name = f.__name__ # type: ignore + if name == "": + return lambda_sources.lambda_description(f) + elif isinstance(f, (types.MethodType, types.BuiltinMethodType)): + self = f.__self__ + # Some objects, like `builtins.abs` are of BuiltinMethodType but have + # their module as __self__. This might include c-extensions generally? + if not (self is None or inspect.isclass(self) or inspect.ismodule(self)): + if self is global_random_instance: + return f"random.{name}" + return f"{self!r}.{name}" + elif isinstance(name, str) and getattr(dict, name, object()) is f: + # special case for keys/values views in from_type() / ghostwriter output + return f"dict.{name}" + return name + + +def nicerepr(v: Any) -> str: + if inspect.isfunction(v): + return get_pretty_function_description(v) + elif isinstance(v, type): + return v.__name__ + else: + # With TypeVar T, show List[T] instead of TypeError on List[~T] + return re.sub(r"(\[)~([A-Z][a-z]*\])", r"\g<1>\g<2>", pretty(v)) + + +def repr_call( + f: Any, args: Sequence[object], kwargs: dict[str, object], *, reorder: bool = True +) -> str: + # Note: for multi-line pretty-printing, see RepresentationPrinter.repr_call() + if reorder: + args, kwargs = convert_positional_arguments(f, args, kwargs) + + bits = [nicerepr(x) for x in args] + + for p in get_signature(f).parameters.values(): + if p.name in kwargs and not p.kind.name.startswith("VAR_"): + bits.append(f"{p.name}={nicerepr(kwargs.pop(p.name))}") + if kwargs: + for a in sorted(kwargs): + bits.append(f"{a}={nicerepr(kwargs[a])}") + + rep = nicerepr(f) + if rep.startswith("lambda") and ":" in rep: + rep = f"({rep})" + repr_len = len(rep) + sum(len(b) for b in bits) # approx + if repr_len > 30000: + warnings.warn( + "Generating overly large repr. 
This is an expensive operation, and with " + f"a length of {repr_len//1000} kB is unlikely to be useful. Use -Wignore " + "to ignore the warning, or -Werror to get a traceback.", + HypothesisWarning, + stacklevel=2, + ) + return rep + "(" + ", ".join(bits) + ")" + + +def check_valid_identifier(identifier: str) -> None: + if not identifier.isidentifier(): + raise ValueError(f"{identifier!r} is not a valid python identifier") + + +eval_cache: dict[str, ModuleType] = {} + + +def source_exec_as_module(source: str) -> ModuleType: + try: + return eval_cache[source] + except KeyError: + pass + + hexdigest = hashlib.sha384(source.encode()).hexdigest() + result = ModuleType("hypothesis_temporary_module_" + hexdigest) + assert isinstance(source, str) + exec(source, result.__dict__) + eval_cache[source] = result + return result + + +COPY_SIGNATURE_SCRIPT = """ +from hypothesis.utils.conventions import not_set + +def accept({funcname}): + def {name}{signature}: + return {funcname}({invocation}) + return {name} +""".lstrip() + + +def get_varargs( + sig: Signature, kind: int = Parameter.VAR_POSITIONAL +) -> Parameter | None: + for p in sig.parameters.values(): + if p.kind is kind: + return p + return None + + +def define_function_signature(name, docstring, signature): + """A decorator which sets the name, signature and docstring of the function + passed into it.""" + if name == "": + name = "_lambda_" + check_valid_identifier(name) + for a in signature.parameters: + check_valid_identifier(a) + + used_names = {*signature.parameters, name} + + newsig = signature.replace( + parameters=[ + p if p.default is signature.empty else p.replace(default=not_set) + for p in ( + p.replace(annotation=signature.empty) + for p in signature.parameters.values() + ) + ], + return_annotation=signature.empty, + ) + + pos_args = [ + p + for p in signature.parameters.values() + if p.kind.name.startswith("POSITIONAL_") + ] + + def accept(f): + fsig = inspect.signature(f, follow_wrapped=False) + must_pass_as_kwargs = [] + invocation_parts = [] + for p in pos_args: + if p.name not in fsig.parameters and get_varargs(fsig) is None: + must_pass_as_kwargs.append(p.name) + else: + invocation_parts.append(p.name) + if get_varargs(signature) is not None: + invocation_parts.append("*" + get_varargs(signature).name) + for k in must_pass_as_kwargs: + invocation_parts.append(f"{k}={k}") + for p in signature.parameters.values(): + if p.kind is p.KEYWORD_ONLY: + invocation_parts.append(f"{p.name}={p.name}") + varkw = get_varargs(signature, kind=Parameter.VAR_KEYWORD) + if varkw: + invocation_parts.append("**" + varkw.name) + + candidate_names = ["f"] + [f"f_{i}" for i in range(1, len(used_names) + 2)] + + for funcname in candidate_names: # pragma: no branch + if funcname not in used_names: + break + + source = COPY_SIGNATURE_SCRIPT.format( + name=name, + funcname=funcname, + signature=str(newsig), + invocation=", ".join(invocation_parts), + ) + result = source_exec_as_module(source).accept(f) + result.__doc__ = docstring + result.__defaults__ = tuple( + p.default + for p in signature.parameters.values() + if p.default is not signature.empty and "POSITIONAL" in p.kind.name + ) + kwdefaults = { + p.name: p.default + for p in signature.parameters.values() + if p.default is not signature.empty and p.kind is p.KEYWORD_ONLY + } + if kwdefaults: + result.__kwdefaults__ = kwdefaults + annotations = { + p.name: p.annotation + for p in signature.parameters.values() + if p.annotation is not signature.empty + } + if signature.return_annotation is not 
signature.empty: + annotations["return"] = signature.return_annotation + if annotations: + result.__annotations__ = annotations + return result + + return accept + + +def impersonate(target): + """Decorator to update the attributes of a function so that to external + introspectors it will appear to be the target function. + + Note that this updates the function in place, it doesn't return a + new one. + """ + + def accept(f): + # Lie shamelessly about where this code comes from, to hide the hypothesis + # internals from pytest, ipython, and other runtime introspection. + f.__code__ = f.__code__.replace( + co_filename=target.__code__.co_filename, + co_firstlineno=target.__code__.co_firstlineno, + ) + f.__name__ = target.__name__ + f.__module__ = target.__module__ + f.__doc__ = target.__doc__ + f.__globals__["__hypothesistracebackhide__"] = True + # But leave an breadcrumb for _describe_lambda to follow, it's + # just confused by the lies above + f.__wrapped_target = target + return f + + return accept + + +def proxies(target: T) -> Callable[[Callable], T]: + replace_sig = define_function_signature( + target.__name__.replace("", "_lambda_"), # type: ignore + target.__doc__, + get_signature(target, follow_wrapped=False), + ) + + def accept(proxy): + return impersonate(target)(wraps(target)(replace_sig(proxy))) + + return accept + + +def is_identity_function(f: Callable) -> bool: + try: + code = f.__code__ + except AttributeError: + try: + f = f.__call__ # type: ignore + code = f.__code__ + except AttributeError: + return False + + # We only accept a single unbound argument. While it would be possible to + # accept extra defaulted arguments, it would be pointless as they couldn't + # be referenced at all in the code object (or the co_code check would fail). + bound_args = int(inspect.ismethod(f)) + if code.co_argcount != bound_args + 1 or code.co_kwonlyargcount > 0: + return False + + # We know that f accepts a single positional argument, now check that its + # code object is simply "return first unbound argument". + template = (lambda self, x: x) if bound_args else (lambda x: x) # type: ignore + try: + return code.co_code == template.__code__.co_code + except AttributeError: # pragma: no cover # pypy only + # In PyPy, some builtin functions have a code object ('builtin-code') + # lacking co_code, perhaps because they are native-compiled and don't have + # a corresponding bytecode. Regardless, since Python doesn't have any + # builtin identity function it seems safe to say that this one isn't + return False diff --git a/vendored/hypothesis/internal/scrutineer.py b/vendored/hypothesis/internal/scrutineer.py new file mode 100644 index 0000000..a4665d1 --- /dev/null +++ b/vendored/hypothesis/internal/scrutineer.py @@ -0,0 +1,328 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
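The introspection helpers above are easiest to understand from their observable behaviour. A minimal sketch, assuming the vendored copies of required_args and is_identity_function are importable (the vendored.hypothesis.internal.reflection path is inferred from the tree layout and may differ):

    from vendored.hypothesis.internal.reflection import (
        is_identity_function,
        required_args,
    )

    def make_user(name, age=0, *, email=None):
        # Only "name" lacks a default, so it is the only required argument.
        return (name, age, email)

    assert required_args(make_user) == {"name"}
    # Arguments already supplied positionally are not reported as required.
    assert required_args(make_user, args=("Ada",)) == set()

    # is_identity_function compares bytecode against a template `lambda x: x`.
    assert is_identity_function(lambda x: x)
    assert not is_identity_function(lambda x: x + 1)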
+ +import functools +import os +import re +import subprocess +import sys +import sysconfig +import types +from collections import defaultdict +from collections.abc import Iterable +from enum import IntEnum +from functools import lru_cache, reduce +from os import sep +from pathlib import Path +from typing import TypeAlias + +from hypothesis._settings import Phase, Verbosity +from hypothesis.internal.compat import PYPY +from hypothesis.internal.escalation import is_hypothesis_file + +Location: TypeAlias = tuple[str, int] +Branch: TypeAlias = tuple[Location | None, Location] +Trace: TypeAlias = set[Branch] + + +@functools.cache +def should_trace_file(fname: str) -> bool: + # fname.startswith("<") indicates runtime code-generation via compile, + # e.g. compile("def ...", "", "exec") in e.g. attrs methods. + return not (is_hypothesis_file(fname) or fname.startswith("<")) + + +# where possible, we'll use 3.12's new sys.monitoring module for low-overhead +# coverage instrumentation; on older python versions we'll use sys.settrace. +# tool_id = 1 is designated for coverage, but we intentionally choose a +# non-reserved tool id so we can co-exist with coverage tools. +MONITORING_TOOL_ID = 3 +if hasattr(sys, "monitoring"): + MONITORING_EVENTS = {sys.monitoring.events.LINE: "trace_line"} + + +class Tracer: + """A super-simple branch coverage tracer.""" + + __slots__ = ( + "_previous_location", + "_should_trace", + "_tried_and_failed_to_trace", + "branches", + ) + + def __init__(self, *, should_trace: bool) -> None: + self.branches: Trace = set() + self._previous_location: Location | None = None + self._tried_and_failed_to_trace = False + self._should_trace = should_trace and self.can_trace() + + @staticmethod + def can_trace() -> bool: + if PYPY: + return False + if hasattr(sys, "monitoring"): + return sys.monitoring.get_tool(MONITORING_TOOL_ID) is None + return sys.gettrace() is None + + def trace(self, frame, event, arg): + try: + if event == "call": + return self.trace + elif event == "line": + fname = frame.f_code.co_filename + if should_trace_file(fname): + current_location = (fname, frame.f_lineno) + self.branches.add((self._previous_location, current_location)) + self._previous_location = current_location + except RecursionError: + pass + + def trace_line(self, code: types.CodeType, line_number: int) -> None: + fname = code.co_filename + if not should_trace_file(fname): + # this function is only called on 3.12+, but we want to avoid an + # assertion to that effect for performance. + return sys.monitoring.DISABLE # type: ignore + + current_location = (fname, line_number) + self.branches.add((self._previous_location, current_location)) + self._previous_location = current_location + + def __enter__(self): + self._tried_and_failed_to_trace = False + + if not self._should_trace: + return self + + if not hasattr(sys, "monitoring"): + sys.settrace(self.trace) + return self + + try: + sys.monitoring.use_tool_id(MONITORING_TOOL_ID, "scrutineer") + except ValueError: + # another thread may have registered a tool for MONITORING_TOOL_ID + # since we checked in can_trace. 
+ self._tried_and_failed_to_trace = True + return self + + for event, callback_name in MONITORING_EVENTS.items(): + sys.monitoring.set_events(MONITORING_TOOL_ID, event) + callback = getattr(self, callback_name) + sys.monitoring.register_callback(MONITORING_TOOL_ID, event, callback) + + return self + + def __exit__(self, *args, **kwargs): + if not self._should_trace: + return + + if not hasattr(sys, "monitoring"): + sys.settrace(None) + return + + if self._tried_and_failed_to_trace: + return + + sys.monitoring.free_tool_id(MONITORING_TOOL_ID) + for event in MONITORING_EVENTS: + sys.monitoring.register_callback(MONITORING_TOOL_ID, event, None) + + +UNHELPFUL_LOCATIONS = ( + # There's a branch which is only taken when an exception is active while exiting + # a contextmanager; this is probably after the fault has been triggered. + # Similar reasoning applies to a few other standard-library modules: even + # if the fault was later, these still aren't useful locations to report! + # Note: The list is post-processed, so use plain "/" for separator here. + "/contextlib.py", + "/inspect.py", + "/re.py", + "/re/__init__.py", # refactored in Python 3.11 + "/warnings.py", + # Quite rarely, the first AFNP line is in Pytest's internals. + "/_pytest/**", + "/pluggy/_*.py", + # used by pytest for failure formatting in the terminal. + # seen: pygments/lexer.py, pygments/formatters/, pygments/filter.py. + "/pygments/*", + # used by pytest for failure formatting + "/difflib.py", + "/reprlib.py", + "/typing.py", + "/conftest.py", + "/pprint.py", +) + + +def _glob_to_re(locs: Iterable[str]) -> str: + """Translate a list of glob patterns to a combined regular expression. + Only the * and ** wildcards are supported, and patterns including special + characters will only work by chance.""" + # fnmatch.translate is not an option since its "*" consumes path sep + return "|".join( + loc.replace(".", re.escape(".")) + .replace("**", r".+") + .replace("*", r"[^/]+") + .replace("/", re.escape(sep)) + + r"\Z" # right anchored + for loc in locs + ) + + +def get_explaining_locations(traces): + # Traces is a dict[interesting_origin | None, set[frozenset[tuple[str, int]]]] + # Each trace in the set might later become a Counter instead of frozenset. + if not traces: + return {} + + unions = {origin: set().union(*values) for origin, values in traces.items()} + seen_passing = {None}.union(*unions.pop(None, set())) + + always_failing_never_passing = { + origin: reduce(set.intersection, [set().union(*v) for v in values]) + - seen_passing + for origin, values in traces.items() + if origin is not None + } + + # Build the observed parts of the control-flow graph for each origin + cf_graphs = {origin: defaultdict(set) for origin in unions} + for origin, seen_arcs in unions.items(): + for src, dst in seen_arcs: + cf_graphs[origin][src].add(dst) + assert cf_graphs[origin][None], "Expected start node with >=1 successor" + + # For each origin, our explanation is the always_failing_never_passing lines + # which are reachable from the start node (None) without passing through another + # AFNP line. 
So here's a whatever-first search with early stopping: + explanations = defaultdict(set) + for origin in unions: + queue = {None} + seen = set() + while queue: + assert queue.isdisjoint(seen), f"Intersection: {queue & seen}" + src = queue.pop() + seen.add(src) + if src in always_failing_never_passing[origin]: + explanations[origin].add(src) + else: + queue.update(cf_graphs[origin][src] - seen) + + # The last step is to filter out explanations that we know would be uninformative. + # When this is the first AFNP location, we conclude that Scrutineer missed the + # real divergence (earlier in the trace) and drop that unhelpful explanation. + filter_regex = re.compile(_glob_to_re(UNHELPFUL_LOCATIONS)) + return { + origin: {loc for loc in afnp_locs if not filter_regex.search(loc[0])} + for origin, afnp_locs in explanations.items() + } + + +# see e.g. https://docs.python.org/3/library/sysconfig.html#posix-user +# for examples of these path schemes +STDLIB_DIRS = { + Path(sysconfig.get_path("platstdlib")).resolve(), + Path(sysconfig.get_path("stdlib")).resolve(), +} +SITE_PACKAGES_DIRS = { + Path(sysconfig.get_path("purelib")).resolve(), + Path(sysconfig.get_path("platlib")).resolve(), +} + +EXPLANATION_STUB = ( + "Explanation:", + " These lines were always and only run by failing examples:", +) + + +class ModuleLocation(IntEnum): + LOCAL = 0 + SITE_PACKAGES = 1 + STDLIB = 2 + + @classmethod + @lru_cache(1024) + def from_path(cls, path: str) -> "ModuleLocation": + path = Path(path).resolve() + # site-packages may be a subdir of stdlib or platlib, so it's important to + # check is_relative_to for this before the stdlib. + if any(path.is_relative_to(p) for p in SITE_PACKAGES_DIRS): + return cls.SITE_PACKAGES + if any(path.is_relative_to(p) for p in STDLIB_DIRS): + return cls.STDLIB + return cls.LOCAL + + +# show local files first, then site-packages, then stdlib +def _sort_key(path: str, lineno: int) -> tuple[int, str, int]: + return (ModuleLocation.from_path(path), path, lineno) + + +def make_report(explanations, *, cap_lines_at=5): + report = defaultdict(list) + for origin, locations in explanations.items(): + locations = list(locations) + locations.sort(key=lambda v: _sort_key(v[0], v[1])) + report_lines = [f" {fname}:{lineno}" for fname, lineno in locations] + if len(report_lines) > cap_lines_at + 1: + msg = " (and {} more with settings.verbosity >= verbose)" + report_lines[cap_lines_at:] = [msg.format(len(report_lines[cap_lines_at:]))] + if report_lines: # We might have filtered out every location as uninformative. + report[origin] = list(EXPLANATION_STUB) + report_lines + return report + + +def explanatory_lines(traces, settings): + if Phase.explain in settings.phases and sys.gettrace() and not traces: + return defaultdict(list) + # Return human-readable report lines summarising the traces + explanations = get_explaining_locations(traces) + max_lines = 5 if settings.verbosity <= Verbosity.normal else float("inf") + return make_report(explanations, cap_lines_at=max_lines) + + +# beware the code below; we're using some heuristics to make a nicer report... 
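The filtering of unhelpful locations relies on _glob_to_re producing a right-anchored alternation in which ** crosses path separators and a plain filename does not. An illustrative check, assuming a POSIX path separator and that the module is importable as vendored.hypothesis.internal.scrutineer:

    import re

    from vendored.hypothesis.internal.scrutineer import _glob_to_re

    # With sep == "/", "/_pytest/**" translates to r"/_pytest/.+\Z" and
    # "/contextlib.py" to r"/contextlib\.py\Z", joined with "|".
    filter_regex = re.compile(_glob_to_re(["/_pytest/**", "/contextlib.py"]))

    assert filter_regex.search("/usr/lib/python3.12/contextlib.py")
    # "**" may span several path components.
    assert filter_regex.search("/site-packages/_pytest/assertion/rewrite.py")
    # Patterns are right-anchored, so a trailing suffix defeats the match.
    assert not filter_regex.search("/site-packages/contextlib.py.bak")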
+ + +@functools.lru_cache +def _get_git_repo_root() -> Path: + try: + where = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + check=True, + timeout=10, + capture_output=True, + text=True, + encoding="utf-8", + ).stdout.strip() + except Exception: # pragma: no cover + return Path().absolute().parents[-1] + else: + return Path(where) + + +def tractable_coverage_report(trace: Trace) -> dict[str, list[int]]: + """Report a simple coverage map which is (probably most) of the user's code.""" + coverage: dict = {} + t = dict(trace) + for file, line in set(t.keys()).union(t.values()) - {None}: # type: ignore + # On Python <= 3.11, we can use coverage.py xor Hypothesis' tracer, + # so the trace will be empty and this line never run under coverage. + coverage.setdefault(file, set()).add(line) # pragma: no cover + stdlib_fragment = f"{os.sep}lib{os.sep}python3.{sys.version_info.minor}{os.sep}" + return { + k: sorted(v) + for k, v in coverage.items() + if stdlib_fragment not in k + and (p := Path(k)).is_relative_to(_get_git_repo_root()) + and "site-packages" not in p.parts + } diff --git a/vendored/hypothesis/internal/validation.py b/vendored/hypothesis/internal/validation.py new file mode 100644 index 0000000..9266cb3 --- /dev/null +++ b/vendored/hypothesis/internal/validation.py @@ -0,0 +1,127 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import decimal +import math +from numbers import Rational, Real + +from hypothesis.errors import InvalidArgument +from hypothesis.internal.coverage import check_function + + +@check_function +def check_type(typ: type | tuple[type, ...], arg: object, name: str) -> None: + if not isinstance(arg, typ): + if isinstance(typ, tuple): + assert len(typ) >= 2, "Use bare type instead of len-1 tuple" + typ_string = "one of " + ", ".join(t.__name__ for t in typ) + else: + typ_string = typ.__name__ + + if typ_string == "SearchStrategy": + from hypothesis.strategies import SearchStrategy + + # Use hypothesis.strategies._internal.strategies.check_strategy + # instead, as it has some helpful "did you mean..." logic. + assert typ is not SearchStrategy, "use check_strategy instead" + + raise InvalidArgument( + f"Expected {typ_string} but got {name}={arg!r} (type={type(arg).__name__})" + ) + + +@check_function +def check_valid_integer(value, name): + """Checks that value is either unspecified, or a valid integer. + + Otherwise raises InvalidArgument. + """ + if value is None: + return + check_type(int, value, name) + + +@check_function +def check_valid_bound(value, name): + """Checks that value is either unspecified, or a valid interval bound. + + Otherwise raises InvalidArgument. + """ + if value is None or isinstance(value, (int, Rational)): + return + if not isinstance(value, (Real, decimal.Decimal)): + raise InvalidArgument(f"{name}={value!r} must be a real number.") + if math.isnan(value): + raise InvalidArgument(f"Invalid end point {name}={value!r}") + + +@check_function +def check_valid_magnitude(value, name): + """Checks that value is either unspecified, or a non-negative valid + interval bound. + + Otherwise raises InvalidArgument. 
+ """ + check_valid_bound(value, name) + if value is not None and value < 0: + raise InvalidArgument(f"{name}={value!r} must not be negative.") + elif value is None and name == "min_magnitude": + raise InvalidArgument("Use min_magnitude=0 or omit the argument entirely.") + + +@check_function +def try_convert(typ, value, name): + if value is None: + return None + if isinstance(value, typ): + return value + try: + return typ(value) + except (TypeError, ValueError, ArithmeticError) as err: + raise InvalidArgument( + f"Cannot convert {name}={value!r} of type " + f"{type(value).__name__} to type {typ.__name__}" + ) from err + + +@check_function +def check_valid_size(value, name): + """Checks that value is either unspecified, or a valid non-negative size + expressed as an integer. + + Otherwise raises InvalidArgument. + """ + if value is None and name not in ("min_size", "size"): + return + check_type(int, value, name) + if value < 0: + raise InvalidArgument(f"Invalid size {name}={value!r} < 0") + + +@check_function +def check_valid_interval(lower_bound, upper_bound, lower_name, upper_name): + """Checks that lower_bound and upper_bound are either unspecified, or they + define a valid interval on the number line. + + Otherwise raises InvalidArgument. + """ + if lower_bound is None or upper_bound is None: + return + if upper_bound < lower_bound: + raise InvalidArgument( + f"Cannot have {upper_name}={upper_bound!r} < {lower_name}={lower_bound!r}" + ) + + +@check_function +def check_valid_sizes(min_size, max_size): + check_valid_size(min_size, "min_size") + check_valid_size(max_size, "max_size") + check_valid_interval(min_size, max_size, "min_size", "max_size") diff --git a/vendored/hypothesis/provisional.py b/vendored/hypothesis/provisional.py new file mode 100644 index 0000000..8becf92 --- /dev/null +++ b/vendored/hypothesis/provisional.py @@ -0,0 +1,204 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +"""This module contains various provisional APIs and strategies. + +It is intended for internal use, to ease code reuse, and is not stable. +Point releases may move or break the contents at any time! + +Internet strategies should conform to :rfc:`3986` or the authoritative +definitions it links to. If not, report the bug! +""" +# https://tools.ietf.org/html/rfc3696 + +import string +from functools import lru_cache +from importlib import resources + +from hypothesis import strategies as st +from hypothesis.errors import InvalidArgument +from hypothesis.internal.conjecture import utils as cu +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.strategies import DrawFn +from hypothesis.strategies._internal.utils import defines_strategy + +URL_SAFE_CHARACTERS = frozenset(string.ascii_letters + string.digits + "$-_.+!*'(),~") +FRAGMENT_SAFE_CHARACTERS = URL_SAFE_CHARACTERS | {"?", "/"} + + +@lru_cache(maxsize=1) +def get_top_level_domains() -> tuple[str, ...]: + # This file is sourced from http://data.iana.org/TLD/tlds-alpha-by-domain.txt + # The file contains additional information about the date that it was last updated. 
+ traversable = resources.files("hypothesis.vendor") / "tlds-alpha-by-domain.txt" + _comment, *_tlds = traversable.read_text(encoding="utf-8").splitlines() + assert _comment.startswith("#") + + # Remove special-use domain names from the list. For more discussion + # see https://github.com/HypothesisWorks/hypothesis/pull/3572 + return ("COM", *sorted((d for d in _tlds if d != "ARPA"), key=len)) + + +@st.composite +def _recase_randomly(draw: DrawFn, tld: str) -> str: + tld = list(tld) + changes = draw(st.tuples(*(st.booleans() for _ in range(len(tld))))) + for i, change_case in enumerate(changes): + if change_case: + tld[i] = tld[i].lower() if tld[i].isupper() else tld[i].upper() + return "".join(tld) + + +class DomainNameStrategy(st.SearchStrategy[str]): + @staticmethod + def clean_inputs( + minimum: int, maximum: int, value: int | None, variable_name: str + ) -> int: + if value is None: + value = maximum + elif not isinstance(value, int): + raise InvalidArgument( + f"Expected integer but {variable_name} is a {type(value).__name__}" + ) + elif not minimum <= value <= maximum: + raise InvalidArgument( + f"Invalid value {minimum!r} < {variable_name}={value!r} < {maximum!r}" + ) + return value + + def __init__( + self, max_length: int | None = None, max_element_length: int | None = None + ) -> None: + """ + A strategy for :rfc:`1035` fully qualified domain names. + + The upper limit for max_length is 255 in accordance with :rfc:`1035#section-2.3.4` + The lower limit for max_length is 4, corresponding to a two letter domain + with a single letter subdomain. + The upper limit for max_element_length is 63 in accordance with :rfc:`1035#section-2.3.4` + The lower limit for max_element_length is 1 in accordance with :rfc:`1035#section-2.3.4` + """ + # https://tools.ietf.org/html/rfc1035#section-2.3.4 + + max_length = self.clean_inputs(4, 255, max_length, "max_length") + max_element_length = self.clean_inputs( + 1, 63, max_element_length, "max_element_length" + ) + + super().__init__() + self.max_length = max_length + self.max_element_length = max_element_length + + # These regular expressions are constructed to match the documented + # information in https://tools.ietf.org/html/rfc1035#section-2.3.1 + # which defines the allowed syntax of a subdomain string. + if self.max_element_length == 1: + label_regex = r"[a-zA-Z]" + elif self.max_element_length == 2: + label_regex = r"[a-zA-Z][a-zA-Z0-9]?" + else: + maximum_center_character_pattern_repetitions = self.max_element_length - 2 + label_regex = r"[a-zA-Z]([a-zA-Z0-9\-]{0,%d}[a-zA-Z0-9])?" % ( + maximum_center_character_pattern_repetitions, + ) + + # Construct reusable strategies here to avoid a performance hit by doing + # so repeatedly in do_draw. + + # 1 - Select a valid top-level domain (TLD) name + # 2 - Check that the number of characters in our selected TLD won't + # prevent us from generating at least a 1 character subdomain. + # 3 - Randomize the TLD between upper and lower case characters. + + self.domain_strategy = ( + st.sampled_from(get_top_level_domains()) + .filter(lambda tld: len(tld) + 2 <= self.max_length) + .flatmap(_recase_randomly) + ) + + # RFC-5890 s2.3.1 says such labels are reserved, and since we don't + # want to bother with xn-- punycode labels we'll exclude them all. 
+ self.elem_strategy = st.from_regex(label_regex, fullmatch=True).filter( + lambda label: len(label) < 4 or label[2:4] != "--" + ) + + def do_draw(self, data: ConjectureData) -> str: + domain = data.draw(self.domain_strategy) + # The maximum possible number of subdomains is 126, + # 1 character subdomain + 1 '.' character, * 126 = 252, + # with a max of 255, that leaves 3 characters for a TLD. + # Allowing any more subdomains would not leave enough + # characters for even the shortest possible TLDs. + elements = cu.many(data, min_size=1, average_size=3, max_size=126) + while elements.more(): + # Generate a new valid subdomain using the regex strategy. + sub_domain = data.draw(self.elem_strategy) + if len(domain) + len(sub_domain) >= self.max_length: + data.stop_span(discard=True) + break + domain = sub_domain + "." + domain + return domain + + +@defines_strategy(force_reusable_values=True) +def domains( + *, max_length: int = 255, max_element_length: int = 63 +) -> st.SearchStrategy[str]: + """Generate :rfc:`1035` compliant fully qualified domain names.""" + return DomainNameStrategy( + max_length=max_length, max_element_length=max_element_length + ) + + +# The `urls()` strategy uses this to generate URL fragments (e.g. "#foo"). +# It has been extracted to top-level so that we can test it independently +# of `urls()`, which helps with getting non-flaky coverage of the lambda. +_url_fragments_strategy = ( + st.lists( + st.builds( + lambda char, encode: ( + f"%{ord(char):02X}" + if (encode or char not in FRAGMENT_SAFE_CHARACTERS) + else char + ), + st.characters(min_codepoint=0, max_codepoint=255), + st.booleans(), + ), + min_size=1, + ) + .map("".join) + .map("#{}".format) +) + + +@defines_strategy(force_reusable_values=True) +def urls() -> st.SearchStrategy[str]: + """A strategy for :rfc:`3986`, generating http/https URLs. + + The generated URLs could, at least in theory, be passed to an HTTP client + and fetched. + + """ + + def url_encode(s: str) -> str: + return "".join(c if c in URL_SAFE_CHARACTERS else f"%{ord(c):02X}" for c in s) + + schemes = st.sampled_from(["http", "https"]) + ports = st.integers(min_value=1, max_value=2**16 - 1).map(":{}".format) + paths = st.lists(st.text(string.printable).map(url_encode)).map("/".join) + + return st.builds( + "{}://{}{}/{}{}".format, + schemes, + domains(), + st.just("") | ports, + paths, + st.just("") | _url_fragments_strategy, + ) diff --git a/vendored/hypothesis/py.typed b/vendored/hypothesis/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/vendored/hypothesis/reporting.py b/vendored/hypothesis/reporting.py new file mode 100644 index 0000000..0f0af3f --- /dev/null +++ b/vendored/hypothesis/reporting.py @@ -0,0 +1,61 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
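Since the harness exercises the provisional domains() and urls() strategies above, a small smoke test can sanity-check their output. A minimal sketch, assuming the vendored tree is importable as the hypothesis package (as its own internal imports expect); the test name is illustrative:

    from urllib.parse import urlsplit

    from hypothesis import given, settings
    from hypothesis.provisional import urls

    @given(url=urls())
    @settings(max_examples=25)
    def test_generated_urls_parse_cleanly(url):
        parts = urlsplit(url)
        assert parts.scheme in ("http", "https")
        # The authority component comes from domains(), so it is never empty.
        assert parts.hostname

    test_generated_urls_parse_cleanly()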
+ +from collections.abc import Callable +from contextlib import AbstractContextManager +from typing import TypeAlias + +from hypothesis._settings import Verbosity, settings +from hypothesis.internal.compat import escape_unicode_characters +from hypothesis.utils.dynamicvariables import DynamicVariable + + +def default(value: object) -> None: + try: + print(value) + except UnicodeEncodeError: + print(escape_unicode_characters(str(value))) + + +ReporterT: TypeAlias = Callable[[object], None] +reporter = DynamicVariable[ReporterT](default) + + +def current_reporter() -> ReporterT: + return reporter.value + + +def with_reporter(new_reporter: ReporterT) -> AbstractContextManager[None]: + return reporter.with_value(new_reporter) + + +def current_verbosity() -> Verbosity: + assert settings.default is not None + return settings.default.verbosity + + +def verbose_report(text: str) -> None: + if current_verbosity() >= Verbosity.verbose: + base_report(text) + + +def debug_report(text: str) -> None: + if current_verbosity() >= Verbosity.debug: + base_report(text) + + +def report(text: str) -> None: + if current_verbosity() >= Verbosity.normal: + base_report(text) + + +def base_report(text: str) -> None: + assert isinstance(text, str), f"unexpected non-str {text=}" + current_reporter()(text) diff --git a/vendored/hypothesis/stateful.py b/vendored/hypothesis/stateful.py new file mode 100644 index 0000000..8026d2d --- /dev/null +++ b/vendored/hypothesis/stateful.py @@ -0,0 +1,1178 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +"""This module provides support for a stateful style of testing, where tests +attempt to find a sequence of operations that cause a breakage rather than just +a single value. + +Notably, the set of steps available at any point may depend on the +execution to date. 
+""" + +import collections +import dataclasses +import inspect +from collections.abc import Callable, Iterable, Sequence +from dataclasses import dataclass, field +from functools import lru_cache +from io import StringIO +from time import perf_counter +from typing import Any, ClassVar, TypeVar, overload +from unittest import TestCase + +from hypothesis import strategies as st +from hypothesis._settings import ( + HealthCheck, + Verbosity, + note_deprecation, + settings as Settings, +) +from hypothesis.control import _current_build_context, current_build_context +from hypothesis.core import TestFunc, given +from hypothesis.errors import ( + FlakyStrategyDefinition, + InvalidArgument, + InvalidDefinition, +) +from hypothesis.internal.compat import add_note, batched +from hypothesis.internal.conjecture.engine import BUFFER_SIZE +from hypothesis.internal.conjecture.junkdrawer import gc_cumulative_time +from hypothesis.internal.conjecture.utils import calc_label_from_name +from hypothesis.internal.healthcheck import fail_health_check +from hypothesis.internal.observability import observability_enabled +from hypothesis.internal.reflection import ( + function_digest, + get_pretty_function_description, + nicerepr, + proxies, +) +from hypothesis.internal.validation import check_type +from hypothesis.reporting import current_verbosity, report +from hypothesis.strategies._internal.featureflags import FeatureStrategy +from hypothesis.strategies._internal.strategies import ( + Ex, + OneOfStrategy, + SearchStrategy, + check_strategy, +) +from hypothesis.vendor.pretty import RepresentationPrinter + +T = TypeVar("T") +STATE_MACHINE_RUN_LABEL = calc_label_from_name("another state machine step") + + +def _is_singleton(obj: object) -> bool: + """ + Returns True if two separately created instances of v will have the same id + (due to interning). + """ + # The range [-5, 256] is a cpython implementation detail. This may not work + # well on other platforms. + if isinstance(obj, int) and -5 <= obj <= 256: + return True + # cpython also interns compile-time strings, but let's just ignore those for + # now. + return isinstance(obj, bool) or obj is None + + +class _OmittedArgument: + """Sentinel class to prevent overlapping overloads in type hints. See comments + above the overloads of @rule.""" + + +class TestCaseProperty: # pragma: no cover + def __get__(self, obj, typ=None): + if obj is not None: + typ = type(obj) + return typ._to_test_case() + + def __set__(self, obj, value): + raise AttributeError("Cannot set TestCase") + + def __delete__(self, obj): + raise AttributeError("Cannot delete TestCase") + + +def get_state_machine_test( + state_machine_factory, *, settings=None, _min_steps=0, _flaky_state=None +): + # This function is split out from run_state_machine_as_test so that + # HypoFuzz can get and call the test function directly. + if settings is None: + try: + settings = state_machine_factory.TestCase.settings + check_type(Settings, settings, "state_machine_factory.TestCase.settings") + except AttributeError: + settings = Settings(deadline=None, suppress_health_check=list(HealthCheck)) + check_type(Settings, settings, "settings") + check_type(int, _min_steps, "_min_steps") + if _min_steps < 0: + # Because settings can vary via e.g. profiles, settings.stateful_step_count + # overrides this argument and we don't bother cross-validating. 
+ raise InvalidArgument(f"_min_steps={_min_steps} must be non-negative.") + _flaky_state = _flaky_state or {} + + @settings + @given(st.data()) + def run_state_machine(data): + cd = data.conjecture_data + machine: RuleBasedStateMachine = state_machine_factory() + check_type(RuleBasedStateMachine, machine, "state_machine_factory()") + cd.hypothesis_runner = machine + machine._observability_predicates = cd._observability_predicates # alias + + print_steps = ( + current_build_context().is_final or current_verbosity() >= Verbosity.debug + ) + cd._stateful_repr_parts = [] + + def output(s): + if print_steps: + report(s) + if observability_enabled(): + cd._stateful_repr_parts.append(s) + + try: + output(f"state = {machine.__class__.__name__}()") + machine.check_invariants(settings, output, cd._stateful_run_times) + max_steps = settings.stateful_step_count + steps_run = 0 + + while True: + # We basically always want to run the maximum number of steps, + # but need to leave a small probability of terminating early + # in order to allow for reducing the number of steps once we + # find a failing test case, so we stop with probability of + # 2 ** -16 during normal operation but force a stop when we've + # generated enough steps. + cd.start_span(STATE_MACHINE_RUN_LABEL) + must_stop = None + if steps_run >= max_steps: + must_stop = True + elif steps_run <= _min_steps: + must_stop = False + elif cd.length > (0.8 * BUFFER_SIZE): + # Better to stop after fewer steps, than always overrun and retry. + # See https://github.com/HypothesisWorks/hypothesis/issues/3618 + must_stop = True + + start_draw = perf_counter() + start_gc = gc_cumulative_time() + if cd.draw_boolean(p=2**-16, forced=must_stop): + break + steps_run += 1 + + # Choose a rule to run, preferring an initialize rule if there are + # any which have not been run yet. + _flaky_state["selecting_rule"] = True + if machine._initialize_rules_to_run: + init_rules = [ + st.tuples(st.just(rule), st.fixed_dictionaries(rule.arguments)) + for rule in machine._initialize_rules_to_run + ] + rule, data = cd.draw(st.one_of(init_rules)) + machine._initialize_rules_to_run.remove(rule) + else: + rule, data = cd.draw(machine._rules_strategy) + _flaky_state["selecting_rule"] = False + draw_label = f"generate:rule:{rule.function.__name__}" + cd.draw_times.setdefault(draw_label, 0.0) + in_gctime = gc_cumulative_time() - start_gc + cd.draw_times[draw_label] += perf_counter() - start_draw - in_gctime + + # Pretty-print the values this rule was called with *before* calling + # _add_results_to_targets, to avoid printing arguments which are also + # a return value using the variable name they are assigned to. 
+ # See https://github.com/HypothesisWorks/hypothesis/issues/2341 + if print_steps or observability_enabled(): + data_to_print = { + k: machine._pretty_print(v) for k, v in data.items() + } + + # Assign 'result' here in case executing the rule fails below + result = multiple() + try: + data = dict(data) + for k, v in list(data.items()): + if isinstance(v, VarReference): + data[k] = machine.names_to_values[v.name] + elif isinstance(v, list) and all( + isinstance(item, VarReference) for item in v + ): + data[k] = [machine.names_to_values[item.name] for item in v] + + label = f"execute:rule:{rule.function.__name__}" + start = perf_counter() + start_gc = gc_cumulative_time() + result = rule.function(machine, **data) + in_gctime = gc_cumulative_time() - start_gc + cd._stateful_run_times[label] += perf_counter() - start - in_gctime + + if rule.targets: + if isinstance(result, MultipleResults): + machine._add_results_to_targets(rule.targets, result.values) + else: + machine._add_results_to_targets(rule.targets, [result]) + elif result is not None: + fail_health_check( + settings, + "Rules should return None if they have no target bundle, " + f"but {rule.function.__qualname__} returned {result!r}", + HealthCheck.return_value, + ) + finally: + if print_steps or observability_enabled(): + # 'result' is only used if the step has target bundles. + # If it does, and the result is a 'MultipleResult', + # then 'print_step' prints a multi-variable assignment. + output(machine._repr_step(rule, data_to_print, result)) + machine.check_invariants(settings, output, cd._stateful_run_times) + cd.stop_span() + finally: + output("state.teardown()") + machine.teardown() + + # Use a machine digest to identify stateful tests in the example database + run_state_machine.hypothesis.inner_test._hypothesis_internal_add_digest = ( + function_digest(state_machine_factory) + ) + # Copy some attributes so @seed and @reproduce_failure "just work" + run_state_machine._hypothesis_internal_use_seed = getattr( + state_machine_factory, "_hypothesis_internal_use_seed", None + ) + run_state_machine._hypothesis_internal_use_reproduce_failure = getattr( + state_machine_factory, "_hypothesis_internal_use_reproduce_failure", None + ) + run_state_machine._hypothesis_internal_print_given_args = False + return run_state_machine + + +def run_state_machine_as_test(state_machine_factory, *, settings=None, _min_steps=0): + """Run a state machine definition as a test, either silently doing nothing + or printing a minimal breaking program and raising an exception. + + state_machine_factory is anything which returns an instance of + RuleBasedStateMachine when called with no arguments - it can be a class or a + function. settings will be used to control the execution of the test. + """ + flaky_state = {"selecting_rule": False} + state_machine_test = get_state_machine_test( + state_machine_factory, + settings=settings, + _min_steps=_min_steps, + _flaky_state=flaky_state, + ) + try: + state_machine_test() + except FlakyStrategyDefinition as err: + if flaky_state["selecting_rule"]: + add_note( + err, + "while selecting a rule to run. This is usually caused by " + "a flaky precondition, or a bundle that was unexpectedly empty.", + ) + raise + + +class StateMachineMeta(type): + def __setattr__(cls, name, value): + if name == "settings" and isinstance(value, Settings): + descr = f"settings({value.show_changed()})" + raise AttributeError( + f"Assigning {cls.__name__}.settings = {descr} does nothing. 
Assign " + f"to {cls.__name__}.TestCase.settings, or use @{descr} as a decorator " + f"on the {cls.__name__} class." + ) + return super().__setattr__(name, value) + + +@dataclass(slots=True, frozen=True) +class _SetupState: + rules: list["Rule"] + invariants: list["Invariant"] + initializers: list["Rule"] + + +class RuleBasedStateMachine(metaclass=StateMachineMeta): + """A RuleBasedStateMachine gives you a structured way to define state machines. + + The idea is that a state machine carries the system under test and some supporting + data. This data can be stored in instance variables or + divided into Bundles. The state machine has a set of rules which may read data + from bundles (or just from normal strategies), push data onto + bundles, change the state of the machine, or verify properties. + At any given point a random applicable rule will be executed. + """ + + _setup_state_per_class: ClassVar[dict[type, _SetupState]] = {} + + def __init__(self) -> None: + setup_state = self.setup_state() + if not setup_state.rules: + raise InvalidDefinition( + f"State machine {type(self).__name__} defines no rules" + ) + + if isinstance(s := vars(type(self)).get("settings"), Settings): + tname = type(self).__name__ + descr = f"settings({s.show_changed()})" + raise InvalidDefinition( + f"Assigning settings = {descr} as a class attribute does nothing. " + f"Assign to {tname}.TestCase.settings, or use @{descr} as a decorator " + f"on the {tname} class." + ) + + self.rules = setup_state.rules + self.invariants = setup_state.invariants + # copy since we pop from this as we run initialize rules. + self._initialize_rules_to_run = setup_state.initializers.copy() + + self.bundles: dict[str, list] = {} + self.names_counters: collections.Counter = collections.Counter() + self.names_list: list[str] = [] + self.names_to_values: dict[str, Any] = {} + self.__stream = StringIO() + self.__printer = RepresentationPrinter( + self.__stream, context=_current_build_context.value + ) + self._rules_strategy = RuleStrategy(self) + + def _pretty_print(self, value): + if isinstance(value, VarReference): + return value.name + elif isinstance(value, list) and all( + isinstance(item, VarReference) for item in value + ): + return "[" + ", ".join([item.name for item in value]) + "]" + self.__stream.seek(0) + self.__stream.truncate(0) + self.__printer.output_width = 0 + self.__printer.buffer_width = 0 + self.__printer.buffer.clear() + self.__printer.pretty(value) + self.__printer.flush() + return self.__stream.getvalue() + + def __repr__(self): + return f"{type(self).__name__}({nicerepr(self.bundles)})" + + def _new_name(self, target): + result = f"{target}_{self.names_counters[target]}" + self.names_counters[target] += 1 + self.names_list.append(result) + return result + + def _last_names(self, n: int) -> list[str]: + len_ = len(self.names_list) + assert len_ >= n + return self.names_list[len_ - n :] + + def bundle(self, name): + return self.bundles.setdefault(name, []) + + @classmethod + def setup_state(cls): + try: + return cls._setup_state_per_class[cls] + except KeyError: + pass + + rules: list[Rule] = [] + initializers: list[Rule] = [] + invariants: list[Invariant] = [] + + for _name, f in inspect.getmembers(cls): + rule = getattr(f, RULE_MARKER, None) + initializer = getattr(f, INITIALIZE_RULE_MARKER, None) + invariant = getattr(f, INVARIANT_MARKER, None) + if rule is not None: + rules.append(rule) + if initializer is not None: + initializers.append(initializer) + if invariant is not None: + invariants.append(invariant) + + 
if ( + getattr(f, PRECONDITIONS_MARKER, None) is not None + and rule is None + and invariant is None + ): + raise InvalidDefinition( + f"{_rule_qualname(f)} has been decorated with @precondition, " + "but not @rule (or @invariant), which is not allowed. A " + "precondition must be combined with a rule or an invariant, " + "since it has no effect alone." + ) + + state = _SetupState( + rules=rules, initializers=initializers, invariants=invariants + ) + cls._setup_state_per_class[cls] = state + return state + + def _repr_step(self, rule: "Rule", data: Any, result: Any) -> str: + output_assignment = "" + extra_assignment_lines = [] + if rule.targets: + number_of_results = ( + len(result.values) if isinstance(result, MultipleResults) else 1 + ) + number_of_last_names = len(rule.targets) * number_of_results + last_names = self._last_names(number_of_last_names) + if isinstance(result, MultipleResults): + if len(result.values) == 1: + # len-1 tuples + output_per_target = [f"({name},)" for name in last_names] + output_assignment = " = ".join(output_per_target) + " = " + elif result.values: + # multiple values, multiple targets -- use the first target + # for the assignment from function, and do the other target + # assignments on separate lines + names_per_target = list(batched(last_names, number_of_results)) + first_target_output = ", ".join(names_per_target[0]) + output_assignment = first_target_output + " = " + for other_target_names in names_per_target[1:]: + other_target_output = ", ".join(other_target_names) + extra_assignment_lines.append( + other_target_output + " = " + first_target_output + ) + else: + output_assignment = " = ".join(last_names) + " = " + args = ", ".join(f"{k}={v}" for k, v in data.items()) + output_line = f"{output_assignment}state.{rule.function.__name__}({args})" + return "\n".join([output_line] + extra_assignment_lines) + + def _add_results_to_targets(self, targets, results): + # Note, the assignment order here is reflected in _repr_step + for target in targets: + for result in results: + name = self._new_name(target) + + def printer(obj, p, cycle, name=name): + return p.text(name) + + # see + # https://github.com/HypothesisWorks/hypothesis/pull/4266#discussion_r1949619102 + if not _is_singleton(result): + self.__printer.singleton_pprinters.setdefault(id(result), printer) + self.names_to_values[name] = result + self.bundles.setdefault(target, []).append(VarReference(name)) + + def check_invariants(self, settings, output, runtimes): + for invar in self.invariants: + if self._initialize_rules_to_run and not invar.check_during_init: + continue + if not all(precond(self) for precond in invar.preconditions): + continue + name = invar.function.__name__ + if ( + current_build_context().is_final + or settings.verbosity >= Verbosity.debug + or observability_enabled() + ): + output(f"state.{name}()") + start = perf_counter() + result = invar.function(self) + runtimes[f"execute:invariant:{name}"] += perf_counter() - start + if result is not None: + fail_health_check( + settings, + "The return value of an @invariant is always ignored, but " + f"{invar.function.__qualname__} returned {result!r} " + "instead of None", + HealthCheck.return_value, + ) + + def teardown(self): + """Called after a run has finished executing to clean up any necessary + state. + + Does nothing by default. 
+ """ + + TestCase = TestCaseProperty() + + @classmethod + @lru_cache + def _to_test_case(cls): + class StateMachineTestCase(TestCase): + settings = Settings(deadline=None, suppress_health_check=list(HealthCheck)) + + def runTest(self): + run_state_machine_as_test(cls, settings=self.settings) + + runTest.is_hypothesis_test = True + runTest._hypothesis_state_machine_class = cls + + StateMachineTestCase.__name__ = cls.__name__ + ".TestCase" + StateMachineTestCase.__qualname__ = cls.__qualname__ + ".TestCase" + return StateMachineTestCase + + +@dataclass(slots=True, frozen=False) +class Rule: + targets: Any + function: Any + arguments: Any + preconditions: Any + bundles: tuple["Bundle", ...] = field(init=False) + _cached_hash: int | None = field(init=False, default=None) + _cached_repr: str | None = field(init=False, default=None) + arguments_strategies: dict[Any, Any] = field(init=False, default_factory=dict) + + def __post_init__(self): + bundles = [] + for k, v in sorted(self.arguments.items()): + assert not isinstance(v, BundleReferenceStrategy) + if isinstance(v, Bundle): + bundles.append(v) + consume = isinstance(v, BundleConsumer) + v = BundleReferenceStrategy(v.name, consume=consume) + self.arguments_strategies[k] = v + self.bundles = tuple(bundles) + + def __repr__(self) -> str: + if self._cached_repr is None: + bits = [ + f"{field.name}=" + f"{get_pretty_function_description(getattr(self, field.name))}" + for field in dataclasses.fields(self) + if getattr(self, field.name) + ] + self._cached_repr = f"{self.__class__.__name__}({', '.join(bits)})" + return self._cached_repr + + def __hash__(self): + # sampled_from uses hash in calc_label, and we want this to be fast when + # sampling stateful rules, so we cache here. + if self._cached_hash is None: + self._cached_hash = hash( + ( + self.targets, + self.function, + tuple(self.arguments.items()), + self.preconditions, + self.bundles, + ) + ) + return self._cached_hash + + +self_strategy = st.runner() + + +class BundleReferenceStrategy(SearchStrategy): + def __init__(self, name: str, *, consume: bool = False): + super().__init__() + self.name = name + self.consume = consume + + def do_draw(self, data): + machine = data.draw(self_strategy) + bundle = machine.bundle(self.name) + if not bundle: + data.mark_invalid(f"Cannot draw from empty bundle {self.name!r}") + # Shrink towards the right rather than the left. This makes it easier + # to delete data generated earlier, as when the error is towards the + # end there can be a lot of hard to remove padding. + position = data.draw_integer(0, len(bundle) - 1, shrink_towards=len(bundle)) + if self.consume: + return bundle.pop(position) # pragma: no cover # coverage is flaky here + else: + return bundle[position] + + +class Bundle(SearchStrategy[Ex]): + """A collection of values for use in stateful testing. + + Bundles are a kind of strategy where values can be added by rules, + and (like any strategy) used as inputs to future rules. + + The ``name`` argument they are passed is the they are referred to + internally by the state machine; no two bundles may have + the same name. It is idiomatic to use the attribute + being assigned to as the name of the Bundle:: + + class MyStateMachine(RuleBasedStateMachine): + keys = Bundle("keys") + + Bundles can contain the same value more than once; this becomes + relevant when using :func:`~hypothesis.stateful.consumes` to remove + values again. 
+ + If the ``consume`` argument is set to True, then all values that are + drawn from this bundle will be consumed (as above) when requested. + """ + + def __init__( + self, name: str, *, consume: bool = False, draw_references: bool = True + ) -> None: + super().__init__() + self.name = name + self.__reference_strategy = BundleReferenceStrategy(name, consume=consume) + self.draw_references = draw_references + + def do_draw(self, data): + machine = data.draw(self_strategy) + reference = data.draw(self.__reference_strategy) + return machine.names_to_values[reference.name] + + def __repr__(self): + consume = self.__reference_strategy.consume + if consume is False: + return f"Bundle(name={self.name!r})" + return f"Bundle(name={self.name!r}, {consume=})" + + def calc_is_empty(self, recur): + # We assume that a bundle will grow over time + return False + + def is_currently_empty(self, data): + # ``self_strategy`` is an instance of the ``st.runner()`` strategy. + # Hence drawing from it only returns the current state machine without + # modifying the underlying choice sequence. + machine = data.draw(self_strategy) + return not bool(machine.bundle(self.name)) + + def flatmap(self, expand): + if self.draw_references: + return type(self)( + self.name, + consume=self.__reference_strategy.consume, + draw_references=False, + ).flatmap(expand) + return super().flatmap(expand) + + def __hash__(self): + # Making this hashable means we hit the fast path of "everything is + # hashable" in st.sampled_from label calculation when sampling which rule + # to invoke next. + + # Mix in "Bundle" for collision resistance + return hash(("Bundle", self.name)) + + +class BundleConsumer(Bundle[Ex]): + def __init__(self, bundle: Bundle[Ex]) -> None: + super().__init__(bundle.name, consume=True) + + +def consumes(bundle: Bundle[Ex]) -> SearchStrategy[Ex]: + """When introducing a rule in a RuleBasedStateMachine, this function can + be used to mark bundles from which each value used in a step with the + given rule should be removed. This function returns a strategy object + that can be manipulated and combined like any other. + + For example, a rule declared with + + ``@rule(value1=b1, value2=consumes(b2), value3=lists(consumes(b3)))`` + + will consume a value from Bundle ``b2`` and several values from Bundle + ``b3`` to populate ``value2`` and ``value3`` each time it is executed. + """ + if not isinstance(bundle, Bundle): + raise TypeError("Argument to be consumed must be a bundle.") + return BundleConsumer(bundle) + + +@dataclass(slots=True, frozen=True) +class MultipleResults(Iterable[Ex]): + values: tuple[Ex, ...] + + def __iter__(self): + return iter(self.values) + + +def multiple(*args: T) -> MultipleResults[T]: + """This function can be used to pass multiple results to the target(s) of + a rule. Just use ``return multiple(result1, result2, ...)`` in your rule. + + It is also possible to use ``return multiple()`` with no arguments in + order to end a rule without passing any result. + """ + return MultipleResults(args) + + +def _convert_targets(targets, target): + """Single validator and converter for target arguments.""" + if target is not None: + if targets: + raise InvalidArgument( + f"Passing both targets={targets!r} and target={target!r} is " + f"redundant - pass targets={(*targets, target)!r} instead." + ) + targets = (target,) + + converted_targets = [] + for t in targets: + if not isinstance(t, Bundle): + msg = "Got invalid target %r of type %r, but all targets must be Bundles." 
+ if isinstance(t, OneOfStrategy): + msg += ( + "\nIt looks like you passed `one_of(a, b)` or `a | b` as " + "a target. You should instead pass `targets=(a, b)` to " + "add the return value of this rule to both the `a` and " + "`b` bundles, or define a rule for each target if it " + "should be added to exactly one." + ) + raise InvalidArgument(msg % (t, type(t))) + while isinstance(t, Bundle): + if isinstance(t, BundleConsumer): + note_deprecation( + f"Using consumes({t.name}) doesn't makes sense in this context. " + "This will be an error in a future version of Hypothesis.", + since="2021-09-08", + has_codemod=False, + stacklevel=2, + ) + t = t.name + converted_targets.append(t) + return tuple(converted_targets) + + +RULE_MARKER = "hypothesis_stateful_rule" +INITIALIZE_RULE_MARKER = "hypothesis_stateful_initialize_rule" +PRECONDITIONS_MARKER = "hypothesis_stateful_preconditions" +INVARIANT_MARKER = "hypothesis_stateful_invariant" + + +_RuleType = Callable[..., MultipleResults[Ex] | Ex] +_RuleWrapper = Callable[[_RuleType[Ex]], _RuleType[Ex]] + + +def _rule_qualname(f: Any) -> str: + # we define rules / invariants / initializes inside of wrapper functions, which + # makes f.__qualname__ look like: + # test_precondition..BadStateMachine.has_precondition_but_no_rule + # which is not ideal. This function returns just + # BadStateMachine.has_precondition_but_no_rule + # instead. + return f.__qualname__.rsplit(".")[-1] + + +# We cannot exclude `target` or `targets` from any of these signatures because +# otherwise they would be matched against the `kwargs`, either leading to +# overlapping overloads of incompatible return types, or a concrete +# implementation that does not accept all overloaded variant signatures. +# Although it is possible to reorder the variants to fix the former, it will +# always lead to the latter, as then the omitted parameter could be typed as +# a `SearchStrategy`, which the concrete implementation does not accept. +# +# Omitted `targets` parameters, where the default value is used, are typed with +# a special `_OmittedArgument` type. We cannot type them as `tuple[()]`, because +# `tuple[()]` is a subtype of `Sequence[Bundle[Ex]]`, leading to signature +# overlaps with incompatible return types. The `_OmittedArgument` type will never be +# encountered at runtime, and exists solely to annotate the default of `targets`. +# PEP 661 (Sentinel Values) might provide a more elegant alternative in the future. +# +# We could've also annotated `targets` as `tuple[_OmittedArgument]`, but then when +# both `target` and `targets` are provided, mypy describes the type error as an +# invalid argument type for `targets` (expected `tuple[_OmittedArgument]`, got ...). +# By annotating it as a bare `_OmittedArgument` type, mypy's error will warn that +# there is no overloaded signature matching the call, which is more descriptive. +# +# When `target` xor `targets` is provided, the function to decorate must return +# a value whose type matches the one stored in the bundle. When neither are +# provided, the function to decorate must return nothing. There is no variant +# for providing `target` and `targets`, as these parameters are mutually exclusive. +@overload +def rule( + *, + targets: Sequence[Bundle[Ex]], + target: None = ..., + **kwargs: SearchStrategy, +) -> _RuleWrapper[Ex]: # pragma: no cover + ... + + +@overload +def rule( + *, target: Bundle[Ex], targets: _OmittedArgument = ..., **kwargs: SearchStrategy +) -> _RuleWrapper[Ex]: # pragma: no cover + ... 
+ + +@overload +def rule( + *, + target: None = ..., + targets: _OmittedArgument = ..., + **kwargs: SearchStrategy, +) -> Callable[[Callable[..., None]], Callable[..., None]]: # pragma: no cover + ... + + +def rule( + *, + targets: Sequence[Bundle[Ex]] | _OmittedArgument = (), + target: Bundle[Ex] | None = None, + **kwargs: SearchStrategy, +) -> _RuleWrapper[Ex] | Callable[[Callable[..., None]], Callable[..., None]]: + """Decorator for RuleBasedStateMachine. Any Bundle present in ``target`` or + ``targets`` will define where the end result of this function should go. If + both are empty then the end result will be discarded. + + ``target`` must be a Bundle, or if the result should be replicated to multiple + bundles you can pass a tuple of them as the ``targets`` argument. + It is invalid to use both arguments for a single rule. If the result + should go to exactly one of several bundles, define a separate rule for + each case. + + kwargs then define the arguments that will be passed to the function + invocation. If their value is a Bundle, or if it is ``consumes(b)`` + where ``b`` is a Bundle, then values that have previously been produced + for that bundle will be provided. If ``consumes`` is used, the value + will also be removed from the bundle. + + Any other kwargs should be strategies and values from them will be + provided. + """ + converted_targets = _convert_targets(targets, target) + for k, v in kwargs.items(): + check_strategy(v, name=k) + + def accept(f): + if getattr(f, INVARIANT_MARKER, None): + raise InvalidDefinition( + f"{_rule_qualname(f)} is used with both @rule and @invariant, " + "which is not allowed. A function may be either a rule or an " + "invariant, but not both." + ) + existing_rule = getattr(f, RULE_MARKER, None) + existing_initialize_rule = getattr(f, INITIALIZE_RULE_MARKER, None) + if existing_rule is not None: + raise InvalidDefinition( + f"{_rule_qualname(f)} has been decorated with @rule twice, which is " + "not allowed." + ) + if existing_initialize_rule is not None: + raise InvalidDefinition( + f"{_rule_qualname(f)} has been decorated with both @rule and " + "@initialize, which is not allowed." + ) + + preconditions = getattr(f, PRECONDITIONS_MARKER, ()) + rule = Rule( + targets=converted_targets, + arguments=kwargs, + function=f, + preconditions=preconditions, + ) + + @proxies(f) + def rule_wrapper(*args, **kwargs): + return f(*args, **kwargs) + + setattr(rule_wrapper, RULE_MARKER, rule) + return rule_wrapper + + return accept + + +# See also comments of `rule`'s overloads. +@overload +def initialize( + *, + targets: Sequence[Bundle[Ex]], + target: None = ..., + **kwargs: SearchStrategy, +) -> _RuleWrapper[Ex]: # pragma: no cover + ... + + +@overload +def initialize( + *, target: Bundle[Ex], targets: _OmittedArgument = ..., **kwargs: SearchStrategy +) -> _RuleWrapper[Ex]: # pragma: no cover + ... + + +@overload +def initialize( + *, + target: None = ..., + targets: _OmittedArgument = ..., + **kwargs: SearchStrategy, +) -> Callable[[Callable[..., None]], Callable[..., None]]: # pragma: no cover + ... + + +def initialize( + *, + targets: Sequence[Bundle[Ex]] | _OmittedArgument = (), + target: Bundle[Ex] | None = None, + **kwargs: SearchStrategy, +) -> _RuleWrapper[Ex] | Callable[[Callable[..., None]], Callable[..., None]]: + """Decorator for RuleBasedStateMachine. + + An initialize decorator behaves like a rule, but all ``@initialize()`` decorated + methods will be called before any ``@rule()`` decorated methods, in an arbitrary + order. 
Each ``@initialize()`` method will be called exactly once per run, unless + one raises an exception - after which only the ``.teardown()`` method will be run. + ``@initialize()`` methods may not have preconditions. + """ + converted_targets = _convert_targets(targets, target) + for k, v in kwargs.items(): + check_strategy(v, name=k) + + def accept(f): + if getattr(f, INVARIANT_MARKER, None): + raise InvalidDefinition( + f"{_rule_qualname(f)} is used with both @initialize and @invariant, " + "which is not allowed. A function may be either an initialization " + "rule or an invariant, but not both." + ) + existing_rule = getattr(f, RULE_MARKER, None) + existing_initialize_rule = getattr(f, INITIALIZE_RULE_MARKER, None) + if existing_rule is not None: + raise InvalidDefinition( + f"{_rule_qualname(f)} has been decorated with both @rule and " + "@initialize, which is not allowed." + ) + if existing_initialize_rule is not None: + raise InvalidDefinition( + f"{_rule_qualname(f)} has been decorated with @initialize twice, " + "which is not allowed." + ) + preconditions = getattr(f, PRECONDITIONS_MARKER, ()) + if preconditions: + raise InvalidDefinition( + f"{_rule_qualname(f)} has been decorated with both @initialize and " + "@precondition, which is not allowed. An initialization rule " + "runs unconditionally and may not have a precondition." + ) + rule = Rule( + targets=converted_targets, + arguments=kwargs, + function=f, + preconditions=preconditions, + ) + + @proxies(f) + def rule_wrapper(*args, **kwargs): + return f(*args, **kwargs) + + setattr(rule_wrapper, INITIALIZE_RULE_MARKER, rule) + return rule_wrapper + + return accept + + +@dataclass(slots=True, frozen=True) +class VarReference: + name: str + + +# There are multiple alternatives for annotating the `precond` type, all of them +# have drawbacks. See https://github.com/HypothesisWorks/hypothesis/pull/3068#issuecomment-906642371 +def precondition(precond: Callable[[Any], bool]) -> Callable[[TestFunc], TestFunc]: + """Decorator to apply a precondition for rules in a RuleBasedStateMachine. + Specifies a precondition for a rule to be considered as a valid step in the + state machine, which is more efficient than using :func:`~hypothesis.assume` + within the rule. The ``precond`` function will be called with the instance of + RuleBasedStateMachine and should return True or False. Usually it will need + to look at attributes on that instance. + + For example:: + + class MyTestMachine(RuleBasedStateMachine): + state = 1 + + @precondition(lambda self: self.state != 0) + @rule(numerator=integers()) + def divide_with(self, numerator): + self.state = numerator / self.state + + If multiple preconditions are applied to a single rule, it is only considered + a valid step when all of them return True. Preconditions may be applied to + invariants as well as rules. + """ + + def decorator(f): + @proxies(f) + def precondition_wrapper(*args, **kwargs): + return f(*args, **kwargs) + + existing_initialize_rule = getattr(f, INITIALIZE_RULE_MARKER, None) + if existing_initialize_rule is not None: + raise InvalidDefinition( + f"{_rule_qualname(f)} has been decorated with both @initialize and " + "@precondition, which is not allowed. An initialization rule " + "runs unconditionally and may not have a precondition." 
+ ) + + rule = getattr(f, RULE_MARKER, None) + invariant = getattr(f, INVARIANT_MARKER, None) + if rule is not None: + assert invariant is None + new_rule = dataclasses.replace( + rule, preconditions=(*rule.preconditions, precond) + ) + setattr(precondition_wrapper, RULE_MARKER, new_rule) + elif invariant is not None: + assert rule is None + new_invariant = dataclasses.replace( + invariant, preconditions=(*invariant.preconditions, precond) + ) + setattr(precondition_wrapper, INVARIANT_MARKER, new_invariant) + else: + setattr( + precondition_wrapper, + PRECONDITIONS_MARKER, + (*getattr(f, PRECONDITIONS_MARKER, ()), precond), + ) + + return precondition_wrapper + + return decorator + + +@dataclass(slots=True, frozen=True) +class Invariant: + function: Any + preconditions: Any + check_during_init: bool + + def __repr__(self) -> str: + parts = [ + f"function={get_pretty_function_description(self.function)}", + f"{self.preconditions=}", + f"{self.check_during_init=}", + ] + return f"Invariant({', '.join(parts)})" + + +def invariant(*, check_during_init: bool = False) -> Callable[[TestFunc], TestFunc]: + """Decorator to apply an invariant for rules in a RuleBasedStateMachine. + The decorated function will be run after every rule and can raise an + exception to indicate failed invariants. + + For example:: + + class MyTestMachine(RuleBasedStateMachine): + state = 1 + + @invariant() + def is_nonzero(self): + assert self.state != 0 + + By default, invariants are only checked after all + :func:`@initialize() ` rules have been run. + Pass ``check_during_init=True`` for invariants which can also be checked + during initialization. + """ + check_type(bool, check_during_init, "check_during_init") + + def accept(f): + if getattr(f, RULE_MARKER, None) or getattr(f, INITIALIZE_RULE_MARKER, None): + raise InvalidDefinition( + f"{_rule_qualname(f)} has been decorated with both @invariant and " + "@rule, which is not allowed." + ) + existing_invariant = getattr(f, INVARIANT_MARKER, None) + if existing_invariant is not None: + raise InvalidDefinition( + f"{_rule_qualname(f)} has been decorated with @invariant twice, " + "which is not allowed." + ) + preconditions = getattr(f, PRECONDITIONS_MARKER, ()) + invar = Invariant( + function=f, + preconditions=preconditions, + check_during_init=check_during_init, + ) + + @proxies(f) + def invariant_wrapper(*args, **kwargs): + return f(*args, **kwargs) + + setattr(invariant_wrapper, INVARIANT_MARKER, invar) + return invariant_wrapper + + return accept + + +class RuleStrategy(SearchStrategy): + def __init__(self, machine: RuleBasedStateMachine) -> None: + super().__init__() + self.machine = machine + self.rules = machine.rules.copy() + + self.enabled_rules_strategy = st.shared( + FeatureStrategy(at_least_one_of={r.function.__name__ for r in self.rules}), + key=("enabled rules", machine), + ) + + # The order is a bit arbitrary. Primarily we're trying to group rules + # that write to the same location together, and to put rules with no + # target first as they have less effect on the structure. We order from + # fewer to more arguments on grounds that it will plausibly need less + # data. This probably won't work especially well and we could be + # smarter about it, but it's better than just doing it in definition + # order. 
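+        # For example: a rule with no targets and no arguments sorts before a
+        # rule that writes to bundle "a" and takes two arguments, because
+        # [] < ["a"], and ties then break on fewer arguments, then on name.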
+ self.rules.sort( + key=lambda rule: ( + sorted(rule.targets), + len(rule.arguments), + rule.function.__name__, + ) + ) + self.rules_strategy = st.sampled_from(self.rules) + + def __repr__(self): + return f"{self.__class__.__name__}(machine={self.machine.__class__.__name__}({{...}}))" + + def do_draw(self, data): + if not any(self.is_valid(rule) for rule in self.rules): + rules = ", ".join([rule.function.__name__ for rule in self.rules]) + msg = ( + f"No progress can be made from state {self.machine!r}, because no " + f"available rule had a True precondition. rules: {rules}" + ) + raise InvalidDefinition(msg) from None + + feature_flags = data.draw(self.enabled_rules_strategy) + + def rule_is_enabled(r): + # Note: The order of the filters here is actually quite important, + # because checking is_enabled makes choices, so increases the size of + # the choice sequence. This means that if we are in a case where many + # rules are invalid we would make a lot more choices if we ask if they + # are enabled before we ask if they are valid, so our test cases would + # be artificially large. + return self.is_valid(r) and feature_flags.is_enabled(r.function.__name__) + + rule = data.draw(self.rules_strategy.filter(rule_is_enabled)) + + arguments = {} + for k, strat in rule.arguments_strategies.items(): + try: + arguments[k] = data.draw(strat) + except Exception as err: + rname = rule.function.__name__ + add_note(err, f"while generating {k!r} from {strat!r} for rule {rname}") + raise + return (rule, arguments) + + def is_valid(self, rule): + for b in rule.bundles: + if not self.machine.bundle(b.name): + return False + + predicates = self.machine._observability_predicates + desc = f"{self.machine.__class__.__qualname__}, rule {rule.function.__name__}," + for pred in rule.preconditions: + meets_precond = pred(self.machine) + where = f"{desc} precondition {get_pretty_function_description(pred)}" + predicates[where].update_count(condition=meets_precond) + if not meets_precond: + return False + + return True diff --git a/vendored/hypothesis/statistics.py b/vendored/hypothesis/statistics.py new file mode 100644 index 0000000..bdb2967 --- /dev/null +++ b/vendored/hypothesis/statistics.py @@ -0,0 +1,131 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import math +from collections import Counter +from collections.abc import Iterable +from typing import TYPE_CHECKING, cast + +from hypothesis._settings import Phase +from hypothesis.utils.dynamicvariables import DynamicVariable + +if TYPE_CHECKING: + from hypothesis.internal.conjecture.engine import PhaseStatistics, StatisticsDict + +collector = DynamicVariable(None) + + +def note_statistics(stats_dict: "StatisticsDict") -> None: + callback = collector.value + if callback is not None: + callback(stats_dict) + + +def describe_targets(best_targets: dict[str, float]) -> list[str]: + """Return a list of lines describing the results of `target`, if any.""" + # These lines are included in the general statistics description below, + # but also printed immediately below failing examples to alleviate the + # "threshold problem" where shrinking can make severe bug look trivial. 
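+    # With a single target the output is one line, e.g. (score and label are
+    # illustrative):  Highest target score: 0.87 (label='difference')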
+ # See https://github.com/HypothesisWorks/hypothesis/issues/2180 + if not best_targets: + return [] + elif len(best_targets) == 1: + label, score = next(iter(best_targets.items())) + return [f"Highest target score: {score:g} ({label=})"] + else: + lines = ["Highest target scores:"] + for label, score in sorted(best_targets.items(), key=lambda x: x[::-1]): + lines.append(f"{score:>16g} ({label=})") + return lines + + +def format_ms(times: Iterable[float]) -> str: + """Format `times` into a string representing approximate milliseconds. + + `times` is a collection of durations in seconds. + """ + ordered = sorted(times) + n = len(ordered) - 1 + if n < 0 or any(math.isnan(t) for t in ordered): # pragma: no cover + return "NaN ms" + lower = int(ordered[math.floor(n * 0.05)] * 1000) + upper = int(ordered[math.ceil(n * 0.95)] * 1000) + if upper == 0: + return "< 1ms" + elif lower == upper: + return f"~ {lower}ms" + else: + return f"~ {lower}-{upper} ms" + + +def describe_statistics(stats_dict: "StatisticsDict") -> str: + """Return a multi-line string describing the passed run statistics. + + `stats_dict` must be a dictionary of data in the format collected by + `hypothesis.internal.conjecture.engine.ConjectureRunner.statistics`. + + We DO NOT promise that this format will be stable or supported over + time, but do aim to make it reasonably useful for downstream users. + It's also meant to support benchmarking for research purposes. + + This function is responsible for the report which is printed in the + terminal for our pytest --hypothesis-show-statistics option. + """ + lines = [stats_dict["nodeid"] + ":\n"] if "nodeid" in stats_dict else [] + prev_failures = 0 + for phase in (p.name for p in list(Phase)[1:]): + d = cast("PhaseStatistics", stats_dict.get(phase + "-phase", {})) + # Basic information we report for every phase + cases = d.get("test-cases", []) + if not cases: + continue + statuses = Counter(t["status"] for t in cases) + runtime_ms = format_ms(t["runtime"] for t in cases) + drawtime_ms = format_ms(t["drawtime"] for t in cases) + lines.append( + f" - during {phase} phase ({d['duration-seconds']:.2f} seconds):\n" + f" - Typical runtimes: {runtime_ms}, of which {drawtime_ms} in data generation\n" + f" - {statuses['valid']} passing examples, {statuses['interesting']} " + f"failing examples, {statuses['invalid'] + statuses['overrun']} invalid examples" + ) + # If we've found new distinct failures in this phase, report them + distinct_failures = d["distinct-failures"] - prev_failures + if distinct_failures: + plural = distinct_failures > 1 + lines.append( + " - Found {}{} distinct error{} in this phase".format( + distinct_failures, " more" * bool(prev_failures), "s" * plural + ) + ) + prev_failures = d["distinct-failures"] + # Report events during the generate phase, if there were any + if phase == "generate": + events = Counter(sum((t["events"] for t in cases), [])) + if events: + lines.append(" - Events:") + lines += [ + f" * {100 * v / len(cases):.2f}%, {k}" + for k, v in sorted(events.items(), key=lambda x: (-x[1], x[0])) + ] + # Some additional details on the shrinking phase + if phase == "shrink": + lines.append( + " - Tried {} shrinks of which {} were successful".format( + len(cases), d["shrinks-successful"] + ) + ) + lines.append("") + + target_lines = describe_targets(stats_dict.get("targets", {})) + if target_lines: + lines.append(" - " + target_lines[0]) + lines.extend(" " + l for l in target_lines[1:]) + lines.append(" - Stopped because " + stats_dict["stopped-because"]) + 
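+    # At this point the assembled report reads roughly like (values are
+    # illustrative only):
+    #   - during generate phase (0.06 seconds):
+    #     - Typical runtimes: < 1ms, of which < 1ms in data generation
+    #     - 100 passing examples, 0 failing examples, 3 invalid examples
+    #   - Stopped because settings.max_examples=100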
return "\n".join(lines) diff --git a/vendored/hypothesis/strategies/__init__.py b/vendored/hypothesis/strategies/__init__.py new file mode 100644 index 0000000..80bef4e --- /dev/null +++ b/vendored/hypothesis/strategies/__init__.py @@ -0,0 +1,141 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from hypothesis.strategies._internal import SearchStrategy +from hypothesis.strategies._internal.collections import tuples +from hypothesis.strategies._internal.core import ( + DataObject, + DrawFn, + binary, + booleans, + builds, + characters, + complex_numbers, + composite, + data, + decimals, + deferred, + dictionaries, + emails, + fixed_dictionaries, + fractions, + from_regex, + from_type, + frozensets, + functions, + iterables, + lists, + permutations, + random_module, + randoms, + recursive, + register_type_strategy, + runner, + sampled_from, + sets, + shared, + slices, + text, + uuids, +) +from hypothesis.strategies._internal.datetime import ( + dates, + datetimes, + timedeltas, + times, + timezone_keys, + timezones, +) +from hypothesis.strategies._internal.ipaddress import ip_addresses +from hypothesis.strategies._internal.misc import just, none, nothing +from hypothesis.strategies._internal.numbers import floats, integers +from hypothesis.strategies._internal.strategies import one_of +from hypothesis.strategies._internal.utils import _all_strategies + +# The implementation of all of these lives in `_strategies.py`, but we +# re-export them via this module to avoid exposing implementation details +# to over-zealous tab completion in editors that do not respect __all__. + + +__all__ = [ + "DataObject", + "DrawFn", + "SearchStrategy", + "binary", + "booleans", + "builds", + "characters", + "complex_numbers", + "composite", + "data", + "dates", + "datetimes", + "decimals", + "deferred", + "dictionaries", + "emails", + "fixed_dictionaries", + "floats", + "fractions", + "from_regex", + "from_type", + "frozensets", + "functions", + "integers", + "ip_addresses", + "iterables", + "just", + "lists", + "none", + "nothing", + "one_of", + "permutations", + "random_module", + "randoms", + "recursive", + "register_type_strategy", + "runner", + "sampled_from", + "sets", + "shared", + "slices", + "text", + "timedeltas", + "times", + "timezone_keys", + "timezones", + "tuples", + "uuids", +] + + +def _check_exports(_public): + assert set(__all__) == _public, (set(__all__) - _public, _public - set(__all__)) + + # Verify that all exported strategy functions were registered with + # @declares_strategy. 
+ + existing_strategies = set(_all_strategies) - {"_maybe_nil_uuids"} + + exported_strategies = set(__all__) - { + "DataObject", + "DrawFn", + "SearchStrategy", + "composite", + "register_type_strategy", + } + assert existing_strategies == exported_strategies, ( + existing_strategies - exported_strategies, + exported_strategies - existing_strategies, + ) + + +_check_exports({n for n in dir() if n[0] not in "_@"}) +del _check_exports diff --git a/vendored/hypothesis/strategies/_internal/__init__.py b/vendored/hypothesis/strategies/_internal/__init__.py new file mode 100644 index 0000000..c65cc85 --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/__init__.py @@ -0,0 +1,16 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +"""Package defining SearchStrategy, which is the core type that Hypothesis uses +to explore data.""" + +from .strategies import SearchStrategy, check_strategy + +__all__ = ["SearchStrategy", "check_strategy"] diff --git a/vendored/hypothesis/strategies/_internal/attrs.py b/vendored/hypothesis/strategies/_internal/attrs.py new file mode 100644 index 0000000..4e057df --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/attrs.py @@ -0,0 +1,218 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +# Since Hypothesis doesn't have a hard dependency on attrs, be careful to only import +# this file when attrs is in sys.modules. + +from collections.abc import Collection, Generator, Iterable, Sequence +from functools import reduce +from itertools import chain +from types import EllipsisType +from typing import Any, TypeVar + +import attr + +# attr/validators.pyi does not expose types for these, even though they exist +# in source. +from attr.validators import ( # type: ignore + _AndValidator, + _InstanceOfValidator, + _InValidator, + _OptionalValidator, +) +from attrs import Attribute, AttrsInstance, Factory + +from hypothesis import strategies as st +from hypothesis.errors import ResolutionFailed +from hypothesis.internal.compat import get_type_hints +from hypothesis.strategies._internal.core import BuildsStrategy +from hypothesis.strategies._internal.strategies import SearchStrategy +from hypothesis.strategies._internal.types import is_a_type, type_sorting_key +from hypothesis.utils.conventions import infer + +T = TypeVar("T") + + +def get_attribute_by_alias( + fields: Iterable[Attribute], + alias: str, + *, + target: type[AttrsInstance] | None = None, +) -> Attribute: + """ + Get an attrs attribute by its alias, rather than its name (compare + getattr(fields, name)). + + ``target`` is used only to provide a nicer error message, and can be safely + omitted. + """ + # attrs supports defining an alias for a field, which is the name used when + # defining __init__. 
The init args are what we pull from when determining + # what parameters we need to supply to the class, so it's what we need to + # match against as well, rather than the class-level attribute name. + matched_fields = [f for f in fields if f.alias == alias] + if not matched_fields: + raise TypeError( + f"Unexpected keyword argument {alias} for attrs class" + f"{f' {target}' if target else ''}. Expected one of " + f"{[f.name for f in fields]}" + ) + # alias is used as an arg in __init__, so it is guaranteed to be unique, if + # it exists. + assert len(matched_fields) == 1 + return matched_fields[0] + + +def from_attrs( + target: type[AttrsInstance], + args: tuple[SearchStrategy[Any], ...], + kwargs: dict[str, SearchStrategy[Any] | EllipsisType], + to_infer: Iterable[str], +) -> SearchStrategy: + """An internal version of builds(), specialised for Attrs classes.""" + attributes: tuple[Attribute, ...] = attr.fields(target) + kwargs = {k: v for k, v in kwargs.items() if v is not infer} + for name in to_infer: + attrib = get_attribute_by_alias(attributes, name, target=target) + kwargs[name] = from_attrs_attribute(attrib, target) + # We might make this strategy more efficient if we added a layer here that + # retries drawing if validation fails, for improved composition. + # The treatment of timezones in datetimes() provides a precedent. + return BuildsStrategy(target, args, kwargs) + + +def from_attrs_attribute( + attrib: Attribute, target: type[AttrsInstance] +) -> SearchStrategy: + """Infer a strategy from the metadata on an attr.Attribute object.""" + # Try inferring from the default argument. Note that this will only help if + # the user passed `...` to builds() for this attribute, but in that case + # we use it as the minimal example. + default: SearchStrategy = st.nothing() + # attr/__init__.pyi uses overloads to declare Factory as a function, not a + # class. This is a fib - at runtime and always, it is a class. + if isinstance(attrib.default, Factory): # type: ignore + assert attrib.default is not None + if not attrib.default.takes_self: + default = st.builds(attrib.default.factory) + elif attrib.default is not attr.NOTHING: + default = st.just(attrib.default) + + # Try inferring None, exact values, or type from attrs provided validators. + + # updated to none() on seeing an OptionalValidator + null: SearchStrategy = st.nothing() + # list of in_ validator collections to sample from + in_collections = [] + # type constraints to pass to types_to_strategy() + validator_types = set() + if attrib.validator is not None: + validator = attrib.validator + if isinstance(validator, _OptionalValidator): + null = st.none() + validator = validator.validator + if isinstance(validator, _AndValidator): + vs = validator._validators + else: + vs = [validator] + for v in vs: + if isinstance(v, _InValidator): + if isinstance(v.options, str): + in_collections.append(list(all_substrings(v.options))) + else: + in_collections.append(v.options) + elif isinstance(v, _InstanceOfValidator): + validator_types.add(v.type) + + # This is the important line. We compose the final strategy from various + # parts. The default value, if any, is the minimal shrink, followed by + # None (again, if allowed). We then prefer to sample from values passed + # to an in_ validator if available, but infer from a type otherwise. + # Pick one because (sampled_from((1, 2)) | from_type(int)) would usually + # fail validation by generating e.g. zero! 
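+    # For example, for a hypothetical attribute defined as
+    #     x = attr.ib(default=0, validator=in_([0, 1, 2]))
+    # the pieces are default=just(0), null=nothing() and
+    # sample=sampled_from([0, 1, 2]), so the composed strategy below is
+    # just(0) | nothing() | sampled_from([0, 1, 2]).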
+ if in_collections: + sample = st.sampled_from(list(ordered_intersection(in_collections))) + strat = default | null | sample + else: + strat = default | null | types_to_strategy(attrib, validator_types) + + # Better to give a meaningful error here than an opaque "could not draw" + # when we try to get a value but have lost track of where this was created. + if strat.is_empty: + raise ResolutionFailed( + "Cannot infer a strategy from the default, validator, type, or " + f"converter for attribute={attrib!r} of class={target!r}" + ) + return strat + + +def types_to_strategy(attrib: Attribute, types: Collection[Any]) -> SearchStrategy: + """Find all the type metadata for this attribute, reconcile it, and infer a + strategy from the mess.""" + # If we know types from the validator(s), that's sufficient. + if len(types) == 1: + (typ,) = types + if isinstance(typ, tuple): + return st.one_of(*map(st.from_type, typ)) + return st.from_type(typ) + elif types: + # We have a list of tuples of types, and want to find a type + # (or tuple of types) that is a subclass of all of of them. + type_tuples = [k if isinstance(k, tuple) else (k,) for k in types] + # Flatten the list, filter types that would fail validation, and + # sort so that ordering is stable between runs and shrinks well. + allowed = [ + t + for t in set(sum(type_tuples, ())) + if all(issubclass(t, tup) for tup in type_tuples) + ] + allowed.sort(key=type_sorting_key) + return st.one_of([st.from_type(t) for t in allowed]) + + # Otherwise, try the `type` attribute as a fallback, and finally try + # the type hints on a converter (desperate!) before giving up. + if is_a_type(getattr(attrib, "type", None)): + assert attrib.type is not None + # The convoluted test is because variable annotations may be stored + # in string form; attrs doesn't evaluate them and we don't handle them. + # See PEP 526, PEP 563, and Hypothesis issue #1004 for details. + return st.from_type(attrib.type) + + converter = getattr(attrib, "converter", None) + if isinstance(converter, type): + return st.from_type(converter) + elif callable(converter): + hints = get_type_hints(converter) + if "return" in hints: + return st.from_type(hints["return"]) + + return st.nothing() + + +def ordered_intersection(in_: Sequence[Iterable[T]]) -> Generator[T, None, None]: + """Set union of n sequences, ordered for reproducibility across runs.""" + intersection = reduce(set.intersection, in_, set(in_[0])) + for x in chain.from_iterable(in_): + if x in intersection: + yield x + intersection.remove(x) + + +def all_substrings(s: str) -> Generator[str, None, None]: + """Generate all substrings of `s`, in order of length then occurrence. + Includes the empty string (first), and any duplicates that are present. + + >>> list(all_substrings('010')) + ['', '0', '1', '0', '01', '10', '010'] + """ + yield s[:0] + for n, _ in enumerate(s): + for i in range(len(s) - n): + yield s[i : i + n + 1] diff --git a/vendored/hypothesis/strategies/_internal/collections.py b/vendored/hypothesis/strategies/_internal/collections.py new file mode 100644 index 0000000..d585d29 --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/collections.py @@ -0,0 +1,390 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. 
If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import copy +import math +from collections.abc import Callable, Iterable +from typing import Any, overload + +from hypothesis import strategies as st +from hypothesis.errors import InvalidArgument +from hypothesis.internal.conjecture import utils as cu +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.conjecture.engine import BUFFER_SIZE +from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy +from hypothesis.internal.conjecture.utils import combine_labels +from hypothesis.internal.filtering import get_integer_predicate_bounds +from hypothesis.internal.reflection import is_identity_function +from hypothesis.strategies._internal.strategies import ( + T3, + T4, + T5, + Ex, + FilteredStrategy, + RecurT, + SampledFromStrategy, + SearchStrategy, + T, + check_strategy, + filter_not_satisfied, +) +from hypothesis.strategies._internal.utils import cacheable, defines_strategy +from hypothesis.utils.conventions import UniqueIdentifier + + +class TupleStrategy(SearchStrategy[tuple[Ex, ...]]): + """A strategy responsible for fixed length tuples based on heterogeneous + strategies for each of their elements.""" + + def __init__(self, strategies: Iterable[SearchStrategy[Any]]): + super().__init__() + self.element_strategies = tuple(strategies) + + def do_validate(self) -> None: + for s in self.element_strategies: + s.validate() + + def calc_label(self) -> int: + return combine_labels( + self.class_label, *(s.label for s in self.element_strategies) + ) + + def __repr__(self) -> str: + tuple_string = ", ".join(map(repr, self.element_strategies)) + return f"TupleStrategy(({tuple_string}))" + + def calc_has_reusable_values(self, recur: RecurT) -> bool: + return all(recur(e) for e in self.element_strategies) + + def do_draw(self, data: ConjectureData) -> tuple[Ex, ...]: + return tuple(data.draw(e) for e in self.element_strategies) + + def calc_is_empty(self, recur: RecurT) -> bool: + return any(recur(e) for e in self.element_strategies) + + +@overload +def tuples() -> SearchStrategy[tuple[()]]: # pragma: no cover + ... + + +@overload +def tuples(__a1: SearchStrategy[Ex]) -> SearchStrategy[tuple[Ex]]: # pragma: no cover + ... + + +@overload +def tuples( + __a1: SearchStrategy[Ex], __a2: SearchStrategy[T] +) -> SearchStrategy[tuple[Ex, T]]: # pragma: no cover + ... + + +@overload +def tuples( + __a1: SearchStrategy[Ex], __a2: SearchStrategy[T], __a3: SearchStrategy[T3] +) -> SearchStrategy[tuple[Ex, T, T3]]: # pragma: no cover + ... + + +@overload +def tuples( + __a1: SearchStrategy[Ex], + __a2: SearchStrategy[T], + __a3: SearchStrategy[T3], + __a4: SearchStrategy[T4], +) -> SearchStrategy[tuple[Ex, T, T3, T4]]: # pragma: no cover + ... + + +@overload +def tuples( + __a1: SearchStrategy[Ex], + __a2: SearchStrategy[T], + __a3: SearchStrategy[T3], + __a4: SearchStrategy[T4], + __a5: SearchStrategy[T5], +) -> SearchStrategy[tuple[Ex, T, T3, T4, T5]]: # pragma: no cover + ... + + +@overload +def tuples( + *args: SearchStrategy[Any], +) -> SearchStrategy[tuple[Any, ...]]: # pragma: no cover + ... + + +@cacheable +@defines_strategy() +def tuples(*args: SearchStrategy[Any]) -> SearchStrategy[tuple[Any, ...]]: + """Return a strategy which generates a tuple of the same length as args by + generating the value at index i from args[i]. + + e.g. tuples(integers(), integers()) would generate a tuple of length + two with both values an integer. 
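+
+    In a test this might be used as::
+
+        @given(tuples(integers(), booleans()))
+        def test_pair(pair):
+            number, flag = pair
+            assert isinstance(flag, bool)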
+ + Examples from this strategy shrink by shrinking their component parts. + """ + for arg in args: + check_strategy(arg) + + return TupleStrategy(args) + + +class ListStrategy(SearchStrategy[list[Ex]]): + """A strategy for lists which takes a strategy for its elements and the + allowed lengths, and generates lists with the correct size and contents.""" + + _nonempty_filters: tuple[Callable[[Any], Any], ...] = (bool, len, tuple, list) + + def __init__( + self, + elements: SearchStrategy[Ex], + min_size: int = 0, + max_size: float | int | None = math.inf, + ): + super().__init__() + self.min_size = min_size or 0 + self.max_size = max_size if max_size is not None else math.inf + assert 0 <= self.min_size <= self.max_size + self.average_size = min( + max(self.min_size * 2, self.min_size + 5), + 0.5 * (self.min_size + self.max_size), + ) + self.element_strategy = elements + if min_size > BUFFER_SIZE: + raise InvalidArgument( + f"{self!r} can never generate an example, because min_size is larger " + "than Hypothesis supports. Including it is at best slowing down your " + "tests for no benefit; at worst making them fail (maybe flakily) with " + "a HealthCheck error." + ) + + def calc_label(self) -> int: + return combine_labels(self.class_label, self.element_strategy.label) + + def do_validate(self) -> None: + self.element_strategy.validate() + if self.is_empty: + raise InvalidArgument( + "Cannot create non-empty lists with elements drawn from " + f"strategy {self.element_strategy!r} because it has no values." + ) + if self.element_strategy.is_empty and 0 < self.max_size < float("inf"): + raise InvalidArgument( + f"Cannot create a collection of max_size={self.max_size!r}, " + "because no elements can be drawn from the element strategy " + f"{self.element_strategy!r}" + ) + + def calc_is_empty(self, recur: RecurT) -> bool: + if self.min_size == 0: + return False + return recur(self.element_strategy) + + def do_draw(self, data: ConjectureData) -> list[Ex]: + if self.element_strategy.is_empty: + assert self.min_size == 0 + return [] + + elements = cu.many( + data, + min_size=self.min_size, + max_size=self.max_size, + average_size=self.average_size, + ) + result = [] + while elements.more(): + result.append(data.draw(self.element_strategy)) + return result + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}({self.element_strategy!r}, " + f"min_size={self.min_size:_}, max_size={self.max_size:_})" + ) + + def filter(self, condition: Callable[[list[Ex]], Any]) -> SearchStrategy[list[Ex]]: + if condition in self._nonempty_filters or is_identity_function(condition): + assert self.max_size >= 1, "Always-empty is special cased in st.lists()" + if self.min_size >= 1: + return self + new = copy.copy(self) + new.min_size = 1 + return new + + constraints, pred = get_integer_predicate_bounds(condition) + if constraints.get("len") and ( + "min_value" in constraints or "max_value" in constraints + ): + new = copy.copy(self) + new.min_size = max( + self.min_size, constraints.get("min_value", self.min_size) + ) + new.max_size = min( + self.max_size, constraints.get("max_value", self.max_size) + ) + # Unsatisfiable filters are easiest to understand without rewriting. + if new.min_size > new.max_size: + return SearchStrategy.filter(self, condition) + # Recompute average size; this is cheaper than making it into a property. 
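+            # For example, if a length filter implies min_size=2 and
+            # max_size=10, the recomputed value is min(max(4, 7), 6.0) == 6.0.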
+ new.average_size = min( + max(new.min_size * 2, new.min_size + 5), + 0.5 * (new.min_size + new.max_size), + ) + if pred is None: + return new + return SearchStrategy.filter(new, condition) + + return SearchStrategy.filter(self, condition) + + +class UniqueListStrategy(ListStrategy[Ex]): + def __init__( + self, + elements: SearchStrategy[Ex], + min_size: int, + max_size: float | int | None, + # TODO: keys are guaranteed to be Hashable, not just Any, but this makes + # other things harder to type + keys: tuple[Callable[[Ex], Any], ...], + tuple_suffixes: SearchStrategy[tuple[Ex, ...]] | None, + ): + super().__init__(elements, min_size, max_size) + self.keys = keys + self.tuple_suffixes = tuple_suffixes + + def do_draw(self, data: ConjectureData) -> list[Ex]: + if self.element_strategy.is_empty: + assert self.min_size == 0 + return [] + + elements = cu.many( + data, + min_size=self.min_size, + max_size=self.max_size, + average_size=self.average_size, + ) + seen_sets: tuple[set[Ex], ...] = tuple(set() for _ in self.keys) + # actually list[Ex], but if self.tuple_suffixes is present then Ex is a + # tuple[T, ...] because self.element_strategy is a TuplesStrategy, and + # appending a concrete tuple to `result: list[Ex]` makes mypy unhappy + # without knowing that Ex = tuple. + result: list[Any] = [] + + # We construct a filtered strategy here rather than using a check-and-reject + # approach because some strategies have special logic for generation under a + # filter, and FilteredStrategy can consolidate multiple filters. + def not_yet_in_unique_list(val: Ex) -> bool: # type: ignore # covariant type param + return all( + key(val) not in seen + for key, seen in zip(self.keys, seen_sets, strict=True) + ) + + filtered = FilteredStrategy( + self.element_strategy, conditions=(not_yet_in_unique_list,) + ) + while elements.more(): + value = filtered.do_filtered_draw(data) + if value is filter_not_satisfied: + elements.reject(f"Aborted test because unable to satisfy {filtered!r}") + else: + assert not isinstance(value, UniqueIdentifier) + for key, seen in zip(self.keys, seen_sets, strict=True): + seen.add(key(value)) + if self.tuple_suffixes is not None: + value = (value, *data.draw(self.tuple_suffixes)) # type: ignore + result.append(value) + assert self.max_size >= len(result) >= self.min_size + return result + + +class UniqueSampledListStrategy(UniqueListStrategy): + def do_draw(self, data: ConjectureData) -> list[Ex]: + assert isinstance(self.element_strategy, SampledFromStrategy) + + should_draw = cu.many( + data, + min_size=self.min_size, + max_size=self.max_size, + average_size=self.average_size, + ) + seen_sets: tuple[set[Ex], ...] 
= tuple(set() for _ in self.keys) + result: list[Any] = [] + + remaining = LazySequenceCopy(self.element_strategy.elements) + + while remaining and should_draw.more(): + j = data.draw_integer(0, len(remaining) - 1) + value = self.element_strategy._transform(remaining.pop(j)) + if value is not filter_not_satisfied and all( + key(value) not in seen + for key, seen in zip(self.keys, seen_sets, strict=True) + ): + for key, seen in zip(self.keys, seen_sets, strict=True): + seen.add(key(value)) + if self.tuple_suffixes is not None: + value = (value, *data.draw(self.tuple_suffixes)) + result.append(value) + else: + should_draw.reject( + "UniqueSampledListStrategy filter not satisfied or value already seen" + ) + assert self.max_size >= len(result) >= self.min_size + return result + + +class FixedDictStrategy(SearchStrategy[dict[Any, Any]]): + """A strategy which produces dicts with a fixed set of keys, given a + strategy for each of their equivalent values. + + e.g. {'foo' : some_int_strategy} would generate dicts with the single + key 'foo' mapping to some integer. + """ + + def __init__( + self, + mapping: dict[Any, SearchStrategy[Any]], + *, + optional: dict[Any, SearchStrategy[Any]] | None, + ): + super().__init__() + dict_type = type(mapping) + self.mapping = mapping + keys = tuple(mapping.keys()) + self.fixed = st.tuples(*[mapping[k] for k in keys]).map( + lambda value: dict_type(zip(keys, value, strict=True)) + ) + self.optional = optional + + def do_draw(self, data: ConjectureData) -> dict[Any, Any]: + value = data.draw(self.fixed) + if self.optional is None: + return value + + remaining = [k for k, v in self.optional.items() if not v.is_empty] + should_draw = cu.many( + data, min_size=0, max_size=len(remaining), average_size=len(remaining) / 2 + ) + while should_draw.more(): + j = data.draw_integer(0, len(remaining) - 1) + remaining[-1], remaining[j] = remaining[j], remaining[-1] + key = remaining.pop() + value[key] = data.draw(self.optional[key]) + return value + + def calc_is_empty(self, recur: RecurT) -> bool: + return recur(self.fixed) + + def __repr__(self) -> str: + if self.optional is not None: + return f"fixed_dictionaries({self.mapping!r}, optional={self.optional!r})" + return f"fixed_dictionaries({self.mapping!r})" diff --git a/vendored/hypothesis/strategies/_internal/core.py b/vendored/hypothesis/strategies/_internal/core.py new file mode 100644 index 0000000..bd3be43 --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/core.py @@ -0,0 +1,2701 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import codecs +import enum +import math +import operator +import random +import re +import string +import sys +import typing +import warnings +from collections.abc import Callable, Collection, Hashable, Iterable, Sequence +from contextvars import ContextVar +from decimal import Context, Decimal, localcontext +from fractions import Fraction +from functools import reduce +from inspect import Parameter, Signature, isabstract, isclass +from re import Pattern +from types import EllipsisType, FunctionType, GenericAlias +from typing import ( + Annotated, + Any, + AnyStr, + Concatenate, + Literal, + NewType, + NoReturn, + ParamSpec, + Protocol, + TypeAlias, + TypeVar, + cast, + get_args, + get_origin, + overload, +) +from uuid import UUID + +from hypothesis._settings import note_deprecation +from hypothesis.control import ( + cleanup, + current_build_context, + deprecate_random_in_strategy, + note, + should_note, +) +from hypothesis.errors import ( + HypothesisSideeffectWarning, + HypothesisWarning, + InvalidArgument, + ResolutionFailed, + RewindRecursive, + SmallSearchSpaceWarning, +) +from hypothesis.internal.cathetus import cathetus +from hypothesis.internal.charmap import ( + Categories, + CategoryName, + as_general_categories, + categories as all_categories, +) +from hypothesis.internal.compat import ( + bit_count, + ceil, + floor, + get_type_hints, + is_typed_named_tuple, +) +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.conjecture.utils import ( + calc_label_from_callable, + calc_label_from_name, + check_sample, + combine_labels, + identity, +) +from hypothesis.internal.entropy import get_seeder_and_restorer +from hypothesis.internal.floats import float_of +from hypothesis.internal.reflection import ( + define_function_signature, + get_pretty_function_description, + get_signature, + is_first_param_referenced_in_function, + nicerepr, + repr_call, + required_args, +) +from hypothesis.internal.validation import ( + check_type, + check_valid_integer, + check_valid_interval, + check_valid_magnitude, + check_valid_size, + check_valid_sizes, + try_convert, +) +from hypothesis.strategies._internal import SearchStrategy, check_strategy +from hypothesis.strategies._internal.collections import ( + FixedDictStrategy, + ListStrategy, + TupleStrategy, + UniqueListStrategy, + UniqueSampledListStrategy, + tuples, +) +from hypothesis.strategies._internal.deferred import DeferredStrategy +from hypothesis.strategies._internal.functions import FunctionStrategy +from hypothesis.strategies._internal.lazy import LazyStrategy, unwrap_strategies +from hypothesis.strategies._internal.misc import BooleansStrategy, just, none, nothing +from hypothesis.strategies._internal.numbers import ( + IntegersStrategy, + Real, + floats, + integers, +) +from hypothesis.strategies._internal.recursive import RecursiveStrategy +from hypothesis.strategies._internal.shared import SharedStrategy +from hypothesis.strategies._internal.strategies import ( + Ex, + SampledFromStrategy, + T, + one_of, +) +from hypothesis.strategies._internal.strings import ( + BytesStrategy, + OneCharStringStrategy, + TextStrategy, + _check_is_single_character, +) +from hypothesis.strategies._internal.utils import cacheable, defines_strategy +from hypothesis.utils.conventions import not_set +from hypothesis.vendor.pretty import RepresentationPrinter + + +@cacheable +@defines_strategy(force_reusable_values=True) +def booleans() -> SearchStrategy[bool]: + """Returns a strategy which generates instances of 
:class:`python:bool`. + + Examples from this strategy will shrink towards ``False`` (i.e. + shrinking will replace ``True`` with ``False`` where possible). + """ + return BooleansStrategy() + + +@overload +def sampled_from(elements: Sequence[T]) -> SearchStrategy[T]: # pragma: no cover + ... + + +@overload +def sampled_from(elements: type[enum.Enum]) -> SearchStrategy[Any]: # pragma: no cover + # `SearchStrategy[Enum]` is unreliable due to metaclass issues. + ... + + +@overload +def sampled_from( + elements: type[enum.Enum] | Sequence[Any], +) -> SearchStrategy[Any]: # pragma: no cover + ... + + +@defines_strategy(eager="try") +def sampled_from( + elements: type[enum.Enum] | Sequence[Any], +) -> SearchStrategy[Any]: + """Returns a strategy which generates any value present in ``elements``. + + Note that as with :func:`~hypothesis.strategies.just`, values will not be + copied and thus you should be careful of using mutable data. + + ``sampled_from`` supports ordered collections, as well as + :class:`~python:enum.Enum` objects. :class:`~python:enum.Flag` objects + may also generate any combination of their members. + + Examples from this strategy shrink by replacing them with values earlier in + the list. So e.g. ``sampled_from([10, 1])`` will shrink by trying to replace + 1 values with 10, and ``sampled_from([1, 10])`` will shrink by trying to + replace 10 values with 1. + + It is an error to sample from an empty sequence, because returning :func:`nothing` + makes it too easy to silently drop parts of compound strategies. If you need + that behaviour, use ``sampled_from(seq) if seq else nothing()``. + """ + values = check_sample(elements, "sampled_from") + force_repr = None + # check_sample converts to tuple unconditionally, but we want to preserve + # square braces for list reprs. + # This will not cover custom sequence implementations which return different + # braces (or other, more unusual things) for their reprs, but this is a tradeoff + # between repr accuracy and greedily-evaluating all sequence reprs (at great + # cost for large sequences). + force_repr_braces = ("[", "]") if isinstance(elements, list) else None + if isinstance(elements, type) and issubclass(elements, enum.Enum): + force_repr = f"sampled_from({elements.__module__}.{elements.__name__})" + + if isclass(elements) and issubclass(elements, enum.Flag): + # Combinations of enum.Flag members (including empty) are also members. We generate these + # dynamically, because static allocation takes O(2^n) memory. LazyStrategy is used for the + # ease of force_repr. + # Add all named values, both flag bits (== list(elements)) and aliases. The aliases are + # necessary for full coverage for flags that would fail enum.NAMED_FLAGS check, and they + # are also nice values to shrink to. + flags = sorted( + set(elements.__members__.values()), + key=lambda v: (bit_count(v.value), v.value), + ) + # Finally, try to construct the empty state if it is not named. It's placed at the + # end so that we shrink to named values. + flags_with_empty = flags + if not flags or flags[0].value != 0: + try: + flags_with_empty = [*flags, elements(0)] + except TypeError: # pragma: no cover + # Happens on some python versions (at least 3.12) when there are no named values + pass + inner = [ + # Consider one or no named flags set, with shrink-to-named-flag behaviour. + # Special cases (length zero or one) are handled by the inner sampled_from. 
+ sampled_from(flags_with_empty), + ] + if len(flags) > 1: + inner += [ + # Uniform distribution over number of named flags or combinations set. The overlap + # at r=1 is intentional, it may lead to oversampling but gives consistent shrinking + # behaviour. + integers(min_value=1, max_value=len(flags)) + .flatmap(lambda r: sets(sampled_from(flags), min_size=r, max_size=r)) + .map(lambda s: elements(reduce(operator.or_, s))), + ] + return LazyStrategy(one_of, args=inner, kwargs={}, force_repr=force_repr) + if not values: + + def has_annotations(elements): + if sys.version_info[:2] < (3, 14): + return vars(elements).get("__annotations__") + else: # pragma: no cover # covered by 3.14 tests + import annotationlib + + return bool(annotationlib.get_annotations(elements)) + + if ( + isinstance(elements, type) + and issubclass(elements, enum.Enum) + and has_annotations(elements) + ): + # See https://github.com/HypothesisWorks/hypothesis/issues/2923 + raise InvalidArgument( + f"Cannot sample from {elements.__module__}.{elements.__name__} " + "because it contains no elements. It does however have annotations, " + "so maybe you tried to write an enum as if it was a dataclass?" + ) + raise InvalidArgument("Cannot sample from a length-zero sequence.") + if len(values) == 1: + return just(values[0]) + return SampledFromStrategy( + values, force_repr=force_repr, force_repr_braces=force_repr_braces + ) + + +def _gets_first_item(fn: Callable) -> bool: + # Introspection for either `itemgetter(0)`, or `lambda x: x[0]` + if isinstance(fn, FunctionType): + s = get_pretty_function_description(fn) + return bool(re.fullmatch(s, r"lambda ([a-z]+): \1\[0\]")) + return isinstance(fn, operator.itemgetter) and repr(fn) == "operator.itemgetter(0)" + + +@cacheable +@defines_strategy() +def lists( + elements: SearchStrategy[Ex], + *, + min_size: int = 0, + max_size: int | None = None, + unique_by: ( + None | Callable[[Ex], Hashable] | tuple[Callable[[Ex], Hashable], ...] + ) = None, + unique: bool = False, +) -> SearchStrategy[list[Ex]]: + """Returns a list containing values drawn from elements with length in the + interval [min_size, max_size] (no bounds in that direction if these are + None). If max_size is 0, only the empty list will be drawn. + + If ``unique`` is True (or something that evaluates to True), we compare direct + object equality, as if unique_by was ``lambda x: x``. This comparison only + works for hashable types. + + If ``unique_by`` is not None it must be a callable or tuple of callables + returning a hashable type when given a value drawn from elements. The + resulting list will satisfy the condition that for ``i`` != ``j``, + ``unique_by(result[i])`` != ``unique_by(result[j])``. + + If ``unique_by`` is a tuple of callables the uniqueness will be respective + to each callable. + + For example, the following will produce two columns of integers with both + columns being unique respectively. + + .. code-block:: pycon + + >>> twoints = st.tuples(st.integers(), st.integers()) + >>> st.lists(twoints, unique_by=(lambda x: x[0], lambda x: x[1])) + + Examples from this strategy shrink by trying to remove elements from the + list, and by shrinking each individual element of the list. 
+ """ + check_valid_sizes(min_size, max_size) + check_strategy(elements, "elements") + if unique: + if unique_by is not None: + raise InvalidArgument( + "cannot specify both unique and unique_by " + "(you probably only want to set unique_by)" + ) + else: + unique_by = identity + + if max_size == 0: + return builds(list) + if unique_by is not None: + if not (callable(unique_by) or isinstance(unique_by, tuple)): + raise InvalidArgument( + f"{unique_by=} is not a callable or tuple of callables" + ) + if callable(unique_by): + unique_by = (unique_by,) + if len(unique_by) == 0: + raise InvalidArgument("unique_by is empty") + for i, f in enumerate(unique_by): + if not callable(f): + raise InvalidArgument(f"unique_by[{i}]={f!r} is not a callable") + # Note that lazy strategies automatically unwrap when passed to a defines_strategy + # function. + tuple_suffixes = None + if ( + # We're generating a list of tuples unique by the first element, perhaps + # via st.dictionaries(), and this will be more efficient if we rearrange + # our strategy somewhat to draw the first element then draw add the rest. + isinstance(elements, TupleStrategy) + and len(elements.element_strategies) >= 1 + and all(_gets_first_item(fn) for fn in unique_by) + ): + unique_by = (identity,) + tuple_suffixes = TupleStrategy(elements.element_strategies[1:]) + elements = elements.element_strategies[0] + + # UniqueSampledListStrategy offers a substantial performance improvement for + # unique arrays with few possible elements, e.g. of eight-bit integer types. + if ( + isinstance(elements, IntegersStrategy) + and elements.start is not None + and elements.end is not None + and (elements.end - elements.start) <= 255 + ): + elements = SampledFromStrategy( + sorted(range(elements.start, elements.end + 1), key=abs) # type: ignore + if elements.end < 0 or elements.start > 0 + else ( + list(range(elements.end + 1)) + + list(range(-1, elements.start - 1, -1)) + ) + ) + + if isinstance(elements, SampledFromStrategy): + element_count = len(elements.elements) + if min_size > element_count: + raise InvalidArgument( + f"Cannot create a collection of {min_size=} unique " + f"elements with values drawn from only {element_count} distinct " + "elements" + ) + + if max_size is not None: + max_size = min(max_size, element_count) + else: + max_size = element_count + + return UniqueSampledListStrategy( + elements=elements, + max_size=max_size, + min_size=min_size, + keys=unique_by, + tuple_suffixes=tuple_suffixes, + ) + + return UniqueListStrategy( + elements=elements, + max_size=max_size, + min_size=min_size, + keys=unique_by, + tuple_suffixes=tuple_suffixes, + ) + return ListStrategy(elements, min_size=min_size, max_size=max_size) + + +@cacheable +@defines_strategy() +def sets( + elements: SearchStrategy[Ex], + *, + min_size: int = 0, + max_size: int | None = None, +) -> SearchStrategy[set[Ex]]: + """This has the same behaviour as lists, but returns sets instead. + + Note that Hypothesis cannot tell if values are drawn from elements + are hashable until running the test, so you can define a strategy + for sets of an unhashable type but it will fail at test time. + + Examples from this strategy shrink by trying to remove elements from the + set, and by shrinking each individual element of the set. 
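+
+    For example, ``sets(integers(), min_size=2, max_size=3)`` might generate
+    ``{0, 1}`` or ``{-37, 4, 12}`` (the exact values vary from run to run).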
+ """ + return lists( + elements=elements, min_size=min_size, max_size=max_size, unique=True + ).map(set) + + +@cacheable +@defines_strategy() +def frozensets( + elements: SearchStrategy[Ex], + *, + min_size: int = 0, + max_size: int | None = None, +) -> SearchStrategy[frozenset[Ex]]: + """This is identical to the sets function but instead returns + frozensets.""" + return lists( + elements=elements, min_size=min_size, max_size=max_size, unique=True + ).map(frozenset) + + +class PrettyIter: + def __init__(self, values): + self._values = values + self._iter = iter(self._values) + + def __iter__(self): + return self._iter + + def __next__(self): + return next(self._iter) + + def __repr__(self) -> str: + return f"iter({self._values!r})" + + +@defines_strategy() +def iterables( + elements: SearchStrategy[Ex], + *, + min_size: int = 0, + max_size: int | None = None, + unique_by: ( + None | Callable[[Ex], Hashable] | tuple[Callable[[Ex], Hashable], ...] + ) = None, + unique: bool = False, +) -> SearchStrategy[Iterable[Ex]]: + """This has the same behaviour as lists, but returns iterables instead. + + Some iterables cannot be indexed (e.g. sets) and some do not have a + fixed length (e.g. generators). This strategy produces iterators, + which cannot be indexed and do not have a fixed length. This ensures + that you do not accidentally depend on sequence behaviour. + """ + return lists( + elements=elements, + min_size=min_size, + max_size=max_size, + unique_by=unique_by, + unique=unique, + ).map(PrettyIter) + + +# this type definition is imprecise, in multiple ways: +# * mapping and optional can be of different types: +# s: dict[str | int, int] = st.fixed_dictionaries( +# {"a": st.integers()}, optional={1: st.integers()} +# ) +# * the values in either mapping or optional need not all be of the same type: +# s: dict[str, int | bool] = st.fixed_dictionaries( +# {"a": st.integers(), "b": st.booleans()} +# ) +# * the arguments may be of any dict-compatible type, in which case the return +# value will be of that type instead of dit +# +# Overloads may help here, but I doubt we'll be able to satisfy all these +# constraints. +# +# Here's some platonic ideal test cases for revealed_types.py, with the understanding +# that some may not be achievable: +# +# ("fixed_dictionaries({'a': booleans()})", "dict[str, bool]"), +# ("fixed_dictionaries({'a': booleans(), 'b': integers()})", "dict[str, bool | int]"), +# ("fixed_dictionaries({}, optional={'a': booleans()})", "dict[str, bool]"), +# ( +# "fixed_dictionaries({'a': booleans()}, optional={1: booleans()})", +# "dict[str | int, bool]", +# ), +# ( +# "fixed_dictionaries({'a': booleans()}, optional={1: integers()})", +# "dict[str | int, bool | int]", +# ), + + +@defines_strategy() +def fixed_dictionaries( + mapping: dict[T, SearchStrategy[Ex]], + *, + optional: dict[T, SearchStrategy[Ex]] | None = None, +) -> SearchStrategy[dict[T, Ex]]: + """Generates a dictionary of the same type as mapping with a fixed set of + keys mapping to strategies. ``mapping`` must be a dict subclass. + + Generated values have all keys present in mapping, in iteration order, + with the corresponding values drawn from mapping[key]. + + If ``optional`` is passed, the generated value *may or may not* contain each + key from ``optional`` and a value drawn from the corresponding strategy. + Generated values may contain optional keys in an arbitrary order. 
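+
+    For example, ``fixed_dictionaries({"name": text()}, optional={"port": integers()})``
+    might generate ``{"name": ""}`` or ``{"name": "a", "port": 8080}`` (values
+    shown are illustrative only).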
+ + Examples from this strategy shrink by shrinking each individual value in + the generated dictionary, and omitting optional key-value pairs. + """ + check_type(dict, mapping, "mapping") + for k, v in mapping.items(): + check_strategy(v, f"mapping[{k!r}]") + + if optional is not None: + check_type(dict, optional, "optional") + for k, v in optional.items(): + check_strategy(v, f"optional[{k!r}]") + if type(mapping) != type(optional): + raise InvalidArgument( + f"Got arguments of different types: " + f"mapping={nicerepr(type(mapping))}, " + f"optional={nicerepr(type(optional))}" + ) + if set(mapping) & set(optional): + raise InvalidArgument( + "The following keys were in both mapping and optional, " + f"which is invalid: {set(mapping) & set(optional)!r}" + ) + + return FixedDictStrategy(mapping, optional=optional) + + +_get_first_item = operator.itemgetter(0) + + +@cacheable +@defines_strategy() +def dictionaries( + keys: SearchStrategy[Ex], + values: SearchStrategy[T], + *, + dict_class: type = dict, + min_size: int = 0, + max_size: int | None = None, +) -> SearchStrategy[dict[Ex, T]]: + # Describing the exact dict_class to Mypy drops the key and value types, + # so we report Dict[K, V] instead of Mapping[Any, Any] for now. Sorry! + """Generates dictionaries of type ``dict_class`` with keys drawn from the ``keys`` + argument and values drawn from the ``values`` argument. + + The size parameters have the same interpretation as for + :func:`~hypothesis.strategies.lists`. + + Examples from this strategy shrink by trying to remove keys from the + generated dictionary, and by shrinking each generated key and value. + """ + check_valid_sizes(min_size, max_size) + if max_size == 0: + return fixed_dictionaries(dict_class()) + check_strategy(keys, "keys") + check_strategy(values, "values") + + return lists( + tuples(keys, values), + min_size=min_size, + max_size=max_size, + unique_by=_get_first_item, + ).map(dict_class) + + +@cacheable +@defines_strategy(force_reusable_values=True) +def characters( + *, + codec: str | None = None, + min_codepoint: int | None = None, + max_codepoint: int | None = None, + categories: Collection[CategoryName] | None = None, + exclude_categories: Collection[CategoryName] | None = None, + exclude_characters: Collection[str] | None = None, + include_characters: Collection[str] | None = None, + # Note: these arguments are deprecated aliases for backwards compatibility + blacklist_categories: Collection[CategoryName] | None = None, + whitelist_categories: Collection[CategoryName] | None = None, + blacklist_characters: Collection[str] | None = None, + whitelist_characters: Collection[str] | None = None, +) -> SearchStrategy[str]: + r"""Generates characters, length-one :class:`python:str`\ ings, + following specified filtering rules. + + - When no filtering rules are specified, any character can be produced. + - If ``min_codepoint`` or ``max_codepoint`` is specified, then only + characters having a codepoint in that range will be produced. + - If ``categories`` is specified, then only characters from those + Unicode categories will be produced. This is a further restriction, + characters must also satisfy ``min_codepoint`` and ``max_codepoint``. + - If ``exclude_categories`` is specified, then any character from those + categories will not be produced. You must not pass both ``categories`` + and ``exclude_categories``; these arguments are alternative ways to + specify exactly the same thing. 
+ - If ``include_characters`` is specified, then any additional characters + in that list will also be produced. + - If ``exclude_characters`` is specified, then any characters in + that list will be not be produced. Any overlap between + ``include_characters`` and ``exclude_characters`` will raise an + exception. + - If ``codec`` is specified, only characters in the specified `codec encodings`_ + will be produced. + + The ``_codepoint`` arguments must be integers between zero and + :obj:`python:sys.maxunicode`. The ``_characters`` arguments must be + collections of length-one unicode strings, such as a unicode string. + + The ``_categories`` arguments must be used to specify either the + one-letter Unicode major category or the two-letter Unicode + `general category`_. For example, ``('Nd', 'Lu')`` signifies "Number, + decimal digit" and "Letter, uppercase". A single letter ('major category') + can be given to match all corresponding categories, for example ``'P'`` + for characters in any punctuation category. + + We allow codecs from the :mod:`codecs` module and their aliases, platform + specific and user-registered codecs if they are available, and + `python-specific text encodings`_ (but not text or binary transforms). + ``include_characters`` which cannot be encoded using this codec will + raise an exception. If non-encodable codepoints or categories are + explicitly allowed, the ``codec`` argument will exclude them without + raising an exception. + + .. _general category: https://en.wikipedia.org/wiki/Unicode_character_property + .. _codec encodings: https://docs.python.org/3/library/codecs.html#encodings-and-unicode + .. _python-specific text encodings: https://docs.python.org/3/library/codecs.html#python-specific-encodings + + Examples from this strategy shrink towards the codepoint for ``'0'``, + or the first allowable codepoint after it if ``'0'`` is excluded. + """ + check_valid_size(min_codepoint, "min_codepoint") + check_valid_size(max_codepoint, "max_codepoint") + check_valid_interval(min_codepoint, max_codepoint, "min_codepoint", "max_codepoint") + categories = cast(Categories | None, categories) + if categories is not None and exclude_categories is not None: + raise InvalidArgument( + f"Pass at most one of {categories=} and {exclude_categories=} - " + "these arguments both specify which categories are allowed, so it " + "doesn't make sense to use both in a single call." + ) + + # Handle deprecation of whitelist/blacklist arguments + has_old_arg = any(v is not None for k, v in locals().items() if "list" in k) + has_new_arg = any(v is not None for k, v in locals().items() if "lude" in k) + if has_old_arg and has_new_arg: + raise InvalidArgument( + "The deprecated blacklist/whitelist arguments cannot be used in " + "the same call as their replacement include/exclude arguments." + ) + if blacklist_categories is not None: + exclude_categories = blacklist_categories + if whitelist_categories is not None: + categories = whitelist_categories + if blacklist_characters is not None: + exclude_characters = blacklist_characters + if whitelist_characters is not None: + include_characters = whitelist_characters + + if ( + min_codepoint is None + and max_codepoint is None + and categories is None + and exclude_categories is None + and include_characters is not None + and codec is None + ): + raise InvalidArgument( + "Nothing is excluded by other arguments, so passing only " + f"{include_characters=} would have no effect. 
" + "Also pass categories=(), or use " + f"sampled_from({include_characters!r}) instead." + ) + exclude_characters = exclude_characters or "" + include_characters = include_characters or "" + if not_one_char := [c for c in exclude_characters if len(c) != 1]: + raise InvalidArgument( + "Elements of exclude_characters are required to be a single character, " + f"but {not_one_char!r} passed in {exclude_characters=} was not." + ) + if not_one_char := [c for c in include_characters if len(c) != 1]: + raise InvalidArgument( + "Elements of include_characters are required to be a single character, " + f"but {not_one_char!r} passed in {include_characters=} was not." + ) + overlap = set(exclude_characters).intersection(include_characters) + if overlap: + raise InvalidArgument( + f"Characters {sorted(overlap)!r} are present in both " + f"{include_characters=} and {exclude_characters=}" + ) + if categories is not None: + categories = as_general_categories(categories, "categories") + if exclude_categories is not None: + exclude_categories = as_general_categories( + exclude_categories, "exclude_categories" + ) + if categories is not None and not categories and not include_characters: + raise InvalidArgument( + "When `categories` is an empty collection and there are " + "no characters specified in include_characters, nothing can " + "be generated by the characters() strategy." + ) + both_cats = set(exclude_categories or ()).intersection(categories or ()) + if both_cats: + # Note: we check that exactly one of `categories` or `exclude_categories` is + # passed above, but retain this older check for the deprecated arguments. + raise InvalidArgument( + f"Categories {sorted(both_cats)!r} are present in both " + f"{categories=} and {exclude_categories=}" + ) + elif exclude_categories is not None: + categories = set(all_categories()) - set(exclude_categories) + del exclude_categories + + if codec is not None: + try: + codec = codecs.lookup(codec).name + # Check this is not a str-to-str or bytes-to-bytes codec; see + # https://docs.python.org/3/library/codecs.html#binary-transforms + "".encode(codec) + except LookupError: + raise InvalidArgument(f"{codec=} is not valid on this system") from None + except Exception: + raise InvalidArgument(f"{codec=} is not a valid codec") from None + + for char in include_characters: + try: + char.encode(encoding=codec, errors="strict") + except UnicodeEncodeError: + raise InvalidArgument( + f"Character {char!r} in {include_characters=} " + f"cannot be encoded with {codec=}" + ) from None + + # ascii and utf-8 are sufficient common that we have faster special handling + if codec == "ascii": + if (max_codepoint is None) or (max_codepoint > 127): + max_codepoint = 127 + codec = None + elif codec == "utf-8": + if categories is None: + categories = all_categories() + categories = tuple(c for c in categories if c != "Cs") + + return OneCharStringStrategy.from_characters_args( + categories=categories, + exclude_characters=exclude_characters, + min_codepoint=min_codepoint, + max_codepoint=max_codepoint, + include_characters=include_characters, + codec=codec, + ) + + +# Hide the deprecated aliases from documentation and casual inspection +characters.__signature__ = (__sig := get_signature(characters)).replace( # type: ignore + parameters=[p for p in __sig.parameters.values() if "list" not in p.name] +) + + +@cacheable +@defines_strategy(force_reusable_values=True) +def text( + alphabet: Collection[str] | SearchStrategy[str] = characters(codec="utf-8"), + *, + min_size: int = 0, + 
max_size: int | None = None, +) -> SearchStrategy[str]: + """Generates strings with characters drawn from ``alphabet``, which should + be a collection of length one strings or a strategy generating such strings. + + The default alphabet strategy can generate the full unicode range but + excludes surrogate characters because they are invalid in the UTF-8 + encoding. You can use :func:`~hypothesis.strategies.characters` without + arguments to find surrogate-related bugs such as :bpo:`34454`. + + ``min_size`` and ``max_size`` have the usual interpretations. + Note that Python measures string length by counting codepoints: U+00C5 + ``Å`` is a single character, while U+0041 U+030A ``Å`` is two - the ``A``, + and a combining ring above. + + Examples from this strategy shrink towards shorter strings, and with the + characters in the text shrinking as per the alphabet strategy. + This strategy does not :func:`~python:unicodedata.normalize` examples, + so generated strings may be in any or none of the 'normal forms'. + """ + check_valid_sizes(min_size, max_size) + if isinstance(alphabet, SearchStrategy): + char_strategy = unwrap_strategies(alphabet) + if isinstance(char_strategy, SampledFromStrategy): + # Check this via the up-front validation logic below, and incidentally + # convert into a `characters()` strategy for standard text shrinking. + return text(char_strategy.elements, min_size=min_size, max_size=max_size) + elif not isinstance(char_strategy, OneCharStringStrategy): + char_strategy = char_strategy.map(_check_is_single_character) + else: + non_string = [c for c in alphabet if not isinstance(c, str)] + if non_string: + raise InvalidArgument( + "The following elements in alphabet are not unicode " + f"strings: {non_string!r}" + ) + not_one_char = [c for c in alphabet if len(c) != 1] + if not_one_char: + raise InvalidArgument( + "The following elements in alphabet are not of length one, " + f"which leads to violation of size constraints: {not_one_char!r}" + ) + if alphabet in ["ascii", "utf-8"]: + warnings.warn( + f"st.text({alphabet!r}): it seems like you are trying to use the " + f"codec {alphabet!r}. st.text({alphabet!r}) instead generates " + f"strings using the literal characters {list(alphabet)!r}. To specify " + f"the {alphabet} codec, use st.text(st.characters(codec={alphabet!r})). " + "If you intended to use character literals, you can silence this " + "warning by reordering the characters.", + HypothesisWarning, + # this stacklevel is of course incorrect, but breaking out of the + # levels of LazyStrategy and validation isn't worthwhile. + stacklevel=1, + ) + char_strategy = ( + characters(categories=(), include_characters=alphabet) + if alphabet + else nothing() + ) + if (max_size == 0 or char_strategy.is_empty) and not min_size: + return just("") + # mypy is unhappy with ListStrategy(SearchStrategy[list[Ex]]) and then TextStrategy + # setting Ex = str. Mypy is correct to complain because we have an LSP violation + # here in the TextStrategy.do_draw override. Would need refactoring to resolve. + return TextStrategy(char_strategy, min_size=min_size, max_size=max_size) # type: ignore + + +@overload +def from_regex( + regex: bytes | Pattern[bytes], + *, + fullmatch: bool = False, +) -> SearchStrategy[bytes]: # pragma: no cover + ... + + +@overload +def from_regex( + regex: str | Pattern[str], + *, + fullmatch: bool = False, + alphabet: str | SearchStrategy[str] = characters(codec="utf-8"), +) -> SearchStrategy[str]: # pragma: no cover + ... 
+ + +@cacheable +@defines_strategy() +def from_regex( + regex: AnyStr | Pattern[AnyStr], + *, + fullmatch: bool = False, + alphabet: str | SearchStrategy[str] | None = None, +) -> SearchStrategy[AnyStr]: + r"""Generates strings that contain a match for the given regex (i.e. ones + for which :func:`python:re.search` will return a non-None result). + + ``regex`` may be a pattern or :func:`compiled regex `. + Both byte-strings and unicode strings are supported, and will generate + examples of the same type. + + You can use regex flags such as :obj:`python:re.IGNORECASE` or + :obj:`python:re.DOTALL` to control generation. Flags can be passed either + in compiled regex or inside the pattern with a ``(?iLmsux)`` group. + + Some regular expressions are only partly supported - the underlying + strategy checks local matching and relies on filtering to resolve + context-dependent expressions. Using too many of these constructs may + cause health-check errors as too many examples are filtered out. This + mainly includes (positive or negative) lookahead and lookbehind groups. + + If you want the generated string to match the whole regex you should use + boundary markers. So e.g. ``r"\A.\Z"`` will return a single character + string, while ``"."`` will return any string, and ``r"\A.$"`` will return + a single character optionally followed by a ``"\n"``. + Alternatively, passing ``fullmatch=True`` will ensure that the whole + string is a match, as if you had used the ``\A`` and ``\Z`` markers. + + The ``alphabet=`` argument constrains the characters in the generated + string, as for :func:`text`, and is only supported for unicode strings. + + Examples from this strategy shrink towards shorter strings and lower + character values, with exact behaviour that may depend on the pattern. + """ + check_type((str, bytes, re.Pattern), regex, "regex") + check_type(bool, fullmatch, "fullmatch") + pattern = regex.pattern if isinstance(regex, re.Pattern) else regex + if alphabet is not None: + check_type((str, SearchStrategy), alphabet, "alphabet") + if not isinstance(pattern, str): + raise InvalidArgument("alphabet= is not supported for bytestrings") + alphabet = OneCharStringStrategy.from_alphabet(alphabet) + elif isinstance(pattern, str): + alphabet = characters(codec="utf-8") + + # TODO: We would like to move this to the top level, but pending some major + # refactoring it's hard to do without creating circular imports. + from hypothesis.strategies._internal.regex import regex_strategy + + return regex_strategy(regex, fullmatch, alphabet=alphabet) + + +@cacheable +@defines_strategy(force_reusable_values=True) +def binary( + *, + min_size: int = 0, + max_size: int | None = None, +) -> SearchStrategy[bytes]: + """Generates :class:`python:bytes`. + + The generated :class:`python:bytes` will have a length of at least ``min_size`` + and at most ``max_size``. If ``max_size`` is None there is no upper limit. + + Examples from this strategy shrink towards smaller strings and lower byte + values. + """ + check_valid_sizes(min_size, max_size) + return BytesStrategy(min_size, max_size) + + +@cacheable +@defines_strategy() +def randoms( + *, + note_method_calls: bool = False, + use_true_random: bool = False, +) -> SearchStrategy[random.Random]: + """Generates instances of ``random.Random``. The generated Random instances + are of a special HypothesisRandom subclass. + + - If ``note_method_calls`` is set to ``True``, Hypothesis will print the + randomly drawn values in any falsifying test case. 
This can be helpful + for debugging the behaviour of randomized algorithms. + - If ``use_true_random`` is set to ``True`` then values will be drawn from + their usual distribution, otherwise they will actually be Hypothesis + generated values (and will be shrunk accordingly for any failing test + case). Setting ``use_true_random=False`` will tend to expose bugs that + would occur with very low probability when it is set to True, and this + flag should only be set to True when your code relies on the distribution + of values for correctness. + + For managing global state, see the :func:`~hypothesis.strategies.random_module` + strategy and :func:`~hypothesis.register_random` function. + """ + check_type(bool, note_method_calls, "note_method_calls") + check_type(bool, use_true_random, "use_true_random") + + from hypothesis.strategies._internal.random import RandomStrategy + + return RandomStrategy( + use_true_random=use_true_random, note_method_calls=note_method_calls + ) + + +class RandomSeeder: + def __init__(self, seed): + self.seed = seed + + def __repr__(self): + return f"RandomSeeder({self.seed!r})" + + +class RandomModule(SearchStrategy): + def do_draw(self, data: ConjectureData) -> RandomSeeder: + # It would be unsafe to do run this method more than once per test case, + # because cleanup() runs tasks in FIFO order (at time of writing!). + # Fortunately, the random_module() strategy wraps us in shared(), so + # it's cached for all but the first of any number of calls. + seed = data.draw(integers(0, 2**32 - 1)) + seed_all, restore_all = get_seeder_and_restorer(seed) + seed_all() + cleanup(restore_all) + return RandomSeeder(seed) + + +@cacheable +@defines_strategy() +def random_module() -> SearchStrategy[RandomSeeder]: + """Hypothesis always seeds global PRNGs before running a test, and restores the + previous state afterwards. + + If having a fixed seed would unacceptably weaken your tests, and you + cannot use a ``random.Random`` instance provided by + :func:`~hypothesis.strategies.randoms`, this strategy calls + :func:`python:random.seed` with an arbitrary integer and passes you + an opaque object whose repr displays the seed value for debugging. + If ``numpy.random`` is available, that state is also managed, as is anything + managed by :func:`hypothesis.register_random`. + + Examples from these strategy shrink to seeds closer to zero. + """ + return shared(RandomModule(), key="hypothesis.strategies.random_module()") + + +class BuildsStrategy(SearchStrategy[Ex]): + def __init__( + self, + target: Callable[..., Ex], + args: tuple[SearchStrategy[Any], ...], + kwargs: dict[str, SearchStrategy[Any]], + ): + super().__init__() + self.target = target + self.args = args + self.kwargs = kwargs + + def calc_label(self) -> int: + return combine_labels( + self.class_label, + calc_label_from_callable(self.target), + *[strat.label for strat in self.args], + *[calc_label_from_name(k) for k in self.kwargs], + *[strat.label for strat in self.kwargs.values()], + ) + + def do_draw(self, data: ConjectureData) -> Ex: + args = [data.draw(s) for s in self.args] + kwargs = {k: data.draw(v) for k, v in self.kwargs.items()} + try: + obj = self.target(*args, **kwargs) + except TypeError as err: + if ( + isinstance(self.target, type) + and issubclass(self.target, enum.Enum) + and not (self.args or self.kwargs) + ): + name = self.target.__module__ + "." 
+ self.target.__qualname__ + raise InvalidArgument( + f"Calling {name} with no arguments raised an error - " + f"try using sampled_from({name}) instead of builds({name})" + ) from err + if not (self.args or self.kwargs): + from .types import is_generic_type + + if isinstance(self.target, NewType) or is_generic_type(self.target): + raise InvalidArgument( + f"Calling {self.target!r} with no arguments raised an " + f"error - try using from_type({self.target!r}) instead " + f"of builds({self.target!r})" + ) from err + if getattr(self.target, "__no_type_check__", None) is True: + # Note: could use PEP-678 __notes__ here. Migrate over once we're + # using an `exceptiongroup` backport with support for that. + raise TypeError( + "This might be because the @no_type_check decorator prevented " + "Hypothesis from inferring a strategy for some required arguments." + ) from err + raise + + current_build_context().record_call(obj, self.target, args=args, kwargs=kwargs) + return obj + + def do_validate(self) -> None: + tuples(*self.args).validate() + fixed_dictionaries(self.kwargs).validate() + + def __repr__(self) -> str: + bits = [get_pretty_function_description(self.target)] + bits.extend(map(repr, self.args)) + bits.extend(f"{k}={v!r}" for k, v in self.kwargs.items()) + return f"builds({', '.join(bits)})" + + +@cacheable +@defines_strategy() +def builds( + target: Callable[..., Ex], + /, + *args: SearchStrategy[Any], + **kwargs: SearchStrategy[Any] | EllipsisType, +) -> SearchStrategy[Ex]: + """Generates values by drawing from ``args`` and ``kwargs`` and passing + them to the callable (provided as the first positional argument) in the + appropriate argument position. + + e.g. ``builds(target, integers(), flag=booleans())`` would draw an + integer ``i`` and a boolean ``b`` and call ``target(i, flag=b)``. + + If the callable has type annotations, they will be used to infer a strategy + for required arguments that were not passed to builds. You can also tell + builds to infer a strategy for an optional argument by passing ``...`` + (:obj:`python:Ellipsis`) as a keyword argument to builds, instead of a strategy for + that argument to the callable. + + If the callable is a class defined with :pypi:`attrs`, missing required + arguments will be inferred from the attribute on a best-effort basis, + e.g. by checking :ref:`attrs standard validators `. + Dataclasses are handled natively by the inference from type hints. + + Examples from this strategy shrink by shrinking the argument values to + the callable. + """ + if not callable(target): + from hypothesis.strategies._internal.types import is_a_union + + # before 3.14, unions were callable, so it got an error message in + # BuildsStrategy.do_draw. In 3.14+, unions are not callable, so + # we error earlier here instead. + suggestion = ( + f" Try using from_type({target}) instead?" if is_a_union(target) else "" + ) + raise InvalidArgument( + "The first positional argument to builds() must be a callable " + f"target to construct.{suggestion}" + ) + + if ... in args: # type: ignore # we only annotated the allowed types + # Avoid an implementation nightmare juggling tuples and worse things + raise InvalidArgument( + "... 
was passed as a positional argument to " + "builds(), but is only allowed as a keyword arg" + ) + required = required_args(target, args, kwargs) + to_infer = {k for k, v in kwargs.items() if v is ...} + if required or to_infer: + if ( + isinstance(target, type) + and (attr := sys.modules.get("attr")) is not None + and attr.has(target) + ): # pragma: no cover # covered by our attrs tests in check-niche + # Use our custom introspection for attrs classes + from hypothesis.strategies._internal.attrs import from_attrs + + return from_attrs(target, args, kwargs, required | to_infer) + # Otherwise, try using type hints + hints = get_type_hints(target) + if to_infer - set(hints): + badargs = ", ".join(sorted(to_infer - set(hints))) + raise InvalidArgument( + f"passed ... for {badargs}, but we cannot infer a strategy " + "because these arguments have no type annotation" + ) + infer_for = {k: v for k, v in hints.items() if k in (required | to_infer)} + if infer_for: + from hypothesis.strategies._internal.types import _global_type_lookup + + for kw, t in infer_for.items(): + if t in _global_type_lookup: + kwargs[kw] = from_type(t) + else: + # We defer resolution of these type annotations so that the obvious + # approach to registering recursive types just works. I.e., + # if we're inside `register_type_strategy(cls, builds(cls, ...))` + # and `...` contains recursion on `cls`. See + # https://github.com/HypothesisWorks/hypothesis/issues/3026 + kwargs[kw] = deferred(lambda t=t: from_type(t)) # type: ignore + + # validated by handling all EllipsisType in the to_infer case + kwargs = cast(dict[str, SearchStrategy], kwargs) + return BuildsStrategy(target, args, kwargs) + + +@cacheable +@defines_strategy(eager=True) +def from_type(thing: type[T]) -> SearchStrategy[T]: + """Looks up the appropriate search strategy for the given type. + + |st.from_type| is used internally to fill in missing arguments to + |st.builds| and can be used interactively + to explore what strategies are available or to debug type resolution. + + You can use |st.register_type_strategy| to + handle your custom types, or to globally redefine certain strategies - + for example excluding NaN from floats, or use timezone-aware instead of + naive time and datetime strategies. + + |st.from_type| looks up a strategy in the following order: + + 1. If ``thing`` is in the default lookup mapping or user-registered lookup, + return the corresponding strategy. The default lookup covers all types + with Hypothesis strategies, including extras where possible. + 2. If ``thing`` is from the :mod:`python:typing` module, return the + corresponding strategy (special logic). + 3. If ``thing`` has one or more subtypes in the merged lookup, return + the union of the strategies for those types that are not subtypes of + other elements in the lookup. + 4. Finally, if ``thing`` has type annotations for all required arguments, + and is not an abstract class, it is resolved via + |st.builds|. + 5. Because :mod:`abstract types ` cannot be instantiated, + we treat abstract types as the union of their concrete subclasses. + Note that this lookup works via inheritance but not via + :obj:`~python:abc.ABCMeta.register`, so you may still need to use + |st.register_type_strategy|. + + There is a valuable recipe for leveraging |st.from_type| to generate + "everything except" values from a specified type. I.e. + + .. 
code-block:: python + + def everything_except(excluded_types): + return ( + from_type(type) + .flatmap(from_type) + .filter(lambda x: not isinstance(x, excluded_types)) + ) + + For example, ``everything_except(int)`` returns a strategy that can + generate anything that |st.from_type| can ever generate, except for + instances of |int|, and excluding instances of types + added via |st.register_type_strategy|. + + This is useful when writing tests which check that invalid input is + rejected in a certain way. + """ + try: + with warnings.catch_warnings(): + warnings.simplefilter("error") + return _from_type(thing) + except Exception: + return _from_type_deferred(thing) + + +def _from_type_deferred(thing: type[Ex]) -> SearchStrategy[Ex]: + # This tricky little dance is because we want to show the repr of the actual + # underlying strategy wherever possible, as a form of user education, but + # would prefer to fall back to the default "from_type(...)" repr instead of + # "deferred(...)" for recursive types or invalid arguments. + try: + thing_repr = nicerepr(thing) + if hasattr(thing, "__module__"): + module_prefix = f"{thing.__module__}." + if not thing_repr.startswith(module_prefix): + thing_repr = module_prefix + thing_repr + repr_ = f"from_type({thing_repr})" + except Exception: # pragma: no cover + repr_ = None + return LazyStrategy( + lambda thing: deferred(lambda: _from_type(thing)), + (thing,), + {}, + force_repr=repr_, + ) + + +_recurse_guard: ContextVar = ContextVar("recurse_guard") + + +def _from_type(thing: type[Ex]) -> SearchStrategy[Ex]: + # TODO: We would like to move this to the top level, but pending some major + # refactoring it's hard to do without creating circular imports. + from hypothesis.strategies._internal import types + + def as_strategy(strat_or_callable, thing): + # User-provided strategies need some validation, and callables even more + # of it. We do this in three places, hence the helper function + if not isinstance(strat_or_callable, SearchStrategy): + assert callable(strat_or_callable) # Validated in register_type_strategy + strategy = strat_or_callable(thing) + else: + strategy = strat_or_callable + if strategy is NotImplemented: + return NotImplemented + if not isinstance(strategy, SearchStrategy): + raise ResolutionFailed( + f"Error: {thing} was registered for {nicerepr(strat_or_callable)}, " + f"but returned non-strategy {strategy!r}" + ) + if strategy.is_empty: + raise ResolutionFailed(f"Error: {thing!r} resolved to an empty strategy") + return strategy + + def from_type_guarded(thing): + """Returns the result of producer, or ... if recursion on thing is encountered""" + try: + recurse_guard = _recurse_guard.get() + except LookupError: + # We can't simply define the contextvar with default=[], as the + # default object would be shared across contexts + _recurse_guard.set(recurse_guard := []) + if thing in recurse_guard: + raise RewindRecursive(thing) + recurse_guard.append(thing) + try: + return _from_type(thing) + except RewindRecursive as rr: + if rr.target != thing: + raise + return ... # defer resolution + finally: + recurse_guard.pop() + + # Let registered extra modules handle their own recognized types first, before + # e.g. Unions are resolved + try: + known = thing in types._global_type_lookup + except TypeError: + # thing is not always hashable! 
+ pass + else: + if not known: + for module, resolver in types._global_extra_lookup.items(): + if module in sys.modules: + strat = resolver(thing) + if strat is not None: + return strat + + if isinstance(thing, NewType): + # Check if we have an explicitly registered strategy for this thing, + # resolve it so, and otherwise resolve as for the base type. + if thing in types._global_type_lookup: + strategy = as_strategy(types._global_type_lookup[thing], thing) + if strategy is not NotImplemented: + return strategy + return _from_type(thing.__supertype__) + if types.is_a_type_alias_type(thing): # pragma: no cover # covered by 3.12+ tests + if thing in types._global_type_lookup: + strategy = as_strategy(types._global_type_lookup[thing], thing) + if strategy is not NotImplemented: + return strategy + return _from_type(thing.__value__) # type: ignore + if types.is_a_union(thing): + args = sorted(thing.__args__, key=types.type_sorting_key) # type: ignore + return one_of([_from_type(t) for t in args]) + if thing in types.LiteralStringTypes: # pragma: no cover + # We can't really cover this because it needs either + # typing-extensions or python3.11+ typing. + # `LiteralString` from runtime's point of view is just a string. + # Fallback to regular text. + return text() # type: ignore + + # We also have a special case for TypeVars. + # They are represented as instances like `~T` when they come here. + # We need to work with their type instead. + if isinstance(thing, TypeVar) and type(thing) in types._global_type_lookup: + strategy = as_strategy(types._global_type_lookup[type(thing)], thing) + if strategy is not NotImplemented: + return strategy + + if not types.is_a_type(thing): + if isinstance(thing, str): + # See https://github.com/HypothesisWorks/hypothesis/issues/3016 + raise InvalidArgument( + f"Got {thing!r} as a type annotation, but the forward-reference " + "could not be resolved from a string to a type. Consider using " + "`from __future__ import annotations` instead of forward-reference " + "strings." + ) + raise InvalidArgument(f"{thing=} must be a type") # pragma: no cover + + if thing in types.NON_RUNTIME_TYPES: + # Some code like `st.from_type(TypeAlias)` does not make sense. + # Because there are types in python that do not exist in runtime. + raise InvalidArgument( + f"Could not resolve {thing!r} to a strategy, " + f"because there is no such thing as a runtime instance of {thing!r}" + ) + + # Now that we know `thing` is a type, the first step is to check for an + # explicitly registered strategy. This is the best (and hopefully most + # common) way to resolve a type to a strategy. Note that the value in the + # lookup may be a strategy or a function from type -> strategy; and we + # convert empty results into an explicit error. + try: + if thing in types._global_type_lookup: + strategy = as_strategy(types._global_type_lookup[thing], thing) + if strategy is not NotImplemented: + return strategy + elif ( + isinstance(thing, GenericAlias) + and (to := get_origin(thing)) in types._global_type_lookup + ): + strategy = as_strategy(types._global_type_lookup[to], thing) + if strategy is not NotImplemented: + return strategy + except TypeError: # pragma: no cover + # This was originally due to a bizarre divergence in behaviour on Python 3.9.0: + # typing.Callable[[], foo] has __args__ = (foo,) but collections.abc.Callable + # has __args__ = ([], foo); and as a result is non-hashable. + # We've kept it because we turn out to have more type errors from... somewhere. 
+ # FIXME: investigate that, maybe it should be fixed more precisely? + pass + + if (hasattr(typing, "_TypedDictMeta") and type(thing) is typing._TypedDictMeta) or ( + hasattr(types.typing_extensions, "_TypedDictMeta") # type: ignore + and type(thing) is types.typing_extensions._TypedDictMeta # type: ignore + ): # pragma: no cover + + def _get_annotation_arg(key, annotation_type): + try: + return get_args(annotation_type)[0] + except IndexError: + raise InvalidArgument( + f"`{key}: {annotation_type.__name__}` is not a valid type annotation" + ) from None + + # Taken from `Lib/typing.py` and modified: + def _get_typeddict_qualifiers(key, annotation_type): + qualifiers = [] + annotations = [] + while True: + annotation_origin = types.extended_get_origin(annotation_type) + if annotation_origin is Annotated: + if annotation_args := get_args(annotation_type): + annotation_type = annotation_args[0] + annotations.extend(annotation_args[1:]) + else: + break + elif annotation_origin in types.RequiredTypes: + qualifiers.append(types.RequiredTypes) + annotation_type = _get_annotation_arg(key, annotation_type) + elif annotation_origin in types.NotRequiredTypes: + qualifiers.append(types.NotRequiredTypes) + annotation_type = _get_annotation_arg(key, annotation_type) + elif annotation_origin in types.ReadOnlyTypes: + qualifiers.append(types.ReadOnlyTypes) + annotation_type = _get_annotation_arg(key, annotation_type) + else: + break + if annotations: + annotation_type = Annotated[(annotation_type, *annotations)] + return set(qualifiers), annotation_type + + # The __optional_keys__ attribute may or may not be present, but if there's no + # way to tell and we just have to assume that everything is required. + # See https://github.com/python/cpython/pull/17214 for details. + optional = set(getattr(thing, "__optional_keys__", ())) + required = set( + getattr(thing, "__required_keys__", get_type_hints(thing).keys()) + ) + anns = {} + for k, v in get_type_hints(thing).items(): + qualifiers, v = _get_typeddict_qualifiers(k, v) + # We ignore `ReadOnly` type for now, only unwrap it. + if types.RequiredTypes in qualifiers: + optional.discard(k) + required.add(k) + if types.NotRequiredTypes in qualifiers: + optional.add(k) + required.discard(k) + + anns[k] = from_type_guarded(v) + if anns[k] is ...: + anns[k] = _from_type_deferred(v) + + if not required.isdisjoint(optional): # pragma: no cover + # It is impossible to cover, because `typing.py` or `typing-extensions` + # won't allow creating incorrect TypedDicts, + # this is just a sanity check from our side. + raise InvalidArgument( + f"Required keys overlap with optional keys in a TypedDict:" + f" {required=}, {optional=}" + ) + if ( + (not anns) + and thing.__annotations__ + and ".." in getattr(thing, "__qualname__", "") + ): + raise InvalidArgument("Failed to retrieve type annotations for local type") + return fixed_dictionaries( # type: ignore + mapping={k: v for k, v in anns.items() if k in required}, + optional={k: v for k, v in anns.items() if k in optional}, + ) + + # If there's no explicitly registered strategy, maybe a subtype of thing + # is registered - if so, we can resolve it to the subclass strategy. + # We'll start by checking if thing is from from the typing module, + # because there are several special cases that don't play well with + # subclass and instance checks. 
+ if ( + isinstance(thing, types.typing_root_type) + or (isinstance(get_origin(thing), type) and get_args(thing)) + or isinstance(thing, typing.ForwardRef) + ): + return types.from_typing_type(thing) + + # If it's not from the typing module, we get all registered types that are + # a subclass of `thing` and are not themselves a subtype of any other such + # type. For example, `Number -> integers() | floats()`, but bools() is + # not included because bool is a subclass of int as well as Number. + strategies = [ + s + for s in ( + as_strategy(v, thing) + for k, v in sorted(types._global_type_lookup.items(), key=repr) + if isinstance(k, type) + and issubclass(k, thing) + and sum(types.try_issubclass(k, typ) for typ in types._global_type_lookup) + == 1 + ) + if s is not NotImplemented + ] + if any(not s.is_empty for s in strategies): + return one_of(strategies) + + # If we don't have a strategy registered for this type or any subtype, we + # may be able to fall back on type annotations. + if issubclass(thing, enum.Enum): + return sampled_from(thing) + + # Finally, try to build an instance by calling the type object. Unlike builds(), + # this block *does* try to infer strategies for arguments with default values. + # That's because of the semantic different; builds() -> "call this with ..." + # so we only infer when *not* doing so would be an error; from_type() -> "give + # me arbitrary instances" so the greater variety is acceptable. + # And if it's *too* varied, express your opinions with register_type_strategy() + if not isabstract(thing): + # If we know that builds(thing) will fail, give a better error message + required = required_args(thing) + if required and not ( + required.issubset(get_type_hints(thing)) + or ((attr := sys.modules.get("attr")) is not None and attr.has(thing)) + or is_typed_named_tuple(thing) # weird enough that we have a specific check + ): + raise ResolutionFailed( + f"Could not resolve {thing!r} to a strategy; consider " + "using register_type_strategy" + ) + try: + hints = get_type_hints(thing) + params = get_signature(thing).parameters + except Exception: + params = {} # type: ignore + + posonly_args = [] + kwargs = {} + for k, p in params.items(): + if ( + p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD, p.KEYWORD_ONLY) + and k in hints + and k != "return" + ): + ps = from_type_guarded(hints[k]) + if p.default is not Parameter.empty and ps is not ...: + ps = just(p.default) | ps + if p.kind is Parameter.POSITIONAL_ONLY: + # builds() doesn't infer strategies for positional args, so: + if ps is ...: # pragma: no cover # rather fiddly to test + if p.default is Parameter.empty: + raise ResolutionFailed( + f"Could not resolve {thing!r} to a strategy; " + "consider using register_type_strategy" + ) + ps = just(p.default) + posonly_args.append(ps) + else: + kwargs[k] = ps + if ( + params + and not (posonly_args or kwargs) + and not issubclass(thing, BaseException) + ): + from_type_repr = repr_call(from_type, (thing,), {}) + builds_repr = repr_call(builds, (thing,), {}) + warnings.warn( + f"{from_type_repr} resolved to {builds_repr}, because we could not " + "find any (non-varargs) arguments. Use st.register_type_strategy() " + "to resolve to a strategy which can generate more than one value, " + "or silence this warning.", + SmallSearchSpaceWarning, + stacklevel=2, + ) + return builds(thing, *posonly_args, **kwargs) + + # And if it's an abstract type, we'll resolve to a union of subclasses instead. 
+ subclasses = thing.__subclasses__() + if not subclasses: + raise ResolutionFailed( + f"Could not resolve {thing!r} to a strategy, because it is an abstract " + "type without any subclasses. Consider using register_type_strategy" + ) + + subclass_strategies: SearchStrategy = nothing() + for sc in subclasses: + try: + subclass_strategies |= _from_type(sc) + except Exception: + pass + if subclass_strategies.is_empty: + # We're unable to resolve subclasses now, but we might be able to later - + # so we'll just go back to the mixed distribution. + return sampled_from(subclasses).flatmap(_from_type) + return subclass_strategies + + +@cacheable +@defines_strategy(force_reusable_values=True) +def fractions( + min_value: Real | str | None = None, + max_value: Real | str | None = None, + *, + max_denominator: int | None = None, +) -> SearchStrategy[Fraction]: + """Returns a strategy which generates Fractions. + + If ``min_value`` is not None then all generated values are no less than + ``min_value``. If ``max_value`` is not None then all generated values are no + greater than ``max_value``. ``min_value`` and ``max_value`` may be anything accepted + by the :class:`~fractions.Fraction` constructor. + + If ``max_denominator`` is not None then the denominator of any generated + values is no greater than ``max_denominator``. Note that ``max_denominator`` must + be None or a positive integer. + + Examples from this strategy shrink towards smaller denominators, then + closer to zero. + """ + min_value = try_convert(Fraction, min_value, "min_value") + max_value = try_convert(Fraction, max_value, "max_value") + # These assertions tell Mypy what happened in try_convert + assert min_value is None or isinstance(min_value, Fraction) + assert max_value is None or isinstance(max_value, Fraction) + + check_valid_interval(min_value, max_value, "min_value", "max_value") + check_valid_integer(max_denominator, "max_denominator") + + if max_denominator is not None: + if max_denominator < 1: + raise InvalidArgument(f"{max_denominator=} must be >= 1") + if min_value is not None and min_value.denominator > max_denominator: + raise InvalidArgument( + f"The {min_value=} has a denominator greater than the " + f"{max_denominator=}" + ) + if max_value is not None and max_value.denominator > max_denominator: + raise InvalidArgument( + f"The {max_value=} has a denominator greater than the " + f"{max_denominator=}" + ) + + if min_value is not None and min_value == max_value: + return just(min_value) + + def dm_func(denom): + """Take denom, construct numerator strategy, and build fraction.""" + # Four cases of algebra to get integer bounds and scale factor. + min_num, max_num = None, None + if max_value is None and min_value is None: + pass + elif min_value is None: + max_num = denom * max_value.numerator + denom *= max_value.denominator + elif max_value is None: + min_num = denom * min_value.numerator + denom *= min_value.denominator + else: + low = min_value.numerator * max_value.denominator + high = max_value.numerator * min_value.denominator + scale = min_value.denominator * max_value.denominator + # After calculating our integer bounds and scale factor, we remove + # the gcd to avoid drawing more bytes for the example than needed. + # Note that `div` can be at most equal to `scale`. 
+ div = math.gcd(scale, math.gcd(low, high)) + min_num = denom * low // div + max_num = denom * high // div + denom *= scale // div + + return builds( + Fraction, integers(min_value=min_num, max_value=max_num), just(denom) + ) + + if max_denominator is None: + return integers(min_value=1).flatmap(dm_func) + + return ( + integers(1, max_denominator) + .flatmap(dm_func) + .map(lambda f: f.limit_denominator(max_denominator)) + ) + + +def _as_finite_decimal( + value: Real | str | None, name: str, allow_infinity: bool | None, places: int | None +) -> Decimal | None: + """Convert decimal bounds to decimals, carefully.""" + assert name in ("min_value", "max_value") + if value is None: + return None + old = value + if isinstance(value, Fraction): + value = Context(prec=places).divide(value.numerator, value.denominator) + if old != value: + raise InvalidArgument( + f"{old!r} cannot be exactly represented as a decimal with {places=}" + ) + if not isinstance(value, Decimal): + with localcontext(Context()): # ensure that default traps are enabled + value = try_convert(Decimal, value, name) + assert isinstance(value, Decimal) + if value.is_nan(): + raise InvalidArgument(f"Invalid {name}={value!r}") + + # If you are reading this conditional, I am so sorry. I did my best. + finitude_old = value if isinstance(old, str) else old + if math.isfinite(finitude_old) != math.isfinite(value) or ( + value.is_finite() and Fraction(str(old)) != Fraction(str(value)) + ): + note_deprecation( + f"{old!r} cannot be exactly represented as a decimal with {places=}", + since="2025-11-02", + has_codemod=False, + stacklevel=1, + ) + + if value.is_finite(): + return value + assert value.is_infinite() + if (value < 0 if "min" in name else value > 0) and allow_infinity is not False: + return None + raise InvalidArgument(f"{allow_infinity=}, but {name}={value!r}") + + +@cacheable +@defines_strategy(force_reusable_values=True) +def decimals( + min_value: Real | str | None = None, + max_value: Real | str | None = None, + *, + allow_nan: bool | None = None, + allow_infinity: bool | None = None, + places: int | None = None, +) -> SearchStrategy[Decimal]: + """Generates instances of :class:`python:decimal.Decimal`, which may be: + + - A finite rational number, between ``min_value`` and ``max_value``. + - Not a Number, if ``allow_nan`` is True. None means "allow NaN, unless + ``min_value`` and ``max_value`` are not None". + - Positive or negative infinity, if ``max_value`` and ``min_value`` + respectively are None, and ``allow_infinity`` is not False. None means + "allow infinity, unless excluded by the min and max values". + + Note that where floats have one ``NaN`` value, Decimals have four: signed, + and either *quiet* or *signalling*. See `the decimal module docs + `_ for + more information on special values. + + If ``places`` is not None, all finite values drawn from the strategy will + have that number of digits after the decimal place. + + Examples from this strategy do not have a well defined shrink order but + try to maximize human readability when shrinking. 
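+
+    For example, a strategy for prices with exactly two digits after the
+    decimal point might be written as (an illustrative sketch):
+
+    .. code-block:: python
+
+        decimals(min_value="0.01", max_value="999.99", places=2)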
+ """ + # Convert min_value and max_value to Decimal values, and validate args + check_valid_integer(places, "places") + if places is not None and places < 0: + raise InvalidArgument(f"{places=} may not be negative") + min_value = _as_finite_decimal(min_value, "min_value", allow_infinity, places) + max_value = _as_finite_decimal(max_value, "max_value", allow_infinity, places) + check_valid_interval(min_value, max_value, "min_value", "max_value") + if allow_infinity and (None not in (min_value, max_value)): + raise InvalidArgument("Cannot allow infinity between finite bounds") + # Set up a strategy for finite decimals. Note that both floating and + # fixed-point decimals require careful handling to remain isolated from + # any external precision context - in short, we always work out the + # required precision for lossless operation and use context methods. + if places is not None: + # Fixed-point decimals are basically integers with a scale factor + def ctx(val): + """Return a context in which this value is lossless.""" + precision = ceil(math.log10(abs(val) or 1)) + places + 1 + return Context(prec=max([precision, 1])) + + def int_to_decimal(val): + context = ctx(val) + return context.quantize(context.multiply(val, factor), factor) + + factor = Decimal(10) ** -places + min_num, max_num = None, None + if min_value is not None: + min_num = ceil(ctx(min_value).divide(min_value, factor)) + if max_value is not None: + max_num = floor(ctx(max_value).divide(max_value, factor)) + if min_num is not None and max_num is not None and min_num > max_num: + raise InvalidArgument( + f"There are no decimals with {places} places between " + f"{min_value=} and {max_value=}" + ) + strat = integers(min_num, max_num).map(int_to_decimal) + else: + # Otherwise, they're like fractions featuring a power of ten + def fraction_to_decimal(val): + precision = ( + ceil(math.log10(abs(val.numerator) or 1) + math.log10(val.denominator)) + + 1 + ) + return Context(prec=precision or 1).divide( + Decimal(val.numerator), val.denominator + ) + + strat = fractions(min_value, max_value).map(fraction_to_decimal) + # Compose with sampled_from for infinities and NaNs as appropriate + special: list[Decimal] = [] + if allow_infinity or (allow_infinity is None and max_value is None): + special.append(Decimal("Infinity")) + if allow_infinity or (allow_infinity is None and min_value is None): + special.append(Decimal("-Infinity")) + if allow_nan or (allow_nan is None and (None in (min_value, max_value))): + special.extend(map(Decimal, ("NaN", "-NaN", "sNaN", "-sNaN"))) + return strat | (sampled_from(special) if special else nothing()) + + +@defines_strategy(eager=True) +def recursive( + base: SearchStrategy[Ex], + extend: Callable[[SearchStrategy[Any]], SearchStrategy[T]], + *, + max_leaves: int = 100, +) -> SearchStrategy[T | Ex]: + """base: A strategy to start from. + + extend: A function which takes a strategy and returns a new strategy. + + max_leaves: The maximum number of elements to be drawn from base on a given + run. + + This returns a strategy ``S`` such that ``S = extend(base | S)``. That is, + values may be drawn from base, or from any strategy reachable by mixing + applications of | and extend. + + An example may clarify: ``recursive(booleans(), lists)`` would return a + strategy that may return arbitrarily nested and mixed lists of booleans. + So e.g. ``False``, ``[True]``, ``[False, []]``, and ``[[[[True]]]]`` are + all valid values to be drawn from that strategy. 
+ + Examples from this strategy shrink by trying to reduce the amount of + recursion and by shrinking according to the shrinking behaviour of base + and the result of extend. + + """ + + return RecursiveStrategy(base, extend, max_leaves) + + +class PermutationStrategy(SearchStrategy): + def __init__(self, values): + super().__init__() + self.values = values + + def do_draw(self, data): + # Reversed Fisher-Yates shuffle: swap each element with itself or with + # a later element. This shrinks i==j for each element, i.e. to no + # change. We don't consider the last element as it's always a no-op. + result = list(self.values) + for i in range(len(result) - 1): + j = data.draw_integer(i, len(result) - 1) + result[i], result[j] = result[j], result[i] + return result + + +@defines_strategy() +def permutations(values: Sequence[T]) -> SearchStrategy[list[T]]: + """Return a strategy which returns permutations of the ordered collection + ``values``. + + Examples from this strategy shrink by trying to become closer to the + original order of values. + """ + values = check_sample(values, "permutations") + if not values: + return builds(list) + + return PermutationStrategy(values) + + +class CompositeStrategy(SearchStrategy): + def __init__(self, definition, args, kwargs): + super().__init__() + self.definition = definition + self.args = args + self.kwargs = kwargs + + def do_draw(self, data): + return self.definition(data.draw, *self.args, **self.kwargs) + + def calc_label(self) -> int: + return combine_labels( + self.class_label, + calc_label_from_callable(self.definition), + ) + + +class DrawFn(Protocol): + """This type only exists so that you can write type hints for functions + decorated with :func:`@composite `. + + .. code-block:: python + + def draw(strategy: SearchStrategy[Ex], label: object = None) -> Ex: ... + + @composite + def list_and_index(draw: DrawFn) -> tuple[int, str]: + i = draw(integers()) # type of `i` inferred as 'int' + s = draw(text()) # type of `s` inferred as 'str' + return i, s + """ + + def __init__(self): + raise TypeError("Protocols cannot be instantiated") # pragma: no cover + + # Protocol overrides our signature for __init__, + # so we override it right back to make the docs look nice. + __signature__: Signature = Signature(parameters=[]) + + # We define this as a callback protocol because a simple typing.Callable is + # insufficient to fully represent the interface, due to the optional `label` + # parameter. + def __call__(self, strategy: SearchStrategy[Ex], label: object = None) -> Ex: + raise NotImplementedError + + +def _composite(f): + # Wrapped below, using ParamSpec if available + if isinstance(f, (classmethod, staticmethod)): + special_method = type(f) + f = f.__func__ + else: + special_method = None + + sig = get_signature(f) + params = tuple(sig.parameters.values()) + + if not (params and "POSITIONAL" in params[0].kind.name): + raise InvalidArgument( + "Functions wrapped with composite must take at least one " + "positional argument." 
+ ) + if params[0].default is not sig.empty: + raise InvalidArgument("A default value for initial argument will never be used") + if not (f is typing._overload_dummy or is_first_param_referenced_in_function(f)): + note_deprecation( + "There is no reason to use @st.composite on a function which " + "does not call the provided draw() function internally.", + since="2022-07-17", + has_codemod=False, + ) + if get_origin(sig.return_annotation) is SearchStrategy: + ret_repr = repr(sig.return_annotation).replace("hypothesis.strategies.", "st.") + warnings.warn( + f"Return-type annotation is `{ret_repr}`, but the decorated " + "function should return a value (not a strategy)", + HypothesisWarning, + stacklevel=3, + ) + if params[0].kind.name != "VAR_POSITIONAL": + params = params[1:] + newsig = sig.replace( + parameters=params, + return_annotation=( + SearchStrategy + if sig.return_annotation is sig.empty + else SearchStrategy[sig.return_annotation] + ), + ) + + @defines_strategy() + @define_function_signature(f.__name__, f.__doc__, newsig) + def accept(*args, **kwargs): + return CompositeStrategy(f, args, kwargs) + + accept.__module__ = f.__module__ + accept.__signature__ = newsig + if special_method is not None: + return special_method(accept) + return accept + + +composite_doc = """ +Defines a strategy that is built out of potentially arbitrarily many other +strategies. + +@composite provides a callable ``draw`` as the first parameter to the decorated +function, which can be used to dynamically draw a value from any strategy. For +example: + +.. code-block:: python + + from hypothesis import strategies as st, given + + @st.composite + def values(draw): + n1 = draw(st.integers()) + n2 = draw(st.integers(min_value=n1)) + return (n1, n2) + + @given(values()) + def f(value): + (n1, n2) = value + assert n1 <= n2 + +@composite cannot mix test code and generation code. If you need that, use +|st.data|. + +If :func:`@composite ` is used to decorate a +method or classmethod, the ``draw`` argument must come before ``self`` or +``cls``. While we therefore recommend writing strategies as standalone functions +and using |st.register_type_strategy| to associate them with a class, methods +are supported and the ``@composite`` decorator may be applied either before or +after ``@classmethod`` or ``@staticmethod``. See :issue:`2578` and :pull:`2634` +for more details. + +Examples from this strategy shrink by shrinking the output of each draw call. +""" +if typing.TYPE_CHECKING or ParamSpec is not None: + P = ParamSpec("P") + + def composite( + f: Callable[Concatenate[DrawFn, P], Ex], + ) -> Callable[P, SearchStrategy[Ex]]: + return _composite(f) + +else: # pragma: no cover + + @cacheable + def composite(f: Callable[..., Ex]) -> Callable[..., SearchStrategy[Ex]]: + return _composite(f) + + +composite.__doc__ = composite_doc + + +@defines_strategy(force_reusable_values=True) +@cacheable +def complex_numbers( + *, + min_magnitude: Real = 0, + max_magnitude: Real | None = None, + allow_infinity: bool | None = None, + allow_nan: bool | None = None, + allow_subnormal: bool = True, + width: Literal[32, 64, 128] = 128, +) -> SearchStrategy[complex]: + """Returns a strategy that generates :class:`~python:complex` + numbers. + + This strategy draws complex numbers with constrained magnitudes. + The ``min_magnitude`` and ``max_magnitude`` parameters should be + non-negative :class:`~python:numbers.Real` numbers; a value + of ``None`` corresponds an infinite upper bound. 
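+
+    For instance, values from the closed unit disc might be generated with
+    (an illustrative sketch):
+
+    .. code-block:: python
+
+        complex_numbers(max_magnitude=1)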
+ + If ``min_magnitude`` is nonzero or ``max_magnitude`` is finite, it + is an error to enable ``allow_nan``. If ``max_magnitude`` is finite, + it is an error to enable ``allow_infinity``. + + ``allow_infinity``, ``allow_nan``, and ``allow_subnormal`` are + applied to each part of the complex number separately, as for + :func:`~hypothesis.strategies.floats`. + + The magnitude constraints are respected up to a relative error + of (around) floating-point epsilon, due to implementation via + the system ``sqrt`` function. + + The ``width`` argument specifies the maximum number of bits of precision + required to represent the entire generated complex number. + Valid values are 32, 64 or 128, which correspond to the real and imaginary + components each having width 16, 32 or 64, respectively. + Passing ``width=64`` will still use the builtin 128-bit + :class:`~python:complex` class, but always for values which can be + exactly represented as two 32-bit floats. + + Examples from this strategy shrink by shrinking their real and + imaginary parts, as :func:`~hypothesis.strategies.floats`. + + If you need to generate complex numbers with particular real and + imaginary parts or relationships between parts, consider using + :func:`builds(complex, ...) ` or + :func:`@composite ` respectively. + """ + check_valid_magnitude(min_magnitude, "min_magnitude") + check_valid_magnitude(max_magnitude, "max_magnitude") + check_valid_interval(min_magnitude, max_magnitude, "min_magnitude", "max_magnitude") + if max_magnitude == math.inf: + max_magnitude = None + + if allow_infinity is None: + allow_infinity = bool(max_magnitude is None) + elif allow_infinity and max_magnitude is not None: + raise InvalidArgument(f"Cannot have {allow_infinity=} with {max_magnitude=}") + if allow_nan is None: + allow_nan = bool(min_magnitude == 0 and max_magnitude is None) + elif allow_nan and not (min_magnitude == 0 and max_magnitude is None): + raise InvalidArgument( + f"Cannot have {allow_nan=}, {min_magnitude=}, {max_magnitude=}" + ) + check_type(bool, allow_subnormal, "allow_subnormal") + if width not in (32, 64, 128): + raise InvalidArgument( + f"{width=}, but must be 32, 64 or 128 (other complex dtypes " + "such as complex192 or complex256 are not supported)" + # For numpy, these types would be supported (but not by CPython): + # https://numpy.org/doc/stable/reference/arrays.scalars.html#complex-floating-point-types + ) + component_width = width // 2 + allow_kw = { + "allow_nan": allow_nan, + "allow_infinity": allow_infinity, + # If we have a nonzero normal min_magnitude and draw a zero imaginary part, + # then allow_subnormal=True would be an error with the min_value to the floats() + # strategy for the real part. We therefore replace True with None. + "allow_subnormal": None if allow_subnormal else allow_subnormal, + "width": component_width, + } + + if min_magnitude == 0 and max_magnitude is None: + # In this simple but common case, there are no constraints on the + # magnitude and therefore no relationship between the real and + # imaginary parts. + return builds(complex, floats(**allow_kw), floats(**allow_kw)) # type: ignore + + @composite + def constrained_complex(draw): + # We downcast drawn floats to the desired (component) width so we + # guarantee the resulting complex values are representable. Note + # truncating the mantissa bits with float_of() cannot increase the + # magnitude of a float, so we are guaranteed to stay within the allowed + # range. 
See https://github.com/HypothesisWorks/hypothesis/issues/3573 + + # Draw the imaginary part, and determine the maximum real part given + # this and the max_magnitude + if max_magnitude is None: + zi = draw(floats(**allow_kw)) + rmax = None + else: + zi = draw( + floats( + -float_of(max_magnitude, component_width), + float_of(max_magnitude, component_width), + **allow_kw, + ) + ) + rmax = float_of(cathetus(max_magnitude, zi), component_width) + # Draw the real part from the allowed range given the imaginary part + if min_magnitude == 0 or math.fabs(zi) >= min_magnitude: + zr = draw(floats(None if rmax is None else -rmax, rmax, **allow_kw)) + else: + rmin = float_of(cathetus(min_magnitude, zi), component_width) + zr = draw(floats(rmin, rmax, **allow_kw)) + # Order of conditions carefully tuned so that for a given pair of + # magnitude arguments, we always either draw or do not draw the bool + # (crucial for good shrinking behaviour) but only invert when needed. + if min_magnitude > 0 and draw(booleans()) and math.fabs(zi) <= min_magnitude: + zr = -zr + return complex(zr, zi) + + return constrained_complex() + + +@defines_strategy(eager=True) +def shared( + base: SearchStrategy[Ex], + *, + key: Hashable | None = None, +) -> SearchStrategy[Ex]: + """Returns a strategy that draws a single shared value per run, drawn from + base. Any two shared instances with the same key will share the same value, + otherwise the identity of this strategy will be used. That is: + + >>> s = integers() # or any other strategy + >>> x = shared(s) + >>> y = shared(s) + + In the above x and y may draw different (or potentially the same) values. + In the following they will always draw the same: + + >>> x = shared(s, key="hi") + >>> y = shared(s, key="hi") + + Examples from this strategy shrink as per their base strategy. + """ + return SharedStrategy(base, key) + + +@composite +def _maybe_nil_uuids(draw, uuid): + # Equivalent to `random_uuids | just(...)`, with a stronger bias to the former. + if draw(data()).conjecture_data.draw_boolean(1 / 64): + return UUID("00000000-0000-0000-0000-000000000000") + return uuid + + +@cacheable +@defines_strategy(force_reusable_values=True) +def uuids( + *, version: Literal[1, 2, 3, 4, 5] | None = None, allow_nil: bool = False +) -> SearchStrategy[UUID]: + """Returns a strategy that generates :class:`UUIDs `. + + If the optional version argument is given, value is passed through + to :class:`~python:uuid.UUID` and only UUIDs of that version will + be generated. + + If ``allow_nil`` is True, generate the nil UUID much more often. + Otherwise, all returned values from this will be unique, so e.g. if you do + ``lists(uuids())`` the resulting list will never contain duplicates. + + Examples from this strategy don't have any meaningful shrink order. + """ + check_type(bool, allow_nil, "allow_nil") + if version not in (None, 1, 2, 3, 4, 5): + raise InvalidArgument( + f"{version=}, but version must be in " + "(None, 1, 2, 3, 4, 5) to pass to the uuid.UUID constructor." 
+ ) + random_uuids = shared( + randoms(use_true_random=True), key="hypothesis.strategies.uuids.generator" + ).map(lambda r: UUID(version=version, int=r.getrandbits(128))) + + if allow_nil: + if version is not None: + raise InvalidArgument("The nil UUID is not of any version") + return random_uuids.flatmap(_maybe_nil_uuids) + return random_uuids + + +class RunnerStrategy(SearchStrategy): + def __init__(self, default): + super().__init__() + self.default = default + + def do_draw(self, data): + if data.hypothesis_runner is not_set: + if self.default is not_set: + raise InvalidArgument( + "Cannot use runner() strategy with no " + "associated runner or explicit default." + ) + return self.default + return data.hypothesis_runner + + +@defines_strategy(force_reusable_values=True) +def runner(*, default: Any = not_set) -> SearchStrategy[Any]: + """A strategy for getting "the current test runner", whatever that may be. + The exact meaning depends on the entry point, but it will usually be the + associated 'self' value for it. + + If you are using this in a rule for stateful testing, this strategy + will return the instance of the :class:`~hypothesis.stateful.RuleBasedStateMachine` + that the rule is running for. + + If there is no current test runner and a default is provided, return + that default. If no default is provided, raises InvalidArgument. + + Examples from this strategy do not shrink (because there is only one). + """ + return RunnerStrategy(default) + + +class DataObject: + """This type only exists so that you can write type hints for tests using + the :func:`~hypothesis.strategies.data` strategy. Do not use it directly! + """ + + # Note that "only exists" here really means "is only exported to users", + # but we want to treat it as "semi-stable", not document it as "public API". + + def __init__(self, data: ConjectureData) -> None: + self.count = 0 + self.conjecture_data = data + + __signature__ = Signature() # hide internals from Sphinx introspection + + def __repr__(self) -> str: + return "data(...)" + + def draw(self, strategy: SearchStrategy[Ex], label: Any = None) -> Ex: + """Like :obj:`~hypothesis.strategies.DrawFn`.""" + check_strategy(strategy, "strategy") + self.count += 1 + desc = f"Draw {self.count}{'' if label is None else f' ({label})'}" + with deprecate_random_in_strategy("{}from {!r}", desc, strategy): + result = self.conjecture_data.draw(strategy, observe_as=f"generate:{desc}") + + # optimization to avoid needless printer.pretty + if should_note(): + printer = RepresentationPrinter(context=current_build_context()) + printer.text(f"{desc}: ") + if self.conjecture_data.provider.avoid_realization: + printer.text("") + else: + printer.pretty(result) + note(printer.getvalue()) + return result + + +class DataStrategy(SearchStrategy): + def do_draw(self, data): + if data._shared_data_strategy is None: + data._shared_data_strategy = DataObject(data) + return data._shared_data_strategy + + def __repr__(self) -> str: + return "data()" + + def map(self, f): + self.__not_a_first_class_strategy("map") + + def filter(self, condition: Callable[[Ex], Any]) -> NoReturn: + self.__not_a_first_class_strategy("filter") + + def flatmap(self, f): + self.__not_a_first_class_strategy("flatmap") + + def example(self) -> NoReturn: + self.__not_a_first_class_strategy("example") + + def __not_a_first_class_strategy(self, name: str) -> NoReturn: + raise InvalidArgument( + f"Cannot call {name} on a DataStrategy. You should probably " + "be using @composite for whatever it is you're trying to do." 
+ ) + + +@cacheable +@defines_strategy(eager=True) +def data() -> SearchStrategy[DataObject]: + """ + Provides an object ``data`` with a ``data.draw`` function which acts like + the ``draw`` callable provided by |st.composite|, in that it can be used + to dynamically draw values from strategies. |st.data| is more powerful + than |st.composite|, because it allows you to mix generation and test code. + + Here's an example of dynamically generating values using |st.data|: + + .. code-block:: python + + from hypothesis import strategies as st, given + + @given(st.data()) + def test_values(data): + n1 = data.draw(st.integers()) + n2 = data.draw(st.integers(min_value=n1)) + assert n1 + 1 <= n2 + + If the test fails, each draw will be printed with the falsifying example. + e.g. the above is wrong (it has a boundary condition error), so will print: + + .. code-block:: pycon + + Falsifying example: test_values(data=data(...)) + Draw 1: 0 + Draw 2: 0 + + Optionally, you can provide a label to identify values generated by each call + to ``data.draw()``. These labels can be used to identify values in the + output of a falsifying example. + + For instance: + + .. code-block:: python + + @given(st.data()) + def test_draw_sequentially(data): + x = data.draw(st.integers(), label="First number") + y = data.draw(st.integers(min_value=x), label="Second number") + assert x < y + + will produce: + + .. code-block:: pycon + + Falsifying example: test_draw_sequentially(data=data(...)) + Draw 1 (First number): 0 + Draw 2 (Second number): 0 + + Examples from this strategy shrink by shrinking the output of each draw call. + """ + return DataStrategy() + + +if sys.version_info < (3, 12): + # TypeAliasType is new in 3.12 + RegisterTypeT: TypeAlias = type[Ex] +else: # pragma: no cover # covered by test_mypy.py + from typing import TypeAliasType + + # see https://github.com/HypothesisWorks/hypothesis/issues/4410 + RegisterTypeT: TypeAlias = type[Ex] | TypeAliasType + + +def register_type_strategy( + custom_type: RegisterTypeT, + strategy: SearchStrategy[Ex] | Callable[[type[Ex]], SearchStrategy[Ex]], +) -> None: + """Add an entry to the global type-to-strategy lookup. + + This lookup is used in :func:`~hypothesis.strategies.builds` and + |@given|. + + :func:`~hypothesis.strategies.builds` will be used automatically for + classes with type annotations on ``__init__`` , so you only need to + register a strategy if one or more arguments need to be more tightly + defined than their type-based default, or if you want to supply a strategy + for an argument with a default value. + + ``strategy`` may be a search strategy, or a function that takes a type and + returns a strategy (useful for generic types). The function may return + :data:`NotImplemented` to conditionally not provide a strategy for the type + (the type will still be resolved by other methods, if possible, as if the + function was not registered). + + Note that you may not register a parametrised generic type (such as + ``MyCollection[int]``) directly, because the resolution logic does not + handle this case correctly. Instead, you may register a *function* for + ``MyCollection`` and `inspect the type parameters within that function + `__. + """ + # TODO: We would like to move this to the top level, but pending some major + # refactoring it's hard to do without creating circular imports. 
+ from hypothesis.strategies._internal import types + + if not types.is_a_type(custom_type): + raise InvalidArgument(f"{custom_type=} must be a type") + if custom_type in types.NON_RUNTIME_TYPES: + raise InvalidArgument( + f"{custom_type=} is not allowed to be registered, " + f"because there is no such thing as a runtime instance of {custom_type!r}" + ) + if not (isinstance(strategy, SearchStrategy) or callable(strategy)): + raise InvalidArgument( + f"{strategy=} must be a SearchStrategy, or a function that takes " + "a generic type and returns a specific SearchStrategy" + ) + if isinstance(strategy, SearchStrategy): + with warnings.catch_warnings(): + warnings.simplefilter("error", HypothesisSideeffectWarning) + + # Calling is_empty forces materialization of lazy strategies. If this is done at import + # time, lazy strategies will warn about it; here, we force that warning to raise to + # avoid the materialization. Ideally, we'd just check if the strategy is lazy, but the + # lazy strategy may be wrapped underneath another strategy so that's complicated. + try: + if strategy.is_empty: + raise InvalidArgument(f"{strategy=} must not be empty") + except HypothesisSideeffectWarning: # pragma: no cover + pass + if types.has_type_arguments(custom_type): + raise InvalidArgument( + f"Cannot register generic type {custom_type!r}, because it has type " + "arguments which would not be handled. Instead, register a function " + f"for {get_origin(custom_type)!r} which can inspect specific type " + "objects and return a strategy." + ) + if ( + "pydantic.generics" in sys.modules + and issubclass(custom_type, sys.modules["pydantic.generics"].GenericModel) + and not re.search(r"[A-Za-z_]+\[.+\]", repr(custom_type)) + and callable(strategy) + ): # pragma: no cover + # See https://github.com/HypothesisWorks/hypothesis/issues/2940 + raise InvalidArgument( + f"Cannot register a function for {custom_type!r}, because parametrized " + "`pydantic.generics.GenericModel` subclasses aren't actually generic " + "types at runtime. In this case, you should register a strategy " + "directly for each parametrized form that you anticipate using." + ) + + types._global_type_lookup[custom_type] = strategy + from_type.__clear_cache() # type: ignore + + +@cacheable +@defines_strategy(eager=True) +def deferred(definition: Callable[[], SearchStrategy[Ex]]) -> SearchStrategy[Ex]: + """A deferred strategy allows you to write a strategy that references other + strategies that have not yet been defined. This allows for the easy + definition of recursive and mutually recursive strategies. + + The definition argument should be a zero-argument function that returns a + strategy. It will be evaluated the first time the strategy is used to + produce an example. + + Example usage: + + >>> import hypothesis.strategies as st + >>> x = st.deferred(lambda: st.booleans() | st.tuples(x, x)) + >>> x.example() + (((False, (True, True)), (False, True)), (True, True)) + >>> x.example() + True + + Mutual recursion also works fine: + + >>> a = st.deferred(lambda: st.booleans() | b) + >>> b = st.deferred(lambda: st.tuples(a, a)) + >>> a.example() + True + >>> b.example() + (False, (False, ((False, True), False))) + + Examples from this strategy shrink as they normally would from the strategy + returned by the definition. 
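+
+    As a slightly larger sketch, deferred strategies make self-referential
+    grammars easy to express (the ``json`` name below is purely illustrative):
+
+    .. code-block:: python
+
+        import hypothesis.strategies as st
+
+        json = st.deferred(
+            lambda: st.none()
+            | st.booleans()
+            | st.floats(allow_nan=False)
+            | st.text()
+            | st.lists(json)
+            | st.dictionaries(st.text(), json)
+        )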
+ """ + return DeferredStrategy(definition) + + +def domains() -> SearchStrategy[str]: + import hypothesis.provisional + + return hypothesis.provisional.domains() + + +@defines_strategy(force_reusable_values=True) +def emails( + *, domains: SearchStrategy[str] = LazyStrategy(domains, (), {}) +) -> SearchStrategy[str]: + """A strategy for generating email addresses as unicode strings. The + address format is specified in :rfc:`5322#section-3.4.1`. Values shrink + towards shorter local-parts and host domains. + + If ``domains`` is given then it must be a strategy that generates domain + names for the emails, defaulting to :func:`~hypothesis.provisional.domains`. + + This strategy is useful for generating "user data" for tests, as + mishandling of email addresses is a common source of bugs. + """ + local_chars = string.ascii_letters + string.digits + "!#$%&'*+-/=^_`{|}~" + local_part = text(local_chars, min_size=1, max_size=64) + # TODO: include dot-atoms, quoted strings, escaped chars, etc in local part + return builds("{}@{}".format, local_part, domains).filter( + lambda addr: len(addr) <= 254 + ) + + +def _functions(*, like, returns, pure): + # Wrapped up to use ParamSpec below + check_type(bool, pure, "pure") + if not callable(like): + raise InvalidArgument( + "The first argument to functions() must be a callable to imitate, " + f"but got non-callable like={nicerepr(like)!r}" + ) + if returns in (None, ...): + # Passing `None` has never been *documented* as working, but it still + # did from May 2020 to Jan 2022 so we'll avoid breaking it without cause. + hints = get_type_hints(like) + returns = from_type(hints.get("return", type(None))) + check_strategy(returns, "returns") + return FunctionStrategy(like, returns, pure) + + +if typing.TYPE_CHECKING or ParamSpec is not None: + + @overload + def functions( + *, pure: bool = ... + ) -> SearchStrategy[Callable[[], None]]: # pragma: no cover + ... + + @overload + def functions( + *, + like: Callable[P, T], + pure: bool = ..., + ) -> SearchStrategy[Callable[P, T]]: # pragma: no cover + ... + + @overload + def functions( + *, + returns: SearchStrategy[T], + pure: bool = ..., + ) -> SearchStrategy[Callable[[], T]]: # pragma: no cover + ... + + @overload + def functions( + *, + like: Callable[P, Any], + returns: SearchStrategy[T], + pure: bool = ..., + ) -> SearchStrategy[Callable[P, T]]: # pragma: no cover + ... + + @defines_strategy() + def functions(*, like=lambda: None, returns=..., pure=False): + # We shouldn't need overloads here, but mypy disallows default args for + # generics: https://github.com/python/mypy/issues/3737 + """functions(*, like=lambda: None, returns=..., pure=False) + + A strategy for functions, which can be used in callbacks. + + The generated functions will mimic the interface of ``like``, which must + be a callable (including a class, method, or function). The return value + for the function is drawn from the ``returns`` argument, which must be a + strategy. If ``returns`` is not passed, we attempt to infer a strategy + from the return-type annotation if present, falling back to :func:`~none`. + + If ``pure=True``, all arguments passed to the generated function must be + hashable, and if passed identical arguments the original return value will + be returned again - *not* regenerated, so beware mutable values. + + If ``pure=False``, generated functions do not validate their arguments, and + may return a different value if called again with the same arguments. 
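+
+        For instance, a minimal sketch of exercising an API that takes a
+        callback (``on_event`` and the test itself are illustrative stand-ins,
+        not part of this API):
+
+        .. code-block:: python
+
+            from hypothesis import given, strategies as st
+
+            def on_event(payload: dict) -> bool: ...
+
+            @given(st.functions(like=on_event, returns=st.booleans()))
+            def test_accepts_any_handler(handler):
+                assert handler({"id": 1}) in (True, False)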
+ + Generated functions can only be called within the scope of the ``@given`` + which created them. + """ + return _functions(like=like, returns=returns, pure=pure) + +else: # pragma: no cover + + @defines_strategy() + def functions( + *, + like: Callable[..., Any] = lambda: None, + returns: SearchStrategy[Any] | EllipsisType = ..., + pure: bool = False, + ) -> SearchStrategy[Callable[..., Any]]: + """functions(*, like=lambda: None, returns=..., pure=False) + + A strategy for functions, which can be used in callbacks. + + The generated functions will mimic the interface of ``like``, which must + be a callable (including a class, method, or function). The return value + for the function is drawn from the ``returns`` argument, which must be a + strategy. If ``returns`` is not passed, we attempt to infer a strategy + from the return-type annotation if present, falling back to :func:`~none`. + + If ``pure=True``, all arguments passed to the generated function must be + hashable, and if passed identical arguments the original return value will + be returned again - *not* regenerated, so beware mutable values. + + If ``pure=False``, generated functions do not validate their arguments, and + may return a different value if called again with the same arguments. + + Generated functions can only be called within the scope of the ``@given`` + which created them. + """ + return _functions(like=like, returns=returns, pure=pure) + + +@composite +def slices(draw: Any, size: int) -> slice: + """Generates slices that will select indices up to the supplied size + + Generated slices will have start and stop indices that range from -size to size - 1 + and will step in the appropriate direction. Slices should only produce an empty selection + if the start and end are the same. + + Examples from this strategy shrink toward 0 and smaller values + """ + check_valid_size(size, "size") + if size == 0: + step = draw(none() | integers().filter(bool)) + return slice(None, None, step) + # For slices start is inclusive and stop is exclusive + start = draw(integers(0, size - 1) | none()) + stop = draw(integers(0, size) | none()) + + # Limit step size to be reasonable + if start is None and stop is None: + max_step = size + elif start is None: + max_step = stop + elif stop is None: + max_step = start + else: + max_step = abs(start - stop) + + step = draw(integers(1, max_step or 1)) + + if (draw(booleans()) and start == stop) or (stop or 0) < (start or 0): + step *= -1 + + if draw(booleans()) and start is not None: + start -= size + if draw(booleans()) and stop is not None: + stop -= size + if (not draw(booleans())) and step == 1: + step = None + + return slice(start, stop, step) diff --git a/vendored/hypothesis/strategies/_internal/datetime.py b/vendored/hypothesis/strategies/_internal/datetime.py new file mode 100644 index 0000000..62d97b9 --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/datetime.py @@ -0,0 +1,474 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import datetime as dt +import operator as op +import warnings +import zoneinfo +from calendar import monthrange +from functools import cache, partial +from importlib import resources +from pathlib import Path + +from hypothesis.errors import InvalidArgument +from hypothesis.internal.validation import check_type, check_valid_interval +from hypothesis.strategies._internal.core import sampled_from +from hypothesis.strategies._internal.misc import just, none, nothing +from hypothesis.strategies._internal.strategies import SearchStrategy +from hypothesis.strategies._internal.utils import defines_strategy + +DATENAMES = ("year", "month", "day") +TIMENAMES = ("hour", "minute", "second", "microsecond") + + +def is_pytz_timezone(tz): + if not isinstance(tz, dt.tzinfo): + return False + module = type(tz).__module__ + return module == "pytz" or module.startswith("pytz.") + + +def replace_tzinfo(value, timezone): + if is_pytz_timezone(timezone): + # Pytz timezones are a little complicated, and using the .replace method + # can cause some weird issues, so we use their special "localize" instead. + # + # We use the fold attribute as a convenient boolean for is_dst, even though + # they're semantically distinct. For ambiguous or imaginary hours, fold says + # whether you should use the offset that applies before the gap (fold=0) or + # the offset that applies after the gap (fold=1). is_dst says whether you + # should choose the side that is "DST" or "STD" (STD->STD or DST->DST + # transitions are unclear as you might expect). + # + # WARNING: this is INCORRECT for timezones with negative DST offsets such as + # "Europe/Dublin", but it's unclear what we could do instead beyond + # documenting the problem and recommending use of `dateutil` instead. + return timezone.localize(value, is_dst=not value.fold) + return value.replace(tzinfo=timezone) + + +def datetime_does_not_exist(value): + """This function tests whether the given datetime can be round-tripped to and + from UTC. It is an exact inverse of (and very similar to) the dateutil method + https://dateutil.readthedocs.io/en/stable/tz.html#dateutil.tz.datetime_exists + """ + # Naive datetimes cannot be imaginary, but we need this special case because + # chaining .astimezone() ends with *the system local timezone*, not None. + # See bug report in https://github.com/HypothesisWorks/hypothesis/issues/2662 + if value.tzinfo is None: + return False + try: + # Does the naive portion of the datetime change when round-tripped to + # UTC? If so, or if this overflows, we say that it does not exist. + roundtrip = value.astimezone(dt.timezone.utc).astimezone(value.tzinfo) + except OverflowError: + # Overflows at datetime.min or datetime.max boundary condition. + # Rejecting these is acceptable, because timezones are close to + # meaningless before ~1900 and subject to a lot of change by + # 9999, so it should be a very small fraction of possible values. + return True + + if ( + value.tzinfo is not roundtrip.tzinfo + and value.utcoffset() != roundtrip.utcoffset() + ): + # This only ever occurs during imaginary (i.e. nonexistent) datetimes, + # and only for pytz timezones which do not follow PEP-495 semantics. 
+ # (may exclude a few other edge cases, but you should use zoneinfo anyway) + return True + + assert value.tzinfo is roundtrip.tzinfo, "so only the naive portions are compared" + return value != roundtrip + + +def draw_capped_multipart( + data, min_value, max_value, duration_names=DATENAMES + TIMENAMES +): + assert isinstance(min_value, (dt.date, dt.time, dt.datetime)) + assert type(min_value) == type(max_value) + assert min_value <= max_value + result = {} + cap_low, cap_high = True, True + for name in duration_names: + low = getattr(min_value if cap_low else dt.datetime.min, name) + high = getattr(max_value if cap_high else dt.datetime.max, name) + if name == "day" and not cap_high: + _, high = monthrange(**result) + if name == "year": + val = data.draw_integer(low, high, shrink_towards=2000) + else: + val = data.draw_integer(low, high) + result[name] = val + cap_low = cap_low and val == low + cap_high = cap_high and val == high + if hasattr(min_value, "fold"): + # The `fold` attribute is ignored in comparison of naive datetimes. + # In tz-aware datetimes it would require *very* invasive changes to + # the logic above, and be very sensitive to the specific timezone + # (at the cost of efficient shrinking and mutation), so at least for + # now we stick with the status quo and generate it independently. + result["fold"] = data.draw_integer(0, 1) + return result + + +class DatetimeStrategy(SearchStrategy): + def __init__(self, min_value, max_value, timezones_strat, allow_imaginary): + super().__init__() + assert isinstance(min_value, dt.datetime) + assert isinstance(max_value, dt.datetime) + assert min_value.tzinfo is None + assert max_value.tzinfo is None + assert min_value <= max_value + assert isinstance(timezones_strat, SearchStrategy) + assert isinstance(allow_imaginary, bool) + self.min_value = min_value + self.max_value = max_value + self.tz_strat = timezones_strat + self.allow_imaginary = allow_imaginary + + def do_draw(self, data): + # We start by drawing a timezone, and an initial datetime. + tz = data.draw(self.tz_strat) + result = self.draw_naive_datetime_and_combine(data, tz) + + # TODO: with some probability, systematically search for one of + # - an imaginary time (if allowed), + # - a time within 24hrs of a leap second (if there any are within bounds), + # - other subtle, little-known, or nasty issues as described in + # https://github.com/HypothesisWorks/hypothesis/issues/69 + + # If we happened to end up with a disallowed imaginary time, reject it. + if (not self.allow_imaginary) and datetime_does_not_exist(result): + data.mark_invalid(f"{result} does not exist (usually a DST transition)") + return result + + def draw_naive_datetime_and_combine(self, data, tz): + result = draw_capped_multipart(data, self.min_value, self.max_value) + try: + return replace_tzinfo(dt.datetime(**result), timezone=tz) + except (ValueError, OverflowError): + data.mark_invalid( + f"Failed to draw a datetime between {self.min_value!r} and " + f"{self.max_value!r} with timezone from {self.tz_strat!r}." + ) + + +@defines_strategy(force_reusable_values=True) +def datetimes( + min_value: dt.datetime = dt.datetime.min, + max_value: dt.datetime = dt.datetime.max, + *, + timezones: SearchStrategy[dt.tzinfo | None] = none(), + allow_imaginary: bool = True, +) -> SearchStrategy[dt.datetime]: + """datetimes(min_value=datetime.datetime.min, max_value=datetime.datetime.max, *, timezones=none(), allow_imaginary=True) + + A strategy for generating datetimes, which may be timezone-aware. 
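+
+    For example, a minimal sketch of requesting timezone-aware values (using
+    the ``st.timezones()`` strategy described below; the round-trip assertion
+    is just an illustration):
+
+    .. code-block:: python
+
+        import datetime
+
+        from hypothesis import given, strategies as st
+
+        @given(st.datetimes(timezones=st.timezones()))
+        def test_roundtrips_through_isoformat(value):
+            assert value == datetime.datetime.fromisoformat(value.isoformat())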
+ + This strategy works by drawing a naive datetime between ``min_value`` + and ``max_value``, which must both be naive (have no timezone). + + ``timezones`` must be a strategy that generates either ``None``, for naive + datetimes, or :class:`~python:datetime.tzinfo` objects for 'aware' datetimes. + You can construct your own, though we recommend using one of these built-in + strategies: + + * with the standard library: :func:`hypothesis.strategies.timezones`; + * with :pypi:`dateutil `: + :func:`hypothesis.extra.dateutil.timezones`; or + * with :pypi:`pytz`: :func:`hypothesis.extra.pytz.timezones`. + + You may pass ``allow_imaginary=False`` to filter out "imaginary" datetimes + which did not (or will not) occur due to daylight savings, leap seconds, + timezone and calendar adjustments, etc. Imaginary datetimes are allowed + by default, because malformed timestamps are a common source of bugs. + + Examples from this strategy shrink towards midnight on January 1st 2000, + local time. + """ + # Why must bounds be naive? In principle, we could also write a strategy + # that took aware bounds, but the API and validation is much harder. + # If you want to generate datetimes between two particular moments in + # time I suggest (a) just filtering out-of-bounds values; (b) if bounds + # are very close, draw a value and subtract its UTC offset, handling + # overflows and nonexistent times; or (c) do something customised to + # handle datetimes in e.g. a four-microsecond span which is not + # representable in UTC. Handling (d), all of the above, leads to a much + # more complex API for all users and a useful feature for very few. + check_type(bool, allow_imaginary, "allow_imaginary") + check_type(dt.datetime, min_value, "min_value") + check_type(dt.datetime, max_value, "max_value") + if min_value.tzinfo is not None: + raise InvalidArgument(f"{min_value=} must not have tzinfo") + if max_value.tzinfo is not None: + raise InvalidArgument(f"{max_value=} must not have tzinfo") + check_valid_interval(min_value, max_value, "min_value", "max_value") + if not isinstance(timezones, SearchStrategy): + raise InvalidArgument( + f"{timezones=} must be a SearchStrategy that can " + "provide tzinfo for datetimes (either None or dt.tzinfo objects)" + ) + return DatetimeStrategy(min_value, max_value, timezones, allow_imaginary) + + +class TimeStrategy(SearchStrategy): + def __init__(self, min_value, max_value, timezones_strat): + super().__init__() + self.min_value = min_value + self.max_value = max_value + self.tz_strat = timezones_strat + + def do_draw(self, data): + result = draw_capped_multipart(data, self.min_value, self.max_value, TIMENAMES) + tz = data.draw(self.tz_strat) + return dt.time(**result, tzinfo=tz) + + +@defines_strategy(force_reusable_values=True) +def times( + min_value: dt.time = dt.time.min, + max_value: dt.time = dt.time.max, + *, + timezones: SearchStrategy[dt.tzinfo | None] = none(), +) -> SearchStrategy[dt.time]: + """times(min_value=datetime.time.min, max_value=datetime.time.max, *, timezones=none()) + + A strategy for times between ``min_value`` and ``max_value``. + + The ``timezones`` argument is handled as for :py:func:`datetimes`. + + Examples from this strategy shrink towards midnight, with the timezone + component shrinking as for the strategy that provided it. 
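+
+    For example (a minimal sketch; the bounds and assertion are illustrative):
+
+    .. code-block:: python
+
+        import datetime
+
+        from hypothesis import given, strategies as st
+
+        @given(st.times(min_value=datetime.time(9), max_value=datetime.time(17)))
+        def test_within_office_hours(t):
+            assert datetime.time(9) <= t <= datetime.time(17)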
+    """
+    check_type(dt.time, min_value, "min_value")
+    check_type(dt.time, max_value, "max_value")
+    if min_value.tzinfo is not None:
+        raise InvalidArgument(f"{min_value=} must not have tzinfo")
+    if max_value.tzinfo is not None:
+        raise InvalidArgument(f"{max_value=} must not have tzinfo")
+    check_valid_interval(min_value, max_value, "min_value", "max_value")
+    return TimeStrategy(min_value, max_value, timezones)
+
+
+class DateStrategy(SearchStrategy):
+    def __init__(self, min_value, max_value):
+        super().__init__()
+        assert isinstance(min_value, dt.date)
+        assert isinstance(max_value, dt.date)
+        assert min_value < max_value
+        self.min_value = min_value
+        self.max_value = max_value
+
+    def do_draw(self, data):
+        return dt.date(
+            **draw_capped_multipart(data, self.min_value, self.max_value, DATENAMES)
+        )
+
+    def filter(self, condition):
+        if (
+            isinstance(condition, partial)
+            and len(args := condition.args) == 1
+            and not condition.keywords
+            and isinstance(arg := args[0], dt.date)
+            and condition.func in (op.lt, op.le, op.eq, op.ge, op.gt)
+        ):
+            try:
+                arg += dt.timedelta(days={op.lt: 1, op.gt: -1}.get(condition.func, 0))
+            except OverflowError:  # gt date.max, or lt date.min
+                return nothing()
+            lo, hi = {
+                # We're talking about op(arg, x) - the reverse of our usual intuition!
+                op.lt: (arg, self.max_value),  # lambda x: arg < x
+                op.le: (arg, self.max_value),  # lambda x: arg <= x
+                op.eq: (arg, arg),  # lambda x: arg == x
+                op.ge: (self.min_value, arg),  # lambda x: arg >= x
+                op.gt: (self.min_value, arg),  # lambda x: arg > x
+            }[condition.func]
+            lo = max(lo, self.min_value)
+            hi = min(hi, self.max_value)
+            if hi < lo:
+                return nothing()
+            if lo <= self.min_value and self.max_value <= hi:
+                return self
+            return dates(lo, hi)
+
+        return super().filter(condition)
+
+
+@defines_strategy(force_reusable_values=True)
+def dates(
+    min_value: dt.date = dt.date.min, max_value: dt.date = dt.date.max
+) -> SearchStrategy[dt.date]:
+    """dates(min_value=datetime.date.min, max_value=datetime.date.max)
+
+    A strategy for dates between ``min_value`` and ``max_value``.
+
+    Examples from this strategy shrink towards January 1st 2000.
+    """
+    check_type(dt.date, min_value, "min_value")
+    check_type(dt.date, max_value, "max_value")
+    check_valid_interval(min_value, max_value, "min_value", "max_value")
+    if min_value == max_value:
+        return just(min_value)
+    return DateStrategy(min_value, max_value)
+
+
+class TimedeltaStrategy(SearchStrategy):
+    def __init__(self, min_value, max_value):
+        super().__init__()
+        assert isinstance(min_value, dt.timedelta)
+        assert isinstance(max_value, dt.timedelta)
+        assert min_value < max_value
+        self.min_value = min_value
+        self.max_value = max_value
+
+    def do_draw(self, data):
+        result = {}
+        low_bound = True
+        high_bound = True
+        for name in ("days", "seconds", "microseconds"):
+            low = getattr(self.min_value if low_bound else dt.timedelta.min, name)
+            high = getattr(self.max_value if high_bound else dt.timedelta.max, name)
+            val = data.draw_integer(low, high)
+            result[name] = val
+            low_bound = low_bound and val == low
+            high_bound = high_bound and val == high
+        return dt.timedelta(**result)
+
+
+@defines_strategy(force_reusable_values=True)
+def timedeltas(
+    min_value: dt.timedelta = dt.timedelta.min,
+    max_value: dt.timedelta = dt.timedelta.max,
+) -> SearchStrategy[dt.timedelta]:
+    """timedeltas(min_value=datetime.timedelta.min, max_value=datetime.timedelta.max)
+
+    A strategy for timedeltas between ``min_value`` and ``max_value``.
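+
+    For example, a minimal sketch bounding the generated durations (the test
+    body is illustrative only):
+
+    .. code-block:: python
+
+        import datetime
+
+        from hypothesis import given, strategies as st
+
+        @given(
+            st.timedeltas(
+                min_value=datetime.timedelta(0),
+                max_value=datetime.timedelta(days=7),
+            )
+        )
+        def test_within_one_week(delta):
+            assert datetime.timedelta(0) <= delta <= datetime.timedelta(days=7)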
+ + Examples from this strategy shrink towards zero. + """ + check_type(dt.timedelta, min_value, "min_value") + check_type(dt.timedelta, max_value, "max_value") + check_valid_interval(min_value, max_value, "min_value", "max_value") + if min_value == max_value: + return just(min_value) + return TimedeltaStrategy(min_value=min_value, max_value=max_value) + + +@cache +def _valid_key_cacheable(tzpath, key): + assert isinstance(tzpath, tuple) # zoneinfo changed, better update this function! + for root in tzpath: + if Path(root).joinpath(key).exists(): # pragma: no branch + # No branch because most systems only have one TZPATH component. + return True + else: # pragma: no cover + # This branch is only taken for names which are known to zoneinfo + # but not present on the filesystem, i.e. on Windows with tzdata, + # and so is never executed by our coverage tests. + *package_loc, resource_name = key.split("/") + package = "tzdata.zoneinfo." + ".".join(package_loc) + try: + return (resources.files(package) / resource_name).exists() + except ModuleNotFoundError: + return False + + +@defines_strategy(force_reusable_values=True) +def timezone_keys( + *, + # allow_alias: bool = True, + # allow_deprecated: bool = True, + allow_prefix: bool = True, +) -> SearchStrategy[str]: + """A strategy for :wikipedia:`IANA timezone names `. + + As well as timezone names like ``"UTC"``, ``"Australia/Sydney"``, or + ``"America/New_York"``, this strategy can generate: + + - Aliases such as ``"Antarctica/McMurdo"``, which links to ``"Pacific/Auckland"``. + - Deprecated names such as ``"Antarctica/South_Pole"``, which *also* links to + ``"Pacific/Auckland"``. Note that most but + not all deprecated timezone names are also aliases. + - Timezone names with the ``"posix/"`` or ``"right/"`` prefixes, unless + ``allow_prefix=False``. + + These strings are provided separately from Tzinfo objects - such as ZoneInfo + instances from the timezones() strategy - to facilitate testing of timezone + logic without needing workarounds to access non-canonical names. + + .. note:: + + `The tzdata package is required on Windows + `__. + ``pip install hypothesis[zoneinfo]`` installs it, if and only if needed. + + On Windows, you may need to access IANA timezone data via the :pypi:`tzdata` + package. For non-IANA timezones, such as Windows-native names or GNU TZ + strings, we recommend using :func:`~hypothesis.strategies.sampled_from` with + the :pypi:`dateutil ` package, e.g. + :meth:`dateutil:dateutil.tz.tzwin.list`. + """ + # check_type(bool, allow_alias, "allow_alias") + # check_type(bool, allow_deprecated, "allow_deprecated") + check_type(bool, allow_prefix, "allow_prefix") + + with warnings.catch_warnings(): + try: + warnings.simplefilter("ignore", EncodingWarning) + except NameError: # pragma: no cover + pass + # On Python 3.12 (and others?), `available_timezones()` opens files + # without specifying an encoding - which our selftests make an error. + available_timezones = ("UTC", *sorted(zoneinfo.available_timezones())) + + # TODO: filter out alias and deprecated names if disallowed + + # When prefixes are allowed, we first choose a key and then flatmap to get our + # choice with one of the available prefixes. 
That in turn means that we need + # some logic to determine which prefixes are available for a given key: + + def valid_key(key): + return key == "UTC" or _valid_key_cacheable(zoneinfo.TZPATH, key) + + # TODO: work out how to place a higher priority on "weird" timezones + # For details see https://github.com/HypothesisWorks/hypothesis/issues/2414 + strategy = sampled_from([key for key in available_timezones if valid_key(key)]) + + if not allow_prefix: + return strategy + + def sample_with_prefixes(zone): + keys_with_prefixes = (zone, f"posix/{zone}", f"right/{zone}") + return sampled_from([key for key in keys_with_prefixes if valid_key(key)]) + + return strategy.flatmap(sample_with_prefixes) + + +@defines_strategy(force_reusable_values=True) +def timezones(*, no_cache: bool = False) -> SearchStrategy["zoneinfo.ZoneInfo"]: + """A strategy for :class:`python:zoneinfo.ZoneInfo` objects. + + If ``no_cache=True``, the generated instances are constructed using + :meth:`ZoneInfo.no_cache ` instead + of the usual constructor. This may change the semantics of your datetimes + in surprising ways, so only use it if you know that you need to! + + .. note:: + + `The tzdata package is required on Windows + `__. + ``pip install hypothesis[zoneinfo]`` installs it, if and only if needed. + """ + check_type(bool, no_cache, "no_cache") + return timezone_keys().map( + zoneinfo.ZoneInfo.no_cache if no_cache else zoneinfo.ZoneInfo + ) diff --git a/vendored/hypothesis/strategies/_internal/deferred.py b/vendored/hypothesis/strategies/_internal/deferred.py new file mode 100644 index 0000000..1688cf2 --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/deferred.py @@ -0,0 +1,93 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import inspect +from collections.abc import Callable, Sequence + +from hypothesis.configuration import check_sideeffect_during_initialization +from hypothesis.errors import InvalidArgument +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.reflection import get_pretty_function_description +from hypothesis.strategies._internal.strategies import ( + Ex, + RecurT, + SearchStrategy, + check_strategy, +) + + +class DeferredStrategy(SearchStrategy[Ex]): + """A strategy which may be used before it is fully defined.""" + + def __init__(self, definition: Callable[[], SearchStrategy[Ex]]): + super().__init__() + self.__wrapped_strategy: SearchStrategy[Ex] | None = None + self.__in_repr: bool = False + self.__definition: Callable[[], SearchStrategy[Ex]] | None = definition + + @property + def wrapped_strategy(self) -> SearchStrategy[Ex]: + # we assign this before entering the condition to avoid a race condition + # under threading. See issue #4523. + definition = self.__definition + if self.__wrapped_strategy is None: + check_sideeffect_during_initialization("deferred evaluation of {!r}", self) + + if not inspect.isfunction(definition): + raise InvalidArgument( + f"Expected definition to be a function but got {definition!r} " + f"of type {type(definition).__name__} instead." 
+ ) + result = definition() + if result is self: + raise InvalidArgument("Cannot define a deferred strategy to be itself") + check_strategy(result, "definition()") + self.__wrapped_strategy = result + self.__definition = None + return self.__wrapped_strategy + + @property + def branches(self) -> Sequence[SearchStrategy[Ex]]: + return self.wrapped_strategy.branches + + def calc_label(self) -> int: + """Deferred strategies don't have a calculated label, because we would + end up having to calculate the fixed point of some hash function in + order to calculate it when they recursively refer to themself! + + The label for the wrapped strategy will still appear because it + will be passed to draw. + """ + # This is actually the same as the parent class implementation, but we + # include it explicitly here in order to document that this is a + # deliberate decision. + return self.class_label + + def calc_is_empty(self, recur: RecurT) -> bool: + return recur(self.wrapped_strategy) + + def calc_has_reusable_values(self, recur: RecurT) -> bool: + return recur(self.wrapped_strategy) + + def __repr__(self) -> str: + if self.__wrapped_strategy is not None: + if self.__in_repr: + return f"(deferred@{id(self)!r})" + try: + self.__in_repr = True + return repr(self.__wrapped_strategy) + finally: + self.__in_repr = False + else: + description = get_pretty_function_description(self.__definition) + return f"deferred({description})" + + def do_draw(self, data: ConjectureData) -> Ex: + return data.draw(self.wrapped_strategy) diff --git a/vendored/hypothesis/strategies/_internal/featureflags.py b/vendored/hypothesis/strategies/_internal/featureflags.py new file mode 100644 index 0000000..04ab132 --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/featureflags.py @@ -0,0 +1,132 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from collections.abc import Hashable, Iterable, Sequence +from typing import Any + +from hypothesis.internal.conjecture import utils as cu +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.strategies._internal.strategies import SearchStrategy + +FEATURE_LABEL = cu.calc_label_from_name("feature flag") + + +class FeatureFlags: + """Object that can be used to control a number of feature flags for a + given test run. + + This enables an approach to data generation called swarm testing ( + see Groce, Alex, et al. "Swarm testing." Proceedings of the 2012 + International Symposium on Software Testing and Analysis. ACM, 2012), in + which generation is biased by selectively turning some features off for + each test case generated. When there are many interacting features this can + find bugs that a pure generation strategy would otherwise have missed. + + FeatureFlags are designed to "shrink open", so that during shrinking they + become less restrictive. This allows us to potentially shrink to smaller + test cases that were forbidden during the generation phase because they + required disabled features. 
+ """ + + def __init__( + self, + data: ConjectureData | None = None, + enabled: Sequence[Any] = (), + disabled: Sequence[Any] = (), + at_least_one_of: Iterable[Hashable] = (), + ): + self.__data = data + self.__is_disabled = {} + + for f in enabled: + self.__is_disabled[f] = False + + for f in disabled: + self.__is_disabled[f] = True + + # In the original swarm testing paper they turn features on or off + # uniformly at random. Instead we decide the probability with which to + # enable features up front. This can allow for scenarios where all or + # no features are enabled, which are vanishingly unlikely in the + # original model. + # + # We implement this as a single 8-bit integer and enable features which + # score >= that value. In particular when self.__baseline is 0, all + # features will be enabled. This is so that we shrink in the direction + # of more features being enabled. + if self.__data is not None: + self.__p_disabled = self.__data.draw_integer(0, 254) / 255 + else: + # If data is None we're in example mode so all that matters is the + # enabled/disabled lists above. We set this up so that everything + # else is enabled by default. + self.__p_disabled = 0.0 + + # The naive approach can lead to disabling e.g. every single rule on a + # RuleBasedStateMachine, which aborts the test as unable to make progress. + # Track the set of possible names, and ensure that at least one is enabled. + self.__at_least_one_of = set(at_least_one_of) + + def is_enabled(self, name: Any) -> bool: + """Tests whether the feature named ``name`` should be enabled on this + test run.""" + if self.__data is None or self.__data.frozen: + # Feature set objects might hang around after data generation has + # finished. If this happens then we just report all new features as + # enabled, because that's our shrinking direction and they have no + # impact on data generation if they weren't used while it was + # running. + return not self.__is_disabled.get(name, False) + + data = self.__data + + data.start_span(label=FEATURE_LABEL) + + # If we've already decided on this feature then we don't actually + # need to draw anything, but we do write the same decision to the + # input stream. This allows us to lazily decide whether a feature + # is enabled, because it means that if we happen to delete the part + # of the test case where we originally decided, the next point at + # which we make this decision just makes the decision it previously + # made. 
+ oneof = self.__at_least_one_of + is_disabled = self.__data.draw_boolean( + self.__p_disabled, + forced=( + False + if len(oneof) == 1 and name in oneof + else self.__is_disabled.get(name) + ), + ) + self.__is_disabled[name] = is_disabled + if name in oneof and not is_disabled: + oneof.clear() + oneof.discard(name) + data.stop_span() + return not is_disabled + + def __repr__(self) -> str: + enabled = [] + disabled = [] + for name, is_disabled in self.__is_disabled.items(): + if is_disabled: + disabled.append(name) + else: + enabled.append(name) + return f"FeatureFlags({enabled=}, {disabled=})" + + +class FeatureStrategy(SearchStrategy[FeatureFlags]): + def __init__(self, at_least_one_of: Iterable[Hashable] = ()): + super().__init__() + self._at_least_one_of = frozenset(at_least_one_of) + + def do_draw(self, data: ConjectureData) -> FeatureFlags: + return FeatureFlags(data, at_least_one_of=self._at_least_one_of) diff --git a/vendored/hypothesis/strategies/_internal/flatmapped.py b/vendored/hypothesis/strategies/_internal/flatmapped.py new file mode 100644 index 0000000..92c200d --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/flatmapped.py @@ -0,0 +1,68 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from collections.abc import Callable +from typing import Generic, TypeVar + +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.conjecture.utils import ( + calc_label_from_callable, + combine_labels, +) +from hypothesis.internal.reflection import get_pretty_function_description +from hypothesis.strategies._internal.strategies import ( + RecurT, + SearchStrategy, + check_strategy, +) + +MappedFrom = TypeVar("MappedFrom") +MappedTo = TypeVar("MappedTo") + + +class FlatMapStrategy(SearchStrategy[MappedTo], Generic[MappedFrom, MappedTo]): + def __init__( + self, + base: SearchStrategy[MappedFrom], + expand: Callable[[MappedFrom], SearchStrategy[MappedTo]], + ): + super().__init__() + self.base = base + self.expand = expand + + def calc_is_empty(self, recur: RecurT) -> bool: + return recur(self.base) + + def calc_label(self) -> int: + return combine_labels( + self.class_label, + self.base.label, + calc_label_from_callable(self.expand), + ) + + def __repr__(self) -> str: + if not hasattr(self, "_cached_repr"): + self._cached_repr = ( + f"{self.base!r}.flatmap({get_pretty_function_description(self.expand)})" + ) + return self._cached_repr + + def do_draw(self, data: ConjectureData) -> MappedTo: + base = data.draw(self.base) + expanded = self.expand(base) + check_strategy(expanded) + return data.draw(expanded) + + @property + def branches(self) -> list[SearchStrategy[MappedTo]]: + return [ + FlatMapStrategy(strategy, expand=self.expand) + for strategy in self.base.branches + ] diff --git a/vendored/hypothesis/strategies/_internal/functions.py b/vendored/hypothesis/strategies/_internal/functions.py new file mode 100644 index 0000000..ac710cf --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/functions.py @@ -0,0 +1,62 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. 
+# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from weakref import WeakKeyDictionary + +from hypothesis.control import note, should_note +from hypothesis.errors import InvalidState +from hypothesis.internal.reflection import ( + convert_positional_arguments, + nicerepr, + proxies, + repr_call, +) +from hypothesis.strategies._internal.strategies import RecurT, SearchStrategy + + +class FunctionStrategy(SearchStrategy): + def __init__(self, like, returns, pure): + super().__init__() + self.like = like + self.returns = returns + self.pure = pure + # Using wekrefs-to-generated-functions means that the cache can be + # garbage-collected at the end of each example, reducing memory use. + self._cache = WeakKeyDictionary() + + def calc_is_empty(self, recur: RecurT) -> bool: + return recur(self.returns) + + def do_draw(self, data): + @proxies(self.like) + def inner(*args, **kwargs): + if data.frozen: + raise InvalidState( + f"This generated {nicerepr(self.like)} function can only " + "be called within the scope of the @given that created it." + ) + if self.pure: + args, kwargs = convert_positional_arguments(self.like, args, kwargs) + key = (args, frozenset(kwargs.items())) + cache = self._cache.setdefault(inner, {}) + if key not in cache: + cache[key] = data.draw(self.returns) + if should_note(): # optimization to avoid needless repr_call + rep = repr_call(self.like, args, kwargs, reorder=False) + note(f"Called function: {rep} -> {cache[key]!r}") + return cache[key] + else: + val = data.draw(self.returns) + if should_note(): + rep = repr_call(self.like, args, kwargs, reorder=False) + note(f"Called function: {rep} -> {val!r}") + return val + + return inner diff --git a/vendored/hypothesis/strategies/_internal/ipaddress.py b/vendored/hypothesis/strategies/_internal/ipaddress.py new file mode 100644 index 0000000..22d2e33 --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/ipaddress.py @@ -0,0 +1,118 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +from ipaddress import IPv4Address, IPv4Network, IPv6Address, IPv6Network, ip_network +from typing import Literal + +from hypothesis.errors import InvalidArgument +from hypothesis.internal.validation import check_type +from hypothesis.strategies._internal.core import binary, sampled_from +from hypothesis.strategies._internal.numbers import integers +from hypothesis.strategies._internal.strategies import SearchStrategy +from hypothesis.strategies._internal.utils import defines_strategy + +# See https://www.iana.org/assignments/iana-ipv4-special-registry/ +SPECIAL_IPv4_RANGES = ( + "0.0.0.0/8", + "10.0.0.0/8", + "100.64.0.0/10", + "127.0.0.0/8", + "169.254.0.0/16", + "172.16.0.0/12", + "192.0.0.0/24", + "192.0.0.0/29", + "192.0.0.8/32", + "192.0.0.9/32", + "192.0.0.10/32", + "192.0.0.170/32", + "192.0.0.171/32", + "192.0.2.0/24", + "192.31.196.0/24", + "192.52.193.0/24", + "192.88.99.0/24", + "192.168.0.0/16", + "192.175.48.0/24", + "198.18.0.0/15", + "198.51.100.0/24", + "203.0.113.0/24", + "240.0.0.0/4", + "255.255.255.255/32", +) +# and https://www.iana.org/assignments/iana-ipv6-special-registry/ +SPECIAL_IPv6_RANGES = ( + "::1/128", + "::/128", + "::ffff:0:0/96", + "64:ff9b::/96", + "64:ff9b:1::/48", + "100::/64", + "2001::/23", + "2001::/32", + "2001:1::1/128", + "2001:1::2/128", + "2001:2::/48", + "2001:3::/32", + "2001:4:112::/48", + "2001:10::/28", + "2001:20::/28", + "2001:db8::/32", + "2002::/16", + "2620:4f:8000::/48", + "fc00::/7", + "fe80::/10", +) + + +@defines_strategy(force_reusable_values=True) +def ip_addresses( + *, + v: Literal[4, 6] | None = None, + network: str | IPv4Network | IPv6Network | None = None, +) -> SearchStrategy[IPv4Address | IPv6Address]: + r"""Generate IP addresses - ``v=4`` for :class:`~python:ipaddress.IPv4Address`\ es, + ``v=6`` for :class:`~python:ipaddress.IPv6Address`\ es, or leave unspecified + to allow both versions. + + ``network`` may be an :class:`~python:ipaddress.IPv4Network` or + :class:`~python:ipaddress.IPv6Network`, or a string representing a network such as + ``"127.0.0.0/24"`` or ``"2001:db8::/32"``. As well as generating addresses within + a particular routable network, this can be used to generate addresses from a + reserved range listed in the + `IANA `__ + `registries `__. + + If you pass both ``v`` and ``network``, they must be for the same version. + """ + if v is not None: + check_type(int, v, "v") + if v not in (4, 6): + raise InvalidArgument(f"{v=}, but only v=4 or v=6 are valid") + if network is None: + # We use the reserved-address registries to boost the chance + # of generating one of the various special types of address. 
+ four = binary(min_size=4, max_size=4).map(IPv4Address) | sampled_from( + SPECIAL_IPv4_RANGES + ).flatmap(lambda network: ip_addresses(network=network)) + six = binary(min_size=16, max_size=16).map(IPv6Address) | sampled_from( + SPECIAL_IPv6_RANGES + ).flatmap(lambda network: ip_addresses(network=network)) + if v == 4: + return four + if v == 6: + return six + return four | six + if isinstance(network, str): + network = ip_network(network) + check_type((IPv4Network, IPv6Network), network, "network") + assert isinstance(network, (IPv4Network, IPv6Network)) # for Mypy + if v not in (None, network.version): + raise InvalidArgument(f"{v=} is incompatible with {network=}") + addr_type = IPv4Address if network.version == 4 else IPv6Address + return integers(int(network[0]), int(network[-1])).map(addr_type) diff --git a/vendored/hypothesis/strategies/_internal/lazy.py b/vendored/hypothesis/strategies/_internal/lazy.py new file mode 100644 index 0000000..2cbdeb9 --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/lazy.py @@ -0,0 +1,176 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from collections.abc import Callable, Sequence +from inspect import signature +from typing import Any +from weakref import WeakKeyDictionary + +from hypothesis.configuration import check_sideeffect_during_initialization +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.reflection import ( + convert_keyword_arguments, + convert_positional_arguments, + get_pretty_function_description, + repr_call, +) +from hypothesis.strategies._internal.deferred import DeferredStrategy +from hypothesis.strategies._internal.strategies import Ex, RecurT, SearchStrategy +from hypothesis.utils.threading import ThreadLocal + +threadlocal = ThreadLocal(unwrap_depth=int, unwrap_cache=WeakKeyDictionary) + + +def unwrap_strategies(s): + # optimization + if not isinstance(s, (LazyStrategy, DeferredStrategy)): + return s + + try: + return threadlocal.unwrap_cache[s] + except KeyError: + pass + + threadlocal.unwrap_cache[s] = s + threadlocal.unwrap_depth += 1 + + try: + result = unwrap_strategies(s.wrapped_strategy) + threadlocal.unwrap_cache[s] = result + + try: + assert result.force_has_reusable_values == s.force_has_reusable_values + except AttributeError: + pass + + try: + result.force_has_reusable_values = s.force_has_reusable_values + except AttributeError: + pass + + return result + finally: + threadlocal.unwrap_depth -= 1 + if threadlocal.unwrap_depth <= 0: + threadlocal.unwrap_cache.clear() + assert threadlocal.unwrap_depth >= 0 + + +class LazyStrategy(SearchStrategy[Ex]): + """A strategy which is defined purely by conversion to and from another + strategy. + + Its parameter and distribution come from that other strategy. + """ + + def __init__( + self, + function: Callable[..., SearchStrategy[Ex]], + args: Sequence[object], + kwargs: dict[str, object], + *, + transforms: tuple[tuple[str, Callable[..., Any]], ...] 
= (), + force_repr: str | None = None, + ): + super().__init__() + self.__wrapped_strategy: SearchStrategy[Ex] | None = None + self.__representation: str | None = force_repr + self.function = function + self.__args = args + self.__kwargs = kwargs + self._transformations = transforms + + def calc_is_empty(self, recur: RecurT) -> bool: + return recur(self.wrapped_strategy) + + def calc_has_reusable_values(self, recur: RecurT) -> bool: + return recur(self.wrapped_strategy) + + def calc_is_cacheable(self, recur: RecurT) -> bool: + for source in (self.__args, self.__kwargs.values()): + for v in source: + if isinstance(v, SearchStrategy) and not v.is_cacheable: + return False + return True + + def calc_label(self) -> int: + return self.wrapped_strategy.label + + @property + def wrapped_strategy(self) -> SearchStrategy[Ex]: + if self.__wrapped_strategy is None: + check_sideeffect_during_initialization("lazy evaluation of {!r}", self) + + unwrapped_args = tuple(unwrap_strategies(s) for s in self.__args) + unwrapped_kwargs = { + k: unwrap_strategies(v) for k, v in self.__kwargs.items() + } + + base = self.function(*self.__args, **self.__kwargs) + if unwrapped_args == self.__args and unwrapped_kwargs == self.__kwargs: + _wrapped_strategy = base + else: + _wrapped_strategy = self.function(*unwrapped_args, **unwrapped_kwargs) + for method, fn in self._transformations: + _wrapped_strategy = getattr(_wrapped_strategy, method)(fn) + self.__wrapped_strategy = _wrapped_strategy + assert self.__wrapped_strategy is not None + return self.__wrapped_strategy + + def __with_transform(self, method, fn): + repr_ = self.__representation + if repr_: + repr_ = f"{repr_}.{method}({get_pretty_function_description(fn)})" + return LazyStrategy( + self.function, + self.__args, + self.__kwargs, + transforms=(*self._transformations, (method, fn)), + force_repr=repr_, + ) + + def map(self, pack): + return self.__with_transform("map", pack) + + def filter(self, condition): + return self.__with_transform("filter", condition) + + def do_validate(self) -> None: + w = self.wrapped_strategy + assert isinstance(w, SearchStrategy), f"{self!r} returned non-strategy {w!r}" + w.validate() + + def __repr__(self) -> str: + if self.__representation is None: + sig = signature(self.function) + pos = [p for p in sig.parameters.values() if "POSITIONAL" in p.kind.name] + if len(pos) > 1 or any(p.default is not sig.empty for p in pos): + _args, _kwargs = convert_positional_arguments( + self.function, self.__args, self.__kwargs + ) + else: + _args, _kwargs = convert_keyword_arguments( + self.function, self.__args, self.__kwargs + ) + kwargs_for_repr = { + k: v + for k, v in _kwargs.items() + if k not in sig.parameters or v is not sig.parameters[k].default + } + self.__representation = repr_call( + self.function, _args, kwargs_for_repr, reorder=False + ) + "".join( + f".{method}({get_pretty_function_description(fn)})" + for method, fn in self._transformations + ) + return self.__representation + + def do_draw(self, data: ConjectureData) -> Ex: + return data.draw(self.wrapped_strategy) diff --git a/vendored/hypothesis/strategies/_internal/misc.py b/vendored/hypothesis/strategies/_internal/misc.py new file mode 100644 index 0000000..cbcfa32 --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/misc.py @@ -0,0 +1,138 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. 
+#
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
+from collections.abc import Callable
+from typing import TYPE_CHECKING, Any, NoReturn
+
+from hypothesis.internal.conjecture.data import ConjectureData
+from hypothesis.internal.reflection import get_pretty_function_description
+from hypothesis.strategies._internal.strategies import (
+    Ex,
+    RecurT,
+    SampledFromStrategy,
+    SearchStrategy,
+    T,
+    is_hashable,
+)
+from hypothesis.strategies._internal.utils import cacheable, defines_strategy
+from hypothesis.utils.conventions import UniqueIdentifier
+
+if TYPE_CHECKING:
+    from typing_extensions import Never
+
+
+class JustStrategy(SampledFromStrategy[Ex]):
+    """A strategy which always returns a single fixed value.
+
+    It's implemented as a length-one SampledFromStrategy so that all our
+    special-case logic for filtering and sets applies also to just(x).
+
+    The important difference from a SampledFromStrategy with only one
+    element to choose is that JustStrategy *never* touches the underlying
+    choice sequence, i.e. drawing neither reads from nor writes to `data`.
+    This is a reasonably important optimisation (or semantic distinction!)
+    for both JustStrategy and SampledFromStrategy.
+    """
+
+    @property
+    def value(self) -> Ex:
+        return self.elements[0]
+
+    def __repr__(self) -> str:
+        suffix = "".join(
+            f".{name}({get_pretty_function_description(f)})"
+            for name, f in self._transformations
+        )
+        if self.value is None:
+            return "none()" + suffix
+        return f"just({get_pretty_function_description(self.value)}){suffix}"
+
+    def calc_is_cacheable(self, recur: RecurT) -> bool:
+        return is_hashable(self.value)
+
+    def do_filtered_draw(self, data: ConjectureData) -> Ex | UniqueIdentifier:
+        # The parent class's `do_draw` implementation delegates directly to
+        # `do_filtered_draw`, which we can greatly simplify in this case since
+        # we have exactly one value. (This also avoids drawing any data.)
+        return self._transform(self.value)
+
+
+@defines_strategy(eager=True)
+def just(value: T) -> SearchStrategy[T]:
+    """Return a strategy which only generates ``value``.
+
+    Note: ``value`` is not copied. Be wary of using mutable values.
+
+    If ``value`` is the result of a callable, you can use
+    :func:`builds(callable) <hypothesis.strategies.builds>` instead
+    of ``just(callable())`` to get a fresh value each time.
+
+    Examples from this strategy do not shrink (because there is only one).
+    """
+    return JustStrategy([value])
+
+
+@defines_strategy(force_reusable_values=True)
+def none() -> SearchStrategy[None]:
+    """Return a strategy which only generates None.
+
+    Examples from this strategy do not shrink (because there is only
+    one).
+    """
+    return just(None)
+
+
+class Nothing(SearchStrategy["Never"]):
+    def calc_is_empty(self, recur: RecurT) -> bool:
+        return True
+
+    def do_draw(self, data: ConjectureData) -> NoReturn:
+        # This method should never be called because draw() will mark the
+        # data as invalid immediately because is_empty is True.
+ raise NotImplementedError("This should never happen") + + def calc_has_reusable_values(self, recur: RecurT) -> bool: + return True + + def __repr__(self) -> str: + return "nothing()" + + def map(self, pack: Callable[[Any], Any]) -> SearchStrategy["Never"]: + return self + + def filter(self, condition: Callable[[Any], Any]) -> "SearchStrategy[Never]": + return self + + def flatmap( + self, expand: Callable[[Any], "SearchStrategy[Any]"] + ) -> "SearchStrategy[Never]": + return self + + +NOTHING = Nothing() + + +@cacheable +@defines_strategy(eager=True) +def nothing() -> SearchStrategy["Never"]: + """This strategy never successfully draws a value and will always reject on + an attempt to draw. + + Examples from this strategy do not shrink (because there are none). + """ + return NOTHING + + +class BooleansStrategy(SearchStrategy[bool]): + def do_draw(self, data: ConjectureData) -> bool: + return data.draw_boolean() + + def __repr__(self) -> str: + return "booleans()" diff --git a/vendored/hypothesis/strategies/_internal/numbers.py b/vendored/hypothesis/strategies/_internal/numbers.py new file mode 100644 index 0000000..307bb37 --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/numbers.py @@ -0,0 +1,528 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import math +from decimal import Decimal +from fractions import Fraction +from typing import Literal, cast + +from hypothesis.control import reject +from hypothesis.errors import InvalidArgument +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.filtering import ( + get_float_predicate_bounds, + get_integer_predicate_bounds, +) +from hypothesis.internal.floats import ( + SMALLEST_SUBNORMAL, + float_of, + float_to_int, + int_to_float, + is_negative, + next_down, + next_down_normal, + next_up, + next_up_normal, + width_smallest_normals, +) +from hypothesis.internal.validation import ( + check_type, + check_valid_bound, + check_valid_interval, +) +from hypothesis.strategies._internal.misc import nothing +from hypothesis.strategies._internal.strategies import ( + SampledFromStrategy, + SearchStrategy, +) +from hypothesis.strategies._internal.utils import cacheable, defines_strategy + +# See https://github.com/python/mypy/issues/3186 - numbers.Real is wrong! +Real = int | float | Fraction | Decimal + + +class IntegersStrategy(SearchStrategy[int]): + def __init__(self, start: int | None, end: int | None) -> None: + super().__init__() + assert isinstance(start, int) or start is None + assert isinstance(end, int) or end is None + assert start is None or end is None or start <= end + self.start = start + self.end = end + + def __repr__(self) -> str: + if self.start is None and self.end is None: + return "integers()" + if self.end is None: + return f"integers(min_value={self.start})" + if self.start is None: + return f"integers(max_value={self.end})" + return f"integers({self.start}, {self.end})" + + def do_draw(self, data: ConjectureData) -> int: + # For bounded integers, make the bounds and near-bounds more likely. 
+ weights = None + if ( + self.end is not None + and self.start is not None + and self.end - self.start > 127 + ): + weights = { + self.start: (2 / 128), + self.start + 1: (1 / 128), + self.end - 1: (1 / 128), + self.end: (2 / 128), + } + + return data.draw_integer( + min_value=self.start, max_value=self.end, weights=weights + ) + + def filter(self, condition): + if condition is math.isfinite: + return self + if condition in [math.isinf, math.isnan]: + return nothing() + constraints, pred = get_integer_predicate_bounds(condition) + + start, end = self.start, self.end + if "min_value" in constraints: + start = max(constraints["min_value"], -math.inf if start is None else start) + if "max_value" in constraints: + end = min(constraints["max_value"], math.inf if end is None else end) + + if start != self.start or end != self.end: + if start is not None and end is not None and start > end: + return nothing() + self = type(self)(start, end) + if pred is None: + return self + return super().filter(pred) + + +@cacheable +@defines_strategy(force_reusable_values=True) +def integers( + min_value: int | None = None, + max_value: int | None = None, +) -> SearchStrategy[int]: + """Returns a strategy which generates integers. + + If min_value is not None then all values will be >= min_value. If + max_value is not None then all values will be <= max_value + + Examples from this strategy will shrink towards zero, and negative values + will also shrink towards positive (i.e. -n may be replaced by +n). + """ + check_valid_bound(min_value, "min_value") + check_valid_bound(max_value, "max_value") + check_valid_interval(min_value, max_value, "min_value", "max_value") + + if min_value is not None: + if min_value != int(min_value): + raise InvalidArgument( + f"min_value={min_value!r} of type {type(min_value)!r} " + "cannot be exactly represented as an integer." + ) + min_value = int(min_value) + if max_value is not None: + if max_value != int(max_value): + raise InvalidArgument( + f"max_value={max_value!r} of type {type(max_value)!r} " + "cannot be exactly represented as an integer." + ) + max_value = int(max_value) + + return IntegersStrategy(min_value, max_value) + + +class FloatStrategy(SearchStrategy[float]): + """A strategy for floating point numbers.""" + + def __init__( + self, + *, + min_value: float, + max_value: float, + allow_nan: bool, + # The smallest nonzero number we can represent is usually a subnormal, but may + # be the smallest normal if we're running in unsafe denormals-are-zero mode. + # While that's usually an explicit error, we do need to handle the case where + # the user passes allow_subnormal=False. + smallest_nonzero_magnitude: float = SMALLEST_SUBNORMAL, + ): + super().__init__() + assert isinstance(allow_nan, bool) + assert smallest_nonzero_magnitude >= 0.0, "programmer error if this is negative" + if smallest_nonzero_magnitude == 0.0: # pragma: no cover + raise FloatingPointError( + "Got allow_subnormal=True, but we can't represent subnormal floats " + "right now, in violation of the IEEE-754 floating-point " + "specification. This is usually because something was compiled with " + "-ffast-math or a similar option, which sets global processor state. " + "See https://simonbyrne.github.io/notes/fastmath/ for a more detailed " + "writeup - and good luck!" 
+ ) + self.min_value = min_value + self.max_value = max_value + self.allow_nan = allow_nan + self.smallest_nonzero_magnitude = smallest_nonzero_magnitude + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}({self.min_value=}, {self.max_value=}, " + f"{self.allow_nan=}, {self.smallest_nonzero_magnitude=})" + ).replace("self.", "") + + def do_draw(self, data: ConjectureData) -> float: + return data.draw_float( + min_value=self.min_value, + max_value=self.max_value, + allow_nan=self.allow_nan, + smallest_nonzero_magnitude=self.smallest_nonzero_magnitude, + ) + + def filter(self, condition): + # Handle a few specific weird cases. + if condition is math.isfinite: + return FloatStrategy( + min_value=max(self.min_value, next_up(float("-inf"))), + max_value=min(self.max_value, next_down(float("inf"))), + allow_nan=False, + smallest_nonzero_magnitude=self.smallest_nonzero_magnitude, + ) + if condition is math.isinf: + if permitted_infs := [ + x + for x in (-math.inf, math.inf) + if self.min_value <= x <= self.max_value + ]: + return SampledFromStrategy(permitted_infs) + return nothing() + if condition is math.isnan: + if not self.allow_nan: + return nothing() + return NanStrategy() + + constraints, pred = get_float_predicate_bounds(condition) + if not constraints: + return super().filter(pred) + min_bound = max(constraints.get("min_value", -math.inf), self.min_value) + max_bound = min(constraints.get("max_value", math.inf), self.max_value) + + # Adjustments for allow_subnormal=False, if any need to be made + if -self.smallest_nonzero_magnitude < min_bound < 0: + min_bound = -0.0 + elif 0 < min_bound < self.smallest_nonzero_magnitude: + min_bound = self.smallest_nonzero_magnitude + if -self.smallest_nonzero_magnitude < max_bound < 0: + max_bound = -self.smallest_nonzero_magnitude + elif 0 < max_bound < self.smallest_nonzero_magnitude: + max_bound = 0.0 + + if min_bound > max_bound: + return nothing() + if ( + min_bound > self.min_value + or self.max_value > max_bound + or (self.allow_nan and (-math.inf < min_bound or max_bound < math.inf)) + ): + self = type(self)( + min_value=min_bound, + max_value=max_bound, + allow_nan=False, + smallest_nonzero_magnitude=self.smallest_nonzero_magnitude, + ) + if pred is None: + return self + return super().filter(pred) + + +@cacheable +@defines_strategy(force_reusable_values=True) +def floats( + min_value: Real | None = None, + max_value: Real | None = None, + *, + allow_nan: bool | None = None, + allow_infinity: bool | None = None, + allow_subnormal: bool | None = None, + width: Literal[16, 32, 64] = 64, + exclude_min: bool = False, + exclude_max: bool = False, +) -> SearchStrategy[float]: + """Returns a strategy which generates floats. + + - If min_value is not None, all values will be ``>= min_value`` + (or ``> min_value`` if ``exclude_min``). + - If max_value is not None, all values will be ``<= max_value`` + (or ``< max_value`` if ``exclude_max``). + - If min_value or max_value is not None, it is an error to enable + allow_nan. + - If both min_value and max_value are not None, it is an error to enable + allow_infinity. + - If inferred values range does not include subnormal values, it is an error + to enable allow_subnormal. + + Where not explicitly ruled out by the bounds, + :wikipedia:`subnormals `, infinities, and NaNs are possible + values generated by this strategy. + + The width argument specifies the maximum number of bits of precision + required to represent the generated float. Valid values are 16, 32, or 64. 
+ Passing ``width=32`` will still use the builtin 64-bit :class:`~python:float` class, + but always for values which can be exactly represented as a 32-bit float. + + The exclude_min and exclude_max argument can be used to generate numbers + from open or half-open intervals, by excluding the respective endpoints. + Excluding either signed zero will also exclude the other. + Attempting to exclude an endpoint which is None will raise an error; + use ``allow_infinity=False`` to generate finite floats. You can however + use e.g. ``min_value=-math.inf, exclude_min=True`` to exclude only + one infinite endpoint. + + Examples from this strategy have a complicated and hard to explain + shrinking behaviour, but it tries to improve "human readability". Finite + numbers will be preferred to infinity and infinity will be preferred to + NaN. + """ + check_type(bool, exclude_min, "exclude_min") + check_type(bool, exclude_max, "exclude_max") + + if allow_nan is None: + allow_nan = bool(min_value is None and max_value is None) + elif allow_nan and (min_value is not None or max_value is not None): + raise InvalidArgument(f"Cannot have {allow_nan=}, with min_value or max_value") + + if width not in (16, 32, 64): + raise InvalidArgument( + f"Got {width=}, but the only valid values " + "are the integers 16, 32, and 64." + ) + # Literal[16] accepts both 16 and 16.0. Normalize to the int 16 here, mainly + # for mypyc. We want to support width=16.0 to make e.g. width=mywidth / 2 for + # mywidth=32 easy. + width = cast(Literal[16, 32, 64], int(width)) + + check_valid_bound(min_value, "min_value") + check_valid_bound(max_value, "max_value") + + if math.copysign(1.0, -0.0) == 1.0: # pragma: no cover + raise FloatingPointError( + "Your Python install can't represent -0.0, which is required by the " + "IEEE-754 floating-point specification. This is probably because it was " + "compiled with an unsafe option like -ffast-math; for a more detailed " + "explanation see https://simonbyrne.github.io/notes/fastmath/" + ) + if allow_subnormal and next_up(0.0, width=width) == 0: # pragma: no cover + # Not worth having separate CI envs and dependencies just to cover this branch; + # discussion in https://github.com/HypothesisWorks/hypothesis/issues/3092 + # + # Erroring out here ensures that the database contents are interpreted + # consistently - which matters for such a foundational strategy, even if it's + # not always true for all user-composed strategies further up the stack. + from _hypothesis_ftz_detector import identify_ftz_culprits + + try: + ftz_pkg = identify_ftz_culprits() + except Exception: + ftz_pkg = None + if ftz_pkg: + ftz_msg = ( + f"This seems to be because the `{ftz_pkg}` package was compiled with " + f"-ffast-math or a similar option, which sets global processor state " + f"- see https://simonbyrne.github.io/notes/fastmath/ for details. " + f"If you don't know why {ftz_pkg} is installed, `pipdeptree -rp " + f"{ftz_pkg}` will show which packages depend on it." + ) + else: + ftz_msg = ( + "This is usually because something was compiled with -ffast-math " + "or a similar option, which sets global processor state. See " + "https://simonbyrne.github.io/notes/fastmath/ for a more detailed " + "writeup - and good luck!" + ) + raise FloatingPointError( + f"Got {allow_subnormal=}, but we can't represent " + f"subnormal floats right now, in violation of the IEEE-754 floating-point " + f"specification. 
{ftz_msg}" + ) + + min_arg, max_arg = min_value, max_value + if min_value is not None: + min_value = float_of(min_value, width) + assert isinstance(min_value, float) + if max_value is not None: + max_value = float_of(max_value, width) + assert isinstance(max_value, float) + + if min_value != min_arg: + raise InvalidArgument( + f"min_value={min_arg!r} cannot be exactly represented as a float " + f"of width {width} - use {min_value=} instead." + ) + if max_value != max_arg: + raise InvalidArgument( + f"max_value={max_arg!r} cannot be exactly represented as a float " + f"of width {width} - use {max_value=} instead." + ) + + if exclude_min and (min_value is None or min_value == math.inf): + raise InvalidArgument(f"Cannot exclude {min_value=}") + if exclude_max and (max_value is None or max_value == -math.inf): + raise InvalidArgument(f"Cannot exclude {max_value=}") + + assumed_allow_subnormal = allow_subnormal is None or allow_subnormal + if min_value is not None and ( + exclude_min or (min_arg is not None and min_value < min_arg) + ): + min_value = next_up_normal( + min_value, width, allow_subnormal=assumed_allow_subnormal + ) + if min_value == min_arg: + assert min_value == min_arg == 0 + assert is_negative(min_arg) + assert not is_negative(min_value) + min_value = next_up_normal( + min_value, width, allow_subnormal=assumed_allow_subnormal + ) + assert min_value > min_arg # type: ignore + if max_value is not None and ( + exclude_max or (max_arg is not None and max_value > max_arg) + ): + max_value = next_down_normal( + max_value, width, allow_subnormal=assumed_allow_subnormal + ) + if max_value == max_arg: + assert max_value == max_arg == 0 + assert is_negative(max_value) + assert not is_negative(max_arg) + max_value = next_down_normal( + max_value, width, allow_subnormal=assumed_allow_subnormal + ) + assert max_value < max_arg # type: ignore + + if min_value == -math.inf: + min_value = None + if max_value == math.inf: + max_value = None + + bad_zero_bounds = ( + min_value == max_value == 0 + and is_negative(max_value) + and not is_negative(min_value) + ) + if ( + min_value is not None + and max_value is not None + and (min_value > max_value or bad_zero_bounds) + ): + # This is a custom alternative to check_valid_interval, because we want + # to include the bit-width and exclusion information in the message. 
+ msg = ( + f"There are no {width}-bit floating-point values between " + f"min_value={min_arg!r} and max_value={max_arg!r}" + ) + if exclude_min or exclude_max: + msg += f", {exclude_min=} and {exclude_max=}" + raise InvalidArgument(msg) + + if allow_infinity is None: + allow_infinity = bool(min_value is None or max_value is None) + elif allow_infinity: + if min_value is not None and max_value is not None: + raise InvalidArgument( + f"Cannot have {allow_infinity=}, with both min_value and max_value" + ) + elif min_value == math.inf: + if min_arg == math.inf: + raise InvalidArgument("allow_infinity=False excludes min_value=inf") + raise InvalidArgument( + f"exclude_min=True turns min_value={min_arg!r} into inf, " + "but allow_infinity=False" + ) + elif max_value == -math.inf: + if max_arg == -math.inf: + raise InvalidArgument("allow_infinity=False excludes max_value=-inf") + raise InvalidArgument( + f"exclude_max=True turns max_value={max_arg!r} into -inf, " + "but allow_infinity=False" + ) + + smallest_normal = width_smallest_normals[width] + if allow_subnormal is None: + if min_value is not None and max_value is not None: + if min_value == max_value: + allow_subnormal = -smallest_normal < min_value < smallest_normal + else: + allow_subnormal = ( + min_value < smallest_normal and max_value > -smallest_normal + ) + elif min_value is not None: + allow_subnormal = min_value < smallest_normal + elif max_value is not None: + allow_subnormal = max_value > -smallest_normal + else: + allow_subnormal = True + if allow_subnormal: + if min_value is not None and min_value >= smallest_normal: + raise InvalidArgument( + f"allow_subnormal=True, but minimum value {min_value} " + f"excludes values below float{width}'s " + f"smallest positive normal {smallest_normal}" + ) + if max_value is not None and max_value <= -smallest_normal: + raise InvalidArgument( + f"allow_subnormal=True, but maximum value {max_value} " + f"excludes values above float{width}'s " + f"smallest negative normal {-smallest_normal}" + ) + + if min_value is None: + min_value = float("-inf") + if max_value is None: + max_value = float("inf") + if not allow_infinity: + min_value = max(min_value, next_up(float("-inf"))) + max_value = min(max_value, next_down(float("inf"))) + assert isinstance(min_value, float) + assert isinstance(max_value, float) + smallest_nonzero_magnitude = ( + SMALLEST_SUBNORMAL if allow_subnormal else smallest_normal + ) + result: SearchStrategy = FloatStrategy( + min_value=min_value, + max_value=max_value, + allow_nan=allow_nan, + smallest_nonzero_magnitude=smallest_nonzero_magnitude, + ) + + if width < 64: + + def downcast(x: float) -> float: + try: + return float_of(x, width) + except OverflowError: # pragma: no cover + reject() + + result = result.map(downcast) + return result + + +class NanStrategy(SearchStrategy[float]): + """Strategy for sampling the space of nan float values.""" + + def do_draw(self, data: ConjectureData) -> float: + # Nans must have all exponent bits and the first mantissa bit set, so + # we generate by taking 64 random bits and setting the required ones. 
+ sign_bit = int(data.draw_boolean()) << 63 + nan_bits = float_to_int(math.nan) + mantissa_bits = data.draw_integer(0, 2**52 - 1) + return int_to_float(sign_bit | nan_bits | mantissa_bits) diff --git a/vendored/hypothesis/strategies/_internal/random.py b/vendored/hypothesis/strategies/_internal/random.py new file mode 100644 index 0000000..523991c --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/random.py @@ -0,0 +1,442 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import abc +import inspect +import math +from dataclasses import dataclass, field +from random import Random +from typing import Any + +from hypothesis.control import should_note +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.reflection import define_function_signature +from hypothesis.reporting import report +from hypothesis.strategies._internal.core import lists, permutations, sampled_from +from hypothesis.strategies._internal.numbers import floats, integers +from hypothesis.strategies._internal.strategies import SearchStrategy + + +class HypothesisRandom(Random, abc.ABC): + """A subclass of Random designed to expose the seed it was initially + provided with.""" + + def __init__(self, *, note_method_calls: bool) -> None: + self._note_method_calls = note_method_calls + + def __deepcopy__(self, table): + return self.__copy__() + + @abc.abstractmethod + def seed(self, seed): + raise NotImplementedError + + @abc.abstractmethod + def getstate(self): + raise NotImplementedError + + @abc.abstractmethod + def setstate(self, state): + raise NotImplementedError + + @abc.abstractmethod + def _hypothesis_do_random(self, method, kwargs): + raise NotImplementedError + + def _hypothesis_log_random(self, method, kwargs, result): + if not (self._note_method_calls and should_note()): + return + + args, kwargs = convert_kwargs(method, kwargs) + argstr = ", ".join( + list(map(repr, args)) + [f"{k}={v!r}" for k, v in kwargs.items()] + ) + report(f"{self!r}.{method}({argstr}) -> {result!r}") + + +RANDOM_METHODS = [ + name + for name in [ + "_randbelow", + "betavariate", + "binomialvariate", + "choice", + "choices", + "expovariate", + "gammavariate", + "gauss", + "getrandbits", + "lognormvariate", + "normalvariate", + "paretovariate", + "randint", + "random", + "randrange", + "sample", + "shuffle", + "triangular", + "uniform", + "vonmisesvariate", + "weibullvariate", + "randbytes", + ] + if hasattr(Random, name) +] + + +# Fake shims to get a good signature +def getrandbits(self, n: int) -> int: # type: ignore + raise NotImplementedError + + +def random(self) -> float: # type: ignore + raise NotImplementedError + + +def _randbelow(self, n: int) -> int: # type: ignore + raise NotImplementedError + + +STUBS = {f.__name__: f for f in [getrandbits, random, _randbelow]} + + +SIGNATURES: dict[str, inspect.Signature] = {} + + +def sig_of(name): + try: + return SIGNATURES[name] + except KeyError: + pass + + target = getattr(Random, name) + result = inspect.signature(STUBS.get(name, target)) + SIGNATURES[name] = result + return result + + +def define_copy_method(name): + target = getattr(Random, name) + + def 
implementation(self, **kwargs): + result = self._hypothesis_do_random(name, kwargs) + self._hypothesis_log_random(name, kwargs, result) + return result + + sig = inspect.signature(STUBS.get(name, target)) + + result = define_function_signature(target.__name__, target.__doc__, sig)( + implementation + ) + + result.__module__ = __name__ + result.__qualname__ = "HypothesisRandom." + result.__name__ + + setattr(HypothesisRandom, name, result) + + +for r in RANDOM_METHODS: + define_copy_method(r) + + +@dataclass(slots=True, frozen=False) +class RandomState: + next_states: dict = field(default_factory=dict) + state_id: Any = None + + +def state_for_seed(data, seed): + if data.seeds_to_states is None: + data.seeds_to_states = {} + + seeds_to_states = data.seeds_to_states + try: + state = seeds_to_states[seed] + except KeyError: + state = RandomState() + seeds_to_states[seed] = state + + return state + + +def normalize_zero(f: float) -> float: + if f == 0.0: + return 0.0 + else: + return f + + +class ArtificialRandom(HypothesisRandom): + VERSION = 10**6 + + def __init__(self, *, note_method_calls: bool, data: ConjectureData) -> None: + super().__init__(note_method_calls=note_method_calls) + self.__data = data + self.__state = RandomState() + + def __repr__(self) -> str: + return "HypothesisRandom(generated data)" + + def __copy__(self) -> "ArtificialRandom": + result = ArtificialRandom( + note_method_calls=self._note_method_calls, + data=self.__data, + ) + result.setstate(self.getstate()) + return result + + def __convert_result(self, method, kwargs, result): + if method == "choice": + return kwargs.get("seq")[result] + if method in ("choices", "sample"): + seq = kwargs["population"] + return [seq[i] for i in result] + if method == "shuffle": + seq = kwargs["x"] + original = list(seq) + for i, i2 in enumerate(result): + seq[i] = original[i2] + return None + return result + + def _hypothesis_do_random(self, method, kwargs): + if method == "choices": + key = (method, len(kwargs["population"]), kwargs.get("k")) + elif method == "choice": + key = (method, len(kwargs["seq"])) + elif method == "shuffle": + key = (method, len(kwargs["x"])) + else: + key = (method, *sorted(kwargs)) + + try: + result, self.__state = self.__state.next_states[key] + except KeyError: + pass + else: + return self.__convert_result(method, kwargs, result) + + if method == "_randbelow": + result = self.__data.draw_integer(0, kwargs["n"] - 1) + elif method == "random": + # See https://github.com/HypothesisWorks/hypothesis/issues/4297 + # for numerics/bounds of "random" and "betavariate" + result = self.__data.draw(floats(0, 1, exclude_max=True)) + elif method == "betavariate": + result = self.__data.draw(floats(0, 1)) + elif method == "uniform": + a = normalize_zero(kwargs["a"]) + b = normalize_zero(kwargs["b"]) + result = self.__data.draw(floats(a, b)) + elif method in ("weibullvariate", "gammavariate"): + result = self.__data.draw(floats(min_value=0.0, allow_infinity=False)) + elif method in ("gauss", "normalvariate"): + mu = kwargs["mu"] + result = mu + self.__data.draw( + floats(allow_nan=False, allow_infinity=False) + ) + elif method == "vonmisesvariate": + result = self.__data.draw(floats(0, 2 * math.pi)) + elif method == "randrange": + if kwargs["stop"] is None: + stop = kwargs["start"] + start = 0 + else: + start = kwargs["start"] + stop = kwargs["stop"] + + step = kwargs["step"] + if start == stop: + raise ValueError(f"empty range for randrange({start}, {stop}, {step})") + + if step != 1: + endpoint = (stop - start) // 
step + if (start - stop) % step == 0: + endpoint -= 1 + + i = self.__data.draw_integer(0, endpoint) + result = start + i * step + else: + result = self.__data.draw_integer(start, stop - 1) + elif method == "randint": + result = self.__data.draw_integer(kwargs["a"], kwargs["b"]) + # New in Python 3.12, so not taken by our coverage job + elif method == "binomialvariate": # pragma: no cover + result = self.__data.draw_integer(0, kwargs["n"]) + elif method == "choice": + seq = kwargs["seq"] + result = self.__data.draw_integer(0, len(seq) - 1) + elif method == "choices": + k = kwargs["k"] + result = self.__data.draw( + lists( + integers(0, len(kwargs["population"]) - 1), + min_size=k, + max_size=k, + ) + ) + elif method == "sample": + k = kwargs["k"] + seq = kwargs["population"] + + if k > len(seq) or k < 0: + raise ValueError( + f"Sample size {k} not in expected range 0 <= k <= {len(seq)}" + ) + + if k == 0: + result = [] + else: + result = self.__data.draw( + lists( + sampled_from(range(len(seq))), + min_size=k, + max_size=k, + unique=True, + ) + ) + + elif method == "getrandbits": + result = self.__data.draw_integer(0, 2 ** kwargs["n"] - 1) + elif method == "triangular": + low = normalize_zero(kwargs["low"]) + high = normalize_zero(kwargs["high"]) + mode = normalize_zero(kwargs["mode"]) + if mode is None: + result = self.__data.draw(floats(low, high)) + elif self.__data.draw_boolean(0.5): + result = self.__data.draw(floats(mode, high)) + else: + result = self.__data.draw(floats(low, mode)) + elif method in ("paretovariate", "expovariate", "lognormvariate"): + result = self.__data.draw(floats(min_value=0.0)) + elif method == "shuffle": + result = self.__data.draw(permutations(range(len(kwargs["x"])))) + elif method == "randbytes": + n = int(kwargs["n"]) + result = self.__data.draw_bytes(min_size=n, max_size=n) + else: + raise NotImplementedError(method) + + new_state = RandomState() + self.__state.next_states[key] = (result, new_state) + self.__state = new_state + + return self.__convert_result(method, kwargs, result) + + def seed(self, seed): + self.__state = state_for_seed(self.__data, seed) + + def getstate(self): + if self.__state.state_id is not None: + return self.__state.state_id + + if self.__data.states_for_ids is None: + self.__data.states_for_ids = {} + states_for_ids = self.__data.states_for_ids + self.__state.state_id = len(states_for_ids) + states_for_ids[self.__state.state_id] = self.__state + + return self.__state.state_id + + def setstate(self, state): + self.__state = self.__data.states_for_ids[state] + + +DUMMY_RANDOM = Random(0) + + +def convert_kwargs(name, kwargs): + kwargs = dict(kwargs) + + signature = sig_of(name) + params = signature.parameters + + bound = signature.bind(DUMMY_RANDOM, **kwargs) + bound.apply_defaults() + + for k in list(kwargs): + if ( + kwargs[k] is params[k].default + or params[k].kind != inspect.Parameter.KEYWORD_ONLY + ): + kwargs.pop(k) + + arg_names = list(params)[1:] + + args = [] + + for a in arg_names: + if params[a].kind == inspect.Parameter.KEYWORD_ONLY: + break + args.append(bound.arguments[a]) + kwargs.pop(a, None) + + while args: + name = arg_names[len(args) - 1] + if args[-1] is params[name].default: + args.pop() + else: + break + + return (args, kwargs) + + +class TrueRandom(HypothesisRandom): + def __init__(self, seed, note_method_calls): + super().__init__(note_method_calls=note_method_calls) + self.__seed = seed + self.__random = Random(seed) + + def _hypothesis_do_random(self, method, kwargs): + fn = getattr(self.__random, method) 
+ try: + return fn(**kwargs) + except TypeError: + pass + args, kwargs = convert_kwargs(method, kwargs) + return fn(*args, **kwargs) + + def __copy__(self) -> "TrueRandom": + result = TrueRandom( + seed=self.__seed, + note_method_calls=self._note_method_calls, + ) + result.setstate(self.getstate()) + return result + + def __repr__(self) -> str: + return f"Random({self.__seed!r})" + + def seed(self, seed): + self.__random.seed(seed) + self.__seed = seed + + def getstate(self): + return self.__random.getstate() + + def setstate(self, state): + self.__random.setstate(state) + + +class RandomStrategy(SearchStrategy[HypothesisRandom]): + def __init__(self, *, note_method_calls: bool, use_true_random: bool) -> None: + super().__init__() + self.__note_method_calls = note_method_calls + self.__use_true_random = use_true_random + + def do_draw(self, data: ConjectureData) -> HypothesisRandom: + if self.__use_true_random: + seed = data.draw_integer(0, 2**64 - 1) + return TrueRandom(seed=seed, note_method_calls=self.__note_method_calls) + else: + return ArtificialRandom( + note_method_calls=self.__note_method_calls, data=data + ) diff --git a/vendored/hypothesis/strategies/_internal/recursive.py b/vendored/hypothesis/strategies/_internal/recursive.py new file mode 100644 index 0000000..aa665ce --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/recursive.py @@ -0,0 +1,118 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import threading +from contextlib import contextmanager + +from hypothesis.errors import InvalidArgument +from hypothesis.internal.reflection import get_pretty_function_description +from hypothesis.internal.validation import check_type +from hypothesis.strategies._internal.strategies import ( + OneOfStrategy, + SearchStrategy, + check_strategy, +) + + +class LimitReached(BaseException): + pass + + +class LimitedStrategy(SearchStrategy): + def __init__(self, strategy): + super().__init__() + self.base_strategy = strategy + self._threadlocal = threading.local() + + @property + def marker(self): + return getattr(self._threadlocal, "marker", 0) + + @marker.setter + def marker(self, value): + self._threadlocal.marker = value + + @property + def currently_capped(self): + return getattr(self._threadlocal, "currently_capped", False) + + @currently_capped.setter + def currently_capped(self, value): + self._threadlocal.currently_capped = value + + def __repr__(self) -> str: + return f"LimitedStrategy({self.base_strategy!r})" + + def do_validate(self) -> None: + self.base_strategy.validate() + + def do_draw(self, data): + assert self.currently_capped + if self.marker <= 0: + raise LimitReached + self.marker -= 1 + return data.draw(self.base_strategy) + + @contextmanager + def capped(self, max_templates): + try: + was_capped = self.currently_capped + self.currently_capped = True + self.marker = max_templates + yield + finally: + self.currently_capped = was_capped + + +class RecursiveStrategy(SearchStrategy): + def __init__(self, base, extend, max_leaves): + super().__init__() + self.max_leaves = max_leaves + self.base = base + self.limited_base = LimitedStrategy(base) + self.extend = extend + + strategies = [self.limited_base, self.extend(self.limited_base)] + while 2 ** (len(strategies) - 1) <= max_leaves: + strategies.append(extend(OneOfStrategy(tuple(strategies)))) + self.strategy = OneOfStrategy(strategies) + + def __repr__(self) -> str: + if not hasattr(self, "_cached_repr"): + self._cached_repr = "recursive(%r, %s, max_leaves=%d)" % ( + self.base, + get_pretty_function_description(self.extend), + self.max_leaves, + ) + return self._cached_repr + + def do_validate(self) -> None: + check_strategy(self.base, "base") + extended = self.extend(self.limited_base) + check_strategy(extended, f"extend({self.limited_base!r})") + self.limited_base.validate() + extended.validate() + check_type(int, self.max_leaves, "max_leaves") + if self.max_leaves <= 0: + raise InvalidArgument( + f"max_leaves={self.max_leaves!r} must be greater than zero" + ) + + def do_draw(self, data): + count = 0 + while True: + try: + with self.limited_base.capped(self.max_leaves): + return data.draw(self.strategy) + except LimitReached: + if count == 0: + msg = f"Draw for {self!r} exceeded max_leaves and had to be retried" + data.events[msg] = "" + count += 1 diff --git a/vendored/hypothesis/strategies/_internal/regex.py b/vendored/hypothesis/strategies/_internal/regex.py new file mode 100644 index 0000000..b51e90e --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/regex.py @@ -0,0 +1,576 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import operator +import re + +from hypothesis.errors import InvalidArgument +from hypothesis.internal import charmap +from hypothesis.strategies._internal.lazy import unwrap_strategies +from hypothesis.strategies._internal.strings import OneCharStringStrategy + +try: # pragma: no cover + import re._constants as sre + import re._parser as sre_parse + + ATOMIC_GROUP = sre.ATOMIC_GROUP + POSSESSIVE_REPEAT = sre.POSSESSIVE_REPEAT +except ImportError: # Python < 3.11 + import sre_constants as sre + import sre_parse + + ATOMIC_GROUP = object() + POSSESSIVE_REPEAT = object() + +from hypothesis import reject, strategies as st +from hypothesis.internal.charmap import as_general_categories, categories +from hypothesis.internal.compat import add_note, int_to_byte + +UNICODE_CATEGORIES = set(categories()) + + +SPACE_CHARS = set(" \t\n\r\f\v") +UNICODE_SPACE_CHARS = SPACE_CHARS | set("\x1c\x1d\x1e\x1f\x85") +UNICODE_DIGIT_CATEGORIES = {"Nd"} +UNICODE_SPACE_CATEGORIES = set(as_general_categories(["Z"])) +UNICODE_LETTER_CATEGORIES = set(as_general_categories(["L"])) +UNICODE_WORD_CATEGORIES = set(as_general_categories(["L", "N"])) + +# This is verbose, but correct on all versions of Python +BYTES_ALL = {int_to_byte(i) for i in range(256)} +BYTES_DIGIT = {b for b in BYTES_ALL if re.match(b"\\d", b)} +BYTES_SPACE = {b for b in BYTES_ALL if re.match(b"\\s", b)} +BYTES_WORD = {b for b in BYTES_ALL if re.match(b"\\w", b)} +BYTES_LOOKUP = { + sre.CATEGORY_DIGIT: BYTES_DIGIT, + sre.CATEGORY_SPACE: BYTES_SPACE, + sre.CATEGORY_WORD: BYTES_WORD, + sre.CATEGORY_NOT_DIGIT: BYTES_ALL - BYTES_DIGIT, + sre.CATEGORY_NOT_SPACE: BYTES_ALL - BYTES_SPACE, + sre.CATEGORY_NOT_WORD: BYTES_ALL - BYTES_WORD, +} + + +GROUP_CACHE_STRATEGY: st.SearchStrategy[dict] = st.shared( + st.builds(dict), key="hypothesis.regex.group_cache" +) + + +class IncompatibleWithAlphabet(InvalidArgument): + pass + + +@st.composite +def update_group(draw, group_name, strategy): + cache = draw(GROUP_CACHE_STRATEGY) + result = draw(strategy) + cache[group_name] = result + return result + + +@st.composite +def reuse_group(draw, group_name): + cache = draw(GROUP_CACHE_STRATEGY) + try: + return cache[group_name] + except KeyError: + reject() + + +@st.composite +def group_conditional(draw, group_name, if_yes, if_no): + cache = draw(GROUP_CACHE_STRATEGY) + if group_name in cache: + return draw(if_yes) + else: + return draw(if_no) + + +@st.composite +def clear_cache_after_draw(draw, base_strategy): + cache = draw(GROUP_CACHE_STRATEGY) + result = draw(base_strategy) + cache.clear() + return result + + +def chars_not_in_alphabet(alphabet, string): + # Given a string, return a tuple of the characters which are not in alphabet + if alphabet is None: + return () + intset = unwrap_strategies(alphabet).intervals + return tuple(c for c in string if c not in intset) + + +class Context: + __slots__ = ["flags"] + + def __init__(self, flags): + self.flags = flags + + +class CharactersBuilder: + """Helper object that allows to configure `characters` strategy with + various unicode categories and characters. Also allows negation of + configured set. + + :param negate: If True, configure :func:`hypothesis.strategies.characters` + to match anything other than configured character set + :param flags: Regex flags. 
They affect how and which characters are matched + """ + + def __init__(self, *, negate=False, flags=0, alphabet): + self._categories = set() + self._whitelist_chars = set() + self._blacklist_chars = set() + self._negate = negate + self._ignorecase = flags & re.IGNORECASE + self.code_to_char = chr + self._alphabet = unwrap_strategies(alphabet) + if flags & re.ASCII: + self._alphabet = OneCharStringStrategy( + self._alphabet.intervals & charmap.query(max_codepoint=127) + ) + + @property + def strategy(self): + """Returns resulting strategy that generates configured char set.""" + # Start by getting the set of all characters allowed by the pattern + white_chars = self._whitelist_chars - self._blacklist_chars + multi_chars = {c for c in white_chars if len(c) > 1} + intervals = charmap.query( + categories=self._categories, + exclude_characters=self._blacklist_chars, + include_characters=white_chars - multi_chars, + ) + # Then take the complement if this is from a negated character class + if self._negate: + intervals = charmap.query() - intervals + multi_chars.clear() + # and finally return the intersection with our alphabet + return OneCharStringStrategy(intervals & self._alphabet.intervals) | ( + st.sampled_from(sorted(multi_chars)) if multi_chars else st.nothing() + ) + + def add_category(self, category): + """Update unicode state to match sre_parse object ``category``.""" + if category == sre.CATEGORY_DIGIT: + self._categories |= UNICODE_DIGIT_CATEGORIES + elif category == sre.CATEGORY_NOT_DIGIT: + self._categories |= UNICODE_CATEGORIES - UNICODE_DIGIT_CATEGORIES + elif category == sre.CATEGORY_SPACE: + self._categories |= UNICODE_SPACE_CATEGORIES + self._whitelist_chars |= UNICODE_SPACE_CHARS + elif category == sre.CATEGORY_NOT_SPACE: + self._categories |= UNICODE_CATEGORIES - UNICODE_SPACE_CATEGORIES + self._blacklist_chars |= UNICODE_SPACE_CHARS + elif category == sre.CATEGORY_WORD: + self._categories |= UNICODE_WORD_CATEGORIES + self._whitelist_chars.add("_") + elif category == sre.CATEGORY_NOT_WORD: + self._categories |= UNICODE_CATEGORIES - UNICODE_WORD_CATEGORIES + self._blacklist_chars.add("_") + else: + raise NotImplementedError(f"Unknown character category: {category}") + + def add_char(self, c): + """Add given char to the whitelist.""" + self._whitelist_chars.add(c) + if ( + self._ignorecase + and re.match(re.escape(c), c.swapcase(), flags=re.IGNORECASE) is not None + ): + # Note that it is possible that `len(c.swapcase()) > 1` + self._whitelist_chars.add(c.swapcase()) + + +class BytesBuilder(CharactersBuilder): + def __init__(self, *, negate=False, flags=0): + self._whitelist_chars = set() + self._blacklist_chars = set() + self._negate = negate + self._alphabet = None + self._ignorecase = flags & re.IGNORECASE + self.code_to_char = int_to_byte + + @property + def strategy(self): + """Returns resulting strategy that generates configured char set.""" + allowed = self._whitelist_chars + if self._negate: + allowed = BYTES_ALL - allowed + return st.sampled_from(sorted(allowed)) + + def add_category(self, category): + """Update characters state to match sre_parse object ``category``.""" + self._whitelist_chars |= BYTES_LOOKUP[category] + + +@st.composite +def maybe_pad(draw, regex, strategy, left_pad_strategy, right_pad_strategy): + """Attempt to insert padding around the result of a regex draw while + preserving the match.""" + result = draw(strategy) + left_pad = draw(left_pad_strategy) + if left_pad and regex.search(left_pad + result): + result = left_pad + result + right_pad = 
draw(right_pad_strategy) + if right_pad and regex.search(result + right_pad): + result += right_pad + return result + + +def base_regex_strategy(regex, parsed=None, alphabet=None): + if parsed is None: + parsed = sre_parse.parse(regex.pattern, flags=regex.flags) + try: + s = _strategy( + parsed, + context=Context(flags=regex.flags), + is_unicode=isinstance(regex.pattern, str), + alphabet=alphabet, + ) + except Exception as err: + add_note(err, f"{alphabet=} {regex=}") + raise + return clear_cache_after_draw(s) + + +def regex_strategy( + regex, fullmatch, *, alphabet, _temp_jsonschema_hack_no_end_newline=False +): + if not hasattr(regex, "pattern"): + regex = re.compile(regex) + + is_unicode = isinstance(regex.pattern, str) + + parsed = sre_parse.parse(regex.pattern, flags=regex.flags) + + if fullmatch: + if not parsed: + return st.just("" if is_unicode else b"") + return base_regex_strategy(regex, parsed, alphabet).filter(regex.fullmatch) + + if not parsed: + if is_unicode: + return st.text(alphabet=alphabet) + else: + return st.binary() + + if is_unicode: + base_padding_strategy = st.text(alphabet=alphabet) + empty = st.just("") + newline = st.just("\n") + else: + base_padding_strategy = st.binary() + empty = st.just(b"") + newline = st.just(b"\n") + + right_pad = base_padding_strategy + left_pad = base_padding_strategy + + if parsed[-1][0] == sre.AT: + if parsed[-1][1] == sre.AT_END_STRING: + right_pad = empty + elif parsed[-1][1] == sre.AT_END: + if regex.flags & re.MULTILINE: + right_pad = st.one_of( + empty, st.builds(operator.add, newline, right_pad) + ) + else: + right_pad = st.one_of(empty, newline) + + # This will be removed when a regex-syntax-translation library exists. + # It's a pretty nasty hack, but means that we can match the semantics + # of JSONschema's compatible subset of ECMA regex, which is important + # for hypothesis-jsonschema and Schemathesis. See e.g. + # https://github.com/schemathesis/schemathesis/issues/1241 + if _temp_jsonschema_hack_no_end_newline: + right_pad = empty + + if parsed[0][0] == sre.AT: + if parsed[0][1] == sre.AT_BEGINNING_STRING: + left_pad = empty + elif parsed[0][1] == sre.AT_BEGINNING: + if regex.flags & re.MULTILINE: + left_pad = st.one_of(empty, st.builds(operator.add, left_pad, newline)) + else: + left_pad = empty + + base = base_regex_strategy(regex, parsed, alphabet).filter(regex.search) + + return maybe_pad(regex, base, left_pad, right_pad) + + +def _strategy(codes, context, is_unicode, *, alphabet): + """Convert SRE regex parse tree to strategy that generates strings matching + that regex represented by that parse tree. + + `codes` is either a list of SRE regex elements representations or a + particular element representation. Each element is a tuple of element code + (as string) and parameters. E.g. regex 'ab[0-9]+' compiles to following + elements: + + [ + (LITERAL, 97), + (LITERAL, 98), + (MAX_REPEAT, (1, 4294967295, [ + (IN, [ + (RANGE, (48, 57)) + ]) + ])) + ] + + The function recursively traverses regex element tree and converts each + element to strategy that generates strings that match that element. + + Context stores + 1. List of groups (for backreferences) + 2. Active regex flags (e.g. 
IGNORECASE, DOTALL, UNICODE, they affect + behavior of various inner strategies) + """ + + def recurse(codes): + return _strategy(codes, context, is_unicode, alphabet=alphabet) + + if is_unicode: + empty = "" + to_char = chr + else: + empty = b"" + to_char = int_to_byte + binary_char = st.binary(min_size=1, max_size=1) + + if not isinstance(codes, tuple): + # List of codes + strategies = [] + + i = 0 + while i < len(codes): + if codes[i][0] == sre.LITERAL and not context.flags & re.IGNORECASE: + # Merge subsequent "literals" into one `just()` strategy + # that generates corresponding text if no IGNORECASE + j = i + 1 + while j < len(codes) and codes[j][0] == sre.LITERAL: + j += 1 + + if i + 1 < j: + chars = empty.join(to_char(charcode) for _, charcode in codes[i:j]) + if invalid := chars_not_in_alphabet(alphabet, chars): + raise IncompatibleWithAlphabet( + f"Literal {chars!r} contains characters {invalid!r} " + f"which are not in the specified alphabet" + ) + strategies.append(st.just(chars)) + i = j + continue + + strategies.append(recurse(codes[i])) + i += 1 + + # We handle this separately at the top level, but some regex can + # contain empty lists internally, so we need to handle this here too. + if not strategies: + return st.just(empty) + + if len(strategies) == 1: + return strategies[0] + return st.tuples(*strategies).map(empty.join) + else: + # Single code + code, value = codes + if code == sre.LITERAL: + # Regex 'a' (single char) + c = to_char(value) + if chars_not_in_alphabet(alphabet, c): + raise IncompatibleWithAlphabet( + f"Literal {c!r} is not in the specified alphabet" + ) + if ( + context.flags & re.IGNORECASE + and c != c.swapcase() + and re.match(re.escape(c), c.swapcase(), re.IGNORECASE) is not None + and not chars_not_in_alphabet(alphabet, c.swapcase()) + ): + # We do the explicit check for swapped-case matching because + # eg 'ß'.upper() == 'SS' and ignorecase doesn't match it. + return st.sampled_from([c, c.swapcase()]) + return st.just(c) + + elif code == sre.NOT_LITERAL: + # Regex '[^a]' (negation of a single char) + c = to_char(value) + blacklist = {c} + if ( + context.flags & re.IGNORECASE + and re.match(re.escape(c), c.swapcase(), re.IGNORECASE) is not None + ): + # There are a few cases where .swapcase() returns two characters, + # but is still a case-insensitive match. In such cases we add *both* + # characters to our blacklist, to avoid doing the wrong thing for + # patterns such as r"[^\u0130]+" where "i\u0307" matches. + # + # (that's respectively 'Latin letter capital I with dot above' and + # 'latin latter i' + 'combining dot above'; see issue #2657) + # + # As a final additional wrinkle, "latin letter capital I" *also* + # case-insensitive-matches, with or without combining dot character. + # We therefore have to chain .swapcase() calls until a fixpoint. 
+ stack = [c.swapcase()] + while stack: + for char in stack.pop(): + blacklist.add(char) + stack.extend(set(char.swapcase()) - blacklist) + + if is_unicode: + return OneCharStringStrategy( + unwrap_strategies(alphabet).intervals + & charmap.query(exclude_characters=blacklist) + ) + else: + return binary_char.filter(lambda c: c not in blacklist) + + elif code == sre.IN: + # Regex '[abc0-9]' (set of characters) + negate = value[0][0] == sre.NEGATE + if is_unicode: + builder = CharactersBuilder( + flags=context.flags, negate=negate, alphabet=alphabet + ) + else: + builder = BytesBuilder(flags=context.flags, negate=negate) + + for charset_code, charset_value in value: + if charset_code == sre.NEGATE: + # Regex '[^...]' (negation) + # handled by builder = CharactersBuilder(...) above + pass + elif charset_code == sre.LITERAL: + # Regex '[a]' (single char) + c = builder.code_to_char(charset_value) + if chars_not_in_alphabet(builder._alphabet, c): + raise IncompatibleWithAlphabet( + f"Literal {c!r} is not in the specified alphabet" + ) + builder.add_char(c) + elif charset_code == sre.RANGE: + # Regex '[a-z]' (char range) + low, high = charset_value + chars = empty.join(map(builder.code_to_char, range(low, high + 1))) + if len(chars) == len( + invalid := set(chars_not_in_alphabet(alphabet, chars)) + ): + raise IncompatibleWithAlphabet( + f"Charset '[{chr(low)}-{chr(high)}]' contains characters {invalid!r} " + f"which are not in the specified alphabet" + ) + for c in chars: + if isinstance(c, int): + c = int_to_byte(c) + if c not in invalid: + builder.add_char(c) + elif charset_code == sre.CATEGORY: + # Regex '[\w]' (char category) + builder.add_category(charset_value) + else: + # Currently there are no known code points other than + # handled here. This code is just future proofing + raise NotImplementedError(f"Unknown charset code: {charset_code}") + return builder.strategy + + elif code == sre.ANY: + # Regex '.' (any char) + if is_unicode: + assert alphabet is not None + if context.flags & re.DOTALL: + return alphabet + return OneCharStringStrategy( + unwrap_strategies(alphabet).intervals + & charmap.query(exclude_characters="\n") + ) + else: + if context.flags & re.DOTALL: + return binary_char + return binary_char.filter(lambda c: c != b"\n") + + elif code == sre.AT: + # Regexes like '^...', '...$', '\bfoo', '\Bfoo' + # An empty string (or newline) will match the token itself, but + # we don't and can't check the position (eg '%' at the end) + return st.just(empty) + + elif code == sre.SUBPATTERN: + # Various groups: '(...)', '(:...)' or '(?P...)' + old_flags = context.flags + context.flags = (context.flags | value[1]) & ~value[2] + + strat = _strategy(value[-1], context, is_unicode, alphabet=alphabet) + + context.flags = old_flags + + if value[0]: + strat = update_group(value[0], strat) + + return strat + + elif code == sre.GROUPREF: + # Regex '\\1' or '(?P=name)' (group reference) + return reuse_group(value) + + elif code == sre.ASSERT: + # Regex '(?=...)' or '(?<=...)' (positive lookahead/lookbehind) + return recurse(value[1]) + + elif code == sre.ASSERT_NOT: + # Regex '(?!...)' or '(? str: + if self.key is not None: + return f"shared({self.base!r}, key={self.key!r})" + else: + return f"shared({self.base!r})" + + def calc_label(self) -> int: + return self.base.calc_label() + + # Ideally would be -> Ex, but key collisions with different-typed values are + # possible. See https://github.com/HypothesisWorks/hypothesis/issues/4301. 
+ def do_draw(self, data: ConjectureData) -> Any: + key = self.key or self + if key not in data._shared_strategy_draws: + drawn = data.draw(self.base) + data._shared_strategy_draws[key] = (drawn, self) + else: + drawn, other = data._shared_strategy_draws[key] + + # Check that the strategies shared under this key are equivalent + if self.label != other.label: + warnings.warn( + f"Different strategies are shared under {key=}. This" + " risks drawing values that are not valid examples for the strategy," + " or that have a narrower range than expected." + f" Conflicting strategies: ({self!r}, {other!r}).", + HypothesisWarning, + stacklevel=1, + ) + return drawn diff --git a/vendored/hypothesis/strategies/_internal/strategies.py b/vendored/hypothesis/strategies/_internal/strategies.py new file mode 100644 index 0000000..0fd21ea --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/strategies.py @@ -0,0 +1,1230 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import sys +import threading +import warnings +from collections import abc, defaultdict +from collections.abc import Callable, Sequence +from functools import lru_cache +from random import shuffle +from threading import RLock +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Generic, + Literal, + TypeAlias, + TypeVar, + cast, + overload, +) + +from hypothesis._settings import HealthCheck, Phase, Verbosity, settings +from hypothesis.control import _current_build_context, current_build_context +from hypothesis.errors import ( + HypothesisException, + HypothesisWarning, + InvalidArgument, + NonInteractiveExampleWarning, + UnsatisfiedAssumption, +) +from hypothesis.internal.conjecture import utils as cu +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.conjecture.utils import ( + calc_label_from_cls, + calc_label_from_hash, + calc_label_from_name, + combine_labels, +) +from hypothesis.internal.coverage import check_function +from hypothesis.internal.reflection import ( + get_pretty_function_description, + is_identity_function, +) +from hypothesis.strategies._internal.utils import defines_strategy +from hypothesis.utils.conventions import UniqueIdentifier + +if TYPE_CHECKING: + Ex = TypeVar("Ex", covariant=True, default=Any) +else: + Ex = TypeVar("Ex", covariant=True) + +T = TypeVar("T") +T3 = TypeVar("T3") +T4 = TypeVar("T4") +T5 = TypeVar("T5") +MappedFrom = TypeVar("MappedFrom") +MappedTo = TypeVar("MappedTo") +RecurT: TypeAlias = Callable[["SearchStrategy"], bool] +calculating = UniqueIdentifier("calculating") + +MAPPED_SEARCH_STRATEGY_DO_DRAW_LABEL = calc_label_from_name( + "another attempted draw in MappedStrategy" +) + +FILTERED_SEARCH_STRATEGY_DO_DRAW_LABEL = calc_label_from_name( + "single loop iteration in FilteredStrategy" +) + +label_lock = RLock() + + +def recursive_property(strategy: "SearchStrategy", name: str, default: object) -> Any: + """Handle properties which may be mutually recursive among a set of + strategies. 
+ + These are essentially lazily cached properties, with the ability to set + an override: If the property has not been explicitly set, we calculate + it on first access and memoize the result for later. + + The problem is that for properties that depend on each other, a naive + calculation strategy may hit infinite recursion. Consider for example + the property is_empty. A strategy defined as x = st.deferred(lambda: x) + is certainly empty (in order to draw a value from x we would have to + draw a value from x, for which we would have to draw a value from x, + ...), but in order to calculate it the naive approach would end up + calling x.is_empty in order to calculate x.is_empty in order to etc. + + The solution is one of fixed point calculation. We start with a default + value that is the value of the property in the absence of evidence to + the contrary, and then update the values of the property for all + dependent strategies until we reach a fixed point. + + The approach taken roughly follows that in section 4.2 of Adams, + Michael D., Celeste Hollenbeck, and Matthew Might. "On the complexity + and performance of parsing with derivatives." ACM SIGPLAN Notices 51.6 + (2016): 224-236. + """ + assert name in {"is_empty", "has_reusable_values", "is_cacheable"} + cache_key = "cached_" + name + calculation = "calc_" + name + force_key = "force_" + name + + def forced_value(target: SearchStrategy) -> Any: + try: + return getattr(target, force_key) + except AttributeError: + return getattr(target, cache_key) + + try: + return forced_value(strategy) + except AttributeError: + pass + + mapping: dict[SearchStrategy, Any] = {} + sentinel = object() + hit_recursion = False + + # For a first pass we do a direct recursive calculation of the + # property, but we block recursively visiting a value in the + # computation of its property: When that happens, we simply + # note that it happened and return the default value. + def recur(strat: SearchStrategy) -> Any: + nonlocal hit_recursion + try: + return forced_value(strat) + except AttributeError: + pass + result = mapping.get(strat, sentinel) + if result is calculating: + hit_recursion = True + return default + elif result is sentinel: + mapping[strat] = calculating + mapping[strat] = getattr(strat, calculation)(recur) + return mapping[strat] + return result + + recur(strategy) + + # If we hit self-recursion in the computation of any strategy + # value, our mapping at the end is imprecise - it may or may + # not have the right values in it. We now need to proceed with + # a more careful fixed point calculation to get the exact + # values. Hopefully our mapping is still pretty good and it + # won't take a large number of updates to reach a fixed point. + if hit_recursion: + needs_update = set(mapping) + + # We track which strategies use which in the course of + # calculating their property value. If A ever uses B in + # the course of calculating its value, then whenever the + # value of B changes we might need to update the value of + # A. 
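+ # (Illustrative example, not from the original source: if computing
+ # is_empty for a one_of(a, b) strategy consulted a and b via recur2,
+ # then a and b each record the one_of as a listener, so a later change
+ # to either value re-queues the one_of for recalculation.)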
+ listeners: dict[SearchStrategy, set[SearchStrategy]] = defaultdict(set) + else: + needs_update = None + + def recur2(strat: SearchStrategy) -> Any: + def recur_inner(other: SearchStrategy) -> Any: + try: + return forced_value(other) + except AttributeError: + pass + listeners[other].add(strat) + result = mapping.get(other, sentinel) + if result is sentinel: + assert needs_update is not None + needs_update.add(other) + mapping[other] = default + return default + return result + + return recur_inner + + count = 0 + seen = set() + while needs_update: + count += 1 + # If we seem to be taking a really long time to stabilize we + # start tracking seen values to attempt to detect an infinite + # loop. This should be impossible, and most code will never + # hit the count, but having an assertion for it means that + # testing is easier to debug and we don't just have a hung + # test. + # Note: This is actually covered, by test_very_deep_deferral + # in tests/cover/test_deferred_strategies.py. Unfortunately it + # runs into a coverage bug. See + # https://github.com/nedbat/coveragepy/issues/605 + # for details. + if count > 50: # pragma: no cover + key = frozenset(mapping.items()) + assert key not in seen, (key, name) + seen.add(key) + to_update = needs_update + needs_update = set() + for strat in to_update: + new_value = getattr(strat, calculation)(recur2(strat)) + if new_value != mapping[strat]: + needs_update.update(listeners[strat]) + mapping[strat] = new_value + + # We now have a complete and accurate calculation of the + # property values for everything we have seen in the course of + # running this calculation. We simultaneously update all of + # them (not just the strategy we started out with). + for k, v in mapping.items(): + setattr(k, cache_key, v) + return getattr(strategy, cache_key) + + +class SearchStrategy(Generic[Ex]): + """A ``SearchStrategy`` tells Hypothesis how to generate that kind of input. + + This class is only part of the public API for use in type annotations, so that + you can write e.g. ``-> SearchStrategy[Foo]`` for your function which returns + ``builds(Foo, ...)``. Do not inherit from or directly instantiate this class. + """ + + __module__: str = "hypothesis.strategies" + LABELS: ClassVar[dict[type, int]] = {} + # triggers `assert isinstance(label, int)` under threading when setting this + # in init instead of a classvar. I'm not sure why, init should be safe. But + # this works so I'm not looking into it further atm. + __label: int | UniqueIdentifier | None = None + + def __init__(self): + self.validate_called: dict[int, bool] = {} + + def is_currently_empty(self, data: ConjectureData) -> bool: + """ + Returns whether this strategy is currently empty. Unlike ``empty``, + which is computed based on static information and cannot change, + ``is_currently_empty`` may change over time based on choices made + during the test case. + + This is currently only used for stateful testing, where |Bundle| grows a + list of values to choose from over the course of a test case. + + ``data`` will only be used for introspection. No values will be drawn + from it in a way that modifies the choice sequence. + """ + return self.is_empty + + @property + def is_empty(self) -> Any: + # Returns True if this strategy can never draw a value and will always + # result in the data being marked invalid. 
+ # The fact that this returns False does not guarantee that a valid value + # can be drawn - this is not intended to be perfect, and is primarily + # intended to be an optimisation for some cases. + return recursive_property(self, "is_empty", True) + + # Returns True if values from this strategy can safely be reused without + # this causing unexpected behaviour. + + # True if values from this strategy can be implicitly reused (e.g. as + # background values in a numpy array) without causing surprising + # user-visible behaviour. Should be false for built-in strategies that + # produce mutable values, and for strategies that have been mapped/filtered + # by arbitrary user-provided functions. + @property + def has_reusable_values(self) -> Any: + return recursive_property(self, "has_reusable_values", True) + + @property + def is_cacheable(self) -> Any: + """ + Whether it is safe to hold on to instances of this strategy in a cache. + See _STRATEGY_CACHE. + """ + return recursive_property(self, "is_cacheable", True) + + def calc_is_cacheable(self, recur: RecurT) -> bool: + return True + + def calc_is_empty(self, recur: RecurT) -> bool: + # Note: It is correct and significant that the default return value + # from calc_is_empty is False despite the default value for is_empty + # being true. The reason for this is that strategies should be treated + # as empty absent evidence to the contrary, but most basic strategies + # are trivially non-empty and it would be annoying to have to override + # this method to show that. + return False + + def calc_has_reusable_values(self, recur: RecurT) -> bool: + return False + + def example(self) -> Ex: # FIXME + """Provide an example of the sort of value that this strategy generates. + + This method is designed for use in a REPL, and will raise an error if + called from inside |@given| or a strategy definition. For serious use, + see |@composite| or |st.data|. + """ + if getattr(sys, "ps1", None) is None: # pragma: no branch + # The other branch *is* covered in cover/test_examples.py; but as that + # uses `pexpect` for an interactive session `coverage` doesn't see it. + warnings.warn( + "The `.example()` method is good for exploring strategies, but should " + "only be used interactively. We recommend using `@given` for tests - " + "it performs better, saves and replays failures to avoid flakiness, " + f"and reports minimal examples. (strategy: {self!r})", + NonInteractiveExampleWarning, + stacklevel=2, + ) + + context = _current_build_context.value + if context is not None: + if context.data is not None and context.data.depth > 0: + raise HypothesisException( + "Using example() inside a strategy definition is a bad " + "idea. Instead consider using hypothesis.strategies.builds() " + "or @hypothesis.strategies.composite to define your strategy." + " See https://hypothesis.readthedocs.io/en/latest/data.html" + "#hypothesis.strategies.builds or " + "https://hypothesis.readthedocs.io/en/latest/data.html" + "#composite-strategies for more details." + ) + else: + raise HypothesisException( + "Using example() inside a test function is a bad " + "idea. Instead consider using hypothesis.strategies.data() " + "to draw more examples during testing. See " + "https://hypothesis.readthedocs.io/en/latest/data.html" + "#drawing-interactively-in-tests for more details." 
+ ) + + try: + return self.__examples.pop() + except (AttributeError, IndexError): + self.__examples: list[Ex] = [] + + from hypothesis.core import given + + # Note: this function has a weird name because it might appear in + # tracebacks, and we want users to know that they can ignore it. + @given(self) + @settings( + database=None, + # generate only a few examples at a time to avoid slow interactivity + # for large strategies. The overhead of @given is very small relative + # to generation, so a small batch size is fine. + max_examples=10, + deadline=None, + verbosity=Verbosity.quiet, + phases=(Phase.generate,), + suppress_health_check=list(HealthCheck), + ) + def example_generating_inner_function( + ex: Ex, # type: ignore # mypy is overzealous in preventing covariant params + ) -> None: + self.__examples.append(ex) + + example_generating_inner_function() + shuffle(self.__examples) + return self.__examples.pop() + + def map(self, pack: Callable[[Ex], T]) -> "SearchStrategy[T]": + """Returns a new strategy which generates a value from this one, and + then returns ``pack(value)``. For example, ``integers().map(str)`` + could generate ``str(5)`` == ``"5"``. + """ + if is_identity_function(pack): + return self # type: ignore # Mypy has no way to know that `Ex == T` + return MappedStrategy(self, pack=pack) + + def flatmap( + self, expand: Callable[[Ex], "SearchStrategy[T]"] + ) -> "SearchStrategy[T]": # FIXME + """Old syntax for a special case of |@composite|: + + .. code-block:: python + + @st.composite + def flatmap_like(draw, base_strategy, expand): + value = draw(base_strategy) + new_strategy = expand(value) + return draw(new_strategy) + + We find that the greater readability of |@composite| usually outweighs + the verbosity, with a few exceptions for simple cases or recipes like + ``from_type(type).flatmap(from_type)`` ("pick a type, get a strategy for + any instance of that type, and then generate one of those"). + """ + from hypothesis.strategies._internal.flatmapped import FlatMapStrategy + + return FlatMapStrategy(self, expand=expand) + + # Note that we previously had condition extracted to a type alias as + # PredicateT. However, that was only useful when not specifying a relationship + # between the generic Ts and some other function param / return value. + # If we do want to - like here, where we want to say that the Ex arg to condition + # is of the same type as the strategy's Ex - then you need to write out the + # entire Callable[[Ex], Any] expression rather than use a type alias. + # TypeAlias is *not* simply a macro that inserts the text. TypeAlias will not + # reference the local TypeVar context. + def filter(self, condition: Callable[[Ex], Any]) -> "SearchStrategy[Ex]": + """Returns a new strategy that generates values from this strategy + which satisfy the provided condition. + + Note that if the condition is too hard to satisfy this might result + in your tests failing with an Unsatisfiable exception. + A basic version of the filtering logic would look something like: + + .. 
code-block:: python + + @st.composite + def filter_like(draw, strategy, condition): + for _ in range(3): + value = draw(strategy) + if condition(value): + return value + assume(False) + """ + return FilteredStrategy(self, conditions=(condition,)) + + @property + def branches(self) -> Sequence["SearchStrategy[Ex]"]: + return [self] + + def __or__(self, other: "SearchStrategy[T]") -> "SearchStrategy[Ex | T]": + """Return a strategy which produces values by randomly drawing from one + of this strategy or the other strategy. + + This method is part of the public API. + """ + if not isinstance(other, SearchStrategy): + raise ValueError(f"Cannot | a SearchStrategy with {other!r}") + + # Unwrap explicitly or'd strategies. This turns the + # common case of e.g. st.integers() | st.integers() | st.integers() from + # + # one_of(one_of(integers(), integers()), integers()) + # + # into + # + # one_of(integers(), integers(), integers()) + # + # This is purely an aesthetic unwrapping, for e.g. reprs. In practice + # we use .branches / .element_strategies to get the list of possible + # strategies, so this unwrapping is *not* necessary for correctness. + strategies: list[SearchStrategy] = [] + strategies.extend( + self.original_strategies if isinstance(self, OneOfStrategy) else [self] + ) + strategies.extend( + other.original_strategies if isinstance(other, OneOfStrategy) else [other] + ) + return OneOfStrategy(strategies) + + def __bool__(self) -> bool: + warnings.warn( + f"bool({self!r}) is always True, did you mean to draw a value?", + HypothesisWarning, + stacklevel=2, + ) + return True + + def validate(self) -> None: + """Throw an exception if the strategy is not valid. + + Strategies should implement ``do_validate``, which is called by this + method. They should not override ``validate``. + + This can happen due to invalid arguments, or lazy construction. + """ + thread_id = threading.get_ident() + if self.validate_called.get(thread_id, False): + return + # we need to set validate_called before calling do_validate, for + # recursive / deferred strategies. But if a thread switches after + # validate_called but before do_validate, we might have a strategy + # which does weird things like drawing when do_validate would error but + # its params are technically valid (e.g. a param was passed as 1.0 + # instead of 1) and get into weird internal states. + # + # There are two ways to fix this. + # (1) The first is a per-strategy lock around do_validate. Even though we + # expect near-zero lock contention, this still adds the lock overhead. + # (2) The second is allowing concurrent .validate calls. Since validation + # is (assumed to be) deterministic, both threads will produce the same + # end state, so the validation order or race conditions does not matter. + # + # In order to avoid the lock overhead of (1), we use (2) here. See also + # discussion in https://github.com/HypothesisWorks/hypothesis/pull/4473. + try: + self.validate_called[thread_id] = True + self.do_validate() + self.is_empty + self.has_reusable_values + except Exception: + self.validate_called[thread_id] = False + raise + + @property + def class_label(self) -> int: + cls = self.__class__ + try: + return cls.LABELS[cls] + except KeyError: + pass + result = calc_label_from_cls(cls) + cls.LABELS[cls] = result + return result + + @property + def label(self) -> int: + if isinstance((label := self.__label), int): + # avoid locking if we've already completely computed the label. 
+ return label + + with label_lock: + if self.__label is calculating: + return 0 + self.__label = calculating + self.__label = self.calc_label() + return self.__label + + def calc_label(self) -> int: + return self.class_label + + def do_validate(self) -> None: + pass + + def do_draw(self, data: ConjectureData) -> Ex: + raise NotImplementedError(f"{type(self).__name__}.do_draw") + + +def _is_hashable(value: object) -> tuple[bool, int | None]: + # hashing can be expensive; return the hash value if we compute it, so that + # callers don't have to recompute. + try: + return (True, hash(value)) + except TypeError: + return (False, None) + + +def is_hashable(value: object) -> bool: + return _is_hashable(value)[0] + + +class SampledFromStrategy(SearchStrategy[Ex]): + """A strategy which samples from a set of elements. This is essentially + equivalent to using a OneOfStrategy over Just strategies but may be more + efficient and convenient. + """ + + _MAX_FILTER_CALLS: ClassVar[int] = 10_000 + + def __init__( + self, + elements: Sequence[Ex], + *, + force_repr: str | None = None, + force_repr_braces: tuple[str, str] | None = None, + transformations: tuple[ + tuple[Literal["filter", "map"], Callable[[Ex], Any]], + ..., + ] = (), + ): + super().__init__() + self.elements = cu.check_sample(elements, "sampled_from") + assert self.elements + self.force_repr = force_repr + self.force_repr_braces = force_repr_braces + self._transformations = transformations + + self._cached_repr: str | None = None + + def map(self, pack: Callable[[Ex], T]) -> SearchStrategy[T]: + s = type(self)( + self.elements, + force_repr=self.force_repr, + force_repr_braces=self.force_repr_braces, + transformations=(*self._transformations, ("map", pack)), + ) + # guaranteed by the ("map", pack) transformation + return cast(SearchStrategy[T], s) + + def filter(self, condition: Callable[[Ex], Any]) -> SearchStrategy[Ex]: + return type(self)( + self.elements, + force_repr=self.force_repr, + force_repr_braces=self.force_repr_braces, + transformations=(*self._transformations, ("filter", condition)), + ) + + def __repr__(self): + if self._cached_repr is None: + rep = get_pretty_function_description + elements_s = ( + ", ".join(rep(v) for v in self.elements[:512]) + ", ..." + if len(self.elements) > 512 + else ", ".join(rep(v) for v in self.elements) + ) + braces = self.force_repr_braces or ("(", ")") + instance_s = ( + self.force_repr or f"sampled_from({braces[0]}{elements_s}{braces[1]})" + ) + transforms_s = "".join( + f".{name}({get_pretty_function_description(f)})" + for name, f in self._transformations + ) + repr_s = instance_s + transforms_s + self._cached_repr = repr_s + return self._cached_repr + + def calc_label(self) -> int: + # strategy.label is effectively an under-approximation of structural + # equality (i.e., some strategies may have the same label when they are not + # structurally identical). More importantly for calculating the + # SampledFromStrategy label, we might have hash(s1) != hash(s2) even + # when s1 and s2 are structurally identical. For instance: + # + # s1 = st.sampled_from([st.none()]) + # s2 = st.sampled_from([st.none()]) + # assert hash(s1) != hash(s2) + # + # (see also test cases in test_labels.py). + # + # We therefore use the labels of any component strategies when calculating + # our label, and only use the hash if it is not a strategy. + # + # That's the ideal, anyway. 
In reality the logic is more complicated than + # necessary in order to be efficient in the presence of (very) large sequences: + # * add an unabashed special case for range, to avoid iteration over an + # enormous range when we know it is entirely integers. + # * if there is at least one strategy in self.elements, use strategy label, + # and the element hash otherwise. + # * if there are no strategies in self.elements, take the hash of the + # entire sequence. This prevents worst-case performance of hashing each + # element when a hash of the entire sequence would have sufficed. + # + # The worst case performance of this scheme is + # itertools.chain(range(2**100), [st.none()]), where it degrades to + # hashing every int in the range. + (elements_is_hashable, hash_value) = _is_hashable(self.elements) + if isinstance(self.elements, range) or ( + elements_is_hashable + and not any(isinstance(e, SearchStrategy) for e in self.elements) + ): + return combine_labels( + self.class_label, calc_label_from_name(str(hash_value)) + ) + + labels = [self.class_label] + for element in self.elements: + if not is_hashable(element): + continue + + labels.append( + element.label + if isinstance(element, SearchStrategy) + else calc_label_from_hash(element) + ) + + return combine_labels(*labels) + + def calc_has_reusable_values(self, recur: RecurT) -> bool: + # Because our custom .map/.filter implementations skip the normal + # wrapper strategies (which would automatically return False for us), + # we need to manually return False here if any transformations have + # been applied. + return not self._transformations + + def calc_is_cacheable(self, recur: RecurT) -> bool: + return is_hashable(self.elements) + + def _transform( + self, + # https://github.com/python/mypy/issues/7049, we're not writing `element` + # anywhere in the class so this is still type-safe. mypy is being more + # conservative than necessary + element: Ex, # type: ignore + ) -> Ex | UniqueIdentifier: + # Used in UniqueSampledListStrategy + for name, f in self._transformations: + if name == "map": + result = f(element) + if build_context := _current_build_context.value: + build_context.record_call(result, f, args=[element], kwargs={}) + element = result + else: + assert name == "filter" + if not f(element): + return filter_not_satisfied + return element + + def do_draw(self, data: ConjectureData) -> Ex: + result = self.do_filtered_draw(data) + if isinstance(result, SearchStrategy) and all( + isinstance(x, SearchStrategy) for x in self.elements + ): + data._sampled_from_all_strategies_elements_message = ( + "sampled_from was given a collection of strategies: " + "{!r}. Was one_of intended?", + self.elements, + ) + if result is filter_not_satisfied: + data.mark_invalid(f"Aborted test because unable to satisfy {self!r}") + assert not isinstance(result, UniqueIdentifier) + return result + + def get_element(self, i: int) -> Ex | UniqueIdentifier: + return self._transform(self.elements[i]) + + def do_filtered_draw(self, data: ConjectureData) -> Ex | UniqueIdentifier: + # Set of indices that have been tried so far, so that we never test + # the same element twice during a draw. + known_bad_indices: set[int] = set() + + # Start with ordinary rejection sampling. It's fast if it works, and + # if it doesn't work then it was only a small amount of overhead. 
+ for _ in range(3): + i = data.draw_integer(0, len(self.elements) - 1) + if i not in known_bad_indices: + element = self.get_element(i) + if element is not filter_not_satisfied: + return element + if not known_bad_indices: + data.events[f"Retried draw from {self!r} to satisfy filter"] = "" + known_bad_indices.add(i) + + # If we've tried all the possible elements, give up now. + max_good_indices = len(self.elements) - len(known_bad_indices) + if not max_good_indices: + return filter_not_satisfied + + # Impose an arbitrary cutoff to prevent us from wasting too much time + # on very large element lists. + max_good_indices = min(max_good_indices, self._MAX_FILTER_CALLS - 3) + + # Before building the list of allowed indices, speculatively choose + # one of them. We don't yet know how many allowed indices there will be, + # so this choice might be out-of-bounds, but that's OK. + speculative_index = data.draw_integer(0, max_good_indices - 1) + + # Calculate the indices of allowed values, so that we can choose one + # of them at random. But if we encounter the speculatively-chosen one, + # just use that and return immediately. Note that we also track the + # allowed elements, in case of .map(some_stateful_function) + allowed: list[tuple[int, Ex]] = [] + for i in range(min(len(self.elements), self._MAX_FILTER_CALLS - 3)): + if i not in known_bad_indices: + element = self.get_element(i) + if element is not filter_not_satisfied: + assert not isinstance(element, UniqueIdentifier) + allowed.append((i, element)) + if len(allowed) > speculative_index: + # Early-exit case: We reached the speculative index, so + # we just return the corresponding element. + data.draw_integer(0, len(self.elements) - 1, forced=i) + return element + + # The speculative index didn't work out, but at this point we've built + # and can choose from the complete list of allowed indices and elements. + if allowed: + i, element = data.choice(allowed) + data.draw_integer(0, len(self.elements) - 1, forced=i) + return element + # If there are no allowed indices, the filter couldn't be satisfied. + return filter_not_satisfied + + +class OneOfStrategy(SearchStrategy[Ex]): + """Implements a union of strategies. Given a number of strategies this + generates values which could have come from any of them. + + The conditional distribution draws uniformly at random from some + non-empty subset of these strategies and then draws from the + conditional distribution of that strategy. + """ + + def __init__(self, strategies: Sequence[SearchStrategy[Ex]]): + super().__init__() + self.original_strategies = tuple(strategies) + self.__element_strategies: Sequence[SearchStrategy[Ex]] | None = None + self.__in_branches = False + self._branches_lock = RLock() + + def calc_is_empty(self, recur: RecurT) -> bool: + return all(recur(e) for e in self.original_strategies) + + def calc_has_reusable_values(self, recur: RecurT) -> bool: + return all(recur(e) for e in self.original_strategies) + + def calc_is_cacheable(self, recur: RecurT) -> bool: + return all(recur(e) for e in self.original_strategies) + + @property + def element_strategies(self) -> Sequence[SearchStrategy[Ex]]: + if self.__element_strategies is None: + # While strategies are hashable, they use object.__hash__ and are + # therefore distinguished only by identity. + # + # In principle we could "just" define a __hash__ method + # (and __eq__, but that's easy in terms of type() and hash()) + # to make this more powerful, but this is harder than it sounds: + # + # 1. 
Strategies are often distinguished by non-hashable attributes, + # or by attributes that have the same hash value ("^.+" / b"^.+"). + # 2. LazyStrategy: can't reify the wrapped strategy without breaking + # laziness, so there's a hash each for the lazy and the nonlazy. + # + # Having made several attempts, the minor benefits of making strategies + # hashable are simply not worth the engineering effort it would take. + # See also issues #2291 and #2327. + seen: set[SearchStrategy] = {self} + strategies: list[SearchStrategy] = [] + for arg in self.original_strategies: + check_strategy(arg) + if not arg.is_empty: + for s in arg.branches: + if s not in seen and not s.is_empty: + seen.add(s) + strategies.append(s) + self.__element_strategies = strategies + return self.__element_strategies + + def calc_label(self) -> int: + return combine_labels( + self.class_label, *(p.label for p in self.original_strategies) + ) + + def do_draw(self, data: ConjectureData) -> Ex: + strategy = data.draw( + SampledFromStrategy(self.element_strategies).filter( + lambda s: not s.is_currently_empty(data) + ) + ) + return data.draw(strategy) + + def __repr__(self) -> str: + return "one_of({})".format(", ".join(map(repr, self.original_strategies))) + + def do_validate(self) -> None: + for e in self.element_strategies: + e.validate() + + @property + def branches(self) -> Sequence[SearchStrategy[Ex]]: + if self.__element_strategies is not None: + # common fast path which avoids the lock + return self.element_strategies + + with self._branches_lock: + if not self.__in_branches: + try: + self.__in_branches = True + return self.element_strategies + finally: + self.__in_branches = False + else: + return [self] + + def filter(self, condition: Callable[[Ex], Any]) -> SearchStrategy[Ex]: + return FilteredStrategy( + OneOfStrategy([s.filter(condition) for s in self.original_strategies]), + conditions=(), + ) + + +@overload +def one_of( + __args: Sequence[SearchStrategy[Ex]], +) -> SearchStrategy[Ex]: # pragma: no cover + ... + + +@overload +def one_of(__a1: SearchStrategy[Ex]) -> SearchStrategy[Ex]: # pragma: no cover + ... + + +@overload +def one_of( + __a1: SearchStrategy[Ex], __a2: SearchStrategy[T] +) -> SearchStrategy[Ex | T]: # pragma: no cover + ... + + +@overload +def one_of( + __a1: SearchStrategy[Ex], __a2: SearchStrategy[T], __a3: SearchStrategy[T3] +) -> SearchStrategy[Ex | T | T3]: # pragma: no cover + ... + + +@overload +def one_of( + __a1: SearchStrategy[Ex], + __a2: SearchStrategy[T], + __a3: SearchStrategy[T3], + __a4: SearchStrategy[T4], +) -> SearchStrategy[Ex | T | T3 | T4]: # pragma: no cover + ... + + +@overload +def one_of( + __a1: SearchStrategy[Ex], + __a2: SearchStrategy[T], + __a3: SearchStrategy[T3], + __a4: SearchStrategy[T4], + __a5: SearchStrategy[T5], +) -> SearchStrategy[Ex | T | T3 | T4 | T5]: # pragma: no cover + ... + + +@overload +def one_of(*args: SearchStrategy[Any]) -> SearchStrategy[Any]: # pragma: no cover + ... + + +@defines_strategy(eager=True) +def one_of( + *args: Sequence[SearchStrategy[Any]] | SearchStrategy[Any], +) -> SearchStrategy[Any]: + # Mypy workaround alert: Any is too loose above; the return parameter + # should be the union of the input parameters. Unfortunately, Mypy <=0.600 + # raises errors due to incompatible inputs instead. See #1270 for links. + # v0.610 doesn't error; it gets inference wrong for 2+ arguments instead. + """Return a strategy which generates values from any of the argument + strategies. 
+ + This may be called with one iterable argument instead of multiple + strategy arguments, in which case ``one_of(x)`` and ``one_of(*x)`` are + equivalent. + + Examples from this strategy will generally shrink to ones that come from + strategies earlier in the list, then shrink according to behaviour of the + strategy that produced them. In order to get good shrinking behaviour, + try to put simpler strategies first. e.g. ``one_of(none(), text())`` is + better than ``one_of(text(), none())``. + + This is especially important when using recursive strategies. e.g. + ``x = st.deferred(lambda: st.none() | st.tuples(x, x))`` will shrink well, + but ``x = st.deferred(lambda: st.tuples(x, x) | st.none())`` will shrink + very badly indeed. + """ + if len(args) == 1 and not isinstance(args[0], SearchStrategy): + try: + args = tuple(args[0]) + except TypeError: + pass + if len(args) == 1 and isinstance(args[0], SearchStrategy): + # This special-case means that we can one_of over lists of any size + # without incurring any performance overhead when there is only one + # strategy, and keeps our reprs simple. + return args[0] + if args and not any(isinstance(a, SearchStrategy) for a in args): + # And this special case is to give a more-specific error message if it + # seems that the user has confused `one_of()` for `sampled_from()`; + # the remaining validation is left to OneOfStrategy. See PR #2627. + raise InvalidArgument( + f"Did you mean st.sampled_from({list(args)!r})? st.one_of() is used " + "to combine strategies, but all of the arguments were of other types." + ) + # we've handled the case where args is a one-element sequence [(s1, s2, ...)] + # above, so we can assume it's an actual sequence of strategies. + args = cast(Sequence[SearchStrategy], args) + return OneOfStrategy(args) + + +class MappedStrategy(SearchStrategy[MappedTo], Generic[MappedFrom, MappedTo]): + """A strategy which is defined purely by conversion to and from another + strategy. + + Its parameter and distribution come from that other strategy. 
+ """ + + def __init__( + self, + strategy: SearchStrategy[MappedFrom], + pack: Callable[[MappedFrom], MappedTo], + ) -> None: + super().__init__() + self.mapped_strategy = strategy + self.pack = pack + + def calc_is_empty(self, recur: RecurT) -> bool: + return recur(self.mapped_strategy) + + def calc_is_cacheable(self, recur: RecurT) -> bool: + return recur(self.mapped_strategy) + + def __repr__(self) -> str: + if not hasattr(self, "_cached_repr"): + self._cached_repr = f"{self.mapped_strategy!r}.map({get_pretty_function_description(self.pack)})" + return self._cached_repr + + def do_validate(self) -> None: + self.mapped_strategy.validate() + + def do_draw(self, data: ConjectureData) -> MappedTo: + with warnings.catch_warnings(): + if isinstance(self.pack, type) and issubclass( + self.pack, (abc.Mapping, abc.Set) + ): + warnings.simplefilter("ignore", BytesWarning) + for _ in range(3): + try: + data.start_span(MAPPED_SEARCH_STRATEGY_DO_DRAW_LABEL) + x = data.draw(self.mapped_strategy) + result = self.pack(x) + data.stop_span() + current_build_context().record_call( + result, self.pack, args=[x], kwargs={} + ) + return result + except UnsatisfiedAssumption: + data.stop_span(discard=True) + raise UnsatisfiedAssumption + + @property + def branches(self) -> Sequence[SearchStrategy[MappedTo]]: + return [ + MappedStrategy(strategy, pack=self.pack) + for strategy in self.mapped_strategy.branches + ] + + def filter( + self, condition: Callable[[MappedTo], Any] + ) -> "SearchStrategy[MappedTo]": + # Includes a special case so that we can rewrite filters on collection + # lengths, when most collections are `st.lists(...).map(the_type)`. + ListStrategy = _list_strategy_type() + if not isinstance(self.mapped_strategy, ListStrategy) or not ( + (isinstance(self.pack, type) and issubclass(self.pack, abc.Collection)) + or self.pack in _collection_ish_functions() + ): + return super().filter(condition) + + # Check whether our inner list strategy can rewrite this filter condition. + # If not, discard the result and _only_ apply a new outer filter. + new = ListStrategy.filter(self.mapped_strategy, condition) + if getattr(new, "filtered_strategy", None) is self.mapped_strategy: + return super().filter(condition) # didn't rewrite + + # Apply a new outer filter even though we rewrote the inner strategy, + # because some collections can change the list length (dict, set, etc). + return FilteredStrategy(type(self)(new, self.pack), conditions=(condition,)) + + +@lru_cache +def _list_strategy_type() -> Any: + from hypothesis.strategies._internal.collections import ListStrategy + + return ListStrategy + + +def _collection_ish_functions() -> Sequence[Any]: + funcs = [sorted] + if np := sys.modules.get("numpy"): + # c.f. https://numpy.org/doc/stable/reference/routines.array-creation.html + # Probably only `np.array` and `np.asarray` will be used in practice, + # but why should that stop us when we've already gone this far? + funcs += [ + np.empty_like, + np.eye, + np.identity, + np.ones_like, + np.zeros_like, + np.array, + np.asarray, + np.asanyarray, + np.ascontiguousarray, + np.asmatrix, + np.copy, + np.rec.array, + np.rec.fromarrays, + np.rec.fromrecords, + np.diag, + # bonus undocumented functions from tab-completion: + np.asarray_chkfinite, + np.asfortranarray, + ] + + return funcs + + +filter_not_satisfied = UniqueIdentifier("filter not satisfied") + + +class FilteredStrategy(SearchStrategy[Ex]): + def __init__( + self, strategy: SearchStrategy[Ex], conditions: tuple[Callable[[Ex], Any], ...] 
+ ): + super().__init__() + if isinstance(strategy, FilteredStrategy): + # Flatten chained filters into a single filter with multiple conditions. + self.flat_conditions: tuple[Callable[[Ex], Any], ...] = ( + strategy.flat_conditions + conditions + ) + self.filtered_strategy: SearchStrategy[Ex] = strategy.filtered_strategy + else: + self.flat_conditions = conditions + self.filtered_strategy = strategy + + assert isinstance(self.flat_conditions, tuple) + assert not isinstance(self.filtered_strategy, FilteredStrategy) + + self.__condition: Callable[[Ex], Any] | None = None + + def calc_is_empty(self, recur: RecurT) -> bool: + return recur(self.filtered_strategy) + + def calc_is_cacheable(self, recur: RecurT) -> bool: + return recur(self.filtered_strategy) + + def __repr__(self) -> str: + if not hasattr(self, "_cached_repr"): + self._cached_repr = "{!r}{}".format( + self.filtered_strategy, + "".join( + f".filter({get_pretty_function_description(cond)})" + for cond in self.flat_conditions + ), + ) + return self._cached_repr + + def do_validate(self) -> None: + # Start by validating our inner filtered_strategy. If this was a LazyStrategy, + # validation also reifies it so that subsequent calls to e.g. `.filter()` will + # be passed through. + self.filtered_strategy.validate() + # So now we have a reified inner strategy, we'll replay all our saved + # predicates in case some or all of them can be rewritten. Note that this + # replaces the `fresh` strategy too! + fresh = self.filtered_strategy + for cond in self.flat_conditions: + fresh = fresh.filter(cond) + if isinstance(fresh, FilteredStrategy): + # In this case we have at least some non-rewritten filter predicates, + # so we just re-initialize the strategy. + FilteredStrategy.__init__( + self, fresh.filtered_strategy, fresh.flat_conditions + ) + else: + # But if *all* the predicates were rewritten... well, do_validate() is + # an in-place method so we still just re-initialize the strategy! + FilteredStrategy.__init__(self, fresh, ()) + + def filter(self, condition: Callable[[Ex], Any]) -> "FilteredStrategy[Ex]": + # If we can, it's more efficient to rewrite our strategy to satisfy the + # condition. We therefore exploit the fact that the order of predicates + # doesn't matter (`f(x) and g(x) == g(x) and f(x)`) by attempting to apply + # condition directly to our filtered strategy as the inner-most filter. + out = self.filtered_strategy.filter(condition) + # If it couldn't be rewritten, we'll get a new FilteredStrategy - and then + # combine the conditions of each in our expected newest=last order. + if isinstance(out, FilteredStrategy): + return FilteredStrategy( + out.filtered_strategy, self.flat_conditions + out.flat_conditions + ) + # But if it *could* be rewritten, we can return the more efficient form! + return FilteredStrategy(out, self.flat_conditions) + + @property + def condition(self) -> Callable[[Ex], Any]: + # We write this defensively to avoid any threading race conditions + # with our manual FilteredStrategy.__init__ for filter-rewriting. + # See https://github.com/HypothesisWorks/hypothesis/pull/4522. + if (condition := self.__condition) is not None: + return condition + + if len(self.flat_conditions) == 1: + # Avoid an extra indirection in the common case of only one condition. 
+ condition = self.flat_conditions[0] + elif len(self.flat_conditions) == 0: + # Possible, if unlikely, due to filter predicate rewriting + condition = lambda _: True # type: ignore # covariant type param + else: + condition = lambda x: all( # type: ignore # covariant type param + cond(x) for cond in self.flat_conditions + ) + self.__condition = condition + return condition + + def do_draw(self, data: ConjectureData) -> Ex: + result = self.do_filtered_draw(data) + if result is not filter_not_satisfied: + return cast(Ex, result) + + data.mark_invalid(f"Aborted test because unable to satisfy {self!r}") + + def do_filtered_draw(self, data: ConjectureData) -> Ex | UniqueIdentifier: + for i in range(3): + data.start_span(FILTERED_SEARCH_STRATEGY_DO_DRAW_LABEL) + value = data.draw(self.filtered_strategy) + if self.condition(value): + data.stop_span() + return value + else: + data.stop_span(discard=True) + if i == 0: + data.events[f"Retried draw from {self!r} to satisfy filter"] = "" + + return filter_not_satisfied + + @property + def branches(self) -> Sequence[SearchStrategy[Ex]]: + return [ + FilteredStrategy(strategy=strategy, conditions=self.flat_conditions) + for strategy in self.filtered_strategy.branches + ] + + +@check_function +def check_strategy(arg: object, name: str = "") -> None: + assert isinstance(name, str) + if not isinstance(arg, SearchStrategy): + hint = "" + if isinstance(arg, (list, tuple)): + hint = ", such as st.sampled_from({}),".format(name or "...") + if name: + name += "=" + raise InvalidArgument( + f"Expected a SearchStrategy{hint} but got {name}{arg!r} " + f"(type={type(arg).__name__})" + ) diff --git a/vendored/hypothesis/strategies/_internal/strings.py b/vendored/hypothesis/strategies/_internal/strings.py new file mode 100644 index 0000000..9b4020e --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/strings.py @@ -0,0 +1,380 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import copy +import re +import warnings +from collections.abc import Collection +from functools import cache, lru_cache, partial +from typing import cast + +from hypothesis.errors import HypothesisWarning, InvalidArgument +from hypothesis.internal import charmap +from hypothesis.internal.charmap import Categories +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.conjecture.providers import COLLECTION_DEFAULT_MAX_SIZE +from hypothesis.internal.filtering import max_len, min_len +from hypothesis.internal.intervalsets import IntervalSet +from hypothesis.internal.reflection import get_pretty_function_description +from hypothesis.strategies._internal.collections import ListStrategy +from hypothesis.strategies._internal.lazy import unwrap_strategies +from hypothesis.strategies._internal.strategies import ( + OneOfStrategy, + SampledFromStrategy, + SearchStrategy, +) +from hypothesis.vendor.pretty import pretty + + +# Cache size is limited by sys.maxunicode, but passing None makes it slightly faster. +@cache +# this is part of our forward-facing validation, so we do *not* tell mypyc that c +# should be a str, because we don't want it to validate it before we can. 
+def _check_is_single_character(c: object) -> str: + # In order to mitigate the performance cost of this check, we use a shared cache, + # even at the cost of showing the culprit strategy in the error message. + if not isinstance(c, str): + type_ = get_pretty_function_description(type(c)) + raise InvalidArgument(f"Got non-string {c!r} (type {type_})") + if len(c) != 1: + raise InvalidArgument(f"Got {c!r} (length {len(c)} != 1)") + return c + + +class OneCharStringStrategy(SearchStrategy[str]): + """A strategy which generates single character strings of text type.""" + + def __init__(self, intervals: IntervalSet, force_repr: str | None = None) -> None: + super().__init__() + assert isinstance(intervals, IntervalSet) + self.intervals = intervals + self._force_repr = force_repr + + @classmethod + def from_characters_args( + cls, + *, + codec: str | None = None, + min_codepoint: int | None = None, + max_codepoint: int | None = None, + categories: Categories | None = None, + exclude_characters: Collection[str] = "", + include_characters: Collection[str] = "", + ) -> "OneCharStringStrategy": + assert set(categories or ()).issubset(charmap.categories()) + intervals = charmap.query( + min_codepoint=min_codepoint, + max_codepoint=max_codepoint, + categories=categories, + exclude_characters=exclude_characters, + include_characters=include_characters, + ) + if codec is not None: + intervals &= charmap.intervals_from_codec(codec) + + _arg_repr = ", ".join( + f"{k}={v!r}" + for k, v in [ + ("codec", codec), + ("min_codepoint", min_codepoint), + ("max_codepoint", max_codepoint), + ("categories", categories), + ("exclude_characters", exclude_characters), + ("include_characters", include_characters), + ] + if v not in (None, "") + and not ( + k == "categories" + # v has to be `categories` here. Help mypy along to infer that. 
+ and set(cast(Categories, v)) == set(charmap.categories()) - {"Cs"} + ) + ) + if not intervals: + raise InvalidArgument( + "No characters are allowed to be generated by this " + f"combination of arguments: {_arg_repr}" + ) + return cls(intervals, force_repr=f"characters({_arg_repr})") + + @classmethod + def from_alphabet(cls, alphabet: str | SearchStrategy) -> "OneCharStringStrategy": + if isinstance(alphabet, str): + return cls.from_characters_args(categories=(), include_characters=alphabet) + + assert isinstance(alphabet, SearchStrategy) + char_strategy = unwrap_strategies(alphabet) + if isinstance(char_strategy, cls): + return char_strategy + elif isinstance(char_strategy, SampledFromStrategy): + for c in char_strategy.elements: + _check_is_single_character(c) + return cls.from_characters_args( + categories=(), + include_characters=char_strategy.elements, + ) + elif isinstance(char_strategy, OneOfStrategy): + intervals = IntervalSet() + for s in char_strategy.element_strategies: + intervals = intervals.union(cls.from_alphabet(s).intervals) + return cls(intervals, force_repr=repr(alphabet)) + raise InvalidArgument( + f"{alphabet=} must be a sampled_from() or characters() strategy" + ) + + def __repr__(self) -> str: + return self._force_repr or f"OneCharStringStrategy({self.intervals!r})" + + def do_draw(self, data: ConjectureData) -> str: + return data.draw_string(self.intervals, min_size=1, max_size=1) + + +_nonempty_names = ( + "capitalize", + "expandtabs", + "join", + "lower", + "rsplit", + "split", + "splitlines", + "swapcase", + "title", + "upper", +) +_nonempty_and_content_names = ( + "islower", + "isupper", + "isalnum", + "isalpha", + "isascii", + "isdigit", + "isspace", + "istitle", + "lstrip", + "rstrip", + "strip", +) + + +class TextStrategy(ListStrategy[str]): + def do_draw(self, data): + # if our element strategy is OneCharStringStrategy, we can skip the + # ListStrategy draw and jump right to data.draw_string. + # Doing so for user-provided element strategies is not correct in + # general, as they may define a different distribution than data.draw_string. + elems = unwrap_strategies(self.element_strategy) + if isinstance(elems, OneCharStringStrategy): + return data.draw_string( + elems.intervals, + min_size=self.min_size, + max_size=( + COLLECTION_DEFAULT_MAX_SIZE + if self.max_size == float("inf") + else self.max_size + ), + ) + return "".join(super().do_draw(data)) + + def __repr__(self) -> str: + args = [] + if repr(self.element_strategy) != "characters()": + args.append(repr(self.element_strategy)) + if self.min_size: + args.append(f"min_size={self.min_size}") + if self.max_size < float("inf"): + args.append(f"max_size={self.max_size}") + return f"text({', '.join(args)})" + + # See https://docs.python.org/3/library/stdtypes.html#string-methods + # These methods always return Truthy values for any nonempty string. 
+ _nonempty_filters = ( + *ListStrategy._nonempty_filters, + str, + str.casefold, + str.encode, + *(getattr(str, n) for n in _nonempty_names), + ) + _nonempty_and_content_filters = ( + str.isdecimal, + str.isnumeric, + *(getattr(str, n) for n in _nonempty_and_content_names), + ) + + def filter(self, condition): + elems = unwrap_strategies(self.element_strategy) + if ( + condition is str.isidentifier + and self.max_size >= 1 + and isinstance(elems, OneCharStringStrategy) + ): + from hypothesis.strategies import builds, nothing + + id_start, id_continue = _identifier_characters() + if not (elems.intervals & id_start): + return nothing() + return builds( + "{}{}".format, + OneCharStringStrategy(elems.intervals & id_start), + TextStrategy( + OneCharStringStrategy(elems.intervals & id_continue), + min_size=max(0, self.min_size - 1), + max_size=self.max_size - 1, + ), + # Filter to ensure that NFKC normalization keeps working in future + ).filter(str.isidentifier) + if (new := _string_filter_rewrite(self, str, condition)) is not None: + return new + return super().filter(condition) + + +def _string_filter_rewrite(self, kind, condition): + if condition in (kind.lower, kind.title, kind.upper): + k = kind.__name__ + warnings.warn( + f"You applied {k}.{condition.__name__} as a filter, but this allows " + f"all nonempty strings! Did you mean {k}.is{condition.__name__}?", + HypothesisWarning, + stacklevel=2, + ) + + if ( + ( + kind is bytes + or isinstance( + unwrap_strategies(self.element_strategy), OneCharStringStrategy + ) + ) + and isinstance(pattern := getattr(condition, "__self__", None), re.Pattern) + and isinstance(pattern.pattern, kind) + ): + from hypothesis.strategies._internal.regex import regex_strategy + + if condition.__name__ == "match": + # Replace with an easier-to-handle equivalent condition + caret, close = ("^(?:", ")") if kind is str else (b"^(?:", b")") + pattern = re.compile(caret + pattern.pattern + close, flags=pattern.flags) + condition = pattern.search + + if condition.__name__ in ("search", "findall", "fullmatch"): + s = regex_strategy( + pattern, + fullmatch=condition.__name__ == "fullmatch", + alphabet=self.element_strategy if kind is str else None, + ) + if self.min_size > 0: + s = s.filter(partial(min_len, self.min_size)) + if self.max_size < 1e999: + s = s.filter(partial(max_len, self.max_size)) + return s + elif condition.__name__ in ("finditer", "scanner"): + # PyPy implements `finditer` as an alias to their `scanner` method + warnings.warn( + f"You applied {pretty(condition)} as a filter, but this allows " + f"any string at all! Did you mean .findall ?", + HypothesisWarning, + stacklevel=3, + ) + return self + elif condition.__name__ == "split": + warnings.warn( + f"You applied {pretty(condition)} as a filter, but this allows " + f"any nonempty string! Did you mean .search ?", + HypothesisWarning, + stacklevel=3, + ) + return self.filter(bool) + + # We use ListStrategy filter logic for the conditions that *only* imply + # the string is nonempty. Here, we increment the min_size but still apply + # the filter for conditions that imply nonempty *and specific contents*. 
+ if condition in self._nonempty_and_content_filters and self.max_size >= 1: + self = copy.copy(self) + self.min_size = max(1, self.min_size) + return ListStrategy.filter(self, condition) + + return None + + +# Excerpted from https://www.unicode.org/Public/15.0.0/ucd/PropList.txt +# Python updates it's Unicode version between minor releases, but fortunately +# these properties do not change between the Unicode versions in question. +_PROPLIST = """ +# ================================================ + +1885..1886 ; Other_ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P +212E ; Other_ID_Start # So ESTIMATED SYMBOL +309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + +# Total code points: 6 + +# ================================================ + +00B7 ; Other_ID_Continue # Po MIDDLE DOT +0387 ; Other_ID_Continue # Po GREEK ANO TELEIA +1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE +19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE + +# Total code points: 12 +""" + + +@lru_cache +def _identifier_characters() -> tuple[IntervalSet, IntervalSet]: + """See https://docs.python.org/3/reference/lexical_analysis.html#identifiers""" + # Start by computing the set of special characters + chars = {"Other_ID_Start": "", "Other_ID_Continue": ""} + for line in _PROPLIST.splitlines(): + if m := re.match(r"([0-9A-F.]+) +; (\w+) # ", line): + codes, prop = m.groups() + span = range(int(codes[:4], base=16), int(codes[-4:], base=16) + 1) + chars[prop] += "".join(chr(x) for x in span) + + # Then get the basic set by Unicode category and known extras + id_start = charmap.query( + categories=("Lu", "Ll", "Lt", "Lm", "Lo", "Nl"), + include_characters="_" + chars["Other_ID_Start"], + ) + id_start -= IntervalSet.from_string( + # Magic value: the characters which NFKC-normalize to be invalid identifiers. + # Conveniently they're all in `id_start`, so we only need to do this once. + "\u037a\u0e33\u0eb3\u2e2f\u309b\u309c\ufc5e\ufc5f\ufc60\ufc61\ufc62\ufc63" + "\ufdfa\ufdfb\ufe70\ufe72\ufe74\ufe76\ufe78\ufe7a\ufe7c\ufe7e\uff9e\uff9f" + ) + id_continue = id_start | charmap.query( + categories=("Mn", "Mc", "Nd", "Pc"), + include_characters=chars["Other_ID_Continue"], + ) + return id_start, id_continue + + +class BytesStrategy(SearchStrategy): + def __init__(self, min_size: int, max_size: int | None): + super().__init__() + self.min_size = min_size + self.max_size = ( + max_size if max_size is not None else COLLECTION_DEFAULT_MAX_SIZE + ) + + def do_draw(self, data: ConjectureData) -> bytes: + return data.draw_bytes(self.min_size, self.max_size) + + _nonempty_filters = ( + *ListStrategy._nonempty_filters, + bytes, + *(getattr(bytes, n) for n in _nonempty_names), + ) + _nonempty_and_content_filters = ( + *(getattr(bytes, n) for n in _nonempty_and_content_names), + ) + + def filter(self, condition): + if (new := _string_filter_rewrite(self, bytes, condition)) is not None: + return new + return ListStrategy.filter(self, condition) diff --git a/vendored/hypothesis/strategies/_internal/types.py b/vendored/hypothesis/strategies/_internal/types.py new file mode 100644 index 0000000..9965bff --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/types.py @@ -0,0 +1,1157 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. 
+# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import builtins +import collections +import collections.abc +import datetime +import decimal +import fractions +import functools +import inspect +import io +import ipaddress +import numbers +import operator +import os +import random +import re +import sys +import typing +import uuid +import warnings +import zoneinfo +from collections.abc import Iterator +from functools import partial +from pathlib import PurePath +from types import FunctionType +from typing import TYPE_CHECKING, Any, NewType, get_args, get_origin + +from hypothesis import strategies as st +from hypothesis.errors import HypothesisWarning, InvalidArgument, ResolutionFailed +from hypothesis.internal.compat import PYPY, BaseExceptionGroup, ExceptionGroup +from hypothesis.internal.conjecture.utils import many as conjecture_utils_many +from hypothesis.internal.filtering import max_len, min_len +from hypothesis.internal.reflection import get_pretty_function_description +from hypothesis.strategies._internal.ipaddress import ( + SPECIAL_IPv4_RANGES, + SPECIAL_IPv6_RANGES, + ip_addresses, +) +from hypothesis.strategies._internal.lazy import unwrap_strategies +from hypothesis.strategies._internal.strategies import OneOfStrategy + +if TYPE_CHECKING: + import annotated_types as at + +GenericAlias: typing.Any +UnionType: typing.Any +try: + # The type of PEP-604 unions (`int | str`), added in Python 3.10 + from types import GenericAlias, UnionType +except ImportError: + GenericAlias = () + UnionType = () + +try: + import typing_extensions +except ImportError: + typing_extensions = None # type: ignore + +try: + from typing import _AnnotatedAlias # type: ignore +except ImportError: + try: + from typing_extensions import _AnnotatedAlias + except ImportError: + _AnnotatedAlias = () + +ConcatenateTypes: tuple = () +try: + ConcatenateTypes += (typing.Concatenate,) +except AttributeError: # pragma: no cover + pass # Is missing for `python<3.10` +try: + ConcatenateTypes += (typing_extensions.Concatenate,) +except AttributeError: # pragma: no cover + pass # `typing_extensions` might not be installed + +ParamSpecTypes: tuple = () +try: + ParamSpecTypes += (typing.ParamSpec,) +except AttributeError: # pragma: no cover + pass # Is missing for `python<3.10` +try: + ParamSpecTypes += (typing_extensions.ParamSpec,) +except AttributeError: # pragma: no cover + pass # `typing_extensions` might not be installed + +TypeGuardTypes: tuple = () +try: + TypeGuardTypes += (typing.TypeGuard,) +except AttributeError: # pragma: no cover + pass # Is missing for `python<3.10` +try: + TypeGuardTypes += (typing.TypeIs,) # type: ignore +except AttributeError: # pragma: no cover + pass # Is missing for `python<3.13` +try: + TypeGuardTypes += (typing_extensions.TypeGuard, typing_extensions.TypeIs) +except AttributeError: # pragma: no cover + pass # `typing_extensions` might not be installed + + +RequiredTypes: tuple = () +try: + RequiredTypes += (typing.Required,) # type: ignore +except AttributeError: # pragma: no cover + pass # Is missing for `python<3.11` +try: + RequiredTypes += (typing_extensions.Required,) +except AttributeError: # pragma: no cover + pass # `typing_extensions` might not be installed + + +NotRequiredTypes: tuple = () +try: + NotRequiredTypes += (typing.NotRequired,) # type: 
ignore +except AttributeError: # pragma: no cover + pass # Is missing for `python<3.11` +try: + NotRequiredTypes += (typing_extensions.NotRequired,) +except AttributeError: # pragma: no cover + pass # `typing_extensions` might not be installed + + +ReadOnlyTypes: tuple = () +try: + ReadOnlyTypes += (typing.ReadOnly,) # type: ignore +except AttributeError: # pragma: no cover + pass # Is missing for `python<3.13` +try: + ReadOnlyTypes += (typing_extensions.ReadOnly,) +except AttributeError: # pragma: no cover + pass # `typing_extensions` might not be installed + + +LiteralStringTypes: tuple = () +try: + LiteralStringTypes += (typing.LiteralString,) # type: ignore +except AttributeError: # pragma: no cover + pass # Is missing for `python<3.11` +try: + LiteralStringTypes += (typing_extensions.LiteralString,) +except AttributeError: # pragma: no cover + pass # `typing_extensions` might not be installed + + +# We need this function to use `get_origin` on 3.8 for types added later: +# in typing-extensions, so we prefer this function over regular `get_origin` +# when unwrapping `TypedDict`'s annotations. +try: + extended_get_origin = typing_extensions.get_origin +except AttributeError: # pragma: no cover + # `typing_extensions` might not be installed, in this case - fallback: + extended_get_origin = get_origin # type: ignore + + +# Used on `TypeVar` objects with no default: +NoDefaults = ( + getattr(typing, "NoDefault", object()), + getattr(typing_extensions, "NoDefault", object()), +) + +# We use this variable to be sure that we are working with a type from `typing`: +typing_root_type = (typing._Final, typing._GenericAlias) # type: ignore + +# We use this to disallow all non-runtime types from being registered and resolved. +# By "non-runtime" we mean: types that do not really exist in python's +# and are just added for more fancy type annotations. +# `Final` is a great example: it just indicates that this value can't be reassigned. +NON_RUNTIME_TYPES = ( + typing.Any, + typing.Annotated, + *ConcatenateTypes, + *ParamSpecTypes, + *TypeGuardTypes, +) +for name in ( + "ClassVar", + "Final", + "NoReturn", + "Self", + "Required", + "NotRequired", + "ReadOnly", + "Never", + "TypeAlias", + "TypeVarTuple", + "Unpack", +): + try: + NON_RUNTIME_TYPES += (getattr(typing, name),) + except AttributeError: + pass + try: + NON_RUNTIME_TYPES += (getattr(typing_extensions, name),) + except AttributeError: # pragma: no cover + pass # typing_extensions might not be installed + + +def type_sorting_key(t): + """Minimise to None, then non-container types, then container types.""" + if t is None or t is type(None): + return (-1, repr(t)) + t = get_origin(t) or t + is_container = int(try_issubclass(t, collections.abc.Container)) + return (is_container, repr(t)) + + +def _compatible_args(args, superclass_args): + """Check that the args of two generic types are compatible for try_issubclass.""" + assert superclass_args is not None + if args is None: + return True + return len(args) == len(superclass_args) and all( + # "a==b or either is a typevar" is a hacky approximation, but it's + # good enough for all the cases that I've seen so far and has the + # substantial virtue of (relative) simplicity. + a == b or isinstance(a, typing.TypeVar) or isinstance(b, typing.TypeVar) + for a, b in zip(args, superclass_args, strict=True) + ) + + +def try_issubclass(thing, superclass): + try: + # In this case we're looking at two distinct classes - which might be generics. 
+        # That brings in some complications:
+        if issubclass(get_origin(thing) or thing, get_origin(superclass) or superclass):
+            superclass_args = get_args(superclass)
+            if not superclass_args:
+                # The superclass is not generic, so we're definitely a subclass.
+                return True
+            # Sadly this is just some really fiddly logic to handle all the cases
+            # of user-defined generic types, types inheriting from parametrised
+            # generics, and so on. If you need to change this code, read PEP-560
+            # and Hypothesis issue #2951 closely first, and good luck. The tests
+            # will help you, I hope - good luck.
+            for orig_base in getattr(thing, "__orig_bases__", None) or [None]:
+                args = getattr(orig_base, "__args__", None)
+                if _compatible_args(args, superclass_args):
+                    return True
+        return False
+    except (AttributeError, TypeError):
+        # Some types can't be the subject or object of an instance or subclass check
+        return False
+
+
+def is_a_type_alias_type(thing):  # pragma: no cover  # covered by 3.12+ tests
+    # TypeAliasType is new in python 3.12, through the type statement. If we're
+    # before python 3.12 then this can't possibly be a TypeAliasType.
+    #
+    # https://docs.python.org/3/reference/simple_stmts.html#type
+    # https://docs.python.org/3/library/typing.html#typing.TypeAliasType
+    if sys.version_info < (3, 12):
+        return False
+    return isinstance(thing, typing.TypeAliasType)
+
+
+def is_a_union(thing: object) -> bool:
+    """Return True if thing is a typing.Union or types.UnionType (in py310)."""
+    return isinstance(thing, UnionType) or get_origin(thing) is typing.Union
+
+
+def is_a_type(thing: object) -> bool:
+    """
+    Return True if thing is a type or a typing-like thing (union, generic type, etc).
+    """
+    return (
+        isinstance(thing, type)
+        or is_generic_type(thing)
+        or isinstance(thing, NewType)
+        or is_a_type_alias_type(thing)
+        # union and forwardref checks necessary from 3.14+. Before 3.14, they
+        # were covered by is_generic_type(thing).
+ or is_a_union(thing) + or isinstance(thing, typing.ForwardRef) + ) + + +def is_typing_literal(thing: object) -> bool: + return get_origin(thing) in ( + typing.Literal, + getattr(typing_extensions, "Literal", object()), + ) + + +def is_annotated_type(thing: object) -> bool: + return ( + isinstance(thing, _AnnotatedAlias) + and getattr(thing, "__args__", None) is not None + ) + + +def get_constraints_filter_map(): + if at := sys.modules.get("annotated_types"): + return { + # Due to the order of operator.gt/ge/lt/le arguments, order is inversed: + at.Gt: lambda constraint: partial(operator.lt, constraint.gt), + at.Ge: lambda constraint: partial(operator.le, constraint.ge), + at.Lt: lambda constraint: partial(operator.gt, constraint.lt), + at.Le: lambda constraint: partial(operator.ge, constraint.le), + at.MinLen: lambda constraint: partial(min_len, constraint.min_length), + at.MaxLen: lambda constraint: partial(max_len, constraint.max_length), + at.Predicate: lambda constraint: constraint.func, + } + return {} # pragma: no cover + + +def _get_constraints(args: tuple[Any, ...]) -> Iterator["at.BaseMetadata"]: + at = sys.modules.get("annotated_types") + for arg in args: + if at and isinstance(arg, at.BaseMetadata): + yield arg + elif getattr(arg, "__is_annotated_types_grouped_metadata__", False): + for subarg in arg: + if getattr(subarg, "__is_annotated_types_grouped_metadata__", False): + yield from _get_constraints(tuple(subarg)) + else: + yield subarg + elif at and isinstance(arg, slice) and arg.step in (1, None): + yield from at.Len(arg.start or 0, arg.stop) + + +def _flat_annotated_repr_parts(annotated_type): + # Helper to get a good error message in find_annotated_strategy() below. + type_reps = [ + get_pretty_function_description(a) + for a in annotated_type.__args__ + if not isinstance(a, typing.TypeVar) + ] + metadata_reps = [] + for m in getattr(annotated_type, "__metadata__", ()): + if is_annotated_type(m): + ts, ms = _flat_annotated_repr_parts(m) + type_reps.extend(ts) + metadata_reps.extend(ms) + else: + metadata_reps.append(get_pretty_function_description(m)) + return type_reps, metadata_reps + + +def find_annotated_strategy(annotated_type): + metadata = getattr(annotated_type, "__metadata__", ()) + + if any(is_annotated_type(arg) for arg in metadata): + # Annotated[Annotated[T], ...] is perfectly acceptable, but it's all to easy + # to instead write Annotated[T1, Annotated[T2, ...]] - and nobody else checks + # for that at runtime. Once you add generics this can be seriously confusing, + # so we go to some trouble to give a helpful error message. + # For details: https://github.com/HypothesisWorks/hypothesis/issues/3891 + ty_rep = repr(annotated_type).replace("typing.Annotated", "Annotated") + ts, ms = _flat_annotated_repr_parts(annotated_type) + bits = ", ".join([" | ".join(dict.fromkeys(ts or "?")), *dict.fromkeys(ms)]) + raise ResolutionFailed( + f"`{ty_rep}` is invalid because nesting Annotated is only allowed for " + f"the first (type) argument, not for later (metadata) arguments. " + f"Did you mean `Annotated[{bits}]`?" 
+        )
+    for arg in reversed(metadata):
+        if isinstance(arg, st.SearchStrategy):
+            return arg
+
+    filter_conditions = []
+    unsupported = []
+    constraints_map = get_constraints_filter_map()
+    for constraint in _get_constraints(metadata):
+        if isinstance(constraint, st.SearchStrategy):
+            return constraint
+        if convert := constraints_map.get(type(constraint)):
+            filter_conditions.append(convert(constraint))
+        else:
+            unsupported.append(constraint)
+    if unsupported:
+        msg = f"Ignoring unsupported {', '.join(map(repr, unsupported))}"
+        warnings.warn(msg, HypothesisWarning, stacklevel=2)
+
+    base_strategy = st.from_type(annotated_type.__origin__)
+    for filter_condition in filter_conditions:
+        base_strategy = base_strategy.filter(filter_condition)
+
+    return base_strategy
+
+
+def has_type_arguments(type_):
+    """Decides whether or not this type has applied type arguments."""
+    args = getattr(type_, "__args__", None)
+    if args and isinstance(type_, (typing._GenericAlias, GenericAlias)):
+        # There are some cases when declared types do already have type arguments
+        # Like `Sequence`, that is `_GenericAlias(abc.Sequence[T])[T]`
+        parameters = getattr(type_, "__parameters__", None)
+        if parameters:  # So, we need to know if type args are just "aliases"
+            return args != parameters
+    return bool(args)
+
+
+def is_generic_type(type_):
+    """Decides whether a given type is generic or not."""
+    # The ugly truth is that `MyClass`, `MyClass[T]`, and `MyClass[int]` are very different.
+    # We check for `MyClass[T]` and `MyClass[int]` with the first condition,
+    # while the second condition is for `MyClass`.
+    return isinstance(type_, (*typing_root_type, GenericAlias)) or (
+        isinstance(type_, type)
+        and (typing.Generic in type_.__mro__ or hasattr(type_, "__class_getitem__"))
+    )
+
+
+__EVAL_TYPE_TAKES_TYPE_PARAMS = (
+    "type_params" in inspect.signature(typing._eval_type).parameters  # type: ignore
+)
+
+
+def _try_import_forward_ref(thing, typ, *, type_params):  # pragma: no cover
+    """
+    Tries to import a real bound or default type from ``ForwardRef`` in ``TypeVar``.
+
+    This function is very "magical" to say the least, please don't use it.
+    This function is fully covered, but is excluded from coverage
+    because we can only cover each path in a separate python version.
+    """
+    try:
+        kw = {"globalns": vars(sys.modules[thing.__module__]), "localns": None}
+        if __EVAL_TYPE_TAKES_TYPE_PARAMS:
+            kw["type_params"] = type_params
+        return typing._eval_type(typ, **kw)
+    except (KeyError, AttributeError, NameError):
+        # We fallback to `ForwardRef` instance, you can register it as a type as well:
+        # >>> from typing import ForwardRef
+        # >>> from hypothesis import strategies as st
+        # >>> st.register_type_strategy(ForwardRef('YourType'), your_strategy)
+        return typ
+
+
+def from_typing_type(thing):
+    # We start with Final, Literal, and Annotated, since they don't support `isinstance`.
+    #
+    # We then explicitly error on non-Generic types, which don't carry enough
+    # information to sensibly resolve to strategies at runtime.
+    # Finally, we run a variation of the subclass lookup in `st.from_type`
+    # among generic types in the lookup.
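+    # Rough sketch of that last step (the exact members vary by Python version):
+    # for something like typing.Sequence[int] we collect every generic key in
+    # _global_type_lookup that subclasses Sequence, prune non-maximal and
+    # special-cased entries, and return st.one_of() over the surviving
+    # strategies - roughly one_of(binary(), lists(integers()), ...).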
+ if get_origin(thing) == typing.Final: + return st.one_of([st.from_type(t) for t in thing.__args__]) + if is_typing_literal(thing): + args_dfs_stack = list(thing.__args__) + literals = [] + while args_dfs_stack: + arg = args_dfs_stack.pop() + if is_typing_literal(arg): # pragma: no cover + # Python 3.10+ flattens for us when constructing Literal objects + args_dfs_stack.extend(reversed(arg.__args__)) + else: + literals.append(arg) + return st.sampled_from(literals) + if is_annotated_type(thing): + return find_annotated_strategy(thing) + + # Some "generic" classes are not generic *in* anything - for example both + # Hashable and Sized have `__args__ == ()` + origin = get_origin(thing) or thing + if ( + origin in vars(collections.abc).values() + and len(getattr(thing, "__args__", None) or []) == 0 + ): + return st.from_type(origin) + + # Parametrised generic types have their __origin__ attribute set to the + # un-parametrised version, which we need to use in the subclass checks. + # i.e.: typing.List[int].__origin__ == list + mapping = { + k: v + for k, v in _global_type_lookup.items() + if is_generic_type(k) and try_issubclass(k, thing) + } + + # Discard any type which is not it's own origin, where the origin is also in the + # mapping. On old Python versions this could be due to redefinition of types + # between collections.abc and typing, but the logic seems reasonable to keep in + # case of similar situations now that's been fixed. + for t in sorted(mapping, key=type_sorting_key): + origin = get_origin(t) + if origin is not t and origin in mapping: + mapping.pop(t) + + # Drop some unusual cases for simplicity, including tuples or its + # subclasses (e.g. namedtuple) + if len(mapping) > 1: + _Environ = getattr(os, "_Environ", None) + mapping.pop(_Environ, None) + + tuple_types = [ + t + for t in mapping + if (isinstance(t, type) and issubclass(t, tuple)) or get_origin(t) is tuple + ] + if len(mapping) > len(tuple_types): + for tuple_type in tuple_types: + mapping.pop(tuple_type) + + if {dict, set}.intersection(mapping): + # ItemsView can cause test_lookup.py::test_specialised_collection_types + # to fail, due to weird isinstance behaviour around the elements. + mapping.pop(collections.abc.ItemsView, None) + mapping.pop(typing.ItemsView, None) + if collections.deque in mapping and len(mapping) > 1: + # Resolving generic sequences to include a deque is more trouble for e.g. + # the ghostwriter than it's worth, via undefined names in the repr. + mapping.pop(collections.deque) + + if ( + memoryview in mapping + and getattr(thing, "__args__", None) + and not hasattr(thing.__args__[0], "__buffer__") + ): # pragma: no cover # covered by 3.14+ + # Both memoryview and list are direct subclasses of Sequence. If we ask for + # st.from_type(Sequence[A]), we will get both list[A] and memoryview[A]. + # But unless A implements the buffer protocol with __buffer__, resolving + # memoryview[A] will error. + # + # Since the user didn't explicitly ask for memoryview, there's no reason + # to expect them to have implemented __buffer__. Remove memoryview in this + # case, before it can fail at resolution-time. + # + # Note: I intentionally did not add a `and len(mapping) > 1` condition here. + # If memoryview[A] is the only resolution for a strategy, but A is not a + # buffer protocol, our options are to (1) pop memoryview and raise + # ResolutionFailed, or (2) to keep memoryview in the mapping and error in + # resolve_memoryview. 
A failure in test_resolving_standard_contextmanager_as_generic + # (because memoryview is a context manager in 3.14) convinced me the former + # was less confusing to users. + mapping.pop(memoryview) + + elem_type = (getattr(thing, "__args__", None) or ["not int"])[0] + union_elems = elem_type.__args__ if is_a_union(elem_type) else () + allows_integer_elements = any( + isinstance(T, type) and try_issubclass(int, get_origin(T) or T) + for T in [*union_elems, elem_type] + ) + + if len(mapping) > 1: + # issubclass treats bytestring as a kind of sequence, which it is, + # but treating it as such breaks everything else when it is presumed + # to be a generic sequence or container that could hold any item. + # Except for sequences of integers, or unions which include integer! + # See https://github.com/HypothesisWorks/hypothesis/issues/2257 + # + # This block drops bytes from the types that can be generated + # if there is more than one allowed type, and the element type is + # not either `int` or a Union with `int` as one of its elements. + if not allows_integer_elements: + mapping.pop(bytes, None) + if sys.version_info[:2] <= (3, 13): + mapping.pop(collections.abc.ByteString, None) + elif ( + (not mapping) + and isinstance(thing, typing.ForwardRef) + and thing.__forward_arg__ in vars(builtins) + ): + return st.from_type(getattr(builtins, thing.__forward_arg__)) + + def is_maximal(t): + # For each k in the mapping, we use it if it's the most general type + # available, and exclude any more specific types. So if both + # Sequence and Collection are available, we use the most general Collection + # type. + # + # k being "the most general" is equivalent to saying that k is maximal + # in the partial ordering of types. Note that since the ordering is + # partial there may be multiple maximal elements. (This distinguishes + # maximal from maximum). + return sum(try_issubclass(t, T) for T in mapping) == 1 + + strategies = [ + (t, s if isinstance(s, st.SearchStrategy) else s(thing)) + for t, s in mapping.items() + if is_maximal(t) + ] + strategies = [(t, s) for t, s in strategies if s != NotImplemented] + + # 3.14+ removes typing.ByteString. typing.ByteString was the only reason we + # previously generated bytes for Sequence[int]. There is no equivalent + # for typing.ByteString in 3.14+, but we would still like to generate bytes + # for Sequence[int] and its supertypes. Special case that here. + if ( + sys.version_info[:2] >= (3, 14) + and allows_integer_elements + # For the same reason as the is_maximal check above, we only include + # this ByteString special case if it is not overridden by a more general + # available type. + # + # collections.abc.ByteString was a direct subclass of Sequence, so we + # use that as the standin type when checking. Note we compare to a count + # of 0, instead of 1, since in is_maximal `k` is already in `mapping`, + # and we expect `try_issubclass(k, k) == True`. 
+ and try_issubclass(collections.abc.Sequence, thing) + and sum(try_issubclass(collections.abc.Sequence, T) for T in mapping) == 0 + ): # pragma: no cover # covered on 3.14+ + strategies.append((collections.abc.Sequence, st.binary())) + + # Sort strategies according to our type-sorting heuristic for stable output + strategies = [ + s for _k, s in sorted(strategies, key=lambda kv: type_sorting_key(kv[0])) + ] + + empty = ", ".join(repr(s) for s in strategies if s.is_empty) + if empty or not strategies: + raise ResolutionFailed( + f"Could not resolve {empty or thing} to a strategy; " + "consider using register_type_strategy" + ) + return st.one_of(strategies) + + +def can_cast(type, value): + """Determine if value can be cast to type.""" + try: + type(value) + return True + except Exception: + return False + + +def _networks(bits): + return st.tuples(st.integers(0, 2**bits - 1), st.integers(-bits, 0).map(abs)) + + +utc_offsets = st.builds( + datetime.timedelta, minutes=st.integers(0, 59), hours=st.integers(-23, 23) +) + +# These builtin and standard-library types have Hypothesis strategies, +# seem likely to appear in type annotations, or are otherwise notable. +# +# The strategies below must cover all possible values from the type, because +# many users treat them as comprehensive and one of Hypothesis' design goals +# is to avoid testing less than expected. +# +# As a general rule, we try to limit this to scalars because from_type() +# would have to decide on arbitrary collection elements, and we'd rather +# not (with typing module generic types and some builtins as exceptions). +# +# Strategy Callables may return NotImplemented, which should be treated in the +# same way as if the type was not registered. +# +# Note that NotImplemented cannot be typed in Python 3.8 because there's no type +# exposed for it, and NotImplemented itself is typed as Any so that it can be +# returned without being listed in a function signature: +# https://github.com/python/mypy/issues/6710#issuecomment-485580032 +_global_type_lookup: dict[ + type, st.SearchStrategy | typing.Callable[[type], st.SearchStrategy] +] = { + type(None): st.none(), + bool: st.booleans(), + int: st.integers(), + float: st.floats(), + complex: st.complex_numbers(), + fractions.Fraction: st.fractions(), + decimal.Decimal: st.decimals(), + str: st.text(), + bytes: st.binary(), + datetime.datetime: st.datetimes(), + datetime.date: st.dates(), + datetime.time: st.times(), + datetime.timedelta: st.timedeltas(), + datetime.timezone: st.builds(datetime.timezone, offset=utc_offsets) + | st.builds(datetime.timezone, offset=utc_offsets, name=st.text(st.characters())), + uuid.UUID: st.uuids(), + tuple: st.builds(tuple), + list: st.builds(list), + set: st.builds(set), + collections.abc.MutableSet: st.builds(set), + frozenset: st.builds(frozenset), + dict: st.builds(dict), + FunctionType: st.functions(), + type(Ellipsis): st.just(Ellipsis), + type(NotImplemented): st.just(NotImplemented), + bytearray: st.binary().map(bytearray), + numbers.Real: st.floats(), + numbers.Rational: st.fractions(), + numbers.Number: st.complex_numbers(), + numbers.Integral: st.integers(), + numbers.Complex: st.complex_numbers(), + slice: st.builds( + slice, + st.none() | st.integers(), + st.none() | st.integers(), + st.none() | st.integers(), + ), + range: st.one_of( + st.builds(range, st.integers(min_value=0)), + st.builds(range, st.integers(), st.integers()), + st.builds(range, st.integers(), st.integers(), st.integers().filter(bool)), + ), + ipaddress.IPv4Address: 
ip_addresses(v=4), + ipaddress.IPv6Address: ip_addresses(v=6), + ipaddress.IPv4Interface: _networks(32).map(ipaddress.IPv4Interface), + ipaddress.IPv6Interface: _networks(128).map(ipaddress.IPv6Interface), + ipaddress.IPv4Network: st.one_of( + _networks(32).map(lambda x: ipaddress.IPv4Network(x, strict=False)), + st.sampled_from(SPECIAL_IPv4_RANGES).map(ipaddress.IPv4Network), + ), + ipaddress.IPv6Network: st.one_of( + _networks(128).map(lambda x: ipaddress.IPv6Network(x, strict=False)), + st.sampled_from(SPECIAL_IPv6_RANGES).map(ipaddress.IPv6Network), + ), + os.PathLike: st.builds(PurePath, st.text()), + UnicodeDecodeError: st.builds( + UnicodeDecodeError, + st.just("unknown encoding"), + st.just(b""), + st.just(0), + st.just(0), + st.just("reason"), + ), + UnicodeEncodeError: st.builds( + UnicodeEncodeError, + st.just("unknown encoding"), + st.text(), + st.just(0), + st.just(0), + st.just("reason"), + ), + UnicodeTranslateError: st.builds( + UnicodeTranslateError, st.text(), st.just(0), st.just(0), st.just("reason") + ), + BaseExceptionGroup: st.builds( + BaseExceptionGroup, + st.text(), + st.lists(st.from_type(BaseException), min_size=1, max_size=5), + ), + ExceptionGroup: st.builds( + ExceptionGroup, + st.text(), + st.lists(st.from_type(Exception), min_size=1, max_size=5), + ), + enumerate: st.builds(enumerate, st.just(())), + filter: st.builds(filter, st.just(lambda _: None), st.just(())), + map: st.builds(map, st.just(lambda _: None), st.just(())), + reversed: st.builds(reversed, st.just(())), + zip: st.builds(zip), # avoids warnings on PyPy 7.3.14+ + property: st.builds(property, st.just(lambda _: None)), + classmethod: st.builds(classmethod, st.just(lambda self: self)), + staticmethod: st.builds(staticmethod, st.just(lambda self: self)), + super: st.builds(super, st.from_type(type)), + re.Match: st.text().map(lambda c: re.match(".", c, flags=re.DOTALL)).filter(bool), + re.Pattern: st.builds(re.compile, st.sampled_from(["", b""])), + random.Random: st.randoms(), + zoneinfo.ZoneInfo: st.timezones(), + # Pull requests with more types welcome! +} +if PYPY: + _global_type_lookup[builtins.sequenceiterator] = st.builds(iter, st.tuples()) # type: ignore + + +_fallback_type_strategy = st.sampled_from( + sorted(_global_type_lookup, key=type_sorting_key) +) +# subclass of MutableMapping, and so we resolve to a union which +# includes this... but we don't actually ever want to build one. +_global_type_lookup[os._Environ] = st.just(os.environ) + +if sys.version_info[:2] < (3, 14): + # Note: while ByteString notionally also represents the bytearray and + # memoryview types, it is a subclass of Hashable and those types are not. + # We therefore only generate the bytes type. type-ignored due to deprecation. + _global_type_lookup[typing.ByteString] = st.binary() # type: ignore + _global_type_lookup[collections.abc.ByteString] = st.binary() # type: ignore + + _global_type_lookup[memoryview] = st.binary().map(memoryview) + + +_global_type_lookup.update( + { + # TODO: SupportsAbs and SupportsRound should be covariant, ie have functions. 
+ typing.SupportsAbs: st.one_of( + st.booleans(), + st.integers(), + st.floats(), + st.complex_numbers(), + st.fractions(), + st.decimals(), + st.timedeltas(), + ), + typing.SupportsRound: st.one_of( + st.booleans(), st.integers(), st.floats(), st.decimals(), st.fractions() + ), + typing.SupportsComplex: st.one_of( + st.booleans(), + st.integers(), + st.floats(), + st.complex_numbers(), + st.decimals(), + st.fractions(), + ), + typing.SupportsFloat: st.one_of( + st.booleans(), + st.integers(), + st.floats(), + st.decimals(), + st.fractions(), + # with floats its far more annoying to capture all + # the magic in a regex. so we just stringify some floats + st.floats().map(str), + ), + typing.SupportsInt: st.one_of( + st.booleans(), + st.integers(), + st.floats(), + st.uuids(), + st.decimals(), + # this generates strings that should able to be parsed into integers + st.from_regex(r"\A-?\d+\Z").filter(functools.partial(can_cast, int)), + ), + typing.SupportsIndex: st.integers() | st.booleans(), + typing.SupportsBytes: st.one_of( + st.booleans(), + st.binary(), + st.integers(0, 255), + # As with Reversible, we tuplize this for compatibility with Hashable. + st.lists(st.integers(0, 255)).map(tuple), + ), + typing.BinaryIO: st.builds(io.BytesIO, st.binary()), + typing.TextIO: st.builds(io.StringIO, st.text()), + } +) + + +# The "extra" lookups define a callable that either resolves to a strategy for +# this narrowly extra-specific type, or returns None to proceed with normal +# type resolution. The callable will only be called if the module is +# installed. To avoid the performance hit of importing anything here, we defer +# it until the method is called the first time, at which point we replace the +# entry in the lookup table with the direct call. +def _from_numpy_type(thing: type) -> st.SearchStrategy | None: + from hypothesis.extra.numpy import _from_type + + _global_extra_lookup["numpy"] = _from_type + return _from_type(thing) + + +_global_extra_lookup: dict[str, typing.Callable[[type], st.SearchStrategy | None]] = { + "numpy": _from_numpy_type, +} + + +def register(type_, fallback=None, *, module=typing): + if isinstance(type_, str): + # Use the name of generic types which are not available on all + # versions, and the function just won't be added to the registry; + # also works when module=None because typing_extensions isn't + # installed (nocover because it _is_ in our coverage tests). + type_ = getattr(module, type_, None) + if type_ is None: # pragma: no cover + return lambda f: f + + def inner(func): + nonlocal type_ + if fallback is None: + _global_type_lookup[type_] = func + return func + + @functools.wraps(func) + def really_inner(thing): + if getattr(thing, "__args__", None) is None: + return fallback + return func(thing) + + _global_type_lookup[type_] = really_inner + _global_type_lookup[get_origin(type_) or type_] = really_inner + return really_inner + + return inner + + +@register(type) +@register("Type") +@register("Type", module=typing_extensions) +def resolve_Type(thing): + if getattr(thing, "__args__", None) is None or get_args(thing) == (): + return _fallback_type_strategy + args = (thing.__args__[0],) + if is_a_union(args[0]): + args = args[0].__args__ + # Duplicate check from from_type here - only paying when needed. 
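+    # Sketch of the intent: type[int | str] resolves to sampled_from([int, str]),
+    # while a bare `type` falls back to _fallback_type_strategy above.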
+ args = list(args) + for i, a in enumerate(args): + if type(a) in (typing.ForwardRef, str): + try: + args[i] = getattr(builtins, getattr(a, "__forward_arg__", a)) + except AttributeError: + raise ResolutionFailed( + f"Cannot find the type referenced by {thing} - try using " + f"st.register_type_strategy({thing}, st.from_type(...))" + ) from None + return st.sampled_from(sorted(args, key=type_sorting_key)) + + +@register("List", st.builds(list)) +def resolve_List(thing): + return st.lists(st.from_type(thing.__args__[0])) + + +@register("Tuple", st.builds(tuple)) +def resolve_Tuple(thing): + elem_types = getattr(thing, "__args__", None) or () + if len(elem_types) == 2 and elem_types[-1] is Ellipsis: + return st.lists(st.from_type(elem_types[0])).map(tuple) + elif len(elem_types) == 1 and elem_types[0] == (): + return st.tuples() # Empty tuple; see issue #1583 + return st.tuples(*map(st.from_type, elem_types)) + + +def _can_hash(val): + try: + hash(val) + return True + except Exception: + return False + + +# Some types are subclasses of typing.Hashable, because they define a __hash__ +# method, but have non-hashable instances such as `Decimal("snan")` or may contain +# such instances (e.g. `FrozenSet[Decimal]`). We therefore keep this whitelist of +# types which are always hashable, and apply the `_can_hash` filter to all others. +# Our goal is not completeness, it's to get a small performance boost for the most +# common cases, and a short whitelist is basically free to maintain. +ALWAYS_HASHABLE_TYPES = {type(None), bool, int, float, complex, str, bytes} + + +def _from_hashable_type(type_): + if type_ in ALWAYS_HASHABLE_TYPES: + return st.from_type(type_) + else: + return st.from_type(type_).filter(_can_hash) + + +@register("Set", st.builds(set)) +@register(typing.MutableSet, st.builds(set)) +def resolve_Set(thing): + return st.sets(_from_hashable_type(thing.__args__[0])) + + +@register("FrozenSet", st.builds(frozenset)) +def resolve_FrozenSet(thing): + return st.frozensets(_from_hashable_type(thing.__args__[0])) + + +@register("Dict", st.builds(dict)) +def resolve_Dict(thing): + # If thing is a Collection instance, we need to fill in the values + keys, vals, *_ = thing.__args__ * 2 + return st.dictionaries( + _from_hashable_type(keys), + st.none() if vals is None else st.from_type(vals), + ) + + +@register("DefaultDict", st.builds(collections.defaultdict)) +@register("DefaultDict", st.builds(collections.defaultdict), module=typing_extensions) +def resolve_DefaultDict(thing): + return resolve_Dict(thing).map(lambda d: collections.defaultdict(None, d)) + + +@register(typing.ItemsView, st.builds(dict).map(dict.items)) +def resolve_ItemsView(thing): + return resolve_Dict(thing).map(dict.items) + + +@register(typing.KeysView, st.builds(dict).map(dict.keys)) +def resolve_KeysView(thing): + return st.dictionaries(_from_hashable_type(thing.__args__[0]), st.none()).map( + dict.keys + ) + + +@register(typing.ValuesView, st.builds(dict).map(dict.values)) +def resolve_ValuesView(thing): + return st.dictionaries(st.integers(), st.from_type(thing.__args__[0])).map( + dict.values + ) + + +@register(typing.Iterator, st.iterables(st.nothing())) +def resolve_Iterator(thing): + return st.iterables(st.from_type(thing.__args__[0])) + + +@register(collections.Counter, st.builds(collections.Counter)) +def resolve_Counter(thing): + return st.dictionaries( + keys=st.from_type(thing.__args__[0]), + values=st.integers(), + ).map(collections.Counter) + + +@register(collections.deque, st.builds(collections.deque)) 
+def resolve_deque(thing): + return st.lists(st.from_type(thing.__args__[0])).map(collections.deque) + + +@register(collections.ChainMap, st.builds(dict).map(collections.ChainMap)) +def resolve_ChainMap(thing): + return resolve_Dict(thing).map(collections.ChainMap) + + +@register(collections.OrderedDict, st.builds(dict).map(collections.OrderedDict)) +def resolve_OrderedDict(thing): + return resolve_Dict(thing).map(collections.OrderedDict) + + +@register(typing.Pattern, st.builds(re.compile, st.sampled_from(["", b""]))) +def resolve_Pattern(thing): + if isinstance(thing.__args__[0], typing.TypeVar): # pragma: no cover + # FIXME: this was covered on Python 3.8, but isn't on 3.10 - we should + # work out why not and write some extra tests to help avoid regressions. + return st.builds(re.compile, st.sampled_from(["", b""])) + return st.just(re.compile(thing.__args__[0]())) + + +@register( + typing.Match, + st.text().map(partial(re.match, ".", flags=re.DOTALL)).filter(bool), +) +def resolve_Match(thing): + if thing.__args__[0] == bytes: + return ( + st.binary(min_size=1) + .map(lambda c: re.match(b".", c, flags=re.DOTALL)) + .filter(bool) + ) + return st.text().map(lambda c: re.match(".", c, flags=re.DOTALL)).filter(bool) + + +class GeneratorStrategy(st.SearchStrategy): + def __init__(self, yields, returns): + super().__init__() + assert isinstance(yields, st.SearchStrategy) + assert isinstance(returns, st.SearchStrategy) + self.yields = yields + self.returns = returns + + def __repr__(self) -> str: + return f"" + + def do_draw(self, data): + elements = conjecture_utils_many(data, min_size=0, max_size=100, average_size=5) + while elements.more(): + yield data.draw(self.yields) + return data.draw(self.returns) + + +@register(typing.Generator, GeneratorStrategy(st.none(), st.none())) +def resolve_Generator(thing): + yields, _, returns = thing.__args__ + return GeneratorStrategy(st.from_type(yields), st.from_type(returns)) + + +@register(typing.Callable, st.functions()) +def resolve_Callable(thing): + # Generated functions either accept no arguments, or arbitrary arguments. + # This is looser than ideal, but anything tighter would generally break + # use of keyword arguments and we'd rather not force positional-only. + if not thing.__args__: # pragma: no cover # varies by minor version + return st.functions() + + *args_types, return_type = thing.__args__ + + # Note that a list can only appear in __args__ under Python 3.9 with the + # collections.abc version; see https://bugs.python.org/issue42195 + if len(args_types) == 1 and isinstance(args_types[0], list): + args_types = tuple(args_types[0]) # pragma: no cover + + pep612 = ConcatenateTypes + ParamSpecTypes + for arg in args_types: + # awkward dance because you can't use Concatenate in isistance or issubclass + if getattr(arg, "__origin__", arg) in pep612 or type(arg) in pep612: + raise InvalidArgument( + "Hypothesis can't yet construct a strategy for instances of a " + f"Callable type parametrized by {arg!r}. Consider using an " + "explicit strategy, or opening an issue." + ) + if get_origin(return_type) in TypeGuardTypes: + raise InvalidArgument( + "Hypothesis cannot yet construct a strategy for callables which " + f"are PEP-647 TypeGuards or PEP-742 TypeIs (got {return_type!r}). " + "Consider using an explicit strategy, or opening an issue." 
+ ) + + if get_origin(thing) is collections.abc.Callable and return_type is None: + return_type = type(None) + + return st.functions( + like=(lambda *a, **k: None) if args_types else (lambda: None), + returns=st.from_type(return_type), + ) + + +@register(typing.TypeVar) +@register("TypeVar", module=typing_extensions) +def resolve_TypeVar(thing): + type_var_key = f"typevar={thing!r}" + + bound = getattr(thing, "__bound__", None) + default = getattr(thing, "__default__", NoDefaults[0]) + original_strategies = [] + + def resolve_strategies(typ): + if isinstance(typ, typing.ForwardRef): + # TODO: on Python 3.13 and later, we should work out what type_params + # could be part of this type, and pass them in here. + typ = _try_import_forward_ref(thing, typ, type_params=()) + strat = unwrap_strategies(st.from_type(typ)) + if not isinstance(strat, OneOfStrategy): + original_strategies.append(strat) + else: + original_strategies.extend(strat.original_strategies) + + if bound is not None: + resolve_strategies(bound) + if default not in NoDefaults: # pragma: no cover + # Coverage requires 3.13 or `typing_extensions` package. + resolve_strategies(default) + + if original_strategies: + # The bound / default was a union, or we resolved it as a union of subtypes, + # so we need to unpack the strategy to ensure consistency across uses. + # This incantation runs a sampled_from over the strategies inferred for + # each part of the union, wraps that in shared so that we only generate + # from one type per testcase, and flatmaps that back to instances. + return st.shared( + st.sampled_from(original_strategies), key=type_var_key + ).flatmap(lambda s: s) + + builtin_scalar_types = [type(None), bool, int, float, str, bytes] + return st.shared( + st.sampled_from( + # Constraints may be None or () on various Python versions. + getattr(thing, "__constraints__", None) + or builtin_scalar_types, + ), + key=type_var_key, + ).flatmap(st.from_type) + + +if sys.version_info[:2] >= (3, 14): + # memoryview is newly generic in 3.14. see + # https://github.com/python/cpython/issues/126012 + # and https://docs.python.org/3/library/stdtypes.html#memoryview + + @register(memoryview, st.binary().map(memoryview)) + def resolve_memoryview(thing): + return st.from_type(thing.__args__[0]).map(memoryview) diff --git a/vendored/hypothesis/strategies/_internal/utils.py b/vendored/hypothesis/strategies/_internal/utils.py new file mode 100644 index 0000000..c965643 --- /dev/null +++ b/vendored/hypothesis/strategies/_internal/utils.py @@ -0,0 +1,224 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import dataclasses +import sys +from collections.abc import Callable +from functools import partial +from typing import Literal, TypeAlias, TypeVar +from weakref import WeakValueDictionary + +from hypothesis.errors import InvalidArgument +from hypothesis.internal.cache import LRUReusedCache +from hypothesis.internal.floats import clamp, float_to_int +from hypothesis.internal.reflection import proxies +from hypothesis.vendor.pretty import pretty + +T = TypeVar("T") +ValueKey: TypeAlias = tuple[type, object] +# (fn, args, kwargs) +StrategyCacheKey: TypeAlias = tuple[ + object, tuple[ValueKey, ...], frozenset[tuple[str, ValueKey]] +] + +_all_strategies: WeakValueDictionary[str, Callable] = WeakValueDictionary() +# note: LRUReusedCache is already thread-local internally +_STRATEGY_CACHE = LRUReusedCache[StrategyCacheKey, object](1024) + + +def _value_key(value: object) -> ValueKey: + if isinstance(value, float): + return (float, float_to_int(value)) + return (type(value), value) + + +def clear_strategy_cache() -> None: + _STRATEGY_CACHE.clear() + + +def cacheable(fn: T) -> T: + from hypothesis.control import _current_build_context + from hypothesis.strategies._internal.strategies import SearchStrategy + + @proxies(fn) + def cached_strategy(*args, **kwargs): + context = _current_build_context.value + if context is not None and context.data.provider.avoid_realization: + return fn(*args, **kwargs) + + try: + kwargs_cache_key = {(k, _value_key(v)) for k, v in kwargs.items()} + except TypeError: + return fn(*args, **kwargs) + + cache_key = ( + fn, + tuple(_value_key(v) for v in args), + frozenset(kwargs_cache_key), + ) + try: + return _STRATEGY_CACHE[cache_key] + except KeyError: + pass + except TypeError: + return fn(*args, **kwargs) + + result = fn(*args, **kwargs) + if not isinstance(result, SearchStrategy) or result.is_cacheable: + _STRATEGY_CACHE[cache_key] = result + return result + + # note that calling this clears the full _STRATEGY_CACHE for all strategies, + # not just the cache for this strategy. + cached_strategy.__clear_cache = clear_strategy_cache # type: ignore + return cached_strategy + + +def defines_strategy( + *, + force_reusable_values: bool = False, + eager: bool | Literal["try"] = False, +) -> Callable[[T], T]: + """ + Each standard strategy function provided to users by Hypothesis should be + decorated with @defines_strategy. This registers the strategy with _all_strategies, + which is used in our own test suite to check that e.g. we document all strategies + in sphinx. + + If you're reading this and are the author of a third-party strategy library: + don't worry, third-party strategies don't need to be decorated with + @defines_strategy. This function is internal to Hypothesis and not intended + for outside use. + + Parameters + ---------- + force_reusable_values : bool + If ``True``, strategies returned from the strategy function will have + ``.has_reusable_values == True`` set, even if it uses maps/filters or + non-reusable strategies internally. This tells our numpy/pandas strategies + that they can implicitly use such strategies as background values. + eager : bool | "try" + If ``True``, strategies returned by the strategy function are returned + as-is, and not wrapped in LazyStrategy. + + If "try", we first attempt to call the strategy function and return the + resulting strategy. If this throws an exception, we treat it the same as + ``eager = False``, by returning the strategy function wrapped in a + LazyStrategy. 
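+
+    A minimal usage sketch (illustrative only; assumes ``integers`` is imported
+    from ``hypothesis.strategies``)::
+
+        @cacheable
+        @defines_strategy(force_reusable_values=True)
+        def my_small_integers():
+            return integers(0, 10)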
+ """ + + if eager is not False and force_reusable_values: # pragma: no cover + # We could support eager + force_reusable_values with a suitable wrapper, + # but there are currently no callers that request this combination. + raise InvalidArgument( + f"Passing both eager={eager} and force_reusable_values=True is " + "currently not supported" + ) + + def decorator(strategy_definition): + _all_strategies[strategy_definition.__name__] = strategy_definition + + if eager is True: + return strategy_definition + + @proxies(strategy_definition) + def accept(*args, **kwargs): + from hypothesis.strategies._internal.lazy import LazyStrategy + + if eager == "try": + # Why not try this unconditionally? Because we'd end up with very + # deep nesting of recursive strategies - better to be lazy unless we + # *know* that eager evaluation is the right choice. + try: + return strategy_definition(*args, **kwargs) + except Exception: + # If invoking the strategy definition raises an exception, + # wrap that up in a LazyStrategy so it happens again later. + pass + result = LazyStrategy(strategy_definition, args, kwargs) + if force_reusable_values: + # Setting `force_has_reusable_values` here causes the recursive + # property code to set `.has_reusable_values == True`. + result.force_has_reusable_values = True + assert result.has_reusable_values + return result + + accept.is_hypothesis_strategy_function = True + return accept + + return decorator + + +def _to_jsonable(obj: object, *, avoid_realization: bool, seen: set[int]) -> object: + if isinstance(obj, (str, int, float, bool, type(None))): + # We convert integers of 2**63 to floats, to avoid crashing external + # utilities with a 64 bit integer cap (notable, sqlite). See + # https://github.com/HypothesisWorks/hypothesis/pull/3797#discussion_r1413425110 + # and https://github.com/simonw/sqlite-utils/issues/605. + if isinstance(obj, int) and not isinstance(obj, bool) and abs(obj) >= 2**63: + # Silently clamp very large ints to max_float, to avoid OverflowError when + # casting to float. (but avoid adding more constraints to symbolic values) + if avoid_realization: + return "" + obj = clamp(-sys.float_info.max, obj, sys.float_info.max) + return float(obj) + return obj + if avoid_realization: + return "" + + obj_id = id(obj) + if obj_id in seen: + return pretty(obj, cycle=True) + + recur = partial( + _to_jsonable, avoid_realization=avoid_realization, seen=seen | {obj_id} + ) + if isinstance(obj, (list, tuple, set, frozenset)): + if isinstance(obj, tuple) and hasattr(obj, "_asdict"): + return recur(obj._asdict()) # treat namedtuples as dicts + return [recur(x) for x in obj] + if isinstance(obj, dict): + return { + k if isinstance(k, str) else pretty(k): recur(v) for k, v in obj.items() + } + + # Hey, might as well try calling a .to_json() method - it works for Pandas! + # We try this before the below general-purpose handlers to give folks a + # chance to control this behavior on their custom classes. 
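+    # Sketch with an assumed user-defined class: an object whose class provides
+    #     def to_json(self): return {"id": self.id}
+    # is serialised via that method here, before the dataclass / attrs / pydantic
+    # handling below is consulted.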
+ try: + return recur(obj.to_json()) # type: ignore + except Exception: + pass + + # Special handling for dataclasses, attrs, and pydantic classes + if dataclasses.is_dataclass(obj) and not isinstance(obj, type): + # Avoid dataclasses.asdict here to ensure that inner to_json overrides + # can get called as well + return { + field.name: recur(getattr(obj, field.name)) + for field in dataclasses.fields(obj) + } + if (attr := sys.modules.get("attr")) is not None and attr.has(type(obj)): + return recur(attr.asdict(obj, recurse=False)) + if (pyd := sys.modules.get("pydantic")) and isinstance(obj, pyd.BaseModel): + return recur(obj.model_dump()) + + # If all else fails, we'll just pretty-print as a string. + return pretty(obj) + + +def to_jsonable(obj: object, *, avoid_realization: bool) -> object: + """Recursively convert an object to json-encodable form. + + This is not intended to round-trip, but rather provide an analysis-ready + format for observability. To avoid side affects, we pretty-print all but + known types. + """ + return _to_jsonable(obj, avoid_realization=avoid_realization, seen=set()) diff --git a/vendored/hypothesis/utils/__init__.py b/vendored/hypothesis/utils/__init__.py new file mode 100644 index 0000000..ad785ff --- /dev/null +++ b/vendored/hypothesis/utils/__init__.py @@ -0,0 +1,12 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +"""hypothesis.utils is a package for things that you can consider part of the +semi-public Hypothesis API but aren't really the core point.""" diff --git a/vendored/hypothesis/utils/conventions.py b/vendored/hypothesis/utils/conventions.py new file mode 100644 index 0000000..7da7e20 --- /dev/null +++ b/vendored/hypothesis/utils/conventions.py @@ -0,0 +1,23 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + + +class UniqueIdentifier: + """A factory for sentinel objects with nice reprs.""" + + def __init__(self, identifier: str) -> None: + self.identifier = identifier + + def __repr__(self) -> str: + return self.identifier + + +infer = ... +not_set = UniqueIdentifier("not_set") diff --git a/vendored/hypothesis/utils/dynamicvariables.py b/vendored/hypothesis/utils/dynamicvariables.py new file mode 100644 index 0000000..11b5b89 --- /dev/null +++ b/vendored/hypothesis/utils/dynamicvariables.py @@ -0,0 +1,39 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
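+#
+# Usage sketch for the DynamicVariable class defined below (illustrative):
+#
+#     verbosity = DynamicVariable("normal")
+#     with verbosity.with_value("debug"):
+#         assert verbosity.value == "debug"
+#     assert verbosity.value == "normal"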
+ +import threading +from collections.abc import Generator +from contextlib import contextmanager +from typing import Generic, TypeVar + +T = TypeVar("T") + + +class DynamicVariable(Generic[T]): + def __init__(self, default: T) -> None: + self.default = default + self.data = threading.local() + + @property + def value(self) -> T: + return getattr(self.data, "value", self.default) + + @value.setter + def value(self, value: T) -> None: + self.data.value = value + + @contextmanager + def with_value(self, value: T) -> Generator[None, None, None]: + old_value = self.value + try: + self.data.value = value + yield + finally: + self.data.value = old_value diff --git a/vendored/hypothesis/utils/terminal.py b/vendored/hypothesis/utils/terminal.py new file mode 100644 index 0000000..7c45df1 --- /dev/null +++ b/vendored/hypothesis/utils/terminal.py @@ -0,0 +1,38 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import os +from typing import Literal + + +def guess_background_color() -> Literal["light", "dark", "unknown"]: + """Returns one of "dark", "light", or "unknown". + + This is basically just guessing, but better than always guessing "dark"! + See also https://stackoverflow.com/questions/2507337/ and + https://unix.stackexchange.com/questions/245378/ + """ + django_colors = os.getenv("DJANGO_COLORS", "") + for theme in ("light", "dark"): + if theme in django_colors.split(";"): + return theme + # Guessing based on the $COLORFGBG environment variable + try: + fg, *_, bg = os.getenv("COLORFGBG", "").split(";") + except Exception: + pass + else: + # 0=black, 7=light-grey, 15=white ; we don't interpret other colors + if fg in ("7", "15") and bg == "0": + return "dark" + elif fg == "0" and bg in ("7", "15"): + return "light" + # TODO: Guessing based on the xterm control sequence + return "unknown" diff --git a/vendored/hypothesis/utils/threading.py b/vendored/hypothesis/utils/threading.py new file mode 100644 index 0000000..8dbbb32 --- /dev/null +++ b/vendored/hypothesis/utils/threading.py @@ -0,0 +1,52 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import threading +from collections.abc import Callable +from typing import Any + + +class ThreadLocal: + """ + Manages thread-local state. ThreadLocal forwards getattr and setattr to a + threading.local() instance. The passed kwargs defines the available attributes + on the threadlocal and their default values. + + The only supported names to geattr and setattr are the keys of the passed kwargs. + """ + + def __init__(self, **kwargs: Callable) -> None: + for name, value in kwargs.items(): + if not callable(value): + raise TypeError(f"Attribute {name} must be a callable. 
Got {value}") + + self.__initialized = False + self.__kwargs = kwargs + self.__threadlocal = threading.local() + self.__initialized = True + + def __getattr__(self, name: str) -> Any: + if name not in self.__kwargs: + raise AttributeError(f"No attribute {name}") + + if not hasattr(self.__threadlocal, name): + default = self.__kwargs[name]() + setattr(self.__threadlocal, name, default) + + return getattr(self.__threadlocal, name) + + def __setattr__(self, name: str, value: Any) -> None: + # disable attribute-forwarding while initializing + if "_ThreadLocal__initialized" not in self.__dict__ or not self.__initialized: + super().__setattr__(name, value) + else: + if name not in self.__kwargs: + raise AttributeError(f"No attribute {name}") + setattr(self.__threadlocal, name, value) diff --git a/vendored/hypothesis/vendor/__init__.py b/vendored/hypothesis/vendor/__init__.py new file mode 100644 index 0000000..fcb1ac6 --- /dev/null +++ b/vendored/hypothesis/vendor/__init__.py @@ -0,0 +1,9 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. diff --git a/vendored/hypothesis/vendor/pretty.py b/vendored/hypothesis/vendor/pretty.py new file mode 100644 index 0000000..af790bb --- /dev/null +++ b/vendored/hypothesis/vendor/pretty.py @@ -0,0 +1,984 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +""" +Python advanced pretty printer. This pretty printer is intended to +replace the old `pprint` python module which does not allow developers +to provide their own pretty print callbacks. +This module is based on ruby's `prettyprint.rb` library by `Tanaka Akira`. +Example Usage +------------- +To get a string of the output use `pretty`:: + from pretty import pretty + string = pretty(complex_object) +Extending +--------- +The pretty library allows developers to add pretty printing rules for their +own objects. This process is straightforward. All you have to do is to +add a `_repr_pretty_` method to your object and call the methods on the +pretty printer passed:: + class MyObject(object): + def _repr_pretty_(self, p, cycle): + ... +Here is an example implementation of a `_repr_pretty_` method for a list +subclass:: + class MyList(list): + def _repr_pretty_(self, p, cycle): + if cycle: + p.text('MyList(...)') + else: + with p.group(8, 'MyList([', '])'): + for idx, item in enumerate(self): + if idx: + p.text(',') + p.breakable() + p.pretty(item) +The `cycle` parameter is `True` if pretty detected a cycle. You *have* to +react to that or the result is an infinite loop. `p.text()` just adds +non breaking text to the output, `p.breakable()` either adds a whitespace +or breaks here. If you pass it an argument it's used instead of the +default space. `p.pretty` prettyprints another object using the pretty print +method. 
+The first parameter to the `group` function specifies the extra indentation +of the next line. In this example the next item will either be on the same +line (if the items are short enough) or aligned with the right edge of the +opening bracket of `MyList`. +If you just want to indent something you can use the group function +without open / close parameters. You can also use this code:: + with p.indent(2): + ... +Inheritance diagram: +.. inheritance-diagram:: IPython.lib.pretty + :parts: 3 +:copyright: 2007 by Armin Ronacher. + Portions (c) 2009 by Robert Kern. +:license: BSD License. +""" + +import ast +import datetime +import re +import struct +import sys +import types +import warnings +from collections import Counter, OrderedDict, defaultdict, deque +from collections.abc import Callable, Generator, Iterable, Sequence +from contextlib import contextmanager, suppress +from enum import Enum, Flag +from functools import partial +from io import StringIO, TextIOBase +from math import copysign, isnan +from typing import TYPE_CHECKING, Any, Optional, TypeAlias, TypeVar + +if TYPE_CHECKING: + from hypothesis.control import BuildContext + +T = TypeVar("T") +PrettyPrintFunction: TypeAlias = Callable[[Any, "RepresentationPrinter", bool], None] + +__all__ = [ + "IDKey", + "RepresentationPrinter", + "pretty", +] + + +def _safe_getattr(obj: object, attr: str, default: Any | None = None) -> Any: + """Safe version of getattr. + + Same as getattr, but will return ``default`` on any Exception, + rather than raising. + + """ + try: + return getattr(obj, attr, default) + except Exception: + return default + + +def pretty(obj: object, *, cycle: bool = False) -> str: + """Pretty print the object's representation.""" + printer = RepresentationPrinter() + printer.pretty(obj, cycle=cycle) + return printer.getvalue() + + +class IDKey: + def __init__(self, value: object): + self.value = value + + def __hash__(self) -> int: + return hash((type(self), id(self.value))) + + def __eq__(self, __o: object) -> bool: + return isinstance(__o, type(self)) and id(self.value) == id(__o.value) + + +class RepresentationPrinter: + """Special pretty printer that has a `pretty` method that calls the pretty + printer for a python object. + + This class stores processing data on `self` so you must *never* use + this class in a threaded environment. Always lock it or + reinstantiate it. + + """ + + def __init__( + self, + output: TextIOBase | None = None, + *, + context: Optional["BuildContext"] = None, + ) -> None: + """Optionally pass the output stream and the current build context. + + We use the context to represent objects constructed by strategies by showing + *how* they were constructed, and add annotations showing which parts of the + minimal failing example can vary without changing the test result. 
+ """ + self.broken: bool = False + self.output: TextIOBase = StringIO() if output is None else output + self.max_width: int = 79 + self.max_seq_length: int = 1000 + self.output_width: int = 0 + self.buffer_width: int = 0 + self.buffer: deque[Breakable | Text] = deque() + + root_group = Group(0) + self.group_stack = [root_group] + self.group_queue = GroupQueue(root_group) + self.indentation: int = 0 + + self.stack: list[int] = [] + self.singleton_pprinters: dict[int, PrettyPrintFunction] = {} + self.type_pprinters: dict[type, PrettyPrintFunction] = {} + self.deferred_pprinters: dict[tuple[str, str], PrettyPrintFunction] = {} + # If IPython has been imported, load up their pretty-printer registry + if "IPython.lib.pretty" in sys.modules: + ipp = sys.modules["IPython.lib.pretty"] + self.singleton_pprinters.update(ipp._singleton_pprinters) + self.type_pprinters.update(ipp._type_pprinters) + self.deferred_pprinters.update(ipp._deferred_type_pprinters) + # If there's overlap between our pprinters and IPython's, we'll use ours. + self.singleton_pprinters.update(_singleton_pprinters) + self.type_pprinters.update(_type_pprinters) + self.deferred_pprinters.update(_deferred_type_pprinters) + + # for which-parts-matter, we track a mapping from the (start_idx, end_idx) + # of slices into the minimal failing example; this is per-interesting_origin + # but we report each separately so that's someone else's problem here. + # Invocations of self.repr_call() can report the slice for each argument, + # which will then be used to look up the relevant comment if any. + self.known_object_printers: dict[IDKey, list[PrettyPrintFunction]] + self.slice_comments: dict[tuple[int, int], str] + if context is None: + self.known_object_printers = defaultdict(list) + self.slice_comments = {} + else: + self.known_object_printers = context.known_object_printers + self.slice_comments = context.data.slice_comments + assert all(isinstance(k, IDKey) for k in self.known_object_printers) + + def pretty(self, obj: object, *, cycle: bool = False) -> None: + """Pretty print the given object.""" + obj_id = id(obj) + cycle = cycle or obj_id in self.stack + self.stack.append(obj_id) + try: + with self.group(): + obj_class = _safe_getattr(obj, "__class__", None) or type(obj) + # First try to find registered singleton printers for the type. + try: + printer = self.singleton_pprinters[obj_id] + except (TypeError, KeyError): + pass + else: + return printer(obj, self, cycle) + + # Look for the _repr_pretty_ method which allows users + # to define custom pretty printing. + # Some objects automatically create any requested + # attribute. Try to ignore most of them by checking for + # callability. + pretty_method = _safe_getattr(obj, "_repr_pretty_", None) + if callable(pretty_method): + return pretty_method(self, cycle) + + # Next walk the mro and check for either: + # 1) a registered printer + # 2) a _repr_pretty_ method + for cls in obj_class.__mro__: + if cls in self.type_pprinters: + # printer registered in self.type_pprinters + return self.type_pprinters[cls](obj, self, cycle) + else: + # Check if the given class is specified in the deferred type + # registry; move it to the regular type registry if so. + key = ( + _safe_getattr(cls, "__module__", None), + _safe_getattr(cls, "__name__", None), + ) + if key in self.deferred_pprinters: + # Move the printer over to the regular registry. 
+ printer = self.deferred_pprinters.pop(key) + self.type_pprinters[cls] = printer + return printer(obj, self, cycle) + else: + if hasattr(cls, "__attrs_attrs__"): # pragma: no cover + return pprint_fields( + obj, + self, + cycle, + [at.name for at in cls.__attrs_attrs__ if at.init], + ) + if hasattr(cls, "__dataclass_fields__"): + return pprint_fields( + obj, + self, + cycle, + [ + k + for k, v in cls.__dataclass_fields__.items() + if v.init + ], + ) + # Now check for object-specific printers which show how this + # object was constructed (a Hypothesis special feature). + printers = self.known_object_printers[IDKey(obj)] + if len(printers) == 1: + return printers[0](obj, self, cycle) + elif printers: + # We've ended up with multiple registered functions for the same + # object, which must have been returned from multiple calls due to + # e.g. memoization. If they all return the same string, we'll use + # the first; otherwise we'll pretend that *none* were registered. + # + # It's annoying, but still seems to be the best option for which- + # parts-matter too, as unreportable results aren't very useful. + strs = set() + for f in printers: + p = RepresentationPrinter() + f(obj, p, cycle) + strs.add(p.getvalue()) + if len(strs) == 1: + return printers[0](obj, self, cycle) + + # A user-provided repr. Find newlines and replace them with p.break_() + return _repr_pprint(obj, self, cycle) + finally: + self.stack.pop() + + def _break_outer_groups(self) -> None: + while self.max_width < self.output_width + self.buffer_width: + group = self.group_queue.deq() + if not group: + return + while group.breakables: + x = self.buffer.popleft() + self.output_width = x.output(self.output, self.output_width) + self.buffer_width -= x.width + while self.buffer and isinstance(self.buffer[0], Text): + x = self.buffer.popleft() + self.output_width = x.output(self.output, self.output_width) + self.buffer_width -= x.width + + def text(self, obj: str) -> None: + """Add literal text to the output.""" + width = len(obj) + if self.buffer: + text = self.buffer[-1] + if not isinstance(text, Text): + text = Text() + self.buffer.append(text) + text.add(obj, width) + self.buffer_width += width + self._break_outer_groups() + else: + self.output.write(obj) + self.output_width += width + + def breakable(self, sep: str = " ") -> None: + """Add a breakable separator to the output. + + This does not mean that it will automatically break here. If no + breaking on this position takes place the `sep` is inserted + which default to one space. + + """ + width = len(sep) + group = self.group_stack[-1] + if group.want_break: + self.flush() + self.output.write("\n" + " " * self.indentation) + self.output_width = self.indentation + self.buffer_width = 0 + else: + self.buffer.append(Breakable(sep, width, self)) + self.buffer_width += width + self._break_outer_groups() + + def break_(self) -> None: + """Explicitly insert a newline into the output, maintaining correct + indentation.""" + self.flush() + self.output.write("\n" + " " * self.indentation) + self.output_width = self.indentation + self.buffer_width = 0 + + @contextmanager + def indent(self, indent: int) -> Generator[None, None, None]: + """`with`-statement support for indenting/dedenting.""" + self.indentation += indent + try: + yield + finally: + self.indentation -= indent + + @contextmanager + def group( + self, indent: int = 0, open: str = "", close: str = "" + ) -> Generator[None, None, None]: + """Context manager for an indented group. 
+ + with p.group(1, '{', '}'): + + The first parameter specifies the indentation for the next line + (usually the width of the opening text), the second and third the + opening and closing delimiters. + """ + self.begin_group(indent=indent, open=open) + try: + yield + finally: + self.end_group(dedent=indent, close=close) + + def begin_group(self, indent: int = 0, open: str = "") -> None: + """Use the `with group(...) context manager instead. + + The begin_group() and end_group() methods are for IPython compatibility only; + see https://github.com/HypothesisWorks/hypothesis/issues/3721 for details. + """ + if open: + self.text(open) + group = Group(self.group_stack[-1].depth + 1) + self.group_stack.append(group) + self.group_queue.enq(group) + self.indentation += indent + + def end_group(self, dedent: int = 0, close: str = "") -> None: + """See begin_group().""" + self.indentation -= dedent + group = self.group_stack.pop() + if not group.breakables: + self.group_queue.remove(group) + if close: + self.text(close) + + def _enumerate(self, seq: Iterable[T]) -> Generator[tuple[int, T], None, None]: + """Like enumerate, but with an upper limit on the number of items.""" + for idx, x in enumerate(seq): + if self.max_seq_length and idx >= self.max_seq_length: + self.text(",") + self.breakable() + self.text("...") + return + yield idx, x + + def flush(self) -> None: + """Flush data that is left in the buffer.""" + for data in self.buffer: + self.output_width += data.output(self.output, self.output_width) + self.buffer.clear() + self.buffer_width = 0 + + def getvalue(self) -> str: + assert isinstance(self.output, StringIO) + self.flush() + return self.output.getvalue() + + def maybe_repr_known_object_as_call( + self, + obj: object, + cycle: bool, + name: str, + args: Sequence[object], + kwargs: dict[str, object], + ) -> None: + # pprint this object as a call, _unless_ the call would be invalid syntax + # and the repr would be valid and there are not comments on arguments. + if cycle: + return self.text("<...>") + # Since we don't yet track comments for sub-argument parts, we omit the + # "if no comments" condition here for now. Add it when we revive + # https://github.com/HypothesisWorks/hypothesis/pull/3624/ + with suppress(Exception): + # Check whether the repr is valid syntax: + ast.parse(repr(obj)) + # Given that the repr is valid syntax, check the call: + p = RepresentationPrinter() + p.stack = self.stack.copy() + p.known_object_printers = self.known_object_printers + p.repr_call(name, args, kwargs) + # If the call is not valid syntax, use the repr + try: + ast.parse(p.getvalue()) + except Exception: + return _repr_pprint(obj, self, cycle) + return self.repr_call(name, args, kwargs) + + def repr_call( + self, + func_name: str, + args: Sequence[object], + kwargs: dict[str, object], + *, + force_split: bool | None = None, + arg_slices: dict[str, tuple[int, int]] | None = None, + leading_comment: str | None = None, + avoid_realization: bool = False, + ) -> None: + """Helper function to represent a function call. + + - func_name, args, and kwargs should all be pretty obvious. + - If split_lines, we'll force one-argument-per-line; otherwise we'll place + calls that fit on a single line (and split otherwise). + - arg_slices is a mapping from pos-idx or keyword to (start_idx, end_idx) + of the Conjecture buffer, by which we can look up comments to add. 
+ """ + assert isinstance(func_name, str) + if func_name.startswith(("lambda:", "lambda ")): + func_name = f"({func_name})" + self.text(func_name) + all_args = [(None, v) for v in args] + list(kwargs.items()) + # int indicates the position of a positional argument, rather than a keyword + # argument. Currently no callers use this; see #3624. + comments: dict[int | str, object] = { + k: self.slice_comments[v] + for k, v in (arg_slices or {}).items() + if v in self.slice_comments + } + + if leading_comment or any(k in comments for k, _ in all_args): + # We have to split one arg per line in order to leave comments on them. + force_split = True + if force_split is None: + # We're OK with printing this call on a single line, but will it fit? + # If not, we'd rather fall back to one-argument-per-line instead. + p = RepresentationPrinter() + p.stack = self.stack.copy() + p.known_object_printers = self.known_object_printers + p.repr_call("_" * self.output_width, args, kwargs, force_split=False) + s = p.getvalue() + force_split = "\n" in s + + with self.group(indent=4, open="(", close=""): + for i, (k, v) in enumerate(all_args): + if force_split: + if i == 0 and leading_comment: + self.break_() + self.text(leading_comment) + self.break_() + else: + assert leading_comment is None # only passed by top-level report + self.breakable(" " if i else "") + if k: + self.text(f"{k}=") + if avoid_realization: + self.text("") + else: + self.pretty(v) + if force_split or i + 1 < len(all_args): + self.text(",") + comment = None + if k is not None: + comment = comments.get(i) or comments.get(k) + if comment: + self.text(f" # {comment}") + if all_args and force_split: + self.break_() + self.text(")") # after dedent + + +class Printable: + def output(self, stream: TextIOBase, output_width: int) -> int: # pragma: no cover + raise NotImplementedError + + +class Text(Printable): + def __init__(self) -> None: + self.objs: list[str] = [] + self.width: int = 0 + + def output(self, stream: TextIOBase, output_width: int) -> int: + for obj in self.objs: + stream.write(obj) + return output_width + self.width + + def add(self, obj: str, width: int) -> None: + self.objs.append(obj) + self.width += width + + +class Breakable(Printable): + def __init__(self, seq: str, width: int, pretty: RepresentationPrinter) -> None: + self.obj = seq + self.width = width + self.pretty = pretty + self.indentation = pretty.indentation + self.group = pretty.group_stack[-1] + self.group.breakables.append(self) + + def output(self, stream: TextIOBase, output_width: int) -> int: + self.group.breakables.popleft() + if self.group.want_break: + stream.write("\n" + " " * self.indentation) + return self.indentation + if not self.group.breakables: + self.pretty.group_queue.remove(self.group) + stream.write(self.obj) + return output_width + self.width + + +class Group(Printable): + def __init__(self, depth: int) -> None: + self.depth = depth + self.breakables: deque[Breakable] = deque() + self.want_break: bool = False + + +class GroupQueue: + def __init__(self, *groups: Group) -> None: + self.queue: list[list[Group]] = [] + for group in groups: + self.enq(group) + + def enq(self, group: Group) -> None: + depth = group.depth + while depth > len(self.queue) - 1: + self.queue.append([]) + self.queue[depth].append(group) + + def deq(self) -> Group | None: + for stack in self.queue: + for idx, group in enumerate(reversed(stack)): + if group.breakables: + del stack[idx] + group.want_break = True + return group + for group in stack: + group.want_break = True + 
del stack[:] + return None + + def remove(self, group: Group) -> None: + try: + self.queue[group.depth].remove(group) + except ValueError: + pass + + +def _seq_pprinter_factory(start: str, end: str, basetype: type) -> PrettyPrintFunction: + """Factory that returns a pprint function useful for sequences. + + Used by the default pprint for tuples, dicts, and lists. + """ + + def inner( + obj: tuple[object] | list[object], p: RepresentationPrinter, cycle: bool + ) -> None: + typ = type(obj) + if ( + basetype is not None + and typ is not basetype + and typ.__repr__ != basetype.__repr__ # type: ignore[comparison-overlap] + ): + # If the subclass provides its own repr, use it instead. + return p.text(typ.__repr__(obj)) + + if cycle: + return p.text(start + "..." + end) + step = len(start) + with p.group(step, start, end): + for idx, x in p._enumerate(obj): + if idx: + p.text(",") + p.breakable() + p.pretty(x) + if len(obj) == 1 and type(obj) is tuple: + # Special case for 1-item tuples. + p.text(",") + + return inner + + +def get_class_name(cls: type[object]) -> str: + class_name = _safe_getattr(cls, "__qualname__", cls.__name__) + assert isinstance(class_name, str) + return class_name + + +def _set_pprinter_factory( + start: str, end: str, basetype: type[object] +) -> PrettyPrintFunction: + """Factory that returns a pprint function useful for sets and + frozensets.""" + + def inner( + obj: set[Any] | frozenset[Any], + p: RepresentationPrinter, + cycle: bool, + ) -> None: + typ = type(obj) + if ( + basetype is not None + and typ is not basetype + and typ.__repr__ != basetype.__repr__ + ): + # If the subclass provides its own repr, use it instead. + return p.text(typ.__repr__(obj)) + + if cycle: + return p.text(start + "..." + end) + if not obj: + # Special case. + p.text(get_class_name(basetype) + "()") + else: + step = len(start) + with p.group(step, start, end): + # Like dictionary keys, try to sort the items if there aren't too many + items: Iterable[object] = obj + if not (p.max_seq_length and len(obj) >= p.max_seq_length): + try: + items = sorted(obj) + except Exception: + # Sometimes the items don't sort. + pass + for idx, x in p._enumerate(items): + if idx: + p.text(",") + p.breakable() + p.pretty(x) + + return inner + + +def _dict_pprinter_factory( + start: str, end: str, basetype: type[object] | None = None +) -> PrettyPrintFunction: + """Factory that returns a pprint function used by the default pprint of + dicts and dict proxies.""" + + def inner(obj: dict[object, object], p: RepresentationPrinter, cycle: bool) -> None: + typ = type(obj) + if ( + basetype is not None + and typ is not basetype + and typ.__repr__ != basetype.__repr__ + ): + # If the subclass provides its own repr, use it instead. + return p.text(typ.__repr__(obj)) + + if cycle: + return p.text("{...}") + with ( + p.group(1, start, end), + # If the dict contains both "" and b"" (empty string and empty bytes), we + # ignore the BytesWarning raised by `python -bb` mode. We can't use + # `.items()` because it might be a non-`dict` type of mapping. 
+ warnings.catch_warnings(), + ): + warnings.simplefilter("ignore", BytesWarning) + for idx, key in p._enumerate(obj): + if idx: + p.text(",") + p.breakable() + p.pretty(key) + p.text(": ") + p.pretty(obj[key]) + + inner.__name__ = f"_dict_pprinter_factory({start!r}, {end!r}, {basetype!r})" + return inner + + +def _super_pprint(obj: Any, p: RepresentationPrinter, cycle: bool) -> None: + """The pprint for the super type.""" + with p.group(8, ""): + p.pretty(obj.__thisclass__) + p.text(",") + p.breakable() + p.pretty(obj.__self__) + + +def _re_pattern_pprint(obj: re.Pattern, p: RepresentationPrinter, cycle: bool) -> None: + """The pprint function for regular expression patterns.""" + p.text("re.compile(") + pattern = repr(obj.pattern) + if pattern[:1] in "uU": # pragma: no cover + pattern = pattern[1:] + prefix = "ur" + else: + prefix = "r" + pattern = prefix + pattern.replace("\\\\", "\\") + p.text(pattern) + if obj.flags: + p.text(",") + p.breakable() + done_one = False + for flag in ( + "TEMPLATE", + "IGNORECASE", + "LOCALE", + "MULTILINE", + "DOTALL", + "UNICODE", + "VERBOSE", + "DEBUG", + ): + if obj.flags & getattr(re, flag, 0): + if done_one: + p.text("|") + p.text("re." + flag) + done_one = True + p.text(")") + + +def _type_pprint(obj: type[object], p: RepresentationPrinter, cycle: bool) -> None: + """The pprint for classes and types.""" + # Heap allocated types might not have the module attribute, + # and others may set it to None. + + # Checks for a __repr__ override in the metaclass + # != rather than is not because pypy compatibility + if type(obj).__repr__ != type.__repr__: # type: ignore[comparison-overlap] + _repr_pprint(obj, p, cycle) + return + + mod = _safe_getattr(obj, "__module__", None) + try: + name = obj.__qualname__ + except Exception: # pragma: no cover + name = obj.__name__ + if not isinstance(name, str): + name = "" + + if mod in (None, "__builtin__", "builtins", "exceptions"): + p.text(name) + else: + p.text(mod + "." 
+ name) + + +def _repr_pprint(obj: object, p: RepresentationPrinter, cycle: bool) -> None: + """A pprint that just redirects to the normal repr function.""" + # Find newlines and replace them with p.break_() + output = repr(obj) + for idx, output_line in enumerate(output.splitlines()): + if idx: + p.break_() + p.text(output_line) + + +def pprint_fields( + obj: object, p: RepresentationPrinter, cycle: bool, fields: Iterable[str] +) -> None: + name = get_class_name(obj.__class__) + if cycle: + return p.text(f"{name}(...)") + with p.group(1, name + "(", ")"): + for idx, field in enumerate(fields): + if idx: + p.text(",") + p.breakable() + p.text(field) + p.text("=") + p.pretty(getattr(obj, field)) + + +def _function_pprint( + obj: types.FunctionType | types.BuiltinFunctionType | types.MethodType, + p: RepresentationPrinter, + cycle: bool, +) -> None: + """Base pprint for all functions and builtin functions.""" + from hypothesis.internal.reflection import get_pretty_function_description + + p.text(get_pretty_function_description(obj)) + + +def _exception_pprint( + obj: BaseException, p: RepresentationPrinter, cycle: bool +) -> None: + """Base pprint for all exceptions.""" + name = getattr(obj.__class__, "__qualname__", obj.__class__.__name__) + if obj.__class__.__module__ not in ("exceptions", "builtins"): + name = f"{obj.__class__.__module__}.{name}" + step = len(name) + 1 + with p.group(step, name + "(", ")"): + for idx, arg in enumerate(getattr(obj, "args", ())): + if idx: + p.text(",") + p.breakable() + p.pretty(arg) + + +def _repr_integer(obj: int, p: RepresentationPrinter, cycle: bool) -> None: + if abs(obj) < 1_000_000_000: + p.text(repr(obj)) + elif abs(obj) < 10**640: + # add underscores for integers over ten decimal digits + p.text(f"{obj:#_d}") + else: + # for very very large integers, use hex because power-of-two bases are cheaper + # https://docs.python.org/3/library/stdtypes.html#integer-string-conversion-length-limitation + p.text(f"{obj:#_x}") + + +def _repr_float_counting_nans( + obj: float, p: RepresentationPrinter, cycle: bool +) -> None: + if isnan(obj): + if struct.pack("!d", abs(obj)) != struct.pack("!d", float("nan")): + show = hex(*struct.unpack("Q", struct.pack("d", obj))) + return p.text(f"struct.unpack('d', struct.pack('Q', {show}))[0]") + elif copysign(1.0, obj) == -1.0: + return p.text("-nan") + p.text(repr(obj)) + + +#: printers for builtin types +_type_pprinters: dict[type, PrettyPrintFunction] = { + int: _repr_integer, + float: _repr_float_counting_nans, + str: _repr_pprint, + tuple: _seq_pprinter_factory("(", ")", tuple), + list: _seq_pprinter_factory("[", "]", list), + dict: _dict_pprinter_factory("{", "}", dict), + set: _set_pprinter_factory("{", "}", set), + frozenset: _set_pprinter_factory("frozenset({", "})", frozenset), + super: _super_pprint, + re.Pattern: _re_pattern_pprint, + type: _type_pprint, + types.FunctionType: _function_pprint, + types.BuiltinFunctionType: _function_pprint, + types.MethodType: _function_pprint, + datetime.datetime: _repr_pprint, + datetime.timedelta: _repr_pprint, + BaseException: _exception_pprint, + slice: _repr_pprint, + range: _repr_pprint, + bytes: _repr_pprint, +} + +#: printers for types specified by name +_deferred_type_pprinters: dict[tuple[str, str], PrettyPrintFunction] = {} + + +def for_type_by_name( + type_module: str, type_name: str, func: PrettyPrintFunction +) -> PrettyPrintFunction | None: + """Add a pretty printer for a type specified by the module and name of a + type rather than the type object itself.""" + 
key = (type_module, type_name) + oldfunc = _deferred_type_pprinters.get(key) + _deferred_type_pprinters[key] = func + return oldfunc + + +#: printers for the default singletons +_singleton_pprinters: dict[int, PrettyPrintFunction] = dict.fromkeys( + map(id, [None, True, False, Ellipsis, NotImplemented]), _repr_pprint +) + + +def _defaultdict_pprint( + obj: defaultdict[object, object], p: RepresentationPrinter, cycle: bool +) -> None: + name = obj.__class__.__name__ + with p.group(len(name) + 1, name + "(", ")"): + if cycle: + p.text("...") + else: + p.pretty(obj.default_factory) + p.text(",") + p.breakable() + p.pretty(dict(obj)) + + +def _ordereddict_pprint( + obj: OrderedDict[object, object], p: RepresentationPrinter, cycle: bool +) -> None: + name = obj.__class__.__name__ + with p.group(len(name) + 1, name + "(", ")"): + if cycle: + p.text("...") + elif obj: + p.pretty(list(obj.items())) + + +def _deque_pprint(obj: deque[object], p: RepresentationPrinter, cycle: bool) -> None: + name = obj.__class__.__name__ + with p.group(len(name) + 1, name + "(", ")"): + if cycle: + p.text("...") + else: + p.pretty(list(obj)) + + +def _counter_pprint( + obj: Counter[object], p: RepresentationPrinter, cycle: bool +) -> None: + name = obj.__class__.__name__ + with p.group(len(name) + 1, name + "(", ")"): + if cycle: + p.text("...") + elif obj: + p.pretty(dict(obj)) + + +def _repr_dataframe( + obj: object, p: RepresentationPrinter, cycle: bool +) -> None: # pragma: no cover + with p.indent(4): + p.break_() + _repr_pprint(obj, p, cycle) + p.break_() + + +def _repr_enum(obj: Enum, p: RepresentationPrinter, cycle: bool) -> None: + tname = get_class_name(type(obj)) + if isinstance(obj, Flag): + p.text( + " | ".join(f"{tname}.{x.name}" for x in type(obj) if x & obj == x) + or f"{tname}({obj.value!r})" # if no matching members + ) + else: + p.text(f"{tname}.{obj.name}") + + +class _ReprDots: + def __repr__(self) -> str: + return "..." 
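`for_type_by_name` populates the `_deferred_type_pprinters` registry keyed by `(module, name)`, so a pretty-print rule can be attached to a type without importing its module up front; `RepresentationPrinter.pretty` moves the entry into the regular type registry the first time a matching object is printed. A sketch of what a registration could look like, in the same spirit as the `collections` and `pandas` registrations below (the `Fraction` printer here is illustrative only, not part of the vendored code)::

    def _fraction_pprint(obj, p, cycle):
        # Render fractions.Fraction as a constructor-style call; `cycle` cannot
        # occur for an immutable number, so it is ignored.
        p.text(f"Fraction({obj.numerator}, {obj.denominator})")

    for_type_by_name("fractions", "Fraction", _fraction_pprint)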
+ + +def _repr_partial(obj: partial[Any], p: RepresentationPrinter, cycle: bool) -> None: + args, kw = obj.args, obj.keywords + if cycle: + args, kw = (_ReprDots(),), {} + p.repr_call(pretty(type(obj)), (obj.func, *args), kw) + + +for_type_by_name("collections", "defaultdict", _defaultdict_pprint) +for_type_by_name("collections", "OrderedDict", _ordereddict_pprint) +for_type_by_name("ordereddict", "OrderedDict", _ordereddict_pprint) +for_type_by_name("collections", "deque", _deque_pprint) +for_type_by_name("collections", "Counter", _counter_pprint) +for_type_by_name("pandas.core.frame", "DataFrame", _repr_dataframe) +for_type_by_name("enum", "Enum", _repr_enum) +for_type_by_name("functools", "partial", _repr_partial) diff --git a/vendored/hypothesis/vendor/tlds-alpha-by-domain.txt b/vendored/hypothesis/vendor/tlds-alpha-by-domain.txt new file mode 100644 index 0000000..68d5ff9 --- /dev/null +++ b/vendored/hypothesis/vendor/tlds-alpha-by-domain.txt @@ -0,0 +1,1439 @@ +# Version 2025111500, Last Updated Sat Nov 15 07:07:01 2025 UTC +AAA +AARP +ABB +ABBOTT +ABBVIE +ABC +ABLE +ABOGADO +ABUDHABI +AC +ACADEMY +ACCENTURE +ACCOUNTANT +ACCOUNTANTS +ACO +ACTOR +AD +ADS +ADULT +AE +AEG +AERO +AETNA +AF +AFL +AFRICA +AG +AGAKHAN +AGENCY +AI +AIG +AIRBUS +AIRFORCE +AIRTEL +AKDN +AL +ALIBABA +ALIPAY +ALLFINANZ +ALLSTATE +ALLY +ALSACE +ALSTOM +AM +AMAZON +AMERICANEXPRESS +AMERICANFAMILY +AMEX +AMFAM +AMICA +AMSTERDAM +ANALYTICS +ANDROID +ANQUAN +ANZ +AO +AOL +APARTMENTS +APP +APPLE +AQ +AQUARELLE +AR +ARAB +ARAMCO +ARCHI +ARMY +ARPA +ART +ARTE +AS +ASDA +ASIA +ASSOCIATES +AT +ATHLETA +ATTORNEY +AU +AUCTION +AUDI +AUDIBLE +AUDIO +AUSPOST +AUTHOR +AUTO +AUTOS +AW +AWS +AX +AXA +AZ +AZURE +BA +BABY +BAIDU +BANAMEX +BAND +BANK +BAR +BARCELONA +BARCLAYCARD +BARCLAYS +BAREFOOT +BARGAINS +BASEBALL +BASKETBALL +BAUHAUS +BAYERN +BB +BBC +BBT +BBVA +BCG +BCN +BD +BE +BEATS +BEAUTY +BEER +BERLIN +BEST +BESTBUY +BET +BF +BG +BH +BHARTI +BI +BIBLE +BID +BIKE +BING +BINGO +BIO +BIZ +BJ +BLACK +BLACKFRIDAY +BLOCKBUSTER +BLOG +BLOOMBERG +BLUE +BM +BMS +BMW +BN +BNPPARIBAS +BO +BOATS +BOEHRINGER +BOFA +BOM +BOND +BOO +BOOK +BOOKING +BOSCH +BOSTIK +BOSTON +BOT +BOUTIQUE +BOX +BR +BRADESCO +BRIDGESTONE +BROADWAY +BROKER +BROTHER +BRUSSELS +BS +BT +BUILD +BUILDERS +BUSINESS +BUY +BUZZ +BV +BW +BY +BZ +BZH +CA +CAB +CAFE +CAL +CALL +CALVINKLEIN +CAM +CAMERA +CAMP +CANON +CAPETOWN +CAPITAL +CAPITALONE +CAR +CARAVAN +CARDS +CARE +CAREER +CAREERS +CARS +CASA +CASE +CASH +CASINO +CAT +CATERING +CATHOLIC +CBA +CBN +CBRE +CC +CD +CENTER +CEO +CERN +CF +CFA +CFD +CG +CH +CHANEL +CHANNEL +CHARITY +CHASE +CHAT +CHEAP +CHINTAI +CHRISTMAS +CHROME +CHURCH +CI +CIPRIANI +CIRCLE +CISCO +CITADEL +CITI +CITIC +CITY +CK +CL +CLAIMS +CLEANING +CLICK +CLINIC +CLINIQUE +CLOTHING +CLOUD +CLUB +CLUBMED +CM +CN +CO +COACH +CODES +COFFEE +COLLEGE +COLOGNE +COM +COMMBANK +COMMUNITY +COMPANY +COMPARE +COMPUTER +COMSEC +CONDOS +CONSTRUCTION +CONSULTING +CONTACT +CONTRACTORS +COOKING +COOL +COOP +CORSICA +COUNTRY +COUPON +COUPONS +COURSES +CPA +CR +CREDIT +CREDITCARD +CREDITUNION +CRICKET +CROWN +CRS +CRUISE +CRUISES +CU +CUISINELLA +CV +CW +CX +CY +CYMRU +CYOU +CZ +DAD +DANCE +DATA +DATE +DATING +DATSUN +DAY +DCLK +DDS +DE +DEAL +DEALER +DEALS +DEGREE +DELIVERY +DELL +DELOITTE +DELTA +DEMOCRAT +DENTAL +DENTIST +DESI +DESIGN +DEV +DHL +DIAMONDS +DIET +DIGITAL +DIRECT +DIRECTORY +DISCOUNT +DISCOVER +DISH +DIY +DJ +DK +DM +DNP +DO +DOCS +DOCTOR +DOG +DOMAINS +DOT +DOWNLOAD +DRIVE +DTV +DUBAI +DUPONT +DURBAN +DVAG +DVR +DZ +EARTH +EAT +EC +ECO +EDEKA +EDU 
+EDUCATION +EE +EG +EMAIL +EMERCK +ENERGY +ENGINEER +ENGINEERING +ENTERPRISES +EPSON +EQUIPMENT +ER +ERICSSON +ERNI +ES +ESQ +ESTATE +ET +EU +EUROVISION +EUS +EVENTS +EXCHANGE +EXPERT +EXPOSED +EXPRESS +EXTRASPACE +FAGE +FAIL +FAIRWINDS +FAITH +FAMILY +FAN +FANS +FARM +FARMERS +FASHION +FAST +FEDEX +FEEDBACK +FERRARI +FERRERO +FI +FIDELITY +FIDO +FILM +FINAL +FINANCE +FINANCIAL +FIRE +FIRESTONE +FIRMDALE +FISH +FISHING +FIT +FITNESS +FJ +FK +FLICKR +FLIGHTS +FLIR +FLORIST +FLOWERS +FLY +FM +FO +FOO +FOOD +FOOTBALL +FORD +FOREX +FORSALE +FORUM +FOUNDATION +FOX +FR +FREE +FRESENIUS +FRL +FROGANS +FRONTIER +FTR +FUJITSU +FUN +FUND +FURNITURE +FUTBOL +FYI +GA +GAL +GALLERY +GALLO +GALLUP +GAME +GAMES +GAP +GARDEN +GAY +GB +GBIZ +GD +GDN +GE +GEA +GENT +GENTING +GEORGE +GF +GG +GGEE +GH +GI +GIFT +GIFTS +GIVES +GIVING +GL +GLASS +GLE +GLOBAL +GLOBO +GM +GMAIL +GMBH +GMO +GMX +GN +GODADDY +GOLD +GOLDPOINT +GOLF +GOO +GOODYEAR +GOOG +GOOGLE +GOP +GOT +GOV +GP +GQ +GR +GRAINGER +GRAPHICS +GRATIS +GREEN +GRIPE +GROCERY +GROUP +GS +GT +GU +GUCCI +GUGE +GUIDE +GUITARS +GURU +GW +GY +HAIR +HAMBURG +HANGOUT +HAUS +HBO +HDFC +HDFCBANK +HEALTH +HEALTHCARE +HELP +HELSINKI +HERE +HERMES +HIPHOP +HISAMITSU +HITACHI +HIV +HK +HKT +HM +HN +HOCKEY +HOLDINGS +HOLIDAY +HOMEDEPOT +HOMEGOODS +HOMES +HOMESENSE +HONDA +HORSE +HOSPITAL +HOST +HOSTING +HOT +HOTELS +HOTMAIL +HOUSE +HOW +HR +HSBC +HT +HU +HUGHES +HYATT +HYUNDAI +IBM +ICBC +ICE +ICU +ID +IE +IEEE +IFM +IKANO +IL +IM +IMAMAT +IMDB +IMMO +IMMOBILIEN +IN +INC +INDUSTRIES +INFINITI +INFO +ING +INK +INSTITUTE +INSURANCE +INSURE +INT +INTERNATIONAL +INTUIT +INVESTMENTS +IO +IPIRANGA +IQ +IR +IRISH +IS +ISMAILI +IST +ISTANBUL +IT +ITAU +ITV +JAGUAR +JAVA +JCB +JE +JEEP +JETZT +JEWELRY +JIO +JLL +JM +JMP +JNJ +JO +JOBS +JOBURG +JOT +JOY +JP +JPMORGAN +JPRS +JUEGOS +JUNIPER +KAUFEN +KDDI +KE +KERRYHOTELS +KERRYPROPERTIES +KFH +KG +KH +KI +KIA +KIDS +KIM +KINDLE +KITCHEN +KIWI +KM +KN +KOELN +KOMATSU +KOSHER +KP +KPMG +KPN +KR +KRD +KRED +KUOKGROUP +KW +KY +KYOTO +KZ +LA +LACAIXA +LAMBORGHINI +LAMER +LAND +LANDROVER +LANXESS +LASALLE +LAT +LATINO +LATROBE +LAW +LAWYER +LB +LC +LDS +LEASE +LECLERC +LEFRAK +LEGAL +LEGO +LEXUS +LGBT +LI +LIDL +LIFE +LIFEINSURANCE +LIFESTYLE +LIGHTING +LIKE +LILLY +LIMITED +LIMO +LINCOLN +LINK +LIVE +LIVING +LK +LLC +LLP +LOAN +LOANS +LOCKER +LOCUS +LOL +LONDON +LOTTE +LOTTO +LOVE +LPL +LPLFINANCIAL +LR +LS +LT +LTD +LTDA +LU +LUNDBECK +LUXE +LUXURY +LV +LY +MA +MADRID +MAIF +MAISON +MAKEUP +MAN +MANAGEMENT +MANGO +MAP +MARKET +MARKETING +MARKETS +MARRIOTT +MARSHALLS +MATTEL +MBA +MC +MCKINSEY +MD +ME +MED +MEDIA +MEET +MELBOURNE +MEME +MEMORIAL +MEN +MENU +MERCKMSD +MG +MH +MIAMI +MICROSOFT +MIL +MINI +MINT +MIT +MITSUBISHI +MK +ML +MLB +MLS +MM +MMA +MN +MO +MOBI +MOBILE +MODA +MOE +MOI +MOM +MONASH +MONEY +MONSTER +MORMON +MORTGAGE +MOSCOW +MOTO +MOTORCYCLES +MOV +MOVIE +MP +MQ +MR +MS +MSD +MT +MTN +MTR +MU +MUSEUM +MUSIC +MV +MW +MX +MY +MZ +NA +NAB +NAGOYA +NAME +NAVY +NBA +NC +NE +NEC +NET +NETBANK +NETFLIX +NETWORK +NEUSTAR +NEW +NEWS +NEXT +NEXTDIRECT +NEXUS +NF +NFL +NG +NGO +NHK +NI +NICO +NIKE +NIKON +NINJA +NISSAN +NISSAY +NL +NO +NOKIA +NORTON +NOW +NOWRUZ +NOWTV +NP +NR +NRA +NRW +NTT +NU +NYC +NZ +OBI +OBSERVER +OFFICE +OKINAWA +OLAYAN +OLAYANGROUP +OLLO +OM +OMEGA +ONE +ONG +ONL +ONLINE +OOO +OPEN +ORACLE +ORANGE +ORG +ORGANIC +ORIGINS +OSAKA +OTSUKA +OTT +OVH +PA +PAGE +PANASONIC +PARIS +PARS +PARTNERS +PARTS +PARTY +PAY +PCCW +PE +PET +PF +PFIZER +PG +PH +PHARMACY +PHD +PHILIPS +PHONE +PHOTO +PHOTOGRAPHY +PHOTOS 
+PHYSIO +PICS +PICTET +PICTURES +PID +PIN +PING +PINK +PIONEER +PIZZA +PK +PL +PLACE +PLAY +PLAYSTATION +PLUMBING +PLUS +PM +PN +PNC +POHL +POKER +POLITIE +PORN +POST +PR +PRAXI +PRESS +PRIME +PRO +PROD +PRODUCTIONS +PROF +PROGRESSIVE +PROMO +PROPERTIES +PROPERTY +PROTECTION +PRU +PRUDENTIAL +PS +PT +PUB +PW +PWC +PY +QA +QPON +QUEBEC +QUEST +RACING +RADIO +RE +READ +REALESTATE +REALTOR +REALTY +RECIPES +RED +REDUMBRELLA +REHAB +REISE +REISEN +REIT +RELIANCE +REN +RENT +RENTALS +REPAIR +REPORT +REPUBLICAN +REST +RESTAURANT +REVIEW +REVIEWS +REXROTH +RICH +RICHARDLI +RICOH +RIL +RIO +RIP +RO +ROCKS +RODEO +ROGERS +ROOM +RS +RSVP +RU +RUGBY +RUHR +RUN +RW +RWE +RYUKYU +SA +SAARLAND +SAFE +SAFETY +SAKURA +SALE +SALON +SAMSCLUB +SAMSUNG +SANDVIK +SANDVIKCOROMANT +SANOFI +SAP +SARL +SAS +SAVE +SAXO +SB +SBI +SBS +SC +SCB +SCHAEFFLER +SCHMIDT +SCHOLARSHIPS +SCHOOL +SCHULE +SCHWARZ +SCIENCE +SCOT +SD +SE +SEARCH +SEAT +SECURE +SECURITY +SEEK +SELECT +SENER +SERVICES +SEVEN +SEW +SEX +SEXY +SFR +SG +SH +SHANGRILA +SHARP +SHELL +SHIA +SHIKSHA +SHOES +SHOP +SHOPPING +SHOUJI +SHOW +SI +SILK +SINA +SINGLES +SITE +SJ +SK +SKI +SKIN +SKY +SKYPE +SL +SLING +SM +SMART +SMILE +SN +SNCF +SO +SOCCER +SOCIAL +SOFTBANK +SOFTWARE +SOHU +SOLAR +SOLUTIONS +SONG +SONY +SOY +SPA +SPACE +SPORT +SPOT +SR +SRL +SS +ST +STADA +STAPLES +STAR +STATEBANK +STATEFARM +STC +STCGROUP +STOCKHOLM +STORAGE +STORE +STREAM +STUDIO +STUDY +STYLE +SU +SUCKS +SUPPLIES +SUPPLY +SUPPORT +SURF +SURGERY +SUZUKI +SV +SWATCH +SWISS +SX +SY +SYDNEY +SYSTEMS +SZ +TAB +TAIPEI +TALK +TAOBAO +TARGET +TATAMOTORS +TATAR +TATTOO +TAX +TAXI +TC +TCI +TD +TDK +TEAM +TECH +TECHNOLOGY +TEL +TEMASEK +TENNIS +TEVA +TF +TG +TH +THD +THEATER +THEATRE +TIAA +TICKETS +TIENDA +TIPS +TIRES +TIROL +TJ +TJMAXX +TJX +TK +TKMAXX +TL +TM +TMALL +TN +TO +TODAY +TOKYO +TOOLS +TOP +TORAY +TOSHIBA +TOTAL +TOURS +TOWN +TOYOTA +TOYS +TR +TRADE +TRADING +TRAINING +TRAVEL +TRAVELERS +TRAVELERSINSURANCE +TRUST +TRV +TT +TUBE +TUI +TUNES +TUSHU +TV +TVS +TW +TZ +UA +UBANK +UBS +UG +UK +UNICOM +UNIVERSITY +UNO +UOL +UPS +US +UY +UZ +VA +VACATIONS +VANA +VANGUARD +VC +VE +VEGAS +VENTURES +VERISIGN +VERSICHERUNG +VET +VG +VI +VIAJES +VIDEO +VIG +VIKING +VILLAS +VIN +VIP +VIRGIN +VISA +VISION +VIVA +VIVO +VLAANDEREN +VN +VODKA +VOLVO +VOTE +VOTING +VOTO +VOYAGE +VU +WALES +WALMART +WALTER +WANG +WANGGOU +WATCH +WATCHES +WEATHER +WEATHERCHANNEL +WEBCAM +WEBER +WEBSITE +WED +WEDDING +WEIBO +WEIR +WF +WHOSWHO +WIEN +WIKI +WILLIAMHILL +WIN +WINDOWS +WINE +WINNERS +WME +WOLTERSKLUWER +WOODSIDE +WORK +WORKS +WORLD +WOW +WS +WTC +WTF +XBOX +XEROX +XIHUAN +XIN +XN--11B4C3D +XN--1CK2E1B +XN--1QQW23A +XN--2SCRJ9C +XN--30RR7Y +XN--3BST00M +XN--3DS443G +XN--3E0B707E +XN--3HCRJ9C +XN--3PXU8K +XN--42C2D9A +XN--45BR5CYL +XN--45BRJ9C +XN--45Q11C +XN--4DBRK0CE +XN--4GBRIM +XN--54B7FTA0CC +XN--55QW42G +XN--55QX5D +XN--5SU34J936BGSG +XN--5TZM5G +XN--6FRZ82G +XN--6QQ986B3XL +XN--80ADXHKS +XN--80AO21A +XN--80AQECDR1A +XN--80ASEHDB +XN--80ASWG +XN--8Y0A063A +XN--90A3AC +XN--90AE +XN--90AIS +XN--9DBQ2A +XN--9ET52U +XN--9KRT00A +XN--B4W605FERD +XN--BCK1B9A5DRE4C +XN--C1AVG +XN--C2BR7G +XN--CCK2B3B +XN--CCKWCXETD +XN--CG4BKI +XN--CLCHC0EA0B2G2A9GCD +XN--CZR694B +XN--CZRS0T +XN--CZRU2D +XN--D1ACJ3B +XN--D1ALF +XN--E1A4C +XN--ECKVDTC9D +XN--EFVY88H +XN--FCT429K +XN--FHBEI +XN--FIQ228C5HS +XN--FIQ64B +XN--FIQS8S +XN--FIQZ9S +XN--FJQ720A +XN--FLW351E +XN--FPCRJ9C3D +XN--FZC2C9E2C +XN--FZYS8D69UVGM +XN--G2XX48C +XN--GCKR3F0F +XN--GECRJ9C +XN--GK3AT1E +XN--H2BREG3EVE +XN--H2BRJ9C +XN--H2BRJ9C8C +XN--HXT814E 
+XN--I1B6B1A6A2E +XN--IMR513N +XN--IO0A7I +XN--J1AEF +XN--J1AMH +XN--J6W193G +XN--JLQ480N2RG +XN--JVR189M +XN--KCRX77D1X4A +XN--KPRW13D +XN--KPRY57D +XN--KPUT3I +XN--L1ACC +XN--LGBBAT1AD8J +XN--MGB9AWBF +XN--MGBA3A3EJT +XN--MGBA3A4F16A +XN--MGBA7C0BBN0A +XN--MGBAAM7A8H +XN--MGBAB2BD +XN--MGBAH1A3HJKRD +XN--MGBAI9AZGQP6J +XN--MGBAYH7GPA +XN--MGBBH1A +XN--MGBBH1A71E +XN--MGBC0A9AZCG +XN--MGBCA7DZDO +XN--MGBCPQ6GPA1A +XN--MGBERP4A5D4AR +XN--MGBGU82A +XN--MGBI4ECEXP +XN--MGBPL2FH +XN--MGBT3DHD +XN--MGBTX2B +XN--MGBX4CD0AB +XN--MIX891F +XN--MK1BU44C +XN--MXTQ1M +XN--NGBC5AZD +XN--NGBE9E0A +XN--NGBRX +XN--NODE +XN--NQV7F +XN--NQV7FS00EMA +XN--NYQY26A +XN--O3CW4H +XN--OGBPF8FL +XN--OTU796D +XN--P1ACF +XN--P1AI +XN--PGBS0DH +XN--PSSY2U +XN--Q7CE6A +XN--Q9JYB4C +XN--QCKA1PMC +XN--QXA6A +XN--QXAM +XN--RHQV96G +XN--ROVU88B +XN--RVC1E0AM3E +XN--S9BRJ9C +XN--SES554G +XN--T60B56A +XN--TCKWE +XN--TIQ49XQYJ +XN--UNUP4Y +XN--VERMGENSBERATER-CTB +XN--VERMGENSBERATUNG-PWB +XN--VHQUV +XN--VUQ861B +XN--W4R85EL8FHU5DNRA +XN--W4RS40L +XN--WGBH1C +XN--WGBL6A +XN--XHQ521B +XN--XKC2AL3HYE2A +XN--XKC2DL3A5EE0H +XN--Y9A3AQ +XN--YFRO4I67O +XN--YGBI2AMMX +XN--ZFR164B +XXX +XYZ +YACHTS +YAHOO +YAMAXUN +YANDEX +YE +YODOBASHI +YOGA +YOKOHAMA +YOU +YOUTUBE +YT +YUN +ZA +ZAPPOS +ZARA +ZERO +ZIP +ZM +ZONE +ZUERICH +ZW diff --git a/vendored/hypothesis/version.py b/vendored/hypothesis/version.py new file mode 100644 index 0000000..755ea0d --- /dev/null +++ b/vendored/hypothesis/version.py @@ -0,0 +1,12 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +__version_info__ = (6, 148, 8) +__version__ = ".".join(map(str, __version_info__)) diff --git a/vendored/requirements.in b/vendored/requirements.in new file mode 100644 index 0000000..68ee446 --- /dev/null +++ b/vendored/requirements.in @@ -0,0 +1 @@ +hypothesis diff --git a/vendored/requirements.txt b/vendored/requirements.txt new file mode 100644 index 0000000..121883f --- /dev/null +++ b/vendored/requirements.txt @@ -0,0 +1,14 @@ +# +# This file is autogenerated by pip-compile with Python 3.13 +# by the following command: +# +# pip-compile --generate-hashes --output-file=requirements.txt requirements.in +# +hypothesis==6.148.8 \ + --hash=sha256:c1842f47f974d74661b3779a26032f8b91bc1eb30d84741714d3712d7f43e85e \ + --hash=sha256:fa6b2ae029bc02f9d2d6c2257b0cbf2dc3782362457d2027a038ad7f4209c385 + # via -r requirements.in +sortedcontainers==2.4.0 \ + --hash=sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88 \ + --hash=sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0 + # via hypothesis diff --git a/vendored/sortedcontainers/__init__.py b/vendored/sortedcontainers/__init__.py new file mode 100644 index 0000000..a141dd1 --- /dev/null +++ b/vendored/sortedcontainers/__init__.py @@ -0,0 +1,74 @@ +"""Sorted Containers -- Sorted List, Sorted Dict, Sorted Set + +Sorted Containers is an Apache2 licensed containers library, written in +pure-Python, and fast as C-extensions. + +Python's standard library is great until you need a sorted collections +type. 
Many will attest that you can get really far without one, but the moment +you **really need** a sorted list, dict, or set, you're faced with a dozen +different implementations, most using C-extensions without great documentation +and benchmarking. + +In Python, we can do better. And we can do it in pure-Python! + +:: + + >>> from sortedcontainers import SortedList + >>> sl = SortedList(['e', 'a', 'c', 'd', 'b']) + >>> sl + SortedList(['a', 'b', 'c', 'd', 'e']) + >>> sl *= 1000000 + >>> sl.count('c') + 1000000 + >>> sl[-3:] + ['e', 'e', 'e'] + >>> from sortedcontainers import SortedDict + >>> sd = SortedDict({'c': 3, 'a': 1, 'b': 2}) + >>> sd + SortedDict({'a': 1, 'b': 2, 'c': 3}) + >>> sd.popitem(index=-1) + ('c', 3) + >>> from sortedcontainers import SortedSet + >>> ss = SortedSet('abracadabra') + >>> ss + SortedSet(['a', 'b', 'c', 'd', 'r']) + >>> ss.bisect_left('c') + 2 + +Sorted Containers takes all of the work out of Python sorted types - making +your deployment and use of Python easy. There's no need to install a C compiler +or pre-build and distribute custom extensions. Performance is a feature and +testing has 100% coverage with unit tests and hours of stress. + +:copyright: (c) 2014-2019 by Grant Jenks. +:license: Apache 2.0, see LICENSE for more details. + +""" + + +from .sortedlist import SortedList, SortedKeyList, SortedListWithKey +from .sortedset import SortedSet +from .sorteddict import ( + SortedDict, + SortedKeysView, + SortedItemsView, + SortedValuesView, +) + +__all__ = [ + 'SortedList', + 'SortedKeyList', + 'SortedListWithKey', + 'SortedDict', + 'SortedKeysView', + 'SortedItemsView', + 'SortedValuesView', + 'SortedSet', +] + +__title__ = 'sortedcontainers' +__version__ = '2.4.0' +__build__ = 0x020400 +__author__ = 'Grant Jenks' +__license__ = 'Apache 2.0' +__copyright__ = '2014-2019, Grant Jenks' diff --git a/vendored/sortedcontainers/sorteddict.py b/vendored/sortedcontainers/sorteddict.py new file mode 100644 index 0000000..910f260 --- /dev/null +++ b/vendored/sortedcontainers/sorteddict.py @@ -0,0 +1,812 @@ +"""Sorted Dict +============== + +:doc:`Sorted Containers` is an Apache2 licensed Python sorted +collections library, written in pure-Python, and fast as C-extensions. The +:doc:`introduction` is the best way to get started. + +Sorted dict implementations: + +.. currentmodule:: sortedcontainers + +* :class:`SortedDict` +* :class:`SortedKeysView` +* :class:`SortedItemsView` +* :class:`SortedValuesView` + +""" + +import sys +import warnings + +from itertools import chain + +from .sortedlist import SortedList, recursive_repr +from .sortedset import SortedSet + +############################################################################### +# BEGIN Python 2/3 Shims +############################################################################### + +try: + from collections.abc import ( + ItemsView, KeysView, Mapping, ValuesView, Sequence + ) +except ImportError: + from collections import ItemsView, KeysView, Mapping, ValuesView, Sequence + +############################################################################### +# END Python 2/3 Shims +############################################################################### + + +class SortedDict(dict): + """Sorted dict is a sorted mutable mapping. + + Sorted dict keys are maintained in sorted order. The design of sorted dict + is simple: sorted dict inherits from dict to store items and maintains a + sorted list of keys. + + Sorted dict keys must be hashable and comparable. 
The hash and total + ordering of keys must not change while they are stored in the sorted dict. + + Mutable mapping methods: + + * :func:`SortedDict.__getitem__` (inherited from dict) + * :func:`SortedDict.__setitem__` + * :func:`SortedDict.__delitem__` + * :func:`SortedDict.__iter__` + * :func:`SortedDict.__len__` (inherited from dict) + + Methods for adding items: + + * :func:`SortedDict.setdefault` + * :func:`SortedDict.update` + + Methods for removing items: + + * :func:`SortedDict.clear` + * :func:`SortedDict.pop` + * :func:`SortedDict.popitem` + + Methods for looking up items: + + * :func:`SortedDict.__contains__` (inherited from dict) + * :func:`SortedDict.get` (inherited from dict) + * :func:`SortedDict.peekitem` + + Methods for views: + + * :func:`SortedDict.keys` + * :func:`SortedDict.items` + * :func:`SortedDict.values` + + Methods for miscellany: + + * :func:`SortedDict.copy` + * :func:`SortedDict.fromkeys` + * :func:`SortedDict.__reversed__` + * :func:`SortedDict.__eq__` (inherited from dict) + * :func:`SortedDict.__ne__` (inherited from dict) + * :func:`SortedDict.__repr__` + * :func:`SortedDict._check` + + Sorted list methods available (applies to keys): + + * :func:`SortedList.bisect_left` + * :func:`SortedList.bisect_right` + * :func:`SortedList.count` + * :func:`SortedList.index` + * :func:`SortedList.irange` + * :func:`SortedList.islice` + * :func:`SortedList._reset` + + Additional sorted list methods available, if key-function used: + + * :func:`SortedKeyList.bisect_key_left` + * :func:`SortedKeyList.bisect_key_right` + * :func:`SortedKeyList.irange_key` + + Sorted dicts may only be compared for equality and inequality. + + """ + def __init__(self, *args, **kwargs): + """Initialize sorted dict instance. + + Optional key-function argument defines a callable that, like the `key` + argument to the built-in `sorted` function, extracts a comparison key + from each dictionary key. If no function is specified, the default + compares the dictionary keys directly. The key-function argument must + be provided as a positional argument and must come before all other + arguments. + + Optional iterable argument provides an initial sequence of pairs to + initialize the sorted dict. Each pair in the sequence defines the key + and corresponding value. If a key is seen more than once, the last + value associated with it is stored in the new sorted dict. + + Optional mapping argument provides an initial mapping of items to + initialize the sorted dict. + + If keyword arguments are given, the keywords themselves, with their + associated values, are added as items to the dictionary. If a key is + specified both in the positional argument and as a keyword argument, + the value associated with the keyword is stored in the + sorted dict. + + Sorted dict keys must be hashable, per the requirement for Python's + dictionaries. Keys (or the result of the key-function) must also be + comparable, per the requirement for sorted lists. + + >>> d = {'alpha': 1, 'beta': 2} + >>> SortedDict([('alpha', 1), ('beta', 2)]) == d + True + >>> SortedDict({'alpha': 1, 'beta': 2}) == d + True + >>> SortedDict(alpha=1, beta=2) == d + True + + """ + if args and (args[0] is None or callable(args[0])): + _key = self._key = args[0] + args = args[1:] + else: + _key = self._key = None + + self._list = SortedList(key=_key) + + # Reaching through ``self._list`` repeatedly adds unnecessary overhead + # so cache references to sorted list methods. 
+ + _list = self._list + self._list_add = _list.add + self._list_clear = _list.clear + self._list_iter = _list.__iter__ + self._list_reversed = _list.__reversed__ + self._list_pop = _list.pop + self._list_remove = _list.remove + self._list_update = _list.update + + # Expose some sorted list methods publicly. + + self.bisect_left = _list.bisect_left + self.bisect = _list.bisect_right + self.bisect_right = _list.bisect_right + self.index = _list.index + self.irange = _list.irange + self.islice = _list.islice + self._reset = _list._reset + + if _key is not None: + self.bisect_key_left = _list.bisect_key_left + self.bisect_key_right = _list.bisect_key_right + self.bisect_key = _list.bisect_key + self.irange_key = _list.irange_key + + self._update(*args, **kwargs) + + + @property + def key(self): + """Function used to extract comparison key from keys. + + Sorted dict compares keys directly when the key function is none. + + """ + return self._key + + + @property + def iloc(self): + """Cached reference of sorted keys view. + + Deprecated in version 2 of Sorted Containers. Use + :func:`SortedDict.keys` instead. + + """ + # pylint: disable=attribute-defined-outside-init + try: + return self._iloc + except AttributeError: + warnings.warn( + 'sorted_dict.iloc is deprecated.' + ' Use SortedDict.keys() instead.', + DeprecationWarning, + stacklevel=2, + ) + _iloc = self._iloc = SortedKeysView(self) + return _iloc + + + def clear(self): + + """Remove all items from sorted dict. + + Runtime complexity: `O(n)` + + """ + dict.clear(self) + self._list_clear() + + + def __delitem__(self, key): + """Remove item from sorted dict identified by `key`. + + ``sd.__delitem__(key)`` <==> ``del sd[key]`` + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sd = SortedDict({'a': 1, 'b': 2, 'c': 3}) + >>> del sd['b'] + >>> sd + SortedDict({'a': 1, 'c': 3}) + >>> del sd['z'] + Traceback (most recent call last): + ... + KeyError: 'z' + + :param key: `key` for item lookup + :raises KeyError: if key not found + + """ + dict.__delitem__(self, key) + self._list_remove(key) + + + def __iter__(self): + """Return an iterator over the keys of the sorted dict. + + ``sd.__iter__()`` <==> ``iter(sd)`` + + Iterating the sorted dict while adding or deleting items may raise a + :exc:`RuntimeError` or fail to iterate over all keys. + + """ + return self._list_iter() + + + def __reversed__(self): + """Return a reverse iterator over the keys of the sorted dict. + + ``sd.__reversed__()`` <==> ``reversed(sd)`` + + Iterating the sorted dict while adding or deleting items may raise a + :exc:`RuntimeError` or fail to iterate over all keys. + + """ + return self._list_reversed() + + + def __setitem__(self, key, value): + """Store item in sorted dict with `key` and corresponding `value`. + + ``sd.__setitem__(key, value)`` <==> ``sd[key] = value`` + + Runtime complexity: `O(log(n))` -- approximate. 
+ + >>> sd = SortedDict() + >>> sd['c'] = 3 + >>> sd['a'] = 1 + >>> sd['b'] = 2 + >>> sd + SortedDict({'a': 1, 'b': 2, 'c': 3}) + + :param key: key for item + :param value: value for item + + """ + if key not in self: + self._list_add(key) + dict.__setitem__(self, key, value) + + _setitem = __setitem__ + + + def __or__(self, other): + if not isinstance(other, Mapping): + return NotImplemented + items = chain(self.items(), other.items()) + return self.__class__(self._key, items) + + + def __ror__(self, other): + if not isinstance(other, Mapping): + return NotImplemented + items = chain(other.items(), self.items()) + return self.__class__(self._key, items) + + + def __ior__(self, other): + self._update(other) + return self + + + def copy(self): + """Return a shallow copy of the sorted dict. + + Runtime complexity: `O(n)` + + :return: new sorted dict + + """ + return self.__class__(self._key, self.items()) + + __copy__ = copy + + + @classmethod + def fromkeys(cls, iterable, value=None): + """Return a new sorted dict initailized from `iterable` and `value`. + + Items in the sorted dict have keys from `iterable` and values equal to + `value`. + + Runtime complexity: `O(n*log(n))` + + :return: new sorted dict + + """ + return cls((key, value) for key in iterable) + + + def keys(self): + """Return new sorted keys view of the sorted dict's keys. + + See :class:`SortedKeysView` for details. + + :return: new sorted keys view + + """ + return SortedKeysView(self) + + + def items(self): + """Return new sorted items view of the sorted dict's items. + + See :class:`SortedItemsView` for details. + + :return: new sorted items view + + """ + return SortedItemsView(self) + + + def values(self): + """Return new sorted values view of the sorted dict's values. + + See :class:`SortedValuesView` for details. + + :return: new sorted values view + + """ + return SortedValuesView(self) + + + if sys.hexversion < 0x03000000: + def __make_raise_attributeerror(original, alternate): + # pylint: disable=no-self-argument + message = ( + 'SortedDict.{original}() is not implemented.' + ' Use SortedDict.{alternate}() instead.' + ).format(original=original, alternate=alternate) + def method(self): + # pylint: disable=missing-docstring,unused-argument + raise AttributeError(message) + method.__name__ = original # pylint: disable=non-str-assignment-to-dunder-name + method.__doc__ = message + return property(method) + + iteritems = __make_raise_attributeerror('iteritems', 'items') + iterkeys = __make_raise_attributeerror('iterkeys', 'keys') + itervalues = __make_raise_attributeerror('itervalues', 'values') + viewitems = __make_raise_attributeerror('viewitems', 'items') + viewkeys = __make_raise_attributeerror('viewkeys', 'keys') + viewvalues = __make_raise_attributeerror('viewvalues', 'values') + + + class _NotGiven(object): + # pylint: disable=too-few-public-methods + def __repr__(self): + return '' + + __not_given = _NotGiven() + + def pop(self, key, default=__not_given): + """Remove and return value for item identified by `key`. + + If the `key` is not found then return `default` if given. If `default` + is not given then raise :exc:`KeyError`. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sd = SortedDict({'a': 1, 'b': 2, 'c': 3}) + >>> sd.pop('c') + 3 + >>> sd.pop('z', 26) + 26 + >>> sd.pop('y') + Traceback (most recent call last): + ... 
+ KeyError: 'y' + + :param key: `key` for item + :param default: `default` value if key not found (optional) + :return: value for item + :raises KeyError: if `key` not found and `default` not given + + """ + if key in self: + self._list_remove(key) + return dict.pop(self, key) + else: + if default is self.__not_given: + raise KeyError(key) + return default + + + def popitem(self, index=-1): + """Remove and return ``(key, value)`` pair at `index` from sorted dict. + + Optional argument `index` defaults to -1, the last item in the sorted + dict. Specify ``index=0`` for the first item in the sorted dict. + + If the sorted dict is empty, raises :exc:`KeyError`. + + If the `index` is out of range, raises :exc:`IndexError`. + + Runtime complexity: `O(log(n))` + + >>> sd = SortedDict({'a': 1, 'b': 2, 'c': 3}) + >>> sd.popitem() + ('c', 3) + >>> sd.popitem(0) + ('a', 1) + >>> sd.popitem(100) + Traceback (most recent call last): + ... + IndexError: list index out of range + + :param int index: `index` of item (default -1) + :return: key and value pair + :raises KeyError: if sorted dict is empty + :raises IndexError: if `index` out of range + + """ + if not self: + raise KeyError('popitem(): dictionary is empty') + + key = self._list_pop(index) + value = dict.pop(self, key) + return (key, value) + + + def peekitem(self, index=-1): + """Return ``(key, value)`` pair at `index` in sorted dict. + + Optional argument `index` defaults to -1, the last item in the sorted + dict. Specify ``index=0`` for the first item in the sorted dict. + + Unlike :func:`SortedDict.popitem`, the sorted dict is not modified. + + If the `index` is out of range, raises :exc:`IndexError`. + + Runtime complexity: `O(log(n))` + + >>> sd = SortedDict({'a': 1, 'b': 2, 'c': 3}) + >>> sd.peekitem() + ('c', 3) + >>> sd.peekitem(0) + ('a', 1) + >>> sd.peekitem(100) + Traceback (most recent call last): + ... + IndexError: list index out of range + + :param int index: index of item (default -1) + :return: key and value pair + :raises IndexError: if `index` out of range + + """ + key = self._list[index] + return key, self[key] + + + def setdefault(self, key, default=None): + """Return value for item identified by `key` in sorted dict. + + If `key` is in the sorted dict then return its value. If `key` is not + in the sorted dict then insert `key` with value `default` and return + `default`. + + Optional argument `default` defaults to none. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sd = SortedDict() + >>> sd.setdefault('a', 1) + 1 + >>> sd.setdefault('a', 10) + 1 + >>> sd + SortedDict({'a': 1}) + + :param key: key for item + :param default: value for item (default None) + :return: value for item identified by `key` + + """ + if key in self: + return self[key] + dict.__setitem__(self, key, default) + self._list_add(key) + return default + + + def update(self, *args, **kwargs): + """Update sorted dict with items from `args` and `kwargs`. + + Overwrites existing items. + + Optional arguments `args` and `kwargs` may be a mapping, an iterable of + pairs or keyword arguments. See :func:`SortedDict.__init__` for + details. 
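A sketch exercising pop, popitem, peekitem and setdefault as documented above (same ``sortedcontainers`` import assumption as the other sketches in this patch):

    from sortedcontainers import SortedDict

    sd = SortedDict({'a': 1, 'b': 2, 'c': 3})
    assert sd.peekitem() == ('c', 3)      # look at the last item without removing it
    assert sd.popitem(0) == ('a', 1)      # remove the first item instead of the last
    assert sd.pop('z', 26) == 26          # missing key with a default does not raise
    assert sd.setdefault('d', 4) == 4     # 'd' was missing, so it is inserted
    assert sd.setdefault('d', 99) == 4    # the existing value wins on a second call
    assert list(sd) == ['b', 'c', 'd']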
+ + :param args: mapping or iterable of pairs + :param kwargs: keyword arguments mapping + + """ + if not self: + dict.update(self, *args, **kwargs) + self._list_update(dict.__iter__(self)) + return + + if not kwargs and len(args) == 1 and isinstance(args[0], dict): + pairs = args[0] + else: + pairs = dict(*args, **kwargs) + + if (10 * len(pairs)) > len(self): + dict.update(self, pairs) + self._list_clear() + self._list_update(dict.__iter__(self)) + else: + for key in pairs: + self._setitem(key, pairs[key]) + + _update = update + + + def __reduce__(self): + """Support for pickle. + + The tricks played with caching references in + :func:`SortedDict.__init__` confuse pickle so customize the reducer. + + """ + items = dict.copy(self) + return (type(self), (self._key, items)) + + + @recursive_repr() + def __repr__(self): + """Return string representation of sorted dict. + + ``sd.__repr__()`` <==> ``repr(sd)`` + + :return: string representation + + """ + _key = self._key + type_name = type(self).__name__ + key_arg = '' if _key is None else '{0!r}, '.format(_key) + item_format = '{0!r}: {1!r}'.format + items = ', '.join(item_format(key, self[key]) for key in self._list) + return '{0}({1}{{{2}}})'.format(type_name, key_arg, items) + + + def _check(self): + """Check invariants of sorted dict. + + Runtime complexity: `O(n)` + + """ + _list = self._list + _list._check() + assert len(self) == len(_list) + assert all(key in self for key in _list) + + +def _view_delitem(self, index): + """Remove item at `index` from sorted dict. + + ``view.__delitem__(index)`` <==> ``del view[index]`` + + Supports slicing. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sd = SortedDict({'a': 1, 'b': 2, 'c': 3}) + >>> view = sd.keys() + >>> del view[0] + >>> sd + SortedDict({'b': 2, 'c': 3}) + >>> del view[-1] + >>> sd + SortedDict({'b': 2}) + >>> del view[:] + >>> sd + SortedDict({}) + + :param index: integer or slice for indexing + :raises IndexError: if index out of range + + """ + _mapping = self._mapping + _list = _mapping._list + dict_delitem = dict.__delitem__ + if isinstance(index, slice): + keys = _list[index] + del _list[index] + for key in keys: + dict_delitem(_mapping, key) + else: + key = _list.pop(index) + dict_delitem(_mapping, key) + + +class SortedKeysView(KeysView, Sequence): + """Sorted keys view is a dynamic view of the sorted dict's keys. + + When the sorted dict's keys change, the view reflects those changes. + + The keys view implements the set and sequence abstract base classes. + + """ + __slots__ = () + + + @classmethod + def _from_iterable(cls, it): + return SortedSet(it) + + + def __getitem__(self, index): + """Lookup key at `index` in sorted keys views. + + ``skv.__getitem__(index)`` <==> ``skv[index]`` + + Supports slicing. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sd = SortedDict({'a': 1, 'b': 2, 'c': 3}) + >>> skv = sd.keys() + >>> skv[0] + 'a' + >>> skv[-1] + 'c' + >>> skv[:] + ['a', 'b', 'c'] + >>> skv[100] + Traceback (most recent call last): + ... + IndexError: list index out of range + + :param index: integer or slice for indexing + :return: key or list of keys + :raises IndexError: if index out of range + + """ + return self._mapping._list[index] + + + __delitem__ = _view_delitem + + +class SortedItemsView(ItemsView, Sequence): + """Sorted items view is a dynamic view of the sorted dict's items. + + When the sorted dict's items change, the view reflects those changes. + + The items view implements the set and sequence abstract base classes. 
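The views above behave as sequences over the sorted keys, items and values, and ``_view_delitem`` makes deletion through a view remove the entry from the underlying dict. A minimal sketch (assuming ``sortedcontainers`` is importable):

    from sortedcontainers import SortedDict

    sd = SortedDict({'a': 1, 'b': 2, 'c': 3})
    keys = sd.keys()                     # SortedKeysView: indexable, sorted keys
    items = sd.items()                   # SortedItemsView: indexable (key, value) pairs
    assert keys[0] == 'a' and keys[-1] == 'c'
    assert items[1] == ('b', 2)
    sd['d'] = 4                          # views are dynamic and see later changes
    assert keys[-1] == 'd'
    del keys[0]                          # _view_delitem removes 'a' from the dict too
    assert 'a' not in sd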
+ + """ + __slots__ = () + + + @classmethod + def _from_iterable(cls, it): + return SortedSet(it) + + + def __getitem__(self, index): + """Lookup item at `index` in sorted items view. + + ``siv.__getitem__(index)`` <==> ``siv[index]`` + + Supports slicing. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sd = SortedDict({'a': 1, 'b': 2, 'c': 3}) + >>> siv = sd.items() + >>> siv[0] + ('a', 1) + >>> siv[-1] + ('c', 3) + >>> siv[:] + [('a', 1), ('b', 2), ('c', 3)] + >>> siv[100] + Traceback (most recent call last): + ... + IndexError: list index out of range + + :param index: integer or slice for indexing + :return: item or list of items + :raises IndexError: if index out of range + + """ + _mapping = self._mapping + _mapping_list = _mapping._list + + if isinstance(index, slice): + keys = _mapping_list[index] + return [(key, _mapping[key]) for key in keys] + + key = _mapping_list[index] + return key, _mapping[key] + + + __delitem__ = _view_delitem + + +class SortedValuesView(ValuesView, Sequence): + """Sorted values view is a dynamic view of the sorted dict's values. + + When the sorted dict's values change, the view reflects those changes. + + The values view implements the sequence abstract base class. + + """ + __slots__ = () + + + def __getitem__(self, index): + """Lookup value at `index` in sorted values view. + + ``siv.__getitem__(index)`` <==> ``siv[index]`` + + Supports slicing. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sd = SortedDict({'a': 1, 'b': 2, 'c': 3}) + >>> svv = sd.values() + >>> svv[0] + 1 + >>> svv[-1] + 3 + >>> svv[:] + [1, 2, 3] + >>> svv[100] + Traceback (most recent call last): + ... + IndexError: list index out of range + + :param index: integer or slice for indexing + :return: value or list of values + :raises IndexError: if index out of range + + """ + _mapping = self._mapping + _mapping_list = _mapping._list + + if isinstance(index, slice): + keys = _mapping_list[index] + return [_mapping[key] for key in keys] + + key = _mapping_list[index] + return _mapping[key] + + + __delitem__ = _view_delitem diff --git a/vendored/sortedcontainers/sortedlist.py b/vendored/sortedcontainers/sortedlist.py new file mode 100644 index 0000000..e3b58eb --- /dev/null +++ b/vendored/sortedcontainers/sortedlist.py @@ -0,0 +1,2646 @@ +"""Sorted List +============== + +:doc:`Sorted Containers` is an Apache2 licensed Python sorted +collections library, written in pure-Python, and fast as C-extensions. The +:doc:`introduction` is the best way to get started. + +Sorted list implementations: + +.. 
currentmodule:: sortedcontainers + +* :class:`SortedList` +* :class:`SortedKeyList` + +""" +# pylint: disable=too-many-lines +from __future__ import print_function + +import sys +import traceback + +from bisect import bisect_left, bisect_right, insort +from itertools import chain, repeat, starmap +from math import log +from operator import add, eq, ne, gt, ge, lt, le, iadd +from textwrap import dedent + +############################################################################### +# BEGIN Python 2/3 Shims +############################################################################### + +try: + from collections.abc import Sequence, MutableSequence +except ImportError: + from collections import Sequence, MutableSequence + +from functools import wraps +from sys import hexversion + +if hexversion < 0x03000000: + from itertools import imap as map # pylint: disable=redefined-builtin + from itertools import izip as zip # pylint: disable=redefined-builtin + try: + from thread import get_ident + except ImportError: + from dummy_thread import get_ident +else: + from functools import reduce + try: + from _thread import get_ident + except ImportError: + from _dummy_thread import get_ident + + +def recursive_repr(fillvalue='...'): + "Decorator to make a repr function return fillvalue for a recursive call." + # pylint: disable=missing-docstring + # Copied from reprlib in Python 3 + # https://hg.python.org/cpython/file/3.6/Lib/reprlib.py + + def decorating_function(user_function): + repr_running = set() + + @wraps(user_function) + def wrapper(self): + key = id(self), get_ident() + if key in repr_running: + return fillvalue + repr_running.add(key) + try: + result = user_function(self) + finally: + repr_running.discard(key) + return result + + return wrapper + + return decorating_function + +############################################################################### +# END Python 2/3 Shims +############################################################################### + + +class SortedList(MutableSequence): + """Sorted list is a sorted mutable sequence. + + Sorted list values are maintained in sorted order. + + Sorted list values must be comparable. The total ordering of values must + not change while they are stored in the sorted list. + + Methods for adding values: + + * :func:`SortedList.add` + * :func:`SortedList.update` + * :func:`SortedList.__add__` + * :func:`SortedList.__iadd__` + * :func:`SortedList.__mul__` + * :func:`SortedList.__imul__` + + Methods for removing values: + + * :func:`SortedList.clear` + * :func:`SortedList.discard` + * :func:`SortedList.remove` + * :func:`SortedList.pop` + * :func:`SortedList.__delitem__` + + Methods for looking up values: + + * :func:`SortedList.bisect_left` + * :func:`SortedList.bisect_right` + * :func:`SortedList.count` + * :func:`SortedList.index` + * :func:`SortedList.__contains__` + * :func:`SortedList.__getitem__` + + Methods for iterating values: + + * :func:`SortedList.irange` + * :func:`SortedList.islice` + * :func:`SortedList.__iter__` + * :func:`SortedList.__reversed__` + + Methods for miscellany: + + * :func:`SortedList.copy` + * :func:`SortedList.__len__` + * :func:`SortedList.__repr__` + * :func:`SortedList._check` + * :func:`SortedList._reset` + + Sorted lists use lexicographical ordering semantics when compared to other + sequences. + + Some methods of mutable sequences are not supported and will raise + not-implemented error. 
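The class docstring above notes that some MutableSequence methods are deliberately unsupported. A sketch of the supported surface, assuming the vendored package is importable as ``sortedcontainers``:

    from sortedcontainers import SortedList

    sl = SortedList([3, 1, 4, 1, 5])
    sl.add(2)                              # values are kept sorted on insert
    assert list(sl) == [1, 1, 2, 3, 4, 5]
    assert sl[0] == 1 and sl[-1] == 5      # positional lookup is supported
    try:
        sl[0] = 99                         # in-place assignment is rejected ...
    except NotImplementedError:
        pass
    sl.remove(1)                           # ... use remove()/del plus add() instead
    assert list(sl) == [1, 2, 3, 4, 5]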
+ + """ + DEFAULT_LOAD_FACTOR = 1000 + + + def __init__(self, iterable=None, key=None): + """Initialize sorted list instance. + + Optional `iterable` argument provides an initial iterable of values to + initialize the sorted list. + + Runtime complexity: `O(n*log(n))` + + >>> sl = SortedList() + >>> sl + SortedList([]) + >>> sl = SortedList([3, 1, 2, 5, 4]) + >>> sl + SortedList([1, 2, 3, 4, 5]) + + :param iterable: initial values (optional) + + """ + assert key is None + self._len = 0 + self._load = self.DEFAULT_LOAD_FACTOR + self._lists = [] + self._maxes = [] + self._index = [] + self._offset = 0 + + if iterable is not None: + self._update(iterable) + + + def __new__(cls, iterable=None, key=None): + """Create new sorted list or sorted-key list instance. + + Optional `key`-function argument will return an instance of subtype + :class:`SortedKeyList`. + + >>> sl = SortedList() + >>> isinstance(sl, SortedList) + True + >>> sl = SortedList(key=lambda x: -x) + >>> isinstance(sl, SortedList) + True + >>> isinstance(sl, SortedKeyList) + True + + :param iterable: initial values (optional) + :param key: function used to extract comparison key (optional) + :return: sorted list or sorted-key list instance + + """ + # pylint: disable=unused-argument + if key is None: + return object.__new__(cls) + else: + if cls is SortedList: + return object.__new__(SortedKeyList) + else: + raise TypeError('inherit SortedKeyList for key argument') + + + @property + def key(self): # pylint: disable=useless-return + """Function used to extract comparison key from values. + + Sorted list compares values directly so the key function is none. + + """ + return None + + + def _reset(self, load): + """Reset sorted list load factor. + + The `load` specifies the load-factor of the list. The default load + factor of 1000 works well for lists from tens to tens-of-millions of + values. Good practice is to use a value that is the cube root of the + list size. With billions of elements, the best load factor depends on + your usage. It's best to leave the load factor at the default until you + start benchmarking. + + See :doc:`implementation` and :doc:`performance-scale` for more + information. + + Runtime complexity: `O(n)` + + :param int load: load-factor for sorted list sublists + + """ + values = reduce(iadd, self._lists, []) + self._clear() + self._load = load + self._update(values) + + + def clear(self): + """Remove all values from sorted list. + + Runtime complexity: `O(n)` + + """ + self._len = 0 + del self._lists[:] + del self._maxes[:] + del self._index[:] + self._offset = 0 + + _clear = clear + + + def add(self, value): + """Add `value` to sorted list. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sl = SortedList() + >>> sl.add(3) + >>> sl.add(1) + >>> sl.add(2) + >>> sl + SortedList([1, 2, 3]) + + :param value: value to add to sorted list + + """ + _lists = self._lists + _maxes = self._maxes + + if _maxes: + pos = bisect_right(_maxes, value) + + if pos == len(_maxes): + pos -= 1 + _lists[pos].append(value) + _maxes[pos] = value + else: + insort(_lists[pos], value) + + self._expand(pos) + else: + _lists.append([value]) + _maxes.append(value) + + self._len += 1 + + + def _expand(self, pos): + """Split sublists with length greater than double the load-factor. + + Updates the index when the sublist length is less than double the load + level. This requires incrementing the nodes in a traversal from the + leaf node to the root. For an example traversal see + ``SortedList._loc``. 
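The load-factor machinery above is easiest to see with a deliberately small load. In the sketch below, ``_reset``, ``_lists`` and ``_check`` are internals of this vendored copy and are touched purely for illustration (same ``sortedcontainers`` import assumption):

    from sortedcontainers import SortedList

    sl = SortedList(range(50))
    sl._reset(8)                           # small load factor: many short sublists
    assert list(sl) == list(range(50))     # contents are unchanged by _reset
    assert len(sl._lists) > 1              # several sublists instead of one long list
    assert all(len(sub) <= 16 for sub in sl._lists)   # no sublist exceeds 2 * load
    sl._check()                            # the documented invariants still hold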
+ + """ + _load = self._load + _lists = self._lists + _index = self._index + + if len(_lists[pos]) > (_load << 1): + _maxes = self._maxes + + _lists_pos = _lists[pos] + half = _lists_pos[_load:] + del _lists_pos[_load:] + _maxes[pos] = _lists_pos[-1] + + _lists.insert(pos + 1, half) + _maxes.insert(pos + 1, half[-1]) + + del _index[:] + else: + if _index: + child = self._offset + pos + while child: + _index[child] += 1 + child = (child - 1) >> 1 + _index[0] += 1 + + + def update(self, iterable): + """Update sorted list by adding all values from `iterable`. + + Runtime complexity: `O(k*log(n))` -- approximate. + + >>> sl = SortedList() + >>> sl.update([3, 1, 2]) + >>> sl + SortedList([1, 2, 3]) + + :param iterable: iterable of values to add + + """ + _lists = self._lists + _maxes = self._maxes + values = sorted(iterable) + + if _maxes: + if len(values) * 4 >= self._len: + _lists.append(values) + values = reduce(iadd, _lists, []) + values.sort() + self._clear() + else: + _add = self.add + for val in values: + _add(val) + return + + _load = self._load + _lists.extend(values[pos:(pos + _load)] + for pos in range(0, len(values), _load)) + _maxes.extend(sublist[-1] for sublist in _lists) + self._len = len(values) + del self._index[:] + + _update = update + + + def __contains__(self, value): + """Return true if `value` is an element of the sorted list. + + ``sl.__contains__(value)`` <==> ``value in sl`` + + Runtime complexity: `O(log(n))` + + >>> sl = SortedList([1, 2, 3, 4, 5]) + >>> 3 in sl + True + + :param value: search for value in sorted list + :return: true if `value` in sorted list + + """ + _maxes = self._maxes + + if not _maxes: + return False + + pos = bisect_left(_maxes, value) + + if pos == len(_maxes): + return False + + _lists = self._lists + idx = bisect_left(_lists[pos], value) + + return _lists[pos][idx] == value + + + def discard(self, value): + """Remove `value` from sorted list if it is a member. + + If `value` is not a member, do nothing. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sl = SortedList([1, 2, 3, 4, 5]) + >>> sl.discard(5) + >>> sl.discard(0) + >>> sl == [1, 2, 3, 4] + True + + :param value: `value` to discard from sorted list + + """ + _maxes = self._maxes + + if not _maxes: + return + + pos = bisect_left(_maxes, value) + + if pos == len(_maxes): + return + + _lists = self._lists + idx = bisect_left(_lists[pos], value) + + if _lists[pos][idx] == value: + self._delete(pos, idx) + + + def remove(self, value): + """Remove `value` from sorted list; `value` must be a member. + + If `value` is not a member, raise ValueError. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sl = SortedList([1, 2, 3, 4, 5]) + >>> sl.remove(5) + >>> sl == [1, 2, 3, 4] + True + >>> sl.remove(0) + Traceback (most recent call last): + ... + ValueError: 0 not in list + + :param value: `value` to remove from sorted list + :raises ValueError: if `value` is not in sorted list + + """ + _maxes = self._maxes + + if not _maxes: + raise ValueError('{0!r} not in list'.format(value)) + + pos = bisect_left(_maxes, value) + + if pos == len(_maxes): + raise ValueError('{0!r} not in list'.format(value)) + + _lists = self._lists + idx = bisect_left(_lists[pos], value) + + if _lists[pos][idx] == value: + self._delete(pos, idx) + else: + raise ValueError('{0!r} not in list'.format(value)) + + + def _delete(self, pos, idx): + """Delete value at the given `(pos, idx)`. + + Combines lists that are less than half the load level. 
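discard and remove differ only in how they treat an absent value, as the docstrings above spell out. A short sketch (same import assumption):

    from sortedcontainers import SortedList

    sl = SortedList([1, 2, 3, 4, 5])
    sl.discard(0)              # absent value: silently ignored
    sl.remove(3)               # present value: removed
    assert list(sl) == [1, 2, 4, 5]
    try:
        sl.remove(0)           # absent value: remove() raises instead
    except ValueError as exc:
        assert '0 not in list' in str(exc)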
+ + Updates the index when the sublist length is more than half the load + level. This requires decrementing the nodes in a traversal from the + leaf node to the root. For an example traversal see + ``SortedList._loc``. + + :param int pos: lists index + :param int idx: sublist index + + """ + _lists = self._lists + _maxes = self._maxes + _index = self._index + + _lists_pos = _lists[pos] + + del _lists_pos[idx] + self._len -= 1 + + len_lists_pos = len(_lists_pos) + + if len_lists_pos > (self._load >> 1): + _maxes[pos] = _lists_pos[-1] + + if _index: + child = self._offset + pos + while child > 0: + _index[child] -= 1 + child = (child - 1) >> 1 + _index[0] -= 1 + elif len(_lists) > 1: + if not pos: + pos += 1 + + prev = pos - 1 + _lists[prev].extend(_lists[pos]) + _maxes[prev] = _lists[prev][-1] + + del _lists[pos] + del _maxes[pos] + del _index[:] + + self._expand(prev) + elif len_lists_pos: + _maxes[pos] = _lists_pos[-1] + else: + del _lists[pos] + del _maxes[pos] + del _index[:] + + + def _loc(self, pos, idx): + """Convert an index pair (lists index, sublist index) into a single + index number that corresponds to the position of the value in the + sorted list. + + Many queries require the index be built. Details of the index are + described in ``SortedList._build_index``. + + Indexing requires traversing the tree from a leaf node to the root. The + parent of each node is easily computable at ``(pos - 1) // 2``. + + Left-child nodes are always at odd indices and right-child nodes are + always at even indices. + + When traversing up from a right-child node, increment the total by the + left-child node. + + The final index is the sum from traversal and the index in the sublist. + + For example, using the index from ``SortedList._build_index``:: + + _index = 14 5 9 3 2 4 5 + _offset = 3 + + Tree:: + + 14 + 5 9 + 3 2 4 5 + + Converting an index pair (2, 3) into a single index involves iterating + like so: + + 1. Starting at the leaf node: offset + alpha = 3 + 2 = 5. We identify + the node as a left-child node. At such nodes, we simply traverse to + the parent. + + 2. At node 9, position 2, we recognize the node as a right-child node + and accumulate the left-child in our total. Total is now 5 and we + traverse to the parent at position 0. + + 3. Iteration ends at the root. + + The index is then the sum of the total and sublist index: 5 + 3 = 8. + + :param int pos: lists index + :param int idx: sublist index + :return: index in sorted list + + """ + if not pos: + return idx + + _index = self._index + + if not _index: + self._build_index() + + total = 0 + + # Increment pos to point in the index to len(self._lists[pos]). + + pos += self._offset + + # Iterate until reaching the root of the index tree at pos = 0. + + while pos: + + # Right-child nodes are at odd indices. At such indices + # account the total below the left child node. + + if not pos & 1: + total += _index[pos - 1] + + # Advance pos to the parent node. + + pos = (pos - 1) >> 1 + + return total + idx + + + def _pos(self, idx): + """Convert an index into an index pair (lists index, sublist index) + that can be used to access the corresponding lists position. + + Many queries require the index be built. Details of the index are + described in ``SortedList._build_index``. + + Indexing requires traversing the tree to a leaf node. Each node has two + children which are easily computable. Given an index, pos, the + left-child is at ``pos * 2 + 1`` and the right-child is at ``pos * 2 + + 2``. 
+ + When the index is less than the left-child, traversal moves to the + left sub-tree. Otherwise, the index is decremented by the left-child + and traversal moves to the right sub-tree. + + At a child node, the indexing pair is computed from the relative + position of the child node as compared with the offset and the remaining + index. + + For example, using the index from ``SortedList._build_index``:: + + _index = 14 5 9 3 2 4 5 + _offset = 3 + + Tree:: + + 14 + 5 9 + 3 2 4 5 + + Indexing position 8 involves iterating like so: + + 1. Starting at the root, position 0, 8 is compared with the left-child + node (5) which it is greater than. When greater the index is + decremented and the position is updated to the right child node. + + 2. At node 9 with index 3, we again compare the index to the left-child + node with value 4. Because the index is the less than the left-child + node, we simply traverse to the left. + + 3. At node 4 with index 3, we recognize that we are at a leaf node and + stop iterating. + + 4. To compute the sublist index, we subtract the offset from the index + of the leaf node: 5 - 3 = 2. To compute the index in the sublist, we + simply use the index remaining from iteration. In this case, 3. + + The final index pair from our example is (2, 3) which corresponds to + index 8 in the sorted list. + + :param int idx: index in sorted list + :return: (lists index, sublist index) pair + + """ + if idx < 0: + last_len = len(self._lists[-1]) + + if (-idx) <= last_len: + return len(self._lists) - 1, last_len + idx + + idx += self._len + + if idx < 0: + raise IndexError('list index out of range') + elif idx >= self._len: + raise IndexError('list index out of range') + + if idx < len(self._lists[0]): + return 0, idx + + _index = self._index + + if not _index: + self._build_index() + + pos = 0 + child = 1 + len_index = len(_index) + + while child < len_index: + index_child = _index[child] + + if idx < index_child: + pos = child + else: + idx -= index_child + pos = child + 1 + + child = (pos << 1) + 1 + + return (pos - self._offset, idx) + + + def _build_index(self): + """Build a positional index for indexing the sorted list. + + Indexes are represented as binary trees in a dense array notation + similar to a binary heap. + + For example, given a lists representation storing integers:: + + 0: [1, 2, 3] + 1: [4, 5] + 2: [6, 7, 8, 9] + 3: [10, 11, 12, 13, 14] + + The first transformation maps the sub-lists by their length. The + first row of the index is the length of the sub-lists:: + + 0: [3, 2, 4, 5] + + Each row after that is the sum of consecutive pairs of the previous + row:: + + 1: [5, 9] + 2: [14] + + Finally, the index is built by concatenating these lists together:: + + _index = [14, 5, 9, 3, 2, 4, 5] + + An offset storing the start of the first row is also stored:: + + _offset = 3 + + When built, the index can be used for efficient indexing into the list. + See the comment and notes on ``SortedList._pos`` for details. 
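The positional index described above can be reproduced directly from the docstring's worked example. The sketch below fills in the internal attributes by hand (illustration only, not public API) and checks that ``_build_index``, ``_pos`` and ``_loc`` agree with the numbers given:

    from sortedcontainers import SortedList

    sl = SortedList()
    sl._lists = [[1, 2, 3], [4, 5], [6, 7, 8, 9], [10, 11, 12, 13, 14]]
    sl._maxes = [sub[-1] for sub in sl._lists]
    sl._len = 14
    sl._build_index()
    assert sl._index == [14, 5, 9, 3, 2, 4, 5]   # rows [14], [5, 9], [3, 2, 4, 5]
    assert sl._offset == 3                       # leaf nodes start at position 3
    assert sl._pos(8) == (2, 3)                  # list index 8 -> sublist 2, offset 3
    assert sl._loc(2, 3) == 8                    # and the inverse mapping agrees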
+ + """ + row0 = list(map(len, self._lists)) + + if len(row0) == 1: + self._index[:] = row0 + self._offset = 0 + return + + head = iter(row0) + tail = iter(head) + row1 = list(starmap(add, zip(head, tail))) + + if len(row0) & 1: + row1.append(row0[-1]) + + if len(row1) == 1: + self._index[:] = row1 + row0 + self._offset = 1 + return + + size = 2 ** (int(log(len(row1) - 1, 2)) + 1) + row1.extend(repeat(0, size - len(row1))) + tree = [row0, row1] + + while len(tree[-1]) > 1: + head = iter(tree[-1]) + tail = iter(head) + row = list(starmap(add, zip(head, tail))) + tree.append(row) + + reduce(iadd, reversed(tree), self._index) + self._offset = size * 2 - 1 + + + def __delitem__(self, index): + """Remove value at `index` from sorted list. + + ``sl.__delitem__(index)`` <==> ``del sl[index]`` + + Supports slicing. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sl = SortedList('abcde') + >>> del sl[2] + >>> sl + SortedList(['a', 'b', 'd', 'e']) + >>> del sl[:2] + >>> sl + SortedList(['d', 'e']) + + :param index: integer or slice for indexing + :raises IndexError: if index out of range + + """ + if isinstance(index, slice): + start, stop, step = index.indices(self._len) + + if step == 1 and start < stop: + if start == 0 and stop == self._len: + return self._clear() + elif self._len <= 8 * (stop - start): + values = self._getitem(slice(None, start)) + if stop < self._len: + values += self._getitem(slice(stop, None)) + self._clear() + return self._update(values) + + indices = range(start, stop, step) + + # Delete items from greatest index to least so + # that the indices remain valid throughout iteration. + + if step > 0: + indices = reversed(indices) + + _pos, _delete = self._pos, self._delete + + for index in indices: + pos, idx = _pos(index) + _delete(pos, idx) + else: + pos, idx = self._pos(index) + self._delete(pos, idx) + + + def __getitem__(self, index): + """Lookup value at `index` in sorted list. + + ``sl.__getitem__(index)`` <==> ``sl[index]`` + + Supports slicing. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sl = SortedList('abcde') + >>> sl[1] + 'b' + >>> sl[-1] + 'e' + >>> sl[2:5] + ['c', 'd', 'e'] + + :param index: integer or slice for indexing + :return: value or list of values + :raises IndexError: if index out of range + + """ + _lists = self._lists + + if isinstance(index, slice): + start, stop, step = index.indices(self._len) + + if step == 1 and start < stop: + # Whole slice optimization: start to stop slices the whole + # sorted list. + + if start == 0 and stop == self._len: + return reduce(iadd, self._lists, []) + + start_pos, start_idx = self._pos(start) + start_list = _lists[start_pos] + stop_idx = start_idx + stop - start + + # Small slice optimization: start index and stop index are + # within the start list. + + if len(start_list) >= stop_idx: + return start_list[start_idx:stop_idx] + + if stop == self._len: + stop_pos = len(_lists) - 1 + stop_idx = len(_lists[stop_pos]) + else: + stop_pos, stop_idx = self._pos(stop) + + prefix = _lists[start_pos][start_idx:] + middle = _lists[(start_pos + 1):stop_pos] + result = reduce(iadd, middle, prefix) + result += _lists[stop_pos][:stop_idx] + + return result + + if step == -1 and start > stop: + result = self._getitem(slice(stop + 1, start + 1)) + result.reverse() + return result + + # Return a list because a negative step could + # reverse the order of the items and this could + # be the desired behavior. 
+ + indices = range(start, stop, step) + return list(self._getitem(index) for index in indices) + else: + if self._len: + if index == 0: + return _lists[0][0] + elif index == -1: + return _lists[-1][-1] + else: + raise IndexError('list index out of range') + + if 0 <= index < len(_lists[0]): + return _lists[0][index] + + len_last = len(_lists[-1]) + + if -len_last < index < 0: + return _lists[-1][len_last + index] + + pos, idx = self._pos(index) + return _lists[pos][idx] + + _getitem = __getitem__ + + + def __setitem__(self, index, value): + """Raise not-implemented error. + + ``sl.__setitem__(index, value)`` <==> ``sl[index] = value`` + + :raises NotImplementedError: use ``del sl[index]`` and + ``sl.add(value)`` instead + + """ + message = 'use ``del sl[index]`` and ``sl.add(value)`` instead' + raise NotImplementedError(message) + + + def __iter__(self): + """Return an iterator over the sorted list. + + ``sl.__iter__()`` <==> ``iter(sl)`` + + Iterating the sorted list while adding or deleting values may raise a + :exc:`RuntimeError` or fail to iterate over all values. + + """ + return chain.from_iterable(self._lists) + + + def __reversed__(self): + """Return a reverse iterator over the sorted list. + + ``sl.__reversed__()`` <==> ``reversed(sl)`` + + Iterating the sorted list while adding or deleting values may raise a + :exc:`RuntimeError` or fail to iterate over all values. + + """ + return chain.from_iterable(map(reversed, reversed(self._lists))) + + + def reverse(self): + """Raise not-implemented error. + + Sorted list maintains values in ascending sort order. Values may not be + reversed in-place. + + Use ``reversed(sl)`` for an iterator over values in descending sort + order. + + Implemented to override `MutableSequence.reverse` which provides an + erroneous default implementation. + + :raises NotImplementedError: use ``reversed(sl)`` instead + + """ + raise NotImplementedError('use ``reversed(sl)`` instead') + + + def islice(self, start=None, stop=None, reverse=False): + """Return an iterator that slices sorted list from `start` to `stop`. + + The `start` and `stop` index are treated inclusive and exclusive, + respectively. + + Both `start` and `stop` default to `None` which is automatically + inclusive of the beginning and end of the sorted list. + + When `reverse` is `True` the values are yielded from the iterator in + reverse order; `reverse` defaults to `False`. + + >>> sl = SortedList('abcdefghij') + >>> it = sl.islice(2, 6) + >>> list(it) + ['c', 'd', 'e', 'f'] + + :param int start: start index (inclusive) + :param int stop: stop index (exclusive) + :param bool reverse: yield values in reverse order + :return: iterator + + """ + _len = self._len + + if not _len: + return iter(()) + + start, stop, _ = slice(start, stop).indices(self._len) + + if start >= stop: + return iter(()) + + _pos = self._pos + + min_pos, min_idx = _pos(start) + + if stop == _len: + max_pos = len(self._lists) - 1 + max_idx = len(self._lists[-1]) + else: + max_pos, max_idx = _pos(stop) + + return self._islice(min_pos, min_idx, max_pos, max_idx, reverse) + + + def _islice(self, min_pos, min_idx, max_pos, max_idx, reverse): + """Return an iterator that slices sorted list using two index pairs. + + The index pairs are (min_pos, min_idx) and (max_pos, max_idx), the + first inclusive and the latter exclusive. See `_pos` for details on how + an index is converted to an index pair. + + When `reverse` is `True`, values are yielded from the iterator in + reverse order. 
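islice and reversed() are the supported ways to walk a slice or the whole list backwards, since reverse() is intentionally unsupported. A sketch (same ``sortedcontainers`` import assumption):

    from sortedcontainers import SortedList

    sl = SortedList('abcdefghij')
    assert list(sl.islice(2, 6)) == ['c', 'd', 'e', 'f']
    assert list(sl.islice(2, 6, reverse=True)) == ['f', 'e', 'd', 'c']
    # sl.reverse() raises NotImplementedError; iterate in reverse instead:
    assert list(reversed(sl))[:3] == ['j', 'i', 'h']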
+ + """ + _lists = self._lists + + if min_pos > max_pos: + return iter(()) + + if min_pos == max_pos: + if reverse: + indices = reversed(range(min_idx, max_idx)) + return map(_lists[min_pos].__getitem__, indices) + + indices = range(min_idx, max_idx) + return map(_lists[min_pos].__getitem__, indices) + + next_pos = min_pos + 1 + + if next_pos == max_pos: + if reverse: + min_indices = range(min_idx, len(_lists[min_pos])) + max_indices = range(max_idx) + return chain( + map(_lists[max_pos].__getitem__, reversed(max_indices)), + map(_lists[min_pos].__getitem__, reversed(min_indices)), + ) + + min_indices = range(min_idx, len(_lists[min_pos])) + max_indices = range(max_idx) + return chain( + map(_lists[min_pos].__getitem__, min_indices), + map(_lists[max_pos].__getitem__, max_indices), + ) + + if reverse: + min_indices = range(min_idx, len(_lists[min_pos])) + sublist_indices = range(next_pos, max_pos) + sublists = map(_lists.__getitem__, reversed(sublist_indices)) + max_indices = range(max_idx) + return chain( + map(_lists[max_pos].__getitem__, reversed(max_indices)), + chain.from_iterable(map(reversed, sublists)), + map(_lists[min_pos].__getitem__, reversed(min_indices)), + ) + + min_indices = range(min_idx, len(_lists[min_pos])) + sublist_indices = range(next_pos, max_pos) + sublists = map(_lists.__getitem__, sublist_indices) + max_indices = range(max_idx) + return chain( + map(_lists[min_pos].__getitem__, min_indices), + chain.from_iterable(sublists), + map(_lists[max_pos].__getitem__, max_indices), + ) + + + def irange(self, minimum=None, maximum=None, inclusive=(True, True), + reverse=False): + """Create an iterator of values between `minimum` and `maximum`. + + Both `minimum` and `maximum` default to `None` which is automatically + inclusive of the beginning and end of the sorted list. + + The argument `inclusive` is a pair of booleans that indicates whether + the minimum and maximum ought to be included in the range, + respectively. The default is ``(True, True)`` such that the range is + inclusive of both minimum and maximum. + + When `reverse` is `True` the values are yielded from the iterator in + reverse order; `reverse` defaults to `False`. + + >>> sl = SortedList('abcdefghij') + >>> it = sl.irange('c', 'f') + >>> list(it) + ['c', 'd', 'e', 'f'] + + :param minimum: minimum value to start iterating + :param maximum: maximum value to stop iterating + :param inclusive: pair of booleans + :param bool reverse: yield values in reverse order + :return: iterator + + """ + _maxes = self._maxes + + if not _maxes: + return iter(()) + + _lists = self._lists + + # Calculate the minimum (pos, idx) pair. By default this location + # will be inclusive in our calculation. + + if minimum is None: + min_pos = 0 + min_idx = 0 + else: + if inclusive[0]: + min_pos = bisect_left(_maxes, minimum) + + if min_pos == len(_maxes): + return iter(()) + + min_idx = bisect_left(_lists[min_pos], minimum) + else: + min_pos = bisect_right(_maxes, minimum) + + if min_pos == len(_maxes): + return iter(()) + + min_idx = bisect_right(_lists[min_pos], minimum) + + # Calculate the maximum (pos, idx) pair. By default this location + # will be exclusive in our calculation. 
+ + if maximum is None: + max_pos = len(_maxes) - 1 + max_idx = len(_lists[max_pos]) + else: + if inclusive[1]: + max_pos = bisect_right(_maxes, maximum) + + if max_pos == len(_maxes): + max_pos -= 1 + max_idx = len(_lists[max_pos]) + else: + max_idx = bisect_right(_lists[max_pos], maximum) + else: + max_pos = bisect_left(_maxes, maximum) + + if max_pos == len(_maxes): + max_pos -= 1 + max_idx = len(_lists[max_pos]) + else: + max_idx = bisect_left(_lists[max_pos], maximum) + + return self._islice(min_pos, min_idx, max_pos, max_idx, reverse) + + + def __len__(self): + """Return the size of the sorted list. + + ``sl.__len__()`` <==> ``len(sl)`` + + :return: size of sorted list + + """ + return self._len + + + def bisect_left(self, value): + """Return an index to insert `value` in the sorted list. + + If the `value` is already present, the insertion point will be before + (to the left of) any existing values. + + Similar to the `bisect` module in the standard library. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sl = SortedList([10, 11, 12, 13, 14]) + >>> sl.bisect_left(12) + 2 + + :param value: insertion index of value in sorted list + :return: index + + """ + _maxes = self._maxes + + if not _maxes: + return 0 + + pos = bisect_left(_maxes, value) + + if pos == len(_maxes): + return self._len + + idx = bisect_left(self._lists[pos], value) + return self._loc(pos, idx) + + + def bisect_right(self, value): + """Return an index to insert `value` in the sorted list. + + Similar to `bisect_left`, but if `value` is already present, the + insertion point will be after (to the right of) any existing values. + + Similar to the `bisect` module in the standard library. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sl = SortedList([10, 11, 12, 13, 14]) + >>> sl.bisect_right(12) + 3 + + :param value: insertion index of value in sorted list + :return: index + + """ + _maxes = self._maxes + + if not _maxes: + return 0 + + pos = bisect_right(_maxes, value) + + if pos == len(_maxes): + return self._len + + idx = bisect_right(self._lists[pos], value) + return self._loc(pos, idx) + + bisect = bisect_right + _bisect_right = bisect_right + + + def count(self, value): + """Return number of occurrences of `value` in the sorted list. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sl = SortedList([1, 2, 2, 3, 3, 3, 4, 4, 4, 4]) + >>> sl.count(3) + 3 + + :param value: value to count in sorted list + :return: count + + """ + _maxes = self._maxes + + if not _maxes: + return 0 + + pos_left = bisect_left(_maxes, value) + + if pos_left == len(_maxes): + return 0 + + _lists = self._lists + idx_left = bisect_left(_lists[pos_left], value) + pos_right = bisect_right(_maxes, value) + + if pos_right == len(_maxes): + return self._len - self._loc(pos_left, idx_left) + + idx_right = bisect_right(_lists[pos_right], value) + + if pos_left == pos_right: + return idx_right - idx_left + + right = self._loc(pos_right, idx_right) + left = self._loc(pos_left, idx_left) + return right - left + + + def copy(self): + """Return a shallow copy of the sorted list. + + Runtime complexity: `O(n)` + + :return: new sorted list + + """ + return self.__class__(self) + + __copy__ = copy + + + def append(self, value): + """Raise not-implemented error. + + Implemented to override `MutableSequence.append` which provides an + erroneous default implementation. 
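bisect_left, bisect_right and count fit together as documented above: the count of a value is the width of the span between its two insertion points. A sketch (same import assumption):

    from sortedcontainers import SortedList

    sl = SortedList([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
    left = sl.bisect_left(3)       # first position where 3 could be inserted
    right = sl.bisect_right(3)     # position just past the existing 3s
    assert (left, right) == (3, 6)
    assert sl.count(3) == right - left == 3
    assert sl.index(3) == left     # index() reports the first occurrence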
+ + :raises NotImplementedError: use ``sl.add(value)`` instead + + """ + raise NotImplementedError('use ``sl.add(value)`` instead') + + + def extend(self, values): + """Raise not-implemented error. + + Implemented to override `MutableSequence.extend` which provides an + erroneous default implementation. + + :raises NotImplementedError: use ``sl.update(values)`` instead + + """ + raise NotImplementedError('use ``sl.update(values)`` instead') + + + def insert(self, index, value): + """Raise not-implemented error. + + :raises NotImplementedError: use ``sl.add(value)`` instead + + """ + raise NotImplementedError('use ``sl.add(value)`` instead') + + + def pop(self, index=-1): + """Remove and return value at `index` in sorted list. + + Raise :exc:`IndexError` if the sorted list is empty or index is out of + range. + + Negative indices are supported. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sl = SortedList('abcde') + >>> sl.pop() + 'e' + >>> sl.pop(2) + 'c' + >>> sl + SortedList(['a', 'b', 'd']) + + :param int index: index of value (default -1) + :return: value + :raises IndexError: if index is out of range + + """ + if not self._len: + raise IndexError('pop index out of range') + + _lists = self._lists + + if index == 0: + val = _lists[0][0] + self._delete(0, 0) + return val + + if index == -1: + pos = len(_lists) - 1 + loc = len(_lists[pos]) - 1 + val = _lists[pos][loc] + self._delete(pos, loc) + return val + + if 0 <= index < len(_lists[0]): + val = _lists[0][index] + self._delete(0, index) + return val + + len_last = len(_lists[-1]) + + if -len_last < index < 0: + pos = len(_lists) - 1 + loc = len_last + index + val = _lists[pos][loc] + self._delete(pos, loc) + return val + + pos, idx = self._pos(index) + val = _lists[pos][idx] + self._delete(pos, idx) + return val + + + def index(self, value, start=None, stop=None): + """Return first index of value in sorted list. + + Raise ValueError if `value` is not present. + + Index must be between `start` and `stop` for the `value` to be + considered present. The default value, None, for `start` and `stop` + indicate the beginning and end of the sorted list. + + Negative indices are supported. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> sl = SortedList('abcde') + >>> sl.index('d') + 3 + >>> sl.index('z') + Traceback (most recent call last): + ... 
+ ValueError: 'z' is not in list + + :param value: value in sorted list + :param int start: start index (default None, start of sorted list) + :param int stop: stop index (default None, end of sorted list) + :return: index of value + :raises ValueError: if value is not present + + """ + _len = self._len + + if not _len: + raise ValueError('{0!r} is not in list'.format(value)) + + if start is None: + start = 0 + if start < 0: + start += _len + if start < 0: + start = 0 + + if stop is None: + stop = _len + if stop < 0: + stop += _len + if stop > _len: + stop = _len + + if stop <= start: + raise ValueError('{0!r} is not in list'.format(value)) + + _maxes = self._maxes + pos_left = bisect_left(_maxes, value) + + if pos_left == len(_maxes): + raise ValueError('{0!r} is not in list'.format(value)) + + _lists = self._lists + idx_left = bisect_left(_lists[pos_left], value) + + if _lists[pos_left][idx_left] != value: + raise ValueError('{0!r} is not in list'.format(value)) + + stop -= 1 + left = self._loc(pos_left, idx_left) + + if start <= left: + if left <= stop: + return left + else: + right = self._bisect_right(value) - 1 + + if start <= right: + return start + + raise ValueError('{0!r} is not in list'.format(value)) + + + def __add__(self, other): + """Return new sorted list containing all values in both sequences. + + ``sl.__add__(other)`` <==> ``sl + other`` + + Values in `other` do not need to be in sorted order. + + Runtime complexity: `O(n*log(n))` + + >>> sl1 = SortedList('bat') + >>> sl2 = SortedList('cat') + >>> sl1 + sl2 + SortedList(['a', 'a', 'b', 'c', 't', 't']) + + :param other: other iterable + :return: new sorted list + + """ + values = reduce(iadd, self._lists, []) + values.extend(other) + return self.__class__(values) + + __radd__ = __add__ + + + def __iadd__(self, other): + """Update sorted list with values from `other`. + + ``sl.__iadd__(other)`` <==> ``sl += other`` + + Values in `other` do not need to be in sorted order. + + Runtime complexity: `O(k*log(n))` -- approximate. + + >>> sl = SortedList('bat') + >>> sl += 'cat' + >>> sl + SortedList(['a', 'a', 'b', 'c', 't', 't']) + + :param other: other iterable + :return: existing sorted list + + """ + self._update(other) + return self + + + def __mul__(self, num): + """Return new sorted list with `num` shallow copies of values. + + ``sl.__mul__(num)`` <==> ``sl * num`` + + Runtime complexity: `O(n*log(n))` + + >>> sl = SortedList('abc') + >>> sl * 3 + SortedList(['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c']) + + :param int num: count of shallow copies + :return: new sorted list + + """ + values = reduce(iadd, self._lists, []) * num + return self.__class__(values) + + __rmul__ = __mul__ + + + def __imul__(self, num): + """Update the sorted list with `num` shallow copies of values. + + ``sl.__imul__(num)`` <==> ``sl *= num`` + + Runtime complexity: `O(n*log(n))` + + >>> sl = SortedList('abc') + >>> sl *= 3 + >>> sl + SortedList(['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c']) + + :param int num: count of shallow copies + :return: existing sorted list + + """ + values = reduce(iadd, self._lists, []) * num + self._clear() + self._update(values) + return self + + + def __make_cmp(seq_op, symbol, doc): + "Make comparator method." + def comparer(self, other): + "Compare method for sorted list and sequence." 
+ if not isinstance(other, Sequence): + return NotImplemented + + self_len = self._len + len_other = len(other) + + if self_len != len_other: + if seq_op is eq: + return False + if seq_op is ne: + return True + + for alpha, beta in zip(self, other): + if alpha != beta: + return seq_op(alpha, beta) + + return seq_op(self_len, len_other) + + seq_op_name = seq_op.__name__ + comparer.__name__ = '__{0}__'.format(seq_op_name) + doc_str = """Return true if and only if sorted list is {0} `other`. + + ``sl.__{1}__(other)`` <==> ``sl {2} other`` + + Comparisons use lexicographical order as with sequences. + + Runtime complexity: `O(n)` + + :param other: `other` sequence + :return: true if sorted list is {0} `other` + + """ + comparer.__doc__ = dedent(doc_str.format(doc, seq_op_name, symbol)) + return comparer + + + __eq__ = __make_cmp(eq, '==', 'equal to') + __ne__ = __make_cmp(ne, '!=', 'not equal to') + __lt__ = __make_cmp(lt, '<', 'less than') + __gt__ = __make_cmp(gt, '>', 'greater than') + __le__ = __make_cmp(le, '<=', 'less than or equal to') + __ge__ = __make_cmp(ge, '>=', 'greater than or equal to') + __make_cmp = staticmethod(__make_cmp) + + + def __reduce__(self): + values = reduce(iadd, self._lists, []) + return (type(self), (values,)) + + + @recursive_repr() + def __repr__(self): + """Return string representation of sorted list. + + ``sl.__repr__()`` <==> ``repr(sl)`` + + :return: string representation + + """ + return '{0}({1!r})'.format(type(self).__name__, list(self)) + + + def _check(self): + """Check invariants of sorted list. + + Runtime complexity: `O(n)` + + """ + try: + assert self._load >= 4 + assert len(self._maxes) == len(self._lists) + assert self._len == sum(len(sublist) for sublist in self._lists) + + # Check all sublists are sorted. + + for sublist in self._lists: + for pos in range(1, len(sublist)): + assert sublist[pos - 1] <= sublist[pos] + + # Check beginning/end of sublists are sorted. + + for pos in range(1, len(self._lists)): + assert self._lists[pos - 1][-1] <= self._lists[pos][0] + + # Check _maxes index is the last value of each sublist. + + for pos in range(len(self._maxes)): + assert self._maxes[pos] == self._lists[pos][-1] + + # Check sublist lengths are less than double load-factor. + + double = self._load << 1 + assert all(len(sublist) <= double for sublist in self._lists) + + # Check sublist lengths are greater than half load-factor for all + # but the last sublist. + + half = self._load >> 1 + for pos in range(0, len(self._lists) - 1): + assert len(self._lists[pos]) >= half + + if self._index: + assert self._len == self._index[0] + assert len(self._index) == self._offset + len(self._lists) + + # Check index leaf nodes equal length of sublists. + + for pos in range(len(self._lists)): + leaf = self._index[self._offset + pos] + assert leaf == len(self._lists[pos]) + + # Check index branch nodes are the sum of their children. 
+ + for pos in range(self._offset): + child = (pos << 1) + 1 + if child >= len(self._index): + assert self._index[pos] == 0 + elif child + 1 == len(self._index): + assert self._index[pos] == self._index[child] + else: + child_sum = self._index[child] + self._index[child + 1] + assert child_sum == self._index[pos] + except: + traceback.print_exc(file=sys.stdout) + print('len', self._len) + print('load', self._load) + print('offset', self._offset) + print('len_index', len(self._index)) + print('index', self._index) + print('len_maxes', len(self._maxes)) + print('maxes', self._maxes) + print('len_lists', len(self._lists)) + print('lists', self._lists) + raise + + +def identity(value): + "Identity function." + return value + + +class SortedKeyList(SortedList): + """Sorted-key list is a subtype of sorted list. + + The sorted-key list maintains values in comparison order based on the + result of a key function applied to every value. + + All the same methods that are available in :class:`SortedList` are also + available in :class:`SortedKeyList`. + + Additional methods provided: + + * :attr:`SortedKeyList.key` + * :func:`SortedKeyList.bisect_key_left` + * :func:`SortedKeyList.bisect_key_right` + * :func:`SortedKeyList.irange_key` + + Some examples below use: + + >>> from operator import neg + >>> neg + + >>> neg(1) + -1 + + """ + def __init__(self, iterable=None, key=identity): + """Initialize sorted-key list instance. + + Optional `iterable` argument provides an initial iterable of values to + initialize the sorted-key list. + + Optional `key` argument defines a callable that, like the `key` + argument to Python's `sorted` function, extracts a comparison key from + each value. The default is the identity function. + + Runtime complexity: `O(n*log(n))` + + >>> from operator import neg + >>> skl = SortedKeyList(key=neg) + >>> skl + SortedKeyList([], key=) + >>> skl = SortedKeyList([3, 1, 2], key=neg) + >>> skl + SortedKeyList([3, 2, 1], key=) + + :param iterable: initial values (optional) + :param key: function used to extract comparison key (optional) + + """ + self._key = key + self._len = 0 + self._load = self.DEFAULT_LOAD_FACTOR + self._lists = [] + self._keys = [] + self._maxes = [] + self._index = [] + self._offset = 0 + + if iterable is not None: + self._update(iterable) + + + def __new__(cls, iterable=None, key=identity): + return object.__new__(cls) + + + @property + def key(self): + "Function used to extract comparison key from values." + return self._key + + + def clear(self): + """Remove all values from sorted-key list. + + Runtime complexity: `O(n)` + + """ + self._len = 0 + del self._lists[:] + del self._keys[:] + del self._maxes[:] + del self._index[:] + + _clear = clear + + + def add(self, value): + """Add `value` to sorted-key list. + + Runtime complexity: `O(log(n))` -- approximate. 
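A short sketch of SortedKeyList ordering under a key function, plus the ``SortedList(key=...)`` construction path described in ``__new__`` above (assuming ``sortedcontainers`` is importable):

    from operator import neg
    from sortedcontainers import SortedKeyList, SortedList

    skl = SortedKeyList([3, 1, 2], key=neg)     # compare by -value: descending order
    assert list(skl) == [3, 2, 1]
    skl.add(4)
    assert list(skl) == [4, 3, 2, 1]
    # Passing key= to SortedList transparently builds a SortedKeyList:
    also = SortedList([3, 1, 2], key=neg)
    assert isinstance(also, SortedKeyList) and list(also) == [3, 2, 1]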
+ + >>> from operator import neg + >>> skl = SortedKeyList(key=neg) + >>> skl.add(3) + >>> skl.add(1) + >>> skl.add(2) + >>> skl + SortedKeyList([3, 2, 1], key=) + + :param value: value to add to sorted-key list + + """ + _lists = self._lists + _keys = self._keys + _maxes = self._maxes + + key = self._key(value) + + if _maxes: + pos = bisect_right(_maxes, key) + + if pos == len(_maxes): + pos -= 1 + _lists[pos].append(value) + _keys[pos].append(key) + _maxes[pos] = key + else: + idx = bisect_right(_keys[pos], key) + _lists[pos].insert(idx, value) + _keys[pos].insert(idx, key) + + self._expand(pos) + else: + _lists.append([value]) + _keys.append([key]) + _maxes.append(key) + + self._len += 1 + + + def _expand(self, pos): + """Split sublists with length greater than double the load-factor. + + Updates the index when the sublist length is less than double the load + level. This requires incrementing the nodes in a traversal from the + leaf node to the root. For an example traversal see + ``SortedList._loc``. + + """ + _lists = self._lists + _keys = self._keys + _index = self._index + + if len(_keys[pos]) > (self._load << 1): + _maxes = self._maxes + _load = self._load + + _lists_pos = _lists[pos] + _keys_pos = _keys[pos] + half = _lists_pos[_load:] + half_keys = _keys_pos[_load:] + del _lists_pos[_load:] + del _keys_pos[_load:] + _maxes[pos] = _keys_pos[-1] + + _lists.insert(pos + 1, half) + _keys.insert(pos + 1, half_keys) + _maxes.insert(pos + 1, half_keys[-1]) + + del _index[:] + else: + if _index: + child = self._offset + pos + while child: + _index[child] += 1 + child = (child - 1) >> 1 + _index[0] += 1 + + + def update(self, iterable): + """Update sorted-key list by adding all values from `iterable`. + + Runtime complexity: `O(k*log(n))` -- approximate. + + >>> from operator import neg + >>> skl = SortedKeyList(key=neg) + >>> skl.update([3, 1, 2]) + >>> skl + SortedKeyList([3, 2, 1], key=) + + :param iterable: iterable of values to add + + """ + _lists = self._lists + _keys = self._keys + _maxes = self._maxes + values = sorted(iterable, key=self._key) + + if _maxes: + if len(values) * 4 >= self._len: + _lists.append(values) + values = reduce(iadd, _lists, []) + values.sort(key=self._key) + self._clear() + else: + _add = self.add + for val in values: + _add(val) + return + + _load = self._load + _lists.extend(values[pos:(pos + _load)] + for pos in range(0, len(values), _load)) + _keys.extend(list(map(self._key, _list)) for _list in _lists) + _maxes.extend(sublist[-1] for sublist in _keys) + self._len = len(values) + del self._index[:] + + _update = update + + + def __contains__(self, value): + """Return true if `value` is an element of the sorted-key list. 
+ + ``skl.__contains__(value)`` <==> ``value in skl`` + + Runtime complexity: `O(log(n))` + + >>> from operator import neg + >>> skl = SortedKeyList([1, 2, 3, 4, 5], key=neg) + >>> 3 in skl + True + + :param value: search for value in sorted-key list + :return: true if `value` in sorted-key list + + """ + _maxes = self._maxes + + if not _maxes: + return False + + key = self._key(value) + pos = bisect_left(_maxes, key) + + if pos == len(_maxes): + return False + + _lists = self._lists + _keys = self._keys + + idx = bisect_left(_keys[pos], key) + + len_keys = len(_keys) + len_sublist = len(_keys[pos]) + + while True: + if _keys[pos][idx] != key: + return False + if _lists[pos][idx] == value: + return True + idx += 1 + if idx == len_sublist: + pos += 1 + if pos == len_keys: + return False + len_sublist = len(_keys[pos]) + idx = 0 + + + def discard(self, value): + """Remove `value` from sorted-key list if it is a member. + + If `value` is not a member, do nothing. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> from operator import neg + >>> skl = SortedKeyList([5, 4, 3, 2, 1], key=neg) + >>> skl.discard(1) + >>> skl.discard(0) + >>> skl == [5, 4, 3, 2] + True + + :param value: `value` to discard from sorted-key list + + """ + _maxes = self._maxes + + if not _maxes: + return + + key = self._key(value) + pos = bisect_left(_maxes, key) + + if pos == len(_maxes): + return + + _lists = self._lists + _keys = self._keys + idx = bisect_left(_keys[pos], key) + len_keys = len(_keys) + len_sublist = len(_keys[pos]) + + while True: + if _keys[pos][idx] != key: + return + if _lists[pos][idx] == value: + self._delete(pos, idx) + return + idx += 1 + if idx == len_sublist: + pos += 1 + if pos == len_keys: + return + len_sublist = len(_keys[pos]) + idx = 0 + + + def remove(self, value): + """Remove `value` from sorted-key list; `value` must be a member. + + If `value` is not a member, raise ValueError. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> from operator import neg + >>> skl = SortedKeyList([1, 2, 3, 4, 5], key=neg) + >>> skl.remove(5) + >>> skl == [4, 3, 2, 1] + True + >>> skl.remove(0) + Traceback (most recent call last): + ... + ValueError: 0 not in list + + :param value: `value` to remove from sorted-key list + :raises ValueError: if `value` is not in sorted-key list + + """ + _maxes = self._maxes + + if not _maxes: + raise ValueError('{0!r} not in list'.format(value)) + + key = self._key(value) + pos = bisect_left(_maxes, key) + + if pos == len(_maxes): + raise ValueError('{0!r} not in list'.format(value)) + + _lists = self._lists + _keys = self._keys + idx = bisect_left(_keys[pos], key) + len_keys = len(_keys) + len_sublist = len(_keys[pos]) + + while True: + if _keys[pos][idx] != key: + raise ValueError('{0!r} not in list'.format(value)) + if _lists[pos][idx] == value: + self._delete(pos, idx) + return + idx += 1 + if idx == len_sublist: + pos += 1 + if pos == len_keys: + raise ValueError('{0!r} not in list'.format(value)) + len_sublist = len(_keys[pos]) + idx = 0 + + + def _delete(self, pos, idx): + """Delete value at the given `(pos, idx)`. + + Combines lists that are less than half the load level. + + Updates the index when the sublist length is more than half the load + level. This requires decrementing the nodes in a traversal from the + leaf node to the root. For an example traversal see + ``SortedList._loc``. 
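The membership, discard and remove implementations above all walk forward through the values whose keys compare equal until the exact value is found. A sketch with a many-to-one key function (``len``), same import assumption:

    from sortedcontainers import SortedKeyList

    skl = SortedKeyList(['bb', 'aa', 'cc', 'd'], key=len)
    assert list(skl) == ['d', 'bb', 'aa', 'cc']   # equal keys keep insertion order
    assert 'cc' in skl        # membership scans every value whose key equals 2
    skl.remove('aa')          # removes that exact value, not an arbitrary len-2 one
    skl.discard('zz')         # absent value under an existing key: quietly ignored
    assert list(skl) == ['d', 'bb', 'cc']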
+ + :param int pos: lists index + :param int idx: sublist index + + """ + _lists = self._lists + _keys = self._keys + _maxes = self._maxes + _index = self._index + keys_pos = _keys[pos] + lists_pos = _lists[pos] + + del keys_pos[idx] + del lists_pos[idx] + self._len -= 1 + + len_keys_pos = len(keys_pos) + + if len_keys_pos > (self._load >> 1): + _maxes[pos] = keys_pos[-1] + + if _index: + child = self._offset + pos + while child > 0: + _index[child] -= 1 + child = (child - 1) >> 1 + _index[0] -= 1 + elif len(_keys) > 1: + if not pos: + pos += 1 + + prev = pos - 1 + _keys[prev].extend(_keys[pos]) + _lists[prev].extend(_lists[pos]) + _maxes[prev] = _keys[prev][-1] + + del _lists[pos] + del _keys[pos] + del _maxes[pos] + del _index[:] + + self._expand(prev) + elif len_keys_pos: + _maxes[pos] = keys_pos[-1] + else: + del _lists[pos] + del _keys[pos] + del _maxes[pos] + del _index[:] + + + def irange(self, minimum=None, maximum=None, inclusive=(True, True), + reverse=False): + """Create an iterator of values between `minimum` and `maximum`. + + Both `minimum` and `maximum` default to `None` which is automatically + inclusive of the beginning and end of the sorted-key list. + + The argument `inclusive` is a pair of booleans that indicates whether + the minimum and maximum ought to be included in the range, + respectively. The default is ``(True, True)`` such that the range is + inclusive of both minimum and maximum. + + When `reverse` is `True` the values are yielded from the iterator in + reverse order; `reverse` defaults to `False`. + + >>> from operator import neg + >>> skl = SortedKeyList([11, 12, 13, 14, 15], key=neg) + >>> it = skl.irange(14.5, 11.5) + >>> list(it) + [14, 13, 12] + + :param minimum: minimum value to start iterating + :param maximum: maximum value to stop iterating + :param inclusive: pair of booleans + :param bool reverse: yield values in reverse order + :return: iterator + + """ + min_key = self._key(minimum) if minimum is not None else None + max_key = self._key(maximum) if maximum is not None else None + return self._irange_key( + min_key=min_key, max_key=max_key, + inclusive=inclusive, reverse=reverse, + ) + + + def irange_key(self, min_key=None, max_key=None, inclusive=(True, True), + reverse=False): + """Create an iterator of values between `min_key` and `max_key`. + + Both `min_key` and `max_key` default to `None` which is automatically + inclusive of the beginning and end of the sorted-key list. + + The argument `inclusive` is a pair of booleans that indicates whether + the minimum and maximum ought to be included in the range, + respectively. The default is ``(True, True)`` such that the range is + inclusive of both minimum and maximum. + + When `reverse` is `True` the values are yielded from the iterator in + reverse order; `reverse` defaults to `False`. + + >>> from operator import neg + >>> skl = SortedKeyList([11, 12, 13, 14, 15], key=neg) + >>> it = skl.irange_key(-14, -12) + >>> list(it) + [14, 13, 12] + + :param min_key: minimum key to start iterating + :param max_key: maximum key to stop iterating + :param inclusive: pair of booleans + :param bool reverse: yield values in reverse order + :return: iterator + + """ + _maxes = self._maxes + + if not _maxes: + return iter(()) + + _keys = self._keys + + # Calculate the minimum (pos, idx) pair. By default this location + # will be inclusive in our calculation. 
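# Illustrative sketch (not part of the vendored sortedcontainers source).
# irange() above only converts its value bounds to key bounds with self._key
# and defers to irange_key(); with key=neg the bounds swap roles, which is
# why irange(14.5, 11.5) in the doctest yields [14, 13, 12]:

from operator import neg

minimum, maximum = 14.5, 11.5
min_key, max_key = neg(minimum), neg(maximum)        # -14.5, -11.5
keys = [-15, -14, -13, -12, -11]                     # keys of [15, 14, 13, 12, 11]
in_range = [k for k in keys if min_key <= k <= max_key]
assert in_range == [-14, -13, -12]                   # i.e. the values 14, 13, 12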
+ + if min_key is None: + min_pos = 0 + min_idx = 0 + else: + if inclusive[0]: + min_pos = bisect_left(_maxes, min_key) + + if min_pos == len(_maxes): + return iter(()) + + min_idx = bisect_left(_keys[min_pos], min_key) + else: + min_pos = bisect_right(_maxes, min_key) + + if min_pos == len(_maxes): + return iter(()) + + min_idx = bisect_right(_keys[min_pos], min_key) + + # Calculate the maximum (pos, idx) pair. By default this location + # will be exclusive in our calculation. + + if max_key is None: + max_pos = len(_maxes) - 1 + max_idx = len(_keys[max_pos]) + else: + if inclusive[1]: + max_pos = bisect_right(_maxes, max_key) + + if max_pos == len(_maxes): + max_pos -= 1 + max_idx = len(_keys[max_pos]) + else: + max_idx = bisect_right(_keys[max_pos], max_key) + else: + max_pos = bisect_left(_maxes, max_key) + + if max_pos == len(_maxes): + max_pos -= 1 + max_idx = len(_keys[max_pos]) + else: + max_idx = bisect_left(_keys[max_pos], max_key) + + return self._islice(min_pos, min_idx, max_pos, max_idx, reverse) + + _irange_key = irange_key + + + def bisect_left(self, value): + """Return an index to insert `value` in the sorted-key list. + + If the `value` is already present, the insertion point will be before + (to the left of) any existing values. + + Similar to the `bisect` module in the standard library. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> from operator import neg + >>> skl = SortedKeyList([5, 4, 3, 2, 1], key=neg) + >>> skl.bisect_left(1) + 4 + + :param value: insertion index of value in sorted-key list + :return: index + + """ + return self._bisect_key_left(self._key(value)) + + + def bisect_right(self, value): + """Return an index to insert `value` in the sorted-key list. + + Similar to `bisect_left`, but if `value` is already present, the + insertion point will be after (to the right of) any existing values. + + Similar to the `bisect` module in the standard library. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> from operator import neg + >>> skl = SortedList([5, 4, 3, 2, 1], key=neg) + >>> skl.bisect_right(1) + 5 + + :param value: insertion index of value in sorted-key list + :return: index + + """ + return self._bisect_key_right(self._key(value)) + + bisect = bisect_right + + + def bisect_key_left(self, key): + """Return an index to insert `key` in the sorted-key list. + + If the `key` is already present, the insertion point will be before (to + the left of) any existing keys. + + Similar to the `bisect` module in the standard library. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> from operator import neg + >>> skl = SortedKeyList([5, 4, 3, 2, 1], key=neg) + >>> skl.bisect_key_left(-1) + 4 + + :param key: insertion index of key in sorted-key list + :return: index + + """ + _maxes = self._maxes + + if not _maxes: + return 0 + + pos = bisect_left(_maxes, key) + + if pos == len(_maxes): + return self._len + + idx = bisect_left(self._keys[pos], key) + + return self._loc(pos, idx) + + _bisect_key_left = bisect_key_left + + + def bisect_key_right(self, key): + """Return an index to insert `key` in the sorted-key list. + + Similar to `bisect_key_left`, but if `key` is already present, the + insertion point will be after (to the right of) any existing keys. + + Similar to the `bisect` module in the standard library. + + Runtime complexity: `O(log(n))` -- approximate. 
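# Illustrative sketch (not part of the vendored sortedcontainers source).
# bisect_left(value) above is simply bisect_key_left(self._key(value)), so the
# doctest's skl.bisect_left(1) and skl.bisect_key_left(-1) both return 4.
# A flat-list model with the stdlib bisect module:

from bisect import bisect_left

keys = [-5, -4, -3, -2, -1]        # keys of SortedKeyList([5, 4, 3, 2, 1], key=neg)
value = 1
assert bisect_left(keys, -value) == 4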
+ + >>> from operator import neg + >>> skl = SortedList([5, 4, 3, 2, 1], key=neg) + >>> skl.bisect_key_right(-1) + 5 + + :param key: insertion index of key in sorted-key list + :return: index + + """ + _maxes = self._maxes + + if not _maxes: + return 0 + + pos = bisect_right(_maxes, key) + + if pos == len(_maxes): + return self._len + + idx = bisect_right(self._keys[pos], key) + + return self._loc(pos, idx) + + bisect_key = bisect_key_right + _bisect_key_right = bisect_key_right + + + def count(self, value): + """Return number of occurrences of `value` in the sorted-key list. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> from operator import neg + >>> skl = SortedKeyList([4, 4, 4, 4, 3, 3, 3, 2, 2, 1], key=neg) + >>> skl.count(2) + 2 + + :param value: value to count in sorted-key list + :return: count + + """ + _maxes = self._maxes + + if not _maxes: + return 0 + + key = self._key(value) + pos = bisect_left(_maxes, key) + + if pos == len(_maxes): + return 0 + + _lists = self._lists + _keys = self._keys + idx = bisect_left(_keys[pos], key) + total = 0 + len_keys = len(_keys) + len_sublist = len(_keys[pos]) + + while True: + if _keys[pos][idx] != key: + return total + if _lists[pos][idx] == value: + total += 1 + idx += 1 + if idx == len_sublist: + pos += 1 + if pos == len_keys: + return total + len_sublist = len(_keys[pos]) + idx = 0 + + + def copy(self): + """Return a shallow copy of the sorted-key list. + + Runtime complexity: `O(n)` + + :return: new sorted-key list + + """ + return self.__class__(self, key=self._key) + + __copy__ = copy + + + def index(self, value, start=None, stop=None): + """Return first index of value in sorted-key list. + + Raise ValueError if `value` is not present. + + Index must be between `start` and `stop` for the `value` to be + considered present. The default value, None, for `start` and `stop` + indicate the beginning and end of the sorted-key list. + + Negative indices are supported. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> from operator import neg + >>> skl = SortedKeyList([5, 4, 3, 2, 1], key=neg) + >>> skl.index(2) + 3 + >>> skl.index(0) + Traceback (most recent call last): + ... 
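# Illustrative sketch (not part of the vendored sortedcontainers source).
# bisect_key_left()/bisect_key_right() above finish with self._loc(pos, idx),
# which converts (sublist position, offset within sublist) into a flat index.
# A naive O(pos) model; the vendored SortedList._loc keeps a positional index
# so the real conversion is roughly O(log n):

sublists = [[5, 4], [3, 2], [1]]                  # possible shape of _lists

def loc(pos, idx):
    return sum(len(s) for s in sublists[:pos]) + idx

assert loc(2, 0) == 4    # first slot of the third sublist is flat index 4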
+ ValueError: 0 is not in list + + :param value: value in sorted-key list + :param int start: start index (default None, start of sorted-key list) + :param int stop: stop index (default None, end of sorted-key list) + :return: index of value + :raises ValueError: if value is not present + + """ + _len = self._len + + if not _len: + raise ValueError('{0!r} is not in list'.format(value)) + + if start is None: + start = 0 + if start < 0: + start += _len + if start < 0: + start = 0 + + if stop is None: + stop = _len + if stop < 0: + stop += _len + if stop > _len: + stop = _len + + if stop <= start: + raise ValueError('{0!r} is not in list'.format(value)) + + _maxes = self._maxes + key = self._key(value) + pos = bisect_left(_maxes, key) + + if pos == len(_maxes): + raise ValueError('{0!r} is not in list'.format(value)) + + stop -= 1 + _lists = self._lists + _keys = self._keys + idx = bisect_left(_keys[pos], key) + len_keys = len(_keys) + len_sublist = len(_keys[pos]) + + while True: + if _keys[pos][idx] != key: + raise ValueError('{0!r} is not in list'.format(value)) + if _lists[pos][idx] == value: + loc = self._loc(pos, idx) + if start <= loc <= stop: + return loc + elif loc > stop: + break + idx += 1 + if idx == len_sublist: + pos += 1 + if pos == len_keys: + raise ValueError('{0!r} is not in list'.format(value)) + len_sublist = len(_keys[pos]) + idx = 0 + + raise ValueError('{0!r} is not in list'.format(value)) + + + def __add__(self, other): + """Return new sorted-key list containing all values in both sequences. + + ``skl.__add__(other)`` <==> ``skl + other`` + + Values in `other` do not need to be in sorted-key order. + + Runtime complexity: `O(n*log(n))` + + >>> from operator import neg + >>> skl1 = SortedKeyList([5, 4, 3], key=neg) + >>> skl2 = SortedKeyList([2, 1, 0], key=neg) + >>> skl1 + skl2 + SortedKeyList([5, 4, 3, 2, 1, 0], key=) + + :param other: other iterable + :return: new sorted-key list + + """ + values = reduce(iadd, self._lists, []) + values.extend(other) + return self.__class__(values, key=self._key) + + __radd__ = __add__ + + + def __mul__(self, num): + """Return new sorted-key list with `num` shallow copies of values. + + ``skl.__mul__(num)`` <==> ``skl * num`` + + Runtime complexity: `O(n*log(n))` + + >>> from operator import neg + >>> skl = SortedKeyList([3, 2, 1], key=neg) + >>> skl * 2 + SortedKeyList([3, 3, 2, 2, 1, 1], key=) + + :param int num: count of shallow copies + :return: new sorted-key list + + """ + values = reduce(iadd, self._lists, []) * num + return self.__class__(values, key=self._key) + + + def __reduce__(self): + values = reduce(iadd, self._lists, []) + return (type(self), (values, self.key)) + + + @recursive_repr() + def __repr__(self): + """Return string representation of sorted-key list. + + ``skl.__repr__()`` <==> ``repr(skl)`` + + :return: string representation + + """ + type_name = type(self).__name__ + return '{0}({1!r}, key={2!r})'.format(type_name, list(self), self._key) + + + def _check(self): + """Check invariants of sorted-key list. + + Runtime complexity: `O(n)` + + """ + try: + assert self._load >= 4 + assert len(self._maxes) == len(self._lists) == len(self._keys) + assert self._len == sum(len(sublist) for sublist in self._lists) + + # Check all sublists are sorted. + + for sublist in self._keys: + for pos in range(1, len(sublist)): + assert sublist[pos - 1] <= sublist[pos] + + # Check beginning/end of sublists are sorted. 
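# Illustrative sketch (not part of the vendored sortedcontainers source).
# __add__(), __mul__() and __reduce__() above all flatten the two-level
# _lists structure with reduce(iadd, self._lists, []), which concatenates the
# sublists in order into a fresh list:

from functools import reduce
from operator import iadd

_lists = [[5, 4], [3, 2], [1]]
flat = reduce(iadd, _lists, [])    # the [] accumulator is extended; sublists are untouched
assert flat == [5, 4, 3, 2, 1]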
+ + for pos in range(1, len(self._keys)): + assert self._keys[pos - 1][-1] <= self._keys[pos][0] + + # Check _keys matches _key mapped to _lists. + + for val_sublist, key_sublist in zip(self._lists, self._keys): + assert len(val_sublist) == len(key_sublist) + for val, key in zip(val_sublist, key_sublist): + assert self._key(val) == key + + # Check _maxes index is the last value of each sublist. + + for pos in range(len(self._maxes)): + assert self._maxes[pos] == self._keys[pos][-1] + + # Check sublist lengths are less than double load-factor. + + double = self._load << 1 + assert all(len(sublist) <= double for sublist in self._lists) + + # Check sublist lengths are greater than half load-factor for all + # but the last sublist. + + half = self._load >> 1 + for pos in range(0, len(self._lists) - 1): + assert len(self._lists[pos]) >= half + + if self._index: + assert self._len == self._index[0] + assert len(self._index) == self._offset + len(self._lists) + + # Check index leaf nodes equal length of sublists. + + for pos in range(len(self._lists)): + leaf = self._index[self._offset + pos] + assert leaf == len(self._lists[pos]) + + # Check index branch nodes are the sum of their children. + + for pos in range(self._offset): + child = (pos << 1) + 1 + if child >= len(self._index): + assert self._index[pos] == 0 + elif child + 1 == len(self._index): + assert self._index[pos] == self._index[child] + else: + child_sum = self._index[child] + self._index[child + 1] + assert child_sum == self._index[pos] + except: + traceback.print_exc(file=sys.stdout) + print('len', self._len) + print('load', self._load) + print('offset', self._offset) + print('len_index', len(self._index)) + print('index', self._index) + print('len_maxes', len(self._maxes)) + print('maxes', self._maxes) + print('len_keys', len(self._keys)) + print('keys', self._keys) + print('len_lists', len(self._lists)) + print('lists', self._lists) + raise + + +SortedListWithKey = SortedKeyList diff --git a/vendored/sortedcontainers/sortedset.py b/vendored/sortedcontainers/sortedset.py new file mode 100644 index 0000000..be2b899 --- /dev/null +++ b/vendored/sortedcontainers/sortedset.py @@ -0,0 +1,733 @@ +"""Sorted Set +============= + +:doc:`Sorted Containers` is an Apache2 licensed Python sorted +collections library, written in pure-Python, and fast as C-extensions. The +:doc:`introduction` is the best way to get started. + +Sorted set implementations: + +.. currentmodule:: sortedcontainers + +* :class:`SortedSet` + +""" + +from itertools import chain +from operator import eq, ne, gt, ge, lt, le +from textwrap import dedent + +from .sortedlist import SortedList, recursive_repr + +############################################################################### +# BEGIN Python 2/3 Shims +############################################################################### + +try: + from collections.abc import MutableSet, Sequence, Set +except ImportError: + from collections import MutableSet, Sequence, Set + +############################################################################### +# END Python 2/3 Shims +############################################################################### + + +class SortedSet(MutableSet, Sequence): + """Sorted set is a sorted mutable set. + + Sorted set values are maintained in sorted order. The design of sorted set + is simple: sorted set uses a set for set-operations and maintains a sorted + list of values. + + Sorted set values must be hashable and comparable. 
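# Illustrative sketch (not part of the vendored sortedcontainers source).
# SortedKeyList._check() shown earlier also validates the positional index:
# leaves hold the sublist lengths, every branch node is the sum of its
# children, and the root equals the total length. One shape the index could
# take for three sublists (hand-built here purely to exercise those invariants):

sublists = [[5, 4], [3, 2], [1]]
index = [5, 4, 1, 2, 2, 1]     # root, one branch row, then the leaf row
offset = 3                     # leaves start here: index[offset + pos] == len(sublists[pos])

assert index[0] == sum(len(s) for s in sublists)                       # root counts everything
assert all(index[offset + p] == len(sublists[p]) for p in range(3))    # leaves match sublists
assert index[1] == index[3] + index[4] and index[2] == index[5]        # parents sum children
assert len(index) == offset + len(sublists)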
The hash and total + ordering of values must not change while they are stored in the sorted set. + + Mutable set methods: + + * :func:`SortedSet.__contains__` + * :func:`SortedSet.__iter__` + * :func:`SortedSet.__len__` + * :func:`SortedSet.add` + * :func:`SortedSet.discard` + + Sequence methods: + + * :func:`SortedSet.__getitem__` + * :func:`SortedSet.__delitem__` + * :func:`SortedSet.__reversed__` + + Methods for removing values: + + * :func:`SortedSet.clear` + * :func:`SortedSet.pop` + * :func:`SortedSet.remove` + + Set-operation methods: + + * :func:`SortedSet.difference` + * :func:`SortedSet.difference_update` + * :func:`SortedSet.intersection` + * :func:`SortedSet.intersection_update` + * :func:`SortedSet.symmetric_difference` + * :func:`SortedSet.symmetric_difference_update` + * :func:`SortedSet.union` + * :func:`SortedSet.update` + + Methods for miscellany: + + * :func:`SortedSet.copy` + * :func:`SortedSet.count` + * :func:`SortedSet.__repr__` + * :func:`SortedSet._check` + + Sorted list methods available: + + * :func:`SortedList.bisect_left` + * :func:`SortedList.bisect_right` + * :func:`SortedList.index` + * :func:`SortedList.irange` + * :func:`SortedList.islice` + * :func:`SortedList._reset` + + Additional sorted list methods available, if key-function used: + + * :func:`SortedKeyList.bisect_key_left` + * :func:`SortedKeyList.bisect_key_right` + * :func:`SortedKeyList.irange_key` + + Sorted set comparisons use subset and superset relations. Two sorted sets + are equal if and only if every element of each sorted set is contained in + the other (each is a subset of the other). A sorted set is less than + another sorted set if and only if the first sorted set is a proper subset + of the second sorted set (is a subset, but is not equal). A sorted set is + greater than another sorted set if and only if the first sorted set is a + proper superset of the second sorted set (is a superset, but is not equal). + + """ + def __init__(self, iterable=None, key=None): + """Initialize sorted set instance. + + Optional `iterable` argument provides an initial iterable of values to + initialize the sorted set. + + Optional `key` argument defines a callable that, like the `key` + argument to Python's `sorted` function, extracts a comparison key from + each value. The default, none, compares values directly. + + Runtime complexity: `O(n*log(n))` + + >>> ss = SortedSet([3, 1, 2, 5, 4]) + >>> ss + SortedSet([1, 2, 3, 4, 5]) + >>> from operator import neg + >>> ss = SortedSet([3, 1, 2, 5, 4], neg) + >>> ss + SortedSet([5, 4, 3, 2, 1], key=) + + :param iterable: initial values (optional) + :param key: function used to extract comparison key (optional) + + """ + self._key = key + + # SortedSet._fromset calls SortedSet.__init__ after initializing the + # _set attribute. So only create a new set if the _set attribute is not + # already present. + + if not hasattr(self, '_set'): + self._set = set() + + self._list = SortedList(self._set, key=key) + + # Expose some set methods publicly. + + _set = self._set + self.isdisjoint = _set.isdisjoint + self.issubset = _set.issubset + self.issuperset = _set.issuperset + + # Expose some sorted list methods publicly. 
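# Illustrative sketch (not part of the vendored sortedcontainers source).
# SortedSet.__init__ above avoids per-call delegation overhead by binding the
# inner set's (and SortedList's) methods directly onto the instance. The same
# pattern with builtins only:

class Exposed:
    def __init__(self, iterable=()):
        self._set = set(iterable)
        # bound methods of the inner set become attributes of the wrapper,
        # so Exposed(...).isdisjoint(...) is a direct set call
        self.isdisjoint = self._set.isdisjoint
        self.issubset = self._set.issubset

e = Exposed([1, 2, 3])
assert e.isdisjoint({4, 5}) and e.issubset({1, 2, 3, 4})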
+ + _list = self._list + self.bisect_left = _list.bisect_left + self.bisect = _list.bisect + self.bisect_right = _list.bisect_right + self.index = _list.index + self.irange = _list.irange + self.islice = _list.islice + self._reset = _list._reset + + if key is not None: + self.bisect_key_left = _list.bisect_key_left + self.bisect_key_right = _list.bisect_key_right + self.bisect_key = _list.bisect_key + self.irange_key = _list.irange_key + + if iterable is not None: + self._update(iterable) + + + @classmethod + def _fromset(cls, values, key=None): + """Initialize sorted set from existing set. + + Used internally by set operations that return a new set. + + """ + sorted_set = object.__new__(cls) + sorted_set._set = values + sorted_set.__init__(key=key) + return sorted_set + + + @property + def key(self): + """Function used to extract comparison key from values. + + Sorted set compares values directly when the key function is none. + + """ + return self._key + + + def __contains__(self, value): + """Return true if `value` is an element of the sorted set. + + ``ss.__contains__(value)`` <==> ``value in ss`` + + Runtime complexity: `O(1)` + + >>> ss = SortedSet([1, 2, 3, 4, 5]) + >>> 3 in ss + True + + :param value: search for value in sorted set + :return: true if `value` in sorted set + + """ + return value in self._set + + + def __getitem__(self, index): + """Lookup value at `index` in sorted set. + + ``ss.__getitem__(index)`` <==> ``ss[index]`` + + Supports slicing. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> ss = SortedSet('abcde') + >>> ss[2] + 'c' + >>> ss[-1] + 'e' + >>> ss[2:5] + ['c', 'd', 'e'] + + :param index: integer or slice for indexing + :return: value or list of values + :raises IndexError: if index out of range + + """ + return self._list[index] + + + def __delitem__(self, index): + """Remove value at `index` from sorted set. + + ``ss.__delitem__(index)`` <==> ``del ss[index]`` + + Supports slicing. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> ss = SortedSet('abcde') + >>> del ss[2] + >>> ss + SortedSet(['a', 'b', 'd', 'e']) + >>> del ss[:2] + >>> ss + SortedSet(['d', 'e']) + + :param index: integer or slice for indexing + :raises IndexError: if index out of range + + """ + _set = self._set + _list = self._list + if isinstance(index, slice): + values = _list[index] + _set.difference_update(values) + else: + value = _list[index] + _set.remove(value) + del _list[index] + + + def __make_cmp(set_op, symbol, doc): + "Make comparator method." + def comparer(self, other): + "Compare method for sorted set and set." + if isinstance(other, SortedSet): + return set_op(self._set, other._set) + elif isinstance(other, Set): + return set_op(self._set, other) + return NotImplemented + + set_op_name = set_op.__name__ + comparer.__name__ = '__{0}__'.format(set_op_name) + doc_str = """Return true if and only if sorted set is {0} `other`. + + ``ss.__{1}__(other)`` <==> ``ss {2} other`` + + Comparisons use subset and superset semantics as with sets. 
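# Illustrative sketch (not part of the vendored sortedcontainers source).
# __make_cmp above is a small factory: each rich-comparison dunder applies an
# operator-module function to the inner sets, then patches in the name and
# docstring. A stripped-down version of the pattern:

from operator import lt

def make_cmp(set_op, symbol):
    def comparer(self, other):
        return set_op(self._set, other._set if hasattr(other, "_set") else other)
    comparer.__name__ = "__{0}__".format(set_op.__name__)
    comparer.__doc__ = "True when the inner set is {0} other.".format(symbol)
    return comparer

class Demo:
    def __init__(self, values):
        self._set = set(values)
    __lt__ = make_cmp(lt, "a proper subset of")

assert Demo([1, 2]) < Demo([1, 2, 3])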
+ + Runtime complexity: `O(n)` + + :param other: `other` set + :return: true if sorted set is {0} `other` + + """ + comparer.__doc__ = dedent(doc_str.format(doc, set_op_name, symbol)) + return comparer + + + __eq__ = __make_cmp(eq, '==', 'equal to') + __ne__ = __make_cmp(ne, '!=', 'not equal to') + __lt__ = __make_cmp(lt, '<', 'a proper subset of') + __gt__ = __make_cmp(gt, '>', 'a proper superset of') + __le__ = __make_cmp(le, '<=', 'a subset of') + __ge__ = __make_cmp(ge, '>=', 'a superset of') + __make_cmp = staticmethod(__make_cmp) + + + def __len__(self): + """Return the size of the sorted set. + + ``ss.__len__()`` <==> ``len(ss)`` + + :return: size of sorted set + + """ + return len(self._set) + + + def __iter__(self): + """Return an iterator over the sorted set. + + ``ss.__iter__()`` <==> ``iter(ss)`` + + Iterating the sorted set while adding or deleting values may raise a + :exc:`RuntimeError` or fail to iterate over all values. + + """ + return iter(self._list) + + + def __reversed__(self): + """Return a reverse iterator over the sorted set. + + ``ss.__reversed__()`` <==> ``reversed(ss)`` + + Iterating the sorted set while adding or deleting values may raise a + :exc:`RuntimeError` or fail to iterate over all values. + + """ + return reversed(self._list) + + + def add(self, value): + """Add `value` to sorted set. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> ss = SortedSet() + >>> ss.add(3) + >>> ss.add(1) + >>> ss.add(2) + >>> ss + SortedSet([1, 2, 3]) + + :param value: value to add to sorted set + + """ + _set = self._set + if value not in _set: + _set.add(value) + self._list.add(value) + + _add = add + + + def clear(self): + """Remove all values from sorted set. + + Runtime complexity: `O(n)` + + """ + self._set.clear() + self._list.clear() + + + def copy(self): + """Return a shallow copy of the sorted set. + + Runtime complexity: `O(n)` + + :return: new sorted set + + """ + return self._fromset(set(self._set), key=self._key) + + __copy__ = copy + + + def count(self, value): + """Return number of occurrences of `value` in the sorted set. + + Runtime complexity: `O(1)` + + >>> ss = SortedSet([1, 2, 3, 4, 5]) + >>> ss.count(3) + 1 + + :param value: value to count in sorted set + :return: count + + """ + return 1 if value in self._set else 0 + + + def discard(self, value): + """Remove `value` from sorted set if it is a member. + + If `value` is not a member, do nothing. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> ss = SortedSet([1, 2, 3, 4, 5]) + >>> ss.discard(5) + >>> ss.discard(0) + >>> ss == set([1, 2, 3, 4]) + True + + :param value: `value` to discard from sorted set + + """ + _set = self._set + if value in _set: + _set.remove(value) + self._list.remove(value) + + _discard = discard + + + def pop(self, index=-1): + """Remove and return value at `index` in sorted set. + + Raise :exc:`IndexError` if the sorted set is empty or index is out of + range. + + Negative indices are supported. + + Runtime complexity: `O(log(n))` -- approximate. + + >>> ss = SortedSet('abcde') + >>> ss.pop() + 'e' + >>> ss.pop(2) + 'c' + >>> ss + SortedSet(['a', 'b', 'd']) + + :param int index: index of value (default -1) + :return: value + :raises IndexError: if index is out of range + + """ + # pylint: disable=arguments-differ + value = self._list.pop(index) + self._set.remove(value) + return value + + + def remove(self, value): + """Remove `value` from sorted set; `value` must be a member. + + If `value` is not a member, raise :exc:`KeyError`. 
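# Illustrative sketch (not part of the vendored sortedcontainers source).
# add()/discard() above always consult the O(1) set first, so the more
# expensive SortedList is only touched when membership actually changes.
# Sketch with a plain list standing in for the SortedList:

from bisect import insort

_set, _list = set(), []

def add(value):
    if value not in _set:       # cheap membership test gates the list update
        _set.add(value)
        insort(_list, value)

for v in (3, 1, 3, 2, 1):
    add(v)
assert _list == [1, 2, 3] and _set == {1, 2, 3}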
+ + Runtime complexity: `O(log(n))` -- approximate. + + >>> ss = SortedSet([1, 2, 3, 4, 5]) + >>> ss.remove(5) + >>> ss == set([1, 2, 3, 4]) + True + >>> ss.remove(0) + Traceback (most recent call last): + ... + KeyError: 0 + + :param value: `value` to remove from sorted set + :raises KeyError: if `value` is not in sorted set + + """ + self._set.remove(value) + self._list.remove(value) + + + def difference(self, *iterables): + """Return the difference of two or more sets as a new sorted set. + + The `difference` method also corresponds to operator ``-``. + + ``ss.__sub__(iterable)`` <==> ``ss - iterable`` + + The difference is all values that are in this sorted set but not the + other `iterables`. + + >>> ss = SortedSet([1, 2, 3, 4, 5]) + >>> ss.difference([4, 5, 6, 7]) + SortedSet([1, 2, 3]) + + :param iterables: iterable arguments + :return: new sorted set + + """ + diff = self._set.difference(*iterables) + return self._fromset(diff, key=self._key) + + __sub__ = difference + + + def difference_update(self, *iterables): + """Remove all values of `iterables` from this sorted set. + + The `difference_update` method also corresponds to operator ``-=``. + + ``ss.__isub__(iterable)`` <==> ``ss -= iterable`` + + >>> ss = SortedSet([1, 2, 3, 4, 5]) + >>> _ = ss.difference_update([4, 5, 6, 7]) + >>> ss + SortedSet([1, 2, 3]) + + :param iterables: iterable arguments + :return: itself + + """ + _set = self._set + _list = self._list + values = set(chain(*iterables)) + if (4 * len(values)) > len(_set): + _set.difference_update(values) + _list.clear() + _list.update(_set) + else: + _discard = self._discard + for value in values: + _discard(value) + return self + + __isub__ = difference_update + + + def intersection(self, *iterables): + """Return the intersection of two or more sets as a new sorted set. + + The `intersection` method also corresponds to operator ``&``. + + ``ss.__and__(iterable)`` <==> ``ss & iterable`` + + The intersection is all values that are in this sorted set and each of + the other `iterables`. + + >>> ss = SortedSet([1, 2, 3, 4, 5]) + >>> ss.intersection([4, 5, 6, 7]) + SortedSet([4, 5]) + + :param iterables: iterable arguments + :return: new sorted set + + """ + intersect = self._set.intersection(*iterables) + return self._fromset(intersect, key=self._key) + + __and__ = intersection + __rand__ = __and__ + + + def intersection_update(self, *iterables): + """Update the sorted set with the intersection of `iterables`. + + The `intersection_update` method also corresponds to operator ``&=``. + + ``ss.__iand__(iterable)`` <==> ``ss &= iterable`` + + Keep only values found in itself and all `iterables`. + + >>> ss = SortedSet([1, 2, 3, 4, 5]) + >>> _ = ss.intersection_update([4, 5, 6, 7]) + >>> ss + SortedSet([4, 5]) + + :param iterables: iterable arguments + :return: itself + + """ + _set = self._set + _list = self._list + _set.intersection_update(*iterables) + _list.clear() + _list.update(_set) + return self + + __iand__ = intersection_update + + + def symmetric_difference(self, other): + """Return the symmetric difference with `other` as a new sorted set. + + The `symmetric_difference` method also corresponds to operator ``^``. + + ``ss.__xor__(other)`` <==> ``ss ^ other`` + + The symmetric difference is all values tha are in exactly one of the + sets. 
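# Illustrative sketch (not part of the vendored sortedcontainers source).
# difference_update() above (and update() later) switch strategies on size:
# when the incoming batch is at least a quarter of the current size, it is
# cheaper to apply the set operation once and rebuild the sorted list than to
# discard values one at a time. The branch condition, modelled directly:

def rebuild_is_cheaper(num_new_values, current_len):
    # mirrors `if (4 * len(values)) > len(_set)` in difference_update()
    return 4 * num_new_values > current_len

assert rebuild_is_cheaper(30, 100)        # large batch: bulk set-op + rebuild
assert not rebuild_is_cheaper(3, 100)     # small batch: per-value discard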
+ + >>> ss = SortedSet([1, 2, 3, 4, 5]) + >>> ss.symmetric_difference([4, 5, 6, 7]) + SortedSet([1, 2, 3, 6, 7]) + + :param other: `other` iterable + :return: new sorted set + + """ + diff = self._set.symmetric_difference(other) + return self._fromset(diff, key=self._key) + + __xor__ = symmetric_difference + __rxor__ = __xor__ + + + def symmetric_difference_update(self, other): + """Update the sorted set with the symmetric difference with `other`. + + The `symmetric_difference_update` method also corresponds to operator + ``^=``. + + ``ss.__ixor__(other)`` <==> ``ss ^= other`` + + Keep only values found in exactly one of itself and `other`. + + >>> ss = SortedSet([1, 2, 3, 4, 5]) + >>> _ = ss.symmetric_difference_update([4, 5, 6, 7]) + >>> ss + SortedSet([1, 2, 3, 6, 7]) + + :param other: `other` iterable + :return: itself + + """ + _set = self._set + _list = self._list + _set.symmetric_difference_update(other) + _list.clear() + _list.update(_set) + return self + + __ixor__ = symmetric_difference_update + + + def union(self, *iterables): + """Return new sorted set with values from itself and all `iterables`. + + The `union` method also corresponds to operator ``|``. + + ``ss.__or__(iterable)`` <==> ``ss | iterable`` + + >>> ss = SortedSet([1, 2, 3, 4, 5]) + >>> ss.union([4, 5, 6, 7]) + SortedSet([1, 2, 3, 4, 5, 6, 7]) + + :param iterables: iterable arguments + :return: new sorted set + + """ + return self.__class__(chain(iter(self), *iterables), key=self._key) + + __or__ = union + __ror__ = __or__ + + + def update(self, *iterables): + """Update the sorted set adding values from all `iterables`. + + The `update` method also corresponds to operator ``|=``. + + ``ss.__ior__(iterable)`` <==> ``ss |= iterable`` + + >>> ss = SortedSet([1, 2, 3, 4, 5]) + >>> _ = ss.update([4, 5, 6, 7]) + >>> ss + SortedSet([1, 2, 3, 4, 5, 6, 7]) + + :param iterables: iterable arguments + :return: itself + + """ + _set = self._set + _list = self._list + values = set(chain(*iterables)) + if (4 * len(values)) > len(_set): + _list = self._list + _set.update(values) + _list.clear() + _list.update(_set) + else: + _add = self._add + for value in values: + _add(value) + return self + + __ior__ = update + _update = update + + + def __reduce__(self): + """Support for pickle. + + The tricks played with exposing methods in :func:`SortedSet.__init__` + confuse pickle so customize the reducer. + + """ + return (type(self), (self._set, self._key)) + + + @recursive_repr() + def __repr__(self): + """Return string representation of sorted set. + + ``ss.__repr__()`` <==> ``repr(ss)`` + + :return: string representation + + """ + _key = self._key + key = '' if _key is None else ', key={0!r}'.format(_key) + type_name = type(self).__name__ + return '{0}({1!r}{2})'.format(type_name, list(self), key) + + + def _check(self): + """Check invariants of sorted set. + + Runtime complexity: `O(n)` + + """ + _set = self._set + _list = self._list + _list._check() + assert len(_set) == len(_list) + assert all(value in _set for value in _list)
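# Illustrative sketch (not part of the vendored sortedcontainers source).
# SortedSet.__reduce__ above makes pickling rebuild the set from
# (type, (set, key)) instead of trying to pickle the bound-method attributes
# installed in __init__. The same idea on a minimal wrapper:

import pickle

class Wrapped:
    def __init__(self, values=(), key=None):
        self._set = set(values)
        self._key = key
        self.isdisjoint = self._set.isdisjoint   # bound method: not picklable as-is
    def __reduce__(self):
        return (type(self), (self._set, self._key))
    def __eq__(self, other):
        return self._set == other._set

w = Wrapped([1, 2, 3])
assert pickle.loads(pickle.dumps(w)) == w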