From 6998c8638dc0c66954833ac7f2f156bfd3372181 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 18 Jul 2025 17:08:25 +0200 Subject: [PATCH 1/4] add benchmarks --- bench.json | 91 +++++++++++++++++ benchmark.ipynb | 217 +++++++++++++++++++++++++++++++++++++++++ libpysal/graph/base.py | 2 + 3 files changed, 310 insertions(+) create mode 100644 bench.json create mode 100644 benchmark.ipynb diff --git a/bench.json b/bench.json new file mode 100644 index 000000000..332817f03 --- /dev/null +++ b/bench.json @@ -0,0 +1,91 @@ +{ + "version": "4.13.1.dev14+ga01ec777", + "queen_int": { + "mean": 0.10920027082320302, + "std": 0.020187548225061188 + }, + "queen_str": { + "mean": 0.1017881458858028, + "std": 0.01833704227630996 + }, + "queen_int_strict": { + "mean": 0.31799160831142215, + "std": 0.0020068734272518417 + }, + "queen_str_strict": { + "mean": 0.3286190209211782, + "std": 0.01928133209227243 + }, + "knn_10_int": { + "mean": 0.024900841782800852, + "std": 0.00043173728306308373 + }, + "knn_10_str": { + "mean": 0.031059708283282816, + "std": 0.0003164549844492539 + }, + "knn_500_int": { + "mean": 0.22332576238550245, + "std": 0.005307254049118129 + }, + "knn_500_str": { + "mean": 0.4165452209301293, + "std": 0.0017880199415023917 + }, + "kernel_int": { + "mean": 0.3351672415621579, + "std": 0.023597386247916227 + }, + "kernel_str": { + "mean": 0.7452414750121534, + "std": 0.008023644628654479 + }, + "assign_self_weight_small_int": { + "mean": 0.004125737538561225, + "std": 0.0006155291350163758 + }, + "assign_self_weight_large_int": { + "mean": 0.09749369169585406, + "std": 0.004411739138039398 + }, + "assign_self_weight_small_str": { + "mean": 0.010200412361882627, + "std": 0.000504740384778999 + }, + "assign_self_weight_large_str": { + "mean": 0.2589453041087836, + "std": 0.004367613279158766 + }, + "sparse_small_int": { + "mean": 0.0003439947455190122, + "std": 0.00012388488322321412 + }, + "sparse_large_int": { + "mean": 0.01072925938735716, + "std": 0.0025470408238193213 + }, + "sparse_small_str": { + "mean": 0.0003441800721921027, + "std": 0.00032913256973422664 + }, + "sparse_large_str": { + "mean": 0.010543452413752675, + "std": 0.001791658153606046 + }, + "subgraph_small_int": { + "mean": 0.0059226417215541005, + "std": 0.00020078535750021596 + }, + "subgraph_large_int": { + "mean": 0.019824503781273962, + "std": 0.0022293929747628275 + }, + "subgraph_small_str": { + "mean": 0.006567858462221921, + "std": 0.00023698073401308311 + }, + "subgraph_large_str": { + "mean": 0.029949891753494738, + "std": 0.0016261504776117926 + } +} \ No newline at end of file diff --git a/benchmark.ipynb b/benchmark.ipynb new file mode 100644 index 000000000..c649d4159 --- /dev/null +++ b/benchmark.ipynb @@ -0,0 +1,217 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7cea941d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'4.13.1.dev14+ga01ec777'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "import time\n", + "from functools import wraps\n", + "\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "from geodatasets import get_path\n", + "\n", + "from libpysal import __version__\n", + "from libpysal.graph import Graph\n", + "\n", + "__version__" + ] + }, + { + "cell_type": "markdown", + "id": "9ade8f7b", + "metadata": {}, + "source": [ + "Timer decorator" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a56c4e55", + "metadata": {}, + "outputs": [], + "source": [ + "def timer(iterations):\n", + " \"\"\"Decorator to time function execution.\"\"\"\n", + "\n", + " def decorator(func):\n", + " @wraps(func)\n", + " def wrapper(*args, **kwargs):\n", + " times = []\n", + " for _ in range(iterations):\n", + " start_time = time.perf_counter()\n", + " _ = func(*args, **kwargs)\n", + " end_time = time.perf_counter()\n", + " times.append(end_time - start_time)\n", + " return {'mean': float(np.mean(times)), 'std': float(np.std(times))}\n", + "\n", + " return wrapper\n", + " return decorator" + ] + }, + { + "cell_type": "markdown", + "id": "cbff2730", + "metadata": {}, + "source": [ + "Test data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2b77e5d8", + "metadata": {}, + "outputs": [], + "source": [ + "gdf = gpd.read_file(get_path('geoda south'))\n", + "gdf_name = gdf.set_index(gdf.NAME + \" \" + gdf.STATE_NAME)\n", + "gdf_points = gdf.set_geometry(gdf.representative_point())\n", + "gdf_name_points = gdf_name.set_geometry(gdf_name.representative_point())\n", + "\n", + "small_int = Graph.build_knn(gdf_points, k=10)\n", + "large_int = Graph.build_knn(gdf_points, k=500)\n", + "small_str = Graph.build_knn(gdf_name_points, k=10)\n", + "large_str = Graph.build_knn(gdf_name_points, k=500)" + ] + }, + { + "cell_type": "markdown", + "id": "98beeb2f", + "metadata": {}, + "source": [ + "Timer functions" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3ffabda2", + "metadata": {}, + "outputs": [], + "source": [ + "@timer(10)\n", + "def queen(gdf, strict=False):\n", + " Graph.build_contiguity(gdf, strict=strict)\n", + "\n", + "\n", + "@timer(10)\n", + "def knn(gdf, k):\n", + " Graph.build_knn(gdf, k=k)\n", + "\n", + "@timer(5)\n", + "def kernel(gdf):\n", + " Graph.build_kernel(gdf)\n", + "\n", + "@timer(10)\n", + "def assign_self_weight(graph):\n", + " graph.assign_self_weight()\n", + "\n", + "@timer(1000)\n", + "def sparse(graph):\n", + " _ = graph.sparse\n", + "\n", + "@timer(10)\n", + "def subgraph(graph, ids):\n", + " graph.subgraph(ids)" + ] + }, + { + "cell_type": "markdown", + "id": "ba09f5d0", + "metadata": {}, + "source": [ + "Run benchmarks" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7204fa92", + "metadata": {}, + "outputs": [], + "source": [ + "results = {'version': __version__}\n", + "\n", + "results['queen_int'] = queen(gdf)\n", + "results['queen_str'] = queen(gdf_name)\n", + "results['queen_int_strict'] = queen(gdf, strict=True)\n", + "results['queen_str_strict'] = queen(gdf_name, strict=True)\n", + "results['knn_10_int'] = knn(gdf_points, k=10)\n", + "results['knn_10_str'] = knn(gdf_name_points, k=10)\n", + "results['knn_500_int'] = knn(gdf_points, k=500)\n", + "results['knn_500_str'] = knn(gdf_name_points, k=500)\n", + "results['kernel_int'] = kernel(gdf_points)\n", + "results['kernel_str'] = kernel(gdf_name_points)\n", + "results['assign_self_weight_small_int'] = assign_self_weight(small_int)\n", + "results['assign_self_weight_large_int'] = assign_self_weight(large_int)\n", + "results['assign_self_weight_small_str'] = assign_self_weight(small_str)\n", + "results['assign_self_weight_large_str'] = assign_self_weight(large_str)\n", + "results['sparse_small_int'] = sparse(small_int)\n", + "results['sparse_large_int'] = sparse(large_int)\n", + "results['sparse_small_str'] = sparse(small_str)\n", + "results['sparse_large_str'] = sparse(large_str)\n", + "results['subgraph_small_int'] = subgraph(small_int, gdf.index.to_series().sample(gdf.shape[0] // 5).values)\n", + "results['subgraph_large_int'] = subgraph(large_int, gdf.index.to_series().sample(gdf.shape[0] // 5).values)\n", + "results['subgraph_small_str'] = subgraph(small_str, gdf_name.index.to_series().sample(gdf.shape[0] // 5).values)\n", + "results['subgraph_large_str'] = subgraph(large_str, gdf_name.index.to_series().sample(gdf.shape[0] // 5).values)" + ] + }, + { + "cell_type": "markdown", + "id": "7da0707d", + "metadata": {}, + "source": [ + "Save to file" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "68e2986f", + "metadata": {}, + "outputs": [], + "source": [ + "with open('bench.json', 'w') as f:\n", + " json.dump(results, f, indent=4)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "default", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libpysal/graph/base.py b/libpysal/graph/base.py index 3dd4a0ea0..a6ca2d7a9 100644 --- a/libpysal/graph/base.py +++ b/libpysal/graph/base.py @@ -36,6 +36,8 @@ from .io._gwt import _read_gwt, _to_gwt from .io._parquet import _read_parquet, _to_parquet +cached_property = property # remove cache for benchmark purposes # noqa: F811 + ALLOWED_TRANSFORMATIONS = ("O", "B", "R", "D", "V", "C") # listed alphabetically From c4cdc839ddf8a3511cda8e63afb093a5f725c57c Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Sat, 19 Jul 2025 16:25:59 +0200 Subject: [PATCH 2/4] use asv --- asv.conf.json | 194 ++++++++++++++++++++++++++++++++++++++ benchmarks/__init__.py | 0 benchmarks/bench_graph.py | 77 +++++++++++++++ libpysal/graph/base.py | 4 +- 4 files changed, 274 insertions(+), 1 deletion(-) create mode 100644 asv.conf.json create mode 100644 benchmarks/__init__.py create mode 100644 benchmarks/bench_graph.py diff --git a/asv.conf.json b/asv.conf.json new file mode 100644 index 000000000..8cfc517c6 --- /dev/null +++ b/asv.conf.json @@ -0,0 +1,194 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "libpysal", + + // The project's homepage + "project_url": "http://pysal.org/libpysal", + + // The URL or local path of the source code repository for the + // project being benchmarked + "repo": ".", + + // The Python project's subdirectory in your repo. If missing or + // the empty string, the project is assumed to be located at the root + // of the repository. + // "repo_subdir": "", + + // Customizable commands for building the project. + // See asv.conf.json documentation. + // To build the package using pyproject.toml (PEP518), uncomment the following lines + // "build_command": [ + // "python -m pip install build", + // "python -m build", + // "python -mpip wheel -w {build_cache_dir} {build_dir}" + // ], + // To build the package using setuptools and a setup.py file, uncomment the following lines + // "build_command": [ + // "python setup.py build", + // "python -mpip wheel -w {build_cache_dir} {build_dir}" + // ], + + // Customizable commands for installing and uninstalling the project. + // See asv.conf.json documentation. + // "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"], + // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], + + // List of branches to benchmark. If not provided, defaults to "main" + // (for git) or "default" (for mercurial). + // "branches": ["main"], // for git + // "branches": ["default"], // for mercurial + + // The DVCS being used. If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + // "dvcs": "git", + + // The tool to use to create environments. May be "conda", + // "virtualenv", "mamba" (above 3.8) + // or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "existing", + + // timeout in seconds for installing any dependencies in environment + // defaults to 10 min + //"install_timeout": 600, + + // the base URL to show a commit for the project. + "show_commit_url": "http://github.com/pysal/libpysal/commit/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + // "pythons": ["3.8", "3.12"], + + // The list of conda channel names to be searched for benchmark + // dependency packages in the specified order + // "conda_channels": ["conda-forge", "defaults"], + + // A conda environment file that is used for environment creation. + "conda_environment_file": "ci/313-latest.yaml", + + // The matrix of dependencies to test. Each key of the "req" + // requirements dictionary is the name of a package (in PyPI) and + // the values are version numbers. An empty list or empty string + // indicates to just test against the default (latest) + // version. null indicates that the package is to not be + // installed. If the package to be tested is only available from + // PyPi, and the 'environment_type' is conda, then you can preface + // the package name by 'pip+', and the package will be installed + // via pip (with all the conda available packages installed first, + // followed by the pip installed packages). + // + // The ``@env`` and ``@env_nobuild`` keys contain the matrix of + // environment variables to pass to build and benchmark commands. + // An environment will be created for every combination of the + // cartesian product of the "@env" variables in this matrix. + // Variables in "@env_nobuild" will be passed to every environment + // during the benchmark phase, but will not trigger creation of + // new environments. A value of ``null`` means that the variable + // will not be set for the current combination. + // + // "matrix": { + // "req": { + // "numpy": ["1.6", "1.7"], + // "six": ["", null], // test with and without six installed + // "pip+emcee": [""] // emcee is only available for install with pip. + // }, + // "env": {"ENV_VAR_1": ["val1", "val2"]}, + // "env_nobuild": {"ENV_VAR_2": ["val3", null]}, + // }, + + // Combinations of libraries/python versions can be excluded/included + // from the set to test. Each entry is a dictionary containing additional + // key-value pairs to include/exclude. + // + // An exclude entry excludes entries where all values match. The + // values are regexps that should match the whole string. + // + // An include entry adds an environment. Only the packages listed + // are installed. The 'python' key is required. The exclude rules + // do not apply to includes. + // + // In addition to package names, the following keys are available: + // + // - python + // Python version, as in the *pythons* variable above. + // - environment_type + // Environment type, as above. + // - sys_platform + // Platform, as in sys.platform. Possible values for the common + // cases: 'linux2', 'win32', 'cygwin', 'darwin'. + // - req + // Required packages + // - env + // Environment variables + // - env_nobuild + // Non-build environment variables + // + // "exclude": [ + // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows + // {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda + // {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1 + // ], + // + // "include": [ + // // additional env for python3.12 + // {"python": "3.12", "req": {"numpy": "1.26"}, "env_nobuild": {"FOO": "123"}}, + // // additional env if run on windows+conda + // {"platform": "win32", "environment_type": "conda", "python": "3.12", "req": {"libpython": ""}}, + // ], + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + // "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + "env_dir": ".asv/env", + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + "results_dir": ".asv/results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + "html_dir": ".asv/html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache results of the recent builds in each + // environment, making them faster to install next time. This is + // the number of builds to keep, per environment. + // "build_cache_size": 2, + + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching to benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark. + // + // "regressions_first_commits": { + // "some_benchmark": "352cdf", // Consider regressions only after this commit + // "another_benchmark": null, // Skip regression detection altogether + // }, + + // The thresholds for relative change in results, after which `asv + // publish` starts reporting regressions. Dictionary of the same + // form as in ``regressions_first_commits``, with values + // indicating the thresholds. If multiple entries match, the + // maximum is taken. If no entry matches, the default is 5%. + // + // "regressions_thresholds": { + // "some_benchmark": 0.01, // Threshold of 1% + // "another_benchmark": 0.5, // Threshold of 50% + // }, +} diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/bench_graph.py b/benchmarks/bench_graph.py new file mode 100644 index 000000000..4cba9bd43 --- /dev/null +++ b/benchmarks/bench_graph.py @@ -0,0 +1,77 @@ +import geopandas as gpd +import numpy as np +from geodatasets import get_path + +from libpysal.graph import Graph + + +class TimeSuite: + """ + An example benchmark that times the performance of various kinds + of iterating over dictionaries in Python. + """ + + def setup(self, *args, **kwargs): + self.gdf = gpd.read_file(get_path("geoda south")) + self.gdf_str = self.gdf.set_index(self.gdf.NAME + " " + self.gdf.STATE_NAME) + self.gdf_points = self.gdf.set_geometry(self.gdf.representative_point()) + self.gdf_str_points = self.gdf_str.set_geometry( + self.gdf_str.representative_point() + ) + + self.graphs = { + "small_int": Graph.build_knn(self.gdf_points, k=10), + "large_int": Graph.build_knn(self.gdf_points, k=500), + "small_str": Graph.build_knn(self.gdf_str_points, k=10), + "large_str": Graph.build_knn(self.gdf_str_points, k=500), + } + self.ids = { + "int": self.gdf.index.to_series().sample(self.gdf.shape[0] // 5).values, + "str": self.gdf_str.index.to_series() + .sample(self.gdf_str.shape[0] // 5) + .values, + } + + def time_queen(self, idx, strict): + Graph.build_contiguity( + self.gdf if idx == "int" else self.gdf_str, + strict=strict, + ) + + time_queen.params = (["int", "str"], [True, False]) + time_queen.param_names = ["index", "strict"] + + def time_knn(self, idx, k): + Graph.build_knn(self.gdf_points if idx == "int" else self.gdf_str_points, k=k) + + time_knn.params = (["int", "str"], [10, 500]) + time_knn.param_names = ["index", "k"] + + def time_kernel(self, idx): + Graph.build_kernel(self.gdf_points if idx == "int" else self.gdf_str_points) + + time_kernel.params = ["int", "str"] + time_kernel.param_names = ["index"] + + def time_assign_self_weight(self, idx, size): + self.graphs[f"{size}_{idx}"].assign_self_weight() + + time_assign_self_weight.params = (["int", "str"], ["small", "large"]) + time_assign_self_weight.param_names = ["index", "graph_size"] + + def time_sparse(self, idx, size): + _ = self.graphs[f"{size}_{idx}"].sparse + + time_sparse.params = (["int", "str"], ["small", "large"]) + time_sparse.param_names = ["index", "graph_size"] + + def time_subgraph(self, idx, size): + self.graphs[f"{size}_{idx}"].subgraph(self.ids[idx]) + + time_subgraph.params = (["int", "str"], ["small", "large"]) + time_subgraph.param_names = ["index", "graph_size"] + + +# class MemSuite: +# def mem_list(self): +# return [0] * 256 diff --git a/libpysal/graph/base.py b/libpysal/graph/base.py index a6ca2d7a9..dce9ef605 100644 --- a/libpysal/graph/base.py +++ b/libpysal/graph/base.py @@ -1,4 +1,5 @@ import math +import os from functools import cached_property import numpy as np @@ -36,7 +37,8 @@ from .io._gwt import _read_gwt, _to_gwt from .io._parquet import _read_parquet, _to_parquet -cached_property = property # remove cache for benchmark purposes # noqa: F811 +if os.environ.get("ASV", "false") == "true": + cached_property = property # remove cache for benchmark purposes # noqa: F811 ALLOWED_TRANSFORMATIONS = ("O", "B", "R", "D", "V", "C") From 3755b1f60217b1be0b5bdae1057eef207e80f5d6 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Sat, 19 Jul 2025 22:18:08 +0200 Subject: [PATCH 3/4] properly use asv --- .gitignore | 1 + asv.conf.json | 50 +++++++---- bench.json | 91 -------------------- benchmark.ipynb | 217 ------------------------------------------------ 4 files changed, 34 insertions(+), 325 deletions(-) delete mode 100644 bench.json delete mode 100644 benchmark.ipynb diff --git a/.gitignore b/.gitignore index 000966cbf..8e5690375 100644 --- a/.gitignore +++ b/.gitignore @@ -106,3 +106,4 @@ pysal/examples/snow_maps/soho_graph.qpj pysal/examples/snow_maps/soho_graph.shp pysal/examples/snow_maps/soho_graph.shx +.asv \ No newline at end of file diff --git a/asv.conf.json b/asv.conf.json index 8cfc517c6..eec978036 100644 --- a/asv.conf.json +++ b/asv.conf.json @@ -21,11 +21,11 @@ // Customizable commands for building the project. // See asv.conf.json documentation. // To build the package using pyproject.toml (PEP518), uncomment the following lines - // "build_command": [ - // "python -m pip install build", - // "python -m build", - // "python -mpip wheel -w {build_cache_dir} {build_dir}" - // ], + "build_command": [ + "python -m pip install build", + "python -m build", + "python -mpip wheel -w {build_cache_dir} {build_dir} --no-deps" + ], // To build the package using setuptools and a setup.py file, uncomment the following lines // "build_command": [ // "python setup.py build", @@ -39,7 +39,7 @@ // List of branches to benchmark. If not provided, defaults to "main" // (for git) or "default" (for mercurial). - // "branches": ["main"], // for git + "branches": ["main"], // for git // "branches": ["default"], // for mercurial // The DVCS being used. If not set, it will be automatically @@ -54,7 +54,7 @@ // If missing or the empty string, the tool will be automatically // determined by looking for tools on the PATH environment // variable. - "environment_type": "existing", + "environment_type": "virtualenv", // timeout in seconds for installing any dependencies in environment // defaults to 10 min @@ -72,7 +72,7 @@ // "conda_channels": ["conda-forge", "defaults"], // A conda environment file that is used for environment creation. - "conda_environment_file": "ci/313-latest.yaml", + // "conda_environment_file": "ci/313-latest.yaml", // The matrix of dependencies to test. Each key of the "req" // requirements dictionary is the name of a package (in PyPI) and @@ -94,15 +94,31 @@ // new environments. A value of ``null`` means that the variable // will not be set for the current combination. // - // "matrix": { - // "req": { - // "numpy": ["1.6", "1.7"], - // "six": ["", null], // test with and without six installed - // "pip+emcee": [""] // emcee is only available for install with pip. - // }, - // "env": {"ENV_VAR_1": ["val1", "val2"]}, - // "env_nobuild": {"ENV_VAR_2": ["val3", null]}, - // }, + "matrix": { + "req": { + "numpy": "2.2", + "beautifulsoup4": "", + "geopandas": "", + "jinja2": "", + "packaging": "", + "pandas": "", + "platformdirs": "", + "requests": "", + "scikit-learn": "", + "scipy": "", + "shapely": "", + "numba": "", + "joblib": "", + "networkx": "", + "pyarrow": "", + "sqlalchemy": "", + "xarray": "", + "zstd": "", + "pandana": "", + "geodatasets": "", + "matplotlib": "" + } + }, // Combinations of libraries/python versions can be excluded/included // from the set to test. Each entry is a dictionary containing additional diff --git a/bench.json b/bench.json deleted file mode 100644 index 332817f03..000000000 --- a/bench.json +++ /dev/null @@ -1,91 +0,0 @@ -{ - "version": "4.13.1.dev14+ga01ec777", - "queen_int": { - "mean": 0.10920027082320302, - "std": 0.020187548225061188 - }, - "queen_str": { - "mean": 0.1017881458858028, - "std": 0.01833704227630996 - }, - "queen_int_strict": { - "mean": 0.31799160831142215, - "std": 0.0020068734272518417 - }, - "queen_str_strict": { - "mean": 0.3286190209211782, - "std": 0.01928133209227243 - }, - "knn_10_int": { - "mean": 0.024900841782800852, - "std": 0.00043173728306308373 - }, - "knn_10_str": { - "mean": 0.031059708283282816, - "std": 0.0003164549844492539 - }, - "knn_500_int": { - "mean": 0.22332576238550245, - "std": 0.005307254049118129 - }, - "knn_500_str": { - "mean": 0.4165452209301293, - "std": 0.0017880199415023917 - }, - "kernel_int": { - "mean": 0.3351672415621579, - "std": 0.023597386247916227 - }, - "kernel_str": { - "mean": 0.7452414750121534, - "std": 0.008023644628654479 - }, - "assign_self_weight_small_int": { - "mean": 0.004125737538561225, - "std": 0.0006155291350163758 - }, - "assign_self_weight_large_int": { - "mean": 0.09749369169585406, - "std": 0.004411739138039398 - }, - "assign_self_weight_small_str": { - "mean": 0.010200412361882627, - "std": 0.000504740384778999 - }, - "assign_self_weight_large_str": { - "mean": 0.2589453041087836, - "std": 0.004367613279158766 - }, - "sparse_small_int": { - "mean": 0.0003439947455190122, - "std": 0.00012388488322321412 - }, - "sparse_large_int": { - "mean": 0.01072925938735716, - "std": 0.0025470408238193213 - }, - "sparse_small_str": { - "mean": 0.0003441800721921027, - "std": 0.00032913256973422664 - }, - "sparse_large_str": { - "mean": 0.010543452413752675, - "std": 0.001791658153606046 - }, - "subgraph_small_int": { - "mean": 0.0059226417215541005, - "std": 0.00020078535750021596 - }, - "subgraph_large_int": { - "mean": 0.019824503781273962, - "std": 0.0022293929747628275 - }, - "subgraph_small_str": { - "mean": 0.006567858462221921, - "std": 0.00023698073401308311 - }, - "subgraph_large_str": { - "mean": 0.029949891753494738, - "std": 0.0016261504776117926 - } -} \ No newline at end of file diff --git a/benchmark.ipynb b/benchmark.ipynb deleted file mode 100644 index c649d4159..000000000 --- a/benchmark.ipynb +++ /dev/null @@ -1,217 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "7cea941d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'4.13.1.dev14+ga01ec777'" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import json\n", - "import time\n", - "from functools import wraps\n", - "\n", - "import geopandas as gpd\n", - "import numpy as np\n", - "from geodatasets import get_path\n", - "\n", - "from libpysal import __version__\n", - "from libpysal.graph import Graph\n", - "\n", - "__version__" - ] - }, - { - "cell_type": "markdown", - "id": "9ade8f7b", - "metadata": {}, - "source": [ - "Timer decorator" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "a56c4e55", - "metadata": {}, - "outputs": [], - "source": [ - "def timer(iterations):\n", - " \"\"\"Decorator to time function execution.\"\"\"\n", - "\n", - " def decorator(func):\n", - " @wraps(func)\n", - " def wrapper(*args, **kwargs):\n", - " times = []\n", - " for _ in range(iterations):\n", - " start_time = time.perf_counter()\n", - " _ = func(*args, **kwargs)\n", - " end_time = time.perf_counter()\n", - " times.append(end_time - start_time)\n", - " return {'mean': float(np.mean(times)), 'std': float(np.std(times))}\n", - "\n", - " return wrapper\n", - " return decorator" - ] - }, - { - "cell_type": "markdown", - "id": "cbff2730", - "metadata": {}, - "source": [ - "Test data" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "2b77e5d8", - "metadata": {}, - "outputs": [], - "source": [ - "gdf = gpd.read_file(get_path('geoda south'))\n", - "gdf_name = gdf.set_index(gdf.NAME + \" \" + gdf.STATE_NAME)\n", - "gdf_points = gdf.set_geometry(gdf.representative_point())\n", - "gdf_name_points = gdf_name.set_geometry(gdf_name.representative_point())\n", - "\n", - "small_int = Graph.build_knn(gdf_points, k=10)\n", - "large_int = Graph.build_knn(gdf_points, k=500)\n", - "small_str = Graph.build_knn(gdf_name_points, k=10)\n", - "large_str = Graph.build_knn(gdf_name_points, k=500)" - ] - }, - { - "cell_type": "markdown", - "id": "98beeb2f", - "metadata": {}, - "source": [ - "Timer functions" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "3ffabda2", - "metadata": {}, - "outputs": [], - "source": [ - "@timer(10)\n", - "def queen(gdf, strict=False):\n", - " Graph.build_contiguity(gdf, strict=strict)\n", - "\n", - "\n", - "@timer(10)\n", - "def knn(gdf, k):\n", - " Graph.build_knn(gdf, k=k)\n", - "\n", - "@timer(5)\n", - "def kernel(gdf):\n", - " Graph.build_kernel(gdf)\n", - "\n", - "@timer(10)\n", - "def assign_self_weight(graph):\n", - " graph.assign_self_weight()\n", - "\n", - "@timer(1000)\n", - "def sparse(graph):\n", - " _ = graph.sparse\n", - "\n", - "@timer(10)\n", - "def subgraph(graph, ids):\n", - " graph.subgraph(ids)" - ] - }, - { - "cell_type": "markdown", - "id": "ba09f5d0", - "metadata": {}, - "source": [ - "Run benchmarks" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "7204fa92", - "metadata": {}, - "outputs": [], - "source": [ - "results = {'version': __version__}\n", - "\n", - "results['queen_int'] = queen(gdf)\n", - "results['queen_str'] = queen(gdf_name)\n", - "results['queen_int_strict'] = queen(gdf, strict=True)\n", - "results['queen_str_strict'] = queen(gdf_name, strict=True)\n", - "results['knn_10_int'] = knn(gdf_points, k=10)\n", - "results['knn_10_str'] = knn(gdf_name_points, k=10)\n", - "results['knn_500_int'] = knn(gdf_points, k=500)\n", - "results['knn_500_str'] = knn(gdf_name_points, k=500)\n", - "results['kernel_int'] = kernel(gdf_points)\n", - "results['kernel_str'] = kernel(gdf_name_points)\n", - "results['assign_self_weight_small_int'] = assign_self_weight(small_int)\n", - "results['assign_self_weight_large_int'] = assign_self_weight(large_int)\n", - "results['assign_self_weight_small_str'] = assign_self_weight(small_str)\n", - "results['assign_self_weight_large_str'] = assign_self_weight(large_str)\n", - "results['sparse_small_int'] = sparse(small_int)\n", - "results['sparse_large_int'] = sparse(large_int)\n", - "results['sparse_small_str'] = sparse(small_str)\n", - "results['sparse_large_str'] = sparse(large_str)\n", - "results['subgraph_small_int'] = subgraph(small_int, gdf.index.to_series().sample(gdf.shape[0] // 5).values)\n", - "results['subgraph_large_int'] = subgraph(large_int, gdf.index.to_series().sample(gdf.shape[0] // 5).values)\n", - "results['subgraph_small_str'] = subgraph(small_str, gdf_name.index.to_series().sample(gdf.shape[0] // 5).values)\n", - "results['subgraph_large_str'] = subgraph(large_str, gdf_name.index.to_series().sample(gdf.shape[0] // 5).values)" - ] - }, - { - "cell_type": "markdown", - "id": "7da0707d", - "metadata": {}, - "source": [ - "Save to file" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "68e2986f", - "metadata": {}, - "outputs": [], - "source": [ - "with open('bench.json', 'w') as f:\n", - " json.dump(results, f, indent=4)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "default", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 0b9b4cd62db3445c778d5b626995f845c1d5ed07 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Sat, 19 Jul 2025 22:19:43 +0200 Subject: [PATCH 4/4] cleanup --- benchmarks/bench_graph.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/benchmarks/bench_graph.py b/benchmarks/bench_graph.py index 4cba9bd43..e1dc0c6fd 100644 --- a/benchmarks/bench_graph.py +++ b/benchmarks/bench_graph.py @@ -6,11 +6,6 @@ class TimeSuite: - """ - An example benchmark that times the performance of various kinds - of iterating over dictionaries in Python. - """ - def setup(self, *args, **kwargs): self.gdf = gpd.read_file(get_path("geoda south")) self.gdf_str = self.gdf.set_index(self.gdf.NAME + " " + self.gdf.STATE_NAME)