From de21e994ae99521dde47b0fde745b2b021627ed7 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 21 Jul 2025 12:08:53 +0200 Subject: [PATCH 01/14] Prefer broadcast over reindex when possible --- xarray/core/dataset.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 68f0caa678d..1171d2df050 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3882,14 +3882,21 @@ def _validate_interp_indexer(x, new_x): # For normal number types do the interpolation: var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims} variables[name] = missing.interp(var, var_indexers, method, **kwargs) - elif dtype_kind in "ObU" and (use_indexers.keys() & var.dims): - # For types that we do not understand do stepwise - # interpolation to avoid modifying the elements. - # reindex the variable instead because it supports - # booleans and objects and retains the dtype but inside - # this loop there might be some duplicate code that slows it - # down, therefore collect these signals and run it later: - reindex_vars.append(name) + elif dtype_kind in "ObU": + matching_dims = use_indexers.keys() & var.dims + + if matching_dims and all(var.sizes[d] == 1 for d in matching_dims): + # Broadcastable, can be handled quickly without reindex: + to_broadcast = (var.squeeze(),) + tuple(dest for index, dest in use_indexers.values()) + variables[name] = broadcast_variables(*to_broadcast)[0] + elif matching_dims: + # For types that we do not understand do stepwise + # interpolation to avoid modifying the elements. 
+ # reindex the variable instead because it supports + # booleans and objects and retains the dtype but inside + # this loop there might be some duplicate code that slows it + # down, therefore collect these signals and run it later: + reindex_vars.append(name) elif all(d not in indexers for d in var.dims): # For anything else we can only keep variables if they # are not dependent on any coords that are being From 4db4610dfe70eb4c00f20a8bb7741d6c6d57537c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Jul 2025 10:12:31 +0000 Subject: [PATCH 02/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 0aac1a5fe94..3eac1abf190 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3855,7 +3855,9 @@ def _validate_interp_indexer(x, new_x): if matching_dims and all(var.sizes[d] == 1 for d in matching_dims): # Broadcastable, can be handled quickly without reindex: - to_broadcast = (var.squeeze(),) + tuple(dest for index, dest in use_indexers.values()) + to_broadcast = (var.squeeze(),) + tuple( + dest for index, dest in use_indexers.values() + ) variables[name] = broadcast_variables(*to_broadcast)[0] elif matching_dims: # For types that we do not understand do stepwise From d256e721757ef58c42329676fc7061c8b04f0bf7 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 21 Jul 2025 12:47:23 +0200 Subject: [PATCH 03/14] Update dataset.py --- xarray/core/dataset.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3eac1abf190..5fc08f9a4ac 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3853,13 +3853,13 @@ def _validate_interp_indexer(x, new_x): elif 
dtype_kind in "ObU": matching_dims = use_indexers.keys() & var.dims - if matching_dims and all(var.sizes[d] == 1 for d in matching_dims): + # if matching_dims and all(var.sizes[d] == 1 for d in matching_dims): # Broadcastable, can be handled quickly without reindex: - to_broadcast = (var.squeeze(),) + tuple( - dest for index, dest in use_indexers.values() - ) - variables[name] = broadcast_variables(*to_broadcast)[0] - elif matching_dims: + # to_broadcast = (var.squeeze(),) + tuple( + # dest for index, dest in use_indexers.values() + # ) + # variables[name] = broadcast_variables(*to_broadcast)[0] + if matching_dims: # For types that we do not understand do stepwise # interpolation to avoid modifying the elements. # reindex the variable instead because it supports From f0f0d94cba02f1efd2a1528206c528f3f377fd4f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Jul 2025 10:48:17 +0000 Subject: [PATCH 04/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/dataset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5fc08f9a4ac..8de83170ff8 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3854,11 +3854,11 @@ def _validate_interp_indexer(x, new_x): matching_dims = use_indexers.keys() & var.dims # if matching_dims and all(var.sizes[d] == 1 for d in matching_dims): - # Broadcastable, can be handled quickly without reindex: - # to_broadcast = (var.squeeze(),) + tuple( - # dest for index, dest in use_indexers.values() - # ) - # variables[name] = broadcast_variables(*to_broadcast)[0] + # Broadcastable, can be handled quickly without reindex: + # to_broadcast = (var.squeeze(),) + tuple( + # dest for index, dest in use_indexers.values() + # ) + # variables[name] = broadcast_variables(*to_broadcast)[0] if matching_dims: # For types that we do 
not understand do stepwise # interpolation to avoid modifying the elements. From d76c09a81d9d611511aa787efd1e04ee6048810f Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 21 Jul 2025 13:24:10 +0200 Subject: [PATCH 05/14] Update dataset.py --- xarray/core/dataset.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 8de83170ff8..d4b07f48bb3 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3850,16 +3850,14 @@ def _validate_interp_indexer(x, new_x): # For normal number types do the interpolation: var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims} variables[name] = missing.interp(var, var_indexers, method, **kwargs) - elif dtype_kind in "ObU": - matching_dims = use_indexers.keys() & var.dims - - # if matching_dims and all(var.sizes[d] == 1 for d in matching_dims): - # Broadcastable, can be handled quickly without reindex: - # to_broadcast = (var.squeeze(),) + tuple( - # dest for index, dest in use_indexers.values() - # ) - # variables[name] = broadcast_variables(*to_broadcast)[0] - if matching_dims: + elif dtype_kind in "ObU" and use_indexers.keys() & var.dims: + if all(var.sizes[d] == 1 for d in (use_indexers.keys() & var.dims)): + # Broadcastable, can be handled quickly without reindex: + to_broadcast = (var.copy().squeeze(),) + tuple( + dest for index, dest in use_indexers.values() + ) + variables[name] = broadcast_variables(*to_broadcast)[0] + else: # For types that we do not understand do stepwise # interpolation to avoid modifying the elements. 
# reindex the variable instead because it supports From 4b5250a1b4995e48081af68b8468e1ac40c21dac Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 21 Jul 2025 13:26:05 +0200 Subject: [PATCH 06/14] Update dataset.py --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d4b07f48bb3..510bb77d09d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3850,7 +3850,7 @@ def _validate_interp_indexer(x, new_x): # For normal number types do the interpolation: var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims} variables[name] = missing.interp(var, var_indexers, method, **kwargs) - elif dtype_kind in "ObU" and use_indexers.keys() & var.dims: + elif dtype_kind in "ObU" and (use_indexers.keys() & var.dims): if all(var.sizes[d] == 1 for d in (use_indexers.keys() & var.dims)): # Broadcastable, can be handled quickly without reindex: to_broadcast = (var.copy().squeeze(),) + tuple( From b6605661d29e08929023776c8ced9b16850b2199 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 21 Jul 2025 14:03:15 +0200 Subject: [PATCH 07/14] Update interp.py --- asv_bench/benchmarks/interp.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/interp.py b/asv_bench/benchmarks/interp.py index 4b6691bcc0a..860e2c61b2f 100644 --- a/asv_bench/benchmarks/interp.py +++ b/asv_bench/benchmarks/interp.py @@ -25,24 +25,37 @@ def setup(self, *args, **kwargs): "var1": (("x", "y"), randn_xy), "var2": (("x", "t"), randn_xt), "var3": (("t",), randn_t), + "var4":(("z",), np.array(["text"])), + "var5":(("k",), np.array(["a", "b", "c"])), }, coords={ "x": np.arange(nx), "y": np.linspace(0, 1, ny), "t": pd.date_range("1970-01-01", periods=nt, freq="D"), "x_coords": ("x", np.linspace(1.1, 2.1, nx)), + "z": np.array([1]), + "k": 
np.linspace(0, nx, 3) }, ) @parameterized(["method", "is_short"], (["linear", "cubic"], [True, False])) - def time_interpolation(self, method, is_short): + def time_interpolation_numeric_1d(self, method, is_short): new_x = new_x_short if is_short else new_x_long - self.ds.interp(x=new_x, method=method).load() + self.ds.interp(x=new_x, method=method).compute() @parameterized(["method"], (["linear", "nearest"])) - def time_interpolation_2d(self, method): - self.ds.interp(x=new_x_long, y=new_y_long, method=method).load() + def time_interpolation_numeric_2d(self, method): + self.ds.interp(x=new_x_long, y=new_y_long, method=method).compute() + @parameterized(["is_short"], ([True, False])) + def time_interpolation_string_scalar(self, is_short): + new_z = new_x_short if is_short else new_x_long + self.ds.interp(z=new_z).compute() + + @parameterized(["is_short"], ([True, False])) + def time_interpolation_string_1d(self, is_short): + new_k = new_x_short if is_short else new_x_long + self.ds.interp(k=new_k).compute() class InterpolationDask(Interpolation): def setup(self, *args, **kwargs): From 1850ea8ff8be188867d9bfdf4f457d8c3d4ac533 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Jul 2025 12:03:42 +0000 Subject: [PATCH 08/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- asv_bench/benchmarks/interp.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/interp.py b/asv_bench/benchmarks/interp.py index 860e2c61b2f..ca1d0a2dd89 100644 --- a/asv_bench/benchmarks/interp.py +++ b/asv_bench/benchmarks/interp.py @@ -25,8 +25,8 @@ def setup(self, *args, **kwargs): "var1": (("x", "y"), randn_xy), "var2": (("x", "t"), randn_xt), "var3": (("t",), randn_t), - "var4":(("z",), np.array(["text"])), - "var5":(("k",), np.array(["a", "b", "c"])), + "var4": (("z",), np.array(["text"])), + "var5": (("k",), np.array(["a", 
"b", "c"])), }, coords={ "x": np.arange(nx), @@ -34,7 +34,7 @@ def setup(self, *args, **kwargs): "t": pd.date_range("1970-01-01", periods=nt, freq="D"), "x_coords": ("x", np.linspace(1.1, 2.1, nx)), "z": np.array([1]), - "k": np.linspace(0, nx, 3) + "k": np.linspace(0, nx, 3), }, ) @@ -57,6 +57,7 @@ def time_interpolation_string_1d(self, is_short): new_k = new_x_short if is_short else new_x_long self.ds.interp(k=new_k).compute() + class InterpolationDask(Interpolation): def setup(self, *args, **kwargs): requires_dask() From 6c7b325c062d6ddf2b0268c9c24864c3617035e2 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 21 Jul 2025 15:27:15 +0200 Subject: [PATCH 09/14] Update test_interp.py --- xarray/tests/test_interp.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index 7d5a9bf3db4..0481b1c49da 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -1065,6 +1065,25 @@ def test_interp1d_complex_out_of_bounds() -> None: assert_identical(actual, expected) +@requires_scipy +def test_interp_non_numeric_scalar() -> None: + ds = xr.Dataset( + { + "non_numeric": ("time", np.array(["a"])), + }, + coords={"time": (np.array([0]))}, + ) + actual = ds.interp(time=np.linspace(0, 3, 3)) + + expected = xr.Dataset( + { + "non_numeric": ("time", np.array(["a", "a", "a"])), + }, + coords={"time": np.linspace(0, 3, 3)}, + ) + xr.testing.assert_identical(actual, expected) + + @requires_scipy def test_interp_non_numeric_1d() -> None: ds = xr.Dataset( From 742acb9067d677907a428e3471b04118380e9e71 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 21 Jul 2025 15:48:11 +0200 Subject: [PATCH 10/14] Update whats-new.rst --- doc/whats-new.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index add40bb6b81..0a961f57220 100644 --- a/doc/whats-new.rst +++ 
b/doc/whats-new.rst @@ -39,6 +39,11 @@ Internal Changes ~~~~~~~~~~~~~~~~ +Performance +~~~~~~~~~~~ +- Speed up non-numeric scalars when calling :py:meth:`Dataset.interp`. (:issue:`10054`, :pull:`10554`) + By `Jimmy Westling <https://github.com/illviljan>`_. + .. _whats-new.2025.07.1: v2025.07.1 (July 09, 2025) From 4c06cd5e04331c8814fcbdebc7c605611c5af1a1 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:47:33 +0200 Subject: [PATCH 11/14] add test for copy vs view --- xarray/tests/test_interp.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index 0481b1c49da..dd3906cfd59 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -1083,6 +1083,9 @@ def test_interp_non_numeric_scalar() -> None: ) xr.testing.assert_identical(actual, expected) + # Make sure the array is a copy: + assert actual["non_numeric"].data.base is None + @requires_scipy def test_interp_non_numeric_1d() -> None: From 89b7b2471a3ba56a0be651d3d7c19c1751266be0 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:47:47 +0200 Subject: [PATCH 12/14] deep copy --- xarray/core/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 510bb77d09d..d99608ea68f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3853,10 +3853,10 @@ def _validate_interp_indexer(x, new_x): elif dtype_kind in "ObU" and (use_indexers.keys() & var.dims): if all(var.sizes[d] == 1 for d in (use_indexers.keys() & var.dims)): # Broadcastable, can be handled quickly without reindex: - to_broadcast = (var.copy().squeeze(),) + tuple( + to_broadcast = (var.squeeze(),) + tuple( dest for index, dest in use_indexers.values() ) - variables[name] = broadcast_variables(*to_broadcast)[0] + variables[name] = broadcast_variables(*to_broadcast)[0].copy(deep=True) else: # For types
that we do not understand do stepwise # interpolation to avoid modifying the elements. From dd8fa407c0dffeb6888d583d1b609eb73f4a1ffe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:48:16 +0000 Subject: [PATCH 13/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d99608ea68f..234493e9315 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3856,7 +3856,9 @@ def _validate_interp_indexer(x, new_x): to_broadcast = (var.squeeze(),) + tuple( dest for index, dest in use_indexers.values() ) - variables[name] = broadcast_variables(*to_broadcast)[0].copy(deep=True) + variables[name] = broadcast_variables(*to_broadcast)[0].copy( + deep=True + ) else: # For types that we do not understand do stepwise # interpolation to avoid modifying the elements. From 9118a2f3569780d76c411940a17c9e41a4afc73b Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 21 Jul 2025 22:52:26 +0200 Subject: [PATCH 14/14] Apply suggestions from code review Co-authored-by: Deepak Cherian --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 234493e9315..f79df3da7c2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3854,7 +3854,7 @@ def _validate_interp_indexer(x, new_x): if all(var.sizes[d] == 1 for d in (use_indexers.keys() & var.dims)): # Broadcastable, can be handled quickly without reindex: to_broadcast = (var.squeeze(),) + tuple( - dest for index, dest in use_indexers.values() + dest for _, dest in use_indexers.values() ) variables[name] = broadcast_variables(*to_broadcast)[0].copy( deep=True