
Commit 9a765ef

jemmajeffree, dcherian, and pre-commit-ci[bot] authored
More stable algorithm for variance, standard deviation (#456)
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Deepak Cherian <deepak@cherian.net>
1 parent f6c87fc commit 9a765ef
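
For context, a minimal NumPy sketch (illustrative only, not flox code) of the precision problem the commit title refers to: the old intermediates were (sum of squares, sum, count) with the "textbook" finalize (sumsq - sum**2 / count) / count, which cancels catastrophically when the mean is much larger than the spread, whereas the new chunk function accumulates squared deviations about each chunk's mean.

import numpy as np

x = np.full(3, 1e8) + np.array([1.0, 2.0, 3.0])  # large offset, small spread

sumsq, s, n = (x**2).sum(), x.sum(), x.size
textbook = (sumsq - s**2 / n) / n           # old-style finalize; loses precision here
deviations = ((x - s / n) ** 2).sum() / n   # squared deviations about the mean, as in var_chunk

print(textbook, deviations, np.var(x))      # the first value is inaccurate; the latter two agree at the true 2/3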

File tree: 10 files changed (+483, -51 lines)


.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@ jobs:
           cache-dependency-glob: "pyproject.toml"
       - name: Install xarray and dependencies
         run: |
-          uv add --dev .[complete] pint>=0.22
+          uv add --dev ".[complete]" "pint>=0.22"
       - name: Install upstream flox
         run: |
           uv add git+https://github.com/dcherian/flox.git@${{ github.ref }}
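
The quoting is presumably needed because, unquoted, the shell treats `>` in `pint>=0.22` as an output redirection and may glob-expand `.[complete]`, so `uv add` would not receive the intended requirement specifiers.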

flox/aggregations.py

Lines changed: 153 additions & 25 deletions
@@ -15,6 +15,8 @@
 from . import aggregate_flox, aggregate_npg, xrutils
 from . import xrdtypes as dtypes
 from .lib import dask_array_type, sparse_array_type
+from .multiarray import MultiArray
+from .xrutils import notnull

 if TYPE_CHECKING:
     FuncTuple = tuple[Callable | str, ...]
@@ -161,8 +163,8 @@ def __init__(
         self,
         name: str,
         *,
-        numpy: str | None = None,
-        chunk: str | FuncTuple | None,
+        numpy: partial | str | None = None,
+        chunk: partial | str | FuncTuple | None,
         combine: str | FuncTuple | None,
         preprocess: Callable | None = None,
         finalize: Callable | None = None,
@@ -343,57 +345,183 @@ def _mean_finalize(sum_, count):
 )


-# TODO: fix this for complex numbers
-def _var_finalize(sumsq, sum_, count, ddof=0):
+def var_chunk(
+    group_idx, array, *, skipna: bool, engine: str, axis=-1, size=None, fill_value=None, dtype=None
+):
+    # Calculate length and sum - important for the adjustment terms to sum squared deviations
+    array_lens = generic_aggregate(
+        group_idx,
+        array,
+        func="nanlen",
+        engine=engine,
+        axis=axis,
+        size=size,
+        fill_value=0,  # Unpack fill value bc it's currently defined for multiarray
+        dtype=dtype,
+    )
+
+    array_sums = generic_aggregate(
+        group_idx,
+        array,
+        func="nansum" if skipna else "sum",
+        engine=engine,
+        axis=axis,
+        size=size,
+        fill_value=0,  # Unpack fill value bc it's currently defined for multiarray
+        dtype=dtype,
+    )
+
+    # Calculate sum squared deviations - the main part of variance sum
     with np.errstate(invalid="ignore", divide="ignore"):
-        result = (sumsq - (sum_**2 / count)) / (count - ddof)
-    result[count <= ddof] = np.nan
-    return result
+        array_means = array_sums / array_lens
+
+    sum_squared_deviations = generic_aggregate(
+        group_idx,
+        (array - array_means[..., group_idx]) ** 2,
+        func="nansum" if skipna else "sum",
+        engine=engine,
+        axis=axis,
+        size=size,
+        fill_value=0,  # Unpack fill value bc it's currently defined for multiarray
+        dtype=dtype,
+    )
+
+    return MultiArray((sum_squared_deviations, array_sums, array_lens))
+
+
+def _var_combine(array, axis, keepdims=True):
+    def clip_last(array, ax, n=1):
+        """Return array except the last element along axis
+        Purely included to tidy up the adj_terms line
+        """
+        assert n > 0, "Clipping nothing off the end isn't implemented"
+        not_last = [slice(None, None) for i in range(array.ndim)]
+        not_last[ax] = slice(None, -n)
+        return array[*not_last]
+
+    def clip_first(array, ax, n=1):
+        """Return array except the first element along axis
+        Purely included to tidy up the adj_terms line
+        """
+        not_first = [slice(None, None) for i in range(array.ndim)]
+        not_first[ax] = slice(n, None)
+        return array[*not_first]
+
+    for ax in axis:
+        if array.shape[ax] == 1:
+            continue
+
+        sum_deviations, sum_X, sum_len = array.arrays
+
+        # Calculate parts needed for cascading combination
+        cumsum_X = np.cumsum(sum_X, axis=ax)
+        cumsum_len = np.cumsum(sum_len, axis=ax)
+
+        # There will be instances in which one or both chunks being merged are empty
+        # In which case, the adjustment term should be zero, but will throw a divide-by-zero error
+        # We're going to add a constant to the bottom of the adjustment term equation on those instances
+        # and count on the zeros on the top making our adjustment term still zero
+        zero_denominator = (clip_last(cumsum_len, ax) == 0) | (clip_first(sum_len, ax) == 0)
+
+        # Adjustment terms to tweak the sum of squared deviations because not every chunk has the same mean
+        with np.errstate(invalid="ignore", divide="ignore"):
+            adj_terms = (
+                clip_last(cumsum_len, ax) * clip_first(sum_X, ax)
+                - clip_first(sum_len, ax) * clip_last(cumsum_X, ax)
+            ) ** 2 / (
+                clip_last(cumsum_len, ax)
+                * clip_first(sum_len, ax)
+                * (clip_last(cumsum_len, ax) + clip_first(sum_len, ax))
+                + zero_denominator.astype(int)
+            )
+
+        check = adj_terms * zero_denominator
+        assert np.all(check[notnull(check)] == 0), (
+            "Instances where we add something to the denominator must come out to zero"
+        )
+
+        array = MultiArray(
+            (
+                np.sum(sum_deviations, axis=ax, keepdims=keepdims)
+                + np.sum(adj_terms, axis=ax, keepdims=keepdims),  # sum of squared deviations
+                np.sum(sum_X, axis=ax, keepdims=keepdims),  # sum of array items
+                np.sum(sum_len, axis=ax, keepdims=keepdims),  # sum of array lengths
+            )
+        )
+    return array
+
+
+def is_var_chunk_reduction(agg: Callable) -> bool:
+    if isinstance(agg, partial):
+        agg = agg.func
+    return agg is blockwise_or_numpy_var or agg is var_chunk
+
+
+def _var_finalize(multiarray, ddof=0):
+    den = multiarray.arrays[2]
+    den -= ddof
+    # preserve nans for groups with 0 obs; so these values are -ddof
+    with np.errstate(invalid="ignore", divide="ignore"):
+        ret = multiarray.arrays[0]
+        ret /= den
+    ret[den < 0] = np.nan
+    return ret


-def _std_finalize(sumsq, sum_, count, ddof=0):
-    return np.sqrt(_var_finalize(sumsq, sum_, count, ddof))
+def _std_finalize(multiarray, ddof=0):
+    return np.sqrt(_var_finalize(multiarray, ddof))
+
+
+def blockwise_or_numpy_var(*args, skipna: bool, ddof=0, std=False, **kwargs):
+    res = _var_finalize(var_chunk(*args, skipna=skipna, **kwargs), ddof)
+    return np.sqrt(res) if std else res


 # var, std always promote to float, so we set nan
 var = Aggregation(
     "var",
-    chunk=("sum_of_squares", "sum", "nanlen"),
-    combine=("sum", "sum", "sum"),
+    chunk=partial(var_chunk, skipna=False),
+    numpy=partial(blockwise_or_numpy_var, skipna=False),
+    combine=(_var_combine,),
     finalize=_var_finalize,
-    fill_value=0,
+    fill_value=((0, 0, 0),),
     final_fill_value=np.nan,
-    dtypes=(None, None, np.intp),
+    dtypes=(None,),
     final_dtype=np.floating,
 )
+
 nanvar = Aggregation(
     "nanvar",
-    chunk=("nansum_of_squares", "nansum", "nanlen"),
-    combine=("sum", "sum", "sum"),
+    chunk=partial(var_chunk, skipna=True),
+    numpy=partial(blockwise_or_numpy_var, skipna=True),
+    combine=(_var_combine,),
     finalize=_var_finalize,
-    fill_value=0,
+    fill_value=((0, 0, 0),),
     final_fill_value=np.nan,
-    dtypes=(None, None, np.intp),
+    dtypes=(None,),
     final_dtype=np.floating,
 )
+
 std = Aggregation(
     "std",
-    chunk=("sum_of_squares", "sum", "nanlen"),
-    combine=("sum", "sum", "sum"),
+    chunk=partial(var_chunk, skipna=False),
+    numpy=partial(blockwise_or_numpy_var, skipna=False, std=True),
+    combine=(_var_combine,),
     finalize=_std_finalize,
-    fill_value=0,
+    fill_value=((0, 0, 0),),
     final_fill_value=np.nan,
-    dtypes=(None, None, np.intp),
+    dtypes=(None,),
     final_dtype=np.floating,
 )
 nanstd = Aggregation(
     "nanstd",
-    chunk=("nansum_of_squares", "nansum", "nanlen"),
-    combine=("sum", "sum", "sum"),
+    chunk=partial(var_chunk, skipna=True),
+    numpy=partial(blockwise_or_numpy_var, skipna=True, std=True),
+    combine=(_var_combine,),
     finalize=_std_finalize,
-    fill_value=0,
+    fill_value=((0, 0, 0),),
     final_fill_value=np.nan,
-    dtypes=(None, None, np.intp),
+    dtypes=(None,),
     final_dtype=np.floating,
 )

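For reference, a small self-contained NumPy sketch (hypothetical names, not flox's internal API) of the pairwise update `_var_combine` performs when merging two chunks' (sum of squared deviations, sum, count) intermediates; the adjustment term is the same one computed as `adj_terms` above.

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(loc=1e6, scale=1.0, size=1000)
chunk_a, chunk_b = x[:400], x[400:]

def partials(chunk):
    n = chunk.size
    s = chunk.sum()
    ssd = ((chunk - s / n) ** 2).sum()  # squared deviations about the chunk mean
    return ssd, s, n

ssd_a, s_a, n_a = partials(chunk_a)
ssd_b, s_b, n_b = partials(chunk_b)

# Correct for the two chunks having different means, then pool the deviations.
adj = (n_a * s_b - n_b * s_a) ** 2 / (n_a * n_b * (n_a + n_b))
pooled_ssd = ssd_a + ssd_b + adj

print(np.allclose(pooled_ssd / x.size, np.var(x)))  # True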

flox/core.py

Lines changed: 8 additions & 1 deletion
@@ -44,6 +44,7 @@
     _atleast_1d,
     _initialize_aggregation,
     generic_aggregate,
+    is_var_chunk_reduction,
     quantile_new_dims_func,
 )
 from .cache import memoize
@@ -1289,7 +1290,8 @@ def chunk_reduce(
     # optimize that out.
     previous_reduction: T_Func = ""
     for reduction, fv, kw, dt in zip(funcs, fill_values, kwargss, dtypes):
-        if empty:
+        # UGLY! but this is because the `var` breaks our design assumptions
+        if empty and not is_var_chunk_reduction(reduction):
             result = np.full(shape=final_array_shape, fill_value=fv, like=array)
         elif is_nanlen(reduction) and is_nanlen(previous_reduction):
             result = results["intermediates"][-1]
@@ -1298,6 +1300,10 @@ def chunk_reduce(
             kw_func = dict(size=size, dtype=dt, fill_value=fv)
             kw_func.update(kw)

+            # UGLY! but this is because the `var` breaks our design assumptions
+            if is_var_chunk_reduction(reduction):
+                kw_func.update(engine=engine)
+
             if callable(reduction):
                 # passing a custom reduction for npg to apply per-group is really slow!
                 # So this `reduction` has to do the groupby-aggregation
@@ -2785,6 +2791,7 @@ def groupby_reduce(
         array = array.view(np.int64)
     elif is_cftime:
         offset = array.min()
+        assert offset is not None
         array = datetime_to_numeric(array, offset, datetime_unit="us")

     if nax == 1 and by_.ndim > 1 and expected_ is None:
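
Both guards accommodate the new variance path: `var_chunk` runs `generic_aggregate` internally, so the `engine` keyword has to be forwarded to it, and even an empty chunk has to go through the chunk function so that the intermediate comes back as a `MultiArray` rather than a plain `np.full` array.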

flox/multiarray.py

Lines changed: 97 additions & 0 deletions
@@ -0,0 +1,97 @@
+from collections.abc import Callable
+from typing import Self
+
+import numpy as np
+
+MULTIARRAY_HANDLED_FUNCTIONS: dict[Callable, Callable] = {}
+
+
+class MultiArray:
+    arrays: tuple[np.ndarray, ...]
+
+    def __init__(self, arrays):
+        self.arrays = arrays
+        assert all(arrays[0].shape == a.shape for a in arrays), "Expect all arrays to have the same shape"
+
+    def astype(self, dt, **kwargs) -> Self:
+        return type(self)(tuple(array.astype(dt, **kwargs) for array in self.arrays))
+
+    def reshape(self, shape, **kwargs) -> Self:
+        return type(self)(tuple(array.reshape(shape, **kwargs) for array in self.arrays))
+
+    def squeeze(self, axis=None) -> Self:
+        return type(self)(tuple(array.squeeze(axis) for array in self.arrays))
+
+    def __setitem__(self, key, value) -> None:
+        assert len(value) == len(self.arrays)
+        for array, val in zip(self.arrays, value):
+            array[key] = val
+
+    def __array_function__(self, func, types, args, kwargs):
+        if func not in MULTIARRAY_HANDLED_FUNCTIONS:
+            return NotImplemented
+        # Note: this allows subclasses that don't override
+        # __array_function__ to handle MyArray objects
+        # if not all(issubclass(t, MyArray) for t in types):  # I can't see this being relevant at all for this code, but maybe it's safer to leave it in?
+        #     return NotImplemented
+        return MULTIARRAY_HANDLED_FUNCTIONS[func](*args, **kwargs)
+
+    # Shape is needed, seems likely that the other two might be
+    # Making some strong assumptions here that all the arrays are the same shape, and I don't really like this
+    @property
+    def dtype(self) -> np.dtype:
+        return self.arrays[0].dtype
+
+    @property
+    def shape(self) -> tuple[int, ...]:
+        return self.arrays[0].shape
+
+    @property
+    def ndim(self) -> int:
+        return self.arrays[0].ndim
+
+    def __getitem__(self, key) -> Self:
+        return type(self)([array[key] for array in self.arrays])
+
+
+def implements(numpy_function):
+    """Register an __array_function__ implementation for MyArray objects."""
+
+    def decorator(func):
+        MULTIARRAY_HANDLED_FUNCTIONS[numpy_function] = func
+        return func
+
+    return decorator
+
+
+@implements(np.expand_dims)
+def expand_dims(multiarray, axis) -> MultiArray:
+    return MultiArray(tuple(np.expand_dims(a, axis) for a in multiarray.arrays))
+
+
+@implements(np.concatenate)
+def concatenate(multiarrays, axis) -> MultiArray:
+    n_arrays = len(multiarrays[0].arrays)
+    for ma in multiarrays[1:]:
+        assert len(ma.arrays) == n_arrays
+    return MultiArray(
+        tuple(np.concatenate(tuple(ma.arrays[i] for ma in multiarrays), axis) for i in range(n_arrays))
+    )
+
+
+@implements(np.transpose)
+def transpose(multiarray, axes) -> MultiArray:
+    return MultiArray(tuple(np.transpose(a, axes) for a in multiarray.arrays))
+
+
+@implements(np.squeeze)
+def squeeze(multiarray, axis) -> MultiArray:
+    return MultiArray(tuple(np.squeeze(a, axis) for a in multiarray.arrays))
+
+
+@implements(np.full)
+def full(shape, fill_values, *args, **kwargs) -> MultiArray:
+    """All arguments except fill_value are shared by each array in the MultiArray.
+    Iterate over fill_values to create arrays
+    """
+    return MultiArray(tuple(np.full(shape, fv, *args, **kwargs) for fv in fill_values))
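
A small usage sketch of the wrapper (assuming this commit is installed so that `flox.multiarray` is importable): NumPy functions registered via `implements` are dispatched through `__array_function__` and applied to each wrapped array independently.

import numpy as np
from flox.multiarray import MultiArray

a = MultiArray((np.zeros((2, 3)), np.ones((2, 3)), np.full((2, 3), 2.0)))
b = MultiArray((np.zeros((2, 1)), np.ones((2, 1)), np.full((2, 1), 2.0)))

# np.concatenate is routed to the registered implementation above,
# which concatenates the three wrapped arrays one by one.
combined = np.concatenate((a, b), axis=1)
print(combined.shape)        # (2, 4), reported from the first wrapped array
print(len(combined.arrays))  # 3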

flox/xrutils.py

Lines changed: 6 additions & 0 deletions
@@ -147,6 +147,9 @@ def is_scalar(value: Any, include_0d: bool = True) -> bool:


 def notnull(data):
+    if isinstance(data, tuple) and len(data) == 3 and data == (0, 0, 0):
+        # boo: another special case for Var
+        return True
     if not is_duck_array(data):
         data = np.asarray(data)

@@ -164,6 +167,9 @@ def notnull(data):


 def isnull(data: Any):
+    if isinstance(data, tuple) and len(data) == 3 and data == (0, 0, 0):
+        # boo: another special case for Var
+        return False
     if data is None:
         return False
     if not is_duck_array(data):
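
The `(0, 0, 0)` special case matches the fill value declared for the new var/std aggregations (one zero per wrapped array); it is not a duck array, so `notnull`/`isnull` answer for it directly rather than attempting `np.asarray` on a tuple.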
