Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
215 changes: 81 additions & 134 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,123 +8,77 @@
import sys
import warnings
from collections import defaultdict
from collections.abc import (
Collection,
Hashable,
Iterable,
Iterator,
Mapping,
MutableMapping,
Sequence,
)
from collections.abc import (Collection, Hashable, Iterable, Iterator, Mapping,
MutableMapping, Sequence)
from html import escape
from numbers import Number
from operator import methodcaller
from os import PathLike
from typing import IO, TYPE_CHECKING, Any, Callable, Generic, Literal, cast, overload
from typing import (IO, TYPE_CHECKING, Any, Callable, Generic, Literal, cast,
overload)

import numpy as np

# remove once numpy 2.0 is the oldest supported version
try:
from numpy.exceptions import RankWarning # type: ignore[attr-defined,unused-ignore]
except ImportError:
from numpy import RankWarning

import pandas as pd
from line_profiler import profile as codeflash_line_profile
codeflash_line_profile.enable(output_prefix='/tmp/codeflash_hyrdbv3n/baseline_lprof')

from xarray.coding.calendar_ops import convert_calendar, interp_calendar
from xarray.coding.cftimeindex import CFTimeIndex, _parse_array_of_cftime_strings
from xarray.core import (
alignment,
duck_array_ops,
formatting,
formatting_html,
ops,
utils,
)
from xarray.coding.cftimeindex import (CFTimeIndex,
_parse_array_of_cftime_strings)
from xarray.core import alignment
from xarray.core import dtypes as xrdtypes
from xarray.core import duck_array_ops, formatting, formatting_html, ops, utils
from xarray.core._aggregations import DatasetAggregations
from xarray.core.alignment import (
_broadcast_helper,
_get_broadcast_dims_map_common_coords,
align,
)
from xarray.core.alignment import (_broadcast_helper,
_get_broadcast_dims_map_common_coords,
align)
from xarray.core.arithmetic import DatasetArithmetic
from xarray.core.common import (
DataWithCoords,
_contains_datetime_like_objects,
get_chunksizes,
)
from xarray.core.common import (DataWithCoords,
_contains_datetime_like_objects,
get_chunksizes)
from xarray.core.computation import unify_chunks
from xarray.core.coordinates import (
Coordinates,
DatasetCoordinates,
assert_coordinate_consistent,
create_coords_with_default_indexes,
)
from xarray.core.coordinates import (Coordinates, DatasetCoordinates,
assert_coordinate_consistent,
create_coords_with_default_indexes)
from xarray.core.duck_array_ops import datetime_to_numeric
from xarray.core.indexes import (
Index,
Indexes,
PandasIndex,
PandasMultiIndex,
assert_no_index_corrupted,
create_default_index_implicit,
filter_indexes_from_coords,
isel_indexes,
remove_unused_levels_categories,
roll_indexes,
)
from xarray.core.indexes import (Index, Indexes, PandasIndex, PandasMultiIndex,
assert_no_index_corrupted,
create_default_index_implicit,
filter_indexes_from_coords, isel_indexes,
remove_unused_levels_categories, roll_indexes)
from xarray.core.indexing import is_fancy_indexer, map_index_queries
from xarray.core.merge import (
dataset_merge_method,
dataset_update_method,
merge_coordinates_without_align,
merge_core,
)
from xarray.core.merge import (dataset_merge_method, dataset_update_method,
merge_coordinates_without_align, merge_core)
from xarray.core.missing import get_clean_interp_index
from xarray.core.options import OPTIONS, _get_keep_attrs
from xarray.core.types import (
QuantileMethods,
Self,
T_ChunkDim,
T_Chunks,
T_DataArray,
T_DataArrayOrSet,
T_Dataset,
ZarrWriteModes,
)
from xarray.core.utils import (
Default,
Frozen,
FrozenMappingWarningOnValuesAccess,
HybridMappingProxy,
OrderedSet,
_default,
decode_numpy_dict_values,
drop_dims_from_indexers,
either_dict_or_kwargs,
emit_user_level_warning,
infix_dims,
is_dict_like,
is_duck_array,
is_duck_dask_array,
is_scalar,
maybe_wrap_array,
)
from xarray.core.variable import (
IndexVariable,
Variable,
as_variable,
broadcast_variables,
calculate_dimensions,
)
from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager
from xarray.core.types import (QuantileMethods, Self, T_ChunkDim, T_Chunks,
T_DataArray, T_DataArrayOrSet, T_Dataset,
ZarrWriteModes)
from xarray.core.utils import (Default, Frozen,
FrozenMappingWarningOnValuesAccess,
HybridMappingProxy, OrderedSet, _default,
decode_numpy_dict_values,
drop_dims_from_indexers, either_dict_or_kwargs,
emit_user_level_warning, infix_dims,
is_dict_like, is_duck_array, is_duck_dask_array,
is_scalar, maybe_wrap_array)
from xarray.core.variable import (IndexVariable, Variable, as_variable,
broadcast_variables, calculate_dimensions)
from xarray.namedarray.parallelcompat import (get_chunked_array_type,
guess_chunkmanager)
from xarray.namedarray.pycompat import array_type, is_chunked_array
from xarray.plot.accessor import DatasetPlotAccessor
from xarray.util.deprecation_helpers import _deprecate_positional_args

# remove once numpy 2.0 is the oldest supported version
try:
from numpy.exceptions import \
RankWarning # type: ignore[attr-defined,unused-ignore]
except ImportError:
from numpy import RankWarning



if TYPE_CHECKING:
from dask.dataframe import DataFrame as DaskDataFrame
from dask.delayed import Delayed
Expand All @@ -134,31 +88,19 @@
from xarray.backends.api import T_NetcdfEngine, T_NetcdfTypes
from xarray.core.dataarray import DataArray
from xarray.core.groupby import DatasetGroupBy
from xarray.core.merge import CoercibleMapping, CoercibleValue, _MergeResult
from xarray.core.merge import (CoercibleMapping, CoercibleValue,
_MergeResult)
from xarray.core.resample import DatasetResample
from xarray.core.rolling import DatasetCoarsen, DatasetRolling
from xarray.core.types import (
CFCalendar,
CoarsenBoundaryOptions,
CombineAttrsOptions,
CompatOptions,
DataVars,
DatetimeLike,
DatetimeUnitOptions,
Dims,
DsCompatible,
ErrorOptions,
ErrorOptionsWithWarn,
InterpOptions,
JoinOptions,
PadModeOptions,
PadReflectOptions,
QueryEngineOptions,
QueryParserOptions,
ReindexMethodOptions,
SideOptions,
T_Xarray,
)
from xarray.core.types import (CFCalendar, CoarsenBoundaryOptions,
CombineAttrsOptions, CompatOptions,
DataVars, DatetimeLike, DatetimeUnitOptions,
Dims, DsCompatible, ErrorOptions,
ErrorOptionsWithWarn, InterpOptions,
JoinOptions, PadModeOptions,
PadReflectOptions, QueryEngineOptions,
QueryParserOptions, ReindexMethodOptions,
SideOptions, T_Xarray)
from xarray.core.weighted import DatasetWeighted
from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint

Expand Down Expand Up @@ -410,6 +352,7 @@ def _initialize_feasible(lb, ub):
return param_defaults, bounds_defaults


@codeflash_line_profile
def merge_data_and_coords(data_vars: DataVars, coords) -> _MergeResult:
"""Used in Dataset.__init__."""
if isinstance(coords, Coordinates):
Expand Down Expand Up @@ -2934,7 +2877,9 @@ def isel(

"""
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel")
if any(is_fancy_indexer(idx) for idx in indexers.values()):
# Bind the values view to a local name; it is iterated once by the any() check below.
indexers_values = indexers.values()
if any(is_fancy_indexer(idx) for idx in indexers_values):
return self._isel_fancy(indexers, drop=drop, missing_dims=missing_dims)

# Much faster algorithm for when all indexers are ints, slices, one-dimensional
Expand All @@ -2947,15 +2892,23 @@ def isel(

indexes, index_variables = isel_indexes(self.xindexes, indexers)

# NOTE: coord_names_set is an alias of coord_names (the same object), not a copy or a new set.
coord_names_set = coord_names

for name, var in self._variables.items():
# preserve variable order
if name in index_variables:
var = index_variables[name]
else:
var_indexers = {k: v for k, v in indexers.items() if k in var.dims}
# Avoid building dict when unnecessary
var_indexers = None
if var.dims:
shared = [k for k in indexers if k in var.dims]
if shared:
var_indexers = {k: indexers[k] for k in shared}
if var_indexers:
var = var.isel(var_indexers)
if drop and var.ndim == 0 and name in coord_names:
if drop and var.ndim == 0 and name in coord_names_set:
coord_names.remove(name)
continue
variables[name] = var
Expand Down Expand Up @@ -10178,12 +10131,9 @@ def groupby(
Dataset.resample
DataArray.resample
"""
from xarray.core.groupby import (
DatasetGroupBy,
ResolvedGrouper,
UniqueGrouper,
_validate_groupby_squeeze,
)
from xarray.core.groupby import (DatasetGroupBy, ResolvedGrouper,
UniqueGrouper,
_validate_groupby_squeeze)

_validate_groupby_squeeze(squeeze)
rgrouper = ResolvedGrouper(UniqueGrouper(), group, self)
Expand Down Expand Up @@ -10263,12 +10213,9 @@ def groupby_bins(
----------
.. [1] http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
"""
from xarray.core.groupby import (
BinGrouper,
DatasetGroupBy,
ResolvedGrouper,
_validate_groupby_squeeze,
)
from xarray.core.groupby import (BinGrouper, DatasetGroupBy,
ResolvedGrouper,
_validate_groupby_squeeze)

_validate_groupby_squeeze(squeeze)
grouper = BinGrouper(
Expand Down
Loading