From 3003426e7f9eb0284a71bc4506a53e7d5f808ae7 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Thu, 2 Apr 2026 22:21:12 +0200 Subject: [PATCH] Improve performance of dataset indexing (#11276) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian --- xarray/core/dataset_utils.py | 4 ++-- xarray/core/indexes.py | 12 ++++++++++-- xarray/core/variable.py | 12 ++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataset_utils.py b/xarray/core/dataset_utils.py index 12504333ac9..7abfc06b5ef 100644 --- a/xarray/core/dataset_utils.py +++ b/xarray/core/dataset_utils.py @@ -67,8 +67,8 @@ def _get_virtual_variable( dim_sizes = {} if key in dim_sizes: - data = pd.Index(range(dim_sizes[key]), name=key) - variable = IndexVariable((key,), data) + data = pd.RangeIndex(dim_sizes[key], name=key) + variable = IndexVariable((key,), data, fastpath=True) return key, key, variable if not isinstance(key, str): diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 46276029ddc..2242e57e482 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -807,7 +807,9 @@ def create_variables( encoding = None data = PandasIndexingAdapter(self.index, dtype=self.coord_dtype) - var = IndexVariable(self.dim, data, attrs=attrs, encoding=encoding) + var = IndexVariable( + self.dim, data, attrs=attrs, encoding=encoding, fastpath=True + ) return {name: var} def to_pandas_index(self) -> pd.Index: @@ -829,6 +831,9 @@ def isel( # scalar indexer: drop index return None + if isinstance(indxr, slice) and indxr == slice(None): + return self + return self._replace(self.index[indxr]) # type: ignore[index,unused-ignore] def sel( @@ -2164,7 +2169,10 @@ def _apply_indexes_fast(indexes: Indexes[Index], args: Mapping[Any, Any], func: new_index = getattr(index, func)(index_args) if new_index is not None: new_indexes.update(dict.fromkeys(index_vars, new_index)) - new_index_vars = new_index.create_variables(index_vars) + if new_index is index: + new_index_vars = index_vars + else: + new_index_vars = new_index.create_variables(index_vars) new_index_variables.update(new_index_vars) else: for k in index_vars: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index e4f6946f6d3..750fff0bd67 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -634,6 +634,18 @@ def _broadcast_indexes(self, key): positions. """ key = self._item_key_to_tuple(key) # key is a tuple + # Fast path: key is already a tuple of the right length with only + # ints and slices (the common case from Variable.isel) + if ( + isinstance(key, tuple) + and len(key) == self.ndim + and all( + not isinstance(k, bool) and isinstance(k, BASIC_INDEXING_TYPES) + for k in key + ) + ): + return self._broadcast_indexes_basic(key) + # key is a tuple of full size key = indexing.expanded_indexer(key, self.ndim) # Convert a scalar Variable to a 0d-array