From a9df51b017f02bd005972b2bdaf18a5e3c383541 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Jul 2025 00:46:10 +0200 Subject: [PATCH 01/19] CoW: add readonly flag to ExtensionArrays, return read-only EA/ndarray in .array/EA.to_numpy() --- pandas/core/arrays/_mixins.py | 3 ++ pandas/core/arrays/arrow/array.py | 3 ++ pandas/core/arrays/base.py | 65 +++++++++++++++++++++++++ pandas/core/arrays/datetimelike.py | 7 ++- pandas/core/arrays/interval.py | 3 ++ pandas/core/arrays/masked.py | 14 +++++- pandas/core/arrays/numpy_.py | 23 +++++++-- pandas/core/arrays/period.py | 6 ++- pandas/core/arrays/sparse/array.py | 8 ++- pandas/core/arrays/string_.py | 3 ++ pandas/core/dtypes/astype.py | 11 +++++ pandas/core/indexes/base.py | 2 + pandas/core/internals/blocks.py | 6 ++- pandas/core/series.py | 5 +- pandas/tests/extension/base/setitem.py | 57 ++++++++++++++++++++++ pandas/tests/extension/date/array.py | 3 ++ pandas/tests/extension/decimal/array.py | 3 ++ pandas/tests/extension/json/array.py | 3 ++ 18 files changed, 215 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 26585e7bab8e3..2eb235f87d231 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -252,6 +252,9 @@ def shift(self, periods: int = 1, fill_value=None) -> Self: return self._from_backing_data(new_values) def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + key = check_array_indexer(self, key) value = self._validate_setitem_value(value) self._ndarray[key] = value diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 919453b29b7f9..61082f920a4ab 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1967,6 +1967,9 @@ def __setitem__(self, key, value) -> None: ------- None """ + if self._readonly: + raise ValueError("Cannot modify readonly array") + # GH50085: unwrap 1D 
indexers if isinstance(key, tuple) and len(key) == 1: key = key[0] diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index d11e2271f9574..5d32e7f025416 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -40,6 +40,7 @@ validate_insert_loc, ) +from pandas.core.dtypes.astype import astype_is_view from pandas.core.dtypes.cast import maybe_cast_pointwise_result from pandas.core.dtypes.common import ( is_list_like, @@ -269,6 +270,8 @@ class ExtensionArray: # strictly less than 2000 to be below Index.__pandas_priority__. __pandas_priority__ = 1000 + _readonly = False + # ------------------------------------------------------------------------ # Constructors # ------------------------------------------------------------------------ @@ -482,6 +485,11 @@ def __setitem__(self, key, value) -> None: Returns ------- None + + Raises + ------ + ValueError + If the array is readonly and modification is attempted. """ # Some notes to the ExtensionArray implementer who may have ended up # here. While this method is not required for the interface, if you @@ -501,8 +509,59 @@ def __setitem__(self, key, value) -> None: # __init__ method coerces that value, then so should __setitem__ # Note, also, that Series/DataFrame.where internally use __setitem__ # on a copy of the data. + # Check if the array is readonly + if self._readonly: + raise ValueError("Cannot modify readonly array") + raise NotImplementedError(f"{type(self)} does not implement __setitem__.") + @property + def readonly(self) -> bool: + """ + Whether the array is readonly. + + If True, attempts to modify the array via __setitem__ will raise + a ValueError. + + Returns + ------- + bool + True if the array is readonly, False otherwise. + + Examples + -------- + >>> arr = pd.array([1, 2, 3]) + >>> arr.readonly + False + >>> arr.readonly = True + >>> arr[0] = 5 + Traceback (most recent call last): + ... 
+ ValueError: Cannot modify readonly ExtensionArray + """ + return getattr(self, "_readonly", False) + + @readonly.setter + def readonly(self, value: bool) -> None: + """ + Set the readonly state of the array. + + Parameters + ---------- + value : bool + True to make the array readonly, False to make it writable. + + Examples + -------- + >>> arr = pd.array([1, 2, 3]) + >>> arr.readonly = True + >>> arr.readonly + True + """ + if not isinstance(value, bool): + raise TypeError("readonly must be a boolean") + self._readonly = value + def __len__(self) -> int: """ Length of this array @@ -595,8 +654,14 @@ def to_numpy( result = np.asarray(self, dtype=dtype) if copy or na_value is not lib.no_default: result = result.copy() + elif self._readonly and astype_is_view(self.dtype, result.dtype): + # If the ExtensionArray is readonly, make the numpy array readonly too + result = result.view() + result.flags.writeable = False + if na_value is not lib.no_default: result[self.isna()] = na_value # type: ignore[index] + return result # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index d5e654c95577e..91f6646d07c21 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -368,7 +368,12 @@ def __array__( if copy is True: return np.array(self._ndarray, dtype=dtype) - return self._ndarray + + result = self._ndarray + if self._readonly: + result = result.view() + result.flags.writeable = False + return result @overload def __getitem__(self, key: ScalarIndexer) -> DTScalarOrNaT: ... 
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 4bcbe2eedee47..9996eb93ea8f0 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -729,6 +729,9 @@ def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA: return self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type] def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + value_left, value_right = self._validate_setitem_value(value) key = check_array_indexer(self, key) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index fefd70fef35c9..59cbda6fd1e64 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -23,6 +23,7 @@ from pandas.errors import AbstractMethodError from pandas.util._decorators import doc +from pandas.core.dtypes.astype import astype_is_view from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.common import ( is_bool, @@ -290,6 +291,9 @@ def _validate_setitem_value(self, value): raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'") def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + key = check_array_indexer(self, key) if is_scalar(value): @@ -520,6 +524,9 @@ def to_numpy( with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) data = self._data.astype(dtype, copy=copy) + if self._readonly and astype_is_view(self.dtype, dtype): + data = data.view() + data.flags.writeable = False return data @doc(ExtensionArray.tolist) @@ -596,7 +603,12 @@ def __array__( if copy is False: if not self._hasna: # special case, here we can simply return the underlying data - return np.array(self._data, dtype=dtype, copy=copy) + result = np.array(self._data, dtype=dtype, copy=copy) + # If the ExtensionArray is readonly, make the numpy array readonly too + if self._readonly: + result = 
result.view() + result.flags.writeable = False + return result raise ValueError( "Unable to avoid copy while creating an array as requested." ) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index fd2c8c9d63362..ebede913e77fc 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -12,7 +12,10 @@ from pandas._libs.tslibs import is_supported_dtype from pandas.compat.numpy import function as nv -from pandas.core.dtypes.astype import astype_array +from pandas.core.dtypes.astype import ( + astype_array, + astype_is_view, +) from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.dtypes import NumpyEADtype @@ -160,8 +163,19 @@ def __array__( ) -> np.ndarray: if copy is not None: # Note: branch avoids `copy=None` for NumPy 1.x support - return np.array(self._ndarray, dtype=dtype, copy=copy) - return np.asarray(self._ndarray, dtype=dtype) + result = np.array(self._ndarray, dtype=dtype, copy=copy) + else: + result = np.asarray(self._ndarray, dtype=dtype) + + if ( + self._readonly + and not copy + and (dtype is None or astype_is_view(self.dtype, dtype)) + ): + result = result.view() + result.flags.writeable = False + + return result def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): # Lightly modified version of @@ -512,6 +526,9 @@ def to_numpy( result[mask] = na_value else: result = self._ndarray + if not copy and self._readonly: + result = result.view() + result.flags.writeable = False result = np.asarray(result, dtype=dtype) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index ae92e17332c76..d2b3448a59896 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -393,7 +393,11 @@ def __array__( # For NumPy 1.x compatibility we cannot use copy=None. 
And # `copy=False` has the meaning of `copy=None` here: if not copy: - return np.asarray(self.asi8, dtype=dtype) + result = np.asarray(self.asi8, dtype=dtype) + if self._readonly: + result = result.view() + result.flags.writeable = False + return result else: return np.array(self.asi8, dtype=dtype) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 137dbb6e4d139..fbf457e414c13 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -562,7 +562,11 @@ def __array__( if copy is True: return np.array(self.sp_values) else: - return self.sp_values + result = self.sp_values + if self._readonly: + result = result.view() + result.flags.writeable = False + return result if copy is False: raise ValueError( @@ -591,6 +595,8 @@ def __array__( return out def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") # I suppose we could allow setting of non-fill_value elements. 
# TODO(SparseArray.__setitem__): remove special cases in # ExtensionBlock.where diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index f52b709a59de9..abf77b57e2a49 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -769,6 +769,9 @@ def _maybe_convert_setitem_value(self, value): return value def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + value = self._maybe_convert_setitem_value(value) key = check_array_indexer(self, key) diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 086f7d2da6640..f9cdb50254730 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -24,8 +24,11 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ( + CategoricalDtype, ExtensionDtype, + IntervalDtype, NumpyEADtype, + PeriodDtype, ) if TYPE_CHECKING: @@ -283,6 +286,14 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool: new_dtype = getattr(new_dtype, "numpy_dtype", new_dtype) return getattr(dtype, "unit", None) == getattr(new_dtype, "unit", None) + elif new_dtype == object and isinstance(dtype, (PeriodDtype, IntervalDtype)): + return False + + elif isinstance(dtype, CategoricalDtype) and not isinstance( + new_dtype, CategoricalDtype + ): + return False + numpy_dtype = getattr(dtype, "numpy_dtype", None) new_numpy_dtype = getattr(new_dtype, "numpy_dtype", None) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e8c5a03a6de50..4172e159b0d90 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4972,6 +4972,8 @@ def array(self) -> ExtensionArray: from pandas.core.arrays.numpy_ import NumpyExtensionArray array = NumpyExtensionArray(array) + array = array.view() + array._readonly = True return array @property diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6aa5062b8ed86..f778cf618d077 100644 --- 
a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2359,7 +2359,9 @@ def external_values(values: ArrayLike) -> ArrayLike: if isinstance(values, np.ndarray): values = values.view() values.flags.writeable = False - - # TODO(CoW) we should also mark our ExtensionArrays as read-only + else: + # ExtensionArrays + values = values.view() + values._readonly = True return values diff --git a/pandas/core/series.py b/pandas/core/series.py index ce5b2e5ed8de5..1bd46f779831b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -820,7 +820,10 @@ def _references(self) -> BlockValuesRefs: @Appender(base.IndexOpsMixin.array.__doc__) # type: ignore[prop-decorator] @property def array(self) -> ExtensionArray: - return self._mgr.array_values() + arr = self._mgr.array_values() + arr = arr.view() + arr._readonly = True + return arr def __len__(self) -> int: """ diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 185d6d750cace..3c81ff4d457ea 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -454,3 +454,60 @@ def test_setitem_2d_values(self, data): df.loc[[0, 1], :] = df.loc[[1, 0], :].values assert (df.loc[0, :] == original[1]).all() assert (df.loc[1, :] == original[0]).all() + + def test_readonly_property(self, data): + assert data.readonly is False + + data.readonly = True + assert data.readonly is True + + data_orig = data.copy() + assert data_orig.readonly is False + + with pytest.raises(ValueError, match="Cannot modify readonly array"): + data[0] = data[1] + + with pytest.raises(ValueError, match="Cannot modify readonly array"): + data[0:3] = data[1] + + with pytest.raises(ValueError, match="Cannot modify readonly array"): + data[np.array([True] * len(data))] = data[1] + + tm.assert_extension_array_equal(data, data_orig) + + def test_readonly_propagates_to_numpy_array(self, data): + data.readonly = True + + # when we ask for a copy, the result should 
never be readonly + arr = np.array(data) + assert arr.flags.writeable + + # when we don't ask for a copy -> if the conversion is zero-copy, + # the result should be readonly + arr1 = np.asarray(data) + arr2 = np.asarray(data) + if np.shares_memory(arr1, arr2): + assert not arr1.flags.writeable + else: + assert arr1.flags.writeable + + def test_readonly_propagates_to_numpy_array_method(self, data): + data.readonly = True + + # when we ask for a copy, the result should never be readonly + arr = data.to_numpy(copy=True) + assert arr.flags.writeable + + # when we don't ask for a copy -> if the conversion is zero-copy, + # the result should be readonly + arr1 = data.to_numpy(copy=False) + arr2 = data.to_numpy(copy=False) + if np.shares_memory(arr1, arr2): + assert not arr1.flags.writeable + else: + assert arr1.flags.writeable + + # non-NA fill value should always result in a copy + if data.isna().any(): + arr = data.to_numpy(copy=False, na_value=data[0]) + assert arr.flags.writeable diff --git a/pandas/tests/extension/date/array.py b/pandas/tests/extension/date/array.py index 0c51570189a7c..e6a4441bca97c 100644 --- a/pandas/tests/extension/date/array.py +++ b/pandas/tests/extension/date/array.py @@ -149,6 +149,9 @@ def __getitem__(self, item: PositionalIndexer): raise NotImplementedError("only ints are supported as indexes") def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + if not isinstance(key, int): raise NotImplementedError("only ints are supported as indexes") diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 2ee6a73ec4054..de8e45679ea59 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -195,6 +195,9 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy=copy) def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot 
modify readonly array") + if is_list_like(value): if is_scalar(key): raise ValueError("setting an array element with a sequence.") diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index b110911bda400..7c01bdef88c0a 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -120,6 +120,9 @@ def __getitem__(self, item): return type(self)([self.data[i] for i in item]) def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + if isinstance(key, numbers.Integral): self.data[key] = value else: From 9cd6e4f39fc109ed5326606cb11c6905d8b34716 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Jul 2025 01:16:39 +0200 Subject: [PATCH 02/19] cleanup --- pandas/core/arrays/base.py | 47 -------------------------------------- 1 file changed, 47 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 5d32e7f025416..5c488b6aa0906 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -515,53 +515,6 @@ def __setitem__(self, key, value) -> None: raise NotImplementedError(f"{type(self)} does not implement __setitem__.") - @property - def readonly(self) -> bool: - """ - Whether the array is readonly. - - If True, attempts to modify the array via __setitem__ will raise - a ValueError. - - Returns - ------- - bool - True if the array is readonly, False otherwise. - - Examples - -------- - >>> arr = pd.array([1, 2, 3]) - >>> arr.readonly - False - >>> arr.readonly = True - >>> arr[0] = 5 - Traceback (most recent call last): - ... - ValueError: Cannot modify readonly ExtensionArray - """ - return getattr(self, "_readonly", False) - - @readonly.setter - def readonly(self, value: bool) -> None: - """ - Set the readonly state of the array. - - Parameters - ---------- - value : bool - True to make the array readonly, False to make it writable. 
- - Examples - -------- - >>> arr = pd.array([1, 2, 3]) - >>> arr.readonly = True - >>> arr.readonly - True - """ - if not isinstance(value, bool): - raise TypeError("readonly must be a boolean") - self._readonly = value - def __len__(self) -> int: """ Length of this array From c6f37d1f0cfd50c70aa729da03c355eb2f17e24b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Jul 2025 01:36:38 +0200 Subject: [PATCH 03/19] fixup attribute name in tests --- pandas/tests/extension/base/setitem.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 3c81ff4d457ea..853b314e76a8e 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -456,13 +456,13 @@ def test_setitem_2d_values(self, data): assert (df.loc[1, :] == original[0]).all() def test_readonly_property(self, data): - assert data.readonly is False + assert data._readonly is False - data.readonly = True - assert data.readonly is True + data._readonly = True + assert data._readonly is True data_orig = data.copy() - assert data_orig.readonly is False + assert data_orig._readonly is False with pytest.raises(ValueError, match="Cannot modify readonly array"): data[0] = data[1] @@ -476,7 +476,7 @@ def test_readonly_property(self, data): tm.assert_extension_array_equal(data, data_orig) def test_readonly_propagates_to_numpy_array(self, data): - data.readonly = True + data._readonly = True # when we ask for a copy, the result should never be readonly arr = np.array(data) @@ -492,7 +492,7 @@ def test_readonly_propagates_to_numpy_array(self, data): assert arr1.flags.writeable def test_readonly_propagates_to_numpy_array_method(self, data): - data.readonly = True + data._readonly = True # when we ask for a copy, the result should never be readonly arr = data.to_numpy(copy=True) From 8058d9ad53ef933ec3fb650c6bd8caf597dc1da8 Mon Sep 17 00:00:00 2001 From: Joris Van 
den Bossche Date: Wed, 23 Jul 2025 08:49:59 +0200 Subject: [PATCH 04/19] fix tests --- pandas/_libs/ops.pyx | 4 ++-- pandas/tests/arrays/integer/test_indexing.py | 2 +- pandas/tests/arrays/numpy_/test_numpy.py | 7 ------- pandas/tests/arrays/test_datetimelike.py | 4 ++-- pandas/tests/extension/test_common.py | 2 +- pandas/tests/extension/test_numpy.py | 2 +- pandas/tests/internals/test_internals.py | 2 +- pandas/tests/series/test_constructors.py | 4 ++-- 8 files changed, 10 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index 567bfc02a2950..8b53e842a7988 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -177,7 +177,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarr @cython.wraparound(False) @cython.boundscheck(False) -def scalar_binop(object[:] values, object val, object op) -> ndarray: +def scalar_binop(ndarray[object] values, object val, object op) -> ndarray: """ Apply the given binary operator `op` between each element of the array `values` and the scalar `val`. @@ -214,7 +214,7 @@ def scalar_binop(object[:] values, object val, object op) -> ndarray: @cython.wraparound(False) @cython.boundscheck(False) -def vec_binop(object[:] left, object[:] right, object op) -> ndarray: +def vec_binop(ndarray[object] left, ndarray[object] right, object op) -> ndarray: """ Apply the given binary operator `op` pointwise to the elements of arrays `left` and `right`. 
diff --git a/pandas/tests/arrays/integer/test_indexing.py b/pandas/tests/arrays/integer/test_indexing.py index 4b953d699108b..ce801db5cb58d 100644 --- a/pandas/tests/arrays/integer/test_indexing.py +++ b/pandas/tests/arrays/integer/test_indexing.py @@ -12,7 +12,7 @@ def test_array_setitem_nullable_boolean_mask(): def test_array_setitem(): # GH 31446 - arr = pd.Series([1, 2], dtype="Int64").array + arr = pd.array([1, 2], dtype="Int64") arr[arr > 1] = 1 expected = pd.array([1, 1], dtype="Int64") diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index 620a553d5a731..af3a793a3ef77 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -159,13 +159,6 @@ def test_to_numpy(): # Setitem -def test_setitem_series(): - ser = pd.Series([1, 2, 3]) - ser.array[0] = 10 - expected = pd.Series([10, 2, 3]) - tm.assert_series_equal(ser, expected) - - def test_setitem(any_numpy_array): nparr = any_numpy_array arr = NumpyExtensionArray(nparr, copy=True) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index d1ef29b0bf8a0..a2eeee1447111 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1248,8 +1248,8 @@ def test_invalid_nat_setitem_array(arr, non_casting_nats): @pytest.mark.parametrize( "arr", [ - pd.date_range("2000", periods=4).array, - pd.timedelta_range("2000", periods=4).array, + pd.date_range("2000", periods=4).array.copy(), + pd.timedelta_range("2000", periods=4).array.copy(), ], ) def test_to_numpy_extra(arr): diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index 5eda0f00f54ca..9f782770a02e4 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -101,5 +101,5 @@ def test_ellipsis_index(): # String comparison because there's no native way to compare slices. 
# Before the fix for GH#42430, last_item_arg would get set to the 2D slice # (Ellipsis, slice(None, 1, None)) - out = df["col1"].array.last_item_arg + out = df["col1"]._values.last_item_arg assert str(out) == "slice(None, 1, None)" diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 79cfb736941d6..e6301d9a23604 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -77,7 +77,7 @@ def allow_in_pandas(monkeypatch): @pytest.fixture def data(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": - return pd.Series([(i,) for i in range(100)]).array + return pd.Series([(i,) for i in range(100)]).array.copy() return NumpyExtensionArray(np.arange(1, 101, dtype=dtype._dtype)) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 11e6b99204aee..2622310aaacd6 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1347,7 +1347,7 @@ def check_series_setitem(self, elem, index: Index, inplace: bool): ser[: len(elem)] = elem if inplace: - assert ser.array is arr # i.e. setting was done inplace + assert ser._values is arr # i.e. 
setting was done inplace else: assert ser.dtype == object diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index f82451a2be84d..f8af23de89297 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -525,9 +525,9 @@ def test_categorical_sideeffects_free(self): # so this WILL change values cat = Categorical(["a", "b", "c", "a"]) s = Series(cat, copy=False) - assert s.values is cat + assert s._values is cat s = s.cat.rename_categories([1, 2, 3]) - assert s.values is not cat + assert s._values is not cat exp_s = np.array([1, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s) From 91465ee00540b109892c38b40520cbe1ef112b44 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Jul 2025 10:05:00 +0200 Subject: [PATCH 05/19] more test fixes --- pandas/core/arrays/sparse/array.py | 2 ++ pandas/core/dtypes/astype.py | 5 ++++- pandas/tests/base/test_conversion.py | 2 +- pandas/tests/extension/base/setitem.py | 7 ++++++- pandas/tests/extension/json/array.py | 1 - pandas/tests/extension/json/test_json.py | 6 ++++++ 6 files changed, 19 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index fbf457e414c13..2912444a5185e 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -975,6 +975,8 @@ def __getitem__( # _NestedSequence[Union[bool, int]]], ...]]" data_slice = self.to_dense()[key] # type: ignore[index] elif isinstance(key, slice): + if key == slice(None): + return type(self)._simple_new(self.sp_values, self.sp_index, self.dtype) # Avoid densifying when handling contiguous slices if key.step is None or key.step == 1: start = 0 if key.start is None else key.start diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index f9cdb50254730..75464cbb2b5f9 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ 
-25,6 +25,7 @@ ) from pandas.core.dtypes.dtypes import ( CategoricalDtype, + DatetimeTZDtype, ExtensionDtype, IntervalDtype, NumpyEADtype, @@ -286,7 +287,9 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool: new_dtype = getattr(new_dtype, "numpy_dtype", new_dtype) return getattr(dtype, "unit", None) == getattr(new_dtype, "unit", None) - elif new_dtype == object and isinstance(dtype, (PeriodDtype, IntervalDtype)): + elif new_dtype == object and isinstance( + dtype, (DatetimeTZDtype, PeriodDtype, IntervalDtype) + ): return False elif isinstance(dtype, CategoricalDtype) and not isinstance( diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index e3a821519c638..bddd28d29ba1c 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -285,7 +285,7 @@ def test_array(arr, attr, index_or_series): arr = getattr(arr, attr) result = getattr(result, attr) - assert result is arr + assert np.shares_memory(result, arr) def test_array_multiindex_raises(): diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 853b314e76a8e..e9389a4e57549 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -510,4 +510,9 @@ def test_readonly_propagates_to_numpy_array_method(self, data): # non-NA fill value should always result in a copy if data.isna().any(): arr = data.to_numpy(copy=False, na_value=data[0]) - assert arr.flags.writeable + if isinstance(data.dtype, pd.ArrowDtype) and data.dtype.kind == "f": + # for float dtype, after the fillna, the conversion from pyarrow to + # numpy is zero-copy, and pyarrow will mark the array as readonly + assert not arr.flags.writeable + else: + assert arr.flags.writeable diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 7c01bdef88c0a..7e224b2416e10 100644 --- a/pandas/tests/extension/json/array.py +++ 
b/pandas/tests/extension/json/array.py @@ -155,7 +155,6 @@ def __array__(self, dtype=None, copy=None): raise ValueError( "Unable to avoid copy while creating an array as requested." ) - if dtype is None: dtype = object if dtype == object: diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 4bc9562f1895d..895d5df8810ad 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -418,6 +418,12 @@ def test_setitem_slice_array(self, data): def test_setitem_invalid(self, data, invalid_scalar): super().test_setitem_invalid(data, invalid_scalar) + @pytest.mark.xfail( + reason="result readonly flag is incorrect and does not support na_value" + ) + def test_readonly_propagates_to_numpy_array_method(self, data): + super().test_readonly_propagates_to_numpy_array_method(data) + @pytest.mark.xfail(reason="only integer scalar arrays can be converted") def test_setitem_2d_values(self, data): super().test_setitem_2d_values(data) From 856dc0242d2f67c7caa52ec2a1d34729fb100a47 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Jul 2025 10:21:30 +0200 Subject: [PATCH 06/19] add tests for .array being readonly --- pandas/tests/copy_view/test_array.py | 37 +++++++++++++++++++--------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py index 2b3ef9201d918..81b77bb4abdee 100644 --- a/pandas/tests/copy_view/test_array.py +++ b/pandas/tests/copy_view/test_array.py @@ -19,10 +19,11 @@ "method", [ lambda ser: ser.values, + lambda ser: np.asarray(ser.array), lambda ser: np.asarray(ser), lambda ser: np.array(ser, copy=False), ], - ids=["values", "asarray", "array"], + ids=["values", "array", "np.asarray", "np.array"], ) def test_series_values(method): ser = Series([1, 2, 3], name="name") @@ -105,24 +106,38 @@ def test_series_to_numpy(): assert arr.flags.writeable is True -def 
test_series_array_ea_dtypes(): +@pytest.mark.parametrize( + "method", + [ + lambda ser: np.asarray(ser.array), + lambda ser: np.asarray(ser), + lambda ser: np.asarray(ser, dtype="int64"), + lambda ser: np.array(ser, copy=False), + ], + ids=["array", "np.asarray", "np.asarray-dtype", "np.array"], +) +def test_series_values_ea_dtypes(method): ser = Series([1, 2, 3], dtype="Int64") - arr = np.asarray(ser, dtype="int64") - assert np.shares_memory(arr, get_array(ser)) - assert arr.flags.writeable is False + arr = method(ser) - arr = np.asarray(ser) assert np.shares_memory(arr, get_array(ser)) assert arr.flags.writeable is False -def test_dataframe_array_ea_dtypes(): +@pytest.mark.parametrize( + "method", + [ + lambda df: df.values, + lambda df: np.asarray(df), + lambda df: np.asarray(df, dtype="int64"), + lambda df: np.array(df, copy=False), + ], + ids=["values", "np.asarray", "np.asarray-dtype", "np.array"], +) +def test_dataframe_array_ea_dtypes(method): df = DataFrame({"a": [1, 2, 3]}, dtype="Int64") - arr = np.asarray(df, dtype="int64") - assert np.shares_memory(arr, get_array(df, "a")) - assert arr.flags.writeable is False + arr = method(df) - arr = np.asarray(df) assert np.shares_memory(arr, get_array(df, "a")) assert arr.flags.writeable is False From ee1ed6ed56f8ebfa1068fe9e5ae60694fa11001e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 3 Aug 2025 11:48:12 +0200 Subject: [PATCH 07/19] typing --- pandas/core/arrays/_mixins.py | 6 ++++++ pandas/core/arrays/_utils.py | 2 +- pandas/core/arrays/base.py | 6 ++++++ pandas/core/arrays/numpy_.py | 2 +- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 2eb235f87d231..87eed4d8f32cc 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -115,6 +115,12 @@ def _validate_scalar(self, value): # ------------------------------------------------------------------------ + @overload + def view(self) -> Self: 
... + + @overload + def view(self, dtype: Dtype | None = ...) -> ArrayLike: ... + def view(self, dtype: Dtype | None = None) -> ArrayLike: # We handle datetime64, datetime64tz, timedelta64, and period # dtypes here. Everything else we pass through to the underlying diff --git a/pandas/core/arrays/_utils.py b/pandas/core/arrays/_utils.py index 6b46396d5efdf..5a5ac6cb48039 100644 --- a/pandas/core/arrays/_utils.py +++ b/pandas/core/arrays/_utils.py @@ -22,7 +22,7 @@ def to_numpy_dtype_inference( arr: ArrayLike, dtype: npt.DTypeLike | None, na_value, hasna: bool -) -> tuple[npt.DTypeLike, Any]: +) -> tuple[np.dtype | None, Any]: if dtype is None and is_numeric_dtype(arr.dtype): dtype_given = False if hasna: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9a1bd9c453b21..4ac48716b9561 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1853,6 +1853,12 @@ def copy(self) -> Self: """ raise AbstractMethodError(self) + @overload + def view(self) -> Self: ... + + @overload + def view(self, dtype: Dtype | None = ...) -> ArrayLike: ... + def view(self, dtype: Dtype | None = None) -> ArrayLike: """ Return a view on the array. 
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index ebede913e77fc..37c3e574124ed 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -159,7 +159,7 @@ def dtype(self) -> NumpyEADtype: # NumPy Array Interface def __array__( - self, dtype: NpDtype | None = None, copy: bool | None = None + self, dtype: np.dtype | None = None, copy: bool | None = None ) -> np.ndarray: if copy is not None: # Note: branch avoids `copy=None` for NumPy 1.x support From a7abee3f0e6982cbd30cf2f005270409cb708aa3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 7 Sep 2025 11:06:17 +0200 Subject: [PATCH 08/19] address feedback: use _values in tests + add comment --- pandas/core/arrays/sparse/array.py | 1 + pandas/tests/arrays/test_datetimelike.py | 4 ++-- pandas/tests/extension/test_numpy.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 01750e9aaae0a..2cd55aec042a1 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -993,6 +993,7 @@ def __getitem__( data_slice = self.to_dense()[key] # type: ignore[index] elif isinstance(key, slice): if key == slice(None): + # to ensure arr[:] (used by view()) does not make a copy return type(self)._simple_new(self.sp_values, self.sp_index, self.dtype) # Avoid densifying when handling contiguous slices if key.step is None or key.step == 1: diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index a2eeee1447111..111179d390953 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1248,8 +1248,8 @@ def test_invalid_nat_setitem_array(arr, non_casting_nats): @pytest.mark.parametrize( "arr", [ - pd.date_range("2000", periods=4).array.copy(), - pd.timedelta_range("2000", periods=4).array.copy(), + pd.date_range("2000", periods=4)._values, + pd.timedelta_range("2000", 
periods=4)._values, ], ) def test_to_numpy_extra(arr): diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index e6301d9a23604..9ee9add4d10f5 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -77,7 +77,7 @@ def allow_in_pandas(monkeypatch): @pytest.fixture def data(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": - return pd.Series([(i,) for i in range(100)]).array.copy() + return pd.Series([(i,) for i in range(100)])._values return NumpyExtensionArray(np.arange(1, 101, dtype=dtype._dtype)) From f235aa376dd16f124e3857f16900f6fe11aa9b93 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 8 Sep 2025 22:00:57 +0200 Subject: [PATCH 09/19] update typing --- pandas/core/arrays/masked.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 22461a817d8ec..634b60f7168fd 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -517,11 +517,11 @@ def to_numpy( hasna = self._hasna dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, hasna) if dtype is None: - dtype = object + dtype = np.dtype(object) if hasna: if ( - dtype != object + dtype != np.dtype(object) and not is_string_dtype(dtype) and na_value is libmissing.NA ): From 5cfb0f87843979a7ca792fc92107fad18f8f0943 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 9 Sep 2025 14:13:40 +0200 Subject: [PATCH 10/19] fix numpy test setup --- pandas/tests/extension/test_numpy.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 9ee9add4d10f5..eb737efd57516 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -77,8 +77,10 @@ def allow_in_pandas(monkeypatch): @pytest.fixture def data(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": - return 
pd.Series([(i,) for i in range(100)])._values - return NumpyExtensionArray(np.arange(1, 101, dtype=dtype._dtype)) + arr = pd.Series([(i,) for i in range(100)])._values + else: + arr = np.arange(1, 101, dtype=dtype._dtype) + return NumpyExtensionArray(arr) @pytest.fixture From ab4b9769424eefa190cbfc832f49a61fa9c22655 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 10 Sep 2025 09:42:59 +0200 Subject: [PATCH 11/19] readonly -> read-only --- pandas/core/arrays/_mixins.py | 2 +- pandas/core/arrays/arrow/array.py | 2 +- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/interval.py | 2 +- pandas/core/arrays/masked.py | 2 +- pandas/core/arrays/sparse/array.py | 2 +- pandas/core/arrays/string_.py | 2 +- pandas/tests/extension/base/setitem.py | 6 +++--- pandas/tests/extension/date/array.py | 2 +- pandas/tests/extension/decimal/array.py | 2 +- pandas/tests/extension/json/array.py | 2 +- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 35f1417f11ba8..6cce61365086c 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -257,7 +257,7 @@ def shift(self, periods: int = 1, fill_value=None) -> Self: def __setitem__(self, key, value) -> None: if self._readonly: - raise ValueError("Cannot modify readonly array") + raise ValueError("Cannot modify read-only array") key = check_array_indexer(self, key) value = self._validate_setitem_value(value) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index eaa187e5d8c97..07f441318a02d 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2088,7 +2088,7 @@ def __setitem__(self, key, value) -> None: None """ if self._readonly: - raise ValueError("Cannot modify readonly array") + raise ValueError("Cannot modify read-only array") # GH50085: unwrap 1D indexers if isinstance(key, tuple) and len(key) == 1: diff --git a/pandas/core/arrays/base.py 
b/pandas/core/arrays/base.py index 14edcc7659d62..53638ed36af46 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -480,7 +480,7 @@ def __setitem__(self, key, value) -> None: # on a copy of the data. # Check if the array is readonly if self._readonly: - raise ValueError("Cannot modify readonly array") + raise ValueError("Cannot modify read-only array") raise NotImplementedError(f"{type(self)} does not implement __setitem__.") diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index b3833d0c5d582..46c83242d7f21 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -730,7 +730,7 @@ def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA: def __setitem__(self, key, value) -> None: if self._readonly: - raise ValueError("Cannot modify readonly array") + raise ValueError("Cannot modify read-only array") value_left, value_right = self._validate_setitem_value(value) key = check_array_indexer(self, key) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 634b60f7168fd..8cdd86e1192df 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -307,7 +307,7 @@ def _validate_setitem_value(self, value): def __setitem__(self, key, value) -> None: if self._readonly: - raise ValueError("Cannot modify readonly array") + raise ValueError("Cannot modify read-only array") key = check_array_indexer(self, key) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 2cd55aec042a1..8cec13a25daeb 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -596,7 +596,7 @@ def __array__( def __setitem__(self, key, value) -> None: if self._readonly: - raise ValueError("Cannot modify readonly array") + raise ValueError("Cannot modify read-only array") # I suppose we could allow setting of non-fill_value elements. 
# TODO(SparseArray.__setitem__): remove special cases in # ExtensionBlock.where diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index a8d10466c7c99..698083d8de143 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -815,7 +815,7 @@ def _maybe_convert_setitem_value(self, value): def __setitem__(self, key, value) -> None: if self._readonly: - raise ValueError("Cannot modify readonly array") + raise ValueError("Cannot modify read-only array") value = self._maybe_convert_setitem_value(value) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index e9389a4e57549..51273170f8f03 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -464,13 +464,13 @@ def test_readonly_property(self, data): data_orig = data.copy() assert data_orig._readonly is False - with pytest.raises(ValueError, match="Cannot modify readonly array"): + with pytest.raises(ValueError, match="Cannot modify read-only array"): data[0] = data[1] - with pytest.raises(ValueError, match="Cannot modify readonly array"): + with pytest.raises(ValueError, match="Cannot modify read-only array"): data[0:3] = data[1] - with pytest.raises(ValueError, match="Cannot modify readonly array"): + with pytest.raises(ValueError, match="Cannot modify read-only array"): data[np.array([True] * len(data))] = data[1] tm.assert_extension_array_equal(data, data_orig) diff --git a/pandas/tests/extension/date/array.py b/pandas/tests/extension/date/array.py index e0470e630fa75..dd275b01e734e 100644 --- a/pandas/tests/extension/date/array.py +++ b/pandas/tests/extension/date/array.py @@ -149,7 +149,7 @@ def __getitem__(self, item: PositionalIndexer): def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: if self._readonly: - raise ValueError("Cannot modify readonly array") + raise ValueError("Cannot modify read-only array") if not isinstance(key, int): raise 
NotImplementedError("only ints are supported as indexes") diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 9e01f4c460532..f103b4faf3b15 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -205,7 +205,7 @@ def astype(self, dtype, copy=True): def __setitem__(self, key, value) -> None: if self._readonly: - raise ValueError("Cannot modify readonly array") + raise ValueError("Cannot modify read-only array") if is_list_like(value): if is_scalar(key): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 34e1fe6c47635..76ecdb64c214f 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -127,7 +127,7 @@ def __getitem__(self, item): def __setitem__(self, key, value) -> None: if self._readonly: - raise ValueError("Cannot modify readonly array") + raise ValueError("Cannot modify read-only array") if isinstance(key, numbers.Integral): self.data[key] = value From 6e08cc8fb65e1f6ad528e0abdae9cc29cf84e444 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 10 Sep 2025 12:27:39 +0200 Subject: [PATCH 12/19] add specific tests --- pandas/core/arrays/masked.py | 2 +- .../tests/arrays/boolean/test_construction.py | 10 +++++++ pandas/tests/arrays/floating/test_to_numpy.py | 23 ++++++++++++++++ pandas/tests/arrays/integer/test_dtypes.py | 16 +++++++++++ pandas/tests/arrays/numpy_/test_numpy.py | 27 +++++++++++++++++++ pandas/tests/arrays/string_/test_string.py | 10 +++++++ pandas/tests/arrays/test_datetimelike.py | 22 +++++++++++++++ 7 files changed, 109 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 8cdd86e1192df..6ac3aef2a2ec5 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -539,7 +539,7 @@ def to_numpy( with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) data = 
self._data.astype(dtype, copy=copy) - if self._readonly and astype_is_view(self.dtype, dtype): + if self._readonly and not copy and astype_is_view(self.dtype, dtype): data = data.view() data.flags.writeable = False return data diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index d821c52d3becb..45634aa278176 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -323,3 +323,13 @@ def test_to_numpy_copy(): result = arr.to_numpy(dtype=bool, copy=True) result[0] = False tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean")) + + +def test_to_numpy_readonly(): + arr = pd.array([True, False, True], dtype="boolean") + arr._readonly = True + result = arr.to_numpy(dtype=bool) + assert not result.flags.writeable + + result = arr.to_numpy(dtype="int64") + assert result.flags.writeable diff --git a/pandas/tests/arrays/floating/test_to_numpy.py b/pandas/tests/arrays/floating/test_to_numpy.py index e954cecba417a..898ccdad81432 100644 --- a/pandas/tests/arrays/floating/test_to_numpy.py +++ b/pandas/tests/arrays/floating/test_to_numpy.py @@ -130,3 +130,26 @@ def test_to_numpy_copy(): result = arr.to_numpy(dtype="float64", copy=True) result[0] = 10 tm.assert_extension_array_equal(arr, pd.array([0.1, 0.2, 0.3], dtype="Float64")) + + +def test_to_numpy_readonly(): + arr = pd.array([0.1, 0.2, 0.3], dtype="Float64") + arr._readonly = True + result = arr.to_numpy(dtype="float64") + assert not result.flags.writeable + + result = arr.to_numpy(dtype="float64", copy=True) + assert result.flags.writeable + + result = arr.to_numpy(dtype="float32") + assert result.flags.writeable + + result = arr.to_numpy(dtype="object") + assert result.flags.writeable + + +def test_asarray_readonly(): + arr = pd.array([0.1, 0.2, 0.3], dtype="Float64") + arr._readonly = True + result = np.asarray(arr, copy=False) + assert not result.flags.writeable diff 
--git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 1b4f070d47e4e..280ecf73597e8 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -276,6 +276,22 @@ def test_to_numpy_na_raises(dtype): a.to_numpy(dtype=dtype) +def test_to_numpy_readonly(): + arr = pd.array([0, 1], dtype="Int64") + arr._readonly = True + result = arr.to_numpy() + assert not result.flags.writeable + + result = arr.to_numpy(dtype="int64", copy=True) + assert result.flags.writeable + + result = arr.to_numpy(dtype="int32") + assert result.flags.writeable + + result = arr.to_numpy(dtype="object") + assert result.flags.writeable + + def test_astype_str(using_infer_string): a = pd.array([1, 2, None], dtype="Int64") diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index af3a793a3ef77..f9dac27b08906 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -155,6 +155,33 @@ def test_to_numpy(): tm.assert_numpy_array_equal(result, expected) +def test_to_numpy_readonly(): + arr = NumpyExtensionArray(np.array([1, 2, 3])) + arr._readonly = True + result = arr.to_numpy() + assert not result.flags.writeable + + result = arr.to_numpy(copy=True) + assert result.flags.writeable + + result = arr.to_numpy(dtype="f8") + assert result.flags.writeable + + +@pytest.mark.parametrize("dtype", [None, "int64"]) +def test_asarray_readonly(dtype): + arr = NumpyExtensionArray(np.array([1, 2, 3])) + arr._readonly = True + result = np.asarray(arr, dtype=dtype) + assert not result.flags.writeable + + result = np.asarray(arr, dtype=dtype, copy=True) + assert result.flags.writeable + + result = np.asarray(arr, dtype=dtype, copy=False) + assert not result.flags.writeable + + # ---------------------------------------------------------------------------- # Setitem diff --git a/pandas/tests/arrays/string_/test_string.py 
b/pandas/tests/arrays/string_/test_string.py index f10ebda94dc6a..44ba8741b8106 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -753,6 +753,16 @@ def test_to_numpy_na_value(dtype, nulls_fixture): tm.assert_numpy_array_equal(result, expected) +def test_to_numpy_readonly(dtype): + arr = pd.array(["a", pd.NA, "b"], dtype=dtype) + arr._readonly = True + result = arr.to_numpy() + if dtype.storage == "python": + assert not result.flags.writeable + else: + assert result.flags.writeable + + def test_isin(dtype, fixed_now_ts): s = pd.Series(["a", "b", None], dtype=dtype) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 111179d390953..6a3568184b990 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1274,6 +1274,28 @@ def test_to_numpy_extra(arr): tm.assert_equal(arr, original) +@pytest.mark.parametrize( + "arr", + [ + pd.date_range("2000", periods=4)._values, + pd.timedelta_range("2000", periods=4)._values, + ], +) +def test_to_numpy_extra_readonly(arr): + arr[0] = NaT + original = arr.copy() + arr._readonly = True + + result = arr.to_numpy(dtype=object) + assert result.flags.writeable + + # numpy does not do zero-copy conversion from M8 to i8 + result = arr.to_numpy(dtype="int64") + assert result.flags.writeable + + tm.assert_equal(arr, original) + + @pytest.mark.parametrize("as_index", [True, False]) @pytest.mark.parametrize( "values", From 5a5170ecc5101736c8c5d1babf99718304ba8fdb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 10 Sep 2025 13:08:50 +0200 Subject: [PATCH 13/19] fix tests for old numpy and 32bit --- pandas/tests/arrays/floating/test_to_numpy.py | 15 +++++++++++++-- pandas/tests/arrays/numpy_/test_numpy.py | 5 ++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/floating/test_to_numpy.py b/pandas/tests/arrays/floating/test_to_numpy.py index 
898ccdad81432..79769abf3fb9d 100644 --- a/pandas/tests/arrays/floating/test_to_numpy.py +++ b/pandas/tests/arrays/floating/test_to_numpy.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.compat.numpy import np_version_gt2 + import pandas as pd import pandas._testing as tm from pandas.core.arrays import FloatingArray @@ -148,8 +150,17 @@ def test_to_numpy_readonly(): assert result.flags.writeable -def test_asarray_readonly(): +@pytest.mark.skipif(not np_version_gt2, reason="copy keyword introduced in np 2.0") +@pytest.mark.parametrize("dtype", [None, "float64"]) +def test_asarray_readonly(dtype): arr = pd.array([0.1, 0.2, 0.3], dtype="Float64") arr._readonly = True - result = np.asarray(arr, copy=False) + + result = np.asarray(arr, dtype=dtype) + assert not result.flags.writeable + + result = np.asarray(arr, dtype=dtype, copy=True) + assert result.flags.writeable + + result = np.asarray(arr, dtype=dtype, copy=False) assert not result.flags.writeable diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index f9dac27b08906..1dbd03ec516cf 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas.compat.numpy import np_version_gt2 + from pandas.core.dtypes.dtypes import NumpyEADtype import pandas as pd @@ -168,9 +170,10 @@ def test_to_numpy_readonly(): assert result.flags.writeable +@pytest.mark.skipif(not np_version_gt2) @pytest.mark.parametrize("dtype", [None, "int64"]) def test_asarray_readonly(dtype): - arr = NumpyExtensionArray(np.array([1, 2, 3])) + arr = NumpyExtensionArray(np.array([1, 2, 3], dtype="int64")) arr._readonly = True result = np.asarray(arr, dtype=dtype) assert not result.flags.writeable From af62da83bddbc68fb339a7586ccbab912bac5ed6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 10 Sep 2025 19:39:22 +0200 Subject: [PATCH 14/19] fixup skipif --- 
pandas/tests/arrays/numpy_/test_numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index 1dbd03ec516cf..f0e495cac4b3d 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -170,7 +170,7 @@ def test_to_numpy_readonly(): assert result.flags.writeable -@pytest.mark.skipif(not np_version_gt2) +@pytest.mark.skipif(not np_version_gt2, reason="copy keyword introduced in np 2.0") @pytest.mark.parametrize("dtype", [None, "int64"]) def test_asarray_readonly(dtype): arr = NumpyExtensionArray(np.array([1, 2, 3], dtype="int64")) From a4accf873a5c0973a54d3a3981db283c3be07171 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 21 Sep 2025 12:34:33 +0200 Subject: [PATCH 15/19] add whatsnew note --- doc/source/whatsnew/v2.3.3.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/source/whatsnew/v2.3.3.rst b/doc/source/whatsnew/v2.3.3.rst index aaed7544d9975..ec32fbc8b224b 100644 --- a/doc/source/whatsnew/v2.3.3.rst +++ b/doc/source/whatsnew/v2.3.3.rst @@ -18,6 +18,16 @@ Most changes in this release are related to :class:`StringDtype` which will become the default string dtype in pandas 3.0. See :ref:`whatsnew_230.upcoming_changes` for more details. +.. _whatsnew_233.string_fixes.improvements: + +Improvements +^^^^^^^^^^^^ +- Accessing the underlying NumPy array of a DataFrame or Series will return a read-only + array if the array shares data with the original DataFrame or Series (:ref:`copy_on_write_read_only_na`). + This logic is expanded to accessing the underlying pandas ExtensionArray + through ``.array`` (or ``.values`` depending on the dtype) as well (:issue:`61925`). + + .. 
_whatsnew_233.string_fixes.bugs: Bug fixes From 84e83c73aa6d739c9ebf5c812bddc7860c2334bb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 21 Sep 2025 20:05:33 +0200 Subject: [PATCH 16/19] let getitem propagate readonly property --- pandas/core/arrays/_mixins.py | 7 ++++++- pandas/core/arrays/arrow/array.py | 5 ++++- pandas/core/arrays/base.py | 16 ++++++++++++++++ pandas/core/arrays/interval.py | 5 ++++- pandas/core/arrays/masked.py | 5 ++++- pandas/core/arrays/sparse/array.py | 6 +++++- pandas/tests/extension/base/getitem.py | 7 +++++++ pandas/tests/extension/decimal/array.py | 5 ++++- pandas/tests/extension/json/array.py | 9 +++++++-- 9 files changed, 57 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 6cce61365086c..b441346d86383 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -284,7 +284,10 @@ def __getitem__( result = self._ndarray[key] if self.ndim == 1: return self._box_func(result) - return self._from_backing_data(result) + result = self._from_backing_data(result) + if self._getitem_returns_view(key): + result._readonly = self._readonly + return result # error: Incompatible types in assignment (expression has type "ExtensionArray", # variable has type "Union[int, slice, ndarray]") @@ -295,6 +298,8 @@ def __getitem__( return self._box_func(result) result = self._from_backing_data(result) + if self._getitem_returns_view(key): + result._readonly = self._readonly return result def _pad_or_backfill( diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index b3ebdaaf29cd8..9189190358992 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -730,7 +730,10 @@ def __getitem__(self, item: PositionalIndexer): value = self._pa_array[item] if isinstance(value, pa.ChunkedArray): - return self._from_pyarrow_array(value) + result = self._from_pyarrow_array(value) + if 
self._getitem_returns_view(item): + result._readonly = self._readonly + return result else: pa_type = self._pa_array.type scalar = value.as_py() diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 2772c7cac897c..880fa21b5c73f 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -433,6 +433,22 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: """ raise AbstractMethodError(self) + def _getitem_returns_view(self, key) -> bool: + if not isinstance(key, tuple): + key = (key,) + + # filter out Ellipsis and np.newaxis + key = tuple(k for k in key if k is not Ellipsis and k is not np.newaxis) + if not key: + return True + # single integer gives view if selecting subset of 2D array + if self.ndim == 2 and lib.is_integer(key[0]): + return True + # slices always give views + if all(isinstance(k, slice) for k in key): + return True + return False + def __setitem__(self, key, value) -> None: """ Set one or more values inplace. diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index ea8ebc02dad5c..d7d37171a6496 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -724,7 +724,10 @@ def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA: # "Union[Period, Timestamp, Timedelta, NaTType, DatetimeArray, TimedeltaArray, # ndarray[Any, Any]]"; expected "Union[Union[DatetimeArray, TimedeltaArray], # ndarray[Any, Any]]" - return self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type] + result = self._simple_new(left, right, dtype=self.dtype) + if self._getitem_returns_view(key): + result._readonly = self._readonly + return result # type: ignore[arg-type] def __setitem__(self, key, value) -> None: if self._readonly: diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 6ac3aef2a2ec5..1798cb501e123 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -200,7 +200,10 @@ def __getitem__(self, 
item: PositionalIndexer) -> Self | Any: return self.dtype.na_value return self._data[item] - return self._simple_new(self._data[item], newmask) + result = self._simple_new(self._data[item], newmask) + if self._getitem_returns_view(item): + result._readonly = self._readonly + return result def _pad_or_backfill( self, diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index a1bd71b4e6881..a8c89d5300066 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1008,7 +1008,11 @@ def __getitem__( elif isinstance(key, slice): if key == slice(None): # to ensure arr[:] (used by view()) does not make a copy - return type(self)._simple_new(self.sp_values, self.sp_index, self.dtype) + result = type(self)._simple_new( + self.sp_values, self.sp_index, self.dtype + ) + result._readonly = self._readonly + return result # Avoid densifying when handling contiguous slices if key.step is None or key.step == 1: start = 0 if key.start is None else key.start diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 1f3680bf67e90..a778e1dc51ba6 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -467,3 +467,10 @@ def test_item(self, data): with pytest.raises(ValueError, match=msg): s.item() + + def test_getitem_propagates_readonly_property(self, data): + # ensure read-only propagates if getitem returns view + data._readonly = True + + result = data[:] + assert result._readonly diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index b193d4ec9cd45..42bd35b4c9c6b 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -177,7 +177,10 @@ def __getitem__(self, item): else: # array, slice. 
item = pd.api.indexers.check_array_indexer(self, item) - return type(self)(self._data[item]) + result = type(self)(self._data[item]) + if self._getitem_returns_view(item): + result._readonly = self._readonly + return result def take(self, indexer, allow_fill=False, fill_value=None): from pandas.api.extensions import take diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 76ecdb64c214f..9de05824837ce 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -105,10 +105,15 @@ def __getitem__(self, item): return self.data[item] elif isinstance(item, slice) and item == slice(None): # Make sure we get a view - return type(self)(self.data) + result = type(self)(self.data) + result._readonly = self._readonly + return result elif isinstance(item, slice): # slice - return type(self)(self.data[item]) + result = type(self)(self.data[item]) + if self._getitem_returns_view(item): + result._readonly = self._readonly + return result elif not is_list_like(item): # e.g. 
"foo" or 2.5 # exception message copied from numpy From 9f78d765d8bb02994ae8938eef4555b3c0884c90 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 8 Oct 2025 19:44:13 +0200 Subject: [PATCH 17/19] fixup merge --- doc/source/whatsnew/v3.0.0.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 448ceffdaa1eb..7c9bde32cae61 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1232,6 +1232,10 @@ Other - Fixed bug in the :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`) - Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`) - Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`) +- Accessing the underlying NumPy array of a DataFrame or Series will return a read-only + array if the array shares data with the original DataFrame or Series (:ref:`copy_on_write_read_only_na`). + This logic is expanded to accessing the underlying pandas ExtensionArray + through ``.array`` (or ``.values`` depending on the dtype) as well (:issue:`61925`). .. 
***DO NOT USE THIS SECTION***
From fa063bf4ff56a9193ec1a0256587824bc098d0f9 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Tue, 21 Oct 2025 10:12:57 +0200
Subject: [PATCH 18/19] fix typing

---
 pandas/core/arrays/interval.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
index 72cca70ba909c..3ebbe1c10b95d 100644
--- a/pandas/core/arrays/interval.py
+++ b/pandas/core/arrays/interval.py
@@ -830,10 +830,10 @@ def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA:
         # "Union[Period, Timestamp, Timedelta, NaTType, DatetimeArray, TimedeltaArray,
         # ndarray[Any, Any]]"; expected "Union[Union[DatetimeArray, TimedeltaArray],
         # ndarray[Any, Any]]"
-        result = self._simple_new(left, right, dtype=self.dtype)
+        result = self._simple_new(left, right, dtype=self.dtype)  # type: ignore[arg-type]
         if self._getitem_returns_view(key):
             result._readonly = self._readonly
-        return result  # type: ignore[arg-type]
+        return result
 
     def __setitem__(self, key, value) -> None:
         if self._readonly:

From ef4a36fd8903a491bf7e61b4d5bff6ac6c1d4f1d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Tue, 21 Oct 2025 13:39:49 +0200
Subject: [PATCH 19/19] add test for fillna

---
 pandas/tests/extension/base/missing.py        | 14 ++++++++++++++
 .../tests/extension/decimal/test_decimal.py   |  4 ++++
 pandas/tests/extension/test_arrow.py          | 15 +++++++++++++++
 pandas/tests/extension/test_interval.py       |  4 ++++
 pandas/tests/extension/test_numpy.py          |  5 +++++
 pandas/tests/extension/test_sparse.py         | 13 +++++++++++++
 pandas/tests/extension/test_string.py         | 19 +++++++++++++++++++
 7 files changed, 74 insertions(+)

diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py
index cee565d4f7c1e..c4e5094977691 100644
--- a/pandas/tests/extension/base/missing.py
+++ b/pandas/tests/extension/base/missing.py
@@ -122,6 +122,20 @@ def test_fillna_no_op_returns_copy(self, data):
         assert result is not data
         tm.assert_extension_array_equal(result, data)
 
+    def test_fillna_readonly(self, data_missing):
+        data = data_missing.copy()
+        data._readonly = True
+
+        # by default copy=True, then this works fine
+        result = data.fillna(data_missing[1])
+        assert result[0] == data_missing[1]
+        tm.assert_extension_array_equal(data, data_missing)
+
+        # but with copy=False, this raises for EAs that respect the copy keyword
+        with pytest.raises(ValueError, match="Cannot modify read-only array"):
+            data.fillna(data_missing[1], copy=False)
+        tm.assert_extension_array_equal(data, data_missing)
+
     def test_fillna_series(self, data_missing):
         fill_value = data_missing[1]
         ser = pd.Series(data_missing)
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 538c025546426..5374121a37f63 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -171,6 +171,10 @@ def test_fillna_limit_series(self, data_missing):
         ):
             super().test_fillna_limit_series(data_missing)
 
+    @pytest.mark.xfail(reason="copy keyword is missing")
+    def test_fillna_readonly(self, data_missing):
+        super().test_fillna_readonly(data_missing)
+
     def test_series_repr(self, data):
         # Overriding this base test to explicitly test that
         # the custom _formatter is used
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 2aa1b658fdf7b..a97ee602275a2 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -687,6 +687,21 @@ def test_fillna_no_op_returns_copy(self, data):
         assert result is not data
         tm.assert_extension_array_equal(result, data)
 
+    def test_fillna_readonly(self, data_missing):
+        data = data_missing.copy()
+        data._readonly = True
+
+        # by default copy=True, then this works fine
+        result = data.fillna(data_missing[1])
+        assert result[0] == data_missing[1]
+        tm.assert_extension_array_equal(data, data_missing)
+
+        # copy=False is generally not honored by ArrowExtensionArray, always
+        # returns new data -> same result as above
+        result = data.fillna(data_missing[1], copy=False)
+        assert result[0] == data_missing[1]
+        tm.assert_extension_array_equal(data, data_missing)
+
     @pytest.mark.xfail(
         reason="GH 45419: pyarrow.ChunkedArray does not support views", run=False
     )
diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py
index c457d702ba043..ada34e7ace680 100644
--- a/pandas/tests/extension/test_interval.py
+++ b/pandas/tests/extension/test_interval.py
@@ -100,6 +100,10 @@ def test_fillna_limit_series(self, data_missing):
     def test_fillna_length_mismatch(self, data_missing):
         super().test_fillna_length_mismatch(data_missing)
 
+    @pytest.mark.xfail(reason="copy=False is not Implemented")
+    def test_fillna_readonly(self, data_missing):
+        super().test_fillna_readonly(data_missing)
+
     @pytest.mark.filterwarnings(
         "ignore:invalid value encountered in cast:RuntimeWarning"
     )
diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
index ba71e5c963de0..c3f619e4263df 100644
--- a/pandas/tests/extension/test_numpy.py
+++ b/pandas/tests/extension/test_numpy.py
@@ -342,6 +342,11 @@ def test_fillna_frame(self, data_missing):
         # Non-scalar "scalar" values.
         super().test_fillna_frame(data_missing)
 
+    @skip_nested
+    def test_fillna_readonly(self, data_missing):
+        # Non-scalar "scalar" values.
+        super().test_fillna_readonly(data_missing)
+
     @skip_nested
     def test_setitem_invalid(self, data, invalid_scalar):
         # object dtype can hold anything, so doesn't raise
diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
index 72f9a13910cc2..9203dfbb0d76e 100644
--- a/pandas/tests/extension/test_sparse.py
+++ b/pandas/tests/extension/test_sparse.py
@@ -237,6 +237,19 @@ def test_isna(self, data_missing):
     def test_fillna_no_op_returns_copy(self, data, request):
         super().test_fillna_no_op_returns_copy(data)
 
+    def test_fillna_readonly(self, data_missing):
+        # copy=False keyword is ignored by SparseArray.fillna
+        data = data_missing.copy()
+        data._readonly = True
+
+        # by default copy=True, then this works fine
+        result = data.fillna(data_missing[1])
+        assert result[0] == data_missing[1]
+
+        # copy=False is ignored -> so same result as above
+        result = data.fillna(data_missing[1], copy=False)
+        assert result[0] == data_missing[1]
+
     @pytest.mark.xfail(reason="Unsupported")
     def test_fillna_series(self, data_missing):
         # this one looks doable.
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index 45d1e30648423..3382711c20450 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -169,6 +169,25 @@ def test_fillna_no_op_returns_copy(self, data):
         assert result is not data
         tm.assert_extension_array_equal(result, data)
 
+    def test_fillna_readonly(self, data_missing):
+        data = data_missing.copy()
+        data._readonly = True
+
+        # by default copy=True, then this works fine
+        result = data.fillna(data_missing[1])
+        assert result[0] == data_missing[1]
+        tm.assert_extension_array_equal(data, data_missing)
+
+        # copy=False is generally not honored by Arrow-backed array, always
+        # returns new data -> same result as above
+        if data.dtype.storage == "pyarrow":
+            result = data.fillna(data_missing[1], copy=False)
+            assert result[0] == data_missing[1]
+        else:
+            with pytest.raises(ValueError, match="Cannot modify read-only array"):
+                data.fillna(data_missing[1], copy=False)
+        tm.assert_extension_array_equal(data, data_missing)
+
     def _get_expected_exception(
         self, op_name: str, obj, other
     ) -> type[Exception] | tuple[type[Exception], ...] | None: