diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 44bc82008e718..d9fcc72d07d13 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1240,6 +1240,10 @@ Other - Fixed bug in the :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`) - Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`) - Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`) +- Accessing the underlying NumPy array of a DataFrame or Series will return a read-only + array if the array shares data with the original DataFrame or Series (:ref:`copy_on_write_read_only_na`). + This logic is expanded to accessing the underlying pandas ExtensionArray + through ``.array`` (or ``.values`` depending on the dtype) as well (:issue:`61925`). .. ***DO NOT USE THIS SECTION*** diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index 567bfc02a2950..8b53e842a7988 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -177,7 +177,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarr @cython.wraparound(False) @cython.boundscheck(False) -def scalar_binop(object[:] values, object val, object op) -> ndarray: +def scalar_binop(ndarray[object] values, object val, object op) -> ndarray: """ Apply the given binary operator `op` between each element of the array `values` and the scalar `val`. @@ -214,7 +214,7 @@ def scalar_binop(object[:] values, object val, object op) -> ndarray: @cython.wraparound(False) @cython.boundscheck(False) -def vec_binop(object[:] left, object[:] right, object op) -> ndarray: +def vec_binop(ndarray[object] left, ndarray[object] right, object op) -> ndarray: """ Apply the given binary operator `op` pointwise to the elements of arrays `left` and `right`. 
diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 84aca81420fe1..27eeda325a8b4 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -258,6 +258,9 @@ def shift(self, periods: int = 1, fill_value=None) -> Self: return self._from_backing_data(new_values) def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify read-only array") + key = check_array_indexer(self, key) value = self._validate_setitem_value(value) self._ndarray[key] = value @@ -283,7 +286,10 @@ def __getitem__( result = self._ndarray[key] if self.ndim == 1: return self._box_func(result) - return self._from_backing_data(result) + result = self._from_backing_data(result) + if self._getitem_returns_view(key): + result._readonly = self._readonly + return result # error: Incompatible types in assignment (expression has type "ExtensionArray", # variable has type "Union[int, slice, ndarray]") @@ -294,6 +300,8 @@ def __getitem__( return self._box_func(result) result = self._from_backing_data(result) + if self._getitem_returns_view(key): + result._readonly = self._readonly return result def _pad_or_backfill( diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 08f1ffdcc6a0d..8317831c2542b 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -790,7 +790,10 @@ def __getitem__(self, item: PositionalIndexer): value = self._pa_array[item] if isinstance(value, pa.ChunkedArray): - return self._from_pyarrow_array(value) + result = self._from_pyarrow_array(value) + if self._getitem_returns_view(item): + result._readonly = self._readonly + return result else: pa_type = self._pa_array.type scalar = value.as_py() @@ -2188,6 +2191,9 @@ def __setitem__(self, key, value) -> None: ------- None """ + if self._readonly: + raise ValueError("Cannot modify read-only array") + # GH50085: unwrap 1D indexers if isinstance(key, tuple) and len(key) == 1: key = key[0] 
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e091ecf18668d..8309fb984fd21 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -37,6 +37,7 @@ validate_insert_loc, ) +from pandas.core.dtypes.astype import astype_is_view from pandas.core.dtypes.common import ( is_list_like, is_scalar, @@ -268,6 +269,8 @@ class ExtensionArray: # strictly less than 2000 to be below Index.__pandas_priority__. __pandas_priority__ = 1000 + _readonly = False + # ------------------------------------------------------------------------ # Constructors # ------------------------------------------------------------------------ @@ -430,6 +433,22 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: """ raise AbstractMethodError(self) + def _getitem_returns_view(self, key) -> bool: + if not isinstance(key, tuple): + key = (key,) + + # filter out Ellipsis and np.newaxis + key = tuple(k for k in key if k is not Ellipsis and k is not np.newaxis) + if not key: + return True + # single integer gives view if selecting subset of 2D array + if self.ndim == 2 and lib.is_integer(key[0]): + return True + # slices always give views + if all(isinstance(k, slice) for k in key): + return True + return False + def __setitem__(self, key, value) -> None: """ Set one or more values inplace. @@ -454,6 +473,11 @@ def __setitem__(self, key, value) -> None: Returns ------- None + + Raises + ------ + ValueError + If the array is readonly and modification is attempted. """ # Some notes to the ExtensionArray implementer who may have ended up # here. While this method is not required for the interface, if you @@ -473,6 +497,10 @@ def __setitem__(self, key, value) -> None: # __init__ method coerces that value, then so should __setitem__ # Note, also, that Series/DataFrame.where internally use __setitem__ # on a copy of the data. 
+ # Check if the array is readonly + if self._readonly: + raise ValueError("Cannot modify read-only array") + raise NotImplementedError(f"{type(self)} does not implement __setitem__.") def __len__(self) -> int: @@ -567,8 +595,14 @@ def to_numpy( result = np.asarray(self, dtype=dtype) if copy or na_value is not lib.no_default: result = result.copy() + elif self._readonly and astype_is_view(self.dtype, result.dtype): + # If the ExtensionArray is readonly, make the numpy array readonly too + result = result.view() + result.flags.writeable = False + if na_value is not lib.no_default: result[self.isna()] = na_value # type: ignore[index] + return result # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 97113d98a2aab..b7401bbff0f39 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -368,7 +368,12 @@ def __array__( if copy is True: return np.array(self._ndarray, dtype=dtype) - return self._ndarray + + result = self._ndarray + if self._readonly: + result = result.view() + result.flags.writeable = False + return result @overload def __getitem__(self, key: ScalarIndexer) -> DTScalarOrNaT: ... 
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index b0472c70557e5..3ebbe1c10b95d 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -830,9 +830,15 @@ def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA: # "Union[Period, Timestamp, Timedelta, NaTType, DatetimeArray, TimedeltaArray, # ndarray[Any, Any]]"; expected "Union[Union[DatetimeArray, TimedeltaArray], # ndarray[Any, Any]]" - return self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type] + result = self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type] + if self._getitem_returns_view(key): + result._readonly = self._readonly + return result def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify read-only array") + value_left, value_right = self._validate_setitem_value(value) key = check_array_indexer(self, key) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index cdba53662e6fa..0ec6d1a7b8469 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -27,6 +27,7 @@ from pandas.errors import AbstractMethodError from pandas.util._decorators import doc +from pandas.core.dtypes.astype import astype_is_view from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( @@ -206,7 +207,10 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: return self.dtype.na_value return self._data[item] - return self._simple_new(self._data[item], newmask) + result = self._simple_new(self._data[item], newmask) + if self._getitem_returns_view(item): + result._readonly = self._readonly + return result def _pad_or_backfill( self, @@ -312,6 +316,9 @@ def _validate_setitem_value(self, value): raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'") def __setitem__(self, key, value) -> None: + if self._readonly: + raise 
ValueError("Cannot modify read-only array") + key = check_array_indexer(self, key) if is_scalar(value): @@ -524,11 +531,11 @@ def to_numpy( hasna = self._hasna dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, hasna) if dtype is None: - dtype = object + dtype = np.dtype(object) if hasna: if ( - dtype != object + dtype != np.dtype(object) and not is_string_dtype(dtype) and na_value is libmissing.NA ): @@ -546,6 +553,9 @@ def to_numpy( with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) data = self._data.astype(dtype, copy=copy) + if self._readonly and not copy and astype_is_view(self.dtype, dtype): + data = data.view() + data.flags.writeable = False return data @doc(ExtensionArray.tolist) @@ -622,7 +632,12 @@ def __array__( if copy is False: if not self._hasna: # special case, here we can simply return the underlying data - return np.array(self._data, dtype=dtype, copy=copy) + result = np.array(self._data, dtype=dtype, copy=copy) + # If the ExtensionArray is readonly, make the numpy array readonly too + if self._readonly: + result = result.view() + result.flags.writeable = False + return result raise ValueError( "Unable to avoid copy while creating an array as requested." 
) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index eca47d3c9657f..9d174a49b5db1 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -14,7 +14,10 @@ from pandas._libs.tslibs import is_supported_dtype from pandas.compat.numpy import function as nv -from pandas.core.dtypes.astype import astype_array +from pandas.core.dtypes.astype import ( + astype_array, + astype_is_view, +) from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike, maybe_downcast_to_dtype, @@ -179,12 +182,23 @@ def dtype(self) -> NumpyEADtype: # NumPy Array Interface def __array__( - self, dtype: NpDtype | None = None, copy: bool | None = None + self, dtype: np.dtype | None = None, copy: bool | None = None ) -> np.ndarray: if copy is not None: # Note: branch avoids `copy=None` for NumPy 1.x support - return np.array(self._ndarray, dtype=dtype, copy=copy) - return np.asarray(self._ndarray, dtype=dtype) + result = np.array(self._ndarray, dtype=dtype, copy=copy) + else: + result = np.asarray(self._ndarray, dtype=dtype) + + if ( + self._readonly + and not copy + and (dtype is None or astype_is_view(self.dtype, dtype)) + ): + result = result.view() + result.flags.writeable = False + + return result def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): # Lightly modified version of @@ -545,6 +559,9 @@ def to_numpy( result[mask] = na_value else: result = self._ndarray + if not copy and self._readonly: + result = result.view() + result.flags.writeable = False result = np.asarray(result, dtype=dtype) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 18e4ff31164ac..da9a70ac51379 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -397,7 +397,11 @@ def __array__( # For NumPy 1.x compatibility we cannot use copy=None. 
And # `copy=False` has the meaning of `copy=None` here: if not copy: - return np.asarray(self.asi8, dtype=dtype) + result = np.asarray(self.asi8, dtype=dtype) + if self._readonly: + result = result.view() + result.flags.writeable = False + return result else: return np.array(self.asi8, dtype=dtype) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 83e16f5d4b8db..34521b1b97bab 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -568,7 +568,11 @@ def __array__( if copy is True: return np.array(self.sp_values) else: - return self.sp_values + result = self.sp_values + if self._readonly: + result = result.view() + result.flags.writeable = False + return result if copy is False: raise ValueError( @@ -597,6 +601,8 @@ def __array__( return out def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify read-only array") # I suppose we could allow setting of non-fill_value elements. 
# TODO(SparseArray.__setitem__): remove special cases in # ExtensionBlock.where @@ -983,6 +989,13 @@ def __getitem__( elif isinstance(key, tuple): data_slice = self.to_dense()[key] elif isinstance(key, slice): + if key == slice(None): + # to ensure arr[:] (used by view()) does not make a copy + result = type(self)._simple_new( + self.sp_values, self.sp_index, self.dtype + ) + result._readonly = self._readonly + return result # Avoid densifying when handling contiguous slices if key.step is None or key.step == 1: start = 0 if key.start is None else key.start diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index ec591d7711fa9..8d47eea8001fd 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -804,6 +804,9 @@ def _maybe_convert_setitem_value(self, value): return value def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify read-only array") + value = self._maybe_convert_setitem_value(value) key = check_array_indexer(self, key) diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 086f7d2da6640..75464cbb2b5f9 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -24,8 +24,12 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, ExtensionDtype, + IntervalDtype, NumpyEADtype, + PeriodDtype, ) if TYPE_CHECKING: @@ -283,6 +287,16 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool: new_dtype = getattr(new_dtype, "numpy_dtype", new_dtype) return getattr(dtype, "unit", None) == getattr(new_dtype, "unit", None) + elif new_dtype == object and isinstance( + dtype, (DatetimeTZDtype, PeriodDtype, IntervalDtype) + ): + return False + + elif isinstance(dtype, CategoricalDtype) and not isinstance( + new_dtype, CategoricalDtype + ): + return False + numpy_dtype = getattr(dtype, "numpy_dtype", None) new_numpy_dtype = getattr(new_dtype, "numpy_dtype", None) diff --git 
a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c716bd4e636bd..088b323ad3acd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4976,6 +4976,8 @@ def array(self) -> ExtensionArray: from pandas.core.arrays.numpy_ import NumpyExtensionArray array = NumpyExtensionArray(array) + array = array.view() + array._readonly = True return array @property diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 54b89c3bbe48c..786b86d41bb48 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2377,7 +2377,9 @@ def external_values(values: ArrayLike) -> ArrayLike: if isinstance(values, np.ndarray): values = values.view() values.flags.writeable = False - - # TODO(CoW) we should also mark our ExtensionArrays as read-only + else: + # ExtensionArrays + values = values.view() + values._readonly = True return values diff --git a/pandas/core/series.py b/pandas/core/series.py index f3aaee26fe470..bf86a1ecc49c0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -821,7 +821,10 @@ def _references(self) -> BlockValuesRefs: @Appender(base.IndexOpsMixin.array.__doc__) # type: ignore[prop-decorator] @property def array(self) -> ExtensionArray: - return self._mgr.array_values() + arr = self._mgr.array_values() + arr = arr.view() + arr._readonly = True + return arr def __len__(self) -> int: """ diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index d821c52d3becb..45634aa278176 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -323,3 +323,13 @@ def test_to_numpy_copy(): result = arr.to_numpy(dtype=bool, copy=True) result[0] = False tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean")) + + +def test_to_numpy_readonly(): + arr = pd.array([True, False, True], dtype="boolean") + arr._readonly = True + result = 
arr.to_numpy(dtype=bool) + assert not result.flags.writeable + + result = arr.to_numpy(dtype="int64") + assert result.flags.writeable diff --git a/pandas/tests/arrays/floating/test_to_numpy.py b/pandas/tests/arrays/floating/test_to_numpy.py index fc9e260923d32..b573ecd5ebf07 100644 --- a/pandas/tests/arrays/floating/test_to_numpy.py +++ b/pandas/tests/arrays/floating/test_to_numpy.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.compat.numpy import np_version_gt2 + import pandas as pd import pandas._testing as tm from pandas.core.arrays import FloatingArray @@ -142,3 +144,35 @@ def test_to_numpy_copy(): result = arr.to_numpy(dtype="float64", copy=True) result[0] = 10 tm.assert_extension_array_equal(arr, pd.array([0.1, 0.2, 0.3], dtype="Float64")) + + +def test_to_numpy_readonly(): + arr = pd.array([0.1, 0.2, 0.3], dtype="Float64") + arr._readonly = True + result = arr.to_numpy(dtype="float64") + assert not result.flags.writeable + + result = arr.to_numpy(dtype="float64", copy=True) + assert result.flags.writeable + + result = arr.to_numpy(dtype="float32") + assert result.flags.writeable + + result = arr.to_numpy(dtype="object") + assert result.flags.writeable + + +@pytest.mark.skipif(not np_version_gt2, reason="copy keyword introduced in np 2.0") +@pytest.mark.parametrize("dtype", [None, "float64"]) +def test_asarray_readonly(dtype): + arr = pd.array([0.1, 0.2, 0.3], dtype="Float64") + arr._readonly = True + + result = np.asarray(arr, dtype=dtype) + assert not result.flags.writeable + + result = np.asarray(arr, dtype=dtype, copy=True) + assert result.flags.writeable + + result = np.asarray(arr, dtype=dtype, copy=False) + assert not result.flags.writeable diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 1b4f070d47e4e..280ecf73597e8 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -276,6 +276,22 @@ def test_to_numpy_na_raises(dtype): 
a.to_numpy(dtype=dtype) +def test_to_numpy_readonly(): + arr = pd.array([0, 1], dtype="Int64") + arr._readonly = True + result = arr.to_numpy() + assert not result.flags.writeable + + result = arr.to_numpy(dtype="int64", copy=True) + assert result.flags.writeable + + result = arr.to_numpy(dtype="int32") + assert result.flags.writeable + + result = arr.to_numpy(dtype="object") + assert result.flags.writeable + + def test_astype_str(using_infer_string): a = pd.array([1, 2, None], dtype="Int64") diff --git a/pandas/tests/arrays/integer/test_indexing.py b/pandas/tests/arrays/integer/test_indexing.py index 4b953d699108b..ce801db5cb58d 100644 --- a/pandas/tests/arrays/integer/test_indexing.py +++ b/pandas/tests/arrays/integer/test_indexing.py @@ -12,7 +12,7 @@ def test_array_setitem_nullable_boolean_mask(): def test_array_setitem(): # GH 31446 - arr = pd.Series([1, 2], dtype="Int64").array + arr = pd.array([1, 2], dtype="Int64") arr[arr > 1] = 1 expected = pd.array([1, 1], dtype="Int64") diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index 620a553d5a731..f0e495cac4b3d 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas.compat.numpy import np_version_gt2 + from pandas.core.dtypes.dtypes import NumpyEADtype import pandas as pd @@ -155,15 +157,36 @@ def test_to_numpy(): tm.assert_numpy_array_equal(result, expected) -# ---------------------------------------------------------------------------- -# Setitem +def test_to_numpy_readonly(): + arr = NumpyExtensionArray(np.array([1, 2, 3])) + arr._readonly = True + result = arr.to_numpy() + assert not result.flags.writeable + + result = arr.to_numpy(copy=True) + assert result.flags.writeable + result = arr.to_numpy(dtype="f8") + assert result.flags.writeable -def test_setitem_series(): - ser = pd.Series([1, 2, 3]) - ser.array[0] = 10 - expected = pd.Series([10, 2, 3]) - 
tm.assert_series_equal(ser, expected) + +@pytest.mark.skipif(not np_version_gt2, reason="copy keyword introduced in np 2.0") +@pytest.mark.parametrize("dtype", [None, "int64"]) +def test_asarray_readonly(dtype): + arr = NumpyExtensionArray(np.array([1, 2, 3], dtype="int64")) + arr._readonly = True + result = np.asarray(arr, dtype=dtype) + assert not result.flags.writeable + + result = np.asarray(arr, dtype=dtype, copy=True) + assert result.flags.writeable + + result = np.asarray(arr, dtype=dtype, copy=False) + assert not result.flags.writeable + + +# ---------------------------------------------------------------------------- +# Setitem def test_setitem(any_numpy_array): diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 04abfafde6995..14f39694c7463 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -746,6 +746,16 @@ def test_to_numpy_na_value(dtype, nulls_fixture): tm.assert_numpy_array_equal(result, expected) +def test_to_numpy_readonly(dtype): + arr = pd.array(["a", pd.NA, "b"], dtype=dtype) + arr._readonly = True + result = arr.to_numpy() + if dtype.storage == "python": + assert not result.flags.writeable + else: + assert result.flags.writeable + + def test_isin(dtype, fixed_now_ts): s = pd.Series(["a", "b", None], dtype=dtype) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index d1ef29b0bf8a0..6a3568184b990 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1248,8 +1248,8 @@ def test_invalid_nat_setitem_array(arr, non_casting_nats): @pytest.mark.parametrize( "arr", [ - pd.date_range("2000", periods=4).array, - pd.timedelta_range("2000", periods=4).array, + pd.date_range("2000", periods=4)._values, + pd.timedelta_range("2000", periods=4)._values, ], ) def test_to_numpy_extra(arr): @@ -1274,6 +1274,28 @@ def test_to_numpy_extra(arr): 
tm.assert_equal(arr, original) +@pytest.mark.parametrize( + "arr", + [ + pd.date_range("2000", periods=4)._values, + pd.timedelta_range("2000", periods=4)._values, + ], +) +def test_to_numpy_extra_readonly(arr): + arr[0] = NaT + original = arr.copy() + arr._readonly = True + + result = arr.to_numpy(dtype=object) + assert result.flags.writeable + + # numpy does not do zero-copy conversion from M8 to i8 + result = arr.to_numpy(dtype="int64") + assert result.flags.writeable + + tm.assert_equal(arr, original) + + @pytest.mark.parametrize("as_index", [True, False]) @pytest.mark.parametrize( "values", diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 3d075857c3fd9..2f1ae66299356 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -283,7 +283,7 @@ def test_array(arr, attr, index_or_series): arr = getattr(arr, attr) result = getattr(result, attr) - assert result is arr + assert np.shares_memory(result, arr) def test_array_multiindex_raises(): diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py index ec983e60e312d..aed7afb8fb123 100644 --- a/pandas/tests/copy_view/test_array.py +++ b/pandas/tests/copy_view/test_array.py @@ -19,10 +19,11 @@ "method", [ lambda ser: ser.values, + lambda ser: np.asarray(ser.array), lambda ser: np.asarray(ser), lambda ser: np.array(ser, copy=False), ], - ids=["values", "asarray", "array"], + ids=["values", "array", "np.asarray", "np.array"], ) def test_series_values(method): ser = Series([1, 2, 3], name="name") @@ -105,24 +106,38 @@ def test_series_to_numpy(): assert arr.flags.writeable is True -def test_series_array_ea_dtypes(): +@pytest.mark.parametrize( + "method", + [ + lambda ser: np.asarray(ser.array), + lambda ser: np.asarray(ser), + lambda ser: np.asarray(ser, dtype="int64"), + lambda ser: np.array(ser, copy=False), + ], + ids=["array", "np.asarray", "np.asarray-dtype", "np.array"], +) +def 
test_series_values_ea_dtypes(method): ser = Series([1, 2, 3], dtype="Int64") - arr = np.asarray(ser, dtype="int64") - assert np.shares_memory(arr, get_array(ser)) - assert arr.flags.writeable is False + arr = method(ser) - arr = np.asarray(ser) assert np.shares_memory(arr, get_array(ser)) assert arr.flags.writeable is False -def test_dataframe_array_ea_dtypes(): +@pytest.mark.parametrize( + "method", + [ + lambda df: df.values, + lambda df: np.asarray(df), + lambda df: np.asarray(df, dtype="int64"), + lambda df: np.array(df, copy=False), + ], + ids=["values", "np.asarray", "np.asarray-dtype", "np.array"], +) +def test_dataframe_array_ea_dtypes(method): df = DataFrame({"a": [1, 2, 3]}, dtype="Int64") - arr = np.asarray(df, dtype="int64") - assert np.shares_memory(arr, get_array(df, "a")) - assert arr.flags.writeable is False + arr = method(df) - arr = np.asarray(df) assert np.shares_memory(arr, get_array(df, "a")) assert arr.flags.writeable is False diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 09e8d8ba93092..6b8d579b98fd2 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -467,3 +467,10 @@ def test_item(self, data): with pytest.raises(ValueError, match=msg): s.item() + + def test_getitem_propagates_readonly_property(self, data): + # ensure read-only propagates if getitem returns view + data._readonly = True + + result = data[:] + assert result._readonly diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index cee565d4f7c1e..c4e5094977691 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -122,6 +122,20 @@ def test_fillna_no_op_returns_copy(self, data): assert result is not data tm.assert_extension_array_equal(result, data) + def test_fillna_readonly(self, data_missing): + data = data_missing.copy() + data._readonly = True + + # by default copy=True, then this works fine + 
result = data.fillna(data_missing[1]) + assert result[0] == data_missing[1] + tm.assert_extension_array_equal(data, data_missing) + + # but with copy=False, this raises for EAs that respect the copy keyword + with pytest.raises(ValueError, match="Cannot modify read-only array"): + data.fillna(data_missing[1], copy=False) + tm.assert_extension_array_equal(data, data_missing) + def test_fillna_series(self, data_missing): fill_value = data_missing[1] ser = pd.Series(data_missing) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index b273c9b9f092a..775542019182b 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -472,3 +472,65 @@ def test_setitem_2d_values(self, data): df.loc[[0, 1], :] = df.loc[[1, 0], :].values assert (df.loc[0, :] == original[1]).all() assert (df.loc[1, :] == original[0]).all() + + def test_readonly_property(self, data): + assert data._readonly is False + + data._readonly = True + assert data._readonly is True + + data_orig = data.copy() + assert data_orig._readonly is False + + with pytest.raises(ValueError, match="Cannot modify read-only array"): + data[0] = data[1] + + with pytest.raises(ValueError, match="Cannot modify read-only array"): + data[0:3] = data[1] + + with pytest.raises(ValueError, match="Cannot modify read-only array"): + data[np.array([True] * len(data))] = data[1] + + tm.assert_extension_array_equal(data, data_orig) + + def test_readonly_propagates_to_numpy_array(self, data): + data._readonly = True + + # when we ask for a copy, the result should never be readonly + arr = np.array(data) + assert arr.flags.writeable + + # when we don't ask for a copy -> if the conversion is zero-copy, + # the result should be readonly + arr1 = np.asarray(data) + arr2 = np.asarray(data) + if np.shares_memory(arr1, arr2): + assert not arr1.flags.writeable + else: + assert arr1.flags.writeable + + def test_readonly_propagates_to_numpy_array_method(self, 
data): + data._readonly = True + + # when we ask for a copy, the result should never be readonly + arr = data.to_numpy(copy=True) + assert arr.flags.writeable + + # when we don't ask for a copy -> if the conversion is zero-copy, + # the result should be readonly + arr1 = data.to_numpy(copy=False) + arr2 = data.to_numpy(copy=False) + if np.shares_memory(arr1, arr2): + assert not arr1.flags.writeable + else: + assert arr1.flags.writeable + + # non-NA fill value should always result in a copy + if data.isna().any(): + arr = data.to_numpy(copy=False, na_value=data[0]) + if isinstance(data.dtype, pd.ArrowDtype) and data.dtype.kind == "f": + # for float dtype, after the fillna, the conversion from pyarrow to + # numpy is zero-copy, and pyarrow will mark the array as readonly + assert not arr.flags.writeable + else: + assert arr.flags.writeable diff --git a/pandas/tests/extension/date/array.py b/pandas/tests/extension/date/array.py index d99adba0ef8c0..dd275b01e734e 100644 --- a/pandas/tests/extension/date/array.py +++ b/pandas/tests/extension/date/array.py @@ -148,6 +148,9 @@ def __getitem__(self, item: PositionalIndexer): raise NotImplementedError("only ints are supported as indexes") def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: + if self._readonly: + raise ValueError("Cannot modify read-only array") + if not isinstance(key, int): raise NotImplementedError("only ints are supported as indexes") diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index aaba6295e5815..d6fb31b776930 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -177,7 +177,10 @@ def __getitem__(self, item): else: # array, slice. 
item = pd.api.indexers.check_array_indexer(self, item) - return type(self)(self._data[item]) + result = type(self)(self._data[item]) + if self._getitem_returns_view(item): + result._readonly = self._readonly + return result def take(self, indexer, allow_fill=False, fill_value=None): from pandas.api.extensions import take @@ -203,6 +206,9 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy=copy) def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify read-only array") + if is_list_like(value): if is_scalar(key): raise ValueError("setting an array element with a sequence.") diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 538c025546426..5374121a37f63 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -171,6 +171,10 @@ def test_fillna_limit_series(self, data_missing): ): super().test_fillna_limit_series(data_missing) + @pytest.mark.xfail(reason="copy keyword is missing") + def test_fillna_readonly(self, data_missing): + super().test_fillna_readonly(data_missing) + def test_series_repr(self, data): # Overriding this base test to explicitly test that # the custom _formatter is used diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 828e4415bd295..937648ab97b6b 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -105,10 +105,15 @@ def __getitem__(self, item): return self.data[item] elif isinstance(item, slice) and item == slice(None): # Make sure we get a view - return type(self)(self.data) + result = type(self)(self.data) + result._readonly = self._readonly + return result elif isinstance(item, slice): # slice - return type(self)(self.data[item]) + result = type(self)(self.data[item]) + if self._getitem_returns_view(item): + result._readonly = self._readonly + return result elif not 
is_list_like(item): # e.g. "foo" or 2.5 # exception message copied from numpy @@ -126,6 +131,9 @@ def __getitem__(self, item): return type(self)([self.data[i] for i in item]) def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify read-only array") + if isinstance(key, numbers.Integral): self.data[key] = value else: @@ -158,7 +166,6 @@ def __array__(self, dtype=None, copy=None): raise ValueError( "Unable to avoid copy while creating an array as requested." ) - if dtype is None: dtype = object if dtype == object: diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index cdb98c5342ecb..3848719197362 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -415,6 +415,12 @@ def test_setitem_slice_array(self, data): def test_setitem_invalid(self, data, invalid_scalar): super().test_setitem_invalid(data, invalid_scalar) + @pytest.mark.xfail( + reason="result readonly flag is incorrect and does not support na_value" + ) + def test_readonly_propagates_to_numpy_array_method(self, data): + super().test_readonly_propagates_to_numpy_array_method(data) + @pytest.mark.xfail(reason="only integer scalar arrays can be converted") def test_setitem_2d_values(self, data): super().test_setitem_2d_values(data) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 2aa1b658fdf7b..a97ee602275a2 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -687,6 +687,21 @@ def test_fillna_no_op_returns_copy(self, data): assert result is not data tm.assert_extension_array_equal(result, data) + def test_fillna_readonly(self, data_missing): + data = data_missing.copy() + data._readonly = True + + # by default copy=True, then this works fine + result = data.fillna(data_missing[1]) + assert result[0] == data_missing[1] + tm.assert_extension_array_equal(data, data_missing) + + # copy=False 
is generally not honored by ArrowExtensionArray, always + # returns new data -> same result as above + result = data.fillna(data_missing[1], copy=False) + assert result[0] == data_missing[1] + tm.assert_extension_array_equal(data, data_missing) + @pytest.mark.xfail( reason="GH 45419: pyarrow.ChunkedArray does not support views", run=False ) diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index 40192cbc83a01..aa5c91f478c45 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -106,5 +106,5 @@ def test_ellipsis_index(): # String comparison because there's no native way to compare slices. # Before the fix for GH#42430, last_item_arg would get set to the 2D slice # (Ellipsis, slice(None, 1, None)) - out = df["col1"].array.last_item_arg + out = df["col1"]._values.last_item_arg assert str(out) == "slice(None, 1, None)" diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index c457d702ba043..ada34e7ace680 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -100,6 +100,10 @@ def test_fillna_limit_series(self, data_missing): def test_fillna_length_mismatch(self, data_missing): super().test_fillna_length_mismatch(data_missing) + @pytest.mark.xfail(reason="copy=False is not Implemented") + def test_fillna_readonly(self, data_missing): + super().test_fillna_readonly(data_missing) + @pytest.mark.filterwarnings( "ignore:invalid value encountered in cast:RuntimeWarning" ) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 691ce9341b788..c3f619e4263df 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -77,8 +77,10 @@ def allow_in_pandas(monkeypatch): @pytest.fixture def data(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": - return pd.Series([(i,) for i in range(10)]).array - return NumpyExtensionArray(np.arange(1, 11,
dtype=dtype._dtype)) + arr = pd.Series([(i,) for i in range(10)])._values + else: + arr = np.arange(1, 11, dtype=dtype._dtype) + return NumpyExtensionArray(arr) @pytest.fixture @@ -340,6 +342,11 @@ def test_fillna_frame(self, data_missing): # Non-scalar "scalar" values. super().test_fillna_frame(data_missing) + @skip_nested + def test_fillna_readonly(self, data_missing): + # Non-scalar "scalar" values. + super().test_fillna_readonly(data_missing) + @skip_nested def test_setitem_invalid(self, data, invalid_scalar): # object dtype can hold anything, so doesn't raise diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 72f9a13910cc2..9203dfbb0d76e 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -237,6 +237,19 @@ def test_isna(self, data_missing): def test_fillna_no_op_returns_copy(self, data, request): super().test_fillna_no_op_returns_copy(data) + def test_fillna_readonly(self, data_missing): + # the copy keyword is ignored by SparseArray.fillna + data = data_missing.copy() + data._readonly = True + + # by default copy=True, then this works fine + result = data.fillna(data_missing[1]) + assert result[0] == data_missing[1] + + # copy=False is ignored -> so same result as above + result = data.fillna(data_missing[1], copy=False) + assert result[0] == data_missing[1] + @pytest.mark.xfail(reason="Unsupported") def test_fillna_series(self, data_missing): # this one looks doable.
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 45d1e30648423..3382711c20450 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -169,6 +169,25 @@ def test_fillna_no_op_returns_copy(self, data): assert result is not data tm.assert_extension_array_equal(result, data) + def test_fillna_readonly(self, data_missing): + data = data_missing.copy() + data._readonly = True + + # by default copy=True, then this works fine + result = data.fillna(data_missing[1]) + assert result[0] == data_missing[1] + tm.assert_extension_array_equal(data, data_missing) + + # copy=False is generally not honored by Arrow-backed array, always + # returns new data -> same result as above + if data.dtype.storage == "pyarrow": + result = data.fillna(data_missing[1], copy=False) + assert result[0] == data_missing[1] + else: + with pytest.raises(ValueError, match="Cannot modify read-only array"): + data.fillna(data_missing[1], copy=False) + tm.assert_extension_array_equal(data, data_missing) + def _get_expected_exception( self, op_name: str, obj, other ) -> type[Exception] | tuple[type[Exception], ...] | None: diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 0add072a63053..fdd34332179bc 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1348,7 +1348,7 @@ def check_series_setitem(self, elem, index: Index, inplace: bool): ser[: len(elem)] = elem if inplace: - assert ser.array is arr # i.e. setting was done inplace + assert ser._values is arr # i.e.
setting was done inplace else: assert ser.dtype == object diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 3c3a06583883f..018ae06562148 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -530,9 +530,9 @@ def test_categorical_sideeffects_free(self): # so this WILL change values cat = Categorical(["a", "b", "c", "a"]) s = Series(cat, copy=False) - assert s.values is cat + assert s._values is cat s = s.cat.rename_categories([1, 2, 3]) - assert s.values is not cat + assert s._values is not cat exp_s = np.array([1, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s)