Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
a9df51b
CoW: add readonly flag to ExtensionArrays, return read-only EA/ndarra…
jorisvandenbossche Jul 22, 2025
9cd6e4f
cleanup
jorisvandenbossche Jul 22, 2025
c6f37d1
fixup attribute name in tests
jorisvandenbossche Jul 22, 2025
8058d9a
fix tests
jorisvandenbossche Jul 23, 2025
91465ee
more test fixes
jorisvandenbossche Jul 23, 2025
856dc02
add tests for .array being readonly
jorisvandenbossche Jul 23, 2025
828fadc
Merge remote-tracking branch 'upstream/main' into cow-ea-readonly
jorisvandenbossche Aug 3, 2025
ee1ed6e
typing
jorisvandenbossche Aug 3, 2025
3f7bc3e
Merge remote-tracking branch 'upstream/main' into cow-ea-readonly
jorisvandenbossche Aug 19, 2025
1765fe7
Merge remote-tracking branch 'upstream/main' into cow-ea-readonly
jorisvandenbossche Sep 7, 2025
a7abee3
address feedback: use _values in tests + add comment
jorisvandenbossche Sep 7, 2025
4b6ced0
Merge remote-tracking branch 'upstream/main' into cow-ea-readonly
jorisvandenbossche Sep 8, 2025
f235aa3
update typing
jorisvandenbossche Sep 8, 2025
f76bbc8
Merge remote-tracking branch 'upstream/main' into cow-ea-readonly
jorisvandenbossche Sep 8, 2025
5cfb0f8
fix numpy test setup
jorisvandenbossche Sep 9, 2025
ab4b976
readonly -> read-only
jorisvandenbossche Sep 10, 2025
6e08cc8
add specific tests
jorisvandenbossche Sep 10, 2025
5a5170e
fix tests for old numpy and 32bit
jorisvandenbossche Sep 10, 2025
af62da8
fixup skipif
jorisvandenbossche Sep 10, 2025
0af4b39
Merge remote-tracking branch 'upstream/main' into cow-ea-readonly
jorisvandenbossche Sep 12, 2025
959451e
Merge remote-tracking branch 'upstream/main' into cow-ea-readonly
jorisvandenbossche Sep 21, 2025
a4accf8
add whatsnew note
jorisvandenbossche Sep 21, 2025
d5d4db4
Merge remote-tracking branch 'upstream/main' into cow-ea-readonly
jorisvandenbossche Sep 21, 2025
84e83c7
let getitem propagate readonly property
jorisvandenbossche Sep 21, 2025
6ec9b06
Merge remote-tracking branch 'upstream/main' into cow-ea-readonly
jorisvandenbossche Oct 8, 2025
9f78d76
fixup merge
jorisvandenbossche Oct 8, 2025
f6f300e
Merge remote-tracking branch 'upstream/main' into cow-ea-readonly
jorisvandenbossche Oct 10, 2025
c243f75
Merge remote-tracking branch 'upstream/main' into cow-ea-readonly
jorisvandenbossche Oct 21, 2025
fa063bf
fix typing
jorisvandenbossche Oct 21, 2025
ef4a36f
add test for fillna
jorisvandenbossche Oct 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1240,6 +1240,10 @@ Other
- Fixed bug in the :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`)
- Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)
- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)
- Accessing the underlying NumPy array of a DataFrame or Series will return a read-only
array if the array shares data with the original DataFrame or Series (:ref:`copy_on_write_read_only_na`).
This behavior now also applies when accessing the underlying pandas ExtensionArray
through ``.array`` (or ``.values``, depending on the dtype) (:issue:`61925`).

.. ***DO NOT USE THIS SECTION***

Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/ops.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarr

@cython.wraparound(False)
@cython.boundscheck(False)
def scalar_binop(object[:] values, object val, object op) -> ndarray:
def scalar_binop(ndarray[object] values, object val, object op) -> ndarray:
"""
Apply the given binary operator `op` between each element of the array
`values` and the scalar `val`.
Expand Down Expand Up @@ -214,7 +214,7 @@ def scalar_binop(object[:] values, object val, object op) -> ndarray:

@cython.wraparound(False)
@cython.boundscheck(False)
def vec_binop(object[:] left, object[:] right, object op) -> ndarray:
def vec_binop(ndarray[object] left, ndarray[object] right, object op) -> ndarray:
"""
Apply the given binary operator `op` pointwise to the elements of
arrays `left` and `right`.
Expand Down
10 changes: 9 additions & 1 deletion pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,9 @@ def shift(self, periods: int = 1, fill_value=None) -> Self:
return self._from_backing_data(new_values)

def __setitem__(self, key, value) -> None:
if self._readonly:
raise ValueError("Cannot modify read-only array")

key = check_array_indexer(self, key)
value = self._validate_setitem_value(value)
self._ndarray[key] = value
Expand All @@ -283,7 +286,10 @@ def __getitem__(
result = self._ndarray[key]
if self.ndim == 1:
return self._box_func(result)
return self._from_backing_data(result)
result = self._from_backing_data(result)
if self._getitem_returns_view(key):
result._readonly = self._readonly
return result

# error: Incompatible types in assignment (expression has type "ExtensionArray",
# variable has type "Union[int, slice, ndarray]")
Expand All @@ -294,6 +300,8 @@ def __getitem__(
return self._box_func(result)

result = self._from_backing_data(result)
if self._getitem_returns_view(key):
result._readonly = self._readonly
return result

def _pad_or_backfill(
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,7 +790,10 @@ def __getitem__(self, item: PositionalIndexer):

value = self._pa_array[item]
if isinstance(value, pa.ChunkedArray):
return self._from_pyarrow_array(value)
result = self._from_pyarrow_array(value)
if self._getitem_returns_view(item):
result._readonly = self._readonly
return result
else:
pa_type = self._pa_array.type
scalar = value.as_py()
Expand Down Expand Up @@ -2188,6 +2191,9 @@ def __setitem__(self, key, value) -> None:
-------
None
"""
if self._readonly:
raise ValueError("Cannot modify read-only array")

# GH50085: unwrap 1D indexers
if isinstance(key, tuple) and len(key) == 1:
key = key[0]
Expand Down
34 changes: 34 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
validate_insert_loc,
)

from pandas.core.dtypes.astype import astype_is_view
from pandas.core.dtypes.common import (
is_list_like,
is_scalar,
Expand Down Expand Up @@ -268,6 +269,8 @@ class ExtensionArray:
# strictly less than 2000 to be below Index.__pandas_priority__.
__pandas_priority__ = 1000

_readonly = False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not use arr.flags.writeable to be consistent with numpy?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because this was easier for a quick POC ;)
It would indeed keep usage more consistent, which might be a reason to add a flags attribute, so that code that needs to work with both ndarray and EA can use a single code path. But I don't think we would ever add any of the other flags that numpy has, so I'm not sure it would be worth adding a nested attribute just for this.


# ------------------------------------------------------------------------
# Constructors
# ------------------------------------------------------------------------
Expand Down Expand Up @@ -430,6 +433,22 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any:
"""
raise AbstractMethodError(self)

def _getitem_returns_view(self, key) -> bool:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we expect anyone to override this, i.e. does it need to be a method? Or is it just convenient to put it here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i guess this makes it easy for subclass authors to use

if not isinstance(key, tuple):
key = (key,)

# filter out Ellipsis and np.newaxis
key = tuple(k for k in key if k is not Ellipsis and k is not np.newaxis)
if not key:
return True
# single integer gives view if selecting subset of 2D array
if self.ndim == 2 and lib.is_integer(key[0]):
return True
# slices always give views
if all(isinstance(k, slice) for k in key):
return True
return False

def __setitem__(self, key, value) -> None:
"""
Set one or more values inplace.
Expand All @@ -454,6 +473,11 @@ def __setitem__(self, key, value) -> None:
Returns
-------
None

Raises
------
ValueError
If the array is read-only and modification is attempted.
"""
# Some notes to the ExtensionArray implementer who may have ended up
# here. While this method is not required for the interface, if you
Expand All @@ -473,6 +497,10 @@ def __setitem__(self, key, value) -> None:
# __init__ method coerces that value, then so should __setitem__
# Note, also, that Series/DataFrame.where internally use __setitem__
# on a copy of the data.
# Check if the array is readonly
if self._readonly:
raise ValueError("Cannot modify read-only array")

raise NotImplementedError(f"{type(self)} does not implement __setitem__.")

def __len__(self) -> int:
Expand Down Expand Up @@ -567,8 +595,14 @@ def to_numpy(
result = np.asarray(self, dtype=dtype)
if copy or na_value is not lib.no_default:
result = result.copy()
elif self._readonly and astype_is_view(self.dtype, result.dtype):
# If the ExtensionArray is read-only, make the numpy array read-only too
result = result.view()
result.flags.writeable = False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be done below the setting of na_value on L616?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so: in that case the result array is already a copy, so there is no need to take a read-only view of it.


if na_value is not lib.no_default:
result[self.isna()] = na_value # type: ignore[index]

return result

# ------------------------------------------------------------------------
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,12 @@ def __array__(

if copy is True:
return np.array(self._ndarray, dtype=dtype)
return self._ndarray

result = self._ndarray
if self._readonly:
result = result.view()
result.flags.writeable = False
return result

@overload
def __getitem__(self, key: ScalarIndexer) -> DTScalarOrNaT: ...
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -830,9 +830,15 @@ def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA:
# "Union[Period, Timestamp, Timedelta, NaTType, DatetimeArray, TimedeltaArray,
# ndarray[Any, Any]]"; expected "Union[Union[DatetimeArray, TimedeltaArray],
# ndarray[Any, Any]]"
return self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type]
result = self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type]
if self._getitem_returns_view(key):
result._readonly = self._readonly
return result

def __setitem__(self, key, value) -> None:
if self._readonly:
raise ValueError("Cannot modify read-only array")

value_left, value_right = self._validate_setitem_value(value)
key = check_array_indexer(self, key)

Expand Down
23 changes: 19 additions & 4 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc

from pandas.core.dtypes.astype import astype_is_view
from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.cast import maybe_downcast_to_dtype
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -206,7 +207,10 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any:
return self.dtype.na_value
return self._data[item]

return self._simple_new(self._data[item], newmask)
result = self._simple_new(self._data[item], newmask)
if self._getitem_returns_view(item):
result._readonly = self._readonly
return result

def _pad_or_backfill(
self,
Expand Down Expand Up @@ -312,6 +316,9 @@ def _validate_setitem_value(self, value):
raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'")

def __setitem__(self, key, value) -> None:
if self._readonly:
raise ValueError("Cannot modify read-only array")

key = check_array_indexer(self, key)

if is_scalar(value):
Expand Down Expand Up @@ -524,11 +531,11 @@ def to_numpy(
hasna = self._hasna
dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, hasna)
if dtype is None:
dtype = object
dtype = np.dtype(object)

if hasna:
if (
dtype != object
dtype != np.dtype(object)
and not is_string_dtype(dtype)
and na_value is libmissing.NA
):
Expand All @@ -546,6 +553,9 @@ def to_numpy(
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning)
data = self._data.astype(dtype, copy=copy)
if self._readonly and not copy and astype_is_view(self.dtype, dtype):
data = data.view()
data.flags.writeable = False
return data

@doc(ExtensionArray.tolist)
Expand Down Expand Up @@ -622,7 +632,12 @@ def __array__(
if copy is False:
if not self._hasna:
# special case, here we can simply return the underlying data
return np.array(self._data, dtype=dtype, copy=copy)
result = np.array(self._data, dtype=dtype, copy=copy)
# If the ExtensionArray is read-only, make the numpy array read-only too
if self._readonly:
result = result.view()
result.flags.writeable = False
return result
raise ValueError(
"Unable to avoid copy while creating an array as requested."
)
Expand Down
25 changes: 21 additions & 4 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
from pandas._libs.tslibs import is_supported_dtype
from pandas.compat.numpy import function as nv

from pandas.core.dtypes.astype import astype_array
from pandas.core.dtypes.astype import (
astype_array,
astype_is_view,
)
from pandas.core.dtypes.cast import (
construct_1d_object_array_from_listlike,
maybe_downcast_to_dtype,
Expand Down Expand Up @@ -179,12 +182,23 @@ def dtype(self) -> NumpyEADtype:
# NumPy Array Interface

def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
self, dtype: np.dtype | None = None, copy: bool | None = None
) -> np.ndarray:
if copy is not None:
# Note: branch avoids `copy=None` for NumPy 1.x support
return np.array(self._ndarray, dtype=dtype, copy=copy)
return np.asarray(self._ndarray, dtype=dtype)
result = np.array(self._ndarray, dtype=dtype, copy=copy)
else:
result = np.asarray(self._ndarray, dtype=dtype)

if (
self._readonly
and not copy
and (dtype is None or astype_is_view(self.dtype, dtype))
):
result = result.view()
result.flags.writeable = False

return result

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
# Lightly modified version of
Expand Down Expand Up @@ -545,6 +559,9 @@ def to_numpy(
result[mask] = na_value
else:
result = self._ndarray
if not copy and self._readonly:
result = result.view()
result.flags.writeable = False

result = np.asarray(result, dtype=dtype)

Expand Down
6 changes: 5 additions & 1 deletion pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,11 @@ def __array__(
# For NumPy 1.x compatibility we cannot use copy=None. And
# `copy=False` has the meaning of `copy=None` here:
if not copy:
return np.asarray(self.asi8, dtype=dtype)
result = np.asarray(self.asi8, dtype=dtype)
if self._readonly:
result = result.view()
result.flags.writeable = False
return result
else:
return np.array(self.asi8, dtype=dtype)

Expand Down
15 changes: 14 additions & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,11 @@ def __array__(
if copy is True:
return np.array(self.sp_values)
else:
return self.sp_values
result = self.sp_values
if self._readonly:
result = result.view()
result.flags.writeable = False
return result

if copy is False:
raise ValueError(
Expand Down Expand Up @@ -597,6 +601,8 @@ def __array__(
return out

def __setitem__(self, key, value) -> None:
if self._readonly:
raise ValueError("Cannot modify read-only array")
# I suppose we could allow setting of non-fill_value elements.
# TODO(SparseArray.__setitem__): remove special cases in
# ExtensionBlock.where
Expand Down Expand Up @@ -983,6 +989,13 @@ def __getitem__(
elif isinstance(key, tuple):
data_slice = self.to_dense()[key]
elif isinstance(key, slice):
if key == slice(None):
# to ensure arr[:] (used by view()) does not make a copy
result = type(self)._simple_new(
self.sp_values, self.sp_index, self.dtype
)
result._readonly = self._readonly
return result
# Avoid densifying when handling contiguous slices
if key.step is None or key.step == 1:
start = 0 if key.start is None else key.start
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,9 @@ def _maybe_convert_setitem_value(self, value):
return value

def __setitem__(self, key, value) -> None:
if self._readonly:
raise ValueError("Cannot modify read-only array")

value = self._maybe_convert_setitem_value(value)

key = check_array_indexer(self, key)
Expand Down
14 changes: 14 additions & 0 deletions pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,12 @@
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
IntervalDtype,
NumpyEADtype,
PeriodDtype,
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -283,6 +287,16 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
new_dtype = getattr(new_dtype, "numpy_dtype", new_dtype)
return getattr(dtype, "unit", None) == getattr(new_dtype, "unit", None)

elif new_dtype == object and isinstance(
dtype, (DatetimeTZDtype, PeriodDtype, IntervalDtype)
):
return False

elif isinstance(dtype, CategoricalDtype) and not isinstance(
new_dtype, CategoricalDtype
):
return False

numpy_dtype = getattr(dtype, "numpy_dtype", None)
new_numpy_dtype = getattr(new_dtype, "numpy_dtype", None)

Expand Down
Loading
Loading