Skip to content

Commit 4bdac7c

Browse files
[backport 2.3.x] String dtype: more informative repr (keeping brief __str__) (#61148) (#62329)
1 parent 6690762 commit 4bdac7c

File tree

5 files changed: 25 additions and 19 deletions.

pandas/core/arrays/string_.py

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -119,10 +119,10 @@ class StringDtype(StorageExtensionDtype):
119119
Examples
120120
--------
121121
>>> pd.StringDtype()
122-
string[python]
122+
<StringDtype(storage='python', na_value=<NA>)>
123123
124124
>>> pd.StringDtype(storage="pyarrow")
125-
string[pyarrow]
125+
<StringDtype(na_value=<NA>)>
126126
"""
127127

128128
@property
@@ -194,11 +194,8 @@ def __init__(
194194
self._na_value = na_value
195195

196196
def __repr__(self) -> str:
197-
if self._na_value is libmissing.NA:
198-
return f"{self.name}[{self.storage}]"
199-
else:
200-
# TODO add more informative repr
201-
return self.name
197+
storage = "" if self.storage == "pyarrow" else "storage='python', "
198+
return f"<StringDtype({storage}na_value={self._na_value})>"
202199

203200
def __eq__(self, other: object) -> bool:
204201
# we need to override the base class __eq__ because na_value (NA or NaN)

pandas/core/generic.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7021,12 +7021,12 @@ def convert_dtypes(
70217021
2 3 z <NA> <NA> 20 200.0
70227022
70237023
>>> dfn.dtypes
7024-
a Int32
7025-
b string[python]
7026-
c boolean
7027-
d string[python]
7028-
e Int64
7029-
f Float64
7024+
a Int32
7025+
b string
7026+
c boolean
7027+
d string
7028+
e Int64
7029+
f Float64
70307030
dtype: object
70317031
70327032
Start with a Series of strings and missing data represented by ``np.nan``.

pandas/io/formats/format.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,6 @@
6666
ExtensionArray,
6767
TimedeltaArray,
6868
)
69-
from pandas.core.arrays.string_ import StringDtype
7069
from pandas.core.base import PandasObject
7170
import pandas.core.common as com
7271
from pandas.core.indexes.api import (
@@ -1232,8 +1231,6 @@ def _format(x):
12321231
return self.na_rep
12331232
elif isinstance(x, PandasObject):
12341233
return str(x)
1235-
elif isinstance(x, StringDtype):
1236-
return repr(x)
12371234
else:
12381235
# object dtype
12391236
return str(formatter(x))

pandas/tests/arrays/string_/test_string.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,18 @@ def test_repr(dtype):
123123
assert repr(df.A.array) == expected
124124

125125

126+
def test_dtype_repr(dtype):
127+
if dtype.storage == "pyarrow":
128+
if dtype.na_value is pd.NA:
129+
assert repr(dtype) == "<StringDtype(na_value=<NA>)>"
130+
else:
131+
assert repr(dtype) == "<StringDtype(na_value=nan)>"
132+
elif dtype.na_value is pd.NA:
133+
assert repr(dtype) == "<StringDtype(storage='python', na_value=<NA>)>"
134+
else:
135+
assert repr(dtype) == "<StringDtype(storage='python', na_value=nan)>"
136+
137+
126138
def test_none_to_nan(cls, dtype):
127139
a = cls._from_sequence(["a", None, "b"], dtype=dtype)
128140
assert a[1] is not None

pandas/tests/io/formats/test_to_string.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -758,9 +758,9 @@ def test_to_string_string_dtype(self):
758758
result = df.dtypes.to_string()
759759
expected = dedent(
760760
"""\
761-
x string[pyarrow]
762-
y string[python]
763-
z int64[pyarrow]"""
761+
x string
762+
y string
763+
z int64[pyarrow]"""
764764
)
765765
assert result == expected
766766

0 commit comments

Comments
 (0)