9090 ExtensionArray ,
9191)
9292from pandas .core .arrays ._mixins import NDArrayBackedExtensionArray
93+ from pandas .core .arrays .string_ import StringDtype
9394import pandas .core .common as com
9495from pandas .core .construction import (
9596 ensure_wrapped_if_datetimelike ,
@@ -2399,21 +2400,9 @@ def _factorize_keys(
23992400 rk = ensure_int64 (rk .codes )
24002401
24012402 elif isinstance (lk , ExtensionArray ) and lk .dtype == rk .dtype :
2402- if not isinstance (lk , BaseMaskedArray ) and not (
2403- # exclude arrow dtypes that would get cast to object
2404- isinstance (lk .dtype , ArrowDtype )
2405- and (
2406- is_numeric_dtype (lk .dtype .numpy_dtype )
2407- or is_string_dtype (lk .dtype )
2408- and not sort
2409- )
2403+ if (isinstance (lk .dtype , ArrowDtype ) and is_string_dtype (lk .dtype )) or (
2404+ isinstance (lk .dtype , StringDtype ) and lk .dtype .storage == "pyarrow"
24102405 ):
2411- lk , _ = lk ._values_for_factorize ()
2412-
2413- # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute
2414- # "_values_for_factorize"
2415- rk , _ = rk ._values_for_factorize () # type: ignore[union-attr]
2416- elif isinstance (lk .dtype , ArrowDtype ) and is_string_dtype (lk .dtype ):
24172406 import pyarrow as pa
24182407 import pyarrow .compute as pc
24192408
@@ -2436,6 +2425,21 @@ def _factorize_keys(
24362425 return rlab , llab , count
24372426 return llab , rlab , count
24382427
2428+ if not isinstance (lk , BaseMaskedArray ) and not (
2429+ # exclude arrow dtypes that would get cast to object
2430+ isinstance (lk .dtype , ArrowDtype )
2431+ and (
2432+ is_numeric_dtype (lk .dtype .numpy_dtype )
2433+ or is_string_dtype (lk .dtype )
2434+ and not sort
2435+ )
2436+ ):
2437+ lk , _ = lk ._values_for_factorize ()
2438+
2439+ # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute
2440+ # "_values_for_factorize"
2441+ rk , _ = rk ._values_for_factorize () # type: ignore[union-attr]
2442+
24392443 if needs_i8_conversion (lk .dtype ) and lk .dtype == rk .dtype :
24402444 # GH#23917 TODO: Needs tests for non-matching dtypes
24412445 # GH#23917 TODO: needs tests for case where lk is integer-dtype
0 commit comments