diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 68ca06564d3a6..3b1af77e5f70e 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -73,10 +73,12 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Period.freq GL08" \ -i "pandas.Period.ordinal GL08" \ -i "pandas.errors.IncompatibleFrequency SA01,SS06,EX01" \ + -i "pandas.errors.InvalidVersion GL08" \ -i "pandas.api.extensions.ExtensionArray.value_counts EX01,RT03,SA01" \ - -i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \ - -i "pandas.core.groupby.SeriesGroupBy.plot PR02" \ - -i "pandas.core.resample.Resampler.quantile PR01,PR07" \ + -i "pandas.api.typing.DataFrameGroupBy.plot PR02" \ + -i "pandas.api.typing.SeriesGroupBy.plot PR02" \ + -i "pandas.api.typing.Resampler.quantile PR01,PR07" \ + -i "pandas.arrays.NumpyExtensionArray GL08" \ -i "pandas.tseries.offsets.BDay PR02,SA01" \ -i "pandas.tseries.offsets.BHalfYearBegin.is_on_offset GL08" \ -i "pandas.tseries.offsets.BHalfYearBegin.n GL08" \ diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst index 004651ac0074f..181ae030e1227 100644 --- a/doc/source/reference/groupby.rst +++ b/doc/source/reference/groupby.rst @@ -5,7 +5,7 @@ ======= GroupBy ======= -.. currentmodule:: pandas.core.groupby +.. currentmodule:: pandas.api.typing :class:`pandas.api.typing.DataFrameGroupBy` and :class:`pandas.api.typing.SeriesGroupBy` instances are returned by groupby calls :func:`pandas.DataFrame.groupby` and @@ -40,7 +40,7 @@ Function application helper NamedAgg -.. currentmodule:: pandas.core.groupby +.. currentmodule:: pandas.api.typing Function application -------------------- diff --git a/doc/source/reference/resampling.rst b/doc/source/reference/resampling.rst index 2e0717081b129..b6cc8c6a1addb 100644 --- a/doc/source/reference/resampling.rst +++ b/doc/source/reference/resampling.rst @@ -5,7 +5,7 @@ ========== Resampling ========== -.. currentmodule:: pandas.core.resample +.. 
currentmodule:: pandas.api.typing :class:`pandas.api.typing.Resampler` instances are returned by resample calls: :func:`pandas.DataFrame.resample`, :func:`pandas.Series.resample`. diff --git a/doc/source/reference/window.rst b/doc/source/reference/window.rst index 2bd63f02faf69..80ecf236a0daf 100644 --- a/doc/source/reference/window.rst +++ b/doc/source/reference/window.rst @@ -17,7 +17,7 @@ calls: :func:`pandas.DataFrame.ewm` and :func:`pandas.Series.ewm`. Rolling window functions ------------------------ -.. currentmodule:: pandas.core.window.rolling +.. currentmodule:: pandas.api.typing .. autosummary:: :toctree: api/ @@ -48,7 +48,8 @@ Rolling window functions Weighted window functions ------------------------- -.. currentmodule:: pandas.core.window.rolling +.. currentmodule:: pandas.api.typing + .. autosummary:: :toctree: api/ @@ -62,7 +63,8 @@ Weighted window functions Expanding window functions -------------------------- -.. currentmodule:: pandas.core.window.expanding +.. currentmodule:: pandas.api.typing + .. autosummary:: :toctree: api/ @@ -93,7 +95,8 @@ Expanding window functions Exponentially-weighted window functions --------------------------------------- -.. currentmodule:: pandas.core.window.ewm +.. currentmodule:: pandas.api.typing + .. autosummary:: :toctree: api/ diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst index 9c37f317a805e..a500fd6baac2c 100644 --- a/doc/source/user_guide/enhancingperf.rst +++ b/doc/source/user_guide/enhancingperf.rst @@ -455,7 +455,7 @@ by evaluate arithmetic and boolean expression all at once for large :class:`~pan :func:`~pandas.eval` is many orders of magnitude slower for smaller expressions or objects than plain Python. A good rule of thumb is to only use :func:`~pandas.eval` when you have a - :class:`~pandas.core.frame.DataFrame` with more than 10,000 rows. + :class:`~pandas.DataFrame` with more than 10,000 rows. 
Supported syntax ~~~~~~~~~~~~~~~~ diff --git a/meson.build b/meson.build index 6a00e52481108..156dbb6c63e7c 100644 --- a/meson.build +++ b/meson.build @@ -18,6 +18,9 @@ versioneer = files('generate_version.py') add_project_arguments('-DNPY_NO_DEPRECATED_API=0', language: 'c') add_project_arguments('-DNPY_NO_DEPRECATED_API=0', language: 'cpp') +# Enables setting __module__ on cdef classes +# https://github.com/cython/cython/issues/7231 +add_project_arguments('-DCYTHON_USE_TYPE_SPECS=1', language: 'c') # Allow supporting older numpys than the version compiled against # Set the define to the min supported version of numpy for pandas diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 6d9e2a69abf47..35949d7683abc 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -117,6 +117,8 @@ class OptionError(AttributeError, KeyError): OptionError: No such option """ + __module__ = "pandas.errors" + # # User API @@ -441,6 +443,8 @@ def __dir__(self) -> list[str]: options = DictWrapper(_global_config) +# DictWrapper defines a custom setattr +object.__setattr__(options, "__module__", "pandas") # # Functions for use by pandas developers, in addition to User - api diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 5d0876591a151..a1cd4c9d15447 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -382,6 +382,7 @@ cdef class Interval(IntervalMixin): >>> year_2017.length Timedelta('365 days 00:00:00') """ + __module__ = "pandas" _typ = "interval" __array_priority__ = 1000 @@ -444,6 +445,7 @@ cdef class Interval(IntervalMixin): >>> interval.closed 'left' """ + __module__ = "pandas" def __init__(self, left, right, str closed="right"): # note: it is faster to just do these checks than to use a special diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 039720017aa7b..ae0bbeb7afa4c 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2944,7 +2944,9 @@ class _NoDefault(Enum): # Note:
no_default is exported to the public API in pandas.api.extensions no_default = _NoDefault.no_default # Sentinel indicating the default value. +no_default.__module__ = "pandas.api.extensions" NoDefault = Literal[_NoDefault.no_default] +NoDefault.__module__ = "pandas.api.typing" @cython.boundscheck(False) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index a7aea7e766304..a67c533d03e0e 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -393,7 +393,7 @@ class NAType(C_NAType): >>> True | pd.NA True """ - __module__ = "pandas" + __module__ = "pandas.api.typing" _instance = None @@ -546,3 +546,4 @@ class NAType(C_NAType): C_NA = NAType() # C-visible NA = C_NA # Python-visible +NA.__module__ = "pandas" diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 3e22c13e37f76..a0265297fe873 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -372,7 +372,7 @@ class NaTType(_NaT): 1 NaT """ - __module__ = "pandas" + __module__ = "pandas.api.typing" def __new__(cls): cdef _NaT base @@ -1871,6 +1871,7 @@ default 'raise' c_NaT = NaTType() # C-visible NaT = c_NaT # Python-visible +NaT.__module__ = "pandas" # ---------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 1b7f04fe17238..0fc7a6945d2e0 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -192,6 +192,7 @@ class OutOfBoundsDatetime(ValueError): OutOfBoundsDatetime: Parsing "08335394550" to datetime overflows, at position 0 """ + __module__ = "pandas.errors" pass @@ -212,6 +213,7 @@ class OutOfBoundsTimedelta(ValueError): OutOfBoundsTimedelta: Cannot cast 139999 days 00:00:00 to unit='ns' without overflow. 
""" + __module__ = "pandas.errors" # Timedelta analogue to OutOfBoundsDatetime pass diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index a44d819c7899a..ad3fb72d31559 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1819,6 +1819,8 @@ class DateOffset(RelativeDeltaOffset, metaclass=OffsetMeta): >>> ts + pd.DateOffset(hour=8) Timestamp('2017-01-01 08:10:11') """ + __module__ = "pandas" + def __setattr__(self, name, value): raise AttributeError("DateOffset objects are immutable.") diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 95a18f8cb2cad..facf430060e73 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1631,6 +1631,7 @@ class IncompatibleFrequency(TypeError): Raised when trying to compare or operate between Periods with different frequencies. """ + __module__ = "pandas.errors" pass diff --git a/pandas/_typing.py b/pandas/_typing.py index c9af531fd90b7..23598bd2bc517 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -107,6 +107,8 @@ class SequenceNotStr(Protocol[_T_co]): + __module__: str = "pandas.api.typing.aliases" + @overload def __getitem__(self, index: SupportsIndex, /) -> _T_co: ... @@ -278,12 +280,16 @@ def tell(self) -> int: class ReadBuffer(BaseBuffer, Protocol[AnyStr_co]): + __module__: str = "pandas.api.typing.aliases" + def read(self, n: int = ..., /) -> AnyStr_co: # for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File ... class WriteBuffer(BaseBuffer, Protocol[AnyStr_contra]): + __module__: str = "pandas.api.typing.aliases" + def write(self, b: AnyStr_contra, /) -> Any: # for gzip.GzipFile, bz2.BZ2File ... @@ -294,14 +300,20 @@ def flush(self) -> Any: class ReadPickleBuffer(ReadBuffer[bytes], Protocol): + __module__: str = "pandas.api.typing.aliases" + def readline(self) -> bytes: ... 
class WriteExcelBuffer(WriteBuffer[bytes], Protocol): + __module__: str = "pandas.api.typing.aliases" + def truncate(self, size: int | None = ..., /) -> int: ... class ReadCsvBuffer(ReadBuffer[AnyStr_co], Protocol): + __module__: str = "pandas.api.typing.aliases" + def __iter__(self) -> Iterator[AnyStr_co]: # for engine=python ... diff --git a/pandas/core/apply.py b/pandas/core/apply.py index b305cbfaa3a1e..468f24a07cb4a 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -88,6 +88,8 @@ class BaseExecutionEngine(abc.ABC): simply runs the code with the Python interpreter and pandas. """ + __module__ = "pandas.api.executors" + @staticmethod @abc.abstractmethod def map( diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 63052c4f4fea9..08f1ffdcc6a0d 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -296,6 +296,8 @@ class ArrowExtensionArray( Length: 3, dtype: int64[pyarrow] """ # noqa: E501 (http link too long) + __module__ = "pandas.arrays" + _pa_array: pa.ChunkedArray _dtype: ArrowDtype diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e8ca51ef92a94..e091ecf18668d 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -256,6 +256,8 @@ class ExtensionArray: https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/list/array.py """ + __module__ = "pandas.api.extensions" + # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. # Don't override this. _typ = "extension" @@ -1806,7 +1808,7 @@ def take( .. code-block:: python def take(self, indices, allow_fill=False, fill_value=None): - from pandas.core.algorithms import take + from pandas.api.extensions import take # If the ExtensionArray is backed by an ndarray, then # just pass that here instead of coercing to object. @@ -2812,6 +2814,8 @@ class ExtensionScalarOpsMixin(ExtensionOpsMixin): with NumPy arrays. 
""" + __module__ = "pandas.api.extensions" + @classmethod def _create_method(cls, op, coerce_to_dtype: bool = True, result_dtype=None): """ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index aca2cafe80889..aa28e846413f0 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -304,6 +304,8 @@ class BooleanArray(BaseMaskedArray): Length: 3, dtype: boolean """ + __module__ = "pandas.arrays" + _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"} _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"} diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d59499ed99c75..b570cc90e4948 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -361,6 +361,8 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi 'c' """ + __module__ = "pandas" + # For comparisons, so that numpy uses our implementation if the compare # ops, which raise __array_priority__ = 1000 diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 64d0347aa815e..4cf5f4b13890e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -223,6 +223,8 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): Length: 2, dtype: datetime64[s] """ + __module__ = "pandas.arrays" + _typ = "datetimearray" _internal_fill_value = np.datetime64("NaT", "ns") _recognized_scalars = (datetime, np.datetime64) diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index ed6ed6b22ad48..e547c3fe76089 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -129,6 +129,8 @@ class FloatingArray(NumericArray): Length: 3, dtype: Float32 """ + __module__ = "pandas.arrays" + _dtype_cls = FloatingDtype diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 366b508f9d400..7a8ca85a83db5 100644 --- a/pandas/core/arrays/integer.py +++ 
b/pandas/core/arrays/integer.py @@ -142,6 +142,8 @@ class IntegerArray(NumericArray): Length: 3, dtype: UInt16 """ + __module__ = "pandas.arrays" + _dtype_cls = IntegerDtype diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index ace868bda52d3..b0472c70557e5 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -243,6 +243,8 @@ class IntervalArray(IntervalMixin, ExtensionArray): :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`. """ + __module__ = "pandas.arrays" + can_hold_na = True _na_value = _fill_value = np.nan diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index fab51ffa56919..eca47d3c9657f 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -57,6 +57,7 @@ class NumpyExtensionArray( NDArrayBackedExtensionArray, ObjectStringArrayMixin, ): + __module__ = "pandas.arrays" """ A pandas ExtensionArray for NumPy data. diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 90388336ba83d..18e4ff31164ac 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -177,6 +177,8 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc] Length: 2, dtype: period[D] """ + __module__ = "pandas.arrays" + # array priority higher than numpy scalars __array_priority__ = 1000 _typ = "periodarray" # ABCPeriodArray diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index e6ff67af78700..396a8b67c2d2d 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -380,6 +380,8 @@ class SparseArray(OpsMixin, PandasObject, ExtensionArray): Indices: array([2, 3], dtype=int32) """ + __module__ = "pandas.arrays" + _subtyp = "sparse_array" # register ABCSparseArray _hidden_attrs = PandasObject._hidden_attrs | frozenset([]) _sparse_index: SparseIndex diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 
efcfddbebb0d4..ec591d7711fa9 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -633,6 +633,8 @@ class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc] Length: 3, dtype: boolean """ + __module__ = "pandas.arrays" + # undo the NumpyExtensionArray hack _typ = "extension" diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 9046d83dcc09f..489556536ca31 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -125,6 +125,8 @@ class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringAr Length: 4, dtype: string """ + __module__ = "pandas.arrays" + # error: Incompatible types in assignment (expression has type "StringDtype", # base class "ArrowExtensionArray" defined the type as "ArrowDtype") _dtype: StringDtype # type: ignore[assignment] diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 2b942041edf89..9b3452c318f9c 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -147,6 +147,8 @@ class TimedeltaArray(dtl.TimelikeOps): Length: 2, dtype: timedelta64[ns] """ + __module__ = "pandas.arrays" + _typ = "timedeltaarray" _internal_fill_value = np.timedelta64("NaT", "ns") _recognized_scalars = (timedelta, np.timedelta64, Tick) diff --git a/pandas/core/col.py b/pandas/core/col.py index eec1d241df92d..0182188dd0317 100644 --- a/pandas/core/col.py +++ b/pandas/core/col.py @@ -76,6 +76,8 @@ class Expression: This is not meant to be instantiated directly. Instead, use :meth:`pandas.col`. """ + __module__ = "pandas.api.typing" + def __init__(self, func: Callable[[DataFrame], Any], repr_str: str) -> None: self._func = func self._repr_str = repr_str diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index ae48ad153e2d6..709f96125da39 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -111,6 +111,8 @@ class property**. 
provided for registering virtual subclasses. """ + __module__ = "pandas.api.extensions" + _metadata: tuple[str, ...] = () def __str__(self) -> str: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 447b7f90f2314..fc7cc59ecfb6a 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -359,7 +359,7 @@ def is_datetime64tz_dtype(arr_or_dtype) -> bool: >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) True - >>> from pandas.core.dtypes.dtypes import DatetimeTZDtype + >>> from pandas import DatetimeTZDtype >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") >>> s = pd.Series([], dtype=dtype) >>> is_datetime64tz_dtype(dtype) @@ -407,7 +407,7 @@ def is_timedelta64_dtype(arr_or_dtype) -> bool: Examples -------- - >>> from pandas.core.dtypes.common import is_timedelta64_dtype + >>> from pandas.api.types import is_timedelta64_dtype >>> is_timedelta64_dtype(object) False >>> is_timedelta64_dtype(np.timedelta64) @@ -452,7 +452,7 @@ def is_period_dtype(arr_or_dtype) -> bool: Examples -------- - >>> from pandas.core.dtypes.common import is_period_dtype + >>> from pandas.api.types import is_period_dtype >>> is_period_dtype(object) False >>> is_period_dtype(pd.PeriodDtype(freq="D")) @@ -507,7 +507,7 @@ def is_interval_dtype(arr_or_dtype) -> bool: Examples -------- - >>> from pandas.core.dtypes.common import is_interval_dtype + >>> from pandas.api.types import is_interval_dtype >>> is_interval_dtype(object) False >>> is_interval_dtype(pd.IntervalDtype()) @@ -684,10 +684,10 @@ def is_dtype_equal(source, target) -> bool: True >>> is_dtype_equal(object, "category") False - >>> from pandas.core.dtypes.dtypes import CategoricalDtype + >>> from pandas.api.types import CategoricalDtype >>> is_dtype_equal(CategoricalDtype(), "category") True - >>> from pandas.core.dtypes.dtypes import DatetimeTZDtype + >>> from pandas.api.types import DatetimeTZDtype >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64") False """ 
@@ -811,7 +811,7 @@ def is_signed_integer_dtype(arr_or_dtype) -> bool: Examples -------- - >>> from pandas.core.dtypes.common import is_signed_integer_dtype + >>> from pandas.api.types import is_signed_integer_dtype >>> is_signed_integer_dtype(str) False >>> is_signed_integer_dtype(int) @@ -1006,7 +1006,7 @@ def is_datetime64_any_dtype(arr_or_dtype) -> bool: Examples -------- >>> from pandas.api.types import is_datetime64_any_dtype - >>> from pandas.core.dtypes.dtypes import DatetimeTZDtype + >>> from pandas.api.types import DatetimeTZDtype >>> is_datetime64_any_dtype(str) False >>> is_datetime64_any_dtype(int) @@ -1066,7 +1066,7 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool: Examples -------- >>> from pandas.api.types import is_datetime64_ns_dtype - >>> from pandas.core.dtypes.dtypes import DatetimeTZDtype + >>> from pandas.api.types import DatetimeTZDtype >>> is_datetime64_ns_dtype(str) False >>> is_datetime64_ns_dtype(int) @@ -1121,7 +1121,7 @@ def is_timedelta64_ns_dtype(arr_or_dtype) -> bool: Examples -------- - >>> from pandas.core.dtypes.common import is_timedelta64_ns_dtype + >>> from pandas.api.types import is_timedelta64_ns_dtype >>> is_timedelta64_ns_dtype(np.dtype("m8[ns]")) True >>> is_timedelta64_ns_dtype(np.dtype("m8[ps]")) # Wrong frequency diff --git a/pandas/core/flags.py b/pandas/core/flags.py index eceb86dc61d9f..a98380e9f7d16 100644 --- a/pandas/core/flags.py +++ b/pandas/core/flags.py @@ -55,6 +55,8 @@ class Flags: """ + __module__ = "pandas" + _keys: set[str] = {"allows_duplicate_labels"} def __init__(self, obj: NDFrame, *, allows_duplicate_labels: bool) -> None: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 95bb51f29db40..a45ce1f385e4d 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -253,6 +253,8 @@ class Grouper: Freq: 17min, dtype: int64 """ + __module__ = "pandas" + sort: bool dropna: bool _grouper: Index | None diff --git a/pandas/core/indexers/objects.py 
b/pandas/core/indexers/objects.py index 99130da641efb..2c2413c74f2fa 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -58,6 +58,8 @@ class BaseIndexer: 4 4.0 """ + __module__ = "pandas.api.indexers" + def __init__( self, index_array: np.ndarray | None = None, window_size: int = 0, **kwargs ) -> None: @@ -271,6 +273,8 @@ class VariableOffsetWindowIndexer(BaseIndexer): 2020-01-10 9.0 """ + __module__ = "pandas.api.indexers" + def __init__( self, index_array: np.ndarray | None = None, @@ -478,6 +482,8 @@ class FixedForwardWindowIndexer(BaseIndexer): 4 4.0 """ + __module__ = "pandas.api.indexers" + def get_window_bounds( self, num_values: int = 0, diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index f75699a9b745f..fe06e235e0fd9 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -26,6 +26,8 @@ class FrozenList(PandasObject, list): for lookups, appropriately, etc. """ + __module__ = "pandas.api.typing" + # Side note: This has to be of type list. Otherwise, # it messes up PyTables type checks. diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 4560d3cc3479a..a817cae51be5c 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -137,6 +137,8 @@ class RangeIndex(Index): [] """ + __module__ = "pandas" + _typ = "rangeindex" _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer") _range: range diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 9246309c0c7f1..3f9749f1f7a99 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -30,7 +30,6 @@ from pandas.errors.cow import _chained_assignment_msg from pandas.util._decorators import ( doc, - set_module, ) from pandas.core.dtypes.cast import ( @@ -104,7 +103,6 @@ # the public IndexSlicerMaker -@set_module("pandas") class _IndexSlice: """ Create an object to more easily perform multi-index slicing. 
@@ -153,6 +151,7 @@ def __getitem__(self, arg): IndexSlice = _IndexSlice() +IndexSlice.__module__ = "pandas" class IndexingMixin: diff --git a/pandas/core/interchange/dataframe_protocol.py b/pandas/core/interchange/dataframe_protocol.py index 95e7b6a26f93a..15bd323d5fade 100644 --- a/pandas/core/interchange/dataframe_protocol.py +++ b/pandas/core/interchange/dataframe_protocol.py @@ -377,6 +377,8 @@ class DataFrame(ABC): to the dataframe interchange protocol specification. """ + __module__ = "pandas.api.interchange" + version = 0 # version of the protocol @abstractmethod diff --git a/pandas/core/resample.py b/pandas/core/resample.py index c85f6b36f0947..3a4ce952ffdcf 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -133,6 +133,8 @@ class Resampler(BaseGroupBy, PandasObject): After resampling, see aggregate, apply, and transform functions. """ + __module__ = "pandas.api.typing" + _grouper: BinGrouper _timegrouper: TimeGrouper binner: DatetimeIndex | TimedeltaIndex | PeriodIndex # depends on subclass @@ -2173,6 +2175,8 @@ class DatetimeIndexResamplerGroupby( # type: ignore[misc] Provides a resample of a groupby implementation """ + __module__ = "pandas.api.typing" + @property def _resampler_cls(self): return DatetimeIndexResampler @@ -2274,6 +2278,8 @@ class PeriodIndexResamplerGroupby( # type: ignore[misc] Provides a resample of a groupby implementation. """ + __module__ = "pandas.api.typing" + @property def _resampler_cls(self): return PeriodIndexResampler @@ -2310,6 +2316,8 @@ class TimedeltaIndexResamplerGroupby( # type: ignore[misc] Provides a resample of a groupby implementation. 
""" + __module__ = "pandas.api.typing" + @property def _resampler_cls(self): return TimedeltaIndexResampler @@ -2357,6 +2365,8 @@ class TimeGrouper(Grouper): If axis is PeriodIndex """ + __module__ = "pandas.api.typing" + _attributes = Grouper._attributes + ( "closed", "label", diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 73e4de6ea6208..1ea05e24d0db5 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -316,6 +316,8 @@ class ExponentialMovingWindow(BaseWindow): 4 3.233686 """ + __module__ = "pandas.api.typing" + _attributes = [ "com", "span", @@ -907,6 +909,8 @@ class ExponentialMovingWindowGroupby(BaseWindowGroupby, ExponentialMovingWindow) Provide an exponential moving window groupby implementation. """ + __module__ = "pandas.api.typing" + _attributes = ExponentialMovingWindow._attributes + BaseWindowGroupby._attributes def __init__(self, obj, *args, _grouper=None, **kwargs) -> None: diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index afab2295a8f69..567ede27949d5 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -106,6 +106,8 @@ class Expanding(RollingAndExpandingMixin): 4 7.0 """ + __module__ = "pandas.api.typing" + _attributes: list[str] = ["min_periods", "method"] def __init__( @@ -1455,6 +1457,8 @@ class ExpandingGroupby(BaseWindowGroupby, Expanding): Provide a expanding groupby implementation. 
""" + __module__ = "pandas.api.typing" + _attributes = Expanding._attributes + BaseWindowGroupby._attributes def _get_window_indexer(self) -> GroupbyIndexer: diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index d3c417a008916..e6f84941f6b1a 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1111,6 +1111,8 @@ class Window(BaseWindow): 2020-01-03 2020-01-02 6.0 """ + __module__ = "pandas.api.typing" + _attributes = [ "window", "min_periods", @@ -1330,7 +1332,7 @@ def sum(self, numeric_only: bool = False, **kwargs): to pass the parameter `win_type`. >>> type(ser.rolling(2, win_type="gaussian")) - + In order to use the `SciPy` Gaussian window we need to provide the parameters `M` and `std`. The parameter `M` corresponds to 2 in our example. @@ -1390,7 +1392,7 @@ def mean(self, numeric_only: bool = False, **kwargs): to pass the parameter `win_type`. >>> type(ser.rolling(2, win_type="gaussian")) - + In order to use the `SciPy` Gaussian window we need to provide the parameters `M` and `std`. The parameter `M` corresponds to 2 in our example. @@ -1452,7 +1454,7 @@ def var(self, ddof: int = 1, numeric_only: bool = False, **kwargs): to pass the parameter `win_type`. >>> type(ser.rolling(2, win_type="gaussian")) - + In order to use the `SciPy` Gaussian window we need to provide the parameters `M` and `std`. The parameter `M` corresponds to 2 in our example. @@ -1507,7 +1509,7 @@ def std(self, ddof: int = 1, numeric_only: bool = False, **kwargs): to pass the parameter `win_type`. >>> type(ser.rolling(2, win_type="gaussian")) - + In order to use the `SciPy` Gaussian window we need to provide the parameters `M` and `std`. The parameter `M` corresponds to 2 in our example. 
@@ -1968,6 +1970,7 @@ def corr_func(x, y): class Rolling(RollingAndExpandingMixin): + __module__ = "pandas.api.typing" _attributes: list[str] = [ "window", "min_periods", @@ -3534,6 +3537,8 @@ class RollingGroupby(BaseWindowGroupby, Rolling): Provide a rolling groupby implementation. """ + __module__ = "pandas.api.typing" + _attributes = Rolling._attributes + BaseWindowGroupby._attributes def _get_window_indexer(self) -> GroupbyIndexer: diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 481f6a3a0aa61..bfa61253c9c1f 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -829,6 +829,8 @@ class JsonReader(abc.Iterator, Generic[FrameSeriesStrT]): whole document. """ + __module__ = "pandas.api.typing" + def __init__( self, filepath_or_buffer, diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 46b22310cbfca..936cc4868daf2 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -42,6 +42,8 @@ class SASReader(Iterator["DataFrame"], ABC): Abstract class for XportReader and SAS7BDATReader. """ + __module__ = "pandas.api.typing" + @abstractmethod def read(self, nrows: int | None = None) -> DataFrame: ... diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 1a62427b08057..1f953650365ef 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1019,6 +1019,7 @@ def __init__(self) -> None: class StataReader(StataParser, abc.Iterator): + __module__ = "pandas.api.typing" __doc__ = _stata_reader_doc _path_or_buf: IO[bytes] # Will be assigned by `_open_file`. 
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index b46af93c447d4..2084088d6a823 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -971,6 +971,8 @@ class PlotAccessor(PandasObject): >>> plot = df.groupby("col2").plot(kind="bar", title="DataFrameGroupBy Plot") """ + __module__ = "pandas.plotting" + _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box") _series_kinds = ("pie",) _dataframe_kinds = ("scatter", "hexbin") diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 0f2d824f37ffc..f5f62b168450d 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -766,3 +766,4 @@ def use(self, key, value) -> Generator[_Options]: plot_params = _Options() +plot_params.__module__ = "pandas.plotting" diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index b347e24bc5268..6cf182b65cdb9 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -1,5 +1,10 @@ from __future__ import annotations +import importlib +import inspect +import pathlib +import pkgutil + import pytest import pandas as pd @@ -564,3 +569,80 @@ def test_set_module(): assert pd.read_xml.__module__ == "pandas" assert api.typing.SeriesGroupBy.__module__ == "pandas.api.typing" assert api.typing.DataFrameGroupBy.__module__ == "pandas.api.typing" + + +def get_pandas_objects( + module_name: str, recurse: bool +) -> list[tuple[str, str, object]]: + """ + Get all pandas objects within a module. + + An object is determined to be part of pandas if it has a string + __module__ attribute that starts with ``"pandas"``. + + Parameters + ---------- + module_name : str + Name of the module to search. + recurse : bool + Whether to search submodules. + + Returns + ------- + List of all objects that are determined to be a part of pandas. 
+ """ + module = importlib.import_module(module_name) + objs = [] + + for name, obj in inspect.getmembers(module): + if inspect.isfunction(obj) or type(obj).__name__ == "cython_function_or_method": + # We have not set __module__ on public functions; may do + # so in the future. + continue + module_dunder = getattr(obj, "__module__", None) + if isinstance(module_dunder, str) and module_dunder.startswith("pandas"): + objs.append((module_name, name, obj)) + + if not recurse: + return objs + + # __file__ can, but shouldn't, be None + assert isinstance(module.__file__, str) + paths = [pathlib.Path(module.__file__).parent] + for module_info in pkgutil.walk_packages(paths): + name = module_info.name + if name.startswith("_") or name == "internals": + continue + objs.extend( + get_pandas_objects(f"{module.__name__}.{name}", recurse=module_info.ispkg) + ) + return objs + + +@pytest.mark.slow +@pytest.mark.parametrize( + "module_name", + [ + "pandas", + "pandas.api", + "pandas.arrays", + "pandas.errors", + pytest.param("pandas.io", marks=pytest.mark.xfail(reason="Private imports")), + "pandas.plotting", + "pandas.testing", + ], +) +def test_attributes_module(module_name): + recurse = module_name not in ["pandas", "pandas.testing"] + objs = get_pandas_objects(module_name, recurse=recurse) + failures = [ + (module_name, name, type(obj), obj.__module__) + for module_name, name, obj in objs + if not ( + obj.__module__ == module_name + # Explicit exceptions + or ("Dtype" in name and obj.__module__ == "pandas") + or (name == "Categorical" and obj.__module__ == "pandas") + ) + ] + assert len(failures) == 0, failures diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index aa38e63c826f6..81648377942f7 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -579,10 +579,7 @@ def test_astype_categorical_invalid_conversions(self): ser = Series(np.random.default_rng(2).integers(0, 
10000, 100)).sort_values() ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) - msg = ( - "dtype '<class 'pandas.core.arrays.categorical.Categorical'>' " - "not understood" - ) + msg = "dtype '<class 'pandas.Categorical'>' not understood" with pytest.raises(TypeError, match=msg): ser.astype(Categorical) with pytest.raises(TypeError, match=msg): diff --git a/pandas/util/version/__init__.py b/pandas/util/version/__init__.py index 15696c9292eda..412a606bb023e 100644 --- a/pandas/util/version/__init__.py +++ b/pandas/util/version/__init__.py @@ -111,6 +111,7 @@ def parse(version: str) -> Version: # The docstring is from an older version of the packaging library to avoid # errors in the docstring validation. class InvalidVersion(ValueError): + __module__ = "pandas.errors" """ An invalid version was found, users should refer to PEP 440.