Merge remote-tracking branch 'upstream/main' into aijams-take-function-invalid-dtype

aijams · aijams · commit fd56024421e6 · 2025-10-23T14:01:45.000-04:00
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
@@ -181,8 +181,7 @@ jobs:
     timeout-minutes: 90
     strategy:
       matrix:
-        # Note: Don't use macOS latest since macos 14 appears to be arm64 only
-        os: [macos-13, macos-14, windows-2025]
+        os: [macos-15-intel, macos-15, windows-2025]
         env_file: [actions-311.yaml, actions-312.yaml, actions-313.yaml]
       fail-fast: false
     runs-on: ${{ matrix.os }}
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
@@ -98,10 +98,9 @@ jobs:
         - [ubuntu-24.04, musllinux_x86_64]
         - [ubuntu-24.04-arm, manylinux_aarch64]
         - [ubuntu-24.04-arm, musllinux_aarch64]
-        - [macos-13, macosx_x86_64]
-        # Note: M1 images on Github Actions start from macOS 14
-        - [macos-14, macosx_arm64]
-        - [windows-2022, win_amd64]
+        - [macos-15-intel, macosx_x86_64]
+        - [macos-15, macosx_arm64]
+        - [windows-2025, win_amd64]
         - [windows-11-arm, win_arm64]
         python: [["cp311", "3.11"], ["cp312", "3.12"], ["cp313", "3.13"], ["cp313t", "3.13"], ["cp314", "3.14"], ["cp314t", "3.14"]]
         include:
diff --git a/asv_bench/benchmarks/ctors.py b/asv_bench/benchmarks/ctors.py
@@ -23,7 +23,7 @@ def gen_of_str(arr):
 
 
 def arr_dict(arr):
-    return dict(zip(range(len(arr)), arr))
+    return dict(zip(range(len(arr)), arr, strict=True))
 
 
 def list_of_tuples(arr):
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
@@ -16,7 +16,7 @@ def setup(self):
         self.idx = date_range(
             start=datetime(2015, 10, 26), end=datetime(2016, 1, 1), freq="50s"
         )
-        self.data = dict(zip(self.idx, range(len(self.idx))))
+        self.data = dict(zip(self.idx, range(len(self.idx)), strict=True))
         self.array = np.array([1, 2, 3])
         self.idx2 = Index(["a", "b", "c"])
 
@@ -407,7 +407,9 @@ def setup(self, num_to_replace):
         self.to_replace_list = np.random.choice(self.arr, num_to_replace)
         self.values_list = np.random.choice(self.arr1, num_to_replace)
 
-        self.replace_dict = dict(zip(self.to_replace_list, self.values_list))
+        self.replace_dict = dict(
+            zip(self.to_replace_list, self.values_list, strict=True)
+        )
 
     def time_replace_dict(self, num_to_replace):
         self.ser.replace(self.replace_dict)
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -515,6 +515,22 @@ If we had passed ``pd.Int64Dtype()`` or ``"int64[pyarrow]"`` for the dtype in th
 
 With ``"mode.nan_is_na"`` set to ``False``, ``ser.to_numpy()`` (and ``frame.values`` and ``np.asarray(obj)``) will convert to ``object`` dtype if :class:`NA` entries are present, where before they would coerce to ``NaN``.  To retain a float numpy dtype, explicitly pass ``na_value=np.nan`` to :meth:`Series.to_numpy`.
 
+The ``__module__`` attribute now points to public modules
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``__module__`` attribute on functions and classes in the public API has been
+updated to refer to the preferred public module from which to access the object,
+rather than the module in which the object happens to be defined (:issue:`55178`).
+
+This produces more informative displays in the Python console for classes, e.g.,
+instead of ``<class 'pandas.core.frame.DataFrame'>`` you now see
+``<class 'pandas.DataFrame'>``, and in interactive tools such as IPython, e.g.,
+instead of ``<function pandas.io.parsers.readers.read_csv(...)>`` you now see
+``<function pandas.read_csv(...)>``.
+
+This may break code that relies on the previous ``__module__`` values (e.g.
+doctests inspecting the ``type()`` of a DataFrame object).
+
 .. _whatsnew_300.api_breaking.deps:
 
 Increased minimum version for Python
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -1185,12 +1185,12 @@ def astype(self, dtype: Dtype, copy: bool = True):
             How to handle negative values in `indices`.
 
             * False: negative values in `indices` indicate positional indices
-              from the right (the default). This is similar to
-              :func:`numpy.take`.
+                from the right (the default). This is similar to
+                :func:`numpy.take`.
 
             * True: negative values in `indices` indicate
-              missing values. These values are set to `fill_value`. Any other
-              other negative values raise a ``ValueError``.
+                missing values. These values are set to `fill_value`. Any
+                other negative values raise a ``ValueError``.
 
         fill_value : scalar, default None
             If allow_fill=True and fill_value is not None, indices specified by
@@ -1216,7 +1216,6 @@ def astype(self, dtype: Dtype, copy: bool = True):
         Index(['c', 'c', 'b', 'c'], dtype='str')
         """
 
-    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
     def take(
         self,
         indices,
@@ -1225,6 +1224,51 @@ def take(
         fill_value=None,
         **kwargs,
     ) -> Self:
+        """
+        Return a new Index of the values selected by the indices.
+
+        For internal compatibility with numpy arrays.
+
+        Parameters
+        ----------
+        indices : array-like
+            Indices to be taken.
+        axis : int, optional
+            The axis over which to select values, always 0.
+        allow_fill : bool, default True
+            How to handle negative values in `indices`.
+
+            * False: negative values in `indices` indicate positional indices
+              from the right (the default). This is similar to
+              :func:`numpy.take`.
+
+            * True: negative values in `indices` indicate
+              missing values. These values are set to `fill_value`. Any
+              other negative values raise a ``ValueError``.
+
+        fill_value : scalar, default None
+            If allow_fill=True and fill_value is not None, indices specified by
+            -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.
+        **kwargs
+            Required for compatibility with numpy.
+
+        Returns
+        -------
+        Index
+            An index formed of elements at the given indices. Will be the same
+            type as self, except for RangeIndex.
+
+        See Also
+        --------
+        numpy.ndarray.take: Return an array formed from the
+            elements of a at the given indices.
+
+        Examples
+        --------
+        >>> idx = pd.Index(["a", "b", "c"])
+        >>> idx.take([2, 2, 1, 2])
+        Index(['c', 'c', 'b', 'c'], dtype='str')
+        """
         if kwargs:
             nv.validate_take((), kwargs)
         if is_scalar(indices):
@@ -1272,26 +1316,27 @@ def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:
             allow_fill = False
         return allow_fill
 
-    _index_shared_docs["repeat"] = """
-        Repeat elements of a %(klass)s.
+    def repeat(self, repeats, axis: None = None) -> Self:
+        """
+        Repeat elements of an Index.
 
-        Returns a new %(klass)s where each element of the current %(klass)s
+        Returns a new Index where each element of the current Index
         is repeated consecutively a given number of times.
 
         Parameters
         ----------
         repeats : int or array of ints
             The number of repetitions for each element. This should be a
             non-negative integer. Repeating 0 times will return an empty
-            %(klass)s.
+            Index.
         axis : None
             Must be ``None``. Has no effect but is accepted for compatibility
             with numpy.
 
         Returns
         -------
-        %(klass)s
-            Newly created %(klass)s with repeated elements.
+        Index
+            Newly created Index with repeated elements.
 
         See Also
         --------
@@ -1300,17 +1345,14 @@ def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:
 
         Examples
         --------
-        >>> idx = pd.Index(['a', 'b', 'c'])
+        >>> idx = pd.Index(["a", "b", "c"])
         >>> idx
         Index(['a', 'b', 'c'], dtype='object')
         >>> idx.repeat(2)
         Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
         >>> idx.repeat([1, 2, 3])
         Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
         """
-
-    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
-    def repeat(self, repeats, axis: None = None) -> Self:
         repeats = ensure_platform_int(repeats)
         nv.validate_repeat((), {"axis": axis})
         res_values = self._values.repeat(repeats)
@@ -5993,10 +6035,61 @@ def _should_fallback_to_positional(self) -> bool:
         (array([-1,  1,  3,  4, -1]), array([0, 2]))
         """
 
-    @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
     def get_indexer_non_unique(
         self, target
     ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
+        """
+        Compute indexer and mask for new index given the current index.
+
+        The indexer should be then used as an input to ndarray.take to align the
+        current data to the new index.
+
+        Parameters
+        ----------
+        target : Index
+            An iterable containing the values to be used for computing indexer.
+
+        Returns
+        -------
+        indexer : np.ndarray[np.intp]
+            Integers from 0 to n - 1 indicating that the index at these
+            positions matches the corresponding target values. Missing values
+            in the target are marked by -1.
+        missing : np.ndarray[np.intp]
+            An indexer into the target of the values not found.
+            These correspond to the -1 in the indexer array.
+
+        See Also
+        --------
+        Index.get_indexer : Computes indexer and mask for new index given
+            the current index.
+        Index.get_indexer_for : Returns an indexer even when non-unique.
+
+        Examples
+        --------
+        >>> index = pd.Index(["c", "b", "a", "b", "b"])
+        >>> index.get_indexer_non_unique(["b", "b"])
+        (array([1, 3, 4, 1, 3, 4]), array([], dtype=int64))
+
+        In the example below there are no matched values.
+
+        >>> index = pd.Index(["c", "b", "a", "b", "b"])
+        >>> index.get_indexer_non_unique(["q", "r", "t"])
+        (array([-1, -1, -1]), array([0, 1, 2]))
+
+        For this reason, the returned ``indexer`` contains only integers equal to -1.
+        It demonstrates that there's no match between the index and the ``target``
+        values at these positions. The mask [0, 1, 2] in the return value shows that
+        the first, second, and third elements are missing.
+
+        Notice that the return value is a tuple containing two items. In the example
+        below the first item is an array of locations in ``index``. The second
+        item is a mask showing that the first and third elements are missing.
+
+        >>> index = pd.Index(["c", "b", "a", "b", "b"])
+        >>> index.get_indexer_non_unique(["f", "b", "s"])
+        (array([-1,  1,  3,  4, -1]), array([0, 2]))
+        """
         target = self._maybe_cast_listlike_indexer(target)
 
         if not self._should_compare(target) and not self._should_partial_index(target):
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py