diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 7a95ed0eef2b2..531a0f25a621e 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -69,6 +69,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then "$BASE_DIR"/scripts/validate_docstrings.py \ --format=actions \ -i ES01 `# For now it is ok if docstrings are missing the extended summary` \ + -i "pandas.DataFrame.from_arrow SA01,EX01" \ -i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \ -i "pandas.Period.freq GL08" \ -i "pandas.Period.ordinal GL08" \ @@ -165,6 +166,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.tseries.offsets.DateOffset.is_on_offset GL08" \ -i "pandas.tseries.offsets.DateOffset.n GL08" \ -i "pandas.tseries.offsets.DateOffset.normalize GL08" \ + -i "pandas.tseries.offsets.DateOffset.rollback SA01,EX01" \ + -i "pandas.tseries.offsets.DateOffset.rollforward SA01,EX01" \ -i "pandas.tseries.offsets.Day.freqstr SA01" \ -i "pandas.tseries.offsets.Day.is_on_offset GL08" \ -i "pandas.tseries.offsets.Day.n GL08" \ diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index e701d48a89db7..855060ff1e182 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -360,6 +360,7 @@ Serialization / IO / conversion .. autosummary:: :toctree: api/ + DataFrame.from_arrow DataFrame.from_dict DataFrame.from_records DataFrame.to_orc diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index 93f88db0843dc..5d3bbe893a2cf 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -189,6 +189,7 @@ Categorical components .. autosummary:: :toctree: api/ + CategoricalIndex.append CategoricalIndex.codes CategoricalIndex.categories CategoricalIndex.ordered diff --git a/doc/source/reference/offset_frequency.rst b/doc/source/reference/offset_frequency.rst index 5876e005574fd..282bc7e7a24ed 100644 --- a/doc/source/reference/offset_frequency.rst +++ b/doc/source/reference/offset_frequency.rst @@ -40,6 +40,8 @@ Methods DateOffset.is_quarter_end DateOffset.is_year_start DateOffset.is_year_end + DateOffset.rollback + DateOffset.rollforward BusinessDay ----------- diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 6006acc8f5e16..9fc692269316c 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -424,6 +424,7 @@ strings and apply several methods to it. These can be accessed like Series.str.fullmatch Series.str.get Series.str.index + Series.str.isascii Series.str.join Series.str.len Series.str.ljust diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index bf3a6745aae3d..b6026b7cbbc1d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -165,26 +165,25 @@ Other enhancements inputs have identical ``attrs``, as has so far already been the case for :func:`pandas.concat`. - :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`) +- :class:`pandas.api.typing.NoDefault` is available for typing ``no_default`` (:issue:`60696`) - :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`) -- Added :meth:`.Styler.to_typst` to write Styler objects to file, buffer or string in Typst format (:issue:`57617`) -- Added missing :meth:`pandas.Series.info` to API reference (:issue:`60926`) -- :class:`pandas.api.typing.NoDefault` is available for typing ``no_default`` -- :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) +- :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) - :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`) - :func:`pandas.merge`, :meth:`DataFrame.merge` and :meth:`DataFrame.join` now support anti joins (``left_anti`` and ``right_anti``) in the ``how`` parameter (:issue:`42916`) -- :func:`read_spss` now supports kwargs to be passed to pyreadstat (:issue:`56356`) +- :func:`read_spss` now supports kwargs to be passed to ``pyreadstat`` (:issue:`56356`) - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`) +- :meth:`.Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`) - :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`). - :meth:`Index.get_loc` now accepts also subclasses of ``tuple`` as keys (:issue:`57922`) -- :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`) +- Added :meth:`.Styler.to_typst` to write Styler objects to file, buffer or string in Typst format (:issue:`57617`) +- Added missing :meth:`pandas.Series.info` to API reference (:issue:`60926`) - Added missing parameter ``weights`` in :meth:`DataFrame.plot.kde` for the estimation of the PDF (:issue:`59337`) -- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) -- Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`) +- Allow dictionaries to be passed to :meth:`Series.str.replace` via ``pat`` parameter (:issue:`51748`) +- Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Index` (:issue:`51452`) - Support reading value labels from Stata 108-format (Stata 6) and earlier files (:issue:`58154`) - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`) -- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) +- :meth:`.Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`) -- :class:`Rolling` and :class:`Expanding` now support ``pipe`` method (:issue:`57076`) - :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`) - :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header header cells (:issue:`35384`) - :func:`set_option` now accepts a dictionary of options, simplifying configuration of multiple settings at once (:issue:`61093`) @@ -196,36 +195,36 @@ Other enhancements - :meth:`DataFrame.to_json` now encodes ``Decimal`` as strings instead of floats (:issue:`60698`) - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) +- Added :meth:`.Rolling.pipe` and :meth:`.Expanding.pipe` (:issue:`57076`) - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`) -- :meth:`Series.nlargest` uses a 'stable' sort internally and will preserve original ordering. +- :class:`.Easter` has gained a new constructor argument ``method`` which specifies the method used to calculate Easter — for example, Orthodox Easter (:issue:`61665`) - :class:`ArrowDtype` now supports ``pyarrow.JsonType`` (:issue:`60958`) - :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``median``, ``prod``, ``min``, ``max``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`) -- :class:`Easter` has gained a new constructor argument ``method`` which specifies the method used to calculate Easter — for example, Orthodox Easter (:issue:`61665`) - :class:`Holiday` constructor argument ``days_of_week`` will raise a ``ValueError`` when type is something other than ``None`` or ``tuple`` (:issue:`61658`) - :class:`Holiday` has gained the constructor argument and field ``exclude_dates`` to exclude specific datetimes from a custom holiday calendar (:issue:`54382`) -- :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`) -- :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) - :func:`DataFrame.to_excel` has a new ``autofilter`` parameter to add automatic filters to all columns (:issue:`61194`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) -- :func:`to_numeric` on big integers converts to ``object`` datatype with python integers when not coercing. (:issue:`51295`) +- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.RollingGroupby.apply`, :meth:`.ExpandingGroupby.apply`, :meth:`.Rolling.apply`, :meth:`.Expanding.apply`, :meth:`.DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) +- :meth:`.Rolling.aggregate`, :meth:`.Expanding.aggregate` and :meth:`.ExponentialMovingWindow.aggregate` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) - :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`) - :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`) -- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`) -- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) -- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) +- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support f-strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to the ``%`` format strings and callables (:issue:`49580`) - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`) - :meth:`Series.map` now accepts an ``engine`` parameter to allow execution with a third-party execution engine (:issue:`61125`) +- :meth:`Series.nlargest` uses stable sort internally and will preserve original ordering in the case of equality (:issue:`55767`) - :meth:`Series.rank` and :meth:`DataFrame.rank` with numpy-nullable dtypes preserve ``NA`` values and return ``UInt64`` dtype where appropriate instead of casting ``NA`` to ``NaN`` with ``float64`` dtype (:issue:`62043`) - :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`) - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`) -- Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`). -- Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`) -- Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments into :meth:`DataFrame.fillna` (:issue:`4514`) +- Added :meth:`.Rolling.first`, :meth:`.Rolling.last`, :meth:`.Expanding.first`, and :meth:`.Expanding.last` (:issue:`33155`) +- Added :meth:`.Rolling.nunique` and :meth:`.Expanding.nunique` (:issue:`26958`) +- Added :meth:`Series.str.isascii` (:issue:`59091`) +- Added ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`). +- Added half-year offset classes :class:`.HalfYearBegin`, :class:`.HalfYearEnd`, :class:`.BHalfYearBegin` and :class:`.BHalfYearEnd` (:issue:`60928`) +- Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments in :meth:`DataFrame.fillna` (:issue:`4514`) - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`) - Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`) -- Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`) - Improve error reporting through outputting the first few duplicates when :func:`merge` validation fails (:issue:`62742`) - Improve the resulting dtypes in :meth:`DataFrame.where` and :meth:`DataFrame.mask` with :class:`ExtensionDtype` ``other`` (:issue:`62038`) - Improved deprecation message for offset aliases (:issue:`60820`) @@ -253,9 +252,9 @@ These are bug fixes that might have notable behavior changes. Improved behavior in groupby for ``observed=False`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -A number of bugs have been fixed due to improved handling of unobserved groups (:issue:`55738`). All remarks in this section equally impact :class:`.SeriesGroupBy`. +A number of bugs have been fixed due to improved handling of unobserved groups. All remarks in this section equally impact :class:`.SeriesGroupBy`. (:issue:`55738`) -In previous versions of pandas, a single grouping with :meth:`.DataFrameGroupBy.apply` or :meth:`.DataFrameGroupBy.agg` would pass the unobserved groups to the provided function, resulting in ``0`` below. +In previous versions of pandas, a single grouping with :meth:`.DataFrameGroupBy.apply` or :meth:`.DataFrameGroupBy.agg` would pass the unobserved groups to the provided function, resulting correctly in ``0`` below. .. ipython:: python @@ -306,11 +305,6 @@ These improvements also fixed certain bugs in groupby: - :meth:`.DataFrameGroupBy.sum` would have incorrect values when there are multiple groupings, unobserved groups, and non-numeric data (:issue:`43891`) - :meth:`.DataFrameGroupBy.value_counts` would produce incorrect results when used with some categorical and some non-categorical groupings and ``observed=False`` (:issue:`56016`) -.. _whatsnew_300.notable_bug_fixes.notable_bug_fix2: - -notable_bug_fix2 -^^^^^^^^^^^^^^^^ - .. --------------------------------------------------------------------------- .. _whatsnew_300.api_breaking: @@ -394,7 +388,7 @@ Similarly, the :class:`Timedelta` constructor and :func:`to_timedelta` with a st When all objects passed to :func:`concat` have a :class:`DatetimeIndex`, passing ``sort=False`` will now result in the non-concatenation axis not being sorted. Previously, the result would always be sorted along -the non-concatenation axis even when ``sort=False`` is passed. :issue:`57335` +the non-concatenation axis even when ``sort=False`` is passed. (:issue:`57335`) If you do not specify the ``sort`` argument, pandas will continue to return a sorted result but this behavior is deprecated and you will receive a warning. @@ -453,10 +447,10 @@ that are now fixed in this release are as follows. .. _whatsnew_300.api_breaking.value_counts_sorting: -Changed behavior in :meth:`DataFrame.value_counts` and :meth:`DataFrameGroupBy.value_counts` when ``sort=False`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Changed behavior in :meth:`DataFrame.value_counts` and :meth:`.DataFrameGroupBy.value_counts` when ``sort=False`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In previous versions of pandas, :meth:`DataFrame.value_counts` with ``sort=False`` would sort the result by row labels (as was documented). This was nonintuitive and inconsistent with :meth:`Series.value_counts` which would maintain the order of the input. Now :meth:`DataFrame.value_counts` will maintain the order of the input. +In previous versions of pandas, :meth:`DataFrame.value_counts` with ``sort=False`` would sort the result by row labels (as was documented). This was nonintuitive and inconsistent with :meth:`Series.value_counts` which would maintain the order of the input. Now :meth:`DataFrame.value_counts` will maintain the order of the input. (:issue:`59745`) .. ipython:: python @@ -517,9 +511,9 @@ This change also applies to :meth:`.DataFrameGroupBy.value_counts`. Here, there Changed behavior of ``pd.offsets.Day`` to always represent calendar-day ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In previous versions of pandas, :class:`offsets.Day` represented a fixed span +In previous versions of pandas, :class:`.offsets.Day` represented a fixed span of 24 hours, disregarding Daylight Savings Time transitions. It now consistently -behaves as a calendar-day, preserving time-of-day across DST transitions: +behaves as a calendar-day, preserving time-of-day across DST transitions. (:issue:`61985`) *Old behavior* @@ -540,12 +534,12 @@ This change fixes a long-standing bug in :func:`date_range` (:issue:`51716`, :is small behavior differences as collateral: - ``pd.offsets.Day(n)`` no longer compares as equal to ``pd.offsets.Hour(24*n)`` -- :class:`offsets.Day` no longer supports division -- :class:`Timedelta` no longer accepts :class:`Day` objects as inputs -- :meth:`tseries.frequencies.to_offset` on a :class:`Timedelta` object returns a :class:`offsets.Hour` object in cases where it used to return a :class:`Day` object. -- Adding or subtracting a scalar from a timezone-aware :class:`DatetimeIndex` with a :class:`Day` ``freq`` no longer preserves that ``freq`` attribute. -- Adding or subtracting a :class:`Day` with a :class:`Timedelta` is no longer supported. -- Adding or subtracting a :class:`Day` offset to a timezone-aware :class:`Timestamp` or datetime-like may lead to an ambiguous or non-existent time, which will raise. +- :class:`.offsets.Day` no longer supports division +- :class:`Timedelta` no longer accepts :class:`.Day` objects as inputs +- :meth:`tseries.frequencies.to_offset` on a :class:`Timedelta` object returns an :class:`.offsets.Hour` object in cases where it used to return a :class:`.Day` object. +- Adding or subtracting a scalar from a timezone-aware :class:`DatetimeIndex` with a :class:`.Day` ``freq`` no longer preserves that ``freq`` attribute. +- Adding or subtracting a :class:`.Day` with a :class:`Timedelta` is no longer supported. +- Adding or subtracting a :class:`.Day` offset to a timezone-aware :class:`Timestamp` or datetime-like may lead to an ambiguous or non-existent time, which will raise. .. _whatsnew_300.api_breaking.nan_vs_na: @@ -776,23 +770,23 @@ Other API changes - All classes inheriting from builtin ``tuple`` (including types created with :func:`collections.namedtuple`) are now hashed and compared as builtin ``tuple`` during indexing operations (:issue:`57922`) - Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`) - Passing a :class:`Series` input to :func:`json_normalize` will now retain the :class:`Series` :class:`Index`, previously output had a new :class:`RangeIndex` (:issue:`51452`) +- Pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`) +- Pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`) - Removed :meth:`Index.sort` which always raised a ``TypeError``. This attribute is not defined and will raise an ``AttributeError`` (:issue:`59283`) - Unused ``dtype`` argument has been removed from the :class:`MultiIndex` constructor (:issue:`60962`) -- Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`) -- pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`) -- pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`) -- when comparing the indexes in :func:`testing.assert_series_equal`, check_exact defaults to True if an :class:`Index` is of integer dtypes. (:issue:`57386`) +- Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`.Styler.to_excel` (:issue:`54154`) +- When comparing the indexes in :func:`testing.assert_series_equal`, ``check_exact`` defaults to True if an :class:`Index` is of integer dtype. (:issue:`57386`) - Index set operations (like union or intersection) will now ignore the dtype of an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining the dtype of the resulting Index (:issue:`60797`) -- :class:`IncompatibleFrequency` now subclasses ``TypeError`` instead of ``ValueError``. As a result, joins with mismatched frequencies now cast to object like other non-comparable joins, and arithmetic with indexes with mismatched frequencies align (:issue:`55782`) +- :class:`.IncompatibleFrequency` now subclasses ``TypeError`` instead of ``ValueError``. As a result, joins with mismatched frequencies now cast to object like other non-comparable joins, and arithmetic with indexes with mismatched frequencies align (:issue:`55782`) - :class:`Series` "flex" methods like :meth:`Series.add` no longer allow passing a :class:`DataFrame` for ``other``; use the DataFrame reversed method instead (:issue:`46179`) - :func:`date_range` and :func:`timedelta_range` no longer default to ``unit="ns"``, instead will infer a unit from the ``start``, ``end``, and ``freq`` parameters. Explicitly specify a desired ``unit`` to override these (:issue:`59031`) -- :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`) +- :meth:`.CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`) - :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`) -- Arithmetic operations between a :class:`Series`, :class:`Index`, or :class:`ExtensionArray` with a ``list`` now consistently wrap that list with an array equivalent to ``Series(my_list).array``. To do any other kind of type inference or casting, do so explicitly before operating (:issue:`62552`) +- Arithmetic operations between a :class:`Series`, :class:`Index`, or :class:`.ExtensionArray` with a ``list`` now consistently wrap that list with an array equivalent to ``Series(my_list).array``. To do any other kind of type inference or casting, do so explicitly before operating (:issue:`62552`) - Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`) -- Numpy functions like ``np.isinf`` that return a bool dtype when called on a :class:`Index` object now return a bool-dtype :class:`Index` instead of ``np.ndarray`` (:issue:`52676`) +- NumPy functions like ``np.isinf`` that return a bool dtype when called on a :class:`Index` object now return a bool-dtype :class:`Index` instead of ``np.ndarray`` (:issue:`52676`) - Methods that can operate in-place (:meth:`~DataFrame.replace`, :meth:`~DataFrame.fillna`, :meth:`~DataFrame.ffill`, :meth:`~DataFrame.bfill`, :meth:`~DataFrame.interpolate`, :meth:`~DataFrame.where`, :meth:`~DataFrame.mask`, :meth:`~DataFrame.clip`) now return @@ -808,7 +802,7 @@ Copy keyword ^^^^^^^^^^^^ The ``copy`` keyword argument in the following methods is deprecated and -will be removed in a future version: +will be removed in a future version. (:issue:`57347`) - :meth:`DataFrame.truncate` / :meth:`Series.truncate` - :meth:`DataFrame.tz_convert` / :meth:`Series.tz_convert` @@ -846,24 +840,25 @@ Other Deprecations - Deprecated behavior of :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups`, in a future version ``groups`` by one element list will return tuple instead of scalar. (:issue:`58858`) - Deprecated behavior of :meth:`Series.dt.to_pytimedelta`, in a future version this will return a :class:`Series` containing python ``datetime.timedelta`` objects instead of an ``ndarray`` of timedelta; this matches the behavior of other :meth:`Series.dt` properties. (:issue:`57463`) - Deprecated converting object-dtype columns of ``datetime.datetime`` objects to datetime64 when writing to stata (:issue:`56536`) -- Deprecated lowercase strings ``d``, ``b`` and ``c`` denoting frequencies in :class:`Day`, :class:`BusinessDay` and :class:`CustomBusinessDay` in favour of ``D``, ``B`` and ``C`` (:issue:`58998`) -- Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`) +- Deprecated lowercase strings ``d``, ``b`` and ``c`` denoting frequencies in :class:`.Day`, :class:`.BusinessDay` and :class:`.CustomBusinessDay` in favour of ``D``, ``B`` and ``C`` (:issue:`58998`) +- Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`.Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`) - Deprecated parameter ``method`` in :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` (:issue:`58667`) - Deprecated strings ``w``, ``d``, ``MIN``, ``MS``, ``US`` and ``NS`` denoting units in :class:`Timedelta` in favour of ``W``, ``D``, ``min``, ``ms``, ``us`` and ``ns`` (:issue:`59051`) -- Deprecated the 'verify_integrity' keyword in :meth:`DataFrame.set_index`; directly check the result for ``obj.index.is_unique`` instead (:issue:`62919`) -- Deprecated the ``arg`` parameter of ``Series.map``; pass the added ``func`` argument instead. (:issue:`61260`) +- Deprecated the ``arg`` parameter of :meth:`Series.map`; pass the added ``func`` argument instead. (:issue:`61260`) +- Deprecated the ``verify_integrity`` keyword in :meth:`DataFrame.set_index`; directly check the result for ``obj.index.is_unique`` instead (:issue:`62919`) +- Deprecated the keyword ``check_datetimelike_compat`` in :meth:`testing.assert_frame_equal` and :meth:`testing.assert_series_equal` (:issue:`55638`) - Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`) - Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.unstack` and :meth:`DataFrame.unstack` (:issue:`12189`, :issue:`53868`) - Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`) - Deprecated allowing strings representing full dates in :meth:`DataFrame.at_time` and :meth:`Series.at_time` (:issue:`50839`) -- Deprecated backward-compatibility behavior for :meth:`DataFrame.select_dtypes` matching "str" dtype when ``np.object_`` is specified (:issue:`61916`) -- Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`) +- Deprecated backward-compatibility behavior for :meth:`DataFrame.select_dtypes` matching ``str`` dtype when ``np.object_`` is specified (:issue:`61916`) +- Deprecated option ``future.no_silent_downcasting``, as it is no longer used. In a future version accessing this option will raise (:issue:`59502`) - Deprecated passing non-Index types to :meth:`Index.join`; explicitly convert to Index first (:issue:`62897`) -- Deprecated silent casting of non-datetime 'other' to datetime in :meth:`Series.combine_first` (:issue:`62931`) +- Deprecated silent casting of non-datetime ``other`` to datetime in :meth:`Series.combine_first` (:issue:`62931`) - Deprecated silently casting strings to :class:`Timedelta` in binary operations with :class:`Timedelta` (:issue:`59653`) - Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`) - Deprecated support for the Dataframe Interchange Protocol (:issue:`56732`) -- Deprecated the 'inplace' keyword from :meth:`Resampler.interpolate`, as passing ``True`` raises ``AttributeError`` (:issue:`58690`) +- Deprecated the ``inplace`` keyword from :meth:`Resampler.interpolate`, as passing ``True`` raises ``AttributeError`` (:issue:`58690`) .. --------------------------------------------------------------------------- .. _whatsnew_300.prior_deprecations: @@ -898,15 +893,15 @@ Renamed the following offset aliases (:issue:`57986`): Other Removals ^^^^^^^^^^^^^^ -- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when a group has all NA values, or when used with ``skipna=False`` and any NA value is encountered (:issue:`10694`, :issue:`57745`) +- :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, and :meth:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when a group has all NA values, or when used with ``skipna=False`` and any NA value is encountered (:issue:`10694`, :issue:`57745`) - :func:`concat` no longer ignores empty objects when determining output dtypes (:issue:`39122`) - :func:`concat` with all-NA entries no longer ignores the dtype of those entries when determining the result dtype (:issue:`40893`) - :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`) - :func:`to_datetime` with a ``unit`` specified no longer parses strings into floats, instead parses them the same way as without ``unit`` (:issue:`50735`) +- :meth:`.SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`) - :meth:`DataFrame.groupby` with ``as_index=False`` and aggregation methods will no longer exclude from the result the groupings that do not arise from the input (:issue:`49519`) - :meth:`ExtensionArray._reduce` now requires a ``keepdims: bool = False`` parameter in the signature (:issue:`52788`) - :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`) -- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`) - All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`) - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`) - Changed behavior of :meth:`Series.__getitem__` and :meth:`Series.__setitem__` to always treat integer keys as labels, never as positional, consistent with :class:`DataFrame` behavior (:issue:`50617`) @@ -917,33 +912,33 @@ Other Removals - Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a ``value`` and with non-dict-like ``to_replace`` (:issue:`33302`) - Disallow constructing a :class:`arrays.SparseArray` with scalar data (:issue:`53039`) - Disallow indexing an :class:`Index` with a boolean indexer of length zero, it now raises ``ValueError`` (:issue:`55820`) -- Disallow non-standard (``np.ndarray``, :class:`Index`, :class:`ExtensionArray`, or :class:`Series`) to :func:`isin`, :func:`unique`, :func:`factorize` (:issue:`52986`) +- Disallow non-standard (``np.ndarray``, :class:`Index`, :class:`.ExtensionArray`, or :class:`Series`) to :func:`isin`, :func:`unique`, :func:`factorize` (:issue:`52986`) - Disallow passing a pandas type to :meth:`Index.view` (:issue:`55709`) - Disallow units other than "s", "ms", "us", "ns" for datetime64 and timedelta64 dtypes in :func:`array` (:issue:`53817`) -- Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`) -- Removed 'fastpath' keyword in :class:`Categorical` constructor (:issue:`20110`) -- Removed 'kind' keyword in :meth:`Series.resample` and :meth:`DataFrame.resample` (:issue:`58125`) - Removed ``Block``, ``DatetimeTZBlock``, ``ExtensionBlock``, ``create_block_manager_from_blocks`` from ``pandas.core.internals`` and ``pandas.core.internals.api`` (:issue:`55139`) +- Removed ``fastpath`` keyword in :class:`Categorical` constructor (:issue:`20110`) +- Removed ``freq`` keyword from :class:`.PeriodArray` constructor, use "dtype" instead (:issue:`52462`) +- Removed ``kind`` keyword in :meth:`Series.resample` and :meth:`DataFrame.resample` (:issue:`58125`) - Removed alias :class:`arrays.PandasArray` for :class:`arrays.NumpyExtensionArray` (:issue:`53694`) -- Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`) +- Removed deprecated ``method`` and ``limit`` keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`) - Removed extension test classes ``BaseNoReduceTests``, ``BaseNumericReduceTests``, ``BaseBooleanReduceTests`` (:issue:`54663`) -- Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`) +- Removed the ``closed`` and ``normalize`` keywords in :class:`DatetimeIndex` constructor (:issue:`52628`) - Removed the deprecated ``delim_whitespace`` keyword in :func:`read_csv` and :func:`read_table`, use ``sep=r"\s+"`` instead (:issue:`55569`) -- Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`) +- Require :meth:`.SparseDtype.fill_value` to be a valid value for the :meth:`.SparseDtype.subtype` (:issue:`53043`) - Stopped automatically casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`) -- Stopped performing dtype inference in :class:`Index`, :class:`Series` and :class:`DataFrame` constructors when given a pandas object (:class:`Series`, :class:`Index`, :class:`ExtensionArray`), call ``.infer_objects`` on the input to keep the current behavior (:issue:`56012`) +- Stopped performing dtype inference in :class:`Index`, :class:`Series` and :class:`DataFrame` constructors when given a pandas object (:class:`Series`, :class:`Index`, :class:`.ExtensionArray`), call ``.infer_objects`` on the input to keep the current behavior (:issue:`56012`) - Stopped performing dtype inference when setting a :class:`Index` into a :class:`DataFrame` (:issue:`56102`) - Stopped performing dtype inference with in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`) -- Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`) +- Removed the ``closed`` and ``unit`` keywords in :class:`TimedeltaIndex` constructor (:issue:`52628`, :issue:`55499`) - All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`) - All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`) - Changed the default value of ``na_action`` in :meth:`Categorical.map` to ``None`` (:issue:`51645`) - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) +- Enforce banning of upcasting in in-place setitem-like operations; see `PDEP6 `_ (:issue:`59007`) - Enforce deprecation in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`) -- Enforce banning of upcasting in in-place setitem-like operations (:issue:`59007`) (see `PDEP6 `_) - Enforced deprecation ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`) - Enforced deprecation allowing non-``bool`` and NA values for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` (:issue:`59615`) -- Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, :func:`interval_range`, (:issue:`56036`) +- Enforced deprecation disallowing ``float`` for the ``periods`` argument in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, :func:`interval_range`, (:issue:`56036`) - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) - Enforced deprecation in :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype performing dtype inference on the ``.index`` of the result (:issue:`56161`) - Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`) @@ -956,14 +951,14 @@ Other Removals - Enforced deprecation of ``quantile`` keyword in :meth:`.Rolling.quantile` and :meth:`.Expanding.quantile`, renamed to ``q`` instead. (:issue:`52550`) - Enforced deprecation of argument ``infer_datetime_format`` in :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`) - Enforced deprecation of combining parsed datetime columns in :func:`read_csv` in ``parse_dates`` (:issue:`55569`) -- Enforced deprecation of non-standard (``np.ndarray``, :class:`ExtensionArray`, :class:`Index`, or :class:`Series`) argument to :func:`api.extensions.take` (:issue:`52981`) -- Enforced deprecation of parsing system timezone strings to ``tzlocal``, which depended on system timezone, pass the 'tz' keyword instead (:issue:`50791`) -- Enforced deprecation of passing a dictionary to :meth:`SeriesGroupBy.agg` (:issue:`52268`) -- Enforced deprecation of string ``AS`` denoting frequency in :class:`YearBegin` and strings ``AS-DEC``, ``AS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`) -- Enforced deprecation of string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57699`) -- Enforced deprecation of string ``BAS`` denoting frequency in :class:`BYearBegin` and strings ``BAS-DEC``, ``BAS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`) -- Enforced deprecation of string ``BA`` denoting frequency in :class:`BYearEnd` and strings ``BA-DEC``, ``BA-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57793`) -- Enforced deprecation of strings ``H``, ``BH``, and ``CBH`` denoting frequencies in :class:`Hour`, :class:`BusinessHour`, :class:`CustomBusinessHour` (:issue:`59143`) +- Enforced deprecation of non-standard (``np.ndarray``, :class:`.ExtensionArray`, :class:`Index`, or :class:`Series`) argument to :func:`api.extensions.take` (:issue:`52981`) +- Enforced deprecation of parsing system timezone strings to ``tzlocal``, which depended on system timezone, pass the ``tz`` keyword instead (:issue:`50791`) +- Enforced deprecation of passing a dictionary to :meth:`.SeriesGroupBy.agg` (:issue:`52268`) +- Enforced deprecation of string ``AS`` denoting frequency in :class:`.YearBegin` and strings ``AS-DEC``, ``AS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`) +- Enforced deprecation of string ``A`` denoting frequency in :class:`.YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57699`) +- Enforced deprecation of string ``BAS`` denoting frequency in :class:`.BYearBegin` and strings ``BAS-DEC``, ``BAS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`) +- Enforced deprecation of string ``BA`` denoting frequency in :class:`.BYearEnd` and strings ``BA-DEC``, ``BA-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57793`) +- Enforced deprecation of strings ``H``, ``BH``, and ``CBH`` denoting frequencies in :class:`.Hour`, :class:`.BusinessHour`, :class:`.CustomBusinessHour` (:issue:`59143`) - Enforced deprecation of strings ``H``, ``BH``, and ``CBH`` denoting units in :class:`Timedelta` (:issue:`59143`) - Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`57627`) - Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`57627`) @@ -1007,7 +1002,7 @@ Other Removals - Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`) - Removed deprecated keyword ``method`` on :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`57760`) - Removed option ``mode.use_inf_as_na``, convert inf entries to ``NaN`` before instead (:issue:`51684`) -- Removed support for :class:`DataFrame` in :meth:`DataFrame.from_records`(:issue:`51697`) +- Removed support for :class:`DataFrame` in :meth:`DataFrame.from_records` (:issue:`51697`) - Removed support for ``errors="ignore"`` in :func:`to_datetime`, :func:`to_timedelta` and :func:`to_numeric` (:issue:`55734`) - Removed support for ``slice`` in :meth:`DataFrame.take` (:issue:`51539`) - Removed the ``ArrayManager`` (:issue:`55043`) @@ -1018,11 +1013,11 @@ Other Removals - Unrecognized timezones when parsing strings to datetimes now raises a ``ValueError`` (:issue:`51477`) - Removed the :class:`Grouper` attributes ``ax``, ``groups``, ``indexer``, and ``obj`` (:issue:`51206`, :issue:`51182`) - Removed deprecated keyword ``verbose`` on :func:`read_csv` and :func:`read_table` (:issue:`56556`) -- Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`) +- Removed the ``method`` keyword in :meth:`.ExtensionArray.fillna`, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`) - Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`) - Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise in such cases (:issue:`33941`, :issue:`51276`) - Enforced deprecation of storage option "pyarrow_numpy" for :class:`StringDtype` (:issue:`60152`) -- Removed specifying ``include_groups=True`` in :class:`.DataFrameGroupBy.apply` and :class:`.Resampler.apply` (:issue:`7155`) +- Removed specifying ``include_groups=True`` in :meth:`.DataFrameGroupBy.apply` and :meth:`.Resampler.apply` (:issue:`7155`) .. --------------------------------------------------------------------------- .. _whatsnew_300.performance: @@ -1041,11 +1036,11 @@ Performance improvements - :meth:`Series.str.partition` with :class:`ArrowDtype` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57768`) - Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`) - Performance improvement in :class:`MultiIndex` when setting :attr:`MultiIndex.names` doesn't invalidate all cached operations (:issue:`59578`) +- Performance improvement in :meth:`.DataFrameGroupBy.ffill`, :meth:`.DataFrameGroupBy.bfill`, :meth:`.SeriesGroupBy.ffill`, and :meth:`.SeriesGroupBy.bfill` (:issue:`56902`) - Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`) - Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`) - Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`) - Performance improvement in :meth:`DataFrame.to_csv` when ``index=False`` (:issue:`59312`) -- Performance improvement in :meth:`DataFrameGroupBy.ffill`, :meth:`DataFrameGroupBy.bfill`, :meth:`SeriesGroupBy.ffill`, and :meth:`SeriesGroupBy.bfill` (:issue:`56902`) - Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`) - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`) - Performance improvement in :meth:`Index.to_frame` returning a :class:`RangeIndex` columns of a :class:`Index` when possible. (:issue:`58018`) @@ -1069,8 +1064,8 @@ Performance improvements - Performance improvement in :meth:`DataFrame.__getitem__` when ``key`` is a :class:`DataFrame` with many columns (:issue:`61010`) - Performance improvement in :meth:`DataFrame.astype` when converting to extension floating dtypes, e.g. "Float64" (:issue:`60066`) - Performance improvement in :meth:`DataFrame.stack` when using ``future_stack=True`` and the DataFrame does not have a :class:`MultiIndex` (:issue:`58391`) +- Performance improvement in :meth:`DataFrame.to_hdf` avoid unnecessary reopenings of the HDF5 file to speedup data addition to files with a very large number of groups . (:issue:`58248`) - Performance improvement in :meth:`DataFrame.where` when ``cond`` is a :class:`DataFrame` with many columns (:issue:`61010`) -- Performance improvement in :meth:`to_hdf` avoid unnecessary reopenings of the HDF5 file to speedup data addition to files with a very large number of groups . (:issue:`58248`) - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`) - Performance improvement in indexing operations for string dtypes (:issue:`56997`) - Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`) @@ -1083,6 +1078,7 @@ Bug fixes Categorical ^^^^^^^^^^^ +- Bug in :class:`Categorical` when constructing with an :class:`Index` with :class:`ArrowDtype` (:issue:`60563`) - Bug in :class:`Categorical` where constructing from a pandas :class:`Series` or :class:`Index` with ``dtype='object'`` did not preserve the categories' dtype as ``object``; now the ``categories.dtype`` is preserved as ``object`` for these cases, while numpy arrays and Python sequences with ``dtype='object'`` continue to infer the most specific dtype (for example, ``str`` if all elements are strings) (:issue:`61778`) - Bug in :class:`pandas.Categorical` displaying string categories without quotes when using "string" dtype (:issue:`63045`) - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`) @@ -1094,7 +1090,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ -- Bug in :attr:`is_year_start` where a DateTimeIndex constructed via a date_range with frequency 'MS' wouldn't have the correct year or quarter start attributes (:issue:`57377`) +- Bug in :attr:`is_year_start` where a :class:`DatetimeIndex` constructed via :func:`date_range` with frequency 'MS' wouldn't have the correct year or quarter start attributes (:issue:`57377`) - Bug in :class:`DataFrame` raising ``ValueError`` when ``dtype`` is ``timedelta64`` and ``data`` is a list containing ``None`` (:issue:`60064`) - Bug in :class:`Timestamp` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``tzinfo`` or data (:issue:`48688`) - Bug in :class:`Timestamp` constructor failing to raise when given a ``np.datetime64`` object with non-standard unit (:issue:`25611`) @@ -1104,10 +1100,10 @@ Datetimelike - Bug in :func:`to_datetime` where passing an ``lxml.etree._ElementUnicodeResult`` together with ``format`` raised ``TypeError``. Now subclasses of ``str`` are handled. (:issue:`60933`) - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`) - Bug in :func:`tseries.frequencies.to_offset` would fail to parse frequency strings starting with "LWOM" (:issue:`59218`) +- Bug in :meth:`.DateOffset.rollback` (and subclass methods) with ``normalize=True`` rolling back one offset too long (:issue:`32616`) +- Bug in :meth:`DataFrame.agg` with with missing values resulting in IndexError (:issue:`58810`) - Bug in :meth:`DataFrame.fillna` raising an ``AssertionError`` instead of ``OutOfBoundsDatetime`` when filling a ``datetime64[ns]`` column with an out-of-bounds timestamp. Now correctly raises ``OutOfBoundsDatetime``. (:issue:`61208`) - Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` casting ``datetime64`` and ``timedelta64`` columns to ``float64`` and losing precision (:issue:`60850`) -- Bug in :meth:`Dataframe.agg` with df with missing values resulting in IndexError (:issue:`58810`) -- Bug in :meth:`DateOffset.rollback` (and subclass methods) with ``normalize=True`` rolling back one offset too long (:issue:`32616`) - Bug in :meth:`DatetimeIndex.asof` with a string key giving incorrect results (:issue:`50946`) - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` does not raise on Custom business days frequencies bigger then "1C" (:issue:`58664`) - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`) @@ -1119,7 +1115,7 @@ Datetimelike - Bug in :meth:`Timestamp.replace` failing to update ``unit`` attribute when replacement introduces non-zero ``nanosecond`` or ``microsecond`` (:issue:`57749`) - Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`) - Bug in :meth:`to_datetime` on float array with missing values throwing ``FloatingPointError`` (:issue:`58419`) -- Bug in :meth:`to_datetime` on float32 df with year, month, day etc. columns leads to precision issues and incorrect result. (:issue:`60506`) +- Bug in :meth:`to_datetime` on float32 data with year, month, day etc. columns leads to precision issues and incorrect result. (:issue:`60506`) - Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`) - Bug in :meth:`to_datetime` with ``format="ISO8601"`` and ``utc=True`` where naive timestamps incorrectly inherited timezone offset from previous timestamps in a series. (:issue:`61389`) - Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`) @@ -1160,6 +1156,7 @@ Numeric Conversion ^^^^^^^^^^ +- :func:`to_numeric` on big integers converts to ``object`` datatype with python integers when not coercing. (:issue:`51295`) - Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) @@ -1182,7 +1179,7 @@ Interval - Bug in :func:`interval_range` where start and end numeric types were always cast to 64 bit (:issue:`57268`) - Bug in :func:`pandas.interval_range` incorrectly inferring ``int64`` dtype when ``np.float32`` and ``int`` are used for ``start`` and ``freq`` (:issue:`58964`) - Bug in :meth:`IntervalIndex.get_indexer` and :meth:`IntervalIndex.drop` when one of the sides of the index is non-unique (:issue:`52245`) -- Construction of :class:`IntervalArray` and :class:`IntervalIndex` from arrays with mismatched signed/unsigned integer dtypes (e.g., ``int64`` and ``uint64``) now raises a :exc:`TypeError` instead of proceeding silently. (:issue:`55715`) +- Construction of :class:`.IntervalArray` and :class:`IntervalIndex` from arrays with mismatched signed/unsigned integer dtypes (e.g., ``int64`` and ``uint64``) now raises a :exc:`TypeError` instead of proceeding silently. (:issue:`55715`) Indexing ^^^^^^^^ @@ -1206,9 +1203,11 @@ Indexing Missing ^^^^^^^ - Bug in :meth:`DataFrame.fillna` and :meth:`Series.fillna` that would ignore the ``limit`` argument on :class:`.ExtensionArray` dtypes (:issue:`58001`) +- Bug in :meth:`MultiIndex.fillna` error message was referring to ``isna`` instead of ``fillna`` (:issue:`60974`) - Bug in :meth:`NA.__and__`, :meth:`NA.__or__` and :meth:`NA.__xor__` when operating with ``np.bool_`` objects (:issue:`58427`) - Bug in ``divmod`` between :class:`NA` and ``Int64`` dtype objects (:issue:`62196`) -- +- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace :class:`NA` values in a :class:`Float64Dtype` object with ``np.nan``; this now works with ``pd.set_option("mode.distinguish_nan_and_na", True)`` and is irrelevant otherwise (:issue:`55127`) +- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace :class:`np.nan` values in a :class:`Int64Dtype` object with :class:`NA`; this is now a no-op with ``pd.set_option("mode.distinguish_nan_and_na", True)`` and is irrelevant otherwise (:issue:`51237`) MultiIndex ^^^^^^^^^^ @@ -1231,10 +1230,11 @@ I/O ``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`) - Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function will now raise a ``TypeError`` if ``data`` is a list containing non-dict items (:issue:`62829`) - Bug in :func:`pandas.json_normalize` raising ``TypeError`` when ``meta`` contained a non-string key (e.g., ``int``) and ``record_path`` was specified, which was inconsistent with the behavior when ``record_path`` was ``None`` (:issue:`63019`) -- Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`) +- Bug in :meth:`.DataFrame.to_json` when ``index`` argument was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`) - Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`) - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`) - Bug in :meth:`DataFrame.from_records` ignoring ``columns`` and ``index`` parameters when ``data`` is an empty iterator and ``nrows=0``. (:issue:`61140`) +- Bug in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`) - Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`) - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`) - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`) @@ -1244,13 +1244,15 @@ I/O - Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`) - Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`) - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) -- Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`) +- Bug in :meth:`HDFStore.get` was failing to save data of dtype ``datetime64[s]`` correctly (:issue:`59004`) - Bug in :meth:`HDFStore.select` causing queries on categorical string columns to return unexpected results (:issue:`57608`) - Bug in :meth:`MultiIndex.factorize` incorrectly raising on length-0 indexes (:issue:`57517`) - Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`) - Bug in :meth:`read_csv` for the ``c`` and ``python`` engines where parsing numbers with large exponents caused overflows. Now, numbers with large positive exponents are parsed as ``inf`` or ``-inf`` depending on the sign of the mantissa, while those with large negative exponents are parsed as ``0.0`` (:issue:`62617`, :issue:`38794`, :issue:`62740`) +- Bug in :meth:`DataFrame.to_csv` where ``quotechar`` is not escaped when ``escapechar`` is not ``None`` (:issue:`61407`) - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`) - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`) +- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`) - Bug in :meth:`read_csv` where it did not appropriately skip a line when instructed, causing Empty Data Error (:issue:`62739`) - Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`) - Bug in :meth:`read_csv` with ``c`` and ``python`` engines reading big integers as strings. Now reads them as python integers. (:issue:`51295`) @@ -1265,13 +1267,11 @@ I/O - Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`) - Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`) - Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`) -- Bug in :meth:`to_csv` where ``quotechar``` is not escaped when ``escapechar`` is not None (:issue:`61407`) - Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`) Period ^^^^^^ - Fixed error message when passing invalid period alias to :meth:`PeriodIndex.to_timestamp` (:issue:`58974`) -- Plotting ^^^^^^^^ @@ -1288,38 +1288,38 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ +- Bug in :class:`.DataFrameGroupBy` reductions where non-Boolean values were allowed for the ``numeric_only`` argument; passing a non-Boolean value will now raise (:issue:`62778`) - Bug in :meth:`.DataFrameGroupBy.__len__` and :meth:`.SeriesGroupBy.__len__` would raise when the grouping contained NA values and ``dropna=False`` (:issue:`58644`) -- Bug in :meth:`.DataFrameGroupBy.any` that returned True for groups where all Timedelta values are NaT. (:issue:`59712`) +- Bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.SeriesGroupBy.agg` that was returning numpy dtype values when input values are pyarrow dtype values, instead of returning pyarrow dtype values. (:issue:`53030`) +- Bug in :meth:`.DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`) +- Bug in :meth:`.DataFrameGroupBy.agg` where applying a user-defined function to an empty DataFrame returned a Series instead of an empty DataFrame. (:issue:`61503`) +- Bug in :meth:`.DataFrameGroupBy.any` that returned True for groups where all :class:`Timedelta` values are ``NaT``. (:issue:`59712`) +- Bug in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`) +- Bug in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` not preserving ``_metadata`` attributes from subclassed DataFrames and Series (:issue:`62134`) +- Bug in :meth:`.DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) +- Bug in :meth:`.DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) +- Bug in :meth:`.DataFrameGroupBy.cumsum` and :meth:`.DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`) +- Bug in :meth:`.DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`) +- Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`) - Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups` would fail when the groups were :class:`Categorical` with an NA value (:issue:`61356`) -- Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupby.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`) - Bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`) - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`) -- Bug in :meth:`.DataFrameGroupBy.sum` and :meth:`.SeriesGroupby.groups` returning ``NaN`` on overflow. These methods now returns ``inf`` or ``-inf`` on overflow. (:issue:`60303`) -- Bug in :meth:`.DataFrameGroupBy` reductions where non-Boolean values were allowed for the ``numeric_only`` argument; passing a non-Boolean value will now raise (:issue:`62778`) +- Bug in :meth:`.DataFrameGroupBy.sum` and :meth:`.SeriesGroupBy.sum` returning ``NaN`` on overflow. These methods now returns ``inf`` or ``-inf`` on overflow. (:issue:`60303`) +- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`) +- Bug in :meth:`.Resampler.asfreq` where fixed-frequency indexes with ``origin`` ignored alignment and returned incorrect values. Now ``origin`` and ``offset`` are respected. (:issue:`62725`) - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`) +- Bug in :meth:`.Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`) +- Bug in :meth:`.Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`) +- Bug in :meth:`.Rolling.sem` computing incorrect results because it divided by ``sqrt((n - 1) * (n - ddof))`` instead of ``sqrt(n * (n - ddof))``. (:issue:`63180`) - Bug in :meth:`.Series.rolling` when used with a :class:`.BaseIndexer` subclass and computing min/max (:issue:`46726`) - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`) -- Bug in :meth:`DataFrame.resample.asfreq` where fixed-frequency indexes with ``origin`` ignored alignment and returned incorrect values. Now ``origin`` and ``offset`` are respected. (:issue:`62725`) - Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` were not keeping the index name when the index had :class:`ArrowDtype` timestamp dtype (:issue:`61222`) - Bug in :meth:`DataFrame.resample` changing index type to :class:`MultiIndex` when the dataframe is empty and using an upsample method (:issue:`55572`) -- Bug in :meth:`DataFrameGroupBy.agg` and :meth:`SeriesGroupBy.agg` that was returning numpy dtype values when input values are pyarrow dtype values, instead of returning pyarrow dtype values. (:issue:`53030`) -- Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`) -- Bug in :meth:`DataFrameGroupBy.agg` where applying a user-defined function to an empty DataFrame returned a Series instead of an empty DataFrame. (:issue:`61503`) -- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`) -- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` not preserving ``_metadata`` attributes from subclassed DataFrames and Series (:issue:`62134`) -- Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) -- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) -- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`) -- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`) -- Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`) -- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`) -- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`) -- Bug in :meth:`Rolling.sem` computing incorrect results because it divided by ``sqrt((n - 1) * (n - ddof))`` instead of ``sqrt(n * (n - ddof))``. (:issue:`63180`) -- Bug in :meth:`Rolling.skew` and in :meth:`Rolling.kurt` incorrectly computing skewness and kurtosis, respectively, for windows following outliers due to numerical instability. The calculation now properly handles catastrophic cancellation by recomputing affected windows (:issue:`47461`, :issue:`61416`) -- Bug in :meth:`Rolling.skew` and in :meth:`Rolling.kurt` where results varied with input length despite identical data and window contents (:issue:`54380`) +- Bug in :meth:`.Rolling.skew` and in :meth:`.Rolling.kurt` incorrectly computing skewness and kurtosis, respectively, for windows following outliers due to numerical instability. The calculation now properly handles catastrophic cancellation by recomputing affected windows (:issue:`47461`, :issue:`61416`) +- Bug in :meth:`.Rolling.skew` and in :meth:`.Rolling.kurt` where results varied with input length despite identical data and window contents (:issue:`54380`) - Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`) - Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`) -- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` computing incorrect results due to numerical instability. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`) +- Bug in :meth:`.Rolling.var` and :meth:`.Rolling.std` computing incorrect results due to numerical instability. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`) - Bug in :meth:`DataFrame.groupby` methods when operating on NumPy-nullable data failing when the NA mask was not C-contiguous (:issue:`61031`) - Bug in :meth:`DataFrame.groupby` when grouping by a Series and that Series was modified after calling :meth:`DataFrame.groupby` but prior to the groupby operation (:issue:`63219`) @@ -1342,9 +1342,9 @@ Reshaping - Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`) - Bug in :meth:`DataFrame.pivot_table` incorrectly ignoring the ``values`` argument when also supplied to the ``index`` or ``columns`` parameters (:issue:`57876`, :issue:`61292`) - Bug in :meth:`DataFrame.pivot_table` where ``margins=True`` did not correctly include groups with ``NaN`` values in the index or columns when ``dropna=False`` was explicitly passed. (:issue:`61509`) -- Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`) +- Bug in :meth:`DataFrame.stack` with the ``future_stack=True`` where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`) - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`) -- Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`) +- Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index=True`` drops the Series name (:issue:`60723`, :issue:`56257`) - Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`) - Bug in :meth:`DataFrame.merge` where specifying both ``right_on`` and ``right_index`` did not raise a ``MergeError`` if ``left_on`` is also specified. Now raises a ``MergeError`` in such cases. (:issue:`63242`) - Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`) @@ -1356,11 +1356,10 @@ Sparse - Bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`) - Bug in :meth:`DataFrame.sparse.from_spmatrix` which hard coded an invalid ``fill_value`` for certain subtypes. (:issue:`59063`) - Bug in :meth:`DataFrame.sparse.to_dense` which ignored subclassing and always returned an instance of :class:`DataFrame` (:issue:`59913`) -- Bug in :meth:`cumsum` for integer arrays Calling SparseArray.cumsum caused max recursion depth error. (:issue:`62669`) +- Bug in :meth:`SparseArray.cumsum` with integer data caused max recursion depth error. (:issue:`62669`) ExtensionArray ^^^^^^^^^^^^^^ -- Bug in :class:`Categorical` when constructing with an :class:`Index` with :class:`ArrowDtype` (:issue:`60563`) - Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`) - Bug in :meth:`ArrowExtensionArray.factorize` where NA values were dropped when input was dictionary-encoded even when dropna was set to False(:issue:`60567`) - Bug in :meth:`NDArrayBackedExtensionArray.take` which produced arrays whose dtypes didn't match their underlying data, when called with integer arrays (:issue:`62448`) @@ -1368,17 +1367,17 @@ ExtensionArray - Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`) - Bug in constructing pandas data structures when passing into ``dtype`` a string of the type followed by ``[pyarrow]`` while PyArrow is not installed would raise ``NameError`` rather than ``ImportError`` (:issue:`57928`) - Bug in various :class:`DataFrame` reductions for pyarrow temporal dtypes returning incorrect dtype when result was null (:issue:`59234`) -- Fixed flex arithmetic with :class:`ExtensionArray` operands raising when ``fill_value`` was passed. (:issue:`62467`) +- Fixed flex arithmetic with :class:`.ExtensionArray` operands raising when ``fill_value`` was passed. (:issue:`62467`) Styler ^^^^^^ -- Bug in :meth:`Styler.to_latex` where styling column headers when combined with a hidden index or hidden index-levels is fixed. +- Bug in :meth:`.Styler.to_latex` where styling column headers when combined with a hidden index or hidden index-levels is fixed. Other ^^^^^ - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`) -- Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`) +- Bug in :func:`eval` on :class:`.ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`) - Bug in :func:`eval` where method calls on binary operations like ``(x + y).dropna()`` would raise ``AttributeError: 'BinOp' object has no attribute 'value'`` (:issue:`61175`) - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) - Bug in :func:`eval` with ``engine="numexpr"`` returning unexpected result for float division. (:issue:`59736`) @@ -1399,32 +1398,26 @@ Other - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`) - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`) - Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`) -- Bug in :meth:`MultiIndex.fillna` error message was referring to ``isna`` instead of ``fillna`` (:issue:`60974`) - Bug in :meth:`Series.describe` where median percentile was always included when the ``percentiles`` argument was passed (:issue:`60550`). - Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`) - Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`) - Bug in :meth:`Series.kurt` and :meth:`Series.skew` resulting in zero for low variance arrays (:issue:`57972`) +- Bug in :meth:`Series.list` accessor methods not preserving the original :class:`Index`. (:issue:`58425`) +- Bug in :meth:`Series.list` accessor methods not preserving the original name. (:issue:`60522`) - Bug in :meth:`Series.map` with a ``timestamp[pyarrow]`` dtype or ``duration[pyarrow]`` dtype incorrectly returning all-``NaN`` entries (:issue:`61231`) - Bug in :meth:`Series.mode` where an exception was raised when taking the mode with nullable types with no null values in the series. (:issue:`58926`) - Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` throwing ``ValueError`` when ``regex=True`` and all NA values. (:issue:`60688`) +- Bug in :meth:`Series.replace` when the Series was created from an :class:`Index` and Copy-On-Write is enabled (:issue:`61622`) - Bug in :meth:`Series.to_string` when series contains complex floats with exponents (:issue:`60405`) -- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`) - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`) -- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`) -- Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`) -- Bug in ``Series.replace`` when the Series was created from an :class:`Index` and Copy-On-Write is enabled (:issue:`61622`) - Bug in ``divmod`` and ``rdivmod`` with :class:`DataFrame`, :class:`Series`, and :class:`Index` with ``bool`` dtypes failing to raise, which was inconsistent with ``__floordiv__`` behavior (:issue:`46043`) - Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`) - Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`) - Bug when calling :py:func:`copy.copy` on a :class:`DataFrame` or :class:`Series` which would return a deep copy instead of a shallow copy (:issue:`62971`) -- Deprecated the keyword ``check_datetimelike_compat`` in :meth:`testing.assert_frame_equal` and :meth:`testing.assert_series_equal` (:issue:`55638`) -- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace :class:`NA` values in a :class:`Float64Dtype` object with ``np.nan``; this now works with ``pd.set_option("mode.nan_is_na", False)`` and is irrelevant otherwise (:issue:`55127`) -- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace :class:`np.nan` values in a :class:`Int64Dtype` object with :class:`NA`; this is now a no-op with ``pd.set_option("mode.nan_is_na", False)`` and is irrelevant otherwise (:issue:`51237`) - Fixed bug in the :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`) - Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`) -- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`) - Accessing the underlying NumPy array of a DataFrame or Series will return a read-only array if the array shares data with the original DataFrame or Series (:ref:`copy_on_write_read_only_na`). This logic is expanded to accessing the underlying pandas ExtensionArray @@ -1432,9 +1425,6 @@ Other .. ***DO NOT USE THIS SECTION*** -- -- - .. --------------------------------------------------------------------------- .. _whatsnew_300.contributors: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index f2bf18fba8c42..3263009c3ba2a 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -687,6 +687,11 @@ cdef class BaseOffset: """ Roll provided date backward to next offset only if not on offset. + Parameters + ---------- + dt : datetime or Timestamp + Timestamp to rollback. + Returns ------- TimeStamp @@ -704,6 +709,11 @@ cdef class BaseOffset: """ Roll provided date forward to next offset only if not on offset. + Parameters + ---------- + dt : datetime or Timestamp + Timestamp to rollback. + Returns ------- TimeStamp diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 79f1c15d26ee6..7d4c0b6b6e80e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1866,7 +1866,6 @@ def from_arrow( Returns ------- DataFrame - """ pa = import_optional_dependency("pyarrow", min_version="14.0.0") if not isinstance(data, pa.Table): diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 210a177d073f7..386681ceb1cf0 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -3780,7 +3780,7 @@ def casefold(self): Series.str.isupper : Check whether all characters are uppercase. Examples - ------------ + -------- The ``s5.str.isascii`` method checks for whether all characters are ascii characters, which includes digits 0-9, capital and lowercase letters A-Z, and some other special characters.