From 218f840d2ec0eebe5deda06113cceacde8563fcc Mon Sep 17 00:00:00 2001 From: zhangbowen-coder <2439796518@qq.com> Date: Tue, 28 Oct 2025 19:02:20 +0800 Subject: [PATCH 01/15] TST: Every function ensure_clean in the file /pandas/tests/io/test_common.py and /pandas/tests/io/pytables/test_store.py has been replaced. --- pandas/tests/io/pytables/test_store.py | 80 +++++------ pandas/tests/io/test_common.py | 190 ++++++++++++------------- 2 files changed, 129 insertions(+), 141 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 5cfefeb469e8a..3525b4d33dd41 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -41,22 +41,21 @@ tables = pytest.importorskip("tables") -def test_context(setup_path): - with tm.ensure_clean(setup_path) as path: - try: - with HDFStore(path) as tbl: - raise ValueError("blah") - except ValueError: - pass - with tm.ensure_clean(setup_path) as path: - with HDFStore(path) as tbl: - tbl["a"] = DataFrame( - 1.1 * np.arange(120).reshape((30, 4)), - columns=Index(list("ABCD"), dtype=object), - index=Index([f"i-{i}" for i in range(30)], dtype=object), - ) - assert len(tbl) == 1 - assert type(tbl["a"]) == DataFrame +def test_context(setup_path, tmp_path): + + try: + with HDFStore(tmp_path / setup_path) as tbl: + raise ValueError("blah") + except ValueError: + pass + with HDFStore(tmp_path / setup_path) as tbl: + tbl["a"] = DataFrame( + 1.1 * np.arange(120).reshape((30, 4)), + columns=Index(list("ABCD"), dtype=object), + index=Index([f"i-{i}" for i in range(30)], dtype=object), + ) + assert len(tbl) == 1 + assert type(tbl["a"]) == DataFrame def test_no_track_times(tmp_path, setup_path): @@ -971,37 +970,36 @@ def test_pickle_path_localpath(): @pytest.mark.parametrize("propindexes", [True, False]) -def test_copy(propindexes): +def test_copy(propindexes, temp_file): df = DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=Index(list("ABCD")), 
index=Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with HDFStore(path) as st: - st.append("df", df, data_columns=["A"]) - with tempfile.NamedTemporaryFile() as new_f: - with HDFStore(path) as store: - with contextlib.closing( - store.copy(new_f.name, keys=None, propindexes=propindexes) - ) as tstore: - # check keys - keys = store.keys() - assert set(keys) == set(tstore.keys()) - # check indices & nrows - for k in tstore.keys(): - if tstore.get_storer(k).is_table: - new_t = tstore.get_storer(k) - orig_t = store.get_storer(k) - - assert orig_t.nrows == new_t.nrows - - # check propindixes - if propindexes: - for a in orig_t.axes: - if a.is_indexed: - assert new_t[a.name].is_indexed + with HDFStore(temp_file) as st: + st.append("df", df, data_columns=["A"]) + with tempfile.NamedTemporaryFile() as new_f: + with HDFStore(temp_file) as store: + with contextlib.closing( + store.copy(new_f.name, keys=None, propindexes=propindexes) + ) as tstore: + # check keys + keys = store.keys() + assert set(keys) == set(tstore.keys()) + # check indices & nrows + for k in tstore.keys(): + if tstore.get_storer(k).is_table: + new_t = tstore.get_storer(k) + orig_t = store.get_storer(k) + + assert orig_t.nrows == new_t.nrows + + # check propindixes + if propindexes: + for a in orig_t.axes: + if a.is_indexed: + assert new_t[a.name].is_indexed def test_duplicate_column_name(tmp_path, setup_path): diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index ab27fda8dcdf5..6798b9fb7e7f1 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -86,12 +86,11 @@ def test_stringify_path_fspath(self): result = icom.stringify_path(p) assert result == "foo/bar.csv" - def test_stringify_file_and_path_like(self): + def test_stringify_file_and_path_like(self, temp_file): # GH 38125: do not stringify file objects that are also path-like fsspec = pytest.importorskip("fsspec") - with tm.ensure_clean() as path: - with 
fsspec.open(f"file://{path}", mode="wb") as fsspec_obj: - assert fsspec_obj == icom.stringify_path(fsspec_obj) + with fsspec.open(f"file://{temp_file}", mode="wb") as fsspec_obj: + assert fsspec_obj == icom.stringify_path(fsspec_obj) @pytest.mark.parametrize("path_type", [str, CustomFSPath, Path]) def test_infer_compression_from_path(self, compression_format, path_type): @@ -338,49 +337,47 @@ def test_read_fspath_all(self, reader, module, path, datapath): ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"), ], ) - def test_write_fspath_all(self, writer_name, writer_kwargs, module): + def test_write_fspath_all(self, writer_name, writer_kwargs, module, tmp_path): if writer_name in ["to_latex"]: # uses Styler implementation pytest.importorskip("jinja2") - p1 = tm.ensure_clean("string") - p2 = tm.ensure_clean("fspath") + string = str(tmp_path / "string") + fspath = str(tmp_path / "fspath") df = pd.DataFrame({"A": [1, 2]}) - with p1 as string, p2 as fspath: - pytest.importorskip(module) - mypath = CustomFSPath(fspath) - writer = getattr(df, writer_name) - - writer(string, **writer_kwargs) - writer(mypath, **writer_kwargs) - with open(string, "rb") as f_str, open(fspath, "rb") as f_path: - if writer_name == "to_excel": - # binary representation of excel contains time creation - # data that causes flaky CI failures - result = pd.read_excel(f_str, **writer_kwargs) - expected = pd.read_excel(f_path, **writer_kwargs) - tm.assert_frame_equal(result, expected) - else: - result = f_str.read() - expected = f_path.read() - assert result == expected - - def test_write_fspath_hdf5(self): + pytest.importorskip(module) + mypath = CustomFSPath(fspath) + writer = getattr(df, writer_name) + + writer(string, **writer_kwargs) + writer(mypath, **writer_kwargs) + with open(string, "rb") as f_str, open(fspath, "rb") as f_path: + if writer_name == "to_excel": + # binary representation of excel contains time creation + # data that causes flaky CI failures + result = 
pd.read_excel(f_str, **writer_kwargs) + expected = pd.read_excel(f_path, **writer_kwargs) + tm.assert_frame_equal(result, expected) + else: + result = f_str.read() + expected = f_path.read() + assert result == expected + + def test_write_fspath_hdf5(self, tmp_path): # Same test as write_fspath_all, except HDF5 files aren't # necessarily byte-for-byte identical for a given dataframe, so we'll # have to read and compare equality pytest.importorskip("tables") df = pd.DataFrame({"A": [1, 2]}) - p1 = tm.ensure_clean("string") - p2 = tm.ensure_clean("fspath") + string = str(tmp_path / "string") + fspath = str(tmp_path / "fspath") - with p1 as string, p2 as fspath: - mypath = CustomFSPath(fspath) - df.to_hdf(mypath, key="bar") - df.to_hdf(string, key="bar") + mypath = CustomFSPath(fspath) + df.to_hdf(mypath, key="bar") + df.to_hdf(string, key="bar") - result = pd.read_hdf(fspath, key="bar") - expected = pd.read_hdf(string, key="bar") + result = pd.read_hdf(fspath, key="bar") + expected = pd.read_hdf(string, key="bar") tm.assert_frame_equal(result, expected) @@ -432,35 +429,33 @@ def test_next(self, mmap_file): with pytest.raises(StopIteration, match=r"^$"): next(wrapper) - def test_unknown_engine(self): - with tm.ensure_clean() as path: - df = pd.DataFrame( - 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD")), - index=pd.Index([f"i-{i}" for i in range(30)]), - ) - df.to_csv(path) - with pytest.raises(ValueError, match="Unknown engine"): - pd.read_csv(path, engine="pyt") - - def test_binary_mode(self): + def test_unknown_engine(self, temp_file): + df = pd.DataFrame( + 1.1 * np.arange(120).reshape((30, 4)), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), + ) + df.to_csv(temp_file) + with pytest.raises(ValueError, match="Unknown engine"): + pd.read_csv(temp_file, engine="pyt") + + def test_binary_mode(self, temp_file): """ 'encoding' shouldn't be passed to 'open' in binary mode. 
GH 35058 """ - with tm.ensure_clean() as path: - df = pd.DataFrame( - 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD")), - index=pd.Index([f"i-{i}" for i in range(30)]), - ) - df.to_csv(path, mode="w+b") - tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) + df = pd.DataFrame( + 1.1 * np.arange(120).reshape((30, 4)), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), + ) + df.to_csv(temp_file, mode="w+b") + tm.assert_frame_equal(df, pd.read_csv(temp_file, index_col=0)) @pytest.mark.parametrize("encoding", ["utf-16", "utf-32"]) @pytest.mark.parametrize("compression_", ["bz2", "xz"]) - def test_warning_missing_utf_bom(self, encoding, compression_): + def test_warning_missing_utf_bom(self, encoding, compression_, temp_file): """ bz2 and xz do not write the byte order mark (BOM) for utf-16/32. @@ -473,17 +468,16 @@ def test_warning_missing_utf_bom(self, encoding, compression_): columns=pd.Index(list("ABCD")), index=pd.Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"): - df.to_csv(path, compression=compression_, encoding=encoding) + with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"): + df.to_csv(temp_file, compression=compression_, encoding=encoding) - # reading should fail (otherwise we wouldn't need the warning) - msg = ( - r"UTF-\d+ stream does not start with BOM|" - r"'utf-\d+' codec can't decode byte" - ) - with pytest.raises(UnicodeError, match=msg): - pd.read_csv(path, compression=compression_, encoding=encoding) + # reading should fail (otherwise we wouldn't need the warning) + msg = ( + r"UTF-\d+ stream does not start with BOM|" + r"'utf-\d+' codec can't decode byte" + ) + with pytest.raises(UnicodeError, match=msg): + pd.read_csv(temp_file, compression=compression_, encoding=encoding) def test_is_fsspec_url(): @@ -514,38 +508,36 @@ def test_is_fsspec_url_chained(): 
@pytest.mark.parametrize("format", ["csv", "json"]) -def test_codecs_encoding(format): +def test_codecs_encoding(format, temp_file): # GH39247 expected = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=pd.Index(list("ABCD")), index=pd.Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with open(path, mode="w", encoding="utf-8") as handle: - getattr(expected, f"to_{format}")(handle) - with open(path, encoding="utf-8") as handle: - if format == "csv": - df = pd.read_csv(handle, index_col=0) - else: - df = pd.read_json(handle) + with open(temp_file, mode="w", encoding="utf-8") as handle: + getattr(expected, f"to_{format}")(handle) + with open(temp_file, encoding="utf-8") as handle: + if format == "csv": + df = pd.read_csv(handle, index_col=0) + else: + df = pd.read_json(handle) tm.assert_frame_equal(expected, df) -def test_codecs_get_writer_reader(): +def test_codecs_get_writer_reader(temp_file): # GH39247 expected = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=pd.Index(list("ABCD")), index=pd.Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with open(path, "wb") as handle: - with codecs.getwriter("utf-8")(handle) as encoded: - expected.to_csv(encoded) - with open(path, "rb") as handle: - with codecs.getreader("utf-8")(handle) as encoded: - df = pd.read_csv(encoded, index_col=0) + with open(temp_file, "wb") as handle: + with codecs.getwriter("utf-8")(handle) as encoded: + expected.to_csv(encoded) + with open(temp_file, "rb") as handle: + with codecs.getreader("utf-8")(handle) as encoded: + df = pd.read_csv(encoded, index_col=0) tm.assert_frame_equal(expected, df) @@ -572,7 +564,7 @@ def test_explicit_encoding(io_class, mode, msg): @pytest.mark.parametrize("encoding_errors", ["strict", "replace"]) @pytest.mark.parametrize("format", ["csv", "json"]) -def test_encoding_errors(encoding_errors, format): +def test_encoding_errors(encoding_errors, format, temp_file): # GH39450 msg = "'utf-8' 
codec can't decode byte" bad_encoding = b"\xe4" @@ -591,18 +583,17 @@ def test_encoding_errors(encoding_errors, format): + b'"}}' ) reader = partial(pd.read_json, orient="index") - with tm.ensure_clean() as path: - file = Path(path) - file.write_bytes(content) + file = Path(temp_file) + file.write_bytes(content) - if encoding_errors != "replace": - with pytest.raises(UnicodeDecodeError, match=msg): - reader(path, encoding_errors=encoding_errors) - else: - df = reader(path, encoding_errors=encoding_errors) - decoded = bad_encoding.decode(errors=encoding_errors) - expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2]) - tm.assert_frame_equal(df, expected) + if encoding_errors != "replace": + with pytest.raises(UnicodeDecodeError, match=msg): + reader(temp_file, encoding_errors=encoding_errors) + else: + df = reader(temp_file, encoding_errors=encoding_errors) + decoded = bad_encoding.decode(errors=encoding_errors) + expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2]) + tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("encoding_errors", [0, None]) @@ -616,11 +607,10 @@ def test_encoding_errors_badtype(encoding_errors): reader(content) -def test_bad_encdoing_errors(): +def test_bad_encdoing_errors(temp_file): # GH 39777 - with tm.ensure_clean() as path: - with pytest.raises(LookupError, match="unknown error handler name"): - icom.get_handle(path, "w", errors="bad") + with pytest.raises(LookupError, match="unknown error handler name"): + icom.get_handle(temp_file, "w", errors="bad") @pytest.mark.skipif(WASM, reason="limited file system access on WASM") @@ -653,7 +643,7 @@ def close(self): @pytest.mark.parametrize("compression", [None, "infer"]) def test_read_csv_chained_url_no_error(compression): # GH 60100 - tar_file_path = "pandas/tests/io/data/tar/test-csv.tar" + tar_file_path = "data/tar/test-csv.tar" chained_file_url = f"tar://test.csv::file://{tar_file_path}" result = pd.read_csv(chained_file_url, compression=compression, 
sep=";") From 085ac8773bde0fd4180c4a51fc969795499e2649 Mon Sep 17 00:00:00 2001 From: zhangbowen-coder <2439796518@qq.com> Date: Tue, 28 Oct 2025 19:14:09 +0800 Subject: [PATCH 02/15] Update: precommit passed. --- pandas/tests/io/pytables/test_store.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 3525b4d33dd41..688fedb55ca5a 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -42,7 +42,6 @@ def test_context(setup_path, tmp_path): - try: with HDFStore(tmp_path / setup_path) as tbl: raise ValueError("blah") From 798eba003198a33e7c24c49464cfa9af7184693d Mon Sep 17 00:00:00 2001 From: zhangbowen-coder <2439796518@qq.com> Date: Tue, 28 Oct 2025 19:39:29 +0800 Subject: [PATCH 03/15] Update. --- pandas/tests/io/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 6798b9fb7e7f1..84d5de51ff709 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -643,7 +643,7 @@ def close(self): @pytest.mark.parametrize("compression", [None, "infer"]) def test_read_csv_chained_url_no_error(compression): # GH 60100 - tar_file_path = "data/tar/test-csv.tar" + tar_file_path = "pandas/tests/io/data/tar/test-csv.tar" chained_file_url = f"tar://test.csv::file://{tar_file_path}" result = pd.read_csv(chained_file_url, compression=compression, sep=";") From 01c5281cc8d1fe41d6023d49a69330fe0f80f898 Mon Sep 17 00:00:00 2001 From: zhangbowen-coder <2439796518@qq.com> Date: Thu, 30 Oct 2025 17:16:25 +0800 Subject: [PATCH 04/15] Fixed --- pandas/tests/io/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 84d5de51ff709..5ff7ad5a4e785 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -583,7 +583,7 @@ def 
test_encoding_errors(encoding_errors, format, temp_file): + b'"}}' ) reader = partial(pd.read_json, orient="index") - file = Path(temp_file) + file = temp_file file.write_bytes(content) if encoding_errors != "replace": From bf598ce09bf3847a7ce6aa209e04c5e9aa4ab962 Mon Sep 17 00:00:00 2001 From: Nikhil <72097440+Nikhil-Narayanan@users.noreply.github.com> Date: Wed, 29 Oct 2025 16:50:56 +0000 Subject: [PATCH 05/15] TST: ensuring bins in cut during tiling are predictably small (#62868) --- pandas/tests/reshape/test_cut.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 10335ff716c1f..5b535ad0061ba 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -410,6 +410,22 @@ def test_single_bin(data, length): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize( + "values,threshold", + [ + ([0.1, 0.1, 0.1], 0.001), # small positive values + ([-0.1, -0.1, -0.1], 0.001), # negative values + ([0.01, 0.01, 0.01], 0.0001), # very small values + ], +) +def test_single_bin_edge_adjustment(values, threshold): + # gh-58517 - edge adjustment mutation when all values are same + result, bins = cut(values, 3, retbins=True) + + bin_range = bins[-1] - bins[0] + assert bin_range < threshold + + @pytest.mark.parametrize( "array_1_writeable,array_2_writeable", [(True, True), (True, False), (False, False)] ) From 0eed2c83e76849b8bb4172a8dc8af09e12f7716c Mon Sep 17 00:00:00 2001 From: Zorex Salvo Date: Thu, 30 Oct 2025 01:01:06 +0800 Subject: [PATCH 06/15] STY: use strict zip in `pandas/tests/window` (#62852) --- pandas/tests/window/test_cython_aggregations.py | 2 +- pandas/tests/window/test_expanding.py | 4 ++-- pandas/tests/window/test_rolling.py | 15 +++++++++------ pyproject.toml | 3 --- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/window/test_cython_aggregations.py b/pandas/tests/window/test_cython_aggregations.py 
index 39811ea3ec5b9..2e23618a3a201 100644 --- a/pandas/tests/window/test_cython_aggregations.py +++ b/pandas/tests/window/test_cython_aggregations.py @@ -62,7 +62,7 @@ def _get_rolling_aggregations(): ] ) # unzip to a list of 2 tuples, names and functions - unzipped = list(zip(*named_roll_aggs)) + unzipped = list(zip(*named_roll_aggs, strict=True)) return {"ids": unzipped[0], "params": unzipped[1]} diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 2c96ce01c6328..ddcdf07beeb4c 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -180,7 +180,7 @@ def test_iter_expanding_dataframe(df, expected, min_periods): df = DataFrame(df) expecteds = [DataFrame(values, index=index) for (values, index) in expected] - for expected, actual in zip(expecteds, df.expanding(min_periods)): + for expected, actual in zip(expecteds, df.expanding(min_periods), strict=False): tm.assert_frame_equal(actual, expected) @@ -199,7 +199,7 @@ def test_iter_expanding_series(ser, expected, min_periods): # GH 11704 expecteds = [Series(values, index=index) for (values, index) in expected] - for expected, actual in zip(expecteds, ser.expanding(min_periods)): + for expected, actual in zip(expecteds, ser.expanding(min_periods), strict=True): tm.assert_series_equal(actual, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 18aafa0d7b71e..3003b142edd3b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -764,7 +764,9 @@ def test_iter_rolling_dataframe(df, expected, window, min_periods): df = DataFrame(df) expecteds = [DataFrame(values, index=index) for (values, index) in expected] - for expected, actual in zip(expecteds, df.rolling(window, min_periods=min_periods)): + for expected, actual in zip( + expecteds, df.rolling(window, min_periods=min_periods), strict=False + ): tm.assert_frame_equal(actual, expected) @@ -810,7 +812,7 
@@ def test_iter_rolling_on_dataframe(expected, window): expecteds = [ DataFrame(values, index=df.loc[index, "C"]) for (values, index) in expected ] - for expected, actual in zip(expecteds, df.rolling(window, on="C")): + for expected, actual in zip(expecteds, df.rolling(window, on="C"), strict=False): tm.assert_frame_equal(actual, expected) @@ -819,7 +821,7 @@ def test_iter_rolling_on_dataframe_unordered(): df = DataFrame({"a": ["x", "y", "x"], "b": [0, 1, 2]}) results = list(df.groupby("a").rolling(2)) expecteds = [df.iloc[idx, [1]] for idx in [[0], [0, 2], [1]]] - for result, expected in zip(results, expecteds): + for result, expected in zip(results, expecteds, strict=True): tm.assert_frame_equal(result, expected) @@ -861,7 +863,7 @@ def test_iter_rolling_series(ser, expected, window, min_periods): expecteds = [Series(values, index=index) for (values, index) in expected] for expected, actual in zip( - expecteds, ser.rolling(window, min_periods=min_periods) + expecteds, ser.rolling(window, min_periods=min_periods), strict=True ): tm.assert_series_equal(actual, expected) @@ -909,10 +911,11 @@ def test_iter_rolling_datetime(expected, expected_index, window): ser = Series(range(5), index=date_range(start="2020-01-01", periods=5, freq="D")) expecteds = [ - Series(values, index=idx) for (values, idx) in zip(expected, expected_index) + Series(values, index=idx) + for (values, idx) in zip(expected, expected_index, strict=True) ] - for expected, actual in zip(expecteds, ser.rolling(window)): + for expected, actual in zip(expecteds, ser.rolling(window), strict=True): tm.assert_series_equal(actual, expected) diff --git a/pyproject.toml b/pyproject.toml index 094d0b44a6721..49bb49aab03c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -591,9 +591,6 @@ exclude = [ "pandas/tests/tseries/offsets/test_month.py" = ["B905"] "pandas/tests/tseries/offsets/test_offsets.py" = ["B905"] "pandas/tests/util/test_validate_kwargs.py" = ["B905"] 
-"pandas/tests/window/test_cython_aggregations.py" = ["B905"] -"pandas/tests/window/test_expanding.py" = ["B905"] -"pandas/tests/window/test_rolling.py" = ["B905"] "scripts/validate_unwanted_patterns.py" = ["B905"] [tool.ruff.lint.flake8-pytest-style] From dd87cbe431666bb618757d95c31e1892dd111bc3 Mon Sep 17 00:00:00 2001 From: "Christine P. Chai" Date: Wed, 29 Oct 2025 10:12:06 -0700 Subject: [PATCH 07/15] DOC: Correct grammatical issues with a/an usage (#62904) --- doc/make.py | 2 +- pandas/_config/localization.py | 2 +- pandas/core/arrays/categorical.py | 6 +++--- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/dtypes/base.py | 2 +- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/internals/construction.py | 2 +- pandas/core/window/expanding.py | 2 +- pandas/core/window/numba_.py | 2 +- pandas/io/excel/_odswriter.py | 2 +- pandas/io/json/_normalize.py | 2 +- pandas/plotting/_misc.py | 4 ++-- pandas/tests/indexes/datetimes/test_date_range.py | 2 +- pandas/tests/reshape/test_melt.py | 2 +- pandas/tests/scalar/timestamp/test_constructors.py | 2 +- 15 files changed, 18 insertions(+), 18 deletions(-) diff --git a/doc/make.py b/doc/make.py index 9542563dc037b..9e627f9a8137b 100755 --- a/doc/make.py +++ b/doc/make.py @@ -105,7 +105,7 @@ def _process_single_doc(self, single_doc): @staticmethod def _run_os(*args) -> None: """ - Execute a command as a OS terminal. + Execute a command as an OS terminal. 
Parameters ---------- diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index 6602633f20399..8e2af78f68eb2 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -79,7 +79,7 @@ def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool: with set_locale(lc, lc_var=lc_var): pass except (ValueError, locale.Error): - # horrible name for a Exception subclass + # horrible name for an Exception subclass return False else: return True diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6f572d0f72df5..41e5c6f65dbb9 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -269,7 +269,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi categories are assumed to be the unique values of `values` (sorted, if possible, otherwise in the order in which they appear). ordered : bool, default False - Whether or not this categorical is treated as a ordered categorical. + Whether or not this categorical is treated as an ordered categorical. If True, the resulting categorical will be ordered. An ordered categorical respects, when sorted, the order of its `categories` attribute (which in turn is the `categories` argument, if @@ -1103,7 +1103,7 @@ def set_categories( new_categories : Index-like The categories in new order. ordered : bool, default None - Whether or not the categorical is treated as a ordered categorical. + Whether or not the categorical is treated as an ordered categorical. If not given, do not change the ordered information. rename : bool, default False Whether or not the new_categories should be considered as a rename @@ -1277,7 +1277,7 @@ def reorder_categories(self, new_categories, ordered=None) -> Self: new_categories : Index-like The categories in new order. ordered : bool, optional - Whether or not the categorical is treated as a ordered categorical. 
+ Whether or not the categorical is treated as an ordered categorical. If not given, do not change the ordered information. Returns diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 9b3452c318f9c..c081d6190204e 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -1212,7 +1212,7 @@ def _objects_to_td64ns( data, unit=None, errors: DateTimeErrorChoices = "raise" ) -> np.ndarray: """ - Convert a object-dtyped or string-dtyped array into an + Convert an object-dtyped or string-dtyped array into a timedelta64[ns]-dtyped array. Parameters diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index ee48cf45c6c9f..59f360650ff8c 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -517,7 +517,7 @@ class Registry: """ Registry for dtype inference. - The registry allows one to map a string repr of a extension + The registry allows one to map a string repr of an extension dtype to an extension dtype. The string alias can be used in several places, including diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 6d99f9df73282..2e3d73edcdf4f 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -176,7 +176,7 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): The categories are stored in an Index, and if an index is provided the dtype of that index will be used. ordered : bool or None, default False - Whether or not this categorical is treated as a ordered categorical. + Whether or not this categorical is treated as an ordered categorical. None can be used to maintain the ordered value of existing categoricals when used in operations that combine categoricals, e.g. astype, and will resolve to False if there is no existing ordered to maintain. 
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 84ec9d2a344f5..8db37f24cdbb9 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -215,7 +215,7 @@ def ndarray_to_mgr( # GH#19157 if isinstance(values, (np.ndarray, ExtensionArray)) and values.ndim > 1: - # GH#12513 a EA dtype passed with a 2D array, split into + # GH#12513 an EA dtype passed with a 2D array, split into # multiple EAs that view the values # error: No overload variant of "__getitem__" of "ExtensionArray" # matches argument type "Tuple[slice, int]" diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 567ede27949d5..2527a5dd508d8 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -1454,7 +1454,7 @@ def corr( class ExpandingGroupby(BaseWindowGroupby, Expanding): """ - Provide a expanding groupby implementation. + Provide an expanding groupby implementation. """ __module__ = "pandas.api.typing" diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 171d3bc1d1c35..89ad94a6ccab2 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -189,7 +189,7 @@ def generate_numba_table_func( """ Generate a numba jitted function to apply window calculations table-wise. - Func will be passed a M window size x N number of columns array, and + Func will be passed an M window size x N number of columns array, and must return a 1 x N number of columns array. 1. jit the user's function diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index ba4919c9298ed..e9a06076f3aff 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -246,7 +246,7 @@ def _process_style(self, style: dict[str, Any]) -> str: ... def _process_style(self, style: None) -> None: ... 
def _process_style(self, style: dict[str, Any] | None) -> str | None: - """Convert a style dictionary to a OpenDocument style sheet + """Convert a style dictionary to an OpenDocument style sheet Parameters ---------- diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 16ec73ddeb743..583db03262769 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -219,7 +219,7 @@ def _simple_json_normalize( sep: str = ".", ) -> dict | list[dict] | Any: """ - A optimized basic json_normalize + An optimized basic json_normalize Converts a nested dict into a flat dict ("record"), unlike json_normalize and nested_to_record it doesn't do anything clever. diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 42b02ce99cb3c..4c445c570ae33 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -282,12 +282,12 @@ def radviz( """ Plot a multidimensional dataset in 2D. - Each Series in the DataFrame is represented as a evenly distributed + Each Series in the DataFrame is represented as an evenly distributed slice on a circle. Each data point is rendered in the circle according to the value on each Series. Highly correlated `Series` in the `DataFrame` are placed closer on the unit circle. - RadViz allow to project a N-dimensional data set into a 2D space where the + RadViz allows projecting an N-dimensional data set into a 2D space where the influence of each dimension can be interpreted as a balance between the influence of all dimensions. 
diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 85e2f6a8070e0..45f43f9bf9760 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -215,7 +215,7 @@ def test_date_range_multiplication_overflow(self): # check that overflows in calculating `addend = periods * stride` # are caught with tm.assert_produces_warning(None): - # we should _not_ be seeing a overflow RuntimeWarning + # we should _not_ be seeing an overflow RuntimeWarning dti = date_range(start="1677-09-22", periods=213503, freq="D") assert dti[0] == Timestamp("1677-09-22") diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 02544c9518d10..fba9c28282e94 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -933,7 +933,7 @@ def test_character_overlap(self): tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1)) def test_invalid_separator(self): - # if an invalid separator is supplied a empty data frame is returned + # if an invalid separator is supplied an empty data frame is returned sep = "nope!" 
df = DataFrame( { diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index d9defd47b6620..5c89cb3a66c2f 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -49,7 +49,7 @@ def test_constructor_int_float_with_YM_unit(self, typ): @pytest.mark.parametrize("typ", [int, float]) def test_construct_from_int_float_with_unit_out_of_bound_raises(self, typ): - # GH#50870 make sure we get a OutOfBoundsDatetime instead of OverflowError + # GH#50870 make sure we get an OutOfBoundsDatetime instead of OverflowError val = typ(150000000000000) msg = f"cannot convert input {val} with the unit 'D'" From 8cab820d8a87c1f770edba729f892403a9f4afd5 Mon Sep 17 00:00:00 2001 From: Aniket <148300120+Aniketsy@users.noreply.github.com> Date: Wed, 29 Oct 2025 22:51:57 +0530 Subject: [PATCH 08/15] TST : Add comprehensive unit tests for tm.shares_memory utility function (GH#55372) (#62291) --- pandas/tests/util/test_shares_memory.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/util/test_shares_memory.py b/pandas/tests/util/test_shares_memory.py index 8f1ac93b40247..94bc51dca3f60 100644 --- a/pandas/tests/util/test_shares_memory.py +++ b/pandas/tests/util/test_shares_memory.py @@ -30,3 +30,17 @@ def test_shares_memory_string(): obj = pd.array(["a", "b"], dtype=pd.ArrowDtype(pa.string())) assert tm.shares_memory(obj, obj) + + +def test_shares_memory_numpy(): + arr = np.arange(10) + view = arr[:5] + assert tm.shares_memory(arr, view) + arr2 = np.arange(10) + assert not tm.shares_memory(arr, arr2) + + +def test_shares_memory_rangeindex(): + idx = pd.RangeIndex(10) + arr = np.arange(10) + assert not tm.shares_memory(idx, arr) From 2b97516e7f646f0c3e297cc56aba57b2628d9681 Mon Sep 17 00:00:00 2001 From: Wang Haoxiang <2795352227@qq,com> Date: Thu, 30 Oct 2025 02:36:01 +0800 Subject: [PATCH 09/15] STY: Add strict=True to zip() calls in 
pandas\tests\arrays (#62905) Co-authored-by: wdyy20041223 <2795352227@qq,com> --- pandas/tests/arrays/sparse/test_constructors.py | 5 ++++- pandas/tests/arrays/test_datetimes.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/sparse/test_constructors.py b/pandas/tests/arrays/sparse/test_constructors.py index 0bf3ab77e9eed..c6099ea48cccb 100644 --- a/pandas/tests/arrays/sparse/test_constructors.py +++ b/pandas/tests/arrays/sparse/test_constructors.py @@ -77,7 +77,10 @@ def test_constructor_object_dtype_bool_fill(self): assert arr.dtype == SparseDtype(object, False) assert arr.fill_value is False arr_expected = np.array(data, dtype=object) - it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected)) + it = ( + type(x) == type(y) and x == y + for x, y in zip(arr, arr_expected, strict=True) + ) assert np.fromiter(it, dtype=np.bool_).all() @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int]) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 199e3572732a0..5a7cad77a9de0 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -144,7 +144,7 @@ def test_iter(self, dta): def test_astype_object(self, dta): result = dta.astype(object) assert all(x._creso == dta._creso for x in result) - assert all(x == y for x, y in zip(result, dta)) + assert all(x == y for x, y in zip(result, dta, strict=True)) def test_to_pydatetime(self, dta_dti): dta, dti = dta_dti From 0eba9c431b3476aec8a9ad1031164d00d361c78f Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva <91160475+natmokval@users.noreply.github.com> Date: Wed, 29 Oct 2025 19:37:24 +0100 Subject: [PATCH 10/15] DOC: inconsistent default values in the initialiser for RangeIndex (#62901) --- pandas/core/indexes/range.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ac794b18075d0..7a025cdd5fb68 100644 
--- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -88,13 +88,13 @@ class RangeIndex(Index): Parameters ---------- - start : int (default: 0), range, or other RangeIndex instance + start : int, range, or other RangeIndex instance, default None If int and "stop" is not given, interpreted as "stop" instead. - stop : int (default: 0) + stop : int, default None The end value of the range (exclusive). - step : int (default: 1) + step : int, default None The step size of the range. - dtype : np.int64 + dtype : np.int64, default None Unused, accepted for homogeneity with other index types. copy : bool, default False Unused, accepted for homogeneity with other index types. From 1fc35cb94b85a75d2e8050fa924ed63c3227b437 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 29 Oct 2025 11:39:13 -0700 Subject: [PATCH 11/15] BUG: rename with Series with non-unique index (#62906) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/generic.py | 4 +++ pandas/tests/frame/methods/test_rename.py | 31 +++++++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 126a5e10c0e85..33464a73e7821 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1236,6 +1236,7 @@ Other - Bug in :meth:`DataFrame.query` where using duplicate column names led to a ``TypeError``. (:issue:`59950`) - Bug in :meth:`DataFrame.query` which raised an exception or produced incorrect results when expressions contained backtick-quoted column names containing the hash character ``#``, backticks, or characters that fall outside the ASCII range (U+0001..U+007F). (:issue:`59285`) (:issue:`49633`) - Bug in :meth:`DataFrame.query` which raised an exception when querying integer column names using backticks. 
(:issue:`60494`) +- Bug in :meth:`DataFrame.rename` and :meth:`Series.rename` when passed a ``mapper``, ``index``, or ``columns`` argument that is a :class:`Series` with non-unique ``ser.index`` producing a corrupted result instead of raising ``ValueError`` (:issue:`58621`) - Bug in :meth:`DataFrame.sample` with ``replace=False`` and ``(n * max(weights) / sum(weights)) > 1``, the method would return biased results. Now raises ``ValueError``. (:issue:`61516`) - Bug in :meth:`DataFrame.shift` where passing a ``freq`` on a DataFrame with no columns did not shift the index correctly. (:issue:`60102`) - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index aea7a2231582a..b542ca1f431c3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1054,6 +1054,10 @@ def _rename( if level is not None: level = ax._get_level_number(level) + if isinstance(replacements, ABCSeries) and not replacements.index.is_unique: + # GH#58621 + raise ValueError("Cannot rename with a Series with non-unique index.") + # GH 13473 if not callable(replacements): if ax._is_multi and level is not None: diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 6153a168476d4..8c02e28bc138c 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -8,6 +8,7 @@ DataFrame, Index, MultiIndex, + Series, merge, ) import pandas._testing as tm @@ -409,3 +410,33 @@ def test_rename_boolean_index(self): index=["foo", "bar", "bah"], ) tm.assert_frame_equal(res, exp) + + def test_rename_non_unique_index_series(self): + # GH#58621 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + orig = df.copy(deep=True) + + rename_series = Series(["X", "Y", "Z", "W"], index=["A", "B", "B", "C"]) + + msg = "Cannot 
rename with a Series with non-unique index" + with pytest.raises(ValueError, match=msg): + df.rename(rename_series) + with pytest.raises(ValueError, match=msg): + df.rename(columns=rename_series) + with pytest.raises(ValueError, match=msg): + df.rename(columns=rename_series, inplace=True) + + # check we didn't corrupt the original + tm.assert_frame_equal(df, orig) + + # Check the Series method while we're here + ser = df.iloc[0] + with pytest.raises(ValueError, match=msg): + ser.rename(rename_series) + with pytest.raises(ValueError, match=msg): + ser.rename(index=rename_series) + with pytest.raises(ValueError, match=msg): + ser.rename(index=rename_series, inplace=True) + + # check we didn't corrupt the original + tm.assert_series_equal(ser, orig.iloc[0]) From a52a8950188d49452f116bed78fdf7563738e4c7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 29 Oct 2025 12:24:37 -0700 Subject: [PATCH 12/15] TST: fix duplicate parametrization/xfail in test_setitem_integer_with_missing_raises (#62907) --- pandas/tests/extension/base/setitem.py | 26 ++++++++++++------------ pandas/tests/extension/json/test_json.py | 21 ++++++++++++------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index b273c9b9f092a..1265edb02dfa8 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -221,28 +221,28 @@ def test_setitem_integer_array_with_repeats(self, data, idx, box_in_series): tm.assert_equal(arr, expected) @pytest.mark.parametrize( - "idx, box_in_series", + "idx", [ - ([0, 1, 2, pd.NA], False), - pytest.param( - [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948") - ), - (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), - # TODO: change False to True? 
- (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), # noqa: PT014 + [0, 1, 2, pd.NA], + pd.array([0, 1, 2, pd.NA], dtype="Int64"), ], - ids=["list-False", "list-True", "integer-array-False", "integer-array-True"], + ids=["list", "integer-array"], ) + @pytest.mark.parametrize("box_in_series", [True, False]) def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series): arr = data.copy() - # TODO(xfail) this raises KeyError about labels not found (it tries label-based) - # for list of labels with Series + msg = "Cannot index with an integer indexer containing NA values" + err = ValueError + if box_in_series: + # The integer labels are not present in the (string) index, so + # we get KeyErrors arr = pd.Series(data, index=[chr(100 + i) for i in range(len(data))]) + msg = "0" + err = KeyError - msg = "Cannot index with an integer indexer containing NA values" - with pytest.raises(ValueError, match=msg): + with pytest.raises(err, match=msg): arr[idx] = arr[0] @pytest.mark.parametrize("as_callable", [True, False]) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index cdb98c5342ecb..aee9df47d6553 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -356,18 +356,25 @@ def test_setitem_integer_array(self, data, idx, box_in_series, request): request.applymarker(mark) super().test_setitem_integer_array(data, idx, box_in_series) - @pytest.mark.xfail(reason="list indices must be integers or slices, not NAType") @pytest.mark.parametrize( - "idx, box_in_series", + "idx", + [ + [0, 1, 2, pd.NA], + pd.array([0, 1, 2, pd.NA], dtype="Int64"), + ], + ids=["list", "integer-array"], + ) + @pytest.mark.parametrize( + "box_in_series", [ - ([0, 1, 2, pd.NA], False), + True, pytest.param( - [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948") + False, + marks=pytest.mark.xfail( + reason="list indices must be integers or slices, not NAType" + ), ), - (pd.array([0, 
1, 2, pd.NA], dtype="Int64"), False), - (pd.array([0, 1, 2, pd.NA], dtype="Int64"), True), ], - ids=["list-False", "list-True", "integer-array-False", "integer-array-True"], ) def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series): super().test_setitem_integer_with_missing_raises(data, idx, box_in_series) From 5b6508f5e2eb460a2c540d60a430ec3f4af1da0d Mon Sep 17 00:00:00 2001 From: Aniket <148300120+Aniketsy@users.noreply.github.com> Date: Thu, 30 Oct 2025 02:21:07 +0530 Subject: [PATCH 13/15] BUG: Raise TypeError for mismatched signed/unsigned dtypes in IntervalIndex.from_arrays (#62376) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/arrays/interval.py | 12 ++++++++++++ pandas/tests/indexes/interval/test_interval.py | 8 ++++++++ 3 files changed, 21 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 33464a73e7821..5d94d1f87ca38 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1049,6 +1049,7 @@ Interval - Bug in :class:`Index`, :class:`Series`, :class:`DataFrame` constructors when given a sequence of :class:`Interval` subclass objects casting them to :class:`Interval` (:issue:`46945`) - Bug in :func:`interval_range` where start and end numeric types were always cast to 64 bit (:issue:`57268`) - Bug in :meth:`IntervalIndex.get_indexer` and :meth:`IntervalIndex.drop` when one of the sides of the index is non-unique (:issue:`52245`) +- Construction of :class:`IntervalArray` and :class:`IntervalIndex` from arrays with mismatched signed/unsigned integer dtypes (e.g., ``int64`` and ``uint64``) now raises a :exc:`TypeError` instead of proceeding silently. 
(:issue:`55715`) Indexing ^^^^^^^^ diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index b0472c70557e5..3e724b176b76d 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -420,6 +420,18 @@ def _ensure_simple_new_inputs( dtype = IntervalDtype(left.dtype, closed=closed) + # Check for mismatched signed/unsigned integer dtypes after casting + left_dtype = left.dtype + right_dtype = right.dtype + if ( + left_dtype.kind in "iu" + and right_dtype.kind in "iu" + and left_dtype.kind != right_dtype.kind + ): + raise TypeError( + f"Left and right arrays must have matching signedness. " + f"Got {left_dtype} and {right_dtype}." + ) return left, right, dtype @classmethod diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 006a06e529971..b302e865eebd1 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -882,6 +882,14 @@ def test_is_all_dates(self): assert not year_2017_index._is_all_dates +def test_from_arrays_mismatched_signedness_raises(): + # GH 55715 + left = np.array([0, 1, 2], dtype="int64") + right = np.array([1, 2, 3], dtype="uint64") + with pytest.raises(TypeError, match="matching signedness"): + IntervalIndex.from_arrays(left, right) + + def test_dir(): # GH#27571 dir(interval_index) should not raise index = IntervalIndex.from_arrays([0, 1], [1, 2]) From 60f18d16f552b698def5d57acfee01fa99712273 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 29 Oct 2025 16:03:29 -0700 Subject: [PATCH 14/15] BUG: retain NAs in ufunc on ArrowEA (#62908) --- pandas/core/arrays/arrow/array.py | 8 ++++++++ pandas/tests/extension/test_arrow.py | 10 ++++++++++ pandas/tests/series/test_npfuncs.py | 2 +- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 08f1ffdcc6a0d..53c938faf9257 100644 --- 
a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -829,6 +829,14 @@ def __arrow_array__(self, type=None): """Convert myself to a pyarrow ChunkedArray.""" return self._pa_array + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + # Need to wrap np.array results GH#62800 + result = super().__array_ufunc__(ufunc, method, *inputs, **kwargs) + if type(self) is ArrowExtensionArray: + # Exclude ArrowStringArray + return type(self)._from_sequence(result) + return result + def __array__( self, dtype: NpDtype | None = None, copy: bool | None = None ) -> np.ndarray: diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 2aa1b658fdf7b..c1e01bbbe57a0 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3800,3 +3800,13 @@ def test_cast_pontwise_result_decimal_nan(): pa_type = result.dtype.pyarrow_dtype assert pa.types.is_decimal(pa_type) + + +def test_ufunc_retains_missing(): + # GH#62800 + ser = pd.Series([0.1, pd.NA], dtype="float64[pyarrow]") + + result = np.sin(ser) + + expected = pd.Series([np.sin(0.1), pd.NA], dtype="float64[pyarrow]") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_npfuncs.py b/pandas/tests/series/test_npfuncs.py index b72ac8efbaa6d..f30c01b496399 100644 --- a/pandas/tests/series/test_npfuncs.py +++ b/pandas/tests/series/test_npfuncs.py @@ -43,7 +43,7 @@ def test_log_arrow_backed_missing_value(using_nan_is_na): ser = Series([1, 2, None], dtype="float64[pyarrow]") if using_nan_is_na: result = np.log(ser) - expected = np.log(Series([1, 2, None], dtype="float64")) + expected = np.log(Series([1, 2, None], dtype="float64[pyarrow]")) tm.assert_series_equal(result, expected) else: # we get cast to object which raises From 1ae04443d643af56c34fc35ae3b2ccdec6743dd0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 29 Oct 2025 16:05:34 -0700 Subject: [PATCH 15/15] BUG: 
DateOffset(normalize=True).rollback (#62910) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_libs/tslibs/offsets.pyx | 3 +++ pandas/tests/tseries/offsets/test_offsets.py | 16 ---------------- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5d94d1f87ca38..2b35abd45a96f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -980,6 +980,7 @@ Datetimelike - Bug in :meth:`DataFrame.fillna` raising an ``AssertionError`` instead of ``OutOfBoundsDatetime`` when filling a ``datetime64[ns]`` column with an out-of-bounds timestamp. Now correctly raises ``OutOfBoundsDatetime``. (:issue:`61208`) - Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` casting ``datetime64`` and ``timedelta64`` columns to ``float64`` and losing precision (:issue:`60850`) - Bug in :meth:`Dataframe.agg` with df with missing values resulting in IndexError (:issue:`58810`) +- Bug in :meth:`DateOffset.rollback` (and subclass methods) with ``normalize=True`` rolling back one offset too far (:issue:`32616`) - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` does not raise on Custom business days frequencies bigger then "1C" (:issue:`58664`) - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`) - Bug in :meth:`DatetimeIndex.union` and :meth:`DatetimeIndex.intersection` when ``unit`` was non-nanosecond (:issue:`59036`) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index ad3fb72d31559..8fb5e739d3a4e 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -692,6 +692,9 @@ cdef class BaseOffset: Rolled timestamp if not on offset, otherwise unchanged timestamp. 
""" dt = Timestamp(dt) + if self.normalize and (dt - dt.normalize())._value != 0: + # GH#32616 + dt = dt.normalize() if not self.is_on_offset(dt): dt = dt - type(self)(1, normalize=self.normalize, **self.kwds) return dt diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 28badd877fccb..3666342aef433 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -431,22 +431,6 @@ def test_rollback(self, offset_types): for k in norm_expected: norm_expected[k] = Timestamp(norm_expected[k].date()) - normalized = { - "Day": Timestamp("2010-12-31 00:00:00"), - "DateOffset": Timestamp("2010-12-31 00:00:00"), - "MonthBegin": Timestamp("2010-12-01 00:00:00"), - "SemiMonthBegin": Timestamp("2010-12-15 00:00:00"), - "YearBegin": Timestamp("2010-01-01 00:00:00"), - "HalfYearBegin": Timestamp("2010-07-01 00:00:00"), - "Week": Timestamp("2010-12-25 00:00:00"), - "Hour": Timestamp("2011-01-01 00:00:00"), - "Minute": Timestamp("2011-01-01 00:00:00"), - "Second": Timestamp("2011-01-01 00:00:00"), - "Milli": Timestamp("2011-01-01 00:00:00"), - "Micro": Timestamp("2011-01-01 00:00:00"), - } - norm_expected.update(normalized) - sdt = datetime(2011, 1, 1, 9, 0) ndt = np.datetime64("2011-01-01 09:00")