From 5dd77866c28d1a95958628ba0412d1dfdb10f99b Mon Sep 17 00:00:00 2001 From: Venkat <87000728+kpvenkat47@users.noreply.github.com> Date: Tue, 18 Mar 2025 19:25:39 +0530 Subject: [PATCH 1/8] Fix from_records() to preserve columns when nrows=0 --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8f65277f660f7..7ee8992e1c39d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2231,7 +2231,7 @@ def maybe_reorder( if is_iterator(data): if nrows == 0: - return cls() + return cls(columns=columns) try: first_row = next(data) From cc21929647532edd4eff03401c863cb33349cc09 Mon Sep 17 00:00:00 2001 From: kpvenkat47 Date: Tue, 18 Mar 2025 20:43:36 +0530 Subject: [PATCH 2/8] Update empty DataFrame initialization to preserve columns - Changed 'if nrows == 0' to return cls(columns=columns) in core/frame.py. - Added test to verify column preservation. --- pandas/core/frame.py | 2 +- pandas/core/frame_test_constructors.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 pandas/core/frame_test_constructors.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8f65277f660f7..7ee8992e1c39d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2231,7 +2231,7 @@ def maybe_reorder( if is_iterator(data): if nrows == 0: - return cls() + return cls(columns=columns) try: first_row = next(data) diff --git a/pandas/core/frame_test_constructors.py b/pandas/core/frame_test_constructors.py new file mode 100644 index 0000000000000..b542c8b21ee79 --- /dev/null +++ b/pandas/core/frame_test_constructors.py @@ -0,0 +1,7 @@ +import pandas as pd +def test_empty_df_preserve_col(): + rows = [] + df = pd.DataFrame.from_records(iter(rows), columns=['col_1', 'Col_2'], nrows=0) + assert list(df.columns)==['col_1', 'Col_2'] + assert len(df) == 0 + \ No newline at end of file From e2fbcebd61b1bc5d68489b75e94bd3d0338c671a Mon Sep 17 00:00:00 2001 From: 
kpvenkat47 Date: Wed, 26 Mar 2025 18:50:40 +0530 Subject: [PATCH 3/8] Updated PR #6114 based on review feedback --- pandas/tests/frame/test_constructors.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 037a2ae294bb2..400a36305bb96 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2780,6 +2780,12 @@ def test_construction_nan_value_timedelta64_dtype(self): ) tm.assert_frame_equal(result, expected) + def test_from_records_empty_iterator_with_preserve_columns(self): + + rows = [] + df = pd.DataFrame.from_records(iter(rows), columns=["col_1", "Col_2"], nrows=0) + assert list(df.columns) == ["col_1", "Col_2"] + assert len(df) == 0 class TestDataFrameConstructorIndexInference: def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): From 5dcc1ac81005c5139d86705d46f615cf45ee9bfc Mon Sep 17 00:00:00 2001 From: kpvenkat47 Date: Tue, 1 Apr 2025 18:43:54 +0530 Subject: [PATCH 4/8] All reviews are updated. Please check --- pandas/core/frame_test_constructors.py | 7 ------- pandas/tests/frame/constructors/test_from_records.py | 8 ++++++++ pandas/tests/frame/test_constructors.py | 8 +------- 3 files changed, 9 insertions(+), 14 deletions(-) delete mode 100644 pandas/core/frame_test_constructors.py diff --git a/pandas/core/frame_test_constructors.py b/pandas/core/frame_test_constructors.py deleted file mode 100644 index b542c8b21ee79..0000000000000 --- a/pandas/core/frame_test_constructors.py +++ /dev/null @@ -1,7 +0,0 @@ -import pandas as pd -def test_empty_df_preserve_col(): - rows = [] - df = pd.DataFrame.from_records(iter(rows), columns=['col_1', 'Col_2'], nrows=0) - assert list(df.columns)==['col_1', 'Col_2'] - assert len(df) == 0 - \ No newline at end of file diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index 
1d4a2c0075e3e..57bd7a3c877ea 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -492,3 +492,11 @@ def test_from_records_structured_array(self): expected_result = DataFrame(modified_data) tm.assert_frame_equal(actual_result, expected_result) + + + def test_from_records_empty_iterator_with_preserve_columns(self): + # GH#61140 + rows = [] + result = DataFrame.from_records(iter(rows), columns=["col_1", "Col_2"], nrows=0) + expected = DataFrame([],columns=["col_1", "Col_2"]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 400a36305bb96..18f0d4a864c04 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2779,13 +2779,7 @@ def test_construction_nan_value_timedelta64_dtype(self): ["NaT", "0 days 00:00:00.000000001"], dtype="timedelta64[ns]" ) tm.assert_frame_equal(result, expected) - - def test_from_records_empty_iterator_with_preserve_columns(self): - - rows = [] - df = pd.DataFrame.from_records(iter(rows), columns=["col_1", "Col_2"], nrows=0) - assert list(df.columns) == ["col_1", "Col_2"] - assert len(df) == 0 + class TestDataFrameConstructorIndexInference: def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): From 241f141b424a8f3b4e8d37b09d9467be83831ec2 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sun, 10 Aug 2025 16:07:54 +0300 Subject: [PATCH 5/8] Format --- pandas/tests/frame/constructors/test_from_records.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index 57bd7a3c877ea..bc57680059a9a 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -492,11 +492,10 @@ def test_from_records_structured_array(self): expected_result 
= DataFrame(modified_data) tm.assert_frame_equal(actual_result, expected_result) - - + def test_from_records_empty_iterator_with_preserve_columns(self): # GH#61140 rows = [] result = DataFrame.from_records(iter(rows), columns=["col_1", "Col_2"], nrows=0) - expected = DataFrame([],columns=["col_1", "Col_2"]) + expected = DataFrame([], columns=["col_1", "Col_2"]) tm.assert_frame_equal(result, expected) From 41bb87c7511179bfc47bfa7a45f52f349b9f642b Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sun, 10 Aug 2025 16:17:49 +0300 Subject: [PATCH 6/8] Add note --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 834477f2aa46a..5a05d43ec2d23 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -801,6 +801,7 @@ I/O - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`) - Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`) - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`) +- Bug in :meth:`DataFrame.from_records` ignoring ``columns`` parameter when ``data`` is an iterator and ``nrows=0``. (:issue:`61140`) - Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`) - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. 
(:issue:`58281`) - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`) From e1172a3f521ae5d4a6ff2fb5d556121309532653 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Mon, 11 Aug 2025 22:11:55 +0300 Subject: [PATCH 7/8] Preserve index --- pandas/core/frame.py | 2 +- pandas/tests/frame/constructors/test_from_records.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index be4bfcd987db4..bc90a8a67f321 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2234,7 +2234,7 @@ def maybe_reorder( if is_iterator(data): if nrows == 0: - return cls(columns=columns) + return cls(index=index, columns=columns) try: first_row = next(data) diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index bc57680059a9a..b16c3326e82f2 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -496,6 +496,8 @@ def test_from_records_structured_array(self): def test_from_records_empty_iterator_with_preserve_columns(self): # GH#61140 rows = [] - result = DataFrame.from_records(iter(rows), columns=["col_1", "Col_2"], nrows=0) - expected = DataFrame([], columns=["col_1", "Col_2"]) + result = DataFrame.from_records( + iter(rows), index=[0, 1], columns=["col_1", "Col_2"], nrows=0 + ) + expected = DataFrame([], index=[0, 1], columns=["col_1", "Col_2"]) tm.assert_frame_equal(result, expected) From f8c882026aa636ba2fc3868df51c830266a5ff00 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Mon, 11 Aug 2025 22:12:39 +0300 Subject: [PATCH 8/8] Preserve index --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5a05d43ec2d23..085f760b02c55 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ 
b/doc/source/whatsnew/v3.0.0.rst @@ -801,7 +801,7 @@ I/O - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`) - Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`) - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`) -- Bug in :meth:`DataFrame.from_records` ignoring ``columns`` parameter when ``data`` is an iterator and ``nrows=0``. (:issue:`61140`) +- Bug in :meth:`DataFrame.from_records` ignoring ``columns`` and ``index`` parameters when ``data`` is an empty iterator and ``nrows=0``. (:issue:`61140`) - Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`) - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`) - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)