From ea680cdb8859fa12fadbc2455bdaca7d213bca68 Mon Sep 17 00:00:00 2001 From: parthava-adabala Date: Tue, 28 Oct 2025 01:01:48 -0500 Subject: [PATCH 1/2] BUG: Fix DataFrame.from_dict empty row drop --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/frame.py | 21 +++++++++++-- pandas/tests/frame/test_constructors.py | 42 +++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a778e7eed3843..3263f4bf1640b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1179,6 +1179,7 @@ Reshaping - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`) - Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`) - Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`) +- Bug in :meth:`DataFrame.from_dict` where rows corresponding to an empty :class:`Series` or ``dict`` would be dropped when ``orient='index'`` (:issue:`62775`) - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`) - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 68ea6795d47dd..4a8cfeb5a7c9f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1914,11 +1914,13 @@ def from_dict( orient = orient.lower() # type: ignore[assignment] if orient == "index": if len(data) > 0: + index = list(data.keys()) # TODO speed up Series case if isinstance(next(iter(data.values())), (Series, dict)): data = _from_nested_dict(data) + if not data and columns is None: + columns = [] else: - index = list(data.keys()) # error: Incompatible types in assignment (expression has type # "List[Any]", variable has type "Dict[Any, Any]") data = list(data.values()) # type: ignore[assignment] @@ -14413,9 +14415,22 @@ def _from_nested_dict( new_data: collections.defaultdict[HashableT2, dict[HashableT, T]] = ( collections.defaultdict(dict) ) + all_cols_dict = {} + for s in data.values(): + if isinstance(s, (dict, ABCSeries)): + all_cols_dict.update(dict.fromkeys(s.keys())) + all_cols_list = list(all_cols_dict.keys()) + if not all_cols_list: + return new_data for index, s in data.items(): - for col, v in s.items(): - new_data[col][index] = v + if isinstance(s, (dict, ABCSeries)): + for col in all_cols_list: + new_data[col][index] = s.get(col, None) + elif s is None or is_scalar(s): + for col in all_cols_list: + new_data[col][index] = s + else: + raise TypeError(f"Value at index {index} is not a dict/Series/scalar/None") return new_data diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 264011edb65b5..067c79755705d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2649,6 +2649,48 @@ def test_error_from_2darray(self, col_a, col_b): with pytest.raises(ValueError, match=msg): DataFrame({"a": col_a, "b": col_b}) + @pytest.mark.parametrize( + "data, expected", + [ + ( + { + "good": Series({"a": 1, "b": 2}), + "blank": Series(dtype="float64"), + }, + DataFrame( + {"a": [1.0, np.nan], "b": [2.0, np.nan]}, index=["good", "blank"] + ), + ), + ( + { + "blank": Series(dtype="float64"), + "good": Series({"a": 1, "b": 2}), + }, + DataFrame( + {"a": [np.nan, 1.0], "b": [np.nan, 2.0]}, index=["blank", "good"] + ), + ), + ( + {"blank": Series(dtype="float64")}, + DataFrame(index=["blank"], columns=[]), + ), + ( + { + "good": Series({"a": 1, "b": 2}), + "blank_dict": {}, + }, + DataFrame( + {"a": [1.0, np.nan], "b": [2.0, np.nan]}, + index=["good", "blank_dict"], + ), + ), + ], + ) + def test_from_dict_orient_index_empty_series_or_dict(self, data, expected): + # GH-62775 + result = DataFrame.from_dict(data, orient="index") + tm.assert_frame_equal(result, expected) + def test_from_dict_with_missing_copy_false(self): # GH#45369 filled columns should not be views of one another df = DataFrame(index=[1, 2, 3], columns=["a", "b", "c"], copy=False) From 52046f404e1309d10ecc5083dcf19069aa5f2a5b Mon Sep 17 00:00:00 2001 From: parthava-adabala Date: Tue, 28 Oct 2025 11:11:31 -0500 Subject: [PATCH 2/2] Trigger CI --- pandas/core/frame.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4a8cfeb5a7c9f..40ab2c59053b4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1914,13 +1914,15 @@ def from_dict( orient = orient.lower() # type: ignore[assignment] if orient == "index": if len(data) > 0: - index = list(data.keys()) # TODO speed up Series case if isinstance(next(iter(data.values())), (Series, dict)): + original_keys = list(data.keys()) data = _from_nested_dict(data) if not data and columns is None: columns = [] + index = original_keys else: + index = list(data.keys()) # error: Incompatible types in assignment (expression has type # "List[Any]", variable has type "Dict[Any, Any]") data = list(data.values()) # type: ignore[assignment] @@ -14411,8 +14413,8 @@ def values(self) -> np.ndarray: def _from_nested_dict( data: Mapping[HashableT, Mapping[HashableT2, T]], -) -> collections.defaultdict[HashableT2, dict[HashableT, T]]: - new_data: collections.defaultdict[HashableT2, dict[HashableT, T]] = ( +) -> collections.defaultdict[HashableT2, dict[HashableT, Any]]: + new_data: collections.defaultdict[HashableT2, dict[HashableT, Any]] = ( collections.defaultdict(dict) ) all_cols_dict = {}