1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -1192,6 +1192,7 @@ MultiIndex
 I/O
 ^^^
 - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
+- Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``timedelta64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`63239`)
 - Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits
   ``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`)
 - Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function will now raise a ``TypeError`` if ``data`` is a list containing non-dict items (:issue:`62829`)
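
For context, a minimal round-trip sketch of the behavior this whatsnew entry describes; before the fix, a second-resolution column came back as nanoseconds (the file name here is arbitrary):

    import numpy as np
    import pandas as pd

    # a timedelta64 column with non-nanosecond (second) resolution
    df = pd.DataFrame({"td": pd.to_timedelta([1, 2, 3], unit="s").astype("m8[s]")})

    df.to_hdf("roundtrip.h5", key="df", mode="w")
    result = pd.read_hdf("roundtrip.h5", "df")

    # with this fix the original resolution survives the round-trip
    assert result["td"].dtype == np.dtype("m8[s]")
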
14 changes: 11 additions & 3 deletions pandas/core/computation/pytables.py
@@ -13,6 +13,7 @@
     Any,
     ClassVar,
     Self,
+    cast,
 )
 
 import numpy as np
@@ -44,7 +45,10 @@
 )
 
 if TYPE_CHECKING:
-    from pandas._typing import npt
+    from pandas._typing import (
+        TimeUnit,
+        npt,
+    )
 
 
 class PyTablesScope(_scope.Scope):
@@ -225,15 +229,19 @@ def stringify(value):
             if conv_val.tz is not None:
                 conv_val = conv_val.tz_convert("UTC")
             return TermValue(conv_val, conv_val._value, kind)
-        elif kind in ("timedelta64", "timedelta"):
+        elif kind.startswith("timedelta"):
+            unit = "ns"
+            if "[" in kind:
+                unit = cast("TimeUnit", kind.split("[")[-1][:-1])
             if isinstance(conv_val, str):
                 conv_val = Timedelta(conv_val)
             elif lib.is_integer(conv_val) or lib.is_float(conv_val):
                 conv_val = Timedelta(conv_val, unit="s")
             else:
                 conv_val = Timedelta(conv_val)
-            conv_val = conv_val.as_unit("ns")._value
+            conv_val = conv_val.as_unit(unit)._value
             return TermValue(int(conv_val), conv_val, kind)
+
         elif meta == "category":
             metadata = extract_array(self.metadata, extract_numpy=True)
             result: npt.NDArray[np.intp] | np.intp | int
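
The new branch derives the target resolution from the kind string itself: ``"timedelta64[s]"`` carries its unit in brackets, while the legacy bare ``"timedelta64"`` (or ``"timedelta"``) defaults to nanoseconds. A standalone sketch of that parsing (``_unit_from_kind`` is a hypothetical helper, not part of the patch):

    from typing import cast

    from pandas._typing import TimeUnit

    def _unit_from_kind(kind: str) -> TimeUnit:
        # "timedelta64[ms]" -> "ms"; bare "timedelta64" / "timedelta" -> "ns"
        if "[" in kind:
            return cast("TimeUnit", kind.split("[")[-1][:-1])
        return "ns"
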
29 changes: 21 additions & 8 deletions pandas/io/pytables.py
@@ -2702,8 +2702,12 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
             # recreate with tz if indicated
             converted = _set_tz(converted, tz, dtype)
 
-        elif dtype == "timedelta64":
-            converted = np.asarray(converted, dtype="m8[ns]")
+        elif dtype.startswith("timedelta64"):
+            if dtype == "timedelta64":
+                # from before we started storing timedelta64 unit
+                converted = np.asarray(converted, dtype="m8[ns]")
+            else:
+                converted = np.asarray(converted, dtype=dtype)
         elif dtype == "date":
             try:
                 converted = np.asarray(
@@ -3086,8 +3090,13 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None):
             tz = getattr(attrs, "tz", None)
             ret = _set_tz(ret, tz, dtype)
 
-        elif dtype == "timedelta64":
-            ret = np.asarray(ret, dtype="m8[ns]")
+        elif dtype and dtype.startswith("timedelta64"):
+            if dtype == "timedelta64":
+                # This was written back before we started writing
+                # timedelta64 units
+                ret = np.asarray(ret, dtype="m8[ns]")
+            else:
+                ret = np.asarray(ret, dtype=dtype)
 
         if transposed:
             return ret.T
@@ -3324,7 +3333,7 @@ def write_array(
             node._v_attrs.value_type = f"datetime64[{value.dtype.unit}]"
         elif lib.is_np_dtype(value.dtype, "m"):
             self._handle.create_array(self.group, key, value.view("i8"))
-            getattr(self.group, key)._v_attrs.value_type = "timedelta64"
+            getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
         elif isinstance(value, BaseStringArray):
             vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom())
             vlarr.append(value.to_numpy())
@@ -5175,8 +5184,12 @@ def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray
             index = DatetimeIndex(data)
         else:
             index = DatetimeIndex(data.view(kind))
-    elif kind == "timedelta64":
-        index = TimedeltaIndex(data)
+    elif kind.startswith("timedelta64"):
+        if kind == "timedelta64":
+            # created before we stored resolution information
+            index = TimedeltaIndex(data)
+        else:
+            index = TimedeltaIndex(data.view(kind))
     elif kind == "date":
         try:
             index = np.asarray([date.fromordinal(v) for v in data], dtype=object)
@@ -5413,7 +5426,7 @@ def _dtype_to_kind(dtype_str: str) -> str:
     elif dtype_str.startswith("datetime64"):
         kind = dtype_str
     elif dtype_str.startswith("timedelta"):
-        kind = "timedelta64"
+        kind = dtype_str
     elif dtype_str.startswith("bool"):
         kind = "bool"
     elif dtype_str.startswith("category"):
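
Taken together: the writer now records the full dtype string (for example ``"timedelta64[ms]"``) in the ``value_type`` attribute, and the readers fall back to ``m8[ns]`` only for the bare ``"timedelta64"`` written by older pandas versions. A sketch inspecting the stored attribute with PyTables; the ``/df/block0_values`` node path is an internal layout detail assumed here:

    import pandas as pd
    import tables  # PyTables, the engine behind pandas' HDF5 support

    df = pd.DataFrame({"td": pd.to_timedelta(range(3), unit="s").astype("m8[ms]")})
    df.to_hdf("store.h5", key="df", mode="w", format="fixed")

    with tables.open_file("store.h5") as h5:
        node = h5.get_node("/df/block0_values")
        print(node._v_attrs.value_type)  # expected: "timedelta64[ms]"
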
3 changes: 2 additions & 1 deletion pandas/tests/io/pytables/test_append.py
@@ -848,7 +848,7 @@ def test_append_raise(tmp_path, using_infer_string):
         store.append("df", df)
 
 
-def test_append_with_timedelta(tmp_path):
+def test_append_with_timedelta(tmp_path, unit):
     # GH 3577
     # append timedelta
 
@@ -860,6 +860,7 @@ def test_append_with_timedelta(tmp_path):
         }
     )
     df["C"] = df["A"] - df["B"]
+    df["C"] = df["C"].astype(f"m8[{unit}]")
     df.loc[3:5, "C"] = np.nan
 
     path = tmp_path / "test_append_with_timedelta.h5"
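
The new ``unit`` parameter picks up pandas' shared test fixture that parametrizes over the supported timedelta resolutions; a sketch of how that fixture is presumably defined in the project-level conftest:

    import pytest

    @pytest.fixture(params=["s", "ms", "us", "ns"])
    def unit(request):
        # each test requesting `unit` runs once per resolution
        return request.param
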
7 changes: 4 additions & 3 deletions pandas/tests/io/pytables/test_store.py
@@ -1017,11 +1017,12 @@ def test_duplicate_column_name(tmp_path, setup_path):
     assert other.equals(df)
 
 
-@pytest.mark.xfail(reason="non-nano TimedeltaIndex does not round-trip")
-def test_preserve_timedeltaindex_type(setup_path):
+def test_preserve_timedeltaindex_type(setup_path, unit):
     # GH9635
     df = DataFrame(np.random.default_rng(2).normal(size=(10, 5)))
-    df.index = timedelta_range(start="0s", periods=10, freq="1s", name="example")
+    df.index = timedelta_range(
+        start="0s", periods=10, freq="1s", name="example", unit=unit
+    )
 
     with ensure_clean_store(setup_path) as store:
         store["df"] = df