Skip to content

Commit b643a7c

Browse files
committed
test changes
1 parent 79a82d5 commit b643a7c

File tree

2 files changed

+68
-152
lines changed

2 files changed

+68
-152
lines changed

pandas/core/algorithms.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -937,24 +937,12 @@ def value_counts_internal(
937937
and not sort
938938
and isinstance(values, (DatetimeIndex, TimedeltaIndex))
939939
and values.inferred_freq is not None
940+
and len(idx) == len(values)
941+
and idx.equals(values)
940942
):
941943
# freq preservation
942-
# Check if the result would be the same as input
943-
if len(idx) == len(values) and idx.equals(values):
944-
# Rebuild idx with the correct type and inferred frequency
945-
if isinstance(values, DatetimeIndex):
946-
idx = DatetimeIndex(
947-
idx._data if hasattr(idx, "_data") else idx.values,
948-
freq=values.inferred_freq,
949-
name=idx.name,
950-
)
951-
952-
elif isinstance(values, TimedeltaIndex):
953-
idx = TimedeltaIndex(
954-
idx._data if hasattr(idx, "_data") else idx.values,
955-
freq=values.inferred_freq,
956-
name=idx.name,
957-
)
944+
# idx equals values here, so attach the inferred frequency to idx in place
945+
idx.freq = values.inferred_freq
958946

959947
result = Series(counts, index=idx, name=name, copy=False)
960948

pandas/tests/base/test_value_counts.py

Lines changed: 64 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -341,151 +341,79 @@ def test_value_counts_object_inference_deprecated():
341341
tm.assert_series_equal(res, exp)
342342

343343

344-
def _vc_make_index(kind: str, periods=5, freq="D"):
345-
if kind == "dt":
346-
return pd.date_range("2016-01-01", periods=periods, freq=freq)
347-
if kind == "td":
348-
return pd.timedelta_range(Timedelta(0), periods=periods, freq=freq)
349-
raise ValueError("kind must be 'dt' or 'td'")
350-
351-
352-
@pytest.mark.parametrize(
353-
"kind,freq,normalize",
354-
[
355-
("dt", "D", False),
356-
("dt", "D", True),
357-
("td", "D", False),
358-
("td", "D", True),
359-
("td", Timedelta(hours=1), False),
360-
("td", Timedelta(hours=1), True),
361-
],
362-
)
363-
def test_value_counts_freq_preserved_datetimelike_no_sort(kind, freq, normalize):
364-
idx = _vc_make_index(kind, periods=5, freq=freq)
365-
vc = idx.value_counts(sort=False, normalize=normalize)
366-
assert vc.index.freq == idx.freq
367-
if normalize:
368-
assert np.isclose(vc.values, 1 / len(idx)).all()
369-
370-
371344
@pytest.mark.parametrize(
372-
"kind,freq",
345+
"index",
373346
[
374-
("dt", "D"),
375-
("td", "D"),
376-
("td", Timedelta(hours=1)),
347+
pd.date_range("2016-01-01", periods=5, freq="D"),
348+
pd.timedelta_range(Timedelta(0), periods=5, freq="h"),
377349
],
350+
ids=["DatetimeIndex[D]", "TimedeltaIndex[h]"],
378351
)
379-
def test_value_counts_freq_drops_datetimelike_when_sorted(kind, freq):
380-
idx = _vc_make_index(kind, periods=5, freq=freq)
381-
vc = idx.value_counts() # default sort=True (reorders)
382-
assert vc.index.freq is None
383-
384-
385352
@pytest.mark.parametrize(
386-
"kind,freq",
353+
"build,kwargs,exp_preserve,exp_hasnans,exp_index_fn",
387354
[
388-
("dt", "D"),
389-
("td", "D"),
390-
("td", Timedelta(hours=1)),
355+
(lambda idx: idx, {"sort": False}, True, False, lambda idx, obj: idx),
356+
(
357+
lambda idx: idx,
358+
{"sort": False, "normalize": True},
359+
True,
360+
False,
361+
lambda idx, obj: idx,
362+
),
363+
(lambda idx: idx, {}, False, False, None),
364+
(
365+
lambda idx: idx.insert(1, idx[1]),
366+
{"sort": False},
367+
False,
368+
False,
369+
lambda idx, obj: type(idx)(idx, freq=None),
370+
),
371+
(
372+
lambda idx: idx.delete(2),
373+
{"sort": False},
374+
False,
375+
False,
376+
lambda idx, obj: type(idx)(obj, freq=None),
377+
),
378+
(
379+
lambda idx: idx.insert(1, pd.NaT),
380+
{"sort": False, "dropna": False},
381+
False,
382+
True,
383+
lambda idx, obj: type(idx)(
384+
list(idx[:1]) + [pd.NaT] + list(idx[1:]), freq=None
385+
),
386+
),
387+
(
388+
lambda idx: idx.insert(1, pd.NaT),
389+
{"sort": False, "dropna": True},
390+
False,
391+
False,
392+
lambda idx, obj: type(idx)(idx, freq=None),
393+
),
391394
],
392395
)
393-
def test_value_counts_freq_drops_datetimelike_with_duplicates(kind, freq):
394-
base = _vc_make_index(kind, periods=5, freq=freq)
395-
obj = base.insert(1, base[1]) # duplicate one label
396-
vc = obj.value_counts(sort=False)
397-
assert vc.index.freq is None
398-
399-
400-
@pytest.mark.parametrize(
401-
"kind,freq",
402-
[
403-
("dt", "D"),
404-
("td", "D"),
405-
("td", Timedelta(hours=1)),
406-
],
407-
)
408-
def test_value_counts_freq_drops_datetimelike_with_gap(kind, freq):
409-
base = _vc_make_index(kind, periods=5, freq=freq)
410-
obj = base.delete(2) # remove one step to break contiguity
411-
vc = obj.value_counts(sort=False)
412-
assert vc.index.freq is None
396+
def test_value_counts_freq_datetimelike(
397+
index, build, kwargs, exp_preserve, exp_hasnans, exp_index_fn
398+
):
399+
obj = build(index)
400+
vc = obj.value_counts(**kwargs)
413401

402+
# expected index (only checked for cases that supply exp_index_fn)
403+
if exp_index_fn is not None:
404+
expected_idx = exp_index_fn(index, obj)
405+
tm.assert_index_equal(vc.index, expected_idx)
414406

415-
@pytest.mark.parametrize(
416-
"kind,freq,dropna,expect_hasnans",
417-
[
418-
("dt", "D", False, True), # keep NaT
419-
("dt", "D", True, False), # drop NaT
420-
("td", "D", False, True),
421-
("td", "D", True, False),
422-
("td", Timedelta(hours=1), False, True),
423-
("td", Timedelta(hours=1), True, False),
424-
],
425-
)
426-
def test_value_counts_freq_drops_datetimelike_with_nat(
427-
kind, freq, dropna, expect_hasnans
428-
):
429-
base = _vc_make_index(kind, periods=3, freq=freq)
430-
obj = base.insert(1, pd.NaT)
431-
vc = obj.value_counts(dropna=dropna, sort=False)
432-
assert vc.index.freq is None
433-
assert vc.index.hasnans is expect_hasnans
407+
# freq preservation / drop
408+
if exp_preserve:
409+
assert vc.index.freq == index.freq
410+
else:
411+
assert vc.index.freq is None
434412

413+
# NaT presence
414+
assert vc.index.hasnans is exp_hasnans
435415

436-
@pytest.mark.parametrize(
437-
"freq,start,periods,sort",
438-
[
439-
("D", "2016-01-01", 5, False),
440-
("D", "2016-01-01", 5, True),
441-
("M", "2016-01", 6, False), # MonthEnd
442-
("M", "2016-01", 6, True),
443-
("Q-DEC", "2016Q1", 4, False), # QuarterEnd (Dec anchored)
444-
("Q-DEC", "2016Q1", 4, True),
445-
("Y-DEC", "2014", 3, False), # YearEnd (Dec anchored)
446-
("Y-DEC", "2014", 3, True),
447-
],
448-
)
449-
def test_value_counts_period_freq_preserved_sort_and_nosort(freq, start, periods, sort):
450-
pi = pd.period_range(start=start, periods=periods, freq=freq)
451-
vc = pi.value_counts(sort=sort)
452-
assert isinstance(vc.index, pd.PeriodIndex)
453-
assert vc.index.dtype == pi.dtype
454-
assert vc.index.freq == pi.freq
455-
456-
457-
def test_value_counts_period_freq_preserved_with_duplicates():
458-
pi = pd.period_range("2016-01", periods=5, freq="M")
459-
obj = pi.insert(1, pi[1]) # duplicate one label
460-
vc = obj.value_counts(sort=False)
461-
assert isinstance(vc.index, pd.PeriodIndex)
462-
assert vc.index.dtype == pi.dtype
463-
assert vc.index.freq == pi.freq
464-
465-
466-
def test_value_counts_period_freq_preserved_with_gap():
467-
pi = pd.period_range("2016-01", periods=5, freq="M")
468-
obj = pi.delete(2) # remove one element
469-
vc = obj.value_counts(sort=False)
470-
assert isinstance(vc.index, pd.PeriodIndex)
471-
assert vc.index.dtype == pi.dtype
472-
assert vc.index.freq == pi.freq
473-
474-
475-
def test_value_counts_period_freq_preserved_with_normalize():
476-
pi = pd.period_range("2016-01", periods=4, freq="M")
477-
vc = pi.value_counts(normalize=True, sort=False)
478-
assert isinstance(vc.index, pd.PeriodIndex)
479-
assert vc.index.dtype == pi.dtype
480-
assert vc.index.freq == pi.freq
481-
assert np.isclose(vc.values, 1 / len(pi)).all()
482-
483-
484-
def test_value_counts_period_freq_preserved_with_nat_dropna_true():
485-
pi = pd.period_range("2016-01", periods=5, freq="M")
486-
obj = pi.insert(1, pd.NaT)
487-
vc = obj.value_counts(dropna=True, sort=False)
488-
assert not vc.index.hasnans
489-
assert isinstance(vc.index, pd.PeriodIndex)
490-
assert vc.index.dtype == pi.dtype
491-
assert vc.index.freq == pi.freq
416+
# with normalize: every entry should equal 1 / len(index)
417+
if kwargs.get("normalize", False):
418+
expected_val = 1.0 / len(index)
419+
assert np.isclose(vc.to_numpy(), expected_val).all()

0 commit comments

Comments
 (0)