@@ -341,151 +341,79 @@ def test_value_counts_object_inference_deprecated():
     tm.assert_series_equal(res, exp)
 
 
-def _vc_make_index(kind: str, periods=5, freq="D"):
-    if kind == "dt":
-        return pd.date_range("2016-01-01", periods=periods, freq=freq)
-    if kind == "td":
-        return pd.timedelta_range(Timedelta(0), periods=periods, freq=freq)
-    raise ValueError("kind must be 'dt' or 'td'")
-
-
-@pytest.mark.parametrize(
-    "kind,freq,normalize",
-    [
-        ("dt", "D", False),
-        ("dt", "D", True),
-        ("td", "D", False),
-        ("td", "D", True),
-        ("td", Timedelta(hours=1), False),
-        ("td", Timedelta(hours=1), True),
-    ],
-)
-def test_value_counts_freq_preserved_datetimelike_no_sort(kind, freq, normalize):
-    idx = _vc_make_index(kind, periods=5, freq=freq)
-    vc = idx.value_counts(sort=False, normalize=normalize)
-    assert vc.index.freq == idx.freq
-    if normalize:
-        assert np.isclose(vc.values, 1 / len(idx)).all()
-
-
 @pytest.mark.parametrize(
-    "kind,freq",
+    "index",
     [
-        ("dt", "D"),
-        ("td", "D"),
-        ("td", Timedelta(hours=1)),
+        pd.date_range("2016-01-01", periods=5, freq="D"),
+        pd.timedelta_range(Timedelta(0), periods=5, freq="h"),
     ],
+    ids=["DatetimeIndex[D]", "TimedeltaIndex[h]"],
 )
-def test_value_counts_freq_drops_datetimelike_when_sorted(kind, freq):
-    idx = _vc_make_index(kind, periods=5, freq=freq)
-    vc = idx.value_counts()  # default sort=True (reorders)
-    assert vc.index.freq is None
-
-
 @pytest.mark.parametrize(
-    "kind,freq",
+    "build,kwargs,exp_preserve,exp_hasnans,exp_index_fn",
     [
-        ("dt", "D"),
-        ("td", "D"),
-        ("td", Timedelta(hours=1)),
+        (lambda idx: idx, {"sort": False}, True, False, lambda idx, obj: idx),
+        (
+            lambda idx: idx,
+            {"sort": False, "normalize": True},
+            True,
+            False,
+            lambda idx, obj: idx,
+        ),
+        (lambda idx: idx, {}, False, False, None),
+        (
+            lambda idx: idx.insert(1, idx[1]),
+            {"sort": False},
+            False,
+            False,
+            lambda idx, obj: type(idx)(idx, freq=None),
+        ),
+        (
+            lambda idx: idx.delete(2),
+            {"sort": False},
+            False,
+            False,
+            lambda idx, obj: type(idx)(obj, freq=None),
+        ),
+        (
+            lambda idx: idx.insert(1, pd.NaT),
+            {"sort": False, "dropna": False},
+            False,
+            True,
+            lambda idx, obj: type(idx)(
+                list(idx[:1]) + [pd.NaT] + list(idx[1:]), freq=None
+            ),
+        ),
+        (
+            lambda idx: idx.insert(1, pd.NaT),
+            {"sort": False, "dropna": True},
+            False,
+            False,
+            lambda idx, obj: type(idx)(idx, freq=None),
+        ),
     ],
 )
-def test_value_counts_freq_drops_datetimelike_with_duplicates(kind, freq):
-    base = _vc_make_index(kind, periods=5, freq=freq)
-    obj = base.insert(1, base[1])  # duplicate one label
-    vc = obj.value_counts(sort=False)
-    assert vc.index.freq is None
-
-
-@pytest.mark.parametrize(
-    "kind,freq",
-    [
-        ("dt", "D"),
-        ("td", "D"),
-        ("td", Timedelta(hours=1)),
-    ],
-)
-def test_value_counts_freq_drops_datetimelike_with_gap(kind, freq):
-    base = _vc_make_index(kind, periods=5, freq=freq)
-    obj = base.delete(2)  # remove one step to break contiguity
-    vc = obj.value_counts(sort=False)
-    assert vc.index.freq is None
+def test_value_counts_freq_datetimelike(
+    index, build, kwargs, exp_preserve, exp_hasnans, exp_index_fn
+):
+    obj = build(index)
+    vc = obj.value_counts(**kwargs)
 
+    # expected index (given only for the sort=False cases)
+    if exp_index_fn is not None:
+        expected_idx = exp_index_fn(index, obj)
+        tm.assert_index_equal(vc.index, expected_idx)
 
-@pytest.mark.parametrize(
-    "kind,freq,dropna,expect_hasnans",
-    [
-        ("dt", "D", False, True),  # keep NaT
-        ("dt", "D", True, False),  # drop NaT
-        ("td", "D", False, True),
-        ("td", "D", True, False),
-        ("td", Timedelta(hours=1), False, True),
-        ("td", Timedelta(hours=1), True, False),
-    ],
-)
-def test_value_counts_freq_drops_datetimelike_with_nat(
-    kind, freq, dropna, expect_hasnans
-):
-    base = _vc_make_index(kind, periods=3, freq=freq)
-    obj = base.insert(1, pd.NaT)
-    vc = obj.value_counts(dropna=dropna, sort=False)
-    assert vc.index.freq is None
-    assert vc.index.hasnans is expect_hasnans
+    # freq preservation / drop
+    if exp_preserve:
+        assert vc.index.freq == index.freq
+    else:
+        assert vc.index.freq is None
 
+    # NaT presence
+    assert vc.index.hasnans is exp_hasnans
 
-@pytest.mark.parametrize(
-    "freq,start,periods,sort",
-    [
-        ("D", "2016-01-01", 5, False),
-        ("D", "2016-01-01", 5, True),
-        ("M", "2016-01", 6, False),  # MonthEnd
-        ("M", "2016-01", 6, True),
-        ("Q-DEC", "2016Q1", 4, False),  # QuarterEnd (Dec anchored)
-        ("Q-DEC", "2016Q1", 4, True),
-        ("Y-DEC", "2014", 3, False),  # YearEnd (Dec anchored)
-        ("Y-DEC", "2014", 3, True),
-    ],
-)
-def test_value_counts_period_freq_preserved_sort_and_nosort(freq, start, periods, sort):
-    pi = pd.period_range(start=start, periods=periods, freq=freq)
-    vc = pi.value_counts(sort=sort)
-    assert isinstance(vc.index, pd.PeriodIndex)
-    assert vc.index.dtype == pi.dtype
-    assert vc.index.freq == pi.freq
-
-
-def test_value_counts_period_freq_preserved_with_duplicates():
-    pi = pd.period_range("2016-01", periods=5, freq="M")
-    obj = pi.insert(1, pi[1])  # duplicate one label
-    vc = obj.value_counts(sort=False)
-    assert isinstance(vc.index, pd.PeriodIndex)
-    assert vc.index.dtype == pi.dtype
-    assert vc.index.freq == pi.freq
-
-
-def test_value_counts_period_freq_preserved_with_gap():
-    pi = pd.period_range("2016-01", periods=5, freq="M")
-    obj = pi.delete(2)  # remove one element
-    vc = obj.value_counts(sort=False)
-    assert isinstance(vc.index, pd.PeriodIndex)
-    assert vc.index.dtype == pi.dtype
-    assert vc.index.freq == pi.freq
-
-
-def test_value_counts_period_freq_preserved_with_normalize():
-    pi = pd.period_range("2016-01", periods=4, freq="M")
-    vc = pi.value_counts(normalize=True, sort=False)
-    assert isinstance(vc.index, pd.PeriodIndex)
-    assert vc.index.dtype == pi.dtype
-    assert vc.index.freq == pi.freq
-    assert np.isclose(vc.values, 1 / len(pi)).all()
-
-
-def test_value_counts_period_freq_preserved_with_nat_dropna_true():
-    pi = pd.period_range("2016-01", periods=5, freq="M")
-    obj = pi.insert(1, pd.NaT)
-    vc = obj.value_counts(dropna=True, sort=False)
-    assert not vc.index.hasnans
-    assert isinstance(vc.index, pd.PeriodIndex)
-    assert vc.index.dtype == pi.dtype
-    assert vc.index.freq == pi.freq
+    # with normalize=True each value appears once, so proportions are 1 / len(index)
+    if kwargs.get("normalize", False):
+        expected_val = 1.0 / len(index)
+        assert np.isclose(vc.to_numpy(), expected_val).all()
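
For reference, a minimal standalone sketch of the freq behaviour the consolidated test asserts. It is illustrative only and simply restates the expectations encoded in the parametrized cases above (preserve freq on the sort=False path, drop it once the result is reordered by the default sort=True); it is not tied to a specific pandas version.

    import pandas as pd

    idx = pd.date_range("2016-01-01", periods=5, freq="D")

    # sort=False path: the test expects the resulting index to keep idx.freq
    assert idx.value_counts(sort=False).index.freq == idx.freq

    # default sort=True reorders by count; the test expects freq to be dropped
    assert idx.value_counts().index.freq is None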