@@ -744,28 +744,32 @@ def test_resample_consistency(self):
744744
def test_resample_timegrouper(self):
    """Monthly counts must be the same with or without NaT keys (GH 7227).

    Three variants of the same six dates are checked: the plain list, one
    with ``NaT`` inserted between valid dates, and one with ``NaT`` at both
    ends. For each variant the per-month count is computed twice -- via
    ``resample`` on the date index and via ``groupby`` with ``pd.Grouper``
    on the date column -- and compared against the same expected frame.
    """
    # GH 7227
    plain_dates = [datetime(2014, 10, 1), datetime(2014, 9, 3),
                   datetime(2014, 11, 5), datetime(2014, 9, 5),
                   datetime(2014, 10, 8), datetime(2014, 7, 15)]
    nat_inside = (plain_dates[:2] + [pd.NaT] + plain_dates[2:4] +
                  [pd.NaT] + plain_dates[4:])
    nat_at_ends = [pd.NaT] + plain_dates + [pd.NaT]

    # Expected month-end labels; identical for every variant.
    exp_idx = pd.DatetimeIndex(['2014-07-31', '2014-08-31', '2014-09-30',
                                '2014-10-31', '2014-11-30'],
                               freq='M', name='A')
    monthly_counts = [1, 0, 2, 2, 1]

    def check_both_paths(frame, expected):
        # The resample path and the Grouper path must agree with `expected`.
        resampled = frame.set_index('A').resample('M', how='count')
        assert_frame_equal(resampled, expected)

        grouped = frame.groupby(pd.Grouper(freq='M', key='A')).count()
        assert_frame_equal(grouped, expected)

    for dates in [plain_dates, nat_inside, nat_at_ends]:
        # Single value column.
        one_col = DataFrame(dict(A=dates, B=np.arange(len(dates))))
        expected_one = DataFrame({'B': monthly_counts}, index=exp_idx)
        check_both_paths(one_col, expected_one)

        # Two value columns.
        two_col = DataFrame(dict(A=dates, B=np.arange(len(dates)),
                                 C=np.arange(len(dates))))
        expected_two = DataFrame({'B': monthly_counts, 'C': monthly_counts},
                                 index=exp_idx, columns=['B', 'C'])
        check_both_paths(two_col, expected_two)
769773
770774
771775def _simple_ts (start , end , freq = 'D' ):
@@ -1302,6 +1306,84 @@ def test_fails_on_no_datetime_index(self):
13021306 "but got an instance of %r" % name ):
13031307 df .groupby (TimeGrouper ('D' ))
13041308
def test_aggregate_normal(self):
    """Check that TimeGrouper aggregation matches an ordinary groupby.

    The same random data is grouped twice: once by an integer key
    (1..5 repeated) and once by a daily ``TimeGrouper`` over five
    consecutive dates laid out in the same repeating pattern. After the
    integer-keyed result is relabelled with the corresponding daily index,
    every aggregation below must yield equal frames.
    """
    n = 20
    data = np.random.randn(n, 4)
    value_cols = ['A', 'B', 'C', 'D']

    normal_df = DataFrame(data, columns=value_cols)
    normal_df['key'] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(data, columns=value_cols)
    dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2),
                    datetime(2013, 1, 3), datetime(2013, 1, 4),
                    datetime(2013, 1, 5)] * 4

    normal_grouped = normal_df.groupby('key')
    dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))

    # The relabelling index does not depend on the aggregation, so build it
    # once instead of on every iteration.
    daily_index = date_range(start='2013-01-01', freq='D', periods=5,
                             name='key')

    for func in ['min', 'max', 'prod', 'var', 'std', 'mean', 'count', 'sum']:
        expected = getattr(normal_grouped, func)()
        expected.index = daily_index
        dt_result = getattr(dt_grouped, func)()
        assert_frame_equal(expected, dt_result)

    # TODO: 'size', 'first', 'last' and 'nth' do not work with TimeGrouper
    # yet. Once they do, add 'first' and 'last' to the loop above and compare
    # nth(3) from both groupers in the same way.
1350+
def test_aggregate_with_nat(self):
    """Check TimeGrouper aggregation against a normal groupby when keys are missing.

    The integer key column carries ``np.nan`` in the positions where the
    datetime key column carries ``pd.NaT``. The integer-keyed result is
    padded with one extra row labelled 3 (NaN for min/max/prod, 0 for
    count/sum), sorted, and relabelled with a five-day daily index before
    being compared with the TimeGrouper result.
    """
    n = 20
    data = np.random.randn(n, 4)
    value_cols = ['A', 'B', 'C', 'D']

    normal_df = DataFrame(data, columns=value_cols)
    normal_df['key'] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=value_cols)
    dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT,
                    datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4

    normal_grouped = normal_df.groupby('key')
    dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))

    # (aggregation name, value used for the padded row labelled 3)
    cases = [('min', np.nan), ('max', np.nan), ('prod', np.nan),
             ('count', 0), ('sum', 0)]

    for func, fill in cases:
        normal_result = getattr(normal_grouped, func)()
        dt_result = getattr(dt_grouped, func)()

        # Insert the row for key 3 so the frame lines up with the five-day
        # index (presumably the NaN key is dropped by the normal groupby).
        pad = DataFrame([[fill] * len(value_cols)],
                        index=[3], columns=value_cols)
        expected = normal_result.append(pad).sort_index()
        expected.index = date_range(start='2013-01-01', freq='D',
                                    periods=5, name='key')
        assert_frame_equal(expected, dt_result)

    # When NaT is included, 'var', 'std', 'mean', 'size', 'first', 'last'
    # and 'nth' do not work yet.
1386+
13051387
13061388if __name__ == '__main__' :
13071389 nose .runmodule (argv = [__file__ , '-vvs' , '-x' , '--pdb' , '--pdb-failure' ],
0 commit comments