33
44Created: 2020-05-06
55"""
6-
7-
8-
9- # third party
106import cvxpy as cp
11- from cvxpy .error import SolverError
127import numpy as np
13-
14- # first party
15- from .config import Config
8+ from cvxpy .error import SolverError
169
1710
1811class Weekday :
1912 """Class to handle weekday effects."""
2013
2114 @staticmethod
22- def get_params (data , logger ):
15+ def get_params (data , denominator_col , numerator_cols , date_col , scales , logger ):
16+ r"""Fit weekday correction for each col in numerator_cols.
17+
18+ Return a matrix of parameters: the entire vector of betas, for each time
19+ series column in the data.
20+ """
21+ tmp = data .reset_index ()
22+ denoms = tmp .groupby (date_col ).sum ()[denominator_col ]
23+ nums = tmp .groupby (date_col ).sum ()[numerator_cols ]
24+
25+ # Construct design matrix to have weekday indicator columns and then day
26+ # indicators.
27+ X = np .zeros ((nums .shape [0 ], 6 + nums .shape [0 ]))
28+ not_sunday = np .where (nums .index .dayofweek != 6 )[0 ]
29+ X [not_sunday , np .array (nums .index .dayofweek )[not_sunday ]] = 1
30+ X [np .where (nums .index .dayofweek == 6 )[0 ], :6 ] = - 1
31+ X [:, 6 :] = np .eye (X .shape [0 ])
32+
33+ npnums , npdenoms = np .array (nums ), np .array (denoms )
34+ params = np .zeros ((nums .shape [1 ], X .shape [1 ]))
35+
36+ # Loop over the available numerator columns and smooth each separately.
37+ for i in range (nums .shape [1 ]):
38+ result = Weekday ._fit (X , scales , npnums [:, i ], npdenoms )
39+ if result is None :
40+ logger .error ("Unable to calculate weekday correction" )
41+ else :
42+ params [i ,:] = result
43+
44+ return params
45+
46+ @staticmethod
47+ def _fit (X , scales , npnums , npdenoms ):
2348 r"""Correct a signal estimated as numerator/denominator for weekday effects.
2449
2550 The ordinary estimate would be numerator_t/denominator_t for each time point
@@ -53,57 +78,31 @@ def get_params(data, logger):
5378
5479 ll = (numerator * (X*b + log(denominator)) - sum(exp(X*b) + log(denominator)))
5580 / num_days
56-
57- Return a matrix of parameters: the entire vector of betas, for each time
58- series column in the data.
5981 """
60- denoms = data .groupby (Config .DATE_COL ).sum ()["Denominator" ]
61- nums = data .groupby (Config .DATE_COL ).sum ()[Config .CLI_COLS + Config .FLU1_COL ]
62-
63- # Construct design matrix to have weekday indicator columns and then day
64- # indicators.
65- X = np .zeros ((nums .shape [0 ], 6 + nums .shape [0 ]))
66- not_sunday = np .where (nums .index .dayofweek != 6 )[0 ]
67- X [not_sunday , np .array (nums .index .dayofweek )[not_sunday ]] = 1
68- X [np .where (nums .index .dayofweek == 6 )[0 ], :6 ] = - 1
69- X [:, 6 :] = np .eye (X .shape [0 ])
70-
71- npnums , npdenoms = np .array (nums ), np .array (denoms )
72- params = np .zeros ((nums .shape [1 ], X .shape [1 ]))
73-
74- # Loop over the available numerator columns and smooth each separately.
75- for i in range (nums .shape [1 ]):
76- b = cp .Variable ((X .shape [1 ]))
77-
78- lmbda = cp .Parameter (nonneg = True )
79- lmbda .value = 10 # Hard-coded for now, seems robust to changes
80-
81- ll = (
82- cp .matmul (npnums [:, i ], cp .matmul (X , b ) + np .log (npdenoms ))
83- - cp .sum (cp .exp (cp .matmul (X , b ) + np .log (npdenoms )))
84- ) / X .shape [0 ]
85- penalty = (
86- lmbda * cp .norm (cp .diff (b [6 :], 3 ), 1 ) / (X .shape [0 ] - 2 )
87- ) # L-1 Norm of third differences, rewards smoothness
88- scales = [1 , 1e5 , 1e10 , 1e15 ]
89- for scale in scales :
90- try :
91- prob = cp .Problem (cp .Minimize ((- ll + lmbda * penalty ) / scale ))
92- _ = prob .solve ()
93- params [i ,:] = b .value
94- break
95- except SolverError :
96- # If the magnitude of the objective function is too large, an error is
97- # thrown; Rescale the objective function by going through loop
98- pass
99- else :
100- # Leaving params[i,:] = 0 is equivalent to not performing weekday correction
101- logger .error ("Unable to calculate weekday correction" )
102-
103- return params
82+ b = cp .Variable ((X .shape [1 ]))
83+
84+ lmbda = cp .Parameter (nonneg = True )
85+ lmbda .value = 10 # Hard-coded for now, seems robust to changes
86+
87+ ll = (
88+ cp .matmul (npnums , cp .matmul (X , b ) + np .log (npdenoms )) -
89+ cp .sum (cp .exp (cp .matmul (X , b ) + np .log (npdenoms )))
90+ ) / X .shape [0 ]
91+ # L-1 Norm of third differences, rewards smoothness
92+ penalty = lmbda * cp .norm (cp .diff (b [6 :], 3 ), 1 ) / (X .shape [0 ] - 2 )
93+ for scale in scales :
94+ try :
95+ prob = cp .Problem (cp .Minimize ((- ll + lmbda * penalty ) / scale ))
96+ _ = prob .solve ()
97+ return b .value
98+ except SolverError :
99+ # If the magnitude of the objective function is too large, an error is
100+ # thrown; Rescale the objective function by going through loop
101+ continue
102+ return None
104103
105104 @staticmethod
106- def calc_adjustment (params , sub_data ):
105+ def calc_adjustment (params , sub_data , cols , date_col ):
107106 """Apply the weekday adjustment to a specific time series.
108107
109108 Extracts the weekday fixed effects from the parameters and uses these to
@@ -122,14 +121,15 @@ def calc_adjustment(params, sub_data):
122121 -- this has the same effect.
123122
124123 """
125- for i , c in enumerate (Config .CLI_COLS + Config .FLU1_COL ):
126- wd_correction = np .zeros ((len (sub_data [c ])))
124+ tmp = sub_data .copy ()
125+
126+ for i , c in enumerate (cols ):
127+ wd_correction = np .zeros ((len (tmp [c ])))
127128
128129 for wd in range (7 ):
129- mask = sub_data [ Config . DATE_COL ].dt .dayofweek == wd
130- wd_correction [mask ] = sub_data .loc [mask , c ] / (
130+ mask = tmp [ date_col ].dt .dayofweek == wd
131+ wd_correction [mask ] = tmp .loc [mask , c ] / (
131132 np .exp (params [i , wd ]) if wd < 6 else np .exp (- np .sum (params [i , :6 ]))
132133 )
133- sub_data .loc [:, c ] = wd_correction
134-
135- return sub_data
134+ tmp .loc [:, c ] = wd_correction
135+ return tmp
0 commit comments