diff --git a/doc/spec/estimation/dml.rst b/doc/spec/estimation/dml.rst
index 536419abd..200d9d677 100644
--- a/doc/spec/estimation/dml.rst
+++ b/doc/spec/estimation/dml.rst
@@ -52,11 +52,11 @@ characteristics :math:`X` of the treated samples, then one can use this method.
 
     # DML
    import numpy as np
-    X = np.random.choice(np.arange(5), size=(100,3))
+    X = np.random.choice(6, size=(100,3))
     Y = np.random.normal(size=(100,2))
     y = np.random.normal(size=(100,))
-    T = T0 = T1 = np.random.choice(np.arange(3), size=(100,2))
-    t = t0 = t1 = T[:,0]
+    (T, T0, T1) = (np.random.choice(np.arange(3), size=(100,2)) for _ in range(3))
+    (t, t0, t1) = (a[:,0] for a in (T, T0, T1))
     W = np.random.normal(size=(100,2))
 
 .. testcode::
@@ -646,7 +646,7 @@ Then we can estimate the coefficients :math:`\alpha_i` by running:
 
     from sklearn.preprocessing import PolynomialFeatures
     est = LinearDML(model_y=RandomForestRegressor(), model_t=RandomForestRegressor(),
-                    featurizer=PolynomialFeatures(degree=3, include_bias=True))
+                    featurizer=PolynomialFeatures(degree=3, include_bias=False))
     est.fit(y, T, X=X, W=W)
 
     # To get the coefficients of the polynomial fitted in the final stage we can
@@ -663,7 +663,7 @@ To add fixed effect heterogeneity, we can create one-hot encodings of the id, wh
     from econml.dml import LinearDML
     from sklearn.preprocessing import OneHotEncoder
     # removing one id to avoid colinearity, as is standard for fixed effects
-    X_oh = OneHotEncoder(sparse_output=False).fit_transform(X)[:, 1:]
+    X_oh = OneHotEncoder(sparse_output=False, drop="first").fit_transform(X)
 
     est = LinearDML(model_y=RandomForestRegressor(),
                     model_t=RandomForestRegressor())
diff --git a/econml/iv/sieve/_tsls.py b/econml/iv/sieve/_tsls.py
index ffed3350d..0439d4b39 100644
--- a/econml/iv/sieve/_tsls.py
+++ b/econml/iv/sieve/_tsls.py
@@ -151,7 +151,7 @@ def transform(self, X):
         for i in range(X.shape[1]):
             p = powers.copy()
             c = powers[:, i]
-            p[:, i] -= 1
+            p[p[:, i] > 0, i] -= 1
             M = np.float_power(X[:, np.newaxis, :], p[np.newaxis, :, :])
             result[:, i, :] = c[np.newaxis, :] * np.prod(M, axis=-1)
         return result
diff --git a/econml/tests/test_treatment_featurization.py b/econml/tests/test_treatment_featurization.py
index 068a07847..a3cf55fe7 100644
--- a/econml/tests/test_treatment_featurization.py
+++ b/econml/tests/test_treatment_featurization.py
@@ -486,20 +486,21 @@ def test_featurization(self):
     def test_jac(self):
         def func_transform(x):
             x = x.reshape(-1, 1)
-            return np.hstack([x, x**2])
+            return np.hstack([np.ones_like(x), x, x**2])
 
         def calc_expected_jacobian(T):
-            jac = DPolynomialFeatures(degree=2, include_bias=False).fit_transform(T)
+            jac = DPolynomialFeatures(degree=2, include_bias=True).fit_transform(T)
             return jac
 
         treatment_featurizers = [
-            PolynomialFeatures(degree=2, include_bias=False),
+            PolynomialFeatures(degree=2, include_bias=True),
             FunctionTransformer(func=func_transform)
         ]
 
         n = 10000
         d_t = 1
         T = np.random.normal(size=(n, d_t))
+        T[0, 0] = 0  # hardcode one value of exactly zero to test that we don't generate nan
 
         for treatment_featurizer in treatment_featurizers:
             # fit a dummy estimator first so the featurizer can be fit to the treatment
diff --git a/econml/utilities.py b/econml/utilities.py
index e8fc18867..90a22a089 100644
--- a/econml/utilities.py
+++ b/econml/utilities.py
@@ -1460,9 +1460,15 @@ def jac(self, X, epsilon=0.001):
         powers = self.featurizer.powers_
         result = np.zeros(X.shape + (self.featurizer.n_output_features_,))
         for i in range(X.shape[1]):
-            p = powers.copy()
+            # d/dx_i of x_1^p_1 * x_2^p_2 * ... x_i^p_i * ... x_n^p_n
+            # = p_i * x_1^p_1 * x_2^p_2 * ... x_i^(p_i-1) * ... x_n^p_n
+            # store the coefficient in c, and the updated powers in p
             c = powers[:, i]
-            p[:, i] -= 1
+            p = powers.copy()
+            # decrement p[:, i], but only if it was more than 0 already
+            # (if it is 0 then c=0 so we'll correctly get 0 for a result regardless of the updated entries in p,
+            #  but float_power would return nan if X has a 0 and the exponent is -1)
+            p[p[:, i] > 0, i] -= 1
             M = np.float_power(X[:, np.newaxis, :], p[np.newaxis, :, :])
             result[:, i, :] = c[np.newaxis, :] * np.prod(M, axis=-1)
         return result
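
For context on the masked decrement in _tsls.py and utilities.py: np.float_power maps 0.0 raised to a negative exponent to inf, so the old unconditional decrement turned the bias column's exponent 0 into -1 and produced 0 * inf = nan whenever a feature was exactly zero. Below is a minimal standalone sketch of that failure mode and of the fix; it is not part of the patch, and the variable names are purely illustrative.

import numpy as np

# powers_ of PolynomialFeatures(degree=2, include_bias=True) fit on one feature:
# output columns are [1, x, x^2], i.e. exponents 0, 1, 2
powers = np.array([[0], [1], [2]])
X = np.array([[0.0]])  # a feature value of exactly zero triggers the problem
i = 0
c = powers[:, i]  # derivative coefficients: [0, 1, 2]

# old behavior: decrement unconditionally, so the bias exponent becomes -1
p_bad = powers.copy()
p_bad[:, i] -= 1
M_bad = np.float_power(X[:, np.newaxis, :], p_bad[np.newaxis, :, :])
print(c[np.newaxis, :] * np.prod(M_bad, axis=-1))  # [[nan  1.  0.]], since 0 * inf is nan

# patched behavior: only decrement exponents that are already positive
p_ok = powers.copy()
p_ok[p_ok[:, i] > 0, i] -= 1
M_ok = np.float_power(X[:, np.newaxis, :], p_ok[np.newaxis, :, :])
print(c[np.newaxis, :] * np.prod(M_ok, axis=-1))  # [[0. 1. 0.]], d/dx of [1, x, x^2] at x=0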