doc/spec/estimation/dml.rst (10 changes: 5 additions & 5 deletions)

@@ -52,11 +52,11 @@ characteristics :math:`X` of the treated samples, then one can use this method.

 # DML
 import numpy as np
-X = np.random.choice(np.arange(5), size=(100,3))
+X = np.random.choice(6, size=(100,3))
 Y = np.random.normal(size=(100,2))
 y = np.random.normal(size=(100,))
-T = T0 = T1 = np.random.choice(np.arange(3), size=(100,2))
-t = t0 = t1 = T[:,0]
+(T, T0, T1) = (np.random.choice(np.arange(3), size=(100,2)) for _ in range(3))
+(t, t0, t1) = (a[:,0] for a in (T, T0, T1))
 W = np.random.normal(size=(100,2))

 .. testcode::
@@ -646,7 +646,7 @@ Then we can estimate the coefficients :math:`\alpha_i` by running:
 from sklearn.preprocessing import PolynomialFeatures
 est = LinearDML(model_y=RandomForestRegressor(),
                 model_t=RandomForestRegressor(),
-                featurizer=PolynomialFeatures(degree=3, include_bias=True))
+                featurizer=PolynomialFeatures(degree=3, include_bias=False))
 est.fit(y, T, X=X, W=W)

 # To get the coefficients of the polynomial fitted in the final stage we can
@@ -663,7 +663,7 @@ To add fixed effect heterogeneity, we can create one-hot encodings of the id, wh
 from econml.dml import LinearDML
 from sklearn.preprocessing import OneHotEncoder
 # removing one id to avoid colinearity, as is standard for fixed effects
-X_oh = OneHotEncoder(sparse_output=False).fit_transform(X)[:, 1:]
+X_oh = OneHotEncoder(sparse_output=False, drop="first").fit_transform(X)

 est = LinearDML(model_y=RandomForestRegressor(),
                 model_t=RandomForestRegressor())
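A note on the last hunk above: slicing with `[:, 1:]` removes only one column of the whole encoded matrix, whereas `OneHotEncoder(drop="first")` drops the first category of every encoded feature, which is what the fixed-effects comment intends. Likewise, the `include_bias=False` change in the earlier hunk avoids a constant column that would be collinear with the CATE intercept LinearDML already fits by default. A minimal sketch of the encoder difference (illustrative only, not part of the docs):

    import numpy as np
    from sklearn.preprocessing import OneHotEncoder

    # two id columns, each taking categories {0, 1, 2}
    X = np.array([[0, 1], [1, 2], [2, 0]])

    sliced = OneHotEncoder(sparse_output=False).fit_transform(X)[:, 1:]
    dropped = OneHotEncoder(sparse_output=False, drop="first").fit_transform(X)

    print(sliced.shape)   # (3, 5): only the first column of the first id is removed
    print(dropped.shape)  # (3, 4): one reference category removed per id column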
econml/iv/sieve/_tsls.py (2 changes: 1 addition & 1 deletion)

@@ -151,7 +151,7 @@ def transform(self, X):
 for i in range(X.shape[1]):
     p = powers.copy()
     c = powers[:, i]
-    p[:, i] -= 1
+    p[p[:, i] > 0, i] -= 1
     M = np.float_power(X[:, np.newaxis, :], p[np.newaxis, :, :])
     result[:, i, :] = c[np.newaxis, :] * np.prod(M, axis=-1)
 return result
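The one-line change above guards the exponent decrement so `np.float_power` never sees a -1 exponent on a zero input. A quick illustration of the failure mode (a hedged sketch, not code from the patch):

    import numpy as np

    x = 0.0
    print(np.float_power(x, -1))      # inf (divide-by-zero warning)
    print(0 * np.float_power(x, -1))  # nan, because 0 * inf is undefined

    # With the guard, exponents of 0 are left at 0 instead of dropping to -1,
    # so the zero coefficient multiplies a finite value and the derivative is a clean 0.
    powers = np.array([[0], [1], [2]])  # hypothetical powers_ for the features 1, x, x**2
    p = powers.copy()
    p[p[:, 0] > 0, 0] -= 1
    print(p.ravel())                  # [0 0 1] rather than [-1 0 1]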
econml/tests/test_treatment_featurization.py (7 changes: 4 additions & 3 deletions)

@@ -486,20 +486,21 @@ def test_featurization(self):
 def test_jac(self):
     def func_transform(x):
         x = x.reshape(-1, 1)
-        return np.hstack([x, x**2])
+        return np.hstack([np.ones_like(x), x, x**2])

     def calc_expected_jacobian(T):
-        jac = DPolynomialFeatures(degree=2, include_bias=False).fit_transform(T)
+        jac = DPolynomialFeatures(degree=2, include_bias=True).fit_transform(T)
         return jac

     treatment_featurizers = [
-        PolynomialFeatures(degree=2, include_bias=False),
+        PolynomialFeatures(degree=2, include_bias=True),
         FunctionTransformer(func=func_transform)
     ]

     n = 10000
     d_t = 1
     T = np.random.normal(size=(n, d_t))
+    T[0, 0] = 0  # hardcode one value of exactly zero to test that we don't generate nan

     for treatment_featurizer in treatment_featurizers:
         # fit a dummy estimator first so the featurizer can be fit to the treatment
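The added `T[0, 0] = 0` line exercises exactly the case the utilities.py fix addresses: with a bias term in the featurization [1, t, t**2], the jacobian with respect to t is [0, 1, 2t], which should come out as [0, 1, 0] at t = 0 rather than containing nan. A hand-rolled check of that expectation (helper names here are illustrative, not the test's own):

    import numpy as np
    from sklearn.preprocessing import PolynomialFeatures

    t = np.array([[0.0], [2.0]])             # includes an exact zero
    pf = PolynomialFeatures(degree=2, include_bias=True).fit(t)
    powers = pf.powers_                      # [[0], [1], [2]]

    def jac_1d(x, powers):
        # derivative of each output feature x**p with respect to the single input column
        c = powers[:, 0]
        p = powers.copy()
        p[p[:, 0] > 0, 0] -= 1               # guarded decrement from the patch
        return c * np.float_power(x, p[:, 0])

    print(jac_1d(t[0], powers))  # [0. 1. 0.]  -- no nan at t = 0
    print(jac_1d(t[1], powers))  # [0. 1. 4.]  -- matches [0, 1, 2t] at t = 2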
econml/utilities.py (10 changes: 8 additions & 2 deletions)

@@ -1460,9 +1460,15 @@ def jac(self, X, epsilon=0.001):
 powers = self.featurizer.powers_
 result = np.zeros(X.shape + (self.featurizer.n_output_features_,))
 for i in range(X.shape[1]):
-    p = powers.copy()
+    # d/dx_i of x_1^p_1 * x_2^p_2 * ... x_i^p_i * ... x_n^p_n
+    # = p_i * x_1^p_1 * x_2^p_2 * ... x_i^(p_i-1) * ... x_n^p_n
+    # store the coefficient in c, and the updated powers in p
     c = powers[:, i]
-    p[:, i] -= 1
+    p = powers.copy()
+    # decrement p[:, i], but only if it was more than 0 already
+    # (if it is 0 then c=0 so we'll correctly get 0 for a result regardless of the updated entries in p,
+    # but float_power would return nan if X has a 0 and the exponent is -1)
+    p[p[:, i] > 0, i] -= 1
     M = np.float_power(X[:, np.newaxis, :], p[np.newaxis, :, :])
     result[:, i, :] = c[np.newaxis, :] * np.prod(M, axis=-1)
 return result
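For completeness, a standalone run of the patched loop on an input containing a zero, checked against the analytic derivative of one output feature (local variables stand in for the wrapper's featurizer attributes; a sketch, not the library code):

    import numpy as np
    from sklearn.preprocessing import PolynomialFeatures

    X = np.array([[0.0, 3.0], [2.0, 1.0]])
    pf = PolynomialFeatures(degree=2, include_bias=True).fit(X)
    powers = pf.powers_                   # one row of exponents per output feature

    result = np.zeros(X.shape + (pf.n_output_features_,))
    for i in range(X.shape[1]):
        c = powers[:, i]
        p = powers.copy()
        p[p[:, i] > 0, i] -= 1            # guarded decrement, as in the patch
        M = np.float_power(X[:, np.newaxis, :], p[np.newaxis, :, :])
        result[:, i, :] = c[np.newaxis, :] * np.prod(M, axis=-1)

    assert not np.isnan(result).any()     # the zero in X no longer yields nan
    # spot check: d/dx0 of the x0*x1 feature equals x1
    j = [tuple(row) for row in powers].index((1, 1))
    assert np.allclose(result[:, 0, j], X[:, 1])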