Skip to content

Commit 29b7fd7

Browse files
committed
standardize tuning tests
1 parent 56c32bf commit 29b7fd7

9 files changed

+35
-32
lines changed

doubleml/did/tests/test_did_binary_tune_ml_models.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
def test_doubleml_did_binary_optuna_tune(sampler_name, optuna_sampler, score):
2222
np.random.seed(3152)
2323
df_panel = make_did_CS2021(
24 - n_obs=1000,
24 + n_obs=200,
2525
dgp_type=1,
2626
include_never_treated=True,
2727
time_type="float",
@@ -39,8 +39,8 @@ def test_doubleml_did_binary_optuna_tune(sampler_name, optuna_sampler, score):
3939

4040
g_value, t_value_pre, t_value_eval = _select_binary_periods(panel_data)
4141

42 - ml_g = DecisionTreeRegressor(random_state=321, max_depth=1, min_samples_leaf=500, max_leaf_nodes=2)
43 - ml_m = DecisionTreeClassifier(random_state=654, max_depth=1, min_samples_leaf=500, max_leaf_nodes=2)
42 + ml_g = DecisionTreeRegressor(random_state=321)
43 + ml_m = DecisionTreeClassifier(random_state=654)
4444

4545
dml_did_binary = DoubleMLDIDBinary(
4646
obj_dml_data=panel_data,
@@ -50,7 +50,7 @@ def test_doubleml_did_binary_optuna_tune(sampler_name, optuna_sampler, score):
5050
ml_g=ml_g,
5151
ml_m=ml_m,
5252
score=score,
53 - n_folds=2,
53 + n_folds=5,
5454
)
5555
dml_did_binary.fit()
5656
untuned_score = dml_did_binary.evaluate_learners()

doubleml/did/tests/test_did_cs_binary_tune_ml_models.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
def test_doubleml_did_cs_binary_optuna_tune(sampler_name, optuna_sampler, score):
2222
np.random.seed(3153)
2323
df_panel = make_did_cs_CS2021(
24 - n_obs=1000,
24 + n_obs=200,
2525
dgp_type=2,
2626
include_never_treated=True,
2727
time_type="float",
@@ -38,8 +38,8 @@ def test_doubleml_did_cs_binary_optuna_tune(sampler_name, optuna_sampler, score)
3838
theta = df_panel["y1"].mean()
3939
g_value, t_value_pre, t_value_eval = _select_binary_periods(panel_data)
4040

41 - ml_g = DecisionTreeRegressor(random_state=321, max_depth=1, min_samples_leaf=500)
42 - ml_m = DecisionTreeClassifier(random_state=654, max_depth=1, min_samples_leaf=500)
41 + ml_g = DecisionTreeRegressor(random_state=321)
42 + ml_m = DecisionTreeClassifier(random_state=654)
4343

4444
dml_did_cs_binary = DoubleMLDIDCSBinary(
4545
obj_dml_data=panel_data,

doubleml/did/tests/test_did_cs_tune_ml_models.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@
1919
def test_doubleml_did_cs_optuna_tune(sampler_name, optuna_sampler, score):
2020
np.random.seed(3151)
2121
dml_data = make_did_SZ2020(
22 - n_obs=1000,
22 + n_obs=200,
2323
dgp_type=2,
2424
cross_sectional_data=True,
2525
return_type="DoubleMLDIDData",
2626
)
2727

28 - ml_g = DecisionTreeRegressor(random_state=321, max_depth=1, min_samples_leaf=200, max_leaf_nodes=2)
28 + ml_g = DecisionTreeRegressor(random_state=321)
2929
if score == "observational":
30 - ml_m = DecisionTreeClassifier(random_state=654, max_depth=1, min_samples_leaf=200, max_leaf_nodes=2)
30 + ml_m = DecisionTreeClassifier(random_state=654)
3131
dml_did_cs = dml.DoubleMLDIDCS(dml_data, ml_g, ml_m, score=score, n_folds=2)
3232
else:
3333
dml_did_cs = dml.DoubleMLDIDCS(dml_data, ml_g, score=score, n_folds=2)
@@ -47,4 +47,4 @@ def test_doubleml_did_cs_optuna_tune(sampler_name, optuna_sampler, score):
4747
_assert_tree_params(tuned_params)
4848

4949
# ensure tuning improved RMSE
50 - assert tuned_score[learner_name] <= untuned_score[learner_name]
50 + assert tuned_score[learner_name] < untuned_score[learner_name]

doubleml/did/tests/test_did_tune_ml_models.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ def test_doubleml_did_optuna_tune(sampler_name, optuna_sampler, score):
2020
"""Test DID with ml_g0, ml_g1 (and ml_m for observational score) nuisance models."""
2121

2222
np.random.seed(3150)
23 - dml_data = make_did_SZ2020(n_obs=1000, dgp_type=1, return_type="DoubleMLDIDData")
23 + dml_data = make_did_SZ2020(n_obs=200, dgp_type=1, return_type="DoubleMLDIDData")
2424

25 - ml_g = DecisionTreeRegressor(random_state=321, max_depth=1, min_samples_leaf=100, max_leaf_nodes=2)
25 + ml_g = DecisionTreeRegressor(random_state=321)
2626
if score == "observational":
27 - ml_m = DecisionTreeClassifier(random_state=654, max_depth=1, min_samples_leaf=100, max_leaf_nodes=2)
27 + ml_m = DecisionTreeClassifier(random_state=654)
2828
dml_did = dml.DoubleMLDID(dml_data, ml_g, ml_m, score=score, n_folds=2)
2929
else:
3030
dml_did = dml.DoubleMLDID(dml_data, ml_g, score=score, n_folds=2)

doubleml/irm/tests/test_cvar_tune_ml_models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ def test_doubleml_cvar_optuna_tune(sampler_name, optuna_sampler):
1818
np.random.seed(3145)
1919
dml_data = make_irm_data(n_obs=200, dim_x=5)
2020

21 - ml_g = DecisionTreeRegressor(random_state=321, max_depth=None, min_samples_split=2, min_samples_leaf=1)
22 - ml_m = DecisionTreeClassifier(random_state=654, max_depth=None, min_samples_split=2, min_samples_leaf=1)
21 + ml_g = DecisionTreeRegressor(random_state=321)
22 + ml_m = DecisionTreeClassifier(random_state=654)
2323

2424
dml_cvar = dml.DoubleMLCVAR(dml_data, ml_g=ml_g, ml_m=ml_m, n_folds=2)
2525
dml_cvar.fit()

doubleml/irm/tests/test_irm_tune_ml_models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES])
1717
def test_doubleml_irm_optuna_tune(sampler_name, optuna_sampler):
1818
np.random.seed(3142)
19 - dml_data = make_irm_data(n_obs=1000, dim_x=5)
19 + dml_data = make_irm_data(n_obs=500, dim_x=5)
2020

2121
ml_g = DecisionTreeRegressor(random_state=321)
2222
ml_m = DecisionTreeClassifier(random_state=654)

doubleml/irm/tests/test_lpq_tune_ml_models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES])
1818
def test_doubleml_lpq_optuna_tune(sampler_name, optuna_sampler):
1919
np.random.seed(3148)
20 - dml_data = make_iivm_data(n_obs=1000, dim_x=10)
20 + dml_data = make_iivm_data(n_obs=500, dim_x=10)
2121

2222
ml_g = DecisionTreeClassifier(random_state=321)
2323
ml_m = DecisionTreeClassifier(random_state=654)
@@ -42,4 +42,4 @@ def test_doubleml_lpq_optuna_tune(sampler_name, optuna_sampler):
4242
_assert_tree_params(tuned_params)
4343

4444
# ensure tuning improved RMSE
45 - assert tuned_score[learner_name] <= untuned_score[learner_name]
45 + assert tuned_score[learner_name] < untuned_score[learner_name]

doubleml/plm/tests/test_lplr_tune_ml_models.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ def score(request):
1717
return request.param
1818

1919

20 - @pytest.fixture(scope="module", params=[DecisionTreeRegressor(random_state=567, max_depth=None, min_samples_split=2), None])
20 + @pytest.fixture(
21 +     scope="module",
22 +     params=[DecisionTreeRegressor(random_state=567), None],
23 + )
2124
def ml_a(request):
2225
return request.param
2326

@@ -29,9 +32,9 @@ def test_doubleml_lplr_optuna_tune(sampler_name, optuna_sampler, score, ml_a):
2932
alpha = 0.5
3033
dml_data = make_lplr_LZZ2020(n_obs=200, dim_x=15, alpha=alpha)
3134

32 - ml_M = DecisionTreeClassifier(random_state=123, max_depth=None, min_samples_split=2)
33 - ml_t = DecisionTreeRegressor(random_state=234, max_depth=None, min_samples_split=2)
34 - ml_m = DecisionTreeRegressor(random_state=456, max_depth=None, min_samples_split=2)
35 + ml_M = DecisionTreeClassifier(random_state=123)
36 + ml_t = DecisionTreeRegressor(random_state=234)
37 + ml_m = DecisionTreeRegressor(random_state=456)
3538

3639
dml_lplr = dml.DoubleMLLPLR(
3740
dml_data,
@@ -85,6 +88,6 @@ def test_doubleml_lplr_optuna_tune(sampler_name, optuna_sampler, score, ml_a):
8588
assert tune_res[0]["ml_a"].best_params["max_depth"] == tuned_params_a["max_depth"]
8689

8790
# ensure tuning improved RMSE # not actually possible for ml_t as the targets are not available
88 - assert tuned_score["ml_M"] <= untuned_score["ml_M"]
89 - assert tuned_score["ml_m"] <= untuned_score["ml_m"]
90 - assert tuned_score["ml_a"] <= untuned_score["ml_a"]
91 + assert tuned_score["ml_M"] < untuned_score["ml_M"]
92 + assert tuned_score["ml_m"] < untuned_score["ml_m"]
93 + assert tuned_score["ml_a"] < untuned_score["ml_a"]

doubleml/tests/_utils_tune_optuna.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,17 @@ def _basic_optuna_settings(additional=None):
2222

2323
def _small_tree_params(trial):
2424
return {
25 - "max_depth": trial.suggest_int("max_depth", 1, 10),
26 - "min_samples_leaf": trial.suggest_int("min_samples_leaf", 5, 20),
27 - "max_leaf_nodes": trial.suggest_int("max_leaf_nodes", 2, 20),
25 + "max_depth": trial.suggest_int("max_depth", 1, 20),
26 + "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
27 + "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
2828
}
2929

3030

31 - def _assert_tree_params(param_dict, depth_range=(1, 10), leaf_range=(2, 100), leaf_nodes_range=(2, 20)):
32 - assert set(param_dict.keys()) == {"max_depth", "min_samples_leaf", "max_leaf_nodes"}
31 + def _assert_tree_params(param_dict, depth_range=(1, 20), leaf_range=(1, 10), split_range=(2, 20)):
32 + assert set(param_dict.keys()) == {"max_depth", "min_samples_leaf", "min_samples_split"}
3333
assert depth_range[0] <= param_dict["max_depth"] <= depth_range[1]
3434
assert leaf_range[0] <= param_dict["min_samples_leaf"] <= leaf_range[1]
35 - assert leaf_nodes_range[0] <= param_dict["max_leaf_nodes"] <= leaf_nodes_range[1]
35 + assert split_range[0] <= param_dict["min_samples_split"] <= split_range[1]
3636

3737

3838
def _build_param_space(dml_obj, param_fn):

0 commit comments

Comments (0)