Skip to content

Commit 29b7fd7

Browse files
committed
standardize tuning tests
1 parent 56c32bf commit 29b7fd7

9 files changed

+35
-32
lines changed

doubleml/did/tests/test_did_binary_tune_ml_models.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
def test_doubleml_did_binary_optuna_tune(sampler_name, optuna_sampler, score):
2222
np.random.seed(3152)
2323
df_panel = make_did_CS2021(
24 - n_obs=1000,
24 + n_obs=200,
2525
dgp_type=1,
2626
include_never_treated=True,
2727
time_type="float",
@@ -39,8 +39,8 @@ def test_doubleml_did_binary_optuna_tune(sampler_name, optuna_sampler, score):
3939

4040
g_value, t_value_pre, t_value_eval = _select_binary_periods(panel_data)
4141

42 - ml_g = DecisionTreeRegressor(random_state=321, max_depth=1, min_samples_leaf=500, max_leaf_nodes=2)
43 - ml_m = DecisionTreeClassifier(random_state=654, max_depth=1, min_samples_leaf=500, max_leaf_nodes=2)
42 + ml_g = DecisionTreeRegressor(random_state=321)
43 + ml_m = DecisionTreeClassifier(random_state=654)
4444

4545
dml_did_binary = DoubleMLDIDBinary(
4646
obj_dml_data=panel_data,
@@ -50,7 +50,7 @@ def test_doubleml_did_binary_optuna_tune(sampler_name, optuna_sampler, score):
5050
ml_g=ml_g,
5151
ml_m=ml_m,
5252
score=score,
53 - n_folds=2,
53 + n_folds=5,
5454
)
5555
dml_did_binary.fit()
5656
untuned_score = dml_did_binary.evaluate_learners()

doubleml/did/tests/test_did_cs_binary_tune_ml_models.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
def test_doubleml_did_cs_binary_optuna_tune(sampler_name, optuna_sampler, score):
2222
np.random.seed(3153)
2323
df_panel = make_did_cs_CS2021(
24 - n_obs=1000,
24 + n_obs=200,
2525
dgp_type=2,
2626
include_never_treated=True,
2727
time_type="float",
@@ -38,8 +38,8 @@ def test_doubleml_did_cs_binary_optuna_tune(sampler_name, optuna_sampler, score)
3838
theta = df_panel["y1"].mean()
3939
g_value, t_value_pre, t_value_eval = _select_binary_periods(panel_data)
4040

41 - ml_g = DecisionTreeRegressor(random_state=321, max_depth=1, min_samples_leaf=500)
42 - ml_m = DecisionTreeClassifier(random_state=654, max_depth=1, min_samples_leaf=500)
41 + ml_g = DecisionTreeRegressor(random_state=321)
42 + ml_m = DecisionTreeClassifier(random_state=654)
4343

4444
dml_did_cs_binary = DoubleMLDIDCSBinary(
4545
obj_dml_data=panel_data,

doubleml/did/tests/test_did_cs_tune_ml_models.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@
1919
def test_doubleml_did_cs_optuna_tune(sampler_name, optuna_sampler, score):
2020
np.random.seed(3151)
2121
dml_data = make_did_SZ2020(
22 - n_obs=1000,
22 + n_obs=200,
2323
dgp_type=2,
2424
cross_sectional_data=True,
2525
return_type="DoubleMLDIDData",
2626
)
2727

28 - ml_g = DecisionTreeRegressor(random_state=321, max_depth=1, min_samples_leaf=200, max_leaf_nodes=2)
28 + ml_g = DecisionTreeRegressor(random_state=321)
2929
if score == "observational":
30 - ml_m = DecisionTreeClassifier(random_state=654, max_depth=1, min_samples_leaf=200, max_leaf_nodes=2)
30 + ml_m = DecisionTreeClassifier(random_state=654)
3131
dml_did_cs = dml.DoubleMLDIDCS(dml_data, ml_g, ml_m, score=score, n_folds=2)
3232
else:
3333
dml_did_cs = dml.DoubleMLDIDCS(dml_data, ml_g, score=score, n_folds=2)
@@ -47,4 +47,4 @@ def test_doubleml_did_cs_optuna_tune(sampler_name, optuna_sampler, score):
4747
_assert_tree_params(tuned_params)
4848

4949
# ensure tuning improved RMSE
50 - assert tuned_score[learner_name] <= untuned_score[learner_name]
50 + assert tuned_score[learner_name] < untuned_score[learner_name]

doubleml/did/tests/test_did_tune_ml_models.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ def test_doubleml_did_optuna_tune(sampler_name, optuna_sampler, score):
2020
"""Test DID with ml_g0, ml_g1 (and ml_m for observational score) nuisance models."""
2121

2222
np.random.seed(3150)
23 - dml_data = make_did_SZ2020(n_obs=1000, dgp_type=1, return_type="DoubleMLDIDData")
23 + dml_data = make_did_SZ2020(n_obs=200, dgp_type=1, return_type="DoubleMLDIDData")
2424

25 - ml_g = DecisionTreeRegressor(random_state=321, max_depth=1, min_samples_leaf=100, max_leaf_nodes=2)
25 + ml_g = DecisionTreeRegressor(random_state=321)
2626
if score == "observational":
27 - ml_m = DecisionTreeClassifier(random_state=654, max_depth=1, min_samples_leaf=100, max_leaf_nodes=2)
27 + ml_m = DecisionTreeClassifier(random_state=654)
2828
dml_did = dml.DoubleMLDID(dml_data, ml_g, ml_m, score=score, n_folds=2)
2929
else:
3030
dml_did = dml.DoubleMLDID(dml_data, ml_g, score=score, n_folds=2)

doubleml/irm/tests/test_cvar_tune_ml_models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ def test_doubleml_cvar_optuna_tune(sampler_name, optuna_sampler):
1818
np.random.seed(3145)
1919
dml_data = make_irm_data(n_obs=200, dim_x=5)
2020

21 - ml_g = DecisionTreeRegressor(random_state=321, max_depth=None, min_samples_split=2, min_samples_leaf=1)
22 - ml_m = DecisionTreeClassifier(random_state=654, max_depth=None, min_samples_split=2, min_samples_leaf=1)
21 + ml_g = DecisionTreeRegressor(random_state=321)
22 + ml_m = DecisionTreeClassifier(random_state=654)
2323

2424
dml_cvar = dml.DoubleMLCVAR(dml_data, ml_g=ml_g, ml_m=ml_m, n_folds=2)
2525
dml_cvar.fit()

doubleml/irm/tests/test_irm_tune_ml_models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES])
1717
def test_doubleml_irm_optuna_tune(sampler_name, optuna_sampler):
1818
np.random.seed(3142)
19 - dml_data = make_irm_data(n_obs=1000, dim_x=5)
19 + dml_data = make_irm_data(n_obs=500, dim_x=5)
2020

2121
ml_g = DecisionTreeRegressor(random_state=321)
2222
ml_m = DecisionTreeClassifier(random_state=654)

doubleml/irm/tests/test_lpq_tune_ml_models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES])
1818
def test_doubleml_lpq_optuna_tune(sampler_name, optuna_sampler):
1919
np.random.seed(3148)
20 - dml_data = make_iivm_data(n_obs=1000, dim_x=10)
20 + dml_data = make_iivm_data(n_obs=500, dim_x=10)
2121

2222
ml_g = DecisionTreeClassifier(random_state=321)
2323
ml_m = DecisionTreeClassifier(random_state=654)
@@ -42,4 +42,4 @@ def test_doubleml_lpq_optuna_tune(sampler_name, optuna_sampler):
4242
_assert_tree_params(tuned_params)
4343

4444
# ensure tuning improved RMSE
45 - assert tuned_score[learner_name] <= untuned_score[learner_name]
45 + assert tuned_score[learner_name] < untuned_score[learner_name]

doubleml/plm/tests/test_lplr_tune_ml_models.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ def score(request):
1717
return request.param
1818

1919

20 - @pytest.fixture(scope="module", params=[DecisionTreeRegressor(random_state=567, max_depth=None, min_samples_split=2), None])
20 + @pytest.fixture(
21 +     scope="module",
22 +     params=[DecisionTreeRegressor(random_state=567), None],
23 + )
2124
def ml_a(request):
2225
return request.param
2326

@@ -29,9 +32,9 @@ def test_doubleml_lplr_optuna_tune(sampler_name, optuna_sampler, score, ml_a):
2932
alpha = 0.5
3033
dml_data = make_lplr_LZZ2020(n_obs=200, dim_x=15, alpha=alpha)
3134

32 - ml_M = DecisionTreeClassifier(random_state=123, max_depth=None, min_samples_split=2)
33 - ml_t = DecisionTreeRegressor(random_state=234, max_depth=None, min_samples_split=2)
34 - ml_m = DecisionTreeRegressor(random_state=456, max_depth=None, min_samples_split=2)
35 + ml_M = DecisionTreeClassifier(random_state=123)
36 + ml_t = DecisionTreeRegressor(random_state=234)
37 + ml_m = DecisionTreeRegressor(random_state=456)
3538

3639
dml_lplr = dml.DoubleMLLPLR(
3740
dml_data,
@@ -85,6 +88,6 @@ def test_doubleml_lplr_optuna_tune(sampler_name, optuna_sampler, score, ml_a):
8588
assert tune_res[0]["ml_a"].best_params["max_depth"] == tuned_params_a["max_depth"]
8689

8790
# ensure tuning improved RMSE # not actually possible for ml_t as the targets are not available
88 - assert tuned_score["ml_M"] <= untuned_score["ml_M"]
89 - assert tuned_score["ml_m"] <= untuned_score["ml_m"]
90 - assert tuned_score["ml_a"] <= untuned_score["ml_a"]
91 + assert tuned_score["ml_M"] < untuned_score["ml_M"]
92 + assert tuned_score["ml_m"] < untuned_score["ml_m"]
93 + assert tuned_score["ml_a"] < untuned_score["ml_a"]

doubleml/tests/_utils_tune_optuna.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,17 @@ def _basic_optuna_settings(additional=None):
2222

2323
def _small_tree_params(trial):
2424
return {
25 - "max_depth": trial.suggest_int("max_depth", 1, 10),
26 - "min_samples_leaf": trial.suggest_int("min_samples_leaf", 5, 20),
27 - "max_leaf_nodes": trial.suggest_int("max_leaf_nodes", 2, 20),
25 + "max_depth": trial.suggest_int("max_depth", 1, 20),
26 + "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
27 + "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
2828
}
2929

3030

31 - def _assert_tree_params(param_dict, depth_range=(1, 10), leaf_range=(2, 100), leaf_nodes_range=(2, 20)):
32 - assert set(param_dict.keys()) == {"max_depth", "min_samples_leaf", "max_leaf_nodes"}
31 + def _assert_tree_params(param_dict, depth_range=(1, 20), leaf_range=(1, 10), split_range=(2, 20)):
32 + assert set(param_dict.keys()) == {"max_depth", "min_samples_leaf", "min_samples_split"}
3333
assert depth_range[0] <= param_dict["max_depth"] <= depth_range[1]
3434
assert leaf_range[0] <= param_dict["min_samples_leaf"] <= leaf_range[1]
35 - assert leaf_nodes_range[0] <= param_dict["max_leaf_nodes"] <= leaf_nodes_range[1]
35 + assert split_range[0] <= param_dict["min_samples_split"] <= split_range[1]
3636

3737

3838
def _build_param_space(dml_obj, param_fn):

0 commit comments

Comments (0)