From 05d71202fe3c7ca0fdfd4c4f20a61813c3c56825 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Sun, 23 Nov 2025 08:25:45 +0100
Subject: [PATCH 1/5] fix docstring

---
 doubleml/double_ml_framework.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index 1ff21b350..5f767d9fd 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -21,11 +21,11 @@

 class DoubleMLFramework:
-    """Double Machine Learning Framework to combine DoubleML classes and compute confidendence intervals.
+    """Double Machine Learning Framework to combine DoubleML classes and compute confidence intervals.

     Parameters
     ----------
-    doubleml_dict : :dict
+    doubleml_dict : dict
         A dictionary providing the estimated parameters and normalized scores. Keys have to be 'thetas', 'ses',
         'all_thetas', 'all_ses', 'var_scaling_factors' and 'scaled_psi'.
         Values have to be numpy arrays with the corresponding shapes.

From 5aa6379f36aed303a6ce5b4aa7bff604e5b35a6c Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Sun, 23 Nov 2025 08:42:32 +0100
Subject: [PATCH 2/5] add mypy configuration to pyproject.toml

---
 pyproject.toml | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6aa06ab50..783a7e26d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,8 @@ dependencies = [
     "statsmodels>=0.14.0",
     "matplotlib>=3.9.0",
     "seaborn>=0.13",
-    "plotly>=5.0.0"
+    "plotly>=5.0.0",
+    "mypy>=1.18.0"
 ]
 classifiers = [
     "Programming Language :: Python :: 3",
@@ -113,3 +114,30 @@ ignore = [
     # isinstance checks
     "E721",
 ]
+
+
+# MyPy configuration
+[tool.mypy]
+python_version = "3.12"
+mypy_path = "."
+packages = ["doubleml"]
+exclude = [
+    "^tests/|^test_",
+]
+
+# Essential checks only:
+warn_return_any = false
+warn_unused_configs = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+disallow_untyped_defs = true
+check_untyped_defs = true
+no_implicit_optional = true
+
+# Output formatting:
+show_error_codes = true
+pretty = true
+color_output = true
+
+# Handle third-party libraries:
+ignore_missing_imports = true

From 019f33c5311e40ad69e2111a19465192f76f3a2f Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Sun, 23 Nov 2025 10:02:18 +0100
Subject: [PATCH 3/5] implement DoubleMLCore class with validation and error
 handling for parameters

---
 doubleml/double_ml_framework.py        | 159 +++++++++++++++++++
 doubleml/tests/test_core_exceptions.py | 206 +++++++++++++++++++++++++
 2 files changed, 365 insertions(+)
 create mode 100644 doubleml/tests/test_core_exceptions.py

diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index 5f767d9fd..5802ce143 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -1,4 +1,6 @@
 import copy
+from dataclasses import dataclass
+from typing import Dict, List, Optional

 import numpy as np
 import pandas as pd
@@ -20,6 +22,163 @@
 from .utils._plots import _sensitivity_contour_plot


+@dataclass
+class DoubleMLCore:
+    thetas: np.ndarray
+    ses: np.ndarray
+    all_thetas: np.ndarray
+    all_ses: np.ndarray
+    var_scaling_factors: np.ndarray
+    scaled_psi: np.ndarray
+    is_cluster_data: bool = False
+    cluster_dict: Optional[Dict] = None
+    sensitivity_elements: Optional[Dict[str, np.ndarray]] = None
+    treatment_names: Optional[List[str]] = None
+    """
+    Core container for DoubleML results.
+ + This class stores the main results and diagnostics from a DoubleML estimation, including parameter estimates, + standard errors, normalized scores, and (optionally) sensitivity and clustering information. It performs + thorough type and shape validation on all inputs to ensure internal consistency. + + Parameters + ---------- + thetas : np.ndarray + Estimated target parameters (shape: (n_thetas,)). + ses : np.ndarray + Estimated standard errors (shape: (n_thetas,)). + all_thetas : np.ndarray + Estimated target parameters for each repetition (shape: (n_thetas, n_rep)). + all_ses : np.ndarray + Estimated standard errors for each repetition (shape: (n_thetas, n_rep)). + var_scaling_factors : np.ndarray + Variance scaling factors (shape: (n_thetas,)). + scaled_psi : np.ndarray + Normalized scores (shape: (n_obs, n_thetas, n_rep)). + is_cluster_data : bool, optional + Indicates whether clustering is used (default: False). + cluster_dict : dict, optional + Dictionary with clustering information, required if is_cluster_data is True. + sensitivity_elements : dict, optional + Dictionary with sensitivity analysis components (e.g., max_bias, psi_max_bias, sigma2, nu2). + treatment_names : list of str, optional + Names of the treatments (must match n_thetas if provided). + + Raises + ------ + ValueError, TypeError + If any input fails type or shape validation. + """ + + def __post_init__(self): + + if not isinstance(self.scaled_psi, np.ndarray) or self.scaled_psi.ndim != 3: + raise ValueError("scaled_psi must be a 3-dimensional numpy.ndarray.") + self.n_obs, self.n_thetas, self.n_rep = self.scaled_psi.shape + + self._check_arrays() + self._check_cluster_dict() + self._check_sensitivity_elements() + self._check_treatment_names() + + def _check_arrays(self): + """Type and shape checks for input arrays.""" + arrays = { + "thetas": self.thetas, + "ses": self.ses, + "all_thetas": self.all_thetas, + "all_ses": self.all_ses, + "var_scaling_factors": self.var_scaling_factors, + "scaled_psi": self.scaled_psi, + } + for name, arr in arrays.items(): + if not isinstance(arr, np.ndarray): + raise TypeError(f"{name} must be a numpy.ndarray, got {type(arr)}.") + + expected_shapes = { + "thetas": (self.n_thetas,), + "ses": (self.n_thetas,), + "all_thetas": (self.n_thetas, self.n_rep), + "all_ses": (self.n_thetas, self.n_rep), + "var_scaling_factors": (self.n_thetas,), + "scaled_psi": (self.n_obs, self.n_thetas, self.n_rep), + } + for name, expected_shape in expected_shapes.items(): + actual_shape = arrays[name].shape + if actual_shape != expected_shape: + raise ValueError(f"{name} shape {actual_shape} does not match expected {expected_shape}.") + + def _check_cluster_dict(self): + """Checks for cluster_dict if is_cluster_data is True.""" + if self.is_cluster_data: + if self.cluster_dict is None: + raise ValueError("If is_cluster_data is True, cluster_dict must be provided.") + if not isinstance(self.cluster_dict, dict): + raise TypeError("cluster_dict must be a dictionary.") + expected_keys = ["smpls", "smpls_cluster", "cluster_vars", "n_folds_per_cluster"] + if not all(key in self.cluster_dict for key in expected_keys): + raise ValueError( + f"cluster_dict must contain keys: {', '.join(expected_keys)}. " + f"Got: {', '.join(self.cluster_dict.keys())}." 
+ ) + # Type checks for values + if not isinstance(self.cluster_dict["smpls"], list): + raise TypeError("cluster_dict['smpls'] must be a list.") + if not isinstance(self.cluster_dict["smpls_cluster"], list): + raise TypeError("cluster_dict['smpls_cluster'] must be a list.") + if not isinstance(self.cluster_dict["cluster_vars"], list): + raise TypeError("cluster_dict['cluster_vars'] must be a list.") + if not isinstance(self.cluster_dict["n_folds_per_cluster"], int): + raise TypeError("cluster_dict['n_folds_per_cluster'] must be an int.") + + def _check_sensitivity_elements(self): + """Checks for sensitivity_elements if provided.""" + if self.sensitivity_elements is not None: + if not isinstance(self.sensitivity_elements, dict): + raise TypeError("sensitivity_elements must be a dict if provided.") + required_keys = ["max_bias", "psi_max_bias"] + for key in required_keys: + if key not in self.sensitivity_elements: + raise ValueError(f"sensitivity_elements must contain key '{key}'.") + if not isinstance(self.sensitivity_elements[key], np.ndarray): + raise TypeError(f"sensitivity_elements['{key}'] must be a numpy.ndarray.") + + expected_shapes = { + "max_bias": (1, self.n_thetas, self.n_rep), + "psi_max_bias": (self.n_obs, self.n_thetas, self.n_rep), + } + for key in required_keys: + actual_shape = self.sensitivity_elements[key].shape + if actual_shape != expected_shapes[key]: + raise ValueError( + f"sensitivity_elements['{key}'] shape {actual_shape} does not match expected {expected_shapes[key]}." + ) + + # Optional: check benchmarks if present + for key in ["sigma2", "nu2"]: + if key in self.sensitivity_elements: + if not isinstance(self.sensitivity_elements[key], np.ndarray): + raise TypeError(f"sensitivity_elements['{key}'] must be a numpy.ndarray.") + if np.any(self.sensitivity_elements[key] < 0): + raise ValueError(f"sensitivity_elements['{key}'] must be positive.") + if self.sensitivity_elements[key].shape != (1, self.n_thetas, self.n_rep): + expected_shape = (1, self.n_thetas, self.n_rep) + actual_shape = self.sensitivity_elements[key].shape + raise ValueError( + f"sensitivity_elements['{key}'] shape {actual_shape} does not match expected {expected_shape}." + ) + + def _check_treatment_names(self): + """Checks for treatment_names if provided.""" + if self.treatment_names is not None: + if not isinstance(self.treatment_names, list) or not all(isinstance(n, str) for n in self.treatment_names): + raise TypeError("treatment_names must be a list of strings.") + if len(self.treatment_names) != self.n_thetas: + raise ValueError( + f"Length of treatment_names ({len(self.treatment_names)}) does not match n_thetas ({self.n_thetas})." + ) + + class DoubleMLFramework: """Double Machine Learning Framework to combine DoubleML classes and compute confidence intervals. 
diff --git a/doubleml/tests/test_core_exceptions.py b/doubleml/tests/test_core_exceptions.py
new file mode 100644
index 000000000..ddd615231
--- /dev/null
+++ b/doubleml/tests/test_core_exceptions.py
@@ -0,0 +1,206 @@
+import numpy as np
+import pytest
+
+from doubleml.double_ml_framework import DoubleMLCore
+from doubleml.tests._utils import generate_dml_dict
+
+n_obs = 10
+n_thetas = 2
+n_rep = 5
+
+
+def valid_core_kwargs():
+    np.random.seed(42)
+    psi_a = np.ones(shape=(n_obs, n_thetas, n_rep))
+    psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep))
+    doubleml_dict = generate_dml_dict(psi_a, psi_b)
+    return doubleml_dict
+
+
+@pytest.mark.ci
+def test_scaled_psi_shape_and_type():
+    kwargs = valid_core_kwargs()
+    msg = "scaled_psi must be a 3-dimensional numpy.ndarray."
+    kwargs["scaled_psi"] = "not_an_array"
+    with pytest.raises(ValueError, match=msg):
+        DoubleMLCore(**kwargs)
+    kwargs["scaled_psi"] = np.ones((10,))
+    with pytest.raises(ValueError, match=msg):
+        DoubleMLCore(**kwargs)
+    kwargs["scaled_psi"] = np.ones((10, 2))
+    with pytest.raises(ValueError, match=msg):
+        DoubleMLCore(**kwargs)
+
+
+@pytest.mark.ci
+def test_arrays():
+    kwargs = valid_core_kwargs()
+    # Type checks
+    for key in ["thetas", "ses", "all_thetas", "all_ses", "var_scaling_factors"]:
+        bad_kwargs = kwargs.copy()
+        bad_kwargs[key] = "not_an_array"
+        with pytest.raises(TypeError, match=f"{key} must be a numpy.ndarray"):
+            DoubleMLCore(**bad_kwargs)
+    # Shape checks
+    shapes = {
+        "thetas": (3,),
+        "ses": (3,),
+        "all_thetas": (3, 5),
+        "all_ses": (3, 5),
+        "var_scaling_factors": (3,),
+    }
+    for key, shape in shapes.items():
+        bad_kwargs = kwargs.copy()
+        bad_kwargs[key] = np.ones(shape)
+        with pytest.raises(ValueError, match=".*does not match expected.*"):
+            DoubleMLCore(**bad_kwargs)
+
+
+@pytest.mark.ci
+def test_cluster_dict_exceptions():
+    kwargs = valid_core_kwargs()
+    kwargs["is_cluster_data"] = True
+
+    # 1. cluster_dict missing
+    bad_kwargs = kwargs.copy()
+    bad_kwargs.pop("cluster_dict", None)
+    with pytest.raises(ValueError, match="If is_cluster_data is True, cluster_dict must be provided."):
+        DoubleMLCore(**bad_kwargs)
+
+    # 2. cluster_dict not a dict
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["cluster_dict"] = "not_a_dict"
+    with pytest.raises(TypeError, match="cluster_dict must be a dictionary."):
+        DoubleMLCore(**bad_kwargs)
+
+    # 3. cluster_dict missing keys
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["cluster_dict"] = {"smpls": [], "smpls_cluster": [], "cluster_vars": []}  # missing n_folds_per_cluster
+    msg = "cluster_dict must contain keys: smpls, smpls_cluster, cluster_vars, n_folds_per_cluster."
+    with pytest.raises(ValueError, match=msg):
+        DoubleMLCore(**bad_kwargs)
+
+    # 4. cluster_dict wrong value types
+    type_cases = [
+        ("smpls", "not_a_list", "cluster_dict\\['smpls'\\] must be a list."),
+        ("smpls_cluster", "not_a_list", "cluster_dict\\['smpls_cluster'\\] must be a list."),
+        ("cluster_vars", "not_a_list", "cluster_dict\\['cluster_vars'\\] must be a list."),
+        ("n_folds_per_cluster", "not_an_int", "cluster_dict\\['n_folds_per_cluster'\\] must be an int."),
+    ]
+    for key, bad_value, msg in type_cases:
+        cluster_dict = {
+            "smpls": [],
+            "smpls_cluster": [],
+            "cluster_vars": [],
+            "n_folds_per_cluster": 1,
+        }
+        cluster_dict[key] = bad_value
+        bad_kwargs = kwargs.copy()
+        bad_kwargs["cluster_dict"] = cluster_dict
+        with pytest.raises(TypeError, match=msg):
+            DoubleMLCore(**bad_kwargs)
+
+
+@pytest.mark.ci
+def test_sensitivity_elements_exceptions():
+    kwargs = valid_core_kwargs()
+
+    # Not a dict
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["sensitivity_elements"] = "not_a_dict"
+    with pytest.raises(TypeError, match="sensitivity_elements must be a dict if provided."):
+        DoubleMLCore(**bad_kwargs)
+
+    # Missing required key
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["sensitivity_elements"] = {"max_bias": np.ones((1, n_thetas, n_rep))}
+    with pytest.raises(ValueError, match="sensitivity_elements must contain key 'psi_max_bias'."):
+        DoubleMLCore(**bad_kwargs)
+
+    # Wrong type for required key
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["sensitivity_elements"] = {
+        "max_bias": "not_an_array",
+        "psi_max_bias": np.ones((n_obs, n_thetas, n_rep)),
+    }
+    with pytest.raises(TypeError, match="sensitivity_elements\\['max_bias'\\] must be a numpy.ndarray."):
+        DoubleMLCore(**bad_kwargs)
+
+    # Wrong shape for required key
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["sensitivity_elements"] = {
+        "max_bias": np.ones((2, n_thetas, n_rep)),  # should be (1, n_thetas, n_rep)
+        "psi_max_bias": np.ones((n_obs, n_thetas, n_rep)),
+    }
+    with pytest.raises(
+        ValueError, match=r"sensitivity_elements\['max_bias'\] shape \(2, 2, 5\) does not match expected \(1, 2, 5\)\."
+    ):
+        DoubleMLCore(**bad_kwargs)
+
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["sensitivity_elements"] = {
+        "max_bias": np.ones((1, n_thetas, n_rep)),
+        "psi_max_bias": np.ones((n_obs + 1, n_thetas, n_rep)),  # wrong n_obs
+    }
+    with pytest.raises(
+        ValueError, match=r"sensitivity_elements\['psi_max_bias'\] shape \(11, 2, 5\) does not match expected \(10, 2, 5\)\."
+    ):
+        DoubleMLCore(**bad_kwargs)
+
+    # sigma2 and nu2 wrong type
+    for key in ["sigma2", "nu2"]:
+        bad_kwargs = kwargs.copy()
+        sens = {
+            "max_bias": np.ones((1, n_thetas, n_rep)),
+            "psi_max_bias": np.ones((n_obs, n_thetas, n_rep)),
+            key: "not_an_array",
+        }
+        bad_kwargs["sensitivity_elements"] = sens
+        with pytest.raises(TypeError, match=rf"sensitivity_elements\['{key}'\] must be a numpy.ndarray."):
+            DoubleMLCore(**bad_kwargs)
+
+    # sigma2 and nu2 negative values
+    for key in ["sigma2", "nu2"]:
+        bad_kwargs = kwargs.copy()
+        sens = {
+            "max_bias": np.ones((1, n_thetas, n_rep)),
+            "psi_max_bias": np.ones((n_obs, n_thetas, n_rep)),
+            key: -np.ones((1, n_thetas, n_rep)),
+        }
+        bad_kwargs["sensitivity_elements"] = sens
+        with pytest.raises(ValueError, match=rf"sensitivity_elements\['{key}'\] must be positive."):
+            DoubleMLCore(**bad_kwargs)
+
+    # sigma2 and nu2 wrong shape
+    for key in ["sigma2", "nu2"]:
+        bad_kwargs = kwargs.copy()
+        sens = {
+            "max_bias": np.ones((1, n_thetas, n_rep)),
+            "psi_max_bias": np.ones((n_obs, n_thetas, n_rep)),
+            key: np.ones((2, n_thetas, n_rep)),
+        }
+        bad_kwargs["sensitivity_elements"] = sens
+        with pytest.raises(
+            ValueError, match=rf"sensitivity_elements\['{key}'\] shape \(2, 2, 5\) does not match expected \(1, 2, 5\)\."
+        ):
+            DoubleMLCore(**bad_kwargs)
+
+
+@pytest.mark.ci
+def test_treatment_names_exceptions():
+    kwargs = valid_core_kwargs()
+
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["treatment_names"] = "not_a_list"
+    with pytest.raises(TypeError, match="treatment_names must be a list of strings."):
+        DoubleMLCore(**bad_kwargs)
+
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["treatment_names"] = [1, 2]
+    with pytest.raises(TypeError, match="treatment_names must be a list of strings."):
+        DoubleMLCore(**bad_kwargs)
+
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["treatment_names"] = ["treat1"]
+    with pytest.raises(ValueError, match=r"Length of treatment_names \(1\) does not match n_thetas \(2\)\."):
+        DoubleMLCore(**bad_kwargs)

From 704b1d1719363dae005f954523876e13568f9cc3 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Sun, 23 Nov 2025 18:09:21 +0100
Subject: [PATCH 4/5] Refactor DoubleML framework to utilize DoubleMLCore

Adjusted all tests
---
 doubleml/__init__.py                          |   3 +-
 doubleml/did/tests/test_did_aggregation.py    |   5 +-
 .../tests/test_did_aggregation_exceptions.py  |   8 +-
 .../did/tests/test_did_aggregation_plot.py    |   5 +-
 .../test_did_aggregation_return_types.py      |   5 +-
 doubleml/double_ml.py                         |   7 +-
 doubleml/double_ml_framework.py               | 424 ++++++------
 doubleml/irm/apos.py                          |  17 +-
 doubleml/tests/test_core_exceptions.py        |  26 +-
 doubleml/tests/test_framework.py              |   8 +-
 doubleml/tests/test_framework_coverage.py     |   8 +-
 doubleml/tests/test_framework_exceptions.py   | 215 ++-------
 .../tests/test_framework_pval_corrections.py  |   8 +-
 doubleml/tests/test_framework_sensitivity.py  |   8 +-
 doubleml/utils/_checks.py                     |   4 +-
 15 files changed, 232 insertions(+), 519 deletions(-)

diff --git a/doubleml/__init__.py b/doubleml/__init__.py
index cb3891bac..2e26cfa6e 100644
--- a/doubleml/__init__.py
+++ b/doubleml/__init__.py
@@ -3,7 +3,7 @@
 from .data import DoubleMLClusterData, DoubleMLData, DoubleMLDIDData, DoubleMLPanelData, DoubleMLRDDData, DoubleMLSSMData
 from .did.did import DoubleMLDID
 from .did.did_cs import DoubleMLDIDCS
-from .double_ml_framework import DoubleMLFramework, concat
+from .double_ml_framework import DoubleMLCore, DoubleMLFramework, concat
 from .irm.apo import DoubleMLAPO
 from .irm.apos import DoubleMLAPOS
 from .irm.cvar import DoubleMLCVAR
@@ -21,6 +21,7 @@ __all__ = [
     "concat",
+    "DoubleMLCore",
     "DoubleMLFramework",
     "DoubleMLPLR",
     "DoubleMLPLIV",
diff --git a/doubleml/did/tests/test_did_aggregation.py b/doubleml/did/tests/test_did_aggregation.py
index cc3c4304b..4dd91bc3e 100644
--- a/doubleml/did/tests/test_did_aggregation.py
+++ b/doubleml/did/tests/test_did_aggregation.py
@@ -2,7 +2,7 @@
 import pytest

 from doubleml.did.did_aggregation import DoubleMLDIDAggregation
-from doubleml.double_ml_framework import DoubleMLFramework
+from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework
 from doubleml.tests._utils import generate_dml_dict


@@ -28,7 +28,8 @@ def base_framework(n_rep):
     psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep))
     doubleml_dict = generate_dml_dict(psi_a, psi_b)

-    return DoubleMLFramework(doubleml_dict)
+    dml_core = DoubleMLCore(**doubleml_dict)
+    return DoubleMLFramework(dml_core=dml_core)


 @pytest.fixture(scope="module", params=["ones", "random", "zeros", "mixed"])
diff --git a/doubleml/did/tests/test_did_aggregation_exceptions.py b/doubleml/did/tests/test_did_aggregation_exceptions.py
index 0f895b5be..a5e5e22a8 100644
--- a/doubleml/did/tests/test_did_aggregation_exceptions.py
+++ b/doubleml/did/tests/test_did_aggregation_exceptions.py
@@ -2,7 +2,7 @@
 import pytest

 from doubleml.did.did_aggregation import DoubleMLDIDAggregation
-from doubleml.double_ml_framework import DoubleMLFramework
+from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework
 from doubleml.tests._utils import generate_dml_dict


@@ -24,7 +24,8 @@ def mock_framework(n_rep, n_thetas):
     psi_a = np.ones(shape=(n_obs, n_thetas, n_rep))
     psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep))
     doubleml_dict = generate_dml_dict(psi_a, psi_b)
-    return DoubleMLFramework(doubleml_dict)
+    dml_core = DoubleMLCore(**doubleml_dict)
+    return DoubleMLFramework(dml_core)


 @pytest.fixture
@@ -67,7 +68,8 @@ def test_invalid_framework_dim():
     psi_a = np.ones(shape=(10, 2, 1))
     psi_b = np.random.normal(size=(10, 2, 1))
     doubleml_dict = generate_dml_dict(psi_a, psi_b)
-    framework = DoubleMLFramework(doubleml_dict)
+    dml_core = DoubleMLCore(**doubleml_dict)
+    framework = DoubleMLFramework(dml_core=dml_core)

     # Test with invalid framework dimension
     with pytest.raises(ValueError, match="All frameworks must be one-dimensional"):
diff --git a/doubleml/did/tests/test_did_aggregation_plot.py b/doubleml/did/tests/test_did_aggregation_plot.py
index 692a0e682..65a76a72b 100644
--- a/doubleml/did/tests/test_did_aggregation_plot.py
+++ b/doubleml/did/tests/test_did_aggregation_plot.py
@@ -5,7 +5,7 @@
 from matplotlib.figure import Figure

 from doubleml.did.did_aggregation import DoubleMLDIDAggregation
-from doubleml.double_ml_framework import DoubleMLFramework
+from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework
 from doubleml.tests._utils import generate_dml_dict


@@ -23,7 +23,8 @@ def mock_framework(n_rep):
     psi_a = np.ones(shape=(n_obs, n_thetas, n_rep))
     psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep))
     doubleml_dict = generate_dml_dict(psi_a, psi_b)
-    return DoubleMLFramework(doubleml_dict)
+    dml_core = DoubleMLCore(**doubleml_dict)
+    return DoubleMLFramework(dml_core=dml_core)


 @pytest.fixture
diff --git a/doubleml/did/tests/test_did_aggregation_return_types.py b/doubleml/did/tests/test_did_aggregation_return_types.py
index e63eda70e..f9c407fc9 100644
--- a/doubleml/did/tests/test_did_aggregation_return_types.py
+++ b/doubleml/did/tests/test_did_aggregation_return_types.py
@@ -6,7 +6,7 @@
 from matplotlib.figure import Figure

 from doubleml.did.did_aggregation import DoubleMLDIDAggregation
-from doubleml.double_ml_framework import DoubleMLFramework
+from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework
 from doubleml.tests._utils import generate_dml_dict


@@ -24,7 +24,8 @@ def mock_framework(n_rep):
     psi_a = np.ones(shape=(n_obs, n_thetas, n_rep))
     psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep))
     doubleml_dict = generate_dml_dict(psi_a, psi_b)
-    return DoubleMLFramework(doubleml_dict)
+    dml_core = DoubleMLCore(**doubleml_dict)
+    return DoubleMLFramework(dml_core=dml_core)


 @pytest.fixture
diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 6293731a3..2ed7c812b 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -9,7 +9,7 @@

 from doubleml.data import DoubleMLDIDData, DoubleMLPanelData, DoubleMLRDDData, DoubleMLSSMData
 from doubleml.data.base_data import DoubleMLBaseData
-from doubleml.double_ml_framework import DoubleMLFramework
+from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework
 from doubleml.double_ml_sampling_mixins import SampleSplittingMixin
 from doubleml.utils._checks import _check_external_predictions
 from doubleml.utils._estimation import _aggregate_coefs_and_ses, _rmse, _set_external_predictions, _var_est
@@ -632,7 +632,6 @@ def construct_framework(self):
             "var_scaling_factors": self._var_scaling_factors,
             "scaled_psi": scaled_psi_reshape,
             "is_cluster_data": self._is_cluster_data,
-            "treatment_names": self._dml_data.d_cols,
         }

         if self._sensitivity_implemented:
@@ -669,8 +668,8 @@ def construct_framework(self):
                 },
             }
         )
-
-        doubleml_framework = DoubleMLFramework(doubleml_dict)
+        dml_core = DoubleMLCore(**doubleml_dict)
+        doubleml_framework = DoubleMLFramework(dml_core=dml_core, treatment_names=self._dml_data.d_cols)

         return doubleml_framework

     def bootstrap(self, method="normal", n_rep_boot=500):
diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index 5802ce143..6902a8d6a 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -33,7 +33,6 @@ class DoubleMLCore:
     is_cluster_data: bool = False
     cluster_dict: Optional[Dict] = None
     sensitivity_elements: Optional[Dict[str, np.ndarray]] = None
-    treatment_names: Optional[List[str]] = None
     """
     Core container for DoubleML results.

@@ -61,8 +60,6 @@
         Dictionary with clustering information, required if is_cluster_data is True.
     sensitivity_elements : dict, optional
         Dictionary with sensitivity analysis components (e.g., max_bias, psi_max_bias, sigma2, nu2).
-    treatment_names : list of str, optional
-        Names of the treatments (must match n_thetas if provided).
Raises ------ @@ -74,12 +71,11 @@ def __post_init__(self): if not isinstance(self.scaled_psi, np.ndarray) or self.scaled_psi.ndim != 3: raise ValueError("scaled_psi must be a 3-dimensional numpy.ndarray.") - self.n_obs, self.n_thetas, self.n_rep = self.scaled_psi.shape + self._n_obs, self._n_thetas, self._n_rep = self.scaled_psi.shape self._check_arrays() self._check_cluster_dict() self._check_sensitivity_elements() - self._check_treatment_names() def _check_arrays(self): """Type and shape checks for input arrays.""" @@ -96,12 +92,12 @@ def _check_arrays(self): raise TypeError(f"{name} must be a numpy.ndarray, got {type(arr)}.") expected_shapes = { - "thetas": (self.n_thetas,), - "ses": (self.n_thetas,), - "all_thetas": (self.n_thetas, self.n_rep), - "all_ses": (self.n_thetas, self.n_rep), - "var_scaling_factors": (self.n_thetas,), - "scaled_psi": (self.n_obs, self.n_thetas, self.n_rep), + "thetas": (self._n_thetas,), + "ses": (self._n_thetas,), + "all_thetas": (self._n_thetas, self._n_rep), + "all_ses": (self._n_thetas, self._n_rep), + "var_scaling_factors": (self._n_thetas,), + "scaled_psi": (self._n_obs, self._n_thetas, self._n_rep), } for name, expected_shape in expected_shapes.items(): actual_shape = arrays[name].shape @@ -121,13 +117,13 @@ def _check_cluster_dict(self): f"cluster_dict must contain keys: {', '.join(expected_keys)}. " f"Got: {', '.join(self.cluster_dict.keys())}." ) - # Type checks for values + # Type checks if not isinstance(self.cluster_dict["smpls"], list): raise TypeError("cluster_dict['smpls'] must be a list.") if not isinstance(self.cluster_dict["smpls_cluster"], list): raise TypeError("cluster_dict['smpls_cluster'] must be a list.") - if not isinstance(self.cluster_dict["cluster_vars"], list): - raise TypeError("cluster_dict['cluster_vars'] must be a list.") + if not isinstance(self.cluster_dict["cluster_vars"], np.ndarray): + raise TypeError("cluster_dict['cluster_vars'] must be a numpy.ndarray.") if not isinstance(self.cluster_dict["n_folds_per_cluster"], int): raise TypeError("cluster_dict['n_folds_per_cluster'] must be an int.") @@ -144,8 +140,8 @@ def _check_sensitivity_elements(self): raise TypeError(f"sensitivity_elements['{key}'] must be a numpy.ndarray.") expected_shapes = { - "max_bias": (1, self.n_thetas, self.n_rep), - "psi_max_bias": (self.n_obs, self.n_thetas, self.n_rep), + "max_bias": (1, self._n_thetas, self._n_rep), + "psi_max_bias": (self._n_obs, self._n_thetas, self._n_rep), } for key in required_keys: actual_shape = self.sensitivity_elements[key].shape @@ -160,143 +156,123 @@ def _check_sensitivity_elements(self): if not isinstance(self.sensitivity_elements[key], np.ndarray): raise TypeError(f"sensitivity_elements['{key}'] must be a numpy.ndarray.") if np.any(self.sensitivity_elements[key] < 0): - raise ValueError(f"sensitivity_elements['{key}'] must be positive.") - if self.sensitivity_elements[key].shape != (1, self.n_thetas, self.n_rep): - expected_shape = (1, self.n_thetas, self.n_rep) + raise ValueError( + f"sensitivity_elements['{key}'] must be positive. " + f"Got {str(self.sensitivity_elements[key])} " + "Most likely this is due to low quality learners (especially propensity scores)." + ) + if self.sensitivity_elements[key].shape != (1, self._n_thetas, self._n_rep): + expected_shape = (1, self._n_thetas, self._n_rep) actual_shape = self.sensitivity_elements[key].shape raise ValueError( f"sensitivity_elements['{key}'] shape {actual_shape} does not match expected {expected_shape}." 
                        )

-    def _check_treatment_names(self):
-        """Checks for treatment_names if provided."""
-        if self.treatment_names is not None:
-            if not isinstance(self.treatment_names, list) or not all(isinstance(n, str) for n in self.treatment_names):
-                raise TypeError("treatment_names must be a list of strings.")
-            if len(self.treatment_names) != self.n_thetas:
-                raise ValueError(
-                    f"Length of treatment_names ({len(self.treatment_names)}) does not match n_thetas ({self.n_thetas})."
-                )
-

 class DoubleMLFramework:
-    """Double Machine Learning Framework to combine DoubleML classes and compute confidence intervals.
+    """
+    Double Machine Learning Framework to combine DoubleMLCore results and compute confidence intervals.

     Parameters
     ----------
-    doubleml_dict : dict
-        A dictionary providing the estimated parameters and normalized scores. Keys have to be 'thetas', 'ses',
-        'all_thetas', 'all_ses', 'var_scaling_factors' and 'scaled_psi'.
-        Values have to be numpy arrays with the corresponding shapes.
-
+    dml_core : DoubleMLCore
+        A DoubleMLCore object providing the estimated parameters and scores.
     """

     def __init__(
         self,
-        doubleml_dict=None,
+        dml_core: DoubleMLCore,
+        treatment_names: Optional[List[str]] = None,
     ):
-        self._is_cluster_data = False
-
-        # check input
-        if not isinstance(doubleml_dict, dict):
-            raise TypeError("doubleml_dict must be a dictionary.")
-        expected_keys = ["thetas", "ses", "all_thetas", "all_ses", "var_scaling_factors", "scaled_psi"]
-        if not all(key in doubleml_dict.keys() for key in expected_keys):
-            raise ValueError("The dict must contain the following keys: " + ", ".join(expected_keys))
-
-        # set scores and parameters
-        self._n_thetas = doubleml_dict["scaled_psi"].shape[1]
-        self._n_rep = doubleml_dict["scaled_psi"].shape[2]
-        self._n_obs = doubleml_dict["scaled_psi"].shape[0]
-
-        self._thetas = doubleml_dict["thetas"]
-        self._ses = doubleml_dict["ses"]
-        self._all_thetas = doubleml_dict["all_thetas"]
-        self._all_ses = doubleml_dict["all_ses"]
-        self._var_scaling_factors = doubleml_dict["var_scaling_factors"]
-        self._scaled_psi = doubleml_dict["scaled_psi"]
-
-        # initialize cluster data
-        self._check_and_set_cluster_data(doubleml_dict)
-
-        # initialize sensitivity analysis
-        self._check_and_set_sensitivity_elements(doubleml_dict)
-
-        # check if all sizes match
-        self._check_framework_shapes()
-
-        self._treatment_names = None
-        if "treatment_names" in doubleml_dict.keys():
-            self._check_treatment_names(doubleml_dict["treatment_names"])
-            self._treatment_names = doubleml_dict["treatment_names"]
+        if not isinstance(dml_core, DoubleMLCore):
+            raise TypeError("dml_core must be a DoubleMLCore instance.")
+        self._dml_core = dml_core
+
+        self._treatment_names = None
+        if treatment_names is not None:
+            self._check_treatment_names(treatment_names)
+            self._treatment_names = treatment_names
+
+        # initialize sensitivity analysis attributes
+        self._sensitivity_implemented = self._dml_core.sensitivity_elements is not None
+        self._benchmark_available = self._sensitivity_implemented and all(
+            k in self._dml_core.sensitivity_elements for k in ["sigma2", "nu2"]
+        )
+        self._sensitivity_params = None

         # initialize bootstrap distribution
         self._boot_t_stat = None
         self._boot_method = None
         self._n_rep_boot = None

+    @property
+    def dml_core(self):
+        """
+        The underlying DoubleMLCore object.
+        """
+        return self._dml_core
+
     @property
     def n_thetas(self):
         """
         Number of target parameters.
         """
-        return self._n_thetas
+        return self._dml_core._n_thetas

     @property
     def n_rep(self):
         """
         Number of repetitions.
""" - return self._n_rep + return self._dml_core._n_rep @property def n_obs(self): """ Number of observations. """ - return self._n_obs + return self._dml_core._n_obs @property def thetas(self): """ Estimated target parameters (shape (``n_thetas``,)). """ - return self._thetas + return self._dml_core.thetas @property def all_thetas(self): """ Estimated target parameters for each repetition (shape (``n_thetas``, ``n_rep``)). """ - return self._all_thetas + return self._dml_core.all_thetas @property def ses(self): """ Estimated standard errors (shape (``n_thetas``,)). """ - return self._ses + return self._dml_core.ses @property def all_ses(self): """ Estimated standard errors for each repetition (shape (``n_thetas``, ``n_rep``)). """ - return self._all_ses + return self._dml_core.all_ses @property def t_stats(self): """ t-statistics for the causal parameter(s) (shape (``n_thetas``,)). """ - return self._thetas / self._ses + return self.thetas / self.ses @property def all_t_stats(self): """ t-statistics for the causal parameter(s) for each repetition (shape (``n_thetas``, ``n_rep``)). """ - return self._all_thetas / self._all_ses + return self.all_thetas / self.all_ses @property def pvals(self): @@ -320,14 +296,28 @@ def scaled_psi(self): """ Normalized scores (shape (``n_obs``, ``n_thetas``, ``n_rep``)). """ - return self._scaled_psi + return self._dml_core.scaled_psi @property def var_scaling_factors(self): """ Variance scaling factors (shape (``n_thetas``,)). """ - return self._var_scaling_factors + return self._dml_core.var_scaling_factors + + @property + def is_cluster_data(self): + """ + Whether the data is clustered. + """ + return self._dml_core.is_cluster_data + + @property + def cluster_dict(self): + """ + Clustering information (if available). + """ + return self._dml_core.cluster_dict @property def n_rep_boot(self): @@ -359,7 +349,7 @@ def sensitivity_elements(self): ``psi_max_bias`` (shape (``n_obs``, ``n_thetas``, ``n_rep``)). Optionally, additional entries ``sigma2`` and ``nu2``(shape (``1``, ``n_thetas``, ``n_rep``)) are available. """ - return self._sensitivity_elements + return self._dml_core.sensitivity_elements @property def sensitivity_params(self): @@ -388,7 +378,7 @@ def summary(self): A summary for the estimated causal parameters ``thetas``. """ ci = self.confint() - df_summary = generate_summary(self.thetas, self.ses, self.t_stats, self.pvals, ci, self._treatment_names) + df_summary = generate_summary(self.thetas, self.ses, self.t_stats, self.pvals, ci, self.treatment_names) return df_summary @property @@ -455,16 +445,14 @@ def sensitivity_summary(self): def __add__(self, other): if isinstance(other, DoubleMLFramework): # internal consistency check - self._check_framework_shapes() - other._check_framework_shapes() _check_framework_compatibility(self, other, check_treatments=True) - all_thetas = self._all_thetas + other._all_thetas - scaled_psi = self._scaled_psi + other._scaled_psi + all_thetas = self.all_thetas + other.all_thetas + scaled_psi = self.scaled_psi + other.scaled_psi # check if var_scaling_factors are the same - assert np.allclose(self._var_scaling_factors, other._var_scaling_factors) - var_scaling_factors = self._var_scaling_factors + assert np.allclose(self.var_scaling_factors, other.var_scaling_factors) + var_scaling_factors = self.var_scaling_factors # compute standard errors (Uses factor 1/n for scaling!) 
sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1)) @@ -478,20 +466,21 @@ def __add__(self, other): "all_ses": all_ses, "var_scaling_factors": var_scaling_factors, "scaled_psi": scaled_psi, - "is_cluster_data": self._is_cluster_data, - "cluster_dict": self._cluster_dict, + "is_cluster_data": self.is_cluster_data, + "cluster_dict": self.cluster_dict, } if self._sensitivity_implemented and other._sensitivity_implemented: - max_bias = self._sensitivity_elements["max_bias"] + other._sensitivity_elements["max_bias"] - psi_max_bias = self._sensitivity_elements["psi_max_bias"] + other._sensitivity_elements["psi_max_bias"] + max_bias = self.sensitivity_elements["max_bias"] + other.sensitivity_elements["max_bias"] + psi_max_bias = self.sensitivity_elements["psi_max_bias"] + other.sensitivity_elements["psi_max_bias"] sensitivity_elements = { "max_bias": max_bias, "psi_max_bias": psi_max_bias, } doubleml_dict["sensitivity_elements"] = sensitivity_elements - new_obj = DoubleMLFramework(doubleml_dict) + dml_core = DoubleMLCore(**doubleml_dict) + new_obj = DoubleMLFramework(dml_core) else: raise TypeError(f"Unsupported operand type: {type(other)}") @@ -503,16 +492,14 @@ def __radd__(self, other): def __sub__(self, other): if isinstance(other, DoubleMLFramework): # internal consistency check - self._check_framework_shapes() - other._check_framework_shapes() _check_framework_compatibility(self, other, check_treatments=True) - all_thetas = self._all_thetas - other._all_thetas - scaled_psi = self._scaled_psi - other._scaled_psi + all_thetas = self.all_thetas - other.all_thetas + scaled_psi = self.scaled_psi - other.scaled_psi # check if var_scaling_factors are the same - assert np.allclose(self._var_scaling_factors, other._var_scaling_factors) - var_scaling_factors = self._var_scaling_factors + assert np.allclose(self.var_scaling_factors, other.var_scaling_factors) + var_scaling_factors = self.var_scaling_factors # compute standard errors sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1)) @@ -526,22 +513,23 @@ def __sub__(self, other): "all_ses": all_ses, "var_scaling_factors": var_scaling_factors, "scaled_psi": scaled_psi, - "is_cluster_data": self._is_cluster_data, - "cluster_dict": self._cluster_dict, + "is_cluster_data": self.is_cluster_data, + "cluster_dict": self.cluster_dict, } # sensitivity combination only available for same outcome and cond. expectation (e.g. IRM) if self._sensitivity_implemented and other._sensitivity_implemented: - max_bias = self._sensitivity_elements["max_bias"] + other._sensitivity_elements["max_bias"] - psi_max_bias = self._sensitivity_elements["psi_max_bias"] + other._sensitivity_elements["psi_max_bias"] + max_bias = self.sensitivity_elements["max_bias"] + other.sensitivity_elements["max_bias"] + psi_max_bias = self.sensitivity_elements["psi_max_bias"] + other.sensitivity_elements["psi_max_bias"] sensitivity_elements = { "max_bias": max_bias, "psi_max_bias": psi_max_bias, } doubleml_dict["sensitivity_elements"] = sensitivity_elements - new_obj = DoubleMLFramework(doubleml_dict) + dml_core = DoubleMLCore(**doubleml_dict) + new_obj = DoubleMLFramework(dml_core=dml_core) else: raise TypeError(f"Unsupported operand type: {type(other)}") @@ -553,13 +541,13 @@ def __rsub__(self, other): # TODO: Restrict to linear? 
     def __mul__(self, other):
         if isinstance(other, (int, float)):
-            thetas = np.multiply(other, self._thetas)
-            all_thetas = np.multiply(other, self._all_thetas)
+            thetas = np.multiply(other, self.thetas)
+            all_thetas = np.multiply(other, self.all_thetas)

-            var_scaling_factors = self._var_scaling_factors
-            ses = np.multiply(other, self._ses)
-            all_ses = np.multiply(other, self._all_ses)
-            scaled_psi = np.multiply(other, self._scaled_psi)
+            var_scaling_factors = self.var_scaling_factors
+            ses = np.multiply(other, self.ses)
+            all_ses = np.multiply(other, self.all_ses)
+            scaled_psi = np.multiply(other, self.scaled_psi)

             doubleml_dict = {
                 "thetas": thetas,
@@ -568,15 +556,15 @@ def __mul__(self, other):
                 "all_ses": all_ses,
                 "var_scaling_factors": var_scaling_factors,
                 "scaled_psi": scaled_psi,
-                "is_cluster_data": self._is_cluster_data,
-                "cluster_dict": self._cluster_dict,
+                "is_cluster_data": self.is_cluster_data,
+                "cluster_dict": self.cluster_dict,
             }

             # sensitivity combination only available for linear models
             if self._sensitivity_implemented:
-                max_bias = abs(other) * self._sensitivity_elements["max_bias"]
-                psi_max_bias = abs(other) * self._sensitivity_elements["psi_max_bias"]
+                max_bias = abs(other) * self.sensitivity_elements["max_bias"]
+                psi_max_bias = abs(other) * self.sensitivity_elements["psi_max_bias"]
                 sensitivity_elements = {
                     "max_bias": max_bias,
                     "psi_max_bias": psi_max_bias,
@@ -584,13 +572,14 @@ def __mul__(self, other):
                 if self._benchmark_available:
                     sensitivity_elements.update(
                         {
-                            "sigma2": self._sensitivity_elements["sigma2"],
-                            "nu2": np.multiply(np.square(other), self._sensitivity_elements["nu2"]),
+                            "sigma2": self.sensitivity_elements["sigma2"],
+                            "nu2": np.multiply(np.square(other), self.sensitivity_elements["nu2"]),
                         }
                     )
                 doubleml_dict["sensitivity_elements"] = sensitivity_elements

-            new_obj = DoubleMLFramework(doubleml_dict)
+            dml_core = DoubleMLCore(**doubleml_dict)
+            new_obj = DoubleMLFramework(dml_core=dml_core)

         else:
             raise TypeError(f"Unsupported operand type: {type(other)}")
@@ -612,7 +601,7 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):
         _check_in_zero_one(level, "The confidence level", include_zero=False, include_one=False)

         # set elements for readability
-        psi_scaled = self._scaled_psi
+        psi_scaled = self.scaled_psi
         max_bias = self.sensitivity_elements["max_bias"]
         psi_max_bias = self.sensitivity_elements["psi_max_bias"]
@@ -632,22 +621,22 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):

         for i_rep in range(self.n_rep):
             for i_theta in range(self.n_thetas):
-                if not self._is_cluster_data:
+                if not self.is_cluster_data:
                     smpls = None
                     cluster_vars = None
                     smpls_cluster = None
                     n_folds_per_cluster = None
                 else:
-                    smpls = self._cluster_dict["smpls"][i_rep]
-                    cluster_vars = self._cluster_dict["cluster_vars"]
-                    smpls_cluster = self._cluster_dict["smpls_cluster"][i_rep]
-                    n_folds_per_cluster = self._cluster_dict["n_folds_per_cluster"]
+                    smpls = self.cluster_dict["smpls"][i_rep]
+                    cluster_vars = self.cluster_dict["cluster_vars"]
+                    smpls_cluster = self.cluster_dict["smpls_cluster"][i_rep]
+                    n_folds_per_cluster = self.cluster_dict["n_folds_per_cluster"]

                 sigma2_lower_hat, _ = _var_est(
                     psi=psi_lower[:, i_theta, i_rep],
                     psi_deriv=np.ones_like(psi_lower[:, i_theta, i_rep]),
                     smpls=smpls,
-                    is_cluster_data=self._is_cluster_data,
+                    is_cluster_data=self.is_cluster_data,
                     cluster_vars=cluster_vars,
                     smpls_cluster=smpls_cluster,
                     n_folds_per_cluster=n_folds_per_cluster,
                 )
                 sigma2_upper_hat, _ = _var_est(
                     psi=psi_upper[:, i_theta, i_rep],
                     psi_deriv=np.ones_like(psi_upper[:, i_theta, i_rep]),
                     smpls=smpls,
-                    is_cluster_data=self._is_cluster_data,
+                    is_cluster_data=self.is_cluster_data,
                     cluster_vars=cluster_vars,
                     smpls_cluster=smpls_cluster,
                     n_folds_per_cluster=n_folds_per_cluster,
                 )
@@ -689,7 +678,7 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):

     def _calc_robustness_value(self, null_hypothesis, level, rho, idx_treatment):
         _check_float(null_hypothesis, "null_hypothesis")
-        _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self._n_thetas - 1)
+        _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self.n_thetas - 1)

         # check which side is relevant
         bound = "upper" if (null_hypothesis > self.thetas[idx_treatment]) else "lower"
@@ -745,14 +734,14 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h
         """
         # check null_hypothesis
         if isinstance(null_hypothesis, float):
-            null_hypothesis_vec = np.full(shape=self._n_thetas, fill_value=null_hypothesis)
+            null_hypothesis_vec = np.full(shape=self.n_thetas, fill_value=null_hypothesis)
         elif isinstance(null_hypothesis, np.ndarray):
-            if null_hypothesis.shape == (self._n_thetas,):
+            if null_hypothesis.shape == (self.n_thetas,):
                 null_hypothesis_vec = null_hypothesis
             else:
                 raise ValueError(
                     "null_hypothesis is numpy.ndarray but does not have the required "
-                    f"shape ({self._n_thetas},). "
+                    f"shape ({self.n_thetas},). "
                     f"Array of shape {str(null_hypothesis.shape)} was passed."
                 )
         else:
@@ -765,10 +754,10 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h
         sensitivity_dict = self._calc_sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=rho, level=level)

         # compute robustess values with respect to null_hypothesis
-        rv = np.full(shape=self._n_thetas, fill_value=np.nan)
-        rva = np.full(shape=self._n_thetas, fill_value=np.nan)
+        rv = np.full(shape=self.n_thetas, fill_value=np.nan)
+        rva = np.full(shape=self.n_thetas, fill_value=np.nan)

-        for i_theta in range(self._n_thetas):
+        for i_theta in range(self.n_thetas):
             rv[i_theta], rva[i_theta] = self._calc_robustness_value(
                 null_hypothesis=null_hypothesis_vec[i_theta], level=level, rho=rho, idx_treatment=i_theta
             )
@@ -821,7 +810,7 @@ def confint(self, joint=False, level=0.95):
             max_abs_t_value_distribution = np.amax(np.abs(self._boot_t_stat), axis=1)
             critical_values = np.quantile(a=max_abs_t_value_distribution, q=level, axis=0)
         else:
-            critical_values = np.repeat(norm.ppf(percentages[1]), self._n_rep)
+            critical_values = np.repeat(norm.ppf(percentages[1]), self.n_rep)

         # compute all cis over repetitions (shape: n_thetas x 2 x n_rep)
         self._all_cis = np.stack(
@@ -854,17 +843,17 @@ def bootstrap(self, method="normal", n_rep_boot=500):
         """
         _check_bootstrap(method, n_rep_boot)

-        if self._is_cluster_data:
+        if self.is_cluster_data:
             raise NotImplementedError("bootstrap not yet implemented with clustering.")

         self._n_rep_boot = n_rep_boot
         self._boot_method = method
         # initialize bootstrap distribution array
-        self._boot_t_stat = np.full((n_rep_boot, self.n_thetas, self._n_rep), np.nan)
-        var_scaling = self._var_scaling_factors.reshape(-1, 1) * self._all_ses
+        self._boot_t_stat = np.full((n_rep_boot, self.n_thetas, self.n_rep), np.nan)
+        var_scaling = self.var_scaling_factors.reshape(-1, 1) * self.all_ses
         for i_rep in range(self.n_rep):
-            weights = _draw_weights(method, n_rep_boot, self._n_obs)
-            bootstraped_scaled_psi = np.matmul(weights, np.divide(self._scaled_psi[:, :, i_rep], var_scaling[:, i_rep]))
+            weights = _draw_weights(method, n_rep_boot, self.n_obs)
+            bootstraped_scaled_psi = np.matmul(weights, np.divide(self.scaled_psi[:, :, i_rep], var_scaling[:, i_rep]))
             self._boot_t_stat[:, :, i_rep] = bootstraped_scaled_psi

         return self
@@ -1076,137 +1065,6 @@ def sensitivity_plot(
         )
         return fig

-    def _check_and_set_cluster_data(self, doubleml_dict):
-        self._cluster_dict = None
-
-        if "is_cluster_data" in doubleml_dict.keys():
-            _check_bool(doubleml_dict["is_cluster_data"], "is_cluster_data")
-            self._is_cluster_data = doubleml_dict["is_cluster_data"]
-
-        if self._is_cluster_data:
-            if "cluster_dict" not in doubleml_dict.keys():
-                raise ValueError("If is_cluster_data is True, cluster_dict must be provided.")
-
-            if not isinstance(doubleml_dict["cluster_dict"], dict):
-                raise TypeError("cluster_dict must be a dictionary.")
-
-            expected_keys_cluster = ["smpls", "smpls_cluster", "cluster_vars", "n_folds_per_cluster"]
-            if not all(key in doubleml_dict["cluster_dict"].keys() for key in expected_keys_cluster):
-                raise ValueError(
-                    "The cluster_dict must contain the following keys: "
-                    + ", ".join(expected_keys_cluster)
-                    + ". Got: "
-                    + ", ".join(doubleml_dict["cluster_dict"].keys())
-                    + "."
-                )
-
-            self._cluster_dict = doubleml_dict["cluster_dict"]
-
-        return
-
-    def _check_and_set_sensitivity_elements(self, doubleml_dict):
-        if "sensitivity_elements" not in doubleml_dict.keys():
-            sensitivity_implemented = False
-            sensitivity_elements = None
-            benchmark_available = False
-
-        else:
-            if not isinstance(doubleml_dict["sensitivity_elements"], dict):
-                raise TypeError("sensitivity_elements must be a dictionary.")
-
-            expected_keys_sensitivity = ["max_bias", "psi_max_bias"]
-            if not all(key in doubleml_dict["sensitivity_elements"].keys() for key in expected_keys_sensitivity):
-                raise ValueError(
-                    "The sensitivity_elements dict must contain the following keys: " + ", ".join(expected_keys_sensitivity)
-                )
-
-            for key in expected_keys_sensitivity:
-                if not isinstance(doubleml_dict["sensitivity_elements"][key], np.ndarray):
-                    raise TypeError(f"The sensitivity element {key} must be a numpy array.")
-
-            # set sensitivity elements
-            sensitivity_implemented = True
-            sensitivity_elements = {key: doubleml_dict["sensitivity_elements"][key] for key in expected_keys_sensitivity}
-
-            # check if benchmarks are available and update sensitivity elements
-            benchmark_available, sensitivity_elements_benchmark = self._check_sensitivity_benchmark(doubleml_dict)
-            sensitivity_elements.update(sensitivity_elements_benchmark)
-
-        # set attributes
-        self._sensitivity_implemented = sensitivity_implemented
-        self._sensitivity_elements = sensitivity_elements
-        self._benchmark_available = benchmark_available
-        self._sensitivity_params = None
-
-        return
-
-    def _check_sensitivity_benchmark(self, doubleml_dict):
-        # check if benchmarks are available
-        expected_keys_benchmark = ["sigma2", "nu2"]
-        benchmark_available = all(key in doubleml_dict["sensitivity_elements"] for key in expected_keys_benchmark)
-        if benchmark_available:
-            # type checks
-            for key in expected_keys_benchmark:
-                if not isinstance(doubleml_dict["sensitivity_elements"][key], np.ndarray):
-                    raise TypeError(f"The sensitivity element {key} must be a numpy array.")
-
-            # additional constraints
-            if (np.any(doubleml_dict["sensitivity_elements"]["sigma2"] < 0)) | (
-                np.any(doubleml_dict["sensitivity_elements"]["nu2"] < 0)
-            ):
-                raise ValueError(
-                    "sensitivity_elements sigma2 and nu2 have to be positive. "
-                    f"Got sigma2 {str(doubleml_dict['sensitivity_elements']['sigma2'])} "
-                    f"and nu2 {str(doubleml_dict['sensitivity_elements']['nu2'])}. "
-                    "Most likely this is due to low quality learners (especially propensity scores)."
-                )
-
-            sensitivity_elements_benchmark = {
-                key: doubleml_dict["sensitivity_elements"][key] for key in expected_keys_benchmark
-            }
-        else:
-            sensitivity_elements_benchmark = {}
-
-        return benchmark_available, sensitivity_elements_benchmark
-
-    def _check_framework_shapes(self):
-        expected_shapes = {
-            "thetas": (self._n_thetas,),
-            "ses": (self._n_thetas,),
-            "all_thetas": (self._n_thetas, self._n_rep),
-            "all_ses": (self._n_thetas, self._n_rep),
-            "var_scaling_factors": (self._n_thetas,),
-            "scaled_psi": (self._n_obs, self._n_thetas, self.n_rep),
-        }
-
-        for attr, expected_shape in expected_shapes.items():
-            actual_shape = getattr(self, f"_{attr}").shape
-            if actual_shape != expected_shape:
-                raise ValueError(f"The shape of {attr} does not match the expected shape {expected_shape}.")
-
-        if self._sensitivity_implemented:
-            self._check_sensitivity_elements_shapes()
-
-        return None
-
-    def _check_sensitivity_elements_shapes(self):
-        expected_sensitivity_shapes = {
-            "max_bias": (1, self._n_thetas, self.n_rep),
-            "psi_max_bias": (self._n_obs, self._n_thetas, self.n_rep),
-        }
-
-        if self._benchmark_available:
-            expected_sensitivity_shapes.update(
-                {"sigma2": (1, self._n_thetas, self.n_rep), "nu2": (1, self._n_thetas, self.n_rep)}
-            )
-
-        for key, expected_shape in expected_sensitivity_shapes.items():
-            actual_shape = self._sensitivity_elements[key].shape
-            if actual_shape != expected_shape:
-                raise ValueError(f"The shape of {key} does not match the expected shape {expected_shape}.")
-
-        return None
-
     def _check_treatment_names(self, treatment_names):
         if not isinstance(treatment_names, list):
             raise TypeError(
@@ -1217,10 +1075,10 @@ def _check_treatment_names(self, treatment_names):
             raise TypeError(
                 f"treatment_names must be a list of strings. At least one element is not a string: {str(treatment_names)}."
             )
-        if len(treatment_names) != self._n_thetas:
+        if len(treatment_names) != self.n_thetas:
             raise ValueError(
                 "The length of treatment_names does not match the number of treatments. "
-                f"Got {self._n_thetas} treatments and {len(treatment_names)} treatment names."
+                f"Got {self.n_thetas} treatments and {len(treatment_names)} treatment names."
             )

         return None
@@ -1235,20 +1093,18 @@ def concat(objs):
     if not all(isinstance(obj, DoubleMLFramework) for obj in objs):
        raise TypeError("All objects must be of type DoubleMLFramework.")

-    # check on internal consitency of objects
-    _ = [obj._check_framework_shapes() for obj in objs]
     # check if all objects are compatible in n_obs and n_rep
     _ = [_check_framework_compatibility(objs[0], obj, check_treatments=False) for obj in objs[1:]]

     all_thetas = np.concatenate([obj.all_thetas for obj in objs], axis=0)
     all_ses = np.concatenate([obj.all_ses for obj in objs], axis=0)
-    var_scaling_factors = np.concatenate([obj._var_scaling_factors for obj in objs], axis=0)
-    scaled_psi = np.concatenate([obj._scaled_psi for obj in objs], axis=1)
+    var_scaling_factors = np.concatenate([obj.var_scaling_factors for obj in objs], axis=0)
+    scaled_psi = np.concatenate([obj.scaled_psi for obj in objs], axis=1)

     thetas = np.concatenate([obj.thetas for obj in objs], axis=0)
     ses = np.concatenate([obj.ses for obj in objs], axis=0)

-    if any(obj._is_cluster_data for obj in objs):
+    if any(obj.is_cluster_data for obj in objs):
         raise NotImplementedError("concat not yet implemented with clustering.")
     else:
         is_cluster_data = False
@@ -1266,19 +1122,17 @@ def concat(objs):
     if all(obj._sensitivity_implemented for obj in objs):
         sensitivity_elements = {}
         for key in ["max_bias", "psi_max_bias"]:
-            assert all(key in obj._sensitivity_elements.keys() for obj in objs)
-            sensitivity_elements[key] = np.concatenate([obj._sensitivity_elements[key] for obj in objs], axis=1)
+            assert all(key in obj.sensitivity_elements.keys() for obj in objs)
+            sensitivity_elements[key] = np.concatenate([obj.sensitivity_elements[key] for obj in objs], axis=1)

         if all(obj._benchmark_available for obj in objs):
             for key in ["sigma2", "nu2"]:
-                assert all(key in obj._sensitivity_elements.keys() for obj in objs)
-                sensitivity_elements[key] = np.concatenate([obj._sensitivity_elements[key] for obj in objs], axis=1)
+                assert all(key in obj.sensitivity_elements.keys() for obj in objs)
+                sensitivity_elements[key] = np.concatenate([obj.sensitivity_elements[key] for obj in objs], axis=1)

         doubleml_dict["sensitivity_elements"] = sensitivity_elements

-    new_obj = DoubleMLFramework(doubleml_dict)
-
-    # check internal consistency of new object
-    new_obj._check_framework_shapes()
+    dml_core = DoubleMLCore(**doubleml_dict)
+    new_obj = DoubleMLFramework(dml_core=dml_core)

     return new_obj
diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index 23e7085e8..d6713a384 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -10,7 +10,7 @@

 from doubleml.data import DoubleMLData
 from doubleml.double_ml import DoubleML
-from doubleml.double_ml_framework import concat
+from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework, concat
 from doubleml.double_ml_sampling_mixins import SampleSplittingMixin
 from doubleml.irm.apo import DoubleMLAPO
 from doubleml.utils._checks import _check_score, _check_weights
@@ -709,12 +709,23 @@ def causal_contrast(self, reference_levels):
             if i in skip_index:
                 continue

-            current_framework = model.framework - ref_model.framework
+            diff_framework = model.framework - ref_model.framework
             current_treatment_name = f"{self.treatment_levels[i]} vs {self.treatment_levels[i_ref_lvl]}"

             # update sensitivity elements with sharper bounds
             current_sensitivity_dict = self._compute_causal_contrast_sensitivity_dict(model=model, ref_model=ref_model)
-            current_framework._check_and_set_sensitivity_elements(current_sensitivity_dict)
+            updated_dml_core = DoubleMLCore(
+                thetas=diff_framework.thetas,
+                ses=diff_framework.ses,
+                all_thetas=diff_framework.all_thetas,
+                all_ses=diff_framework.all_ses,
+                var_scaling_factors=diff_framework.var_scaling_factors,
+                scaled_psi=diff_framework.scaled_psi,
+                is_cluster_data=diff_framework.is_cluster_data,
+                cluster_dict=diff_framework.cluster_dict,
+                sensitivity_elements=current_sensitivity_dict["sensitivity_elements"],
+            )
+            current_framework = DoubleMLFramework(updated_dml_core, treatment_names=[current_treatment_name])

             all_acc_frameworks += [current_framework]
             all_treatment_names += [current_treatment_name]
diff --git a/doubleml/tests/test_core_exceptions.py b/doubleml/tests/test_core_exceptions.py
index ddd615231..835108dc8 100644
--- a/doubleml/tests/test_core_exceptions.py
+++ b/doubleml/tests/test_core_exceptions.py
@@ -84,14 +84,14 @@ def test_cluster_dict_exceptions():
     type_cases = [
         ("smpls", "not_a_list", "cluster_dict\\['smpls'\\] must be a list."),
         ("smpls_cluster", "not_a_list", "cluster_dict\\['smpls_cluster'\\] must be a list."),
-        ("cluster_vars", "not_a_list", "cluster_dict\\['cluster_vars'\\] must be a list."),
+        ("cluster_vars", "not_a_list", "cluster_dict\\['cluster_vars'\\] must be a numpy.ndarray."),
         ("n_folds_per_cluster", "not_an_int", "cluster_dict\\['n_folds_per_cluster'\\] must be an int."),
     ]
     for key, bad_value, msg in type_cases:
         cluster_dict = {
             "smpls": [],
             "smpls_cluster": [],
-            "cluster_vars": [],
+            "cluster_vars": np.array([]),
             "n_folds_per_cluster": 1,
         }
         cluster_dict[key] = bad_value
@@ -168,7 +168,7 @@ def test_sensitivity_elements_exceptions():
             key: -np.ones((1, n_thetas, n_rep)),
         }
         bad_kwargs["sensitivity_elements"] = sens
-        with pytest.raises(ValueError, match=rf"sensitivity_elements\['{key}'\] must be positive."):
+        with pytest.raises(ValueError, match=rf"sensitivity_elements\['{key}'\] must be positive.*"):
             DoubleMLCore(**bad_kwargs)

     # sigma2 and nu2 wrong shape
@@ -184,23 +184,3 @@ def test_sensitivity_elements_exceptions():
             ValueError, match=rf"sensitivity_elements\['{key}'\] shape \(2, 2, 5\) does not match expected \(1, 2, 5\)\."
): DoubleMLCore(**bad_kwargs) - - -@pytest.mark.ci -def test_treatment_names_exceptions(): - kwargs = valid_core_kwargs() - - bad_kwargs = kwargs.copy() - bad_kwargs["treatment_names"] = "not_a_list" - with pytest.raises(TypeError, match="treatment_names must be a list of strings."): - DoubleMLCore(**bad_kwargs) - - bad_kwargs = kwargs.copy() - bad_kwargs["treatment_names"] = [1, 2] - with pytest.raises(TypeError, match="treatment_names must be a list of strings."): - DoubleMLCore(**bad_kwargs) - - bad_kwargs = kwargs.copy() - bad_kwargs["treatment_names"] = ["treat1"] - with pytest.raises(ValueError, match=r"Length of treatment_names \(1\) does not match n_thetas \(2\)\."): - DoubleMLCore(**bad_kwargs) diff --git a/doubleml/tests/test_framework.py b/doubleml/tests/test_framework.py index 13222664f..babd05ef0 100644 --- a/doubleml/tests/test_framework.py +++ b/doubleml/tests/test_framework.py @@ -3,7 +3,7 @@ import pytest from sklearn.linear_model import LinearRegression, LogisticRegression -from doubleml.double_ml_framework import DoubleMLFramework, concat +from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework, concat from doubleml.irm.datasets import make_irm_data from doubleml.irm.irm import DoubleMLIRM @@ -28,7 +28,8 @@ def dml_framework_fixture(n_rep, n_thetas): psi_a = np.ones(shape=(n_obs, n_thetas, n_rep)) psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep)) doubleml_dict = generate_dml_dict(psi_a, psi_b) - dml_framework_obj = DoubleMLFramework(doubleml_dict) + dml_core = DoubleMLCore(**doubleml_dict) + dml_framework_obj = DoubleMLFramework(dml_core=dml_core) ci = dml_framework_obj.confint(joint=False, level=0.95) dml_framework_obj.bootstrap(method="normal") @@ -44,7 +45,8 @@ def dml_framework_fixture(n_rep, n_thetas): psi_a_2 = np.ones(shape=(n_obs, n_thetas, n_rep)) psi_b_2 = np.random.normal(size=(n_obs, n_thetas, n_rep)) + 1.0 doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) dml_framework_obj_sub_obj = dml_framework_obj - dml_framework_obj_2 ci_sub_obj = dml_framework_obj_sub_obj.confint(joint=False, level=0.95) dml_framework_obj_sub_obj.bootstrap(method="normal") diff --git a/doubleml/tests/test_framework_coverage.py b/doubleml/tests/test_framework_coverage.py index 03625cef2..253f736fb 100644 --- a/doubleml/tests/test_framework_coverage.py +++ b/doubleml/tests/test_framework_coverage.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from doubleml.double_ml_framework import DoubleMLFramework, concat +from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework, concat from ._utils import generate_dml_dict @@ -56,8 +56,10 @@ def test_dml_framework_coverage_fixture(n_rep, n_thetas): doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) # combine objects and estimate parameters - dml_framework_obj_1 = DoubleMLFramework(doubleml_dict) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_1 = DoubleMLCore(**doubleml_dict) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_1 = DoubleMLFramework(dml_core=dml_core_1) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) true_thetas = np.vstack((np.repeat(0.0, n_thetas), np.repeat(-1.0, n_thetas))).transpose() ci = dml_framework_obj_1.confint(joint=False, level=0.95) diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 
f562f98d4..8d07b29b3 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from doubleml.double_ml_framework import DoubleMLFramework, concat +from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework, concat from ._utils import generate_dml_dict @@ -12,6 +12,7 @@ n_rep = 5 # generate score samples +np.random.seed(42) psi_a = np.ones(shape=(n_obs, n_thetas, n_rep)) psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep)) doubleml_dict = generate_dml_dict(psi_a, psi_b) @@ -23,148 +24,35 @@ "nu2": np.ones(shape=(1, n_thetas, n_rep)), } -# combine objects and estimate parameters -dml_framework_obj_1 = DoubleMLFramework(doubleml_dict) +dml_core = DoubleMLCore(**doubleml_dict) +dml_framework_obj_1 = DoubleMLFramework(dml_core) @pytest.mark.ci def test_input_exceptions(): - msg = r"The dict must contain the following keys: thetas, ses, all_thetas, all_ses, var_scaling_factors, scaled_psi" - with pytest.raises(ValueError, match=msg): - test_dict = {} - DoubleMLFramework(test_dict) - - msg = r"The shape of thetas does not match the expected shape \(2,\)\." - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["thetas"] = np.ones(shape=(1,)) - DoubleMLFramework(test_dict) - - msg = r"The shape of ses does not match the expected shape \(2,\)\." - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["ses"] = np.ones(shape=(1,)) - DoubleMLFramework(test_dict) - - msg = r"The shape of all_thetas does not match the expected shape \(2, 5\)\." - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["all_thetas"] = np.ones(shape=(1, 5)) - DoubleMLFramework(test_dict) - - msg = r"The shape of all_ses does not match the expected shape \(2, 5\)\." - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["all_ses"] = np.ones(shape=(1, 5)) - DoubleMLFramework(test_dict) - - msg = r"The shape of var_scaling_factors does not match the expected shape \(2,\)\." - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["var_scaling_factors"] = np.ones(shape=(1, 5)) - DoubleMLFramework(test_dict) - - msg = r"The shape of scaled_psi does not match the expected shape \(10, 2, 5\)\." - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["scaled_psi"] = np.ones(shape=(10, 2, 5, 3)) - DoubleMLFramework(test_dict) - - msg = "doubleml_dict must be a dictionary." + msg = "dml_core must be a DoubleMLCore instance." with pytest.raises(TypeError, match=msg): DoubleMLFramework(1.0) - msg = "sensitivity_elements must be a dictionary." - with pytest.raises(TypeError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["sensitivity_elements"] = 1 - DoubleMLFramework(test_dict) - - msg = "The sensitivity_elements dict must contain the following keys: max_bias, psi_max_bias" - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["sensitivity_elements"] = {"sensitivities": np.ones(shape=(n_obs, n_thetas, n_rep))} - DoubleMLFramework(test_dict) - - msg = r"The shape of max_bias does not match the expected shape \(1, 2, 5\)\." 
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["sensitivity_elements"]["max_bias"] = np.ones(shape=(n_obs, n_rep))
-        DoubleMLFramework(test_dict)
-
-    msg = r"The shape of psi_max_bias does not match the expected shape \(10, 2, 5\)\."
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["sensitivity_elements"]["psi_max_bias"] = np.ones(shape=(n_obs, n_thetas, n_rep, 3))
-        DoubleMLFramework(test_dict)
-
-    msg = r"The shape of sigma2 does not match the expected shape \(1, 2, 5\)\."
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["sensitivity_elements"]["sigma2"] = np.ones(shape=(n_obs, n_thetas, n_rep))
-        DoubleMLFramework(test_dict)
-
-    msg = r"The shape of nu2 does not match the expected shape \(1, 2, 5\)\."
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["sensitivity_elements"]["nu2"] = np.ones(shape=(n_obs, n_thetas, n_rep))
-        DoubleMLFramework(test_dict)
-
-    msg = "is_cluster_data has to be boolean. 1.0 of type <class 'float'> was passed."
-    with pytest.raises(TypeError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["is_cluster_data"] = 1.0
-        DoubleMLFramework(test_dict)
-
-    msg = "If is_cluster_data is True, cluster_dict must be provided."
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["is_cluster_data"] = True
-        DoubleMLFramework(test_dict)
-
-    msg = "cluster_dict must be a dictionary."
-    with pytest.raises(TypeError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["is_cluster_data"] = True
-        test_dict["cluster_dict"] = 1.0
-        DoubleMLFramework(test_dict)
-
-    msg = (
-        "The cluster_dict must contain the following keys: smpls, smpls_cluster,"
-        " cluster_vars, n_folds_per_cluster. Got: cluster_ids."
-    )
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["is_cluster_data"] = True
-        test_dict["cluster_dict"] = {"cluster_ids": np.ones(shape=(n_obs, n_rep))}
-        DoubleMLFramework(test_dict)
-
-    test_dict = copy.deepcopy(doubleml_dict)
-    framework_names = DoubleMLFramework(test_dict)
+    test_framework = DoubleMLFramework(dml_core)
 
     msg = "treatment_names must be a list. Got 1 of type <class 'int'>."
     with pytest.raises(TypeError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["treatment_names"] = 1
-        DoubleMLFramework(test_dict)
+        DoubleMLFramework(dml_core, treatment_names=1)
 
     with pytest.raises(TypeError, match=msg):
-        framework_names.treatment_names = 1
+        test_framework.treatment_names = 1
 
     msg = r"treatment_names must be a list of strings. At least one element is not a string: \['test', 1\]."
     with pytest.raises(TypeError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["treatment_names"] = ["test", 1]
-        DoubleMLFramework(test_dict)
+        DoubleMLFramework(dml_core, treatment_names=["test", 1])
 
     with pytest.raises(TypeError, match=msg):
-        framework_names.treatment_names = ["test", 1]
+        test_framework.treatment_names = ["test", 1]
 
     msg = "The length of treatment_names does not match the number of treatments. Got 2 treatments and 3 treatment names."
with pytest.raises(ValueError, match=msg): - test_dict = copy.deepcopy(doubleml_dict) - test_dict["treatment_names"] = ["test", "test2", "test3"] - DoubleMLFramework(test_dict) + DoubleMLFramework(dml_core, treatment_names=["test", "test2", "test3"]) with pytest.raises(ValueError, match=msg): - framework_names.treatment_names = ["test", "test2", "test3"] + test_framework.treatment_names = ["test", "test2", "test3"] def test_operation_exceptions(): @@ -179,21 +67,24 @@ def test_operation_exceptions(): psi_a_2 = np.ones(shape=(n_obs + 1, n_thetas, n_rep)) psi_b_2 = np.random.normal(size=(n_obs + 1, n_thetas, n_rep)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = dml_framework_obj_1 + dml_framework_obj_2 msg = "The number of parameters theta in DoubleMLFrameworks must be the same. Got 2 and 3." with pytest.raises(ValueError, match=msg): psi_a_2 = np.ones(shape=(n_obs, n_thetas + 1, n_rep)) psi_b_2 = np.random.normal(size=(n_obs, n_thetas + 1, n_rep)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = dml_framework_obj_1 + dml_framework_obj_2 msg = "The number of replications in DoubleMLFrameworks must be the same. Got 5 and 6." with pytest.raises(ValueError, match=msg): psi_a_2 = np.ones(shape=(n_obs, n_thetas, n_rep + 1)) psi_b_2 = np.random.normal(size=(n_obs, n_thetas, n_rep + 1)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = dml_framework_obj_1 + dml_framework_obj_2 # subtraction @@ -207,21 +98,24 @@ def test_operation_exceptions(): psi_a_2 = np.ones(shape=(n_obs + 1, n_thetas, n_rep)) psi_b_2 = np.random.normal(size=(n_obs + 1, n_thetas, n_rep)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = dml_framework_obj_1 - dml_framework_obj_2 msg = "The number of parameters theta in DoubleMLFrameworks must be the same. Got 2 and 3." with pytest.raises(ValueError, match=msg): psi_a_2 = np.ones(shape=(n_obs, n_thetas + 1, n_rep)) psi_b_2 = np.random.normal(size=(n_obs, n_thetas + 1, n_rep)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = dml_framework_obj_1 - dml_framework_obj_2 msg = "The number of replications in DoubleMLFrameworks must be the same. Got 5 and 6." 
with pytest.raises(ValueError, match=msg): psi_a_2 = np.ones(shape=(n_obs, n_thetas, n_rep + 1)) psi_b_2 = np.random.normal(size=(n_obs, n_thetas, n_rep + 1)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = dml_framework_obj_1 - dml_framework_obj_2 # multiplication @@ -243,27 +137,24 @@ def test_operation_exceptions(): psi_a_2 = np.ones(shape=(n_obs + 1, n_thetas, n_rep)) psi_b_2 = np.random.normal(size=(n_obs + 1, n_thetas, n_rep)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = concat([dml_framework_obj_1, dml_framework_obj_2]) msg = "The number of replications in DoubleMLFrameworks must be the same. Got 5 and 6." with pytest.raises(ValueError, match=msg): psi_a_2 = np.ones(shape=(n_obs, n_thetas, n_rep + 1)) psi_b_2 = np.random.normal(size=(n_obs, n_thetas, n_rep + 1)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = concat([dml_framework_obj_1, dml_framework_obj_2]) msg = "concat not yet implemented with clustering." with pytest.raises(NotImplementedError, match=msg): doubleml_dict_cluster = generate_dml_dict(psi_a_2, psi_b_2) - doubleml_dict_cluster["is_cluster_data"] = True - doubleml_dict_cluster["cluster_dict"] = { - "smpls": np.ones(shape=(n_obs, n_rep)), - "smpls_cluster": np.ones(shape=(n_obs, n_rep)), - "cluster_vars": np.ones(shape=(n_obs, n_rep)), - "n_folds_per_cluster": 2, - } - dml_framework_obj_cluster = DoubleMLFramework(doubleml_dict_cluster) + dml_core_cluster = DoubleMLCore(**doubleml_dict_cluster) + dml_core_cluster.is_cluster_data = True + dml_framework_obj_cluster = DoubleMLFramework(dml_core_cluster) _ = concat([dml_framework_obj_cluster, dml_framework_obj_cluster]) # cluster compatibility @@ -285,7 +176,10 @@ def test_p_adjust_exceptions(): @pytest.mark.ci def test_sensitivity_exceptions(): - dml_framework_no_sensitivity = DoubleMLFramework(generate_dml_dict(psi_a, psi_b)) + dml_no_sensitivity_dict = copy.deepcopy(doubleml_dict) + dml_no_sensitivity_dict.pop("sensitivity_elements") + dml_core_no_sensitivity = DoubleMLCore(**dml_no_sensitivity_dict) + dml_framework_no_sensitivity = DoubleMLFramework(dml_core_no_sensitivity) msg = "Sensitivity analysis is not implemented for this model." with pytest.raises(NotImplementedError, match=msg): _ = dml_framework_no_sensitivity._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.1, rho=1.0, level=0.95) @@ -394,47 +288,10 @@ def test_sensitivity_exceptions(): with pytest.raises(ValueError, match=msg): _ = dml_framework_obj_1.sensitivity_plot(idx_treatment=2) - # test benchmark sensitivity elements - sensitivity_dict_benchmark = generate_dml_dict(psi_a, psi_b) - sensitivity_dict_benchmark["sensitivity_elements"] = { - "max_bias": np.ones(shape=(1, n_thetas, n_rep)), - "psi_max_bias": np.ones(shape=(n_obs, n_thetas, n_rep)), - "sigma2": np.ones(shape=(1, n_thetas, n_rep)), - "nu2": 5.0, - } - msg = "The sensitivity element nu2 must be a numpy array." 
- with pytest.raises(TypeError, match=msg): - _ = DoubleMLFramework(sensitivity_dict_benchmark) - - sensitivity_dict_benchmark["sensitivity_elements"].update( - { - "sigma2": 5.0, - "nu2": np.ones(shape=(1, n_thetas, n_rep)), - } - ) - msg = "The sensitivity element sigma2 must be a numpy array." - with pytest.raises(TypeError, match=msg): - _ = DoubleMLFramework(sensitivity_dict_benchmark) - - sensitivity_dict_benchmark["sensitivity_elements"].update( - { - "sigma2": np.ones(shape=(1, n_thetas, n_rep)), - "nu2": -1.0 * np.ones(shape=(1, n_thetas, n_rep)), - } - ) - msg = ( - r"sensitivity_elements sigma2 and nu2 have to be positive\. " - r"Got sigma2 \[\[\[1\. 1\. 1\. 1\. 1\.\]\n\s+\[1\. 1\. 1\. 1\. 1\.\]\]\] " - r"and nu2 \[\[\[-1\. -1\. -1\. -1\. -1\.\]\n\s+\[-1\. -1\. -1\. -1\. -1\.\]\]\]\. " - r"Most likely this is due to low quality learners \(especially propensity scores\)\." - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLFramework(sensitivity_dict_benchmark) - @pytest.mark.ci def test_framework_sensitivity_plot_input(): - dml_framework_obj_plot = DoubleMLFramework(doubleml_dict) + dml_framework_obj_plot = DoubleMLFramework(dml_core=dml_core) msg = r"Apply sensitivity_analysis\(\) to include senario in sensitivity_plot. " with pytest.raises(ValueError, match=msg): diff --git a/doubleml/tests/test_framework_pval_corrections.py b/doubleml/tests/test_framework_pval_corrections.py index b69db44fe..77a7a61fa 100644 --- a/doubleml/tests/test_framework_pval_corrections.py +++ b/doubleml/tests/test_framework_pval_corrections.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from doubleml.double_ml_framework import DoubleMLFramework +from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework from ._utils import generate_dml_dict @@ -29,7 +29,8 @@ def dml_framework_tstat_pval_fixture(n_rep, n_thetas): psi_a = np.ones(shape=(n_obs, n_thetas, n_rep)) psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep)) doubleml_dict = generate_dml_dict(psi_a, psi_b) - dml_framework_obj = DoubleMLFramework(doubleml_dict) + dml_core = DoubleMLCore(**doubleml_dict) + dml_framework_obj = DoubleMLFramework(dml_core=dml_core) result_dict = { "dml_framework_obj": dml_framework_obj, @@ -83,7 +84,8 @@ def dml_framework_pval_cov_fixture(n_rep, sig_level): psi_a = np.ones(shape=(n_obs, n_thetas, n_rep)) psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep)) doubleml_dict = generate_dml_dict(psi_a, psi_b) - dml_framework_obj = DoubleMLFramework(doubleml_dict) + dml_core = DoubleMLCore(**doubleml_dict) + dml_framework_obj = DoubleMLFramework(dml_core=dml_core) p_vals = dml_framework_obj.pvals all_p_vals = dml_framework_obj.all_pvals diff --git a/doubleml/tests/test_framework_sensitivity.py b/doubleml/tests/test_framework_sensitivity.py index 496f9de92..072945972 100644 --- a/doubleml/tests/test_framework_sensitivity.py +++ b/doubleml/tests/test_framework_sensitivity.py @@ -79,15 +79,15 @@ def test_dml_framework_sensitivity_shapes(dml_framework_sensitivity_fixture): for obj in object_list: assert dml_framework_sensitivity_fixture[obj]._sensitivity_implemented for key in var_keys: - assert dml_framework_sensitivity_fixture[obj]._sensitivity_elements[key].shape == (1, n_thetas, n_rep) + assert dml_framework_sensitivity_fixture[obj].sensitivity_elements[key].shape == (1, n_thetas, n_rep) for key in score_keys: - assert dml_framework_sensitivity_fixture[obj]._sensitivity_elements[key].shape == (n_obs, n_thetas, n_rep) + assert 
dml_framework_sensitivity_fixture[obj].sensitivity_elements[key].shape == (n_obs, n_thetas, n_rep) # separate test for concat for key in var_keys: - assert dml_framework_sensitivity_fixture["dml_framework_obj_concat"]._sensitivity_elements[key].shape == (1, 2, n_rep) + assert dml_framework_sensitivity_fixture["dml_framework_obj_concat"].sensitivity_elements[key].shape == (1, 2, n_rep) for key in score_keys: - assert dml_framework_sensitivity_fixture["dml_framework_obj_concat"]._sensitivity_elements[key].shape == ( + assert dml_framework_sensitivity_fixture["dml_framework_obj_concat"].sensitivity_elements[key].shape == ( n_obs, 2, n_rep, diff --git a/doubleml/utils/_checks.py b/doubleml/utils/_checks.py index 0eabf53b0..edc828fba 100644 --- a/doubleml/utils/_checks.py +++ b/doubleml/utils/_checks.py @@ -375,10 +375,10 @@ def _check_framework_compatibility(dml_framework_1, dml_framework_2, check_treat f"Got {str(dml_framework_1.n_thetas)} and {str(dml_framework_2.n_thetas)}." ) - if dml_framework_1._is_cluster_data != dml_framework_2._is_cluster_data: + if dml_framework_1.is_cluster_data != dml_framework_2.is_cluster_data: raise ValueError( "The cluster structure in DoubleMLFrameworks must be the same. " - f"Got {str(dml_framework_1._is_cluster_data)} and {str(dml_framework_2._is_cluster_data)}." + f"Got {str(dml_framework_1.is_cluster_data)} and {str(dml_framework_2.is_cluster_data)}." ) return From 78e6baa71588307ffaaa2682ce0e77d64a3377f2 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Sun, 23 Nov 2025 19:44:10 +0100 Subject: [PATCH 5/5] Remove 'thetas' and 'ses' from DoubleMLCore class and related updates in tests --- doubleml/double_ml.py | 2 -- doubleml/double_ml_framework.py | 33 ++++---------------------- doubleml/irm/apos.py | 2 -- doubleml/tests/_utils.py | 2 -- doubleml/tests/test_core_exceptions.py | 4 +--- 5 files changed, 6 insertions(+), 37 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 2ed7c812b..69acf3860 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -625,9 +625,7 @@ def construct_framework(self): scaled_psi_reshape = np.transpose(scaled_psi, (0, 2, 1)) doubleml_dict = { - "thetas": self.coef, "all_thetas": self.all_coef, - "ses": self.se, "all_ses": self.all_se, "var_scaling_factors": self._var_scaling_factors, "scaled_psi": scaled_psi_reshape, diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 6902a8d6a..99941c073 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -24,8 +24,6 @@ @dataclass class DoubleMLCore: - thetas: np.ndarray - ses: np.ndarray all_thetas: np.ndarray all_ses: np.ndarray var_scaling_factors: np.ndarray @@ -42,10 +40,6 @@ class DoubleMLCore: Parameters ---------- - thetas : np.ndarray - Estimated target parameters (shape: (n_thetas,)). - ses : np.ndarray - Estimated standard errors (shape: (n_thetas,)). all_thetas : np.ndarray Estimated target parameters for each repetition (shape: (n_thetas, n_rep)). 
all_ses : np.ndarray @@ -80,8 +74,6 @@ def __post_init__(self): def _check_arrays(self): """Type and shape checks for input arrays.""" arrays = { - "thetas": self.thetas, - "ses": self.ses, "all_thetas": self.all_thetas, "all_ses": self.all_ses, "var_scaling_factors": self.var_scaling_factors, @@ -92,8 +84,6 @@ def _check_arrays(self): raise TypeError(f"{name} must be a numpy.ndarray, got {type(arr)}.") expected_shapes = { - "thetas": (self._n_thetas,), - "ses": (self._n_thetas,), "all_thetas": (self._n_thetas, self._n_rep), "all_ses": (self._n_thetas, self._n_rep), "var_scaling_factors": (self._n_thetas,), @@ -192,6 +182,9 @@ def __init__( self._check_treatment_names(treatment_names) self._treatment_names = treatment_names + # aggregate estimates + self._thetas, self._ses = _aggregate_coefs_and_ses(self.all_thetas, self.all_ses) + # initialize sensitivity analysis attributes self._sensitivity_implemented = self._dml_core.sensitivity_elements is not None self._benchmark_available = self._sensitivity_implemented and all( @@ -237,7 +230,7 @@ def thetas(self): """ Estimated target parameters (shape (``n_thetas``,)). """ - return self._dml_core.thetas + return self._thetas @property def all_thetas(self): @@ -251,7 +244,7 @@ def ses(self): """ Estimated standard errors (shape (``n_thetas``,)). """ - return self._dml_core.ses + return self._ses @property def all_ses(self): @@ -457,11 +450,8 @@ def __add__(self, other): # compute standard errors (Uses factor 1/n for scaling!) sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1)) all_ses = np.sqrt(sigma2_hat) - thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses) doubleml_dict = { - "thetas": thetas, - "ses": ses, "all_thetas": all_thetas, "all_ses": all_ses, "var_scaling_factors": var_scaling_factors, @@ -504,11 +494,8 @@ def __sub__(self, other): # compute standard errors sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1)) all_ses = np.sqrt(sigma2_hat) - thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses) doubleml_dict = { - "thetas": thetas, - "ses": ses, "all_thetas": all_thetas, "all_ses": all_ses, "var_scaling_factors": var_scaling_factors, @@ -541,17 +528,12 @@ def __rsub__(self, other): # TODO: Restrict to linear? 
     def __mul__(self, other):
         if isinstance(other, (int, float)):
-            thetas = np.multiply(other, self.thetas)
             all_thetas = np.multiply(other, self.all_thetas)
-            var_scaling_factors = self.var_scaling_factors
-            ses = np.multiply(other, self.ses)
             all_ses = np.multiply(other, self.all_ses)
             scaled_psi = np.multiply(other, self.scaled_psi)
 
             doubleml_dict = {
-                "thetas": thetas,
-                "ses": ses,
                 "all_thetas": all_thetas,
                 "all_ses": all_ses,
-                "var_scaling_factors": var_scaling_factors,
+                "var_scaling_factors": self.var_scaling_factors,
@@ -1101,17 +1083,12 @@ def concat(objs):
     var_scaling_factors = np.concatenate([obj.var_scaling_factors for obj in objs], axis=0)
     scaled_psi = np.concatenate([obj.scaled_psi for obj in objs], axis=1)
 
-    thetas = np.concatenate([obj.thetas for obj in objs], axis=0)
-    ses = np.concatenate([obj.ses for obj in objs], axis=0)
-
     if any(obj.is_cluster_data for obj in objs):
         raise NotImplementedError("concat not yet implemented with clustering.")
     else:
         is_cluster_data = False
 
     doubleml_dict = {
-        "thetas": thetas,
-        "ses": ses,
         "all_thetas": all_thetas,
         "all_ses": all_ses,
         "var_scaling_factors": var_scaling_factors,
diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index d6713a384..4e6dc944f 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -715,8 +715,6 @@ def causal_contrast(self, reference_levels):
                 # update sensitivity elements with sharper bounds
                 current_sensitivity_dict = self._compute_causal_contrast_sensitivity_dict(model=model, ref_model=ref_model)
                 updated_dml_core = DoubleMLCore(
-                    thetas=diff_framework.thetas,
-                    ses=diff_framework.ses,
                     all_thetas=diff_framework.all_thetas,
                     all_ses=diff_framework.all_ses,
                     var_scaling_factors=diff_framework.var_scaling_factors,
diff --git a/doubleml/tests/_utils.py b/doubleml/tests/_utils.py
index 60416246c..47b506ae2 100644
--- a/doubleml/tests/_utils.py
+++ b/doubleml/tests/_utils.py
@@ -106,8 +106,6 @@ def generate_dml_dict(psi_a, psi_b):
     scaled_psi = psi_b / np.mean(psi_a, axis=0)
 
     doubleml_dict = {
-        "thetas": thetas,
-        "ses": ses,
         "all_thetas": all_thetas,
         "all_ses": all_ses,
         "var_scaling_factors": var_scaling_factors,
diff --git a/doubleml/tests/test_core_exceptions.py b/doubleml/tests/test_core_exceptions.py
index 835108dc8..a40c8d609 100644
--- a/doubleml/tests/test_core_exceptions.py
+++ b/doubleml/tests/test_core_exceptions.py
@@ -36,15 +36,13 @@ def test_scaled_psi_shape_and_type():
 def test_arrays():
     kwargs = valid_core_kwargs()
     # Type checks
-    for key in ["thetas", "ses", "all_thetas", "all_ses", "var_scaling_factors"]:
+    for key in ["all_thetas", "all_ses", "var_scaling_factors"]:
         bad_kwargs = kwargs.copy()
         bad_kwargs[key] = "not_an_array"
         with pytest.raises(TypeError, match=f"{key} must be a numpy.ndarray"):
             DoubleMLCore(**bad_kwargs)
     # Shape checks
     shapes = {
-        "thetas": (3,),
-        "ses": (3,),
         "all_thetas": (3, 5),
         "all_ses": (3, 5),
         "var_scaling_factors": (3,),
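
The exception tests above lean on two helpers defined outside this series: generate_dml_dict(psi_a, psi_b) in doubleml/tests/_utils.py and valid_core_kwargs() in doubleml/tests/test_core_exceptions.py. The latter is never shown; purely for orientation, a hypothetical reconstruction inferred from the shape checks in test_arrays (n_thetas=3, n_rep=5; the n_obs default and array values below are arbitrary, and the real helper may differ) could look like:

    import numpy as np

    def valid_core_kwargs(n_obs=10, n_thetas=3, n_rep=5):
        # Hypothetical stand-in for the helper in test_core_exceptions.py;
        # only the shapes are grounded in the checks exercised above.
        return {
            "all_thetas": np.zeros((n_thetas, n_rep)),
            "all_ses": np.ones((n_thetas, n_rep)),
            "var_scaling_factors": np.full(n_thetas, float(n_obs)),
            "scaled_psi": np.zeros((n_obs, n_thetas, n_rep)),
        }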
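
Taken together, these patches change the construction flow: callers now build a DoubleMLCore from the per-repetition arrays and hand it to DoubleMLFramework, which aggregates the point estimates and standard errors itself via _aggregate_coefs_and_ses in __init__. A minimal end-to-end sketch (not part of the series; the dummy arrays, seed, and treatment names are illustrative only):

    import numpy as np

    from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework

    # Dummy inputs with the shapes DoubleMLCore validates:
    #   all_thetas, all_ses:  (n_thetas, n_rep)
    #   var_scaling_factors:  (n_thetas,)
    #   scaled_psi:           (n_obs, n_thetas, n_rep)
    n_obs, n_thetas, n_rep = 10, 2, 5
    rng = np.random.default_rng(42)

    dml_core = DoubleMLCore(
        all_thetas=rng.normal(size=(n_thetas, n_rep)),
        all_ses=np.abs(rng.normal(size=(n_thetas, n_rep))) + 0.1,  # ses kept positive
        var_scaling_factors=np.full(n_thetas, float(n_obs)),
        scaled_psi=rng.normal(size=(n_obs, n_thetas, n_rep)),
    )

    # 'thetas' and 'ses' are no longer passed in; the framework derives them
    # from all_thetas/all_ses on construction.
    framework = DoubleMLFramework(dml_core, treatment_names=["d1", "d2"])
    print(framework.thetas, framework.ses)
    print(framework.confint(joint=False, level=0.95))

Arithmetic still routes through the same dict-based constructors, so e.g. framework * 2.0 rescales all_thetas, all_ses, and scaled_psi by the constant while reusing the framework's var_scaling_factors.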