From 05d71202fe3c7ca0fdfd4c4f20a61813c3c56825 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Sun, 23 Nov 2025 08:25:45 +0100
Subject: [PATCH 1/5] fix docstring

---
 doubleml/double_ml_framework.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index 1ff21b350..5f767d9fd 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -21,11 +21,11 @@

 class DoubleMLFramework:
-    """Double Machine Learning Framework to combine DoubleML classes and compute confidendence intervals.
+    """Double Machine Learning Framework to combine DoubleML classes and compute confidence intervals.

     Parameters
     ----------
-    doubleml_dict : :dict
+    doubleml_dict : dict
         A dictionary providing the estimated parameters and normalized scores. Keys have to be 'thetas', 'ses',
         'all_thetas', 'all_ses', 'var_scaling_factors' and 'scaled_psi'.
         Values have to be numpy arrays with the corresponding shapes.

From 5aa6379f36aed303a6ce5b4aa7bff604e5b35a6c Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Sun, 23 Nov 2025 08:42:32 +0100
Subject: [PATCH 2/5] add mypy configuration to pyproject.toml

---
 pyproject.toml | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6aa06ab50..783a7e26d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,8 @@ dependencies = [
     "statsmodels>=0.14.0",
     "matplotlib>=3.9.0",
     "seaborn>=0.13",
-    "plotly>=5.0.0"
+    "plotly>=5.0.0",
+    "mypy>=1.18.0"
 ]
 classifiers = [
     "Programming Language :: Python :: 3",
@@ -113,3 +114,30 @@ ignore = [
     # isinstance checks
     "E721",
 ]
+
+
+# MyPy configuration
+[tool.mypy]
+python_version = "3.12"
+mypy_path = "."
+packages = ["doubleml"]
+exclude = [
+    "^tests/|^test_",
+]
+
+# Essential checks only:
+warn_return_any = false
+warn_unused_configs = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+disallow_untyped_defs = true
+check_untyped_defs = true
+no_implicit_optional = true
+
+# Output formatting:
+show_error_codes = true
+pretty = true
+color_output = true
+
+# Handle third-party libraries:
+ignore_missing_imports = true

From 019f33c5311e40ad69e2111a19465192f76f3a2f Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Sun, 23 Nov 2025 10:02:18 +0100
Subject: [PATCH 3/5] implement DoubleMLCore class with validation and error
 handling for parameters

---
 doubleml/double_ml_framework.py        | 159 +++++++++++++++++++
 doubleml/tests/test_core_exceptions.py | 206 +++++++++++++++++++++++++
 2 files changed, 365 insertions(+)
 create mode 100644 doubleml/tests/test_core_exceptions.py

diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index 5f767d9fd..5802ce143 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -1,4 +1,6 @@
 import copy
+from dataclasses import dataclass
+from typing import Dict, List, Optional

 import numpy as np
 import pandas as pd
@@ -20,6 +22,163 @@
 from .utils._plots import _sensitivity_contour_plot


+@dataclass
+class DoubleMLCore:
+    thetas: np.ndarray
+    ses: np.ndarray
+    all_thetas: np.ndarray
+    all_ses: np.ndarray
+    var_scaling_factors: np.ndarray
+    scaled_psi: np.ndarray
+    is_cluster_data: bool = False
+    cluster_dict: Optional[Dict] = None
+    sensitivity_elements: Optional[Dict[str, np.ndarray]] = None
+    treatment_names: Optional[List[str]] = None
+    """
+    Core container for DoubleML results.
+ + This class stores the main results and diagnostics from a DoubleML estimation, including parameter estimates, + standard errors, normalized scores, and (optionally) sensitivity and clustering information. It performs + thorough type and shape validation on all inputs to ensure internal consistency. + + Parameters + ---------- + thetas : np.ndarray + Estimated target parameters (shape: (n_thetas,)). + ses : np.ndarray + Estimated standard errors (shape: (n_thetas,)). + all_thetas : np.ndarray + Estimated target parameters for each repetition (shape: (n_thetas, n_rep)). + all_ses : np.ndarray + Estimated standard errors for each repetition (shape: (n_thetas, n_rep)). + var_scaling_factors : np.ndarray + Variance scaling factors (shape: (n_thetas,)). + scaled_psi : np.ndarray + Normalized scores (shape: (n_obs, n_thetas, n_rep)). + is_cluster_data : bool, optional + Indicates whether clustering is used (default: False). + cluster_dict : dict, optional + Dictionary with clustering information, required if is_cluster_data is True. + sensitivity_elements : dict, optional + Dictionary with sensitivity analysis components (e.g., max_bias, psi_max_bias, sigma2, nu2). + treatment_names : list of str, optional + Names of the treatments (must match n_thetas if provided). + + Raises + ------ + ValueError, TypeError + If any input fails type or shape validation. + """ + + def __post_init__(self): + + if not isinstance(self.scaled_psi, np.ndarray) or self.scaled_psi.ndim != 3: + raise ValueError("scaled_psi must be a 3-dimensional numpy.ndarray.") + self.n_obs, self.n_thetas, self.n_rep = self.scaled_psi.shape + + self._check_arrays() + self._check_cluster_dict() + self._check_sensitivity_elements() + self._check_treatment_names() + + def _check_arrays(self): + """Type and shape checks for input arrays.""" + arrays = { + "thetas": self.thetas, + "ses": self.ses, + "all_thetas": self.all_thetas, + "all_ses": self.all_ses, + "var_scaling_factors": self.var_scaling_factors, + "scaled_psi": self.scaled_psi, + } + for name, arr in arrays.items(): + if not isinstance(arr, np.ndarray): + raise TypeError(f"{name} must be a numpy.ndarray, got {type(arr)}.") + + expected_shapes = { + "thetas": (self.n_thetas,), + "ses": (self.n_thetas,), + "all_thetas": (self.n_thetas, self.n_rep), + "all_ses": (self.n_thetas, self.n_rep), + "var_scaling_factors": (self.n_thetas,), + "scaled_psi": (self.n_obs, self.n_thetas, self.n_rep), + } + for name, expected_shape in expected_shapes.items(): + actual_shape = arrays[name].shape + if actual_shape != expected_shape: + raise ValueError(f"{name} shape {actual_shape} does not match expected {expected_shape}.") + + def _check_cluster_dict(self): + """Checks for cluster_dict if is_cluster_data is True.""" + if self.is_cluster_data: + if self.cluster_dict is None: + raise ValueError("If is_cluster_data is True, cluster_dict must be provided.") + if not isinstance(self.cluster_dict, dict): + raise TypeError("cluster_dict must be a dictionary.") + expected_keys = ["smpls", "smpls_cluster", "cluster_vars", "n_folds_per_cluster"] + if not all(key in self.cluster_dict for key in expected_keys): + raise ValueError( + f"cluster_dict must contain keys: {', '.join(expected_keys)}. " + f"Got: {', '.join(self.cluster_dict.keys())}." 
+ ) + # Type checks for values + if not isinstance(self.cluster_dict["smpls"], list): + raise TypeError("cluster_dict['smpls'] must be a list.") + if not isinstance(self.cluster_dict["smpls_cluster"], list): + raise TypeError("cluster_dict['smpls_cluster'] must be a list.") + if not isinstance(self.cluster_dict["cluster_vars"], list): + raise TypeError("cluster_dict['cluster_vars'] must be a list.") + if not isinstance(self.cluster_dict["n_folds_per_cluster"], int): + raise TypeError("cluster_dict['n_folds_per_cluster'] must be an int.") + + def _check_sensitivity_elements(self): + """Checks for sensitivity_elements if provided.""" + if self.sensitivity_elements is not None: + if not isinstance(self.sensitivity_elements, dict): + raise TypeError("sensitivity_elements must be a dict if provided.") + required_keys = ["max_bias", "psi_max_bias"] + for key in required_keys: + if key not in self.sensitivity_elements: + raise ValueError(f"sensitivity_elements must contain key '{key}'.") + if not isinstance(self.sensitivity_elements[key], np.ndarray): + raise TypeError(f"sensitivity_elements['{key}'] must be a numpy.ndarray.") + + expected_shapes = { + "max_bias": (1, self.n_thetas, self.n_rep), + "psi_max_bias": (self.n_obs, self.n_thetas, self.n_rep), + } + for key in required_keys: + actual_shape = self.sensitivity_elements[key].shape + if actual_shape != expected_shapes[key]: + raise ValueError( + f"sensitivity_elements['{key}'] shape {actual_shape} does not match expected {expected_shapes[key]}." + ) + + # Optional: check benchmarks if present + for key in ["sigma2", "nu2"]: + if key in self.sensitivity_elements: + if not isinstance(self.sensitivity_elements[key], np.ndarray): + raise TypeError(f"sensitivity_elements['{key}'] must be a numpy.ndarray.") + if np.any(self.sensitivity_elements[key] < 0): + raise ValueError(f"sensitivity_elements['{key}'] must be positive.") + if self.sensitivity_elements[key].shape != (1, self.n_thetas, self.n_rep): + expected_shape = (1, self.n_thetas, self.n_rep) + actual_shape = self.sensitivity_elements[key].shape + raise ValueError( + f"sensitivity_elements['{key}'] shape {actual_shape} does not match expected {expected_shape}." + ) + + def _check_treatment_names(self): + """Checks for treatment_names if provided.""" + if self.treatment_names is not None: + if not isinstance(self.treatment_names, list) or not all(isinstance(n, str) for n in self.treatment_names): + raise TypeError("treatment_names must be a list of strings.") + if len(self.treatment_names) != self.n_thetas: + raise ValueError( + f"Length of treatment_names ({len(self.treatment_names)}) does not match n_thetas ({self.n_thetas})." + ) + + class DoubleMLFramework: """Double Machine Learning Framework to combine DoubleML classes and compute confidence intervals. 
diff --git a/doubleml/tests/test_core_exceptions.py b/doubleml/tests/test_core_exceptions.py
new file mode 100644
index 000000000..ddd615231
--- /dev/null
+++ b/doubleml/tests/test_core_exceptions.py
@@ -0,0 +1,206 @@
+import numpy as np
+import pytest
+
+from doubleml.double_ml_framework import DoubleMLCore
+from doubleml.tests._utils import generate_dml_dict
+
+n_obs = 10
+n_thetas = 2
+n_rep = 5
+
+
+def valid_core_kwargs():
+    np.random.seed(42)
+    psi_a = np.ones(shape=(n_obs, n_thetas, n_rep))
+    psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep))
+    doubleml_dict = generate_dml_dict(psi_a, psi_b)
+    return doubleml_dict
+
+
+@pytest.mark.ci
+def test_scaled_psi_shape_and_type():
+    kwargs = valid_core_kwargs()
+    msg = "scaled_psi must be a 3-dimensional numpy.ndarray."
+    kwargs["scaled_psi"] = "not_an_array"
+    with pytest.raises(ValueError, match=msg):
+        DoubleMLCore(**kwargs)
+    kwargs["scaled_psi"] = np.ones((10,))
+    with pytest.raises(ValueError, match=msg):
+        DoubleMLCore(**kwargs)
+    kwargs["scaled_psi"] = np.ones((10, 2))
+    with pytest.raises(ValueError, match=msg):
+        DoubleMLCore(**kwargs)
+
+
+@pytest.mark.ci
+def test_arrays():
+    kwargs = valid_core_kwargs()
+    # Type checks
+    for key in ["thetas", "ses", "all_thetas", "all_ses", "var_scaling_factors"]:
+        bad_kwargs = kwargs.copy()
+        bad_kwargs[key] = "not_an_array"
+        with pytest.raises(TypeError, match=f"{key} must be a numpy.ndarray"):
+            DoubleMLCore(**bad_kwargs)
+    # Shape checks
+    shapes = {
+        "thetas": (3,),
+        "ses": (3,),
+        "all_thetas": (3, 5),
+        "all_ses": (3, 5),
+        "var_scaling_factors": (3,),
+    }
+    for key, shape in shapes.items():
+        bad_kwargs = kwargs.copy()
+        bad_kwargs[key] = np.ones(shape)
+        with pytest.raises(ValueError, match=".*does not match expected.*"):
+            DoubleMLCore(**bad_kwargs)
+
+
+@pytest.mark.ci
+def test_cluster_dict_exceptions():
+    kwargs = valid_core_kwargs()
+    kwargs["is_cluster_data"] = True
+
+    # 1. cluster_dict missing
+    bad_kwargs = kwargs.copy()
+    bad_kwargs.pop("cluster_dict", None)
+    with pytest.raises(ValueError, match="If is_cluster_data is True, cluster_dict must be provided."):
+        DoubleMLCore(**bad_kwargs)
+
+    # 2. cluster_dict not a dict
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["cluster_dict"] = "not_a_dict"
+    with pytest.raises(TypeError, match="cluster_dict must be a dictionary."):
+        DoubleMLCore(**bad_kwargs)
+
+    # 3. cluster_dict missing keys
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["cluster_dict"] = {"smpls": [], "smpls_cluster": [], "cluster_vars": []}  # missing n_folds_per_cluster
+    msg = "cluster_dict must contain keys: smpls, smpls_cluster, cluster_vars, n_folds_per_cluster."
+    with pytest.raises(ValueError, match=msg):
+        DoubleMLCore(**bad_kwargs)
+
+    # 4. cluster_dict wrong value types
+    type_cases = [
+        ("smpls", "not_a_list", "cluster_dict\\['smpls'\\] must be a list."),
+        ("smpls_cluster", "not_a_list", "cluster_dict\\['smpls_cluster'\\] must be a list."),
+        ("cluster_vars", "not_a_list", "cluster_dict\\['cluster_vars'\\] must be a list."),
+        ("n_folds_per_cluster", "not_an_int", "cluster_dict\\['n_folds_per_cluster'\\] must be an int."),
+    ]
+    for key, bad_value, msg in type_cases:
+        cluster_dict = {
+            "smpls": [],
+            "smpls_cluster": [],
+            "cluster_vars": [],
+            "n_folds_per_cluster": 1,
+        }
+        cluster_dict[key] = bad_value
+        bad_kwargs = kwargs.copy()
+        bad_kwargs["cluster_dict"] = cluster_dict
+        with pytest.raises(TypeError, match=msg):
+            DoubleMLCore(**bad_kwargs)
+
+
+@pytest.mark.ci
+def test_sensitivity_elements_exceptions():
+    kwargs = valid_core_kwargs()
+
+    # Not a dict
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["sensitivity_elements"] = "not_a_dict"
+    with pytest.raises(TypeError, match="sensitivity_elements must be a dict if provided."):
+        DoubleMLCore(**bad_kwargs)
+
+    # Missing required key
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["sensitivity_elements"] = {"max_bias": np.ones((1, n_thetas, n_rep))}
+    with pytest.raises(ValueError, match="sensitivity_elements must contain key 'psi_max_bias'."):
+        DoubleMLCore(**bad_kwargs)
+
+    # Wrong type for required key
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["sensitivity_elements"] = {
+        "max_bias": "not_an_array",
+        "psi_max_bias": np.ones((n_obs, n_thetas, n_rep)),
+    }
+    with pytest.raises(TypeError, match="sensitivity_elements\\['max_bias'\\] must be a numpy.ndarray."):
+        DoubleMLCore(**bad_kwargs)
+
+    # Wrong shape for required key
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["sensitivity_elements"] = {
+        "max_bias": np.ones((2, n_thetas, n_rep)),  # should be (1, n_thetas, n_rep)
+        "psi_max_bias": np.ones((n_obs, n_thetas, n_rep)),
+    }
+    with pytest.raises(
+        ValueError, match=r"sensitivity_elements\['max_bias'\] shape \(2, 2, 5\) does not match expected \(1, 2, 5\)\."
+    ):
+        DoubleMLCore(**bad_kwargs)
+
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["sensitivity_elements"] = {
+        "max_bias": np.ones((1, n_thetas, n_rep)),
+        "psi_max_bias": np.ones((n_obs + 1, n_thetas, n_rep)),  # wrong n_obs
+    }
+    with pytest.raises(
+        ValueError, match=r"sensitivity_elements\['psi_max_bias'\] shape \(11, 2, 5\) does not match expected \(10, 2, 5\)\."
+    ):
+        DoubleMLCore(**bad_kwargs)
+
+    # sigma2 and nu2 wrong type
+    for key in ["sigma2", "nu2"]:
+        bad_kwargs = kwargs.copy()
+        sens = {
+            "max_bias": np.ones((1, n_thetas, n_rep)),
+            "psi_max_bias": np.ones((n_obs, n_thetas, n_rep)),
+            key: "not_an_array",
+        }
+        bad_kwargs["sensitivity_elements"] = sens
+        with pytest.raises(TypeError, match=rf"sensitivity_elements\['{key}'\] must be a numpy.ndarray."):
+            DoubleMLCore(**bad_kwargs)
+
+    # sigma2 and nu2 negative values
+    for key in ["sigma2", "nu2"]:
+        bad_kwargs = kwargs.copy()
+        sens = {
+            "max_bias": np.ones((1, n_thetas, n_rep)),
+            "psi_max_bias": np.ones((n_obs, n_thetas, n_rep)),
+            key: -np.ones((1, n_thetas, n_rep)),
+        }
+        bad_kwargs["sensitivity_elements"] = sens
+        with pytest.raises(ValueError, match=rf"sensitivity_elements\['{key}'\] must be positive."):
+            DoubleMLCore(**bad_kwargs)
+
+    # sigma2 and nu2 wrong shape
+    for key in ["sigma2", "nu2"]:
+        bad_kwargs = kwargs.copy()
+        sens = {
+            "max_bias": np.ones((1, n_thetas, n_rep)),
+            "psi_max_bias": np.ones((n_obs, n_thetas, n_rep)),
+            key: np.ones((2, n_thetas, n_rep)),
+        }
+        bad_kwargs["sensitivity_elements"] = sens
+        with pytest.raises(
+            ValueError, match=rf"sensitivity_elements\['{key}'\] shape \(2, 2, 5\) does not match expected \(1, 2, 5\)\."
+        ):
+            DoubleMLCore(**bad_kwargs)
+
+
+@pytest.mark.ci
+def test_treatment_names_exceptions():
+    kwargs = valid_core_kwargs()
+
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["treatment_names"] = "not_a_list"
+    with pytest.raises(TypeError, match="treatment_names must be a list of strings."):
+        DoubleMLCore(**bad_kwargs)
+
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["treatment_names"] = [1, 2]
+    with pytest.raises(TypeError, match="treatment_names must be a list of strings."):
+        DoubleMLCore(**bad_kwargs)
+
+    bad_kwargs = kwargs.copy()
+    bad_kwargs["treatment_names"] = ["treat1"]
+    with pytest.raises(ValueError, match=r"Length of treatment_names \(1\) does not match n_thetas \(2\)\."):
+        DoubleMLCore(**bad_kwargs)

From 704b1d1719363dae005f954523876e13568f9cc3 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Sun, 23 Nov 2025 18:09:21 +0100
Subject: [PATCH 4/5] Refactor DoubleML framework to utilize DoubleMLCore

Adjusted all tests
---
 doubleml/__init__.py                          |   3 +-
 doubleml/did/tests/test_did_aggregation.py    |   5 +-
 .../tests/test_did_aggregation_exceptions.py  |   8 +-
 .../did/tests/test_did_aggregation_plot.py    |   5 +-
 .../test_did_aggregation_return_types.py      |   5 +-
 doubleml/double_ml.py                         |   7 +-
 doubleml/double_ml_framework.py               | 424 ++++++------
 doubleml/irm/apos.py                          |  17 +-
 doubleml/tests/test_core_exceptions.py        |  26 +-
 doubleml/tests/test_framework.py              |   8 +-
 doubleml/tests/test_framework_coverage.py     |   8 +-
 doubleml/tests/test_framework_exceptions.py   | 215 ++-------
 .../tests/test_framework_pval_corrections.py  |   8 +-
 doubleml/tests/test_framework_sensitivity.py  |   8 +-
 doubleml/utils/_checks.py                     |   4 +-
 15 files changed, 232 insertions(+), 519 deletions(-)

diff --git a/doubleml/__init__.py b/doubleml/__init__.py
index cb3891bac..2e26cfa6e 100644
--- a/doubleml/__init__.py
+++ b/doubleml/__init__.py
@@ -3,7 +3,7 @@
 from .data import DoubleMLClusterData, DoubleMLData, DoubleMLDIDData, DoubleMLPanelData, DoubleMLRDDData, DoubleMLSSMData
 from .did.did import DoubleMLDID
 from .did.did_cs import DoubleMLDIDCS
-from .double_ml_framework import DoubleMLFramework, concat
+from .double_ml_framework import DoubleMLCore, DoubleMLFramework, concat
 from .irm.apo import DoubleMLAPO
 from .irm.apos import DoubleMLAPOS
 from .irm.cvar import DoubleMLCVAR
@@ -21,6 +21,7 @@ __all__ = [
     "concat",
+    "DoubleMLCore",
     "DoubleMLFramework",
     "DoubleMLPLR",
     "DoubleMLPLIV",
diff --git a/doubleml/did/tests/test_did_aggregation.py b/doubleml/did/tests/test_did_aggregation.py
index cc3c4304b..4dd91bc3e 100644
--- a/doubleml/did/tests/test_did_aggregation.py
+++ b/doubleml/did/tests/test_did_aggregation.py
@@ -2,7 +2,7 @@
 import pytest

 from doubleml.did.did_aggregation import DoubleMLDIDAggregation
-from doubleml.double_ml_framework import DoubleMLFramework
+from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework
 from doubleml.tests._utils import generate_dml_dict


@@ -28,7 +28,8 @@ def base_framework(n_rep):
     psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep))
     doubleml_dict = generate_dml_dict(psi_a, psi_b)

-    return DoubleMLFramework(doubleml_dict)
+    dml_core = DoubleMLCore(**doubleml_dict)
+    return DoubleMLFramework(dml_core=dml_core)


 @pytest.fixture(scope="module", params=["ones", "random", "zeros", "mixed"])
diff --git a/doubleml/did/tests/test_did_aggregation_exceptions.py b/doubleml/did/tests/test_did_aggregation_exceptions.py
index 0f895b5be..a5e5e22a8 100644
--- a/doubleml/did/tests/test_did_aggregation_exceptions.py
+++ b/doubleml/did/tests/test_did_aggregation_exceptions.py
@@ -2,7 +2,7 @@
 import pytest

 from doubleml.did.did_aggregation import DoubleMLDIDAggregation
-from doubleml.double_ml_framework import DoubleMLFramework
+from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework
 from doubleml.tests._utils import generate_dml_dict


@@ -24,7 +24,8 @@ def mock_framework(n_rep, n_thetas):
     psi_a = np.ones(shape=(n_obs, n_thetas, n_rep))
     psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep))
     doubleml_dict = generate_dml_dict(psi_a, psi_b)
-    return DoubleMLFramework(doubleml_dict)
+    dml_core = DoubleMLCore(**doubleml_dict)
+    return DoubleMLFramework(dml_core)


 @pytest.fixture
@@ -67,7 +68,8 @@ def test_invalid_framework_dim():
     psi_a = np.ones(shape=(10, 2, 1))
     psi_b = np.random.normal(size=(10, 2, 1))
     doubleml_dict = generate_dml_dict(psi_a, psi_b)
-    framework = DoubleMLFramework(doubleml_dict)
+    dml_core = DoubleMLCore(**doubleml_dict)
+    framework = DoubleMLFramework(dml_core=dml_core)

     # Test with invalid framework dimension
     with pytest.raises(ValueError, match="All frameworks must be one-dimensional"):
diff --git a/doubleml/did/tests/test_did_aggregation_plot.py b/doubleml/did/tests/test_did_aggregation_plot.py
index 692a0e682..65a76a72b 100644
--- a/doubleml/did/tests/test_did_aggregation_plot.py
+++ b/doubleml/did/tests/test_did_aggregation_plot.py
@@ -5,7 +5,7 @@
 from matplotlib.figure import Figure

 from doubleml.did.did_aggregation import DoubleMLDIDAggregation
-from doubleml.double_ml_framework import DoubleMLFramework
+from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework
 from doubleml.tests._utils import generate_dml_dict


@@ -23,7 +23,8 @@ def mock_framework(n_rep):
     psi_a = np.ones(shape=(n_obs, n_thetas, n_rep))
     psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep))
     doubleml_dict = generate_dml_dict(psi_a, psi_b)
-    return DoubleMLFramework(doubleml_dict)
+    dml_core = DoubleMLCore(**doubleml_dict)
+    return DoubleMLFramework(dml_core=dml_core)


 @pytest.fixture
diff --git a/doubleml/did/tests/test_did_aggregation_return_types.py b/doubleml/did/tests/test_did_aggregation_return_types.py
index e63eda70e..f9c407fc9 100644
--- a/doubleml/did/tests/test_did_aggregation_return_types.py
+++ b/doubleml/did/tests/test_did_aggregation_return_types.py
@@ -6,7 +6,7 @@
 from matplotlib.figure import Figure

 from doubleml.did.did_aggregation import DoubleMLDIDAggregation
-from doubleml.double_ml_framework import DoubleMLFramework
+from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework
 from doubleml.tests._utils import generate_dml_dict


@@ -24,7 +24,8 @@ def mock_framework(n_rep):
     psi_a = np.ones(shape=(n_obs, n_thetas, n_rep))
     psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep))
     doubleml_dict = generate_dml_dict(psi_a, psi_b)
-    return DoubleMLFramework(doubleml_dict)
+    dml_core = DoubleMLCore(**doubleml_dict)
+    return DoubleMLFramework(dml_core=dml_core)


 @pytest.fixture
diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 6293731a3..2ed7c812b 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -9,7 +9,7 @@

 from doubleml.data import DoubleMLDIDData, DoubleMLPanelData, DoubleMLRDDData, DoubleMLSSMData
 from doubleml.data.base_data import DoubleMLBaseData
-from doubleml.double_ml_framework import DoubleMLFramework
+from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework
 from doubleml.double_ml_sampling_mixins import SampleSplittingMixin
 from doubleml.utils._checks import _check_external_predictions
 from doubleml.utils._estimation import _aggregate_coefs_and_ses, _rmse, _set_external_predictions, _var_est
@@ -632,7 +632,6 @@ def construct_framework(self):
             "var_scaling_factors": self._var_scaling_factors,
             "scaled_psi": scaled_psi_reshape,
             "is_cluster_data": self._is_cluster_data,
-            "treatment_names": self._dml_data.d_cols,
         }

         if self._sensitivity_implemented:
@@ -669,8 +668,8 @@ def construct_framework(self):
                 },
             }
         )
-
-        doubleml_framework = DoubleMLFramework(doubleml_dict)
+        dml_core = DoubleMLCore(**doubleml_dict)
+        doubleml_framework = DoubleMLFramework(dml_core=dml_core, treatment_names=self._dml_data.d_cols)

         return doubleml_framework

     def bootstrap(self, method="normal", n_rep_boot=500):
diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index 5802ce143..6902a8d6a 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -33,7 +33,6 @@ class DoubleMLCore:
     is_cluster_data: bool = False
     cluster_dict: Optional[Dict] = None
     sensitivity_elements: Optional[Dict[str, np.ndarray]] = None
-    treatment_names: Optional[List[str]] = None
     """
     Core container for DoubleML results.

@@ -61,8 +60,6 @@
         Dictionary with clustering information, required if is_cluster_data is True.
     sensitivity_elements : dict, optional
         Dictionary with sensitivity analysis components (e.g., max_bias, psi_max_bias, sigma2, nu2).
-    treatment_names : list of str, optional
-        Names of the treatments (must match n_thetas if provided).
Raises ------ @@ -74,12 +71,11 @@ def __post_init__(self): if not isinstance(self.scaled_psi, np.ndarray) or self.scaled_psi.ndim != 3: raise ValueError("scaled_psi must be a 3-dimensional numpy.ndarray.") - self.n_obs, self.n_thetas, self.n_rep = self.scaled_psi.shape + self._n_obs, self._n_thetas, self._n_rep = self.scaled_psi.shape self._check_arrays() self._check_cluster_dict() self._check_sensitivity_elements() - self._check_treatment_names() def _check_arrays(self): """Type and shape checks for input arrays.""" @@ -96,12 +92,12 @@ def _check_arrays(self): raise TypeError(f"{name} must be a numpy.ndarray, got {type(arr)}.") expected_shapes = { - "thetas": (self.n_thetas,), - "ses": (self.n_thetas,), - "all_thetas": (self.n_thetas, self.n_rep), - "all_ses": (self.n_thetas, self.n_rep), - "var_scaling_factors": (self.n_thetas,), - "scaled_psi": (self.n_obs, self.n_thetas, self.n_rep), + "thetas": (self._n_thetas,), + "ses": (self._n_thetas,), + "all_thetas": (self._n_thetas, self._n_rep), + "all_ses": (self._n_thetas, self._n_rep), + "var_scaling_factors": (self._n_thetas,), + "scaled_psi": (self._n_obs, self._n_thetas, self._n_rep), } for name, expected_shape in expected_shapes.items(): actual_shape = arrays[name].shape @@ -121,13 +117,13 @@ def _check_cluster_dict(self): f"cluster_dict must contain keys: {', '.join(expected_keys)}. " f"Got: {', '.join(self.cluster_dict.keys())}." ) - # Type checks for values + # Type checks if not isinstance(self.cluster_dict["smpls"], list): raise TypeError("cluster_dict['smpls'] must be a list.") if not isinstance(self.cluster_dict["smpls_cluster"], list): raise TypeError("cluster_dict['smpls_cluster'] must be a list.") - if not isinstance(self.cluster_dict["cluster_vars"], list): - raise TypeError("cluster_dict['cluster_vars'] must be a list.") + if not isinstance(self.cluster_dict["cluster_vars"], np.ndarray): + raise TypeError("cluster_dict['cluster_vars'] must be a numpy.ndarray.") if not isinstance(self.cluster_dict["n_folds_per_cluster"], int): raise TypeError("cluster_dict['n_folds_per_cluster'] must be an int.") @@ -144,8 +140,8 @@ def _check_sensitivity_elements(self): raise TypeError(f"sensitivity_elements['{key}'] must be a numpy.ndarray.") expected_shapes = { - "max_bias": (1, self.n_thetas, self.n_rep), - "psi_max_bias": (self.n_obs, self.n_thetas, self.n_rep), + "max_bias": (1, self._n_thetas, self._n_rep), + "psi_max_bias": (self._n_obs, self._n_thetas, self._n_rep), } for key in required_keys: actual_shape = self.sensitivity_elements[key].shape @@ -160,143 +156,123 @@ def _check_sensitivity_elements(self): if not isinstance(self.sensitivity_elements[key], np.ndarray): raise TypeError(f"sensitivity_elements['{key}'] must be a numpy.ndarray.") if np.any(self.sensitivity_elements[key] < 0): - raise ValueError(f"sensitivity_elements['{key}'] must be positive.") - if self.sensitivity_elements[key].shape != (1, self.n_thetas, self.n_rep): - expected_shape = (1, self.n_thetas, self.n_rep) + raise ValueError( + f"sensitivity_elements['{key}'] must be positive. " + f"Got {str(self.sensitivity_elements[key])} " + "Most likely this is due to low quality learners (especially propensity scores)." + ) + if self.sensitivity_elements[key].shape != (1, self._n_thetas, self._n_rep): + expected_shape = (1, self._n_thetas, self._n_rep) actual_shape = self.sensitivity_elements[key].shape raise ValueError( f"sensitivity_elements['{key}'] shape {actual_shape} does not match expected {expected_shape}." 
                        )

-    def _check_treatment_names(self):
-        """Checks for treatment_names if provided."""
-        if self.treatment_names is not None:
-            if not isinstance(self.treatment_names, list) or not all(isinstance(n, str) for n in self.treatment_names):
-                raise TypeError("treatment_names must be a list of strings.")
-            if len(self.treatment_names) != self.n_thetas:
-                raise ValueError(
-                    f"Length of treatment_names ({len(self.treatment_names)}) does not match n_thetas ({self.n_thetas})."
-                )
-

 class DoubleMLFramework:
-    """Double Machine Learning Framework to combine DoubleML classes and compute confidence intervals.
+    """
+    Double Machine Learning Framework to combine DoubleMLCore results and compute confidence intervals.

     Parameters
     ----------
-    doubleml_dict : dict
-        A dictionary providing the estimated parameters and normalized scores. Keys have to be 'thetas', 'ses',
-        'all_thetas', 'all_ses', 'var_scaling_factors' and 'scaled_psi'.
-        Values have to be numpy arrays with the corresponding shapes.
-
+    dml_core : DoubleMLCore
+        A DoubleMLCore object providing the estimated parameters and scores.
     """

     def __init__(
         self,
-        doubleml_dict=None,
+        dml_core: DoubleMLCore,
+        treatment_names: Optional[List[str]] = None,
     ):
-        self._is_cluster_data = False
-
-        # check input
-        if not isinstance(doubleml_dict, dict):
-            raise TypeError("doubleml_dict must be a dictionary.")
-        expected_keys = ["thetas", "ses", "all_thetas", "all_ses", "var_scaling_factors", "scaled_psi"]
-        if not all(key in doubleml_dict.keys() for key in expected_keys):
-            raise ValueError("The dict must contain the following keys: " + ", ".join(expected_keys))
-
-        # set scores and parameters
-        self._n_thetas = doubleml_dict["scaled_psi"].shape[1]
-        self._n_rep = doubleml_dict["scaled_psi"].shape[2]
-        self._n_obs = doubleml_dict["scaled_psi"].shape[0]
-
-        self._thetas = doubleml_dict["thetas"]
-        self._ses = doubleml_dict["ses"]
-        self._all_thetas = doubleml_dict["all_thetas"]
-        self._all_ses = doubleml_dict["all_ses"]
-        self._var_scaling_factors = doubleml_dict["var_scaling_factors"]
-        self._scaled_psi = doubleml_dict["scaled_psi"]
-
-        # initialize cluster data
-        self._check_and_set_cluster_data(doubleml_dict)
-
-        # initialize sensitivity analysis
-        self._check_and_set_sensitivity_elements(doubleml_dict)
-
-        # check if all sizes match
-        self._check_framework_shapes()
-
-        self._treatment_names = None
-        if "treatment_names" in doubleml_dict.keys():
-            self._check_treatment_names(doubleml_dict["treatment_names"])
-            self._treatment_names = doubleml_dict["treatment_names"]
+        if not isinstance(dml_core, DoubleMLCore):
+            raise TypeError("dml_core must be a DoubleMLCore instance.")
+        self._dml_core = dml_core
+
+        self._treatment_names = None
+        if treatment_names is not None:
+            self._check_treatment_names(treatment_names)
+            self._treatment_names = treatment_names
+
+        # initialize sensitivity analysis attributes
+        self._sensitivity_implemented = self._dml_core.sensitivity_elements is not None
+        self._benchmark_available = self._sensitivity_implemented and all(
+            k in self._dml_core.sensitivity_elements for k in ["sigma2", "nu2"]
+        )
+        self._sensitivity_params = None

         # initialize bootstrap distribution
         self._boot_t_stat = None
         self._boot_method = None
         self._n_rep_boot = None

+    @property
+    def dml_core(self):
+        """
+        The underlying DoubleMLCore object.
+        """
+        return self._dml_core
+
     @property
     def n_thetas(self):
         """
         Number of target parameters.
         """
-        return self._n_thetas
+        return self._dml_core._n_thetas

     @property
     def n_rep(self):
         """
         Number of repetitions.
""" - return self._n_rep + return self._dml_core._n_rep @property def n_obs(self): """ Number of observations. """ - return self._n_obs + return self._dml_core._n_obs @property def thetas(self): """ Estimated target parameters (shape (``n_thetas``,)). """ - return self._thetas + return self._dml_core.thetas @property def all_thetas(self): """ Estimated target parameters for each repetition (shape (``n_thetas``, ``n_rep``)). """ - return self._all_thetas + return self._dml_core.all_thetas @property def ses(self): """ Estimated standard errors (shape (``n_thetas``,)). """ - return self._ses + return self._dml_core.ses @property def all_ses(self): """ Estimated standard errors for each repetition (shape (``n_thetas``, ``n_rep``)). """ - return self._all_ses + return self._dml_core.all_ses @property def t_stats(self): """ t-statistics for the causal parameter(s) (shape (``n_thetas``,)). """ - return self._thetas / self._ses + return self.thetas / self.ses @property def all_t_stats(self): """ t-statistics for the causal parameter(s) for each repetition (shape (``n_thetas``, ``n_rep``)). """ - return self._all_thetas / self._all_ses + return self.all_thetas / self.all_ses @property def pvals(self): @@ -320,14 +296,28 @@ def scaled_psi(self): """ Normalized scores (shape (``n_obs``, ``n_thetas``, ``n_rep``)). """ - return self._scaled_psi + return self._dml_core.scaled_psi @property def var_scaling_factors(self): """ Variance scaling factors (shape (``n_thetas``,)). """ - return self._var_scaling_factors + return self._dml_core.var_scaling_factors + + @property + def is_cluster_data(self): + """ + Whether the data is clustered. + """ + return self._dml_core.is_cluster_data + + @property + def cluster_dict(self): + """ + Clustering information (if available). + """ + return self._dml_core.cluster_dict @property def n_rep_boot(self): @@ -359,7 +349,7 @@ def sensitivity_elements(self): ``psi_max_bias`` (shape (``n_obs``, ``n_thetas``, ``n_rep``)). Optionally, additional entries ``sigma2`` and ``nu2``(shape (``1``, ``n_thetas``, ``n_rep``)) are available. """ - return self._sensitivity_elements + return self._dml_core.sensitivity_elements @property def sensitivity_params(self): @@ -388,7 +378,7 @@ def summary(self): A summary for the estimated causal parameters ``thetas``. """ ci = self.confint() - df_summary = generate_summary(self.thetas, self.ses, self.t_stats, self.pvals, ci, self._treatment_names) + df_summary = generate_summary(self.thetas, self.ses, self.t_stats, self.pvals, ci, self.treatment_names) return df_summary @property @@ -455,16 +445,14 @@ def sensitivity_summary(self): def __add__(self, other): if isinstance(other, DoubleMLFramework): # internal consistency check - self._check_framework_shapes() - other._check_framework_shapes() _check_framework_compatibility(self, other, check_treatments=True) - all_thetas = self._all_thetas + other._all_thetas - scaled_psi = self._scaled_psi + other._scaled_psi + all_thetas = self.all_thetas + other.all_thetas + scaled_psi = self.scaled_psi + other.scaled_psi # check if var_scaling_factors are the same - assert np.allclose(self._var_scaling_factors, other._var_scaling_factors) - var_scaling_factors = self._var_scaling_factors + assert np.allclose(self.var_scaling_factors, other.var_scaling_factors) + var_scaling_factors = self.var_scaling_factors # compute standard errors (Uses factor 1/n for scaling!) 
sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1)) @@ -478,20 +466,21 @@ def __add__(self, other): "all_ses": all_ses, "var_scaling_factors": var_scaling_factors, "scaled_psi": scaled_psi, - "is_cluster_data": self._is_cluster_data, - "cluster_dict": self._cluster_dict, + "is_cluster_data": self.is_cluster_data, + "cluster_dict": self.cluster_dict, } if self._sensitivity_implemented and other._sensitivity_implemented: - max_bias = self._sensitivity_elements["max_bias"] + other._sensitivity_elements["max_bias"] - psi_max_bias = self._sensitivity_elements["psi_max_bias"] + other._sensitivity_elements["psi_max_bias"] + max_bias = self.sensitivity_elements["max_bias"] + other.sensitivity_elements["max_bias"] + psi_max_bias = self.sensitivity_elements["psi_max_bias"] + other.sensitivity_elements["psi_max_bias"] sensitivity_elements = { "max_bias": max_bias, "psi_max_bias": psi_max_bias, } doubleml_dict["sensitivity_elements"] = sensitivity_elements - new_obj = DoubleMLFramework(doubleml_dict) + dml_core = DoubleMLCore(**doubleml_dict) + new_obj = DoubleMLFramework(dml_core) else: raise TypeError(f"Unsupported operand type: {type(other)}") @@ -503,16 +492,14 @@ def __radd__(self, other): def __sub__(self, other): if isinstance(other, DoubleMLFramework): # internal consistency check - self._check_framework_shapes() - other._check_framework_shapes() _check_framework_compatibility(self, other, check_treatments=True) - all_thetas = self._all_thetas - other._all_thetas - scaled_psi = self._scaled_psi - other._scaled_psi + all_thetas = self.all_thetas - other.all_thetas + scaled_psi = self.scaled_psi - other.scaled_psi # check if var_scaling_factors are the same - assert np.allclose(self._var_scaling_factors, other._var_scaling_factors) - var_scaling_factors = self._var_scaling_factors + assert np.allclose(self.var_scaling_factors, other.var_scaling_factors) + var_scaling_factors = self.var_scaling_factors # compute standard errors sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1)) @@ -526,22 +513,23 @@ def __sub__(self, other): "all_ses": all_ses, "var_scaling_factors": var_scaling_factors, "scaled_psi": scaled_psi, - "is_cluster_data": self._is_cluster_data, - "cluster_dict": self._cluster_dict, + "is_cluster_data": self.is_cluster_data, + "cluster_dict": self.cluster_dict, } # sensitivity combination only available for same outcome and cond. expectation (e.g. IRM) if self._sensitivity_implemented and other._sensitivity_implemented: - max_bias = self._sensitivity_elements["max_bias"] + other._sensitivity_elements["max_bias"] - psi_max_bias = self._sensitivity_elements["psi_max_bias"] + other._sensitivity_elements["psi_max_bias"] + max_bias = self.sensitivity_elements["max_bias"] + other.sensitivity_elements["max_bias"] + psi_max_bias = self.sensitivity_elements["psi_max_bias"] + other.sensitivity_elements["psi_max_bias"] sensitivity_elements = { "max_bias": max_bias, "psi_max_bias": psi_max_bias, } doubleml_dict["sensitivity_elements"] = sensitivity_elements - new_obj = DoubleMLFramework(doubleml_dict) + dml_core = DoubleMLCore(**doubleml_dict) + new_obj = DoubleMLFramework(dml_core=dml_core) else: raise TypeError(f"Unsupported operand type: {type(other)}") @@ -553,13 +541,13 @@ def __rsub__(self, other): # TODO: Restrict to linear? 
     def __mul__(self, other):
         if isinstance(other, (int, float)):
-            thetas = np.multiply(other, self._thetas)
-            all_thetas = np.multiply(other, self._all_thetas)
+            thetas = np.multiply(other, self.thetas)
+            all_thetas = np.multiply(other, self.all_thetas)

-            var_scaling_factors = self._var_scaling_factors
-            ses = np.multiply(other, self._ses)
-            all_ses = np.multiply(other, self._all_ses)
-            scaled_psi = np.multiply(other, self._scaled_psi)
+            var_scaling_factors = self.var_scaling_factors
+            ses = np.multiply(other, self.ses)
+            all_ses = np.multiply(other, self.all_ses)
+            scaled_psi = np.multiply(other, self.scaled_psi)

             doubleml_dict = {
                 "thetas": thetas,
@@ -568,15 +556,15 @@ def __mul__(self, other):
                 "all_ses": all_ses,
                 "var_scaling_factors": var_scaling_factors,
                 "scaled_psi": scaled_psi,
-                "is_cluster_data": self._is_cluster_data,
-                "cluster_dict": self._cluster_dict,
+                "is_cluster_data": self.is_cluster_data,
+                "cluster_dict": self.cluster_dict,
             }

             # sensitivity combination only available for linear models
             if self._sensitivity_implemented:
-                max_bias = abs(other) * self._sensitivity_elements["max_bias"]
-                psi_max_bias = abs(other) * self._sensitivity_elements["psi_max_bias"]
+                max_bias = abs(other) * self.sensitivity_elements["max_bias"]
+                psi_max_bias = abs(other) * self.sensitivity_elements["psi_max_bias"]
                 sensitivity_elements = {
                     "max_bias": max_bias,
                     "psi_max_bias": psi_max_bias,
@@ -584,13 +572,14 @@ def __mul__(self, other):
                 if self._benchmark_available:
                     sensitivity_elements.update(
                         {
-                            "sigma2": self._sensitivity_elements["sigma2"],
-                            "nu2": np.multiply(np.square(other), self._sensitivity_elements["nu2"]),
+                            "sigma2": self.sensitivity_elements["sigma2"],
+                            "nu2": np.multiply(np.square(other), self.sensitivity_elements["nu2"]),
                         }
                     )
                 doubleml_dict["sensitivity_elements"] = sensitivity_elements

-            new_obj = DoubleMLFramework(doubleml_dict)
+            dml_core = DoubleMLCore(**doubleml_dict)
+            new_obj = DoubleMLFramework(dml_core=dml_core)

         else:
             raise TypeError(f"Unsupported operand type: {type(other)}")
@@ -612,7 +601,7 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):
         _check_in_zero_one(level, "The confidence level", include_zero=False, include_one=False)

         # set elements for readability
-        psi_scaled = self._scaled_psi
+        psi_scaled = self.scaled_psi
         max_bias = self.sensitivity_elements["max_bias"]
         psi_max_bias = self.sensitivity_elements["psi_max_bias"]
@@ -632,22 +621,22 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):

         for i_rep in range(self.n_rep):
             for i_theta in range(self.n_thetas):
-                if not self._is_cluster_data:
+                if not self.is_cluster_data:
                     smpls = None
                     cluster_vars = None
                     smpls_cluster = None
                     n_folds_per_cluster = None
                 else:
-                    smpls = self._cluster_dict["smpls"][i_rep]
-                    cluster_vars = self._cluster_dict["cluster_vars"]
-                    smpls_cluster = self._cluster_dict["smpls_cluster"][i_rep]
-                    n_folds_per_cluster = self._cluster_dict["n_folds_per_cluster"]
+                    smpls = self.cluster_dict["smpls"][i_rep]
+                    cluster_vars = self.cluster_dict["cluster_vars"]
+                    smpls_cluster = self.cluster_dict["smpls_cluster"][i_rep]
+                    n_folds_per_cluster = self.cluster_dict["n_folds_per_cluster"]

                 sigma2_lower_hat, _ = _var_est(
                     psi=psi_lower[:, i_theta, i_rep],
                     psi_deriv=np.ones_like(psi_lower[:, i_theta, i_rep]),
                     smpls=smpls,
-                    is_cluster_data=self._is_cluster_data,
+                    is_cluster_data=self.is_cluster_data,
                     cluster_vars=cluster_vars,
                     smpls_cluster=smpls_cluster,
                     n_folds_per_cluster=n_folds_per_cluster,
                 )
                 sigma2_upper_hat, _ = _var_est(
                     psi=psi_upper[:, i_theta, i_rep],
                     psi_deriv=np.ones_like(psi_upper[:, i_theta, i_rep]),
                     smpls=smpls,
-                    is_cluster_data=self._is_cluster_data,
+                    is_cluster_data=self.is_cluster_data,
                     cluster_vars=cluster_vars,
                     smpls_cluster=smpls_cluster,
                     n_folds_per_cluster=n_folds_per_cluster,
                 )
@@ -689,7 +678,7 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):

     def _calc_robustness_value(self, null_hypothesis, level, rho, idx_treatment):
         _check_float(null_hypothesis, "null_hypothesis")
-        _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self._n_thetas - 1)
+        _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self.n_thetas - 1)

         # check which side is relevant
         bound = "upper" if (null_hypothesis > self.thetas[idx_treatment]) else "lower"
@@ -745,14 +734,14 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h
         """
         # check null_hypothesis
         if isinstance(null_hypothesis, float):
-            null_hypothesis_vec = np.full(shape=self._n_thetas, fill_value=null_hypothesis)
+            null_hypothesis_vec = np.full(shape=self.n_thetas, fill_value=null_hypothesis)
         elif isinstance(null_hypothesis, np.ndarray):
-            if null_hypothesis.shape == (self._n_thetas,):
+            if null_hypothesis.shape == (self.n_thetas,):
                 null_hypothesis_vec = null_hypothesis
             else:
                 raise ValueError(
                     "null_hypothesis is numpy.ndarray but does not have the required "
-                    f"shape ({self._n_thetas},). "
+                    f"shape ({self.n_thetas},). "
                     f"Array of shape {str(null_hypothesis.shape)} was passed."
                 )
         else:
@@ -765,10 +754,10 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h
         sensitivity_dict = self._calc_sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=rho, level=level)

         # compute robustess values with respect to null_hypothesis
-        rv = np.full(shape=self._n_thetas, fill_value=np.nan)
-        rva = np.full(shape=self._n_thetas, fill_value=np.nan)
+        rv = np.full(shape=self.n_thetas, fill_value=np.nan)
+        rva = np.full(shape=self.n_thetas, fill_value=np.nan)

-        for i_theta in range(self._n_thetas):
+        for i_theta in range(self.n_thetas):
             rv[i_theta], rva[i_theta] = self._calc_robustness_value(
                 null_hypothesis=null_hypothesis_vec[i_theta], level=level, rho=rho, idx_treatment=i_theta
             )
@@ -821,7 +810,7 @@ def confint(self, joint=False, level=0.95):
             max_abs_t_value_distribution = np.amax(np.abs(self._boot_t_stat), axis=1)
             critical_values = np.quantile(a=max_abs_t_value_distribution, q=level, axis=0)
         else:
-            critical_values = np.repeat(norm.ppf(percentages[1]), self._n_rep)
+            critical_values = np.repeat(norm.ppf(percentages[1]), self.n_rep)

         # compute all cis over repetitions (shape: n_thetas x 2 x n_rep)
         self._all_cis = np.stack(
@@ -854,17 +843,17 @@ def bootstrap(self, method="normal", n_rep_boot=500):
         """
         _check_bootstrap(method, n_rep_boot)

-        if self._is_cluster_data:
+        if self.is_cluster_data:
             raise NotImplementedError("bootstrap not yet implemented with clustering.")

         self._n_rep_boot = n_rep_boot
         self._boot_method = method
         # initialize bootstrap distribution array
-        self._boot_t_stat = np.full((n_rep_boot, self.n_thetas, self._n_rep), np.nan)
-        var_scaling = self._var_scaling_factors.reshape(-1, 1) * self._all_ses
+        self._boot_t_stat = np.full((n_rep_boot, self.n_thetas, self.n_rep), np.nan)
+        var_scaling = self.var_scaling_factors.reshape(-1, 1) * self.all_ses
         for i_rep in range(self.n_rep):
-            weights = _draw_weights(method, n_rep_boot, self._n_obs)
-            bootstraped_scaled_psi = np.matmul(weights, np.divide(self._scaled_psi[:, :, i_rep], var_scaling[:, i_rep]))
+            weights = _draw_weights(method, n_rep_boot, self.n_obs)
+            bootstraped_scaled_psi = np.matmul(weights, np.divide(self.scaled_psi[:, :, i_rep], var_scaling[:, i_rep]))
             self._boot_t_stat[:, :, i_rep] = bootstraped_scaled_psi

         return self
@@ -1076,137 +1065,6 @@ def sensitivity_plot(
         )
         return fig

-    def _check_and_set_cluster_data(self, doubleml_dict):
-        self._cluster_dict = None
-
-        if "is_cluster_data" in doubleml_dict.keys():
-            _check_bool(doubleml_dict["is_cluster_data"], "is_cluster_data")
-            self._is_cluster_data = doubleml_dict["is_cluster_data"]
-
-        if self._is_cluster_data:
-            if "cluster_dict" not in doubleml_dict.keys():
-                raise ValueError("If is_cluster_data is True, cluster_dict must be provided.")
-
-            if not isinstance(doubleml_dict["cluster_dict"], dict):
-                raise TypeError("cluster_dict must be a dictionary.")
-
-            expected_keys_cluster = ["smpls", "smpls_cluster", "cluster_vars", "n_folds_per_cluster"]
-            if not all(key in doubleml_dict["cluster_dict"].keys() for key in expected_keys_cluster):
-                raise ValueError(
-                    "The cluster_dict must contain the following keys: "
-                    + ", ".join(expected_keys_cluster)
-                    + ". Got: "
-                    + ", ".join(doubleml_dict["cluster_dict"].keys())
-                    + "."
-                )
-
-            self._cluster_dict = doubleml_dict["cluster_dict"]
-
-        return
-
-    def _check_and_set_sensitivity_elements(self, doubleml_dict):
-        if "sensitivity_elements" not in doubleml_dict.keys():
-            sensitivity_implemented = False
-            sensitivity_elements = None
-            benchmark_available = False
-
-        else:
-            if not isinstance(doubleml_dict["sensitivity_elements"], dict):
-                raise TypeError("sensitivity_elements must be a dictionary.")
-
-            expected_keys_sensitivity = ["max_bias", "psi_max_bias"]
-            if not all(key in doubleml_dict["sensitivity_elements"].keys() for key in expected_keys_sensitivity):
-                raise ValueError(
-                    "The sensitivity_elements dict must contain the following keys: " + ", ".join(expected_keys_sensitivity)
-                )
-
-            for key in expected_keys_sensitivity:
-                if not isinstance(doubleml_dict["sensitivity_elements"][key], np.ndarray):
-                    raise TypeError(f"The sensitivity element {key} must be a numpy array.")
-
-            # set sensitivity elements
-            sensitivity_implemented = True
-            sensitivity_elements = {key: doubleml_dict["sensitivity_elements"][key] for key in expected_keys_sensitivity}
-
-            # check if benchmarks are available and update sensitivity elements
-            benchmark_available, sensitivity_elements_benchmark = self._check_sensitivity_benchmark(doubleml_dict)
-            sensitivity_elements.update(sensitivity_elements_benchmark)
-
-        # set attributes
-        self._sensitivity_implemented = sensitivity_implemented
-        self._sensitivity_elements = sensitivity_elements
-        self._benchmark_available = benchmark_available
-        self._sensitivity_params = None
-
-        return
-
-    def _check_sensitivity_benchmark(self, doubleml_dict):
-        # check if benchmarks are available
-        expected_keys_benchmark = ["sigma2", "nu2"]
-        benchmark_available = all(key in doubleml_dict["sensitivity_elements"] for key in expected_keys_benchmark)
-        if benchmark_available:
-            # type checks
-            for key in expected_keys_benchmark:
-                if not isinstance(doubleml_dict["sensitivity_elements"][key], np.ndarray):
-                    raise TypeError(f"The sensitivity element {key} must be a numpy array.")
-
-            # additional constraints
-            if (np.any(doubleml_dict["sensitivity_elements"]["sigma2"] < 0)) | (
-                np.any(doubleml_dict["sensitivity_elements"]["nu2"] < 0)
-            ):
-                raise ValueError(
-                    "sensitivity_elements sigma2 and nu2 have to be positive. "
-                    f"Got sigma2 {str(doubleml_dict['sensitivity_elements']['sigma2'])} "
-                    f"and nu2 {str(doubleml_dict['sensitivity_elements']['nu2'])}. "
-                    "Most likely this is due to low quality learners (especially propensity scores)."
-                )
-
-            sensitivity_elements_benchmark = {
-                key: doubleml_dict["sensitivity_elements"][key] for key in expected_keys_benchmark
-            }
-        else:
-            sensitivity_elements_benchmark = {}
-
-        return benchmark_available, sensitivity_elements_benchmark
-
-    def _check_framework_shapes(self):
-        expected_shapes = {
-            "thetas": (self._n_thetas,),
-            "ses": (self._n_thetas,),
-            "all_thetas": (self._n_thetas, self._n_rep),
-            "all_ses": (self._n_thetas, self._n_rep),
-            "var_scaling_factors": (self._n_thetas,),
-            "scaled_psi": (self._n_obs, self._n_thetas, self.n_rep),
-        }
-
-        for attr, expected_shape in expected_shapes.items():
-            actual_shape = getattr(self, f"_{attr}").shape
-            if actual_shape != expected_shape:
-                raise ValueError(f"The shape of {attr} does not match the expected shape {expected_shape}.")
-
-        if self._sensitivity_implemented:
-            self._check_sensitivity_elements_shapes()
-
-        return None
-
-    def _check_sensitivity_elements_shapes(self):
-        expected_sensitivity_shapes = {
-            "max_bias": (1, self._n_thetas, self.n_rep),
-            "psi_max_bias": (self._n_obs, self._n_thetas, self.n_rep),
-        }
-
-        if self._benchmark_available:
-            expected_sensitivity_shapes.update(
-                {"sigma2": (1, self._n_thetas, self.n_rep), "nu2": (1, self._n_thetas, self.n_rep)}
-            )
-
-        for key, expected_shape in expected_sensitivity_shapes.items():
-            actual_shape = self._sensitivity_elements[key].shape
-            if actual_shape != expected_shape:
-                raise ValueError(f"The shape of {key} does not match the expected shape {expected_shape}.")
-
-        return None
-
     def _check_treatment_names(self, treatment_names):
         if not isinstance(treatment_names, list):
             raise TypeError(
@@ -1217,10 +1075,10 @@ def _check_treatment_names(self, treatment_names):
             raise TypeError(
                 f"treatment_names must be a list of strings. At least one element is not a string: {str(treatment_names)}."
             )
-        if len(treatment_names) != self._n_thetas:
+        if len(treatment_names) != self.n_thetas:
             raise ValueError(
                 "The length of treatment_names does not match the number of treatments. "
-                f"Got {self._n_thetas} treatments and {len(treatment_names)} treatment names."
+                f"Got {self.n_thetas} treatments and {len(treatment_names)} treatment names."
             )

         return None
@@ -1235,20 +1093,18 @@ def concat(objs):
     if not all(isinstance(obj, DoubleMLFramework) for obj in objs):
        raise TypeError("All objects must be of type DoubleMLFramework.")

-    # check on internal consitency of objects
-    _ = [obj._check_framework_shapes() for obj in objs]
     # check if all objects are compatible in n_obs and n_rep
     _ = [_check_framework_compatibility(objs[0], obj, check_treatments=False) for obj in objs[1:]]

     all_thetas = np.concatenate([obj.all_thetas for obj in objs], axis=0)
     all_ses = np.concatenate([obj.all_ses for obj in objs], axis=0)
-    var_scaling_factors = np.concatenate([obj._var_scaling_factors for obj in objs], axis=0)
-    scaled_psi = np.concatenate([obj._scaled_psi for obj in objs], axis=1)
+    var_scaling_factors = np.concatenate([obj.var_scaling_factors for obj in objs], axis=0)
+    scaled_psi = np.concatenate([obj.scaled_psi for obj in objs], axis=1)

     thetas = np.concatenate([obj.thetas for obj in objs], axis=0)
     ses = np.concatenate([obj.ses for obj in objs], axis=0)

-    if any(obj._is_cluster_data for obj in objs):
+    if any(obj.is_cluster_data for obj in objs):
         raise NotImplementedError("concat not yet implemented with clustering.")
     else:
         is_cluster_data = False
@@ -1266,19 +1122,17 @@ def concat(objs):
     if all(obj._sensitivity_implemented for obj in objs):
         sensitivity_elements = {}
         for key in ["max_bias", "psi_max_bias"]:
-            assert all(key in obj._sensitivity_elements.keys() for obj in objs)
-            sensitivity_elements[key] = np.concatenate([obj._sensitivity_elements[key] for obj in objs], axis=1)
+            assert all(key in obj.sensitivity_elements.keys() for obj in objs)
+            sensitivity_elements[key] = np.concatenate([obj.sensitivity_elements[key] for obj in objs], axis=1)

         if all(obj._benchmark_available for obj in objs):
             for key in ["sigma2", "nu2"]:
-                assert all(key in obj._sensitivity_elements.keys() for obj in objs)
-                sensitivity_elements[key] = np.concatenate([obj._sensitivity_elements[key] for obj in objs], axis=1)
+                assert all(key in obj.sensitivity_elements.keys() for obj in objs)
+                sensitivity_elements[key] = np.concatenate([obj.sensitivity_elements[key] for obj in objs], axis=1)

         doubleml_dict["sensitivity_elements"] = sensitivity_elements

-    new_obj = DoubleMLFramework(doubleml_dict)
-
-    # check internal consistency of new object
-    new_obj._check_framework_shapes()
+    dml_core = DoubleMLCore(**doubleml_dict)
+    new_obj = DoubleMLFramework(dml_core=dml_core)

     return new_obj
diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index 23e7085e8..d6713a384 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -10,7 +10,7 @@

 from doubleml.data import DoubleMLData
 from doubleml.double_ml import DoubleML
-from doubleml.double_ml_framework import concat
+from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework, concat
 from doubleml.double_ml_sampling_mixins import SampleSplittingMixin
 from doubleml.irm.apo import DoubleMLAPO
 from doubleml.utils._checks import _check_score, _check_weights
@@ -709,12 +709,23 @@ def causal_contrast(self, reference_levels):
             if i in skip_index:
                 continue

-            current_framework = model.framework - ref_model.framework
+            diff_framework = model.framework - ref_model.framework
             current_treatment_name = f"{self.treatment_levels[i]} vs {self.treatment_levels[i_ref_lvl]}"

             # update sensitivity elements with sharper bounds
             current_sensitivity_dict = self._compute_causal_contrast_sensitivity_dict(model=model, ref_model=ref_model)
-            current_framework._check_and_set_sensitivity_elements(current_sensitivity_dict)
+            updated_dml_core = DoubleMLCore(
+                thetas=diff_framework.thetas,
+                ses=diff_framework.ses,
+                all_thetas=diff_framework.all_thetas,
+                all_ses=diff_framework.all_ses,
+                var_scaling_factors=diff_framework.var_scaling_factors,
+                scaled_psi=diff_framework.scaled_psi,
+                is_cluster_data=diff_framework.is_cluster_data,
+                cluster_dict=diff_framework.cluster_dict,
+                sensitivity_elements=current_sensitivity_dict["sensitivity_elements"],
+            )
+            current_framework = DoubleMLFramework(updated_dml_core, treatment_names=[current_treatment_name])

             all_acc_frameworks += [current_framework]
             all_treatment_names += [current_treatment_name]
diff --git a/doubleml/tests/test_core_exceptions.py b/doubleml/tests/test_core_exceptions.py
index ddd615231..835108dc8 100644
--- a/doubleml/tests/test_core_exceptions.py
+++ b/doubleml/tests/test_core_exceptions.py
@@ -84,14 +84,14 @@ def test_cluster_dict_exceptions():
     type_cases = [
         ("smpls", "not_a_list", "cluster_dict\\['smpls'\\] must be a list."),
         ("smpls_cluster", "not_a_list", "cluster_dict\\['smpls_cluster'\\] must be a list."),
-        ("cluster_vars", "not_a_list", "cluster_dict\\['cluster_vars'\\] must be a list."),
+        ("cluster_vars", "not_a_list", "cluster_dict\\['cluster_vars'\\] must be a numpy.ndarray."),
         ("n_folds_per_cluster", "not_an_int", "cluster_dict\\['n_folds_per_cluster'\\] must be an int."),
     ]
     for key, bad_value, msg in type_cases:
         cluster_dict = {
             "smpls": [],
             "smpls_cluster": [],
-            "cluster_vars": [],
+            "cluster_vars": np.array([]),
             "n_folds_per_cluster": 1,
         }
         cluster_dict[key] = bad_value
@@ -168,7 +168,7 @@ def test_sensitivity_elements_exceptions():
             key: -np.ones((1, n_thetas, n_rep)),
         }
         bad_kwargs["sensitivity_elements"] = sens
-        with pytest.raises(ValueError, match=rf"sensitivity_elements\['{key}'\] must be positive."):
+        with pytest.raises(ValueError, match=rf"sensitivity_elements\['{key}'\] must be positive.*"):
             DoubleMLCore(**bad_kwargs)

     # sigma2 and nu2 wrong shape
@@ -184,23 +184,3 @@ def test_sensitivity_elements_exceptions():
             ValueError, match=rf"sensitivity_elements\['{key}'\] shape \(2, 2, 5\) does not match expected \(1, 2, 5\)\."
): DoubleMLCore(**bad_kwargs) - - -@pytest.mark.ci -def test_treatment_names_exceptions(): - kwargs = valid_core_kwargs() - - bad_kwargs = kwargs.copy() - bad_kwargs["treatment_names"] = "not_a_list" - with pytest.raises(TypeError, match="treatment_names must be a list of strings."): - DoubleMLCore(**bad_kwargs) - - bad_kwargs = kwargs.copy() - bad_kwargs["treatment_names"] = [1, 2] - with pytest.raises(TypeError, match="treatment_names must be a list of strings."): - DoubleMLCore(**bad_kwargs) - - bad_kwargs = kwargs.copy() - bad_kwargs["treatment_names"] = ["treat1"] - with pytest.raises(ValueError, match=r"Length of treatment_names \(1\) does not match n_thetas \(2\)\."): - DoubleMLCore(**bad_kwargs) diff --git a/doubleml/tests/test_framework.py b/doubleml/tests/test_framework.py index 13222664f..babd05ef0 100644 --- a/doubleml/tests/test_framework.py +++ b/doubleml/tests/test_framework.py @@ -3,7 +3,7 @@ import pytest from sklearn.linear_model import LinearRegression, LogisticRegression -from doubleml.double_ml_framework import DoubleMLFramework, concat +from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework, concat from doubleml.irm.datasets import make_irm_data from doubleml.irm.irm import DoubleMLIRM @@ -28,7 +28,8 @@ def dml_framework_fixture(n_rep, n_thetas): psi_a = np.ones(shape=(n_obs, n_thetas, n_rep)) psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep)) doubleml_dict = generate_dml_dict(psi_a, psi_b) - dml_framework_obj = DoubleMLFramework(doubleml_dict) + dml_core = DoubleMLCore(**doubleml_dict) + dml_framework_obj = DoubleMLFramework(dml_core=dml_core) ci = dml_framework_obj.confint(joint=False, level=0.95) dml_framework_obj.bootstrap(method="normal") @@ -44,7 +45,8 @@ def dml_framework_fixture(n_rep, n_thetas): psi_a_2 = np.ones(shape=(n_obs, n_thetas, n_rep)) psi_b_2 = np.random.normal(size=(n_obs, n_thetas, n_rep)) + 1.0 doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) dml_framework_obj_sub_obj = dml_framework_obj - dml_framework_obj_2 ci_sub_obj = dml_framework_obj_sub_obj.confint(joint=False, level=0.95) dml_framework_obj_sub_obj.bootstrap(method="normal") diff --git a/doubleml/tests/test_framework_coverage.py b/doubleml/tests/test_framework_coverage.py index 03625cef2..253f736fb 100644 --- a/doubleml/tests/test_framework_coverage.py +++ b/doubleml/tests/test_framework_coverage.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from doubleml.double_ml_framework import DoubleMLFramework, concat +from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework, concat from ._utils import generate_dml_dict @@ -56,8 +56,10 @@ def test_dml_framework_coverage_fixture(n_rep, n_thetas): doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) # combine objects and estimate parameters - dml_framework_obj_1 = DoubleMLFramework(doubleml_dict) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_1 = DoubleMLCore(**doubleml_dict) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_1 = DoubleMLFramework(dml_core=dml_core_1) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) true_thetas = np.vstack((np.repeat(0.0, n_thetas), np.repeat(-1.0, n_thetas))).transpose() ci = dml_framework_obj_1.confint(joint=False, level=0.95) diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 
f562f98d4..8d07b29b3 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from doubleml.double_ml_framework import DoubleMLFramework, concat +from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework, concat from ._utils import generate_dml_dict @@ -12,6 +12,7 @@ n_rep = 5 # generate score samples +np.random.seed(42) psi_a = np.ones(shape=(n_obs, n_thetas, n_rep)) psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep)) doubleml_dict = generate_dml_dict(psi_a, psi_b) @@ -23,148 +24,35 @@ "nu2": np.ones(shape=(1, n_thetas, n_rep)), } -# combine objects and estimate parameters -dml_framework_obj_1 = DoubleMLFramework(doubleml_dict) +dml_core = DoubleMLCore(**doubleml_dict) +dml_framework_obj_1 = DoubleMLFramework(dml_core) @pytest.mark.ci def test_input_exceptions(): - msg = r"The dict must contain the following keys: thetas, ses, all_thetas, all_ses, var_scaling_factors, scaled_psi" - with pytest.raises(ValueError, match=msg): - test_dict = {} - DoubleMLFramework(test_dict) - - msg = r"The shape of thetas does not match the expected shape \(2,\)\." - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["thetas"] = np.ones(shape=(1,)) - DoubleMLFramework(test_dict) - - msg = r"The shape of ses does not match the expected shape \(2,\)\." - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["ses"] = np.ones(shape=(1,)) - DoubleMLFramework(test_dict) - - msg = r"The shape of all_thetas does not match the expected shape \(2, 5\)\." - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["all_thetas"] = np.ones(shape=(1, 5)) - DoubleMLFramework(test_dict) - - msg = r"The shape of all_ses does not match the expected shape \(2, 5\)\." - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["all_ses"] = np.ones(shape=(1, 5)) - DoubleMLFramework(test_dict) - - msg = r"The shape of var_scaling_factors does not match the expected shape \(2,\)\." - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["var_scaling_factors"] = np.ones(shape=(1, 5)) - DoubleMLFramework(test_dict) - - msg = r"The shape of scaled_psi does not match the expected shape \(10, 2, 5\)\." - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["scaled_psi"] = np.ones(shape=(10, 2, 5, 3)) - DoubleMLFramework(test_dict) - - msg = "doubleml_dict must be a dictionary." + msg = "dml_core must be a DoubleMLCore instance." with pytest.raises(TypeError, match=msg): DoubleMLFramework(1.0) - msg = "sensitivity_elements must be a dictionary." - with pytest.raises(TypeError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["sensitivity_elements"] = 1 - DoubleMLFramework(test_dict) - - msg = "The sensitivity_elements dict must contain the following keys: max_bias, psi_max_bias" - with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict["sensitivity_elements"] = {"sensitivities": np.ones(shape=(n_obs, n_thetas, n_rep))} - DoubleMLFramework(test_dict) - - msg = r"The shape of max_bias does not match the expected shape \(1, 2, 5\)\." 
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["sensitivity_elements"]["max_bias"] = np.ones(shape=(n_obs, n_rep))
-        DoubleMLFramework(test_dict)
-
-    msg = r"The shape of psi_max_bias does not match the expected shape \(10, 2, 5\)\."
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["sensitivity_elements"]["psi_max_bias"] = np.ones(shape=(n_obs, n_thetas, n_rep, 3))
-        DoubleMLFramework(test_dict)
-
-    msg = r"The shape of sigma2 does not match the expected shape \(1, 2, 5\)\."
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["sensitivity_elements"]["sigma2"] = np.ones(shape=(n_obs, n_thetas, n_rep))
-        DoubleMLFramework(test_dict)
-
-    msg = r"The shape of nu2 does not match the expected shape \(1, 2, 5\)\."
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["sensitivity_elements"]["nu2"] = np.ones(shape=(n_obs, n_thetas, n_rep))
-        DoubleMLFramework(test_dict)
-
-    msg = "is_cluster_data has to be boolean. 1.0 of type <class 'float'> was passed."
-    with pytest.raises(TypeError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["is_cluster_data"] = 1.0
-        DoubleMLFramework(test_dict)
-
-    msg = "If is_cluster_data is True, cluster_dict must be provided."
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["is_cluster_data"] = True
-        DoubleMLFramework(test_dict)
-
-    msg = "cluster_dict must be a dictionary."
-    with pytest.raises(TypeError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["is_cluster_data"] = True
-        test_dict["cluster_dict"] = 1.0
-        DoubleMLFramework(test_dict)
-
-    msg = (
-        "The cluster_dict must contain the following keys: smpls, smpls_cluster,"
-        " cluster_vars, n_folds_per_cluster. Got: cluster_ids."
-    )
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["is_cluster_data"] = True
-        test_dict["cluster_dict"] = {"cluster_ids": np.ones(shape=(n_obs, n_rep))}
-        DoubleMLFramework(test_dict)
-
-    test_dict = copy.deepcopy(doubleml_dict)
-    framework_names = DoubleMLFramework(test_dict)
+    test_framework = DoubleMLFramework(dml_core)
 
     msg = "treatment_names must be a list. Got 1 of type <class 'int'>."
     with pytest.raises(TypeError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["treatment_names"] = 1
-        DoubleMLFramework(test_dict)
+        DoubleMLFramework(dml_core, treatment_names=1)
 
     with pytest.raises(TypeError, match=msg):
-        framework_names.treatment_names = 1
+        test_framework.treatment_names = 1
 
     msg = r"treatment_names must be a list of strings. At least one element is not a string: \['test', 1\]."
     with pytest.raises(TypeError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["treatment_names"] = ["test", 1]
-        DoubleMLFramework(test_dict)
+        DoubleMLFramework(dml_core, treatment_names=["test", 1])
 
     with pytest.raises(TypeError, match=msg):
-        framework_names.treatment_names = ["test", 1]
+        test_framework.treatment_names = ["test", 1]
 
     msg = "The length of treatment_names does not match the number of treatments. Got 2 treatments and 3 treatment names."
with pytest.raises(ValueError, match=msg): - test_dict = copy.deepcopy(doubleml_dict) - test_dict["treatment_names"] = ["test", "test2", "test3"] - DoubleMLFramework(test_dict) + DoubleMLFramework(dml_core, treatment_names=["test", "test2", "test3"]) with pytest.raises(ValueError, match=msg): - framework_names.treatment_names = ["test", "test2", "test3"] + test_framework.treatment_names = ["test", "test2", "test3"] def test_operation_exceptions(): @@ -179,21 +67,24 @@ def test_operation_exceptions(): psi_a_2 = np.ones(shape=(n_obs + 1, n_thetas, n_rep)) psi_b_2 = np.random.normal(size=(n_obs + 1, n_thetas, n_rep)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = dml_framework_obj_1 + dml_framework_obj_2 msg = "The number of parameters theta in DoubleMLFrameworks must be the same. Got 2 and 3." with pytest.raises(ValueError, match=msg): psi_a_2 = np.ones(shape=(n_obs, n_thetas + 1, n_rep)) psi_b_2 = np.random.normal(size=(n_obs, n_thetas + 1, n_rep)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = dml_framework_obj_1 + dml_framework_obj_2 msg = "The number of replications in DoubleMLFrameworks must be the same. Got 5 and 6." with pytest.raises(ValueError, match=msg): psi_a_2 = np.ones(shape=(n_obs, n_thetas, n_rep + 1)) psi_b_2 = np.random.normal(size=(n_obs, n_thetas, n_rep + 1)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = dml_framework_obj_1 + dml_framework_obj_2 # subtraction @@ -207,21 +98,24 @@ def test_operation_exceptions(): psi_a_2 = np.ones(shape=(n_obs + 1, n_thetas, n_rep)) psi_b_2 = np.random.normal(size=(n_obs + 1, n_thetas, n_rep)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = dml_framework_obj_1 - dml_framework_obj_2 msg = "The number of parameters theta in DoubleMLFrameworks must be the same. Got 2 and 3." with pytest.raises(ValueError, match=msg): psi_a_2 = np.ones(shape=(n_obs, n_thetas + 1, n_rep)) psi_b_2 = np.random.normal(size=(n_obs, n_thetas + 1, n_rep)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = dml_framework_obj_1 - dml_framework_obj_2 msg = "The number of replications in DoubleMLFrameworks must be the same. Got 5 and 6." 
with pytest.raises(ValueError, match=msg): psi_a_2 = np.ones(shape=(n_obs, n_thetas, n_rep + 1)) psi_b_2 = np.random.normal(size=(n_obs, n_thetas, n_rep + 1)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = dml_framework_obj_1 - dml_framework_obj_2 # multiplication @@ -243,27 +137,24 @@ def test_operation_exceptions(): psi_a_2 = np.ones(shape=(n_obs + 1, n_thetas, n_rep)) psi_b_2 = np.random.normal(size=(n_obs + 1, n_thetas, n_rep)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = concat([dml_framework_obj_1, dml_framework_obj_2]) msg = "The number of replications in DoubleMLFrameworks must be the same. Got 5 and 6." with pytest.raises(ValueError, match=msg): psi_a_2 = np.ones(shape=(n_obs, n_thetas, n_rep + 1)) psi_b_2 = np.random.normal(size=(n_obs, n_thetas, n_rep + 1)) doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2) - dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) + dml_core_2 = DoubleMLCore(**doubleml_dict_2) + dml_framework_obj_2 = DoubleMLFramework(dml_core=dml_core_2) _ = concat([dml_framework_obj_1, dml_framework_obj_2]) msg = "concat not yet implemented with clustering." with pytest.raises(NotImplementedError, match=msg): doubleml_dict_cluster = generate_dml_dict(psi_a_2, psi_b_2) - doubleml_dict_cluster["is_cluster_data"] = True - doubleml_dict_cluster["cluster_dict"] = { - "smpls": np.ones(shape=(n_obs, n_rep)), - "smpls_cluster": np.ones(shape=(n_obs, n_rep)), - "cluster_vars": np.ones(shape=(n_obs, n_rep)), - "n_folds_per_cluster": 2, - } - dml_framework_obj_cluster = DoubleMLFramework(doubleml_dict_cluster) + dml_core_cluster = DoubleMLCore(**doubleml_dict_cluster) + dml_core_cluster.is_cluster_data = True + dml_framework_obj_cluster = DoubleMLFramework(dml_core_cluster) _ = concat([dml_framework_obj_cluster, dml_framework_obj_cluster]) # cluster compatibility @@ -285,7 +176,10 @@ def test_p_adjust_exceptions(): @pytest.mark.ci def test_sensitivity_exceptions(): - dml_framework_no_sensitivity = DoubleMLFramework(generate_dml_dict(psi_a, psi_b)) + dml_no_sensitivity_dict = copy.deepcopy(doubleml_dict) + dml_no_sensitivity_dict.pop("sensitivity_elements") + dml_core_no_sensitivity = DoubleMLCore(**dml_no_sensitivity_dict) + dml_framework_no_sensitivity = DoubleMLFramework(dml_core_no_sensitivity) msg = "Sensitivity analysis is not implemented for this model." with pytest.raises(NotImplementedError, match=msg): _ = dml_framework_no_sensitivity._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.1, rho=1.0, level=0.95) @@ -394,47 +288,10 @@ def test_sensitivity_exceptions(): with pytest.raises(ValueError, match=msg): _ = dml_framework_obj_1.sensitivity_plot(idx_treatment=2) - # test benchmark sensitivity elements - sensitivity_dict_benchmark = generate_dml_dict(psi_a, psi_b) - sensitivity_dict_benchmark["sensitivity_elements"] = { - "max_bias": np.ones(shape=(1, n_thetas, n_rep)), - "psi_max_bias": np.ones(shape=(n_obs, n_thetas, n_rep)), - "sigma2": np.ones(shape=(1, n_thetas, n_rep)), - "nu2": 5.0, - } - msg = "The sensitivity element nu2 must be a numpy array." 
- with pytest.raises(TypeError, match=msg): - _ = DoubleMLFramework(sensitivity_dict_benchmark) - - sensitivity_dict_benchmark["sensitivity_elements"].update( - { - "sigma2": 5.0, - "nu2": np.ones(shape=(1, n_thetas, n_rep)), - } - ) - msg = "The sensitivity element sigma2 must be a numpy array." - with pytest.raises(TypeError, match=msg): - _ = DoubleMLFramework(sensitivity_dict_benchmark) - - sensitivity_dict_benchmark["sensitivity_elements"].update( - { - "sigma2": np.ones(shape=(1, n_thetas, n_rep)), - "nu2": -1.0 * np.ones(shape=(1, n_thetas, n_rep)), - } - ) - msg = ( - r"sensitivity_elements sigma2 and nu2 have to be positive\. " - r"Got sigma2 \[\[\[1\. 1\. 1\. 1\. 1\.\]\n\s+\[1\. 1\. 1\. 1\. 1\.\]\]\] " - r"and nu2 \[\[\[-1\. -1\. -1\. -1\. -1\.\]\n\s+\[-1\. -1\. -1\. -1\. -1\.\]\]\]\. " - r"Most likely this is due to low quality learners \(especially propensity scores\)\." - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLFramework(sensitivity_dict_benchmark) - @pytest.mark.ci def test_framework_sensitivity_plot_input(): - dml_framework_obj_plot = DoubleMLFramework(doubleml_dict) + dml_framework_obj_plot = DoubleMLFramework(dml_core=dml_core) msg = r"Apply sensitivity_analysis\(\) to include senario in sensitivity_plot. " with pytest.raises(ValueError, match=msg): diff --git a/doubleml/tests/test_framework_pval_corrections.py b/doubleml/tests/test_framework_pval_corrections.py index b69db44fe..77a7a61fa 100644 --- a/doubleml/tests/test_framework_pval_corrections.py +++ b/doubleml/tests/test_framework_pval_corrections.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from doubleml.double_ml_framework import DoubleMLFramework +from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework from ._utils import generate_dml_dict @@ -29,7 +29,8 @@ def dml_framework_tstat_pval_fixture(n_rep, n_thetas): psi_a = np.ones(shape=(n_obs, n_thetas, n_rep)) psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep)) doubleml_dict = generate_dml_dict(psi_a, psi_b) - dml_framework_obj = DoubleMLFramework(doubleml_dict) + dml_core = DoubleMLCore(**doubleml_dict) + dml_framework_obj = DoubleMLFramework(dml_core=dml_core) result_dict = { "dml_framework_obj": dml_framework_obj, @@ -83,7 +84,8 @@ def dml_framework_pval_cov_fixture(n_rep, sig_level): psi_a = np.ones(shape=(n_obs, n_thetas, n_rep)) psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep)) doubleml_dict = generate_dml_dict(psi_a, psi_b) - dml_framework_obj = DoubleMLFramework(doubleml_dict) + dml_core = DoubleMLCore(**doubleml_dict) + dml_framework_obj = DoubleMLFramework(dml_core=dml_core) p_vals = dml_framework_obj.pvals all_p_vals = dml_framework_obj.all_pvals diff --git a/doubleml/tests/test_framework_sensitivity.py b/doubleml/tests/test_framework_sensitivity.py index 496f9de92..072945972 100644 --- a/doubleml/tests/test_framework_sensitivity.py +++ b/doubleml/tests/test_framework_sensitivity.py @@ -79,15 +79,15 @@ def test_dml_framework_sensitivity_shapes(dml_framework_sensitivity_fixture): for obj in object_list: assert dml_framework_sensitivity_fixture[obj]._sensitivity_implemented for key in var_keys: - assert dml_framework_sensitivity_fixture[obj]._sensitivity_elements[key].shape == (1, n_thetas, n_rep) + assert dml_framework_sensitivity_fixture[obj].sensitivity_elements[key].shape == (1, n_thetas, n_rep) for key in score_keys: - assert dml_framework_sensitivity_fixture[obj]._sensitivity_elements[key].shape == (n_obs, n_thetas, n_rep) + assert 
dml_framework_sensitivity_fixture[obj].sensitivity_elements[key].shape == (n_obs, n_thetas, n_rep) # separate test for concat for key in var_keys: - assert dml_framework_sensitivity_fixture["dml_framework_obj_concat"]._sensitivity_elements[key].shape == (1, 2, n_rep) + assert dml_framework_sensitivity_fixture["dml_framework_obj_concat"].sensitivity_elements[key].shape == (1, 2, n_rep) for key in score_keys: - assert dml_framework_sensitivity_fixture["dml_framework_obj_concat"]._sensitivity_elements[key].shape == ( + assert dml_framework_sensitivity_fixture["dml_framework_obj_concat"].sensitivity_elements[key].shape == ( n_obs, 2, n_rep, diff --git a/doubleml/utils/_checks.py b/doubleml/utils/_checks.py index 0eabf53b0..edc828fba 100644 --- a/doubleml/utils/_checks.py +++ b/doubleml/utils/_checks.py @@ -375,10 +375,10 @@ def _check_framework_compatibility(dml_framework_1, dml_framework_2, check_treat f"Got {str(dml_framework_1.n_thetas)} and {str(dml_framework_2.n_thetas)}." ) - if dml_framework_1._is_cluster_data != dml_framework_2._is_cluster_data: + if dml_framework_1.is_cluster_data != dml_framework_2.is_cluster_data: raise ValueError( "The cluster structure in DoubleMLFrameworks must be the same. " - f"Got {str(dml_framework_1._is_cluster_data)} and {str(dml_framework_2._is_cluster_data)}." + f"Got {str(dml_framework_1.is_cluster_data)} and {str(dml_framework_2.is_cluster_data)}." ) return From 78e6baa71588307ffaaa2682ce0e77d64a3377f2 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Sun, 23 Nov 2025 19:44:10 +0100 Subject: [PATCH 5/5] Remove 'thetas' and 'ses' from DoubleMLCore class and related updates in tests --- doubleml/double_ml.py | 2 -- doubleml/double_ml_framework.py | 33 ++++---------------------- doubleml/irm/apos.py | 2 -- doubleml/tests/_utils.py | 2 -- doubleml/tests/test_core_exceptions.py | 4 +--- 5 files changed, 6 insertions(+), 37 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 2ed7c812b..69acf3860 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -625,9 +625,7 @@ def construct_framework(self): scaled_psi_reshape = np.transpose(scaled_psi, (0, 2, 1)) doubleml_dict = { - "thetas": self.coef, "all_thetas": self.all_coef, - "ses": self.se, "all_ses": self.all_se, "var_scaling_factors": self._var_scaling_factors, "scaled_psi": scaled_psi_reshape, diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 6902a8d6a..99941c073 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -24,8 +24,6 @@ @dataclass class DoubleMLCore: - thetas: np.ndarray - ses: np.ndarray all_thetas: np.ndarray all_ses: np.ndarray var_scaling_factors: np.ndarray @@ -42,10 +40,6 @@ class DoubleMLCore: Parameters ---------- - thetas : np.ndarray - Estimated target parameters (shape: (n_thetas,)). - ses : np.ndarray - Estimated standard errors (shape: (n_thetas,)). all_thetas : np.ndarray Estimated target parameters for each repetition (shape: (n_thetas, n_rep)). 
all_ses : np.ndarray @@ -80,8 +74,6 @@ def __post_init__(self): def _check_arrays(self): """Type and shape checks for input arrays.""" arrays = { - "thetas": self.thetas, - "ses": self.ses, "all_thetas": self.all_thetas, "all_ses": self.all_ses, "var_scaling_factors": self.var_scaling_factors, @@ -92,8 +84,6 @@ def _check_arrays(self): raise TypeError(f"{name} must be a numpy.ndarray, got {type(arr)}.") expected_shapes = { - "thetas": (self._n_thetas,), - "ses": (self._n_thetas,), "all_thetas": (self._n_thetas, self._n_rep), "all_ses": (self._n_thetas, self._n_rep), "var_scaling_factors": (self._n_thetas,), @@ -192,6 +182,9 @@ def __init__( self._check_treatment_names(treatment_names) self._treatment_names = treatment_names + # aggregate estimates + self._thetas, self._ses = _aggregate_coefs_and_ses(self.all_thetas, self.all_ses) + # initialize sensitivity analysis attributes self._sensitivity_implemented = self._dml_core.sensitivity_elements is not None self._benchmark_available = self._sensitivity_implemented and all( @@ -237,7 +230,7 @@ def thetas(self): """ Estimated target parameters (shape (``n_thetas``,)). """ - return self._dml_core.thetas + return self._thetas @property def all_thetas(self): @@ -251,7 +244,7 @@ def ses(self): """ Estimated standard errors (shape (``n_thetas``,)). """ - return self._dml_core.ses + return self._ses @property def all_ses(self): @@ -457,11 +450,8 @@ def __add__(self, other): # compute standard errors (Uses factor 1/n for scaling!) sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1)) all_ses = np.sqrt(sigma2_hat) - thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses) doubleml_dict = { - "thetas": thetas, - "ses": ses, "all_thetas": all_thetas, "all_ses": all_ses, "var_scaling_factors": var_scaling_factors, @@ -504,11 +494,8 @@ def __sub__(self, other): # compute standard errors sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1)) all_ses = np.sqrt(sigma2_hat) - thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses) doubleml_dict = { - "thetas": thetas, - "ses": ses, "all_thetas": all_thetas, "all_ses": all_ses, "var_scaling_factors": var_scaling_factors, @@ -541,17 +528,12 @@ def __rsub__(self, other): # TODO: Restrict to linear? 
     def __mul__(self, other):
         if isinstance(other, (int, float)):
-            thetas = np.multiply(other, self.thetas)
             all_thetas = np.multiply(other, self.all_thetas)
-            var_scaling_factors = self.var_scaling_factors
-            ses = np.multiply(other, self.ses)
             all_ses = np.multiply(other, self.all_ses)
             scaled_psi = np.multiply(other, self.scaled_psi)
 
             doubleml_dict = {
-                "thetas": thetas,
-                "ses": ses,
                 "all_thetas": all_thetas,
                 "all_ses": all_ses,
-                "var_scaling_factors": var_scaling_factors,
+                "var_scaling_factors": self.var_scaling_factors,
@@ -1101,17 +1083,12 @@ def concat(objs):
     var_scaling_factors = np.concatenate([obj.var_scaling_factors for obj in objs], axis=0)
     scaled_psi = np.concatenate([obj.scaled_psi for obj in objs], axis=1)
 
-    thetas = np.concatenate([obj.thetas for obj in objs], axis=0)
-    ses = np.concatenate([obj.ses for obj in objs], axis=0)
-
     if any(obj.is_cluster_data for obj in objs):
         raise NotImplementedError("concat not yet implemented with clustering.")
     else:
         is_cluster_data = False
 
     doubleml_dict = {
-        "thetas": thetas,
-        "ses": ses,
         "all_thetas": all_thetas,
         "all_ses": all_ses,
         "var_scaling_factors": var_scaling_factors,
diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index d6713a384..4e6dc944f 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -715,8 +715,6 @@ def causal_contrast(self, reference_levels):
                 # update sensitivity elements with sharper bounds
                 current_sensitivity_dict = self._compute_causal_contrast_sensitivity_dict(model=model, ref_model=ref_model)
                 updated_dml_core = DoubleMLCore(
-                    thetas=diff_framework.thetas,
-                    ses=diff_framework.ses,
                     all_thetas=diff_framework.all_thetas,
                     all_ses=diff_framework.all_ses,
                     var_scaling_factors=diff_framework.var_scaling_factors,
diff --git a/doubleml/tests/_utils.py b/doubleml/tests/_utils.py
index 60416246c..47b506ae2 100644
--- a/doubleml/tests/_utils.py
+++ b/doubleml/tests/_utils.py
@@ -106,8 +106,6 @@ def generate_dml_dict(psi_a, psi_b):
     scaled_psi = psi_b / np.mean(psi_a, axis=0)
 
     doubleml_dict = {
-        "thetas": thetas,
-        "ses": ses,
         "all_thetas": all_thetas,
         "all_ses": all_ses,
         "var_scaling_factors": var_scaling_factors,
diff --git a/doubleml/tests/test_core_exceptions.py b/doubleml/tests/test_core_exceptions.py
index 835108dc8..a40c8d609 100644
--- a/doubleml/tests/test_core_exceptions.py
+++ b/doubleml/tests/test_core_exceptions.py
@@ -36,15 +36,13 @@ def test_scaled_psi_shape_and_type():
 def test_arrays():
     kwargs = valid_core_kwargs()
     # Type checks
-    for key in ["thetas", "ses", "all_thetas", "all_ses", "var_scaling_factors"]:
+    for key in ["all_thetas", "all_ses", "var_scaling_factors"]:
         bad_kwargs = kwargs.copy()
         bad_kwargs[key] = "not_an_array"
         with pytest.raises(TypeError, match=f"{key} must be a numpy.ndarray"):
             DoubleMLCore(**bad_kwargs)
     # Shape checks
     shapes = {
-        "thetas": (3,),
-        "ses": (3,),
         "all_thetas": (3, 5),
         "all_ses": (3, 5),
         "var_scaling_factors": (3,),
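
The exception tests above lean on two helpers defined outside this series: generate_dml_dict(psi_a, psi_b) in doubleml/tests/_utils.py and valid_core_kwargs() in doubleml/tests/test_core_exceptions.py. The latter is never shown; purely for orientation, a hypothetical reconstruction inferred from the shape checks in test_arrays (n_thetas=3, n_rep=5; the n_obs default and array values below are arbitrary, and the real helper may differ) could look like:

    import numpy as np

    def valid_core_kwargs(n_obs=10, n_thetas=3, n_rep=5):
        # Hypothetical stand-in for the helper in test_core_exceptions.py;
        # only the shapes are grounded in the checks exercised above.
        return {
            "all_thetas": np.zeros((n_thetas, n_rep)),
            "all_ses": np.ones((n_thetas, n_rep)),
            "var_scaling_factors": np.full(n_thetas, float(n_obs)),
            "scaled_psi": np.zeros((n_obs, n_thetas, n_rep)),
        }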
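
Taken together, these patches change the construction flow: callers now build a DoubleMLCore from the per-repetition arrays and hand it to DoubleMLFramework, which aggregates the point estimates and standard errors itself via _aggregate_coefs_and_ses in __init__. A minimal end-to-end sketch (not part of the series; the dummy arrays, seed, and treatment names are illustrative only):

    import numpy as np

    from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework

    # Dummy inputs with the shapes DoubleMLCore validates:
    #   all_thetas, all_ses:  (n_thetas, n_rep)
    #   var_scaling_factors:  (n_thetas,)
    #   scaled_psi:           (n_obs, n_thetas, n_rep)
    n_obs, n_thetas, n_rep = 10, 2, 5
    rng = np.random.default_rng(42)

    dml_core = DoubleMLCore(
        all_thetas=rng.normal(size=(n_thetas, n_rep)),
        all_ses=np.abs(rng.normal(size=(n_thetas, n_rep))) + 0.1,  # ses kept positive
        var_scaling_factors=np.full(n_thetas, float(n_obs)),
        scaled_psi=rng.normal(size=(n_obs, n_thetas, n_rep)),
    )

    # 'thetas' and 'ses' are no longer passed in; the framework derives them
    # from all_thetas/all_ses on construction.
    framework = DoubleMLFramework(dml_core, treatment_names=["d1", "d2"])
    print(framework.thetas, framework.ses)
    print(framework.confint(joint=False, level=0.95))

Arithmetic still routes through the same dict-based constructors, so e.g. framework * 2.0 rescales all_thetas, all_ses, and scaled_psi by the constant while reusing the framework's var_scaling_factors.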