Skip to content

Commit 70300a0

Browse files
authored
Merge pull request #354 from DoubleML/s-add-doctest
Add docstring tests
2 parents 92081ad + 13080e1 commit 70300a0

File tree

16 files changed

+86
-51
lines changed

16 files changed

+86
-51
lines changed

doubleml/data/did_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class DoubleMLDIDData(DoubleMLData):
6363
>>> from doubleml.did.datasets import make_did_SZ2020
6464
>>> # initialization from pandas.DataFrame
6565
>>> df = make_did_SZ2020(return_type='DataFrame')
66-
>>> obj_dml_data_from_df = DoubleMLDIDData(df, 'y', 'd', 't')
66+
>>> obj_dml_data_from_df = DoubleMLDIDData(df, 'y', 'd')
6767
>>> # initialization from np.ndarray
6868
>>> (x, y, d, t) = make_did_SZ2020(return_type='array')
6969
>>> obj_dml_data_from_array = DoubleMLDIDData.from_arrays(x, y, d, t=t)

doubleml/data/rdd_data.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,17 @@ class DoubleMLRDDData(DoubleMLData):
6161
6262
Examples
6363
--------
64+
>>> import numpy as np
65+
>>> import pandas as pd
6466
>>> from doubleml import DoubleMLRDDData
65-
>>> from doubleml.rdd.datasets import make_rdd_data
67+
>>> from doubleml.rdd.datasets import make_simple_rdd_data
6668
>>> # initialization from pandas.DataFrame
67-
>>> df = make_rdd_data(return_type='DataFrame')
68-
>>> obj_dml_data_from_df = DoubleMLRDDData(df, 'y', 'd', 's')
69+
>>> data = make_simple_rdd_data(return_type='DataFrame')
70+
>>> columns = ["y", "d", "score"] + ["x" + str(i) for i in range(data["X"].shape[1])]
71+
>>> df = pd.DataFrame(np.column_stack((data["Y"], data["D"], data["score"], data["X"])), columns=columns)
72+
>>> obj_dml_data_from_df = DoubleMLRDDData(df, 'y', 'd', score_col='score')
6973
>>> # initialization from np.ndarray
70-
>>> (x, y, d, s) = make_rdd_data(return_type='array')
71-
>>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(x, y, d, s=s)
74+
>>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(data["X"], data["Y"], data["D"], score=data["score"])
7275
"""
7376

7477
def __init__(
@@ -160,10 +163,13 @@ def from_arrays(
160163
161164
Examples
162165
--------
166+
>>> import numpy as np
167+
>>> import pandas as pd
163168
>>> from doubleml import DoubleMLRDDData
164-
>>> from doubleml.rdd.datasets import make_rdd_data
165-
>>> (x, y, d, s) = make_rdd_data(return_type='array')
166-
>>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(x, y, d, s=s)
169+
>>> from doubleml.rdd.datasets import make_simple_rdd_data
170+
>>> # initialization from np.ndarray
171+
>>> data = make_simple_rdd_data(return_type='DataFrame')
172+
>>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(data["X"], data["Y"], data["D"], score=data["score"])
167173
"""
168174
# Prepare score variable
169175
score = check_array(score, ensure_2d=False, allow_nd=False)

doubleml/data/ssm_data.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,9 @@ class DoubleMLSSMData(DoubleMLData):
6666
>>> from doubleml.irm.datasets import make_ssm_data
6767
>>> # initialization from pandas.DataFrame
6868
>>> df = make_ssm_data(return_type='DataFrame')
69-
>>> obj_dml_data_from_df = DoubleMLSSMData(df, 'y', 'd', 's')
69+
>>> obj_dml_data_from_df = DoubleMLSSMData(df, 'y', 'd', s_col='s')
7070
>>> # initialization from np.ndarray
71-
>>> (x, y, d, s) = make_ssm_data(return_type='array')
71+
>>> (x, y, d, _, s) = make_ssm_data(return_type='array')
7272
>>> obj_dml_data_from_array = DoubleMLSSMData.from_arrays(x, y, d, s=s)
7373
"""
7474

@@ -186,7 +186,7 @@ def from_arrays(
186186
--------
187187
>>> from doubleml import DoubleMLSSMData
188188
>>> from doubleml.irm.datasets import make_ssm_data
189-
>>> (x, y, d, s) = make_ssm_data(return_type='array')
189+
>>> (x, y, d, _, s) = make_ssm_data(return_type='array')
190190
>>> obj_dml_data_from_array = DoubleMLSSMData.from_arrays(x, y, d, s=s)
191191
"""
192192
# Prepare selection variable

doubleml/did/did.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@ class DoubleMLDID(LinearScoreMixin, DoubleML):
7575
>>> obj_dml_data = dml.DoubleMLDIDData(data, 'y', 'd')
7676
>>> dml_did_obj = dml.DoubleMLDID(obj_dml_data, ml_g, ml_m)
7777
>>> dml_did_obj.fit().summary
78-
coef std err t P>|t| 2.5 % 97.5 %
79-
d -2.685104 1.798071 -1.493325 0.135352 -6.209257 0.83905
78+
coef std err t P>|t| 2.5 % 97.5 %
79+
d -2.840718 1.760386 -1.613691 0.106595 -6.291011 0.609575
80+
8081
"""
8182

8283
def __init__(

doubleml/did/did_cs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
7474
>>> obj_dml_data = dml.DoubleMLDIDData(data, 'y', 'd', t_col='t')
7575
>>> dml_did_obj = dml.DoubleMLDIDCS(obj_dml_data, ml_g, ml_m)
7676
>>> dml_did_obj.fit().summary
77-
coef std err t P>|t| 2.5 % 97.5 %
78-
d -6.604603 8.725802 -0.756905 0.449107 -23.706862 10.497655
77+
coef std err t P>|t| 2.5 % 97.5 %
78+
d -4.9944 7.561785 -0.660479 0.508947 -19.815226 9.826426
7979
"""
8080

8181
def __init__(

doubleml/did/did_multi.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,22 @@ class DoubleMLDIDMulti:
134134
... gt_combinations="standard",
135135
... control_group="never_treated",
136136
... )
137-
>>> print(dml_did_obj.fit())
137+
>>> print(dml_did_obj.fit().summary)
138+
coef std err ... 2.5 % 97.5 %
139+
ATT(2025-03,2025-01,2025-02) -0.797617 0.459617 ... -1.698450 0.103215
140+
ATT(2025-03,2025-02,2025-03) 0.270311 0.456453 ... -0.624320 1.164941
141+
ATT(2025-03,2025-02,2025-04) 0.628213 0.895275 ... -1.126494 2.382919
142+
ATT(2025-03,2025-02,2025-05) 1.281360 1.327121 ... -1.319750 3.882470
143+
ATT(2025-04,2025-01,2025-02) -0.078095 0.407758 ... -0.877287 0.721097
144+
ATT(2025-04,2025-02,2025-03) 0.223625 0.479288 ... -0.715764 1.163013
145+
ATT(2025-04,2025-03,2025-04) 1.008674 0.455564 ... 0.115785 1.901563
146+
ATT(2025-04,2025-03,2025-05) 2.941047 0.832991 ... 1.308415 4.573679
147+
ATT(2025-05,2025-01,2025-02) -0.102282 0.454129 ... -0.992359 0.787795
148+
ATT(2025-05,2025-02,2025-03) 0.108742 0.547794 ... -0.964914 1.182399
149+
ATT(2025-05,2025-03,2025-04) 0.253610 0.422984 ... -0.575423 1.082643
150+
ATT(2025-05,2025-04,2025-05) 1.264255 0.487934 ... 0.307923 2.220587
151+
<BLANKLINE>
152+
[12 rows x 6 columns]
138153
"""
139154

140155
def __init__(

doubleml/double_ml.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,14 +1207,12 @@ def evaluate_learners(self, learners=None, metric=_rmse):
12071207
>>> data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type='DataFrame')
12081208
>>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd')
12091209
>>> dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m)
1210-
>>> dml_irm_obj.fit()
1210+
>>> _ = dml_irm_obj.fit()
12111211
>>> def mae(y_true, y_pred):
1212-
>>> subset = np.logical_not(np.isnan(y_true))
1213-
>>> return mean_absolute_error(y_true[subset], y_pred[subset])
1212+
... subset = np.logical_not(np.isnan(y_true))
1213+
... return mean_absolute_error(y_true[subset], y_pred[subset])
12141214
>>> dml_irm_obj.evaluate_learners(metric=mae)
1215-
{'ml_g0': array([[0.85974356]]),
1216-
'ml_g1': array([[0.85280376]]),
1217-
'ml_m': array([[0.35365143]])}
1215+
{'ml_g0': array([[0.88173585]]), 'ml_g1': array([[0.83854057]]), 'ml_m': array([[0.35871235]])}
12181216
"""
12191217
# if no learners are provided try to evaluate all learners
12201218
if learners is None:

doubleml/double_ml_sampling_mixins.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -91,19 +91,18 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None):
9191
>>> ml_m = learner
9292
>>> obj_dml_data = make_plr_CCDDHNR2018(n_obs=10, alpha=0.5)
9393
>>> dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m)
94-
>>> # simple sample splitting with two folds and without cross-fitting
95-
>>> smpls = ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])
96-
>>> dml_plr_obj.set_sample_splitting(smpls)
9794
>>> # sample splitting with two folds and cross-fitting
9895
>>> smpls = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
99-
>>> ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])]
100-
>>> dml_plr_obj.set_sample_splitting(smpls)
96+
... ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])]
97+
>>> dml_plr_obj.set_sample_splitting(smpls) # doctest: +ELLIPSIS
98+
<doubleml.plm.plr.DoubleMLPLR object at 0x...>
10199
>>> # sample splitting with two folds and repeated cross-fitting with n_rep = 2
102100
>>> smpls = [[([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
103-
>>> ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
104-
>>> [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
105-
>>> ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
106-
>>> dml_plr_obj.set_sample_splitting(smpls)
101+
... ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
102+
... [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
103+
... ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
104+
>>> dml_plr_obj.set_sample_splitting(smpls) # doctest: +ELLIPSIS
105+
<doubleml.plm.plr.DoubleMLPLR object at 0x...>
107106
"""
108107
self._smpls, self._smpls_cluster, self._n_rep, self._n_folds = _check_sample_splitting(
109108
all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data, n_obs=self._n_obs_sample_splitting

doubleml/irm/cvar.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,9 @@ class DoubleMLCVAR(LinearScoreMixin, DoubleML):
9191
>>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd')
9292
>>> dml_cvar_obj = dml.DoubleMLCVAR(obj_dml_data, ml_g, ml_m, treatment=1, quantile=0.5)
9393
>>> dml_cvar_obj.fit().summary
94-
coef std err t P>|t| 2.5 % 97.5 %
95-
d 1.591441 0.095781 16.615498 5.382582e-62 1.403715 1.779167
94+
coef std err t P>|t| 2.5 % 97.5 %
95+
d 1.588364 0.096616 16.43989 9.909942e-61 1.398999 1.777728
96+
9697
"""
9798

9899
def __init__(

doubleml/irm/iivm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ class DoubleMLIIVM(LinearScoreMixin, DoubleML):
9191
>>> dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data, ml_g, ml_m, ml_r)
9292
>>> dml_iivm_obj.fit().summary
9393
coef std err t P>|t| 2.5 % 97.5 %
94-
d 0.378351 0.190648 1.984551 0.047194 0.004688 0.752015
94+
d 0.362398 0.191578 1.891649 0.058538 -0.013088 0.737884
9595
9696
Notes
9797
-----

0 commit comments

Comments
 (0)