diff --git a/tests/functional/test_dsl.py b/tests/functional/test_dsl.py index 6ee9eff..841da6d 100644 --- a/tests/functional/test_dsl.py +++ b/tests/functional/test_dsl.py @@ -3,33 +3,27 @@ """ import numpy as np +from patsy import dmatrices from dsl.dsl import dsl def test_dsl_linear_regression(sample_data, sample_prediction): """Test DSL with linear regression""" - # Add prediction to data - sample_data["prediction"] = sample_prediction - # Extract labeled indicator labeled_ind = sample_data["labeled"].values + # Create design matrices from formula + y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data, return_type="dataframe") + # Run DSL result = dsl( - model="lm", - formula="y ~ x1 + x2 + x3 + x4 + x5", - predicted_var=["y"], - prediction="prediction", - data=sample_data, + X=X_mat.values, + y=y_mat.values.flatten(), labeled_ind=labeled_ind, sample_prob=sample_data["sample_prob"].values, - sl_method="grf", - feature=["x1", "x2", "x3", "x4", "x5"], - family="gaussian", - cross_fit=2, - sample_split=2, - seed=1234, + model="lm", + method="linear", ) # Check result @@ -63,27 +57,20 @@ def test_dsl_linear_regression(sample_data, sample_prediction): def test_dsl_logistic_regression(sample_data, sample_prediction): """Test DSL with logistic regression""" - # Add prediction to data - sample_data["prediction"] = sample_prediction - # Extract labeled indicator labeled_ind = sample_data["labeled"].values + # Create design matrices from formula + y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data, return_type="dataframe") + # Run DSL result = dsl( - model="logit", - formula="y ~ x1 + x2 + x3 + x4 + x5", - predicted_var=["y"], - prediction="prediction", - data=sample_data, + X=X_mat.values, + y=y_mat.values.flatten(), labeled_ind=labeled_ind, sample_prob=sample_data["sample_prob"].values, - sl_method="grf", - feature=["x1", "x2", "x3", "x4", "x5"], - family="binomial", - cross_fit=2, - sample_split=2, - seed=1234, + model="logit", + method="logistic", ) # Check result @@ -117,27 +104,20 @@ def test_dsl_logistic_regression(sample_data, sample_prediction): def test_dsl_fixed_effects(sample_data, sample_prediction): """Test DSL with fixed effects""" - # Add prediction to data - sample_data["prediction"] = sample_prediction - # Extract labeled indicator labeled_ind = sample_data["labeled"].values - # Run DSL + # Create design matrices from formula (basic formula, fixed effects handling may vary) + y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data, return_type="dataframe") + + # Run DSL with fixed_effects method result = dsl( - model="felm", - formula="y ~ x1 + x2 + x3 + x4 + x5 | fe1 + fe2", - predicted_var=["y"], - prediction="prediction", - data=sample_data, + X=X_mat.values, + y=y_mat.values.flatten(), labeled_ind=labeled_ind, sample_prob=sample_data["sample_prob"].values, - sl_method="grf", - feature=["x1", "x2", "x3", "x4", "x5"], - family="gaussian", - cross_fit=2, - sample_split=2, - seed=1234, + model="felm", + method="fixed_effects", ) # Check result @@ -174,20 +154,17 @@ def test_dsl_without_prediction(sample_data): # Extract labeled indicator labeled_ind = sample_data["labeled"].values + # Create design matrices from formula + y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data, return_type="dataframe") + # Run DSL result = dsl( - model="lm", - formula="y ~ x1 + x2 + x3 + x4 + x5", - predicted_var=["y"], - data=sample_data, + X=X_mat.values, + y=y_mat.values.flatten(), labeled_ind=labeled_ind, sample_prob=sample_data["sample_prob"].values, - sl_method="grf", - feature=["x1", "x2", "x3", "x4", "x5"], - family="gaussian", - cross_fit=2, - sample_split=2, - seed=1234, + model="lm", + method="linear", ) # Check result @@ -221,27 +198,20 @@ def test_dsl_without_prediction(sample_data): def test_dsl_without_labeled(sample_data, sample_prediction): """Test DSL without providing labeled indicator""" - # Add prediction to data - sample_data["prediction"] = sample_prediction - # Remove labeled column sample_data_no_labeled = sample_data.drop(columns=["labeled"]) - # Run DSL + # Create design matrices from formula + y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data_no_labeled, return_type="dataframe") + + # Run DSL with all observations labeled result = dsl( - model="lm", - formula="y ~ x1 + x2 + x3 + x4 + x5", - predicted_var=["y"], - prediction="prediction", - data=sample_data_no_labeled, + X=X_mat.values, + y=y_mat.values.flatten(), labeled_ind=np.ones(len(sample_data_no_labeled)), sample_prob=sample_data["sample_prob"].values, - sl_method="grf", - feature=["x1", "x2", "x3", "x4", "x5"], - family="gaussian", - cross_fit=2, - sample_split=2, - seed=1234, + model="lm", + method="linear", ) # Check result @@ -275,26 +245,20 @@ def test_dsl_without_labeled(sample_data, sample_prediction): def test_dsl_without_sample_prob(sample_data, sample_prediction): """Test DSL without providing sample probabilities""" - # Add prediction to data - sample_data["prediction"] = sample_prediction - # Extract labeled indicator labeled_ind = sample_data["labeled"].values - # Run DSL + # Create design matrices from formula + y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data, return_type="dataframe") + + # Run DSL with uniform sample probabilities result = dsl( - model="lm", - formula="y ~ x1 + x2 + x3 + x4 + x5", - predicted_var=["y"], - prediction="prediction", - data=sample_data, + X=X_mat.values, + y=y_mat.values.flatten(), labeled_ind=labeled_ind, - sl_method="grf", - feature=["x1", "x2", "x3", "x4", "x5"], - family="gaussian", - cross_fit=2, - sample_split=2, - seed=1234, + sample_prob=np.ones(len(sample_data)), + model="lm", + method="linear", ) # Check result diff --git a/tests/functional/test_power_dsl.py b/tests/functional/test_power_dsl.py index 615c63c..9d8f668 100644 --- a/tests/functional/test_power_dsl.py +++ b/tests/functional/test_power_dsl.py @@ -2,32 +2,28 @@ Functional tests for the power_dsl function """ +import numpy as np +from patsy import dmatrices + from dsl.dsl import dsl, power_dsl def test_power_dsl_with_dsl_output(sample_data, sample_prediction): """Test power_dsl with dsl output""" - # Add prediction to data - sample_data["prediction"] = sample_prediction - # Extract labeled indicator labeled_ind = sample_data["labeled"].values + # Create design matrices from formula + y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data, return_type="dataframe") + # Run DSL dsl_result = dsl( - model="lm", - formula="y ~ x1 + x2 + x3 + x4 + x5", - predicted_var=["y"], - prediction="prediction", - data=sample_data, + X=X_mat.values, + y=y_mat.values.flatten(), labeled_ind=labeled_ind, sample_prob=sample_data["sample_prob"].values, - sl_method="grf", - feature=["x1", "x2", "x3", "x4", "x5"], - family="gaussian", - cross_fit=2, - sample_split=2, - seed=1234, + model="lm", + method="linear", ) # Run power_dsl