Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 47 additions & 83 deletions tests/functional/test_dsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,27 @@
"""

import numpy as np
from patsy import dmatrices

from dsl.dsl import dsl


def test_dsl_linear_regression(sample_data, sample_prediction):
"""Test DSL with linear regression"""
# Add prediction to data
sample_data["prediction"] = sample_prediction

# Extract labeled indicator
labeled_ind = sample_data["labeled"].values

# Create design matrices from formula
y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data, return_type="dataframe")

# Run DSL
result = dsl(
model="lm",
formula="y ~ x1 + x2 + x3 + x4 + x5",
predicted_var=["y"],
prediction="prediction",
data=sample_data,
X=X_mat.values,
y=y_mat.values.flatten(),
labeled_ind=labeled_ind,
sample_prob=sample_data["sample_prob"].values,
sl_method="grf",
feature=["x1", "x2", "x3", "x4", "x5"],
family="gaussian",
cross_fit=2,
sample_split=2,
seed=1234,
model="lm",
method="linear",
)

# Check result
Expand Down Expand Up @@ -63,27 +57,20 @@ def test_dsl_linear_regression(sample_data, sample_prediction):

def test_dsl_logistic_regression(sample_data, sample_prediction):
"""Test DSL with logistic regression"""
# Add prediction to data
sample_data["prediction"] = sample_prediction

# Extract labeled indicator
labeled_ind = sample_data["labeled"].values

# Create design matrices from formula
y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data, return_type="dataframe")

# Run DSL
result = dsl(
model="logit",
formula="y ~ x1 + x2 + x3 + x4 + x5",
predicted_var=["y"],
prediction="prediction",
data=sample_data,
X=X_mat.values,
y=y_mat.values.flatten(),
labeled_ind=labeled_ind,
sample_prob=sample_data["sample_prob"].values,
sl_method="grf",
feature=["x1", "x2", "x3", "x4", "x5"],
family="binomial",
cross_fit=2,
sample_split=2,
seed=1234,
model="logit",
method="logistic",
)

# Check result
Expand Down Expand Up @@ -117,27 +104,20 @@ def test_dsl_logistic_regression(sample_data, sample_prediction):

def test_dsl_fixed_effects(sample_data, sample_prediction):
"""Test DSL with fixed effects"""
# Add prediction to data
sample_data["prediction"] = sample_prediction

# Extract labeled indicator
labeled_ind = sample_data["labeled"].values

# Run DSL
# Create design matrices from formula (basic formula, fixed effects handling may vary)
y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data, return_type="dataframe")

# Run DSL with fixed_effects method
result = dsl(
model="felm",
formula="y ~ x1 + x2 + x3 + x4 + x5 | fe1 + fe2",
predicted_var=["y"],
prediction="prediction",
data=sample_data,
X=X_mat.values,
y=y_mat.values.flatten(),
labeled_ind=labeled_ind,
sample_prob=sample_data["sample_prob"].values,
sl_method="grf",
feature=["x1", "x2", "x3", "x4", "x5"],
family="gaussian",
cross_fit=2,
sample_split=2,
seed=1234,
model="felm",
method="fixed_effects",
)

# Check result
Expand Down Expand Up @@ -174,20 +154,17 @@ def test_dsl_without_prediction(sample_data):
# Extract labeled indicator
labeled_ind = sample_data["labeled"].values

# Create design matrices from formula
y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data, return_type="dataframe")

# Run DSL
result = dsl(
model="lm",
formula="y ~ x1 + x2 + x3 + x4 + x5",
predicted_var=["y"],
data=sample_data,
X=X_mat.values,
y=y_mat.values.flatten(),
labeled_ind=labeled_ind,
sample_prob=sample_data["sample_prob"].values,
sl_method="grf",
feature=["x1", "x2", "x3", "x4", "x5"],
family="gaussian",
cross_fit=2,
sample_split=2,
seed=1234,
model="lm",
method="linear",
)

# Check result
Expand Down Expand Up @@ -221,27 +198,20 @@ def test_dsl_without_prediction(sample_data):

def test_dsl_without_labeled(sample_data, sample_prediction):
"""Test DSL without providing labeled indicator"""
# Add prediction to data
sample_data["prediction"] = sample_prediction

# Remove labeled column
sample_data_no_labeled = sample_data.drop(columns=["labeled"])

# Run DSL
# Create design matrices from formula
y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data_no_labeled, return_type="dataframe")

# Run DSL with all observations labeled
result = dsl(
model="lm",
formula="y ~ x1 + x2 + x3 + x4 + x5",
predicted_var=["y"],
prediction="prediction",
data=sample_data_no_labeled,
X=X_mat.values,
y=y_mat.values.flatten(),
labeled_ind=np.ones(len(sample_data_no_labeled)),
sample_prob=sample_data["sample_prob"].values,
sl_method="grf",
feature=["x1", "x2", "x3", "x4", "x5"],
family="gaussian",
cross_fit=2,
sample_split=2,
seed=1234,
model="lm",
method="linear",
)

# Check result
Expand Down Expand Up @@ -275,26 +245,20 @@ def test_dsl_without_labeled(sample_data, sample_prediction):

def test_dsl_without_sample_prob(sample_data, sample_prediction):
"""Test DSL without providing sample probabilities"""
# Add prediction to data
sample_data["prediction"] = sample_prediction

# Extract labeled indicator
labeled_ind = sample_data["labeled"].values

# Run DSL
# Create design matrices from formula
y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data, return_type="dataframe")

# Run DSL with uniform sample probabilities
result = dsl(
model="lm",
formula="y ~ x1 + x2 + x3 + x4 + x5",
predicted_var=["y"],
prediction="prediction",
data=sample_data,
X=X_mat.values,
y=y_mat.values.flatten(),
labeled_ind=labeled_ind,
sl_method="grf",
feature=["x1", "x2", "x3", "x4", "x5"],
family="gaussian",
cross_fit=2,
sample_split=2,
seed=1234,
sample_prob=np.ones(len(sample_data)),
model="lm",
method="linear",
)

# Check result
Expand Down
24 changes: 10 additions & 14 deletions tests/functional/test_power_dsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,28 @@
Functional tests for the power_dsl function
"""

import numpy as np
from patsy import dmatrices

from dsl.dsl import dsl, power_dsl


def test_power_dsl_with_dsl_output(sample_data, sample_prediction):
"""Test power_dsl with dsl output"""
# Add prediction to data
sample_data["prediction"] = sample_prediction

# Extract labeled indicator
labeled_ind = sample_data["labeled"].values

# Create design matrices from formula
y_mat, X_mat = dmatrices("y ~ x1 + x2 + x3 + x4 + x5", sample_data, return_type="dataframe")

# Run DSL
dsl_result = dsl(
model="lm",
formula="y ~ x1 + x2 + x3 + x4 + x5",
predicted_var=["y"],
prediction="prediction",
data=sample_data,
X=X_mat.values,
y=y_mat.values.flatten(),
labeled_ind=labeled_ind,
sample_prob=sample_data["sample_prob"].values,
sl_method="grf",
feature=["x1", "x2", "x3", "x4", "x5"],
family="gaussian",
cross_fit=2,
sample_split=2,
seed=1234,
model="lm",
method="linear",
)

# Run power_dsl
Expand Down
Loading