README.md (6 changes: 4 additions & 2 deletions)
@@ -59,7 +59,8 @@ pip install validmind
- **Install with R support (requires R to be installed)**

```bash
pip install validmind[r-support]
pip install validmind
pip install rpy2
```

## How to contribute
@@ -102,7 +103,8 @@ brew install r
Once you have R installed, install the dependencies needed for R support by running:

```bash
poetry install --extras r-support
poetry install
pip install rpy2
```
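
Since `rpy2` is now installed separately rather than via a Poetry extra, a quick sanity check after installation can confirm that it can find your local R. This is a minimal sketch, not part of the PR, and assumes R is already on your `PATH`:

```python
# Minimal sanity check: confirm rpy2 can locate and talk to the local R install.
import rpy2.robjects as ro

# ro.r(...) evaluates R code; the result is an R character vector, index 0 is the Python string.
print(ro.r("R.version.string")[0])
```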

### Versioning
README.pypi.md (3 changes: 2 additions & 1 deletion)
@@ -68,5 +68,6 @@ The ValidMind Library has optional dependencies that can be installed separately
- **R Models**: To use R models with the ValidMind Library, install the R support dependencies:

```bash
pip install validmind[r-support]
pip install validmind
pip install rpy2
```
poetry.lock (450 changes: 189 additions & 261 deletions)

Large diffs are not rendered by default.

pyproject.toml (3 changes: 0 additions & 3 deletions)
@@ -40,7 +40,6 @@ python = ">=3.8.1,<3.12"
python-dotenv = "*"
ragas = {version = ">=0.2.3", optional = true}
rouge = ">=1"
rpy2 = {version = "^3.5.10", optional = true}
scikit-learn = "*,<1.6.0"
scipy = "*"
scorecardpy = "^0.1.9.6"
@@ -76,7 +75,6 @@ twine = "^4.0.2"

[tool.poetry.extras]
all = [
"rpy2",
"torch",
"transformers",
"pycocoevalcap",
@@ -94,7 +92,6 @@ llm = [
"langchain-openai",
]
pytorch = ["torch"]
r-support = ["rpy2"]

[build-system]
build-backend = "poetry.core.masonry.api"
validmind/__init__.py (26 changes: 16 additions & 10 deletions)
@@ -55,26 +55,32 @@

__all__ = [ # noqa
"__version__",
# Python Library API
"datasets",
"errors",
"get_test_suite",
# main library API
"init",
"reload",
"init_dataset",
"init_model",
"init_r_model",
"preview_template",
"print_env",
"RawData",
"reload",
"run_documentation_tests",
# log metric function (for direct/bulk/retroactive logging of metrics)
"log_metric",
# test suite functions (less common)
"get_test_suite",
"run_test_suite",
# helper functions (for troubleshooting)
"print_env",
# decorators (for building tests)
"tags",
"tasks",
"test",
"tests",
"test_suites",
# raw data (for post-processing test results and building tests)
"RawData",
# submodules
"datasets",
"errors",
"vm_models",
"tests",
"unit_metrics",
"log_metric",
"test_suites",
]
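
The regrouping of `__all__` above is organizational only: `__all__` controls which names a wildcard import exposes, so the exported surface is unchanged. A small illustration (standard Python behaviour, shown here only for context):

```python
import validmind

# Reordering __all__ changes how the public API reads, not what it contains.
print("init" in validmind.__all__)        # main library entry point is still exported
print(callable(validmind.init_r_model))   # R model support remains part of the API
```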
validmind/errors.py (2 changes: 1 addition & 1 deletion)
@@ -228,7 +228,7 @@ class MissingRExtrasError(BaseError):
def description(self, *args, **kwargs):
return (
self.message
or "ValidMind r-support needs to be installed: `pip install validmind[r-support]`"
or "`rpy2` is required to use R models. Please install it with `pip install rpy2`"
)


validmind/models/r_model.py (6 changes: 5 additions & 1 deletion)
@@ -5,6 +5,7 @@
import numpy as np
import pandas as pd

from validmind.errors import MissingRExtrasError
from validmind.logging import get_logger
from validmind.vm_models.model import VMModel

@@ -125,7 +126,10 @@ def predict(self, new_data, return_probs=False):
"""
Converts the predicted probabilities to classes
"""
from rpy2.robjects import pandas2ri
try:
from rpy2.robjects import pandas2ri
except ImportError:
raise MissingRExtrasError()

# Activate the pandas conversion for rpy2
pandas2ri.activate()
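
Moving the `rpy2` import inside `predict` and guarding it with `try/except` is the usual lazy-import pattern for optional dependencies: the module imports cleanly without `rpy2`, and the failure only surfaces, with an actionable message, when an R model is actually used. A minimal sketch of the pattern with illustrative names (`OptionalDependencyError` and `load_pandas2ri` are not part of ValidMind):

```python
class OptionalDependencyError(RuntimeError):
    """Raised when an optional extra is needed but not installed."""


def load_pandas2ri():
    try:
        # Deferred import: only evaluated when R functionality is requested.
        from rpy2.robjects import pandas2ri
    except ImportError as exc:
        raise OptionalDependencyError(
            "`rpy2` is required to use R models. Install it with `pip install rpy2`."
        ) from exc
    return pandas2ri
```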
validmind/tests/data_validation/FeatureTargetCorrelationPlot.py (13 changes: 4 additions & 9 deletions)
@@ -52,16 +52,10 @@ def FeatureTargetCorrelationPlot(dataset, fig_height=600):
- Not apt for models that employ complex feature interactions, like Decision Trees or Neural Networks, as the test
may not accurately reflect their importance.
"""

# Filter DataFrame based on features and target_column
df = dataset.df[dataset.feature_columns + [dataset.target_column]]

fig = _visualize_feature_target_correlation(df, dataset.target_column, fig_height)

correlations = (
df.corr(numeric_only=True)[dataset.target_column]
.drop(dataset.target_column)
.to_frame()
fig, correlations = _visualize_feature_target_correlation(
df, dataset.target_column, fig_height
)

return fig, RawData(correlation_data=correlations)
@@ -100,4 +94,5 @@ def _visualize_feature_target_correlation(df, target_column, fig_height):
yaxis_title="",
height=fig_height, # Adjust the height value as needed
)
return fig

return fig, correlations
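
With this change the helper computes the correlations once and returns both the figure and the underlying data, instead of having the caller re-derive them from the DataFrame. The correlation step itself is plain pandas; a small standalone sketch (column names are illustrative, and `numeric_only` needs pandas 1.5 or newer):

```python
import pandas as pd

df = pd.DataFrame({"f1": [1, 2, 3, 4], "f2": [4, 3, 2, 1], "target": [0, 0, 1, 1]})

correlations = (
    df.corr(numeric_only=True)["target"]  # Pearson correlation of each column with the target
    .drop("target")                       # drop the target's self-correlation of 1.0
    .to_frame()
)
print(correlations)
```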
validmind/tests/model_validation/ContextualRecall.py (2 changes: 1 addition & 1 deletion)
@@ -118,4 +118,4 @@ def ContextualRecall(dataset, model):
# Create a DataFrame from all collected statistics
result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})

return (result_df, *tuple(figures), RawData(contextual_recall_scores=metrics_df))
return (result_df, *figures, RawData(contextual_recall_scores=metrics_df))
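
Wrapping `figures` in `tuple()` before unpacking was redundant: the `*` operator unpacks any iterable inside a tuple literal. The same simplification is applied to `MeteorScore` and `ToxicityScore` below. A minimal illustration:

```python
figures = [f"fig{i}" for i in range(3)]  # stand-ins for the plotly figures

result = ("result_df", *figures, "raw_data")
print(result)  # ('result_df', 'fig0', 'fig1', 'fig2', 'raw_data')
```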
validmind/tests/model_validation/MeteorScore.py (2 changes: 1 addition & 1 deletion)
@@ -117,4 +117,4 @@ def MeteorScore(dataset, model):
# Create a DataFrame from all collected statistics
result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})

return (result_df, *tuple(figures), RawData(meteor_scores=metrics_df))
return (result_df, *figures, RawData(meteor_scores=metrics_df))
validmind/tests/model_validation/ToxicityScore.py (2 changes: 1 addition & 1 deletion)
@@ -141,7 +141,7 @@ def calculate_stats(df):

return (
result_df,
*tuple(figures),
*figures,
RawData(
input_toxicity_df=input_df,
true_toxicity_df=true_df,
validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py (10 changes: 3 additions & 7 deletions)
@@ -62,18 +62,14 @@ def MinimumROCAUCScore(dataset: VMDataset, model: VMModel, min_threshold: float
lb = LabelBinarizer()
lb.fit(y_true)

y_true_binarized = lb.transform(y_true)
y_score_binarized = lb.transform(dataset.y_pred(model))

roc_auc = roc_auc_score(
y_true=y_true_binarized,
y_score=y_score_binarized,
y_true=lb.transform(y_true),
y_score=lb.transform(dataset.y_pred(model)),
average="macro",
)

else:
y_score_prob = dataset.y_prob(model)
roc_auc = roc_auc_score(y_true=y_true, y_score=y_score_prob)
roc_auc = roc_auc_score(y_true=y_true, y_score=dataset.y_prob(model))

return [
{
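
The multiclass branch does the same work as before, with the `LabelBinarizer` transforms inlined into the `roc_auc_score` call: labels are binarized one-vs-rest and the per-class AUCs are macro-averaged. A self-contained sketch with synthetic data (illustrative only, not taken from the test):

```python
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelBinarizer

y_true = [0, 1, 2, 2, 1, 0]
y_pred = [0, 2, 2, 2, 1, 0]  # hard class predictions, as in the multiclass branch above

lb = LabelBinarizer().fit(y_true)
roc_auc = roc_auc_score(
    y_true=lb.transform(y_true),
    y_score=lb.transform(y_pred),
    average="macro",  # unweighted mean of the per-class one-vs-rest AUCs
)
print(round(roc_auc, 3))
```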
@@ -45,17 +45,17 @@ def RegressionModelSummary(dataset: VMDataset, model: VMModel):
- A high R-Squared or Adjusted R-Squared may not necessarily indicate a good model, especially in cases of
overfitting.
"""
y_true = dataset.y
y_pred = dataset.y_pred(model)

return [
{
"Independent Variables": dataset.feature_columns,
"R-Squared": r2_score(dataset.y, dataset.y_pred(model)),
"R-Squared": r2_score(y_true, y_pred),
"Adjusted R-Squared": adj_r2_score(
dataset.y,
dataset.y_pred(model),
len(dataset.y),
len(dataset.feature_columns),
y_true, y_pred, len(y_true), len(dataset.feature_columns)
),
"MSE": mean_squared_error(dataset.y, dataset.y_pred(model), squared=True),
"RMSE": mean_squared_error(dataset.y, dataset.y_pred(model), squared=False),
"MSE": mean_squared_error(y_true, y_pred, squared=True),
"RMSE": mean_squared_error(y_true, y_pred, squared=False),
}
]
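
Hoisting `dataset.y` and `dataset.y_pred(model)` into locals avoids recomputing the predictions for every metric and shortens each expression. For reference, assuming `adj_r2_score` implements the standard adjusted R-squared, the metrics reduce to the following sketch (the `adjusted_r2` helper below is written out for illustration, not taken from ValidMind):

```python
from sklearn.metrics import mean_squared_error, r2_score


def adjusted_r2(y_true, y_pred, n_obs, n_features):
    # Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1)
    r2 = r2_score(y_true, y_pred)
    return 1 - (1 - r2) * (n_obs - 1) / (n_obs - n_features - 1)


y_true = [3.0, 2.5, 4.0, 5.1]
y_pred = [2.8, 2.7, 3.9, 4.9]

print(r2_score(y_true, y_pred))                           # R-Squared
print(adjusted_r2(y_true, y_pred, len(y_true), 2))        # Adjusted R-Squared, 2 features
print(mean_squared_error(y_true, y_pred))                 # MSE
print(mean_squared_error(y_true, y_pred, squared=False))  # RMSE
```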