README.md (6 changes: 4 additions & 2 deletions)
@@ -59,7 +59,8 @@ pip install validmind
- **Install with R support (requires R to be installed)**

```bash
pip install validmind[r-support]
pip install validmind
pip install rpy2
```

## How to contribute
@@ -102,7 +103,8 @@ brew install r
Once you have R installed, install the dependencies needed for R support by running:

```bash
poetry install --extras r-support
poetry install
pip install rpy2
```
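
Since `rpy2` is now installed separately rather than via a Poetry extra, a quick sanity check after installation can confirm that it can find your local R. This is a minimal sketch, not part of the PR, and assumes R is already on your `PATH`:

```python
# Minimal sanity check: confirm rpy2 can locate and talk to the local R install.
import rpy2.robjects as ro

# ro.r(...) evaluates R code; the result is an R character vector, index 0 is the Python string.
print(ro.r("R.version.string")[0])
```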

### Versioning
README.pypi.md (3 changes: 2 additions & 1 deletion)
@@ -68,5 +68,6 @@ The ValidMind Library has optional dependencies that can be installed separately
- **R Models**: To use R models with the ValidMind Library, install the R support dependencies:

```bash
pip install validmind[r-support]
pip install validmind
pip install rpy2
```
poetry.lock (450 changes: 189 additions & 261 deletions)

Large diffs are not rendered by default.

pyproject.toml (3 changes: 0 additions & 3 deletions)
@@ -40,7 +40,6 @@ python = ">=3.8.1,<3.12"
python-dotenv = "*"
ragas = {version = ">=0.2.3", optional = true}
rouge = ">=1"
rpy2 = {version = "^3.5.10", optional = true}
scikit-learn = "*,<1.6.0"
scipy = "*"
scorecardpy = "^0.1.9.6"
@@ -76,7 +75,6 @@ twine = "^4.0.2"

[tool.poetry.extras]
all = [
"rpy2",
"torch",
"transformers",
"pycocoevalcap",
@@ -94,7 +92,6 @@ llm = [
"langchain-openai",
]
pytorch = ["torch"]
r-support = ["rpy2"]

[build-system]
build-backend = "poetry.core.masonry.api"
validmind/__init__.py (26 changes: 16 additions & 10 deletions)
@@ -55,26 +55,32 @@

__all__ = [ # noqa
"__version__",
# Python Library API
"datasets",
"errors",
"get_test_suite",
# main library API
"init",
"reload",
"init_dataset",
"init_model",
"init_r_model",
"preview_template",
"print_env",
"RawData",
"reload",
"run_documentation_tests",
# log metric function (for direct/bulk/retroactive logging of metrics)
"log_metric",
# test suite functions (less common)
"get_test_suite",
"run_test_suite",
# helper functions (for troubleshooting)
"print_env",
# decorators (for building tests)
"tags",
"tasks",
"test",
"tests",
"test_suites",
# raw data (for post-processing test results and building tests)
"RawData",
# submodules
"datasets",
"errors",
"vm_models",
"tests",
"unit_metrics",
"log_metric",
"test_suites",
]
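
The regrouping of `__all__` above is organizational only: `__all__` controls which names a wildcard import exposes, so the exported surface is unchanged. A small illustration (standard Python behaviour, shown here only for context):

```python
import validmind

# Reordering __all__ changes how the public API reads, not what it contains.
print("init" in validmind.__all__)        # main library entry point is still exported
print(callable(validmind.init_r_model))   # R model support remains part of the API
```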
validmind/errors.py (2 changes: 1 addition & 1 deletion)
@@ -228,7 +228,7 @@ class MissingRExtrasError(BaseError):
def description(self, *args, **kwargs):
return (
self.message
or "ValidMind r-support needs to be installed: `pip install validmind[r-support]`"
or "`rpy2` is required to use R models. Please install it with `pip install rpy2`"
)


validmind/models/r_model.py (6 changes: 5 additions & 1 deletion)
@@ -5,6 +5,7 @@
import numpy as np
import pandas as pd

from validmind.errors import MissingRExtrasError
from validmind.logging import get_logger
from validmind.vm_models.model import VMModel

@@ -125,7 +126,10 @@ def predict(self, new_data, return_probs=False):
"""
Converts the predicted probabilities to classes
"""
from rpy2.robjects import pandas2ri
try:
from rpy2.robjects import pandas2ri
except ImportError:
raise MissingRExtrasError()

# Activate the pandas conversion for rpy2
pandas2ri.activate()
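
Moving the `rpy2` import inside `predict` and guarding it with `try/except` is the usual lazy-import pattern for optional dependencies: the module imports cleanly without `rpy2`, and the failure only surfaces, with an actionable message, when an R model is actually used. A minimal sketch of the pattern with illustrative names (`OptionalDependencyError` and `load_pandas2ri` are not part of ValidMind):

```python
class OptionalDependencyError(RuntimeError):
    """Raised when an optional extra is needed but not installed."""


def load_pandas2ri():
    try:
        # Deferred import: only evaluated when R functionality is requested.
        from rpy2.robjects import pandas2ri
    except ImportError as exc:
        raise OptionalDependencyError(
            "`rpy2` is required to use R models. Install it with `pip install rpy2`."
        ) from exc
    return pandas2ri
```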
validmind/tests/data_validation/FeatureTargetCorrelationPlot.py (13 changes: 4 additions & 9 deletions)
@@ -52,16 +52,10 @@ def FeatureTargetCorrelationPlot(dataset, fig_height=600):
- Not apt for models that employ complex feature interactions, like Decision Trees or Neural Networks, as the test
may not accurately reflect their importance.
"""

# Filter DataFrame based on features and target_column
df = dataset.df[dataset.feature_columns + [dataset.target_column]]

fig = _visualize_feature_target_correlation(df, dataset.target_column, fig_height)

correlations = (
df.corr(numeric_only=True)[dataset.target_column]
.drop(dataset.target_column)
.to_frame()
fig, correlations = _visualize_feature_target_correlation(
df, dataset.target_column, fig_height
)

return fig, RawData(correlation_data=correlations)
@@ -100,4 +94,5 @@ def _visualize_feature_target_correlation(df, target_column, fig_height):
yaxis_title="",
height=fig_height, # Adjust the height value as needed
)
return fig

return fig, correlations
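
With this change the helper computes the correlations once and returns both the figure and the underlying data, instead of having the caller re-derive them from the DataFrame. The correlation step itself is plain pandas; a small standalone sketch (column names are illustrative, and `numeric_only` needs pandas 1.5 or newer):

```python
import pandas as pd

df = pd.DataFrame({"f1": [1, 2, 3, 4], "f2": [4, 3, 2, 1], "target": [0, 0, 1, 1]})

correlations = (
    df.corr(numeric_only=True)["target"]  # Pearson correlation of each column with the target
    .drop("target")                       # drop the target's self-correlation of 1.0
    .to_frame()
)
print(correlations)
```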
validmind/tests/model_validation/ContextualRecall.py (2 changes: 1 addition & 1 deletion)
@@ -118,4 +118,4 @@ def ContextualRecall(dataset, model):
# Create a DataFrame from all collected statistics
result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})

return (result_df, *tuple(figures), RawData(contextual_recall_scores=metrics_df))
return (result_df, *figures, RawData(contextual_recall_scores=metrics_df))
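
Wrapping `figures` in `tuple()` before unpacking was redundant: the `*` operator unpacks any iterable inside a tuple literal. The same simplification is applied to `MeteorScore` and `ToxicityScore` below. A minimal illustration:

```python
figures = [f"fig{i}" for i in range(3)]  # stand-ins for the plotly figures

result = ("result_df", *figures, "raw_data")
print(result)  # ('result_df', 'fig0', 'fig1', 'fig2', 'raw_data')
```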
validmind/tests/model_validation/MeteorScore.py (2 changes: 1 addition & 1 deletion)
@@ -117,4 +117,4 @@ def MeteorScore(dataset, model):
# Create a DataFrame from all collected statistics
result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})

return (result_df, *tuple(figures), RawData(meteor_scores=metrics_df))
return (result_df, *figures, RawData(meteor_scores=metrics_df))
validmind/tests/model_validation/ToxicityScore.py (2 changes: 1 addition & 1 deletion)
@@ -141,7 +141,7 @@ def calculate_stats(df):

return (
result_df,
*tuple(figures),
*figures,
RawData(
input_toxicity_df=input_df,
true_toxicity_df=true_df,
validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py (10 changes: 3 additions & 7 deletions)
@@ -62,18 +62,14 @@ def MinimumROCAUCScore(dataset: VMDataset, model: VMModel, min_threshold: float
lb = LabelBinarizer()
lb.fit(y_true)

y_true_binarized = lb.transform(y_true)
y_score_binarized = lb.transform(dataset.y_pred(model))

roc_auc = roc_auc_score(
y_true=y_true_binarized,
y_score=y_score_binarized,
y_true=lb.transform(y_true),
y_score=lb.transform(dataset.y_pred(model)),
average="macro",
)

else:
y_score_prob = dataset.y_prob(model)
roc_auc = roc_auc_score(y_true=y_true, y_score=y_score_prob)
roc_auc = roc_auc_score(y_true=y_true, y_score=dataset.y_prob(model))

return [
{
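
The multiclass branch does the same work as before, with the `LabelBinarizer` transforms inlined into the `roc_auc_score` call: labels are binarized one-vs-rest and the per-class AUCs are macro-averaged. A self-contained sketch with synthetic data (illustrative only, not taken from the test):

```python
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelBinarizer

y_true = [0, 1, 2, 2, 1, 0]
y_pred = [0, 2, 2, 2, 1, 0]  # hard class predictions, as in the multiclass branch above

lb = LabelBinarizer().fit(y_true)
roc_auc = roc_auc_score(
    y_true=lb.transform(y_true),
    y_score=lb.transform(y_pred),
    average="macro",  # unweighted mean of the per-class one-vs-rest AUCs
)
print(round(roc_auc, 3))
```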
@@ -45,17 +45,17 @@ def RegressionModelSummary(dataset: VMDataset, model: VMModel):
- A high R-Squared or Adjusted R-Squared may not necessarily indicate a good model, especially in cases of
overfitting.
"""
y_true = dataset.y
y_pred = dataset.y_pred(model)

return [
{
"Independent Variables": dataset.feature_columns,
"R-Squared": r2_score(dataset.y, dataset.y_pred(model)),
"R-Squared": r2_score(y_true, y_pred),
"Adjusted R-Squared": adj_r2_score(
dataset.y,
dataset.y_pred(model),
len(dataset.y),
len(dataset.feature_columns),
y_true, y_pred, len(y_true), len(dataset.feature_columns)
),
"MSE": mean_squared_error(dataset.y, dataset.y_pred(model), squared=True),
"RMSE": mean_squared_error(dataset.y, dataset.y_pred(model), squared=False),
"MSE": mean_squared_error(y_true, y_pred, squared=True),
"RMSE": mean_squared_error(y_true, y_pred, squared=False),
}
]
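
Hoisting `dataset.y` and `dataset.y_pred(model)` into locals avoids recomputing the predictions for every metric and shortens each expression. For reference, assuming `adj_r2_score` implements the standard adjusted R-squared, the metrics reduce to the following sketch (the `adjusted_r2` helper below is written out for illustration, not taken from ValidMind):

```python
from sklearn.metrics import mean_squared_error, r2_score


def adjusted_r2(y_true, y_pred, n_obs, n_features):
    # Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1)
    r2 = r2_score(y_true, y_pred)
    return 1 - (1 - r2) * (n_obs - 1) / (n_obs - n_features - 1)


y_true = [3.0, 2.5, 4.0, 5.1]
y_pred = [2.8, 2.7, 3.9, 4.9]

print(r2_score(y_true, y_pred))                           # R-Squared
print(adjusted_r2(y_true, y_pred, len(y_true), 2))        # Adjusted R-Squared, 2 features
print(mean_squared_error(y_true, y_pred))                 # MSE
print(mean_squared_error(y_true, y_pred, squared=False))  # RMSE
```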