From 0ce31acf5d0e59483c1987fe7585aa6b064a9874 Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andres@validmind.ai>
Date: Fri, 25 Apr 2025 12:56:14 -0700
Subject: [PATCH 1/3] Improve performance of feature_columns_numeric and
 feature_columns_categorical

---
 validmind/vm_models/dataset/dataset.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/validmind/vm_models/dataset/dataset.py b/validmind/vm_models/dataset/dataset.py
index f0a9571d1..3b04bcf16 100644
--- a/validmind/vm_models/dataset/dataset.py
+++ b/validmind/vm_models/dataset/dataset.py
@@ -133,16 +133,19 @@ def _set_feature_columns(self, feature_columns=None):
             excluded = [self.target_column, *self.extra_columns.flatten()]
             self.feature_columns = [col for col in self.columns if col not in excluded]
 
-        self.feature_columns_numeric = (
-            self._df[self.feature_columns]
-            .select_dtypes(include=[np.number])
-            .columns.tolist()
-        )
-        self.feature_columns_categorical = (
-            self._df[self.feature_columns]
-            .select_dtypes(include=[object, pd.Categorical])
-            .columns.tolist()
-        )
+        # Inspect dtypes directly instead of calling select_dtypes on the feature frame
+        feature_dtypes = self._df[self.feature_columns].dtypes
+
+        self.feature_columns_numeric = feature_dtypes[
+            feature_dtypes.apply(lambda x: pd.api.types.is_numeric_dtype(x))
+        ].index.tolist()
+
+        self.feature_columns_categorical = feature_dtypes[
+            feature_dtypes.apply(
+                lambda x: pd.api.types.is_categorical_dtype(x)
+                or pd.api.types.is_object_dtype(x)
+            )
+        ].index.tolist()
 
     def _add_column(self, column_name, column_values):
         column_values = np.array(column_values)
@@ -560,6 +563,7 @@ def __init__(
 
         index = None
         if isinstance(raw_dataset.index, pd.Index):
+            print("Index is a pandas Index")
             index = raw_dataset.index.values
         self.index = index
 
@@ -585,6 +589,7 @@ def __init__(
                 "and you won't modify the source data."
             )
 
+        print("Setting feature columns...")
         self._set_feature_columns(feature_columns)
 
         if model:

From e16d837c7b6840c00ff43556a619406c146fdb22 Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andres@validmind.ai>
Date: Fri, 25 Apr 2025 12:59:55 -0700
Subject: [PATCH 2/3] Remove debugging lines

---
 validmind/vm_models/dataset/dataset.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/validmind/vm_models/dataset/dataset.py b/validmind/vm_models/dataset/dataset.py
index 3b04bcf16..d40c1d692 100644
--- a/validmind/vm_models/dataset/dataset.py
+++ b/validmind/vm_models/dataset/dataset.py
@@ -563,7 +563,6 @@ def __init__(
 
         index = None
         if isinstance(raw_dataset.index, pd.Index):
-            print("Index is a pandas Index")
             index = raw_dataset.index.values
         self.index = index
 
@@ -589,7 +588,6 @@ def __init__(
                 "and you won't modify the source data."
             )
 
-        print("Setting feature columns...")
         self._set_feature_columns(feature_columns)
 
         if model:

From 07623754e81fc7b798a66be65d98f4f16d1fe722 Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andres@validmind.ai>
Date: Fri, 25 Apr 2025 14:41:09 -0700
Subject: [PATCH 3/3] 2.8.22

---
 pyproject.toml           | 2 +-
 validmind/__version__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e3375290f..32728854d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ description = "ValidMind Library"
 license = "Commercial License"
 name = "validmind"
 readme = "README.pypi.md"
-version = "2.8.21"
+version = "2.8.22"
 
 [tool.poetry.dependencies]
 aiohttp = {extras = ["speedups"], version = "*"}
diff --git a/validmind/__version__.py b/validmind/__version__.py
index cfdf41d3a..7217d14b6 100644
--- a/validmind/__version__.py
+++ b/validmind/__version__.py
@@ -1 +1 @@
-__version__ = "2.8.21"
+__version__ = "2.8.22"