diff --git a/pyproject.toml b/pyproject.toml index e3375290f..32728854d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ description = "ValidMind Library" license = "Commercial License" name = "validmind" readme = "README.pypi.md" -version = "2.8.21" +version = "2.8.22" [tool.poetry.dependencies] aiohttp = {extras = ["speedups"], version = "*"} diff --git a/validmind/__version__.py b/validmind/__version__.py index cfdf41d3a..7217d14b6 100644 --- a/validmind/__version__.py +++ b/validmind/__version__.py @@ -1 +1 @@ -__version__ = "2.8.21" +__version__ = "2.8.22" diff --git a/validmind/vm_models/dataset/dataset.py b/validmind/vm_models/dataset/dataset.py index f0a9571d1..d40c1d692 100644 --- a/validmind/vm_models/dataset/dataset.py +++ b/validmind/vm_models/dataset/dataset.py @@ -133,16 +133,19 @@ def _set_feature_columns(self, feature_columns=None): excluded = [self.target_column, *self.extra_columns.flatten()] self.feature_columns = [col for col in self.columns if col not in excluded] - self.feature_columns_numeric = ( - self._df[self.feature_columns] - .select_dtypes(include=[np.number]) - .columns.tolist() - ) - self.feature_columns_categorical = ( - self._df[self.feature_columns] - .select_dtypes(include=[object, pd.Categorical]) - .columns.tolist() - ) + # Classify columns from dtypes alone; is_numeric_dtype also matches bool + feature_dtypes = self._df[self.feature_columns].dtypes + + self.feature_columns_numeric = feature_dtypes[ + feature_dtypes.apply(pd.api.types.is_numeric_dtype) + ].index.tolist() + + self.feature_columns_categorical = feature_dtypes[ + feature_dtypes.apply( + lambda x: isinstance(x, pd.CategoricalDtype) + or pd.api.types.is_object_dtype(x) + ) + ].index.tolist() def _add_column(self, column_name, column_values): column_values = np.array(column_values)