From 9554cc977d423214d8ef42ec37498e77226aafb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Gouv=C3=AAa?= Date: Wed, 12 Mar 2025 21:15:49 +0100 Subject: [PATCH 1/3] Update modnet2024 in init --- modnet/featurizers/presets/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modnet/featurizers/presets/__init__.py b/modnet/featurizers/presets/__init__.py index 1191aec0..5cd0709a 100644 --- a/modnet/featurizers/presets/__init__.py +++ b/modnet/featurizers/presets/__init__.py @@ -11,6 +11,7 @@ MatminerAll2023Featurizer, CompositionOnlyMatminerAll2023Featurizer, ) +from .matminer_2024_fast import Matminer2024FastFeaturizer from modnet.featurizers import MODFeaturizer DEFAULT_FEATURIZER: str = "Matminer2023" @@ -23,4 +24,5 @@ "MatminerAll2023": MatminerAll2023Featurizer, "CompositionOnlyMatminer2023": CompositionOnlyMatminer2023Featurizer, "CompositionOnlyMatminerAll2023": CompositionOnlyMatminerAll2023Featurizer, + "Matminer2024Fast":Matminer2024FastFeaturizer, } From 8f3e124510c84ca160eb22bb630ef673fd8517e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Gouv=C3=AAa?= Date: Wed, 12 Mar 2025 21:21:39 +0100 Subject: [PATCH 2/3] Update featurizers.py: fix concatenation with empty dataframes. If, for example, the composition featurizers were removed, the previous way of joining resulted in an empty dataframe even if a dataframe from the structure featurizers had been generated.
--- modnet/featurizers/featurizers.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/modnet/featurizers/featurizers.py b/modnet/featurizers/featurizers.py index ee103fa8..2202310e 100644 --- a/modnet/featurizers/featurizers.py +++ b/modnet/featurizers/featurizers.py @@ -98,7 +98,26 @@ def featurize(self, df: pd.DataFrame) -> pd.DataFrame: if self.site_featurizers: df_site = self.featurize_site(df) - return df_composition.join(df_structure.join(df_site, lsuffix="l"), rsuffix="r") + def safe_join(left: pd.DataFrame, right: pd.DataFrame, lsuffix: str = "", rsuffix: str = "") -> pd.DataFrame: + """ + Join two dataframes with suffixes for overlapping columns. + If either side is empty, return the non-empty dataframe. + If both are empty, return an empty DataFrame. + """ + if left.empty and right.empty: + return pd.DataFrame([]) + elif left.empty: + return right + elif right.empty: + return left + else: + return left.join(right, lsuffix=lsuffix, rsuffix=rsuffix) + + # Here we use lsuffix="l" to resolve any overlapping column names. + tmp = safe_join(df_structure, df_site, lsuffix="l") + + final_df = safe_join(df_composition, tmp, rsuffix="r") + return final_df def _fit_apply_featurizers( self, From 7818a86a0af8e6649819aa7e6966494ae2a32c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Gouv=C3=AAa?= Date: Thu, 13 Mar 2025 11:29:43 +0100 Subject: [PATCH 3/3] Update featurizers.py: create df['composition'] unconditionally. The df['composition'] creation was pulled outside the `if self.composition_featurizers` condition, so that oxid_composition descriptors can be calculated independently in a custom featurizer.
--- modnet/featurizers/featurizers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modnet/featurizers/featurizers.py b/modnet/featurizers/featurizers.py index 2202310e..820765aa 100644 --- a/modnet/featurizers/featurizers.py +++ b/modnet/featurizers/featurizers.py @@ -219,11 +219,11 @@ def featurize_composition(self, df: pd.DataFrame) -> pd.DataFrame: """ df = df.copy() - + df["composition"] = df["structure"].apply(lambda s: s.composition) if self.composition_featurizers: LOG.info("Applying composition featurizers...") - df["composition"] = df["structure"].apply(lambda s: s.composition) + df = self._fit_apply_featurizers( df, self.composition_featurizers,