Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions modnet/featurizers/featurizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,26 @@ def featurize(self, df: pd.DataFrame) -> pd.DataFrame:
if self.site_featurizers:
df_site = self.featurize_site(df)

return df_composition.join(df_structure.join(df_site, lsuffix="l"), rsuffix="r")
def safe_join(
    left: pd.DataFrame,
    right: pd.DataFrame,
    lsuffix: str = "",
    rsuffix: str = "",
) -> pd.DataFrame:
    """Join two dataframes on their index, tolerating empty inputs.

    ``DataFrame.join`` performs a left join by default, so joining onto an
    empty ``left`` would throw away everything in ``right``. This helper
    short-circuits those cases instead of calling ``join``.

    Args:
        left: Left-hand dataframe.
        right: Right-hand dataframe.
        lsuffix: Suffix appended to overlapping column names from ``left``.
        rsuffix: Suffix appended to overlapping column names from ``right``.

    Returns:
        * a new, empty ``DataFrame`` when both inputs are empty;
        * ``right`` itself (not a copy) when only ``left`` is empty;
        * ``left`` itself (not a copy) when only ``right`` is empty;
        * ``left.join(right, lsuffix=lsuffix, rsuffix=rsuffix)`` otherwise.
    """
    # NOTE: ``DataFrame.empty`` is True when *either* axis has length 0,
    # i.e. a frame with columns but no rows also counts as empty here.
    if left.empty:
        # Nothing usable on the left: fall back to the right-hand side, or to
        # a fresh empty frame when there is nothing on either side.
        return pd.DataFrame([]) if right.empty else right
    if right.empty:
        return left
    return left.join(right, lsuffix=lsuffix, rsuffix=rsuffix)

# Here we use lsuffix="l" to resolve any overlapping column names.
tmp = safe_join(df_structure, df_site, lsuffix="l")

final_df = safe_join(df_composition, tmp, rsuffix="r")
return final_df

def _fit_apply_featurizers(
self,
Expand Down Expand Up @@ -200,11 +219,11 @@ def featurize_composition(self, df: pd.DataFrame) -> pd.DataFrame:
"""

df = df.copy()

df["composition"] = df["structure"].apply(lambda s: s.composition)
if self.composition_featurizers:

LOG.info("Applying composition featurizers...")
df["composition"] = df["structure"].apply(lambda s: s.composition)

df = self._fit_apply_featurizers(
df,
self.composition_featurizers,
Expand Down
2 changes: 2 additions & 0 deletions modnet/featurizers/presets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
MatminerAll2023Featurizer,
CompositionOnlyMatminerAll2023Featurizer,
)
from .matminer_2024_fast import Matminer2024FastFeaturizer
from modnet.featurizers import MODFeaturizer

DEFAULT_FEATURIZER: str = "Matminer2023"
Expand All @@ -23,4 +24,5 @@
"MatminerAll2023": MatminerAll2023Featurizer,
"CompositionOnlyMatminer2023": CompositionOnlyMatminer2023Featurizer,
"CompositionOnlyMatminerAll2023": CompositionOnlyMatminerAll2023Featurizer,
"Matminer2024Fast":Matminer2024FastFeaturizer,
}