-
Notifications
You must be signed in to change notification settings - Fork 34
Open
Description
Regarding this code in featurizers.py:
def featurize(self, df: pd.DataFrame) -> pd.DataFrame:
    """Run all of the preset featurizers on the input dataframe.

    Each featurizer group (composition, structure, site) is only run
    when the corresponding featurizer list on ``self`` is non-empty;
    otherwise an empty ``pd.DataFrame([])`` placeholder stands in for
    that group. The three frames are then combined column-wise with
    ``DataFrame.join``.

    Arguments:
        df: the input dataframe with a `"structure"` column
            containing pymatgen `Structure` objects.

    Returns:
        The featurized DataFrame.

    """
    # Composition features are computed when either the plain or the
    # oxidation-state composition featurizers are configured.
    df_composition = pd.DataFrame([])
    if self.composition_featurizers or self.oxid_composition_featurizers:
        df_composition = self.featurize_composition(df)

    df_structure = pd.DataFrame([])
    if self.structure_featurizers:
        df_structure = self.featurize_structure(df)

    df_site = pd.DataFrame([])
    if self.site_featurizers:
        df_site = self.featurize_site(df)

    # Overlapping column names get the suffix "l" on the structure side of
    # the inner join and "r" on the right-hand side of the outer join.
    #
    # NOTE(review): `DataFrame.join` is a left join by default, so the
    # result keeps only the rows of the frame it is called on. A placeholder
    # `pd.DataFrame([])` has no rows: an empty `df_composition` therefore
    # yields an empty result, and an empty `df_structure` empties the inner
    # join, so site features presumably come back as all-NaN columns
    # (TODO confirm). Consider joining only the frames that were computed.
    return df_composition.join(df_structure.join(df_site, lsuffix="l"), rsuffix="r")
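Whenever `df_composition` or `df_structure` is empty, the join it is the left-hand side of produces an empty DataFrame, because `DataFrame.join` is a left join by default and keeps only the rows of the calling frame.
This can be reproduced with presets that each contain a single featurizer: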
from modnet.featurizers.presets import MatminerAll2023Featurizer
from matminer.featurizers.structure import GlobalSymmetryFeatures
# Reference preset whose featurizer lists are split into singletons below.
featurizer_all = MatminerAll2023Featurizer()

# Snapshot every featurizer group of the reference preset.
structure_pool = list(featurizer_all.structure_featurizers)
composition_pool = list(featurizer_all.composition_featurizers)
oxid_pool = list(featurizer_all.oxid_composition_featurizers)
site_pool = list(featurizer_all.site_featurizers)

# Group attributes, in the order they are overwritten on each new preset.
GROUP_ATTRS = (
    "structure_featurizers",
    "composition_featurizers",
    "oxid_composition_featurizers",
    "site_featurizers",
)


def _isolate(group, featurizer, name, companions=()):
    """Build a fresh preset in which only ``group`` is populated.

    Arguments:
        group: name of the attribute in ``GROUP_ATTRS`` that receives
            the featurizer; every other group is set to an empty list.
        featurizer: the single featurizer to keep.
        name: value stored on the preset's ``name`` attribute.
        companions: zero-argument callables whose results are appended
            after ``featurizer`` in the populated group.

    Returns:
        The newly created preset instance.

    """
    preset = MatminerAll2023Featurizer()
    members = [featurizer, *(make() for make in companions)]
    for attr in GROUP_ATTRS:
        setattr(preset, attr, members if attr == group else [])
    preset.name = name
    return preset


# All single-featurizer presets are gathered here.
featurizer_singletons = []

# One preset per structure featurizer; GlobalSymmetryFeatures is always
# added alongside it.
featurizer_singletons.extend(
    _isolate(
        "structure_featurizers",
        s,
        f"structure_{s.__class__.__name__}",
        companions=(GlobalSymmetryFeatures,),
    )
    for s in structure_pool
)

# One preset for the last composition featurizer only.
featurizer_singletons.extend(
    _isolate("composition_featurizers", c, f"composition_{c.__class__.__name__}")
    for c in [composition_pool[-1]]
)

# One preset per oxidation-state composition featurizer.
featurizer_singletons.extend(
    _isolate(
        "oxid_composition_featurizers",
        o,
        f"oxid_composition_{o.__class__.__name__}",
    )
    for o in oxid_pool
)

# One preset per site featurizer.
featurizer_singletons.extend(
    _isolate("site_featurizers", st, f"site_{st.__class__.__name__}")
    for st in site_pool
)

print(f"Created {len(featurizer_singletons)} individual featurizer instances.")
for f in featurizer_singletons:
    print(f"Featurizer name: {f.name}")
Additionally, `structure_featurizers` is not flexible: it must always include `GlobalSymmetryFeatures()`, because a mapping is applied to those features later on.
I think it is in our interest to allow this kind of modularity.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels