LinusBDittmer · LinusBDittmer · Apr 19, 2025 · Apr 19, 2025
diff --git a/molbench/external_parser.py b/molbench/external_parser.py
@@ -78,6 +78,10 @@ def _load_file(self, outfile: str, out_parser: callable,
             The suffix of the assignment file (default: '.ass').
             filename.out -> filename.ass
         """
+        # XXX: Why split the stem on '_' and keep only the first part?
+        # The error message above only says that the callable may
+        # take an optional name parameter; it does not mention any
+        # '_'-separated suffix in the file name.
         name: str = Path(outfile).stem.split("_")[0]
         if requires_name:
             data: dict = out_parser(outfile, name)  # read in as dict
@@ -91,7 +95,8 @@ def _load_file(self, outfile: str, out_parser: callable,
             mol.add_assignments(assignments)
         return mol
 
-    def _fetch_all_outfiles(self, path: str, suffix: str = '.out') -> list:
+    def _fetch_all_outfiles(self, path: str,
+                            suffix: str = '.out') -> list[str]:
         outfiles = []
         for root, _, files in os.walk(os.path.abspath(path), topdown=True,
                                       followlinks=True):
@@ -109,4 +114,4 @@ def _assignmentfile_from_outfile(self, outfile: str,
         assignment file could be found.
         """
         assignmentf = Path(outfile).with_suffix(assignment_suffix)
-        return assignmentf if assignmentf.is_file() else None
+        return str(assignmentf) if assignmentf.is_file() else None
diff --git a/molbench/input_constructor.py b/molbench/input_constructor.py
@@ -132,7 +132,7 @@ def create_inputs(self, benchmark: MoleculeList[Molecule], basepath: str,
                       calc_details: dict,
                       file_expansion_keys: tuple = ("basis",),
                       flat_structure: bool = False,
-                      name_template: str = None) -> list:
+                      name_template: str | None = None) -> list:
         """
         Create inputs files for the provided set of Molecules by filling
         in the placeholders in the input template with data from the
@@ -523,7 +523,7 @@ def create_inputs(self, benchmark: MoleculeList[Molecule], basepath: str,
                       calc_details: dict,
                       file_expansion_keys: tuple = ("basis",),
                       flat_structure: bool = False,
-                      name_template: str = None,
+                      name_template: str | None = None,
                       reference_path: str = "references.json",
                       compressed_property: str | None = None) -> list:
         # Create compressed benchmark
@@ -532,8 +532,8 @@ def create_inputs(self, benchmark: MoleculeList[Molecule], basepath: str,
         # Additionally, we create a dict of references to the individual
         # molecules
 
-        compressed: MoleculeList = []
-        references: dict = defaultdict()
+        compressed = MoleculeList()
+        references = {}
 
         def _unique(xyz: list, charge: int, mult: int) -> int:
             all_xyzs = [(m.system_data["xyz"],
@@ -595,10 +595,17 @@ def _unique(xyz: list, charge: int, mult: int) -> int:
                 else:
                     pkey = [k for k, v in mol.state_data.items()
                             if v["type"] == compressed_property][0]
+
+                factor_key = None
                 if "stochiometry" in mol.state_data[pkey]:
                     factor_key = "stochiometry"
                 elif "factors" in mol.state_data[pkey]:
                     factor_key = "factors"
+
+                if factor_key is None:
+                    log.critical(f"Could not find a factor in state {pkey} of "
+                                 f"molecule {mol.name}.",
+                                 "CompressedTemplateConstructor")
                 stoch: list = mol.state_data[pkey][factor_key]
                 references[mol.name]["factors"] = stoch
 

diff --git a/molbench/molecule.py b/molbench/molecule.py
@@ -3,8 +3,8 @@
 
 
 class Molecule:
-    def __init__(self, name, data_id, system_data: dict = None,
-                 state_data: dict = None) -> None:
+    def __init__(self, name, data_id, system_data: dict | None = None,
+                 state_data: dict | None = None) -> None:
         self.name = name
         self.data_id = data_id
         # dict that contains all the information regarding the system:
@@ -30,23 +30,24 @@ def from_benchmark(cls, benchmark_entry: dict,
         # e. g. through relative energies
         # If so, it contains the usual entries suffixed by "_list"
         # i. e. "xyz_list", "multiplicity_list", "n_atoms_list" etc.
-        if "xyz_list" in system_data:
-            if not isinstance(system_data["xyz_list"][0], str):
-                system_data["xyz_list"] = ["\n".join(s)
-                                           for s in system_data["xyz_list"]]
+        if "xyz_list" in system_data and \
+                isinstance(system_data["xyz_list"], (list, tuple)):
+            system_data["xyz_list"] = [
+                "\n".join(xyz) if not isinstance(xyz, str) else xyz
+                for xyz in system_data["xyz_list"]
+            ]
         # ensure that xyz coordinates are a string
         if "xyz" in system_data and not isinstance(system_data["xyz"], str):
             system_data["xyz"] = "\n".join(system_data["xyz"])
         # get a name for the molecule
+        name = molname
         if "name" in system_data:
             name = system_data["name"]
             del system_data["name"]
-        elif molname is None:
+        if name is None:
             log.critical("Name not specified in benchmark entry and not "
                          "provided as argument to the method.",
                          "Molecule: from_benchmark")
-        else:
-            name = molname
 
         properties = benchmark_entry.get("properties", None)
         return cls(name, benchmark_id, system_data, properties)
@@ -154,6 +155,8 @@ def remove(self, key, *values) -> 'MoleculeList':
         return self._filter(key, lambda v: v not in values)
 
     def apply_stochiometry(self, stochiometry: dict) -> 'MoleculeList':
+        # TODO: Document this method, e.g., the expected form of the
+        # stochiometry dict.
         combined_list = MoleculeList()
 
         def find_mol(name):
@@ -351,6 +354,7 @@ def _join_state_data(self, dst_state, src_state, factor):
             else:
                 pdict = dst_state[keydict[val["type"]]]
                 if isinstance(property_value, (list, tuple)):
+                    assert isinstance(pdict["value"], (tuple, list))
                     pdict["value"] = [p0 + p1 for p0, p1 in
                                       zip(property_value, pdict["value"])]
                 else: