diff --git a/molbench/external_parser.py b/molbench/external_parser.py index 524ab4d..c06c314 100644 --- a/molbench/external_parser.py +++ b/molbench/external_parser.py @@ -78,6 +78,10 @@ def _load_file(self, outfile: str, out_parser: callable, The suffix of the assignment file (default: '.ass'). filename.out -> filename.ass """ + # XXX: Why the split? + # Above in the error message you say that the callable may + # take an optional name parameter. But you did not mention + # any suffix separated by '_' name: str = Path(outfile).stem.split("_")[0] if requires_name: data: dict = out_parser(outfile, name) # read in as dict @@ -91,7 +95,8 @@ def _load_file(self, outfile: str, out_parser: callable, mol.add_assignments(assignments) return mol - def _fetch_all_outfiles(self, path: str, suffix: str = '.out') -> list: + def _fetch_all_outfiles(self, path: str, + suffix: str = '.out') -> list[str]: outfiles = [] for root, _, files in os.walk(os.path.abspath(path), topdown=True, followlinks=True): @@ -109,4 +114,4 @@ def _assignmentfile_from_outfile(self, outfile: str, assignment file could be found. """ assignmentf = Path(outfile).with_suffix(assignment_suffix) - return assignmentf if assignmentf.is_file() else None + return str(assignmentf) if assignmentf.is_file() else None diff --git a/molbench/input_constructor.py b/molbench/input_constructor.py index fc79edd..3d67c20 100644 --- a/molbench/input_constructor.py +++ b/molbench/input_constructor.py @@ -132,7 +132,7 @@ def create_inputs(self, benchmark: MoleculeList[Molecule], basepath: str, calc_details: dict, file_expansion_keys: tuple = ("basis",), flat_structure: bool = False, - name_template: str = None) -> list: + name_template: str | None = None) -> list: """ Create inputs files for the provided set of Molecules by filling in the placeholders in the input template with data from the @@ -523,7 +523,7 @@ def create_inputs(self, benchmark: MoleculeList[Molecule], basepath: str, calc_details: dict, file_expansion_keys: tuple = ("basis",), flat_structure: bool = False, - name_template: str = None, + name_template: str | None = None, reference_path: str = "references.json", compressed_property: str | None = None) -> list: # Create compressed benchmark @@ -532,8 +532,8 @@ def create_inputs(self, benchmark: MoleculeList[Molecule], basepath: str, # Additionally, we create a dict of references to the individual # molecules - compressed: MoleculeList = [] - references: dict = defaultdict() + compressed = MoleculeList() + references = {} def _unique(xyz: list, charge: int, mult: int) -> int: all_xyzs = [(m.system_data["xyz"], @@ -595,10 +595,17 @@ def _unique(xyz: list, charge: int, mult: int) -> int: else: pkey = [k for k, v in mol.state_data.items() if v["type"] == compressed_property][0] + + factor_key = None if "stochiometry" in mol.state_data[pkey]: factor_key = "stochiometry" elif "factors" in mol.state_data[pkey]: factor_key = "factors" + + if factor_key is None: + log.critical(f"Could not find a factor in state {pkey} of " + f"molecule {mol.name}.", + "CompressedTemplateConstructor") stoch: list = mol.state_data[pkey][factor_key] references[mol.name]["factors"] = stoch diff --git a/molbench/molecule.py b/molbench/molecule.py index 8919ea6..fd3fa59 100644 --- a/molbench/molecule.py +++ b/molbench/molecule.py @@ -3,8 +3,8 @@ class Molecule: - def __init__(self, name, data_id, system_data: dict = None, - state_data: dict = None) -> None: + def __init__(self, name, data_id, system_data: dict | None = None, + state_data: dict | None = None) -> None: self.name = name self.data_id = data_id # dict that contains all the information regarding the system: @@ -30,23 +30,24 @@ def from_benchmark(cls, benchmark_entry: dict, # e. g. through relative energies # If so, it contains the usual entries suffixed by "_list" # i. e. "xyz_list", "multiplicity_list", "n_atoms_list" etc. - if "xyz_list" in system_data: - if not isinstance(system_data["xyz_list"][0], str): - system_data["xyz_list"] = ["\n".join(s) - for s in system_data["xyz_list"]] + if "xyz_list" in system_data and \ + isinstance(system_data["xyz_list"], (list, tuple)): + system_data["xyz_list"] = [ + "\n".join(xyz) if not isinstance(xyz, str) else xyz + for xyz in system_data["xyz_list"] + ] # ensure that xyz coordinates are a string if "xyz" in system_data and not isinstance(system_data["xyz"], str): system_data["xyz"] = "\n".join(system_data["xyz"]) # get a name for the molecule + name = molname if "name" in system_data: name = system_data["name"] del system_data["name"] - elif molname is None: + if name is None: log.critical("Name not specified in benchmark entry and not " "provided as argument to the method.", "Molecule: from_benchmark") - else: - name = molname properties = benchmark_entry.get("properties", None) return cls(name, benchmark_id, system_data, properties) @@ -154,6 +155,8 @@ def remove(self, key, *values) -> 'MoleculeList': return self._filter(key, lambda v: v not in values) def apply_stochiometry(self, stochiometry: dict) -> 'MoleculeList': + # TODO: some explanation: e.g., what is the expected form of the + # stochiometry dict? combined_list = MoleculeList() def find_mol(name): @@ -351,6 +354,7 @@ def _join_state_data(self, dst_state, src_state, factor): else: pdict = dst_state[keydict[val["type"]]] if isinstance(property_value, (list, tuple)): + assert isinstance(pdict["value"], (tuple, list)) pdict["value"] = [p0 + p1 for p0, p1 in zip(property_value, pdict["value"])] else: