diff --git a/docs/source/multi-dd.rst b/docs/source/multi-dd.rst index bef1fe5..bb71e85 100644 --- a/docs/source/multi-dd.rst +++ b/docs/source/multi-dd.rst @@ -117,6 +117,25 @@ Explicit conversion versions, the corresponding data is not copied. IMAS-Python provides logging to indicate when this happens. +.. rubric:: DD3 -> DD4 special rule: name + identifier -> description + name (GH#59) + +IMAS-Python implements an additional explicit conversion rule (see GH#59) to improve +migration of Machine Description parts of IDSs when moving from major version 3 to 4. +The rule targets simple sibling pairs on the same parent that provide both a "name" +and an "identifier" field and that are NOT part of an "identifier structure" (the +parent must not also have an "index" sibling). When applicable, the rule performs the +following renames during explicit DD3->DD4 conversion: + +- DD3: parent/name -> DD4: parent/description +- DD3: parent/identifier -> DD4: parent/name + +The conversion is applied only when the corresponding target fields exist in the +DD4 definition and when no earlier mapping already covers the same paths. This +is performed by the explicit conversion machinery (for example via +imas.convert_ids or DBEntry explicit conversion) and is not guaranteed to be +applied by automatic conversion when reading/writing from a backend. + +In some cases, such as the rule above, the reverse conversion is also allowed (e.g. DD 4.0.0 -> 3.41.1). .. 
_`Supported conversions`: diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 559e8a3..72318a9 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -201,6 +201,10 @@ def _build_map(self, old: Element, new: Element) -> None: old_path_set = set(old_paths) new_path_set = set(new_paths) + # expose the path->Element maps as members so other methods can reuse them + self.old_paths = old_paths + self.new_paths = new_paths + def process_parent_renames(path: str) -> str: # Apply any parent AoS/structure rename # Loop in reverse order to find the closest parent which was renamed: @@ -222,20 +226,6 @@ def get_old_path(path: str, previous_name: str) -> str: old_path = previous_name return process_parent_renames(old_path) - def add_rename(old_path: str, new_path: str): - old_item = old_paths[old_path] - new_item = new_paths[new_path] - self.new_to_old[new_path] = ( - old_path, - _get_tbp(old_item, old_paths), - _get_ctxpath(old_path, old_paths), - ) - self.old_to_new[old_path] = ( - new_path, - _get_tbp(new_item, new_paths), - _get_ctxpath(new_path, new_paths), - ) - # Iterate through all NBC metadata and add entries for new_item in new.iterfind(".//field[@change_nbc_description]"): new_path = new_item.get("path") @@ -275,14 +265,16 @@ def add_rename(old_path: str, new_path: str): self.version_old, ) elif self._check_data_type(old_item, new_item): - add_rename(old_path, new_path) + # use class helper to register simple renames and + # reciprocal mappings + self._add_rename(old_path, new_path) if old_item.get("data_type") in DDVersionMap.STRUCTURE_TYPES: # Add entries for common sub-elements for path in old_paths: if path.startswith(old_path): npath = path.replace(old_path, new_path, 1) if npath in new_path_set: - add_rename(path, npath) + self._add_rename(path, npath) elif nbc_description == "type_changed": pass # We will handle this (if possible) in self._check_data_type elif nbc_description == "repeat_children_first_point": @@ -334,6 +326,28 @@ def 
add_rename(old_path: str, new_path: str): if self.version_old.major == 3 and new_version and new_version.major == 4: self._apply_3to4_conversion(old, new) + def _add_rename(self, old_path: str, new_path: str) -> None: + """Register a simple rename from old_path -> new_path using the + path->Element maps stored on the instance (self.old_paths/self.new_paths). + The reciprocal mapping (new_path -> old_path) is always registered too. + """ + old_item = self.old_paths[old_path] + new_item = self.new_paths[new_path] + + # forward mapping + self.old_to_new[old_path] = ( + new_path, + _get_tbp(new_item, self.new_paths), + _get_ctxpath(new_path, self.new_paths), + ) + + # reciprocal mapping + self.new_to_old[new_path] = ( + old_path, + _get_tbp(old_item, self.old_paths), + _get_ctxpath(old_path, self.old_paths), + ) + def _apply_3to4_conversion(self, old: Element, new: Element) -> None: # Postprocessing for COCOS definition change: cocos_paths = [] @@ -391,6 +405,46 @@ def _apply_3to4_conversion(self, old: Element, new: Element) -> None: to_update[p] = v self.old_to_new.path.update(to_update) + # GH#59: To further improve the conversion of DD3 to DD4, especially the + # Machine Description part of the IDSs, apply a 3to4 specific + # rule that converts sibling name + identifier pairs (that are not part of an + # identifier structure, meaning that there is no index sibling) into + # description + name. Meaning: + # parent/name (DD3) -> parent/description (DD4) + # parent/identifier (DD3) -> parent/name (DD4) + # Only perform the mapping if the corresponding target fields exist in the + # new DD (see the per-path old_to_new checks below for existing mappings). 
+ # use self.old_paths and self.new_paths set in _build_map + for p in self.old_paths: + # look for name children + if not p.endswith("/name"): + continue + parent = p.rsplit("/", 1)[0] + name_path = f"{parent}/name" + id_path = f"{parent}/identifier" + index_path = f"{parent}/index" + desc_path = f"{parent}/description" + new_name_path = name_path + + # Skip this parent unless both 'name' and 'identifier' exist in the old DD + if name_path not in self.old_paths or id_path not in self.old_paths: + continue + # exclude identifier-structure (has index sibling) + if index_path in self.old_paths: + continue + + # Ensure the candidate target fields exist in the new DD + if desc_path not in self.new_paths or new_name_path not in self.new_paths: + continue + + # Map DD3 name -> DD4 description (unless name_path already has an entry) + if name_path not in self.old_to_new.path: + self._add_rename(name_path, desc_path) + + # Map DD3 identifier -> DD4 name (only when id_path already has an entry) + if id_path in self.old_to_new.path: + self._add_rename(id_path, new_name_path) + def _map_missing(self, is_new: bool, missing_paths: Set[str]): rename_map = self.new_to_old if is_new else self.old_to_new # Find all structures which have a renamed sub-item diff --git a/imas/test/test_ids_convert.py b/imas/test/test_ids_convert.py index af6f3a5..118cd26 100644 --- a/imas/test/test_ids_convert.py +++ b/imas/test/test_ids_convert.py @@ -533,6 +533,48 @@ def test_3to4_migrate_deprecated_fields(): # GH#55 assert cp4.profiles_1d[0].ion[0].name == "y" + +def test_3to4_name_identifier_mapping_magnetics(): # GH#59 + # Create source IDS using DD 3.40.1 + factory = IDSFactory("3.40.1") + + src = factory.magnetics() + src.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS + # Populate a parent that has name + identifier (no 'index' sibling) + src.b_field_pol_probe.resize(1) + src.b_field_pol_probe[0].name = "TEST_NAME" + src.b_field_pol_probe[0].identifier = "TEST_IDENTIFIER" + + # Convert to DD 4.0.0 + dst = convert_ids(src, "4.0.0") + + # DD3 name -> DD4 description + 
assert dst.b_field_pol_probe[0].description == "TEST_NAME" + + # DD3 identifier -> DD4 name + assert dst.b_field_pol_probe[0].name == "TEST_IDENTIFIER" + + +def test_4to3_name_identifier_mapping_magnetics(): # GH#59 + # Create source IDS using DD 4.0.0 + factory = IDSFactory("4.0.0") + + src = factory.magnetics() + src.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS + # Populate a parent that has description + name (no 'index' sibling) + src.b_field_pol_probe.resize(1) + src.b_field_pol_probe[0].description = "TEST_DESCRIPTION" + src.b_field_pol_probe[0].name = "TEST_NAME" + + # Convert to DD 3.40.1 + dst = convert_ids(src, "3.40.1") + + # DD4 description -> DD3 name + assert dst.b_field_pol_probe[0].name == "TEST_DESCRIPTION" + + # DD4 name -> DD3 identifier + assert dst.b_field_pol_probe[0].identifier == "TEST_NAME" + + def test_3to4_cocos_hardcoded_paths(): # Check for existence in 3.42.0 factory = IDSFactory("3.42.0")