Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions docs/source/multi-dd.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,25 @@ Explicit conversion
versions, the corresponding data is not copied. IMAS-Python provides logging to indicate
when this happens.

.. rubric:: DD3 -> DD4 special rule: name + identifier -> description + name (GH#59)

IMAS-Python implements an additional explicit conversion rule (see GH#59) to improve
the migration of Machine Description parts of IDSs when moving from major version 3 to 4.
The rule targets simple sibling pairs on the same parent that provide both a "name"
and an "identifier" field and that are NOT part of an "identifier structure" (the
parent must not also have an "index" sibling). When applicable the rule performs the
following renames during explicit DD3->DD4 conversion:

- DD3: parent/name -> DD4: parent/description
- DD3: parent/identifier -> DD4: parent/name

The conversion is applied only when the corresponding target fields exist in the
DD4 definition and when no earlier mapping already covers the same paths. This
is performed by the explicit conversion machinery (for example via
imas.convert_ids or DBEntry explicit conversion) and is not guaranteed to be
applied by automatic conversion when reading/writing from a backend.

In some cases, such as the rule above, the reverse conversion is also allowed (DD 4.0.0 -> 3.41.1).

.. _`Supported conversions`:

Expand Down
86 changes: 70 additions & 16 deletions imas/ids_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,10 @@ def _build_map(self, old: Element, new: Element) -> None:
old_path_set = set(old_paths)
new_path_set = set(new_paths)

# expose the path->Element maps as members so other methods can reuse them
self.old_paths = old_paths
self.new_paths = new_paths

def process_parent_renames(path: str) -> str:
# Apply any parent AoS/structure rename
# Loop in reverse order to find the closest parent which was renamed:
Expand All @@ -222,20 +226,6 @@ def get_old_path(path: str, previous_name: str) -> str:
old_path = previous_name
return process_parent_renames(old_path)

def add_rename(old_path: str, new_path: str):
old_item = old_paths[old_path]
new_item = new_paths[new_path]
self.new_to_old[new_path] = (
old_path,
_get_tbp(old_item, old_paths),
_get_ctxpath(old_path, old_paths),
)
self.old_to_new[old_path] = (
new_path,
_get_tbp(new_item, new_paths),
_get_ctxpath(new_path, new_paths),
)

# Iterate through all NBC metadata and add entries
for new_item in new.iterfind(".//field[@change_nbc_description]"):
new_path = new_item.get("path")
Expand Down Expand Up @@ -275,14 +265,16 @@ def add_rename(old_path: str, new_path: str):
self.version_old,
)
elif self._check_data_type(old_item, new_item):
add_rename(old_path, new_path)
# use class helper to register simple renames and
# reciprocal mappings
self._add_rename(old_path, new_path)
if old_item.get("data_type") in DDVersionMap.STRUCTURE_TYPES:
# Add entries for common sub-elements
for path in old_paths:
if path.startswith(old_path):
npath = path.replace(old_path, new_path, 1)
if npath in new_path_set:
add_rename(path, npath)
self._add_rename(path, npath)
elif nbc_description == "type_changed":
pass # We will handle this (if possible) in self._check_data_type
elif nbc_description == "repeat_children_first_point":
Expand Down Expand Up @@ -334,6 +326,28 @@ def add_rename(old_path: str, new_path: str):
if self.version_old.major == 3 and new_version and new_version.major == 4:
self._apply_3to4_conversion(old, new)

def _add_rename(self, old_path: str, new_path: str) -> None:
    """Record a plain rename between ``old_path`` and ``new_path``.

    Both directions are stored: ``self.old_to_new`` receives the forward
    entry and ``self.new_to_old`` receives the reverse entry. The elements
    are fetched by subscript from ``self.old_paths`` and ``self.new_paths``,
    so each path has to be present in its respective map.

    Args:
        old_path: Path of the element in the old definition.
        new_path: Path of the element in the new definition.
    """
    source_item = self.old_paths[old_path]
    target_item = self.new_paths[new_path]

    # Old -> new entry, described in terms of the new element.
    forward_entry = (
        new_path,
        _get_tbp(target_item, self.new_paths),
        _get_ctxpath(new_path, self.new_paths),
    )
    self.old_to_new[old_path] = forward_entry

    # New -> old entry, described in terms of the old element.
    reverse_entry = (
        old_path,
        _get_tbp(source_item, self.old_paths),
        _get_ctxpath(old_path, self.old_paths),
    )
    self.new_to_old[new_path] = reverse_entry

def _apply_3to4_conversion(self, old: Element, new: Element) -> None:
# Postprocessing for COCOS definition change:
cocos_paths = []
Expand Down Expand Up @@ -391,6 +405,46 @@ def _apply_3to4_conversion(self, old: Element, new: Element) -> None:
to_update[p] = v
self.old_to_new.path.update(to_update)

# GH#59: To improve further the conversion of DD3 to DD4, especially the
# Machine Description part of the IDSs, we would like to add a 3to4 specific
# rule to convert any siblings name + identifier (that are not part of an
# identifier structure, meaning that there is no index sibling) into
# description + name. Meaning:
# parent/name (DD3) -> parent/description (DD4)
# parent/identifier (DD3) -> parent/name (DD4)
# Only perform the mapping if the corresponding target fields exist in the
# new DD and if we don't already have a mapping for the involved paths.
# use self.old_paths and self.new_paths set in _build_map
for p in self.old_paths:
# look for name children
if not p.endswith("/name"):
continue
parent = p.rsplit("/", 1)[0]
name_path = f"{parent}/name"
id_path = f"{parent}/identifier"
index_path = f"{parent}/index"
desc_path = f"{parent}/description"
new_name_path = name_path

# Skip this parent unless both 'name' and 'identifier' exist in the old DD
if name_path not in self.old_paths or id_path not in self.old_paths:
continue
# exclude identifier-structure (has index sibling)
if index_path in self.old_paths:
continue

# Ensure the candidate target fields exist in the new DD
if desc_path not in self.new_paths or new_name_path not in self.new_paths:
continue

# Map DD3 name -> DD4 description
if name_path not in self.old_to_new.path:
self._add_rename(name_path, desc_path)

# Map DD3 identifier -> DD4 name
if id_path in self.old_to_new.path:
self._add_rename(id_path, new_name_path)

def _map_missing(self, is_new: bool, missing_paths: Set[str]):
rename_map = self.new_to_old if is_new else self.old_to_new
# Find all structures which have a renamed sub-item
Expand Down
42 changes: 42 additions & 0 deletions imas/test/test_ids_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,48 @@ def test_3to4_migrate_deprecated_fields(): # GH#55
assert cp4.profiles_1d[0].ion[0].name == "y"


def test_3to4_name_identifier_mapping_magnetics():
    """Explicit 3.40.1 -> 4.0.0 conversion maps name/identifier siblings.

    Expected result for a probe without an ``index`` sibling:
    DD3 ``name`` ends up in DD4 ``description`` and DD3 ``identifier``
    ends up in DD4 ``name``.
    """
    # Build a magnetics IDS in the old (DD 3.40.1) layout
    magnetics_dd3 = IDSFactory("3.40.1").magnetics()
    magnetics_dd3.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS

    # One probe carrying both a name and an identifier
    magnetics_dd3.b_field_pol_probe.resize(1)
    probe_dd3 = magnetics_dd3.b_field_pol_probe[0]
    probe_dd3.name = "TEST_NAME"
    probe_dd3.identifier = "TEST_IDENTIFIER"

    # Explicitly convert to DD 4.0.0
    magnetics_dd4 = convert_ids(magnetics_dd3, "4.0.0")
    probe_dd4 = magnetics_dd4.b_field_pol_probe[0]

    # The old name is now the description
    assert probe_dd4.description == "TEST_NAME"
    # The old identifier is now the name
    assert probe_dd4.name == "TEST_IDENTIFIER"


def test_4to3_name_identifier_mapping_magnetics():
    """Explicit 4.0.0 -> 3.40.1 conversion reverses the name/identifier rule.

    Expected result for a probe without an ``index`` sibling:
    DD4 ``description`` ends up in DD3 ``name`` and DD4 ``name`` ends up
    in DD3 ``identifier``.
    """
    # Build a magnetics IDS in the new (DD 4.0.0) layout
    magnetics_dd4 = IDSFactory("4.0.0").magnetics()
    magnetics_dd4.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS

    # One probe carrying both a description and a name
    magnetics_dd4.b_field_pol_probe.resize(1)
    probe_dd4 = magnetics_dd4.b_field_pol_probe[0]
    probe_dd4.description = "TEST_DESCRIPTION"
    probe_dd4.name = "TEST_NAME"

    # Explicitly convert back to DD 3.40.1
    magnetics_dd3 = convert_ids(magnetics_dd4, "3.40.1")
    probe_dd3 = magnetics_dd3.b_field_pol_probe[0]

    # The new description maps back onto the old name
    assert probe_dd3.name == "TEST_DESCRIPTION"
    # The new name maps back onto the old identifier
    assert probe_dd3.identifier == "TEST_NAME"


def test_3to4_cocos_hardcoded_paths():
# Check for existence in 3.42.0
factory = IDSFactory("3.42.0")
Expand Down
Loading