From 5b02d9618e07f50021cc98a007ad4fb248964192 Mon Sep 17 00:00:00 2001 From: deucalioncodes Date: Sat, 21 Mar 2026 10:27:17 +0100 Subject: [PATCH 1/4] fix: add for_export param to serialize() and load_some resilience (closes #4) - serialize(for_export=True) skips OneToMany relations (reconstructed from reverse ManyToOne) and serializes OneToOne only on one deterministic side (alphabetically-earlier entity type carries the reference). This prevents circular/dangling references in exported JSON. - load_some() now catches ValueError/AttributeError per entity, skipping broken entities with dangling relation refs instead of crashing the batch. - 8 new tests: 4 for for_export behavior + 4 issue #4 regression tests covering bidirectional OneToOne import, OneToMany dangling refs, export-eliminates-dangling, and full round-trip. --- ic_python_db/entity.py | 34 ++- tests/src/tests/test_serialization.py | 365 ++++++++++++++++++++++++++ 2 files changed, 394 insertions(+), 5 deletions(-) diff --git a/ic_python_db/entity.py b/ic_python_db/entity.py index 893897d..fc50760 100644 --- a/ic_python_db/entity.py +++ b/ic_python_db/entity.py @@ -511,9 +511,14 @@ def load_some( while len(ret) < count and from_id <= cls.max_id(): logger.info(f"Loading entity {from_id}") - entity = cls.load(str(from_id)) - if entity: - ret.append(entity) + try: + entity = cls.load(str(from_id)) + if entity: + ret.append(entity) + except (ValueError, AttributeError) as e: + # Skip entities with broken/dangling relation references + # (full fix: issue #4 — lazy relation resolution) + logger.warning(f"Skipping {cls.__name__}@{from_id}: {e}") from_id += 1 return ret @@ -559,9 +564,16 @@ def delete(self) -> None: # Remove from context self.__class__._context.discard(self) - def serialize(self) -> Dict[str, Any]: + def serialize(self, for_export: bool = False) -> Dict[str, Any]: """Convert the entity to a serializable dictionary. + Args: + for_export: If True, produce a portable representation suitable for + importing into a different database/canister. This skips + OneToMany relations (reconstructed from reverse ManyToOne) and + serializes OneToOne only on one deterministic side to avoid + circular reference issues during import. + Returns: Dict containing the entity's serializable data """ @@ -603,7 +615,19 @@ def get_entity_reference(entity): if rel_entities: # Check if this is a *ToMany relation that should always be a list rel_prop = getattr(self.__class__, rel_name, None) - from ic_python_db.properties import ManyToMany, OneToMany + from ic_python_db.properties import ManyToMany, ManyToOne, OneToMany, OneToOne + + if for_export: + # Skip OneToMany — always reconstructed from reverse ManyToOne + if isinstance(rel_prop, OneToMany): + continue + # For OneToOne bilateral, only serialize on one deterministic side: + # the entity whose type name is alphabetically <= the target type. + # This ensures exactly one side carries the reference. + if isinstance(rel_prop, OneToOne): + target_type = rel_entities[0]._type if rel_entities else None + if target_type and self._type > target_type: + continue is_to_many = isinstance(rel_prop, (OneToMany, ManyToMany)) diff --git a/tests/src/tests/test_serialization.py b/tests/src/tests/test_serialization.py index 79c9738..b30b8fc 100644 --- a/tests/src/tests/test_serialization.py +++ b/tests/src/tests/test_serialization.py @@ -706,6 +706,371 @@ class Book2(Entity): assert recreated_book.author.name == "Alice" + def test_serialize_for_export_skips_one_to_many(self): + """Test that for_export=True skips OneToMany relations.""" + Database.get_instance().clear() + + parent = Parent(name="Alice") + child1 = Child(name="Bob") + child2 = Child(name="Charlie") + + parent.children = [child1, child2] + child1.parent = parent + child2.parent = parent + + # Normal serialize includes OneToMany + normal_data = parent.serialize() + assert "children" in normal_data, "Normal serialize should include OneToMany" + + # Export serialize skips OneToMany + export_data = parent.serialize(for_export=True) + assert "children" not in export_data, "Export serialize should skip OneToMany" + + # ManyToOne is always included + child_data = child1.serialize(for_export=True) + assert "parent" in child_data, "Export serialize should keep ManyToOne" + assert child_data["parent"] == parent._id + + def test_serialize_for_export_one_to_one_deterministic(self): + """Test that for_export=True serializes OneToOne on only one deterministic side. + + Rule: serialize only if self._type <= target._type (alphabetically). + """ + Database.get_instance().clear() + + parent = Parent(name="Alice") + child1 = Child(name="Bob") + + parent.favorite_child = child1 + + # "Child" < "Parent" → Child serializes favorite_parent + child_export = child1.serialize(for_export=True) + assert "favorite_parent" in child_export, ( + "Child (alphabetically earlier) should serialize OneToOne to Parent" + ) + + # "Parent" > "Child" → Parent does NOT serialize favorite_child + parent_export = parent.serialize(for_export=True) + assert "favorite_child" not in parent_export, ( + "Parent (alphabetically later) should skip OneToOne to Child" + ) + + # Normal serialize includes both sides + parent_normal = parent.serialize() + assert "favorite_child" in parent_normal + + def test_serialize_for_export_round_trip(self): + """Test that for_export serialization can be imported back correctly. + + Import order: Parent first, then Children (children carry ManyToOne refs). + OneToMany on Parent is reconstructed from ManyToOne on Children. + """ + Database.get_instance().clear() + + parent = Parent(name="Alice") + child1 = Child(name="Bob") + child2 = Child(name="Charlie") + + parent.children = [child1, child2] + parent.favorite_child = child1 + child1.siblings = [child2] + + # Serialize for export + parent_data = parent.serialize(for_export=True) + child1_data = child1.serialize(for_export=True) + child2_data = child2.serialize(for_export=True) + + # Verify export format + assert "children" not in parent_data, "OneToMany should be skipped" + assert "favorite_child" not in parent_data, "OneToOne (Parent>Child) should be skipped" + assert "parent" in child1_data, "ManyToOne should be present" + assert "favorite_parent" in child1_data, "OneToOne (Child "Member" → User.member is skipped). + Import order: User first (no member ref), Member second (user ref resolves). + """ + Database.get_instance().clear() + + # Define User↔Member with bidirectional OneToOne (mirrors real realm entities) + class User(Entity): + __alias__ = "name" + name = String() + member = OneToOne("Member4", "user") + + class Member4(Entity): + __alias__ = "id" + id = String() + user = OneToOne("User", "member") + + # Create linked entities + user = User(name="system") + member = Member4(id="mem_9f03f2ee") + member.user = user + + # Verify both sides are set + assert user.member == member + assert member.user == user + + # ── Part 1: for_export=True produces importable JSON ── + user_data = user.serialize(for_export=True) + member_data = member.serialize(for_export=True) + + # User should NOT have 'member' (since "User" > "Member4") + assert "member" not in user_data, ( + f"for_export should skip OneToOne on alphabetically-later side; got {user_data}" + ) + # Member4 SHOULD have 'user' (since "Member4" < "User") + assert "user" in member_data, ( + f"for_export should keep OneToOne on alphabetically-earlier side; got {member_data}" + ) + assert member_data["user"] == "system" # alias + + # Clear and reimport in dependency order + Database.get_instance().clear() + recreated_user = User.deserialize(user_data) # No member ref → no crash + recreated_member = Member4.deserialize(member_data) # user ref → resolves to existing User + + # Both sides should be reconstructed + assert recreated_member.user == recreated_user, "ManyToOne should resolve" + assert recreated_user.member == recreated_member, "Reverse OneToOne should be set" + + # ── Part 2: OLD serialize() would crash on wrong import order ── + Database.get_instance().clear() + + user2 = User(name="alice") + member2 = Member4(id="mem_deadbeef") + member2.user = user2 + + # Normal serialize includes BOTH sides + user2_full = user2.serialize() + member2_full = member2.serialize() + assert "member" in user2_full, "Normal serialize should include both sides" + + # Importing User first with normal data crashes because Member doesn't exist + Database.get_instance().clear() + try: + User.deserialize(user2_full) # has member: "mem_deadbeef" → Member4 doesn't exist + # If deserialize silently skips, that's also fine (it catches ValueError) + except ValueError as e: + assert "mem_deadbeef" in str(e), f"Should reference the missing member alias: {e}" + + def test_issue4_one_to_many_dangling_refs_load_some(self): + """Regression test for issue #4: OneToMany with dangling refs crashes load_some. + + Reproduces the runtime crash: Token stored with balances: [4, 5, 6] but + WalletBalance entities were deleted. Token.load_some() crashes with: + ValueError: No entity of types WalletBalance found with ID or name '4' + + The fix: load_some() catches ValueError/AttributeError and skips broken + entities instead of crashing the entire batch. + """ + Database.get_instance().clear() + + class Account(Entity): + name = String() + entries = OneToMany("Entry4", "account") + + class Entry4(Entity): + label = String() + account = ManyToOne("Account", "entries") + + # Create account with entries + acct = Account(name="Treasury") + e1 = Entry4(label="tx1") + e2 = Entry4(label="tx2") + e1.account = acct + e2.account = acct + assert len(acct.entries) == 2 + + # Serialize WITH OneToMany (internal storage format) + acct_data = acct.serialize() + assert "entries" in acct_data + + # Now delete the entries from DB but leave the account's stored data intact + e1.delete() + e2.delete() + + # Clear entity registry so load() reads from DB + Database.get_instance()._entity_registry = {} + + # load_some should NOT crash — it should skip broken entities + loaded = Account.load_some(from_id=1, count=10) + # The account itself should still load (its entries will fail silently) + assert len(loaded) >= 0, "load_some should not crash on dangling OneToMany refs" + + def test_issue4_for_export_eliminates_dangling_one_to_many(self): + """Regression test: for_export=True prevents OneToMany dangling refs entirely. + + If Token was serialized with for_export=True, the 'balances' OneToMany field + is not included. On import, Token has no forward refs to WalletBalance, + so there's nothing to dangle. + """ + Database.get_instance().clear() + + class Token4(Entity): + name = String() + balances = OneToMany("Balance4", "token") + + class Balance4(Entity): + amount = String() + token = ManyToOne("Token4", "balances") + + token = Token4(name="ICP") + b1 = Balance4(amount="100") + b2 = Balance4(amount="200") + b1.token = token + b2.token = token + + # for_export skips OneToMany + export_data = token.serialize(for_export=True) + assert "balances" not in export_data + + # Import token without balance refs — no crash possible + Database.get_instance().clear() + reimported_token = Token4.deserialize(export_data) + assert reimported_token.name == "ICP" + assert len(reimported_token.balances) == 0 # No balances yet + + # Now import balances with ManyToOne ref — reconstructs OneToMany + b1_data = b1.serialize(for_export=True) + b2_data = b2.serialize(for_export=True) + Balance4.deserialize(b1_data) + Balance4.deserialize(b2_data) + + assert len(reimported_token.balances) == 2, ( + "OneToMany should be reconstructed from ManyToOne during import" + ) + + def test_issue4_full_round_trip_bidirectional(self): + """End-to-end test: serialize(for_export=True) → clear → deserialize. + + Models the exact realm workflow: generate entities in memory, + serialize to JSON, clear DB, import in dependency order. + All relations should be fully reconstructed. + """ + Database.get_instance().clear() + + class Realm4(Entity): + __alias__ = "name" + name = String() + users = OneToMany("User4", "realm") + + class User4(Entity): + __alias__ = "name" + name = String() + realm = ManyToOne("Realm4", "users") + member = OneToOne("Member44", "user") + human = OneToOne("Human4", "user") + + class Human4(Entity): + __alias__ = "name" + name = String() + user = OneToOne("User4", "human") + + class Member44(Entity): + __alias__ = "alias" + alias = String() + user = OneToOne("User4", "member") + + # Generate data (mimics generator.py) + realm = Realm4(name="Agora") + users = [User4(name=f"user_{i}", realm=realm) for i in range(3)] + humans = [Human4(name=f"human_{i}", user=users[i]) for i in range(3)] + members = [Member44(alias=f"mem_{i}", user=users[i]) for i in range(3)] + + # Verify in-memory relations + assert len(realm.users) == 3 + for i in range(3): + assert users[i].member == members[i] + assert users[i].human == humans[i] + + # Serialize for export (dependency order: Realm → Users → Humans → Members) + all_data = ( + [realm.serialize(for_export=True)] + + [u.serialize(for_export=True) for u in users] + + [h.serialize(for_export=True) for h in humans] + + [m.serialize(for_export=True) for m in members] + ) + + # Verify no OneToMany or wrong-side OneToOne in export + realm_data = all_data[0] + assert "users" not in realm_data, "OneToMany should not be in export" + user_data = all_data[1] + # "User4" > "Human4" → User4 should NOT have human + assert "human" not in user_data, "OneToOne (User4>Human4) should be skipped" + # "User4" > "Member44" → User4 should NOT have member + assert "member" not in user_data, "OneToOne (User4>Member44) should be skipped" + + # Clear DB completely + Database.get_instance().clear() + + # Import in dependency order — should NOT crash + # Use specific classes (Database.clear() wipes entity type registry, + # so generic Entity.deserialize can't resolve locally-defined classes) + type_map = { + "Realm4": Realm4, "User4": User4, + "Human4": Human4, "Member44": Member44, + } + for item in all_data: + type_map[item["_type"]].deserialize(item) + + # Verify all relations are reconstructed + loaded_realm = Realm4["Agora"] + assert loaded_realm is not None + assert len(loaded_realm.users) == 3, "OneToMany should be reconstructed" + + loaded_user0 = User4["user_0"] + assert loaded_user0.realm == loaded_realm, "ManyToOne should resolve" + assert loaded_user0.human is not None, "OneToOne reverse should be set" + assert loaded_user0.human.name == "human_0" + assert loaded_user0.member is not None, "OneToOne reverse should be set" + assert loaded_user0.member.alias == "mem_0" + + def run(test_name: str = None, test_var: str = None): tester = Tester(TestSerialization) return tester.run_tests() From d77bd473730edec314e1992df600ceb75a48e584 Mon Sep 17 00:00:00 2001 From: deucalioncodes Date: Sat, 21 Mar 2026 10:30:32 +0100 Subject: [PATCH 2/4] style: fix black formatting and flake8 unused variable --- ic_python_db/entity.py | 7 ++- tests/src/tests/test_serialization.py | 76 +++++++++++++++++---------- 2 files changed, 53 insertions(+), 30 deletions(-) diff --git a/ic_python_db/entity.py b/ic_python_db/entity.py index fc50760..1c70962 100644 --- a/ic_python_db/entity.py +++ b/ic_python_db/entity.py @@ -615,7 +615,12 @@ def get_entity_reference(entity): if rel_entities: # Check if this is a *ToMany relation that should always be a list rel_prop = getattr(self.__class__, rel_name, None) - from ic_python_db.properties import ManyToMany, ManyToOne, OneToMany, OneToOne + from ic_python_db.properties import ( + ManyToMany, + ManyToOne, + OneToMany, + OneToOne, + ) if for_export: # Skip OneToMany — always reconstructed from reverse ManyToOne diff --git a/tests/src/tests/test_serialization.py b/tests/src/tests/test_serialization.py index b30b8fc..85b7ea7 100644 --- a/tests/src/tests/test_serialization.py +++ b/tests/src/tests/test_serialization.py @@ -705,7 +705,6 @@ class Book2(Entity): assert recreated_book.author is not None assert recreated_book.author.name == "Alice" - def test_serialize_for_export_skips_one_to_many(self): """Test that for_export=True skips OneToMany relations.""" Database.get_instance().clear() @@ -745,15 +744,15 @@ def test_serialize_for_export_one_to_one_deterministic(self): # "Child" < "Parent" → Child serializes favorite_parent child_export = child1.serialize(for_export=True) - assert "favorite_parent" in child_export, ( - "Child (alphabetically earlier) should serialize OneToOne to Parent" - ) + assert ( + "favorite_parent" in child_export + ), "Child (alphabetically earlier) should serialize OneToOne to Parent" # "Parent" > "Child" → Parent does NOT serialize favorite_child parent_export = parent.serialize(for_export=True) - assert "favorite_child" not in parent_export, ( - "Parent (alphabetically later) should skip OneToOne to Child" - ) + assert ( + "favorite_child" not in parent_export + ), "Parent (alphabetically later) should skip OneToOne to Child" # Normal serialize includes both sides parent_normal = parent.serialize() @@ -782,9 +781,13 @@ def test_serialize_for_export_round_trip(self): # Verify export format assert "children" not in parent_data, "OneToMany should be skipped" - assert "favorite_child" not in parent_data, "OneToOne (Parent>Child) should be skipped" + assert ( + "favorite_child" not in parent_data + ), "OneToOne (Parent>Child) should be skipped" assert "parent" in child1_data, "ManyToOne should be present" - assert "favorite_parent" in child1_data, "OneToOne (Child "Member4") - assert "member" not in user_data, ( - f"for_export should skip OneToOne on alphabetically-later side; got {user_data}" - ) + assert ( + "member" not in user_data + ), f"for_export should skip OneToOne on alphabetically-later side; got {user_data}" # Member4 SHOULD have 'user' (since "Member4" < "User") - assert "user" in member_data, ( - f"for_export should keep OneToOne on alphabetically-earlier side; got {member_data}" - ) + assert ( + "user" in member_data + ), f"for_export should keep OneToOne on alphabetically-earlier side; got {member_data}" assert member_data["user"] == "system" # alias # Clear and reimport in dependency order Database.get_instance().clear() recreated_user = User.deserialize(user_data) # No member ref → no crash - recreated_member = Member4.deserialize(member_data) # user ref → resolves to existing User + recreated_member = Member4.deserialize( + member_data + ) # user ref → resolves to existing User # Both sides should be reconstructed assert recreated_member.user == recreated_user, "ManyToOne should resolve" - assert recreated_user.member == recreated_member, "Reverse OneToOne should be set" + assert ( + recreated_user.member == recreated_member + ), "Reverse OneToOne should be set" # ── Part 2: OLD serialize() would crash on wrong import order ── Database.get_instance().clear() @@ -887,16 +899,20 @@ class Member4(Entity): # Normal serialize includes BOTH sides user2_full = user2.serialize() - member2_full = member2.serialize() + _ = member2.serialize() # noqa: F841 assert "member" in user2_full, "Normal serialize should include both sides" # Importing User first with normal data crashes because Member doesn't exist Database.get_instance().clear() try: - User.deserialize(user2_full) # has member: "mem_deadbeef" → Member4 doesn't exist + User.deserialize( + user2_full + ) # has member: "mem_deadbeef" → Member4 doesn't exist # If deserialize silently skips, that's also fine (it catches ValueError) except ValueError as e: - assert "mem_deadbeef" in str(e), f"Should reference the missing member alias: {e}" + assert "mem_deadbeef" in str( + e + ), f"Should reference the missing member alias: {e}" def test_issue4_one_to_many_dangling_refs_load_some(self): """Regression test for issue #4: OneToMany with dangling refs crashes load_some. @@ -981,9 +997,9 @@ class Balance4(Entity): Balance4.deserialize(b1_data) Balance4.deserialize(b2_data) - assert len(reimported_token.balances) == 2, ( - "OneToMany should be reconstructed from ManyToOne during import" - ) + assert ( + len(reimported_token.balances) == 2 + ), "OneToMany should be reconstructed from ManyToOne during import" def test_issue4_full_round_trip_bidirectional(self): """End-to-end test: serialize(for_export=True) → clear → deserialize. @@ -1052,8 +1068,10 @@ class Member44(Entity): # Use specific classes (Database.clear() wipes entity type registry, # so generic Entity.deserialize can't resolve locally-defined classes) type_map = { - "Realm4": Realm4, "User4": User4, - "Human4": Human4, "Member44": Member44, + "Realm4": Realm4, + "User4": User4, + "Human4": Human4, + "Member44": Member44, } for item in all_data: type_map[item["_type"]].deserialize(item) From 814c3bea3f84fb0466548cf2369cc9994e0c54c0 Mon Sep 17 00:00:00 2001 From: deucalioncodes Date: Sat, 21 Mar 2026 10:48:02 +0100 Subject: [PATCH 3/4] =?UTF-8?q?refactor:=20remove=20for=5Fexport=20param?= =?UTF-8?q?=20=E2=80=94=20serialize()=20always=20skips=20OneToMany=20and?= =?UTF-8?q?=20picks=20one=20side=20of=20OneToOne?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Breaking change (no production use yet): serialize() now always produces clean output without OneToMany relations and with deterministic OneToOne side selection. This simplifies the API — callers no longer need to pass for_export=True. Updated all tests to match new default behavior. --- ic_python_db/entity.py | 32 ++- tests/src/tests/test_serialization.py | 291 ++++++++------------------ 2 files changed, 99 insertions(+), 224 deletions(-) diff --git a/ic_python_db/entity.py b/ic_python_db/entity.py index 1c70962..6a2ba0a 100644 --- a/ic_python_db/entity.py +++ b/ic_python_db/entity.py @@ -564,15 +564,13 @@ def delete(self) -> None: # Remove from context self.__class__._context.discard(self) - def serialize(self, for_export: bool = False) -> Dict[str, Any]: + def serialize(self) -> Dict[str, Any]: """Convert the entity to a serializable dictionary. - Args: - for_export: If True, produce a portable representation suitable for - importing into a different database/canister. This skips - OneToMany relations (reconstructed from reverse ManyToOne) and - serializes OneToOne only on one deterministic side to avoid - circular reference issues during import. + OneToMany relations are always skipped (they are reconstructed from the + reverse ManyToOne side). For bilateral OneToOne relations, only the + alphabetically-earlier entity type serializes the reference, avoiding + circular dependencies during sequential import. Returns: Dict containing the entity's serializable data @@ -617,22 +615,20 @@ def get_entity_reference(entity): rel_prop = getattr(self.__class__, rel_name, None) from ic_python_db.properties import ( ManyToMany, - ManyToOne, OneToMany, OneToOne, ) - if for_export: - # Skip OneToMany — always reconstructed from reverse ManyToOne - if isinstance(rel_prop, OneToMany): + # Skip OneToMany — always reconstructed from reverse ManyToOne + if isinstance(rel_prop, OneToMany): + continue + # For OneToOne bilateral, only serialize on one deterministic side: + # the entity whose type name is alphabetically <= the target type. + # This ensures exactly one side carries the reference. + if isinstance(rel_prop, OneToOne): + target_type = rel_entities[0]._type if rel_entities else None + if target_type and self._type > target_type: continue - # For OneToOne bilateral, only serialize on one deterministic side: - # the entity whose type name is alphabetically <= the target type. - # This ensures exactly one side carries the reference. - if isinstance(rel_prop, OneToOne): - target_type = rel_entities[0]._type if rel_entities else None - if target_type and self._type > target_type: - continue is_to_many = isinstance(rel_prop, (OneToMany, ManyToMany)) diff --git a/tests/src/tests/test_serialization.py b/tests/src/tests/test_serialization.py index 85b7ea7..da1470e 100644 --- a/tests/src/tests/test_serialization.py +++ b/tests/src/tests/test_serialization.py @@ -32,7 +32,12 @@ def setUp(self): Database.get_instance().clear() def test_serialization_format(self): - """Test that relations are serialized in the correct format.""" + """Test that relations are serialized in the correct format. + + OneToMany is always skipped (reconstructed from reverse ManyToOne). + OneToOne is serialized only on the alphabetically-earlier entity type side. + "Child" < "Parent" → Child serializes favorite_parent; Parent skips favorite_child. + """ # Create entities parent = Parent(name="Alice") @@ -48,21 +53,19 @@ def test_serialization_format(self): parent_data = parent.serialize() child1_data = child1.serialize() - # OneToMany should always be a list, even with single item - assert isinstance( - parent_data["children"], list - ), "OneToMany should serialize as list" - assert parent_data["children"] == [ - child1._id - ], f"Expected ['{child1._id}'], got {parent_data['children']}" + # OneToMany is always skipped in serialize + assert "children" not in parent_data, "OneToMany should be skipped" - # OneToOne should be a single value - assert isinstance( - parent_data["favorite_child"], str - ), "OneToOne should serialize as single value" + # OneToOne: "Parent" > "Child" → Parent skips favorite_child assert ( - parent_data["favorite_child"] == child1._id - ), f"Expected '{child1._id}', got {parent_data['favorite_child']}" + "favorite_child" not in parent_data + ), "OneToOne on alphabetically-later side should be skipped" + + # OneToOne: "Child" < "Parent" → Child serializes favorite_parent + assert isinstance( + child1_data["favorite_parent"], str + ), "OneToOne on alphabetically-earlier side should serialize as single value" + assert child1_data["favorite_parent"] == parent._id # ManyToOne should be a single value assert isinstance( @@ -80,46 +83,12 @@ def test_serialization_format(self): child2._id ], f"Expected ['{child2._id}'], got {child1_data['siblings']}" - # Test string representation of serialized data - parent_str = str(parent_data) - child1_str = str(child1_data) - - # Verify OneToMany appears as list in string format - assert ( - f"'children': ['{child1._id}']" in parent_str - ), f"OneToMany should appear as list in string: {parent_str}" - - # Verify OneToOne appears as single value in string format - assert ( - f"'favorite_child': '{child1._id}'" in parent_str - ), f"OneToOne should appear as single value in string: {parent_str}" - - # Verify ManyToOne appears as single value in string format - assert ( - f"'parent': '{parent._id}'" in child1_str - ), f"ManyToOne should appear as single value in string: {child1_str}" - - # Verify ManyToMany appears as list in string format - assert ( - f"'siblings': ['{child2._id}']" in child1_str - ), f"ManyToMany should appear as list in string: {child1_str}" - # Test with multiple items child3 = Child(name="David") - parent.children = [child1, child3] # OneToMany with multiple items child1.siblings = [child2, child3] # ManyToMany with multiple items - parent_data = parent.serialize() child1_data = child1.serialize() - # Should still be lists - assert isinstance( - parent_data["children"], list - ), "OneToMany should serialize as list" - assert ( - len(parent_data["children"]) == 2 - ), "OneToMany should contain both children" - assert isinstance( child1_data["siblings"], list ), "ManyToMany should serialize as list" @@ -127,10 +96,7 @@ def test_serialization_format(self): len(child1_data["siblings"]) == 2 ), "ManyToMany should contain both siblings" - assert ( - str(parent_data) - == "{'_type': 'Parent', '_id': '1', 'name': 'Alice', 'children': ['1', '3'], 'favorite_child': '1'}" - ) + assert str(parent_data) == "{'_type': 'Parent', '_id': '1', 'name': 'Alice'}" assert ( str(child1_data) == "{'_type': 'Child', '_id': '1', 'name': 'Bob', 'parent': '1', 'favorite_parent': '1', 'siblings': ['2', '3']}" @@ -195,7 +161,12 @@ def test_deserialization(self): assert result._id is not None, "Should auto-generate _id" def test_round_trip_serialization(self): - """Test that serialize -> deserialize produces equivalent entities.""" + """Test that serialize -> deserialize produces equivalent entities. + + Import order: Parent first (no forward refs), then Children (ManyToOne + + OneToOne refs to Parent). OneToMany on Parent is reconstructed from + ManyToOne on Children. + """ Database.get_instance().clear() # Create entities with complex relationships @@ -216,17 +187,23 @@ def test_round_trip_serialization(self): child2_data = child2.serialize() child3_data = child3.serialize() + # Parent should have no forward refs (OneToMany + OneToOne skipped) + assert "children" not in parent_data + assert "favorite_child" not in parent_data + + # Children carry ManyToOne + OneToOne refs + assert "parent" in child1_data + assert "favorite_parent" in child1_data + # Clear and recreate from serialized data Database.get_instance().clear() - # Recreate entities (order matters for relations) + # Import order: Parent first, then Children + recreated_parent = Parent.deserialize(parent_data) Child.deserialize(child3_data) Child.deserialize(child2_data) recreated_child1 = Child.deserialize(child1_data) - recreated_parent = Parent.deserialize(parent_data) - # Relations are attempted to be resolved immediately during deserialize - # (unresolvable relations are silently skipped) # Verify the recreated entities have the same serialized output recreated_parent_data = recreated_parent.serialize() recreated_child1_data = recreated_child1.serialize() @@ -236,7 +213,6 @@ def test_round_trip_serialization(self): print(f"Original child1: {child1_data}") print(f"Recreated child1: {recreated_child1_data}") - # Verify that serialized data matches (allowing for different ordering in many-to-many relations) assert ( recreated_parent_data == parent_data ), f"Parent data mismatch:\nOriginal: {parent_data}\nRecreated: {recreated_parent_data}" @@ -256,7 +232,7 @@ def test_round_trip_serialization(self): assert recreated_parent.name == "Alice" assert recreated_child1.name == "Bob" - # Verify relations are properly restored + # Verify relations are properly restored via reverse links assert len(recreated_parent.children) == 2, "Parent should have 2 children" assert ( recreated_parent.favorite_child is not None @@ -705,8 +681,8 @@ class Book2(Entity): assert recreated_book.author is not None assert recreated_book.author.name == "Alice" - def test_serialize_for_export_skips_one_to_many(self): - """Test that for_export=True skips OneToMany relations.""" + def test_serialize_skips_one_to_many(self): + """Test that serialize() always skips OneToMany relations.""" Database.get_instance().clear() parent = Parent(name="Alice") @@ -717,21 +693,17 @@ def test_serialize_for_export_skips_one_to_many(self): child1.parent = parent child2.parent = parent - # Normal serialize includes OneToMany - normal_data = parent.serialize() - assert "children" in normal_data, "Normal serialize should include OneToMany" - - # Export serialize skips OneToMany - export_data = parent.serialize(for_export=True) - assert "children" not in export_data, "Export serialize should skip OneToMany" + # serialize skips OneToMany + data = parent.serialize() + assert "children" not in data, "serialize should skip OneToMany" # ManyToOne is always included - child_data = child1.serialize(for_export=True) - assert "parent" in child_data, "Export serialize should keep ManyToOne" + child_data = child1.serialize() + assert "parent" in child_data, "serialize should keep ManyToOne" assert child_data["parent"] == parent._id - def test_serialize_for_export_one_to_one_deterministic(self): - """Test that for_export=True serializes OneToOne on only one deterministic side. + def test_serialize_one_to_one_deterministic(self): + """Test that serialize() emits OneToOne on only one deterministic side. Rule: serialize only if self._type <= target._type (alphabetically). """ @@ -743,78 +715,19 @@ def test_serialize_for_export_one_to_one_deterministic(self): parent.favorite_child = child1 # "Child" < "Parent" → Child serializes favorite_parent - child_export = child1.serialize(for_export=True) + child_data = child1.serialize() assert ( - "favorite_parent" in child_export + "favorite_parent" in child_data ), "Child (alphabetically earlier) should serialize OneToOne to Parent" # "Parent" > "Child" → Parent does NOT serialize favorite_child - parent_export = parent.serialize(for_export=True) - assert ( - "favorite_child" not in parent_export - ), "Parent (alphabetically later) should skip OneToOne to Child" - - # Normal serialize includes both sides - parent_normal = parent.serialize() - assert "favorite_child" in parent_normal - - def test_serialize_for_export_round_trip(self): - """Test that for_export serialization can be imported back correctly. - - Import order: Parent first, then Children (children carry ManyToOne refs). - OneToMany on Parent is reconstructed from ManyToOne on Children. - """ - Database.get_instance().clear() - - parent = Parent(name="Alice") - child1 = Child(name="Bob") - child2 = Child(name="Charlie") - - parent.children = [child1, child2] - parent.favorite_child = child1 - child1.siblings = [child2] - - # Serialize for export - parent_data = parent.serialize(for_export=True) - child1_data = child1.serialize(for_export=True) - child2_data = child2.serialize(for_export=True) - - # Verify export format - assert "children" not in parent_data, "OneToMany should be skipped" + parent_data = parent.serialize() assert ( "favorite_child" not in parent_data - ), "OneToOne (Parent>Child) should be skipped" - assert "parent" in child1_data, "ManyToOne should be present" - assert ( - "favorite_parent" in child1_data - ), "OneToOne (Child "Member" → User.member is skipped). + The fix: serialize() skips the OneToOne on the alphabetically-later + side ("User" > "Member4" → User.member is skipped). Import order: User first (no member ref), Member second (user ref resolves). """ Database.get_instance().clear() @@ -863,18 +775,18 @@ class Member4(Entity): assert user.member == member assert member.user == user - # ── Part 1: for_export=True produces importable JSON ── - user_data = user.serialize(for_export=True) - member_data = member.serialize(for_export=True) + # serialize() produces importable JSON + user_data = user.serialize() + member_data = member.serialize() # User should NOT have 'member' (since "User" > "Member4") assert ( "member" not in user_data - ), f"for_export should skip OneToOne on alphabetically-later side; got {user_data}" + ), f"Should skip OneToOne on alphabetically-later side; got {user_data}" # Member4 SHOULD have 'user' (since "Member4" < "User") assert ( "user" in member_data - ), f"for_export should keep OneToOne on alphabetically-earlier side; got {member_data}" + ), f"Should keep OneToOne on alphabetically-earlier side; got {member_data}" assert member_data["user"] == "system" # alias # Clear and reimport in dependency order @@ -885,44 +797,16 @@ class Member4(Entity): ) # user ref → resolves to existing User # Both sides should be reconstructed - assert recreated_member.user == recreated_user, "ManyToOne should resolve" + assert recreated_member.user == recreated_user, "OneToOne should resolve" assert ( recreated_user.member == recreated_member ), "Reverse OneToOne should be set" - # ── Part 2: OLD serialize() would crash on wrong import order ── - Database.get_instance().clear() - - user2 = User(name="alice") - member2 = Member4(id="mem_deadbeef") - member2.user = user2 + def test_issue4_load_some_resilience(self): + """Regression test for issue #4: load_some resilience with dangling refs. - # Normal serialize includes BOTH sides - user2_full = user2.serialize() - _ = member2.serialize() # noqa: F841 - assert "member" in user2_full, "Normal serialize should include both sides" - - # Importing User first with normal data crashes because Member doesn't exist - Database.get_instance().clear() - try: - User.deserialize( - user2_full - ) # has member: "mem_deadbeef" → Member4 doesn't exist - # If deserialize silently skips, that's also fine (it catches ValueError) - except ValueError as e: - assert "mem_deadbeef" in str( - e - ), f"Should reference the missing member alias: {e}" - - def test_issue4_one_to_many_dangling_refs_load_some(self): - """Regression test for issue #4: OneToMany with dangling refs crashes load_some. - - Reproduces the runtime crash: Token stored with balances: [4, 5, 6] but - WalletBalance entities were deleted. Token.load_some() crashes with: - ValueError: No entity of types WalletBalance found with ID or name '4' - - The fix: load_some() catches ValueError/AttributeError and skips broken - entities instead of crashing the entire batch. + load_some() catches ValueError/AttributeError per entity and skips + broken entities instead of crashing the entire batch. """ Database.get_instance().clear() @@ -942,11 +826,7 @@ class Entry4(Entity): e2.account = acct assert len(acct.entries) == 2 - # Serialize WITH OneToMany (internal storage format) - acct_data = acct.serialize() - assert "entries" in acct_data - - # Now delete the entries from DB but leave the account's stored data intact + # Delete entries from DB but leave account's stored data intact e1.delete() e2.delete() @@ -955,15 +835,13 @@ class Entry4(Entity): # load_some should NOT crash — it should skip broken entities loaded = Account.load_some(from_id=1, count=10) - # The account itself should still load (its entries will fail silently) - assert len(loaded) >= 0, "load_some should not crash on dangling OneToMany refs" + assert len(loaded) >= 0, "load_some should not crash on dangling refs" - def test_issue4_for_export_eliminates_dangling_one_to_many(self): - """Regression test: for_export=True prevents OneToMany dangling refs entirely. + def test_issue4_serialize_eliminates_dangling_one_to_many(self): + """Regression test: serialize() prevents OneToMany dangling refs. - If Token was serialized with for_export=True, the 'balances' OneToMany field - is not included. On import, Token has no forward refs to WalletBalance, - so there's nothing to dangle. + Since serialize() never includes OneToMany, Token has no forward refs + to WalletBalance, so there's nothing to dangle on import. """ Database.get_instance().clear() @@ -981,19 +859,20 @@ class Balance4(Entity): b1.token = token b2.token = token - # for_export skips OneToMany - export_data = token.serialize(for_export=True) - assert "balances" not in export_data + # serialize skips OneToMany + token_data = token.serialize() + assert "balances" not in token_data + + b1_data = b1.serialize() + b2_data = b2.serialize() # Import token without balance refs — no crash possible Database.get_instance().clear() - reimported_token = Token4.deserialize(export_data) + reimported_token = Token4.deserialize(token_data) assert reimported_token.name == "ICP" assert len(reimported_token.balances) == 0 # No balances yet # Now import balances with ManyToOne ref — reconstructs OneToMany - b1_data = b1.serialize(for_export=True) - b2_data = b2.serialize(for_export=True) Balance4.deserialize(b1_data) Balance4.deserialize(b2_data) @@ -1002,7 +881,7 @@ class Balance4(Entity): ), "OneToMany should be reconstructed from ManyToOne during import" def test_issue4_full_round_trip_bidirectional(self): - """End-to-end test: serialize(for_export=True) → clear → deserialize. + """End-to-end test: serialize() → clear → deserialize. Models the exact realm workflow: generate entities in memory, serialize to JSON, clear DB, import in dependency order. @@ -1044,17 +923,17 @@ class Member44(Entity): assert users[i].member == members[i] assert users[i].human == humans[i] - # Serialize for export (dependency order: Realm → Users → Humans → Members) + # Serialize (dependency order: Realm → Users → Humans → Members) all_data = ( - [realm.serialize(for_export=True)] - + [u.serialize(for_export=True) for u in users] - + [h.serialize(for_export=True) for h in humans] - + [m.serialize(for_export=True) for m in members] + [realm.serialize()] + + [u.serialize() for u in users] + + [h.serialize() for h in humans] + + [m.serialize() for m in members] ) - # Verify no OneToMany or wrong-side OneToOne in export + # Verify no OneToMany or wrong-side OneToOne realm_data = all_data[0] - assert "users" not in realm_data, "OneToMany should not be in export" + assert "users" not in realm_data, "OneToMany should not be serialized" user_data = all_data[1] # "User4" > "Human4" → User4 should NOT have human assert "human" not in user_data, "OneToOne (User4>Human4) should be skipped" From 65ee53ec43772c8bab3bb9b5c4a6c0b419dfa837 Mon Sep 17 00:00:00 2001 From: deucalioncodes Date: Sat, 21 Mar 2026 10:52:44 +0100 Subject: [PATCH 4/4] refactor: split serialize into public (clean) and _serialize_full (for persistence) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit serialize() skips OneToMany and picks one side of OneToOne — safe for export/import without circular refs. _serialize_full() preserves all relations — used by _save() so internal persistence (load/save cycles) keeps working correctly. _serialize_base() and _get_entity_reference() are shared helpers. --- ic_python_db/entity.py | 88 ++++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 34 deletions(-) diff --git a/ic_python_db/entity.py b/ic_python_db/entity.py index 6a2ba0a..04e15bb 100644 --- a/ic_python_db/entity.py +++ b/ic_python_db/entity.py @@ -264,8 +264,8 @@ def _save( ) self._update_timestamps(caller_id) - # Save to database - data = self.serialize() + # Save to database (full serialization preserves all relations) + data = self._serialize_full() if not self._do_not_save: logger.debug(f"Saving entity {self._type}@{self._id} to database") @@ -564,24 +564,15 @@ def delete(self) -> None: # Remove from context self.__class__._context.discard(self) - def serialize(self) -> Dict[str, Any]: - """Convert the entity to a serializable dictionary. - - OneToMany relations are always skipped (they are reconstructed from the - reverse ManyToOne side). For bilateral OneToOne relations, only the - alphabetically-earlier entity type serializes the reference, avoiding - circular dependencies during sequential import. - - Returns: - Dict containing the entity's serializable data - """ + def _serialize_base(self) -> Dict[str, Any]: + """Shared serialization logic: core data, properties, and instance attributes.""" # Get mixin data first if available data = super().serialize() if hasattr(super(), "serialize") else {} # Add core entity data data.update( { - "_type": self._type, # Use the entity type + "_type": self._type, "_id": self._id, } ) @@ -599,32 +590,61 @@ def serialize(self) -> Dict[str, Any]: if not k.startswith("_"): data[k] = v - # Add relations as references (prefer alias over _id if available) - def get_entity_reference(entity): - """Get the best reference for an entity: alias value if available, otherwise _id.""" - if hasattr(entity.__class__, "__alias__") and entity.__class__.__alias__: - alias_field = entity.__class__.__alias__ - alias_value = getattr(entity, alias_field, None) - if alias_value is not None: - return alias_value - return entity._id + return data + + @staticmethod + def _get_entity_reference(entity): + """Get the best reference for an entity: alias value if available, otherwise _id.""" + if hasattr(entity.__class__, "__alias__") and entity.__class__.__alias__: + alias_field = entity.__class__.__alias__ + alias_value = getattr(entity, alias_field, None) + if alias_value is not None: + return alias_value + return entity._id + + def _serialize_full(self) -> Dict[str, Any]: + """Full serialization including all relations. Used by _save() for persistence.""" + data = self._serialize_base() + + from ic_python_db.properties import ManyToMany, OneToMany + + for rel_name, rel_entities in self._relations.items(): + if rel_entities: + rel_prop = getattr(self.__class__, rel_name, None) + is_to_many = isinstance(rel_prop, (OneToMany, ManyToMany)) + + if len(rel_entities) == 1 and not is_to_many: + data[rel_name] = self._get_entity_reference(rel_entities[0]) + else: + data[rel_name] = [ + self._get_entity_reference(e) for e in rel_entities + ] + + return data + + def serialize(self) -> Dict[str, Any]: + """Convert the entity to a portable serializable dictionary. + + OneToMany relations are skipped (reconstructed from reverse ManyToOne). + For bilateral OneToOne relations, only the alphabetically-earlier entity + type serializes the reference, avoiding circular dependencies. + + Returns: + Dict containing the entity's serializable data + """ + data = self._serialize_base() + + from ic_python_db.properties import ManyToMany, OneToMany, OneToOne for rel_name, rel_entities in self._relations.items(): if rel_entities: - # Check if this is a *ToMany relation that should always be a list rel_prop = getattr(self.__class__, rel_name, None) - from ic_python_db.properties import ( - ManyToMany, - OneToMany, - OneToOne, - ) # Skip OneToMany — always reconstructed from reverse ManyToOne if isinstance(rel_prop, OneToMany): continue # For OneToOne bilateral, only serialize on one deterministic side: # the entity whose type name is alphabetically <= the target type. - # This ensures exactly one side carries the reference. if isinstance(rel_prop, OneToOne): target_type = rel_entities[0]._type if rel_entities else None if target_type and self._type > target_type: @@ -633,11 +653,11 @@ def get_entity_reference(entity): is_to_many = isinstance(rel_prop, (OneToMany, ManyToMany)) if len(rel_entities) == 1 and not is_to_many: - # Single relation for OneToOne/ManyToOne - store as single reference - data[rel_name] = get_entity_reference(rel_entities[0]) + data[rel_name] = self._get_entity_reference(rel_entities[0]) else: - # Multiple relations or *ToMany relations - store as list of references - data[rel_name] = [get_entity_reference(e) for e in rel_entities] + data[rel_name] = [ + self._get_entity_reference(e) for e in rel_entities + ] return data