diff --git a/mmif/serialize/annotation.py b/mmif/serialize/annotation.py
index 0b2d3ad6..35be2f8c 100644
--- a/mmif/serialize/annotation.py
+++ b/mmif/serialize/annotation.py
@@ -559,8 +559,8 @@ def _deserialize(self, input_dict: dict) -> None:
             self.location = input_dict.pop("location")
         super()._deserialize(input_dict)
 
-    def _serialize(self, alt_container: Optional[Dict] = None) -> dict:
-        serialized = super()._serialize()
+    def _serialize(self, *args, **kwargs) -> dict:
+        serialized = super()._serialize(**kwargs)
         if "location_" in serialized:
             serialized["location"] = serialized.pop("location_")
         return serialized
diff --git a/mmif/serialize/mmif.py b/mmif/serialize/mmif.py
index 2715bae8..f701ad8d 100644
--- a/mmif/serialize/mmif.py
+++ b/mmif/serialize/mmif.py
@@ -179,17 +179,19 @@ def validate(json_str: Union[bytes, str, dict]) -> None:
             json_str = json.loads(json_str)
         jsonschema.validators.validate(json_str, schema)
 
-    def serialize(self, pretty: bool = False, sanitize: bool = False, autogenerate_capital_annotations=True) -> str:
+    def serialize(self, pretty: bool = False, sanitize: bool = False, autogenerate_capital_annotations: bool = True, **kwargs) -> str:
         """
         Serializes the MMIF object to a JSON string.
 
-        :param sanitize: If True, performs some sanitization of before returning 
+        :param sanitize: If True, performs some sanitization of before returning
             the JSON string. See :meth:`sanitize` for details.
-        :param autogenerate_capital_annotations: If True, automatically convert 
-            any "pending" temporary properties from `Document` objects to 
-            `Annotation` objects. See :meth:`generate_capital_annotations` for 
+        :param autogenerate_capital_annotations: If True, automatically convert
+            any "pending" temporary properties from `Document` objects to
+            `Annotation` objects. See :meth:`generate_capital_annotations` for
             details.
         :param pretty: If True, returns string representation with indentation.
+        :param kwargs: Additional keyword arguments to pass to the parent's
+            ``serialize`` method (e.g., ``include_context=False``).
         :return: JSON string of the MMIF object.
         """
         if autogenerate_capital_annotations:
@@ -197,7 +199,7 @@ def serialize(self, pretty: bool = False, sanitize: bool = False, autogenerate_c
         # sanitization should be done after `Annotation` annotations are generated
         if sanitize:
             self.sanitize()
-        return super().serialize(pretty)
+        return super().serialize(pretty=pretty, **kwargs)
 
     def _deserialize(self, input_dict: dict) -> None:
         """
diff --git a/mmif/serialize/model.py b/mmif/serialize/model.py
index 70ff9e25..f331c58b 100644
--- a/mmif/serialize/model.py
+++ b/mmif/serialize/model.py
@@ -16,8 +16,6 @@
 from datetime import datetime
 from typing import Union, Any, Dict, Optional, TypeVar, Generic, Generator, Iterator, Type, Set, ClassVar
 
-from deepdiff import DeepDiff
-
 T = TypeVar('T')
 S = TypeVar('S')
 PRMTV_TYPES: Type = Union[str, int, float, bool, None]
@@ -93,10 +91,12 @@ class MmifObject(object):
         '_unnamed_attributes',
         '_attribute_classes',
         '_required_attributes',
-        '_exclude_from_diff'
+        '_exclude_from_diff',
+        '_contextual_attributes'
     }
     _unnamed_attributes: Optional[dict]
     _exclude_from_diff: Set[str]
+    _contextual_attributes: Set[str]
     _attribute_classes: Dict[str, Type] = {}  # Mapping: str -> Type
 
     def __init__(self, mmif_obj: Optional[Union[bytes, str, dict]] = None, *_) -> None:
@@ -106,6 +106,8 @@ def __init__(self, mmif_obj: Optional[Union[bytes, str, dict]] = None, *_) -> No
             self._required_attributes = []
         if not hasattr(self, '_exclude_from_diff'):
             self._exclude_from_diff = set()
+        if not hasattr(self, '_contextual_attributes'):
+            self._contextual_attributes = set()
         if not hasattr(self, '_unnamed_attributes'):
             self._unnamed_attributes = {}
         if mmif_obj is not None:
@@ -139,16 +141,21 @@ def _named_attributes(self) -> Generator[str, None, None]:
         """
         return (n for n in self.__dict__.keys() if n not in self.reserved_names)
 
-    def serialize(self, pretty: bool = False) -> str:
+    def serialize(self, pretty: bool = False, include_context: bool = True) -> str:
         """
         Generates JSON representation of an object.
 
         :param pretty: If True, returns string representation with indentation.
+        :param include_context: If ``False``, excludes contextual attributes from
+                                serialization. Contextual attributes hold information
+                                that varies at runtime (e.g., timestamps) and do not
+                                constitute the core information of the MMIF object.
+                                This is useful for comparing two MMIF objects for equality.
         :return: JSON string of the object.
         """
-        return json.dumps(self._serialize(), indent=2 if pretty else None, cls=MmifObjectEncoder)
+        return json.dumps(self._serialize(include_context=include_context), indent=2 if pretty else None, cls=MmifObjectEncoder)
 
-    def _serialize(self, alt_container: Optional[Dict] = None) -> dict:
+    def _serialize(self, alt_container: Optional[Dict] = None, include_context: bool = True) -> dict:
         """
         Maps a MMIF object to a plain python dict object,
         rewriting internal keys that start with '_' to
@@ -157,6 +164,8 @@ def _serialize(self, alt_container: Optional[Dict] = None) -> dict:
         If a subclass needs special treatment during the mapping, it needs to
         override this method.
 
+        :param alt_container: Alternative container to serialize from
+        :param include_context: See :meth:`serialize` for details.
         :return: the prepared dictionary
         """
         container = alt_container if alt_container is not None else self._unnamed_attributes
@@ -166,20 +175,32 @@ def _serialize(self, alt_container: Optional[Dict] = None) -> dict:
                 if v is None:
                     continue
                 k = str(k)
+                if not include_context and k in self._contextual_attributes:
+                    continue
                 if k.startswith('_'):  # _ as a placeholder ``@`` in json-ld
                     k = f'@{k[1:]}'
-                serializing_obj[k] = v
+                # Recursively serialize nested MmifObjects with the same include_context parameter
+                if isinstance(v, MmifObject):
+                    serializing_obj[k] = v._serialize(include_context=include_context)
+                else:
+                    serializing_obj[k] = v
         except AttributeError as e:
             # means _unnamed_attributes is None, so nothing unnamed would be serialized
             pass
         for k, v in self.__dict__.items():
             if k in self.reserved_names:
                 continue
+            if not include_context and k in self._contextual_attributes:
+                continue
             if k not in self._required_attributes and self.is_empty(v):
                 continue
             if k.startswith('_'):  # _ as a placeholder ``@`` in json-ld
                 k = f'@{k[1:]}'
-            serializing_obj[k] = v
+            # Recursively serialize nested MmifObjects with the same include_context parameter
+            if isinstance(v, MmifObject):
+                serializing_obj[k] = v._serialize(include_context=include_context)
+            else:
+                serializing_obj[k] = v
         return serializing_obj
 
     @staticmethod
@@ -263,14 +284,21 @@ def _deserialize(self, input_dict: dict) -> None:
                 self[k] = v
 
     def __str__(self) -> str:
-        return self.serialize(False)
+        return self.serialize()
 
     def __eq__(self, other) -> bool:
+        """
+        Compares two MmifObject instances for equality by comparing their serialized
+        representations with contextual attributes excluded.
+
+        This avoids issues with DeepDiff accessing properties that may raise exceptions.
+        The JSON is parsed back before comparing so that object key order is irrelevant,
+        and contextual attributes (e.g., timestamps, stack traces) are ignored.
+
+        See https://github.com/clamsproject/mmif-python/issues/311 for details.
+        """
         return isinstance(other, type(self)) and \
-            len(DeepDiff(self, other, report_repetition=True, exclude_types=[datetime],
-                         # https://github.com/clamsproject/mmif-python/issues/214
-                         exclude_paths=self._exclude_from_diff)
-                ) == 0
+            json.loads(self.serialize(include_context=False)) == json.loads(other.serialize(include_context=False))
 
     def __len__(self) -> int:
         """
@@ -356,7 +384,7 @@ def _serialize(self, *args, **kwargs) -> list: # pytype: disable=signature-mism
 
         :return: list of the values of the internal dictionary.
         """
-        return list(super()._serialize(self._items).values())
+        return list(super()._serialize(self._items, **kwargs).values())
 
     def deserialize(self, mmif_json: Union[str, list]) -> None:  # pytype: disable=signature-mismatch
         """
@@ -450,7 +478,7 @@ def __init__(self, mmif_obj: Optional[Union[bytes, str, dict]] = None, *_):
         super().__init__(mmif_obj)
 
     def _serialize(self, *args, **kwargs) -> dict:
-        return super()._serialize(self._items)
+        return super()._serialize(self._items, **kwargs)
 
     def get(self, key: T, default=None) -> Optional[S]:
         return self._items.get(key, default)
diff --git a/mmif/serialize/view.py b/mmif/serialize/view.py
index d8821c5b..740a98f9 100644
--- a/mmif/serialize/view.py
+++ b/mmif/serialize/view.py
@@ -346,6 +346,7 @@ def __init__(self, viewmetadata_obj: Optional[Union[bytes, str, dict]] = None, *
         self.error: Union[dict, ErrorDict] = {}
         self.warnings: List[str] = []
         self._required_attributes = ["app"]
+        self._contextual_attributes = {"timestamp"}
         self._attribute_classes = {
             'error': ErrorDict,
             'contains': ContainsDict
@@ -357,8 +358,8 @@ def __init__(self, viewmetadata_obj: Optional[Union[bytes, str, dict]] = None, *
         # also see this class' `_serialize()` override implementation
         super().__init__(viewmetadata_obj)
 
-    def _serialize(self, alt_container: Optional[Dict] = None) -> dict:
-        serialized = super()._serialize()
+    def _serialize(self, *args, **kwargs) -> dict:
+        serialized = super()._serialize(**kwargs)
         # `_serialize()` eliminates any *empty* attributes, so
         # when no "contains", "errors", nor "warnings", at least add an empty contains back
         if not (self.contains.items() or self.error or self.warnings):
@@ -459,6 +460,7 @@ class ErrorDict(MmifObject):
     def __init__(self, error_obj: Optional[Union[bytes, str, dict]] = None, *_) -> None:
         self.message: str = ''
         self.stackTrace: str = ''
+        self._contextual_attributes = {"stackTrace"}
         super().__init__(error_obj)
 
     def __str__(self):
diff --git a/requirements.txt b/requirements.txt
index ccf8bbd8..a97c214e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,2 @@
-deepdiff>5
 orderly-set==5.3.* # 5.4 drops py38 support
 jsonschema
diff --git a/tests/test_serialize.py b/tests/test_serialize.py
index bec77d28..1bf6ff0e 100644
--- a/tests/test_serialize.py
+++ b/tests/test_serialize.py
@@ -28,15 +28,45 @@
 tester_appname = 'http://not.existing/app'
 
 
+class TestMmifObject(unittest.TestCase):
+
+    def test_setattr_additional_properties_disallowed(self):
+        """Test that setting additional properties raises AttributeError when disallowed"""
+        # Create an object that disallows additional properties
+        obj = MmifObject()
+        obj._unnamed_attributes = None  # Disallow additional properties
+
+        with self.assertRaises(AttributeError) as cm:
+            obj.test_prop = "value"
+        self.assertIn("Additional properties are disallowed", str(cm.exception))
+
+    def test_setattr_additional_properties_allowed(self):
+        """Test that setting additional properties works when allowed"""
+        obj = MmifObject()
+        obj._unnamed_attributes = {}  # Allow additional properties
+
+        obj.test_prop = "value"
+        self.assertEqual(obj.test_prop, "value")
+        self.assertEqual(obj._unnamed_attributes["test_prop"], "value")
+
+    def test_serialize_unnamed_attributes_none(self):
+        """Test serialization when _unnamed_attributes is None"""
+        obj = MmifObject()
+        obj._unnamed_attributes = None
+
+        # This should not raise an error, but handle AttributeError gracefully
+        serialized = obj.serialize()
+        self.assertIsInstance(serialized, str)
+
+
 class TestMmif(unittest.TestCase):
 
     def setUp(self) -> None:
         self.mmif_examples_json = {k: json.loads(v) for k, v in MMIF_EXAMPLES.items()}
 
-    @pytest.mark.skip("comparing two `Mmif` objs with an arbitrary file path included won't work until https://github.com/seperman/deepdiff/issues/357 is addressed")
     def test_init_from_bytes(self):
-        mmif_from_str = Mmif(EVERYTHING_JSON)
-        mmif_from_bytes = Mmif(EVERYTHING_JSON.encode('utf8'))
+        mmif_from_str = Mmif(MMIF_EXAMPLES['everything'])
+        mmif_from_bytes = Mmif(MMIF_EXAMPLES['everything'].encode('utf8'))
         self.assertEqual(mmif_from_str, mmif_from_bytes)
 
     def test_str_mmif_deserialize(self):
@@ -456,10 +486,9 @@ def test_add_view(self):
         except KeyError:
             self.fail("raised exception on duplicate ID add when overwrite was set to True")
 
-    @pytest.mark.skip("comparing two `Mmif` objs with an arbitrary file path included won't work until https://github.com/seperman/deepdiff/issues/357 is addressed")
     def test_eq_checking_order(self):
-        mmif1 = Mmif(EVERYTHING_JSON)
-        mmif2 = Mmif(EVERYTHING_JSON)
+        mmif1 = Mmif(MMIF_EXAMPLES['everything'])
+        mmif2 = Mmif(MMIF_EXAMPLES['everything'])
         view1 = View()
         view1.id = 'v99'
         view2 = View()
@@ -470,14 +499,68 @@ def test_eq_checking_order(self):
         mmif2.add_view(view1)
         self.assertFalse(mmif1 == mmif2)
 
-        mmif3 = Mmif(EVERYTHING_JSON)
-        mmif4 = Mmif(EVERYTHING_JSON)
+        mmif3 = Mmif(MMIF_EXAMPLES['everything'])
+        mmif4 = Mmif(MMIF_EXAMPLES['everything'])
         mmif3.add_view(view1)
         mmif3.add_view(view2)
         mmif4.add_view(view1)
         mmif4.add_view(view2)
         self.assertTrue(mmif3 == mmif4)
 
+    def test_eq_basic(self):
+        """Test basic equality comparison (issue #311)"""
+        minimal_mmif = '''
+        {
+            "metadata": {
+                "mmif": "http://mmif.clams.ai/1.0.0"
+            },
+            "documents": [
+                {
+                    "@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1",
+                    "properties": {
+                        "mime": "video",
+                        "id": "d1",
+                        "location": "file:///test.mp4"
+                    }
+                }
+            ],
+            "views": []
+        }'''
+        m1 = Mmif(minimal_mmif)
+        m2 = Mmif(minimal_mmif)
+        self.assertTrue(m1 == m2)
+
+    def test_eq_with_different_documents(self):
+        """Test inequality when documents differ (issue #311)"""
+        mmif1_str = '''
+        {
+            "metadata": {"mmif": "http://mmif.clams.ai/1.0.0"},
+            "documents": [{"@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1",
+                           "properties": {"mime": "video", "id": "d1", "location": "file:///test1.mp4"}}],
+            "views": []
+        }'''
+        mmif2_str = mmif1_str.replace("d1", "d2")
+        m1 = Mmif(mmif1_str)
+        m2 = Mmif(mmif2_str)
+        self.assertFalse(m1 == m2)
+
+    def test_eq_ignores_contextual_attributes(self):
+        """Test that contextual attributes (timestamps) are ignored in equality comparison (issue #311)"""
+        from datetime import datetime, timedelta
+        mmif_str = '''
+        {
+            "metadata": {"mmif": "http://mmif.clams.ai/1.0.0"},
+            "documents": [{"@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1",
+                           "properties": {"mime": "video", "id": "d1", "location": "file:///test.mp4"}}],
+            "views": [{"id": "v1", "metadata": {"app": "http://mmif.clams.ai/apps/test/1.0", "contains": {}}, "annotations": []}]
+        }'''
+        m1 = Mmif(mmif_str)
+        m2 = Mmif(mmif_str)
+        # Set different timestamps
+        m1.views.get('v1').metadata.timestamp = datetime.now()
+        m2.views.get('v1').metadata.timestamp = datetime.now() + timedelta(seconds=10)
+        self.assertEqual(m1, m2)
+
     def test___getitem__(self):
         mmif_obj = Mmif(MMIF_EXAMPLES['everything'])
         self.assertIsInstance(mmif_obj['m1'], Document)