Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions mmif/serialize/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,8 +559,8 @@ def _deserialize(self, input_dict: dict) -> None:
self.location = input_dict.pop("location")
super()._deserialize(input_dict)

def _serialize(self, alt_container: Optional[Dict] = None) -> dict:
serialized = super()._serialize()
def _serialize(self, *args, **kwargs) -> dict:
serialized = super()._serialize(**kwargs)
if "location_" in serialized:
serialized["location"] = serialized.pop("location_")
return serialized
Expand Down
15 changes: 8 additions & 7 deletions mmif/serialize/mmif.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,25 +179,26 @@ def validate(json_str: Union[bytes, str, dict]) -> None:
json_str = json.loads(json_str)
jsonschema.validators.validate(json_str, schema)

def serialize(self, pretty: bool = False, sanitize: bool = False, autogenerate_capital_annotations=True) -> str:
def serialize(self, sanitize: bool = False, autogenerate_capital_annotations: bool = True, **kwargs) -> str:
"""
Serializes the MMIF object to a JSON string.

:param sanitize: If True, performs some sanitization of before returning
:param sanitize: If True, performs some sanitization before returning
the JSON string. See :meth:`sanitize` for details.
:param autogenerate_capital_annotations: If True, automatically convert
any "pending" temporary properties from `Document` objects to
`Annotation` objects. See :meth:`generate_capital_annotations` for
:param autogenerate_capital_annotations: If True, automatically convert
any "pending" temporary properties from `Document` objects to
`Annotation` objects. See :meth:`generate_capital_annotations` for
details.
:param pretty: If True, returns string representation with indentation.
:param kwargs: Keyword arguments to pass to the parent's ``serialize``
method (e.g., ``pretty=True``, ``include_context=False``).
:return: JSON string of the MMIF object.
"""
if autogenerate_capital_annotations:
self.generate_capital_annotations()
# sanitization should be done after `Annotation` annotations are generated
if sanitize:
self.sanitize()
return super().serialize(pretty)
return super().serialize(**kwargs)

def _deserialize(self, input_dict: dict) -> None:
"""
Expand Down
58 changes: 43 additions & 15 deletions mmif/serialize/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
from datetime import datetime
from typing import Union, Any, Dict, Optional, TypeVar, Generic, Generator, Iterator, Type, Set, ClassVar

from deepdiff import DeepDiff

T = TypeVar('T')
S = TypeVar('S')
PRMTV_TYPES: Type = Union[str, int, float, bool, None]
Expand Down Expand Up @@ -93,10 +91,12 @@ class MmifObject(object):
'_unnamed_attributes',
'_attribute_classes',
'_required_attributes',
'_exclude_from_diff'
'_exclude_from_diff',
'_contextual_attributes'
}
_unnamed_attributes: Optional[dict]
_exclude_from_diff: Set[str]
_contextual_attributes: Set[str]
_attribute_classes: Dict[str, Type] = {} # Mapping: str -> Type

def __init__(self, mmif_obj: Optional[Union[bytes, str, dict]] = None, *_) -> None:
Expand All @@ -106,6 +106,8 @@ def __init__(self, mmif_obj: Optional[Union[bytes, str, dict]] = None, *_) -> No
self._required_attributes = []
if not hasattr(self, '_exclude_from_diff'):
self._exclude_from_diff = set()
if not hasattr(self, '_contextual_attributes'):
self._contextual_attributes = set()
if not hasattr(self, '_unnamed_attributes'):
self._unnamed_attributes = {}
if mmif_obj is not None:
Expand Down Expand Up @@ -139,16 +141,21 @@ def _named_attributes(self) -> Generator[str, None, None]:
"""
return (n for n in self.__dict__.keys() if n not in self.reserved_names)

def serialize(self, pretty: bool = False) -> str:
def serialize(self, pretty: bool = False, include_context: bool = True) -> str:
"""
Generates JSON representation of an object.

:param pretty: If True, returns string representation with indentation.
:param include_context: If ``False``, excludes contextual attributes from
serialization. Contextual attributes hold information
that varies at runtime (e.g., timestamps) and do not
constitute the core information of the MMIF object.
This is useful for comparing two MMIF objects for equality.
:return: JSON string of the object.
"""
return json.dumps(self._serialize(), indent=2 if pretty else None, cls=MmifObjectEncoder)
return json.dumps(self._serialize(include_context=include_context), indent=2 if pretty else None, cls=MmifObjectEncoder)

def _serialize(self, alt_container: Optional[Dict] = None) -> dict:
def _serialize(self, alt_container: Optional[Dict] = None, include_context: bool = True) -> dict:
"""
Maps a MMIF object to a plain python dict object,
rewriting internal keys that start with '_' to
Expand All @@ -157,6 +164,8 @@ def _serialize(self, alt_container: Optional[Dict] = None) -> dict:
If a subclass needs special treatment during the mapping, it needs to
override this method.

:param alt_container: Alternative container to serialize from
:param include_context: See :meth:`serialize` for details.
:return: the prepared dictionary
"""
container = alt_container if alt_container is not None else self._unnamed_attributes
Expand All @@ -166,20 +175,32 @@ def _serialize(self, alt_container: Optional[Dict] = None) -> dict:
if v is None:
continue
k = str(k)
if not include_context and k in self._contextual_attributes:
continue
if k.startswith('_'): # _ as a placeholder ``@`` in json-ld
k = f'@{k[1:]}'
serializing_obj[k] = v
# Recursively serialize nested MmifObjects with the same include_context parameter
if isinstance(v, MmifObject):
serializing_obj[k] = v._serialize(include_context=include_context)
else:
serializing_obj[k] = v
except AttributeError as e:
# means _unnamed_attributes is None, so nothing unnamed would be serialized
pass
for k, v in self.__dict__.items():
if k in self.reserved_names:
continue
if not include_context and k in self._contextual_attributes:
continue
if k not in self._required_attributes and self.is_empty(v):
continue
if k.startswith('_'): # _ as a placeholder ``@`` in json-ld
k = f'@{k[1:]}'
serializing_obj[k] = v
# Recursively serialize nested MmifObjects with the same include_context parameter
if isinstance(v, MmifObject):
serializing_obj[k] = v._serialize(include_context=include_context)
else:
serializing_obj[k] = v
return serializing_obj

@staticmethod
Expand Down Expand Up @@ -263,14 +284,21 @@ def _deserialize(self, input_dict: dict) -> None:
self[k] = v

def __str__(self) -> str:
return self.serialize(False)
return self.serialize()

def __eq__(self, other) -> bool:
"""
Compares two MmifObject instances for equality by comparing their serialized
representations with contextual attributes excluded.

This avoids issues with DeepDiff accessing properties that may raise exceptions,
and properly handles comparison by ignoring contextual attributes like timestamps
and stack traces that vary based on runtime environment.

See https://github.com/clamsproject/mmif-python/issues/311 for details.
"""
return isinstance(other, type(self)) and \
len(DeepDiff(self, other, report_repetition=True, exclude_types=[datetime],
# https://github.com/clamsproject/mmif-python/issues/214
exclude_paths=self._exclude_from_diff)
) == 0
self.serialize(include_context=False) == other.serialize(include_context=False)

def __len__(self) -> int:
"""
Expand Down Expand Up @@ -356,7 +384,7 @@ def _serialize(self, *args, **kwargs) -> list: # pytype: disable=signature-mism

:return: list of the values of the internal dictionary.
"""
return list(super()._serialize(self._items).values())
return list(super()._serialize(self._items, **kwargs).values())

def deserialize(self, mmif_json: Union[str, list]) -> None: # pytype: disable=signature-mismatch
"""
Expand Down Expand Up @@ -450,7 +478,7 @@ def __init__(self, mmif_obj: Optional[Union[bytes, str, dict]] = None, *_):
super().__init__(mmif_obj)

def _serialize(self, *args, **kwargs) -> dict:
return super()._serialize(self._items)
return super()._serialize(self._items, **kwargs)

def get(self, key: T, default=None) -> Optional[S]:
return self._items.get(key, default)
Expand Down
6 changes: 4 additions & 2 deletions mmif/serialize/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,7 @@ def __init__(self, viewmetadata_obj: Optional[Union[bytes, str, dict]] = None, *
self.error: Union[dict, ErrorDict] = {}
self.warnings: List[str] = []
self._required_attributes = ["app"]
self._contextual_attributes = {"timestamp"}
self._attribute_classes = {
'error': ErrorDict,
'contains': ContainsDict
Expand All @@ -357,8 +358,8 @@ def __init__(self, viewmetadata_obj: Optional[Union[bytes, str, dict]] = None, *
# also see this class' `_serialize()` override implementation
super().__init__(viewmetadata_obj)

def _serialize(self, alt_container: Optional[Dict] = None) -> dict:
serialized = super()._serialize()
def _serialize(self, *args, **kwargs) -> dict:
serialized = super()._serialize(**kwargs)
# `_serialize()` eliminates any *empty* attributes, so
# when no "contains", "errors", nor "warnings", at least add an empty contains back
if not (self.contains.items() or self.error or self.warnings):
Expand Down Expand Up @@ -459,6 +460,7 @@ class ErrorDict(MmifObject):
def __init__(self, error_obj: Optional[Union[bytes, str, dict]] = None, *_) -> None:
self.message: str = ''
self.stackTrace: str = ''
self._contextual_attributes = {"stackTrace"}
super().__init__(error_obj)

def __str__(self):
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
deepdiff>5

orderly-set==5.3.* # 5.4 drops py38 support
jsonschema
99 changes: 91 additions & 8 deletions tests/test_serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,45 @@
tester_appname = 'http://not.existing/app'


class TestMmifObject(unittest.TestCase):
    """Checks how ``MmifObject`` treats additional (unnamed) properties."""

    def test_setattr_additional_properties_disallowed(self):
        """Assigning an undeclared property raises AttributeError when disallowed."""
        # Setting ``_unnamed_attributes`` to None disallows additional properties.
        sealed = MmifObject()
        sealed._unnamed_attributes = None

        with self.assertRaises(AttributeError) as raised:
            sealed.test_prop = "value"
        error_text = str(raised.exception)
        self.assertIn("Additional properties are disallowed", error_text)

    def test_setattr_additional_properties_allowed(self):
        """Assigning an undeclared property is stored when additional properties are allowed."""
        # An empty dict for ``_unnamed_attributes`` allows additional properties.
        open_obj = MmifObject()
        open_obj._unnamed_attributes = {}

        open_obj.test_prop = "value"

        # The value must be visible as an attribute and in the unnamed-attribute store.
        self.assertEqual(open_obj.test_prop, "value")
        stored = open_obj._unnamed_attributes["test_prop"]
        self.assertEqual(stored, "value")

    def test_serialize_unnamed_attributes_none(self):
        """Serialization still yields a string when ``_unnamed_attributes`` is None."""
        sealed = MmifObject()
        sealed._unnamed_attributes = None

        # Must not raise; the AttributeError is expected to be handled gracefully.
        json_text = sealed.serialize()
        self.assertIsInstance(json_text, str)

class TestMmif(unittest.TestCase):

def setUp(self) -> None:
self.mmif_examples_json = {k: json.loads(v) for k, v in MMIF_EXAMPLES.items()}

@pytest.mark.skip("comparing two `Mmif` objs with an arbitrary file path included won't work until https://github.com/seperman/deepdiff/issues/357 is addressed")
def test_init_from_bytes(self):
mmif_from_str = Mmif(EVERYTHING_JSON)
mmif_from_bytes = Mmif(EVERYTHING_JSON.encode('utf8'))
mmif_from_str = Mmif(MMIF_EXAMPLES['everything'])
mmif_from_bytes = Mmif(MMIF_EXAMPLES['everything'].encode('utf8'))
self.assertEqual(mmif_from_str, mmif_from_bytes)

def test_str_mmif_deserialize(self):
Expand Down Expand Up @@ -456,10 +486,9 @@ def test_add_view(self):
except KeyError:
self.fail("raised exception on duplicate ID add when overwrite was set to True")

@pytest.mark.skip("comparing two `Mmif` objs with an arbitrary file path included won't work until https://github.com/seperman/deepdiff/issues/357 is addressed")
def test_eq_checking_order(self):
mmif1 = Mmif(EVERYTHING_JSON)
mmif2 = Mmif(EVERYTHING_JSON)
mmif1 = Mmif(MMIF_EXAMPLES['everything'])
mmif2 = Mmif(MMIF_EXAMPLES['everything'])
view1 = View()
view1.id = 'v99'
view2 = View()
Expand All @@ -470,14 +499,68 @@ def test_eq_checking_order(self):
mmif2.add_view(view1)
self.assertFalse(mmif1 == mmif2)

mmif3 = Mmif(EVERYTHING_JSON)
mmif4 = Mmif(EVERYTHING_JSON)
mmif3 = Mmif(MMIF_EXAMPLES['everything'])
mmif4 = Mmif(MMIF_EXAMPLES['everything'])
mmif3.add_view(view1)
mmif3.add_view(view2)
mmif4.add_view(view1)
mmif4.add_view(view2)
self.assertTrue(mmif3 == mmif4)

def test_eq_basic(self):
    """Two Mmif objects built from the same minimal JSON compare equal (issue #311)."""
    minimal_mmif = '''
{
"metadata": {
"mmif": "http://mmif.clams.ai/1.0.0"
},
"documents": [
{
"@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1",
"properties": {
"mime": "video",
"id": "d1",
"location": "file:///test.mp4"
}
}
],
"views": []
}'''
    # Parse the same source twice so the two objects are distinct instances.
    first = Mmif(minimal_mmif)
    second = Mmif(minimal_mmif)
    self.assertTrue(first == second)

def test_eq_with_different_documents(self):
    """Mmif objects whose documents differ compare unequal (issue #311)."""
    mmif1_str = '''
{
"metadata": {"mmif": "http://mmif.clams.ai/1.0.0"},
"documents": [{"@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1",
"properties": {"mime": "video", "id": "d1", "location": "file:///test1.mp4"}}],
"views": []
}'''
    # The second payload is derived from the first by swapping "d1" for "d2".
    mmif2_str = mmif1_str.replace("d1", "d2")
    original, altered = Mmif(mmif1_str), Mmif(mmif2_str)
    self.assertFalse(original == altered)

def test_eq_ignores_contextual_attributes(self):
    """Differing view timestamps do not affect equality of Mmif objects (issue #311)."""
    from datetime import datetime, timedelta
    mmif_str = '''
{
"metadata": {"mmif": "http://mmif.clams.ai/1.0.0"},
"documents": [{"@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1",
"properties": {"mime": "video", "id": "d1", "location": "file:///test.mp4"}}],
"views": [{"id": "v1", "metadata": {"app": "http://mmif.clams.ai/apps/test/1.0", "contains": {}}, "annotations": []}]
}'''
    first = Mmif(mmif_str)
    second = Mmif(mmif_str)

    # Give the two otherwise identical views timestamps ten seconds apart.
    first_meta = first.views.get('v1').metadata
    first_meta.timestamp = datetime.now()
    second_meta = second.views.get('v1').metadata
    second_meta.timestamp = datetime.now() + timedelta(seconds=10)

    self.assertEqual(first, second)

def test___getitem__(self):
mmif_obj = Mmif(MMIF_EXAMPLES['everything'])
self.assertIsInstance(mmif_obj['m1'], Document)
Expand Down
Loading