Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
c2ba4c3
promoted `get()` with default value from `Mmif` to `MmifObject` (prim…
keighrim Aug 10, 2025
6f95b04
deprecated `get()` from list-like objs, separate key-index, int-index…
keighrim Aug 18, 2025
5d41378
replacing deepdiff with custom serialization logic
keighrim Oct 6, 2025
7fa50e7
more updates to test cases
keighrim Oct 9, 2025
ca56ca7
Merge pull request #328 from clamsproject/311-checking-for-equality-o…
keighrim Oct 29, 2025
2bd897b
updated outdated docstrings
keighrim Oct 29, 2025
e55c7c2
Merge branch 'develop' into docstring-updates
keighrim Oct 29, 2025
b08fd6e
Merge pull request #333 from clamsproject/332-docstring-updates
keighrim Oct 29, 2025
c5ae73a
big updates on docstring based on current `get_by_id` impl
keighrim Oct 30, 2025
dcf7a4f
Merge branch 'develop' into 295-pythonic-getters
keighrim Oct 30, 2025
15dd3df
get rid of intermediate `get_by_key` implementation and deprecating l…
keighrim Nov 18, 2025
d0024a6
Initial plan
Copilot Nov 18, 2025
ad8d4e2
Address PR feedback: simplify returns, use KeyError, add pathlib vali…
Copilot Nov 18, 2025
72c5e82
Merge pull request #336 from clamsproject/copilot/sub-pr-335
keighrim Nov 18, 2025
e32ec86
fixed syntax error in docstrings
keighrim Nov 19, 2025
9c0d3ba
Merge pull request #335 from clamsproject/295-pythonic-getters
keighrim Nov 19, 2025
c4f1fb0
added basic `mmif describe` cli
keighrim Nov 20, 2025
9348b68
updated other CLI modules' argparser for consistency
keighrim Nov 20, 2025
43e2e14
updated handling of "null"-views in `describe` module
keighrim Nov 20, 2025
f7eedfa
fixed type hints, changed time format when profiling is found
keighrim Nov 20, 2025
0415fff
Merge pull request #339 from clamsproject/326-describe-cli
keighrim Nov 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,5 @@ mmif/vocabulary
documentation/_build/

/VERSION
_issues

142 changes: 100 additions & 42 deletions mmif/serialize/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,11 @@ def _deserialize(self, input_dict: dict) -> None:

def _cache_alignment(self, alignment_ann: 'Annotation', alignedto_ann: 'Annotation') -> None:
"""
Cache alignment information. This cache will not be serialized.
Cache alignment information. This cache will not be serialized.

:param alignment_ann: the Alignment annotation that has this annotation on one side
:param alignedto_ann: the annotation that this annotation is aligned to (other side of Alignment)
:return: None
"""
self._alignments[alignment_ann] = alignedto_ann

Expand Down Expand Up @@ -228,7 +229,7 @@ def add_property(self, name: str,
value: Union[PRMTV_TYPES, LIST_PRMTV, LIST_LIST_PRMTV, DICT_PRMTV, DICT_LIST_PRMTV]) -> None:
"""
Adds a property to the annotation's properties.

:param name: the name of the property
:param value: the property's desired value
:return: None
Expand Down Expand Up @@ -256,18 +257,41 @@ def __getitem__(self, prop_name: str):

def get(self, prop_name: str, default=None):
"""
A getter for Annotation, will search for a property by its name,
and return the value if found, or the default value if not found.
This is designed to allow for directly accessing properties without
having to go through the properties object, or view-level
annotation metadata (common properties) encoded in the
``view.metadata.contains`` dict. Note that the regular properties
will take the priority over the view-level common properties when
there are name conflicts.

:param prop_name: the name of the property to get
:param default: the value to return if the property is not found
:return: the value of the property
Safe property access with optional default value.

Searches for an annotation property by name and returns its value,
or a default value if not found. This method searches in multiple
locations with the following priority:

1. Direct properties (in ``annotation.properties``)
2. Ephemeral properties (view-level metadata from ``contains``)
3. Special fields (``@type``, ``properties``)

This allows convenient access to properties without explicitly
checking the ``properties`` object or view-level metadata.

:param prop_name: The name of the property to retrieve
:param default: The value to return if the property is not found (default: None)
:return: The property value, or the default value if not found

Examples
--------
.. code-block:: python

# Access annotation properties:
label = annotation.get('label', default='unknown')
start_time = annotation.get('start', default=0)

# Access @type:
at_type = annotation.get('@type')

# Safe access with custom default:
targets = annotation.get('targets', default=[])

See Also
--------
__getitem__ : Direct property access that raises KeyError when not found
get_property : Alias for this method
"""
try:
return self.__getitem__(prop_name)
Expand Down Expand Up @@ -336,29 +360,32 @@ def add_property(self, name: str,
) -> None:
"""
Adds a property to the document's properties.
Unlike the parent :class:`Annotation` class, added properties of a
``Document`` object can be lost during serialization unless it belongs
to somewhere in a ``Mmif`` object. This is because we want to keep
``Document`` object as "read-only" as possible. Thus, if you want to add
a property to a ``Document`` object,
* add the document to a ``Mmif`` object (either in the documents list or

Unlike the parent :class:`Annotation` class, properties added to a
``Document`` object can be lost during serialization unless the document
belongs somewhere in a ``Mmif`` object. This is because we want to keep
``Document`` objects as "read-only" as possible. Thus, if you want to add
a property to a ``Document`` object,

* add the document to a ``Mmif`` object (either in the documents list or
in a view from the views list), or
* directly write to ``Document.properties`` instead of using this method
(which is not recommended).
With the former method, the SDK will record the added property as a
`Annotation` annotation object, separate from the original `Document`
(which is not recommended).

With the former method, the SDK will record the added property as an
`Annotation` annotation object, separate from the original `Document`
object. See :meth:`.Mmif.generate_capital_annotations()` for more.

A few notes to keep in mind:
#. You can't overwrite an existing property of a ``Document`` object.
#. A MMIF can have multiple ``Annotation`` objects with the same

#. You can't overwrite an existing property of a ``Document`` object.
#. A MMIF can have multiple ``Annotation`` objects with the same
property name but different values. When this happens, the SDK will
only keep the latest value (in order of appearances in views list) of
only keep the latest value (in order of appearances in views list) of
the property, effectively overwriting the previous values.

:param name: the name of the property
:param value: the property's desired value (note: Document accepts fewer value types than Annotation)
"""
# we don't check whether this k-v already exists in _original (new props) or _ephemeral (read from existing MMIF)
# because it is impossible to keep the _original updated when a new annotation is added (via `new_annotation`)
Expand All @@ -378,13 +405,44 @@ def add_property(self, name: str,

def get(self, prop_name, default=None):
"""
A special getter for Document properties. The major difference from
the super class's :py:meth:`Annotation.get` method is that Document
class has one more set of *"pending"* properties, that are added after
the Document object is created and will be serialized as a separate
:py:class:`Annotation` object of which ``@type = Annotation``. The
pending properties will take the priority over the regular properties
when there are conflicts.
Safe property access with optional default value for Document objects.

Searches for a document property by name and returns its value, or a
default value if not found. Documents have a more complex property
hierarchy than regular annotations:

Priority order (highest to lowest):

1. Special fields ('id', 'location')
2. Pending properties (added after creation, to be serialized as ``Annotation`` objects)
3. Ephemeral properties (from existing ``Annotation`` annotations or view metadata)
4. Original properties (in ``document.properties``)

This allows convenient access to all document properties regardless of
where they're stored internally.

:param prop_name: The name of the property to retrieve
:param default: The value to return if the property is not found (default: None)
:return: The property value, or the default value if not found

Examples
--------
.. code-block:: python

# Access document properties:
mime = document.get('mime', default='application/octet-stream')
location = document.get('location')

# Access properties added after creation (pending):
author = document.get('author', default='anonymous')
publisher = document.get('publisher')

# Access ephemeral properties from Annotation objects:
sentiment = document.get('sentiment', default='neutral')

See Also
--------
add_property : Add a new property to the document
Mmif.generate_capital_annotations : How pending properties are serialized
"""
if prop_name == 'id':
# because all three dicts have `id` key as required field, we need
Expand All @@ -399,7 +457,7 @@ class has one more set of *"pending"* properties, that are added after
elif prop_name in self._props_ephemeral:
return self._props_ephemeral[prop_name]
else:
return super().get(prop_name)
return super().get(prop_name, default)

get_property = get

Expand Down Expand Up @@ -559,8 +617,8 @@ def _deserialize(self, input_dict: dict) -> None:
self.location = input_dict.pop("location")
super()._deserialize(input_dict)

def _serialize(self, alt_container: Optional[Dict] = None) -> dict:
serialized = super()._serialize()
def _serialize(self, *args, **kwargs) -> dict:
serialized = super()._serialize(**kwargs)
if "location_" in serialized:
serialized["location"] = serialized.pop("location_")
return serialized
Expand Down
Loading
Loading