Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
0df8dbd
feat: add metadata model hierarchy
vagenas Oct 21, 2025
83b1948
add deprecation, add first migration
vagenas Oct 21, 2025
33e2f68
extend annotations migration
vagenas Oct 21, 2025
e7f278c
update with feedback
vagenas Oct 24, 2025
2002d2d
expose main prediction
vagenas Oct 24, 2025
52cae8d
ideas on enforcing separation between standard and custom fields
vagenas Oct 24, 2025
6ce1dba
add custom field setter method
vagenas Oct 27, 2025
9f08d35
update Markdown serialization
vagenas Oct 28, 2025
c3466e3
revert description, add include_non_meta, showcase custom serializer …
vagenas Oct 28, 2025
a8af63f
simplify customization
vagenas Oct 28, 2025
18b9144
fix reference exclusion
vagenas Oct 28, 2025
cd496c5
eliminate serialization duplication between meta & (legacy) annotations
vagenas Oct 28, 2025
e71e97d
remove old file
vagenas Oct 28, 2025
a1cacfd
fix item used in get_parts for meta ser
vagenas Oct 28, 2025
3287664
serialize GroupItem meta prior to content, DocItem meta after content
vagenas Oct 28, 2025
5fc98e3
restore ser order for all nodeitems
vagenas Oct 28, 2025
627ba61
move meta serialization into DocSerializer.serialize() to maintain se…
vagenas Oct 28, 2025
7e4c29e
add allow- & block-lists for meta names, add std field name enum
vagenas Oct 28, 2025
d8b7cc5
add HTML serializer, document meta field names, rename SMILES field
vagenas Oct 29, 2025
dafb584
bump DoclingDocument version
vagenas Oct 29, 2025
5aa768f
make TabularChartMetaField.title optional, expose new classes through…
vagenas Oct 30, 2025
37982d9
add DocTags serialization, revert smiles to smi to prevent confusion …
vagenas Oct 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions docling_core/transforms/serializer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import Any, Optional, Union

from pydantic import AnyUrl, BaseModel
from typing_extensions import deprecated

from docling_core.types.doc.document import (
DocItem,
Expand Down Expand Up @@ -258,6 +259,7 @@ def serialize_captions(
"""Serialize the item's captions."""
...

@deprecated("Use serialize_meta() instead.")
@abstractmethod
def serialize_annotations(
self,
Expand All @@ -267,6 +269,15 @@ def serialize_annotations(
"""Serialize the item's annotations."""
...

@abstractmethod
def serialize_meta(
    self,
    item: NodeItem,
    **kwargs: Any,
) -> SerializationResult:
    """Serialize the meta of the given item.

    Abstract hook: this declaration carries no behavior of its own and
    concrete serializers are expected to override it.

    Args:
        item: The node item whose meta is to be serialized.
        **kwargs: Extra keyword arguments; their interpretation is left to
            the implementing serializer.

    Returns:
        The serialization result for the item's meta.
    """
    ...

@abstractmethod
def get_excluded_refs(self, **kwargs: Any) -> set[str]:
"""Get references to excluded items."""
Expand All @@ -287,6 +298,26 @@ def get_serializer(self, doc: DoclingDocument) -> BaseDocSerializer:
...


class BaseMetaSerializer(ABC):
    """Base class for meta serializers.

    Subclasses must implement ``serialize``. ``_humanize_text`` is a shared
    helper that turns underscore-separated names into display text.
    """

    @abstractmethod
    def serialize(
        self,
        *,
        item: NodeItem,
        doc: DoclingDocument,
        **kwargs: Any,
    ) -> SerializationResult:
        """Serializes the meta of the passed item.

        Args:
            item: The node item whose meta is to be serialized.
            doc: The document supplied alongside the item (presumably the
                document the item belongs to -- confirm against callers).
            **kwargs: Extra keyword arguments; their interpretation is left
                to the implementing serializer.

        Returns:
            The serialization result for the item's meta.
        """
        ...

    def _humanize_text(self, text: str, title: bool = False) -> str:
        """Convert an underscore-separated name into human-readable text.

        Every run of one or more underscores becomes a single space, then
        the result is title-cased or capitalized.

        Args:
            text: The raw name, e.g. ``"docling__summary"``.
            title: If True, apply ``str.title()`` (each word capitalized);
                otherwise apply ``str.capitalize()`` (first character
                upper-cased, the rest lower-cased).

        Returns:
            The humanized text.
        """
        collapsed = text
        # A single replace("__", "_") pass leaves "__" behind for runs of
        # three or more underscores, which would turn into repeated spaces.
        # Repeat until every run has shrunk to a single underscore.
        while "__" in collapsed:
            collapsed = collapsed.replace("__", "_")
        spaced = collapsed.replace("_", " ")
        return spaced.title() if title else spaced.capitalize()


@deprecated("Use BaseMetaSerializer() instead.")
class BaseAnnotationSerializer(ABC):
"""Base class for annotation serializers."""

Expand Down
Loading
Loading