2727 Field ,
2828 FieldSerializationInfo ,
2929 StringConstraints ,
30+ TypeAdapter ,
31+ ValidationError ,
3032 computed_field ,
3133 field_serializer ,
3234 field_validator ,
@@ -941,39 +943,51 @@ class ContentLayer(str, Enum):
941943DEFAULT_CONTENT_LAYERS = {ContentLayer .BODY }
942944
943945
944- class BaseMeta (BaseModel ):
945- """Base class for metadata ."""
946+ class BasePrediction (BaseModel ):
947+ """Prediction field ."""
946948
947- model_config = ConfigDict (extra = "allow" )
949+ confidence : Optional [float ] = None
950+ provenance : Optional [str ] = None
951+ details : Optional [dict [str , Any ]] = None
948952
953+ @field_serializer ("confidence" )
954+ def _serialize (self , value : float , info : FieldSerializationInfo ) -> float :
955+ return round_pydantic_float (value , info .context , PydanticSerCtxKey .CONFID_PREC )
949956
950- class SummaryInstance (BaseModel ):
951- """Single summary data point."""
957+
958+ class SummaryMetaField (BasePrediction ):
959+ """Summary data."""
952960
953961 text : str
954- confidence : Optional [float ] = None
955- provenance : Optional [str ] = None
956962
957963
958- class SummaryModel (BaseModel ):
959- """Summary data."""
964+ class BaseMeta (BaseModel ):
965+ """Base class for metadata."""
966+
967+ model_config = ConfigDict (extra = "allow" )
968+ summary : Optional [SummaryMetaField ] = None
969+
960970
961- # convention: the first instance represents the main summary
962- instances : List [SummaryInstance ] = Field (default_factory = list , min_length = 1 )
963- # NOTE: if needed, can add validator to coerce simpler forms to instances
971+ class PictureClassificationPrediction (BasePrediction ):
972+ """Picture classification instance."""
964973
974+ class_name : str
965975
966- class CommonMeta (BaseMeta ):
967- """Common metadata model."""
968976
969- summary : Optional [SummaryModel ] = None
977+ class PictureClassificationMetaField (BaseModel ):
978+ """Picture classification metadata field."""
979+
980+ predictions : list [PictureClassificationPrediction ] = Field (
981+ default_factory = list , min_length = 1
982+ )
970983
971984
972- class PictureMeta (CommonMeta ):
985+ class PictureMeta (BaseMeta ):
973986 """Picture metadata model."""
974987
988+ classification : Optional [PictureClassificationMetaField ] = None
989+
975990 # TODO the previous classes include "kind" for disambiguation, which is not needed here
976- classification : Optional [PictureClassificationData ] = None
977991 molecule : Optional [PictureMoleculeData ] = None
978992 tabular_chart : Optional [PictureTabularChartData ] = None
979993 line_chart : Optional [PictureLineChartData ] = None
@@ -983,13 +997,6 @@ class PictureMeta(CommonMeta):
983997 scatter_chart : Optional [PictureScatterChartData ] = None
984998
985999
986- class TableMeta (CommonMeta ):
987- """Table metadata model."""
988-
989- # TODO the previous classes include "kind" for disambiguation, which is not needed here
990- description : Optional [DescriptionAnnotation ] = None
991-
992-
9931000class NodeItem (BaseModel ):
9941001 """NodeItem."""
9951002
@@ -1099,7 +1106,7 @@ def _add_sibling(
10991106class GroupItem (NodeItem ): # Container type, can't be a leaf node
11001107 """GroupItem."""
11011108
1102- meta : Optional [CommonMeta ] = None
1109+ meta : Optional [BaseMeta ] = None
11031110
11041111 name : str = (
11051112 "group" # Name of the group, e.g. "Introduction Chapter",
@@ -1151,7 +1158,7 @@ class DocItem(
11511158
11521159 label : DocItemLabel
11531160 prov : List [ProvenanceItem ] = []
1154- meta : Optional [CommonMeta ] = None
1161+ meta : Optional [BaseMeta ] = None
11551162
11561163 def get_location_tokens (
11571164 self ,
@@ -1460,9 +1467,47 @@ class PictureItem(FloatingItem):
14601467 DocItemLabel .PICTURE
14611468 )
14621469
1463- annotations : List [PictureDataType ] = []
1470+ annotations : Annotated [
1471+ List [PictureDataType ],
1472+ Field (deprecated = "The `annotations` field is deprecated; use `meta` instead." ),
1473+ ] = []
14641474 meta : Optional [PictureMeta ] = None
14651475
1476+ @model_validator (mode = "before" )
1477+ @classmethod
1478+ def migrate_annotations_to_meta (cls , data : Any ) -> Any :
1479+ """Migrate the `annotations` field to `meta`."""
1480+ if isinstance (data , dict ) and (annotations := data .get ("annotations" )):
1481+
1482+ for raw_ann in annotations :
1483+ # migrate annotations to meta
1484+ try :
1485+ # Use Pydantic TypeAdapter to validate the annotation type according to the instruction.
1486+
1487+ ann : PictureDataType = TypeAdapter (PictureDataType ).validate_python (
1488+ raw_ann
1489+ )
1490+ if isinstance (ann , PictureClassificationData ):
1491+ # ensure meta field is present
1492+ data .setdefault ("meta" , {})
1493+ data ["meta" ].setdefault (
1494+ "classification" ,
1495+ PictureClassificationMetaField (
1496+ predictions = [
1497+ PictureClassificationPrediction (
1498+ class_name = pred .class_name ,
1499+ confidence = pred .confidence ,
1500+ provenance = ann .provenance ,
1501+ )
1502+ for pred in ann .predicted_classes
1503+ ],
1504+ ).model_dump (),
1505+ )
1506+ except ValidationError as e :
1507+ raise e
1508+
1509+ return data
1510+
14661511 # Convert the image to Base64
14671512 def _image_to_base64 (self , pil_image , format = "PNG" ):
14681513 """Base64 representation of the image."""
@@ -1609,8 +1654,10 @@ class TableItem(FloatingItem):
16091654 DocItemLabel .TABLE ,
16101655 ] = DocItemLabel .TABLE
16111656
1612- annotations : List [TableAnnotationType ] = []
1613- meta : Optional [TableMeta ] = None
1657+ annotations : Annotated [
1658+ List [TableAnnotationType ],
1659+ deprecated ("The `annotations` field is deprecated; use `meta` instead." ),
1660+ ] = []
16141661
16151662 def export_to_dataframe (
16161663 self , doc : Optional ["DoclingDocument" ] = None
0 commit comments