From fbf4e6826c8398dc9e888ccde869c03fb20a400f Mon Sep 17 00:00:00 2001 From: Nikos Livathinos Date: Thu, 21 Aug 2025 12:01:02 +0200 Subject: [PATCH] feat: Add more fine-grained control in the DoclingEvalCOCOExporter to either export the gt_doc or the pred_doc from the parquet files. Signed-off-by: Nikos Livathinos --- docling_eval/utils/coco_exporter.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docling_eval/utils/coco_exporter.py b/docling_eval/utils/coco_exporter.py index 53b16373..8bdd093f 100644 --- a/docling_eval/utils/coco_exporter.py +++ b/docling_eval/utils/coco_exporter.py @@ -123,7 +123,7 @@ def export_COCO( extra_doc_label_to_valid_label_mapping: dict[ DocItemLabel, Optional[DocItemLabel] ], - source_doc_column: str = "GT", + use_pred_doc: bool = False, # If True the pred_doc is used (when present), otherwise the gt_doc ): r""" Parameters @@ -171,7 +171,7 @@ def export_COCO( data_record = DatasetRecordWithPrediction.model_validate(data) doc_id = data_record.doc_id - if data_record.predicted_doc is not None and source_doc_column == "pred": + if data_record.predicted_doc is not None and use_pred_doc: doc = data_record.predicted_doc _log.info("Dataset document to export: 'predicted_doc'") else: @@ -538,7 +538,7 @@ def main(): "--operation", required=True, type=str, - help="Operation to perform. One of ['coco']", + help="Operation to perform. 
One of ['coco_gt_doc', 'coco_pred_doc', 'predictions']", ) parser.add_argument( "-s", @@ -576,7 +576,8 @@ def main(): exporter = DoclingEvalCOCOExporter(args.docling_eval_dir) # Run the operation - if args.operation.upper() == "COCO": + op = args.operation.lower() + if op in ["coco_gt_doc", "coco_pred_doc"]: # Mapping from the parquet document label to the valid docling labels doc_label_to_valid_label_mapping: dict[DocItemLabel, DocItemLabel] = { DocItemLabel.PAGE_FOOTER: DocItemLabel.TEXT, @@ -591,8 +592,9 @@ def main(): "test", args.save_dir, doc_label_to_valid_label_mapping, + use_pred_doc="coco_pred_doc" == op, ) - elif args.operation.upper() == "predictions": + elif op == "predictions": exporter.export_predictions_wrt_original_COCO( "test", args.save_dir,