From b4ed9b9f6b04914d3e6417a03048054931062187 Mon Sep 17 00:00:00 2001 From: Torre Baert Date: Mon, 24 Jun 2024 15:32:42 +0200 Subject: [PATCH] Fixed a bug where the word tokens would not be extracted correctly when multiple objects are detected --- src/inference.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/inference.py b/src/inference.py index 564dd7685..ac35b190b 100644 --- a/src/inference.py +++ b/src/inference.py @@ -6,6 +6,7 @@ import os import random import io +import copy import torch from torchvision import transforms @@ -249,7 +250,7 @@ def outputs_to_objects(outputs, img_size, class_idx2name): return objects -def objects_to_crops(img, tokens, objects, class_thresholds, padding=10): +def objects_to_crops(img, tokens_original, objects, class_thresholds, padding=10): """ Process the bounding boxes produced by the table detection model into cropped table images and cropped tokens. @@ -257,6 +258,7 @@ def objects_to_crops(img, tokens, objects, class_thresholds, padding=10): table_crops = [] for obj in objects: + tokens = copy.deepcopy(tokens_original) if obj['score'] < class_thresholds[obj['label']]: continue