From b4ed9b9f6b04914d3e6417a03048054931062187 Mon Sep 17 00:00:00 2001
From: Torre Baert <torre.baert@dsoft.be>
Date: Mon, 24 Jun 2024 15:32:42 +0200
Subject: [PATCH] Fixed a bug where the word tokens would be extracted
 incorrectly when multiple objects are detected

---
 src/inference.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/inference.py b/src/inference.py
index 564dd7685..ac35b190b 100644
--- a/src/inference.py
+++ b/src/inference.py
@@ -6,6 +6,7 @@
 import os
 import random
 import io
+import copy
 
 import torch
 from torchvision import transforms
@@ -249,7 +250,7 @@ def outputs_to_objects(outputs, img_size, class_idx2name):
 
     return objects
 
-def objects_to_crops(img, tokens, objects, class_thresholds, padding=10):
+def objects_to_crops(img, tokens_original, objects, class_thresholds, padding=10):
     """
     Process the bounding boxes produced by the table detection model into
     cropped table images and cropped tokens.
@@ -257,6 +258,7 @@ def objects_to_crops(img, tokens, objects, class_thresholds, padding=10):
 
     table_crops = []
     for obj in objects:
+        tokens = copy.deepcopy(tokens_original)
         if obj['score'] < class_thresholds[obj['label']]:
             continue