From ec570e3ca61b77a43d5950d6fcab0dbdbb42b72c Mon Sep 17 00:00:00 2001
From: Jose Gomez <1josegomezr@gmail.com>
Date: Fri, 30 Aug 2024 23:41:19 +0200
Subject: [PATCH 1/2] Add support for barcode detection with ZBar

- The barcode detection routine tries multiple times (seven to be
  precise, once per threshold value) to find a code by preprocessing
  the image with the following filters:

    1. Preserve luminance channel
    2. Gaussian blur (pre)
    3. Parametrizable Binary filter (this is the filter adjusted on every
      iteration)
    4. [Dilatation & Erosion](https://docs.opencv.org/4.x/db/df6/tutorial_erosion_dilatation.html)
    5. 2x Resize
    6. 1/2 downsize with linear interpolation
    7. Gaussian blur (post)

  It then appends the detected codes to the end of the OCR text for the
  image.
---
 Dockerfile               |  5 +++
 ingestors/support/ocr.py | 92 +++++++++++++++++++++++++++++++++++++++-
 requirements.txt         |  3 ++
 3 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 85d024feb..342759e2a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -146,6 +146,11 @@ RUN python3 -m spacy download el_core_news_sm \
     && python3 -m spacy download da_core_news_sm
 # RUN python3 -m spacy download zh_core_web_sm
 
+RUN apt-get update && apt-get -qq -y install libzbar0 && apt-get -qq -y autoremove \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
+    && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
+
 COPY . /ingestors
 WORKDIR /ingestors
 RUN pip3 install --no-cache-dir --config-settings editable_mode=compat --use-pep517 -e /ingestors
diff --git a/ingestors/support/ocr.py b/ingestors/support/ocr.py
index f40ce7a2c..71f3659bc 100644
--- a/ingestors/support/ocr.py
+++ b/ingestors/support/ocr.py
@@ -4,6 +4,11 @@
 from hashlib import sha1
 from normality import stringify
 from PIL import Image
+
+from pyzbar import pyzbar
+import numpy as np
+import cv2
+
 from io import BytesIO
 from languagecodes import list_to_alpha3 as alpha3
 
@@ -45,6 +50,87 @@ def extract_ocr_text(self, data, languages=None):
         return stringify(text)
 
 
+class ZBarDetectorService(object):
+    THRESHOLDS = list(range(32, 230, 32))
+
+    def _enhance_image(self, image, threshold=127):
+        width, height = image.size
+        crop = (0, height - width * 3 / 2, width, height)
+        # Convert to grayscale using Pillow
+        gray_image = image.convert("L")
+
+        # Convert Pillow image to OpenCV format
+        opencv_image = np.array(gray_image)
+
+        # Apply Gaussian blur to reduce noise
+        blurred_image = cv2.GaussianBlur(opencv_image, (3, 3), 0)
+
+        # Apply thresholding using OpenCV
+        _, thresh_image = cv2.threshold(
+            blurred_image, threshold, 255, cv2.THRESH_BINARY
+        )
+
+        # Apply morphological transformations to enhance the QR code
+        kernel = np.ones((3, 3), np.uint8)
+        dilated_image = cv2.dilate(thresh_image, kernel, iterations=1)
+        eroded_image = cv2.erode(dilated_image, kernel, iterations=1)
+
+        # Resize the image to make the QR code larger
+        scale_percent = 200  # Adjust the scale as needed
+        width = int(eroded_image.shape[1] * scale_percent / 100)
+        height = int(eroded_image.shape[0] * scale_percent / 100)
+        dim = (width, height)
+        resized_image = cv2.resize(eroded_image, dim, interpolation=cv2.INTER_LINEAR)
+        resized_image = cv2.GaussianBlur(eroded_image, (5, 5), 0)
+
+        return Image.fromarray(resized_image)
+
+    def _serialize_zbar_result(self, result):
+        return "\n".join(
+            [
+                "",
+                "--- CODE ---",
+                "TYPE: {}".format(result.type),
+                "QUALITY: {}".format(result.quality),
+                "ORIENTATION: {}".format(result.orientation),
+                "POSITION: {}".format(list(result.rect)),
+                "DATA: {}".format(result.data.decode("utf-8")),
+            ]
+        )
+
+    def _results_to_text(self, results):
+        return "---\n".join([self._serialize_zbar_result(result) for result in results])
+
+    def _try_best(self, image):
+        results = pyzbar.decode(image)
+        # Found it at first try
+        if len(results) > 0:
+            log.info("OCR: zbar found (%d) results at first shot", len(results))
+            return results
+
+        log.info("OCR: zbar ehnahcing image")
+        # Try with our enhance logic
+        for threshold in self.THRESHOLDS:
+            log.info("OCR: zbar applying threshold %d", threshold)
+            new_image = self._enhance_image(image, threshold=threshold)
+            results = pyzbar.decode(new_image)
+
+            if len(results) > 0:
+                log.info(
+                    "OCR: zbar found (%d) results with threshold=%d",
+                    len(results),
+                    threshold,
+                )
+                return results
+
+        # no results found then
+        return []
+
+    def extract_barcodes(self, image):
+        log.info("OCR: zbar scanning for codes")
+        return self._results_to_text(self._try_best(image))
+
+
 class LocalOCRService(object):
     """Perform OCR using an RPC-based service."""
 
@@ -90,6 +176,7 @@ def extract_text(self, data, languages=None):
             log.error("Cannot open image data using Pillow: %s", exc)
             return ""
 
+        text = ""
         with temp_locale(TESSERACT_LOCALE):
             languages = self.language_list(languages)
             api = self.configure_engine(languages)
@@ -109,13 +196,14 @@ def extract_text(self, data, languages=None):
                     confidence,
                     duration,
                 )
-                return text
             except Exception as exc:
                 log.error("OCR error: %s", exc)
-                return ""
             finally:
                 api.Clear()
 
+        text += ZBarDetectorService().extract_barcodes(image)
+        return text
+
 
 class GoogleOCRService(object):
     """Use Google's Vision API to perform OCR. This has very good quality
diff --git a/requirements.txt b/requirements.txt
index ec32d83e6..e36b147f6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,6 +12,9 @@ tesserocr==2.6.2
 spacy==3.6.1
 fingerprints==1.1.1
 fasttext==0.9.2
+pyzbar==0.1.9
+opencv-python==4.10.0.84
+numpy==1.24.4
 
 # Development
 pytest==8.2.0

From f80e4278baf1ca7e01f963978b95e3c65836d73d Mon Sep 17 00:00:00 2001
From: Jose Gomez <1josegomezr@gmail.com>
Date: Sat, 31 Aug 2024 09:48:11 +0200
Subject: [PATCH 2/2] PILlowing OpenCV image filters

Rewrote the OpenCV- and NumPy-based image processing filters with Pillow
instead. It's a bit slower, but it reduces the extra system dependencies
to only `libzbar0`.
---
 Dockerfile               |  6 +-----
 ingestors/support/ocr.py | 44 ++++++++++++++++------------------------
 requirements.txt         |  2 --
 3 files changed, 19 insertions(+), 33 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 342759e2a..73d5660e2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -102,6 +102,7 @@ RUN apt-get -qq -y update \
     fonts-droid-fallback fonts-dustin fonts-f500 fonts-fanwood fonts-freefont-ttf \
     fonts-liberation fonts-lmodern fonts-lyx fonts-sil-gentium fonts-texgyre \
     fonts-tlwg-purisa \
+    libzbar0 \
     ###
     && apt-get -qq -y autoremove \
     && apt-get clean \
@@ -146,11 +147,6 @@ RUN python3 -m spacy download el_core_news_sm \
     && python3 -m spacy download da_core_news_sm
 # RUN python3 -m spacy download zh_core_web_sm
 
-RUN apt-get update && apt-get -qq -y install libzbar0 && apt-get -qq -y autoremove \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
-    && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
-
 COPY . /ingestors
 WORKDIR /ingestors
 RUN pip3 install --no-cache-dir --config-settings editable_mode=compat --use-pep517 -e /ingestors
diff --git a/ingestors/support/ocr.py b/ingestors/support/ocr.py
index 71f3659bc..f6f3b3db4 100644
--- a/ingestors/support/ocr.py
+++ b/ingestors/support/ocr.py
@@ -3,11 +3,9 @@
 import threading
 from hashlib import sha1
 from normality import stringify
-from PIL import Image
+from PIL import Image, ImageFilter
 
 from pyzbar import pyzbar
-import numpy as np
-import cv2
 
 from io import BytesIO
 from languagecodes import list_to_alpha3 as alpha3
@@ -55,42 +53,35 @@ class ZBarDetectorService(object):
 
     def _enhance_image(self, image, threshold=127):
         width, height = image.size
-        crop = (0, height - width * 3 / 2, width, height)
-        # Convert to grayscale using Pillow
-        gray_image = image.convert("L")
 
-        # Convert Pillow image to OpenCV format
-        opencv_image = np.array(gray_image)
+        # Convert to grayscale using Pillow
+        image = image.convert("L")
 
         # Apply Gaussian blur to reduce noise
-        blurred_image = cv2.GaussianBlur(opencv_image, (3, 3), 0)
+        image = image.filter(ImageFilter.GaussianBlur(3))
 
-        # Apply thresholding using OpenCV
-        _, thresh_image = cv2.threshold(
-            blurred_image, threshold, 255, cv2.THRESH_BINARY
-        )
+        # Apply threshold
+        image = image.point(lambda p: 255 if p > threshold else 0)
+
+        # Dilate the image
+        image = image.filter(ImageFilter.MaxFilter(3))
 
-        # Apply morphological transformations to enhance the QR code
-        kernel = np.ones((3, 3), np.uint8)
-        dilated_image = cv2.dilate(thresh_image, kernel, iterations=1)
-        eroded_image = cv2.erode(dilated_image, kernel, iterations=1)
+        # Erode the image
+        image = image.filter(ImageFilter.MinFilter(3))
 
         # Resize the image to make the QR code larger
-        scale_percent = 200  # Adjust the scale as needed
-        width = int(eroded_image.shape[1] * scale_percent / 100)
-        height = int(eroded_image.shape[0] * scale_percent / 100)
-        dim = (width, height)
-        resized_image = cv2.resize(eroded_image, dim, interpolation=cv2.INTER_LINEAR)
-        resized_image = cv2.GaussianBlur(eroded_image, (5, 5), 0)
+        new_size = map(lambda x: x * 2, image.size)
+        image = image.resize(new_size, resample=Image.Resampling.BILINEAR)
 
-        return Image.fromarray(resized_image)
+        # Last round of gaussian blur
+        image = image.filter(ImageFilter.GaussianBlur(5))
+        return image
 
     def _serialize_zbar_result(self, result):
         return "\n".join(
             [
                 "",
-                "--- CODE ---",
-                "TYPE: {}".format(result.type),
+                "--- {} CODE ---".format(result.type),
                 "QUALITY: {}".format(result.quality),
                 "ORIENTATION: {}".format(result.orientation),
                 "POSITION: {}".format(list(result.rect)),
@@ -112,6 +103,7 @@ def _try_best(self, image):
         # Try with our enhance logic
         for threshold in self.THRESHOLDS:
             log.info("OCR: zbar applying threshold %d", threshold)
+            # Heads-up: preserve the original image
             new_image = self._enhance_image(image, threshold=threshold)
             results = pyzbar.decode(new_image)
 
diff --git a/requirements.txt b/requirements.txt
index e36b147f6..456394343 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,8 +13,6 @@ spacy==3.6.1
 fingerprints==1.1.1
 fasttext==0.9.2
 pyzbar==0.1.9
-opencv-python==4.10.0.84
-numpy==1.24.4
 
 # Development
 pytest==8.2.0