From ec570e3ca61b77a43d5950d6fcab0dbdbb42b72c Mon Sep 17 00:00:00 2001 From: Jose Gomez <1josegomezr@gmail.com> Date: Fri, 30 Aug 2024 23:41:19 +0200 Subject: [PATCH 1/2] Add support for barcode detection with ZBar - The barcode detection routine tries multiple times (seven to be precise) to find a code by preprocessing the image with the following filters: 1. Preserve luminance channel 2. Gaussian blur (pre) 3. Parametrizable binary filter (this is the filter adjusted on every iteration) 4. [Dilation & Erosion](https://docs.opencv.org/4.x/db/df6/tutorial_erosion_dilatation.html) 5. 2x Resize 6. 1/2 downsize with linear interpolation 7. Gaussian blur (post) It then appends the detected code to the end of the OCR text for the image. --- Dockerfile | 5 +++ ingestors/support/ocr.py | 92 +++++++++++++++++++++++++++++++++++++++- requirements.txt | 3 ++ 3 files changed, 98 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 85d024feb..342759e2a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -146,6 +146,11 @@ RUN python3 -m spacy download el_core_news_sm \ && python3 -m spacy download da_core_news_sm # RUN python3 -m spacy download zh_core_web_sm +RUN apt-get update && apt-get -qq -y install libzbar0 && apt-get -qq -y autoremove \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ + && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 + COPY . 
/ingestors WORKDIR /ingestors RUN pip3 install --no-cache-dir --config-settings editable_mode=compat --use-pep517 -e /ingestors diff --git a/ingestors/support/ocr.py b/ingestors/support/ocr.py index f40ce7a2c..71f3659bc 100644 --- a/ingestors/support/ocr.py +++ b/ingestors/support/ocr.py @@ -4,6 +4,11 @@ from hashlib import sha1 from normality import stringify from PIL import Image + +from pyzbar import pyzbar +import numpy as np +import cv2 + from io import BytesIO from languagecodes import list_to_alpha3 as alpha3 @@ -45,6 +50,87 @@ def extract_ocr_text(self, data, languages=None): return stringify(text) +class ZBarDetectorService(object): + THRESHOLDS = list(range(32, 230, 32)) + + def _enhance_image(self, image, threshold=127): + width, height = image.size + crop = (0, height - width * 3 / 2, width, height) + # Convert to grayscale using Pillow + gray_image = image.convert("L") + + # Convert Pillow image to OpenCV format + opencv_image = np.array(gray_image) + + # Apply Gaussian blur to reduce noise + blurred_image = cv2.GaussianBlur(opencv_image, (3, 3), 0) + + # Apply thresholding using OpenCV + _, thresh_image = cv2.threshold( + blurred_image, threshold, 255, cv2.THRESH_BINARY + ) + + # Apply morphological transformations to enhance the QR code + kernel = np.ones((3, 3), np.uint8) + dilated_image = cv2.dilate(thresh_image, kernel, iterations=1) + eroded_image = cv2.erode(dilated_image, kernel, iterations=1) + + # Resize the image to make the QR code larger + scale_percent = 200 # Adjust the scale as needed + width = int(eroded_image.shape[1] * scale_percent / 100) + height = int(eroded_image.shape[0] * scale_percent / 100) + dim = (width, height) + resized_image = cv2.resize(eroded_image, dim, interpolation=cv2.INTER_LINEAR) + resized_image = cv2.GaussianBlur(eroded_image, (5, 5), 0) + + return Image.fromarray(resized_image) + + def _serialize_zbar_result(self, result): + return "\n".join( + [ + "", + "--- CODE ---", + "TYPE: {}".format(result.type), + 
"QUALITY: {}".format(result.quality), + "ORIENTATION: {}".format(result.orientation), + "POSITION: {}".format(list(result.rect)), + "DATA: {}".format(result.data.decode("utf-8")), + ] + ) + + def _results_to_text(self, results): + return "---\n".join([self._serialize_zbar_result(result) for result in results]) + + def _try_best(self, image): + results = pyzbar.decode(image) + # Found it at first try + if len(results) > 0: + log.info("OCR: zbar found (%d) results at first shot", len(results)) + return results + + log.info("OCR: zbar ehnahcing image") + # Try with our enhance logic + for threshold in self.THRESHOLDS: + log.info("OCR: zbar applying threshold %d", threshold) + new_image = self._enhance_image(image, threshold=threshold) + results = pyzbar.decode(new_image) + + if len(results) > 0: + log.info( + "OCR: zbar found (%d) results with threshold=%d", + len(results), + threshold, + ) + return results + + # no results found then + return [] + + def extract_barcodes(self, image): + log.info("OCR: zbar scanning for codes") + return self._results_to_text(self._try_best(image)) + + class LocalOCRService(object): """Perform OCR using an RPC-based service.""" @@ -90,6 +176,7 @@ def extract_text(self, data, languages=None): log.error("Cannot open image data using Pillow: %s", exc) return "" + text = "" with temp_locale(TESSERACT_LOCALE): languages = self.language_list(languages) api = self.configure_engine(languages) @@ -109,13 +196,14 @@ def extract_text(self, data, languages=None): confidence, duration, ) - return text except Exception as exc: log.error("OCR error: %s", exc) - return "" finally: api.Clear() + text += ZBarDetectorService().extract_barcodes(image) + return text + class GoogleOCRService(object): """Use Google's Vision API to perform OCR. 
This has very good quality diff --git a/requirements.txt b/requirements.txt index ec32d83e6..e36b147f6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,9 @@ tesserocr==2.6.2 spacy==3.6.1 fingerprints==1.1.1 fasttext==0.9.2 +pyzbar==0.1.9 +opencv-python==4.10.0.84 +numpy==1.24.4 # Development pytest==8.2.0 From f80e4278baf1ca7e01f963978b95e3c65836d73d Mon Sep 17 00:00:00 2001 From: Jose Gomez <1josegomezr@gmail.com> Date: Sat, 31 Aug 2024 09:48:11 +0200 Subject: [PATCH 2/2] PILlowing OpenCV image filters Rewrote the OpenCV- and NumPy-based image processing filters using Pillow instead. It's a bit slower, but it reduces the new dependencies to just `pyzbar` and the `libzbar0` system library. --- Dockerfile | 6 +----- ingestors/support/ocr.py | 44 ++++++++++++++++------------------------ requirements.txt | 2 -- 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/Dockerfile b/Dockerfile index 342759e2a..73d5660e2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -102,6 +102,7 @@ RUN apt-get -qq -y update \ fonts-droid-fallback fonts-dustin fonts-f500 fonts-fanwood fonts-freefont-ttf \ fonts-liberation fonts-lmodern fonts-lyx fonts-sil-gentium fonts-texgyre \ fonts-tlwg-purisa \ + libzbar0 \ ### && apt-get -qq -y autoremove \ && apt-get clean \ @@ -146,11 +147,6 @@ RUN python3 -m spacy download el_core_news_sm \ && python3 -m spacy download da_core_news_sm # RUN python3 -m spacy download zh_core_web_sm -RUN apt-get update && apt-get -qq -y install libzbar0 && apt-get -qq -y autoremove \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ - && localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 - COPY . 
/ingestors WORKDIR /ingestors RUN pip3 install --no-cache-dir --config-settings editable_mode=compat --use-pep517 -e /ingestors diff --git a/ingestors/support/ocr.py b/ingestors/support/ocr.py index 71f3659bc..f6f3b3db4 100644 --- a/ingestors/support/ocr.py +++ b/ingestors/support/ocr.py @@ -3,11 +3,9 @@ import threading from hashlib import sha1 from normality import stringify -from PIL import Image +from PIL import Image, ImageFilter from pyzbar import pyzbar -import numpy as np -import cv2 from io import BytesIO from languagecodes import list_to_alpha3 as alpha3 @@ -55,42 +53,35 @@ class ZBarDetectorService(object): def _enhance_image(self, image, threshold=127): width, height = image.size - crop = (0, height - width * 3 / 2, width, height) - # Convert to grayscale using Pillow - gray_image = image.convert("L") - # Convert Pillow image to OpenCV format - opencv_image = np.array(gray_image) + # Convert to grayscale using Pillow + image = image.convert("L") # Apply Gaussian blur to reduce noise - blurred_image = cv2.GaussianBlur(opencv_image, (3, 3), 0) + image = image.filter(ImageFilter.GaussianBlur(3)) - # Apply thresholding using OpenCV - _, thresh_image = cv2.threshold( - blurred_image, threshold, 255, cv2.THRESH_BINARY - ) + # Apply threshold + image = image.point(lambda p: 255 if p > threshold else 0) + + # Dilatate the image + image = image.filter(ImageFilter.MaxFilter(3)) - # Apply morphological transformations to enhance the QR code - kernel = np.ones((3, 3), np.uint8) - dilated_image = cv2.dilate(thresh_image, kernel, iterations=1) - eroded_image = cv2.erode(dilated_image, kernel, iterations=1) + # Erode the image + image = image.filter(ImageFilter.MinFilter(3)) # Resize the image to make the QR code larger - scale_percent = 200 # Adjust the scale as needed - width = int(eroded_image.shape[1] * scale_percent / 100) - height = int(eroded_image.shape[0] * scale_percent / 100) - dim = (width, height) - resized_image = cv2.resize(eroded_image, dim, 
interpolation=cv2.INTER_LINEAR) - resized_image = cv2.GaussianBlur(eroded_image, (5, 5), 0) + new_size = map(lambda x: x * 2, image.size) + image = image.resize(new_size, resample=Image.Resampling.BILINEAR) - return Image.fromarray(resized_image) + # Last round of gaussian blur + image = image.filter(ImageFilter.GaussianBlur(5)) + return image def _serialize_zbar_result(self, result): return "\n".join( [ "", - "--- CODE ---", - "TYPE: {}".format(result.type), + "--- {} CODE ---".format(result.type), "QUALITY: {}".format(result.quality), "ORIENTATION: {}".format(result.orientation), "POSITION: {}".format(list(result.rect)), @@ -112,6 +103,7 @@ def _try_best(self, image): # Try with our enhance logic for threshold in self.THRESHOLDS: log.info("OCR: zbar applying threshold %d", threshold) + # Headsup: preserve the original image new_image = self._enhance_image(image, threshold=threshold) results = pyzbar.decode(new_image) diff --git a/requirements.txt b/requirements.txt index e36b147f6..456394343 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,8 +13,6 @@ spacy==3.6.1 fingerprints==1.1.1 fasttext==0.9.2 pyzbar==0.1.9 -opencv-python==4.10.0.84 -numpy==1.24.4 # Development pytest==8.2.0