From 0832027e79759f8f5e577206ea2d22c7f34592f7 Mon Sep 17 00:00:00 2001 From: ryanznie Date: Sun, 4 Jan 2026 18:42:28 -0500 Subject: [PATCH 1/2] updated to use ONNX model as fallback --- benchmarks/README.md | 7 +++++++ benchmarks/models/hybrid_model.py | 30 +++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 624df19..7d7e3d1 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -23,6 +23,13 @@ python benchmarks/benchmark.py \ --device mps \ --split train +# Benchmark hybrid model with ONNX fallback +python benchmarks/benchmark.py \ + --model hybrid \ + --data-dir data/test \ + --model-path models/artifacts/layoutlmv3_invoice_ner.onnx \ + --run-name "layoutlmv3-lora-heuristics-ONNX" + # Benchmark Finetuned LayoutLMv3 only python benchmarks/benchmark.py \ --model layoutlmv3 \ diff --git a/benchmarks/models/hybrid_model.py b/benchmarks/models/hybrid_model.py index dc8bb37..6789489 100644 --- a/benchmarks/models/hybrid_model.py +++ b/benchmarks/models/hybrid_model.py @@ -61,10 +61,21 @@ def load(self) -> None: Note: Heuristics don't require loading, they're pure pattern matching. """ if self.fallback_model is None: - # Default to LayoutLMv3 - from benchmarks.models.layoutlmv3_model import LayoutLMv3Model + # Check for ONNX model path + model_path = "" + if self.model_config: + model_path = self.model_config.get("model_path", "") - self.fallback_model = LayoutLMv3Model(self.model_config) + if str(model_path).endswith(".onnx"): + logger.info("Detected ONNX model path, using OnnxModel fallback") + from benchmarks.models.onnx_model import OnnxModel + + self.fallback_model = OnnxModel(self.model_config) + else: + # Default to LayoutLMv3 + from benchmarks.models.layoutlmv3_model import LayoutLMv3Model + + self.fallback_model = LayoutLMv3Model(self.model_config) logger.info("Loading hybrid model...") logger.info("✓ Heuristics ready (no loading required)") @@ -142,6 +153,16 @@ def predict( result.metadata["fallback_used"] = True result.metadata["extraction_stage"] = "model_fallback" + + # Get model name from config + fb_config = self.fallback_model.get_config() + model_path = ( + fb_config.get("model_path") + or fb_config.get("checkpoint_path") + or "unknown" + ) + result.metadata["model_name"] = os.path.basename(str(model_path)) + result.method = "model_fallback" return result @@ -167,8 +188,7 @@ def get_config(self) -> Dict[str, Any]: fallback_config = self.fallback_model.get_config() return { - "model_name": "HybridModel (Heuristics + ML)", - "model_version": "v1.0", + "model_name": "HybridModel (Heuristics + LM)", "architecture": "Heuristics → Model Fallback", "heuristic_patterns": 14, # From heuristics.py "fallback_model": fallback_config, From 8ac6d71fb66704c85e74a2cf9af3305a6c0100ea Mon Sep 17 00:00:00 2001 From: ryanznie Date: Sun, 4 Jan 2026 18:42:58 -0500 Subject: [PATCH 2/2] v0.2.3 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 554a2be..ea0f113 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "invoice" -version = "0.2.2" +version = "0.2.3" description = "Finetuned LayoutLMv3 model for invoice number extraction" authors = [{ name = "Ryan Z. Nie", email = "ryanznie@gatech.edu" }] readme = "README.md" @@ -67,4 +67,4 @@ required-environments = ["sys_platform == 'darwin'"] [tool.coverage.run] relative_files = true branch = true -source = ["."] \ No newline at end of file +source = ["."]