From 6fbfff1be6622306fe041b33947c564cf8ce00d8 Mon Sep 17 00:00:00 2001
From: Vik Paruchuri <vik.paruchuri@gmail.com>
Date: Wed, 18 Mar 2026 11:19:37 -0400
Subject: [PATCH 1/3] Fix multilingual benchmark anchor link in README

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 31d88e8..4389217 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ Chandra OCR 2 is a state of the art OCR model that converts images and PDFs into
 
 - Tops external olmocr benchmark and significant improvement in internal multilingual benchmarks
 - Convert documents to markdown, html, or json with detailed layout information
-- Support for 90+ languages ([benchmark below](#multilingual-benchmark))
+- Support for 90+ languages ([benchmark below](#multilingual-benchmark-table))
 - Excellent handwriting support
 - Reconstructs forms accurately, including checkboxes
 - Strong performance with tables, math, and complex layouts

From 69988118f334c0c65dac5e0c70154e566e524f46 Mon Sep 17 00:00:00 2001
From: Vik Paruchuri <vik.paruchuri@gmail.com>
Date: Wed, 18 Mar 2026 11:20:58 -0400
Subject: [PATCH 2/3] Relax chunk count assertion in image inference test

---
 tests/integration/test_image_inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_image_inference.py b/tests/integration/test_image_inference.py
index 46e0c1f..bb066d7 100644
--- a/tests/integration/test_image_inference.py
+++ b/tests/integration/test_image_inference.py
@@ -15,4 +15,4 @@ def test_inference_image(simple_text_image):
     assert "Hello, World!" in output.markdown
 
     chunks = output.chunks
-    assert len(chunks) == 1
+    assert len(chunks) > 0

From 19e061aee7ebf649ed18296ca48927b6fc2dd372 Mon Sep 17 00:00:00 2001
From: Vik Paruchuri <vik.paruchuri@gmail.com>
Date: Wed, 18 Mar 2026 11:38:32 -0400
Subject: [PATCH 3/3] Stop HF generation on <|im_end|>; restore exact chunk count test

---
 chandra/model/hf.py                       | 13 ++++++++++++-
 tests/integration/test_image_inference.py |  2 +-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/chandra/model/hf.py b/chandra/model/hf.py
index 3f4ebf0..c98e58f 100644
--- a/chandra/model/hf.py
+++ b/chandra/model/hf.py
@@ -28,7 +28,18 @@ def generate_hf(
     )
     inputs = inputs.to(model.device)
 
-    generated_ids = model.generate(**inputs, max_new_tokens=max_output_tokens)
+    # Include both <|endoftext|> and <|im_end|> as stop tokens.
+    # generation_config only has <|endoftext|>, but the model emits <|im_end|> at turn boundaries.
+    eos_token_id = model.generation_config.eos_token_id
+    im_end_id = model.processor.tokenizer.convert_tokens_to_ids("<|im_end|>")
+    if isinstance(eos_token_id, int):
+        eos_token_id = [eos_token_id]
+    if im_end_id is not None and im_end_id not in eos_token_id:
+        eos_token_id.append(im_end_id)
+
+    generated_ids = model.generate(
+        **inputs, max_new_tokens=max_output_tokens, eos_token_id=eos_token_id
+    )
     generated_ids_trimmed = [
         out_ids[len(in_ids) :]
         for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
diff --git a/tests/integration/test_image_inference.py b/tests/integration/test_image_inference.py
index bb066d7..46e0c1f 100644
--- a/tests/integration/test_image_inference.py
+++ b/tests/integration/test_image_inference.py
@@ -15,4 +15,4 @@ def test_inference_image(simple_text_image):
     assert "Hello, World!" in output.markdown
 
     chunks = output.chunks
-    assert len(chunks) > 0
+    assert len(chunks) == 1