From 299990b5c0fd6e2f3b0db6674d1beb75ffd8dcae Mon Sep 17 00:00:00 2001
From: Wang Xin <xinwang614@gmail.com>
Date: Thu, 5 Mar 2026 14:32:10 +0000
Subject: [PATCH 1/4] fix(structure): improve layout sorting, title levels, and
 inline formula handling

- Port PaddleX's xycut_enhanced algorithm faithfully with proper constants, cross-layout detection, overlapping box shrinking, and weighted distance insertion for reading order sorting
- Add multi-signal title level inference using semantic numbering, relative indentation, and font-size k-means clustering
- Inject inline formulas into text regions before stitching to prevent duplicate rendering, wrapping LaTeX with $...$ delimiters
- Move markdown generation from examples into core StructureResult::to_markdown()
- Fix cell detection to also run when use_cells_trans_to_html is enabled
- Add max_side_len=4000 default and lower box_threshold for table pipelines
- Add line_height_iou_threshold for improved line grouping in stitching
---
 examples/utils/markdown.rs                    |  515 +-------
 oar-ocr-core/src/domain/structure.rs          |  829 ++++++++++---
 oar-ocr-core/src/domain/text_region.rs        |   27 +-
 oar-ocr-core/src/processors/geometry.rs       |   37 +-
 oar-ocr-core/src/processors/layout_sorting.rs |  832 +++++++++----
 oar-ocr-core/src/processors/layout_utils.rs   |   49 +-
 oar-ocr-core/src/processors/sorting.rs        |    5 +-
 oar-ocr-vl/src/doc_parser.rs                  |   10 +-
 src/oarocr/ocr.rs                             |   17 +
 src/oarocr/stitching.rs                       | 1039 +++++++++++++----
 src/oarocr/structure.rs                       |   51 +-
 src/oarocr/table_analyzer.rs                  |  173 ++-
 12 files changed, 2461 insertions(+), 1123 deletions(-)
diff --git a/examples/utils/markdown.rs b/examples/utils/markdown.rs
index 3a72fa8..e68f810 100644
--- a/examples/utils/markdown.rs
+++ b/examples/utils/markdown.rs
@@ -5,34 +5,7 @@
 //! while these examples utilities handle the file system operations.
 
 use oar_ocr::domain::structure::{LayoutElementType, StructureResult};
-use oar_ocr::processors::BoundingBox;
-use regex::Regex;
 use std::path::Path;
-use std::sync::LazyLock;
-
-/// Title numbering pattern for detecting section numbers like 1, 1.2, 1.2.3, (1), 一、etc.
-static TITLE_NUMBERING_REGEX: LazyLock<Regex> = LazyLock::new(|| {
-    Regex::new(
-        r"(?x)
-        ^\s*
-        (
-            [1-9][0-9]*(?:\.[1-9][0-9]*)*[\.、]?
-            |
-            [(（][1-9][0-9]*(?:\.[1-9][0-9]*)*[)）]
-            |
-            [一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾][、.]?
-            |
-            [(（][一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+[)）]
-            |
-            (?:I|II|III|IV|V|VI|VII|VIII|IX|X)(?:\.|\b)
-        )
-        (\s+)
-        (.*)
-        $
-    ",
-    )
-    .expect("Invalid TITLE_NUMBERING_REGEX pattern")
-});
 
 /// Exports markdown with extracted images saved to disk.
 ///
@@ -59,280 +32,55 @@ pub fn export_markdown_with_images(
         std::fs::create_dir_all(&imgs_dir)?;
     }
 
-    // Collect table bboxes for overlap filtering
-    let table_bboxes: Vec<&BoundingBox> = result
-        .layout_elements
-        .iter()
-        .filter(|e| e.element_type == LayoutElementType::Table)
-        .map(|e| &e.bbox)
-        .collect();
-
-    let mut md = String::new();
-    let mut img_counter = 0usize;
-    let elements = &result.layout_elements;
-
-    for (idx, element) in elements.iter().enumerate() {
-        // PP-StructureV3 markdown ignores auxiliary labels.
+    // Extract and save images for Image/Chart elements
+    for element in &result.layout_elements {
         if matches!(
             element.element_type,
-            LayoutElementType::Number
-                | LayoutElementType::Footnote
-                | LayoutElementType::Header
-                | LayoutElementType::HeaderImage
-                | LayoutElementType::Footer
-                | LayoutElementType::FooterImage
-                | LayoutElementType::AsideText
+            LayoutElementType::Image | LayoutElementType::Chart
         ) {
-            continue;
-        }
-
-        // Filter out low-confidence text elements that overlap with tables
-        if element.element_type == LayoutElementType::Text {
-            let overlaps_table = table_bboxes
-                .iter()
-                .any(|table_bbox| element.bbox.ioa(table_bbox) > 0.3);
-            if overlaps_table && element.confidence < 0.7 {
-                continue;
-            }
-        }
-
-        match element.element_type {
-            // Document title
-            LayoutElementType::DocTitle => {
-                md.push_str("\n# ");
-                if let Some(text) = &element.text {
-                    let cleaned = clean_ocr_text(text);
-                    md.push_str(&cleaned);
-                }
-                md.push_str("\n\n");
-            }
-            // Paragraph/section title
-            LayoutElementType::ParagraphTitle => {
-                if let Some(text) = &element.text {
-                    let cleaned = clean_ocr_text(text);
-                    let (level, formatted_title) = format_title_with_level(&cleaned);
-                    md.push('\n');
-                    for _ in 0..level {
-                        md.push('#');
-                    }
-                    md.push(' ');
-                    md.push_str(&formatted_title);
-                    md.push_str("\n\n");
-                } else {
-                    md.push_str("\n## \n\n");
-                }
-            }
-            // Table
-            LayoutElementType::Table => {
-                if let Some(table) = result
-                    .tables
-                    .iter()
-                    .find(|t| t.bbox.iou(&element.bbox) > 0.5)
-                {
-                    if let Some(html) = &table.html_structure {
-                        let simplified = simplify_table_html(html);
-                        let table_with_border =
-                            simplified.replacen("<table>", "<table border=\"1\">", 1);
-                        md.push_str("\n<div style=\"text-align: center;\">");
-                        md.push_str(&table_with_border);
-                        md.push_str("</div>\n\n");
-                    } else {
-                        md.push_str("\n[Table]\n\n");
-                    }
-                } else {
-                    md.push_str("\n[Table]\n\n");
-                }
-            }
-            // Formula - detect inline vs display formula based on context
-            LayoutElementType::Formula | LayoutElementType::FormulaNumber => {
-                let is_inline = {
-                    let has_prev_text = (0..idx).rev().any(|i| {
-                        let prev = &elements[i];
-                        !prev.element_type.is_formula()
-                            && (prev.element_type == LayoutElementType::Text
-                                || prev.element_type == LayoutElementType::ReferenceContent)
-                            && is_same_line(&element.bbox, &prev.bbox)
-                    });
-
-                    let has_next_text = ((idx + 1)..elements.len()).any(|i| {
-                        let next = &elements[i];
-                        !next.element_type.is_formula()
-                            && (next.element_type == LayoutElementType::Text
-                                || next.element_type == LayoutElementType::ReferenceContent)
-                            && is_same_line(&element.bbox, &next.bbox)
-                    });
-
-                    has_prev_text || has_next_text
-                };
-
-                if is_inline {
-                    md.push('$');
-                    if let Some(latex) = &element.text {
-                        md.push_str(latex);
-                    }
-                    md.push_str("$ ");
-                } else {
-                    md.push_str("\n$$");
-                    if let Some(latex) = &element.text {
-                        md.push_str(latex);
-                    }
-                    md.push_str("$$\n\n");
-                }
-            }
-            // Image/Chart - extract and save image region
-            LayoutElementType::Image | LayoutElementType::Chart => {
-                let type_name = if element.element_type == LayoutElementType::Chart {
-                    "chart"
-                } else {
-                    "image"
-                };
-
-                // Generate image filename
-                let img_name = format!(
-                    "img_in_{}_box_{:.0}_{:.0}_{:.0}_{:.0}.jpg",
-                    type_name,
-                    element.bbox.x_min(),
-                    element.bbox.y_min(),
-                    element.bbox.x_max(),
-                    element.bbox.y_max()
-                );
-                let img_path = imgs_dir.join(&img_name);
-                let relative_path = format!("imgs/{}", img_name);
-
-                // Extract and save image region if we have the source image
-                if let Some(ref img) = result.rectified_img {
-                    let x = element.bbox.x_min().max(0.0) as u32;
-                    let y = element.bbox.y_min().max(0.0) as u32;
-                    let width = ((element.bbox.x_max() - element.bbox.x_min()) as u32)
-                        .min(img.width().saturating_sub(x));
-                    let height = ((element.bbox.y_max() - element.bbox.y_min()) as u32)
-                        .min(img.height().saturating_sub(y));
-
-                    if width > 0 && height > 0 {
-                        let cropped =
-                            image::imageops::crop_imm(img.as_ref(), x, y, width, height).to_image();
-                        // Save as JPEG
-                        if let Err(e) = cropped.save(&img_path) {
-                            tracing::warn!("Failed to save image {}: {}", img_path.display(), e);
-                        }
-                    }
-                }
-
-                // Calculate width percentage
-                let width_pct =
-                    ((element.bbox.x_max() - element.bbox.x_min()) / 12.0).clamp(20.0, 100.0);
-
-                md.push_str("\n<div style=\"text-align: center;\"><img src=\"");
-                md.push_str(&relative_path);
-                md.push_str("\" alt=\"Image\" width=\"");
-                md.push_str(&format!("{:.0}%", width_pct));
-                md.push_str("\" /></div>\n\n");
-
-                img_counter += 1;
-            }
-            // Seal
-            LayoutElementType::Seal => {
-                md.push_str("\n![Seal]");
-                if let Some(text) = &element.text {
-                    md.push_str("\n> ");
-                    md.push_str(text);
-                }
-                md.push_str("\n\n");
-            }
-            // Captions
-            _ if element.element_type.is_caption() => {
-                if let Some(text) = &element.text {
-                    md.push_str("\n<div style=\"text-align: center;\">");
-                    md.push_str(text);
-                    md.push_str(" </div>\n\n");
-                }
-            }
-            // Abstract
-            LayoutElementType::Abstract => {
-                if let Some(text) = &element.text {
-                    let lower = text.to_lowercase();
-                    if lower.contains("abstract") || lower.contains("摘要") {
-                        md.push_str("\n## **Abstract**\n\n");
-                    }
-                    let formatted = format_text_block(text);
-                    md.push_str(&formatted);
-                    md.push_str("\n\n");
-                }
-            }
-            // Reference
-            LayoutElementType::Reference => {
-                if let Some(text) = &element.text {
-                    let formatted = format_reference_block(text);
-                    md.push('\n');
-                    md.push_str(&formatted);
-                    md.push_str("\n\n");
-                }
-            }
-            // Content
-            LayoutElementType::Content => {
-                if let Some(text) = &element.text {
-                    let formatted = format_content_block(text);
-                    md.push('\n');
-                    md.push_str(&formatted);
-                    md.push_str("\n\n");
-                }
-            }
-            // Footnote
-            LayoutElementType::Footnote => {
-                if let Some(text) = &element.text {
-                    let formatted = format_vision_footnote_block(text);
-                    md.push('\n');
-                    md.push_str(&formatted);
-                    md.push_str("\n\n");
-                }
-            }
-            // List
-            LayoutElementType::List => {
-                if let Some(text) = &element.text {
-                    let cleaned = format_text_block(text);
-                    for line in cleaned.lines() {
-                        let line = line.trim();
-                        if !line.is_empty() {
-                            md.push_str("- ");
-                            md.push_str(line);
-                            md.push('\n');
-                        }
+            let type_name = if element.element_type == LayoutElementType::Chart {
+                "chart"
+            } else {
+                "image"
+            };
+
+            // Generate image filename matching StructureResult::to_markdown() placeholder
+            let img_name = format!(
+                "img_in_{}_box_{:.0}_{:.0}_{:.0}_{:.0}.jpg",
+                type_name,
+                element.bbox.x_min(),
+                element.bbox.y_min(),
+                element.bbox.x_max(),
+                element.bbox.y_max()
+            );
+            let img_path = imgs_dir.join(&img_name);
+
+            // Extract and save image region if we have the source image
+            if let Some(ref img) = result.rectified_img {
+                let x = element.bbox.x_min().max(0.0) as u32;
+                let y = element.bbox.y_min().max(0.0) as u32;
+                let width = ((element.bbox.x_max() - element.bbox.x_min()) as u32)
+                    .min(img.width().saturating_sub(x));
+                let height = ((element.bbox.y_max() - element.bbox.y_min()) as u32)
+                    .min(img.height().saturating_sub(y));
+
+                if width > 0 && height > 0 {
+                    let cropped =
+                        image::imageops::crop_imm(img.as_ref(), x, y, width, height).to_image();
+                    // Save as JPEG to match extension in markdown
+                    if let Err(e) = cropped.save(&img_path) {
+                        tracing::warn!("Failed to save image {}: {}", img_path.display(), e);
                     }
-                    md.push('\n');
-                }
-            }
-            // Header/Footer - skip
-            _ if element.element_type.is_header() || element.element_type.is_footer() => {
-                continue;
-            }
-            // Default text
-            _ => {
-                if let Some(text) = &element.text {
-                    let formatted = format_text_block(text);
-                    md.push_str(&formatted);
-                    md.push_str("\n\n");
                 }
             }
         }
     }
 
-    tracing::debug!("Extracted {} images to {:?}", img_counter, imgs_dir);
-    Ok(md.trim().to_string())
+    // Use core library markdown generation (already implements PaddleX rules)
+    Ok(result.to_markdown())
 }
 
-/// Exports concatenated markdown from multiple pages with images.
-///
-/// This follows the same concatenation logic as `concatenate_markdown_pages`
-/// but also handles image extraction for all pages.
-///
-/// # Arguments
-///
-/// * `results` - Slice of structure results from multiple pages (in order)
-/// * `output_dir` - Directory to save extracted images
-///
-/// # Returns
-///
-/// A single markdown string with all pages properly concatenated and images extracted
+/// Exports concatenated markdown from multiple pages with images and post-processing.
 pub fn export_concatenated_markdown_with_images(
     results: &[StructureResult],
     output_dir: impl AsRef<Path>,
@@ -343,187 +91,18 @@ pub fn export_concatenated_markdown_with_images(
         return Ok(String::new());
     }
 
-    if results.len() == 1 {
-        return export_markdown_with_images(&results[0], output_dir);
-    }
-
-    let mut markdown = String::new();
-    let mut prev_page_end_flag = true;
-
-    for result in results.iter() {
-        let flags = result
-            .page_continuation_flags
-            .as_ref()
-            .cloned()
-            .unwrap_or_else(|| result.calculate_continuation_flags());
-
-        let page_markdown = export_markdown_with_images(result, output_dir)?;
-
-        if page_markdown.trim().is_empty() {
-            prev_page_end_flag = flags.paragraph_end;
-            continue;
-        }
-
-        let page_first_continues = !flags.paragraph_start;
-
-        if page_first_continues && !prev_page_end_flag {
-            let last_char = markdown.chars().last();
-            let first_char = page_markdown.chars().next();
-
-            let last_is_chinese = last_char.is_some_and(is_chinese_char);
-            let first_is_chinese = first_char.is_some_and(is_chinese_char);
-
-            if !last_is_chinese && !first_is_chinese {
-                markdown.push(' ');
-                markdown.push_str(page_markdown.trim_start());
-            } else {
-                markdown.push_str(page_markdown.trim_start());
-            }
-        } else {
-            if !markdown.is_empty() {
-                markdown.push_str("\n\n");
-            }
-            markdown.push_str(&page_markdown);
-        }
-
-        prev_page_end_flag = flags.paragraph_end;
-    }
-
-    Ok(markdown.trim().to_string())
-}
-
-/// Cleans OCR text content by removing common artifacts.
-fn clean_ocr_text(text: &str) -> String {
-    text.replace("-\n", "").replace('\n', " ")
-}
-
-/// Formats text blocks following PaddleX's text handling.
-fn format_text_block(text: &str) -> String {
-    let dehyphenated = text.replace("-\n", "");
-    let step1 = dehyphenated.replace("\n\n", "\n");
-    step1.replace('\n', "\n\n")
-}
-
-/// Formats content blocks (table of contents).
-fn format_content_block(text: &str) -> String {
-    let step1 = text.replace("-\n", "  \n");
-    step1.replace('\n', "  \n")
-}
-
-/// Formats reference blocks.
-fn format_reference_block(text: &str) -> String {
-    let dehyphenated = text.replace("-\n", "");
-    let lines: Vec<&str> = dehyphenated.lines().collect();
-
-    let mut result = String::new();
-    let mut added_heading = false;
-
-    for (i, line) in lines.iter().enumerate() {
-        let trimmed = line.trim();
-        if trimmed.is_empty() {
-            continue;
-        }
-
-        if !added_heading && (trimmed.contains("References") || trimmed.contains("参考文献")) {
-            result.push_str("## **References**\n\n");
-            added_heading = true;
-            continue;
-        }
-
-        if i > 0 || result.is_empty() {
-            if !result.is_empty() {
-                result.push('\n');
-            }
-            result.push_str(trimmed);
-        }
-    }
-
-    if result.is_empty() {
-        dehyphenated
-    } else {
-        result
+    // First, save all images from all pages
+    for result in results {
+        export_markdown_with_images(result, output_dir)?;
     }
-}
 
-/// Formats vision footnote blocks.
-fn format_vision_footnote_block(text: &str) -> String {
-    let dehyphenated = text.replace("-\n", "");
-    let step1 = dehyphenated.replace("\n\n", "\n");
-    step1.replace('\n', "\n\n")
-}
-
-/// Simplifies table HTML by removing wrapper tags.
-fn simplify_table_html(html: &str) -> String {
-    html.replace("<html>", "")
-        .replace("</html>", "")
-        .replace("<body>", "")
-        .replace("</body>", "")
-}
+    // Use core library concatenation logic (handles paragraph continuity and CJK spacing)
+    let raw_markdown = oar_ocr::domain::structure::concatenate_markdown_pages(results);
 
-/// Checks if two bounding boxes are on the same line.
-fn is_same_line(bbox1: &BoundingBox, bbox2: &BoundingBox) -> bool {
-    let y1_min = bbox1.y_min();
-    let y1_max = bbox1.y_max();
-    let y2_min = bbox2.y_min();
-    let y2_max = bbox2.y_max();
+    // Apply advanced PaddleX post-processing (dehyphenation, word merging fixes, deduplication)
+    let processed_markdown = oar_ocr::domain::structure::postprocess_markdown(&raw_markdown);
 
-    let overlap_start = y1_min.max(y2_min);
-    let overlap_end = y1_max.min(y2_max);
-    let overlap = (overlap_end - overlap_start).max(0.0);
-
-    let height1 = y1_max - y1_min;
-    let height2 = y2_max - y2_min;
-    let min_height = height1.min(height2);
-
-    min_height > 0.0 && overlap / min_height > 0.5
-}
-
-/// Checks if a character is a Chinese character.
-fn is_chinese_char(c: char) -> bool {
-    matches!(c,
-        '\u{4E00}'..='\u{9FFF}' | // CJK Unified Ideographs
-        '\u{3400}'..='\u{4DBF}' | // CJK Unified Ideographs Extension A
-        '\u{20000}'..='\u{2A6DF}' | // CJK Unified Ideographs Extension B
-        '\u{2A700}'..='\u{2B73F}' | // CJK Unified Ideographs Extension C
-        '\u{2B740}'..='\u{2B81F}' | // CJK Unified Ideographs Extension D
-        '\u{2B820}'..='\u{2CEAF}' | // CJK Unified Ideographs Extension E
-        '\u{2CEB0}'..='\u{2EBEF}'   // CJK Unified Ideographs Extension F
-    )
+    Ok(processed_markdown)
 }
 
-/// Title numbering pattern for detecting section numbers.
-fn is_numbered_title(title: &str) -> (bool, usize, String) {
-    let cleaned = title.replace("-\n", "").replace('\n', " ");
-
-    if let Some(captures) = TITLE_NUMBERING_REGEX.captures(&cleaned) {
-        let numbering = captures.get(1).map(|m| m.as_str().trim()).unwrap_or("");
-        let title_content = captures.get(3).map(|m| m.as_str()).unwrap_or("");
-
-        let dot_count = numbering.matches('.').count();
-        let level = dot_count + 2;
-
-        let formatted = if title_content.is_empty() {
-            numbering.trim_end_matches('.').to_string()
-        } else {
-            format!(
-                "{} {}",
-                numbering.trim_end_matches('.'),
-                title_content.trim_start()
-            )
-        };
-
-        (true, level.clamp(2, 6), formatted)
-    } else {
-        (false, 2, cleaned)
-    }
-}
-
-/// Formats paragraph title with automatic level detection.
-fn format_title_with_level(title: &str) -> (usize, String) {
-    let (is_numbered, level, formatted) = is_numbered_title(title);
-    if is_numbered {
-        (level, formatted)
-    } else {
-        (2, title.replace("-\n", "").replace('\n', " "))
-    }
-}
+// Formatting helpers that previously lived here are now provided by the core library.
diff --git a/oar-ocr-core/src/domain/structure.rs b/oar-ocr-core/src/domain/structure.rs
index 7358628..4ef042b 100644
--- a/oar-ocr-core/src/domain/structure.rs
+++ b/oar-ocr-core/src/domain/structure.rs
@@ -9,6 +9,7 @@ use image::RgbImage;
 use once_cell::sync::Lazy;
 use regex::Regex;
 use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
 use std::path::Path;
 use std::sync::Arc;
 
@@ -58,20 +59,25 @@ static TITLE_NUMBERING_REGEX: Lazy<Regex> = Lazy::new(|| {
 /// - "1 Introduction" -> (2, "1 Introduction") -> `## 1 Introduction`
 /// - "2.1 Method" -> (3, "2.1 Method") -> `### 2.1 Method`
 /// - "2.1.1 Details" -> (4, "2.1.1 Details") -> `#### 2.1.1 Details`
-fn format_title_with_level(title: &str) -> (usize, String) {
-    // Clean up line breaks
-    let cleaned = title.replace("-\n", "").replace('\n', " ");
+fn semantic_title_level_and_format(cleaned: &str) -> Option<(usize, String)> {
+    let trimmed = cleaned.trim();
+
+    // Common unnumbered top-level section headers.
+    let keyword = trimmed.trim_end_matches(':').to_ascii_uppercase();
+    if matches!(
+        keyword.as_str(),
+        "ABSTRACT" | "INTRODUCTION" | "REFERENCES" | "REFERENCE"
+    ) {
+        return Some((1, trimmed.to_string()));
+    }
 
-    if let Some(captures) = TITLE_NUMBERING_REGEX.captures(&cleaned) {
+    if let Some(captures) = TITLE_NUMBERING_REGEX.captures(cleaned) {
         let numbering = captures.get(1).map(|m| m.as_str().trim()).unwrap_or("");
         let title_content = captures.get(3).map(|m| m.as_str()).unwrap_or("");
 
-        // Determine level from dots in numbering (PaddleX: dots + 1, then +1 for base ##)
-        // 1 -> 2 (##), 1.2 -> 3 (###), 1.2.3 -> 4 (####)
         let dot_count = numbering.matches('.').count();
-        let level = dot_count + 2; // +1 for PaddleX logic, +1 for base ## level
+        let level = (dot_count + 2).clamp(2, 6);
 
-        // Reconstruct title: numbering + space + content
         let formatted = if title_content.is_empty() {
             numbering.trim_end_matches('.').to_string()
         } else {
@@ -81,15 +87,213 @@ fn format_title_with_level(title: &str) -> (usize, String) {
                 title_content.trim_start()
             )
         };
+        return Some((level, formatted));
+    }
+
+    None
+}
+
+fn semantic_title_level(text: &str) -> Option<usize> {
+    let cleaned = text.replace("-\n", "").replace('\n', " ");
+    semantic_title_level_and_format(&cleaned).map(|(level, _)| level)
+}
+
+fn format_title_with_level(title: &str, clustered_level: Option<usize>) -> (usize, String) {
+    // Clean up line breaks
+    let cleaned = title.replace("-\n", "").replace('\n', " ");
+    if let Some((level, formatted)) = semantic_title_level_and_format(&cleaned) {
+        return (level, formatted);
+    }
+
+    // No semantic signal: use voting hint from relative/font-size signals.
+    let level = clustered_level.unwrap_or(2).clamp(2, 6);
+    (level, cleaned)
+}
+
+/// Estimate per-title heading levels using three-signal voting:
+/// 1) semantic numbering/keyword level
+/// 2) relative indentation order
+/// 3) font-size k-means (k<=4)
+/// Returns a map from element index to the voted markdown heading level (1-6).
+fn infer_paragraph_title_levels(elements: &[LayoutElement]) -> HashMap<usize, usize> {
+    let title_indices: Vec<usize> = elements
+        .iter()
+        .enumerate()
+        .filter(|(_, e)| e.element_type == LayoutElementType::ParagraphTitle)
+        .map(|(idx, _)| idx)
+        .collect();
+    if title_indices.is_empty() {
+        return HashMap::new();
+    }
+
+    let height_samples: Vec<(usize, f32)> = title_indices
+        .iter()
+        .filter_map(|&idx| {
+            let e = &elements[idx];
+            let height = (e.bbox.y_max() - e.bbox.y_min()).max(1.0);
+            let line_h = height / e.num_lines.unwrap_or(1).max(1) as f32;
+            let v = line_h.max(1.0);
+            if v.is_finite() { Some((idx, v)) } else { None }
+        })
+        .collect();
+
+    let indent_samples: Vec<(usize, f32)> = title_indices
+        .iter()
+        .filter_map(|&idx| {
+            let x = elements[idx].bbox.x_min();
+            if x.is_finite() { Some((idx, x)) } else { None }
+        })
+        .collect();
+    let semantic_levels: HashMap<usize, usize> = title_indices
+        .iter()
+        .filter_map(|&idx| {
+            elements[idx]
+                .text
+                .as_deref()
+                .and_then(semantic_title_level)
+                .map(|level| (idx, level))
+        })
+        .collect();
 
-        // Clamp level to reasonable range (2-6 for markdown, since # is for doc_title)
-        let level = level.clamp(2, 6);
+    let font_levels = infer_levels_by_kmeans_feature(&height_samples, true);
+    // Smaller x_min (less indent) -> higher-level heading.
+    let relative_levels = infer_levels_by_kmeans_feature(&indent_samples, false);
 
-        (level, formatted)
+    let mut voted = HashMap::new();
+    for idx in title_indices {
+        let semantic_level = semantic_levels.get(&idx).copied();
+        let font_level = font_levels.get(&idx).copied();
+        let relative_level = relative_levels.get(&idx).copied();
+
+        let mut score = [0u8; 7];
+        if let Some(level) = semantic_level {
+            score[level.clamp(1, 6)] += 2;
+        }
+        if let Some(level) = font_level {
+            score[level.clamp(1, 6)] += 1;
+        }
+        if let Some(level) = relative_level {
+            score[level.clamp(1, 6)] += 1;
+        }
+
+        let mut best_level = semantic_level.unwrap_or(2);
+        let mut best_score = 0u8;
+        for (level, &s) in score.iter().enumerate().skip(1) {
+            if s > best_score {
+                best_score = s;
+                best_level = level;
+            } else if s == best_score && s > 0 {
+                let is_semantic = semantic_level == Some(level);
+                let best_is_semantic = semantic_level == Some(best_level);
+                if (is_semantic && !best_is_semantic)
+                    || (is_semantic == best_is_semantic && level < best_level)
+                {
+                    best_level = level;
+                }
+            }
+        }
+
+        if best_score == 0 {
+            best_level = semantic_level
+                .or(font_level)
+                .or(relative_level)
+                .unwrap_or(2);
+        }
+
+        voted.insert(idx, best_level.clamp(1, 6));
+    }
+
+    voted
+}
+
+/// Cluster one scalar feature into heading levels with 1D k-means.
+///
+/// `descending=true` means larger feature -> higher-level heading (smaller markdown depth).
+/// `descending=false` means smaller feature -> higher-level heading.
+fn infer_levels_by_kmeans_feature(
+    samples: &[(usize, f32)],
+    descending: bool,
+) -> HashMap<usize, usize> {
+    let clean_samples: Vec<(usize, f32)> = samples
+        .iter()
+        .copied()
+        .filter(|(_, v)| v.is_finite())
+        .collect();
+    if clean_samples.len() < 2 {
+        return HashMap::new();
+    }
+
+    let mut values: Vec<f32> = clean_samples.iter().map(|(_, v)| *v).collect();
+    values.sort_by(|a, b| a.total_cmp(b));
+    let unique_count = values
+        .windows(2)
+        .filter(|w| (w[1] - w[0]).abs() > 1e-3)
+        .count()
+        + 1;
+    let k = unique_count.clamp(1, 4).min(clean_samples.len());
+    if k <= 1 {
+        return HashMap::new();
+    }
+
+    let mut centroids = (0..k)
+        .map(|i| {
+            let pos = ((i as f32 + 0.5) / k as f32 * values.len() as f32).floor() as usize;
+            values[pos.min(values.len() - 1)]
+        })
+        .collect::<Vec<_>>();
+
+    for _ in 0..16 {
+        let mut sums = vec![0.0f32; k];
+        let mut counts = vec![0usize; k];
+        for (_, value) in &clean_samples {
+            let mut best_idx = 0usize;
+            let mut best_dist = f32::INFINITY;
+            for (idx, c) in centroids.iter().enumerate() {
+                let dist = (value - c).abs();
+                if dist < best_dist {
+                    best_dist = dist;
+                    best_idx = idx;
+                }
+            }
+            sums[best_idx] += *value;
+            counts[best_idx] += 1;
+        }
+        for idx in 0..k {
+            if counts[idx] > 0 {
+                centroids[idx] = sums[idx] / counts[idx] as f32;
+            }
+        }
+    }
+
+    let mut centroid_order: Vec<(usize, f32)> = centroids.iter().copied().enumerate().collect();
+    if descending {
+        centroid_order.sort_by(|a, b| b.1.total_cmp(&a.1));
     } else {
-        // No numbering detected, default to level 2 (## heading)
-        (2, cleaned)
+        centroid_order.sort_by(|a, b| a.1.total_cmp(&b.1));
+    }
+    let rank_by_cluster: HashMap<usize, usize> = centroid_order
+        .into_iter()
+        .enumerate()
+        .map(|(rank, (cluster_idx, _))| (cluster_idx, rank))
+        .collect();
+
+    let mut result = HashMap::new();
+    for (element_idx, value) in &clean_samples {
+        let mut best_idx = 0usize;
+        let mut best_dist = f32::INFINITY;
+        for (idx, c) in centroids.iter().enumerate() {
+            let dist = (value - c).abs();
+            if dist < best_dist {
+                best_dist = dist;
+                best_idx = idx;
+            }
+        }
+        let rank = rank_by_cluster.get(&best_idx).copied().unwrap_or(0);
+        let level = (rank + 2).clamp(2, 6);
+        result.insert(*element_idx, level);
     }
+
+    result
 }
 
 /// A detected document region block (from PP-DocBlockLayout).
@@ -273,8 +477,25 @@ impl StructureResult {
             .map(|e| &e.bbox)
             .collect();
 
+        // Compute original image width for image scaling (PaddleX: original_image_width)
+        let original_image_width = self
+            .rectified_img
+            .as_ref()
+            .map(|img| img.width() as f32)
+            .or_else(|| {
+                // Estimate from max element x-coordinate
+                self.layout_elements
+                    .iter()
+                    .map(|e| e.bbox.x_max())
+                    .fold(None, |acc, x| Some(acc.map_or(x, |max: f32| max.max(x))))
+            })
+            .unwrap_or(1.0);
+
         let mut md = String::new();
         let elements = &self.layout_elements;
+        let paragraph_title_levels = infer_paragraph_title_levels(elements);
+        let mut last_label: Option<LayoutElementType> = None;
+        let mut prev_element: Option<&LayoutElement> = None;
 
         for (idx, element) in elements.iter().enumerate() {
             // PP-StructureV3 markdown ignores auxiliary labels.
@@ -306,35 +527,56 @@ impl StructureResult {
                 }
             }
 
+            // Determine seg_start_flag for paragraph continuity (PaddleX get_seg_flag).
+            // When both current and previous are "text" and seg_start_flag is false,
+            // they belong to the same paragraph — join without \n\n separator.
+            let seg_start_flag = get_seg_flag(element, prev_element);
+
+            let is_continuation = element.element_type == LayoutElementType::Text
+                && last_label == Some(LayoutElementType::Text)
+                && !seg_start_flag;
+
+            // Separators are pushed inside the match arms below; nothing is emitted at this point.
+            if !is_continuation {
+                // Intentionally empty: arms that start a new block push "\n\n" when `md` is non-empty.
+            }
+
             match element.element_type {
                 // Document title
                 LayoutElementType::DocTitle => {
-                    md.push_str("\n# ");
+                    if !md.is_empty() {
+                        md.push_str("\n\n");
+                    }
+                    md.push_str("# ");
                     if let Some(text) = &element.text {
                         let cleaned = clean_ocr_text(text);
                         md.push_str(&cleaned);
                     }
-                    md.push_str("\n\n");
                 }
                 // Paragraph/section title - auto-detect numbering for level
                 LayoutElementType::ParagraphTitle => {
+                    if !md.is_empty() {
+                        md.push_str("\n\n");
+                    }
                     if let Some(text) = &element.text {
                         let cleaned = clean_ocr_text(text);
-                        let (level, formatted_title) = format_title_with_level(&cleaned);
-                        md.push('\n');
+                        let clustered = paragraph_title_levels.get(&idx).copied();
+                        let (level, formatted_title) = format_title_with_level(&cleaned, clustered);
                         for _ in 0..level {
                             md.push('#');
                         }
                         md.push(' ');
                         md.push_str(&formatted_title);
-                        md.push_str("\n\n");
                     } else {
-                        md.push_str("\n## \n\n");
+                        md.push_str("## ");
                     }
                 }
                 // Table - preserve HTML structure with border and center alignment
                 // Following PaddleX's format with <div style="text-align: center;"> wrapper
                 LayoutElementType::Table => {
+                    if !md.is_empty() {
+                        md.push_str("\n\n");
+                    }
                     if let Some(table) =
                         self.tables.iter().find(|t| t.bbox.iou(&element.bbox) > 0.5)
                     {
@@ -343,19 +585,41 @@ impl StructureResult {
                             let simplified = simplify_table_html(html);
                             let table_with_border =
                                 simplified.replacen("<table>", "<table border=\"1\">", 1);
-                            // Wrap with center-aligned div for better markdown rendering
-                            md.push_str("\n<div style=\"text-align: center;\">");
-                            md.push_str(&table_with_border);
-                            md.push_str("</div>\n\n");
+                            // PaddleX format_centered_by_html: clean newlines then wrap
+                            let cleaned = clean_ocr_text(&table_with_border);
+                            md.push_str("<div style=\"text-align: center;\">");
+                            md.push_str(&cleaned);
+                            md.push_str("</div>");
                         } else {
-                            md.push_str("\n[Table]\n\n");
+                            md.push_str("[Table]");
                         }
                     } else {
-                        md.push_str("\n[Table]\n\n");
+                        md.push_str("[Table]");
                     }
                 }
+                // FormulaNumber - equation labels like "(1)", "Eq. 1" etc.
+                // PaddleX does NOT include formula_number in handle_funcs_dict,
+                // so these are silently skipped in markdown output.
+                LayoutElementType::FormulaNumber => {
+                    continue;
+                }
                 // Formula - detect inline vs display formula based on context
-                LayoutElementType::Formula | LayoutElementType::FormulaNumber => {
+                LayoutElementType::Formula => {
+                    // Extract and clean LaTeX content; skip if empty
+                    let raw_content = element.text.as_deref().map(|s| s.trim()).unwrap_or("");
+                    if raw_content.is_empty() {
+                        continue;
+                    }
+                    // Remove only outer $$ or $ wrappers (from table cell injection). NOTE(review): a bare "$", "$$" or "$$$" makes the slices below panic (start > end).
+                    let latex_content =
+                        if raw_content.starts_with("$$") && raw_content.ends_with("$$") {
+                            &raw_content[2..raw_content.len() - 2]
+                        } else if raw_content.starts_with('$') && raw_content.ends_with('$') {
+                            &raw_content[1..raw_content.len() - 1]
+                        } else {
+                            raw_content
+                        };
+
                     // Check if this formula is on the same line as adjacent text elements
                     // to determine if it's an inline formula or display formula
                     let is_inline = {
@@ -383,23 +647,25 @@ impl StructureResult {
                     if is_inline {
                         // Inline formula: use $...$
                         md.push('$');
-                        if let Some(latex) = &element.text {
-                            md.push_str(latex);
-                        }
+                        md.push_str(latex_content);
                         md.push_str("$ ");
                     } else {
                         // Display formula: use $$...$$
-                        md.push_str("\n$$");
-                        if let Some(latex) = &element.text {
-                            md.push_str(latex);
+                        if !md.is_empty() {
+                            md.push_str("\n\n");
                         }
-                        md.push_str("$$\n\n");
+                        md.push_str("$$");
+                        md.push_str(latex_content);
+                        md.push_str("$$");
                     }
                 }
                 // Image/Chart - figure format with center alignment
                 LayoutElementType::Image | LayoutElementType::Chart => {
+                    if !md.is_empty() {
+                        md.push_str("\n\n");
+                    }
                     // Use HTML img tag with center alignment for better rendering
-                    md.push_str("\n<div style=\"text-align: center;\"><img src=\"");
+                    md.push_str("<div style=\"text-align: center;\"><img src=\"");
                     // Generate a placeholder image name based on element bbox
                     let img_name = format!(
                         "imgs/img_in_{}_box_{:.0}_{:.0}_{:.0}_{:.0}.jpg",
@@ -415,72 +681,83 @@ impl StructureResult {
                     );
                     md.push_str(&img_name);
                     md.push_str("\" alt=\"Image\" width=\"");
-                    // Calculate width percentage based on element size
-                    let width_pct =
-                        ((element.bbox.x_max() - element.bbox.x_min()) / 12.0).clamp(20.0, 100.0);
-                    md.push_str(&format!("{:.0}%", width_pct));
-                    md.push_str("\" /></div>\n\n");
+                    // Calculate width percentage relative to original image width (PaddleX logic)
+                    let image_width = element.bbox.x_max() - element.bbox.x_min();
+                    let width_pct = (image_width / original_image_width * 100.0) as u32;
+                    let width_pct = width_pct.clamp(1, 100);
+                    md.push_str(&format!("{}%", width_pct));
+                    md.push_str("\" /></div>");
                 }
                 // Seal - show as image with text
                 LayoutElementType::Seal => {
-                    md.push_str("\n![Seal]");
+                    if !md.is_empty() {
+                        md.push_str("\n\n");
+                    }
+                    md.push_str("![Seal]");
                     if let Some(text) = &element.text {
                         md.push_str("\n> ");
                         md.push_str(text);
                     }
-                    md.push_str("\n\n");
                 }
                 // Captions - with center alignment following PaddleX
                 _ if element.element_type.is_caption() => {
                     if let Some(text) = &element.text {
-                        md.push_str("\n<div style=\"text-align: center;\">");
-                        md.push_str(text);
-                        md.push_str(" </div>\n\n");
+                        if !md.is_empty() {
+                            md.push_str("\n\n");
+                        }
+                        let cleaned = clean_ocr_text(text);
+                        md.push_str("<div style=\"text-align: center;\">");
+                        md.push_str(&cleaned);
+                        md.push_str(" </div>");
                     }
                 }
-                // Abstract - following PaddleX format with proper text handling
+                // Abstract - following PaddleX's format_first_line_func with spliter=" "
                 LayoutElementType::Abstract => {
                     if let Some(text) = &element.text {
-                        // Check for "Abstract" or "摘要" heading
-                        let lower = text.to_lowercase();
-                        if lower.contains("abstract") || lower.contains("摘要") {
-                            md.push_str("\n## **Abstract**\n\n");
+                        if !md.is_empty() {
+                            md.push_str("\n\n");
                         }
-                        let formatted = format_text_block(text);
+                        let formatted = format_first_line(text, " ", &["abstract", "摘要"], "## ");
                         md.push_str(&formatted);
-                        md.push_str("\n\n");
                     }
                 }
-                // Reference - following PaddleX's format_reference_block
+                // Reference - following PaddleX's format_first_line_func with spliter="\n"
                 LayoutElementType::Reference => {
                     if let Some(text) = &element.text {
-                        let formatted = format_reference_block(text);
-                        md.push('\n');
+                        if !md.is_empty() {
+                            md.push_str("\n\n");
+                        }
+                        let formatted =
+                            format_first_line(text, "\n", &["references", "参考文献"], "## ");
                         md.push_str(&formatted);
-                        md.push_str("\n\n");
                     }
                 }
                 // Content (table of contents) - following PaddleX's soft breaks
                 LayoutElementType::Content => {
                     if let Some(text) = &element.text {
+                        if !md.is_empty() {
+                            md.push_str("\n\n");
+                        }
                         let formatted = format_content_block(text);
-                        md.push('\n');
                         md.push_str(&formatted);
-                        md.push_str("\n\n");
                     }
                 }
                 // Footnote - following PaddleX's vision_footnote handling
                 LayoutElementType::Footnote => {
                     if let Some(text) = &element.text {
+                        if !md.is_empty() {
+                            md.push_str("\n\n");
+                        }
                         let formatted = format_vision_footnote_block(text);
-                        md.push('\n');
                         md.push_str(&formatted);
-                        md.push_str("\n\n");
                     }
                 }
                 // List
                 LayoutElementType::List => {
                     if let Some(text) = &element.text {
+                        if !md.is_empty() {
+                            md.push_str("\n\n");
+                        }
                         let cleaned = format_text_block(text);
                         // Split by newlines and format as list items
                         for line in cleaned.lines() {
@@ -491,7 +768,15 @@ impl StructureResult {
                                 md.push('\n');
                             }
                         }
-                        md.push('\n');
+                    }
+                }
+                // Algorithm block - PaddleX: block.content.strip("\n")
+                LayoutElementType::Algorithm => {
+                    if let Some(text) = &element.text {
+                        if !md.is_empty() {
+                            md.push_str("\n\n");
+                        }
+                        md.push_str(text.trim_matches('\n'));
                     }
                 }
                 // Header/Footer - smaller text (typically excluded from markdown)
@@ -503,12 +788,23 @@ impl StructureResult {
                 // Default text elements - following PaddleX's text handling
                 _ => {
                     if let Some(text) = &element.text {
-                        let formatted = format_text_block(text);
-                        md.push_str(&formatted);
-                        md.push_str("\n\n");
+                        // For text continuation (same paragraph), join directly
+                        if is_continuation {
+                            let formatted = format_text_block(text);
+                            md.push_str(&formatted);
+                        } else {
+                            if !md.is_empty() {
+                                md.push_str("\n\n");
+                            }
+                            let formatted = format_text_block(text);
+                            md.push_str(&formatted);
+                        }
                     }
                 }
             }
+
+            last_label = Some(element.element_type);
+            prev_element = Some(element);
         }
         md.trim().to_string()
     }
@@ -793,6 +1089,67 @@ impl StructureResult {
     }
 }
 
+/// Decides whether `current` starts a new paragraph or continues the one in `prev`.
+///
+/// This implements PaddleX's `get_seg_flag` logic from `layout_parsing/utils.py`:
+/// - `seg_start_flag = true` means this element starts a NEW paragraph
+/// - `seg_start_flag = false` means this element CONTINUES the previous paragraph
+///
+/// With a previous element, a continuation requires all four (tolerance: 10.0 coordinate units):
+/// 1. `prev`'s last span ends within tolerance of the context's right edge (line is full)
+/// 2. `current`'s first span starts less than tolerance right of the context's left edge
+/// 3. `prev` has more than one line (a missing `num_lines` counts as 1, so never continues)
+/// 4. The horizontal gap between the boxes is smaller than the wider box (0 when they overlap)
+///
+/// Returns `seg_start_flag` (true = new paragraph, false = continuation).
+fn get_seg_flag(current: &LayoutElement, prev: Option<&LayoutElement>) -> bool {
+    const COORD_THRESHOLD: f32 = 10.0; // largest gap still treated as flush with an edge
+    // Missing span coordinates fall back to the element's own bbox edges.
+    let seg_start = current.seg_start_x.unwrap_or(current.bbox.x_min());
+    let mut context_left = current.bbox.x_min();
+    let mut context_right = current.bbox.x_max();
+
+    if let Some(prev) = prev {
+        let prev_seg_end = prev.seg_end_x.unwrap_or(prev.bbox.x_max());
+        let prev_num_lines = prev.num_lines.unwrap_or(1);
+
+        // Check if blocks overlap horizontally; if so the context widens to span both boxes
+        let overlap_blocks = context_left < prev.bbox.x_max() && context_right > prev.bbox.x_min();
+
+        let edge_distance;
+        if overlap_blocks {
+            context_left = context_left.min(prev.bbox.x_min());
+            context_right = context_right.max(prev.bbox.x_max());
+            edge_distance = 0.0;
+        } else {
+            edge_distance = (current.bbox.x_min() - prev.bbox.x_max()).abs();
+        }
+        // NOTE(review): without overlap, context_right is current's own right edge — confirm.
+        let prev_end_space_small = (context_right - prev_seg_end).abs() < COORD_THRESHOLD;
+        let current_start_space_small = seg_start - context_left < COORD_THRESHOLD;
+        let prev_lines_more_than_one = prev_num_lines > 1;
+        let blocks_close = edge_distance
+            < (prev.bbox.x_max() - prev.bbox.x_min())
+                .max(current.bbox.x_max() - current.bbox.x_min());
+
+        if prev_end_space_small
+            && current_start_space_small
+            && prev_lines_more_than_one
+            && blocks_close
+        {
+            return false; // continuation
+        }
+
+        true // new paragraph
+    } else {
+        // No previous element: only the indent matters (it is 0 when `seg_start_x` is None)
+        if seg_start - context_left < COORD_THRESHOLD {
+            return false; // continuation from previous page (no indentation)
+        }
+        true
+    }
+}
+
 /// Checks if a text element appears to start a new paragraph.
 ///
 /// Following PaddleX's logic: if the text starts near the left edge of the page
@@ -908,6 +1265,49 @@ fn clean_ocr_text(text: &str) -> String {
     text.replace("-\n", "").replace('\n', " ")
 }
 
+/// Turns the first non-blank token of `text` into a heading when it equals a template keyword.
+///
+/// This is the Rust equivalent of PaddleX's `format_first_line_func`:
+/// 1. Split `text` by `spliter`; tokens that trim to empty before the first real one are kept
+/// 2. Trim the first non-blank token and compare it to each template (ASCII case-insensitive)
+/// 3. On a match, replace the token with `heading_prefix` + trimmed token + `"\n"`
+/// 4. Rejoin with `spliter`; only that first non-blank token is ever rewritten
+///
+/// Abstract: `spliter=" "`, templates=["abstract","摘要"], result starts `## Abstract\n `
+/// Reference: `spliter="\n"`, templates=["references","参考文献"], heading ends in `\n\n`
+fn format_first_line(
+    text: &str,
+    spliter: &str,
+    templates: &[&str],
+    heading_prefix: &str,
+) -> String {
+    let parts: Vec<&str> = text.split(spliter).collect();
+    let mut result_parts: Vec<String> = Vec::with_capacity(parts.len());
+    let mut found_first = false; // set once the first non-blank token has been seen
+
+    for part in &parts {
+        if !found_first {
+            let trimmed = part.trim();
+            if trimmed.is_empty() {
+                result_parts.push(part.to_string()); // leading blank tokens pass through
+                continue;
+            }
+            found_first = true;
+            // Compare the trimmed token with every template, ignoring ASCII case only
+            if templates.iter().any(|t| trimmed.eq_ignore_ascii_case(t)) {
+                // Emit "<heading_prefix><trimmed token>\n"; the token's own whitespace is dropped
+                result_parts.push(format!("{}{}\n", heading_prefix, trimmed));
+            } else {
+                result_parts.push(part.to_string());
+            }
+        } else {
+            result_parts.push(part.to_string());
+        }
+    }
+
+    result_parts.join(spliter)
+}
+
 /// Formats text blocks following PaddleX's text handling:
 /// 1. First remove hyphenation: `-\n` -> `` (join broken words)
 /// 2. Then: `.replace("\n\n", "\n").replace("\n", "\n\n")`
@@ -933,49 +1333,6 @@ fn format_content_block(text: &str) -> String {
     step1.replace('\n', "  \n")
 }
 
-/// Formats reference blocks, following PaddleX's `format_first_line_func`:
-/// - First remove hyphenation: `-\n` -> ``
-/// - Detects "References" or "参考文献" keyword
-/// - Adds markdown heading if found
-fn format_reference_block(text: &str) -> String {
-    // First remove hyphenation
-    let dehyphenated = text.replace("-\n", "");
-    let lines: Vec<&str> = dehyphenated.lines().collect();
-
-    // Check first non-empty line for reference keywords
-    let mut result = String::new();
-    let mut added_heading = false;
-
-    for (i, line) in lines.iter().enumerate() {
-        let trimmed = line.trim();
-        if trimmed.is_empty() {
-            continue;
-        }
-
-        // Check if this is a reference heading line
-        if !added_heading && (trimmed.contains("References") || trimmed.contains("参考文献")) {
-            result.push_str("## **References**\n\n");
-            added_heading = true;
-            // Skip the heading line itself, continue with content
-            continue;
-        }
-
-        // Add remaining lines
-        if i > 0 || result.is_empty() {
-            if !result.is_empty() {
-                result.push('\n');
-            }
-            result.push_str(trimmed);
-        }
-    }
-
-    if result.is_empty() {
-        dehyphenated
-    } else {
-        result
-    }
-}
-
 /// Formats vision footnote blocks following PaddleX:
 /// 1. First remove hyphenation: `-\n` -> ``
 /// 2. Then: `.replace("\n\n", "\n").replace("\n", "\n\n")`
@@ -1018,17 +1375,11 @@ fn is_digit(c: char) -> bool {
 }
 
 /// Removes PDF hyphenation artifacts from text.
+/// Dehyphenation: only handles hyphen-newline patterns (word breaks across lines).
 ///
-/// PDFs often break words at line ends with hyphens like "frame-work",
-/// "com-pared", etc. This function detects and removes these hyphens
-/// when they appear to be line-break hyphens rather than intentional hyphens.
-///
-/// Rules:
-/// 1. Hyphen followed by lowercase letter is likely a hyphenation artifact
-/// 2. Hyphen followed by space and lowercase letter is also artifact
-/// 3. Hyphen followed by newline and lowercase letter is artifact
-/// 4. Preserve intentional hyphens (compound words, hyphenated phrases)
-/// 5. Preserve hyphens in URLs and technical patterns
+/// Like PaddleX, hyphens are stripped only at line boundaries: the hyphen must be directly
+/// followed by a newline and then a lowercase letter, and must not sit in a URL-like context.
+/// Mid-word hyphens in compound words like "real-time", "end-to-end", "one-to-many" are kept.
 fn dehyphenate(text: &str) -> String {
     let mut result = String::with_capacity(text.len());
     let chars: Vec<char> = text.chars().collect();
@@ -1037,7 +1388,6 @@ fn dehyphenate(text: &str) -> String {
 
     // Helper to check if we're in a URL-like pattern
     let is_url_context = |pos: usize| -> bool {
-        // Look at a window around the hyphen for URL patterns
         let start = pos.saturating_sub(10);
         let end = (pos + 5).min(len);
         let window: String = chars[start..end].iter().collect();
@@ -1046,33 +1396,18 @@ fn dehyphenate(text: &str) -> String {
 
     while i < len {
         if chars[i] == '-' {
-            // Skip dehyphenation for URL contexts
             if is_url_context(i) {
                 result.push('-');
                 i += 1;
                 continue;
             }
 
-            // Check if this is a hyphenation artifact
-            let is_artifact = if i + 1 < len {
-                let next = chars[i + 1];
-                if next == '\n' {
-                    // Hyphen followed by newline - check what's after the newline
-                    if i + 2 < len {
-                        let after_newline = chars[i + 2];
-                        is_lowercase(after_newline)
-                    } else {
-                        false
-                    }
-                } else if is_lowercase(next) {
-                    // Hyphen followed directly by lowercase letter (e.g., "com-puted")
-                    // But check if preceded by lowercase to avoid removing intentional hyphens
-                    // like in "RT-DETR" or "one-to-many"
-                    i > 0 && is_lowercase(chars[i - 1])
-                } else if next.is_whitespace() && i + 2 < len {
-                    let after_space = chars[i + 2];
-                    // Hyphen + space + lowercase letter (e.g., "com- puted")
-                    is_lowercase(after_space) && i > 0 && is_lowercase(chars[i - 1])
+            // Only dehyphenate when hyphen is followed by newline (line-break hyphenation).
+            // Pattern: "word-\nletter" → "wordletter"
+            let is_artifact = if i + 1 < len && chars[i + 1] == '\n' {
+                // Hyphen followed by newline — check if next line starts with lowercase
+                if i + 2 < len {
+                    is_lowercase(chars[i + 2])
                 } else {
                     false
                 }
@@ -1081,14 +1416,8 @@ fn dehyphenate(text: &str) -> String {
             };
 
             if is_artifact {
-                // Skip the hyphen
-                // Also skip following newline/space if present
-                if i + 1 < len {
-                    let next = chars[i + 1];
-                    if next == '\n' || next.is_whitespace() {
-                        i += 1;
-                    }
-                }
+                // Skip the hyphen and the following newline
+                i += 1; // skip newline (will be incremented again at end of loop)
             } else {
                 result.push('-');
             }
@@ -1372,13 +1701,33 @@ pub fn postprocess_markdown(markdown: &str) -> String {
             continue;
         }
 
-        // Skip processing inside code/formula blocks
-        if in_code_block || in_formula {
+        // Skip processing inside code blocks
+        if in_code_block {
             result.push_str(line);
             result.push('\n');
             continue;
         }
 
+        // If inside a formula block, ensure it doesn't contain unescaped dollar signs
+        // which cause KaTeX "Can't use function '$' in math mode" errors.
+        if in_formula {
+            // If the formula content looks like regular text (many spaces, few backslashes)
+            // and contains a $, KaTeX will fail. We escape the $ inside the math block.
+            let contains_dollar = line.contains('$');
+            let is_plain_text = line.split_whitespace().count() > 3 && !line.contains('\\');
+
+            if contains_dollar && is_plain_text {
+                result.push_str(&line.replace('$', "\\$"));
+            } else if contains_dollar {
+                // Remove redundant dollar signs inside the block
+                result.push_str(&line.replace('$', ""));
+            } else {
+                result.push_str(line);
+            }
+            result.push('\n');
+            continue;
+        }
+
         // Process text content (skip headers, lists, etc.)
         if trimmed.starts_with('#')
             || trimmed.starts_with('*')
@@ -1482,6 +1831,20 @@ pub struct LayoutElement {
     /// formulas, images, etc.) will have an order index assigned.
     /// Headers, footers, and other auxiliary elements may have `None`.
     pub order_index: Option<u32>,
+    /// X-coordinate of the first text span's left edge within this element.
+    /// Used by `get_seg_flag` to detect paragraph continuity across blocks.
+    /// Computed during stitching from the first OCR region (after spatial sort).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub seg_start_x: Option<f32>,
+    /// X-coordinate of the last text span's right edge within this element.
+    /// Used by `get_seg_flag` to detect paragraph continuity across blocks.
+    /// Computed during stitching from the last OCR region (after spatial sort).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub seg_end_x: Option<f32>,
+    /// Number of text lines within this element.
+    /// Used by `get_seg_flag` to detect paragraph continuity across blocks.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub num_lines: Option<u32>,
 }
 
 impl LayoutElement {
@@ -1494,6 +1857,9 @@ impl LayoutElement {
             label: None,
             text: None,
             order_index: None,
+            seg_start_x: None,
+            seg_end_x: None,
+            num_lines: None,
         }
     }
 
@@ -1985,6 +2351,16 @@ pub struct TableResult {
     /// Structure tokens from table structure recognition (used for HTML generation after stitching)
     #[serde(skip)]
     pub structure_tokens: Option<Vec<String>>,
+    /// Detected cell bounding boxes from the cell detection model (in page coordinates).
+    /// Stored separately from `cells` (which carry structure/grid metadata from the structure model)
+    /// and used by the stitcher for row-aware IoA-based OCR matching.
+    #[serde(skip)]
+    pub detected_cell_bboxes: Option<Vec<BoundingBox>>,
+    /// Whether the table was processed in end-to-end (E2E) mode.
+    /// When true, cells come from the structure model only (no separate cell detection).
+    /// Used by the stitcher to select the appropriate OCR matching strategy.
+    #[serde(skip)]
+    pub is_e2e: bool,
 }
 
 impl TableResult {
@@ -1999,6 +2375,8 @@ impl TableResult {
             html_structure: None,
             cell_texts: None,
             structure_tokens: None,
+            detected_cell_bboxes: None,
+            is_e2e: false,
         }
     }
 
@@ -2038,6 +2416,18 @@ impl TableResult {
         self
     }
 
+    /// Sets `detected_cell_bboxes` for the stitcher's row-aware IoA matcher and returns `self`.
+    pub fn with_detected_cell_bboxes(mut self, bboxes: Vec<BoundingBox>) -> Self {
+        self.detected_cell_bboxes = Some(bboxes); // overwrites any previously stored boxes
+        self
+    }
+
+    /// Sets the `is_e2e` flag (end-to-end mode: cells come from the structure model only).
+    pub fn with_e2e(mut self, is_e2e: bool) -> Self {
+        self.is_e2e = is_e2e; // `false` clears a previously set flag
+        self
+    }
+
     /// Returns the best available confidence score for this table.
     ///
     /// This method provides a unified confidence API for callers who want to filter
@@ -2206,4 +2596,147 @@ mod tests {
         assert!(html.contains("<h1>Test Document</h1>"));
         assert!(html.contains("<p>Hello world</p>"));
     }
+
+    #[test]
+    fn test_format_title_with_level_keywords() {
+        let (level, text) = format_title_with_level("Abstract", None);
+        assert_eq!(level, 1);
+        assert_eq!(text, "Abstract");
+
+        let (level, text) = format_title_with_level("References:", None);
+        assert_eq!(level, 1);
+        assert_eq!(text, "References:");
+    }
+
+    #[test]
+    fn test_format_title_with_level_cluster_fallback() {
+        let (level, text) = format_title_with_level("Unnumbered Heading", Some(4));
+        assert_eq!(level, 4);
+        assert_eq!(text, "Unnumbered Heading");
+    }
+
+    #[test]
+    fn test_to_markdown_skips_footnote() {
+        let mut result = StructureResult::new("test.jpg", 0);
+        let body = LayoutElement::new(
+            BoundingBox::from_coords(0.0, 0.0, 100.0, 30.0),
+            LayoutElementType::Text,
+            1.0,
+        )
+        .with_text("Body");
+        let footnote = LayoutElement::new(
+            BoundingBox::from_coords(0.0, 40.0, 100.0, 60.0),
+            LayoutElementType::Footnote,
+            1.0,
+        )
+        .with_text("Footnote text");
+        result = result.with_layout_elements(vec![body, footnote]);
+
+        let md = result.to_markdown();
+        assert!(md.contains("Body"));
+        assert!(!md.contains("Footnote text"));
+    }
+
+    #[test]
+    fn test_to_markdown_doc_title_joins_lines_with_space() {
+        let mut result = StructureResult::new("test.jpg", 0);
+        let title = LayoutElement::new(
+            BoundingBox::from_coords(0.0, 0.0, 100.0, 20.0),
+            LayoutElementType::DocTitle,
+            1.0,
+        )
+        .with_text("Main\nTitle");
+        result = result.with_layout_elements(vec![title]);
+        let md = result.to_markdown();
+        assert!(md.contains("# Main Title"));
+    }
+
+    #[test]
+    fn test_to_markdown_content_uses_soft_breaks() {
+        let mut result = StructureResult::new("test.jpg", 0);
+        let toc = LayoutElement::new(
+            BoundingBox::from_coords(0.0, 0.0, 100.0, 40.0),
+            LayoutElementType::Content,
+            1.0,
+        )
+        .with_text("1 Intro\n2 Method");
+        result = result.with_layout_elements(vec![toc]);
+        let md = result.to_markdown();
+        assert!(md.contains("1 Intro  \n2 Method"));
+    }
+
+    #[test]
+    fn test_infer_paragraph_title_levels_by_height() {
+        let titles = vec![
+            LayoutElement::new(
+                BoundingBox::from_coords(0.0, 0.0, 100.0, 40.0),
+                LayoutElementType::ParagraphTitle,
+                1.0,
+            )
+            .with_text("Large"),
+            LayoutElement::new(
+                BoundingBox::from_coords(0.0, 50.0, 100.0, 74.0),
+                LayoutElementType::ParagraphTitle,
+                1.0,
+            )
+            .with_text("Medium"),
+            LayoutElement::new(
+                BoundingBox::from_coords(0.0, 80.0, 100.0, 98.0),
+                LayoutElementType::ParagraphTitle,
+                1.0,
+            )
+            .with_text("Small"),
+        ];
+
+        let levels = infer_paragraph_title_levels(&titles);
+        let l0 = levels.get(&0).copied().unwrap_or(2);
+        let l1 = levels.get(&1).copied().unwrap_or(2);
+        let l2 = levels.get(&2).copied().unwrap_or(2);
+        assert!(l0 <= l1 && l1 <= l2);
+    }
+
+    #[test]
+    fn test_infer_paragraph_title_levels_semantic_vote_wins_tie() {
+        let titles = vec![
+            LayoutElement::new(
+                BoundingBox::from_coords(0.0, 0.0, 100.0, 40.0),
+                LayoutElementType::ParagraphTitle,
+                1.0,
+            )
+            .with_text("1.1 Detail"),
+            LayoutElement::new(
+                BoundingBox::from_coords(0.0, 50.0, 100.0, 70.0),
+                LayoutElementType::ParagraphTitle,
+                1.0,
+            )
+            .with_text("2 Intro"),
+        ];
+
+        let levels = infer_paragraph_title_levels(&titles);
+        assert_eq!(levels.get(&0).copied(), Some(3));
+        assert_eq!(levels.get(&1).copied(), Some(2));
+    }
+
+    #[test]
+    fn test_infer_paragraph_title_levels_uses_relative_indent_signal() {
+        let titles = vec![
+            LayoutElement::new(
+                BoundingBox::from_coords(0.0, 0.0, 100.0, 24.0),
+                LayoutElementType::ParagraphTitle,
+                1.0,
+            )
+            .with_text("Heading A"),
+            LayoutElement::new(
+                BoundingBox::from_coords(40.0, 40.0, 140.0, 64.0),
+                LayoutElementType::ParagraphTitle,
+                1.0,
+            )
+            .with_text("Heading B"),
+        ];
+
+        let levels = infer_paragraph_title_levels(&titles);
+        let left_level = levels.get(&0).copied().unwrap_or(2);
+        let indented_level = levels.get(&1).copied().unwrap_or(2);
+        assert!(left_level < indented_level);
+    }
 }
diff --git a/oar-ocr-core/src/domain/text_region.rs b/oar-ocr-core/src/domain/text_region.rs
index 44fbbf5..252c4bb 100644
--- a/oar-ocr-core/src/domain/text_region.rs
+++ b/oar-ocr-core/src/domain/text_region.rs
@@ -37,12 +37,18 @@ pub struct TextRegion {
     /// Only populated when word-level detection is enabled.
     /// Each box corresponds to a word or character in the recognized text.
     pub word_boxes: Option<Vec<BoundingBox>>,
+    /// Label indicating the type of this text region.
+    /// Used to distinguish between normal text and special content like formulas.
+    /// Common values: "formula", "text", "seal", etc.
+    /// PaddleX: corresponds to `rec_labels` in OCR results.
+    #[serde(default)]
+    pub label: Option<Arc<str>>,
 }
 
 impl TextRegion {
     /// Creates a new TextRegion with the given bounding box.
     ///
-    /// The text, confidence, orientation_angle, and word_boxes are initially set to None.
+    /// The text, confidence, orientation_angle, word_boxes, and label are initially set to None.
     pub fn new(bounding_box: BoundingBox) -> Self {
         Self {
             bounding_box,
@@ -52,6 +58,7 @@ impl TextRegion {
             confidence: None,
             orientation_angle: None,
             word_boxes: None,
+            label: None,
         }
     }
 
@@ -69,6 +76,7 @@ impl TextRegion {
             confidence,
             orientation_angle: None,
             word_boxes: None,
+            label: None,
         }
     }
 
@@ -87,6 +95,7 @@ impl TextRegion {
             confidence,
             orientation_angle,
             word_boxes: None,
+            label: None,
         }
     }
 
@@ -117,4 +126,20 @@ impl TextRegion {
             _ => None,
         }
     }
+
+    /// Returns true if this text region has a label.
+    pub fn has_label(&self) -> bool {
+        self.label.is_some()
+    }
+
+    /// Returns true if this text region is labeled as a formula.
+    pub fn is_formula(&self) -> bool {
+        self.label.as_deref() == Some("formula")
+    }
+
+    /// Sets the label for this text region.
+    pub fn with_label(mut self, label: Option<&str>) -> Self {
+        self.label = label.map(|s| s.into());
+        self
+    }
 }
diff --git a/oar-ocr-core/src/processors/geometry.rs b/oar-ocr-core/src/processors/geometry.rs
index fa96f54..602bc65 100644
--- a/oar-ocr-core/src/processors/geometry.rs
+++ b/oar-ocr-core/src/processors/geometry.rs
@@ -815,23 +815,20 @@ impl BoundingBox {
                 90 => {
                     // Image was rotated 270° counter-clockwise (or 90° clockwise) to correct
                     // Inverse: rotate box 90° clockwise
-                    // (x, y) in rotated → (rotated_height - 1 - y, x) in original
-                    Point::new(rotated_height as f32 - 1.0 - p.y, p.x)
+                    // (x, y) in rotated → (rotated_height - y, x) in original
+                    Point::new(rotated_height as f32 - p.y, p.x)
                 }
                 180 => {
                     // Image was rotated 180° to correct
                     // Inverse: rotate box 180°
-                    // (x, y) in rotated → (rotated_width - 1 - x, rotated_height - 1 - y) in original
-                    Point::new(
-                        rotated_width as f32 - 1.0 - p.x,
-                        rotated_height as f32 - 1.0 - p.y,
-                    )
+                    // (x, y) in rotated → (rotated_width - x, rotated_height - y) in original
+                    Point::new(rotated_width as f32 - p.x, rotated_height as f32 - p.y)
                 }
                 270 => {
                     // Image was rotated 90° counter-clockwise (or 270° clockwise) to correct
                     // Inverse: rotate box 270° clockwise (or 90° counter-clockwise)
-                    // (x, y) in rotated → (y, rotated_width - 1 - x) in original
-                    Point::new(p.y, rotated_width as f32 - 1.0 - p.x)
+                    // (x, y) in rotated → (y, rotated_width - x) in original
+                    Point::new(p.y, rotated_width as f32 - p.x)
                 }
                 _ => {
                     // No rotation (0° or unknown)
@@ -1215,12 +1212,12 @@ mod tests {
         let bbox = BoundingBox::from_coords(0.0, 0.0, 1.0, 1.0);
         let rotated = bbox.rotate_back_to_original(90.0, rotated_width, rotated_height);
 
-        // angle=90 inverse mapping: (x, y) -> (rotated_height-1-y, x)
+        // angle=90 inverse mapping: (x, y) -> (rotated_height - y, x)
         let expected = BoundingBox::new(vec![
-            Point::new(3.0, 0.0),
+            Point::new(4.0, 0.0),
+            Point::new(4.0, 1.0),
             Point::new(3.0, 1.0),
-            Point::new(2.0, 1.0),
-            Point::new(2.0, 0.0),
+            Point::new(3.0, 0.0),
         ]);
         assert_eq!(rotated.points, expected.points);
     }
@@ -1232,12 +1229,12 @@ mod tests {
         let bbox = BoundingBox::from_coords(1.0, 1.0, 2.0, 2.0);
         let rotated = bbox.rotate_back_to_original(180.0, rotated_width, rotated_height);
 
-        // angle=180 inverse mapping: (x, y) -> (rotated_width-1-x, rotated_height-1-y)
+        // angle=180 inverse mapping: (x, y) -> (rotated_width - x, rotated_height - y)
         let expected = BoundingBox::new(vec![
+            Point::new(3.0, 2.0),
+            Point::new(2.0, 2.0),
             Point::new(2.0, 1.0),
-            Point::new(1.0, 1.0),
-            Point::new(1.0, 0.0),
-            Point::new(2.0, 0.0),
+            Point::new(3.0, 1.0),
         ]);
         assert_eq!(rotated.points, expected.points);
     }
@@ -1250,12 +1247,12 @@ mod tests {
         let bbox = BoundingBox::from_coords(0.0, 0.0, 1.0, 1.0);
         let rotated = bbox.rotate_back_to_original(270.0, rotated_width, rotated_height);
 
-        // angle=270 inverse mapping: (x, y) -> (y, rotated_width-1-x)
+        // angle=270 inverse mapping: (x, y) -> (y, rotated_width - x)
         let expected = BoundingBox::new(vec![
+            Point::new(0.0, 3.0),
             Point::new(0.0, 2.0),
-            Point::new(0.0, 1.0),
-            Point::new(1.0, 1.0),
             Point::new(1.0, 2.0),
+            Point::new(1.0, 3.0),
         ]);
         assert_eq!(rotated.points, expected.points);
     }
diff --git a/oar-ocr-core/src/processors/layout_sorting.rs b/oar-ocr-core/src/processors/layout_sorting.rs
index 7a587f3..f504bdd 100644
--- a/oar-ocr-core/src/processors/layout_sorting.rs
+++ b/oar-ocr-core/src/processors/layout_sorting.rs
@@ -1,68 +1,72 @@
-//! Enhanced layout sorting logic compatible with PP-StructureV3.
+//! Enhanced layout sorting logic — `xycut_enhanced` algorithm.
 //!
-//! This module implements the `xycut_enhanced` strategy which handles complex layouts
-//! by separating headers/footers, identifying cross-column elements, and using
-//! weighted distance metrics to insert titles and figures into the reading order.
+//! Faithful port of PaddleX's `xycut_enhanced` strategy:
+//! 1. Header/Footer separation
+//! 2. Cross-layout detection (blocks spanning multiple columns)
+//! 3. Direction-aware XY-cut sorting
+//! 4. Overlapping box shrinking before projection
+//! 5. Weighted distance insertion for special blocks
+//! 6. Child block association (vision titles → vision parents)
 
 use crate::domain::structure::LayoutElementType;
+use crate::processors::sorting::calculate_overlap_ratio;
 use crate::processors::{BoundingBox, SortDirection, sort_by_xycut};
 
+/// XYCUT_SETTINGS constants (matching PaddleX setting.py)
+const EDGE_DISTANCE_COMPARE_TOLERANCE_LEN: f32 = 2.0;
+const EDGE_WEIGHT: f32 = 10000.0; // 10^4
+const UP_EDGE_WEIGHT: f32 = 1.0;
+const LEFT_EDGE_WEIGHT: f32 = 2.0;
+const CROSS_LAYOUT_REF_TEXT_BLOCK_WORDS_NUM_THRESHOLD: f32 = 10.0;
+
 /// Label used for sorting logic.
-///
-/// Matches standard block categories.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum OrderLabel {
-    Header,         // header_labels
-    Footer,         // footer_labels
-    DocTitle,       // doc_title_labels
-    ParagraphTitle, // paragraph_title_labels
-    Vision,         // vision_labels
-    VisionTitle,    // vision_title_labels
-    Unordered,      // unordered_labels
-    NormalText,     // text_labels
-    CrossLayout,    // derived internally
-    Reference,      // special case
+    Header,
+    Footer,
+    DocTitle,
+    ParagraphTitle,
+    Vision,
+    VisionTitle,
+    Unordered,
+    NormalText,
+    CrossLayout,
+    CrossReference,
+    Reference,
 }
 
 impl OrderLabel {
     pub fn from_element_type(et: LayoutElementType) -> Self {
-        // Mapped based on standard block labels.
         match et {
-            // header_labels
             LayoutElementType::Header | LayoutElementType::HeaderImage => OrderLabel::Header,
 
-            // footer_labels
             LayoutElementType::Footer
             | LayoutElementType::FooterImage
             | LayoutElementType::Footnote => OrderLabel::Footer,
 
-            // doc_title_labels
             LayoutElementType::DocTitle => OrderLabel::DocTitle,
 
-            // paragraph_title_labels
-            LayoutElementType::ParagraphTitle
-            | LayoutElementType::Reference
-            | LayoutElementType::Content => OrderLabel::ParagraphTitle,
+            LayoutElementType::ParagraphTitle | LayoutElementType::Content => {
+                OrderLabel::ParagraphTitle
+            }
+
+            LayoutElementType::Reference => OrderLabel::Reference,
 
-            // vision_labels
             LayoutElementType::Image
             | LayoutElementType::Table
             | LayoutElementType::Chart
             | LayoutElementType::Algorithm => OrderLabel::Vision,
 
-            // vision_title_labels
             LayoutElementType::FigureTitle
             | LayoutElementType::TableTitle
             | LayoutElementType::ChartTitle
             | LayoutElementType::FigureTableChartTitle => OrderLabel::VisionTitle,
 
-            // unordered_labels
             LayoutElementType::AsideText
             | LayoutElementType::Seal
             | LayoutElementType::Number
             | LayoutElementType::FormulaNumber => OrderLabel::Unordered,
 
-            // text_labels (default fallback)
             LayoutElementType::Text
             | LayoutElementType::List
             | LayoutElementType::Abstract
@@ -72,13 +76,6 @@ impl OrderLabel {
             _ => OrderLabel::NormalText,
         }
     }
-
-    pub fn is_header(&self) -> bool {
-        matches!(self, OrderLabel::Header)
-    }
-    pub fn is_footer(&self) -> bool {
-        matches!(self, OrderLabel::Footer)
-    }
 }
 
 /// A wrapper around layout elements with properties needed for sorting.
@@ -87,58 +84,95 @@ pub struct SortableBlock {
     pub bbox: BoundingBox,
     pub original_index: usize,
     pub order_label: OrderLabel,
-    pub direction: SortDirection, // Derived from aspect ratio
+    pub element_type: LayoutElementType,
+    pub direction: SortDirection,
+    pub num_lines: u32,
+    pub text_line_height: f32,
 }
 
 impl SortableBlock {
-    pub fn new(bbox: BoundingBox, original_index: usize, element_type: LayoutElementType) -> Self {
+    pub fn new(
+        bbox: BoundingBox,
+        original_index: usize,
+        element_type: LayoutElementType,
+        num_lines: Option<u32>,
+    ) -> Self {
         let order_label = OrderLabel::from_element_type(element_type);
         let width = bbox.x_max() - bbox.x_min();
         let height = bbox.y_max() - bbox.y_min();
-
-        // Logic: horizontal if width >= height (ratio 1.0)
         let direction = if width >= height {
             SortDirection::Horizontal
         } else {
             SortDirection::Vertical
         };
+        // Clamp a missing or zero line count to 1 so the division below is always
+        // well-defined; this makes a separate zero-lines fallback unnecessary.
+        let num_lines = num_lines.unwrap_or(1).max(1);
+        // Average height of a single text line; cross-layout detection compares it
+        // against the block's long side.
+        let text_line_height = height / num_lines as f32;
 
         Self {
             bbox,
             original_index,
             order_label,
+            element_type,
             direction,
+            num_lines,
+            text_line_height,
         }
     }
 
+    pub fn width(&self) -> f32 {
+        self.bbox.x_max() - self.bbox.x_min()
+    }
+
+    pub fn height(&self) -> f32 {
+        self.bbox.y_max() - self.bbox.y_min()
+    }
+
+    pub fn area(&self) -> f32 {
+        self.width() * self.height()
+    }
+
     pub fn center(&self) -> (f32, f32) {
         (
             (self.bbox.x_min() + self.bbox.x_max()) / 2.0,
             (self.bbox.y_min() + self.bbox.y_max()) / 2.0,
         )
     }
+
+    pub fn long_side_length(&self) -> f32 {
+        self.width().max(self.height())
+    }
+}
+
+/// Input element for enhanced sorting.
+pub struct SortableElement {
+    pub bbox: BoundingBox,
+    pub element_type: LayoutElementType,
+    pub num_lines: Option<u32>,
 }
 
 /// Main entry point for enhanced sorting.
 ///
 /// Returns a list of original indices in the correct reading order.
 pub fn sort_layout_enhanced(
-    elements: &[(BoundingBox, LayoutElementType)],
+    elements: &[SortableElement],
     page_width: f32,
-    page_height: f32,
+    _page_height: f32,
 ) -> Vec<usize> {
     if elements.is_empty() {
         return Vec::new();
     }
 
-    // 1. Convert to SortableBlocks
     let blocks: Vec<SortableBlock> = elements
         .iter()
         .enumerate()
-        .map(|(i, (bbox, et))| SortableBlock::new(bbox.clone(), i, *et))
+        .map(|(i, e)| SortableBlock::new(e.bbox.clone(), i, e.element_type, e.num_lines))
         .collect();
 
-    // 2. Separate into groups
+    // Separate headers/footers
     let mut header_blocks = Vec::new();
     let mut footer_blocks = Vec::new();
     let mut main_blocks = Vec::new();
@@ -151,24 +185,11 @@ pub fn sort_layout_enhanced(
         }
     }
 
-    // 3. Sort Headers and Footers (simple top-to-bottom)
-    header_blocks.sort_by(|a, b| {
-        a.bbox
-            .y_min()
-            .partial_cmp(&b.bbox.y_min())
-            .unwrap_or(std::cmp::Ordering::Equal)
-    });
-    footer_blocks.sort_by(|a, b| {
-        a.bbox
-            .y_min()
-            .partial_cmp(&b.bbox.y_min())
-            .unwrap_or(std::cmp::Ordering::Equal)
-    });
+    sort_blocks_by_y(&mut header_blocks);
+    sort_blocks_by_y(&mut footer_blocks);
 
-    // 4. Sort Main Blocks using Enhanced Logic
-    let sorted_main = sort_main_blocks(main_blocks, page_width, page_height);
+    let sorted_main = sort_main_blocks(main_blocks, page_width);
 
-    // 5. Combine
     let mut result = Vec::with_capacity(elements.len());
     result.extend(header_blocks.into_iter().map(|b| b.original_index));
     result.extend(sorted_main.into_iter().map(|b| b.original_index));
@@ -177,87 +198,539 @@ pub fn sort_layout_enhanced(
     result
 }
 
-fn sort_main_blocks(
-    blocks: Vec<SortableBlock>,
-    _page_width: f32,
-    _page_height: f32,
-) -> Vec<SortableBlock> {
+fn sort_blocks_by_y(blocks: &mut [SortableBlock]) {
+    blocks.sort_by(|a, b| {
+        a.bbox
+            .y_min()
+            .partial_cmp(&b.bbox.y_min())
+            .unwrap_or(std::cmp::Ordering::Equal)
+    });
+}
+
+fn sort_main_blocks(mut blocks: Vec<SortableBlock>, page_width: f32) -> Vec<SortableBlock> {
+    if blocks.is_empty() {
+        return blocks;
+    }
+
+    // 1. Cross-layout detection (PaddleX get_layout_structure)
+    detect_cross_layout(&mut blocks, page_width);
+
+    // 2. Separate blocks for XY-cut vs special insertion
+    // PaddleX SKIP_ORDER_LABELS are inserted by weighted distance after main XY-cut.
     let mut xy_cut_blocks = Vec::new();
-    let mut vision_blocks = Vec::new(); // Tables, Images (Anchors)
-    let mut other_unsorted_blocks = Vec::new(); // Titles, etc.
     let mut doc_title_blocks = Vec::new();
+    let mut weighted_insert_blocks = Vec::new();
+    let mut unordered_blocks = Vec::new();
 
     for block in blocks {
         match block.order_label {
-            OrderLabel::NormalText | OrderLabel::Unordered => xy_cut_blocks.push(block),
+            OrderLabel::CrossLayout
+            | OrderLabel::CrossReference
+            | OrderLabel::Vision
+            | OrderLabel::VisionTitle => weighted_insert_blocks.push(block),
             OrderLabel::DocTitle => doc_title_blocks.push(block),
-            OrderLabel::Vision => vision_blocks.push(block),
-            _ => other_unsorted_blocks.push(block),
+            OrderLabel::Unordered => unordered_blocks.push(block),
+            _ => xy_cut_blocks.push(block),
         }
     }
 
-    // Sort xy_cut_blocks using standard XY-cut
+    // 3. Direction-aware XY-cut on xy_cut_blocks
     let mut sorted_blocks = if !xy_cut_blocks.is_empty() {
-        let bboxes: Vec<BoundingBox> = xy_cut_blocks.iter().map(|b| b.bbox.clone()).collect();
-        let indices = sort_by_xycut(&bboxes, SortDirection::Vertical, 1);
-        indices
-            .into_iter()
-            .map(|i| xy_cut_blocks[i].clone())
-            .collect()
+        direction_aware_xycut_sort(&mut xy_cut_blocks)
     } else {
         Vec::new()
     };
 
-    // Insertion Order Strategy:
-    // 1. DocTitle (Global context)
-    // 2. Vision (Tables/Images - strong anchors)
-    // 3. VisionTitle/ParagraphTitle (Weakly attached, depend on anchors)
+    // 4. Match unsorted blocks using weighted distance insertion
+    // Order: doc_title first (PaddleX inserts first doc_title at position 0)
+    sort_blocks_by_y(&mut doc_title_blocks);
+    for (i, block) in doc_title_blocks.into_iter().enumerate() {
+        if i == 0 {
+            // The topmost doc title always leads the reading order. `Vec::insert(0, ..)`
+            // is valid on an empty vector too, so no separate empty-list branch is needed.
+            sorted_blocks.insert(0, block);
+        } else {
+            weighted_distance_insert(block, &mut sorted_blocks, SortDirection::Horizontal);
+        }
+    }
 
-    // 1. DocTitle
-    doc_title_blocks.sort_by(|a, b| {
-        a.bbox
-            .y_min()
-            .partial_cmp(&b.bbox.y_min())
-            .unwrap_or(std::cmp::Ordering::Equal)
-    });
-    for block in doc_title_blocks {
+    // Vision, vision-title, and cross-layout/cross-reference blocks are inserted after XY-cut.
+    sort_blocks_by_y(&mut weighted_insert_blocks);
+    for block in weighted_insert_blocks {
         weighted_distance_insert(block, &mut sorted_blocks, SortDirection::Horizontal);
     }
 
-    // 2. Vision (Tables, Images)
-    // Sort by position to stabilize insertion
-    vision_blocks.sort_by(|a, b| {
-        a.bbox
-            .y_min()
-            .partial_cmp(&b.bbox.y_min())
-            .unwrap_or(std::cmp::Ordering::Equal)
-    });
-    for block in vision_blocks {
-        weighted_distance_insert(block, &mut sorted_blocks, SortDirection::Horizontal);
+    // Unordered blocks are inserted last, using Manhattan distance.
+    sort_blocks_by_y(&mut unordered_blocks);
+    for block in unordered_blocks {
+        manhattan_insert(block, &mut sorted_blocks);
+    }
+
+    // 5. Associate child blocks (vision titles next to vision parents)
+    associate_child_blocks(&mut sorted_blocks);
+
+    sorted_blocks
+}
+
+/// Direction-aware XY-cut sorting (PaddleX xycut_enhanced lines 539-584).
+///
+/// If single column or all blocks have 1 line → use secondary direction (xy_cut).
+/// If multi-column → use primary direction (yx_cut).
+fn direction_aware_xycut_sort(blocks: &mut [SortableBlock]) -> Vec<SortableBlock> {
+    let bboxes: Vec<BoundingBox> = blocks.iter().map(|b| b.bbox.clone()).collect();
+    let max_text_lines = blocks.iter().map(|b| b.num_lines).max().unwrap_or(1);
+
+    // Check column structure using horizontal projection
+    let discontinuous = calculate_discontinuous_projection(&bboxes, SortDirection::Horizontal);
+
+    // Shrink overlapping boxes before XY-cut
+    shrink_overlapping_boxes(blocks, SortDirection::Vertical);
+
+    let shrunk_bboxes: Vec<BoundingBox> = blocks.iter().map(|b| b.bbox.clone()).collect();
+
+    let sorted_indices = if discontinuous.len() == 1 || max_text_lines == 1 {
+        // Single column or only single-line blocks: secondary direction (XY-cut = X first, then Y)
+        sort_by_xycut(&shrunk_bboxes, SortDirection::Horizontal, 1)
+    } else {
+        // Multi-column: use primary direction (YX-cut = Y first, then X)
+        sort_by_xycut(&shrunk_bboxes, SortDirection::Vertical, 1)
+    };
+
+    sorted_indices
+        .into_iter()
+        .map(|i| blocks[i].clone())
+        .collect()
+}
+
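+/// Cross-layout detection (port of PaddleX `get_layout_structure`).
+/// Re-sorts `blocks` in place by (x_min, width), then relabels column-spanning blocks
+/// as `CrossLayout` (`CrossReference` for `Reference`); `_page_width` is unused.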
+fn detect_cross_layout(blocks: &mut [SortableBlock], _page_width: f32) {
+    if blocks.len() < 2 {
+        return;
     }
 
-    // 3. Other Unsorted (Titles, CrossLayout, etc.)
-    other_unsorted_blocks.sort_by(|a, b| {
+    // Sort by x_min, then width (matching PaddleX)
+    blocks.sort_by(|a, b| {
         a.bbox
-            .y_min()
-            .partial_cmp(&b.bbox.y_min())
+            .x_min()
+            .partial_cmp(&b.bbox.x_min())
             .unwrap_or(std::cmp::Ordering::Equal)
+            .then_with(|| {
+                a.width()
+                    .partial_cmp(&b.width())
+                    .unwrap_or(std::cmp::Ordering::Equal)
+            })
     });
-    for block in other_unsorted_blocks {
-        weighted_distance_insert(block, &mut sorted_blocks, SortDirection::Horizontal);
+
+    let mask_labels = [
+        OrderLabel::DocTitle,
+        OrderLabel::CrossLayout,
+        OrderLabel::CrossReference,
+    ];
+
+    let n = blocks.len();
+
+    // We need to work with indices to avoid borrow checker issues
+    // Collect block data we need for comparisons
+    let block_data: Vec<(BoundingBox, OrderLabel, f32, f32)> = blocks
+        .iter()
+        .map(|b| {
+            (
+                b.bbox.clone(),
+                b.order_label,
+                b.area(),
+                b.long_side_length(),
+            )
+        })
+        .collect();
+
+    let text_line_heights: Vec<f32> = blocks.iter().map(|b| b.text_line_height).collect();
+
+    for block_idx in 0..n {
+        if mask_labels.contains(&block_data[block_idx].1) {
+            continue;
+        }
+
+        let mut mark_block_cross = false;
+
+        for ref_idx in 0..n {
+            if block_idx == ref_idx || mask_labels.contains(&block_data[ref_idx].1) {
+                continue;
+            }
+            // Skip already-marked blocks
+            if blocks[ref_idx].order_label == OrderLabel::CrossLayout {
+                continue;
+            }
+            if blocks[block_idx].order_label == OrderLabel::CrossLayout {
+                break;
+            }
+
+            let bbox_overlap =
+                calculate_overlap_ratio(&block_data[block_idx].0, &block_data[ref_idx].0);
+
+            if bbox_overlap > 0.0 {
+                if block_data[ref_idx].1 == OrderLabel::Vision {
+                    blocks[ref_idx].order_label = OrderLabel::CrossLayout;
+                    continue;
+                }
+                if bbox_overlap > 0.1 && block_data[block_idx].2 < block_data[ref_idx].2 {
+                    mark_block_cross = true;
+                    break;
+                }
+            }
+
+            // Check projection overlap in primary direction (horizontal)
+            let match_proj = calculate_projection_overlap_ratio(
+                &block_data[block_idx].0,
+                &block_data[ref_idx].0,
+                SortDirection::Horizontal,
+            );
+
+            if match_proj > 0.0 {
+                for second_ref_idx in 0..n {
+                    if second_ref_idx == block_idx
+                        || second_ref_idx == ref_idx
+                        || mask_labels.contains(&block_data[second_ref_idx].1)
+                    {
+                        continue;
+                    }
+                    if blocks[second_ref_idx].order_label == OrderLabel::CrossLayout {
+                        continue;
+                    }
+
+                    let bbox_overlap2 = calculate_overlap_ratio(
+                        &block_data[block_idx].0,
+                        &block_data[second_ref_idx].0,
+                    );
+
+                    if bbox_overlap2 > 0.1 {
+                        if block_data[second_ref_idx].1 == OrderLabel::Vision {
+                            blocks[second_ref_idx].order_label = OrderLabel::CrossLayout;
+                            continue;
+                        }
+                        if block_data[block_idx].1 == OrderLabel::Vision
+                            || block_data[block_idx].2 < block_data[second_ref_idx].2
+                        {
+                            mark_block_cross = true;
+                            break;
+                        }
+                    }
+
+                    let second_match_proj = calculate_projection_overlap_ratio(
+                        &block_data[block_idx].0,
+                        &block_data[second_ref_idx].0,
+                        SortDirection::Horizontal,
+                    );
+                    let ref_match_proj = calculate_projection_overlap_ratio(
+                        &block_data[ref_idx].0,
+                        &block_data[second_ref_idx].0,
+                        SortDirection::Horizontal,
+                    );
+                    let secondary_ref_match = calculate_projection_overlap_ratio(
+                        &block_data[ref_idx].0,
+                        &block_data[second_ref_idx].0,
+                        SortDirection::Vertical,
+                    );
+
+                    if second_match_proj > 0.0 && ref_match_proj == 0.0 && secondary_ref_match > 0.0
+                    {
+                        if block_data[block_idx].1 == OrderLabel::Vision {
+                            mark_block_cross = true;
+                            break;
+                        }
+                        // Both ref blocks are normal text whose long side exceeds the line-height threshold
+                        if block_data[ref_idx].1 == OrderLabel::NormalText
+                            && block_data[second_ref_idx].1 == OrderLabel::NormalText
+                            && block_data[ref_idx].3
+                                > text_line_heights[ref_idx]
+                                    * CROSS_LAYOUT_REF_TEXT_BLOCK_WORDS_NUM_THRESHOLD
+                            && block_data[second_ref_idx].3
+                                > text_line_heights[second_ref_idx]
+                                    * CROSS_LAYOUT_REF_TEXT_BLOCK_WORDS_NUM_THRESHOLD
+                        {
+                            mark_block_cross = true;
+                            break;
+                        }
+                    }
+                }
+
+                if mark_block_cross {
+                    break;
+                }
+            }
+        }
+
+        if mark_block_cross {
+            if block_data[block_idx].1 == OrderLabel::Reference {
+                blocks[block_idx].order_label = OrderLabel::CrossReference;
+            } else {
+                blocks[block_idx].order_label = OrderLabel::CrossLayout;
+            }
+        }
     }
+}
 
-    sorted_blocks
+/// Project boxes onto one axis and merge the projections into disjoint intervals.
+///
+/// `Horizontal` uses x-extents, `Vertical` y-extents (cast to `i32`); overlapping or touching
+/// intervals merge. Single interval = single column; multiple = multi-column.
+fn calculate_discontinuous_projection(
+    bboxes: &[BoundingBox],
+    direction: SortDirection,
+) -> Vec<(i32, i32)> {
+    if bboxes.is_empty() {
+        return Vec::new();
+    }
+
+    let mut intervals: Vec<(i32, i32)> = bboxes
+        .iter()
+        .map(|b| match direction {
+            SortDirection::Horizontal => (b.x_min() as i32, b.x_max() as i32),
+            SortDirection::Vertical => (b.y_min() as i32, b.y_max() as i32),
+        })
+        .collect();
+
+    intervals.sort_by_key(|&(start, _)| start);
+
+    let mut merged = Vec::new();
+    let (mut current_start, mut current_end) = intervals[0];
+
+    for &(start, end) in &intervals[1..] {
+        if start <= current_end {
+            current_end = current_end.max(end);
+        } else {
+            merged.push((current_start, current_end));
+            current_start = start;
+            current_end = end;
+        }
+    }
+    merged.push((current_start, current_end));
+
+    merged
 }
 
-/// Inserts a block into the sorted list using weighted distance logic.
+/// Shrink slightly overlapping boxes at their midpoint (PaddleX `shrink_overlapping_boxes`).
 ///
-/// Matches `weighted_distance_insert` logic.
+/// Reorders `blocks` by end coordinate, then for each consecutive pair that overlaps slightly in
+/// the cut direction (0 < IoU < 0.1) or whose edges lie within 3px, splits both at the midpoint.
+fn shrink_overlapping_boxes(blocks: &mut [SortableBlock], direction: SortDirection) {
+    if blocks.len() < 2 {
+        return;
+    }
+
+    // Sort by the end coordinate of the cut direction
+    match direction {
+        SortDirection::Vertical => {
+            blocks.sort_by(|a, b| {
+                a.bbox
+                    .y_max()
+                    .partial_cmp(&b.bbox.y_max())
+                    .unwrap_or(std::cmp::Ordering::Equal)
+            });
+        }
+        SortDirection::Horizontal => {
+            blocks.sort_by(|a, b| {
+                a.bbox
+                    .x_max()
+                    .partial_cmp(&b.bbox.x_max())
+                    .unwrap_or(std::cmp::Ordering::Equal)
+            });
+        }
+    }
+
+    for i in 0..blocks.len() - 1 {
+        let perp_direction = match direction {
+            SortDirection::Vertical => SortDirection::Horizontal,
+            SortDirection::Horizontal => SortDirection::Vertical,
+        };
+
+        let cut_iou =
+            calculate_projection_overlap_ratio(&blocks[i].bbox, &blocks[i + 1].bbox, direction);
+        let match_iou = calculate_projection_overlap_ratio(
+            &blocks[i].bbox,
+            &blocks[i + 1].bbox,
+            perp_direction,
+        );
+
+        match direction {
+            SortDirection::Vertical => {
+                let y2 = blocks[i].bbox.y_max();
+                let y1_prime = blocks[i + 1].bbox.y_min();
+                if (match_iou > 0.0 && cut_iou > 0.0 && cut_iou < 0.1)
+                    || y2 == y1_prime
+                    || (y2 - y1_prime).abs() <= 3.0
+                {
+                    let overlap_y_min = blocks[i].bbox.y_min().max(blocks[i + 1].bbox.y_min());
+                    let overlap_y_max = blocks[i].bbox.y_max().min(blocks[i + 1].bbox.y_max());
+                    let split_y = ((overlap_y_min + overlap_y_max) / 2.0).floor();
+
+                    if blocks[i].bbox.y_min() < blocks[i + 1].bbox.y_min() {
+                        let new_bbox = BoundingBox::from_coords(
+                            blocks[i].bbox.x_min(),
+                            blocks[i].bbox.y_min(),
+                            blocks[i].bbox.x_max(),
+                            split_y - 1.0,
+                        );
+                        blocks[i].bbox = new_bbox;
+                        let new_bbox2 = BoundingBox::from_coords(
+                            blocks[i + 1].bbox.x_min(),
+                            split_y + 1.0,
+                            blocks[i + 1].bbox.x_max(),
+                            blocks[i + 1].bbox.y_max(),
+                        );
+                        blocks[i + 1].bbox = new_bbox2;
+                    } else {
+                        let new_bbox = BoundingBox::from_coords(
+                            blocks[i].bbox.x_min(),
+                            split_y - 1.0,
+                            blocks[i].bbox.x_max(),
+                            blocks[i].bbox.y_max(),
+                        );
+                        blocks[i].bbox = new_bbox;
+                        let new_bbox2 = BoundingBox::from_coords(
+                            blocks[i + 1].bbox.x_min(),
+                            blocks[i + 1].bbox.y_min(),
+                            blocks[i + 1].bbox.x_max(),
+                            split_y + 1.0,
+                        );
+                        blocks[i + 1].bbox = new_bbox2;
+                    }
+                }
+            }
+            SortDirection::Horizontal => {
+                let x2 = blocks[i].bbox.x_max();
+                let x1_prime = blocks[i + 1].bbox.x_min();
+                if (match_iou > 0.0 && cut_iou > 0.0 && cut_iou < 0.1)
+                    || x2 == x1_prime
+                    || (x2 - x1_prime).abs() <= 3.0
+                {
+                    let overlap_x_min = blocks[i].bbox.x_min().max(blocks[i + 1].bbox.x_min());
+                    let overlap_x_max = blocks[i].bbox.x_max().min(blocks[i + 1].bbox.x_max());
+                    let split_x = ((overlap_x_min + overlap_x_max) / 2.0).floor();
+
+                    if blocks[i].bbox.x_min() < blocks[i + 1].bbox.x_min() {
+                        let new_bbox = BoundingBox::from_coords(
+                            blocks[i].bbox.x_min(),
+                            blocks[i].bbox.y_min(),
+                            split_x - 1.0,
+                            blocks[i].bbox.y_max(),
+                        );
+                        blocks[i].bbox = new_bbox;
+                        let new_bbox2 = BoundingBox::from_coords(
+                            split_x + 1.0,
+                            blocks[i + 1].bbox.y_min(),
+                            blocks[i + 1].bbox.x_max(),
+                            blocks[i + 1].bbox.y_max(),
+                        );
+                        blocks[i + 1].bbox = new_bbox2;
+                    } else {
+                        let new_bbox = BoundingBox::from_coords(
+                            split_x - 1.0,
+                            blocks[i].bbox.y_min(),
+                            blocks[i].bbox.x_max(),
+                            blocks[i].bbox.y_max(),
+                        );
+                        blocks[i].bbox = new_bbox;
+                        let new_bbox2 = BoundingBox::from_coords(
+                            blocks[i + 1].bbox.x_min(),
+                            blocks[i + 1].bbox.y_min(),
+                            split_x + 1.0,
+                            blocks[i + 1].bbox.y_max(),
+                        );
+                        blocks[i + 1].bbox = new_bbox2;
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Associate vision title blocks with their nearest vision parent (PaddleX `insert_child_blocks`).
 ///
-/// # Arguments
-/// * `block` - The block to insert.
-/// * `sorted_blocks` - The current sorted list.
-/// * `region_direction` - The direction of the region/page (usually Horizontal for standard docs).
+/// Moves each VisionTitle next to its nearest Vision block if within 3x its text line height.
+fn associate_child_blocks(sorted_blocks: &mut Vec<SortableBlock>) {
+    if sorted_blocks.len() < 2 {
+        return;
+    }
+
+    // Find vision title indices that need to be moved
+    let mut moves: Vec<(usize, usize)> = Vec::new(); // (from_idx, target_vision_idx)
+
+    for (i, block) in sorted_blocks.iter().enumerate() {
+        if block.order_label != OrderLabel::VisionTitle {
+            continue;
+        }
+
+        // Find nearest Vision block by edge distance (uniform edge weights)
+        let mut best_vision_idx = None;
+        let mut best_distance = f32::INFINITY;
+
+        for (j, other) in sorted_blocks.iter().enumerate() {
+            if other.order_label != OrderLabel::Vision {
+                continue;
+            }
+            let dist = get_nearest_edge_distance(&block.bbox, &other.bbox, &[1.0, 1.0, 1.0, 1.0]);
+            if dist < best_distance {
+                best_distance = dist;
+                best_vision_idx = Some(j);
+            }
+        }
+
+        // Only move if close enough (< 3 * text_line_height of the vision block)
+        if let Some(vision_idx) = best_vision_idx {
+            let threshold = sorted_blocks[vision_idx].text_line_height * 3.0;
+            if best_distance < threshold {
+                // Titles starting above the vision block go before it; all others go after
+                if block.bbox.y_min() < sorted_blocks[vision_idx].bbox.y_min() {
+                    moves.push((i, vision_idx)); // place before
+                } else {
+                    moves.push((i, vision_idx + 1)); // place after
+                }
+            }
+        }
+    }
+
+    // Apply in reverse. NOTE(review): a move to an earlier slot shifts pending indices; verify.
+    for (from_idx, target_idx) in moves.into_iter().rev() {
+        // Only move if the title is not already adjacent
+        if from_idx == target_idx || from_idx + 1 == target_idx {
+            continue;
+        }
+        let block = sorted_blocks.remove(from_idx);
+        let adjusted_target = if from_idx < target_idx {
+            target_idx - 1
+        } else {
+            target_idx
+        };
+        let insert_pos = adjusted_target.min(sorted_blocks.len());
+        sorted_blocks.insert(insert_pos, block);
+    }
+}
+
+/// Insert an unordered block right after the sorted block whose top-left corner is L1-nearest.
+fn manhattan_insert(block: SortableBlock, sorted_blocks: &mut Vec<SortableBlock>) {
+    if sorted_blocks.is_empty() {
+        sorted_blocks.push(block);
+        return;
+    }
+
+    let mut min_distance = f32::INFINITY;
+    let mut nearest_index = 0;
+
+    for (idx, sorted_block) in sorted_blocks.iter().enumerate() {
+        let distance = (block.bbox.x_min() - sorted_block.bbox.x_min()).abs()
+            + (block.bbox.y_min() - sorted_block.bbox.y_min()).abs();
+        if distance < min_distance {
+            min_distance = distance;
+            nearest_index = idx;
+        }
+    }
+
+    sorted_blocks.insert(nearest_index + 1, block);
+}
+
+/// Insert a block using weighted distance logic (PaddleX `weighted_distance_insert`).
 fn weighted_distance_insert(
     block: SortableBlock,
     sorted_blocks: &mut Vec<SortableBlock>,
@@ -268,65 +741,45 @@ fn weighted_distance_insert(
         return;
     }
 
-    // XY-cut settings
-    let tolerance_len = 2.0; // edge_distance_compare_tolerance_len
-
-    // Abstract handling
-    // We don't have "Abstract" label explicitly mapped to a unique OrderLabel in this simplified enum
-    // unless we map LayoutElementType::Abstract to something specific or check the original type if available.
-    // For now, assuming standard logic. If we had abstract, we'd multiply tolerance by 2.
-
-    // Distance weights
-    let edge_weight = 10000.0;
-    let up_edge_weight = 1.0;
-    let left_edge_weight = 0.0001;
-
-    let mut min_weighted_distance = f32::INFINITY;
-    let mut min_edge_distance = f32::INFINITY;
-    let mut min_up_edge_distance = f32::INFINITY;
-
-    let mut nearest_index = 0;
-
-    let (x1, y1, _x2, _y2) = (
+    let tolerance_len = EDGE_DISTANCE_COMPARE_TOLERANCE_LEN;
+    let (x1, y1, x2, _y2) = (
         block.bbox.x_min(),
         block.bbox.y_min(),
         block.bbox.x_max(),
         block.bbox.y_max(),
     );
 
+    let mut min_weighted_distance = f32::INFINITY;
+    let mut _min_edge_distance = f32::INFINITY;
+    let mut min_up_edge_distance = f32::INFINITY;
+    let mut nearest_index = 0;
+
     for (idx, sorted_block) in sorted_blocks.iter().enumerate() {
-        let (x1_prime, y1_prime, x2_prime, _y2_prime) = (
+        let (x1_prime, y1_prime, x2_prime, y2_prime) = (
             sorted_block.bbox.x_min(),
             sorted_block.bbox.y_min(),
             sorted_block.bbox.x_max(),
             sorted_block.bbox.y_max(),
         );
 
-        // Calculate edge distance
         let weight = get_weights(&block.order_label, block.direction);
-        let edge_distance = get_nearest_edge_distance(&block.bbox, &sorted_block.bbox, &weight);
+        let raw_edge_distance = get_nearest_edge_distance(&block.bbox, &sorted_block.bbox, &weight);
 
-        // Calculate up edge distances
-        // For horizontal region (std doc): up is y1_prime, left is x1_prime
-        let (mut up_dist, mut left_dist) = if matches!(region_direction, SortDirection::Horizontal)
-        {
-            (y1_prime, x1_prime)
-        } else {
-            (-x2_prime, y1_prime) // Vertical region? (e.g. text flows horizontal? Unclear mapping, sticking to std)
+        // Quantize edge distance to 50px buckets to ignore minor vertical misalignments
+        // between columns, allowing left_dist to correctly resolve reading order.
+        let edge_distance = (raw_edge_distance / 50.0).floor() * 50.0;
+
+        let (mut up_dist, mut left_dist) = match region_direction {
+            SortDirection::Horizontal => (y1_prime, x1_prime),
+            SortDirection::Vertical => (-x2_prime, y1_prime),
         };
 
-        // Check if block is below sorted_block
-        let is_below = if matches!(region_direction, SortDirection::Horizontal) {
-            // sorted_block.y2 < block.y1 (sorted block is strictly above block)
-            // y2_prime < y1
-            _y2_prime < y1
-        } else {
-            // sorted_block.x1 > block.x2 (sorted block is strictly to the right? or left?)
-            // x1_prime > x2
-            x1_prime > _x2
+        let is_below = match region_direction {
+            SortDirection::Horizontal => y2_prime < y1,
+            SortDirection::Vertical => x1_prime > x2,
         };
 
-        // Logic: Flip signs if below and not a standard text block
+        // Flip signs for special blocks that are below
         let is_special = !matches!(block.order_label, OrderLabel::Unordered)
             || matches!(
                 block.order_label,
@@ -334,6 +787,7 @@ fn weighted_distance_insert(
                     | OrderLabel::ParagraphTitle
                     | OrderLabel::Vision
                     | OrderLabel::VisionTitle
+                    | OrderLabel::CrossLayout
             );
 
         if is_special && is_below {
@@ -341,68 +795,37 @@ fn weighted_distance_insert(
             left_dist = -left_dist;
         }
 
-        // Tolerance check
         if (min_up_edge_distance - up_dist).abs() <= tolerance_len {
             up_dist = min_up_edge_distance;
         }
 
-        // Weighted distance
         let weighted_dist =
-            edge_distance * edge_weight + up_dist * up_edge_weight + left_dist * left_edge_weight;
+            edge_distance * EDGE_WEIGHT + up_dist * UP_EDGE_WEIGHT + left_dist * LEFT_EDGE_WEIGHT;
 
-        // Update mins
-        min_edge_distance = min_edge_distance.min(edge_distance);
+        _min_edge_distance = _min_edge_distance.min(edge_distance);
         min_up_edge_distance = min_up_edge_distance.min(up_dist);
 
         if weighted_dist < min_weighted_distance {
             min_weighted_distance = weighted_dist;
 
-            // Determine relative order (before or after nearest)
-            // Python: abs(y1 // 2 - y1_prime // 2) > 0
-            // We use floor() / 2 as i32 for parity
             let y1_i = (y1.floor() as i32) / 2;
             let y1_p_i = (y1_prime.floor() as i32) / 2;
 
-            let sorted_dist_val;
-            let block_dist_val;
-
-            if (y1_i - y1_p_i).abs() > 0 {
-                sorted_dist_val = y1_prime;
-                block_dist_val = y1;
+            let (sorted_dist_val, block_dist_val) = if (y1_i - y1_p_i).abs() > 0 {
+                (y1_prime, y1)
             } else if matches!(region_direction, SortDirection::Horizontal) {
                 let x1_i = (x1.floor() as i32) / 2;
-                let x2_i = (_x2.floor() as i32) / 2; // Warning: python uses x2 (x_max) here? 
-                // Python: if abs(x1 // 2 - x2 // 2) > 0:
-                // Wait, python code used: block.bbox[0] and block.bbox[2]?
-                // No, `block` vs `sorted_block` context.
-                // Python: if abs(x1 // 2 - x2 // 2) > 0
-                // x1 from block, x2 from block? No that makes no sense.
-                // Re-reading python carefully:
-                // x1, y1, x2, y2 = block.bbox
-                // x1_prime, y1_prime... = sorted_block.bbox
-                // if abs(x1 // 2 - x2 // 2) > 0:
-                // This checks if the BLOCK ITSELF has width > 0 in 2-pixel buckets?
-                // If so:
-                //   sorted_distance = x1_prime
-                //   block_distance = x1
-                // else:
-                //   use centroid distance
-                let block_width_check = (x1_i - x2_i).abs() > 0;
-                if block_width_check {
-                    sorted_dist_val = x1_prime;
-                    block_dist_val = x1;
+                let x2_i = (x2.floor() as i32) / 2;
+                if (x1_i - x2_i).abs() > 0 {
+                    (x1_prime, x1)
                 } else {
-                    // Centroid distance
                     let (cx, cy) = block.center();
                     let (scx, scy) = sorted_block.center();
-                    sorted_dist_val = scx * scx + scy * scy;
-                    block_dist_val = cx * cx + cy * cy;
+                    (scx * scx + scy * scy, cx * cx + cy * cy)
                 }
             } else {
-                // Vertical direction logic ... omitted for brevity/standard doc focus
-                sorted_dist_val = x1_prime; // simplified
-                block_dist_val = x1;
-            }
+                (x1_prime, x1)
+            };
 
             if block_dist_val > sorted_dist_val {
                 nearest_index = idx + 1;
@@ -412,7 +835,6 @@ fn weighted_distance_insert(
         }
     }
 
-    // Clamp index
     if nearest_index > sorted_blocks.len() {
         nearest_index = sorted_blocks.len();
     }
@@ -424,24 +846,23 @@ fn get_weights(label: &OrderLabel, direction: SortDirection) -> [f32; 4] {
     match label {
         OrderLabel::DocTitle => {
             if matches!(direction, SortDirection::Horizontal) {
-                [1.0, 0.1, 0.1, 1.0] // left, right, up, down
+                [1.0, 0.1, 0.1, 1.0]
             } else {
                 [0.2, 0.1, 1.0, 1.0]
             }
         }
-        OrderLabel::ParagraphTitle | OrderLabel::Vision | OrderLabel::VisionTitle => {
-            [1.0, 1.0, 0.1, 1.0] // prioritize up distance
-        }
-        _ => [1.0, 1.0, 1.0, 0.1], // default (NormalText, etc.)
+        OrderLabel::ParagraphTitle
+        | OrderLabel::Vision
+        | OrderLabel::VisionTitle
+        | OrderLabel::CrossLayout => [1.0, 1.0, 0.1, 1.0],
+        _ => [1.0, 1.0, 1.0, 0.1],
     }
 }
 
 /// Calculate nearest edge distance between two boxes.
-///
-/// Returns 0.0 if they overlap in projection (aligned).
 fn get_nearest_edge_distance(b1: &BoundingBox, b2: &BoundingBox, weights: &[f32; 4]) -> f32 {
-    let h_overlap = calculate_projection_overlap(b1, b2, SortDirection::Horizontal);
-    let v_overlap = calculate_projection_overlap(b1, b2, SortDirection::Vertical);
+    let h_overlap = calculate_projection_overlap_ratio(b1, b2, SortDirection::Horizontal);
+    let v_overlap = calculate_projection_overlap_ratio(b1, b2, SortDirection::Vertical);
 
     if h_overlap > 0.0 && v_overlap > 0.0 {
         return 0.0;
@@ -475,7 +896,8 @@ fn get_nearest_edge_distance(b1: &BoundingBox, b2: &BoundingBox, weights: &[f32;
     min_x + min_y
 }
 
-fn calculate_projection_overlap(
+/// Calculate projection overlap ratio (IoU) along a single axis.
+fn calculate_projection_overlap_ratio(
     b1: &BoundingBox,
     b2: &BoundingBox,
     direction: SortDirection,
@@ -489,7 +911,7 @@ fn calculate_projection_overlap(
     let union = max1.max(max2) - min1.min(min2);
 
     if union > 0.0 {
-        intersection / union // IOU
+        intersection / union
     } else {
         0.0
     }
diff --git a/oar-ocr-core/src/processors/layout_utils.rs b/oar-ocr-core/src/processors/layout_utils.rs
index ab09069..585642e 100644
--- a/oar-ocr-core/src/processors/layout_utils.rs
+++ b/oar-ocr-core/src/processors/layout_utils.rs
@@ -280,16 +280,30 @@ pub fn reconcile_table_cells(
 
     // Assign each detected cell to the best matching structure cell
     for (det_idx, det_box) in det_boxes.iter().enumerate() {
-        let mut best_iou = 0.001f32; // Minimal threshold
+        let mut best_ioa = 0.001f32; // Minimal threshold
         let mut best_struct_idx: Option<usize> = None;
 
+        let det_area = (det_box.x_max() - det_box.x_min()) * (det_box.y_max() - det_box.y_min());
+
         for (struct_idx, struct_box) in structure_cells.iter().enumerate() {
-            // Use IoU for assignment
-            // Note: We could also use intersection over detection area to handle
-            // cases where detection is much smaller than structure cell
-            let iou = calculate_iou(det_box, struct_box);
-            if iou > best_iou {
-                best_iou = iou;
+            // Use Intersection over Area (IoA) of detection for assignment.
+            // This properly handles cases where the structure cell has rowspan/colspan
+            // and is significantly larger than the detected text bounding box.
+            let inter_x1 = det_box.x_min().max(struct_box.x_min());
+            let inter_y1 = det_box.y_min().max(struct_box.y_min());
+            let inter_x2 = det_box.x_max().min(struct_box.x_max());
+            let inter_y2 = det_box.y_max().min(struct_box.y_max());
+
+            let inter_area = (inter_x2 - inter_x1).max(0.0) * (inter_y2 - inter_y1).max(0.0);
+
+            let ioa = if det_area > 0.0 {
+                inter_area / det_area
+            } else {
+                0.0
+            };
+
+            if ioa > best_ioa {
+                best_ioa = ioa;
                 best_struct_idx = Some(struct_idx);
             }
         }
@@ -626,27 +640,6 @@ fn kmeans_maxdist_init(points: &[(f32, f32)], k: usize) -> Vec<(f32, f32)> {
     centers
 }
 
-/// Calculates Intersection over Union (IoU) between two bounding boxes.
-fn calculate_iou(a: &BoundingBox, b: &BoundingBox) -> f32 {
-    let inter_x1 = a.x_min().max(b.x_min());
-    let inter_y1 = a.y_min().max(b.y_min());
-    let inter_x2 = a.x_max().min(b.x_max());
-    let inter_y2 = a.y_max().min(b.y_max());
-
-    let inter_area = (inter_x2 - inter_x1).max(0.0) * (inter_y2 - inter_y1).max(0.0);
-
-    let area_a = (a.x_max() - a.x_min()) * (a.y_max() - a.y_min());
-    let area_b = (b.x_max() - b.x_min()) * (b.y_max() - b.y_min());
-
-    let union_area = area_a + area_b - inter_area;
-
-    if union_area <= 0.0 {
-        0.0
-    } else {
-        inter_area / union_area
-    }
-}
-
 /// Calculates Intersection over Area (IoA) - intersection / smaller box area.
 fn calculate_ioa_smaller(a: &BoundingBox, b: &BoundingBox) -> f32 {
     let inter_x1 = a.x_min().max(b.x_min());
diff --git a/oar-ocr-core/src/processors/sorting.rs b/oar-ocr-core/src/processors/sorting.rs
index ed08789..24802ea 100644
--- a/oar-ocr-core/src/processors/sorting.rs
+++ b/oar-ocr-core/src/processors/sorting.rs
@@ -216,7 +216,7 @@ pub fn sort_boxes_xycut(boxes: &[BoundingBox], direction: SortDirection) -> Vec<
 /// # Returns
 ///
 /// A 1D vector representing the projection histogram
-fn projection_by_bboxes(boxes: &[[i32; 4]], axis: usize) -> Vec<i32> {
+pub(crate) fn projection_by_bboxes(boxes: &[[i32; 4]], axis: usize) -> Vec<i32> {
     assert!(axis <= 1, "axis must be 0 or 1");
 
     if boxes.is_empty() {
@@ -264,7 +264,7 @@ fn projection_by_bboxes(boxes: &[[i32; 4]], axis: usize) -> Vec<i32> {
 /// # Returns
 ///
 /// Optional tuple of (segment_starts, segment_ends)
-fn split_projection_profile(
+pub(crate) fn split_projection_profile(
     arr_values: &[i32],
     min_value: i32,
     min_gap: i32,
@@ -522,6 +522,7 @@ impl SortableRegion {
 }
 
 /// Calculates the IoU (Intersection over Union) between two bounding boxes.
+#[allow(dead_code)]
 pub fn calculate_iou(a: &BoundingBox, b: &BoundingBox) -> f32 {
     let x1 = a.x_min().max(b.x_min());
     let y1 = a.y_min().max(b.y_min());
diff --git a/oar-ocr-vl/src/doc_parser.rs b/oar-ocr-vl/src/doc_parser.rs
index 9cdfb8f..423aa0a 100644
--- a/oar-ocr-vl/src/doc_parser.rs
+++ b/oar-ocr-vl/src/doc_parser.rs
@@ -26,7 +26,7 @@ use oar_ocr_core::domain::structure::{
 };
 use oar_ocr_core::predictors::LayoutDetectionPredictor;
 use oar_ocr_core::processors::BoundingBox;
-use oar_ocr_core::processors::layout_sorting::sort_layout_enhanced;
+use oar_ocr_core::processors::layout_sorting::{SortableElement, sort_layout_enhanced};
 use oar_ocr_core::utils::BBoxCrop;
 use std::sync::Arc;
 
@@ -225,9 +225,13 @@ impl<'a, B: RecognitionBackend> DocParser<'a, B> {
         let mut sorted_elements: Vec<LayoutElement> = if layout_result.is_reading_order_sorted {
             elements
         } else {
-            let sortable: Vec<(BoundingBox, LayoutElementType)> = elements
+            let sortable: Vec<SortableElement> = elements
                 .iter()
-                .map(|e| (e.bbox.clone(), e.element_type))
+                .map(|e| SortableElement {
+                    bbox: e.bbox.clone(),
+                    element_type: e.element_type,
+                    num_lines: e.num_lines,
+                })
                 .collect();
             let sorted_indices = sort_layout_enhanced(&sortable, page_w, page_h);
             sorted_indices
diff --git a/src/oarocr/ocr.rs b/src/oarocr/ocr.rs
index d6cd68b..f6a28fc 100644
--- a/src/oarocr/ocr.rs
+++ b/src/oarocr/ocr.rs
@@ -189,6 +189,7 @@ impl OAROCRBuilder {
     ///
     /// This matches the text_type parameter:
     /// - "seal": Uses polygon-based sorting/cropping for seal text (circular/curved)
+    /// - "table": Uses table-friendly detection defaults (box_threshold=0.4)
     /// - Other values or None: Uses quad-based sorting (default)
     ///
     /// # Arguments
@@ -253,11 +254,26 @@ impl OAROCRBuilder {
         // Align text detection defaults with OCR pipeline.
         // Defaults depend on text_type:
         // - general: limit_side_len=960, limit_type="max", thresh=0.3, box_thresh=0.6, unclip_ratio=2.0
+        // - table: limit_side_len=960, limit_type="max", thresh=0.3, box_thresh=0.4, unclip_ratio=2.0
         // - seal: limit_side_len=736, limit_type="min", thresh=0.2, box_thresh=0.6, unclip_ratio=0.5
         let mut effective_det_cfg = self.text_detection_config.clone().unwrap_or_default();
         let has_explicit_det_cfg = self.text_detection_config.is_some();
         if !has_explicit_det_cfg {
             match self.text_type.as_deref().unwrap_or("general") {
+                "table" => {
+                    effective_det_cfg.score_threshold = 0.3;
+                    effective_det_cfg.box_threshold = 0.4;
+                    effective_det_cfg.unclip_ratio = 2.0;
+                    if effective_det_cfg.limit_side_len.is_none() {
+                        effective_det_cfg.limit_side_len = Some(960);
+                    }
+                    if effective_det_cfg.limit_type.is_none() {
+                        effective_det_cfg.limit_type = Some(crate::processors::LimitType::Max);
+                    }
+                    if effective_det_cfg.max_side_len.is_none() {
+                        effective_det_cfg.max_side_len = Some(4000);
+                    }
+                }
                 "seal" => {
                     effective_det_cfg.score_threshold = 0.2;
                     effective_det_cfg.box_threshold = 0.6;
@@ -762,6 +778,7 @@ impl OAROCR {
                         confidence: Some(score),
                         orientation_angle: region.line_orientation_angle,
                         word_boxes,
+                        label: None,
                     });
                 }
             }
diff --git a/src/oarocr/stitching.rs b/src/oarocr/stitching.rs
index 31f501e..20e10cd 100644
--- a/src/oarocr/stitching.rs
+++ b/src/oarocr/stitching.rs
@@ -16,7 +16,7 @@ use oar_ocr_core::domain::structure::{
     FormulaResult, LayoutElement, LayoutElementType, StructureResult, TableCell, TableResult,
 };
 use oar_ocr_core::processors::{
-    BoundingBox, SplitConfig as OcrSplitConfig, create_expanded_ocr_for_table,
+    BoundingBox, SplitConfig as OcrSplitConfig, create_expanded_ocr_for_table, parse_cell_grid_info,
 };
 use std::cmp::Ordering;
 
@@ -32,11 +32,17 @@ enum OcrSource {
 
 /// Labels that should be excluded from OCR text matching.
 /// These regions have their own specialized content (LaTeX, HTML, etc.)
-const EXCLUDED_FROM_OCR_LABELS: [LayoutElementType; 4] = [
-    LayoutElementType::Formula,
-    LayoutElementType::FormulaNumber,
+///
+/// Used by `stitch_layout_elements`. PaddleX injects formula results into the
+/// OCR pool (via `convert_formula_res_to_ocr_format`) and excludes only Table
+/// and Seal from matching; this list additionally excludes Formula.
+///
+/// NOTE: After inline formula injection, formula elements have been absorbed
+/// into text regions and should be excluded from stitching to prevent duplication.
+const EXCLUDED_FROM_OCR_LABELS: [LayoutElementType; 3] = [
     LayoutElementType::Table,
     LayoutElementType::Seal,
+    LayoutElementType::Formula, // Exclude formulas to prevent duplicate rendering after injection
 ];
 
 #[derive(Clone)]
@@ -46,7 +52,10 @@ pub struct StitchConfig {
     pub require_text_center_inside_cell: bool,
     pub cell_merge_min_iou: f32,
     pub formula_to_cell_min_iou: f32,
+    /// Fallback pixel tolerance for line grouping.
     pub same_line_y_tolerance: f32,
+    /// Minimum vertical overlap ratio (intersection / min(line_height)) to treat two spans as one line.
+    pub line_height_iou_threshold: f32,
     /// Whether to enable cross-cell OCR box splitting.
     /// When enabled, OCR boxes that span multiple table cells will be split
     /// at cell boundaries and their text distributed proportionally.
@@ -62,6 +71,7 @@ impl Default for StitchConfig {
             cell_merge_min_iou: 0.3,
             formula_to_cell_min_iou: 0.01,
             same_line_y_tolerance: 10.0,
+            line_height_iou_threshold: 0.6,
             enable_cross_cell_split: true,
         }
     }
@@ -88,8 +98,8 @@ impl ResultStitcher {
         // Track which regions have been used
         let mut used_region_indices = std::collections::HashSet::new();
 
-        // Get text regions (clone to avoid borrow issues)
-        let regions = result.text_regions.clone().unwrap_or_default();
+        // Get text regions (clone to avoid borrow issues, make mutable for injection)
+        let mut regions = result.text_regions.clone().unwrap_or_default();
 
         tracing::debug!("Stitching: {} text regions", regions.len());
 
@@ -110,7 +120,20 @@ impl ResultStitcher {
             used_region_indices.len()
         );
 
+        // 1.5. Fill formula elements with LaTeX content FIRST
+        // This must happen before inject_inline_formulas so formulas have text content
+        Self::fill_formula_elements(&mut result.layout_elements, &result.formulas, cfg);
+
+        // 1.6. Inject inline formulas into text regions
+        // PaddleX: Small formula elements that overlap with text elements should be
+        // absorbed into the text flow, not kept as separate layout elements.
+        // The call below only clears small formula elements; their TextRegion entries
+        // with label="formula" get wrapped with $...$ delimiters during text joining.
+        Self::inject_inline_formulas(&mut result.layout_elements, &mut regions, cfg);
+
         // 2. Stitch text into layout elements (excluding special types)
+        // Note: after inject_inline_formulas, some formula elements have had their text cleared
+        // These won't be rendered separately in to_markdown
         Self::stitch_layout_elements(
             &mut result.layout_elements,
             &regions,
@@ -123,21 +146,18 @@ impl ResultStitcher {
             used_region_indices.len()
         );
 
-        // 3. Fill formula elements with LaTeX content
-        Self::fill_formula_content(&mut result.layout_elements, &result.formulas);
+        // Note: fill_formula_elements was already called before inject_inline_formulas
+        // Do NOT call it again here, as it would re-fill formulas that were injected and cleared
 
-        // 4. Mark text regions that overlap with excluded element types (Formula, Seal)
-        // as used to prevent them from becoming orphans.
-        // - Formulas: content comes from LaTeX recognition, OCR is redundant/noise.
+        // 3. Mark text regions that overlap with Seal elements as used
+        // to prevent them from becoming orphans.
         // - Seals: content comes from specialized seal OCR.
         // - Tables: content comes from OCR stitching. We do NOT suppress tables here because
         //   text inside a table that wasn't assigned to a cell (in step 1) should be preserved
         //   as an orphan (e.g. caption, header, or matching failure).
+        // - Formulas: now handled through normal OCR matching (step 2), already marked used.
         for element in &result.layout_elements {
-            if matches!(
-                element.element_type,
-                LayoutElementType::Formula | LayoutElementType::Seal
-            ) {
+            if element.element_type == LayoutElementType::Seal {
                 for (idx, region) in regions.iter().enumerate() {
                     if Self::is_overlapping(&element.bbox, &region.bounding_box, cfg) {
                         used_region_indices.insert(idx);
@@ -157,6 +177,53 @@ impl ResultStitcher {
             .map(|e| &e.bbox)
             .collect();
 
+        let image_chart_bboxes: Vec<&BoundingBox> = result
+            .layout_elements
+            .iter()
+            .filter(|e| {
+                matches!(
+                    e.element_type,
+                    LayoutElementType::Image | LayoutElementType::Chart
+                )
+            })
+            .map(|e| &e.bbox)
+            .collect();
+
+        // Collect figure/chart caption bboxes to infer undetected figure regions.
+        // When the layout model detects a caption (e.g. "Figure 3...") but misses
+        // the figure image itself, OCR text from the figure diagram becomes orphans.
+        // We infer the figure area as the region above each caption within its x-range.
+        let figure_caption_bboxes: Vec<&BoundingBox> = result
+            .layout_elements
+            .iter()
+            .filter(|e| {
+                matches!(
+                    e.element_type,
+                    LayoutElementType::FigureTitle
+                        | LayoutElementType::ChartTitle
+                        | LayoutElementType::FigureTableChartTitle
+                )
+            })
+            .map(|e| &e.bbox)
+            .collect();
+
+        // Collect text/title element bboxes to check if an orphan is already
+        // covered by a known content element (avoid filtering legitimate text)
+        let content_element_bboxes: Vec<&BoundingBox> = result
+            .layout_elements
+            .iter()
+            .filter(|e| {
+                matches!(
+                    e.element_type,
+                    LayoutElementType::Text
+                        | LayoutElementType::DocTitle
+                        | LayoutElementType::ParagraphTitle
+                        | LayoutElementType::Abstract
+                )
+            })
+            .map(|e| &e.bbox)
+            .collect();
+
         let original_element_count = result.layout_elements.len();
         let mut new_elements = Vec::new();
         for (idx, region) in regions.iter().enumerate() {
@@ -174,11 +241,53 @@ impl ResultStitcher {
                     continue;
                 }
 
+                // Filter out text inside Image/Chart regions
+                let overlaps_image_chart = image_chart_bboxes
+                    .iter()
+                    .any(|bbox| region.bounding_box.ioa(bbox) > 0.5);
+
+                if overlaps_image_chart {
+                    continue;
+                }
+
+                // Filter out text in inferred figure regions (above figure/chart captions).
+                // When the layout model detects a caption but not the figure itself,
+                // OCR'd annotations from the figure diagram leak as orphan text.
+                // Check: orphan is above a caption, within its x-range, and not inside
+                // any existing text/title element.
+                let in_inferred_figure_region = figure_caption_bboxes.iter().any(|cap| {
+                    let orphan_bb = &region.bounding_box;
+                    // Orphan must be above or overlapping with the caption's top
+                    let above_caption = orphan_bb.y_max() < cap.y_max();
+                    // Orphan must be within the caption's horizontal range (with margin)
+                    let x_margin = (cap.x_max() - cap.x_min()) * 0.1;
+                    let in_x_range = orphan_bb.x_min() >= (cap.x_min() - x_margin)
+                        && orphan_bb.x_max() <= (cap.x_max() + x_margin);
+                    above_caption && in_x_range
+                });
+
+                if in_inferred_figure_region {
+                    // Verify the orphan is NOT inside any existing text/title element
+                    let inside_content_element = content_element_bboxes
+                        .iter()
+                        .any(|bbox| region.bounding_box.ioa(bbox) > 0.5);
+                    if !inside_content_element {
+                        continue;
+                    }
+                }
+
+                // Check if this orphan region is a formula
                 // Create a new layout element for this orphan text
-                // We treat it as a generic "text" element
+                // If it's a formula (label="formula"), create a Formula element, otherwise Text
+                let element_type = if region.is_formula() {
+                    LayoutElementType::Formula
+                } else {
+                    LayoutElementType::Text
+                };
+
                 let element = LayoutElement::new(
                     region.bounding_box.clone(),
-                    LayoutElementType::Text,
+                    element_type,
                     region.confidence.unwrap_or(0.0),
                 )
                 .with_text(text.as_ref().to_string());
@@ -236,37 +345,23 @@ impl ResultStitcher {
         // by XY-cut with region hierarchy in structure.rs - do NOT re-sort here.
         // Only sort when region_blocks is NOT present.
         if result.region_blocks.is_none() {
-            Self::sort_layout_elements(&mut result.layout_elements, width, cfg);
+            let height = if let Some(img) = &result.rectified_img {
+                img.height() as f32
+            } else {
+                result
+                    .layout_elements
+                    .iter()
+                    .map(|e| e.bbox.y_max())
+                    .fold(0.0f32, f32::max)
+                    .max(1000.0)
+            };
+            Self::sort_layout_elements_enhanced(&mut result.layout_elements, width, height);
         }
 
         // Assign order indices regardless of sorting
         Self::assign_order_indices(&mut result.layout_elements);
     }
 
-    /// Fills formula layout elements with their corresponding LaTeX content.
-    ///
-    /// Matches formula results to layout elements by bounding box overlap (IOU > 0.5).
-    fn fill_formula_content(elements: &mut [LayoutElement], formulas: &[FormulaResult]) {
-        for element in elements.iter_mut() {
-            if element.element_type.is_formula() {
-                // Find the best matching formula result by IOU
-                if let Some(formula) = formulas
-                    .iter()
-                    .filter(|f| element.bbox.iou(&f.bbox) > 0.5)
-                    .max_by(|a, b| {
-                        element
-                            .bbox
-                            .iou(&a.bbox)
-                            .partial_cmp(&element.bbox.iou(&b.bbox))
-                            .unwrap_or(Ordering::Equal)
-                    })
-                {
-                    element.text = Some(formula.latex.clone());
-                }
-            }
-        }
-    }
-
     /// Assigns reading order indices to layout elements.
     ///
     /// Only elements that should be included in reading order get an index.
@@ -320,7 +415,10 @@ impl ResultStitcher {
             if table.cells.is_empty() {
                 continue;
             }
-            let e2e_like_cells = table.cells.iter().all(|cell| cell.confidence >= 0.999);
+            // Use the explicit is_e2e flag from the table analyzer to determine
+            // the matching strategy, instead of inferring from confidence values.
+            let has_detected_cells = table.detected_cell_bboxes.is_some();
+            let e2e_like_cells = table.is_e2e && !has_detected_cells;
 
             // 1. Filter relevant text regions (those overlapping the table area)
             let table_bbox = table.bbox.clone(); // Use table bbox
@@ -387,12 +485,36 @@ impl ResultStitcher {
                 }
             }
 
+            // PaddleX: inject formula results into table OCR candidate pool with $...$
+            // wrapping (table_contents_for_img). This lets formulas participate in normal
+            // cell matching, so formula content appears in the correct table cells.
+            for formula in formulas {
+                let w = formula.bbox.x_max() - formula.bbox.x_min();
+                let h = formula.bbox.y_max() - formula.bbox.y_min();
+                if w <= 1.0 || h <= 1.0 {
+                    continue;
+                }
+                if !Self::is_overlapping(&table_bbox, &formula.bbox, cfg) {
+                    continue;
+                }
+                let latex = &formula.latex;
+                let formatted = if latex.starts_with('$') && latex.ends_with('$') {
+                    latex.clone()
+                } else {
+                    format!("${}$", latex)
+                };
+                let mut formula_region = TextRegion::new(formula.bbox.clone());
+                formula_region.text = Some(formatted.into());
+                formula_region.confidence = Some(1.0);
+                ocr_candidates.push((OcrSource::Split, formula_region));
+            }
+
             let structure_tokens = table.structure_tokens.clone();
 
             // Prefer PaddleX-style row-aware matching when structure tokens are available.
+            // Use row-aware matching when cell detection was used (non-E2E mode).
             let mut td_to_cell_mapping: Option<Vec<Option<usize>>> = None;
-            let has_detection_like_cells = table.cells.iter().any(|cell| cell.confidence < 0.999);
-            if has_detection_like_cells
+            if !e2e_like_cells
                 && let Some(tokens) = structure_tokens.as_deref()
                 && !ocr_candidates.is_empty()
                 && let Some((mapping, matched_candidate_indices)) =
@@ -401,6 +523,7 @@ impl ResultStitcher {
                         tokens,
                         &ocr_candidates,
                         cfg.same_line_y_tolerance,
+                        table.detected_cell_bboxes.as_deref(),
                     )
             {
                 td_to_cell_mapping = Some(mapping);
@@ -475,13 +598,19 @@ impl ResultStitcher {
                 }
             }
 
-            // Attach formulas after text matching so formula tokens become part of final cell text.
-            Self::attach_formulas_to_cells(table, formulas, cfg);
+            // Formulas are now injected into the OCR candidate pool above,
+            // so they participate in normal cell matching — no separate attach step needed.
+
+            // Optional postprocess for checkbox-style tables:
+            // normalize common OCR confusions like ü/L/X into ✓/✗ when the table
+            // clearly exhibits both positive and negative marker patterns.
+            Self::normalize_checkbox_symbols_in_table(&mut table.cells);
 
             // Regenerate HTML from structure tokens and stitched cell text.
             if let Some(tokens) = structure_tokens.as_deref() {
                 let cell_texts: Vec<Option<String>> =
                     if let Some(ref td_mapping) = td_to_cell_mapping {
+                        // Use the mapping from row-aware matching
                         td_mapping
                             .iter()
                             .map(|cell_idx| {
@@ -491,7 +620,10 @@ impl ResultStitcher {
                             })
                             .collect()
                     } else {
-                        table.cells.iter().map(|c| c.text.clone()).collect()
+                        // Fallback: cells may not be in the same order as structure_tokens.
+                        // Look cells up by their (row, col) grid position (or by index when
+                        // no cell has grid info) to collect texts in token order.
+                        Self::collect_cell_texts_for_tokens(&table.cells, tokens)
                     };
 
                 let html_structure =
@@ -527,6 +659,29 @@ impl ResultStitcher {
         }
 
         for (candidate_idx, (_, region)) in ocr_candidates.iter().enumerate() {
+            let ocr_bbox = &region.bounding_box;
+
+            // Strategy 1: Center-point-in-cell with high IoA (strongest signal).
+            // If the OCR box center falls inside a cell AND the box has high overlap
+            // with that cell (IoA > 0.7), assign directly. The IoA check avoids
+            // misassignment for boxes that straddle cell boundaries.
+            let ocr_cx = (ocr_bbox.x_min() + ocr_bbox.x_max()) / 2.0;
+            let ocr_cy = (ocr_bbox.y_min() + ocr_bbox.y_max()) / 2.0;
+            let center_cell = cells.iter().enumerate().find(|(_, cell)| {
+                ocr_cx >= cell.bbox.x_min()
+                    && ocr_cx <= cell.bbox.x_max()
+                    && ocr_cy >= cell.bbox.y_min()
+                    && ocr_cy <= cell.bbox.y_max()
+                    && ocr_bbox.ioa(&cell.bbox) > 0.7
+            });
+
+            if let Some((cell_idx, _)) = center_cell {
+                cell_to_ocr.entry(cell_idx).or_default().push(candidate_idx);
+                matched_candidate_indices.insert(candidate_idx);
+                continue;
+            }
+
+            // Strategy 2+3: IoU + distance fallback
             let mut best_cell_idx: Option<usize> = None;
             let mut min_cost = (f32::MAX, f32::MAX);
             let mut candidate_costs: Vec<(usize, (f32, f32))> = Vec::new();
@@ -706,6 +861,51 @@ impl ResultStitcher {
         }
     }
 
+    /// Normalizes single-character checkbox look-alikes in table cells to ✓ / ✗.
+    ///
+    /// Only cells whose trimmed text is exactly one character are touched. Safe
+    /// variants are always rewritten; ambiguous `L`, `X` and `x` only when the table
+    /// contains both a positive-like and a negative-like single-character cell.
+    fn normalize_checkbox_symbols_in_table(cells: &mut [TableCell]) {
+        // The sole character of a cell's trimmed text, if it has exactly one.
+        fn lone_char(cell: &TableCell) -> Option<char> {
+            let mut chars = cell.text.as_deref()?.trim().chars();
+            let first = chars.next()?;
+            chars.next().is_none().then_some(first)
+        }
+
+        // Pass 1: detect whether the table looks checkbox-like.
+        let mut saw_positive = false;
+        let mut saw_negative = false;
+        for marker in cells.iter().filter_map(lone_char) {
+            match marker {
+                '✓' | 'ü' | 'Ü' | 'L' | '√' | '☑' => saw_positive = true,
+                '✗' | 'X' | 'x' | '✕' | '✖' | '☒' => saw_negative = true,
+                _ => {}
+            }
+        }
+        let checkbox_like = saw_positive && saw_negative;
+
+        // Pass 2: rewrite the markers in place.
+        for cell in cells.iter_mut() {
+            let Some(marker) = lone_char(cell) else {
+                continue;
+            };
+            let symbol = match marker {
+                // Safe positive normalization.
+                'ü' | 'Ü' | '√' | '☑' => "✓",
+                // Ambiguous L is normalized only when the table appears checkbox-like.
+                'L' if checkbox_like => "✓",
+                // Safe negative normalization.
+                '✕' | '✖' | '☒' => "✗",
+                // Ambiguous X/x are normalized only when the table appears checkbox-like.
+                'X' | 'x' if checkbox_like => "✗",
+                _ => continue,
+            };
+            cell.text = Some(symbol.to_string());
+        }
+    }
+
     /// PaddleX-style text concatenation for one cell.
     fn join_ocr_texts_paddlex_style(
         candidate_indices: &[usize],
@@ -758,14 +958,27 @@ impl ResultStitcher {
         structure_tokens: &[String],
         ocr_candidates: &[(OcrSource, TextRegion)],
         row_y_tolerance: f32,
+        cell_bboxes_override: Option<&[BoundingBox]>,
     ) -> Option<(Vec<Option<usize>>, std::collections::HashSet<usize>)> {
         if cells.is_empty() || structure_tokens.is_empty() || ocr_candidates.is_empty() {
             return None;
         }
 
-        let (sorted_cell_indices, table_cells_flag) =
-            Self::sort_table_cells_boxes(cells, row_y_tolerance);
-        if sorted_cell_indices.is_empty() || table_cells_flag.is_empty() {
+        // --- Sort cells into rows ---
+        // When detected bboxes are available, sort them (better spatial accuracy)
+        // for the IoA matching loop.
+        let (match_sorted_indices, match_row_flags) = if let Some(det_bboxes) = cell_bboxes_override
+        {
+            let temp_cells: Vec<TableCell> = det_bboxes
+                .iter()
+                .map(|b| TableCell::new(b.clone(), 0.5))
+                .collect();
+            Self::sort_table_cells_boxes(&temp_cells, row_y_tolerance)
+        } else {
+            Self::sort_table_cells_boxes(cells, row_y_tolerance)
+        };
+
+        if match_sorted_indices.is_empty() || match_row_flags.is_empty() {
             return None;
         }
 
@@ -774,23 +987,42 @@ impl ResultStitcher {
             return None;
         }
 
-        let mut aligned_row_flags = Self::map_and_get_max(&table_cells_flag, &row_start_index);
-        aligned_row_flags.push(sorted_cell_indices.len());
-        row_start_index.push(sorted_cell_indices.len());
+        // Align match row flags with structure token row boundaries
+        let mut match_aligned = Self::map_and_get_max(&match_row_flags, &row_start_index);
+        match_aligned.push(match_sorted_indices.len());
+        row_start_index.push(
+            structure_tokens
+                .iter()
+                .filter(|t| Self::is_td_end_token(t))
+                .count(),
+        );
 
+        // --- Per-row matching: cell → OCR (PaddleX style) ---
+        // For each cell in the row, collect ALL OCR boxes with IoA > 0.7.
+        // No cross-row deduplication — each row independently checks all OCR boxes,
+        // matching PaddleX v2 behavior. The 0.7 IoA threshold naturally prevents
+        // false cross-row matches.
         let mut all_matched: Vec<std::collections::HashMap<usize, Vec<usize>>> = Vec::new();
 
-        for k in 0..aligned_row_flags.len().saturating_sub(1) {
-            let row_start = aligned_row_flags[k].min(sorted_cell_indices.len());
-            let row_end = aligned_row_flags[k + 1].min(sorted_cell_indices.len());
+        for k in 0..match_aligned.len().saturating_sub(1) {
+            let row_start = match_aligned[k].min(match_sorted_indices.len());
+            let row_end = match_aligned[k + 1].min(match_sorted_indices.len());
+
             let mut matched: std::collections::HashMap<usize, Vec<usize>> =
                 std::collections::HashMap::new();
 
-            for (local_idx, sorted_pos) in (row_start..row_end).enumerate() {
-                let cell_idx = sorted_cell_indices[sorted_pos];
-                let cell_box = &cells[cell_idx].bbox;
+            for (local_idx, &bbox_idx) in
+                match_sorted_indices[row_start..row_end].iter().enumerate()
+            {
+                // Use detected bbox directly when available, else structure cell bbox
+                let cell_box = cell_bboxes_override
+                    .and_then(|bbs| bbs.get(bbox_idx))
+                    .unwrap_or_else(|| &cells[bbox_idx.min(cells.len() - 1)].bbox);
+
                 for (ocr_idx, (_, ocr_region)) in ocr_candidates.iter().enumerate() {
-                    if Self::compute_inter(cell_box, &ocr_region.bounding_box) > 0.7 {
+                    // IoA = intersection / OCR_area (PaddleX compute_inter > 0.7)
+                    let ioa = ocr_region.bounding_box.ioa(cell_box);
+                    if ioa > 0.7 {
                         matched.entry(local_idx).or_default().push(ocr_idx);
                     }
                 }
@@ -799,6 +1031,8 @@ impl ResultStitcher {
             all_matched.push(matched);
         }
 
+        // --- Build td_to_cell_mapping by iterating structure tokens ---
+        // table.cells maps exactly 1:1 with td tokens in structure order.
         let mut td_to_cell_mapping: Vec<Option<usize>> = Vec::new();
         let mut matched_candidate_indices: std::collections::HashSet<usize> =
             std::collections::HashSet::new();
@@ -808,6 +1042,10 @@ impl ResultStitcher {
         let mut matched_row_idx = 0usize;
 
         for tag in structure_tokens {
+            if tag == "<tr>" {
+                td_index = 0; // Reset cell index at row start
+                continue;
+            }
             if !Self::is_td_end_token(tag) {
                 continue;
             }
@@ -821,14 +1059,17 @@ impl ResultStitcher {
                 matched_candidate_indices.extend(indices.iter().copied());
             }
 
-            let mapped_cell_idx =
-                aligned_row_flags
-                    .get(matched_row_idx)
-                    .copied()
-                    .and_then(|row_start| {
-                        let sorted_pos = row_start + td_index;
-                        sorted_cell_indices.get(sorted_pos).copied()
-                    });
+            // Map td position to the original cell index via sorted ordering.
+            // match_aligned[matched_row_idx] + td_index gives the position in the
+            // sorted cell list, and match_sorted_indices maps that back to cells[].
+            let mapped_cell_idx = match_aligned
+                .get(matched_row_idx)
+                .copied()
+                .and_then(|row_start| {
+                    let sorted_pos = row_start + td_index;
+                    match_sorted_indices.get(sorted_pos).copied()
+                })
+                .filter(|&idx| idx < cells.len());
 
             td_to_cell_mapping.push(mapped_cell_idx);
 
@@ -852,7 +1093,6 @@ impl ResultStitcher {
                 && td_count >= row_start_index[matched_row_idx + 1]
             {
                 matched_row_idx += 1;
-                td_index = 0;
             }
         }
 
@@ -863,6 +1103,54 @@ impl ResultStitcher {
         }
     }
 
+    /// Collects cell texts in the order they appear in structure tokens.
+    ///
+    /// Uses grid-based `(row, col)` matching when cells have grid info, which
+    /// correctly handles rowspan/colspan cases where cells.len() != td_count.
+    /// Falls back to index-based matching when grid info is unavailable.
+    ///
+    /// # Returns
+    ///
+    /// One entry per grid item produced by `parse_cell_grid_info(tokens)`, in that
+    /// order. An entry is `None` when no cell is found for it or the found cell has
+    /// no text. Returns an empty vector when `cells` is empty.
+    fn collect_cell_texts_for_tokens(
+        cells: &[TableCell],
+        tokens: &[String],
+    ) -> Vec<Option<String>> {
+        if cells.is_empty() {
+            return Vec::new();
+        }
+
+        // Parse grid positions for each <td> token
+        let token_grid = parse_cell_grid_info(tokens);
+
+        // Build a lookup from (row, col) -> cell index for cells that have grid info.
+        // Cells without a row or column are skipped; when two cells share a position,
+        // the later one wins.
+        let grid_to_cell: std::collections::HashMap<(usize, usize), usize> = cells
+            .iter()
+            .enumerate()
+            .filter_map(|(cell_idx, cell)| Some(((cell.row?, cell.col?), cell_idx)))
+            .collect();
+
+        if grid_to_cell.is_empty() {
+            // Fallback: cells don't have grid info, use index-based matching
+            return (0..token_grid.len())
+                .map(|i| cells.get(i).and_then(|cell| cell.text.clone()))
+                .collect();
+        }
+
+        // Grid-based matching: match tokens to cells by (row, col) position
+        token_grid
+            .iter()
+            .map(|gi| {
+                let cell_idx = *grid_to_cell.get(&(gi.row, gi.col))?;
+                cells.get(cell_idx)?.text.clone()
+            })
+            .collect()
+    }
+
     /// Sort table cells row-by-row (top-to-bottom, left-to-right) and return row flags.
     ///
     /// Returns `(sorted_indices, flags)` where `flags` contains cumulative row starts.
@@ -1128,75 +1416,6 @@ impl ResultStitcher {
         (split_regions, split_ocr_indices, cell_assignments)
     }
 
-    /// Attaches recognized formulas to the best-matching table cells.
-    ///
-    /// This mirrors behavior where formula recognition results are merged into the
-    /// OCR content used for table structure recognition. Here we approximate that behavior by:
-    /// - For each formula, finding the cell with maximum IoU
-    /// - If IoU exceeds a small threshold, appending `$latex$` to that cell's text
-    fn attach_formulas_to_cells(
-        table: &mut TableResult,
-        formulas: &[FormulaResult],
-        cfg: &StitchConfig,
-    ) {
-        if formulas.is_empty() || table.cells.is_empty() {
-            return;
-        }
-
-        for formula in formulas {
-            let bbox = &formula.bbox;
-
-            // Skip degenerate boxes
-            let w = bbox.x_max() - bbox.x_min();
-            let h = bbox.y_max() - bbox.y_min();
-            if w <= 1.0 || h <= 1.0 {
-                continue;
-            }
-
-            // Only consider formulas that overlap the table bbox at all
-            if !Self::is_overlapping(&table.bbox, bbox, cfg) {
-                continue;
-            }
-
-            // Find best-matching cell by IoU
-            let mut best_cell_idx: Option<usize> = None;
-            let mut best_iou = 0.0f32;
-
-            for (cell_idx, cell) in table.cells.iter().enumerate() {
-                let iou = Self::calculate_iou(&cell.bbox, bbox);
-                if iou > best_iou {
-                    best_iou = iou;
-                    best_cell_idx = Some(cell_idx);
-                }
-            }
-
-            if let Some(cell_idx) = best_cell_idx
-                && best_iou > cfg.formula_to_cell_min_iou
-            {
-                let cell = &mut table.cells[cell_idx];
-
-                // Append formula as LaTeX wrapped in $...$
-                let formatted = if formula.latex.starts_with('$') && formula.latex.ends_with('$') {
-                    formula.latex.clone()
-                } else {
-                    format!("${}$", formula.latex)
-                };
-
-                match &mut cell.text {
-                    Some(existing) => {
-                        if !existing.is_empty() {
-                            existing.push(' ');
-                        }
-                        existing.push_str(&formatted);
-                    }
-                    None => {
-                        cell.text = Some(formatted);
-                    }
-                }
-            }
-        }
-    }
-
     /// Calculates the Intersection over Union (IoU) between two bounding boxes.
     fn calculate_iou(bbox1: &BoundingBox, bbox2: &BoundingBox) -> f32 {
         let x1_min = bbox1.x_min();
@@ -1257,6 +1476,81 @@ impl ResultStitcher {
         dis + dis_2.min(dis_3)
     }
 
+    /// Marks small inline formulas to be absorbed into the text flow.
+    ///
+    /// PaddleX: Small formula elements should be absorbed into the text flow,
+    /// not kept as separate layout elements.
+    ///
+    /// Despite the name, nothing is added to `_text_regions` here. For every
+    /// `Formula` element that has non-empty text and a bbox area below
+    /// `INLINE_FORMULA_MAX_AREA`, this function clears `text` and `order_index`, so
+    /// the formula element itself won't be rendered. The corresponding TextRegion
+    /// with label="formula" (already created in structure.rs) is expected to carry
+    /// the content instead and be handled with proper $...$ wrapping.
+    ///
+    /// Formula elements without text (or with empty text) and formulas at or above
+    /// the area threshold are left untouched.
+    ///
+    /// # Arguments
+    ///
+    /// * `elements` - Layout elements; matching formula elements are cleared in place.
+    /// * `_text_regions` - Currently unused; kept so the call site stays unchanged.
+    /// * `_cfg` - Currently unused; kept so the call site stays unchanged.
+    fn inject_inline_formulas(
+        elements: &mut [LayoutElement],
+        _text_regions: &mut Vec<TextRegion>,
+        _cfg: &StitchConfig,
+    ) {
+        // Size threshold: formulas smaller than 80k pixels² are likely inline
+        const INLINE_FORMULA_MAX_AREA: f32 = 80000.0;
+
+        // Number of formula elements cleared so far; only reported in the summary
+        // debug log at the end.
+        let mut inline_formula_count = 0usize;
+
+        for (idx, element) in elements.iter_mut().enumerate() {
+            if element.element_type != LayoutElementType::Formula {
+                continue;
+            }
+
+            // Only process formulas that have text
+            let Some(formula_text) = element.text.as_deref().filter(|t| !t.is_empty()) else {
+                continue;
+            };
+
+            let formula_area = element.bbox.area();
+            tracing::debug!(
+                "Formula idx {}: area={:.1}, text={}",
+                idx,
+                formula_area,
+                formula_text
+            );
+
+            // Small formulas are treated as inline
+            if formula_area < INLINE_FORMULA_MAX_AREA {
+                tracing::debug!(
+                    "Marking formula idx {} as inline (area {:.1} < {})",
+                    idx,
+                    formula_area,
+                    INLINE_FORMULA_MAX_AREA
+                );
+
+                // Clear the element right away so it won't be rendered separately
+                tracing::debug!(
+                    "Clearing inline formula idx {} to use TextRegion with label=formula",
+                    idx
+                );
+                element.text = None;
+                element.order_index = None;
+                inline_formula_count += 1;
+            }
+        }
+
+        if inline_formula_count > 0 {
+            tracing::debug!("Marked {} formulas as inline", inline_formula_count);
+        }
+    }
+
     fn stitch_layout_elements(
         elements: &mut [LayoutElement],
         text_regions: &[TextRegion],
@@ -1301,6 +1595,73 @@ impl ResultStitcher {
                     element.element_type,
                     element_texts.len()
                 );
+
+                // Debug: log all text regions being joined
+                for (region, text) in &element_texts {
+                    tracing::debug!("  - region with label={:?}, text={:?}", region.label, text);
+                }
+
+                // Compute seg metadata (seg_start_x, seg_end_x, num_lines) for get_seg_flag.
+                // Sort a copy to find first/last spans and count lines.
+                let mut sorted_for_meta = element_texts.clone();
+                sorted_for_meta.sort_by(|(r1, _), (r2, _)| {
+                    r1.bounding_box
+                        .center()
+                        .y
+                        .partial_cmp(&r2.bounding_box.center().y)
+                        .unwrap_or(Ordering::Equal)
+                });
+                let mut lines = Vec::new();
+                let mut current_line = Vec::new();
+                for item in std::mem::take(&mut sorted_for_meta) {
+                    if current_line.is_empty() {
+                        current_line.push(item);
+                    } else {
+                        let first_in_line = &current_line[0].0.bounding_box;
+                        if Self::is_same_text_line_bbox(first_in_line, &item.0.bounding_box, cfg) {
+                            current_line.push(item);
+                        } else {
+                            current_line.sort_by(|(r1, _), (r2, _)| {
+                                r1.bounding_box
+                                    .center()
+                                    .x
+                                    .partial_cmp(&r2.bounding_box.center().x)
+                                    .unwrap_or(Ordering::Equal)
+                            });
+                            lines.push(current_line);
+                            current_line = vec![item];
+                        }
+                    }
+                }
+                if !current_line.is_empty() {
+                    current_line.sort_by(|(r1, _), (r2, _)| {
+                        r1.bounding_box
+                            .center()
+                            .x
+                            .partial_cmp(&r2.bounding_box.center().x)
+                            .unwrap_or(Ordering::Equal)
+                    });
+                    lines.push(current_line);
+                }
+                for mut line in lines {
+                    sorted_for_meta.append(&mut line);
+                }
+
+                // seg_start_x: first span's left edge (PaddleX: line[0].spans[0].box[0])
+                element.seg_start_x = Some(sorted_for_meta[0].0.bounding_box.x_min());
+                // seg_end_x: last span's right edge (PaddleX: line[-1].spans[-1].box[2])
+                element.seg_end_x = Some(sorted_for_meta.last().unwrap().0.bounding_box.x_max());
+
+                // Count distinct lines (Y-groups)
+                let mut num_lines = 1u32;
+                let mut prev_bbox = &sorted_for_meta[0].0.bounding_box;
+                for (region, _) in &sorted_for_meta[1..] {
+                    if !Self::is_same_text_line_bbox(prev_bbox, &region.bounding_box, cfg) {
+                        num_lines += 1;
+                        prev_bbox = &region.bounding_box;
+                    }
+                }
+                element.num_lines = Some(num_lines);
             }
 
             Self::sort_and_join_texts(&mut element_texts, Some(&element.bbox), cfg, |joined| {
@@ -1309,6 +1670,79 @@ impl ResultStitcher {
         }
     }
 
+    /// Fills text-less `Formula` elements with LaTeX from the best-matching `FormulaResult`.
+    ///
+    /// Acts as a safety net when earlier OCR matching left a formula element without text:
+    /// match by bidirectional IoA, then by center containment. `_cfg` is currently unused.
+    fn fill_formula_elements(
+        elements: &mut [LayoutElement],
+        formulas: &[FormulaResult],
+        _cfg: &StitchConfig,
+    ) {
+        for element in elements.iter_mut() {
+            if element.element_type != LayoutElementType::Formula {
+                continue;
+            }
+
+            // Skip if element already has content from OCR matching
+            if element.text.is_some() {
+                continue;
+            }
+
+            // Score every formula by the larger of the two directional IoA values and keep
+            // the best. IoA (intersection / self_area) is much more permissive than IoU for
+            // size-mismatched bboxes. PaddleX uses simple intersection overlap (>3px).
+            let mut best_formula: Option<&FormulaResult> = None;
+            let mut best_score = 0.0f32;
+
+            for formula in formulas {
+                let ioa_element = element.bbox.ioa(&formula.bbox);
+                let ioa_formula = formula.bbox.ioa(&element.bbox);
+                let score = ioa_element.max(ioa_formula);
+                if score > best_score {
+                    best_score = score;
+                    best_formula = Some(formula);
+                }
+            }
+
+            // Fallback when the best IoA is below 0.05: among formulas whose center lies in
+            // the element bbox (or vice versa), take the one with the nearest center.
+            if best_score < 0.05 {
+                let elem_center = element.bbox.center();
+                let mut best_dist = f32::MAX;
+
+                for formula in formulas {
+                    let fc = formula.bbox.center();
+                    let fc_inside = fc.x >= element.bbox.x_min()
+                        && fc.x <= element.bbox.x_max()
+                        && fc.y >= element.bbox.y_min()
+                        && fc.y <= element.bbox.y_max();
+                    let ec_inside = elem_center.x >= formula.bbox.x_min()
+                        && elem_center.x <= formula.bbox.x_max()
+                        && elem_center.y >= formula.bbox.y_min()
+                        && elem_center.y <= formula.bbox.y_max();
+
+                    if fc_inside || ec_inside {
+                        let dx = fc.x - elem_center.x;
+                        let dy = fc.y - elem_center.y;
+                        let dist = dx * dx + dy * dy;
+                        if dist < best_dist {
+                            best_dist = dist;
+                            best_formula = Some(formula);
+                            best_score = 0.05; // lift to the acceptance threshold checked below
+                        }
+                    }
+                }
+            }
+
+            if best_score >= 0.05
+                && let Some(formula) = best_formula
+            {
+                element.text = Some(formula.latex.clone());
+            }
+        }
+    }
+
     /// Checks if two bounding boxes overlap significantly (intersection dimensions > 3px).
     /// Matches `get_overlap_boxes_idx` logic.
     fn is_overlapping(bbox1: &BoundingBox, bbox2: &BoundingBox, cfg: &StitchConfig) -> bool {
@@ -1333,6 +1767,30 @@ impl ResultStitcher {
         inter_w > cfg.overlap_min_pixels && inter_h > cfg.overlap_min_pixels
     }
 
+    /// Checks whether two OCR spans should be grouped into the same visual line.
+    ///
+    /// Primary signal (PaddleX-style line-height overlap), with heights clamped to >= 1.0:
+    /// vertical_overlap / min(height1, height2) >= `cfg.line_height_iou_threshold`.
+    /// Fallback: center-Y gap <= max(0.5 * min height, 1.0, 0.25 * `cfg.same_line_y_tolerance`).
+    fn is_same_text_line_bbox(
+        bbox1: &BoundingBox,
+        bbox2: &BoundingBox,
+        cfg: &StitchConfig,
+    ) -> bool {
+        let h1 = (bbox1.y_max() - bbox1.y_min()).max(1.0);
+        let h2 = (bbox2.y_max() - bbox2.y_min()).max(1.0);
+        let inter_h =
+            (bbox1.y_max().min(bbox2.y_max()) - bbox1.y_min().max(bbox2.y_min())).max(0.0);
+        let overlap_ratio = inter_h / h1.min(h2);
+        if overlap_ratio >= cfg.line_height_iou_threshold {
+            return true;
+        }
+
+        let adaptive_tol = (h1.min(h2) * 0.5).max(1.0);
+        let center_delta = (bbox1.center().y - bbox2.center().y).abs();
+        center_delta <= adaptive_tol.max(cfg.same_line_y_tolerance * 0.25)
+    }
+
     fn sort_and_join_texts<F>(
         texts: &mut Vec<(&TextRegion, &str)>,
         container_bbox: Option<&BoundingBox>,
@@ -1347,92 +1805,172 @@ impl ResultStitcher {
 
         // Sort spatially: top-to-bottom, then left-to-right
         texts.sort_by(|(r1, _), (r2, _)| {
-            let c1 = r1.bounding_box.center();
-            let c2 = r2.bounding_box.center();
-
-            // Y-difference tolerance for same line (10 pixels)
-            if (c1.y - c2.y).abs() < cfg.same_line_y_tolerance {
-                c1.x.partial_cmp(&c2.x).unwrap_or(Ordering::Equal)
+            r1.bounding_box
+                .center()
+                .y
+                .partial_cmp(&r2.bounding_box.center().y)
+                .unwrap_or(Ordering::Equal)
+        });
+        let mut lines = Vec::new();
+        let mut current_line = Vec::new();
+        for item in std::mem::take(texts) {
+            if current_line.is_empty() {
+                current_line.push(item);
             } else {
-                c1.y.partial_cmp(&c2.y).unwrap_or(Ordering::Equal)
+                let first_in_line = &current_line[0].0.bounding_box;
+                if Self::is_same_text_line_bbox(first_in_line, &item.0.bounding_box, cfg) {
+                    current_line.push(item);
+                } else {
+                    current_line.sort_by(|(r1, _), (r2, _)| {
+                        r1.bounding_box
+                            .center()
+                            .x
+                            .partial_cmp(&r2.bounding_box.center().x)
+                            .unwrap_or(Ordering::Equal)
+                    });
+                    lines.push(current_line);
+                    current_line = vec![item];
+                }
             }
-        });
+        }
+        if !current_line.is_empty() {
+            current_line.sort_by(|(r1, _), (r2, _)| {
+                r1.bounding_box
+                    .center()
+                    .x
+                    .partial_cmp(&r2.bounding_box.center().x)
+                    .unwrap_or(Ordering::Equal)
+            });
+            lines.push(current_line);
+        }
+        for mut line in lines {
+            texts.append(&mut line);
+        }
 
         // Smart text joining following format_line logic:
         // - Texts on the same line are joined directly (no separator)
         // - A space is added only if the previous text ends with an English letter
         // - Newlines are added conditionally based on geometric gap (paragraph break detection)
         let mut result = String::new();
-        let mut prev_y: Option<f32> = None;
         let mut prev_region: Option<&TextRegion> = None;
 
+        tracing::debug!(
+            "sort_and_join_texts: processing {} text regions",
+            texts.len()
+        );
+
         for (region, text) in texts.iter() {
             if text.is_empty() {
                 continue;
             }
 
-            let current_y = region.bounding_box.center().y;
-
-            if let Some(py) = prev_y {
-                // Check if this is a new line (Y-difference > tolerance)
-                if (current_y - py).abs() > cfg.same_line_y_tolerance {
+            if let Some(last_region) = prev_region {
+                if !Self::is_same_text_line_bbox(
+                    &last_region.bounding_box,
+                    &region.bounding_box,
+                    cfg,
+                ) {
                     // New visual line detected.
-                    // Check for hyphenation: if previous text ends with '-' and current starts with lowercase,
-                    // this is likely a word break that should be joined without the hyphen.
-                    let prev_ends_hyphen = result.ends_with('-');
-                    let current_starts_lower =
-                        text.chars().next().is_some_and(|c| c.is_lowercase());
+                    // Decide whether to insert '\n' (hard break) or ' ' (soft break/wrap).
+                    let mut add_newline = false;
+                    let mut is_line_wrap = false;
+
+                    if let Some(container) = container_bbox {
+                        let container_width = container.x_max() - container.x_min();
+                        let right_gap = container.x_max() - last_region.bounding_box.x_max();
+                        let tail_char = last_non_whitespace_char(&result);
+                        let ends_with_non_break_punct =
+                            tail_char.is_some_and(is_non_break_line_end_punctuation);
+                        // PaddleX: English lines use a larger right-gap threshold.
+                        let paragraph_gap_ratio =
+                            if tail_char.is_some_and(|c| c.is_ascii_alphabetic()) {
+                                0.5
+                            } else {
+                                0.3
+                            };
+
+                        if !ends_with_non_break_punct
+                            && right_gap > container_width * paragraph_gap_ratio
+                        {
+                            // Previous line ended far from the right edge → paragraph break.
+                            add_newline = true;
+                        } else {
+                            // Previous line extends close to the right edge → line wrap.
+                            is_line_wrap = true;
+                        }
+                    }
 
-                    if prev_ends_hyphen && current_starts_lower {
-                        // Remove the trailing hyphen and join directly (dehyphenation)
+                    // Dehyphenation: only strip trailing hyphen when the previous line
+                    // is a wrapped line (extends close to container right edge).
+                    // This preserves hyphens in compound words like "real-time",
+                    // "end-to-end", "one-to-many" that end short lines.
+                    // Matches PaddleX format_line behavior where hyphens are stripped
+                    // at line-wrap boundaries.
+                    let prev_ends_hyphen = result.ends_with('-');
+                    if prev_ends_hyphen && is_line_wrap {
+                        // Line wraps at hyphen → word-break hyphen, remove it
                         result.pop();
                         // Don't add any separator - words should be joined
+                    } else if add_newline {
+                        if !result.ends_with('\n') {
+                            result.push('\n');
+                        }
                     } else {
-                        // Decide whether to insert '\n' (hard break) or ' ' (soft break/wrap).
-                        let mut add_newline = false;
-
-                        if let Some(container) = container_bbox
-                            && let Some(last_region) = prev_region
+                        // Soft wrap - treat as space if needed (English) or join (CJK)
+                        if let Some(last_char) = result.chars().last()
+                            && last_char != '\n'
+                            && needs_space_after(last_char)
                         {
-                            let container_width = container.x_max() - container.x_min();
-                            // If the previous line ended far from the right edge, it's likely a paragraph break.
-                            // Heuristic: gap > 30% of container width
-                            // Note: We use container.x_max because we assume LTR text.
-                            let right_gap = container.x_max() - last_region.bounding_box.x_max();
-                            if right_gap > container_width * 0.3 {
-                                add_newline = true;
-                            }
-                        }
-                        // If no container info, we default to NO newline (soft wrap) to avoid discontinuity,
-                        // unless specific patterns dictate otherwise (future work).
-
-                        if add_newline {
-                            if !result.ends_with('\n') {
-                                result.push('\n');
-                            }
-                        } else {
-                            // Soft wrap - treat as space if needed (English) or join (CJK)
-                            if let Some(last_char) = result.chars().last()
-                                && last_char != '\n'
-                                && needs_space_after(last_char)
-                            {
-                                result.push(' ');
-                            }
+                            result.push(' ');
                         }
                     }
                 } else {
                     // Same visual line - join with smart spacing
-                    if let Some(last_char) = result.chars().last()
+                    // PaddleX format_line: add space after English letters OR after formulas
+                    let needs_spacing = if let Some(last_char) = result.chars().last()
                         && last_char != '\n'
                         && needs_space_after(last_char)
                     {
+                        true
+                    } else {
+                        // PaddleX: add space after formula when next content is on same line
+                        last_region.is_formula()
+                    };
+
+                    if needs_spacing {
                         result.push(' ');
                     }
                 }
             }
 
-            result.push_str(text);
-            prev_y = Some(current_y);
+            // PaddleX: formula spans are wrapped with $...$ delimiters
+            // Inline formulas (mixed with text on same line): $formula$
+            // Display formulas (standalone line): $$formula$$ (display math)
+            let is_formula = region.is_formula();
+            let text_to_add = if is_formula {
+                // Don't double-wrap if formula model already added delimiters
+                let already_wrapped =
+                    text.starts_with('$') || text.starts_with("\\(") || text.starts_with("\\[");
+                if already_wrapped {
+                    text.to_string()
+                } else {
+                    // Check if this is a display formula (starts a new line with no other content yet on this line)
+                    // Display formulas typically appear at the start of a line after a newline
+                    let is_display = result.is_empty() || result.ends_with('\n');
+
+                    if is_display {
+                        // Display formula: $$...$$
+                        format!("$${}$$", text)
+                    } else {
+                        // Inline formula: $...$
+                        format!("${}$", text)
+                    }
+                }
+            } else {
+                text.to_string()
+            };
+
+            result.push_str(&text_to_add);
             prev_region = Some(region);
         }
 
@@ -1441,10 +1979,44 @@ impl ResultStitcher {
         update_fn(joined);
     }
 
-    /// Sorts layout elements using the XY-cut algorithm.
+    /// Reorders `elements` into reading order using the `xycut_enhanced` algorithm.
     ///
-    /// When region blocks are not available, this provides a robust column-aware reading
-    /// order that matches PP-StructureV3's `sort_by_xycut` behavior.
+    /// Uses cross-layout detection, direction-aware XY-cut, overlapping box shrinking,
+    /// weighted distance insertion, and child block association for accurate reading order.
+    fn sort_layout_elements_enhanced(
+        elements: &mut Vec<LayoutElement>,
+        page_width: f32,
+        page_height: f32,
+    ) {
+        use oar_ocr_core::processors::layout_sorting::{SortableElement, sort_layout_enhanced};
+
+        if elements.is_empty() {
+            return;
+        }
+
+        let sortable_elements: Vec<_> = elements
+            .iter()
+            .map(|e| SortableElement {
+                bbox: e.bbox.clone(),
+                element_type: e.element_type,
+                num_lines: e.num_lines,
+            })
+            .collect();
+
+        let sorted_indices = sort_layout_enhanced(&sortable_elements, page_width, page_height);
+        if sorted_indices.len() != elements.len() {
+            return; // unexpected index count: keep the current order
+        }
+
+        let sorted_elements: Vec<_> = sorted_indices
+            .into_iter()
+            .map(|idx| elements[idx].clone()) // indexing panics on an out-of-range index
+            .collect();
+        *elements = sorted_elements;
+    }
+
+    /// Sorts layout elements using the XY-cut algorithm (legacy fallback).
+    #[allow(dead_code)]
     fn sort_layout_elements(elements: &mut Vec<LayoutElement>, _width: f32, _cfg: &StitchConfig) {
         if elements.len() <= 1 {
             return;
@@ -1477,6 +2049,15 @@ fn needs_space_after(c: char) -> bool {
     c.is_ascii_alphabetic()
 }
 
+fn last_non_whitespace_char(text: &str) -> Option<char> {
+    text.chars().rev().find(|c| !c.is_whitespace()) // scan from the end; None if all whitespace
+}
+
+/// True for a comma, semicolon or colon (ASCII or full-width) or '、': no hard break after it.
+fn is_non_break_line_end_punctuation(c: char) -> bool {
+    matches!(c, ',' | '，' | '、' | ';' | '；' | ':' | '：')
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -1492,6 +2073,7 @@ mod tests {
             confidence: Some(0.9),
             orientation_angle: None,
             word_boxes: None,
+            label: None,
         }
     }
 
@@ -1544,6 +2126,7 @@ mod tests {
             confidence: Some(0.9),
             orientation_angle: None,
             word_boxes: None,
+            label: None,
         };
         let r2 = TextRegion {
             bounding_box: b2.clone(),
@@ -1553,6 +2136,7 @@ mod tests {
             confidence: Some(0.9),
             orientation_angle: None,
             word_boxes: None,
+            label: None,
         };
         let mut texts = vec![(&r1, "A"), (&r2, "B")];
         let cfg = StitchConfig::default();
@@ -1563,6 +2147,70 @@ mod tests {
         assert_eq!(joined, "A B");
     }
 
+    #[test]
+    fn test_sort_and_join_texts_english_line_uses_larger_paragraph_gap_threshold() {
+        let r1 = make_region(BoundingBox::from_coords(0.0, 0.0, 60.0, 10.0), "Line");
+        let r2 = make_region(BoundingBox::from_coords(0.0, 20.0, 40.0, 30.0), "next");
+        let mut texts = vec![(&r1, "Line"), (&r2, "next")];
+        let cfg = StitchConfig::default();
+        let container = BoundingBox::from_coords(0.0, 0.0, 100.0, 40.0);
+        let mut joined = String::new();
+        ResultStitcher::sort_and_join_texts(&mut texts, Some(&container), &cfg, |j| joined = j);
+        assert_eq!(joined, "Line next"); // tail 'e': gap 100-60=40 is not > 0.5*100 → soft wrap
+    }
+
+    #[test]
+    fn test_sort_and_join_texts_non_english_tail_keeps_original_paragraph_gap_threshold() {
+        let r1 = make_region(BoundingBox::from_coords(0.0, 0.0, 60.0, 10.0), "2024");
+        let r2 = make_region(BoundingBox::from_coords(0.0, 20.0, 40.0, 30.0), "next");
+        let mut texts = vec![(&r1, "2024"), (&r2, "next")];
+        let cfg = StitchConfig::default();
+        let container = BoundingBox::from_coords(0.0, 0.0, 100.0, 40.0);
+        let mut joined = String::new();
+        ResultStitcher::sort_and_join_texts(&mut texts, Some(&container), &cfg, |j| joined = j);
+        assert_eq!(joined, "2024\nnext"); // tail '4': gap 100-60=40 > 0.3*100 → hard break
+    }
+
+    #[test]
+    fn test_sort_and_join_texts_non_break_punctuation_suppresses_newline() {
+        let r1 = make_region(BoundingBox::from_coords(0.0, 0.0, 20.0, 10.0), "Note:");
+        let r2 = make_region(BoundingBox::from_coords(0.0, 20.0, 40.0, 30.0), "next");
+        let mut texts = vec![(&r1, "Note:"), (&r2, "next")];
+        let cfg = StitchConfig::default();
+        let container = BoundingBox::from_coords(0.0, 0.0, 100.0, 40.0);
+        let mut joined = String::new();
+        ResultStitcher::sort_and_join_texts(&mut texts, Some(&container), &cfg, |j| joined = j);
+        assert_eq!(joined, "Note:next"); // ':' vetoes the break; no space since ':' is no letter
+    }
+
+    #[test]
+    fn test_normalize_checkbox_symbols_in_table_checkbox_like() {
+        let mut cells = vec![
+            TableCell::new(BoundingBox::from_coords(0.0, 0.0, 10.0, 10.0), 1.0).with_text("ü"),
+            TableCell::new(BoundingBox::from_coords(10.0, 0.0, 20.0, 10.0), 1.0).with_text("X"),
+            TableCell::new(BoundingBox::from_coords(20.0, 0.0, 30.0, 10.0), 1.0).with_text("L"),
+        ];
+
+        ResultStitcher::normalize_checkbox_symbols_in_table(&mut cells);
+
+        assert_eq!(cells[0].text.as_deref(), Some("✓"));
+        assert_eq!(cells[1].text.as_deref(), Some("✗"));
+        assert_eq!(cells[2].text.as_deref(), Some("✓")); // "L" remapped, unlike the next test
+    }
+
+    #[test]
+    fn test_normalize_checkbox_symbols_in_table_keeps_ambiguous_when_not_checkbox_like() {
+        let mut cells = vec![
+            TableCell::new(BoundingBox::from_coords(0.0, 0.0, 10.0, 10.0), 1.0).with_text("L"),
+            TableCell::new(BoundingBox::from_coords(10.0, 0.0, 20.0, 10.0), 1.0).with_text("A"),
+        ];
+
+        ResultStitcher::normalize_checkbox_symbols_in_table(&mut cells);
+
+        assert_eq!(cells[0].text.as_deref(), Some("L")); // kept, unlike the checkbox-like case
+        assert_eq!(cells[1].text.as_deref(), Some("A"));
+    }
+
     #[test]
     fn test_find_row_start_index_with_compact_td_tokens() {
         let tokens = vec![
@@ -1632,6 +2280,7 @@ mod tests {
             &structure_tokens,
             &ocr_candidates,
             10.0,
+            None,
         )
         .expect("expected row-aware matching result");
 
diff --git a/src/oarocr/structure.rs b/src/oarocr/structure.rs
index 5a6dfcc..a1b0e31 100644
--- a/src/oarocr/structure.rs
+++ b/src/oarocr/structure.rs
@@ -1049,6 +1049,7 @@ impl OARStructureBuilder {
         // PP-StructureV3 overall OCR uses DB preprocess with:
         // - limit_side_len=736
         // - limit_type="min"
+        // - max_side_limit=4000
         // We fill these defaults here (only for the structure pipeline) unless the caller
         // explicitly overrides them via `text_detection_config`.
         let text_detection_adapter = if let Some(ref model_path) = self.text_detection_model {
@@ -1057,12 +1058,29 @@ impl OARStructureBuilder {
             // Note: image_batch_size batching not yet implemented for structure analysis
 
             let mut effective_cfg = self.text_detection_config.clone().unwrap_or_default();
+
+            // Table-heavy documents are sensitive to detection fragmentation.
+            // Match PaddleX's lower table-scene threshold when users don't override config.
+            let has_table_pipeline = self.table_classification_model.is_some()
+                || self.table_structure_recognition_model.is_some()
+                || self.wired_table_structure_model.is_some()
+                || self.wireless_table_structure_model.is_some()
+                || self.table_cell_detection_model.is_some()
+                || self.wired_table_cell_model.is_some()
+                || self.wireless_table_cell_model.is_some();
+            if self.text_detection_config.is_none() && has_table_pipeline {
+                effective_cfg.box_threshold = 0.4;
+            }
+
             if effective_cfg.limit_side_len.is_none() {
                 effective_cfg.limit_side_len = Some(736);
             }
             if effective_cfg.limit_type.is_none() {
                 effective_cfg.limit_type = Some(crate::processors::LimitType::Min);
             }
+            if effective_cfg.max_side_len.is_none() {
+                effective_cfg.max_side_len = Some(4000);
+            }
             builder = builder.with_config(effective_cfg);
 
             if let Some(ref ort_config) = self.ort_session_config {
@@ -1303,6 +1321,7 @@ impl OARStructure {
                         confidence: Some(score),
                         orientation_angle: None,
                         word_boxes: None,
+                        label: None,
                     });
                 }
             }
@@ -1364,6 +1383,7 @@ impl OARStructure {
                         confidence: Some(*score),
                         orientation_angle: None,
                         word_boxes: None,
+                        label: None,
                     });
                 }
             }
@@ -1601,6 +1621,7 @@ impl OARStructure {
                         confidence: Some(*score),
                         orientation_angle: None,
                         word_boxes: None,
+                        label: None,
                     });
                 }
             }
@@ -1830,7 +1851,7 @@ impl OARStructure {
         page_width: f32,
         page_height: f32,
     ) {
-        use oar_ocr_core::processors::layout_sorting::sort_layout_enhanced;
+        use oar_ocr_core::processors::layout_sorting::{SortableElement, sort_layout_enhanced};
 
         if layout_elements.is_empty() {
             return;
@@ -1838,7 +1859,11 @@ impl OARStructure {
 
         let sortable_elements: Vec<_> = layout_elements
             .iter()
-            .map(|e| (e.bbox.clone(), e.element_type))
+            .map(|e| SortableElement {
+                bbox: e.bbox.clone(),
+                element_type: e.element_type,
+                num_lines: e.num_lines,
+            })
             .collect();
 
         let sorted_indices = sort_layout_enhanced(&sortable_elements, page_width, page_height);
@@ -2172,6 +2197,7 @@ impl OARStructure {
                         confidence: Some(score),
                         orientation_angle: None,
                         word_boxes: None,
+                        label: None,
                     });
                 }
             }
@@ -2322,9 +2348,7 @@ impl OARStructure {
         // - For each OCR box that overlaps >= k table cells, split at cell boundaries
         // - Re-run recognition on each split crop
         // - Replace the original OCR box with the split boxes + texts
-        let has_detection_backed_table_cells = tables
-            .iter()
-            .any(|table| table.cells.iter().any(|cell| cell.confidence < 0.999));
+        let has_detection_backed_table_cells = tables.iter().any(|table| !table.is_e2e);
         if has_detection_backed_table_cells
             && !text_regions.is_empty()
             && let Some(ref text_rec_adapter) = self.pipeline.text_recognition_adapter
@@ -2411,6 +2435,23 @@ impl OARStructure {
             }
         }
 
+        // PaddleX: convert_formula_res_to_ocr_format — inject formula results into
+        // the overall OCR pool so they participate in normal block matching and table
+        // cell matching. The raw LaTeX text is used here (no $...$ wrapping);
+        // wrapping is handled by to_markdown() for formula elements, by
+        // stitch_tables() for table cells, and by sort_and_join_texts for inline formulas.
+        for formula in &formulas {
+            let w = formula.bbox.x_max() - formula.bbox.x_min();
+            let h = formula.bbox.y_max() - formula.bbox.y_min();
+            if w > 1.0 && h > 1.0 {
+                let mut region = crate::oarocr::TextRegion::new(formula.bbox.clone());
+                region.text = Some(formula.latex.clone().into());
+                region.confidence = Some(1.0);
+                region.label = Some("formula".into()); // Mark as formula for inline wrapping
+                text_regions.push(region);
+            }
+        }
+
         // Construct and return result
         // Ensure rectified_img is always set for markdown image extraction
         // If no rectification was applied, use current_image
diff --git a/src/oarocr/table_analyzer.rs b/src/oarocr/table_analyzer.rs
index 5114066..87de6d3 100644
--- a/src/oarocr/table_analyzer.rs
+++ b/src/oarocr/table_analyzer.rs
@@ -406,36 +406,49 @@ impl<'a> TableAnalyzer<'a> {
                 .or(self.wired_table_structure_adapter),
         };
 
-        let cell_adapter: Option<&TableCellDetectionAdapter> = if use_e2e_mode {
-            tracing::info!(
-                target: "structure",
-                table_index = idx,
-                table_type = ?table_type,
-                "Using E2E mode: skipping cell detection"
-            );
-            None
-        } else {
-            tracing::info!(
-                target: "structure",
-                table_index = idx,
-                table_type = ?table_type,
-                "Using cell detection mode (E2E disabled)"
-            );
-            match table_type {
-                TableType::Wired => self
-                    .wired_table_cell_adapter
-                    .or(self.table_cell_detection_adapter)
-                    .or(self.wireless_table_cell_adapter),
-                TableType::Wireless => self
-                    .wireless_table_cell_adapter
-                    .or(self.table_cell_detection_adapter)
-                    .or(self.wired_table_cell_adapter),
-                TableType::Unknown => self
-                    .table_cell_detection_adapter
-                    .or(self.wired_table_cell_adapter)
-                    .or(self.wireless_table_cell_adapter),
-            }
-        };
+        // Use cell detection when either:
+        // 1. E2E mode is disabled, OR
+        // 2. use_cells_trans_to_html is enabled (user wants detected cells instead of E2E cells)
+        let cell_adapter: Option<&TableCellDetectionAdapter> =
+            if !use_e2e_mode || use_cells_trans_to_html {
+                if use_cells_trans_to_html {
+                    tracing::info!(
+                        target: "structure",
+                        table_index = idx,
+                        table_type = ?table_type,
+                        "Using cell detection (cells_trans_to_html enabled)"
+                    );
+                } else {
+                    tracing::info!(
+                        target: "structure",
+                        table_index = idx,
+                        table_type = ?table_type,
+                        "Using cell detection mode (E2E disabled)"
+                    );
+                }
+                match table_type {
+                    TableType::Wired => self
+                        .wired_table_cell_adapter
+                        .or(self.table_cell_detection_adapter)
+                        .or(self.wireless_table_cell_adapter),
+                    TableType::Wireless => self
+                        .wireless_table_cell_adapter
+                        .or(self.table_cell_detection_adapter)
+                        .or(self.wired_table_cell_adapter),
+                    TableType::Unknown => self
+                        .table_cell_detection_adapter
+                        .or(self.wired_table_cell_adapter)
+                        .or(self.wireless_table_cell_adapter),
+                }
+            } else {
+                tracing::info!(
+                    target: "structure",
+                    table_index = idx,
+                    table_type = ?table_type,
+                    "Using E2E mode: skipping cell detection"
+                );
+                None
+            };
 
         let mut structure_tokens_opt: Option<Vec<String>> = None;
         let mut structure_score_opt: Option<f32> = None;
@@ -601,6 +614,7 @@ impl<'a> TableAnalyzer<'a> {
             }
         }
 
+        // Use detected cells when in cells_trans_to_html mode (non-E2E)
         if use_cells_trans_to_html && !use_e2e_mode && !detected_bboxes_crop.is_empty() {
             cells = detected_bboxes_crop
                 .iter()
@@ -610,7 +624,64 @@ impl<'a> TableAnalyzer<'a> {
                     TableCell::new(bbox, *score)
                 })
                 .collect();
-        } else if !detected_bboxes_crop.is_empty() && !cells.is_empty() {
+            // Clear structure tokens so that the code below regenerates them
+            // from the detected cell positions with proper grid info.
+            structure_tokens_opt = None;
+        }
+
+        // Approach C: In non-E2E mode with cell detection results, store detected
+        // bboxes in page coordinates for the stitcher's row-aware IoA-based matcher.
+        // The structure cells (in `cells`) retain grid metadata (row, col, span);
+        // detected bboxes travel separately for better OCR matching geometry.
+        let detected_page_bboxes: Option<Vec<BoundingBox>> =
+            if !use_e2e_mode && !use_cells_trans_to_html && !detected_bboxes_crop.is_empty() {
+                Some(
+                    detected_bboxes_crop
+                        .iter()
+                        .map(|bbox_crop| bbox_crop.translate(table_x_offset, table_y_offset))
+                        .collect(),
+                )
+            } else {
+                None
+            };
+
+        // If we have cells but no structure tokens, generate structure from cell positions.
+        // This ensures cells are ordered correctly to match the generated tokens.
+        if !cells.is_empty() && structure_tokens_opt.is_none() {
+            let cell_bboxes_crop: Vec<_> = cells
+                .iter()
+                .map(|c| {
+                    BoundingBox::from_coords(
+                        c.bbox.x_min() - table_x_offset,
+                        c.bbox.y_min() - table_y_offset,
+                        c.bbox.x_max() - table_x_offset,
+                        c.bbox.y_max() - table_y_offset,
+                    )
+                })
+                .collect();
+
+            if let Some((generated_tokens, cell_order)) =
+                table_cells_to_html_structure(&cell_bboxes_crop, 5.0)
+            {
+                let mut reordered_cells = Vec::with_capacity(cell_order.len());
+                for (source_idx, grid_info) in cell_order {
+                    if let Some(source_cell) = cells.get(source_idx) {
+                        let mut cell = source_cell.clone();
+                        cell = cell
+                            .with_position(grid_info.row, grid_info.col)
+                            .with_span(grid_info.row_span, grid_info.col_span);
+                        reordered_cells.push(cell);
+                    }
+                }
+                if !reordered_cells.is_empty() {
+                    cells = reordered_cells;
+                    structure_tokens_opt = Some(generated_tokens);
+                }
+            }
+        }
+
+        // Fallback: detection produced boxes but `cells` is still empty; try to build from them
+        if !detected_bboxes_crop.is_empty() && cells.is_empty() {
             let structure_bboxes_crop: Vec<_> = cells
                 .iter()
                 .map(|c| {
@@ -753,7 +824,8 @@ impl<'a> TableAnalyzer<'a> {
         let mut final_result = TableResult::new(table_bbox.clone(), table_type)
             .with_cells(cells)
             .with_html_structure(html_structure)
-            .with_structure_tokens(structure_tokens);
+            .with_structure_tokens(structure_tokens)
+            .with_e2e(use_e2e_mode);
 
         if let Some(score) = structure_score_opt {
             final_result = final_result.with_structure_confidence(score);
@@ -763,6 +835,10 @@ impl<'a> TableAnalyzer<'a> {
             final_result = final_result.with_classification_confidence(conf);
         }
 
+        if let Some(detected_bboxes) = detected_page_bboxes {
+            final_result = final_result.with_detected_cell_bboxes(detected_bboxes);
+        }
+
         Ok(Some(final_result))
     }
 }
@@ -881,12 +957,12 @@ mod tests {
         // Transform back to original
         let original = cell_bbox.rotate_back_to_original(90.0, rotated_width, rotated_height);
 
-        // For 90° rotation: (x, y) -> (rotated_height - 1 - y, x)
+        // For 90° rotation: (x, y) -> (rotated_height - y, x)
         // Original points: (10, 20), (30, 20), (30, 40), (10, 40)
-        // Expected: (179, 10), (179, 30), (159, 30), (159, 10)
-        assert!((original.x_min() - 159.0).abs() < 0.01);
+        // Expected corners in original space: (160, 10), (180, 10), (180, 30), (160, 30)
+        assert!((original.x_min() - 160.0).abs() < 0.01);
         assert!((original.y_min() - 10.0).abs() < 0.01);
-        assert!((original.x_max() - 179.0).abs() < 0.01);
+        assert!((original.x_max() - 180.0).abs() < 0.01);
         assert!((original.y_max() - 30.0).abs() < 0.01);
     }
 
@@ -896,14 +972,15 @@ mod tests {
         let rotated_height = 200;
 
         let cell_bbox = BoundingBox::from_coords(10.0, 20.0, 30.0, 40.0);
-        let original = cell_bbox.rotate_back_to_original(180.0, rotated_width, rotated_height);
-
-        // For 180° rotation: (x, y) -> (rotated_width - 1 - x, rotated_height - 1 - y)
-        // Expected corners: (69, 159), (89, 159), (89, 179), (69, 179)
-        assert!((original.x_min() - 69.0).abs() < 0.01);
-        assert!((original.y_min() - 159.0).abs() < 0.01);
-        assert!((original.x_max() - 89.0).abs() < 0.01);
-        assert!((original.y_max() - 179.0).abs() < 0.01);
+        let original =
+            cell_bbox.rotate_back_to_original(180.0, rotated_width, rotated_height as u32);
+
+        // For 180° rotation: (x, y) -> (rotated_width - x, rotated_height - y)
+        // Expected corners in original: (70, 160), (90, 160), (90, 180), (70, 180)
+        assert!((original.x_min() - 70.0).abs() < 0.01);
+        assert!((original.y_min() - 160.0).abs() < 0.01);
+        assert!((original.x_max() - 90.0).abs() < 0.01);
+        assert!((original.y_max() - 180.0).abs() < 0.01);
     }
 
     #[test]
@@ -914,12 +991,12 @@ mod tests {
         let cell_bbox = BoundingBox::from_coords(10.0, 20.0, 30.0, 40.0);
         let original = cell_bbox.rotate_back_to_original(270.0, rotated_width, rotated_height);
 
-        // For 270° rotation: (x, y) -> (y, rotated_width - 1 - x)
-        // Expected corners: (20, 69), (40, 69), (40, 89), (20, 89)
+        // For 270° rotation: (x, y) -> (y, rotated_width - x)
+        // Expected corners: (20, 70), (40, 70), (40, 90), (20, 90)
         assert!((original.x_min() - 20.0).abs() < 0.01);
-        assert!((original.y_min() - 69.0).abs() < 0.01);
+        assert!((original.y_min() - 70.0).abs() < 0.01);
         assert!((original.x_max() - 40.0).abs() < 0.01);
-        assert!((original.y_max() - 89.0).abs() < 0.01);
+        assert!((original.y_max() - 90.0).abs() < 0.01);
     }
 
     #[test]

From a6bc37e507a376a409b6f19cb9d6d18fc8f780f8 Mon Sep 17 00:00:00 2001
From: Wang Xin <xinwang614@gmail.com>
Date: Sat, 7 Mar 2026 01:14:27 +0000
Subject: [PATCH 2/4] fix(deps): update hayro to version 0.5 and ort to version
 2.0.0-rc.12; adjust usage in PDF processing and ONNX session handling

---
 Cargo.toml                                            | 2 +-
 examples/utils/pdf.rs                                 | 8 +++++---
 oar-ocr-core/Cargo.toml                               | 2 +-
 oar-ocr-core/src/core/inference/ort_infer_builders.rs | 4 ++--
 oar-ocr-core/src/core/inference/session.rs            | 4 ++--
 5 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 01fcd50..2020965 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -66,5 +66,5 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 clap = { version = "4.5.42", features = ["derive"] }
 tempfile = "3.19"
 ab_glyph = "0.2"
-hayro = "0.4"
+hayro = "0.5"
 regex = "1"
diff --git a/examples/utils/pdf.rs b/examples/utils/pdf.rs
index 7710c0d..c4ca156 100644
--- a/examples/utils/pdf.rs
+++ b/examples/utils/pdf.rs
@@ -5,7 +5,7 @@
 use std::path::Path;
 use std::sync::Arc;
 
-use hayro::Pdf;
+use hayro::hayro_syntax::Pdf;
 
 /// Error type for PDF processing.
 #[derive(Debug)]
@@ -118,15 +118,17 @@ impl PdfDocument {
             None => 2.0, // Default scale factor for better quality
         };
 
-        // Create render settings
+        // Create render settings (hayro 0.5 defaults bg_color to TRANSPARENT;
+        // we need WHITE so the RGBA→RGB conversion produces a white background)
         let settings = RenderSettings {
             x_scale: scale,
             y_scale: scale,
+            bg_color: hayro::vello_cpu::color::palette::css::WHITE,
             ..Default::default()
         };
 
         // Render the page using hayro's render function
-        let interpreter_settings = hayro::InterpreterSettings::default();
+        let interpreter_settings = hayro::hayro_interpret::InterpreterSettings::default();
         let pixmap = hayro::render(page, &interpreter_settings, &settings);
 
         // Convert pixmap to RGB image
diff --git a/oar-ocr-core/Cargo.toml b/oar-ocr-core/Cargo.toml
index 04f0544..5f9761a 100644
--- a/oar-ocr-core/Cargo.toml
+++ b/oar-ocr-core/Cargo.toml
@@ -39,7 +39,7 @@ regex = "1.11.1"
 serde = { version = "1.0", features = ["derive", "rc"] }
 serde_json = "1.0"
 toml = "1.0"
-ort = { version = "2.0.0-rc.11", default-features = false, features = [ "std", "ndarray", "tracing", "tls-native", "copy-dylibs" ] }
+ort = { version = "2.0.0-rc.12", default-features = false, features = [ "std", "ndarray", "tracing", "tls-native", "copy-dylibs" ] }
 ndarray = "0.17"
 nalgebra = "0.34"
 rayon = "1.8"
diff --git a/oar-ocr-core/src/core/inference/ort_infer_builders.rs b/oar-ocr-core/src/core/inference/ort_infer_builders.rs
index 3759f64..921a56b 100644
--- a/oar-ocr-core/src/core/inference/ort_infer_builders.rs
+++ b/oar-ocr-core/src/core/inference/ort_infer_builders.rs
@@ -10,7 +10,7 @@ impl OrtInfer {
         let path = model_path.as_ref();
         let session = session::load_session_with(
             path,
-            |builder| builder.with_log_level(LogLevel::Error),
+            |builder| Ok(builder.with_log_level(LogLevel::Error)?),
             Some("verify model path and compatibility with selected execution providers"),
         )?;
         let model_name = "unknown_model".to_string();
@@ -38,7 +38,7 @@ impl OrtInfer {
                 if let Some(cfg) = &common.ort_session {
                     Self::apply_ort_config(builder, cfg)
                 } else {
-                    builder.with_log_level(LogLevel::Error)
+                    Ok(builder.with_log_level(LogLevel::Error)?)
                 }
             },
             Some("check device/EP configuration and model file"),
diff --git a/oar-ocr-core/src/core/inference/session.rs b/oar-ocr-core/src/core/inference/session.rs
index 9224a62..348dba4 100644
--- a/oar-ocr-core/src/core/inference/session.rs
+++ b/oar-ocr-core/src/core/inference/session.rs
@@ -11,7 +11,7 @@ const SESSION_CREATION_FAILURE: &str = "failed to create ONNX session";
 pub fn load_session(model_path: impl AsRef<Path>) -> Result<Session, OCRError> {
     load_session_with(
         model_path,
-        |builder| builder.with_log_level(LogLevel::Error),
+        |builder| Ok(builder.with_log_level(LogLevel::Error)?),
         Some("verify model file exists and is readable"),
     )
 }
@@ -27,7 +27,7 @@ where
 {
     let path = model_path.as_ref();
     let builder = Session::builder()?;
-    let builder = configure_builder(builder)?;
+    let mut builder = configure_builder(builder)?;
     let session = builder.commit_from_file(path).map_err(|e| {
         OCRError::model_load_error(path, SESSION_CREATION_FAILURE, suggestion, Some(e))
     })?;

From 1c17e1cbd7bf810aeb76e47e56b33c3e30802006 Mon Sep 17 00:00:00 2001
From: Wang Xin <xinwang614@gmail.com>
Date: Sat, 7 Mar 2026 08:27:11 +0000
Subject: [PATCH 3/4] =?UTF-8?q?fix:=20correct=20cell-index=20mismatch=20an?=
 =?UTF-8?q?d=20cross-layout=20O(n=C2=B3)=20complexity?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 oar-ocr-core/src/processors/layout_sorting.rs | 163 ++++++++++--------
 src/oarocr/stitching.rs                       |  40 +++--
 src/oarocr/table_analyzer.rs                  |  14 +-
 3 files changed, 126 insertions(+), 91 deletions(-)

diff --git a/oar-ocr-core/src/processors/layout_sorting.rs b/oar-ocr-core/src/processors/layout_sorting.rs
index f504bdd..43b8fb3 100644
--- a/oar-ocr-core/src/processors/layout_sorting.rs
+++ b/oar-ocr-core/src/processors/layout_sorting.rs
@@ -305,6 +305,13 @@ fn direction_aware_xycut_sort(blocks: &mut [SortableBlock]) -> Vec<SortableBlock
 /// Cross-layout detection (port of PaddleX `get_layout_structure`).
 ///
 /// Marks blocks that span multiple columns as `CrossLayout`.
+///
+/// The naive algorithm is O(n³). We reduce it to O(n²) one-time setup plus O(k²)
+/// per outer block (k = |h_neighbors[block_idx]| ≪ n for typical sparse layouts),
+/// by precomputing horizontal-projection overlaps and per-block neighbor lists.
+/// Both the 2D bbox overlap and the projection-overlap conditions require
+/// horizontal overlap with `block_idx`, so any block outside its h_neighbors
+/// can never trigger a cross-layout classification and is safely skipped.
 fn detect_cross_layout(blocks: &mut [SortableBlock], _page_width: f32) {
     if blocks.len() < 2 {
         return;
@@ -331,8 +338,6 @@ fn detect_cross_layout(blocks: &mut [SortableBlock], _page_width: f32) {
 
     let n = blocks.len();
 
-    // We need to work with indices to avoid borrow checker issues
-    // Collect block data we need for comparisons
     let block_data: Vec<(BoundingBox, OrderLabel, f32, f32)> = blocks
         .iter()
         .map(|b| {
@@ -347,6 +352,29 @@ fn detect_cross_layout(blocks: &mut [SortableBlock], _page_width: f32) {
 
     let text_line_heights: Vec<f32> = blocks.iter().map(|b| b.text_line_height).collect();
 
+    // Precompute the full horizontal-projection overlap matrix (O(n²)) so that
+    // inner loops can do a single table lookup instead of recomputing the ratio.
+    let h_proj: Vec<Vec<f32>> = (0..n)
+        .map(|i| {
+            (0..n)
+                .map(|j| {
+                    calculate_projection_overlap_ratio(
+                        &block_data[i].0,
+                        &block_data[j].0,
+                        SortDirection::Horizontal,
+                    )
+                })
+                .collect()
+        })
+        .collect();
+
+    // For each block, the set of other blocks that horizontally overlap with it.
+    // Both inner loops only act on blocks in this set, so we iterate only over
+    // neighbors rather than 0..n.
+    let h_neighbors: Vec<Vec<usize>> = (0..n)
+        .map(|i| (0..n).filter(|&j| j != i && h_proj[i][j] > 0.0).collect())
+        .collect();
+
     for block_idx in 0..n {
         if mask_labels.contains(&block_data[block_idx].1) {
             continue;
@@ -354,11 +382,13 @@ fn detect_cross_layout(blocks: &mut [SortableBlock], _page_width: f32) {
 
         let mut mark_block_cross = false;
 
-        for ref_idx in 0..n {
-            if block_idx == ref_idx || mask_labels.contains(&block_data[ref_idx].1) {
+        // Iterate only over blocks that horizontally overlap with block_idx.
+        // Any block without horizontal overlap has bbox_overlap == 0 and
+        // match_proj == 0, so it cannot affect the cross-layout decision.
+        for &ref_idx in &h_neighbors[block_idx] {
+            if mask_labels.contains(&block_data[ref_idx].1) {
                 continue;
             }
-            // Skip already-marked blocks
             if blocks[ref_idx].order_label == OrderLabel::CrossLayout {
                 continue;
             }
@@ -380,84 +410,71 @@ fn detect_cross_layout(blocks: &mut [SortableBlock], _page_width: f32) {
                 }
             }
 
-            // Check projection overlap in primary direction (horizontal)
-            let match_proj = calculate_projection_overlap_ratio(
-                &block_data[block_idx].0,
-                &block_data[ref_idx].0,
-                SortDirection::Horizontal,
-            );
+            // h_proj[block_idx][ref_idx] > 0 is guaranteed by h_neighbors, so
+            // the match_proj > 0 guard from the original is always satisfied here.
 
-            if match_proj > 0.0 {
-                for second_ref_idx in 0..n {
-                    if second_ref_idx == block_idx
-                        || second_ref_idx == ref_idx
-                        || mask_labels.contains(&block_data[second_ref_idx].1)
-                    {
+            // Iterate over the same neighbor set for second_ref: every triggering
+            // condition (bbox_overlap2 > 0.1 or second_match_proj > 0) requires
+            // horizontal overlap with block_idx, which is exactly h_neighbors.
+            for &second_ref_idx in &h_neighbors[block_idx] {
+                if second_ref_idx == ref_idx || mask_labels.contains(&block_data[second_ref_idx].1)
+                {
+                    continue;
+                }
+                if blocks[second_ref_idx].order_label == OrderLabel::CrossLayout {
+                    continue;
+                }
+
+                let bbox_overlap2 = calculate_overlap_ratio(
+                    &block_data[block_idx].0,
+                    &block_data[second_ref_idx].0,
+                );
+
+                if bbox_overlap2 > 0.1 {
+                    if block_data[second_ref_idx].1 == OrderLabel::Vision {
+                        blocks[second_ref_idx].order_label = OrderLabel::CrossLayout;
                         continue;
                     }
-                    if blocks[second_ref_idx].order_label == OrderLabel::CrossLayout {
-                        continue;
+                    if block_data[block_idx].1 == OrderLabel::Vision
+                        || block_data[block_idx].2 < block_data[second_ref_idx].2
+                    {
+                        mark_block_cross = true;
+                        break;
                     }
+                }
 
-                    let bbox_overlap2 = calculate_overlap_ratio(
-                        &block_data[block_idx].0,
-                        &block_data[second_ref_idx].0,
-                    );
-
-                    if bbox_overlap2 > 0.1 {
-                        if block_data[second_ref_idx].1 == OrderLabel::Vision {
-                            blocks[second_ref_idx].order_label = OrderLabel::CrossLayout;
-                            continue;
-                        }
-                        if block_data[block_idx].1 == OrderLabel::Vision
-                            || block_data[block_idx].2 < block_data[second_ref_idx].2
-                        {
-                            mark_block_cross = true;
-                            break;
-                        }
+                // second_match_proj > 0 is guaranteed (second_ref_idx ∈ h_neighbors[block_idx]).
+                // Use precomputed table for ref_match_proj to avoid re-computing.
+                let ref_match_proj = h_proj[ref_idx][second_ref_idx];
+                let secondary_ref_match = calculate_projection_overlap_ratio(
+                    &block_data[ref_idx].0,
+                    &block_data[second_ref_idx].0,
+                    SortDirection::Vertical,
+                );
+
+                if ref_match_proj == 0.0 && secondary_ref_match > 0.0 {
+                    if block_data[block_idx].1 == OrderLabel::Vision {
+                        mark_block_cross = true;
+                        break;
                     }
-
-                    let second_match_proj = calculate_projection_overlap_ratio(
-                        &block_data[block_idx].0,
-                        &block_data[second_ref_idx].0,
-                        SortDirection::Horizontal,
-                    );
-                    let ref_match_proj = calculate_projection_overlap_ratio(
-                        &block_data[ref_idx].0,
-                        &block_data[second_ref_idx].0,
-                        SortDirection::Horizontal,
-                    );
-                    let secondary_ref_match = calculate_projection_overlap_ratio(
-                        &block_data[ref_idx].0,
-                        &block_data[second_ref_idx].0,
-                        SortDirection::Vertical,
-                    );
-
-                    if second_match_proj > 0.0 && ref_match_proj == 0.0 && secondary_ref_match > 0.0
+                    // Both ref blocks are normal text with sufficient width
+                    if block_data[ref_idx].1 == OrderLabel::NormalText
+                        && block_data[second_ref_idx].1 == OrderLabel::NormalText
+                        && block_data[ref_idx].3
+                            > text_line_heights[ref_idx]
+                                * CROSS_LAYOUT_REF_TEXT_BLOCK_WORDS_NUM_THRESHOLD
+                        && block_data[second_ref_idx].3
+                            > text_line_heights[second_ref_idx]
+                                * CROSS_LAYOUT_REF_TEXT_BLOCK_WORDS_NUM_THRESHOLD
                     {
-                        if block_data[block_idx].1 == OrderLabel::Vision {
-                            mark_block_cross = true;
-                            break;
-                        }
-                        // Both ref blocks are normal text with sufficient width
-                        if block_data[ref_idx].1 == OrderLabel::NormalText
-                            && block_data[second_ref_idx].1 == OrderLabel::NormalText
-                            && block_data[ref_idx].3
-                                > text_line_heights[ref_idx]
-                                    * CROSS_LAYOUT_REF_TEXT_BLOCK_WORDS_NUM_THRESHOLD
-                            && block_data[second_ref_idx].3
-                                > text_line_heights[second_ref_idx]
-                                    * CROSS_LAYOUT_REF_TEXT_BLOCK_WORDS_NUM_THRESHOLD
-                        {
-                            mark_block_cross = true;
-                            break;
-                        }
+                        mark_block_cross = true;
+                        break;
                     }
                 }
+            }
 
-                if mark_block_cross {
-                    break;
-                }
+            if mark_block_cross {
+                break;
             }
         }
 
diff --git a/src/oarocr/stitching.rs b/src/oarocr/stitching.rs
index 20e10cd..426bfb8 100644
--- a/src/oarocr/stitching.rs
+++ b/src/oarocr/stitching.rs
@@ -965,18 +965,28 @@ impl ResultStitcher {
         }
 
         // --- Sort cells into rows ---
-        // When detected bboxes are available, sort them (better spatial accuracy)
-        // for the IoA matching loop.
-        let (match_sorted_indices, match_row_flags) = if let Some(det_bboxes) = cell_bboxes_override
-        {
-            let temp_cells: Vec<TableCell> = det_bboxes
-                .iter()
-                .map(|b| TableCell::new(b.clone(), 0.5))
-                .collect();
-            Self::sort_table_cells_boxes(&temp_cells, row_y_tolerance)
-        } else {
-            Self::sort_table_cells_boxes(cells, row_y_tolerance)
-        };
+        // When detected bboxes are available we sort them (better spatial accuracy)
+        // to pick the IoA bbox for OCR matching.  We also independently sort the
+        // structure cells so that the td→cell text-assignment step uses a valid
+        // index into `cells[]`.  Without this separation the det-bbox sort indices
+        // are silently reused as structure-cell indices, misassigning OCR to wrong
+        // cells whenever the two orderings differ.
+        let (match_sorted_indices, cell_sorted_indices, match_row_flags) =
+            if let Some(det_bboxes) = cell_bboxes_override {
+                let temp_cells: Vec<TableCell> = det_bboxes
+                    .iter()
+                    .map(|b| TableCell::new(b.clone(), 0.5))
+                    .collect();
+                let (det_sorted, row_flags) =
+                    Self::sort_table_cells_boxes(&temp_cells, row_y_tolerance);
+                // Sort structure cells independently so their indices stay valid.
+                let (cell_sorted, _) = Self::sort_table_cells_boxes(cells, row_y_tolerance);
+                (det_sorted, cell_sorted, row_flags)
+            } else {
+                let (sorted, row_flags) = Self::sort_table_cells_boxes(cells, row_y_tolerance);
+                // When there is no override the two index lists are identical.
+                (sorted.clone(), sorted, row_flags)
+            };
 
         if match_sorted_indices.is_empty() || match_row_flags.is_empty() {
             return None;
@@ -1061,13 +1071,15 @@ impl ResultStitcher {
 
             // Map td position to the original cell index via sorted ordering.
             // match_aligned[matched_row_idx] + td_index gives the position in the
-            // sorted cell list, and match_sorted_indices maps that back to cells[].
+            // sorted cell list.  Use cell_sorted_indices (indices into cells[])
+            // rather than match_sorted_indices (which may be indices into det_bboxes
+            // when cell_bboxes_override is active).
             let mapped_cell_idx = match_aligned
                 .get(matched_row_idx)
                 .copied()
                 .and_then(|row_start| {
                     let sorted_pos = row_start + td_index;
-                    match_sorted_indices.get(sorted_pos).copied()
+                    cell_sorted_indices.get(sorted_pos).copied()
                 })
                 .filter(|&idx| idx < cells.len());
 
diff --git a/src/oarocr/table_analyzer.rs b/src/oarocr/table_analyzer.rs
index 87de6d3..eeda3a6 100644
--- a/src/oarocr/table_analyzer.rs
+++ b/src/oarocr/table_analyzer.rs
@@ -393,6 +393,12 @@ impl<'a> TableAnalyzer<'a> {
             TableType::Unknown => false,
         };
 
+        // When use_cells_trans_to_html is set it overrides E2E mode: detected
+        // cells are used in place of SLANet structure tokens.  Anything that
+        // conditions on "are we actually in E2E mode?" should use this flag
+        // rather than use_e2e_mode directly.
+        let effective_use_e2e = use_e2e_mode && !use_cells_trans_to_html;
+
         let structure_adapter: Option<&TableStructureRecognitionAdapter> = match table_type {
             TableType::Wired => self
                 .wired_table_structure_adapter
@@ -482,7 +488,7 @@ impl<'a> TableAnalyzer<'a> {
                         }
                     }
                     Err(e) => {
-                        if use_cells_trans_to_html && !use_e2e_mode {
+                        if use_cells_trans_to_html {
                             tracing::warn!(
                                 target: "structure",
                                 table_index = idx,
@@ -508,7 +514,7 @@ impl<'a> TableAnalyzer<'a> {
                 }
             }
             None => {
-                if !use_cells_trans_to_html || use_e2e_mode {
+                if !use_cells_trans_to_html || effective_use_e2e {
                     tracing::warn!(
                         target: "structure",
                         table_index = idx,
@@ -614,8 +620,8 @@ impl<'a> TableAnalyzer<'a> {
             }
         }
 
-        // Use detected cells when in cells_trans_to_html mode (non-E2E)
-        if use_cells_trans_to_html && !use_e2e_mode && !detected_bboxes_crop.is_empty() {
+        // Use detected cells when in cells_trans_to_html mode (overrides E2E).
+        if use_cells_trans_to_html && !detected_bboxes_crop.is_empty() {
             cells = detected_bboxes_crop
                 .iter()
                 .zip(detected_scores.iter())

From b421f4cd21f0ecab7a2f17af6cd7c6ddbfdf75eb Mon Sep 17 00:00:00 2001
From: Wang Xin <xinwang614@gmail.com>
Date: Sat, 7 Mar 2026 10:46:19 +0000
Subject: [PATCH 4/4] fix: escape dollar signs in math blocks and remove stale
 allow(dead_code)

---
 oar-ocr-core/src/domain/structure.rs   | 6 ++++--
 oar-ocr-core/src/processors/sorting.rs | 1 -
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/oar-ocr-core/src/domain/structure.rs b/oar-ocr-core/src/domain/structure.rs
index 4ef042b..4fcf687 100644
--- a/oar-ocr-core/src/domain/structure.rs
+++ b/oar-ocr-core/src/domain/structure.rs
@@ -1719,8 +1719,10 @@ pub fn postprocess_markdown(markdown: &str) -> String {
             if contains_dollar && is_plain_text {
                 result.push_str(&line.replace('$', "\\$"));
             } else if contains_dollar {
-                // Remove redundant dollar signs inside the block
-                result.push_str(&line.replace('$', ""));
+                // Escape bare dollar signs inside the math block to avoid
+                // "Can't use function '$' in math mode" KaTeX errors while
+                // preserving literal dollars (e.g. \text{$10}).
+                result.push_str(&line.replace('$', "\\$"));
             } else {
                 result.push_str(line);
             }
diff --git a/oar-ocr-core/src/processors/sorting.rs b/oar-ocr-core/src/processors/sorting.rs
index 24802ea..aeb2972 100644
--- a/oar-ocr-core/src/processors/sorting.rs
+++ b/oar-ocr-core/src/processors/sorting.rs
@@ -522,7 +522,6 @@ impl SortableRegion {
 }
 
 /// Calculates the IoU (Intersection over Union) between two bounding boxes.
-#[allow(dead_code)]
 pub fn calculate_iou(a: &BoundingBox, b: &BoundingBox) -> f32 {
     let x1 = a.x_min().max(b.x_min());
     let y1 = a.y_min().max(b.y_min());