From 605ff3543216f528793ca4244cc1f2d8fb3ae449 Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Mon, 22 Sep 2025 13:02:40 +0200
Subject: [PATCH 01/14] added the summary field

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 docling_core/types/doc/document.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py
index 07693b88..932fdc40 100644
--- a/docling_core/types/doc/document.py
+++ b/docling_core/types/doc/document.py
@@ -952,6 +952,8 @@ class NodeItem(BaseModel):
 
     model_config = ConfigDict(extra="forbid")
 
+    summary: Optional[str] = None # 
+    
     def get_ref(self) -> RefItem:
         """get_ref."""
         return RefItem(cref=self.self_ref)

From 79a13470a7502d107e1935a7938171b995b6722e Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Mon, 22 Sep 2025 13:18:50 +0200
Subject: [PATCH 02/14] added the MD-summary serializer

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 .../transforms/serializer/markdown_summary.py | 104 ++++++++++++++++++
 1 file changed, 104 insertions(+)
 create mode 100644 docling_core/transforms/serializer/markdown_summary.py

diff --git a/docling_core/transforms/serializer/markdown_summary.py b/docling_core/transforms/serializer/markdown_summary.py
new file mode 100644
index 00000000..852b1bbc
--- /dev/null
+++ b/docling_core/transforms/serializer/markdown_summary.py
@@ -0,0 +1,104 @@
+from docling_core.types.doc import (
+    ContentLayer,
+    DocItemLabel,
+    DoclingDocument,
+    NodeItem,
+    GroupItem,
+    GroupLabel,
+    DocItem,
+    LevelNumber,
+    ListItem,
+    SectionHeaderItem,
+    TableItem,
+    TextItem,
+    TitleItem,
+    RefItem,
+    PictureItem,
+)
+
+class MarkdownSummaryParams(CommonParams):
+    """Markdown-specific serialization parameters."""
+
+    use_markdown_headers: bool = False
+    
+class MarkdownSummarySerializer(DocSerializer):
+    """Markdown-specific document summary serializer."""
+
+        params: MarkdownParams = MarkdownParams()
+
+    @override
+    def serialize_bold(self, text: str, **kwargs: Any):
+        """Apply Markdown-specific bold serialization."""
+        return f"**{text}**"
+
+    @override
+    def serialize_italic(self, text: str, **kwargs: Any):
+        """Apply Markdown-specific italic serialization."""
+        return f"*{text}*"
+
+    @override
+    def serialize_strikethrough(self, text: str, **kwargs: Any):
+        """Apply Markdown-specific strikethrough serialization."""
+        return f"~~{text}~~"
+
+    @override
+    def serialize_hyperlink(
+        self,
+        text: str,
+        hyperlink: Union[AnyUrl, Path],
+        **kwargs: Any,
+    ):
+        """Apply Markdown-specific hyperlink serialization."""
+        return f"[{text}]({str(hyperlink)})"
+
+    @override
+    def serialize_doc(
+        self,
+        *,
+        parts: list[SerializationResult],
+        **kwargs: Any,
+    ) -> SerializationResult:
+        """Serialize a document out of its parts."""
+        text_res = "\n\n".join([p.text for p in parts if p.text])
+
+        return create_ser_result(text=text_res, span_source=parts)
+    
+    def _create_document_outline(self, doc: DoclingDocument) -> str:
+        label_counter: dict[DocItemLabel, int] = {
+            DocItemLabel.TABLE: 0,
+            DocItemLabel.PICTURE: 0,
+            DocItemLabel.TEXT: 0,
+        }
+
+        lines = []
+        for item, level in doc.iterate_items(with_groups=True):
+            if isinstance(item, TitleItem):
+                lines.append(f"title (reference={item.self_ref}): {item.text}")
+                
+            elif isinstance(item, SectionHeaderItem):
+                lines.append(
+                    f"section-header (level={item.level}, reference={item.self_ref}): {item.text}"
+                )
+
+            elif isinstance(item, ListItem):
+                continue
+            
+            elif isinstance(item, TextItem):
+                lines.append(f"{item.label} (reference={item.self_ref})")
+                
+            elif isinstance(item, TableItem):
+                label_counter[item.label] += 1
+                lines.append(
+                    f"{item.label} {label_counter[item.label]} (reference={item.self_ref})"
+                )
+                
+            elif isinstance(item, PictureItem):
+                label_counter[item.label] += 1
+                lines.append(
+                    f"{item.label} {label_counter[item.label]} (reference={item.self_ref})"
+                )
+
+        outline = "\n\n".join(lines)
+
+        return outline
+

From 003883bae0fffdbe7d22df7a873aac53461b24aa Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Tue, 23 Sep 2025 09:58:29 +0200
Subject: [PATCH 03/14] added the first attempt at a markdown_summary

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 .../transforms/serializer/markdown_summary.py | 168 +++++++++++++-----
 1 file changed, 120 insertions(+), 48 deletions(-)

diff --git a/docling_core/transforms/serializer/markdown_summary.py b/docling_core/transforms/serializer/markdown_summary.py
index 852b1bbc..4ba0379f 100644
--- a/docling_core/transforms/serializer/markdown_summary.py
+++ b/docling_core/transforms/serializer/markdown_summary.py
@@ -1,44 +1,86 @@
+from typing import Any, Optional, Union
+from pathlib import Path
+
+from pydantic import AnyUrl
+from typing_extensions import override
+
+from docling_core.transforms.serializer.base import (
+    BaseAnnotationSerializer,
+    BaseDocSerializer,
+    BaseFallbackSerializer,
+    BaseFormSerializer,
+    BaseInlineSerializer,
+    BaseKeyValueSerializer,
+    BaseListSerializer,
+    BasePictureSerializer,
+    BaseTableSerializer,
+    BaseTextSerializer,
+    SerializationResult,
+)
+from docling_core.transforms.serializer.common import (
+    CommonParams,
+    DocSerializer,
+    create_ser_result,
+)
+from docling_core.transforms.serializer.markdown import (
+    MarkdownAnnotationSerializer,
+    MarkdownFallbackSerializer,
+    MarkdownFormSerializer,
+    MarkdownInlineSerializer,
+    MarkdownKeyValueSerializer,
+    MarkdownListSerializer,
+    MarkdownPictureSerializer,
+    MarkdownTableSerializer,
+    MarkdownTextSerializer,
+)
 from docling_core.types.doc import (
-    ContentLayer,
+    DocItem,
     DocItemLabel,
     DoclingDocument,
-    NodeItem,
-    GroupItem,
-    GroupLabel,
-    DocItem,
-    LevelNumber,
     ListItem,
+    NodeItem,
+    PictureItem,
     SectionHeaderItem,
     TableItem,
     TextItem,
     TitleItem,
-    RefItem,
-    PictureItem,
 )
 
+
 class MarkdownSummaryParams(CommonParams):
-    """Markdown-specific serialization parameters."""
+    """Markdown-specific serialization parameters for outline."""
 
     use_markdown_headers: bool = False
-    
+
+
 class MarkdownSummarySerializer(DocSerializer):
     """Markdown-specific document summary serializer."""
 
-        params: MarkdownParams = MarkdownParams()
+    # Provide required serializer attributes to satisfy DocSerializer’s model
+    text_serializer: BaseTextSerializer = MarkdownTextSerializer()
+    table_serializer: BaseTableSerializer = MarkdownTableSerializer()
+    picture_serializer: BasePictureSerializer = MarkdownPictureSerializer()
+    key_value_serializer: BaseKeyValueSerializer = MarkdownKeyValueSerializer()
+    form_serializer: BaseFormSerializer = MarkdownFormSerializer()
+    fallback_serializer: BaseFallbackSerializer = MarkdownFallbackSerializer()
+
+    list_serializer: BaseListSerializer = MarkdownListSerializer()
+    inline_serializer: BaseInlineSerializer = MarkdownInlineSerializer()
+
+    annotation_serializer: BaseAnnotationSerializer = MarkdownAnnotationSerializer()
+
+    params: MarkdownSummaryParams = MarkdownSummaryParams()
 
     @override
-    def serialize_bold(self, text: str, **kwargs: Any):
-        """Apply Markdown-specific bold serialization."""
+    def serialize_bold(self, text: str, **kwargs: Any) -> str:
         return f"**{text}**"
 
     @override
-    def serialize_italic(self, text: str, **kwargs: Any):
-        """Apply Markdown-specific italic serialization."""
+    def serialize_italic(self, text: str, **kwargs: Any) -> str:
         return f"*{text}*"
 
     @override
-    def serialize_strikethrough(self, text: str, **kwargs: Any):
-        """Apply Markdown-specific strikethrough serialization."""
+    def serialize_strikethrough(self, text: str, **kwargs: Any) -> str:
         return f"~~{text}~~"
 
     @override
@@ -47,10 +89,19 @@ def serialize_hyperlink(
         text: str,
         hyperlink: Union[AnyUrl, Path],
         **kwargs: Any,
-    ):
-        """Apply Markdown-specific hyperlink serialization."""
+    ) -> str:
         return f"[{text}]({str(hyperlink)})"
 
+    @override
+    def get_parts(
+        self,
+        item: Optional[NodeItem] = None,
+        **kwargs: Any,
+    ) -> list[SerializationResult]:
+        """Return a single part containing the document (or subtree) outline."""
+        outline = self._create_document_outline(root=item, **kwargs)
+        return [create_ser_result(text=outline, span_source=[])] if outline else []
+
     @override
     def serialize_doc(
         self,
@@ -58,47 +109,68 @@ def serialize_doc(
         parts: list[SerializationResult],
         **kwargs: Any,
     ) -> SerializationResult:
-        """Serialize a document out of its parts."""
         text_res = "\n\n".join([p.text for p in parts if p.text])
-
         return create_ser_result(text=text_res, span_source=parts)
-    
-    def _create_document_outline(self, doc: DoclingDocument) -> str:
+
+    def _create_document_outline(
+        self,
+        *,
+        root: Optional[NodeItem] = None,
+        **kwargs: Any,
+    ) -> str:
+        """Create an outline, respecting params and recursive traversal."""
+        params = self.params.merge_with_patch(patch=kwargs)
+        excluded = self.get_excluded_refs(**kwargs)
+
         label_counter: dict[DocItemLabel, int] = {
             DocItemLabel.TABLE: 0,
             DocItemLabel.PICTURE: 0,
             DocItemLabel.TEXT: 0,
         }
+        lines: list[str] = []
+        visited: set[str] = set()
 
-        lines = []
-        for item, level in doc.iterate_items(with_groups=True):
-            if isinstance(item, TitleItem):
-                lines.append(f"title (reference={item.self_ref}): {item.text}")
-                
-            elif isinstance(item, SectionHeaderItem):
-                lines.append(
-                    f"section-header (level={item.level}, reference={item.self_ref}): {item.text}"
-                )
+        # Iterate depth-first with groups, similar to MarkdownSerializer
+        for node, level in self.doc.iterate_items(root=root, with_groups=True):
+            if node.self_ref in visited:
+                continue
+            visited.add(node.self_ref)
 
-            elif isinstance(item, ListItem):
+            # Skip list items in outline
+            if isinstance(node, ListItem):
                 continue
-            
-            elif isinstance(item, TextItem):
-                lines.append(f"{item.label} (reference={item.self_ref})")
-                
-            elif isinstance(item, TableItem):
-                label_counter[item.label] += 1
+
+            # Respect excluded refs and skip caption text items
+            if isinstance(node, DocItem):
+                if node.self_ref in excluded:
+                    continue
+                if isinstance(node, TextItem) and node.self_ref in self._captions_of_some_item:
+                    continue
+
+            if isinstance(node, TitleItem):
+                if params.use_markdown_headers:
+                    lines.append(f"# {node.text}")
+                else:
+                    lines.append(f"title (reference={node.self_ref}): {node.text}")
+            elif isinstance(node, SectionHeaderItem):
+                if params.use_markdown_headers:
+                    hashes = "#" * (node.level + 1)
+                    lines.append(f"{hashes} {node.text}")
+                else:
+                    lines.append(
+                        f"section-header (level={node.level}, reference={node.self_ref}): {node.text}"
+                    )
+            elif isinstance(node, TextItem):
+                lines.append(f"{node.label} (reference={node.self_ref})")
+            elif isinstance(node, TableItem):
+                label_counter[DocItemLabel.TABLE] += 1
                 lines.append(
-                    f"{item.label} {label_counter[item.label]} (reference={item.self_ref})"
+                    f"{node.label} {label_counter[DocItemLabel.TABLE]} (reference={node.self_ref})"
                 )
-                
-            elif isinstance(item, PictureItem):
-                label_counter[item.label] += 1
+            elif isinstance(node, PictureItem):
+                label_counter[DocItemLabel.PICTURE] += 1
                 lines.append(
-                    f"{item.label} {label_counter[item.label]} (reference={item.self_ref})"
+                    f"{node.label} {label_counter[DocItemLabel.PICTURE]} (reference={node.self_ref})"
                 )
 
-        outline = "\n\n".join(lines)
-
-        return outline
-
+        return "\n\n".join(lines)

From e64b83d5aec3c7f469c33c53571908a09df6f056 Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Tue, 23 Sep 2025 10:21:20 +0200
Subject: [PATCH 04/14] added a test for markdown_summary

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 .../transforms/serializer/markdown_summary.py |  5 ++
 test/test_markdown_summary.py                 | 56 +++++++++++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 test/test_markdown_summary.py

diff --git a/docling_core/transforms/serializer/markdown_summary.py b/docling_core/transforms/serializer/markdown_summary.py
index 4ba0379f..e511b4a5 100644
--- a/docling_core/transforms/serializer/markdown_summary.py
+++ b/docling_core/transforms/serializer/markdown_summary.py
@@ -92,6 +92,11 @@ def serialize_hyperlink(
     ) -> str:
         return f"[{text}]({str(hyperlink)})"
 
+    @override
+    def requires_page_break(self) -> bool:
+        """Whether to add page breaks."""
+        return False
+    
     @override
     def get_parts(
         self,
diff --git a/test/test_markdown_summary.py b/test/test_markdown_summary.py
new file mode 100644
index 00000000..6c3b72dc
--- /dev/null
+++ b/test/test_markdown_summary.py
@@ -0,0 +1,56 @@
+"""Tests for MarkdownSummarySerializer (document outline)."""
+
+from pathlib import Path
+
+import pytest
+
+from docling_core.transforms.serializer.markdown_summary import (
+    MarkdownSummaryParams,
+    MarkdownSummarySerializer,
+)
+
+from .test_docling_doc import _construct_doc
+
+
+@pytest.mark.parametrize("use_md_headers", [False, True])
+def test_markdown_summary_outline(use_md_headers: bool):
+    # Build a representative document with title, headers, text, lists, table, and pictures
+    doc = _construct_doc()
+
+    ser = MarkdownSummarySerializer(
+        doc=doc,
+        params=MarkdownSummaryParams(use_markdown_headers=use_md_headers),
+    )
+
+    outline = ser.serialize().text
+
+    print(outline)
+    
+    # Leading list items should not appear in the outline
+    assert "item of leading list" not in outline
+
+    # Captions should be excluded from outline
+    assert "This is the caption of table 1." not in outline
+    assert "This is the caption of figure 1." not in outline
+    assert "This is the caption of figure 2." not in outline
+
+    # Title and section header formatting based on params
+    if use_md_headers:
+        # Markdown-style headers
+        assert "# Title of the Document" in outline
+        assert "## 1. Introduction" in outline
+        # Ensure we don't get the verbose label style when using MD headers
+        assert "title (reference=" not in outline.splitlines()[0]
+    else:
+        # Verbose outline lines with references
+        first_line = outline.splitlines()[0]
+        assert first_line.startswith("title (reference=") and first_line.endswith(
+            "): Title of the Document"
+        )
+        # Section header line contains level and reference
+        assert "section-header (level=1, reference=" in outline
+
+    # Tables and pictures should be numbered and listed with references
+    assert "table 1 (reference=" in outline
+    assert "picture 1 (reference=" in outline
+

From da4e775c206ce8591503faaec169072cf562d92c Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Tue, 23 Sep 2025 13:55:41 +0200
Subject: [PATCH 05/14] refactored the markdown summary serializer

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 .../transforms/serializer/markdown_summary.py | 172 +++++++++++++++---
 docling_core/types/doc/document.py            |   2 +-
 ...ndent_mdhdr_false_indent_true_size_2.gt.md |  59 ++++++
 ...indent_mdhdr_true_indent_true_size_2.gt.md |  59 ++++++
 ...sum_outline_mdhdr_false_indent_false.gt.md |  59 ++++++
 ...dsum_outline_mdhdr_false_indent_true.gt.md |  59 ++++++
 ...dsum_outline_mdhdr_true_indent_false.gt.md |  59 ++++++
 ...mdsum_outline_mdhdr_true_indent_true.gt.md |  59 ++++++
 ...of_contents_mdhdr_false_indent_false.gt.md |   3 +
 ..._of_contents_mdhdr_false_indent_true.gt.md |   3 +
 ..._of_contents_mdhdr_true_indent_false.gt.md |   3 +
 ...e_of_contents_mdhdr_true_indent_true.gt.md |   3 +
 test/test_markdown_summary.py                 |  89 +++++----
 13 files changed, 570 insertions(+), 59 deletions(-)
 create mode 100644 test/data/doc/constructed_mdsum_indent_mdhdr_false_indent_true_size_2.gt.md
 create mode 100644 test/data/doc/constructed_mdsum_indent_mdhdr_true_indent_true_size_2.gt.md
 create mode 100644 test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_false.gt.md
 create mode 100644 test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_true.gt.md
 create mode 100644 test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_false.gt.md
 create mode 100644 test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_true.gt.md
 create mode 100644 test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_false.gt.md
 create mode 100644 test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_true.gt.md
 create mode 100644 test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_false.gt.md
 create mode 100644 test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_true.gt.md

diff --git a/docling_core/transforms/serializer/markdown_summary.py b/docling_core/transforms/serializer/markdown_summary.py
index e511b4a5..bbfd3ac6 100644
--- a/docling_core/transforms/serializer/markdown_summary.py
+++ b/docling_core/transforms/serializer/markdown_summary.py
@@ -1,5 +1,6 @@
 from typing import Any, Optional, Union
 from pathlib import Path
+from enum import Enum
 
 from pydantic import AnyUrl
 from typing_extensions import override
@@ -34,9 +35,12 @@
     MarkdownTextSerializer,
 )
 from docling_core.types.doc import (
+    CodeItem,
     DocItem,
     DocItemLabel,
     DoclingDocument,
+    FormItem,
+    ListGroup,
     ListItem,
     NodeItem,
     PictureItem,
@@ -46,12 +50,29 @@
     TitleItem,
 )
 
-
+class MarkdownSummaryMode(str, Enum):
+    
+    OUTLINE = "outline"
+    TABLE_OF_CONTENTS = "table_of_contents"
+    
 class MarkdownSummaryParams(CommonParams):
     """Markdown-specific serialization parameters for outline."""
 
+    mode: MarkdownSummaryMode = MarkdownSummaryMode.OUTLINE
+    
     use_markdown_headers: bool = False
 
+    add_label_counter: bool = False
+    add_references: bool = True
+    add_summary: bool = True
+
+    # Indentation control: when enabled, indent each line according to
+    # the latest encountered section-header level (title treated as level 0).
+    indent_by_section_level: bool = False
+    indent_size: int = 2
+
+    toc_labels: list[DocItemLabel] = [DocItemLabel.TITLE, DocItemLabel.SECTION_HEADER]
+    
 
 class MarkdownSummarySerializer(DocSerializer):
     """Markdown-specific document summary serializer."""
@@ -104,8 +125,8 @@ def get_parts(
         **kwargs: Any,
     ) -> list[SerializationResult]:
         """Return a single part containing the document (or subtree) outline."""
-        outline = self._create_document_outline(root=item, **kwargs)
-        return [create_ser_result(text=outline, span_source=[])] if outline else []
+        return self._create_document_outline(root=item, **kwargs)
+    #return [create_ser_result(text=outline, span_source=[])] if outline else []
 
     @override
     def serialize_doc(
@@ -122,60 +143,157 @@ def _create_document_outline(
         *,
         root: Optional[NodeItem] = None,
         **kwargs: Any,
-    ) -> str:
+    ) -> list[SerializationResult]:
         """Create an outline, respecting params and recursive traversal."""
         params = self.params.merge_with_patch(patch=kwargs)
         excluded = self.get_excluded_refs(**kwargs)
 
-        label_counter: dict[DocItemLabel, int] = {
-            DocItemLabel.TABLE: 0,
-            DocItemLabel.PICTURE: 0,
-            DocItemLabel.TEXT: 0,
-        }
+        # Per-label counters; used consistently when params.add_label_counter is True
+        # and always for table/picture numbering.
+        label_counter: dict[DocItemLabel, int] = {}
         lines: list[str] = []
         visited: set[str] = set()
 
+        result: list[SerializationResult] = []
+
+        # Track latest section header level for indentation
+        current_section_level: int = 0
+
+        # Helper to increment and fetch the counter for a given label
+        def _next_idx(lbl: DocItemLabel) -> int:
+            label_counter[lbl] = label_counter.get(lbl, 0) + 1
+            return label_counter[lbl]
+        
+        # Helper to identify if the label should be included in the table-of-contents
+        def _include(lbl: DocItemLabel) -> int:
+            if params.mode==MarkdownSummaryMode.TABLE_OF_CONTENTS and \
+               (lbl not in params.toc_labels):
+                return False
+
+            return True
+        
         # Iterate depth-first with groups, similar to MarkdownSerializer
         for node, level in self.doc.iterate_items(root=root, with_groups=True):
             if node.self_ref in visited:
                 continue
+            
             visited.add(node.self_ref)
 
+            if not _include(lbl=node.label):
+                continue
+            
+            summary = ""
+            if params.add_summary and \
+               (node.summary is not None) and \
+               isinstance(node.summary, str):
+                summary = node.summary
+            
             # Skip list items in outline
             if isinstance(node, ListItem):
                 continue
 
-            # Respect excluded refs and skip caption text items
+            # Respect excluded refs
             if isinstance(node, DocItem):
                 if node.self_ref in excluded:
                     continue
                 if isinstance(node, TextItem) and node.self_ref in self._captions_of_some_item:
                     continue
 
+            line:str = ""
+
+            # Base label string (normalize underscores to hyphens)
+            node_label = str(node.label).replace("_", "-")
+            if params.add_label_counter and not isinstance(node, (TableItem, PictureItem)):
+                # Apply generic counters to non-table/picture items
+                node_label = f"{node_label} {_next_idx(node.label)}"
+
+            # Build optional reference snippet only when enabled
+            ref_part = f" (reference={node.self_ref})" if params.add_references else ""
+                
             if isinstance(node, TitleItem):
+
+                raw_text = self.text_serializer.serialize(
+                    item=node, doc_serializer=self, doc=self.doc
+                ).text
+
                 if params.use_markdown_headers:
-                    lines.append(f"# {node.text}")
+                    # raw_text already includes the heading marker
+                    text = raw_text.lstrip()
+                    line = f"{text}{ref_part}"
                 else:
-                    lines.append(f"title (reference={node.self_ref}): {node.text}")
+                    # strip leading markdown header markers for verbose representation
+                    text = raw_text.lstrip().lstrip("# ") if raw_text.startswith("#") else raw_text
+                    if params.add_references:
+                        line = f"{node_label}{ref_part}: {text}"
+                    else:
+                        line = f"{node_label}: {text}"
+
             elif isinstance(node, SectionHeaderItem):
+
+                raw_text = self.text_serializer.serialize(
+                    item=node, doc_serializer=self, doc=self.doc
+                ).text
+
                 if params.use_markdown_headers:
-                    hashes = "#" * (node.level + 1)
-                    lines.append(f"{hashes} {node.text}")
+                    # raw_text already includes the correct number of '#'
+                    text = raw_text.lstrip()
+                    if params.add_references:
+                        line = f"{text} (level={node.level}, reference={node.self_ref})"
+                    else:
+                        line = f"{text} (level={node.level})"
                 else:
-                    lines.append(
-                        f"section-header (level={node.level}, reference={node.self_ref}): {node.text}"
-                    )
+                    # strip leading markdown header markers for verbose representation
+                    stripped = raw_text.lstrip()
+                    while stripped.startswith("#"):
+                        stripped = stripped.lstrip("#").lstrip()
+                    text = stripped
+                    if params.add_references:
+                        line = f"{node_label} (level={node.level}, reference={node.self_ref}): {text}"
+                    else:
+                        line = f"{node_label} (level={node.level}): {text}"
+
+                # Update current section level for subsequent items
+                current_section_level = node.level
+
+            elif isinstance(node, ListGroup):
+                # Skip listing list groups in summary to avoid leading list noise
+                line = ""
+                    
             elif isinstance(node, TextItem):
-                lines.append(f"{node.label} (reference={node.self_ref})")
+                line = f"{node_label}{ref_part}"
+
+            elif isinstance(node, FormItem):
+                line = f"{node_label}{ref_part}"
+
+            elif isinstance(node, CodeItem):
+                line = f"{node_label}{ref_part}"
+                
             elif isinstance(node, TableItem):
-                label_counter[DocItemLabel.TABLE] += 1
-                lines.append(
-                    f"{node.label} {label_counter[DocItemLabel.TABLE]} (reference={node.self_ref})"
-                )
+                # Tables are always numbered in the summary
+                line = f"{node_label} {_next_idx(DocItemLabel.TABLE)}{ref_part}"
+                
             elif isinstance(node, PictureItem):
-                label_counter[DocItemLabel.PICTURE] += 1
-                lines.append(
-                    f"{node.label} {label_counter[DocItemLabel.PICTURE]} (reference={node.self_ref})"
-                )
+                # Pictures are always numbered in the summary
+                line = f"{node_label} {_next_idx(DocItemLabel.PICTURE)}{ref_part}"
+
+            if len(summary)>0:
+                line += f" (summary={summary})"
 
-        return "\n\n".join(lines)
+            # Apply indentation based on latest section level if enabled
+            if params.indent_by_section_level:
+                indent_level = current_section_level
+                # For a section-header, indent by its own level
+                if isinstance(node, SectionHeaderItem):
+                    indent_level = node.level
+                indent = " " * (params.indent_size * indent_level)
+                line = f"{indent}{line}" if line else line
+
+            if line:
+                result.append(
+                    create_ser_result(
+                        text=line,
+                        span_source=node if isinstance(node, DocItem) else [],
+                    )
+                )
+            
+        return result
diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py
index 932fdc40..32d7c533 100644
--- a/docling_core/types/doc/document.py
+++ b/docling_core/types/doc/document.py
@@ -952,7 +952,7 @@ class NodeItem(BaseModel):
 
     model_config = ConfigDict(extra="forbid")
 
-    summary: Optional[str] = None # 
+    summary: Optional[str] = Field(default=None, exclude=True)  # optional, not serialized
     
     def get_ref(self) -> RefItem:
         """get_ref."""
diff --git a/test/data/doc/constructed_mdsum_indent_mdhdr_false_indent_true_size_2.gt.md b/test/data/doc/constructed_mdsum_indent_mdhdr_false_indent_true_size_2.gt.md
new file mode 100644
index 00000000..da6a4cb7
--- /dev/null
+++ b/test/data/doc/constructed_mdsum_indent_mdhdr_false_indent_true_size_2.gt.md
@@ -0,0 +1,59 @@
+title (reference=#/texts/1): Title of the Document
+
+text (reference=#/texts/2)
+
+text (reference=#/texts/3)
+
+  section-header (level=1, reference=#/texts/4): 1. Introduction
+
+  text (reference=#/texts/5)
+
+  table 1 (reference=#/tables/0)
+
+  picture 1 (reference=#/pictures/0)
+
+  picture 2 (reference=#/pictures/1)
+
+  text (reference=#/texts/24)
+
+  code (reference=#/texts/25)
+
+  text (reference=#/texts/26)
+
+  text (reference=#/texts/28)
+
+  formula (reference=#/texts/29)
+
+  text (reference=#/texts/30)
+
+  text (reference=#/texts/31)
+
+  code (reference=#/texts/32)
+
+  text (reference=#/texts/33)
+
+  formula (reference=#/texts/34)
+
+  form (reference=#/form_items/0)
+
+  text (reference=#/texts/35)
+
+  text (reference=#/texts/36)
+
+  text (reference=#/texts/37)
+
+  text (reference=#/texts/38)
+
+  text (reference=#/texts/39)
+
+  text (reference=#/texts/40)
+
+  text (reference=#/texts/41)
+
+  text (reference=#/texts/42)
+
+  text (reference=#/texts/43)
+
+  text (reference=#/texts/44)
+
+  text (reference=#/texts/55)
diff --git a/test/data/doc/constructed_mdsum_indent_mdhdr_true_indent_true_size_2.gt.md b/test/data/doc/constructed_mdsum_indent_mdhdr_true_indent_true_size_2.gt.md
new file mode 100644
index 00000000..68f8efc7
--- /dev/null
+++ b/test/data/doc/constructed_mdsum_indent_mdhdr_true_indent_true_size_2.gt.md
@@ -0,0 +1,59 @@
+# Title of the Document (reference=#/texts/1)
+
+text (reference=#/texts/2)
+
+text (reference=#/texts/3)
+
+  ## 1. Introduction (level=1, reference=#/texts/4)
+
+  text (reference=#/texts/5)
+
+  table 1 (reference=#/tables/0)
+
+  picture 1 (reference=#/pictures/0)
+
+  picture 2 (reference=#/pictures/1)
+
+  text (reference=#/texts/24)
+
+  code (reference=#/texts/25)
+
+  text (reference=#/texts/26)
+
+  text (reference=#/texts/28)
+
+  formula (reference=#/texts/29)
+
+  text (reference=#/texts/30)
+
+  text (reference=#/texts/31)
+
+  code (reference=#/texts/32)
+
+  text (reference=#/texts/33)
+
+  formula (reference=#/texts/34)
+
+  form (reference=#/form_items/0)
+
+  text (reference=#/texts/35)
+
+  text (reference=#/texts/36)
+
+  text (reference=#/texts/37)
+
+  text (reference=#/texts/38)
+
+  text (reference=#/texts/39)
+
+  text (reference=#/texts/40)
+
+  text (reference=#/texts/41)
+
+  text (reference=#/texts/42)
+
+  text (reference=#/texts/43)
+
+  text (reference=#/texts/44)
+
+  text (reference=#/texts/55)
diff --git a/test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_false.gt.md b/test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_false.gt.md
new file mode 100644
index 00000000..da005563
--- /dev/null
+++ b/test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_false.gt.md
@@ -0,0 +1,59 @@
+title (reference=#/texts/1): Title of the Document
+
+text (reference=#/texts/2)
+
+text (reference=#/texts/3)
+
+section-header (level=1, reference=#/texts/4): 1. Introduction
+
+text (reference=#/texts/5)
+
+table 1 (reference=#/tables/0)
+
+picture 1 (reference=#/pictures/0)
+
+picture 2 (reference=#/pictures/1)
+
+text (reference=#/texts/24)
+
+code (reference=#/texts/25)
+
+text (reference=#/texts/26)
+
+text (reference=#/texts/28)
+
+formula (reference=#/texts/29)
+
+text (reference=#/texts/30)
+
+text (reference=#/texts/31)
+
+code (reference=#/texts/32)
+
+text (reference=#/texts/33)
+
+formula (reference=#/texts/34)
+
+form (reference=#/form_items/0)
+
+text (reference=#/texts/35)
+
+text (reference=#/texts/36)
+
+text (reference=#/texts/37)
+
+text (reference=#/texts/38)
+
+text (reference=#/texts/39)
+
+text (reference=#/texts/40)
+
+text (reference=#/texts/41)
+
+text (reference=#/texts/42)
+
+text (reference=#/texts/43)
+
+text (reference=#/texts/44)
+
+text (reference=#/texts/55)
diff --git a/test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_true.gt.md b/test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_true.gt.md
new file mode 100644
index 00000000..da6a4cb7
--- /dev/null
+++ b/test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_true.gt.md
@@ -0,0 +1,59 @@
+title (reference=#/texts/1): Title of the Document
+
+text (reference=#/texts/2)
+
+text (reference=#/texts/3)
+
+  section-header (level=1, reference=#/texts/4): 1. Introduction
+
+  text (reference=#/texts/5)
+
+  table 1 (reference=#/tables/0)
+
+  picture 1 (reference=#/pictures/0)
+
+  picture 2 (reference=#/pictures/1)
+
+  text (reference=#/texts/24)
+
+  code (reference=#/texts/25)
+
+  text (reference=#/texts/26)
+
+  text (reference=#/texts/28)
+
+  formula (reference=#/texts/29)
+
+  text (reference=#/texts/30)
+
+  text (reference=#/texts/31)
+
+  code (reference=#/texts/32)
+
+  text (reference=#/texts/33)
+
+  formula (reference=#/texts/34)
+
+  form (reference=#/form_items/0)
+
+  text (reference=#/texts/35)
+
+  text (reference=#/texts/36)
+
+  text (reference=#/texts/37)
+
+  text (reference=#/texts/38)
+
+  text (reference=#/texts/39)
+
+  text (reference=#/texts/40)
+
+  text (reference=#/texts/41)
+
+  text (reference=#/texts/42)
+
+  text (reference=#/texts/43)
+
+  text (reference=#/texts/44)
+
+  text (reference=#/texts/55)
diff --git a/test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_false.gt.md b/test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_false.gt.md
new file mode 100644
index 00000000..8e03ee91
--- /dev/null
+++ b/test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_false.gt.md
@@ -0,0 +1,59 @@
+# Title of the Document (reference=#/texts/1)
+
+text (reference=#/texts/2)
+
+text (reference=#/texts/3)
+
+## 1. Introduction (level=1, reference=#/texts/4)
+
+text (reference=#/texts/5)
+
+table 1 (reference=#/tables/0)
+
+picture 1 (reference=#/pictures/0)
+
+picture 2 (reference=#/pictures/1)
+
+text (reference=#/texts/24)
+
+code (reference=#/texts/25)
+
+text (reference=#/texts/26)
+
+text (reference=#/texts/28)
+
+formula (reference=#/texts/29)
+
+text (reference=#/texts/30)
+
+text (reference=#/texts/31)
+
+code (reference=#/texts/32)
+
+text (reference=#/texts/33)
+
+formula (reference=#/texts/34)
+
+form (reference=#/form_items/0)
+
+text (reference=#/texts/35)
+
+text (reference=#/texts/36)
+
+text (reference=#/texts/37)
+
+text (reference=#/texts/38)
+
+text (reference=#/texts/39)
+
+text (reference=#/texts/40)
+
+text (reference=#/texts/41)
+
+text (reference=#/texts/42)
+
+text (reference=#/texts/43)
+
+text (reference=#/texts/44)
+
+text (reference=#/texts/55)
diff --git a/test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_true.gt.md b/test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_true.gt.md
new file mode 100644
index 00000000..68f8efc7
--- /dev/null
+++ b/test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_true.gt.md
@@ -0,0 +1,59 @@
+# Title of the Document (reference=#/texts/1)
+
+text (reference=#/texts/2)
+
+text (reference=#/texts/3)
+
+  ## 1. Introduction (level=1, reference=#/texts/4)
+
+  text (reference=#/texts/5)
+
+  table 1 (reference=#/tables/0)
+
+  picture 1 (reference=#/pictures/0)
+
+  picture 2 (reference=#/pictures/1)
+
+  text (reference=#/texts/24)
+
+  code (reference=#/texts/25)
+
+  text (reference=#/texts/26)
+
+  text (reference=#/texts/28)
+
+  formula (reference=#/texts/29)
+
+  text (reference=#/texts/30)
+
+  text (reference=#/texts/31)
+
+  code (reference=#/texts/32)
+
+  text (reference=#/texts/33)
+
+  formula (reference=#/texts/34)
+
+  form (reference=#/form_items/0)
+
+  text (reference=#/texts/35)
+
+  text (reference=#/texts/36)
+
+  text (reference=#/texts/37)
+
+  text (reference=#/texts/38)
+
+  text (reference=#/texts/39)
+
+  text (reference=#/texts/40)
+
+  text (reference=#/texts/41)
+
+  text (reference=#/texts/42)
+
+  text (reference=#/texts/43)
+
+  text (reference=#/texts/44)
+
+  text (reference=#/texts/55)
diff --git a/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_false.gt.md b/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_false.gt.md
new file mode 100644
index 00000000..4d406b13
--- /dev/null
+++ b/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_false.gt.md
@@ -0,0 +1,3 @@
+title (reference=#/texts/1): Title of the Document
+
+section-header (level=1, reference=#/texts/4): 1. Introduction
diff --git a/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_true.gt.md b/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_true.gt.md
new file mode 100644
index 00000000..fdb5b964
--- /dev/null
+++ b/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_true.gt.md
@@ -0,0 +1,3 @@
+title (reference=#/texts/1): Title of the Document
+
+  section-header (level=1, reference=#/texts/4): 1. Introduction
diff --git a/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_false.gt.md b/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_false.gt.md
new file mode 100644
index 00000000..628c2a04
--- /dev/null
+++ b/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_false.gt.md
@@ -0,0 +1,3 @@
+# Title of the Document (reference=#/texts/1)
+
+## 1. Introduction (level=1, reference=#/texts/4)
diff --git a/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_true.gt.md b/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_true.gt.md
new file mode 100644
index 00000000..341caca5
--- /dev/null
+++ b/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_true.gt.md
@@ -0,0 +1,3 @@
+# Title of the Document (reference=#/texts/1)
+
+  ## 1. Introduction (level=1, reference=#/texts/4)
diff --git a/test/test_markdown_summary.py b/test/test_markdown_summary.py
index 6c3b72dc..372ff6c6 100644
--- a/test/test_markdown_summary.py
+++ b/test/test_markdown_summary.py
@@ -5,52 +5,79 @@
 import pytest
 
 from docling_core.transforms.serializer.markdown_summary import (
+    MarkdownSummaryMode,
     MarkdownSummaryParams,
     MarkdownSummarySerializer,
 )
 
 from .test_docling_doc import _construct_doc
 
+from .test_data_gen_flag import GEN_TEST_DATA
 
+
+def verify(exp_file: Path, actual: str):
+    if GEN_TEST_DATA:
+        with open(exp_file, "w", encoding="utf-8") as f:
+            f.write(f"{actual}\n")
+    else:
+        with open(exp_file, "r", encoding="utf-8") as f:
+            expected = f.read().rstrip()
+        assert expected == actual
+
+@pytest.mark.parametrize(
+    "mode",
+    [
+        MarkdownSummaryMode.OUTLINE,
+        MarkdownSummaryMode.TABLE_OF_CONTENTS,
+    ],
+)
 @pytest.mark.parametrize("use_md_headers", [False, True])
-def test_markdown_summary_outline(use_md_headers: bool):
+@pytest.mark.parametrize("indent_by_section_level", [False, True])
+def test_markdown_summary_outline(
+    mode: MarkdownSummaryMode, use_md_headers: bool, indent_by_section_level: bool
+):
     # Build a representative document with title, headers, text, lists, table, and pictures
     doc = _construct_doc()
 
     ser = MarkdownSummarySerializer(
         doc=doc,
-        params=MarkdownSummaryParams(use_markdown_headers=use_md_headers),
+        params=MarkdownSummaryParams(
+            use_markdown_headers=use_md_headers,
+            mode=mode,
+            indent_by_section_level=indent_by_section_level,
+        ),
     )
 
     outline = ser.serialize().text
 
-    print(outline)
-    
-    # Leading list items should not appear in the outline
-    assert "item of leading list" not in outline
-
-    # Captions should be excluded from outline
-    assert "This is the caption of table 1." not in outline
-    assert "This is the caption of figure 1." not in outline
-    assert "This is the caption of figure 2." not in outline
-
-    # Title and section header formatting based on params
-    if use_md_headers:
-        # Markdown-style headers
-        assert "# Title of the Document" in outline
-        assert "## 1. Introduction" in outline
-        # Ensure we don't get the verbose label style when using MD headers
-        assert "title (reference=" not in outline.splitlines()[0]
-    else:
-        # Verbose outline lines with references
-        first_line = outline.splitlines()[0]
-        assert first_line.startswith("title (reference=") and first_line.endswith(
-            "): Title of the Document"
-        )
-        # Section header line contains level and reference
-        assert "section-header (level=1, reference=" in outline
-
-    # Tables and pictures should be numbered and listed with references
-    assert "table 1 (reference=" in outline
-    assert "picture 1 (reference=" in outline
+    # Compare with or generate ground-truth output
+    root_dir = Path("./test/data/doc")
+    exp_path = (
+        root_dir
+        / f"constructed_mdsum_{mode.value}_mdhdr_{str(use_md_headers).lower()}_indent_{str(indent_by_section_level).lower()}.gt.md"
+    )
+    verify(exp_file=exp_path, actual=outline)
+
+@pytest.mark.parametrize("use_md_headers", [False, True])
+def test_markdown_summary_indentation(use_md_headers: bool):
+    # Build a representative document
+    doc = _construct_doc()
+
+    ser = MarkdownSummarySerializer(
+        doc=doc,
+        params=MarkdownSummaryParams(
+            use_markdown_headers=use_md_headers,
+            indent_by_section_level=True,
+            indent_size=2,
+        ),
+    )
+
+    outline = ser.serialize().text
 
+    # Compare with or generate ground-truth output for indentation-specific case
+    root_dir = Path("./test/data/doc")
+    exp_path = (
+        root_dir
+        / f"constructed_mdsum_indent_mdhdr_{str(use_md_headers).lower()}_indent_true_size_2.gt.md"
+    )
+    verify(exp_file=exp_path, actual=outline)

From c6ace9a84ec7cc08019a2b062cd26622be61c008 Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Tue, 23 Sep 2025 14:30:19 +0200
Subject: [PATCH 06/14] passed all the pre-commit hooks

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 .../transforms/serializer/markdown_summary.py | 354 +++++++++++-------
 docling_core/types/doc/document.py            |   6 +-
 test/test_markdown_summary.py                 |   5 +-
 3 files changed, 234 insertions(+), 131 deletions(-)

diff --git a/docling_core/transforms/serializer/markdown_summary.py b/docling_core/transforms/serializer/markdown_summary.py
index bbfd3ac6..60403441 100644
--- a/docling_core/transforms/serializer/markdown_summary.py
+++ b/docling_core/transforms/serializer/markdown_summary.py
@@ -1,13 +1,18 @@
-from typing import Any, Optional, Union
-from pathlib import Path
+"""Markdown document summary serializers (outline and TOC).
+
+This module provides a Markdown-focused serializer that emits a compact
+document outline or a table of contents derived from a Docling document.
+"""
+
 from enum import Enum
+from pathlib import Path
+from typing import Any, Optional, Union
 
 from pydantic import AnyUrl
 from typing_extensions import override
 
 from docling_core.transforms.serializer.base import (
     BaseAnnotationSerializer,
-    BaseDocSerializer,
     BaseFallbackSerializer,
     BaseFormSerializer,
     BaseInlineSerializer,
@@ -38,8 +43,8 @@
     CodeItem,
     DocItem,
     DocItemLabel,
-    DoclingDocument,
     FormItem,
+    GroupItem,
     ListGroup,
     ListItem,
     NodeItem,
@@ -50,16 +55,19 @@
     TitleItem,
 )
 
+
 class MarkdownSummaryMode(str, Enum):
-    
+    """Display mode for document summary output."""
+
     OUTLINE = "outline"
     TABLE_OF_CONTENTS = "table_of_contents"
-    
+
+
 class MarkdownSummaryParams(CommonParams):
     """Markdown-specific serialization parameters for outline."""
 
     mode: MarkdownSummaryMode = MarkdownSummaryMode.OUTLINE
-    
+
     use_markdown_headers: bool = False
 
     add_label_counter: bool = False
@@ -72,7 +80,7 @@ class MarkdownSummaryParams(CommonParams):
     indent_size: int = 2
 
     toc_labels: list[DocItemLabel] = [DocItemLabel.TITLE, DocItemLabel.SECTION_HEADER]
-    
+
 
 class MarkdownSummarySerializer(DocSerializer):
     """Markdown-specific document summary serializer."""
@@ -94,14 +102,17 @@ class MarkdownSummarySerializer(DocSerializer):
 
     @override
     def serialize_bold(self, text: str, **kwargs: Any) -> str:
+        """Apply Markdown bold formatting to ``text``."""
         return f"**{text}**"
 
     @override
     def serialize_italic(self, text: str, **kwargs: Any) -> str:
+        """Apply Markdown italic formatting to ``text``."""
         return f"*{text}*"
 
     @override
     def serialize_strikethrough(self, text: str, **kwargs: Any) -> str:
+        """Apply Markdown strikethrough formatting to ``text``."""
         return f"~~{text}~~"
 
     @override
@@ -111,13 +122,17 @@ def serialize_hyperlink(
         hyperlink: Union[AnyUrl, Path],
         **kwargs: Any,
     ) -> str:
+        """Render a Markdown hyperlink around ``text``.
+
+        Returns a ``[text](href)`` string with the provided URL/path.
+        """
         return f"[{text}]({str(hyperlink)})"
 
     @override
     def requires_page_break(self) -> bool:
         """Whether to add page breaks."""
         return False
-    
+
     @override
     def get_parts(
         self,
@@ -126,7 +141,8 @@ def get_parts(
     ) -> list[SerializationResult]:
         """Return a single part containing the document (or subtree) outline."""
         return self._create_document_outline(root=item, **kwargs)
-    #return [create_ser_result(text=outline, span_source=[])] if outline else []
+
+    # return [create_ser_result(text=outline, span_source=[])] if outline else []
 
     @override
     def serialize_doc(
@@ -135,9 +151,170 @@ def serialize_doc(
         parts: list[SerializationResult],
         **kwargs: Any,
     ) -> SerializationResult:
+        """Serialize a document summary from pre-rendered parts."""
         text_res = "\n\n".join([p.text for p in parts if p.text])
         return create_ser_result(text=text_res, span_source=parts)
 
+    # -------------------------
+    # Helper methods (internal)
+    # -------------------------
+
+    def _next_idx(
+        self, *, lbl: DocItemLabel, label_counter: dict[DocItemLabel, int]
+    ) -> int:
+        label_counter[lbl] = label_counter.get(lbl, 0) + 1
+        return label_counter[lbl]
+
+    def _include_label(
+        self, *, params: MarkdownSummaryParams, lbl: DocItemLabel
+    ) -> bool:
+        """Return True if label should be included (esp. for TOC mode)."""
+        if (
+            params.mode == MarkdownSummaryMode.TABLE_OF_CONTENTS
+            and lbl not in params.toc_labels
+        ):
+            return False
+        return True
+
+    def _is_node_excluded(
+        self,
+        *,
+        node: NodeItem,
+        excluded: set[str],
+        params: MarkdownSummaryParams,
+    ) -> bool:
+        """Centralize exclusion logic applied to nodes in the outline."""
+        if isinstance(node, DocItem):
+            if node.self_ref in excluded:
+                return True
+            if (
+                isinstance(node, TextItem)
+                and node.self_ref in self._captions_of_some_item
+            ):
+                return True
+            if not self._include_label(params=params, lbl=node.label):
+                return True
+        return False
+
+    def _compose_node_label(
+        self,
+        *,
+        node: NodeItem,
+        params: MarkdownSummaryParams,
+        label_counter: dict[DocItemLabel, int],
+    ) -> str:
+        """Compute the textual label for a node (without refs).
+
+        - When ``add_label_counter`` is True, add counters for non-table/picture
+          DocItems.
+        - Tables/pictures are numbered separately when building the final line.
+        - For groups, expose the raw normalized label but do not emit a line.
+        """
+        node_label = ""
+        if (
+            params.add_label_counter
+            and isinstance(node, DocItem)
+            and not isinstance(node, (TableItem, PictureItem))
+        ):
+            base = str(node.label).replace("_", "-")
+            lbl_cnt = self._next_idx(lbl=node.label, label_counter=label_counter)
+            node_label = f"{base} {lbl_cnt}"
+        elif isinstance(node, (DocItem, GroupItem)):
+            node_label = str(node.label).replace("_", "-")
+        return node_label
+
+    def _ref_part(self, *, node: NodeItem, params: MarkdownSummaryParams) -> str:
+        return f" (reference={node.self_ref})" if params.add_references else ""
+
+    def _strip_md_header_prefix(self, text: str) -> str:
+        stripped = text.lstrip()
+        while stripped.startswith("#"):
+            stripped = stripped.lstrip("#").lstrip()
+        return stripped
+
+    def _line_for_title(
+        self,
+        *,
+        node: TitleItem,
+        params: MarkdownSummaryParams,
+        node_label: str,
+        ref_part: str,
+    ) -> str:
+        raw_text = self.text_serializer.serialize(
+            item=node, doc_serializer=self, doc=self.doc
+        ).text
+        if params.use_markdown_headers:
+            text = raw_text.lstrip()
+            return f"{text}{ref_part}"
+        text = raw_text.lstrip().lstrip("# ") if raw_text.startswith("#") else raw_text
+        return (
+            f"{node_label}{ref_part}: {text}"
+            if params.add_references
+            else f"{node_label}: {text}"
+        )
+
+    def _line_for_section_header(
+        self,
+        *,
+        node: SectionHeaderItem,
+        params: MarkdownSummaryParams,
+        node_label: str,
+    ) -> str:
+        raw_text = self.text_serializer.serialize(
+            item=node, doc_serializer=self, doc=self.doc
+        ).text
+        if params.use_markdown_headers:
+            text = raw_text.lstrip()
+            if params.add_references:
+                return f"{text} (level={node.level}, reference={node.self_ref})"
+            return f"{text} (level={node.level})"
+        stripped = self._strip_md_header_prefix(raw_text)
+        if params.add_references:
+            return f"{node_label} (level={node.level}, reference={node.self_ref}): {stripped}"
+        return f"{node_label} (level={node.level}): {stripped}"
+
+    def _line_for_simple_label(self, *, node_label: str, ref_part: str) -> str:
+        return f"{node_label}{ref_part}"
+
+    def _line_for_table(
+        self, *, node_label: str, ref_part: str, label_counter: dict[DocItemLabel, int]
+    ) -> str:
+        lbl_cnt = self._next_idx(lbl=DocItemLabel.TABLE, label_counter=label_counter)
+        return f"{node_label} {lbl_cnt}{ref_part}"
+
+    def _line_for_picture(
+        self, *, node_label: str, ref_part: str, label_counter: dict[DocItemLabel, int]
+    ) -> str:
+        lbl_cnt = self._next_idx(lbl=DocItemLabel.PICTURE, label_counter=label_counter)
+        return f"{node_label} {lbl_cnt}{ref_part}"
+
+    def _get_summary(self, *, node: NodeItem, params: MarkdownSummaryParams) -> str:
+        if (
+            params.add_summary
+            and (node.summary is not None)
+            and isinstance(node.summary, str)
+        ):
+            return node.summary
+        return ""
+
+    def _indent_line(
+        self,
+        *,
+        line: str,
+        node: NodeItem,
+        current_section_level: int,
+        params: MarkdownSummaryParams,
+    ) -> str:
+        if not line:
+            return line
+        if not params.indent_by_section_level:
+            return line
+        indent_level = (
+            node.level if isinstance(node, SectionHeaderItem) else current_section_level
+        )
+        indent = " " * (params.indent_size * indent_level)
+        return f"{indent}{line}"
+
     def _create_document_outline(
         self,
         *,
@@ -148,145 +325,68 @@ def _create_document_outline(
         params = self.params.merge_with_patch(patch=kwargs)
         excluded = self.get_excluded_refs(**kwargs)
 
-        # Per-label counters; used consistently when params.add_label_counter is True
-        # and always for table/picture numbering.
         label_counter: dict[DocItemLabel, int] = {}
-        lines: list[str] = []
         visited: set[str] = set()
-
         result: list[SerializationResult] = []
-
-        # Track latest section header level for indentation
         current_section_level: int = 0
 
-        # Helper to increment and fetch the counter for a given label
-        def _next_idx(lbl: DocItemLabel) -> int:
-            label_counter[lbl] = label_counter.get(lbl, 0) + 1
-            return label_counter[lbl]
-        
-        # Helper to identify if the label should be included in the table-of-contents
-        def _include(lbl: DocItemLabel) -> int:
-            if params.mode==MarkdownSummaryMode.TABLE_OF_CONTENTS and \
-               (lbl not in params.toc_labels):
-                return False
-
-            return True
-        
-        # Iterate depth-first with groups, similar to MarkdownSerializer
-        for node, level in self.doc.iterate_items(root=root, with_groups=True):
+        for node, _level in self.doc.iterate_items(root=root, with_groups=True):
             if node.self_ref in visited:
                 continue
-            
             visited.add(node.self_ref)
 
-            if not _include(lbl=node.label):
-                continue
-            
-            summary = ""
-            if params.add_summary and \
-               (node.summary is not None) and \
-               isinstance(node.summary, str):
-                summary = node.summary
-            
             # Skip list items in outline
             if isinstance(node, ListItem):
                 continue
 
-            # Respect excluded refs
-            if isinstance(node, DocItem):
-                if node.self_ref in excluded:
-                    continue
-                if isinstance(node, TextItem) and node.self_ref in self._captions_of_some_item:
-                    continue
-
-            line:str = ""
+            # Respect exclusion logic
+            if self._is_node_excluded(node=node, excluded=excluded, params=params):
+                continue
 
-            # Base label string (normalize underscores to hyphens)
-            node_label = str(node.label).replace("_", "-")
-            if params.add_label_counter and not isinstance(node, (TableItem, PictureItem)):
-                # Apply generic counters to non-table/picture items
-                node_label = f"{node_label} {_next_idx(node.label)}"
+            summary = self._get_summary(node=node, params=params)
+            node_label = self._compose_node_label(
+                node=node, params=params, label_counter=label_counter
+            )
+            ref_part = self._ref_part(node=node, params=params)
 
-            # Build optional reference snippet only when enabled
-            ref_part = f" (reference={node.self_ref})" if params.add_references else ""
-                
+            line = ""
             if isinstance(node, TitleItem):
-
-                raw_text = self.text_serializer.serialize(
-                    item=node, doc_serializer=self, doc=self.doc
-                ).text
-
-                if params.use_markdown_headers:
-                    # raw_text already includes the heading marker
-                    text = raw_text.lstrip()
-                    line = f"{text}{ref_part}"
-                else:
-                    # strip leading markdown header markers for verbose representation
-                    text = raw_text.lstrip().lstrip("# ") if raw_text.startswith("#") else raw_text
-                    if params.add_references:
-                        line = f"{node_label}{ref_part}: {text}"
-                    else:
-                        line = f"{node_label}: {text}"
-
+                line = self._line_for_title(
+                    node=node, params=params, node_label=node_label, ref_part=ref_part
+                )
             elif isinstance(node, SectionHeaderItem):
-
-                raw_text = self.text_serializer.serialize(
-                    item=node, doc_serializer=self, doc=self.doc
-                ).text
-
-                if params.use_markdown_headers:
-                    # raw_text already includes the correct number of '#'
-                    text = raw_text.lstrip()
-                    if params.add_references:
-                        line = f"{text} (level={node.level}, reference={node.self_ref})"
-                    else:
-                        line = f"{text} (level={node.level})"
-                else:
-                    # strip leading markdown header markers for verbose representation
-                    stripped = raw_text.lstrip()
-                    while stripped.startswith("#"):
-                        stripped = stripped.lstrip("#").lstrip()
-                    text = stripped
-                    if params.add_references:
-                        line = f"{node_label} (level={node.level}, reference={node.self_ref}): {text}"
-                    else:
-                        line = f"{node_label} (level={node.level}): {text}"
-
-                # Update current section level for subsequent items
+                line = self._line_for_section_header(
+                    node=node, params=params, node_label=node_label
+                )
                 current_section_level = node.level
-
             elif isinstance(node, ListGroup):
-                # Skip listing list groups in summary to avoid leading list noise
-                line = ""
-                    
-            elif isinstance(node, TextItem):
-                line = f"{node_label}{ref_part}"
-
-            elif isinstance(node, FormItem):
-                line = f"{node_label}{ref_part}"
-
-            elif isinstance(node, CodeItem):
-                line = f"{node_label}{ref_part}"
-                
+                line = ""  # intentionally skip
+            elif isinstance(node, (TextItem, FormItem, CodeItem)):
+                line = self._line_for_simple_label(
+                    node_label=node_label, ref_part=ref_part
+                )
             elif isinstance(node, TableItem):
-                # Tables are always numbered in the summary
-                line = f"{node_label} {_next_idx(DocItemLabel.TABLE)}{ref_part}"
-                
+                line = self._line_for_table(
+                    node_label=node_label,
+                    ref_part=ref_part,
+                    label_counter=label_counter,
+                )
             elif isinstance(node, PictureItem):
-                # Pictures are always numbered in the summary
-                line = f"{node_label} {_next_idx(DocItemLabel.PICTURE)}{ref_part}"
+                line = self._line_for_picture(
+                    node_label=node_label,
+                    ref_part=ref_part,
+                    label_counter=label_counter,
+                )
 
-            if len(summary)>0:
-                line += f" (summary={summary})"
+            if summary:
+                line = f"{line} (summary={summary})" if line else line
 
-            # Apply indentation based on latest section level if enabled
-            if params.indent_by_section_level:
-                indent_level = current_section_level
-                # For a section-header, indent by its own level
-                if isinstance(node, SectionHeaderItem):
-                    indent_level = node.level
-                indent = " " * (params.indent_size * indent_level)
-                line = f"{indent}{line}" if line else line
+            line = self._indent_line(
+                line=line,
+                node=node,
+                current_section_level=current_section_level,
+                params=params,
+            )
 
             if line:
                 result.append(
@@ -295,5 +395,5 @@ def _include(lbl: DocItemLabel) -> int:
                         span_source=node if isinstance(node, DocItem) else [],
                     )
                 )
-            
+
         return result
diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py
index 32d7c533..7ad20a0a 100644
--- a/docling_core/types/doc/document.py
+++ b/docling_core/types/doc/document.py
@@ -952,8 +952,10 @@ class NodeItem(BaseModel):
 
     model_config = ConfigDict(extra="forbid")
 
-    summary: Optional[str] = Field(default=None, exclude=True)  # optional, not serialized
-    
+    summary: Optional[str] = Field(
+        default=None, exclude=True
+    )  # optional, not serialized
+
     def get_ref(self) -> RefItem:
         """get_ref."""
         return RefItem(cref=self.self_ref)
diff --git a/test/test_markdown_summary.py b/test/test_markdown_summary.py
index 372ff6c6..ddeb6e74 100644
--- a/test/test_markdown_summary.py
+++ b/test/test_markdown_summary.py
@@ -10,9 +10,8 @@
     MarkdownSummarySerializer,
 )
 
-from .test_docling_doc import _construct_doc
-
 from .test_data_gen_flag import GEN_TEST_DATA
+from .test_docling_doc import _construct_doc
 
 
 def verify(exp_file: Path, actual: str):
@@ -24,6 +23,7 @@ def verify(exp_file: Path, actual: str):
             expected = f.read().rstrip()
         assert expected == actual
 
+
 @pytest.mark.parametrize(
     "mode",
     [
@@ -58,6 +58,7 @@ def test_markdown_summary_outline(
     )
     verify(exp_file=exp_path, actual=outline)
 
+
 @pytest.mark.parametrize("use_md_headers", [False, True])
 def test_markdown_summary_indentation(use_md_headers: bool):
     # Build a representative document

From 01bb9ee733f478d56ac64d62a627253c6b6f135e Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Tue, 23 Sep 2025 15:35:22 +0200
Subject: [PATCH 07/14] fixing some gt-data

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 docling_core/types/doc/document.py                |  4 +---
 test/data/docling_document/unit/CodeItem.yaml     | 15 ++++++++-------
 test/data/docling_document/unit/FloatingItem.yaml |  3 ++-
 test/data/docling_document/unit/FormItem.yaml     |  3 ++-
 test/data/docling_document/unit/FormulaItem.yaml  |  9 +++++----
 test/data/docling_document/unit/KeyValueItem.yaml |  3 ++-
 test/data/docling_document/unit/ListItem.yaml     |  9 +++++----
 test/data/docling_document/unit/PictureItem.yaml  |  3 ++-
 .../docling_document/unit/SectionHeaderItem.yaml  |  9 +++++----
 test/data/docling_document/unit/TableItem.yaml    |  5 +++--
 test/data/docling_document/unit/TextItem.yaml     |  9 +++++----
 test/data/docling_document/unit/TitleItem.yaml    |  9 +++++----
 test/test_docling_doc.py                          | 11 ++++++-----
 test/test_json_schema_to_search_mapper.py         |  2 +-
 test/test_otsl_table_export.py                    |  6 +++---
 15 files changed, 55 insertions(+), 45 deletions(-)

diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py
index 7ad20a0a..51237db6 100644
--- a/docling_core/types/doc/document.py
+++ b/docling_core/types/doc/document.py
@@ -952,9 +952,7 @@ class NodeItem(BaseModel):
 
     model_config = ConfigDict(extra="forbid")
 
-    summary: Optional[str] = Field(
-        default=None, exclude=True
-    )  # optional, not serialized
+    summary: Optional[str] = None  # serialized only when not None
 
     def get_ref(self) -> RefItem:
         """get_ref."""
diff --git a/test/data/docling_document/unit/CodeItem.yaml b/test/data/docling_document/unit/CodeItem.yaml
index 09995640..9d0aee3d 100644
--- a/test/data/docling_document/unit/CodeItem.yaml
+++ b/test/data/docling_document/unit/CodeItem.yaml
@@ -1,15 +1,16 @@
-children: []
 captions: []
-footnotes: []
-references: []
-image: null
+children: []
 code_language: Python
 content_layer: body
+footnotes: []
+formatting: null
+hyperlink: null
+image: null
 label: code
 orig: whatever
 parent: null
 prov: []
+references: []
 self_ref: '#'
-text: print(Hello World!)
-formatting: null
-hyperlink: null
+summary: null
+text: print(Hello World!)
\ No newline at end of file
diff --git a/test/data/docling_document/unit/FloatingItem.yaml b/test/data/docling_document/unit/FloatingItem.yaml
index 21beef40..2338cc35 100644
--- a/test/data/docling_document/unit/FloatingItem.yaml
+++ b/test/data/docling_document/unit/FloatingItem.yaml
@@ -1,5 +1,6 @@
 captions: []
 children: []
+content_layer: body
 footnotes: []
 image: null
 label: text
@@ -7,4 +8,4 @@ parent: null
 prov: []
 references: []
 self_ref: '#'
-content_layer: body
\ No newline at end of file
+summary: null
\ No newline at end of file
diff --git a/test/data/docling_document/unit/FormItem.yaml b/test/data/docling_document/unit/FormItem.yaml
index af7a61e1..39818d6d 100644
--- a/test/data/docling_document/unit/FormItem.yaml
+++ b/test/data/docling_document/unit/FormItem.yaml
@@ -28,4 +28,5 @@ label: form
 parent: null
 prov: []
 references: []
-self_ref: '#'
\ No newline at end of file
+self_ref: '#'
+summary: null
\ No newline at end of file
diff --git a/test/data/docling_document/unit/FormulaItem.yaml b/test/data/docling_document/unit/FormulaItem.yaml
index 25057908..680b8acb 100644
--- a/test/data/docling_document/unit/FormulaItem.yaml
+++ b/test/data/docling_document/unit/FormulaItem.yaml
@@ -1,10 +1,11 @@
 children: []
+content_layer: body
+formatting: null
+hyperlink: null
 label: formula
 orig: whatever
 parent: null
 prov: []
 self_ref: '#'
-text: E=mc^2
-content_layer: body
-formatting: null
-hyperlink: null
+summary: null
+text: E=mc^2
\ No newline at end of file
diff --git a/test/data/docling_document/unit/KeyValueItem.yaml b/test/data/docling_document/unit/KeyValueItem.yaml
index 219e951e..09a31ed7 100644
--- a/test/data/docling_document/unit/KeyValueItem.yaml
+++ b/test/data/docling_document/unit/KeyValueItem.yaml
@@ -28,4 +28,5 @@ label: key_value_region
 parent: null
 prov: []
 references: []
-self_ref: '#'
\ No newline at end of file
+self_ref: '#'
+summary: null
\ No newline at end of file
diff --git a/test/data/docling_document/unit/ListItem.yaml b/test/data/docling_document/unit/ListItem.yaml
index 20d8de90..ebcc755a 100644
--- a/test/data/docling_document/unit/ListItem.yaml
+++ b/test/data/docling_document/unit/ListItem.yaml
@@ -1,12 +1,13 @@
 children: []
+content_layer: body
 enumerated: true
+formatting: null
+hyperlink: null
 label: list_item
 marker: (1)
 orig: whatever
 parent: null
 prov: []
 self_ref: '#'
-text: whatever
-content_layer: body
-formatting: null
-hyperlink: null
+summary: null
+text: whatever
\ No newline at end of file
diff --git a/test/data/docling_document/unit/PictureItem.yaml b/test/data/docling_document/unit/PictureItem.yaml
index ffe342a6..f4f07d3e 100644
--- a/test/data/docling_document/unit/PictureItem.yaml
+++ b/test/data/docling_document/unit/PictureItem.yaml
@@ -1,6 +1,7 @@
 annotations: []
 captions: []
 children: []
+content_layer: body
 footnotes: []
 image: null
 label: picture
@@ -8,4 +9,4 @@ parent: null
 prov: []
 references: []
 self_ref: '#'
-content_layer: body
\ No newline at end of file
+summary: null
\ No newline at end of file
diff --git a/test/data/docling_document/unit/SectionHeaderItem.yaml b/test/data/docling_document/unit/SectionHeaderItem.yaml
index 68f641f9..67f662e6 100644
--- a/test/data/docling_document/unit/SectionHeaderItem.yaml
+++ b/test/data/docling_document/unit/SectionHeaderItem.yaml
@@ -1,11 +1,12 @@
 children: []
+content_layer: body
+formatting: null
+hyperlink: null
 label: section_header
 level: 2
 orig: whatever
 parent: null
 prov: []
 self_ref: '#'
-text: whatever
-content_layer: body
-formatting: null
-hyperlink: null
+summary: null
+text: whatever
\ No newline at end of file
diff --git a/test/data/docling_document/unit/TableItem.yaml b/test/data/docling_document/unit/TableItem.yaml
index ae08e00e..778cb312 100644
--- a/test/data/docling_document/unit/TableItem.yaml
+++ b/test/data/docling_document/unit/TableItem.yaml
@@ -1,5 +1,7 @@
+annotations: []
 captions: []
 children: []
+content_layer: body
 data:
   grid:
   - - bbox: null
@@ -192,5 +194,4 @@ parent: null
 prov: []
 references: []
 self_ref: '#'
-content_layer: body
-annotations: []
+summary: null
\ No newline at end of file
diff --git a/test/data/docling_document/unit/TextItem.yaml b/test/data/docling_document/unit/TextItem.yaml
index 1f72637a..ee8247eb 100644
--- a/test/data/docling_document/unit/TextItem.yaml
+++ b/test/data/docling_document/unit/TextItem.yaml
@@ -1,10 +1,11 @@
 children: []
+content_layer: body
+formatting: null
+hyperlink: null
 label: text
 orig: whatever
 parent: null
 prov: []
 self_ref: '#'
-text: whatever
-content_layer: body
-formatting: null
-hyperlink: null
+summary: null
+text: whatever
\ No newline at end of file
diff --git a/test/data/docling_document/unit/TitleItem.yaml b/test/data/docling_document/unit/TitleItem.yaml
index 8e2a3dea..02d61247 100644
--- a/test/data/docling_document/unit/TitleItem.yaml
+++ b/test/data/docling_document/unit/TitleItem.yaml
@@ -1,10 +1,11 @@
 children: []
+content_layer: body
+formatting: null
+hyperlink: null
 label: title
 orig: whatever
 parent: null
 prov: []
 self_ref: '#'
-text: whatever
-content_layer: body
-formatting: null
-hyperlink: null
+summary: null
+text: whatever
\ No newline at end of file
diff --git a/test/test_docling_doc.py b/test/test_docling_doc.py
index d5ddb4dc..c60d9894 100644
--- a/test/test_docling_doc.py
+++ b/test/test_docling_doc.py
@@ -415,14 +415,15 @@ def read(name: str):
     def verify(dc, obj):
         pred = serialise(obj).strip()
 
-        if dc is KeyValueItem or dc is FormItem:
-            write(dc.__name__, pred)
+        # if dc is KeyValueItem or dc is FormItem or dc is TextItem:
+        #    write(dc.__name__, pred)
 
         pred = yaml.safe_load(pred)
-
         # print(f"\t{dc.__name__}:\n {pred}")
-        gold = read(dc.__name__)
 
+        gold = read(dc.__name__)
+        # print(f"\t{dc.__name__}:\n {gold}")
+        
         assert pred == gold, f"pred!=gold for {dc.__name__}"
 
     # Iterate over the derived classes of the BaseClass
@@ -727,7 +728,7 @@ def _test_export_methods(
         second_page = first_page + 1
         if second_page in doc.pages:  # Only test if document has at least 2 pages
             dt_pages_pred = doc.export_to_doctags(pages={first_page, second_page})
-            print(dt_pages_pred)
+            # print(dt_pages_pred)
             _verify_regression_test(dt_pages_pred, filename=filename, ext="pages.dt")
 
     # Test Tables export ...
diff --git a/test/test_json_schema_to_search_mapper.py b/test/test_json_schema_to_search_mapper.py
index e52984d4..2973fa67 100644
--- a/test/test_json_schema_to_search_mapper.py
+++ b/test/test_json_schema_to_search_mapper.py
@@ -60,7 +60,7 @@ def test_json_schema_to_search_mapper_0():
 def test_json_schema_to_search_mapper_1():
     """Test the class JsonSchemaToSearchMapper."""
     s = Record.model_json_schema()
-    print(json.dumps(s, indent=2))
+    # print(json.dumps(s, indent=2))
 
     _meta = {
         "aliases": [".production", "ccc"],
diff --git a/test/test_otsl_table_export.py b/test/test_otsl_table_export.py
index 4b3534f3..c6b4e6b3 100644
--- a/test/test_otsl_table_export.py
+++ b/test/test_otsl_table_export.py
@@ -275,9 +275,9 @@ def test_table_export_to_otsl():
         add_cell_location=False, add_cell_text=False, doc=doc
     )
     print_friendly = otsl_string.split("<nl>")
-    print("OTSL out:")
-    for s in print_friendly:
-        print(s)
+    # print("OTSL out:")
+    # for s in print_friendly:
+    #     print(s)
     assert (
         otsl_string
         == "<rhed><lcel><rhed><fcel><xcel><xcel><nl><rhed><fcel><fcel><xcel><xcel><xcel><nl><rhed><fcel><fcel><fcel><ecel><ecel><nl><ucel><fcel><fcel><fcel><fcel><fcel><nl><srow><lcel><lcel><lcel><lcel><lcel><nl>"

From 830ba5c3f40c594ba31466360694ffed3445ce49 Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Tue, 23 Sep 2025 15:37:44 +0200
Subject: [PATCH 08/14] refactored the code

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 docs/DoclingDocument.json      | 158 +++++++++++++++++++++++++++++++++
 test/test_docling_doc.py       |   2 +-
 test/test_otsl_table_export.py |   2 +-
 3 files changed, 160 insertions(+), 2 deletions(-)

diff --git a/docs/DoclingDocument.json b/docs/DoclingDocument.json
index 305f5a9b..96b84fe9 100644
--- a/docs/DoclingDocument.json
+++ b/docs/DoclingDocument.json
@@ -194,6 +194,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "code",
           "default": "code",
@@ -475,6 +487,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "form",
           "default": "form",
@@ -598,6 +622,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "formula",
           "default": "formula",
@@ -807,6 +843,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "name": {
           "default": "group",
           "title": "Name",
@@ -912,6 +960,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "name": {
           "default": "group",
           "title": "Name",
@@ -962,6 +1022,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "key_value_region",
           "default": "key_value_region",
@@ -1054,6 +1126,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "name": {
           "default": "group",
           "title": "Name",
@@ -1104,6 +1188,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "list_item",
           "default": "list_item",
@@ -1341,6 +1437,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "default": "picture",
           "enum": [
@@ -1842,6 +1950,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "section_header",
           "default": "section_header",
@@ -2065,6 +2185,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "default": "table",
           "enum": [
@@ -2182,6 +2314,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "enum": [
             "caption",
@@ -2285,6 +2429,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "title",
           "default": "title",
@@ -2382,6 +2538,7 @@
         "parent": null,
         "children": [],
         "content_layer": "furniture",
+        "summary": null,
         "name": "_root_",
         "label": "unspecified"
       },
@@ -2394,6 +2551,7 @@
         "parent": null,
         "children": [],
         "content_layer": "body",
+        "summary": null,
         "name": "_root_",
         "label": "unspecified"
       }
diff --git a/test/test_docling_doc.py b/test/test_docling_doc.py
index c60d9894..6f8fbd99 100644
--- a/test/test_docling_doc.py
+++ b/test/test_docling_doc.py
@@ -423,7 +423,7 @@ def verify(dc, obj):
 
         gold = read(dc.__name__)
         # print(f"\t{dc.__name__}:\n {gold}")
-        
+
         assert pred == gold, f"pred!=gold for {dc.__name__}"
 
     # Iterate over the derived classes of the BaseClass
diff --git a/test/test_otsl_table_export.py b/test/test_otsl_table_export.py
index c6b4e6b3..dde0744c 100644
--- a/test/test_otsl_table_export.py
+++ b/test/test_otsl_table_export.py
@@ -274,7 +274,7 @@ def test_table_export_to_otsl():
     otsl_string = doc.tables[0].export_to_otsl(
         add_cell_location=False, add_cell_text=False, doc=doc
     )
-    print_friendly = otsl_string.split("<nl>")
+    otsl_string.split("<nl>")
     # print("OTSL out:")
     # for s in print_friendly:
     #     print(s)

From f376c01e2a4481babcbb8fbc1303831d8325b4f6 Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Wed, 24 Sep 2025 09:43:41 +0200
Subject: [PATCH 09/14] moved the markdown summary serializer to experimental for now

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 docling_core/experimental/serializer/__init__.py            | 5 +++++
 .../serializer/markdown_summary.py                          | 0
 test/test_doc_schema.py                                     | 6 +++---
 test/test_markdown_summary.py                               | 2 +-
 4 files changed, 9 insertions(+), 4 deletions(-)
 create mode 100644 docling_core/experimental/serializer/__init__.py
 rename docling_core/{transforms => experimental}/serializer/markdown_summary.py (100%)

diff --git a/docling_core/experimental/serializer/__init__.py b/docling_core/experimental/serializer/__init__.py
new file mode 100644
index 00000000..5c450a0e
--- /dev/null
+++ b/docling_core/experimental/serializer/__init__.py
@@ -0,0 +1,5 @@
+"""Experimental serializers for docling-core.
+
+This package contains experimental serialization utilities (e.g., Markdown
+summaries) that may change without notice.
+"""
diff --git a/docling_core/transforms/serializer/markdown_summary.py b/docling_core/experimental/serializer/markdown_summary.py
similarity index 100%
rename from docling_core/transforms/serializer/markdown_summary.py
rename to docling_core/experimental/serializer/markdown_summary.py
diff --git a/test/test_doc_schema.py b/test/test_doc_schema.py
index 109e7c88..acc560b4 100644
--- a/test/test_doc_schema.py
+++ b/test/test_doc_schema.py
@@ -35,8 +35,8 @@ def test_ccs_document():
             # try as well as dictionary
             doc = json.loads(file_json)
             CCSDocument.model_validate(doc)
-        except ValidationError as e:
-            print(f"Validation error in file {filename}:\n{e.json()}")
+        except ValidationError:
+            # print(f"Validation error in file {filename}:\n{e.json()}")
             raise
 
     # check doc-error-1 is invalid in logs
@@ -47,7 +47,7 @@ def test_ccs_document():
         assert False, f"Data in file {filename} should be invalid for CCSDocument model"
     except ValidationError as e:
         for error in e.errors():
-            print(type(error))
+            # print(type(error))
             assert all(
                 item in error["loc"] for item in ("description", "logs")
             ), f"Data in file {filename} should fail in logs"
diff --git a/test/test_markdown_summary.py b/test/test_markdown_summary.py
index ddeb6e74..d3f5b4b3 100644
--- a/test/test_markdown_summary.py
+++ b/test/test_markdown_summary.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-from docling_core.transforms.serializer.markdown_summary import (
+from docling_core.experimental.serializer.markdown_summary import (
     MarkdownSummaryMode,
     MarkdownSummaryParams,
     MarkdownSummarySerializer,

From 93a341c17d9af1ed0cd5db0fb78a0abd8c074bb5 Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Wed, 24 Sep 2025 09:57:01 +0200
Subject: [PATCH 10/14] refactored the code to make it inherit from
 MarkdownSerializer

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 .../serializer/markdown_summary.py            | 104 +++---------------
 1 file changed, 15 insertions(+), 89 deletions(-)

diff --git a/docling_core/experimental/serializer/markdown_summary.py b/docling_core/experimental/serializer/markdown_summary.py
index 60403441..e18e2a74 100644
--- a/docling_core/experimental/serializer/markdown_summary.py
+++ b/docling_core/experimental/serializer/markdown_summary.py
@@ -5,39 +5,15 @@
 """
 
 from enum import Enum
-from pathlib import Path
-from typing import Any, Optional, Union
+from typing import Any, Optional
 
-from pydantic import AnyUrl
 from typing_extensions import override
 
-from docling_core.transforms.serializer.base import (
-    BaseAnnotationSerializer,
-    BaseFallbackSerializer,
-    BaseFormSerializer,
-    BaseInlineSerializer,
-    BaseKeyValueSerializer,
-    BaseListSerializer,
-    BasePictureSerializer,
-    BaseTableSerializer,
-    BaseTextSerializer,
-    SerializationResult,
-)
-from docling_core.transforms.serializer.common import (
-    CommonParams,
-    DocSerializer,
-    create_ser_result,
-)
+from docling_core.transforms.serializer.base import SerializationResult
+from docling_core.transforms.serializer.common import create_ser_result
 from docling_core.transforms.serializer.markdown import (
-    MarkdownAnnotationSerializer,
-    MarkdownFallbackSerializer,
-    MarkdownFormSerializer,
-    MarkdownInlineSerializer,
-    MarkdownKeyValueSerializer,
-    MarkdownListSerializer,
-    MarkdownPictureSerializer,
-    MarkdownTableSerializer,
-    MarkdownTextSerializer,
+    MarkdownDocSerializer,
+    MarkdownParams,
 )
 from docling_core.types.doc import (
     CodeItem,
@@ -63,8 +39,11 @@ class MarkdownSummaryMode(str, Enum):
     TABLE_OF_CONTENTS = "table_of_contents"
 
 
-class MarkdownSummaryParams(CommonParams):
-    """Markdown-specific serialization parameters for outline."""
+class MarkdownSummaryParams(MarkdownParams):
+    """Markdown-specific serialization parameters for outline.
+
+    Inherits MarkdownParams to retain Markdown behaviors (escaping, links, etc.).
+    """
 
     mode: MarkdownSummaryMode = MarkdownSummaryMode.OUTLINE
 
@@ -82,57 +61,15 @@ class MarkdownSummaryParams(CommonParams):
     toc_labels: list[DocItemLabel] = [DocItemLabel.TITLE, DocItemLabel.SECTION_HEADER]
 
 
-class MarkdownSummarySerializer(DocSerializer):
-    """Markdown-specific document summary serializer."""
-
-    # Provide required serializer attributes to satisfy DocSerializer’s model
-    text_serializer: BaseTextSerializer = MarkdownTextSerializer()
-    table_serializer: BaseTableSerializer = MarkdownTableSerializer()
-    picture_serializer: BasePictureSerializer = MarkdownPictureSerializer()
-    key_value_serializer: BaseKeyValueSerializer = MarkdownKeyValueSerializer()
-    form_serializer: BaseFormSerializer = MarkdownFormSerializer()
-    fallback_serializer: BaseFallbackSerializer = MarkdownFallbackSerializer()
+class MarkdownSummarySerializer(MarkdownDocSerializer):
+    """Markdown-specific document summary serializer.
 
-    list_serializer: BaseListSerializer = MarkdownListSerializer()
-    inline_serializer: BaseInlineSerializer = MarkdownInlineSerializer()
-
-    annotation_serializer: BaseAnnotationSerializer = MarkdownAnnotationSerializer()
+    Inherits MarkdownDocSerializer to reuse Markdown formatting/post-processing
+    and sub-serializers; overrides only the parts selection logic.
+    """
 
     params: MarkdownSummaryParams = MarkdownSummaryParams()
 
-    @override
-    def serialize_bold(self, text: str, **kwargs: Any) -> str:
-        """Apply Markdown bold formatting to ``text``."""
-        return f"**{text}**"
-
-    @override
-    def serialize_italic(self, text: str, **kwargs: Any) -> str:
-        """Apply Markdown italic formatting to ``text``."""
-        return f"*{text}*"
-
-    @override
-    def serialize_strikethrough(self, text: str, **kwargs: Any) -> str:
-        """Apply Markdown strikethrough formatting to ``text``."""
-        return f"~~{text}~~"
-
-    @override
-    def serialize_hyperlink(
-        self,
-        text: str,
-        hyperlink: Union[AnyUrl, Path],
-        **kwargs: Any,
-    ) -> str:
-        """Render a Markdown hyperlink around ``text``.
-
-        Returns a ``[text](href)`` string with the provided URL/path.
-        """
-        return f"[{text}]({str(hyperlink)})"
-
-    @override
-    def requires_page_break(self) -> bool:
-        """Whether to add page breaks."""
-        return False
-
     @override
     def get_parts(
         self,
@@ -144,17 +81,6 @@ def get_parts(
 
     # return [create_ser_result(text=outline, span_source=[])] if outline else []
 
-    @override
-    def serialize_doc(
-        self,
-        *,
-        parts: list[SerializationResult],
-        **kwargs: Any,
-    ) -> SerializationResult:
-        """Serialize a document summary from pre-rendered parts."""
-        text_res = "\n\n".join([p.text for p in parts if p.text])
-        return create_ser_result(text=text_res, span_source=parts)
-
     # -------------------------
     # Helper methods (internal)
     # -------------------------

From 99a041f3a2120389386ab46d3b1e3c413a1ed6d0 Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Wed, 24 Sep 2025 11:28:45 +0200
Subject: [PATCH 11/14] updated the tests without indent

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 .../serializer/markdown_summary.py            | 31 ----------
 ...ndent_mdhdr_false_indent_true_size_2.gt.md | 59 -------------------
 ...indent_mdhdr_true_indent_true_size_2.gt.md | 59 -------------------
 ...nstructed_mdsum_outline_mdhdr_false.gt.md} |  0
 ...dsum_outline_mdhdr_false_indent_true.gt.md | 59 -------------------
 ...onstructed_mdsum_outline_mdhdr_true.gt.md} |  0
 ...mdsum_outline_mdhdr_true_indent_true.gt.md | 59 -------------------
 ...mdsum_table_of_contents_mdhdr_false.gt.md} |  0
 ..._of_contents_mdhdr_false_indent_true.gt.md |  3 -
 ..._mdsum_table_of_contents_mdhdr_true.gt.md} |  0
 ...e_of_contents_mdhdr_true_indent_true.gt.md |  3 -
 test/test_markdown_summary.py                 | 30 +---------
 12 files changed, 2 insertions(+), 301 deletions(-)
 delete mode 100644 test/data/doc/constructed_mdsum_indent_mdhdr_false_indent_true_size_2.gt.md
 delete mode 100644 test/data/doc/constructed_mdsum_indent_mdhdr_true_indent_true_size_2.gt.md
 rename test/data/doc/{constructed_mdsum_outline_mdhdr_false_indent_false.gt.md => constructed_mdsum_outline_mdhdr_false.gt.md} (100%)
 delete mode 100644 test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_true.gt.md
 rename test/data/doc/{constructed_mdsum_outline_mdhdr_true_indent_false.gt.md => constructed_mdsum_outline_mdhdr_true.gt.md} (100%)
 delete mode 100644 test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_true.gt.md
 rename test/data/doc/{constructed_mdsum_table_of_contents_mdhdr_false_indent_false.gt.md => constructed_mdsum_table_of_contents_mdhdr_false.gt.md} (100%)
 delete mode 100644 test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_true.gt.md
 rename test/data/doc/{constructed_mdsum_table_of_contents_mdhdr_true_indent_false.gt.md => constructed_mdsum_table_of_contents_mdhdr_true.gt.md} (100%)
 delete mode 100644 test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_true.gt.md

diff --git a/docling_core/experimental/serializer/markdown_summary.py b/docling_core/experimental/serializer/markdown_summary.py
index e18e2a74..4667ce46 100644
--- a/docling_core/experimental/serializer/markdown_summary.py
+++ b/docling_core/experimental/serializer/markdown_summary.py
@@ -53,10 +53,6 @@ class MarkdownSummaryParams(MarkdownParams):
     add_references: bool = True
     add_summary: bool = True
 
-    # Indentation control: when enabled, indent each line according to
-    # the latest encountered section-header level (title treated as level 0).
-    indent_by_section_level: bool = False
-    indent_size: int = 2
 
     toc_labels: list[DocItemLabel] = [DocItemLabel.TITLE, DocItemLabel.SECTION_HEADER]
 
@@ -223,24 +219,6 @@ def _get_summary(self, *, node: NodeItem, params: MarkdownSummaryParams) -> str:
             return node.summary
         return ""
 
-    def _indent_line(
-        self,
-        *,
-        line: str,
-        node: NodeItem,
-        current_section_level: int,
-        params: MarkdownSummaryParams,
-    ) -> str:
-        if not line:
-            return line
-        if not params.indent_by_section_level:
-            return line
-        indent_level = (
-            node.level if isinstance(node, SectionHeaderItem) else current_section_level
-        )
-        indent = " " * (params.indent_size * indent_level)
-        return f"{indent}{line}"
-
     def _create_document_outline(
         self,
         *,
@@ -254,7 +232,6 @@ def _create_document_outline(
         label_counter: dict[DocItemLabel, int] = {}
         visited: set[str] = set()
         result: list[SerializationResult] = []
-        current_section_level: int = 0
 
         for node, _level in self.doc.iterate_items(root=root, with_groups=True):
             if node.self_ref in visited:
@@ -284,7 +261,6 @@ def _create_document_outline(
                 line = self._line_for_section_header(
                     node=node, params=params, node_label=node_label
                 )
-                current_section_level = node.level
             elif isinstance(node, ListGroup):
                 line = ""  # intentionally skip
             elif isinstance(node, (TextItem, FormItem, CodeItem)):
@@ -307,13 +283,6 @@ def _create_document_outline(
             if summary:
                 line = f"{line} (summary={summary})" if line else line
 
-            line = self._indent_line(
-                line=line,
-                node=node,
-                current_section_level=current_section_level,
-                params=params,
-            )
-
             if line:
                 result.append(
                     create_ser_result(
diff --git a/test/data/doc/constructed_mdsum_indent_mdhdr_false_indent_true_size_2.gt.md b/test/data/doc/constructed_mdsum_indent_mdhdr_false_indent_true_size_2.gt.md
deleted file mode 100644
index da6a4cb7..00000000
--- a/test/data/doc/constructed_mdsum_indent_mdhdr_false_indent_true_size_2.gt.md
+++ /dev/null
@@ -1,59 +0,0 @@
-title (reference=#/texts/1): Title of the Document
-
-text (reference=#/texts/2)
-
-text (reference=#/texts/3)
-
-  section-header (level=1, reference=#/texts/4): 1. Introduction
-
-  text (reference=#/texts/5)
-
-  table 1 (reference=#/tables/0)
-
-  picture 1 (reference=#/pictures/0)
-
-  picture 2 (reference=#/pictures/1)
-
-  text (reference=#/texts/24)
-
-  code (reference=#/texts/25)
-
-  text (reference=#/texts/26)
-
-  text (reference=#/texts/28)
-
-  formula (reference=#/texts/29)
-
-  text (reference=#/texts/30)
-
-  text (reference=#/texts/31)
-
-  code (reference=#/texts/32)
-
-  text (reference=#/texts/33)
-
-  formula (reference=#/texts/34)
-
-  form (reference=#/form_items/0)
-
-  text (reference=#/texts/35)
-
-  text (reference=#/texts/36)
-
-  text (reference=#/texts/37)
-
-  text (reference=#/texts/38)
-
-  text (reference=#/texts/39)
-
-  text (reference=#/texts/40)
-
-  text (reference=#/texts/41)
-
-  text (reference=#/texts/42)
-
-  text (reference=#/texts/43)
-
-  text (reference=#/texts/44)
-
-  text (reference=#/texts/55)
diff --git a/test/data/doc/constructed_mdsum_indent_mdhdr_true_indent_true_size_2.gt.md b/test/data/doc/constructed_mdsum_indent_mdhdr_true_indent_true_size_2.gt.md
deleted file mode 100644
index 68f8efc7..00000000
--- a/test/data/doc/constructed_mdsum_indent_mdhdr_true_indent_true_size_2.gt.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# Title of the Document (reference=#/texts/1)
-
-text (reference=#/texts/2)
-
-text (reference=#/texts/3)
-
-  ## 1. Introduction (level=1, reference=#/texts/4)
-
-  text (reference=#/texts/5)
-
-  table 1 (reference=#/tables/0)
-
-  picture 1 (reference=#/pictures/0)
-
-  picture 2 (reference=#/pictures/1)
-
-  text (reference=#/texts/24)
-
-  code (reference=#/texts/25)
-
-  text (reference=#/texts/26)
-
-  text (reference=#/texts/28)
-
-  formula (reference=#/texts/29)
-
-  text (reference=#/texts/30)
-
-  text (reference=#/texts/31)
-
-  code (reference=#/texts/32)
-
-  text (reference=#/texts/33)
-
-  formula (reference=#/texts/34)
-
-  form (reference=#/form_items/0)
-
-  text (reference=#/texts/35)
-
-  text (reference=#/texts/36)
-
-  text (reference=#/texts/37)
-
-  text (reference=#/texts/38)
-
-  text (reference=#/texts/39)
-
-  text (reference=#/texts/40)
-
-  text (reference=#/texts/41)
-
-  text (reference=#/texts/42)
-
-  text (reference=#/texts/43)
-
-  text (reference=#/texts/44)
-
-  text (reference=#/texts/55)
diff --git a/test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_false.gt.md b/test/data/doc/constructed_mdsum_outline_mdhdr_false.gt.md
similarity index 100%
rename from test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_false.gt.md
rename to test/data/doc/constructed_mdsum_outline_mdhdr_false.gt.md
diff --git a/test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_true.gt.md b/test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_true.gt.md
deleted file mode 100644
index da6a4cb7..00000000
--- a/test/data/doc/constructed_mdsum_outline_mdhdr_false_indent_true.gt.md
+++ /dev/null
@@ -1,59 +0,0 @@
-title (reference=#/texts/1): Title of the Document
-
-text (reference=#/texts/2)
-
-text (reference=#/texts/3)
-
-  section-header (level=1, reference=#/texts/4): 1. Introduction
-
-  text (reference=#/texts/5)
-
-  table 1 (reference=#/tables/0)
-
-  picture 1 (reference=#/pictures/0)
-
-  picture 2 (reference=#/pictures/1)
-
-  text (reference=#/texts/24)
-
-  code (reference=#/texts/25)
-
-  text (reference=#/texts/26)
-
-  text (reference=#/texts/28)
-
-  formula (reference=#/texts/29)
-
-  text (reference=#/texts/30)
-
-  text (reference=#/texts/31)
-
-  code (reference=#/texts/32)
-
-  text (reference=#/texts/33)
-
-  formula (reference=#/texts/34)
-
-  form (reference=#/form_items/0)
-
-  text (reference=#/texts/35)
-
-  text (reference=#/texts/36)
-
-  text (reference=#/texts/37)
-
-  text (reference=#/texts/38)
-
-  text (reference=#/texts/39)
-
-  text (reference=#/texts/40)
-
-  text (reference=#/texts/41)
-
-  text (reference=#/texts/42)
-
-  text (reference=#/texts/43)
-
-  text (reference=#/texts/44)
-
-  text (reference=#/texts/55)
diff --git a/test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_false.gt.md b/test/data/doc/constructed_mdsum_outline_mdhdr_true.gt.md
similarity index 100%
rename from test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_false.gt.md
rename to test/data/doc/constructed_mdsum_outline_mdhdr_true.gt.md
diff --git a/test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_true.gt.md b/test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_true.gt.md
deleted file mode 100644
index 68f8efc7..00000000
--- a/test/data/doc/constructed_mdsum_outline_mdhdr_true_indent_true.gt.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# Title of the Document (reference=#/texts/1)
-
-text (reference=#/texts/2)
-
-text (reference=#/texts/3)
-
-  ## 1. Introduction (level=1, reference=#/texts/4)
-
-  text (reference=#/texts/5)
-
-  table 1 (reference=#/tables/0)
-
-  picture 1 (reference=#/pictures/0)
-
-  picture 2 (reference=#/pictures/1)
-
-  text (reference=#/texts/24)
-
-  code (reference=#/texts/25)
-
-  text (reference=#/texts/26)
-
-  text (reference=#/texts/28)
-
-  formula (reference=#/texts/29)
-
-  text (reference=#/texts/30)
-
-  text (reference=#/texts/31)
-
-  code (reference=#/texts/32)
-
-  text (reference=#/texts/33)
-
-  formula (reference=#/texts/34)
-
-  form (reference=#/form_items/0)
-
-  text (reference=#/texts/35)
-
-  text (reference=#/texts/36)
-
-  text (reference=#/texts/37)
-
-  text (reference=#/texts/38)
-
-  text (reference=#/texts/39)
-
-  text (reference=#/texts/40)
-
-  text (reference=#/texts/41)
-
-  text (reference=#/texts/42)
-
-  text (reference=#/texts/43)
-
-  text (reference=#/texts/44)
-
-  text (reference=#/texts/55)
diff --git a/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_false.gt.md b/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false.gt.md
similarity index 100%
rename from test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_false.gt.md
rename to test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false.gt.md
diff --git a/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_true.gt.md b/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_true.gt.md
deleted file mode 100644
index fdb5b964..00000000
--- a/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_false_indent_true.gt.md
+++ /dev/null
@@ -1,3 +0,0 @@
-title (reference=#/texts/1): Title of the Document
-
-  section-header (level=1, reference=#/texts/4): 1. Introduction
diff --git a/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_false.gt.md b/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true.gt.md
similarity index 100%
rename from test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_false.gt.md
rename to test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true.gt.md
diff --git a/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_true.gt.md b/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_true.gt.md
deleted file mode 100644
index 341caca5..00000000
--- a/test/data/doc/constructed_mdsum_table_of_contents_mdhdr_true_indent_true.gt.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Title of the Document (reference=#/texts/1)
-
-  ## 1. Introduction (level=1, reference=#/texts/4)
diff --git a/test/test_markdown_summary.py b/test/test_markdown_summary.py
index d3f5b4b3..3644fd22 100644
--- a/test/test_markdown_summary.py
+++ b/test/test_markdown_summary.py
@@ -32,9 +32,8 @@ def verify(exp_file: Path, actual: str):
     ],
 )
 @pytest.mark.parametrize("use_md_headers", [False, True])
-@pytest.mark.parametrize("indent_by_section_level", [False, True])
 def test_markdown_summary_outline(
-    mode: MarkdownSummaryMode, use_md_headers: bool, indent_by_section_level: bool
+    mode: MarkdownSummaryMode, use_md_headers: bool
 ):
     # Build a representative document with title, headers, text, lists, table, and pictures
     doc = _construct_doc()
@@ -44,7 +43,6 @@ def test_markdown_summary_outline(
         params=MarkdownSummaryParams(
             use_markdown_headers=use_md_headers,
             mode=mode,
-            indent_by_section_level=indent_by_section_level,
         ),
     )
 
@@ -54,31 +52,7 @@ def test_markdown_summary_outline(
     root_dir = Path("./test/data/doc")
     exp_path = (
         root_dir
-        / f"constructed_mdsum_{mode.value}_mdhdr_{str(use_md_headers).lower()}_indent_{str(indent_by_section_level).lower()}.gt.md"
+        / f"constructed_mdsum_{mode.value}_mdhdr_{str(use_md_headers).lower()}.gt.md"
     )
     verify(exp_file=exp_path, actual=outline)
 
-
-@pytest.mark.parametrize("use_md_headers", [False, True])
-def test_markdown_summary_indentation(use_md_headers: bool):
-    # Build a representative document
-    doc = _construct_doc()
-
-    ser = MarkdownSummarySerializer(
-        doc=doc,
-        params=MarkdownSummaryParams(
-            use_markdown_headers=use_md_headers,
-            indent_by_section_level=True,
-            indent_size=2,
-        ),
-    )
-
-    outline = ser.serialize().text
-
-    # Compare with or generate ground-truth output for indentation-specific case
-    root_dir = Path("./test/data/doc")
-    exp_path = (
-        root_dir
-        / f"constructed_mdsum_indent_mdhdr_{str(use_md_headers).lower()}_indent_true_size_2.gt.md"
-    )
-    verify(exp_file=exp_path, actual=outline)

From 776e957e4aa374c0f1b80ff7ab2dc89ee1e185f8 Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Wed, 24 Sep 2025 11:30:41 +0200
Subject: [PATCH 12/14] refactoring the markdown_summary

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 docling_core/experimental/serializer/markdown_summary.py | 1 -
 test/test_markdown_summary.py                            | 5 +----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/docling_core/experimental/serializer/markdown_summary.py b/docling_core/experimental/serializer/markdown_summary.py
index 4667ce46..3db6bebe 100644
--- a/docling_core/experimental/serializer/markdown_summary.py
+++ b/docling_core/experimental/serializer/markdown_summary.py
@@ -53,7 +53,6 @@ class MarkdownSummaryParams(MarkdownParams):
     add_references: bool = True
     add_summary: bool = True
 
-
     toc_labels: list[DocItemLabel] = [DocItemLabel.TITLE, DocItemLabel.SECTION_HEADER]
 
 
diff --git a/test/test_markdown_summary.py b/test/test_markdown_summary.py
index 3644fd22..63bdf719 100644
--- a/test/test_markdown_summary.py
+++ b/test/test_markdown_summary.py
@@ -32,9 +32,7 @@ def verify(exp_file: Path, actual: str):
     ],
 )
 @pytest.mark.parametrize("use_md_headers", [False, True])
-def test_markdown_summary_outline(
-    mode: MarkdownSummaryMode, use_md_headers: bool
-):
+def test_markdown_summary_outline(mode: MarkdownSummaryMode, use_md_headers: bool):
     # Build a representative document with title, headers, text, lists, table, and pictures
     doc = _construct_doc()
 
@@ -55,4 +53,3 @@ def test_markdown_summary_outline(
         / f"constructed_mdsum_{mode.value}_mdhdr_{str(use_md_headers).lower()}.gt.md"
     )
     verify(exp_file=exp_path, actual=outline)
-

From f2e28db2a039cd0d918baf0d70d71d33549b6da9 Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Wed, 24 Sep 2025 11:36:19 +0200
Subject: [PATCH 13/14] reset the docs/DoclingDocument.json

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 docs/DoclingDocument.json | 158 --------------------------------------
 1 file changed, 158 deletions(-)

diff --git a/docs/DoclingDocument.json b/docs/DoclingDocument.json
index 96b84fe9..305f5a9b 100644
--- a/docs/DoclingDocument.json
+++ b/docs/DoclingDocument.json
@@ -194,18 +194,6 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
-        "summary": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Summary"
-        },
         "label": {
           "const": "code",
           "default": "code",
@@ -487,18 +475,6 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
-        "summary": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Summary"
-        },
         "label": {
           "const": "form",
           "default": "form",
@@ -622,18 +598,6 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
-        "summary": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Summary"
-        },
         "label": {
           "const": "formula",
           "default": "formula",
@@ -843,18 +807,6 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
-        "summary": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Summary"
-        },
         "name": {
           "default": "group",
           "title": "Name",
@@ -960,18 +912,6 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
-        "summary": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Summary"
-        },
         "name": {
           "default": "group",
           "title": "Name",
@@ -1022,18 +962,6 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
-        "summary": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Summary"
-        },
         "label": {
           "const": "key_value_region",
           "default": "key_value_region",
@@ -1126,18 +1054,6 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
-        "summary": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Summary"
-        },
         "name": {
           "default": "group",
           "title": "Name",
@@ -1188,18 +1104,6 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
-        "summary": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Summary"
-        },
         "label": {
           "const": "list_item",
           "default": "list_item",
@@ -1437,18 +1341,6 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
-        "summary": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Summary"
-        },
         "label": {
           "default": "picture",
           "enum": [
@@ -1950,18 +1842,6 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
-        "summary": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Summary"
-        },
         "label": {
           "const": "section_header",
           "default": "section_header",
@@ -2185,18 +2065,6 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
-        "summary": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Summary"
-        },
         "label": {
           "default": "table",
           "enum": [
@@ -2314,18 +2182,6 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
-        "summary": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Summary"
-        },
         "label": {
           "enum": [
             "caption",
@@ -2429,18 +2285,6 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
-        "summary": {
-          "anyOf": [
-            {
-              "type": "string"
-            },
-            {
-              "type": "null"
-            }
-          ],
-          "default": null,
-          "title": "Summary"
-        },
         "label": {
           "const": "title",
           "default": "title",
@@ -2538,7 +2382,6 @@
         "parent": null,
         "children": [],
         "content_layer": "furniture",
-        "summary": null,
         "name": "_root_",
         "label": "unspecified"
       },
@@ -2551,7 +2394,6 @@
         "parent": null,
         "children": [],
         "content_layer": "body",
-        "summary": null,
         "name": "_root_",
         "label": "unspecified"
       }

From 0c21580c109ded1dafa7df0334fa115b9cb58cde Mon Sep 17 00:00:00 2001
From: Peter Staar <taa@zurich.ibm.com>
Date: Wed, 24 Sep 2025 12:42:56 +0200
Subject: [PATCH 14/14] fix for the Docs

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
---
 docs/DoclingDocument.json | 158 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)

diff --git a/docs/DoclingDocument.json b/docs/DoclingDocument.json
index 305f5a9b..96b84fe9 100644
--- a/docs/DoclingDocument.json
+++ b/docs/DoclingDocument.json
@@ -194,6 +194,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "code",
           "default": "code",
@@ -475,6 +487,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "form",
           "default": "form",
@@ -598,6 +622,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "formula",
           "default": "formula",
@@ -807,6 +843,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "name": {
           "default": "group",
           "title": "Name",
@@ -912,6 +960,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "name": {
           "default": "group",
           "title": "Name",
@@ -962,6 +1022,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "key_value_region",
           "default": "key_value_region",
@@ -1054,6 +1126,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "name": {
           "default": "group",
           "title": "Name",
@@ -1104,6 +1188,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "list_item",
           "default": "list_item",
@@ -1341,6 +1437,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "default": "picture",
           "enum": [
@@ -1842,6 +1950,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "section_header",
           "default": "section_header",
@@ -2065,6 +2185,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "default": "table",
           "enum": [
@@ -2182,6 +2314,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "enum": [
             "caption",
@@ -2285,6 +2429,18 @@
           "$ref": "#/$defs/ContentLayer",
           "default": "body"
         },
+        "summary": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Summary"
+        },
         "label": {
           "const": "title",
           "default": "title",
@@ -2382,6 +2538,7 @@
         "parent": null,
         "children": [],
         "content_layer": "furniture",
+        "summary": null,
         "name": "_root_",
         "label": "unspecified"
       },
@@ -2394,6 +2551,7 @@
         "parent": null,
         "children": [],
         "content_layer": "body",
+        "summary": null,
         "name": "_root_",
         "label": "unspecified"
       }