diff --git a/src/modm_data/__init__.py b/src/modm_data/__init__.py
index 62d6876..0c0cd49 100644
--- a/src/modm_data/__init__.py
+++ b/src/modm_data/__init__.py
@@ -12,23 +12,6 @@
 except PackageNotFoundError:
     __version__ = "0.0.1"
 
-
-from . import (
-    cubehal,
-    cubemx,
-    cube2owl,
-    dl,
-    header2svd,
-    html,
-    html2owl,
-    html2svd,
-    owl,
-    pdf,
-    pdf2html,
-    svd,
-    utils,
-)
-
 __all__ = [
     "cube2owl",
     "cubehal",
diff --git a/src/modm_data/dl/__init__.py b/src/modm_data/dl/__init__.py
index 6abdc3d..d0766e4 100644
--- a/src/modm_data/dl/__init__.py
+++ b/src/modm_data/dl/__init__.py
@@ -1,7 +1,6 @@
 # Copyright 2022, Niklas Hauser
 # SPDX-License-Identifier: MPL-2.0
 
-from . import stmicro
 from .store import download_data, download_file
 
 __all__ = [
diff --git a/src/modm_data/header2svd/__init__.py b/src/modm_data/header2svd/__init__.py
index 041edaa..8292d07 100644
--- a/src/modm_data/header2svd/__init__.py
+++ b/src/modm_data/header2svd/__init__.py
@@ -5,7 +5,6 @@
 # CMSIS Header to SVD Pipeline
 """
 
-from . import stmicro
 from .header import Header
 
 __all__ = [
diff --git a/src/modm_data/header2svd/stmicro/__init__.py b/src/modm_data/header2svd/stmicro/__init__.py
index fc925f9..beca289 100644
--- a/src/modm_data/header2svd/stmicro/__init__.py
+++ b/src/modm_data/header2svd/stmicro/__init__.py
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: MPL-2.0
 
 from .header import Header, getDefineForDevice
-
 from .tree import normalize_memory_map
 
 __all__ = [
diff --git a/src/modm_data/html/__init__.py b/src/modm_data/html/__init__.py
index ddf16c6..39f618f 100644
--- a/src/modm_data/html/__init__.py
+++ b/src/modm_data/html/__init__.py
@@ -1,7 +1,6 @@
 # Copyright 2022, Niklas Hauser
 # SPDX-License-Identifier: MPL-2.0
 
-from . import stmicro
 from .document import Document
 from .chapter import Chapter
 from .table import Table
diff --git a/src/modm_data/html2owl/__init__.py b/src/modm_data/html2owl/__init__.py
index 6ff35ff..b0130eb 100644
--- a/src/modm_data/html2owl/__init__.py
+++ b/src/modm_data/html2owl/__init__.py
@@ -5,6 +5,4 @@
 # HTML to OWL Pipeline
 """
 
-from . import stmicro
-
 __all__ = ["stmicro"]
diff --git a/src/modm_data/html2svd/__init__.py b/src/modm_data/html2svd/__init__.py
index 8518d90..1112e46 100644
--- a/src/modm_data/html2svd/__init__.py
+++ b/src/modm_data/html2svd/__init__.py
@@ -5,6 +5,4 @@
 # HTML to SVD Pipeline
 """
 
-from . import stmicro
-
 __all__ = ["stmicro"]
diff --git a/src/modm_data/owl/__init__.py b/src/modm_data/owl/__init__.py
index 21d0205..68e6e4a 100644
--- a/src/modm_data/owl/__init__.py
+++ b/src/modm_data/owl/__init__.py
@@ -3,7 +3,6 @@
 
 from .store import Store
 from .identifier import DeviceIdentifier
-from . import stmicro
 
 __all__ = [
     "stmicro",
diff --git a/src/modm_data/pdf/__init__.py b/src/modm_data/pdf/__init__.py
index ac442fd..48e261b 100644
--- a/src/modm_data/pdf/__init__.py
+++ b/src/modm_data/pdf/__init__.py
@@ -18,17 +18,17 @@
 from .link import ObjLink, WebLink
 from .path import Path
 from .image import Image
-from .render import render_page_pdf
+from .render import annotate_debug_info
 from .structure import Structure
 
 __all__ = [
+    "annotate_debug_info",
     "Document",
     "Page",
     "Character",
-    "ObjLink",
-    "WebLink",
     "Path",
     "Image",
+    "ObjLink",
+    "WebLink",
     "Structure",
-    "render_page_pdf",
 ]
diff --git a/src/modm_data/pdf/character.py b/src/modm_data/pdf/character.py
index 2140a0e..e99cd1f 100644
--- a/src/modm_data/pdf/character.py
+++ b/src/modm_data/pdf/character.py
@@ -1,21 +1,6 @@
 # Copyright 2022, Niklas Hauser
 # SPDX-License-Identifier: MPL-2.0
 
-"""
-# PDF Characters
-
-Each character on the PDF page is represented by a character object, describing
-exactly where and how to render the associated glyph.
-
-While there are font flags, PDF files typically use entirely different fonts to
-render normal, bold, and italic characters.
-
-The character's loose bounding box may not always be available, since it must be
-explicitly provided by the font. The tight bounding box is only available as
-long as the glyph is renderable, so a space character may have a loose, but not
-a tight bounding box, or none at all.
-"""
-
 import math
 import ctypes
 from functools import cached_property
@@ -26,8 +11,16 @@
 
 class Character:
     """
-    This class contains all information about a single character in the PDF
-    page.
+    Each character on the PDF page is represented by a character object,
+    describing exactly where and how to render the associated glyph.
+
+    While there are font flags, PDF files typically use entirely different fonts
+    to render normal, bold, and italic characters.
+
+    The character's loose bounding box may not always be available, since it
+    must be explicitly provided by the font. The tight bounding box is only
+    available as long as the glyph is renderable, so a space character may have
+    a loose, but not a tight bounding box, or none at all.
     """
 
     class RenderMode(Enum):
diff --git a/src/modm_data/pdf/document.py b/src/modm_data/pdf/document.py
index 3e6c7f7..a5b725b 100644
--- a/src/modm_data/pdf/document.py
+++ b/src/modm_data/pdf/document.py
@@ -1,16 +1,6 @@
 # Copyright 2022, Niklas Hauser
 # SPDX-License-Identifier: MPL-2.0
 
-"""
-# PDF Documents
-
-The PDF document is the root of the entire data structure and provides access to
-PDF metadata, the table of contents, as well as individual pages.
-
-You should extend from this class for a specific vendor to provide the
-correct page class from `page()` function.
-"""
-
 import ctypes
 import logging
 import pypdfium2 as pp
@@ -39,6 +29,13 @@ def __repr__(self) -> str:
 
 class Document(pp.PdfDocument):
     """
+    The PDF document is the root of the entire data structure and provides
+    access to PDF metadata, the table of contents, as well as individual
+    pages.
+
+    You should extend from this class for a specific vendor to provide the
+    correct page class from `page()` function.
+
     This class is a convenience wrapper with caching around the high-level APIs
     of pypdfium.
     """
diff --git a/src/modm_data/pdf/image.py b/src/modm_data/pdf/image.py
index 4c88b81..a01b24d 100644
--- a/src/modm_data/pdf/image.py
+++ b/src/modm_data/pdf/image.py
@@ -1,12 +1,6 @@
 # Copyright 2022, Niklas Hauser
 # SPDX-License-Identifier: MPL-2.0
 
-"""
-# PDF Images
-
-Images support bitmap data.
-"""
-
 from functools import cached_property
 import pypdfium2 as pp
 from ..utils import Point, Rectangle, Line
diff --git a/src/modm_data/pdf/link.py b/src/modm_data/pdf/link.py
index f4d43df..ce1f06c 100644
--- a/src/modm_data/pdf/link.py
+++ b/src/modm_data/pdf/link.py
@@ -1,17 +1,6 @@
 # Copyright 2022, Niklas Hauser
 # SPDX-License-Identifier: MPL-2.0
 
-"""
-# Inter-PDF References and External Links
-
-PDF contains two types of links:
-1. Internal references to other objects by identifier: `ObjLink`.
-2. External links to URLs: `WebLink`.
-
-Both types can be extracted by calling the `modm_data.pdf.page.Page.objlinks`
-and `modm_data.pdf.page.Page.weblinks` properties.
-"""
-
 import ctypes
 from functools import cached_property
 import pypdfium2 as pp
@@ -19,7 +8,11 @@
 
 
 class ObjLink:
-    """A link to a PDF object giving the bounding box and destination page."""
+    """
+    An internal reference to other objects by an identifier giving the bounding
+    box and destination page. These links can be extracted by calling the
+    `modm_data.pdf.page.Page.objlinks` property.
+    """
 
     def __init__(self, page: "modm_data.pdf.Page", link: pp.raw.FPDF_LINK):  # noqa: F821
         """
@@ -47,7 +40,11 @@ def __repr__(self) -> str:
 
 
 class WebLink:
-    """A weblink object giving the bounding box and destination URL."""
+    """
+    An external reference to URLs giving the bounding box and destination URL.
+    These links can be extracted by calling the
+    `modm_data.pdf.page.Page.weblinks` property.
+    """
 
     def __init__(self, page: "modm_data.pdf.Page", index: int):  # noqa: F821
         """
diff --git a/src/modm_data/pdf/page.py b/src/modm_data/pdf/page.py
index a363bce..1caafda 100644
--- a/src/modm_data/pdf/page.py
+++ b/src/modm_data/pdf/page.py
@@ -1,12 +1,6 @@
 # Copyright 2022, Niklas Hauser
 # SPDX-License-Identifier: MPL-2.0
 
-"""
-# PDF Pages
-
-
-"""
-
 import ctypes
 import logging
 import weakref
diff --git a/src/modm_data/pdf/path.py b/src/modm_data/pdf/path.py
index 1dee9a9..fefbbc8 100644
--- a/src/modm_data/pdf/path.py
+++ b/src/modm_data/pdf/path.py
@@ -1,14 +1,6 @@
 # Copyright 2022, Niklas Hauser
 # SPDX-License-Identifier: MPL-2.0
 
-"""
-# PDF Graphics
-
-PDF uses a subset of the PostScript graphics language, which draws vector paths
-with various rendering options. We are only interested in the basic properties,
-in particular, for recognizing table cell borders.
-"""
-
 import ctypes
 from functools import cached_property
 from enum import Enum
@@ -18,6 +10,10 @@
 
 class Path(pp.PdfObject):
     """
+    PDF uses a subset of the PostScript graphics language, which draws vector
+    paths with various rendering options. We are only interested in the basic
+    properties, in particular, for recognizing table cell borders.
+
     This class specializes `pypdfium2.PdfObject` to add accessors for  graphics
     containing vector paths of various configurations.
 
diff --git a/src/modm_data/pdf/render.py b/src/modm_data/pdf/render.py
index f4cf967..f719e2b 100644
--- a/src/modm_data/pdf/render.py
+++ b/src/modm_data/pdf/render.py
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: MPL-2.0
 
 from ..utils import VLine, HLine
+from .page import Page
 import pypdfium2 as pp
 
 
@@ -47,13 +48,26 @@ def _rect(pageobj, rotation, rect, **kw):
     pp.raw.FPDFPage_InsertObject(pageobj, obj)
 
 
-def render_page_pdf(doc, page, new_doc=None, index=0):
+def annotate_debug_info(page: Page, new_doc: pp.PdfDocument = None, index: int = 0) -> pp.PdfDocument:
+    """
+    Copies the page into a new or existing PDF document and overlays the internal information on top of the content.
+    - Renders the bounding boxes in RED and origins in BLACK of all characters.
+    - Renders the bounding boxes of web links in BLUE GREEN.
+    - Renders the bounding boxes of object links in YELLOW GREEN.
+    - Renders all graphics paths in BLUE.
+    - Renders the bounding boxes of computed graphics clusters in CYAN.
+
+    :param page: The page to be annotated.
+    :param new_doc: The PDF document to copy the page to. If not provided, a new document is created.
+    :param index: The index of the page in the new document.
+    :return: The new document with the annotated page added.
+    """
     _, height = page.width, page.height
 
     if new_doc is None:
         new_doc = pp.raw.FPDF_CreateNewDocument()
     # copy page over to new doc
-    assert pp.raw.FPDF_ImportPages(new_doc, doc, str(page.number).encode("ascii"), index)
+    assert pp.raw.FPDF_ImportPages(new_doc, page.pdf, str(page.number).encode("ascii"), index)
     new_page = pp.raw.FPDF_LoadPage(new_doc, index)
     rotation = page.rotation
 
diff --git a/src/modm_data/pdf/structure.py b/src/modm_data/pdf/structure.py
index 00199bd..f8290ec 100644
--- a/src/modm_data/pdf/structure.py
+++ b/src/modm_data/pdf/structure.py
@@ -1,19 +1,6 @@
 # Copyright 2022, Niklas Hauser
 # SPDX-License-Identifier: MPL-2.0
 
-"""
-# Tagged PDFs
-
-A tagged PDF/UA (Universal Accessibility) contains the structure of content as a
-tree data structure with similar semantics to HTML. Sadly, the quality of the
-tags depends heavily on the PDF creation software. See [Overview of PDF tags](
-https://accessible-pdf.info/en/basics/general/overview-of-the-pdf-tags/).
-
-An example of an accessible pdf that can be inspected via these classes:
-[Rock On, D.C. Music Festival](
-https://commonlook.com/wp-content/uploads/2020/04/accessible-pdf-example.pdf).
-"""
-
 import ctypes
 from functools import cached_property, cache
 import pypdfium2 as pp
@@ -22,8 +9,13 @@
 
 class Structure:
     """
-    A PDF/UA ("tagged PDF") contains the structure of content as a tree data
-    structure with similar semantics to HTML.
+    A tagged PDF/UA (Universal Accessibility) contains the structure of content
+    as a tree data structure with similar semantics to HTML. Sadly, the quality
+    of the tags depends heavily on the PDF creation software. See
+    [Overview of PDF tags](https://accessible-pdf.info/en/basics/general/overview-of-the-pdf-tags/).
+
+    An example of an accessible pdf that can be inspected via these classes:
+    [Rock On, D.C. Music Festival](https://commonlook.com/wp-content/uploads/2020/04/accessible-pdf-example.pdf).
 
     This class is a convenience wrapper around [the pdfium structtree methods](
     https://pdfium.googlesource.com/pdfium/+/main/public/fpdf_structtree.h).
diff --git a/src/modm_data/pdf2html/__init__.py b/src/modm_data/pdf2html/__init__.py
index 16fcea6..f2d0da0 100644
--- a/src/modm_data/pdf2html/__init__.py
+++ b/src/modm_data/pdf2html/__init__.py
@@ -5,25 +5,18 @@
 # PDF to HTML Pipeline
 """
 
-from . import stmicro
-from .render import render_page_pdf
+from .render import annotate_debug_info
 from .convert import convert, patch
 from .html import format_document, write_html
 
-from . import ast
-from . import cell
-from . import figure
-from . import line
-from . import page
-from . import table
-
 __all__ = [
     "stmicro",
-    "render_page_pdf",
+    "ti",
     "convert",
-    "patch",
+    "annotate_debug_info",
     "format_document",
     "write_html",
+    "patch",
     "ast",
     "cell",
     "figure",
diff --git a/src/modm_data/pdf2html/convert.py b/src/modm_data/pdf2html/convert.py
index c65d95e..8197ede 100644
--- a/src/modm_data/pdf2html/convert.py
+++ b/src/modm_data/pdf2html/convert.py
@@ -2,9 +2,10 @@
 # SPDX-License-Identifier: MPL-2.0
 
 from anytree import RenderTree
+from typing import Iterable
 
 from .html import format_document, write_html
-from .render import render_page_pdf
+from .render import annotate_debug_info
 from ..utils import pkg_apply_patch, pkg_file_exists, apply_patch
 from .ast import merge_area
 from pathlib import Path
@@ -12,17 +13,17 @@
 
 
 def convert(
-    doc,
-    page_range,
-    output_path,
-    format_chapters=False,
-    pretty=True,
-    render_html=True,
-    render_pdf=False,
-    render_all=False,
-    show_ast=False,
-    show_tree=False,
-    show_tags=False,
+    doc: pp.PdfDocument,
+    page_range: Iterable[int],
+    output_path: Path,
+    format_chapters: bool = False,
+    pretty: bool = True,
+    render_html: bool = True,
+    render_pdf: bool = False,
+    render_all: bool = False,
+    show_ast: bool = False,
+    show_tree: bool = False,
+    show_tags: bool = False,
 ) -> bool:
     document = None
     debug_doc = None
@@ -47,7 +48,7 @@ def convert(
                     document = merge_area(document, area)
 
         if render_pdf:
-            debug_doc = render_page_pdf(doc, page, debug_doc, debug_index)
+            debug_doc = annotate_debug_info(page, debug_doc, debug_index)
             debug_index += 1
 
     if render_pdf:
diff --git a/src/modm_data/pdf2html/render.py b/src/modm_data/pdf2html/render.py
index efbbee5..07fa490 100644
--- a/src/modm_data/pdf2html/render.py
+++ b/src/modm_data/pdf2html/render.py
@@ -2,19 +2,25 @@
 # SPDX-License-Identifier: MPL-2.0
 
 import pypdfium2 as pp
-from ..pdf.render import render_page_pdf as pdf_render_page_pdf
+from ..pdf.render import annotate_debug_info as pdf_annotate_debug_info
 from ..pdf.render import _vline, _hline, _line, _rect
+from .page import Page
 
 
-def render_page_pdf(doc, page, new_doc=None, index=0):
+def annotate_debug_info(page: Page, new_doc: pp.PdfDocument = None, index: int = 0) -> pp.PdfDocument:
     """
+    Copies the page into a new or existing PDF document and overlays the internal information on top of the content.
+    In addition to the information overlaid in `modm_data.pdf.annotate_debug_info`, this function:
+    - renders all content areas in ORANGE.
+    - renders all graphic clusters in content areas in GREEN.
+    - renders all tables in content areas in BLUE.
 
-
-    :param doc: PDF document
-    :param page: PDF page
-    :param new_doc: Empty PDF document to copy debug renders to
+    :param page: The page to be annotated.
+    :param new_doc: The PDF document to copy the page to. If not provided, a new document is created.
+    :param index: The index of the page in the new document.
+    :return: The new document with the annotated page added.
     """
-    new_doc = pdf_render_page_pdf(doc, page, new_doc, index)
+    new_doc = pdf_annotate_debug_info(page, new_doc, index)
     # return new_doc
     new_page = pp.raw.FPDF_LoadPage(new_doc, index)
     rotation = page.rotation
@@ -58,33 +64,33 @@ def render_page_pdf(doc, page, new_doc=None, index=0):
             for line in cell.lines:
                 for cluster in line.clusters():
                     _rect(new_page, rotation, cluster.bbox, width=0.33, stroke=0x808080)
-            if cell.b.l:
+            if cell.borders.left:
                 _vline(
-                    new_page, rotation, cell.bbox.left, cell.bbox.bottom, cell.bbox.top, width=cell.b.l, stroke=0xFF0000
+                    new_page, rotation, cell.bbox.left, cell.bbox.bottom, cell.bbox.top, width=cell.borders.left, stroke=0xFF0000
                 )
-            if cell.b.r:
+            if cell.borders.right:
                 _vline(
                     new_page,
                     rotation,
                     cell.bbox.right,
                     cell.bbox.bottom,
                     cell.bbox.top,
-                    width=cell.b.r,
+                    width=cell.borders.right,
                     stroke=0x0000FF,
                 )
-            if cell.b.b:
+            if cell.borders.bottom:
                 _hline(
                     new_page,
                     rotation,
                     cell.bbox.bottom,
                     cell.bbox.left,
                     cell.bbox.right,
-                    width=cell.b.b,
+                    width=cell.borders.bottom,
                     stroke=0x00FF00,
                 )
-            if cell.b.t:
+            if cell.borders.top:
                 _hline(
-                    new_page, rotation, cell.bbox.top, cell.bbox.left, cell.bbox.right, width=cell.b.t, stroke=0x808080
+                    new_page, rotation, cell.bbox.top, cell.bbox.left, cell.bbox.right, width=cell.borders.top, stroke=0x808080
                 )
 
     assert pp.raw.FPDFPage_GenerateContent(new_page)
diff --git a/src/modm_data/pdf2html/stmicro/document.py b/src/modm_data/pdf2html/stmicro/document.py
index 97654c4..ba4ac88 100644
--- a/src/modm_data/pdf2html/stmicro/document.py
+++ b/src/modm_data/pdf2html/stmicro/document.py
@@ -5,9 +5,15 @@
 from anytree import RenderTree
 from .page import Page as StmPage
 from ...pdf import Document as PdfDocument
-from ..ast import normalize_lines, normalize_captions, normalize_lists
-from ..ast import normalize_paragraphs, normalize_headings, normalize_registers
-from ..ast import normalize_tables
+from ..ast import (
+    normalize_lines,
+    normalize_captions,
+    normalize_lists,
+    normalize_paragraphs,
+    normalize_headings,
+    normalize_registers,
+    normalize_tables,
+)
 
 _LOGGER = logging.getLogger(__name__)
 
diff --git a/src/modm_data/pdf2html/ti/__init__.py b/src/modm_data/pdf2html/ti/__init__.py
new file mode 100644
index 0000000..6b20557
--- /dev/null
+++ b/src/modm_data/pdf2html/ti/__init__.py
@@ -0,0 +1,8 @@
+# Copyright 2022, Niklas Hauser
+# SPDX-License-Identifier: MPL-2.0
+
+
+from .document import Document
+from .page import Page
+
+__all__ = ["Document", "Page"]
diff --git a/src/modm_data/pdf2html/ti/__main__.py b/src/modm_data/pdf2html/ti/__main__.py
new file mode 100644
index 0000000..0ef8471
--- /dev/null
+++ b/src/modm_data/pdf2html/ti/__main__.py
@@ -0,0 +1,108 @@
+# Copyright 2022, Niklas Hauser
+# SPDX-License-Identifier: MPL-2.0
+
+import re
+import tqdm
+import logging
+import argparse
+import subprocess
+from pathlib import Path
+from multiprocessing.pool import ThreadPool
+
+from .. import convert, patch
+
+
+def main():
+    """Command-line entry point: convert a TI PDF to HTML; returns a truthy value on success."""
+    import modm_data
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--document", type=Path)
+    parser.add_argument("--output", type=str, default="")
+    parser.add_argument("--page", type=int, action="append")
+    parser.add_argument("--range", action="append")
+    parser.add_argument("--pdf", action="store_true")
+    parser.add_argument("--ast", action="store_true")
+    parser.add_argument("--tree", action="store_true")
+    parser.add_argument("--html", action="store_true")
+    parser.add_argument("--parallel", action="store_true")
+    parser.add_argument("--chapters", action="store_true")
+    parser.add_argument("--tags", action="store_true")
+    parser.add_argument("--all", action="store_true")
+    parser.add_argument("-v", dest="verbose", action="count", default=0)
+    args = parser.parse_args()
+    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+
+    doc = modm_data.pdf2html.ti.Document(args.document)
+    if doc.page_count == 0 or not doc.page(1).width:
+        print("Corrupt PDF!")
+        raise SystemExit(1)
+
+    if args.page or args.range:
+        page_range = list(map(lambda p: p - 1, args.page or []))
+        if args.range:
+            for arange in args.range:
+                # Bounds are 1-based and inclusive; an empty bound means the first/last page.
+                start, stop = arange.split(":")
+                page_range.extend(range(int(start or 1) - 1, int(stop or doc.page_count)))
+        page_range.sort()
+    else:
+        page_range = range(doc.page_count)
+
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    if args.parallel:
+        log = Path(f"log/ti/html/{doc.name}.txt")
+        log.parent.mkdir(exist_ok=True, parents=True)
+        with log.open("w") as logfile:
+            print(doc.page_count, doc.metadata, doc.is_tagged, file=logfile)
+            output_dir = output_path.parent / output_path.stem
+            output_dir.mkdir(parents=True, exist_ok=True)
+            dests = [(0, "introduction")]
+            for toc in doc.toc:
+                if toc.level == 0 and not toc.title.startswith("Table"):
+                    title = toc.title.lower().strip("0123456789").strip()
+                    title = re.sub(r"[\(\)/®&\n\r,;:™]", "", title)
+                    title = re.sub(r"[ -]", "_", title)
+                    title = re.sub(r"_+", "_", title)
+                    title = title.replace("²", "2")
+                    if not any(c in toc.title for c in {"Contents", "List of ", "Index"}):
+                        dests.append((toc.page_index, title))
+                    print(toc.page_index, toc.title, file=logfile)
+            dests.append((doc.page_count, None))
+            ranges = [(p0, p1, t0) for (p0, t0), (p1, t1) in zip(dests, dests[1:]) if p0 != p1]
+            calls = []
+            for ii, (p0, p1, title) in enumerate(ranges):
+                call = (
+                    f"python3 -m modm_data.pdf2html.ti "
+                    f"--document {args.document} --range {p0 + 1}:{p1} --html "
+                    f"--output {output_dir}/chapter_{ii}_{title}.html"
+                )
+                calls.append(call + f" >> {log} 2>&1")
+                print(call, file=logfile)
+        with ThreadPool() as pool:
+            retvals = list(tqdm.tqdm(pool.imap(lambda c: subprocess.run(c, shell=True), calls), total=len(calls)))
+        for retval, call in zip(retvals, calls):
+            if retval.returncode != 0:
+                print(call)
+        if all(r.returncode == 0 for r in retvals):
+            from . import data
+            return patch(doc, data, output_dir)
+        return False
+
+    return convert(
+        doc,
+        page_range,
+        output_path,
+        format_chapters=args.chapters,
+        render_html=args.html,
+        render_pdf=args.pdf,
+        render_all=args.all,
+        show_ast=args.ast,
+        show_tree=args.tree,
+        show_tags=args.tags,
+    )
+
+
+raise SystemExit(0 if main() else 1)  # `exit` is injected by `site` and may be absent (e.g. `python -S`)
diff --git a/src/modm_data/pdf2html/ti/document.py b/src/modm_data/pdf2html/ti/document.py
new file mode 100644
index 0000000..5a55e41
--- /dev/null
+++ b/src/modm_data/pdf2html/ti/document.py
@@ -0,0 +1,55 @@
+# Copyright 2023, Niklas Hauser
+# SPDX-License-Identifier: MPL-2.0
+
+import logging
+from anytree import RenderTree
+from .page import Page as TiPage
+from ...pdf import Document as PdfDocument
+from ..ast import (
+    normalize_lines,
+    normalize_captions,
+    normalize_lists,
+    normalize_paragraphs,
+    normalize_headings,
+    normalize_registers,
+    normalize_tables,
+)
+
+_LOGGER = logging.getLogger(__name__)
+
+
+def _debug(func, indata, debug=0):
+    """Apply `func` to `indata`; log the tree before (debug=-1) or after (debug=1) the call."""
+    _LOGGER.debug(func.__name__)
+    if debug == -1:
+        # One record with a trailing blank line; a bare `_LOGGER.debug()` raises TypeError.
+        _LOGGER.debug("%s\n", RenderTree(indata))
+    outdata = func(indata)
+    if debug == 1:
+        _LOGGER.debug("%s\n", RenderTree(outdata))
+    return outdata
+
+
+def _normalize_document(document):
+    """Feed the document through each normalization pass in turn and return the result."""
+    passes = (
+        normalize_lines, normalize_captions, normalize_lists, normalize_paragraphs,
+        normalize_headings, normalize_registers, normalize_tables,
+        # normalize_chapters,  (disabled in the original pipeline as well)
+    )
+    for normalize in passes:
+        document = _debug(normalize, document)
+    return document
+
+
+class Document(PdfDocument):  # TI-specific subclass of the generic PDF document; page() returns TiPage objects
+    def __init__(self, path: str) -> None:
+        super().__init__(path)
+        self._normalize = _normalize_document  # module-level pass pipeline; its consumer is not visible here
+
+    def page(self, index: int) -> TiPage:
+        assert index < self.page_count  # rejects indices at or beyond page_count; negative values are not checked
+        return TiPage(self, index)
+
+    def __repr__(self) -> str:
+        return f"TiDoc({self.name})"
diff --git a/src/modm_data/pdf2html/ti/page.py b/src/modm_data/pdf2html/ti/page.py
new file mode 100644
index 0000000..bb6a729
--- /dev/null
+++ b/src/modm_data/pdf2html/ti/page.py
@@ -0,0 +1,622 @@
+# Copyright 2022, Niklas Hauser
+# SPDX-License-Identifier: MPL-2.0
+
+import re
+import logging
+from functools import cached_property, reduce
+from collections import defaultdict
+from ..table import Table
+from ..figure import Figure
+from ..line import CharLine
+from ...utils import HLine, VLine, Rectangle
+from ...pdf import Image
+from ..page import Page as BasePage
+from anytree import Node
+
+
+_LOGGER = logging.getLogger(__name__)
+
+
+def is_compatible(document) -> bool:
+    """Return True if the document's "Author" metadata mentions "stmicro"."""
+    # NOTE(review): this is the TI backend but it matches "stmicro" -- confirm.
+    return "stmicro" in document.metadata.get("Author", "").lower()
+
+
+def _areas_black_white(page) -> dict:
+    """Return the named areas of a black/white template page.
+
+    Areas are declared as page fractions and scaled by `_scale` on return.
+    """
+    def _scale(r):
+        if page.rotation:
+            return Rectangle(
+                r.bottom * page.width, (1 - r.right) * page.height, r.top * page.width, (1 - r.left) * page.height
+            )
+        return Rectangle(r.left * page.width, r.bottom * page.height, r.right * page.width, r.top * page.height)
+
+    bottom_left = Rectangle(0.05, 0.02, 0.11, 0.07)
+    bottom_right = Rectangle(0.89, 0.02, 0.95, 0.07)
+    top_left = Rectangle(0.05, 0.9175, 0.5, 0.94)
+    top_right = Rectangle(0.5, 0.9175, 0.95, 0.94)
+    content = Rectangle(0.05, 0.07, 0.95, 0.9175)
+    all_content = [content]
+    areas = {"id": top_left if page.index % 2 else top_right}
+    if page.index == 0:
+        # Publish date on the bottom left on first page
+        areas["date"] = bottom_left
+        # number on the bottom right on first page
+        areas["number"] = bottom_right
+        # Add top areas
+        all_content.insert(0, Rectangle(0.375, 0.855, 0.975, 0.9125))
+        all_content.insert(1, Rectangle(0.025, 0.805, 0.975, 0.855))
+    else:
+        # Page number on bottom
+        areas["number"] = bottom_left if page.index % 2 else bottom_right
+        # Chapter name on top
+        # areas["top"] = top
+
+    # Recognize the two column design of the Datasheets with a big table underneath
+    if page.index < 3 and "DS" in page.pdf.name:
+        # Find a wide path that would denote the beginning of a table
+        top_rect = [
+            p.bbox.top / page.height
+            for p in page.paths
+            if _scale(content).contains(p.bbox) and p.bbox.width > page.width * 0.75
+        ]
+        if top_rect:
+            # offset for table label just above it
+            # max() takes the list itself: unpacking a single value raises TypeError
+            ybottom = max(top_rect) + 0.0175
+        else:
+            ybottom = content.bottom
+        # Try to find list or sublists in these areas
+        mr = Rectangle(0.49, ybottom, 0.51, content.top)
+        br = Rectangle(0.51, ybottom, 0.5325, content.top)
+        hr = Rectangle(0.5325, ybottom, 0.555, content.top)
+        text_middle = page.text_in_area(_scale(mr))
+        text_bullets = page.text_in_area(_scale(br))
+        text_hyphens = page.text_in_area(_scale(hr))
+        if not text_middle and (
+            any(c in text_bullets for c in {"•", chr(61623)}) or any(c in text_hyphens for c in {"-"})
+        ):
+            areas["middle_bullets"] = br
+            areas["middle_hyphens"] = hr
+            all_content = all_content[:-1]
+            all_content.append(Rectangle(content.left, ybottom, 0.5, content.top))
+            all_content.append(Rectangle(0.505, ybottom, content.right, content.top))
+            if top_rect:
+                all_content.append(Rectangle(content.left, content.bottom, content.right, ybottom))
+
+    areas["content"] = all_content
+    return {
+        name: [_scale(r) for r in area] if isinstance(area, list) else _scale(area)
+        for name, area in areas.items()
+    }
+
+
+def _spacing_black_white(page) -> dict:
+    """Return the spacing values of a black/white template page.
+
+    Rotated pages use a wider content margin, take their em sizes and table
+    line limit from the other page axis and use larger line thresholds.
+    Entries from `_spacing_special` override the values computed here.
+    """
+    rotated = bool(page.rotation)
+    # Content margin as a fraction of the page width
+    margin = 0.14 if rotated else 0.05
+    if rotated:
+        x_unit, y_unit = page.height, page.width
+    else:
+        x_unit, y_unit = page.width, page.height
+    base = {
+        # Horizontal spacing: left->right
+        "x_em": 0.01 * x_unit,
+        "x_left": margin * page.width,
+        "x_right": (1 - margin) * page.width,
+        "x_content": 0.2075 * page.width,
+        # Vertical spacing: bottom->top
+        "y_em": 0.01 * y_unit,
+        # Max table line thickness
+        "y_tline": 0.005 * y_unit,
+        # Max line height distance to detect paragraphs
+        "lh": 1.2 if rotated else 0.9,
+        # Max line height distance to detect super-/subscript
+        "sc": 0.4 if rotated else 0.325,
+        # Table header cell bold text threshold
+        "th": 0.33,
+    }
+    # Template-specific overrides win over the values above
+    overrides = _spacing_special(page)
+    return {**base, **overrides}
+
+
+def _spacing_special(page) -> dict:  # merged over the defaults by _spacing_black_white
+    return {}  # no overrides are defined; `page` is unused
+
+
+def _linesize_black_white(line: CharLine) -> str:
+    """Classify a line by its height.
+
+    Returns "h1" from 11.9, "h2" from 10.9, "n" from 9.9 and "fn" below that.
+    """
+    height = line.height
+    # Thresholds are ordered from the largest size downwards
+    for minimum, label in ((11.9, "h1"), (10.9, "h2"), (9.9, "n")):
+        if height >= minimum:
+            return label
+    return "fn"
+
+
+def _colors_black_white(color: int) -> str:
+    """Map a color value to a color name, or "unknown" if it is not listed.
+
+    Each entry is an inclusive (low, high) range; all of them span one value.
+    """
+    ranges = (
+        (0xFF, 0xFF, "black"),
+        (0xFFFFFFFF, 0xFFFFFFFF, "white"),
+        (0xB9C4CAFF, 0xB9C4CAFF, "gray"),
+        (0x1F81AFFF, 0x1F81AFFF, "lightblue"),
+        (0x2052FF, 0x2052FF, "darkblue"),
+        (0x39A9DCFF, 0x39A9DCFF, "blue"),
+    )
+    return next((name for low, high, name in ranges if low <= color <= high), "unknown")
+
+
+class Page(BasePage):
+    def __init__(self, document, index: int):
+        """Create the page and bind the black/white template helpers to it."""
+        super().__init__(document, index)
+        producer = self.pdf.metadata.get("Producer", "").lower()
+        self._template = "black_white"
+        if "itext" not in producer:
+            _LOGGER.error(f"Unknown page template! Defaulting to Black/White template. '{producer}'")
+        if "black_white" in self._template:
+            self._areas = _areas_black_white(self)
+            self._spacing = _spacing_black_white(self)
+            self._colors = _colors_black_white
+            self._line_size = _linesize_black_white
+
+    def _unicode_filter(self, code: int) -> int:
+        """Return the code point to use for `code`, or None to drop it."""
+        # Ignore Carriage Return characters and ® (superscript issues)
+        if code in {0xD, ord("®")}:
+            return None
+        # Correct some weird unicode stuffing choices
+        bullet = ord("•")
+        replacements = {2: ord("-"), 61623: bullet, 61664: bullet}
+        # Anything without a replacement passes through unchanged
+        return replacements.get(code, code)
+
+    @cached_property
+    def identifier(self) -> str:  # text of the "id" area, cached after first access
+        return self.text_in_named_area("id", check_length=False)
+
+    @cached_property
+    def top(self) -> str:  # "Cover" for the first page, else the text of the "top" area
+        if self.index == 0:
+            return "Cover"
+        return self.text_in_named_area("top", check_length=False)  # NOTE(review): no "top" area is set up in this file
+
+    @cached_property
+    def is_relevant(self) -> bool:
+        """Return True; every page is currently treated as relevant."""
+        # Disabled filter: return False if any(c in self.top for c in {"Contents", "List of ", "Index"})
+        return True
+
+    @property
+    def content_ast(self) -> list:
+        """Return one AST per content area of this page.
+
+        A "page" node is attached to the first leaf of the first AST.
+        """
+        with_graphics = True
+        if "DS" in self.pdf.name:
+            # FIXME: Terrible hack to get the ordering information table fixed
+            # Should be done in the AST as a rewrite similar to bit table rewrite with VirtualTable
+            pattern = "ordering +information|part +numbering"
+            order_pages = (
+                item.page_index
+                for item in self.pdf.toc
+                if item.level == 0 and re.search(pattern, item.title, re.IGNORECASE)
+            )
+            with_graphics = next(order_pages, -1) != self.index
+        ast = [self.ast_in_area(area, with_graphics=with_graphics) for area in self._areas["content"]]
+        # Add a page node to the first leaf to keep track of where a page starts
+        first_leaf = next((node for node in ast[0].descendants if node.is_leaf), ast[0])
+        Node("page", parent=first_leaf, xpos=first_leaf.xpos, number=self.number)
+        return ast
+
+    def graphics_in_area(self, area: Rectangle) -> list[Table | Figure]:
+        """Return the tables and figures found in `area`, by descending y, then x.
+
+        Graphic clusters are first matched to bold "Figure N." / "Table N."
+        captions; the remaining clusters are classified by their paths.
+        """
+        # Find all graphic clusters in this area
+        em = self._spacing["y_em"]
+        large_area = area.offset_x(em / 2)
+        graphic_clusters = self.graphic_clusters(lambda p: large_area.contains(p.bbox), em / 2)
+
+        # Find the captions and group them by y origin to catch side-by-side figures
+        ycaptions = defaultdict(list)
+        for line in self.charlines_in_area(area, lambda c: "Bold" in c.font):
+            for cluster in line.clusters():
+                for phrase in [r"Figure \d+\.", r"Table \d+\."]:
+                    if re.match(phrase, cluster.content):
+                        ycaptions[int(round(cluster.bbox.y / em))].append((phrase, cluster.chars))
+        ycaptions = [ycaptions[k] for k in sorted(ycaptions.keys(), key=lambda y: -y)]
+
+        # Now associate these captions with the graphics bboxes
+        categories = []
+        for captions in ycaptions:
+            width = area.width / len(captions)
+            for ii, (phrase, chars) in enumerate(sorted(captions, key=lambda c: c[1][0].origin.x)):
+                left, right = area.left + ii * width, area.left + (ii + 1) * width
+                bottom, top, height = chars[0].bbox.bottom, chars[0].bbox.top, chars[0].height
+
+                # Find the graphic associated with this caption
+                graphic = next(
+                    ((b, p) for b, p in graphic_clusters if b.bottom <= bottom and left <= b.left and b.right <= right),
+                    None,
+                )
+                if graphic is None:
+                    _LOGGER.error(f"Graphic cluster not found for caption {''.join(c.char for c in chars)}")
+                    continue
+
+                if self._template == "blue_gray":
+                    # Search for all lines of the current caption with the same properties
+                    cbbox = Rectangle(left, bottom, right, top)
+                    cchars = self.chars_in_area(cbbox)
+                    while True:
+                        nbbox = Rectangle(left, max(graphic[0].top, cbbox.bottom - height), right, top)
+                        nchars = self.chars_in_area(nbbox)
+                        if len(cchars) >= len(nchars):
+                            break
+                        cbbox = nbbox
+                        cchars = nchars
+                else:
+                    cbbox = Rectangle(left, min(graphic[0].top, bottom), right, top)
+
+                otype = phrase.split(" ")[0].lower()
+                if "Figure" in phrase:
+                    # Find all other graphics in the bounding box
+                    gbbox = Rectangle(left, graphic[0].bottom, right, cbbox.bottom)
+                    graphics = []
+                    for b, p in graphic_clusters:
+                        if gbbox.overlaps(b):
+                            graphics.append((b, p))
+                    for g in graphics:
+                        graphic_clusters.remove(g)
+                    gbbox = [cluster[0] for cluster in graphics]
+                    gbbox = reduce(lambda r0, r1: r0.joined(r1), gbbox)
+                    paths = [p for cluster in graphics for p in cluster[1]]
+
+                    if self._template == "blue_gray":
+                        # Search for characters below the graphics bbox, max 1 y_em
+                        gbbox = Rectangle(left, gbbox.bottom, right, gbbox.bottom)
+                        while True:
+                            gbbox = Rectangle(left, gbbox.bottom - self._spacing["y_em"], right, gbbox.bottom)
+                            if not self.chars_in_area(gbbox):
+                                break
+                    # Generate the new bounding box which includes the caption
+                    gbbox = Rectangle(left, gbbox.bottom, right, cbbox.bottom)
+                elif "Table" in phrase:
+                    graphic_clusters.remove(graphic)
+                    gbbox, paths = graphic
+                    if (
+                        self._template == "black_white"
+                        and sum(1 for path in paths if path.count == 2) >= len(paths) / 2
+                    ):
+                        otype += "_lines"
+                categories.append((otype, cbbox, gbbox, paths))
+
+        # Deal with the remaining graphic categories
+        for gbbox, paths in graphic_clusters:
+            if gbbox.width < self._spacing["x_em"] or gbbox.height < self._spacing["y_em"]:
+                continue
+            category = ""
+            if any(isinstance(p, Image) for p in paths):
+                category = "figure"
+            elif self._template == "blue_gray":
+                if all(self._colors(path.stroke) == "gray" or self._colors(path.fill) == "darkblue" for path in paths):
+                    category = "table"
+                else:
+                    category = "figure"
+            elif self._template == "black_white":
+                # Some tables are rendered explicitly with filled rectangular
+                # shapes while others are implicitly rendered with stroked lines
+                stroked_table_lines = sum(1 for path in paths if path.count == 2) >= len(paths) / 2
+                is_table = stroked_table_lines or all(
+                    [any(p.isclose(pp) for pp in path.bbox.points) for p in path.points].count(True)
+                    >= len(path.points) * 2 / 3
+                    for path in paths
+                )
+                if len(paths) > 1 and is_table:
+                    category = "table"
+                    if stroked_table_lines:
+                        category += "_lines"
+                else:
+                    category = "figure"
+
+            if "table" in category:
+                # Check if there are only numbers on top of the table
+                cbbox = Rectangle(gbbox.left, gbbox.top, gbbox.right, gbbox.top + self._spacing["y_em"])
+                nchars = [c for c in self.chars_in_area(cbbox) if c.unicode not in {0x20, 0xA, 0xD}]
+
+                if nchars and sum(1 if c.char.isnumeric() else 0 for c in nchars) >= len(nchars) / 3:
+                    # This is a register table with invisible top borders!
+                    cbbox = Rectangle(gbbox.left, gbbox.top, gbbox.right, max(c.bbox.top for c in nchars))
+                    gbbox = Rectangle(gbbox.left, gbbox.bottom, gbbox.right, cbbox.top)
+                    name = "register_" + category
+                else:
+                    cbbox = None
+                    name = category
+                categories.append((name, cbbox, gbbox, paths))
+            else:
+                categories.append(("figure", None, gbbox, paths))
+
+        # Convert the objects into specialized classes
+        categories.sort(key=lambda o: (-o[2].y, o[2].x))
+        objects = []
+        for otype, caption_bbox, graphics_bbox, graphics_paths in categories:
+            if "figure" in otype:
+                figure = Figure(self, graphics_bbox, caption_bbox, graphics_paths)
+                objects.append(figure)
+            elif "table" in otype:
+                xlines, ylines, yhlines = [], [], []
+                for path in graphics_paths:
+                    if self._template == "blue_gray" or "_lines" in otype:
+                        if self._colors(path.stroke) == "gray" or "_lines" in otype:
+                            # Intercell paths in gray
+                            if len(path.lines) == 1:
+                                line = path.lines[0]
+                                if line.direction == line.Direction.VERTICAL:
+                                    xlines.append(line.specialize())
+                                elif line.direction == line.Direction.HORIZONTAL:
+                                    ylines.append(line.specialize())
+                                else:
+                                    _LOGGER.warning(f"Line not vertical or horizontal: {line}")
+                            else:
+                                _LOGGER.warning(f"Path too long: {path}")
+                        elif self._colors(path.fill) == "darkblue":
+                            # Add the bottom line of the dark blue header box as a very thick line
+                            line = HLine(path.bbox.bottom, path.bbox.left, path.bbox.right, 5)
+                            yhlines.append(line)
+
+                    elif self._template == "black_white":
+                        bbox = path.bbox
+                        is_vertical = bbox.width < bbox.height
+                        width = bbox.width if is_vertical else bbox.height
+                        length = bbox.height if is_vertical else bbox.width
+                        if width <= self._spacing["x_em"] / 2:
+                            if length >= self._spacing["y_em"] / 2:
+                                if is_vertical:
+                                    line = VLine(bbox.midpoint.x, bbox.bottom, bbox.top, bbox.width)
+                                    xlines.append(line)
+                                else:
+                                    line = HLine(bbox.midpoint.y, bbox.left, bbox.right, bbox.height)
+                                    ylines.append(line)
+                        else:
+                            # Split the rectangle into its outline
+                            xlines.append(VLine(bbox.left, bbox.bottom, bbox.top, 0.1))
+                            xlines.append(VLine(bbox.right, bbox.bottom, bbox.top, 0.1))
+                            ylines.append(HLine(bbox.bottom, bbox.left, bbox.right, 0.1))
+                            ylines.append(HLine(bbox.top, bbox.left, bbox.right, 0.1))
+                if yhlines:
+                    yhlines.sort(key=lambda line: line.p0.y)
+                    ylines.append(yhlines[0])
+                if not xlines or not ylines:
+                    continue
+                table = Table(self, graphics_bbox, xlines, ylines, caption_bbox, is_register="register" in otype)
+                objects.append(table)
+
+        return objects
+
+    def ast_in_area(
+        self,
+        area: Rectangle,
+        with_graphics: bool = True,  # also gates Table/Figure caption detection
+        ignore_xpos: bool = False,  # disables the indentation-based unindenting
+        with_bits: bool = True,  # enables register bit definition detection
+        with_notes: bool = True,  # enables Note/Caution/Warning detection
+    ) -> Node:
+        """Build an anytree of the text lines and graphics found in `area`.
+
+        Returns the root "area" node. Text lines are nested by indentation.
+        """
+        x_em = self._spacing["x_em"]
+        spacing_content = self._spacing["x_content"]
+        lh_factor = self._spacing["lh"]
+        root = Node("area", obj=area, xpos=int(area.left), page=self)
+
+        def unindent(_xpos, _current, _newlines=1):
+            current = _current
+            # Check if we need to unindent the current node
+            while (_xpos - current.xpos) < -x_em and current.parent is not None and not ignore_xpos:
+                current = current.parent
+            if _newlines >= 2 and current.name == "para":
+                current = current.parent
+            return current
+
+        def parent_name(current):
+            return "" if current.parent is None else current.parent.name
+
+        current = root
+        ypos = area.top
+        for obj in self.objects_in_area(area, with_graphics):
+            xpos = round(obj.bbox.left)
+
+            # Tables should remain in their current hierarchy regardless of indentation
+            if isinstance(obj, (Table, Figure)):
+                current = next((c for c in current.iter_path_reverse() if c.name.startswith("head")), root)
+                name = "figure" if isinstance(obj, Figure) else "table"
+                Node(
+                    name,
+                    parent=current,
+                    obj=obj,
+                    xpos=xpos,
+                    number=-1,
+                    _width=obj.bbox.width / area.width,
+                    _type=obj._type,
+                )
+                ypos = obj.bbox.bottom
+
+            # Lines of text need to be carefully checked for indentation
+            elif isinstance(obj, CharLine):
+                newlines = round((ypos - obj.origin) / (lh_factor * obj.height))
+                content = obj.content
+                lcontent = content.lstrip()
+                content_start = 0
+                linesize = self._line_size(obj)
+
+                # Check when the note has finished (=> paragraphs without italic)
+                if parent_name(current) == "note" and (
+                    (current.parent.type == "note" and not obj.contains_font(current.parent._font))
+                    or (current.parent.type in {"caution", "warning"} and newlines >= 2)
+                ):
+                    current = current.parent.parent
+
+                # Check when the list ends into something indented far too right
+                elif parent_name(current).startswith("list") and (xpos - current.xpos) >= 2 * x_em:
+                    current = current.parent.parent
+
+                # Check if line is a heading, which may be multi-line, so we must
+                # be careful not to nest them, but group them properly
+                # Headings are always inserted into the root node!
+                if linesize.startswith("h1") or (
+                    linesize.startswith("h") and xpos < (spacing_content + 2 * x_em) and "Bold" in obj.chars[0].font
+                ):
+                    if (match := re.match(r"^ *(\d+(\.\d+)?(\.\d+)?) *", content)) is not None:
+                        start = min(len(match.group(0)), len(obj.chars) - 1)
+                        marker = match.group(1)
+                        size = marker.count(".") + 2
+                    else:
+                        start = 0
+                        marker = None
+                        size = linesize[1]
+                    name = f"head{size}"
+                    # Check if we're already parsing a heading, do not split into two
+                    if parent_name(current) != name or newlines > 2:
+                        content_start = start
+                        xpos = round(obj.chars[content_start].bbox.left)
+                        current = Node(name, parent=root, obj=obj, xpos=xpos, size=size, marker=marker)
+                        current = Node("para", parent=current, obj=obj, xpos=current.xpos)
+
+                # Check if the line is a note and deal with the indentation correctly
+                elif (
+                    with_notes and (match := re.match(r" *([Nn]ote|[Cc]aution|[Ww]arning):? \d?", content)) is not None
+                ):
+                    content_start = min(len(match.group(0)), len(obj.chars) - 1)
+                    # Correct xposition only if the Note: string is very far left
+                    if xpos + 4 * x_em <= current.xpos:
+                        xpos = round(obj.chars[content_start].bbox.left)
+                    # Prevent nesting of notes, they should only be listed
+                    if parent_name(current) == "note":
+                        current = current.parent.parent
+                    current = unindent(xpos, current, 2)
+                    current = Node(
+                        "note",
+                        parent=current,
+                        obj=obj,
+                        xpos=xpos,
+                        type=match.group(1).lower(),
+                        _font=obj.chars[content_start].font,
+                    )
+                    current = Node("para", parent=current, obj=obj, xpos=current.xpos)
+
+                # Check if line is Table or Figure caption
+                elif with_graphics and (
+                    (match := re.match(r" *([Tt]able|[Ff]igure) ?(\d+)\.? ?", content)) is not None
+                    and "Bold" in obj.chars[0].font
+                ):
+                    content_start = min(len(match.group(0)), len(obj.chars) - 1)
+                    current = next((c for c in current.iter_path_reverse() if c.name.startswith("head")), root)
+                    current = Node(
+                        "caption",
+                        parent=current,
+                        obj=obj,
+                        xpos=xpos,
+                        _type=match.group(1).lower(),
+                        number=int(match.group(2)),
+                    )
+                    current = Node("para", parent=current, obj=obj, xpos=current.xpos)
+
+                # Check if line is list and group them according to indentation
+                elif (match := re.match(r"^ *([•–]) ..|^ *(\d+)\. ..|^ *([a-z])\) ?..", content)) is not None:
+                    current = unindent(xpos, current, newlines)
+                    content_start = len(match.group(0)) - 2
+                    xpos = round(obj.chars[content_start].bbox.left)
+                    name = "listb"
+                    value = lcontent[0]
+                    if value in {"–", "-"}:
+                        name = "lists"
+                    elif value.isalpha():
+                        name = "lista"
+                    elif value.isnumeric():
+                        name = "listn"
+                        value = int(match.group(2))
+                    current = Node(name, parent=current, obj=obj, xpos=xpos, value=value)
+                    current = Node("para", parent=current, obj=obj, xpos=current.xpos)
+
+                # Check if line is a register bit definition
+                elif with_bits and re.match(r" *([Bb]ytes? *.+? *)?B[uio]ts? *\d+", content) is not None:
+                    if obj.contains_font("Bold"):
+                        # Use the bold character as delimiter
+                        content_start = next(xi for xi, c in enumerate(obj.chars) if "Bold" in c.font)
+                    else:
+                        # Default back to the regex
+                        if "Reserved" not in content:
+                            _LOGGER.warning(
+                                f"Fallback to Regex length for Bit pattern '{content}'!\nFonts: {obj.fonts}"
+                            )
+                        content_start = re.match(
+                            r" *([Bb]ytes? *.+? *)?(B[uio]t)( *\d+:?|s *(\d+ *([:-] *\d+ *)? *,? *)+) *", content
+                        )
+                        if content_start is None:
+                            _LOGGER.error(f"Unable to match Bit regex at all! '{content}'!")
+                            content_start = 0
+                        else:
+                            content_start = len(content_start.group(0))
+                        if not content_start:
+                            _LOGGER.error(f"Missing content start (=0)! '{content}'!")
+                        content_start = min(content_start, len(obj.chars) - 1)
+
+                    current = next((c for c in current.iter_path_reverse() if c.name.startswith("head")), root)
+                    middle = obj.chars[content_start].bbox.left
+                    xpos = round(middle)
+                    current = Node(
+                        "bit",
+                        parent=current,
+                        obj=obj,
+                        xpos=xpos,
+                        _page=self,
+                        _middle=middle,
+                        _left=area.left,
+                        _right=area.right,
+                    )
+                    current = Node("para", parent=current, obj=obj, xpos=current.xpos)
+
+                # Check if this is a new paragraph
+                elif newlines >= 2 or current.name not in {"para"}:
+                    # Fix issues where notes are reflowing back left of Note: text
+                    if parent_name(current) in {"note"}:
+                        if xpos < current.parent.xpos:
+                            xpos = current.parent.xpos
+                    # Prevent multiline
+                    current = unindent(xpos, current, newlines)
+                    current = Node("para", parent=current, obj=obj, xpos=xpos if current.is_root else current.xpos)
+
+                elif parent_name(current) not in {"caption", "bit", "area"}:
+                    current = unindent(xpos, current, newlines)
+
+                # Add the actual line
+                Node("line", parent=current, obj=obj, xpos=xpos, start=content_start, str=content[content_start:50])
+
+                ypos = obj.origin
+
+        return root
+
+    def __repr__(self) -> str:  # "Ti" prefix matches Document's "TiDoc(...)"
+        return f"TiPage({self.number})"
diff --git a/src/modm_data/svd/__init__.py b/src/modm_data/svd/__init__.py
index af5d163..8129788 100644
--- a/src/modm_data/svd/__init__.py
+++ b/src/modm_data/svd/__init__.py
@@ -1,7 +1,6 @@
 # Copyright 2022, Niklas Hauser
 # SPDX-License-Identifier: MPL-2.0
 
-from . import stmicro
 from .model import Device, PeripheralType, Peripheral, Register, BitField, compare_device_trees
 from .write import format_svd, write_svd
 from .read import read_svd