From 1a286bad7a802ab8fed8a1564c2ec2906074be75 Mon Sep 17 00:00:00 2001 From: Craig McNamara Date: Wed, 25 Mar 2026 15:28:26 -0700 Subject: [PATCH 1/5] Add high-level accessibility API for tagged PDF generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds Prawn::Accessibility module providing structure(), structure_container(), artifact(), heading(), paragraph(), and figure() methods for creating Section 508 compliant tagged PDFs. Usage: pdf = Prawn::Document.new(marked: true, language: 'en-US') pdf.heading(1, 'Title') pdf.paragraph('Body text.') pdf.artifact { pdf.text 'Page 1' } New options on Prawn::Document.new: - marked: true — enables tagged PDF mode - language: 'en-US' — sets document language in Catalog Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/prawn.rb | 1 + lib/prawn/accessibility.rb | 134 +++++++++++++++++++++++ lib/prawn/document.rb | 6 ++ spec/prawn/accessibility_spec.rb | 178 +++++++++++++++++++++++++++++++ 4 files changed, 319 insertions(+) create mode 100644 lib/prawn/accessibility.rb create mode 100644 spec/prawn/accessibility_spec.rb diff --git a/lib/prawn.rb b/lib/prawn.rb index df1a255fb..08fd561d2 100644 --- a/lib/prawn.rb +++ b/lib/prawn.rb @@ -74,6 +74,7 @@ def verify_options(accepted, actual) require_relative 'prawn/soft_mask' require_relative 'prawn/security' require_relative 'prawn/transformation_stack' +require_relative 'prawn/accessibility' require_relative 'prawn/document' require_relative 'prawn/font' require_relative 'prawn/measurements' diff --git a/lib/prawn/accessibility.rb b/lib/prawn/accessibility.rb new file mode 100644 index 000000000..6287c51df --- /dev/null +++ b/lib/prawn/accessibility.rb @@ -0,0 +1,134 @@ +# frozen_string_literal: true + +module Prawn + # Provides tagged PDF (accessibility) support for Prawn documents. + # + # When a document is created with `marked: true`, all content can be + # wrapped in structure elements that screen readers and assistive + # technologies use to navigate the document. + # + # @example + # pdf = Prawn::Document.new(marked: true, language: 'en-US') + # + # pdf.structure(:H1) do + # pdf.text 'Document Title' + # end + # + # pdf.structure(:P) do + # pdf.text 'Body paragraph text.' + # end + # + # pdf.artifact do + # pdf.text 'Page 1' # not read by screen readers + # end + module Accessibility + # Whether this document is tagged for accessibility. + # + # @return [Boolean] + def tagged? + renderer.marked? + end + + # Wrap content in a structure element. The block's content will be + # associated with the given tag in the document's structure tree. + # + # Can be nested — inner structure calls become children of the outer. + # + # @param tag [Symbol] PDF structure type (:Document, :Part, :Sect, + # :H1-:H6, :P, :L, :LI, :Lbl, :LBody, :Table, :TR, :TH, :TD, + # :Figure, :Formula, :Form, :Span, :Link, :Note, :BlockQuote, + # :Caption, :TOC, :TOCI, :Reference) + # @param attributes [Hash] optional attributes + # @option attributes [String] :Alt alternative text (for Figure, Formula) + # @option attributes [String] :Lang language override for this element + # @option attributes [Symbol] :Scope table header scope (:Column, :Row, :Both) + # @yield content to render inside this structure element + # @return [void] + def structure(tag, attributes = {}, &block) + return yield if !tagged? || !block + + tree = renderer.structure_tree + tree.begin_element(tag, attributes) + tree.mark_content(tag, &block) + tree.end_element + end + + # Wrap content in a structure element without marking the content + # directly. Use this for container elements (Table, TR, L, LI) where + # the children will each have their own marked content. + # + # @param tag [Symbol] PDF structure type + # @param attributes [Hash] optional attributes + # @yield content to render inside this structure element + # @return [void] + def structure_container(tag, attributes = {}, &block) + return yield if !tagged? || !block + + tree = renderer.structure_tree + tree.begin_element(tag, attributes) + yield + tree.end_element + end + + # Mark content as an artifact (decorative, not read by screen readers). + # Use for page numbers, decorative borders, backgrounds, watermarks. + # + # @param type [Symbol, nil] artifact type (:Pagination, :Layout, + # :Page, :Background) + # @yield content to render as artifact + # @return [void] + def artifact(type: nil, &block) + return yield if !tagged? || !block + + renderer.structure_tree.mark_artifact(artifact_type: type, &block) + end + + # Render a heading at the specified level. + # + # @param level [Integer] heading level 1-6 + # @param content [String] heading text + # @param options [Hash] options passed to `text()` + # @return [void] + def heading(level, content, options = {}) + tag = :"H#{level}" + if tagged? + structure(tag) { text(content, options) } + else + text(content, options) + end + end + + # Render text wrapped in a paragraph structure element. + # + # @param content [String, nil] text to render. If nil, yields a block. + # @param options [Hash] options passed to `text()` + # @yield optional block for complex paragraph content + # @return [void] + def paragraph(content = nil, options = {}, &block) + if tagged? + if block + structure(:P, &block) + else + structure(:P) { text(content, options) } + end + elsif block + yield + else + text(content, options) + end + end + + # Render an image wrapped in a Figure structure element with alt text. + # + # @param alt_text [String] alternative text for the image + # @yield block that calls `image()` or other drawing methods + # @return [void] + def figure(alt_text:, &block) + if tagged? + structure(:Figure, Alt: alt_text, &block) + else + yield + end + end + end +end diff --git a/lib/prawn/document.rb b/lib/prawn/document.rb index 92e263697..621bd91a4 100644 --- a/lib/prawn/document.rb +++ b/lib/prawn/document.rb @@ -58,6 +58,7 @@ class Document include Prawn::Stamp include Prawn::SoftMask include Prawn::TransformationStack + include Prawn::Accessibility alias inspect to_s @@ -72,6 +73,7 @@ class Document right_margin top_margin bottom_margin skip_page_creation compress background info text_formatter print_scaling + marked language ].freeze # Any module added to this array will be included into instances of @@ -239,6 +241,10 @@ def initialize(options = {}, &block) renderer.min_version(1.6) if options[:print_scaling] == :none + if options[:language] + state.store.root.data[:Lang] = options[:language] + end + @background = options[:background] @background_scale = options[:background_scale] || 1 @font_size = 12 diff --git a/spec/prawn/accessibility_spec.rb b/spec/prawn/accessibility_spec.rb new file mode 100644 index 000000000..5306be26e --- /dev/null +++ b/spec/prawn/accessibility_spec.rb @@ -0,0 +1,178 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Prawn::Accessibility do + describe 'with tagged document' do + let(:pdf) { Prawn::Document.new(marked: true, language: 'en-US') } + + describe '#tagged?' do + it 'returns true for marked documents' do + expect(pdf).to be_tagged + end + + it 'returns false for unmarked documents' do + plain = Prawn::Document.new + expect(plain).not_to be_tagged + end + end + + describe 'language' do + it 'sets Lang on the catalog' do + root_data = pdf.state.store.root.data + expect(root_data[:Lang]).to eq('en-US') + end + end + + describe '#structure' do + it 'wraps content in a structure element' do + pdf.structure(:H1) do + pdf.text 'Title' + end + output = pdf.render + + expect(output).to include('/StructTreeRoot') + expect(output).to include('/StructElem') + end + + it 'emits BDC/EMC in the content stream' do + pdf.structure(:P) do + pdf.text 'Hello' + end + output = pdf.render + + expect(output).to include('BDC') + expect(output).to include('EMC') + end + + it 'is a no-op for untagged documents' do + plain = Prawn::Document.new + plain.structure(:P) do + plain.text 'Hello' + end + output = plain.render + + expect(output).not_to include('/StructTreeRoot') + end + end + + describe '#structure_container' do + it 'creates a parent structure without marking content directly' do + pdf.structure_container(:Table) do + pdf.structure(:TD) do + pdf.text 'Cell' + end + end + output = pdf.render + + expect(output).to include('/StructElem') + expect(output).to include('/Table') + expect(output).to include('/TD') + end + end + + describe '#artifact' do + it 'wraps content in Artifact markers' do + pdf.artifact do + pdf.text 'Page 1' + end + output = pdf.render + + expect(output).to include('/Artifact BMC') + expect(output).to include('EMC') + end + + it 'supports artifact type' do + pdf.artifact(type: :Pagination) do + pdf.text 'Page 1' + end + output = pdf.render + + expect(output).to include('/Artifact') + expect(output).to include('/Type /Pagination') + end + + it 'is a no-op for untagged documents' do + plain = Prawn::Document.new + plain.artifact do + plain.text 'Footer' + end + output = plain.render + + expect(output).not_to include('/Artifact') + end + end + + describe '#heading' do + it 'renders text in an H1 structure element' do + pdf.heading(1, 'Title', size: 24) + output = pdf.render + + expect(output).to include('/H1') + expect(output).to include('BDC') + end + + it 'supports levels 1-6' do + (1..6).each do |level| + pdf.heading(level, "Heading #{level}") + end + output = pdf.render + + (1..6).each do |level| + expect(output).to include("/H#{level}") + end + end + end + + describe '#paragraph' do + it 'renders text in a P structure element' do + pdf.paragraph('Body text.') + output = pdf.render + + expect(output).to include('BDC') + end + + it 'supports block form' do + pdf.paragraph do + pdf.text 'Complex paragraph' + end + output = pdf.render + + expect(output).to include('BDC') + expect(output).to include('EMC') + end + end + + describe '#figure' do + it 'wraps content with alt text' do + pdf.figure(alt_text: 'A logo') do + pdf.text 'IMAGE PLACEHOLDER' + end + output = pdf.render + + expect(output).to include('/Figure') + expect(output).to include('/Alt') + end + end + + describe 'full document round-trip' do + it 'produces a tagged PDF with MarkInfo and StructTreeRoot' do + pdf.heading(1, 'Test Document') + pdf.paragraph('This is a test paragraph.') + + pdf.artifact(type: :Pagination) do + pdf.text 'Page 1 of 1' + end + + output = pdf.render + + expect(output).to start_with('%PDF-1.7') + expect(output).to include('/MarkInfo') + expect(output).to include('/Marked true') + expect(output).to include('/StructTreeRoot') + expect(output).to include('/Lang') + expect(output).to include('/Document') + end + end + end +end From 80d395432d4b48d4608655e6468c6d9a5c3e4929 Mon Sep 17 00:00:00 2001 From: Craig McNamara Date: Wed, 25 Mar 2026 20:46:13 -0700 Subject: [PATCH 2/5] Document ActualText attribute in accessibility API Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/prawn/accessibility.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/prawn/accessibility.rb b/lib/prawn/accessibility.rb index 6287c51df..528bc8cd8 100644 --- a/lib/prawn/accessibility.rb +++ b/lib/prawn/accessibility.rb @@ -40,6 +40,8 @@ def tagged? # :Caption, :TOC, :TOCI, :Reference) # @param attributes [Hash] optional attributes # @option attributes [String] :Alt alternative text (for Figure, Formula) + # @option attributes [String] :ActualText replacement text for screen + # readers (e.g., "required" for "*", "selected" for "X") # @option attributes [String] :Lang language override for this element # @option attributes [Symbol] :Scope table header scope (:Column, :Row, :Both) # @yield content to render inside this structure element From 7a7d0a237cd1f7bf49fa4b1619ad6c9efb92bd3c Mon Sep 17 00:00:00 2001 From: Craig McNamara Date: Wed, 25 Mar 2026 20:48:25 -0700 Subject: [PATCH 3/5] Add tests for ActualText attribute in accessibility API Tests that ActualText is properly passed through to structure elements, useful for screen reader replacement text on symbolic characters like * (required) and X (selected checkbox). Co-Authored-By: Claude Opus 4.6 (1M context) --- spec/prawn/accessibility_spec.rb | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/spec/prawn/accessibility_spec.rb b/spec/prawn/accessibility_spec.rb index 5306be26e..adfcf8b58 100644 --- a/spec/prawn/accessibility_spec.rb +++ b/spec/prawn/accessibility_spec.rb @@ -143,6 +143,29 @@ end end + describe 'ActualText' do + it 'passes ActualText to structure elements' do + pdf.structure(:Span, ActualText: 'required') do + pdf.text '*' + end + output = pdf.render + + expect(output).to include('/ActualText') + end + + it 'allows ActualText for checkbox indicators' do + pdf.structure(:Span, ActualText: 'Selected') do + pdf.text 'X' + end + pdf.structure(:Span, ActualText: 'Not selected') do + pdf.text ' ' + end + output = pdf.render + + expect(output).to include('/ActualText') + end + end + describe '#figure' do it 'wraps content with alt text' do pdf.figure(alt_text: 'A logo') do From d6424097a6701813ae7571cf1695e6da0fca5e16 Mon Sep 17 00:00:00 2001 From: Craig McNamara Date: Thu, 26 Mar 2026 09:59:51 -0700 Subject: [PATCH 4/5] Fix RuboCop style violations in accessibility specs Co-Authored-By: Claude Opus 4.6 (1M context) --- spec/prawn/accessibility_spec.rb | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/spec/prawn/accessibility_spec.rb b/spec/prawn/accessibility_spec.rb index adfcf8b58..6af7516d3 100644 --- a/spec/prawn/accessibility_spec.rb +++ b/spec/prawn/accessibility_spec.rb @@ -13,7 +13,7 @@ it 'returns false for unmarked documents' do plain = Prawn::Document.new - expect(plain).not_to be_tagged + expect(plain).to_not(be_tagged) end end @@ -27,7 +27,7 @@ describe '#structure' do it 'wraps content in a structure element' do pdf.structure(:H1) do - pdf.text 'Title' + pdf.text('Title') end output = pdf.render @@ -37,7 +37,7 @@ it 'emits BDC/EMC in the content stream' do pdf.structure(:P) do - pdf.text 'Hello' + pdf.text('Hello') end output = pdf.render @@ -48,11 +48,11 @@ it 'is a no-op for untagged documents' do plain = Prawn::Document.new plain.structure(:P) do - plain.text 'Hello' + plain.text('Hello') end output = plain.render - expect(output).not_to include('/StructTreeRoot') + expect(output).to_not(include('/StructTreeRoot')) end end @@ -60,7 +60,7 @@ it 'creates a parent structure without marking content directly' do pdf.structure_container(:Table) do pdf.structure(:TD) do - pdf.text 'Cell' + pdf.text('Cell') end end output = pdf.render @@ -74,7 +74,7 @@ describe '#artifact' do it 'wraps content in Artifact markers' do pdf.artifact do - pdf.text 'Page 1' + pdf.text('Page 1') end output = pdf.render @@ -84,7 +84,7 @@ it 'supports artifact type' do pdf.artifact(type: :Pagination) do - pdf.text 'Page 1' + pdf.text('Page 1') end output = pdf.render @@ -95,11 +95,11 @@ it 'is a no-op for untagged documents' do plain = Prawn::Document.new plain.artifact do - plain.text 'Footer' + plain.text('Footer') end output = plain.render - expect(output).not_to include('/Artifact') + expect(output).to_not(include('/Artifact')) end end @@ -134,7 +134,7 @@ it 'supports block form' do pdf.paragraph do - pdf.text 'Complex paragraph' + pdf.text('Complex paragraph') end output = pdf.render @@ -146,7 +146,7 @@ describe 'ActualText' do it 'passes ActualText to structure elements' do pdf.structure(:Span, ActualText: 'required') do - pdf.text '*' + pdf.text('*') end output = pdf.render @@ -155,10 +155,10 @@ it 'allows ActualText for checkbox indicators' do pdf.structure(:Span, ActualText: 'Selected') do - pdf.text 'X' + pdf.text('X') end pdf.structure(:Span, ActualText: 'Not selected') do - pdf.text ' ' + pdf.text(' ') end output = pdf.render @@ -169,7 +169,7 @@ describe '#figure' do it 'wraps content with alt text' do pdf.figure(alt_text: 'A logo') do - pdf.text 'IMAGE PLACEHOLDER' + pdf.text('IMAGE PLACEHOLDER') end output = pdf.render @@ -184,7 +184,7 @@ pdf.paragraph('This is a test paragraph.') pdf.artifact(type: :Pagination) do - pdf.text 'Page 1 of 1' + pdf.text('Page 1 of 1') end output = pdf.render From 75dd1f887e47ad44062e0aae4b3d5b9bbb4fd4e2 Mon Sep 17 00:00:00 2001 From: Craig McNamara Date: Thu, 26 Mar 2026 10:01:22 -0700 Subject: [PATCH 5/5] Trigger CI re-run