diff --git a/Gemfile b/Gemfile
index fce680bb3..f22f26dff 100644
--- a/Gemfile
+++ b/Gemfile
@@ -4,3 +4,10 @@ source 'https://rubygems.org'
 
 # Specify your gem's dependencies in review.gemspec
 gemspec
+
+# Development dependencies
+group :development do
+  # markly gem (for Markdown support) requires Ruby >= 3.1
+  # On Ruby 3.0, tests will be skipped but Re:VIEW will work with .re files
+  gem 'markly', '~> 0.13' if Gem.ruby_version >= Gem::Version.new('3.1.0')
+end
diff --git a/bin/review-ast-compile b/bin/review-ast-compile
new file mode 100755
index 000000000..351cc2db3
--- /dev/null
+++ b/bin/review-ast-compile
@@ -0,0 +1,8 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+$LOAD_PATH.unshift(File.realpath('../lib', __dir__))
+
+require 'review/ast/command/compile'
+
+exit ReVIEW::AST::Command::Compile.new.run(ARGV)
diff --git a/bin/review-ast-dump b/bin/review-ast-dump
new file mode 100755
index 000000000..a5760b82b
--- /dev/null
+++ b/bin/review-ast-dump
@@ -0,0 +1,125 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
+#
+# This program is free software.
+# You can distribute or modify this program under the terms of
+# the GNU LGPL, Lesser General Public License version 2.1.
+
+require 'pathname'
+
+bindir = Pathname.new(__FILE__).realpath.dirname
+$LOAD_PATH.unshift((bindir + '../lib').realpath)
+
+require 'review/init'
+require 'review/ast/dumper'
+require 'review/ast/json_serializer'
+require 'optparse'
+
+# Parse the command-line options, consuming them from ARGV.
+#
+# Prints usage to stderr and exits with status 1 when an option cannot be
+# parsed or when no input file is left in ARGV.
+#
+# Returns [config, output_file, serializer_options].
+def parse_options
+  config = ReVIEW::Configure.values
+  output_file = nil
+  serializer_options = ReVIEW::AST::JSONSerializer::Options.new
+
+  opts = OptionParser.new
+  opts.version = ReVIEW::VERSION
+  opts.banner = "Usage: #{File.basename($PROGRAM_NAME, '.*')} [options] CHAPTER.re\n" \
+                'Dump Re:VIEW document as AST in JSON format'
+
+  opts.on('-c', '--config=FILE', 'specify config file') do |file|
+    config.deep_merge!(ReVIEW::Configure.create(yamlfile: file).values)
+  end
+
+  opts.on('-o', '--output=FILE', 'output to FILE instead of stdout') do |file|
+    output_file = file
+  end
+
+  opts.on('--[no-]pretty', 'pretty print JSON (default: true)') do |v|
+    serializer_options.pretty = v
+  end
+
+  opts.on('--[no-]location', 'include location information (default: true)') do |v|
+    serializer_options.include_location = v
+  end
+
+  opts.on('--indent=INDENT', 'indentation for pretty print (default: " ")') do |v|
+    serializer_options.indent = v
+  end
+
+  opts.on('--compact', 'compact output (no location, no pretty print)') do
+    serializer_options.pretty = false
+    serializer_options.include_location = false
+  end
+
+  opts.on('--help', 'print help and exit') do
+    puts opts.help
+    exit 0
+  end
+
+  begin
+    opts.parse!
+  rescue OptionParser::ParseError => e
+    # ParseError is the superclass of InvalidOption, MissingArgument, etc.,
+    # so e.g. a bare `-o` gets the same usage message as an unknown option.
+    $stderr.puts e.message
+    $stderr.puts opts.help
+    exit 1
+  end
+
+  if ARGV.empty?
+    $stderr.puts 'No input files.'
+    $stderr.puts opts.help
+    exit 1
+  end
+
+  [config, output_file, serializer_options]
+end
+
+# Dump every path in ARGV with dumper.
+#
+# Without output_file each dump is printed to stdout. With output_file the
+# first dump replaces the file and every later dump is appended on a new
+# line, so several inputs no longer overwrite one another.
+def dump_files(dumper, output_file)
+  ARGV.each_with_index do |path, index|
+    output = dumper.dump_file(path)
+
+    if output_file
+      if index.zero?
+        File.write(output_file, output)
+      else
+        File.write(output_file, "\n#{output}", mode: 'a')
+      end
+      $stderr.puts "Output written to: #{output_file}"
+    else
+      puts output
+    end
+  end
+end
+
+# Entry point: dump the files named on the command line, reporting any
+# failure on stderr and exiting with status 1.
+def main
+  config, output_file, serializer_options = parse_options
+  dumper = ReVIEW::AST::Dumper.new(config: config, serializer_options: serializer_options)
+
+  begin
+    dump_files(dumper, output_file)
+  rescue ReVIEW::ApplicationError, ReVIEW::FileNotFound => e
+    $stderr.puts "Error: #{e.message}"
+    exit 1
+  rescue StandardError => e
+    $stderr.puts "Fatal error: #{e.class}: #{e.message}"
+    $stderr.puts e.backtrace if $DEBUG
+    exit 1
+  end
+end
+
+main
diff --git a/bin/review-ast-dump2re b/bin/review-ast-dump2re
new file mode 100755
index 000000000..354fd5e74
--- /dev/null
+++ b/bin/review-ast-dump2re
@@ -0,0 +1,111 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
+#
+# This program is free software.
+# You can distribute or modify this program under the terms of
+# the GNU LGPL, Lesser General Public License version 2.1.
+
+require 'pathname'
+
+bindir = Pathname.new(__FILE__).realpath.dirname
+$LOAD_PATH.unshift((bindir + '../lib').realpath)
+
+require 'review/init'
+require 'review/ast'
+require 'review/ast/json_serializer'
+require 'review/ast/review_generator'
+require 'optparse'
+require 'json'
+
+# Entry point: read a JSON AST from FILE (or from stdin when FILE is '-' or
+# omitted while stdin is not a terminal) and write the regenerated Re:VIEW
+# text to stdout or to the file given with -o. Failures exit with status 1.
+def main
+  options = {
+    output: nil
+  }
+
+  parser = OptionParser.new do |opts|
+    opts.banner = "Usage: #{File.basename($PROGRAM_NAME, '.*')} [options] FILE"
+    opts.version = ReVIEW::VERSION
+
+    opts.on('-o', '--output FILE', 'Output file (default: stdout)') do |file|
+      options[:output] = file
+    end
+
+    opts.on_tail('-h', '--help', 'Show this message') do
+      puts opts
+      exit
+    end
+  end
+
+  begin
+    parser.parse!
+  rescue OptionParser::ParseError => e
+    # Usage goes to stderr; stdout carries the generated Re:VIEW text.
+    $stderr.puts "Error: #{e.message}"
+    $stderr.puts parser
+    exit 1
+  end
+
+  if ARGV.empty? && $stdin.tty?
+    $stderr.puts 'Error: No input file specified'
+    $stderr.puts parser
+    exit 1
+  end
+
+  input_file = ARGV[0] || '-'
+
+  # Read JSON AST
+  begin
+    if input_file == '-'
+      json_data = $stdin.read
+    else
+      unless File.exist?(input_file)
+        $stderr.puts "Error: Input file not found: #{input_file}"
+        exit 1
+      end
+      json_data = File.read(input_file)
+    end
+  rescue StandardError => e
+    $stderr.puts "Error reading input file: #{e.message}"
+    exit 1
+  end
+
+  # Deserialize JSON to AST
+  begin
+    ast_root = ReVIEW::AST::JSONSerializer.deserialize(json_data)
+  rescue JSON::ParserError => e
+    $stderr.puts "Error parsing JSON: #{e.message}"
+    exit 1
+  rescue StandardError => e
+    $stderr.puts "Error deserializing AST: #{e.message}"
+    exit 1
+  end
+
+  # Generate Re:VIEW text
+  begin
+    generator = ReVIEW::AST::ReVIEWGenerator.new
+    review_text = generator.generate(ast_root)
+  rescue StandardError => e
+    $stderr.puts "Error generating Re:VIEW text: #{e.message}"
+    $stderr.puts e.backtrace if ENV['DEBUG']
+    exit 1
+  end
+
+  # Output result
+  begin
+    if options[:output]
+      File.write(options[:output], review_text)
+    else
+      puts review_text
+    end
+  rescue StandardError => e
+    $stderr.puts "Error writing output: #{e.message}"
+    exit 1
+  end
+end
+
+main
diff --git a/bin/review-ast-epubmaker b/bin/review-ast-epubmaker
new file mode 100755
index 000000000..96648b7d0
--- /dev/null
+++ b/bin/review-ast-epubmaker
@@ -0,0 +1,18 @@
+#!/usr/bin/env ruby
+
+# frozen_string_literal: true
+
+# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
+#
+# This program is free software.
+# You can distribute or modify this program under the terms of
+# the GNU LGPL, Lesser General Public License version 2.1.
+ +require 'pathname' + +bindir = Pathname.new(__FILE__).realpath.dirname +$LOAD_PATH.unshift((bindir + '../lib').realpath) + +require 'review/ast/command/epub_maker' + +ReVIEW::AST::Command::EpubMaker.execute(*ARGV) diff --git a/bin/review-ast-idgxmlmaker b/bin/review-ast-idgxmlmaker new file mode 100755 index 000000000..29b1f8f35 --- /dev/null +++ b/bin/review-ast-idgxmlmaker @@ -0,0 +1,18 @@ +#!/usr/bin/env ruby + +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'pathname' + +bindir = Pathname.new(__FILE__).realpath.dirname +$LOAD_PATH.unshift((bindir + '../lib').realpath) + +require 'review/ast/command/idgxml_maker' + +ReVIEW::AST::Command::IdgxmlMaker.execute(*ARGV) diff --git a/bin/review-ast-pdfmaker b/bin/review-ast-pdfmaker new file mode 100755 index 000000000..7c48739f3 --- /dev/null +++ b/bin/review-ast-pdfmaker @@ -0,0 +1,18 @@ +#!/usr/bin/env ruby + +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'pathname' + +bindir = Pathname.new(__FILE__).realpath.dirname +$LOAD_PATH.unshift((bindir + '../lib').realpath) + +require 'review/ast/command/pdf_maker' + +ReVIEW::AST::Command::PdfMaker.execute(*ARGV) diff --git a/bin/review-ast-textmaker b/bin/review-ast-textmaker new file mode 100755 index 000000000..758b1ebfb --- /dev/null +++ b/bin/review-ast-textmaker @@ -0,0 +1,18 @@ +#!/usr/bin/env ruby + +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. 
+ +require 'pathname' + +bindir = Pathname.new(__FILE__).realpath.dirname +$LOAD_PATH.unshift((bindir + '../lib').realpath) + +require 'review/ast/command/text_maker' + +ReVIEW::AST::Command::TextMaker.execute(*ARGV) diff --git a/doc/ast.ja.md b/doc/ast.ja.md new file mode 100644 index 000000000..06e62c07e --- /dev/null +++ b/doc/ast.ja.md @@ -0,0 +1,610 @@ +# Re:VIEW AST/Renderer 概要 + +このドキュメントは、Re:VIEWのAST(Abstract Syntax Tree:抽象構文木)/Rendererアーキテクチャの全体像を理解するための入門ガイドです。 + +## 目次 + +- [AST/Rendererとは](#astrendererとは) +- [なぜASTが必要なのか](#なぜastが必要なのか) +- [アーキテクチャ概要](#アーキテクチャ概要) +- [主要コンポーネント](#主要コンポーネント) +- [基本的な使い方](#基本的な使い方) +- [AST/Rendererでできること](#astrendererでできること) +- [より詳しく知るには](#より詳しく知るには) +- [FAQ](#faq) + +## AST/Rendererとは + +Re:VIEWのAST/Rendererは、Re:VIEW文書を構造化されたデータ(AST)として扱い、様々な出力フォーマットに変換するための新しいアーキテクチャです。 + +「AST(Abstract Syntax Tree:抽象構文木)」とは、文書の構造を木構造のデータとして表現したものです。例えば、見出し・段落・リスト・表といった要素が、親子関係を持つノードとして表現されます。 + +従来の直接Builder呼び出し方式と異なり、AST方式では文書構造を中間表現(AST)として明示的に保持することで、より柔軟で拡張性の高い文書処理を実現します。 + +## なぜASTが必要なのか + +### 従来方式の課題 + +```mermaid +graph LR + A[Re:VIEW文書] --> B[Compiler] + B --> C[HTMLBuilder] + B --> D[LaTeXBuilder] + B --> E[EPUBBuilder] + + style B fill:#ffcccc + style C fill:#ffcccc + style D fill:#ffcccc + style E fill:#ffcccc +``` + +従来の方式では: +- フォーマット固有の処理が分散: 各Builderが独自に文書を解釈 +- 構文解析と出力生成が密結合: 解析処理とフォーマット変換が分離されていない +- カスタム処理や拡張が困難: 新しいフォーマットや機能の追加が複雑 +- 構造の再利用が不可: 一度解析した構造を他の用途で利用できない + +### AST方式の利点 + +```mermaid +graph LR + A[Re:VIEW文書] --> B[AST::Compiler] + B --> C[AST] + C --> D[HTMLRenderer] + C --> E[LaTeXRenderer] + C --> F[IDGXMLRenderer] + C --> G[JSON出力] + C --> H[カスタムツール] + + style C fill:#ccffcc +``` + +AST方式では: +- 構造の明示化: 文書構造を明確なデータモデル(ノードツリー)で表現 +- 再利用性: 一度構築したASTを複数のフォーマットや用途で利用可能 +- 拡張性: カスタムレンダラーやツールの開発が容易 +- 解析・変換: JSON出力、双方向変換、構文解析ツールの実現 +- 保守性: 構文解析とレンダリングの責務が明確に分離 + +## アーキテクチャ概要 + +### 処理フロー + +Re:VIEW文書がAST経由で出力されるまでの流れ: + +```mermaid +flowchart TB + A[Re:VIEW文書] --> B[AST::Compiler] + B --> C[AST構築] + C --> 
D[参照解決] + D --> E[後処理] + E --> F[AST生成完了] + + F --> G[HTMLRenderer] + F --> H[LaTeXRenderer] + F --> I[IDGXMLRenderer] + F --> J[JSONSerializer] + + G --> K[HTML出力] + H --> L[LaTeX出力] + I --> M[IDGXML出力] + J --> N[JSON出力] + + subgraph "1. AST生成フェーズ" + B + C + D + E + F + end + + subgraph "2. レンダリングフェーズ" + G + H + I + J + end +``` + +### 主要コンポーネントの役割 + +| コンポーネント | 役割 | 場所 | +|--------------|------|------| +| AST::Compiler | Re:VIEW文書を解析し、AST構造を構築 | `lib/review/ast/compiler.rb` | +| ASTノード | 文書の各要素(見出し、段落、リストなど)を表現 | `lib/review/ast/*_node.rb` | +| Renderer | ASTを各種出力フォーマットに変換 | `lib/review/renderer/*.rb` | +| Visitor | ASTを走査する基底クラス | `lib/review/ast/visitor.rb` | +| Indexer | 図表・リスト等のインデックスを構築 | `lib/review/ast/indexer.rb` | +| TextFormatter | テキスト整形とI18nを一元管理 | `lib/review/renderer/text_formatter.rb` | +| JSONSerializer | ASTとJSONの相互変換 | `lib/review/ast/json_serializer.rb` | + +### 従来方式との比較 + +```mermaid +graph TB + subgraph "従来方式" + A1[Re:VIEW文書] --> B1[Compiler] + B1 --> C1[Builder] + C1 --> D1[出力] + end + + subgraph "AST方式" + A2[Re:VIEW文書] --> B2[AST::Compiler] + B2 --> C2[AST] + C2 --> D2[Renderer] + D2 --> E2[出力] + C2 -.-> F2[JSON/ツール] + end + + style C2 fill:#ccffcc + style F2 fill:#ffffcc +``` + +#### 主な違い +- 中間表現の有無: AST方式では明示的な中間表現(AST)を持つ +- 処理の分離: 構文解析とレンダリングが完全に分離 +- 拡張性: ASTを利用したツールやカスタム処理が可能 + +## 主要コンポーネント + +### AST::Compiler + +Re:VIEW文書を読み込み、AST構造を構築するコンパイラです。 + +#### 主な機能 +- Re:VIEW記法の解析(見出し、段落、ブロックコマンド、リスト等) +- Markdown入力のサポート(拡張子による自動切り替え) +- 位置情報の保持(エラー報告用) +- 参照解決と後処理の実行 + +#### 処理の流れ +1. 入力ファイルを1行ずつ走査 +2. 各要素を適切なASTノードに変換 +3. 参照解決(図表・リスト等への参照を解決) +4. 
後処理(構造の正規化、番号付与等) + +### ASTノード + +文書の構造を表現する各種ノードクラスです。すべてのノードは`AST::Node`(ブランチノード)または`AST::LeafNode`(リーフノード)を継承します。 + +#### ノードの階層構造 + +```mermaid +classDiagram + Node <|-- LeafNode + Node <|-- DocumentNode + Node <|-- HeadlineNode + Node <|-- ParagraphNode + Node <|-- ListNode + Node <|-- TableNode + Node <|-- CodeBlockNode + Node <|-- InlineNode + + LeafNode <|-- TextNode + LeafNode <|-- ImageNode + LeafNode <|-- FootnoteNode + + TextNode <|-- ReferenceNode + + class Node { + +location + +children + +visit_method_name() + +to_inline_text() + } + + class LeafNode { + +content + No children allowed + } +``` + +#### 主要なノードクラス +- `DocumentNode`: 文書全体のルート +- `HeadlineNode`: 見出し(レベル、ラベル、キャプション) +- `ParagraphNode`: 段落 +- `ListNode`/`ListItemNode`: リスト(箇条書き、番号付き、定義リスト) +- `TableNode`: 表 +- `CodeBlockNode`: コードブロック +- `InlineNode`: インライン要素(太字、コード、リンク等) +- `TextNode`: プレーンテキスト(LeafNode) +- `ImageNode`: 画像(LeafNode) + +詳細は[ast_node.md](./ast_node.md)を参照してください。 + +### Renderer + +ASTを各種出力フォーマットに変換するクラスです。`Renderer::Base`を継承し、Visitorパターンでノードを走査します。 + +#### 主要なRenderer +- `HtmlRenderer`: HTML出力 +- `LatexRenderer`: LaTeX出力 +- `IdgxmlRenderer`: InDesign XML出力 +- `MarkdownRenderer`: Markdown出力 +- `PlaintextRenderer`: プレーンテキスト出力 +- `TopRenderer`: TOP形式出力 + +#### Rendererの仕組み + +```ruby +# 各ノードタイプに対応したvisitメソッドを実装 +def visit_headline(node) + # HeadlineNodeをHTMLに変換 + level = node.level + caption = render_children(node.caption_node) + "<h#{level}>#{caption}</h#{level}>" +end +``` + +詳細は[ast_architecture.md](./ast_architecture.md)を参照してください。 + +### 補助機能 + +#### JSONSerializer + +ASTとJSON形式の相互変換を提供します。 + +```ruby +# AST → JSON +json = JSONSerializer.serialize(ast, options) + +# JSON → AST +ast = JSONSerializer.deserialize(json) +``` + +##### 用途 +- AST構造のデバッグ +- 外部ツールとの連携 +- ASTの保存と復元 + +#### ReVIEWGenerator + +ASTからRe:VIEW記法のテキストを再生成します。 + +```ruby +generator = ReVIEW::AST::ReVIEWGenerator.new +review_text = generator.generate(ast) +``` + +##### 用途 +- 双方向変換(Re:VIEW ↔ AST ↔ Re:VIEW) +- 構造の正規化 +- 
フォーマット変換ツールの実装 + +#### TextFormatter + +Rendererで使用される、テキスト整形とI18n(国際化)を一元管理するサービスクラスです。 + +```ruby +# Renderer内で使用 +formatter = text_formatter +caption = formatter.format_caption('list', chapter_number, item_number, caption_text) +``` + +##### 主な機能 +- I18nキーを使用したテキスト生成(図表番号、キャプション等) +- フォーマット固有の装飾(HTML: `図1.1:`, TOP/TEXT: `図1.1 `) +- 章番号の整形(`第1章`, `Appendix A`等) +- 参照テキストの生成 + +##### 用途 +- Rendererでの一貫したテキスト生成 +- 多言語対応(I18nキーを通じた翻訳) +- フォーマット固有の整形ルールの集約 + +## 基本的な使い方 + +### コマンドライン実行 + +Re:VIEW文書をAST経由で各種フォーマットに変換します。 + +#### 単一ファイルのコンパイル + +```bash +# HTML出力 +review-ast-compile --target=html chapter.re > chapter.html + +# LaTeX出力 +review-ast-compile --target=latex chapter.re > chapter.tex + +# JSON出力(AST構造を確認) +review-ast-compile --target=json chapter.re > chapter.json + +# AST構造のダンプ(デバッグ用) +review-ast-dump chapter.re +``` + +#### 書籍全体のビルド + +AST Rendererを使用した書籍全体のビルドには、専用のmakerコマンドを使用します: + +```bash +# PDF生成(LaTeX経由) +review-ast-pdfmaker config.yml + +# EPUB生成 +review-ast-epubmaker config.yml + +# InDesign XML生成 +review-ast-idgxmlmaker config.yml + +# テキスト生成(TOP形式またはプレーンテキスト) +review-ast-textmaker config.yml # TOP形式(◆→マーカー付き) +``` + +これらのコマンドは、従来の`review-pdfmaker`、`review-epubmaker`等と同じインターフェースを持ちますが、内部的にAST Rendererを使用します。 + +### プログラムからの利用 + +Ruby APIを使用してASTを操作できます。 + +```ruby +require 'review' +require 'review/ast/compiler' +require 'review/renderer/html_renderer' +require 'stringio' + +# 設定を読み込む +config = ReVIEW::Configure.create(yamlfile: 'config.yml') +book = ReVIEW::Book::Base.new('.', config: config) + +# チャプターを取得 +chapter = book.chapters.first + +# ASTを生成(参照解決を有効化) +compiler = ReVIEW::AST::Compiler.new +ast_root = compiler.compile_to_ast(chapter, reference_resolution: true) + +# HTMLに変換 +renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) +html = renderer.render(ast_root) + +puts html +``` + +#### 異なるフォーマットへの変換 + +```ruby +# LaTeXに変換 +require 'review/renderer/latex_renderer' +latex_renderer = ReVIEW::Renderer::LatexRenderer.new(chapter) +latex = 
latex_renderer.render(ast_root) + +# Markdownに変換 +require 'review/renderer/markdown_renderer' +md_renderer = ReVIEW::Renderer::MarkdownRenderer.new(chapter) +markdown = md_renderer.render(ast_root) + +# TOP形式に変換 +require 'review/renderer/top_renderer' +top_renderer = ReVIEW::Renderer::TopRenderer.new(chapter) +top_text = top_renderer.render(ast_root) +``` + +### よくあるユースケース + +#### 1. カスタムレンダラーの作成 + +特定の用途向けに独自のレンダラーを実装できます。 + +```ruby +class MyCustomRenderer < ReVIEW::Renderer::Base + def visit_headline(node) + # 独自のヘッドライン処理 + end + + def visit_paragraph(node) + # 独自の段落処理 + end +end +``` + +#### 2. AST解析ツールの作成 + +ASTを走査して統計情報を収集するツールを作成できます。 + +```ruby +class WordCountVisitor < ReVIEW::AST::Visitor + attr_reader :word_count + + def initialize + @word_count = 0 + end + + def visit_text(node) + @word_count += node.content.split.size + end +end + +visitor = WordCountVisitor.new +visitor.visit(ast) +puts "Total words: #{visitor.word_count}" +``` + +#### 3. 文書構造の変換 + +ASTを操作して文書構造を変更できます。 + +```ruby +# 特定のノードを検索して置換 +ast.children.each do |node| + if node.is_a?(ReVIEW::AST::HeadlineNode) && node.level == 1 + # レベル1の見出しを処理 + end +end +``` + +## AST/Rendererでできること + +### 対応フォーマット + +AST/Rendererは以下の出力フォーマットに対応しています: + +| フォーマット | Renderer | Makerコマンド | 用途 | +|------------|----------|--------------|------| +| HTML | `HtmlRenderer` | `review-ast-epubmaker` | Web公開、プレビュー、EPUB生成 | +| LaTeX | `LatexRenderer` | `review-ast-pdfmaker` | PDF生成(LaTeX経由) | +| IDGXML | `IdgxmlRenderer` | `review-ast-idgxmlmaker` | InDesign組版 | +| Markdown | `MarkdownRenderer` | `review-ast-compile` | Markdown形式への変換 | +| Plaintext | `PlaintextRenderer` | `review-ast-textmaker -n` | 装飾なしプレーンテキスト | +| TOP | `TopRenderer` | `review-ast-textmaker` | 編集マーカー付きテキスト | +| JSON | `JSONSerializer` | `review-ast-compile` | AST構造のJSON出力 | + +### 拡張機能 + +AST/Rendererならではの機能: + +#### JSON出力 +```bash +# AST構造をJSON形式で出力 +review-ast-compile --target=json chapter.re +``` + +##### 用途 +- AST構造のデバッグ +- 外部ツールとの連携 +- 
構文解析エンジンとしての利用 + +#### 双方向変換 +```bash +# Re:VIEW → AST → JSON → AST → Re:VIEW +review-ast-compile --target=json chapter.re > ast.json +# JSONからRe:VIEWテキストを再生成 +review-ast-dump2re ast.json > regenerated.re +``` + +##### 用途 +- 構造の正規化 +- フォーマット変換 +- 文書の検証 + +#### カスタムツール開発 + +ASTを利用して独自のツールを開発できます: + +- 文書解析ツール: 文書の統計情報収集 +- リンティングツール: スタイルチェック、構造検証 +- 変換ツール: 独自フォーマットへの変換 +- 自動化ツール: 文書生成、テンプレート処理 + +### Re:VIEW全要素への対応 + +AST/Rendererは、Re:VIEWのすべての記法要素に対応しています: + +##### ブロック要素 +- 見出し(`=`, `==`, `===`) +- 段落 +- リスト(箇条書き、番号付き、定義リスト) +- 表(`//table`) +- コードブロック(`//list`, `//emlist`, `//cmd`等) +- 画像(`//image`, `//indepimage`) +- コラム(`//note`, `//memo`, `//column`等) +- 数式(`//texequation`) + +##### インライン要素 +- 装飾(`@<b>`, `@<i>`, `@<tt>`等) +- リンク(`@<href>`, `@<hd>`) +- 参照(`@<img>`, `@<table>`, `@<list>`, `@<chap>`等) +- 脚注(`@<fn>`) +- ルビ(`@<ruby>`) + +詳細は[ast_node.md](./ast_node.md)および[ast_architecture.md](./ast_architecture.md)を参照してください。 + +## より詳しく知るには + +AST/Rendererについてさらに詳しく知るには、以下のドキュメントを参照してください: + +### 詳細ドキュメント + +| ドキュメント | 内容 | +|------------|------| +| [ast_architecture.md](./ast_architecture.md) | アーキテクチャ全体の詳細説明。パイプライン、コンポーネント、処理フローの詳細 | +| [ast_node.md](./ast_node.md) | ASTノードクラスの完全なリファレンス。各ノードの属性、メソッド、使用例 | +| [ast_list_processing.md](./ast_list_processing.md) | リスト処理の詳細。ListParser、NestedListAssembler、後処理の仕組み | + +### 推奨する学習順序 + +1. このドキュメント(ast.md): まず全体像を把握 +2. [ast_architecture.md](./ast_architecture.md): アーキテクチャの詳細を理解 +3. [ast_node.md](./ast_node.md): 具体的なノードクラスを学習 +4. [ast_list_processing.md](./ast_list_processing.md): 複雑なリスト処理を深掘り +5. ソースコード: 実装の詳細を確認 + +### サンプルコード + +実際の使用例は以下を参照してください: + +- `lib/review/ast/command/compile.rb`: コマンドライン実装 +- `lib/review/renderer/`: 各種Rendererの実装 +- `test/ast/`: ASTのテストコード(使用例として参考になります) + +## FAQ + +### Q1: 従来のBuilderとAST/Rendererの使い分けは? + +A: 現時点では両方とも使用可能です。 + +- AST/Renderer方式: 新機能(JSON出力、双方向変換等)が必要な場合、カスタムツールを開発する場合 +- 従来のBuilder方式: 既存のプロジェクトやワークフローを維持する場合 + +将来的にはAST/Renderer方式を標準とすることを目指しています。 + +### Q2: 既存のプロジェクトをAST方式に移行する必要はありますか? 
+ +A: 必須ではありません。従来の方式もしばらくは引き続きサポートされます。ただし、新しい機能や拡張を利用したい場合は、AST方式の使用を推奨します。 + +### Q3: カスタムRendererを作成するには? + +A: `Renderer::Base`を継承し、必要な`visit_*`メソッドをオーバーライドします。 + +```ruby +class MyRenderer < ReVIEW::Renderer::Base + def visit_headline(node) + # 独自の処理 + end +end +``` + +詳細は[ast_architecture.md](./ast_architecture.md)のRenderer層の説明を参照してください。 + +### Q4: ASTのデバッグ方法は? + +A: 以下の方法があります: + +1. JSON出力でAST構造を確認: + ```bash + review-ast-compile --target=json chapter.re | jq . + ``` + +2. review-ast-dumpコマンドを使用: + ```bash + review-ast-dump chapter.re + ``` + +3. プログラムから直接確認: + ```ruby + require 'pp' + pp ast.to_h + ``` + +### Q5: パフォーマンスは従来方式と比べてどうですか? + +A: AST方式は中間表現(AST)を構築するオーバーヘッドがありますが、以下の利点があります: + +- 一度構築したASTを複数のフォーマットで再利用可能(複数フォーマット出力時に効率的) +- 構造化されたデータモデルによる最適化の余地 +- 参照解決やインデックス構築の効率化 + +通常の使用では、パフォーマンスの差はほとんど体感できないレベルです。 + +### Q6: Markdownファイルも処理できますか? + +A: はい、対応しています。ファイルの拡張子(`.md`)によって自動的にMarkdownコンパイラが使用されます。 + +```bash +review-ast-compile --target=html chapter.md +``` + +### Q7: 既存のプラグインやカスタマイズは動作しますか? + +A: AST/Rendererは従来のBuilderシステムとは独立しています。従来のBuilderプラグインはそのまま動作しますが、AST/Renderer方式では新しいカスタマイズ方法(カスタムRenderer、Visitor等)を使用します。 diff --git a/doc/ast.md b/doc/ast.md new file mode 100644 index 000000000..7fe312ffd --- /dev/null +++ b/doc/ast.md @@ -0,0 +1,610 @@ +# Re:VIEW AST/Renderer Overview + +This document is an introductory guide to understanding the overall architecture of Re:VIEW's AST (Abstract Syntax Tree)/Renderer. + +## Table of Contents + +- [What is AST/Renderer](#what-is-astrenderer) +- [Why AST is Needed](#why-ast-is-needed) +- [Architecture Overview](#architecture-overview) +- [Key Components](#key-components) +- [Basic Usage](#basic-usage) +- [What AST/Renderer Can Do](#what-astrenderer-can-do) +- [Learning More](#learning-more) +- [FAQ](#faq) + +## What is AST/Renderer + +Re:VIEW's AST/Renderer is a new architecture for handling Re:VIEW documents as structured data (AST) and converting them to various output formats. 
+ +An "AST (Abstract Syntax Tree)" is a representation of document structure as a tree-structured data model. For example, elements such as headings, paragraphs, lists, and tables are represented as nodes with parent-child relationships. + +Unlike the traditional direct Builder invocation approach, the AST approach explicitly maintains document structure as an intermediate representation (AST), enabling more flexible and extensible document processing. + +## Why AST is Needed + +### Challenges with the Traditional Approach + +```mermaid +graph LR + A[Re:VIEW Document] --> B[Compiler] + B --> C[HTMLBuilder] + B --> D[LaTeXBuilder] + B --> E[EPUBBuilder] + + style B fill:#ffcccc + style C fill:#ffcccc + style D fill:#ffcccc + style E fill:#ffcccc +``` + +In the traditional approach: +- Format-specific processing is scattered: Each Builder interprets documents independently +- Parsing and output generation are tightly coupled: Parsing logic and format conversion are not separated +- Custom processing and extensions are difficult: Adding new formats or features is complex +- Structure reuse is not possible: Once-parsed structure cannot be reused for other purposes + +### Benefits of the AST Approach + +```mermaid +graph LR + A[Re:VIEW Document] --> B[AST::Compiler] + B --> C[AST] + C --> D[HTMLRenderer] + C --> E[LaTeXRenderer] + C --> F[IDGXMLRenderer] + C --> G[JSON Output] + C --> H[Custom Tools] + + style C fill:#ccffcc +``` + +The AST approach provides: +- Explicit structure: Document structure is represented with a clear data model (node tree) +- Reusability: Once-built AST can be used for multiple formats and purposes +- Extensibility: Easy to develop custom renderers and tools +- Analysis & transformation: Enables JSON output, bidirectional conversion, and syntax analysis tools +- Maintainability: Clear separation of concerns between parsing and rendering + +## Architecture Overview + +### Processing Flow + +The flow from Re:VIEW document to output via AST: + 
+```mermaid +flowchart TB + A[Re:VIEW Document] --> B[AST::Compiler] + B --> C[Build AST] + C --> D[Reference Resolution] + D --> E[Post-processing] + E --> F[AST Generation Complete] + + F --> G[HTMLRenderer] + F --> H[LaTeXRenderer] + F --> I[IDGXMLRenderer] + F --> J[JSONSerializer] + + G --> K[HTML Output] + H --> L[LaTeX Output] + I --> M[IDGXML Output] + J --> N[JSON Output] + + subgraph "1. AST Generation Phase" + B + C + D + E + F + end + + subgraph "2. Rendering Phase" + G + H + I + J + end +``` + +### Roles of Key Components + +| Component | Role | Location | +|-----------|------|----------| +| AST::Compiler | Parses Re:VIEW documents and builds AST structure | `lib/review/ast/compiler.rb` | +| AST Nodes | Represents document elements (headings, paragraphs, lists, etc.) | `lib/review/ast/*_node.rb` | +| Renderer | Converts AST to various output formats | `lib/review/renderer/*.rb` | +| Visitor | Base class for traversing AST | `lib/review/ast/visitor.rb` | +| Indexer | Builds indexes for figures, tables, listings, etc. 
| `lib/review/ast/indexer.rb` | +| TextFormatter | Centrally manages text formatting and I18n | `lib/review/renderer/text_formatter.rb` | +| JSONSerializer | Bidirectional conversion between AST and JSON | `lib/review/ast/json_serializer.rb` | + +### Comparison with Traditional Approach + +```mermaid +graph TB + subgraph "Traditional Approach" + A1[Re:VIEW Document] --> B1[Compiler] + B1 --> C1[Builder] + C1 --> D1[Output] + end + + subgraph "AST Approach" + A2[Re:VIEW Document] --> B2[AST::Compiler] + B2 --> C2[AST] + C2 --> D2[Renderer] + D2 --> E2[Output] + C2 -.-> F2[JSON/Tools] + end + + style C2 fill:#ccffcc + style F2 fill:#ffffcc +``` + +#### Key Differences +- Intermediate representation: AST approach has explicit intermediate representation (AST) +- Separation of concerns: Parsing and rendering are completely separated +- Extensibility: Tools and custom processing using AST are possible + +## Key Components + +### AST::Compiler + +A compiler that reads Re:VIEW documents and builds AST structure. + +#### Main Features +- Parsing Re:VIEW syntax (headings, paragraphs, block commands, lists, etc.) +- Support for Markdown input (automatic switching based on file extension) +- Maintains location information (for error reporting) +- Reference resolution and post-processing execution + +#### Processing Flow +1. Scan input file line by line +2. Convert each element to appropriate AST nodes +3. Reference resolution (resolve references to figures, tables, listings, etc.) +4. Post-processing (structure normalization, numbering, etc.) + +### AST Nodes + +Various node classes that represent document structure. All nodes inherit from either `AST::Node` (branch node) or `AST::LeafNode` (leaf node). 
+ +#### Node Hierarchy + +```mermaid +classDiagram + Node <|-- LeafNode + Node <|-- DocumentNode + Node <|-- HeadlineNode + Node <|-- ParagraphNode + Node <|-- ListNode + Node <|-- TableNode + Node <|-- CodeBlockNode + Node <|-- InlineNode + + LeafNode <|-- TextNode + LeafNode <|-- ImageNode + LeafNode <|-- FootnoteNode + + TextNode <|-- ReferenceNode + + class Node { + +location + +children + +visit_method_name() + +to_inline_text() + } + + class LeafNode { + +content + No children allowed + } +``` + +#### Major Node Classes +- `DocumentNode`: Root of the entire document +- `HeadlineNode`: Headings (level, label, caption) +- `ParagraphNode`: Paragraphs +- `ListNode`/`ListItemNode`: Lists (bulleted, numbered, definition lists) +- `TableNode`: Tables +- `CodeBlockNode`: Code blocks +- `InlineNode`: Inline elements (bold, code, links, etc.) +- `TextNode`: Plain text (LeafNode) +- `ImageNode`: Images (LeafNode) + +See [ast_node.md](./ast_node.md) for details. + +### Renderer + +Classes that convert AST to various output formats. They inherit from `Renderer::Base` and traverse nodes using the Visitor pattern. + +#### Major Renderers +- `HtmlRenderer`: HTML output +- `LatexRenderer`: LaTeX output +- `IdgxmlRenderer`: InDesign XML output +- `MarkdownRenderer`: Markdown output +- `PlaintextRenderer`: Plain text output +- `TopRenderer`: TOP format output + +#### How Renderers Work + +```ruby +# Implement visit methods corresponding to each node type +def visit_headline(node) + # Convert HeadlineNode to HTML + level = node.level + caption = render_children(node.caption_node) + "<h#{level}>#{caption}</h#{level}>" +end +``` + +See [ast_architecture.md](./ast_architecture.md) for details. + +### Supporting Features + +#### JSONSerializer + +Provides bidirectional conversion between AST and JSON format. 
+ +```ruby +# AST → JSON +json = JSONSerializer.serialize(ast, options) + +# JSON → AST +ast = JSONSerializer.deserialize(json) +``` + +##### Use Cases +- Debugging AST structure +- Integration with external tools +- Saving and restoring AST + +#### ReVIEWGenerator + +Regenerates Re:VIEW syntax text from AST. + +```ruby +generator = ReVIEW::AST::ReVIEWGenerator.new +review_text = generator.generate(ast) +``` + +##### Use Cases +- Bidirectional conversion (Re:VIEW ↔ AST ↔ Re:VIEW) +- Structure normalization +- Implementing format conversion tools + +#### TextFormatter + +A service class used by Renderers that centrally manages text formatting and I18n (internationalization). + +```ruby +# Used within Renderers +formatter = text_formatter +caption = formatter.format_caption('list', chapter_number, item_number, caption_text) +``` + +##### Main Features +- Text generation using I18n keys (figure numbers, captions, etc.) +- Format-specific decoration (HTML: `Figure 1.1:`, TOP/TEXT: `Figure 1.1 `) +- Chapter number formatting (`Chapter 1`, `Appendix A`, etc.) +- Reference text generation + +##### Use Cases +- Consistent text generation in Renderers +- Multilingual support (translation through I18n keys) +- Centralization of format-specific formatting rules + +## Basic Usage + +### Command-Line Execution + +Convert Re:VIEW documents to various formats via AST. 
+ +#### Compiling a Single File + +```bash +# HTML output +review-ast-compile --target=html chapter.re > chapter.html + +# LaTeX output +review-ast-compile --target=latex chapter.re > chapter.tex + +# JSON output (check AST structure) +review-ast-compile --target=json chapter.re > chapter.json + +# Dump AST structure (for debugging) +review-ast-dump chapter.re +``` + +#### Building Entire Books + +To build entire books using AST Renderer, use dedicated maker commands: + +```bash +# PDF generation (via LaTeX) +review-ast-pdfmaker config.yml + +# EPUB generation +review-ast-epubmaker config.yml + +# InDesign XML generation +review-ast-idgxmlmaker config.yml + +# Text generation (TOP format or plain text) +review-ast-textmaker config.yml # TOP format (with ◆→ markers) +``` + +These commands have the same interface as traditional `review-pdfmaker`, `review-epubmaker`, etc., but internally use AST Renderer. + +### Using from Programs + +You can manipulate AST using the Ruby API. + +```ruby +require 'review' +require 'review/ast/compiler' +require 'review/renderer/html_renderer' +require 'stringio' + +# Load configuration +config = ReVIEW::Configure.create(yamlfile: 'config.yml') +book = ReVIEW::Book::Base.new('.', config: config) + +# Get chapter +chapter = book.chapters.first + +# Generate AST (with reference resolution enabled) +compiler = ReVIEW::AST::Compiler.new +ast_root = compiler.compile_to_ast(chapter, reference_resolution: true) + +# Convert to HTML +renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) +html = renderer.render(ast_root) + +puts html +``` + +#### Converting to Different Formats + +```ruby +# Convert to LaTeX +require 'review/renderer/latex_renderer' +latex_renderer = ReVIEW::Renderer::LatexRenderer.new(chapter) +latex = latex_renderer.render(ast_root) + +# Convert to Markdown +require 'review/renderer/markdown_renderer' +md_renderer = ReVIEW::Renderer::MarkdownRenderer.new(chapter) +markdown = md_renderer.render(ast_root) + +# Convert to TOP 
format +require 'review/renderer/top_renderer' +top_renderer = ReVIEW::Renderer::TopRenderer.new(chapter) +top_text = top_renderer.render(ast_root) +``` + +### Common Use Cases + +#### 1. Creating Custom Renderers + +You can implement your own renderer for specific purposes. + +```ruby +class MyCustomRenderer < ReVIEW::Renderer::Base + def visit_headline(node) + # Custom headline processing + end + + def visit_paragraph(node) + # Custom paragraph processing + end +end +``` + +#### 2. Creating AST Analysis Tools + +You can create tools that traverse AST to collect statistics. + +```ruby +class WordCountVisitor < ReVIEW::AST::Visitor + attr_reader :word_count + + def initialize + @word_count = 0 + end + + def visit_text(node) + @word_count += node.content.split.size + end +end + +visitor = WordCountVisitor.new +visitor.visit(ast) +puts "Total words: #{visitor.word_count}" +``` + +#### 3. Document Structure Transformation + +You can manipulate AST to modify document structure. + +```ruby +# Search and replace specific nodes +ast.children.each do |node| + if node.is_a?(ReVIEW::AST::HeadlineNode) && node.level == 1 + # Process level 1 headings + end +end +``` + +## What AST/Renderer Can Do + +### Supported Formats + +AST/Renderer supports the following output formats: + +| Format | Renderer | Maker Command | Purpose | +|--------|----------|---------------|---------| +| HTML | `HtmlRenderer` | `review-ast-epubmaker` | Web publishing, preview, EPUB generation | +| LaTeX | `LatexRenderer` | `review-ast-pdfmaker` | PDF generation (via LaTeX) | +| IDGXML | `IdgxmlRenderer` | `review-ast-idgxmlmaker` | InDesign typesetting | +| Markdown | `MarkdownRenderer` | `review-ast-compile` | Conversion to Markdown format | +| Plaintext | `PlaintextRenderer` | `review-ast-textmaker -n` | Plain text without decoration | +| TOP | `TopRenderer` | `review-ast-textmaker` | Text with editorial markers | +| JSON | `JSONSerializer` | `review-ast-compile` | JSON output of AST structure | + +### 
Extended Features + +Features unique to AST/Renderer: + +#### JSON Output +```bash +# Output AST structure in JSON format +review-ast-compile --target=json chapter.re +``` + +##### Use Cases +- Debugging AST structure +- Integration with external tools +- Use as a parsing engine + +#### Bidirectional Conversion +```bash +# Re:VIEW → AST → JSON → AST → Re:VIEW +review-ast-compile --target=json chapter.re > ast.json +# Regenerate Re:VIEW text from JSON +review-ast-generate ast.json > regenerated.re +``` + +##### Use Cases +- Structure normalization +- Format conversion +- Document validation + +#### Custom Tool Development + +You can develop your own tools using AST: + +- Document analysis tools: Collecting document statistics +- Linting tools: Style checking, structure validation +- Conversion tools: Converting to custom formats +- Automation tools: Document generation, template processing + +### Support for All Re:VIEW Elements + +AST/Renderer supports all Re:VIEW syntax elements: + +##### Block Elements +- Headings (`=`, `==`, `===`) +- Paragraphs +- Lists (bulleted, numbered, definition lists) +- Tables (`//table`) +- Code blocks (`//list`, `//emlist`, `//cmd`, etc.) +- Images (`//image`, `//indepimage`) +- Columns (`//note`, `//memo`, `//column`, etc.) +- Math equations (`//texequation`) + +##### Inline Elements +- Decoration (`@<b>`, `@<i>`, `@<tt>`, etc.) +- Links (`@<href>`, `@<link>`) +- References (`@<img>`, 
`@<table>`, `@<list>`, `@<chap>`, etc.) +- Footnotes (`@<fn>`) +- Ruby (`@<ruby>`) + +See [ast_node.md](./ast_node.md) and [ast_architecture.md](./ast_architecture.md) for details. + +## Learning More + +To learn more about AST/Renderer, refer to the following documents: + +### Detailed Documentation + +| Document | Content | +|----------|---------| +| [ast_architecture.md](./ast_architecture.md) | Detailed explanation of the overall architecture. Pipeline, components, and processing flow details | +| [ast_node.md](./ast_node.md) | Complete reference for AST node classes. Attributes, methods, and usage examples for each node | +| [ast_list_processing.md](./ast_list_processing.md) | Details of list processing. ListParser, NestedListAssembler, and post-processing mechanisms | + +### Recommended Learning Path + +1. This document (ast.md): First, grasp the overall picture +2. [ast_architecture.md](./ast_architecture.md): Understand architectural details +3. [ast_node.md](./ast_node.md): Learn specific node classes +4. [ast_list_processing.md](./ast_list_processing.md): Deep dive into complex list processing +5. Source code: Check implementation details + +### Sample Code + +See the following for actual usage examples: + +- `lib/review/ast/command/compile.rb`: Command-line implementation +- `lib/review/renderer/`: Implementation of various Renderers +- `test/ast/`: AST test code (useful as usage examples) + +## FAQ + +### Q1: How to choose between traditional Builder and AST/Renderer? + +A: Both are currently available. + +- AST/Renderer approach: When you need new features (JSON output, bidirectional conversion, etc.) or want to develop custom tools +- Traditional Builder approach: When maintaining existing projects or workflows + +We aim to make the AST/Renderer approach the standard in the future. + +### Q2: Do I need to migrate existing projects to AST approach? + +A: It's not mandatory. The traditional approach will continue to be supported for a while. 
However, if you want to use new features and extensions, we recommend using the AST approach. + +### Q3: How to create a custom Renderer? + +A: Inherit from `Renderer::Base` and override the necessary `visit_*` methods. + +```ruby +class MyRenderer < ReVIEW::Renderer::Base + def visit_headline(node) + # Custom processing + end +end +``` + +See the Renderer layer explanation in [ast_architecture.md](./ast_architecture.md) for details. + +### Q4: How to debug AST? + +A: There are several methods: + +1. Check AST structure with JSON output: + ```bash + review-ast-compile --target=json chapter.re | jq . + ``` + +2. Use review-ast-dump command: + ```bash + review-ast-dump chapter.re + ``` + +3. Check directly from program: + ```ruby + require 'pp' + pp ast.to_h + ``` + +### Q5: How does performance compare to the traditional approach? + +A: The AST approach has overhead from building the intermediate representation (AST), but offers the following benefits: + +- Once-built AST can be reused for multiple formats (efficient when outputting multiple formats) +- Room for optimization through structured data model +- Efficient reference resolution and index building + +In normal usage, the performance difference is hardly noticeable. + +### Q6: Can Markdown files be processed? + +A: Yes, they are supported. The Markdown compiler is automatically used based on the file extension (`.md`). + +```bash +review-ast-compile --target=html chapter.md +``` + +### Q7: Do existing plugins and customizations work? + +A: AST/Renderer is independent of the traditional Builder system. Traditional Builder plugins continue to work as is, but the AST/Renderer approach uses new customization methods (custom Renderers, Visitors, etc.). 
diff --git a/doc/ast_architecture.ja.md b/doc/ast_architecture.ja.md new file mode 100644 index 000000000..4b5e1d5c9 --- /dev/null +++ b/doc/ast_architecture.ja.md @@ -0,0 +1,228 @@ +# Re:VIEW AST / Renderer アーキテクチャ概要 + +この文書は、Re:VIEW の最新実装(`lib/review/ast` および `lib/review/renderer` 配下のソース、ならびに `test/ast` 配下のテスト)に基づき、AST と Renderer の役割分担と処理フローについて整理したものです。 + +## パイプライン全体像 + +1. 各章(`ReVIEW::Book::Chapter`)の本文を `AST::Compiler` が読み取り、`DocumentNode` をルートに持つ AST を構築します(`lib/review/ast/compiler.rb`)。 +2. AST 生成後に参照解決 (`ReferenceResolver`) と各種後処理(`TsizeProcessor` / `FirstLineNumProcessor` / `NoindentProcessor` / `OlnumProcessor` / `ListStructureNormalizer` / `ListItemNumberingProcessor` / `AutoIdProcessor`)を適用し、構造とメタ情報を整備します。 +3. Renderer は 構築された AST を Visitor パターンで走査し、HTML・LaTeX・IDGXML などのフォーマット固有の出力へ変換します(`lib/review/renderer`)。 +4. 既存の `EPUBMaker` / `PDFMaker` / `IDGXMLMaker` などを継承する `AST::Command::EpubMaker` / `AST::Command::PdfMaker` / `AST::Command::IdgxmlMaker` が Compiler と Renderer からなる AST 版パイプラインを作ります。 + +## `AST::Compiler` の詳細 + +### 主な責務 +- Re:VIEW 記法(`.re`)または Markdown(`.md`)のソースを逐次読み込み、要素ごとに AST ノードを構築する (`compile_to_ast`, `build_ast_from_chapter`)。 + - `.re`ファイル: `AST::Compiler`が直接解析してASTを構築 + - `.md`ファイル: `MarkdownCompiler`がMarkly経由でASTを構築([Markdownサポート](#markdown-サポート)セクション参照) +- インライン記法は `InlineProcessor`、ブロック系コマンドは `BlockProcessor`、箇条書きは `ListProcessor` に委譲して組み立てる。 +- 行番号などの位置情報を保持した `SnapshotLocation` を各ノードに付与し、エラー報告やレンダリング時に利用可能にする。 +- 参照解決・後処理を含むパイプラインを統括し、検出したエラーを集約して `CompileError` として通知する。 + +### 入力走査とノード生成 + +#### Re:VIEWフォーマット(`.re`ファイル) +- `build_ast_from_chapter` は `LineInput` を用いて 1 行ずつ解析し、見出し・段落・ブロックコマンド・リストなどを判定します(`lib/review/ast/compiler.rb` 内の `case` 分岐)。 +- 見出し (`compile_headline_to_ast`) ではレベル・タグ・ラベル・キャプションを解析し、`HeadlineNode` に格納します。 +- 段落 (`compile_paragraph_to_ast`) は空行で区切り、インライン要素を `InlineProcessor.parse_inline_elements` に渡して `ParagraphNode` の子として生成します。 +- ブロックコマンド (`compile_block_command_to_ast`) は `BlockProcessor` が 
`BlockNode`・`CodeBlockNode`・`TableNode` など適切なノードを返します。 + - `BlockData`(`lib/review/ast/block_data.rb`): `Data.define`を使用したイミュータブルなデータ構造で、ブロックコマンドの情報(名前・引数・行・ネストされたブロック・位置情報)をカプセル化し、IO読み取りとブロック処理の責務を分離します。 + - `BlockContext` と `BlockReader`(`lib/review/ast/compiler/`)はブロックコマンドの解析と読み込みを担当します。 +- リスト系 (`compile_ul_to_ast` / `compile_ol_to_ast` / `compile_dl_to_ast`) は `ListProcessor` を通じて解析・組み立てが行われます。 + +#### Markdownフォーマット(`.md`ファイル) +- `MarkdownCompiler`が`Markly.parse`でMarkdownをCommonMark準拠のMarkly ASTに変換します(`lib/review/ast/markdown_compiler.rb`)。 +- `MarkdownAdapter`がMarkly ASTを走査し、各要素をRe:VIEW ASTノードに変換します(`lib/review/ast/markdown_adapter.rb`)。 + - 見出し → `HeadlineNode` + - 段落 → `ParagraphNode` + - コードブロック → `CodeBlockNode` + `CodeLineNode` + - リスト → `ListNode` + `ListItemNode` + - テーブル → `TableNode` + `TableRowNode` + `TableCellNode` + - インライン要素(太字、イタリック、コード、リンクなど)→ `InlineNode` + `TextNode` +- コラムマーカーは`MarkdownHtmlNode`を用いて検出され、`ColumnNode`に変換されます。 +- 変換後のASTは`.re`ファイルと同じ後処理パイプライン(参照解決など)を通ります。 + +### 参照解決と後処理 +- `ReferenceResolver` は AST を Visitor として巡回し、`InlineNode` 配下の `ReferenceNode` を該当要素の情報に差し替えます(`lib/review/ast/reference_resolver.rb`)。解決結果は `ResolvedData` として保持され、Renderer はそれを整形して出力します。 +- 後処理パイプラインは次の順序で適用されます(`compile_to_ast` 参照): + 1. `TsizeProcessor`: `//tsize` 情報を事前に反映。 + 2. `FirstLineNumProcessor`: 行番号付きコードブロックの初期値を設定。 + 3. `NoindentProcessor` / `OlnumProcessor`: `//noindent`, `//olnum` の命令を段落やリストに属性として付与。 + 4. `ListStructureNormalizer`: `//beginchild` / `//endchild` を含むリスト構造を整形し、不要なブロックを除去。 + 5. `ListItemNumberingProcessor`: 番号付きリストの `item_number` を確定。 + 6. 
`AutoIdProcessor`: 非表示見出しやコラムに自動 ID・通し番号を付与。 + +## AST ノード階層と特徴 + +> 詳細は[ast_node.md](ast_node.md)を参照してください。 このセクションでは、AST/Rendererアーキテクチャを理解するために必要な概要のみを説明します。 + +### 基底クラス + +ASTノードは以下の2つの基底クラスから構成されます: + +- `AST::Node`(`lib/review/ast/node.rb`): すべてのASTノードの抽象基底クラス + - 子ノードの管理(`add_child()`, `remove_child()` など) + - Visitorパターンのサポート(`accept(visitor)`, `visit_method_name()`) + - プレーンテキスト変換(`to_inline_text()`) + - 属性管理とJSONシリアライゼーション + +- `AST::LeafNode`(`lib/review/ast/leaf_node.rb`): 終端ノードの基底クラス + - 子ノードを持たない(`add_child()`を呼ぶとエラー) + - `content`属性を持つ(常に文字列) + - 継承クラス: `TextNode`, `ImageNode`, `EmbedNode`, `FootnoteNode`, `TexEquationNode` + +詳細な設計原則やメソッドの説明は[ast_node.md](ast_node.md)の「基底クラス」セクションを参照してください。 + +### 主なノードタイプ + +ASTは以下のような多様なノードタイプで構成されています: + +#### ドキュメント構造 +- `DocumentNode`: 章全体のルートノード +- `HeadlineNode`: 見出し(レベル、ラベル、キャプションを保持) +- `ParagraphNode`: 段落 +- `ColumnNode`, `MinicolumnNode`: コラム要素 + +#### リスト +- `ListNode`: リスト全体(`:ul`, `:ol`, `:dl`) +- `ListItemNode`: リスト項目(ネストレベル、番号、定義用語を保持) + +詳細は[ast_list_processing.md](ast_list_processing.md)を参照してください。 + +#### テーブル +- `TableNode`: テーブル全体 +- `TableRowNode`: 行(ヘッダー/本文を区別) +- `TableCellNode`: セル + +#### コードブロック +- `CodeBlockNode`: コードブロック(言語、キャプション情報) +- `CodeLineNode`: コードブロック内の各行 + +#### インライン要素 +- `InlineNode`: インライン命令(`@<b>`, `@<code>` など) +- `TextNode`: プレーンテキスト +- `ReferenceNode`: 参照(`@<img>`, `@<list>` など、後で解決される) + +#### その他 +- `ImageNode`: 画像(LeafNode) +- `BlockNode`: 汎用ブロック要素 +- `FootnoteNode`: 脚注(LeafNode) +- `EmbedNode`, `TexEquationNode`: 埋め込みコンテンツ(LeafNode) +- `CaptionNode`: キャプション要素 + +各ノードの詳細な属性、メソッド、使用例については[ast_node.md](ast_node.md)を参照してください。 + +### シリアライゼーション + +すべてのノードは`serialize_to_hash`を実装し、`JSONSerializer`がJSON形式での保存/復元を提供します(`lib/review/ast/json_serializer.rb`)。これによりASTのデバッグ、外部ツールとの連携、AST構造の分析が可能になります。 + +## インライン・参照処理 + +- `InlineProcessor`(`lib/review/ast/inline_processor.rb`)は `InlineTokenizer` と協調し、`@<cmd>{...}` / `@<cmd>$...$` / `@<cmd>|...|` を解析して `InlineNode` や `TextNode` を生成します。特殊コマンド(`ruby`, `href`, `kw`, 
`img`, `list`, `table`, `eq`, `fn` など)は専用メソッドで AST を構築します。 +- 参照解決後のデータは Renderer での字幕生成やリンク作成に利用されます。 + +## リスト処理パイプライン + +> 詳細は[ast_list_processing.md](ast_list_processing.md)を参照してください。 このセクションでは、アーキテクチャ理解に必要な概要のみを説明します。 + +リスト処理は以下のコンポーネントで構成されています: + +### 主要コンポーネント + +- ListParser: Re:VIEW記法のリストを解析し、`ListItemData`構造体を生成(`lib/review/ast/list_parser.rb`) +- NestedListAssembler: `ListItemData`からネストされたAST構造(`ListNode`/`ListItemNode`)を構築 +- ListProcessor: パーサーとアセンブラーを統括し、コンパイラーへの統一的なインターフェースを提供(`lib/review/ast/list_processor.rb`) + +### 後処理 + +- ListStructureNormalizer: `//beginchild`/`//endchild`の正規化と連続リストの統合(`lib/review/ast/compiler/list_structure_normalizer.rb`) +- ListItemNumberingProcessor: 番号付きリストの各項目に`item_number`を付与(`lib/review/ast/compiler/list_item_numbering_processor.rb`) + +詳細な処理フロー、データ構造、設計原則については[ast_list_processing.md](ast_list_processing.md)を参照してください。 + +## AST::Visitor と Indexer + +- `AST::Visitor`(`lib/review/ast/visitor.rb`)は AST を走査するための基底クラスです。 + - 動的ディスパッチ: 各ノードの `visit_method_name()` メソッドが適切な訪問メソッド名(`:visit_headline`, `:visit_paragraph` など)を返し、Visitorの対応するメソッドを呼び出します。 + - 主要メソッド: `visit(node)`, `visit_all(nodes)`, `extract_text(node)` (private), `process_inline_content(node)` (private) + - 継承クラス: `Renderer::Base`, `ReferenceResolver`, `Indexer` などがこれを継承し、AST の走査と処理を実現しています。 +- `AST::Indexer`(`lib/review/ast/indexer.rb`)は `Visitor` を継承し、AST 走査中に図表・リスト・コードブロック・数式などのインデックスを構築します。参照解決や連番付与に利用され、Renderer は AST を走査する際に Indexer を通じてインデックス情報を取得します。 + +## Renderer 層 + +- `Renderer::Base`(`lib/review/renderer/base.rb`)は `AST::Visitor` を継承し、`render`・`render_children`・`render_inline_element` などの基盤処理を提供します。各フォーマット固有のクラスは `visit_*` メソッドをオーバーライドします。 +- `RenderingContext`(`lib/review/renderer/rendering_context.rb`)は主に HTML / LaTeX / IDGXML 系レンダラーでレンダリング中の状態(表・キャプション・定義リスト内など)とフットノートの収集を管理し、`footnotetext` への切り替えや入れ子状況の判定を支援します。 +- フォーマット別 Renderer: + - `HtmlRenderer` は HTMLBuilder 
と互換の出力を生成し、見出しアンカー・リスト整形・脚注処理を再現します(`lib/review/renderer/html_renderer.rb`)。`InlineElementHandler` と `InlineContext`(`lib/review/renderer/html/`)を用いてインライン要素の文脈依存処理を行います。 + - `LatexRenderer` は LaTeXBuilder の挙動(セクションカウンタ・TOC・環境制御・脚注)を再現しつつ `RenderingContext` で扱いを整理しています(`lib/review/renderer/latex_renderer.rb`)。`InlineElementHandler` と `InlineContext`(`lib/review/renderer/latex/`)を用いてインライン要素の文脈依存処理を行います。 + - `IdgxmlRenderer`, `MarkdownRenderer`, `PlaintextRenderer` も同様に `Renderer::Base` を継承し、AST からの直接出力を実現します。 + - `TopRenderer` はテキストベースの原稿フォーマットに変換し、校正記号を付与します(`lib/review/renderer/top_renderer.rb`)。 +- `renderer/rendering_context.rb` とそれを利用するレンダラー(HTML / LaTeX / IDGXML)は `FootnoteCollector` を用いて脚注のバッチ処理を行い、Builder 時代の複雑な状態管理を置き換えています。 + +## Markdown サポート + +> 詳細は[ast_markdown.md](ast_markdown.md)を参照してください。 このセクションでは、アーキテクチャ理解に必要な概要のみを説明します。 + +Re:VIEWはGitHub Flavored Markdown(GFM)をサポートしており、`.md`ファイルをRe:VIEW ASTに変換できます。 + +### アーキテクチャ + +Markdownサポートは以下の3つの主要コンポーネントで構成されています: + +- MarkdownCompiler(`lib/review/ast/markdown_compiler.rb`): Markdownドキュメント全体をRe:VIEW ASTにコンパイルする統括クラス。Marklyパーサーを初期化し、GFM拡張機能(strikethrough, table, autolink, tagfilter)を有効化します。 +- MarkdownAdapter(`lib/review/ast/markdown_adapter.rb`): Markly AST(CommonMark準拠)をRe:VIEW ASTに変換するアダプター層。各Markdown要素を対応するRe:VIEW ASTノードに変換し、コラムスタック・リストスタック・テーブルスタックを管理します。 +- MarkdownHtmlNode(`lib/review/ast/markdown_html_node.rb`): Markdown内のHTML要素を解析し、特別な意味を持つHTMLコメント(コラムマーカーなど)を識別するための補助ノード。最終的なASTには含まれず、変換処理中にのみ使用されます。 + +### 変換処理の流れ + +``` +Markdown文書 → Markly.parse → Markly AST + ↓ + MarkdownAdapter.convert + ↓ + Re:VIEW AST + ↓ + 参照解決・後処理 + ↓ + Renderer群 +``` + +### サポート機能 + +- GFM拡張: 取り消し線、テーブル、オートリンク、タグフィルタリング +- Re:VIEW独自拡張: + - コラム構文(HTMLコメント: `<!-- column: Title -->` / `<!-- /column -->`) + - コラム構文(見出し: `### [column] Title` / `### [/column]`) + - 自動コラムクローズ(見出しレベルに基づく) + - スタンドアローン画像の検出(段落内の単独画像をブロックレベルの`ImageNode`に変換) + +### 制限事項 + +Markdownでは以下のRe:VIEW固有機能はサポートされていません: +- `//list`(キャプション付きコードブロック)→ 通常のコードブロックとして扱われます +- `//table`(キャプション付き表)→ 
GFMテーブルは使用できますが、キャプションやラベルは付けられません +- `//footnote`(脚注) +- 一部のインライン命令(`@<kw>`, `@<bou>` など) + +詳細は[ast_markdown.md](ast_markdown.md)を参照してください。 + +## 既存ツールとの統合 + +- EPUB/PDF/IDGXML などの Maker クラス(`AST::Command::EpubMaker`, `AST::Command::PdfMaker`, `AST::Command::IdgxmlMaker`)は、それぞれ内部に `RendererConverterAdapter` クラスを定義して Renderer を従来の Converter インターフェースに適合させています(`lib/review/ast/command/epub_maker.rb`, `pdf_maker.rb`, `idgxml_maker.rb`)。各 Adapter は章単位で対応する Renderer(`HtmlRenderer`, `LatexRenderer`, `IdgxmlRenderer`)を生成し、出力をそのまま組版パイプラインへ渡します。 +- `lib/review/ast/command/compile.rb` は `review-ast-compile` CLI を提供し、`--target` で指定したフォーマットに対して AST→Renderer パイプラインを直接実行します。`--check` モードでは AST 生成と検証のみを行います。 + +## JSON / 開発支援ツール + +- `JSONSerializer` と `AST::Dumper`(`lib/review/ast/dumper.rb`)は AST を JSON へシリアライズし、デバッグや外部ツールとの連携に利用できます。`Options` により位置情報や簡易モードの有無を制御可能です。 +- `AST::ReviewGenerator`(`lib/review/ast/review_generator.rb`)は AST から Re:VIEW 記法を再生成し、双方向変換や差分検証に利用されます。 +- `lib/review/ast/diff/html.rb` / `idgxml.rb` / `latex.rb` は Builder と Renderer の出力差異をハッシュ比較し、`test/ast/test_html_renderer_builder_comparison.rb` などで利用されています。 + +## テストによる保証 + +- `test/ast/test_ast_comprehensive.rb` / `test_ast_complex_integration.rb` は章全体を AST に変換し、ノード構造とレンダリング結果を検証します。 +- `test/ast/test_html_renderer_inline_elements.rb` や `test_html_renderer_join_lines_by_lang.rb` はインライン要素・改行処理など HTML 特有の仕様を確認しています。 +- `test/ast/test_list_structure_normalizer.rb`, `test_list_processor.rb` は複雑なリストや `//beginchild` の正規化を網羅します。 +- `test/ast/test_ast_comprehensive_inline.rb` は AST→Renderer の往復で特殊なインライン命令が崩れないことを保証します。 +- `test/ast/test_markdown_adapter.rb`, `test_markdown_compiler.rb` はMarkdownのAST変換が正しく動作することを検証します。 + +これらの実装とテストにより、AST を中心とした新しいパイプラインと Renderer 群は従来 Builder と互換の出力を維持しつつ、構造化されたデータモデルとユーティリティを提供しています。 diff --git a/doc/ast_architecture.md b/doc/ast_architecture.md new file mode 100644 index 000000000..67a6a225a --- /dev/null +++ b/doc/ast_architecture.md @@ -0,0 +1,228 @@ +# Re:VIEW AST / Renderer 
Architecture Overview + +This document provides an organized view of the roles and processing flow of AST and Renderer, based on the latest implementation of Re:VIEW (sources under `lib/review/ast` and `lib/review/renderer`, as well as tests under `test/ast`). + +## Overall Pipeline + +1. `AST::Compiler` reads the body of each chapter (`ReVIEW::Book::Chapter`) and builds an AST with `DocumentNode` as the root (`lib/review/ast/compiler.rb`). +2. After AST generation, reference resolution (`ReferenceResolver`) and various post-processors (`TsizeProcessor` / `FirstLineNumProcessor` / `NoindentProcessor` / `OlnumProcessor` / `ListStructureNormalizer` / `ListItemNumberingProcessor` / `AutoIdProcessor`) are applied to organize structure and metadata. +3. Renderers traverse the built AST using the Visitor pattern and convert it to format-specific output such as HTML, LaTeX, IDGXML, etc. (`lib/review/renderer`). +4. `AST::Command::EpubMaker` / `AST::Command::PdfMaker` / `AST::Command::IdgxmlMaker`, which inherit from existing `EPUBMaker` / `PDFMaker` / `IDGXMLMaker`, create AST-based pipelines consisting of Compiler and Renderer. + +## Details of `AST::Compiler` + +### Main Responsibilities +- Sequentially reads Re:VIEW notation (`.re`) or Markdown (`.md`) source and builds AST nodes for each element (`compile_to_ast`, `build_ast_from_chapter`). + - `.re` files: `AST::Compiler` directly parses and builds AST + - `.md` files: `MarkdownCompiler` builds AST via Markly (see [Markdown Support](#markdown-support) section) +- Delegates inline notation to `InlineProcessor`, block commands to `BlockProcessor`, and lists to `ListProcessor` for assembly. +- Attaches `SnapshotLocation` containing position information such as line numbers to each node, making it available for error reporting and rendering. +- Oversees the pipeline including reference resolution and post-processing, aggregating detected errors and notifying them as `CompileError`. 
+ +### Input Scanning and Node Generation + +#### Re:VIEW Format (`.re` files) +- `build_ast_from_chapter` uses `LineInput` to parse line by line, determining headings, paragraphs, block commands, lists, etc. (`case` branches in `lib/review/ast/compiler.rb`). +- Headings (`compile_headline_to_ast`) parse level, tag, label, and caption, storing them in `HeadlineNode`. +- Paragraphs (`compile_paragraph_to_ast`) are delimited by blank lines, passing inline elements to `InlineProcessor.parse_inline_elements` to generate children of `ParagraphNode`. +- Block commands (`compile_block_command_to_ast`) use `BlockProcessor` to return appropriate nodes such as `BlockNode`, `CodeBlockNode`, `TableNode`, etc. + - `BlockData` (`lib/review/ast/block_data.rb`): An immutable data structure using `Data.define` that encapsulates block command information (name, arguments, lines, nested blocks, location info), separating IO reading from block processing responsibilities. + - `BlockContext` and `BlockReader` (`lib/review/ast/compiler/`) handle parsing and reading of block commands. +- List types (`compile_ul_to_ast` / `compile_ol_to_ast` / `compile_dl_to_ast`) are parsed and assembled through `ListProcessor`. + +#### Markdown Format (`.md` files) +- `MarkdownCompiler` uses `Markly.parse` to convert Markdown to a CommonMark-compliant Markly AST (`lib/review/ast/markdown_compiler.rb`). +- `MarkdownAdapter` traverses the Markly AST and converts each element to Re:VIEW AST nodes (`lib/review/ast/markdown_adapter.rb`). + - Headings → `HeadlineNode` + - Paragraphs → `ParagraphNode` + - Code blocks → `CodeBlockNode` + `CodeLineNode` + - Lists → `ListNode` + `ListItemNode` + - Tables → `TableNode` + `TableRowNode` + `TableCellNode` + - Inline elements (bold, italic, code, links, etc.) → `InlineNode` + `TextNode` +- Column markers are detected using `MarkdownHtmlNode` and converted to `ColumnNode`. +- The converted AST goes through the same post-processing pipeline (reference resolution, etc.) 
as `.re` files. + +### Reference Resolution and Post-processing +- `ReferenceResolver` traverses the AST as a Visitor and replaces `ReferenceNode` under `InlineNode` with information from corresponding elements (`lib/review/ast/reference_resolver.rb`). Resolution results are stored as `ResolvedData`, which Renderers format for output. +- The post-processing pipeline is applied in the following order (see `compile_to_ast`): + 1. `TsizeProcessor`: Pre-applies `//tsize` information. + 2. `FirstLineNumProcessor`: Sets initial values for line-numbered code blocks. + 3. `NoindentProcessor` / `OlnumProcessor`: Attaches `//noindent`, `//olnum` directives as attributes to paragraphs and lists. + 4. `ListStructureNormalizer`: Formats list structures containing `//beginchild` / `//endchild` and removes unnecessary blocks. + 5. `ListItemNumberingProcessor`: Determines `item_number` for numbered lists. + 6. `AutoIdProcessor`: Assigns automatic IDs and sequential numbers to hidden headings and columns. + +## AST Node Hierarchy and Features + +> See [ast_node.md](ast_node.md) for details. This section explains only the overview needed to understand the AST/Renderer architecture. + +### Base Classes + +AST nodes are composed of two base classes: + +- `AST::Node` (`lib/review/ast/node.rb`): Abstract base class for all AST nodes + - Child node management (`add_child()`, `remove_child()`, etc.) + - Visitor pattern support (`accept(visitor)`, `visit_method_name()`) + - Plain text conversion (`to_inline_text()`) + - Attribute management and JSON serialization + +- `AST::LeafNode` (`lib/review/ast/leaf_node.rb`): Base class for terminal nodes + - No children (calling `add_child()` raises an error) + - Has `content` attribute (always a string) + - Subclasses: `TextNode`, `ImageNode`, `EmbedNode`, `FootnoteNode`, `TexEquationNode` + +See the "Base Classes" section in [ast_node.md](ast_node.md) for detailed design principles and method descriptions. 
+ +### Major Node Types + +AST is composed of various node types: + +#### Document Structure +- `DocumentNode`: Root node for the entire chapter +- `HeadlineNode`: Headings (holds level, label, caption) +- `ParagraphNode`: Paragraphs +- `ColumnNode`, `MinicolumnNode`: Column elements + +#### Lists +- `ListNode`: Entire list (`:ul`, `:ol`, `:dl`) +- `ListItemNode`: List items (holds nesting level, number, definition term) + +See [ast_list_processing.md](ast_list_processing.md) for details. + +#### Tables +- `TableNode`: Entire table +- `TableRowNode`: Rows (distinguishes header/body) +- `TableCellNode`: Cells + +#### Code Blocks +- `CodeBlockNode`: Code blocks (language, caption info) +- `CodeLineNode`: Each line within code block + +#### Inline Elements +- `InlineNode`: Inline commands (`@<b>`, `@<code>`, etc.) +- `TextNode`: Plain text +- `ReferenceNode`: References (`@<img>`, `@<list>`, etc., resolved later) + +#### Others +- `ImageNode`: Images (LeafNode) +- `BlockNode`: Generic block elements +- `FootnoteNode`: Footnotes (LeafNode) +- `EmbedNode`, `TexEquationNode`: Embedded content (LeafNode) +- `CaptionNode`: Caption elements + +See [ast_node.md](ast_node.md) for detailed attributes, methods, and usage examples for each node. + +### Serialization + +All nodes implement `serialize_to_hash`, and `JSONSerializer` provides saving/restoring in JSON format (`lib/review/ast/json_serializer.rb`). This enables AST debugging, integration with external tools, and AST structure analysis. + +## Inline and Reference Processing + +- `InlineProcessor` (`lib/review/ast/inline_processor.rb`) works with `InlineTokenizer` to parse `@<cmd>{...}` / `@<cmd>$...$` / `@<cmd>|...|` and generate `InlineNode` and `TextNode`. Special commands (`ruby`, `href`, `kw`, `img`, `list`, `table`, `eq`, `fn`, etc.) build AST with dedicated methods. +- Data after reference resolution is used for caption generation and link creation in Renderers. 
+ +## List Processing Pipeline + +> See [ast_list_processing.md](ast_list_processing.md) for details. This section explains only the overview needed for architecture understanding. + +List processing consists of the following components: + +### Main Components + +- ListParser: Parses Re:VIEW list notation and generates `ListItemData` structures (`lib/review/ast/list_parser.rb`) +- NestedListAssembler: Builds nested AST structure (`ListNode`/`ListItemNode`) from `ListItemData` +- ListProcessor: Oversees parser and assembler, providing a unified interface to the compiler (`lib/review/ast/list_processor.rb`) + +### Post-processing + +- ListStructureNormalizer: Normalizes `//beginchild`/`//endchild` and merges consecutive lists (`lib/review/ast/compiler/list_structure_normalizer.rb`) +- ListItemNumberingProcessor: Assigns `item_number` to each item in numbered lists (`lib/review/ast/compiler/list_item_numbering_processor.rb`) + +See [ast_list_processing.md](ast_list_processing.md) for detailed processing flow, data structures, and design principles. + +## AST::Visitor and Indexer + +- `AST::Visitor` (`lib/review/ast/visitor.rb`) is the base class for traversing AST. + - Dynamic dispatch: Each node's `visit_method_name()` method returns the appropriate visit method name (`:visit_headline`, `:visit_paragraph`, etc.) and calls the corresponding method in the Visitor. + - Main methods: `visit(node)`, `visit_all(nodes)`, `extract_text(node)` (private), `process_inline_content(node)` (private) + - Subclasses: `Renderer::Base`, `ReferenceResolver`, `Indexer`, etc. inherit from this to realize AST traversal and processing. +- `AST::Indexer` (`lib/review/ast/indexer.rb`) inherits from `Visitor` and builds indexes for figures, tables, lists, code blocks, equations, etc. during AST traversal. Used for reference resolution and sequential numbering, Renderers obtain index information through Indexer when traversing AST. 
+ +## Renderer Layer + +- `Renderer::Base` (`lib/review/renderer/base.rb`) inherits from `AST::Visitor` and provides foundational processing such as `render`, `render_children`, `render_inline_element`. Format-specific classes override `visit_*` methods. +- `RenderingContext` (`lib/review/renderer/rendering_context.rb`) manages state during rendering (inside tables, captions, definition lists, etc.) and footnote collection, mainly for HTML/LaTeX/IDGXML renderers, supporting switching to `footnotetext` and determining nesting conditions. +- Format-specific Renderers: + - `HtmlRenderer` generates output compatible with HTMLBuilder, reproducing heading anchors, list formatting, footnote processing (`lib/review/renderer/html_renderer.rb`). Uses `InlineElementHandler` and `InlineContext` (`lib/review/renderer/html/`) for context-dependent inline element processing. + - `LatexRenderer` reproduces LaTeXBuilder behavior (section counters, TOC, environment control, footnotes) while organizing handling with `RenderingContext` (`lib/review/renderer/latex_renderer.rb`). Uses `InlineElementHandler` and `InlineContext` (`lib/review/renderer/latex/`) for context-dependent inline element processing. + - `IdgxmlRenderer`, `MarkdownRenderer`, `PlaintextRenderer` also inherit from `Renderer::Base` to realize direct output from AST. + - `TopRenderer` converts to text-based manuscript format and adds proofreading marks (`lib/review/renderer/top_renderer.rb`). +- `renderer/rendering_context.rb` and renderers using it (HTML/LaTeX/IDGXML) use `FootnoteCollector` for batch processing of footnotes, replacing complex state management from the Builder era. + +## Markdown Support + +> See [ast_markdown.md](ast_markdown.md) for details. This section explains only the overview needed for architecture understanding. + +Re:VIEW supports GitHub Flavored Markdown (GFM) and can convert `.md` files to Re:VIEW AST. 
+ +### Architecture + +Markdown support consists of three main components: + +- MarkdownCompiler (`lib/review/ast/markdown_compiler.rb`): Oversees compiling entire Markdown documents to Re:VIEW AST. Initializes Markly parser and enables GFM extensions (strikethrough, table, autolink, tagfilter). +- MarkdownAdapter (`lib/review/ast/markdown_adapter.rb`): Adapter layer that converts Markly AST (CommonMark compliant) to Re:VIEW AST. Converts each Markdown element to corresponding Re:VIEW AST nodes and manages column stack, list stack, and table stack. +- MarkdownHtmlNode (`lib/review/ast/markdown_html_node.rb`): Auxiliary node for parsing HTML elements in Markdown and identifying HTML comments with special meaning (column markers, etc.). Not included in final AST, used only during conversion processing. + +### Conversion Process Flow + +``` +Markdown document → Markly.parse → Markly AST + ↓ + MarkdownAdapter.convert + ↓ + Re:VIEW AST + ↓ + Reference resolution & post-processing + ↓ + Renderers +``` + +### Supported Features + +- GFM extensions: Strikethrough, tables, autolink, tag filtering +- Re:VIEW-specific extensions: + - Column syntax (HTML comment: `<!-- column: Title -->` / `<!-- /column -->`) + - Column syntax (heading: `### [column] Title` / `### [/column]`) + - Automatic column closing (based on heading level) + - Standalone image detection (converts single images in paragraphs to block-level `ImageNode`) + +### Limitations + +The following Re:VIEW-specific features are not supported in Markdown: +- `//list` (code block with caption) → Treated as regular code block +- `//table` (table with caption) → GFM tables can be used but cannot have captions or labels +- `//footnote` (footnotes) +- Some inline commands (`@<kw>`, `@<bou>`, etc.) + +See [ast_markdown.md](ast_markdown.md) for details. + +## Integration with Existing Tools + +- Maker classes for EPUB/PDF/IDGXML, etc. 
(`AST::Command::EpubMaker`, `AST::Command::PdfMaker`, `AST::Command::IdgxmlMaker`) each define `RendererConverterAdapter` classes internally to adapt Renderer to the traditional Converter interface (`lib/review/ast/command/epub_maker.rb`, `pdf_maker.rb`, `idgxml_maker.rb`). Each Adapter generates corresponding Renderers (`HtmlRenderer`, `LatexRenderer`, `IdgxmlRenderer`) per chapter and passes output directly to the typesetting pipeline. +- `lib/review/ast/command/compile.rb` provides the `review-ast-compile` CLI, directly executing the AST→Renderer pipeline for the format specified with `--target`. In `--check` mode, only AST generation and validation are performed. + +## JSON / Development Support Tools + +- `JSONSerializer` and `AST::Dumper` (`lib/review/ast/dumper.rb`) serialize AST to JSON, available for debugging and integration with external tools. `Options` control presence of location information and simple mode. +- `AST::ReviewGenerator` (`lib/review/ast/review_generator.rb`) regenerates Re:VIEW notation from AST, used for bidirectional conversion and diff verification. +- `lib/review/ast/diff/html.rb` / `idgxml.rb` / `latex.rb` perform hash comparison of Builder and Renderer output differences, used in `test/ast/test_html_renderer_builder_comparison.rb`, etc. + +## Test Guarantees + +- `test/ast/test_ast_comprehensive.rb` / `test_ast_complex_integration.rb` convert entire chapters to AST and verify node structure and rendering results. +- `test/ast/test_html_renderer_inline_elements.rb` and `test_html_renderer_join_lines_by_lang.rb` verify HTML-specific specifications such as inline elements and line break processing. +- `test/ast/test_list_structure_normalizer.rb`, `test_list_processor.rb` comprehensively cover complex lists and `//beginchild` normalization. +- `test/ast/test_ast_comprehensive_inline.rb` ensures special inline commands don't break in AST→Renderer round trips. 
+- `test/ast/test_markdown_adapter.rb`, `test_markdown_compiler.rb` verify Markdown AST conversion works correctly. + +Through these implementations and tests, the new AST-centric pipeline and Renderer suite maintain output compatible with traditional Builders while providing structured data models and utilities. diff --git a/doc/ast_list_processing.ja.md b/doc/ast_list_processing.ja.md new file mode 100644 index 000000000..55f59d7f2 --- /dev/null +++ b/doc/ast_list_processing.ja.md @@ -0,0 +1,311 @@ +# Re:VIEW ASTでのリスト処理アーキテクチャ + +## 概要 + +Re:VIEWのASTにおけるリスト処理は、複数のコンポーネントが協調して動作する洗練されたアーキテクチャを採用しています。このドキュメントでは、リスト処理に関わる主要なクラスとその相互関係について詳しく説明します。 + +## 主要コンポーネント + +### 1. リスト用ASTノードクラス + +#### ListNode +`ListNode`は、すべてのリスト型(番号なしリスト、番号付きリスト、定義リスト)を表現する汎用的なノードクラスです。 + +##### 主な属性 +- `list_type`: リストの種類(`:ul`, `:ol`, `:dl`) +- `start_number`: 番号付きリストの開始番号(デフォルト: `nil`) +- `olnum_start`: InDesignのolnum開始値(IDGXML用、デフォルト: `nil`) +- `children`: 子ノード(`ListItemNode`)を格納(標準的なノード構造) + +##### 便利メソッド +- `ol?()`: 番号付きリストかどうかを判定 +- `ul?()`: 番号なしリストかどうかを判定 +- `dl?()`: 定義リストかどうかを判定 + +##### 特徴 +- 異なるリスト型を統一的に扱える設計 +- 標準的なAST構造(`children`)による統一的な処理 + +#### ListItemNode +`ListItemNode`は、個々のリスト項目を表現します。 + +##### 主な属性 +- `level`: ネストレベル(1から始まる) +- `number`: 番号付きリストにおける項目番号(元の入力に由来) +- `item_number`: 番号付きリストの絶対番号(`ListItemNumberingProcessor`によって設定される) +- `item_type`: 定義リストでの`:dt`(用語)/`:dd`(定義)識別子(通常のリストでは`nil`) +- `children`: 定義内容や入れ子のリストを保持する子ノード +- `term_children`: 定義リストの用語部分を保持するための子ノード配列 + +##### 便利メソッド +- `definition_term?()`: 定義リストの用語項目(`:dt`)かどうかを判定 +- `definition_desc?()`: 定義リストの定義項目(`:dd`)かどうかを判定 + +##### 特徴 +- ネストされたリスト構造をサポート +- インライン要素(強調、リンクなど)を子ノードとして保持可能 +- 定義リストでは用語(`term_children`)と定義(`children`)を明確に分離 +- 番号付きリストでは`item_number`が後処理で自動的に設定される + +### 2. 
構文解析コンポーネント + +#### ListParser +`ListParser`は、Re:VIEW記法のリストを解析し、構造化されたデータに変換します。 + +##### 責務 +- 生のテキスト行からリスト項目を抽出 +- ネストレベルの判定 +- 継続行の収集 +- 各リスト型(ul/ol/dl)に特化した解析ロジック + +##### 主なメソッド +```ruby +def parse_unordered_list(f) + # * item + # ** nested item + # のような記法を解析 +end + +def parse_ordered_list(f) + # 1. item + # 11. item番号11(ネストではなく実番号) + # のような記法を解析 +end + +def parse_definition_list(f) + # : term + # definition + # のような記法を解析 +end +``` + +##### データ構造 +```ruby +ListItemData = Struct.new( + :type, # :ul_item, :ol_item, :dt, :dd + :level, # ネストレベル(デフォルト: 1) + :content, # 項目のテキスト + :continuation_lines,# 継続行(デフォルト: []) + :metadata, # 追加情報(番号、インデントなど、デフォルト: {}) + keyword_init: true +) +``` + +#### ListItemDataのメソッド +- `with_adjusted_level(new_level)`: レベルを調整した新しいインスタンスを返す(イミュータブル操作) + +##### 補足 +- すべてのリスト記法は先頭に空白を含む行としてパーサーに渡される想定です(`lib/review/ast/compiler.rb`でそのような行のみリストとして扱う)。 +- 番号付きリストは桁数によるネストをサポートせず、`level`は常に1として解釈されます。 + +### 3. 組み立てコンポーネント + +#### NestedListAssembler +`NestedListAssembler`は、`ListParser`が生成したデータから実際のASTノード構造を組み立てます。 + +##### 責務 +- フラットなリスト項目データをネストされたAST構造に変換 +- インライン要素の解析と組み込み +- 親子関係の適切な設定 + +##### 主な処理フロー +1. `ListItemData`の配列を受け取る +2. レベルに基づいてネスト構造を構築 +3. 各項目のコンテンツをインライン解析 +4. 完成したAST構造を返す + +### 4. 協調コンポーネント + +#### ListProcessor +`ListProcessor`は、リスト処理全体を調整する高レベルのインターフェースです。 + +##### 責務 +- `ListParser`と`NestedListAssembler`の協調 +- コンパイラーへの統一的なインターフェース提供 +- 生成したリストノードをASTに追加 + +##### 主なメソッド +```ruby +def process_unordered_list(f) + items = @parser.parse_unordered_list(f) + return if items.empty? 
+ + list_node = @nested_list_assembler.build_unordered_list(items) + add_to_ast(list_node) +end +``` + +##### 公開アクセサー +- `parser`: `ListParser`インスタンスへの読み取り専用アクセス(テストやカスタム用途向け) +- `nested_list_assembler`: `NestedListAssembler`インスタンスへの読み取り専用アクセス(テストやカスタム用途向け) + +##### 追加メソッド +- `process_list(f, list_type)`: リスト型を指定した汎用処理メソッド +- `build_list_from_items(items, list_type)`: 事前に解析された項目からリストを構築(テストや特殊用途向け) +- `parse_list_items(f, list_type)`: ASTを構築せずにリスト項目のみを解析(テスト用) + +### 5. 後処理コンポーネント + +#### ListStructureNormalizer + +`//beginchild`と`//endchild`で構成された一時的なリスト要素を正規化し、AST上に正しい入れ子構造を作ります。 + +##### 責務 +- `//beginchild`/`//endchild`ブロックを検出してリスト項目へ再配置 +- 同じ型の連続したリストを統合 +- 定義リストの段落から用語と定義を分離 + +#### ListItemNumberingProcessor +番号付きリストの各項目に絶対番号を割り当てます。 + +##### 責務 +- `start_number`から始まる連番の割り当て +- 各`ListItemNode`の`item_number`属性の更新(`attr_accessor`で定義) +- 入れ子構造の有無にかかわらずリスト内の順序に基づく番号付け + +##### 処理の詳細 +- `ListNode.start_number`を基準に連番を生成 +- `start_number`が指定されていない場合は1から開始 +- ネストされたリストについても、親リスト内の順序に基づいて番号を付与 + +これらの後処理は`AST::Compiler`内で常に順番に呼び出され、生成済みのリスト構造を最終形に整えます。 + +## 処理フローの詳細 + +### 1. 番号なしリスト(Unordered List)の処理 + +``` +入力テキスト: + * 項目1 + 継続行 + ** ネストされた項目 + * 項目2 + +処理フロー: +1. Compiler → ListProcessor.process_unordered_list(f) +2. ListProcessor → ListParser.parse_unordered_list(f) + - 各行を解析し、ListItemData構造体の配列を生成 + - レベル判定: "*"の数でネストレベルを決定 +3. ListProcessor → NestedListAssembler.build_unordered_list(items) + - ListNodeを作成(list_type: :ul) + - 各ListItemDataに対してListItemNodeを作成 + - ネスト構造を構築 +4. ListProcessor → ASTへリストノードを追加 +5. AST Compiler → ListStructureNormalizer.process(常に実行) +6. AST Compiler → ListItemNumberingProcessor.process(番号付きリスト向けだが全体フロー内で呼び出される) +``` + +### 2. 番号付きリスト(Ordered List)の処理 + +``` +入力テキスト: + 1. 第1項目 + 11. 第2項目(項目番号11) + 2. 第3項目 + +処理フロー: +1. ListParserが各行を解析し、`number`メタデータを保持(レベルは常に1) +2. NestedListAssemblerが`start_number`と項目番号を設定しつつリストノードを構築 +3. ListProcessorがリストノードをASTに追加 +4. 
AST CompilerでListStructureNormalizer → ListItemNumberingProcessorの順に後処理(ネストは発生しないが絶対番号を割り当て) +``` + +### 3. 定義リスト(Definition List)の処理 + +``` +入力テキスト: + : 用語1 + 定義内容1 + 定義内容2 + : 用語2 + 定義内容3 + +処理フロー: +1. ListParserが各用語行を検出し、後続のインデント行を定義コンテンツとして`continuation_lines`に保持 +2. NestedListAssemblerが用語部分を`term_children`に、定義本文を`children`にそれぞれ格納した`ListItemNode`を生成 +3. ListStructureNormalizerが段落ベースの定義リストを分割する場合でも、最終的に同じ構造へ統合される +``` + +## 重要な設計上の決定 + +### 1. 責務の分離 +- 解析(ListParser)と組み立て(NestedListAssembler)を明確に分離 +- 後処理(ListStructureNormalizer, ListItemNumberingProcessor)を独立したコンポーネントに分離 +- 各コンポーネントが単一の責任を持つ +- テスト可能性と保守性の向上 + +### 2. 段階的な処理 +- テキスト → 構造化データ → ASTノード → AST後処理 → レンダリング +- 各段階で適切な抽象化レベルを維持 + +### 3. 柔軟な拡張性 +- 新しいリスト型の追加が容易 +- インライン要素の処理を統合 +- 異なるレンダラーへの対応 + +### 4. 統一的な設計 +- ListNodeは標準的なAST構造(`children`)を用い、ListItemNodeは必要なメタデータを属性として保持 +- 定義リスト向けの`term_children`など特殊な情報も構造化して管理 + +## クラス関係図 + +``` + AST::Compiler + | + | 使用 + v + ListProcessor + / | \ + / | \ + 使用 / | \ 使用 + v v v + ListParser Nested InlineProcessor + List + Assembler + | | | + | | | + 生成 | 使用 | 生成 | + v v v + ListItemData ListNode (AST) + | + | 後処理 + v + ListStructureNormalizer + | + | 後処理 + v + ListItemNumberingProcessor + | + | 含む + v + ListItemNode (AST) + | + | 含む + v + TextNode / InlineNode (AST) +``` + +## 使用例 + +### コンパイラーでの使用 +```ruby +# AST::Compiler内 +def compile_ul_to_ast(f) + list_processor.process_unordered_list(f) +end +``` + +### カスタムリスト処理 +```ruby +# 独自のリスト処理を実装する場合 +processor = ListProcessor.new(ast_compiler) +items = processor.parser.parse_unordered_list(input) +# カスタム処理... 
+list_node = processor.nested_list_assembler.build_nested_structure(items, :ul) +``` + +## まとめ + +Re:VIEWのASTリスト処理アーキテクチャは、明確な責務分離と段階的な処理により、複雑なリスト構造を効率的に処理します。ListParser、NestedListAssembler、ListProcessor、そして後処理コンポーネント(ListStructureNormalizer、ListItemNumberingProcessor)の協調により、Re:VIEW記法からASTへの変換、構造の正規化、そして最終的なレンダリングまでがスムーズに行われます。 + +この設計により、新しいリスト型の追加や、異なるレンダリング要件への対応、さらには構造の正規化処理の追加が容易になっています。 diff --git a/doc/ast_list_processing.md b/doc/ast_list_processing.md new file mode 100644 index 000000000..fbb059a61 --- /dev/null +++ b/doc/ast_list_processing.md @@ -0,0 +1,311 @@ +# Re:VIEW AST List Processing Architecture + +## Overview + +List processing in Re:VIEW's AST adopts a sophisticated architecture where multiple components work collaboratively. This document explains in detail the main classes involved in list processing and their interrelationships. + +## Main Components + +### 1. List AST Node Classes + +#### ListNode +`ListNode` is a generic node class that represents all list types (unordered lists, ordered lists, definition lists). + +##### Main Attributes +- `list_type`: Type of list (`:ul`, `:ol`, `:dl`) +- `start_number`: Starting number for ordered lists (default: `nil`) +- `olnum_start`: InDesign olnum start value (for IDGXML, default: `nil`) +- `children`: Stores child nodes (`ListItemNode`) (standard node structure) + +##### Convenience Methods +- `ol?()`: Determines if it's an ordered list +- `ul?()`: Determines if it's an unordered list +- `dl?()`: Determines if it's a definition list + +##### Features +- Designed to handle different list types uniformly +- Unified processing through standard AST structure (`children`) + +#### ListItemNode +`ListItemNode` represents individual list items. 
+ +##### Main Attributes +- `level`: Nesting level (starts from 1) +- `number`: Item number in ordered lists (derived from original input) +- `item_number`: Absolute number in ordered lists (set by `ListItemNumberingProcessor`) +- `item_type`: `:dt` (term) / `:dd` (definition) identifier in definition lists (`nil` for regular lists) +- `children`: Child nodes holding definition content or nested lists +- `term_children`: Array of child nodes holding the term part of definition lists + +##### Convenience Methods +- `definition_term?()`: Determines if it's a definition list term item (`:dt`) +- `definition_desc?()`: Determines if it's a definition list definition item (`:dd`) + +##### Features +- Supports nested list structures +- Can hold inline elements (emphasis, links, etc.) as child nodes +- Clearly separates terms (`term_children`) and definitions (`children`) in definition lists +- `item_number` is automatically set by post-processing for ordered lists + +### 2. Parsing Components + +#### ListParser +`ListParser` parses Re:VIEW list notation and converts it to structured data. + +##### Responsibilities +- Extract list items from raw text lines +- Determine nesting levels +- Collect continuation lines +- Parsing logic specialized for each list type (ul/ol/dl) + +##### Main Methods +```ruby +def parse_unordered_list(f) + # Parse notation like: + # * item + # ** nested item +end + +def parse_ordered_list(f) + # Parse notation like: + # 1. item + # 11. 
item number 11 (actual number, not nesting) +end + +def parse_definition_list(f) + # Parse notation like: + # : term + # definition +end +``` + +##### Data Structure +```ruby +ListItemData = Struct.new( + :type, # :ul_item, :ol_item, :dt, :dd + :level, # Nesting level (default: 1) + :content, # Item text + :continuation_lines,# Continuation lines (default: []) + :metadata, # Additional information (number, indent, etc., default: {}) + keyword_init: true +) +``` + +#### ListItemData Methods +- `with_adjusted_level(new_level)`: Returns new instance with adjusted level (immutable operation) + +##### Notes +- All list notation is expected to be passed to the parser as lines containing leading whitespace (only such lines are treated as lists in `lib/review/ast/compiler.rb`). +- Ordered lists do not support nesting by number of digits, and `level` is always interpreted as 1. + +### 3. Assembly Components + +#### NestedListAssembler +`NestedListAssembler` assembles actual AST node structures from data generated by `ListParser`. + +##### Responsibilities +- Convert flat list item data to nested AST structure +- Parse and incorporate inline elements +- Properly set parent-child relationships + +##### Main Processing Flow +1. Receive array of `ListItemData` +2. Build nesting structure based on levels +3. Parse each item's content as inline +4. Return completed AST structure + +### 4. Coordination Components + +#### ListProcessor +`ListProcessor` is a high-level interface that coordinates entire list processing. + +##### Responsibilities +- Coordinate `ListParser` and `NestedListAssembler` +- Provide unified interface to compiler +- Add generated list nodes to AST + +##### Main Methods +```ruby +def process_unordered_list(f) + items = @parser.parse_unordered_list(f) + return if items.empty? 
+ + list_node = @nested_list_assembler.build_unordered_list(items) + add_to_ast(list_node) +end +``` + +##### Public Accessors +- `parser`: Read-only access to `ListParser` instance (for testing and custom purposes) +- `nested_list_assembler`: Read-only access to `NestedListAssembler` instance (for testing and custom purposes) + +##### Additional Methods +- `process_list(f, list_type)`: Generic processing method with specified list type +- `build_list_from_items(items, list_type)`: Build list from pre-parsed items (for testing and special purposes) +- `parse_list_items(f, list_type)`: Parse only list items without building AST (for testing) + +### 5. Post-processing Components + +#### ListStructureNormalizer + +Normalizes temporary list elements composed of `//beginchild` and `//endchild` to create proper nesting structure in AST. + +##### Responsibilities +- Detect `//beginchild`/`//endchild` blocks and relocate them to list items +- Merge consecutive lists of the same type +- Separate terms and definitions from definition list paragraphs + +#### ListItemNumberingProcessor +Assigns absolute numbers to each item in ordered lists. + +##### Responsibilities +- Assign sequential numbers starting from `start_number` +- Update `item_number` attribute of each `ListItemNode` (defined by `attr_accessor`) +- Number based on order within list regardless of nesting structure + +##### Processing Details +- Generate sequential numbers based on `ListNode.start_number` +- Start from 1 if `start_number` is not specified +- Assign numbers to nested lists based on order within parent list + +These post-processors are always called in order within `AST::Compiler` to finalize generated list structures. + +## Detailed Processing Flow + +### 1. Unordered List Processing + +``` +Input text: + * Item 1 + Continuation line + ** Nested item + * Item 2 + +Processing flow: +1. Compiler → ListProcessor.process_unordered_list(f) +2. 
ListProcessor → ListParser.parse_unordered_list(f) + - Parse each line and generate array of ListItemData structures + - Level determination: Determine nesting level by number of "*" +3. ListProcessor → NestedListAssembler.build_unordered_list(items) + - Create ListNode (list_type: :ul) + - Create ListItemNode for each ListItemData + - Build nesting structure +4. ListProcessor → Add list node to AST +5. AST Compiler → ListStructureNormalizer.process (always executed) +6. AST Compiler → ListItemNumberingProcessor.process (for ordered lists but called in overall flow) +``` + +### 2. Ordered List Processing + +``` +Input text: + 1. First item + 11. Second item (item number 11) + 2. Third item + +Processing flow: +1. ListParser parses each line and preserves `number` metadata (level is always 1) +2. NestedListAssembler builds list node setting `start_number` and item numbers +3. ListProcessor adds list node to AST +4. AST Compiler post-processes in order: ListStructureNormalizer → ListItemNumberingProcessor (no nesting occurs but absolute numbers are assigned) +``` + +### 3. Definition List Processing + +``` +Input text: + : Term 1 + Definition content 1 + Definition content 2 + : Term 2 + Definition content 3 + +Processing flow: +1. ListParser detects each term line and holds subsequent indented lines as definition content in `continuation_lines` +2. NestedListAssembler generates `ListItemNode` storing term part in `term_children` and definition body in `children` +3. Even when ListStructureNormalizer splits paragraph-based definition lists, they are ultimately integrated into the same structure +``` + +## Important Design Decisions + +### 1. Separation of Responsibilities +- Clear separation between parsing (ListParser) and assembly (NestedListAssembler) +- Separate post-processing (ListStructureNormalizer, ListItemNumberingProcessor) into independent components +- Each component has a single responsibility +- Improved testability and maintainability + +### 2. 
Staged Processing +- Text → Structured data → AST nodes → AST post-processing → Rendering +- Maintain appropriate abstraction level at each stage + +### 3. Flexible Extensibility +- Easy to add new list types +- Integrate inline element processing +- Support different renderers + +### 4. Unified Design +- ListNode uses standard AST structure (`children`), ListItemNode holds necessary metadata as attributes +- Special information like `term_children` for definition lists is also managed in structured way + +## Class Relationship Diagram + +``` + AST::Compiler + | + | uses + v + ListProcessor + / | \ + / | \ + uses / | \ uses + v v v + ListParser Nested InlineProcessor + List + Assembler + | | | + | | | + generates | uses | generates | + v v v + ListItemData ListNode (AST) + | + | post-process + v + ListStructureNormalizer + | + | post-process + v + ListItemNumberingProcessor + | + | contains + v + ListItemNode (AST) + | + | contains + v + TextNode / InlineNode (AST) +``` + +## Usage Examples + +### Usage in Compiler +```ruby +# Inside AST::Compiler +def compile_ul_to_ast(f) + list_processor.process_unordered_list(f) +end +``` + +### Custom List Processing +```ruby +# When implementing custom list processing +processor = ListProcessor.new(ast_compiler) +items = processor.parser.parse_unordered_list(input) +# Custom processing... +list_node = processor.nested_list_assembler.build_nested_structure(items, :ul) +``` + +## Summary + +Re:VIEW's AST list processing architecture efficiently handles complex list structures through clear separation of responsibilities and staged processing. Through the coordination of ListParser, NestedListAssembler, ListProcessor, and post-processing components (ListStructureNormalizer, ListItemNumberingProcessor), conversion from Re:VIEW notation to AST, structure normalization, and final rendering proceed smoothly. 
+ +This design makes it easy to add new list types, adapt to different rendering requirements, and add structure normalization processing. diff --git a/doc/ast_markdown.ja.md b/doc/ast_markdown.ja.md new file mode 100644 index 000000000..c353c18da --- /dev/null +++ b/doc/ast_markdown.ja.md @@ -0,0 +1,955 @@ +# Re:VIEW Markdown サポート + +Re:VIEWはAST版Markdownコンパイラを通じてGitHub Flavored Markdown(GFM)をサポートしています。この文書では、サポートされているMarkdown機能とRe:VIEW ASTへの変換方法について説明します。 + +## 概要 + +Markdownサポートは、Re:VIEWのAST/Rendererアーキテクチャ上に実装されています。Markdownドキュメントは内部的にRe:VIEW ASTに変換され、従来のRe:VIEWフォーマット(`.re`ファイル)と同等に扱われます。 + +### 双方向変換のサポート + +Re:VIEWは以下の双方向変換をサポートしています: + +1. Markdown → AST → 各種フォーマット: MarkdownCompilerを使用してMarkdownをASTに変換し、各種Rendererで出力 +2. Re:VIEW → AST → Markdown: Re:VIEWフォーマットをASTに変換し、MarkdownRendererでMarkdown形式に出力 + +この双方向変換により、以下が可能になります: +- Markdownで執筆した文書をPDF、EPUB、HTMLなどに変換 +- Re:VIEWで執筆した文書をMarkdown形式に変換してGitHubなどで公開 +- 異なるフォーマット間でのコンテンツの相互変換 + +### アーキテクチャ + +Markdownサポートは双方向の変換をサポートしています: + +#### Markdown → Re:VIEW AST(入力) + +- Markly: GFM拡張を備えた高速CommonMarkパーサー(外部gem) +- MarkdownCompiler: MarkdownドキュメントをRe:VIEW ASTにコンパイルする統括クラス +- MarkdownAdapter: Markly ASTをRe:VIEW ASTに変換するアダプター層 +- MarkdownHtmlNode: HTML要素の解析とコラムマーカーの検出を担当(内部使用) + +#### Re:VIEW AST → Markdown(出力) + +- MarkdownRenderer: Re:VIEW ASTをMarkdown形式で出力するレンダラー + - キャプションは`**Caption**`形式で出力 + - 画像は`![alt](path)`形式で出力 + - テーブルはGFMパイプスタイルで出力 + - 脚注は`[^id]`記法で出力 + +### サポートされている拡張機能 + +以下のGitHub Flavored Markdown拡張機能が有効化されています: +- strikethrough: 取り消し線(`~~text~~`) +- table: テーブル(パイプスタイル) +- autolink: オートリンク(`http://example.com`を自動的にリンクに変換) + +### Re:VIEW独自の拡張 + +標準的なGFMに加えて、以下のRe:VIEW独自の拡張機能もサポートされています: + +- コラム構文: 見出し(`### [column] Title`)で開始し、HTMLコメント(``)または自動クローズで終了するコラムブロック +- 自動コラムクローズ: 見出しレベルに基づくコラムの自動クローズ機能 +- 属性ブロック: Pandoc/kramdown互換の`{#id caption="..."}`構文によるID・キャプション指定 +- Re:VIEW参照記法: `@{id}`、`@{id}`、`@
{id}`による図表参照 +- 脚注サポート: Markdown標準の`[^id]`記法による脚注 + +## Markdown基本記法 + +Re:VIEWは[CommonMark](https://commonmark.org/)および[GitHub Flavored Markdown(GFM)](https://github.github.com/gfm/)の仕様に準拠しています。標準的なMarkdown記法の詳細については、これらの公式仕様を参照してください。 + +### サポートされている主な要素 + +以下のMarkdown要素がRe:VIEW ASTに変換されます: + +| Markdown記法 | 説明 | Re:VIEW AST | +|------------|------|-------------| +| 段落 | 空行で区切られたテキストブロック | `ParagraphNode` | +| 見出し(`#`〜`######`) | 6段階の見出しレベル | `HeadlineNode` | +| 太字(`**text**`) | 強調表示 | `InlineNode(:b)` | +| イタリック(`*text*`) | 斜体表示 | `InlineNode(:i)` | +| コード(`` `code` ``) | インラインコード | `InlineNode(:code)` | +| リンク(`[text](url)`) | ハイパーリンク | `InlineNode(:href)` | +| 取り消し線(`~~text~~`) | 取り消し線(GFM拡張) | `InlineNode(:del)` | +| 箇条書きリスト(`*`, `-`, `+`) | 順序なしリスト | `ListNode(:ul)` | +| 番号付きリスト(`1.`, `2.`) | 順序付きリスト | `ListNode(:ol)` | +| コードブロック(` ``` `) | 言語指定可能なコードブロック | `CodeBlockNode` | +| コードブロック+属性 | `{#id caption="..."}`でID・キャプション指定 | `CodeBlockNode(:list)` | +| 引用(`>`) | 引用ブロック | `BlockNode(:quote)` | +| テーブル(GFM) | パイプスタイルのテーブル | `TableNode` | +| テーブル+属性 | `{#id caption="..."}`でID・キャプション指定 | `TableNode`(ID・キャプション付き) | +| 画像(`![alt](path)`) | 画像(単独行はブロック、行内はインライン) | `ImageNode` / `InlineNode(:icon)` | +| 画像+属性 | `{#id caption="..."}`でID・キャプション指定 | `ImageNode`(ID・キャプション付き) | +| 水平線(`---`, `***`) | 区切り線 | `BlockNode(:hr)` | +| HTMLブロック | 生HTML(保持される) | `EmbedNode(:html)` | +| 脚注参照(`[^id]`) | 脚注への参照 | `InlineNode(:fn)` + `ReferenceNode` | +| 脚注定義(`[^id]: 内容`) | 脚注の定義 | `FootnoteNode` | +| Re:VIEW参照(`@{id}`) | 図表リストへの参照 | `InlineNode(type)` + `ReferenceNode` | +| 定義リスト(Markdown出力) | 用語と説明のペア | `DefinitionListNode` / `DefinitionItemNode` | + +### 変換例 + +```markdown +## 見出し + +これは **太字** と *イタリック* を含む段落です。`インラインコード`も使えます。 + +* 箇条書き項目1 +* 箇条書き項目2 + +詳細は[公式サイト](https://example.com)を参照してください。 +``` + +### 画像の扱い + +画像は文脈によって異なるASTノードに変換されます: + +#### 単独行の画像(ブロックレベル) + +```markdown +![図1のキャプション](image.png) +``` +単独行の画像は `ImageNode`(ブロックレベル)に変換され、Re:VIEWの 
`//image[image][図1のキャプション]` と同等になります。 + +#### IDとキャプションの明示的指定 + +属性ブロック構文を使用して、画像にIDとキャプションを明示的に指定できます。属性ブロックは画像と同じ行に書くことも、次の行に書くこともできます: + +```markdown +![代替テキスト](images/sample.png){#fig-sample caption="サンプル画像"} +``` + +または、次の行に書く形式: + +```markdown +![代替テキスト](images/sample.png) +{#fig-sample caption="サンプル画像"} +``` + +これにより、`ImageNode`に`id="fig-sample"`と`caption="サンプル画像"`が設定されます。属性ブロックのキャプションが指定されている場合、それが優先されます。IDのみを指定することも可能です: + +```markdown +![サンプル画像](images/sample.png){#fig-sample} +``` + +または: + +```markdown +![サンプル画像](images/sample.png) +{#fig-sample} +``` + +この場合、代替テキスト「サンプル画像」がキャプションとして使用されます。 + +#### インライン画像 + +```markdown +これは ![アイコン](icon.png) インライン画像です。 +``` +行内の画像は `InlineNode(:icon)` に変換され、Re:VIEWの `@{icon.png}` と同等になります。 + +## コラム(Re:VIEW拡張) + +Re:VIEWはMarkdownドキュメント内でコラムブロックをサポートしています。コラムは見出し構文で開始し、HTMLコメントまたは自動クローズで終了します。 + +### 方法1: 見出し構文 + HTMLコメントで終了 + +```markdown +### [column] コラムのタイトル + +ここにコラムの内容を書きます。 + +コラム内ではすべてのMarkdown機能を使用できます。 + + +``` + +タイトルなしのコラムの場合: + +```markdown +### [column] + +タイトルなしのコラム内容。 + + +``` + +### 方法2: 見出し構文(自動クローズ) + +以下の場合にコラムは自動的にクローズされます: +- 同じレベルの見出しに遭遇したとき +- より高いレベル(小さい数字)の見出しに遭遇したとき +- ドキュメントの終わり + +```markdown +### [column] コラムのタイトル + +ここにコラムの内容を書きます。 + +### 次のセクション +``` + +この例では、「次のセクション」の見出しに遭遇したときにコラムが自動的にクローズされます。 + +ドキュメント終了時の自動クローズの例: + +```markdown +### [column] ヒントとコツ + +このコラムはドキュメントの最後で自動的にクローズされます。 + +明示的な終了マーカーは不要です。 +``` + +より高いレベルの見出しでの例: + +```markdown +### [column] サブセクションコラム + +レベル3のコラム。 + +## メインセクション + +このレベル2の見出しはレベル3のコラムをクローズします。 +``` + +### コラムの自動クローズ規則 + +- 同じレベル: `### [column]` は別の `###` 見出しが現れるとクローズ +- より高いレベル: `### [column]` は `##` または `#` 見出しが現れるとクローズ +- より低いレベル: `### [column]` は `####` 以下が現れてもクローズされない +- ドキュメント終了: すべての開いているコラムは自動的にクローズ + +### コラムのネスト + +コラムはネスト可能ですが、見出しレベルに注意してください: + +```markdown +## [column] 外側のコラム + +外側のコラムの内容。 + +### [column] 内側のコラム + +内側のコラムの内容。 + + + +外側のコラムに戻ります。 + + +``` + +## コードブロックとリスト(Re:VIEW拡張) + +### キャプション付きコードブロック + 
+コードブロックにIDとキャプションを指定して、Re:VIEWの`//list`コマンドと同等の機能を使用できます。属性ブロックは言語指定の後に記述します: + +````markdown +```ruby {#lst-hello caption="挨拶プログラム"} +def hello(name) + puts "Hello, #{name}!" +end +``` +```` + +属性ブロック`{#lst-hello caption="挨拶プログラム"}`を言語指定の後に記述することで、コードブロックにIDとキャプションが設定されます。この場合、`CodeBlockNode`の`code_type`は`:list`になります。 + +IDのみを指定することも可能です: + +````markdown +```ruby {#lst-example} +# コード +``` +```` + +属性ブロックを指定しない通常のコードブロックは`code_type: :emlist`として扱われます。 + +注意:コードブロックの属性ブロックは、開始のバッククオート行に記述する必要があります。画像やテーブルとは異なり、次の行に書くことはできません。 + +## テーブル(Re:VIEW拡張) + +### キャプション付きテーブル + +GFMテーブルにIDとキャプションを指定できます。属性ブロックはテーブルの直後の行に記述します: + +```markdown +| 名前 | 年齢 | 職業 | +|------|------|------| +| Alice| 25 | エンジニア | +| Bob | 30 | デザイナー | +{#tbl-users caption="ユーザー一覧"} +``` + +属性ブロック`{#tbl-users caption="ユーザー一覧"}`をテーブルの直後の行に記述することで、テーブルにIDとキャプションが設定されます。これはRe:VIEWの`//table`コマンドと同等の機能です。 + +## 図表参照(Re:VIEW拡張) + +### Re:VIEW記法による参照 + +Markdown内でRe:VIEWの参照記法を使用して、図・表・リストを参照できます: + +```markdown +![サンプル画像](images/sample.png) +{#fig-sample caption="サンプル画像"} + +図@{fig-sample}を参照してください。 +``` + +```markdown +```ruby {#lst-hello caption="挨拶プログラム"} +def hello + puts "Hello, World!" +end +``` + +リスト@{lst-hello}を参照してください。 +``` + +```markdown +| 名前 | 年齢 | +|------|------| +| Alice| 25 | +{#tbl-users caption="ユーザー一覧"} + +表@
<table>{tbl-users}を参照してください。 +``` + +この記法はRe:VIEWの標準的な参照記法と同じです。参照先のIDは、上記の属性ブロックで指定したIDと対応している必要があります。 + +参照は後続の処理で適切な番号に置き換えられます: +- `@<img>{fig-sample}` → 「図1.1」 +- `@<list>{lst-hello}` → 「リスト1.1」 +- `@<table>
{tbl-users}` → 「表1.1」 + +### 参照の解決 + +参照は後続の処理(参照解決フェーズ)で適切な図番・表番・リスト番号に置き換えられます。AST内では`InlineNode`と`ReferenceNode`の組み合わせとして表現されます。 + +## 脚注(Re:VIEW拡張) + +Markdown標準の脚注記法をサポートしています: + +### 脚注の使用 + +```markdown +これは脚注のテストです[^1]。 + +複数の脚注も使えます[^note]。 + +[^1]: これは最初の脚注です。 + +[^note]: これは名前付き脚注です。 + 複数行の内容も + サポートします。 +``` + +脚注参照`[^id]`と脚注定義`[^id]: 内容`を使用できます。脚注定義は複数行にまたがることができ、インデントされた行は前の脚注の続きとして扱われます。 + +### FootnoteNodeへの変換 + +脚注定義は`FootnoteNode`に変換され、Re:VIEWの`//footnote`コマンドと同等に扱われます。脚注参照は`InlineNode(:fn)`として表現されます。 + +## 定義リスト(Markdown出力) + +Re:VIEWの定義リスト(`: 用語`形式)をMarkdown形式に変換する場合、以下の形式で出力されます: + +### 基本的な出力形式 + +```markdown +**用語**: 説明文 + +**別の用語**: 別の説明文 +``` + +用語は太字(`**term**`)で強調され、コロンと空白の後に説明が続きます。 + +### 用語に強調が含まれる場合 + +用語に既に太字(`**text**`)や強調(`@{text}`)が含まれている場合、MarkdownRendererは二重の太字マークアップ(`****text****`)を避けるため、用語を太字で囲みません: + +Re:VIEW入力例: +```review + : @{重要な}用語 + 説明文 +``` + +Markdown出力: +```markdown +**重要な**用語: 説明文 +``` + +このように、用語内の強調要素がそのまま保持され、外側の太字マークアップは追加されません。 + +### 定義リストのAST表現 + +定義リストはRe:VIEW ASTでは以下のノードで表現されます: +- `DefinitionListNode`: 定義リスト全体を表すノード +- `DefinitionItemNode`: 個々の用語と説明のペアを表すノード + - `term_children`: 用語のインライン要素のリスト + - `children`: 説明部分のブロック要素のリスト + +MarkdownRendererは、`term_children`内に`InlineNode(:b)`または`InlineNode(:strong)`が含まれているかをチェックし、含まれている場合は外側の太字マークアップを省略します。 + +## その他のMarkdown機能 + +### 改行 +- ソフト改行: 単一の改行はスペースに変換 +- ハード改行: 行末の2つのスペースで改行を挿入 + +### HTMLブロック +生のHTMLブロックは `EmbedNode(:html)` として保持され、Re:VIEWの `//embed[html]` と同等に扱われます。インラインHTMLもサポートされます。 + +## 制限事項と注意点 + +### ファイル拡張子 + +Markdownファイルは適切に処理されるために `.md` 拡張子を使用する必要があります。Re:VIEWシステムは拡張子によってファイル形式を自動判別します。 + +**重要:** Re:VIEWは`.md`拡張子のみをサポートしています。`.markdown`拡張子はサポートされていません。 + +### 画像パス + +画像パスはプロジェクトの画像ディレクトリ(デフォルトでは`images/`)からの相対パスか、Re:VIEWの画像パス規約を使用する必要があります。 + +#### 例 +```markdown +![キャプション](sample.png) +``` + +### Re:VIEW固有の機能 + +以下のRe:VIEW機能がMarkdown内でサポートされています: + +#### サポートされているRe:VIEW機能 +- `//list`(キャプション付きコードブロック)→ 属性ブロック`{#id caption="..."}`で指定可能 +- 
`//table`(キャプション付き表)→ 属性ブロック`{#id caption="..."}`で指定可能 +- `//image`(キャプション付き画像)→ 属性ブロック`{#id caption="..."}`で指定可能 +- `//footnote`(脚注)→ Markdown標準の`[^id]`記法をサポート +- 図表参照(`@<img>{id}`、`@<list>{id}`、`@<table>
{id}`)→ 完全サポート +- コラム(`//column`)→ HTMLコメントまたは見出し記法でサポート + +#### サポートされていないRe:VIEW固有機能 +- `//cmd`、`//embed`などの特殊なブロック命令 +- インライン命令の一部(`@`、`@`、`@`など) +- 複雑なテーブル機能(セル結合、カスタム列幅など) + +すべてのRe:VIEW機能にアクセスする必要がある場合は、Re:VIEWフォーマット(`.re`ファイル)を使用してください。 + +### コラムのネスト + +コラムをネストする場合、見出しレベルに注意が必要です。内側のコラムは外側のコラムよりも高い見出しレベル(大きい数字)を使用してください: + +```markdown +## [column] 外側のコラム +外側の内容 + +### [column] 内側のコラム +内側の内容 + + +外側のコラムに戻る + +``` + +### HTMLコメントの使用 + +HTMLコメント``はコラムの終了マーカーとして使用されます。一般的なコメントとして使用する場合は、`/column`と書かないように注意してください: + +```markdown + + +``` + +## 使用方法 + +### コマンドラインツール + +#### AST経由での変換(推奨) + +MarkdownファイルをAST経由で各種フォーマットに変換する場合、AST専用のコマンドを使用します: + +```bash +# MarkdownをJSON形式のASTにダンプ +review-ast-dump chapter.md > chapter.json + +# MarkdownをRe:VIEW形式に変換 +review-ast-dump2re chapter.md > chapter.re + +# MarkdownからEPUBを生成(AST経由) +review-ast-epubmaker config.yml + +# MarkdownからPDFを生成(AST経由) +review-ast-pdfmaker config.yml + +# MarkdownからInDesign XMLを生成(AST経由) +review-ast-idgxmlmaker config.yml +``` + +#### review-ast-compileの使用 + +`review-ast-compile`コマンドでは、Markdownを指定したフォーマットに直接変換できます: + +```bash +# MarkdownをJSON形式のASTに変換 +review-ast-compile --target=ast chapter.md + +# MarkdownをHTMLに変換(AST経由) +review-ast-compile --target=html chapter.md + +# MarkdownをLaTeXに変換(AST経由) +review-ast-compile --target=latex chapter.md + +# MarkdownをInDesign XMLに変換(AST経由) +review-ast-compile --target=idgxml chapter.md + +# MarkdownをMarkdownに変換(AST経由、正規化・整形) +review-ast-compile --target=markdown chapter.md +``` + +注意: `--target=ast`を指定すると、生成されたAST構造をJSON形式で出力します。これはデバッグやAST構造の確認に便利です。 + +#### Re:VIEW形式からMarkdown形式への変換 + +Re:VIEWフォーマット(`.re`ファイル)をMarkdown形式に変換することもできます: + +```bash +# Re:VIEWファイルをMarkdownに変換 +review-ast-compile --target=markdown chapter.re > chapter.md +``` + +この変換により、Re:VIEWで書かれた文書をMarkdown形式で出力できます。MarkdownRendererは以下の形式で出力します: + +- コードブロック: キャプションは`**Caption**`形式で出力され、その後にフェンスドコードブロックが続きます +- テーブル: キャプションは`**Caption**`形式で出力され、その後にGFMパイプスタイルのテーブルが続きます +- 画像: 
Markdown標準の`![alt](path)`形式で出力されます +- 脚注: Markdown標準の`[^id]`記法で出力されます + +#### 従来のreview-compileとの互換性 + +従来の`review-compile`コマンドも引き続き使用できますが、AST/Rendererアーキテクチャを利用する場合は`review-ast-compile`や各種`review-ast-*maker`コマンドの使用を推奨します: + +```bash +# 従来の方式(互換性のため残されています) +review-compile --target=html chapter.md +review-compile --target=latex chapter.md +``` + +### プロジェクト設定 + +Markdownを使用するようにプロジェクトを設定: + +```yaml +# config.yml +contentdir: src + +# CATALOG.yml +CHAPS: + - chapter1.md + - chapter2.md +``` + +### Re:VIEWプロジェクトとの統合 + +MarkdownファイルとRe:VIEWファイルを同じプロジェクト内で混在させることができます: + +``` +project/ + ├── config.yml + ├── CATALOG.yml + └── src/ + ├── chapter1.re # Re:VIEWフォーマット + ├── chapter2.md # Markdownフォーマット + └── chapter3.re # Re:VIEWフォーマット +``` + +## サンプル + +### 完全なドキュメントの例 + +````markdown +# Rubyの紹介 + +Rubyはシンプルさと生産性に重点を置いた動的でオープンソースのプログラミング言語です[^intro]。 + +## インストール + +Rubyをインストールするには、次の手順に従います: + +1. [Rubyウェブサイト](https://www.ruby-lang.org/ja/)にアクセス +2. プラットフォームに応じたインストーラーをダウンロード +3. インストーラーを実行 + +### [column] バージョン管理 + +Rubyのインストールを管理するには、**rbenv**や**RVM**のようなバージョンマネージャーの使用を推奨します。 + + + +## 基本構文 + +シンプルなRubyプログラムの例をリスト@{lst-hello}に示します: + +```ruby {#lst-hello caption="RubyでHello World"} +# RubyでHello World +puts "Hello, World!" + +# メソッドの定義 +def greet(name) + "Hello, #{name}!" +end + +puts greet("Ruby") +``` + +### 変数 + +Rubyにはいくつかの変数タイプがあります(表@
{tbl-vars}参照): + +| タイプ | プレフィックス | 例 | +|------|--------|---------| +| ローカル | なし | `variable` | +| インスタンス | `@` | `@variable` | +| クラス | `@@` | `@@variable` | +| グローバル | `$` | `$variable` | +{#tbl-vars caption="Rubyの変数タイプ"} + +## プロジェクト構造 + +典型的なRubyプロジェクトの構造を図@{fig-structure}に示します: + +![プロジェクト構造図](images/ruby-structure.png) +{#fig-structure caption="Rubyプロジェクトの構造"} + +## まとめ + +> Rubyはプログラマーを幸せにするために設計されています。 +> +> -- まつもとゆきひろ + +詳細については、~~公式ドキュメント~~ [Ruby Docs](https://docs.ruby-lang.org/)をご覧ください[^docs]。 + +--- + +Happy coding! ![Rubyロゴ](ruby-logo.png) + +[^intro]: Rubyは1995年にまつもとゆきひろ氏によって公開されました。 + +[^docs]: 公式ドキュメントには豊富なチュートリアルとAPIリファレンスが含まれています。 +```` + +## 変換の詳細 + +### ASTノードマッピング + +| Markdown要素 | Re:VIEW ASTノード | +|------------------|------------------| +| 段落 | `ParagraphNode` | +| 見出し | `HeadlineNode` | +| 太字 | `InlineNode(:b)` | +| イタリック | `InlineNode(:i)` | +| コード | `InlineNode(:code)` | +| リンク | `InlineNode(:href)` | +| 取り消し線 | `InlineNode(:del)` | +| 箇条書きリスト | `ListNode(:ul)` | +| 番号付きリスト | `ListNode(:ol)` | +| リスト項目 | `ListItemNode` | +| コードブロック | `CodeBlockNode` | +| コードブロック(属性付き) | `CodeBlockNode(:list)` | +| 引用 | `BlockNode(:quote)` | +| テーブル | `TableNode` | +| テーブル(属性付き) | `TableNode`(ID・キャプション付き) | +| テーブル行 | `TableRowNode` | +| テーブルセル | `TableCellNode` | +| 単独画像 | `ImageNode` | +| 単独画像(属性付き) | `ImageNode`(ID・キャプション付き) | +| インライン画像 | `InlineNode(:icon)` | +| 水平線 | `BlockNode(:hr)` | +| HTMLブロック | `EmbedNode(:html)` | +| コラム(HTMLコメント/見出し) | `ColumnNode` | +| コードブロック行 | `CodeLineNode` | +| 脚注定義 `[^id]: 内容` | `FootnoteNode` | +| 脚注参照 `[^id]` | `InlineNode(:fn)` + `ReferenceNode` | +| 図表参照 `@{id}` | `InlineNode(type)` + `ReferenceNode` | +| 定義リスト(出力のみ) | `DefinitionListNode` | +| 定義項目(出力のみ) | `DefinitionItemNode` | + +### 位置情報の追跡 + +すべてのASTノードには以下を追跡する位置情報(`SnapshotLocation`)が含まれます: +- ソースファイル名 +- 行番号 + +これにより正確なエラー報告とデバッグが可能になります。 + +### 実装アーキテクチャ + +Markdownサポートは以下の3つの主要コンポーネントから構成されています: + +#### 1. 
MarkdownCompiler + +`MarkdownCompiler`は、Markdownドキュメント全体をRe:VIEW ASTにコンパイルする責務を持ちます。 + +主な機能: +- Marklyパーサーの初期化と設定 +- GFM拡張機能の有効化(strikethrough, table, autolink) +- 脚注サポートの有効化(Markly::FOOTNOTES) +- Re:VIEW inline notation保護(`@{id}`記法の保護) +- MarkdownAdapterとの連携 +- AST生成の統括 + +Re:VIEW記法の保護: + +MarkdownCompilerは、Marklyによる解析の前にRe:VIEW inline notation(`@{id}`)を保護します。Marklyは`@`をHTMLタグとして誤って解釈するため、`@<`をプレースホルダ`@@REVIEW_AT_LT@@`に置換してからパースし、MarkdownAdapterで元に戻します。 + +#### 2. MarkdownAdapter + +`MarkdownAdapter`は、Markly ASTをRe:VIEW ASTに変換するアダプター層です。 + +##### ContextStack + +MarkdownAdapterは内部に`ContextStack`クラスを持ち、AST構築時の階層的なコンテキストを管理します。これにより、以下のような状態管理が統一され、例外安全性が保証されます: + +- リスト、テーブル、コラムなどのネストされた構造の管理 +- `with_context`メソッドによる例外安全なコンテキスト切り替え(`ensure`ブロックで自動クリーンアップ) +- `find_all`、`any?`メソッドによるスタック内の特定ノード検索 +- コンテキストの検証機能(`validate!`)によるデバッグ支援 + +主な機能: +- Markly ASTの走査と変換 +- 各Markdown要素の対応するRe:VIEW ASTノードへの変換 +- ContextStackによる統一された階層的コンテキスト管理 +- インライン要素の再帰的処理(InlineTokenizerを使用) +- 属性ブロックの解析とID・キャプションの抽出 +- Re:VIEW inline notation(`@{id}`)の処理 + +特徴: +- ContextStackによる例外安全な状態管理: すべてのコンテキスト(リスト、テーブル、コラム等)を単一のContextStackで管理し、`ensure`ブロックによる自動クリーンアップで例外安全性を保証 +- コラムの自動クローズ: 同じレベル以上の見出しでコラムを自動的にクローズ。コラムレベルはColumnNode.level属性に保存され、ContextStackから取得可能 +- スタンドアローン画像の検出: 段落内に単独で存在する画像(属性ブロック付き含む)をブロックレベルの`ImageNode`に変換。`softbreak`/`linebreak`ノードを無視することで、画像と属性ブロックの間に改行があっても正しく認識 +- 属性ブロックパーサー: `{#id caption="..."}`形式の属性を解析してIDとキャプションを抽出 +- Markly脚注サポート: Marklyのネイティブ脚注機能(Markly::FOOTNOTES)を使用して`[^id]`と`[^id]: 内容`を処理 +- InlineTokenizerによるinline notation処理: Re:VIEWのinline notation(`@{id}`等)をInlineTokenizerで解析してInlineNodeとReferenceNodeに変換 + +#### 3. MarkdownHtmlNode(内部使用) + +`MarkdownHtmlNode`は、Markdown内のHTML要素を解析し、特別な意味を持つHTMLコメント(コラムマーカーなど)を識別するための補助ノードです。 + +主な機能: +- HTMLコメントの解析 +- コラム終了マーカー(``)の検出 + +特徴: +- このノードは最終的なASTには含まれず、変換処理中にのみ使用されます +- コラム終了マーカー(``)を検出すると`end_column`メソッドを呼び出し +- 一般的なHTMLブロックは`EmbedNode(:html)`として保持されます + +#### 4. 
MarkdownRenderer + +`MarkdownRenderer`は、Re:VIEW ASTをMarkdown形式で出力するレンダラーです。 + +主な機能: +- Re:VIEW ASTの走査とMarkdown形式への変換 +- GFM互換のMarkdown記法での出力 +- キャプション付き要素の適切な形式での出力 + +出力形式: +- コードブロックのキャプション: `**Caption**`形式で出力し、その後にフェンスドコードブロックを出力 +- テーブルのキャプション: `**Caption**`形式で出力し、その後にGFMパイプスタイルのテーブルを出力 +- 画像: Markdown標準の`![alt](path)`形式で出力 +- 脚注参照: `[^id]`形式で出力 +- 脚注定義: `[^id]: 内容`形式で出力 + +特徴: +- 純粋なMarkdown形式での出力を優先 +- GFM(GitHub Flavored Markdown)との互換性を重視 +- 未解決の参照でもエラーにならず、ref_idをそのまま使用 + +### 変換処理の流れ + +1. 前処理: MarkdownCompilerがRe:VIEW inline notation(`@{id}`)を保護 + - `@<` → `@@REVIEW_AT_LT@@` に置換してMarklyの誤解釈を防止 + +2. 解析フェーズ: MarklyがMarkdownをパースしてMarkly AST(CommonMark準拠)を生成 + - GFM拡張(strikethrough, table, autolink)を有効化 + - 脚注サポート(Markly::FOOTNOTES)を有効化 + +3. 変換フェーズ: MarkdownAdapterがMarkly ASTを走査し、各要素をRe:VIEW ASTノードに変換 + - ContextStackで階層的なコンテキスト管理 + - 属性ブロック `{#id caption="..."}` を解析してIDとキャプションを抽出 + - Re:VIEW inline notationプレースホルダを元に戻してInlineTokenizerで処理 + - Marklyの脚注ノード(`:footnote_reference`、`:footnote_definition`)をFootnoteNodeとInlineNode(:fn)に変換 + +4. 後処理フェーズ: コラムやリストなどの入れ子構造を適切に閉じる + - ContextStackの`ensure`ブロックによる自動クリーンアップ + - 未閉じのコラムを検出してエラー報告 + +```ruby +# 変換の流れ +markdown_text → 前処理(@< のプレースホルダ化) + ↓ + Markly.parse(GFM拡張 + 脚注サポート) + ↓ + Markly AST + ↓ + MarkdownAdapter.convert + (ContextStack管理、属性ブロック解析、 + InlineTokenizer処理、脚注変換) + ↓ + Re:VIEW AST +``` + +### コラム処理の詳細 + +コラムは見出し構文で開始し、HTMLコメントまたは自動クローズで終了します: + +#### コラム開始(見出し構文) +- `process_heading`メソッドで検出 +- 見出しテキストから`[column]`マーカーを抽出 +- 見出しレベルをColumnNode.level属性に保存してContextStackにpush + +#### コラム終了(2つの方法) + +1. HTMLコメント構文: `` + - `process_html_block`メソッドで検出 + - `MarkdownHtmlNode`を使用してコラム終了マーカーを識別 + - `end_column`メソッドを呼び出してContextStackからpop + +2. 
自動クローズ: 同じ/より高いレベルの見出し + - `auto_close_columns_for_heading`メソッドがContextStackから現在のColumnNodeを取得し、level属性を確認 + - 新しい見出しレベルが現在のコラムレベル以下の場合、コラムを自動クローズ + - ドキュメント終了時も自動的にクローズ(`close_all_columns`) + +コラムの階層はContextStackで管理され、level属性でクローズ判定が行われます。 + +## 高度な機能 + +### カスタム処理 + +`MarkdownAdapter` クラスを拡張してカスタム処理を追加できます: + +```ruby +class CustomMarkdownAdapter < ReVIEW::AST::MarkdownAdapter + # メソッドをオーバーライドして動作をカスタマイズ +end +``` + +### Rendererとの統合 + +Markdownから生成されたASTは、すべてのRe:VIEW AST Rendererで動作します: +- HTMLRenderer: HTML形式で出力 +- LaTeXRenderer: LaTeX形式で出力(PDF生成用) +- IDGXMLRenderer: InDesign XML形式で出力 +- MarkdownRenderer: Markdown形式で出力(正規化・整形) +- その他のカスタムRenderer + +AST構造を経由することで、Markdownで書かれた文書も従来のRe:VIEWフォーマット(`.re`ファイル)と同じように処理され、同じ出力品質を実現できます。 + +#### MarkdownRendererの出力例 + +Re:VIEWフォーマットをMarkdown形式に変換する場合、以下のような出力になります: + +Re:VIEW入力例: +````review += 章タイトル + +//list[sample][サンプルコード][ruby]{ +def hello + puts "Hello, World!" +end +//} + +リスト@{sample}を参照してください。 + +//table[data][データ表]{ +名前 年齢 +----- +Alice 25 +Bob 30 +//} + + : API + Application Programming Interface + : @{REST} + Representational State Transfer +```` + +MarkdownRenderer出力: +`````markdown +# 章タイトル + +**サンプルコード** + +```ruby +def hello + puts "Hello, World!" 
+end +``` + +リスト@{sample}を参照してください。 + +**データ表** + +| 名前 | 年齢 | +| :-- | :-- | +| Alice | 25 | +| Bob | 30 | + +API: Application Programming Interface + +REST: Representational State Transfer + +````` + +注意: +- キャプションは`**Caption**`形式で出力され、コードブロックやテーブルの直前に配置されます +- 定義リストの用語は太字で出力されますが、用語内に既に強調が含まれている場合(例:`@{REST}`)は、二重の太字マークアップを避けるため外側の太字は省略されます +- これにより、人間が読みやすく、かつGFM互換のMarkdownが生成されます + +## テスト + +Markdownサポートの包括的なテストが用意されています: + +### テストファイル + +- `test/ast/test_markdown_adapter.rb`: MarkdownAdapterのテスト +- `test/ast/test_markdown_compiler.rb`: MarkdownCompilerのテスト +- `test/ast/test_markdown_renderer.rb`: MarkdownRendererのテスト +- `test/ast/test_markdown_renderer_fixtures.rb`: フィクスチャベースのMarkdownRendererテスト +- `test/ast/test_renderer_builder_comparison.rb`: RendererとBuilderの出力比較テスト + +### テストの実行 + +```bash +# すべてのテストを実行 +bundle exec rake test + +# Markdown関連のテストのみ実行 +ruby test/ast/test_markdown_adapter.rb +ruby test/ast/test_markdown_compiler.rb +ruby test/ast/test_markdown_renderer.rb + +# フィクスチャテストの実行 +ruby test/ast/test_markdown_renderer_fixtures.rb +``` + +### フィクスチャの再生成 + +MarkdownRendererの出力形式を変更した場合、フィクスチャを再生成する必要があります: + +```bash +bundle exec ruby test/fixtures/generate_markdown_fixtures.rb +``` + +これにより、`test/fixtures/markdown/`ディレクトリ内のMarkdownフィクスチャファイルが最新の出力形式で再生成されます。 + +## 参考資料 + +- [CommonMark仕様](https://commonmark.org/) +- [GitHub Flavored Markdown仕様](https://github.github.com/gfm/) +- [Markly Ruby Gem](https://github.com/gjtorikian/markly) +- [Re:VIEWフォーマットドキュメント](format.md) +- [AST概要](ast.md) +- [ASTアーキテクチャ詳細](ast_architecture.md) +- [ASTノード詳細](ast_node.md) diff --git a/doc/ast_markdown.md b/doc/ast_markdown.md new file mode 100644 index 000000000..7cf153139 --- /dev/null +++ b/doc/ast_markdown.md @@ -0,0 +1,955 @@ +# Re:VIEW Markdown Support + +Re:VIEW supports GitHub Flavored Markdown (GFM) through the AST-based Markdown compiler. This document describes supported Markdown features and conversion methods to Re:VIEW AST. 
+ +## Overview + +Markdown support is implemented on top of Re:VIEW's AST/Renderer architecture. Markdown documents are internally converted to Re:VIEW AST and treated equivalently to traditional Re:VIEW format (`.re` files). + +### Bidirectional Conversion Support + +Re:VIEW supports the following bidirectional conversions: + +1. Markdown → AST → Various formats: Convert Markdown to AST using MarkdownCompiler and output with various Renderers +2. Re:VIEW → AST → Markdown: Convert Re:VIEW format to AST and output in Markdown format with MarkdownRenderer + +This bidirectional conversion enables: +- Converting documents written in Markdown to PDF, EPUB, HTML, etc. +- Converting documents written in Re:VIEW to Markdown format for publishing on GitHub, etc. +- Mutual content conversion between different formats + +### Architecture + +Markdown support provides bidirectional conversion: + +#### Markdown → Re:VIEW AST (Input) + +- Markly: Fast CommonMark parser with GFM extensions (external gem) +- MarkdownCompiler: Oversees compiling Markdown documents to Re:VIEW AST +- MarkdownAdapter: Adapter layer that converts Markly AST to Re:VIEW AST +- MarkdownHtmlNode: Handles HTML element parsing and column marker detection (internal use) + +#### Re:VIEW AST → Markdown (Output) + +- MarkdownRenderer: Renderer that outputs Re:VIEW AST in Markdown format + - Captions are output in `**Caption**` format + - Images are output in `![alt](path)` format + - Tables are output in GFM pipe style + - Footnotes are output in `[^id]` notation + +### Supported Extensions + +The following GitHub Flavored Markdown extensions are enabled: +- strikethrough: Strikethrough text (`~~text~~`) +- table: Tables (pipe style) +- autolink: Autolinks (automatically converts `http://example.com` to links) + +### Re:VIEW-Specific Extensions + +In addition to standard GFM, the following Re:VIEW-specific extensions are supported: + +- Column syntax: Column blocks starting with heading (`### [column] Title`) and 
ending with HTML comment (`<!-- /column -->`) or auto-close +- Auto column close: Automatic column closing based on heading level +- Attribute blocks: ID and caption specification using Pandoc/kramdown-compatible `{#id caption="..."}` syntax +- Re:VIEW reference notation: Figure/table/listing references using `@<img>{id}`, `@<list>{id}`, `@<table>
{id}` +- Footnote support: Footnotes using Markdown standard `[^id]` notation + +## Markdown Basic Syntax + +Re:VIEW conforms to [CommonMark](https://commonmark.org/) and [GitHub Flavored Markdown (GFM)](https://github.github.com/gfm/) specifications. For details on standard Markdown syntax, refer to these official specifications. + +### Main Supported Elements + +The following Markdown elements are converted to Re:VIEW AST: + +| Markdown Syntax | Description | Re:VIEW AST | +|----------------|-------------|-------------| +| Paragraph | Text block separated by blank lines | `ParagraphNode` | +| Headings (`#` to `######`) | 6 heading levels | `HeadlineNode` | +| Bold (`**text**`) | Strong emphasis | `InlineNode(:b)` | +| Italic (`*text*`) | Italic emphasis | `InlineNode(:i)` | +| Code (`` `code` ``) | Inline code | `InlineNode(:code)` | +| Link (`[text](url)`) | Hyperlink | `InlineNode(:href)` | +| Strikethrough (`~~text~~`) | Strikethrough (GFM extension) | `InlineNode(:del)` | +| Bulleted list (`*`, `-`, `+`) | Unordered list | `ListNode(:ul)` | +| Numbered list (`1.`, `2.`) | Ordered list | `ListNode(:ol)` | +| Code block (` ``` `) | Code block with language specification | `CodeBlockNode` | +| Code block + attributes | ID and caption with `{#id caption="..."}` | `CodeBlockNode(:list)` | +| Blockquote (`>`) | Quote block | `BlockNode(:quote)` | +| Table (GFM) | Pipe-style table | `TableNode` | +| Table + attributes | ID and caption with `{#id caption="..."}` | `TableNode` (with ID/caption) | +| Image (`![alt](path)`) | Image (standalone line is block, inline is inline) | `ImageNode` / `InlineNode(:icon)` | +| Image + attributes | ID and caption with `{#id caption="..."}` | `ImageNode` (with ID/caption) | +| Horizontal rule (`---`, `***`) | Divider | `BlockNode(:hr)` | +| HTML block | Raw HTML (preserved) | `EmbedNode(:html)` | +| Footnote reference (`[^id]`) | Reference to footnote | `InlineNode(:fn)` + `ReferenceNode` | +| Footnote definition (`[^id]: content`) 
| Footnote definition | `FootnoteNode` | +| Re:VIEW reference (`@{id}`) | Reference to figures/tables/listings | `InlineNode(type)` + `ReferenceNode` | +| Definition list (Markdown output) | Term and description pairs | `DefinitionListNode` / `DefinitionItemNode` | + +### Conversion Example + +```markdown +## Heading + +This is a paragraph with **bold** and *italic* text. You can also use `inline code`. + +* Bulleted item 1 +* Bulleted item 2 + +See the [official site](https://example.com) for details. +``` + +### Image Handling + +Images are converted to different AST nodes depending on context: + +#### Standalone Image (Block Level) + +```markdown +![Figure 1 caption](image.png) +``` +Standalone images are converted to `ImageNode` (block level), equivalent to Re:VIEW's `//image[image][Figure 1 caption]`. + +#### Explicit ID and Caption Specification + +You can explicitly specify ID and caption for images using attribute block syntax. The attribute block can be written on the same line as the image or on the next line: + +```markdown +![alt text](images/sample.png){#fig-sample caption="Sample image"} +``` + +Or written on the next line: + +```markdown +![alt text](images/sample.png) +{#fig-sample caption="Sample image"} +``` + +This sets `id="fig-sample"` and `caption="Sample image"` on the `ImageNode`. If attribute block caption is specified, it takes precedence. You can also specify only the ID: + +```markdown +![Sample image](images/sample.png){#fig-sample} +``` + +Or: + +```markdown +![Sample image](images/sample.png) +{#fig-sample} +``` + +In this case, the alt text "Sample image" is used as the caption. + +#### Inline Images + +```markdown +This is an ![icon](icon.png) inline image. +``` +Inline images are converted to `InlineNode(:icon)`, equivalent to Re:VIEW's `@{icon.png}`. + +## Columns (Re:VIEW Extension) + +Re:VIEW supports column blocks within Markdown documents. Columns start with heading syntax and end with HTML comments or auto-close. 
+ +### Method 1: Heading Syntax + HTML Comment End + +```markdown +### [column] Column Title + +Write your column content here. + +You can use all Markdown features within columns. + + +``` + +For columns without title: + +```markdown +### [column] + +Column content without title. + + +``` + +### Method 2: Heading Syntax (Auto-close) + +Columns are automatically closed in the following cases: +- When encountering a heading of the same level +- When encountering a heading of higher level (smaller number) +- At document end + +```markdown +### [column] Column Title + +Write your column content here. + +### Next Section +``` + +In this example, the column is automatically closed when the "Next Section" heading is encountered. + +Example of auto-close at document end: + +```markdown +### [column] Tips and Tricks + +This column will be automatically closed at the end of the document. + +No explicit end marker is needed. +``` + +Example with higher level heading: + +```markdown +### [column] Subsection Column + +Level 3 column. + +## Main Section + +This level 2 heading closes the level 3 column. +``` + +### Column Auto-close Rules + +- Same level: `### [column]` closes when another `###` heading appears +- Higher level: `### [column]` closes when `##` or `#` heading appears +- Lower level: `### [column]` does not close when `####` or lower appears +- Document end: All open columns are automatically closed + +### Column Nesting + +Columns can be nested, but pay attention to heading levels: + +```markdown +## [column] Outer Column + +Outer column content. + +### [column] Inner Column + +Inner column content. + + + +Back to outer column. + + +``` + +## Code Blocks and Lists (Re:VIEW Extension) + +### Code Blocks with Captions + +You can specify ID and caption for code blocks to use functionality equivalent to Re:VIEW's `//list` command. 
The attribute block is written after the language specification: + +````markdown +```ruby {#lst-hello caption="Greeting program"} +def hello(name) + puts "Hello, #{name}!" +end +``` +```` + +By writing the attribute block `{#lst-hello caption="Greeting program"}` after the language specification, ID and caption are set on the code block. In this case, the `code_type` of `CodeBlockNode` becomes `:list`. + +You can also specify only the ID: + +````markdown +```ruby {#lst-example} +# code +``` +```` + +Regular code blocks without attribute blocks are treated as `code_type: :emlist`. + +Note: Attribute blocks for code blocks must be written on the opening backtick line. Unlike images and tables, they cannot be written on the next line. + +## Tables (Re:VIEW Extension) + +### Tables with Captions + +You can specify ID and caption for GFM tables. The attribute block is written on the line immediately after the table: + +```markdown +| Name | Age | Occupation | +|------|-----|------------| +| Alice| 25 | Engineer | +| Bob | 30 | Designer | +{#tbl-users caption="User list"} +``` + +By writing the attribute block `{#tbl-users caption="User list"}` on the line immediately after the table, ID and caption are set on the table. This is equivalent to Re:VIEW's `//table` command. + +## Figure/Table References (Re:VIEW Extension) + +### References Using Re:VIEW Notation + +You can use Re:VIEW reference notation within Markdown to reference figures, tables, and listings: + +```markdown +![Sample image](images/sample.png) +{#fig-sample caption="Sample image"} + +See Figure @{fig-sample}. +``` + +```markdown +```ruby {#lst-hello caption="Greeting program"} +def hello + puts "Hello, World!" +end +``` + +See Listing @{lst-hello}. +``` + +```markdown +| Name | Age | +|------|-----| +| Alice| 25 | +{#tbl-users caption="User list"} + +See Table @
{tbl-users}. +``` + +This notation is the same as Re:VIEW's standard reference notation. The reference IDs must correspond to the IDs specified in the attribute blocks above. + +References are replaced with appropriate numbers in subsequent processing: +- `@<img>{fig-sample}` → "Figure 1.1" +- `@<list>{lst-hello}` → "Listing 1.1" +- `@<table>
{tbl-users}` → "Table 1.1" + +### Reference Resolution + +References are replaced with appropriate figure/table/listing numbers in subsequent processing (reference resolution phase). They are represented as a combination of `InlineNode` and `ReferenceNode` in the AST. + +## Footnotes (Re:VIEW Extension) + +Markdown standard footnote notation is supported: + +### Using Footnotes + +```markdown +This is a footnote test[^1]. + +Multiple footnotes can also be used[^note]. + +[^1]: This is the first footnote. + +[^note]: This is a named footnote. + Multiple line content is + also supported. +``` + +You can use footnote references `[^id]` and footnote definitions `[^id]: content`. Footnote definitions can span multiple lines, and indented lines are treated as continuations of the previous footnote. + +### Conversion to FootnoteNode + +Footnote definitions are converted to `FootnoteNode` and treated equivalently to Re:VIEW's `//footnote` command. Footnote references are represented as `InlineNode(:fn)`. + +## Definition Lists (Markdown Output) + +When converting Re:VIEW definition lists (`: term` format) to Markdown format, they are output in the following format: + +### Basic Output Format + +```markdown +**term**: description + +**another term**: another description +``` + +Terms are emphasized in bold (`**term**`) followed by a colon, space, and description. + +### When Terms Include Emphasis + +When a term already includes bold (`**text**`) or emphasis (`@{text}`), MarkdownRenderer does not wrap the term in bold to avoid double bold markup (`****text****`): + +Re:VIEW input example: +```review + : @{Important} term + Description +``` + +Markdown output: +```markdown +**Important** term: Description +``` + +In this way, emphasis elements within the term are preserved as is, and outer bold markup is not added. 
+ +### AST Representation of Definition Lists + +Definition lists are represented in Re:VIEW AST with the following nodes: +- `DefinitionListNode`: Node representing the entire definition list +- `DefinitionItemNode`: Node representing individual term and description pairs + - `term_children`: List of inline elements for the term + - `children`: List of block elements for the description + +MarkdownRenderer checks if `term_children` contains `InlineNode(:b)` or `InlineNode(:strong)`, and if so, omits the outer bold markup. + +## Other Markdown Features + +### Line Breaks +- Soft break: Single line break is converted to space +- Hard break: Two spaces at line end insert a line break + +### HTML Blocks +Raw HTML blocks are preserved as `EmbedNode(:html)` and treated equivalently to Re:VIEW's `//embed[html]`. Inline HTML is also supported. + +## Limitations and Notes + +### File Extension + +Markdown files must use the `.md` extension to be processed properly. The Re:VIEW system automatically detects file format by extension. + +**Important:** Re:VIEW only supports the `.md` extension. The `.markdown` extension is not supported. + +### Image Paths + +Image paths must be relative paths from the project's image directory (default `images/`) or use Re:VIEW's image path conventions. + +#### Example +```markdown +![Caption](sample.png) +``` + +### Re:VIEW-Specific Features + +The following Re:VIEW features are supported within Markdown: + +#### Supported Re:VIEW Features +- `//list` (code block with caption) → Can be specified with attribute block `{#id caption="..."}` +- `//table` (table with caption) → Can be specified with attribute block `{#id caption="..."}` +- `//image` (image with caption) → Can be specified with attribute block `{#id caption="..."}` +- `//footnote` (footnote) → Supports Markdown standard `[^id]` notation +- Figure/table references (`@{id}`, `@{id}`, `@
{id}`) → Fully supported +- Column (`//column`) → Supported with HTML comment or heading notation + +#### Unsupported Re:VIEW-Specific Features +- Special block commands like `//cmd`, `//embed`, etc. +- Some inline commands (`@`, `@`, `@`, etc.) +- Complex table features (cell merging, custom column widths, etc.) + +If you need access to all Re:VIEW features, use Re:VIEW format (`.re` files). + +### Column Nesting + +When nesting columns, pay attention to heading levels. Inner columns should use higher heading levels (larger numbers) than outer columns: + +```markdown +## [column] Outer Column +Outer content + +### [column] Inner Column +Inner content + + +Back to outer column + +``` + +### HTML Comment Usage + +HTML comment `` is used as a column end marker. When using as a general comment, be careful not to write `/column`: + +```markdown + + +``` + +## Usage + +### Command-Line Tools + +#### Conversion via AST (Recommended) + +When converting Markdown files to various formats via AST, use AST-specific commands: + +```bash +# Dump Markdown to JSON-formatted AST +review-ast-dump chapter.md > chapter.json + +# Convert Markdown to Re:VIEW format +review-ast-dump2re chapter.md > chapter.re + +# Generate EPUB from Markdown (via AST) +review-ast-epubmaker config.yml + +# Generate PDF from Markdown (via AST) +review-ast-pdfmaker config.yml + +# Generate InDesign XML from Markdown (via AST) +review-ast-idgxmlmaker config.yml +``` + +#### Using review-ast-compile + +With the `review-ast-compile` command, you can directly convert Markdown to specified formats: + +```bash +# Convert Markdown to JSON-formatted AST +review-ast-compile --target=ast chapter.md + +# Convert Markdown to HTML (via AST) +review-ast-compile --target=html chapter.md + +# Convert Markdown to LaTeX (via AST) +review-ast-compile --target=latex chapter.md + +# Convert Markdown to InDesign XML (via AST) +review-ast-compile --target=idgxml chapter.md + +# Convert Markdown to Markdown (via AST, 
normalization/formatting) +review-ast-compile --target=markdown chapter.md +``` + +Note: Specifying `--target=ast` outputs the generated AST structure in JSON format. This is useful for debugging and checking AST structure. + +#### Converting Re:VIEW Format to Markdown Format + +You can also convert Re:VIEW format (`.re` files) to Markdown format: + +```bash +# Convert Re:VIEW file to Markdown +review-ast-compile --target=markdown chapter.re > chapter.md +``` + +This conversion allows you to output documents written in Re:VIEW in Markdown format. MarkdownRenderer outputs in the following formats: + +- Code blocks: Captions are output in `**Caption**` format, followed by fenced code blocks +- Tables: Captions are output in `**Caption**` format, followed by GFM pipe-style tables +- Images: Output in Markdown standard `![alt](path)` format +- Footnotes: Output in Markdown standard `[^id]` notation + +#### Compatibility with Traditional review-compile + +The traditional `review-compile` command can still be used, but when utilizing AST/Renderer architecture, we recommend using `review-ast-compile` and various `review-ast-*maker` commands: + +```bash +# Traditional method (kept for compatibility) +review-compile --target=html chapter.md +review-compile --target=latex chapter.md +``` + +### Project Configuration + +Configure project to use Markdown: + +```yaml +# config.yml +contentdir: src + +# CATALOG.yml +CHAPS: + - chapter1.md + - chapter2.md +``` + +### Integration with Re:VIEW Projects + +You can mix Markdown and Re:VIEW files in the same project: + +``` +project/ + ├── config.yml + ├── CATALOG.yml + └── src/ + ├── chapter1.re # Re:VIEW format + ├── chapter2.md # Markdown format + └── chapter3.re # Re:VIEW format +``` + +## Sample + +### Complete Document Example + +````markdown +# Introduction to Ruby + +Ruby is a dynamic, open source programming language with a focus on simplicity and productivity[^intro]. 
+ +## Installation + +To install Ruby, follow these steps: + +1. Visit the [Ruby website](https://www.ruby-lang.org/en/) +2. Download the installer for your platform +3. Run the installer + +### [column] Version Management + +For managing Ruby installations, we recommend using version managers like **rbenv** or **RVM**. + +<!-- /column --> + +## Basic Syntax + +A simple Ruby program example is shown in Listing @<list>{lst-hello}: + +```ruby {#lst-hello caption="Hello World in Ruby"} +# Hello World in Ruby +puts "Hello, World!" + +# Define a method +def greet(name) + "Hello, #{name}!" +end + +puts greet("Ruby") +``` + +### Variables + +Ruby has several variable types (see Table @<table>
{tbl-vars}): + +| Type | Prefix | Example | +|------|--------|---------| +| Local | none | `variable` | +| Instance | `@` | `@variable` | +| Class | `@@` | `@@variable` | +| Global | `$` | `$variable` | +{#tbl-vars caption="Ruby variable types"} + +## Project Structure + +A typical Ruby project structure is shown in Figure @{fig-structure}: + +![Project structure diagram](images/ruby-structure.png) +{#fig-structure caption="Ruby project structure"} + +## Summary + +> Ruby is designed to make programmers happy. +> +> -- Yukihiro Matsumoto + +For more information, see ~~official documentation~~ [Ruby Docs](https://docs.ruby-lang.org/)[^docs]. + +--- + +Happy coding! ![Ruby logo](ruby-logo.png) + +[^intro]: Ruby was released by Yukihiro Matsumoto in 1995. + +[^docs]: The official documentation includes rich tutorials and API references. +```` + +## Conversion Details + +### AST Node Mapping + +| Markdown Element | Re:VIEW AST Node | +|------------------|------------------| +| Paragraph | `ParagraphNode` | +| Heading | `HeadlineNode` | +| Bold | `InlineNode(:b)` | +| Italic | `InlineNode(:i)` | +| Code | `InlineNode(:code)` | +| Link | `InlineNode(:href)` | +| Strikethrough | `InlineNode(:del)` | +| Bulleted list | `ListNode(:ul)` | +| Numbered list | `ListNode(:ol)` | +| List item | `ListItemNode` | +| Code block | `CodeBlockNode` | +| Code block (with attributes) | `CodeBlockNode(:list)` | +| Blockquote | `BlockNode(:quote)` | +| Table | `TableNode` | +| Table (with attributes) | `TableNode` (with ID/caption) | +| Table row | `TableRowNode` | +| Table cell | `TableCellNode` | +| Standalone image | `ImageNode` | +| Standalone image (with attributes) | `ImageNode` (with ID/caption) | +| Inline image | `InlineNode(:icon)` | +| Horizontal rule | `BlockNode(:hr)` | +| HTML block | `EmbedNode(:html)` | +| Column (HTML comment/heading) | `ColumnNode` | +| Code block line | `CodeLineNode` | +| Footnote definition `[^id]: content` | `FootnoteNode` | +| Footnote reference 
`[^id]` | `InlineNode(:fn)` + `ReferenceNode` | +| Figure/table reference `@<type>{id}` | `InlineNode(type)` + `ReferenceNode` | +| Definition list (output only) | `DefinitionListNode` | +| Definition item (output only) | `DefinitionItemNode` | + +### Location Information Tracking + +All AST nodes include location information (`SnapshotLocation`) that tracks: +- Source file name +- Line number + +This enables accurate error reporting and debugging. + +### Implementation Architecture + +Markdown support consists of four main components: + +#### 1. MarkdownCompiler + +`MarkdownCompiler` is responsible for compiling entire Markdown documents to Re:VIEW AST. + +Main features: +- Initializing and configuring Markly parser +- Enabling GFM extensions (strikethrough, table, autolink) +- Enabling footnote support (Markly::FOOTNOTES) +- Re:VIEW inline notation protection (`@<xxx>{id}` notation protection) +- Coordination with MarkdownAdapter +- Overseeing AST generation + +Re:VIEW notation protection: + +MarkdownCompiler protects Re:VIEW inline notation (`@<xxx>{id}`) before parsing by Markly. Since Markly incorrectly interprets `@<xxx>` as HTML tags, `@<` is replaced with placeholder `@@REVIEW_AT_LT@@` before parsing and restored by MarkdownAdapter. + +#### 2. MarkdownAdapter + +`MarkdownAdapter` is the adapter layer that converts Markly AST to Re:VIEW AST. + +##### ContextStack + +MarkdownAdapter has an internal `ContextStack` class that manages hierarchical context during AST construction. 
This unifies state management like the following and guarantees exception safety: + +- Managing nested structures like lists, tables, columns +- Exception-safe context switching with `with_context` method (automatic cleanup in `ensure` block) +- Searching for specific nodes in stack with `find_all`, `any?` methods +- Debug support with context validation (`validate!`) + +Main features: +- Traversing and converting Markly AST +- Converting each Markdown element to corresponding Re:VIEW AST node +- Unified hierarchical context management with ContextStack +- Recursive processing of inline elements (using InlineTokenizer) +- Parsing attribute blocks and extracting IDs/captions +- Processing Re:VIEW inline notation (`@{id}`) + +Features: +- Exception-safe state management with ContextStack: All contexts (lists, tables, columns, etc.) are managed in a single ContextStack, guaranteeing exception safety with automatic cleanup in `ensure` blocks +- Auto column close: Automatically closes columns with same level or higher headings. Column level is stored in ColumnNode.level attribute and can be retrieved from ContextStack +- Standalone image detection: Converts images that exist alone in paragraphs (including those with attribute blocks) to block-level `ImageNode`. Correctly recognizes even when there's a line break between image and attribute block by ignoring `softbreak`/`linebreak` nodes +- Attribute block parser: Parses `{#id caption="..."}` format attributes to extract ID and caption +- Markly footnote support: Uses Markly's native footnote feature (Markly::FOOTNOTES) to process `[^id]` and `[^id]: content` +- Inline notation processing with InlineTokenizer: Parses Re:VIEW inline notation (`@{id}`, etc.) with InlineTokenizer and converts to InlineNode and ReferenceNode + +#### 3. MarkdownHtmlNode (Internal Use) + +`MarkdownHtmlNode` is an auxiliary node for parsing HTML elements in Markdown and identifying HTML comments with special meaning (column markers, etc.). 
+ +Main features: +- Parsing HTML comments +- Detecting column end markers (`<!-- /column -->`) + +Features: +- This node is not included in the final AST, used only during conversion processing +- Calls `end_column` method when column end marker (`<!-- /column -->`) is detected +- General HTML blocks are preserved as `EmbedNode(:html)` + +#### 4. MarkdownRenderer + +`MarkdownRenderer` is a renderer that outputs Re:VIEW AST in Markdown format. + +Main features: +- Traversing Re:VIEW AST and converting to Markdown format +- Output in GFM-compatible Markdown notation +- Output of captioned elements in appropriate format + +Output formats: +- Code block captions: Output in `**Caption**` format followed by fenced code block +- Table captions: Output in `**Caption**` format followed by GFM pipe-style table +- Images: Output in Markdown standard `![alt](path)` format +- Footnote references: Output in `[^id]` format +- Footnote definitions: Output in `[^id]: content` format + +Features: +- Prioritizes pure Markdown format output +- Emphasizes compatibility with GFM (GitHub Flavored Markdown) +- Does not error on unresolved references, uses ref_id as is + +### Conversion Process Flow + +1. Preprocessing: MarkdownCompiler protects Re:VIEW inline notation (`@<xxx>{id}`) + - Replace `@<` → `@@REVIEW_AT_LT@@` to prevent Markly misinterpretation + +2. Parsing phase: Markly parses Markdown and generates Markly AST (CommonMark compliant) + - Enable GFM extensions (strikethrough, table, autolink) + - Enable footnote support (Markly::FOOTNOTES) + +3. Conversion phase: MarkdownAdapter traverses Markly AST and converts each element to Re:VIEW AST node + - Hierarchical context management with ContextStack + - Parse attribute blocks `{#id caption="..."}` to extract ID and caption + - Restore Re:VIEW inline notation placeholder and process with InlineTokenizer + - Convert Markly footnote nodes (`:footnote_reference`, `:footnote_definition`) to FootnoteNode and InlineNode(:fn) + +4. 
Post-processing phase: Properly close nested structures like columns and lists + - Automatic cleanup with ContextStack's `ensure` block + - Detect unclosed columns and report errors + +```ruby +# Conversion flow +markdown_text → Preprocessing (@< placeholderization) + ↓ + Markly.parse (GFM extensions + footnote support) + ↓ + Markly AST + ↓ + MarkdownAdapter.convert + (ContextStack management, attribute block parsing, + InlineTokenizer processing, footnote conversion) + ↓ + Re:VIEW AST +``` + +### Column Processing Details + +Columns start with heading syntax and end with HTML comments or auto-close: + +#### Column Start (Heading Syntax) +- Detected in `process_heading` method +- Extract `[column]` marker from heading text +- Save heading level to ColumnNode.level attribute and push to ContextStack + +#### Column End (Two Methods) + +1. HTML comment syntax: `<!-- /column -->` + - Detected in `process_html_block` method + - Use `MarkdownHtmlNode` to identify column end marker + - Call `end_column` method to pop from ContextStack + +2. Auto-close: Same/higher level heading + - `auto_close_columns_for_heading` method retrieves current ColumnNode from ContextStack and checks level attribute + - If new heading level is less than or equal to current column level, auto-close column + - Also automatically closes at document end (`close_all_columns`) + +Column hierarchy is managed by ContextStack, and close determination is made by level attribute. 
+ +## Advanced Features + +### Custom Processing + +You can extend the `MarkdownAdapter` class to add custom processing: + +```ruby +class CustomMarkdownAdapter < ReVIEW::AST::MarkdownAdapter + # Override methods to customize behavior +end +``` + +### Integration with Renderers + +AST generated from Markdown works with all Re:VIEW AST Renderers: +- HTMLRenderer: Output in HTML format +- LaTeXRenderer: Output in LaTeX format (for PDF generation) +- IDGXMLRenderer: Output in InDesign XML format +- MarkdownRenderer: Output in Markdown format (normalization/formatting) +- Other custom Renderers + +By going through AST structure, documents written in Markdown are processed the same as traditional Re:VIEW format (`.re` files) and achieve the same output quality. + +#### MarkdownRenderer Output Example + +When converting Re:VIEW format to Markdown format, the output looks like this: + +Re:VIEW input example: +````review += Chapter Title + +//list[sample][Sample code][ruby]{ +def hello + puts "Hello, World!" +end +//} + +See Listing @{sample}. + +//table[data][Data table]{ +Name Age +----- +Alice 25 +Bob 30 +//} + + : API + Application Programming Interface + : @{REST} + Representational State Transfer +```` + +MarkdownRenderer output: +`````markdown +# Chapter Title + +**Sample code** + +```ruby +def hello + puts "Hello, World!" +end +``` + +See Listing @{sample}. 
+ +**Data table** + +| Name | Age | +| :-- | :-- | +| Alice | 25 | +| Bob | 30 | + +API: Application Programming Interface + +REST: Representational State Transfer + +````` + +Notes: +- Captions are output in `**Caption**` format and placed immediately before code blocks or tables +- Definition list terms are output in bold, but if the term already contains emphasis (e.g., `@{REST}`), outer bold is omitted to avoid double bold markup +- This generates human-readable, GFM-compatible Markdown + +## Testing + +Comprehensive tests for Markdown support are provided: + +### Test Files + +- `test/ast/test_markdown_adapter.rb`: MarkdownAdapter tests +- `test/ast/test_markdown_compiler.rb`: MarkdownCompiler tests +- `test/ast/test_markdown_renderer.rb`: MarkdownRenderer tests +- `test/ast/test_markdown_renderer_fixtures.rb`: Fixture-based MarkdownRenderer tests +- `test/ast/test_renderer_builder_comparison.rb`: Renderer and Builder output comparison tests + +### Running Tests + +```bash +# Run all tests +bundle exec rake test + +# Run only Markdown-related tests +ruby test/ast/test_markdown_adapter.rb +ruby test/ast/test_markdown_compiler.rb +ruby test/ast/test_markdown_renderer.rb + +# Run fixture tests +ruby test/ast/test_markdown_renderer_fixtures.rb +``` + +### Regenerating Fixtures + +If you change MarkdownRenderer output format, you need to regenerate fixtures: + +```bash +bundle exec ruby test/fixtures/generate_markdown_fixtures.rb +``` + +This regenerates Markdown fixture files in the `test/fixtures/markdown/` directory with the latest output format. 
+ +## References + +- [CommonMark Specification](https://commonmark.org/) +- [GitHub Flavored Markdown Specification](https://github.github.com/gfm/) +- [Markly Ruby Gem](https://github.com/gjtorikian/markly) +- [Re:VIEW Format Documentation](format.md) +- [AST Overview](ast.md) +- [AST Architecture Details](ast_architecture.md) +- [AST Node Details](ast_node.md) diff --git a/doc/ast_node.ja.md b/doc/ast_node.ja.md new file mode 100644 index 000000000..3dabdc7a0 --- /dev/null +++ b/doc/ast_node.ja.md @@ -0,0 +1,603 @@ +# Re:VIEW AST::Node 概要 + +## 概要 + +Re:VIEWのAST(Abstract Syntax Tree)は、Re:VIEW形式のテキストを構造化したノードツリーで、様々な出力形式に変換できます。 + +## 基本設計パターン + +1. Visitorパターン: ASTノードの処理にVisitorパターンを使用 +2. コンポジットパターン: 親子関係を持つノード構造 +3. ファクトリーパターン: CaptionNodeなどの作成 +4. シリアライゼーション: JSON形式でのAST保存・復元 + +## 基底クラス: `AST::Node` + +### 主要属性 +- `location`: ソースファイル内の位置情報(ファイル名、行番号) +- `parent`: 親ノード(Nodeインスタンス) +- `children`: 子ノードの配列 +- `type`: ノードタイプ(文字列) +- `id`: ID(該当する場合) +- `content`: コンテンツ(該当する場合) +- `original_text`: 元のテキスト + +### 主要メソッド +- `add_child(child)`, `remove_child(child)`, `replace_child(old_child, new_child)`, `insert_child(idx, *nodes)`: 子ノードの管理 +- `leaf_node?()`: リーフノードかどうかを判定 +- `reference_node?()`: 参照ノードかどうかを判定 +- `id?()`: IDを持つかどうかを判定 +- `add_attribute(key, value)`, `attribute?(key)`: 属性の管理 +- `visit_method_name()`: Visitorパターンで使用するメソッド名をシンボルで返す +- `to_inline_text()`: マークアップを除いたテキスト表現を返す(ブランチノードでは例外を発生、サブクラスでオーバーライド) +- `to_h`, `to_json`: 基本的なJSON形式のシリアライゼーション +- `serialize_to_hash(options)`: 拡張されたシリアライゼーション + +### 設計原則 +- ブランチノード: `LeafNode`を継承していないノードクラス全般。子ノードを持つことができる(`ParagraphNode`, `InlineNode`など) +- リーフノード: `LeafNode`を継承し、子ノードを持つことができない(`TextNode`, `ImageNode`など) +- `LeafNode`は`content`属性を持つが、サブクラスが独自の属性を定義可能 +- 同じノードで`content`と`children`を混在させない + - リーフノードも`children`を持つが、必ず空配列を返す(`nil`にはならない) + +## 基底クラス: `AST::LeafNode` + +### 概要 +- 親クラス: Node +- 用途: 子ノードを持たない終端ノードの基底クラス +- 特徴: + - `content`属性を持つ(常に文字列、デフォルトは空文字列) + - 子ノードを追加しようとするとエラーを発生 + - 
`leaf_node?`メソッドが`true`を返す + +### 主要メソッド +- `leaf_node?()`: 常に`true`を返す +- `children`: 常に空配列を返す +- `add_child(child)`: エラーを発生(子を持てない) +- `to_inline_text()`: `content`を返す + +### LeafNodeを継承するクラス +- `TextNode`: プレーンテキスト(およびそのサブクラス`ReferenceNode`) +- `ImageNode`: 画像(ただし`content`の代わりに`id`, `caption_node`, `metric`を持つ) +- `TexEquationNode`: LaTeX数式 +- `EmbedNode`: 埋め込みコンテンツ +- `FootnoteNode`: 脚注定義 + +## ノードクラス階層図 + +``` +AST::Node (基底クラス) +├── [ブランチノード] - 子ノードを持つことができる +│ ├── DocumentNode # ドキュメントルート +│ ├── HeadlineNode # 見出し(=, ==, ===) +│ ├── ParagraphNode # 段落テキスト +│ ├── InlineNode # インライン要素(@{}, @{}等) +│ ├── CaptionNode # キャプション(テキスト+インライン要素) +│ ├── ListNode # リスト(ul, ol, dl) +│ │ └── ListItemNode # リストアイテム +│ ├── TableNode # テーブル +│ │ ├── TableRowNode # テーブル行 +│ │ └── TableCellNode # テーブルセル +│ ├── CodeBlockNode # コードブロック +│ │ └── CodeLineNode # コード行 +│ ├── BlockNode # 汎用ブロック(//quote, //read等) +│ ├── ColumnNode # コラム(====[column]{id}) +│ └── MinicolumnNode # ミニコラム(//note, //memo等) +│ +└── LeafNode (リーフノードの基底クラス) - 子ノードを持てない + ├── TextNode # プレーンテキスト + │ └── ReferenceNode # 参照情報を持つテキストノード + ├── ImageNode # 画像(//image, //indepimage等) + ├── FootnoteNode # 脚注定義(//footnote) + ├── TexEquationNode # LaTeX数式ブロック(//texequation) + └── EmbedNode # 埋め込みコンテンツ(//embed, //raw) +``` + +### ノードの分類 + +#### 構造ノード(コンテナ) +- `DocumentNode`, `HeadlineNode`, `ParagraphNode`, `ListNode`, `TableNode`, `CodeBlockNode`, `BlockNode`, `ColumnNode`, `MinicolumnNode` + +#### コンテンツノード(リーフ) +- `TextNode`, `ReferenceNode`, `ImageNode`, `FootnoteNode`, `TexEquationNode`, `EmbedNode` + +#### 特殊ノード +- `InlineNode` (テキストを含むがインライン要素) +- `CaptionNode` (テキストとインライン要素の混合) +- `ReferenceNode` (TextNodeのサブクラス、参照情報を保持) +- `ListItemNode`, `TableRowNode`, `TableCellNode`, `CodeLineNode` (特定の親ノード専用) + +## ノードクラス詳細 + +### 1. 
ドキュメント構造ノード + +#### `DocumentNode` + +- 親クラス: Node +- 属性: + - `title`: ドキュメントタイトル + - `chapter`: 関連するチャプター +- 用途: ASTのルートノード、ドキュメント全体を表現 +- 例: 一つのチャプターファイル全体 +- 特徴: 通常はHeadlineNode、ParagraphNode、BlockNodeなどを子として持つ + +#### `HeadlineNode` + +- 親クラス: Node +- 属性: + - `level`: 見出しレベル(1-6) + - `label`: ラベル(オプション) + - `caption_node`: キャプション(CaptionNodeインスタンス) +- 用途: `=`, `==`, `===` 形式の見出し +- 例: + - `= Chapter Title` → level=1, caption_node=CaptionNode + - `=={label} Section Title` → level=2, label="label", caption_node=CaptionNode +- メソッド: `to_s`: デバッグ用の文字列表現 + +#### `ParagraphNode` + +- 親クラス: Node +- 用途: 通常の段落テキスト +- 特徴: 子ノードとしてTextNodeやInlineNodeを含む +- 例: 通常のテキスト段落、リスト内のテキスト + +### 2. テキストコンテンツノード + +#### `TextNode` + +- 親クラス: Node +- 属性: + - `content`: テキスト内容(文字列) +- 用途: プレーンテキストを表現 +- 特徴: リーフノード(子ノードを持たない) +- 例: 段落内の文字列、インライン要素内の文字列 + +#### `ReferenceNode` + +- 親クラス: TextNode +- 属性: + - `content`: 表示テキスト(継承) + - `ref_id`: 参照ID(主要な参照先) + - `context_id`: コンテキストID(章ID等、オプション) + - `resolved`: 参照が解決済みかどうか + - `resolved_data`: 構造化された解決済みデータ(ResolvedData) +- 用途: 参照系インライン要素(`@{}`, `@
{}`, `@{}`など)の子ノードとして使用 +- 特徴: + - TextNodeのサブクラスで、参照情報を保持 + - イミュータブル設計(参照解決時には新しいインスタンスを作成) + - 未解決時は参照IDを表示、解決後は適切な参照テキストを生成 +- 主要メソッド: + - `resolved?()`: 参照が解決済みかどうかを判定 + - `with_resolved_data(data)`: 解決済みの新しいインスタンスを返す +- 例: `@{sample-image}` → ReferenceNode(ref_id: "sample-image") + +#### `InlineNode` + +- 親クラス: Node +- 属性: + - `inline_type`: インライン要素タイプ(文字列) + - `args`: 引数配列 +- 用途: インライン要素(`@{}`, `@{}` など) +- 例: + - `@{太字}` → inline_type="b", args=["太字"] + - `@{https://example.com,リンク}` → inline_type="href", args=["https://example.com", "リンク"] +- 特徴: 子ノードとしてTextNodeを含むことが多い + +### 3. コードブロックノード + +#### `CodeBlockNode` + +- 親クラス: Node +- 属性: + - `lang`: プログラミング言語(オプション) + - `caption_node`: キャプション(CaptionNodeインスタンス) + - `line_numbers`: 行番号表示フラグ + - `code_type`: コードブロックタイプ(`:list`, `:emlist`, `:listnum` など) + - `original_text`: 元のコードテキスト +- 用途: `//list`, `//emlist`, `//listnum` などのコードブロック +- 特徴: `CodeLineNode`の子ノードを持つ +- メソッド: + - `original_lines()`: 元のテキスト行配列 + - `processed_lines()`: 処理済みテキスト行配列 + +#### `CodeLineNode` + +- 親クラス: Node +- 属性: + - `line_number`: 行番号(オプション) + - `original_text`: 元のテキスト +- 用途: コードブロック内の各行 +- 特徴: インライン要素も含むことができる(Re:VIEW記法が使用可能) +- 例: コード内の`@{強調}`のような記法 + +### 4. リストノード + +#### `ListNode` + +- 親クラス: Node +- 属性: + - `list_type`: リストタイプ(`:ul`(箇条書き), `:ol`(番号付き), `:dl`(定義リスト)) + - `olnum_start`: 番号付きリストの開始番号(オプション) +- 用途: 箇条書きリスト(`*`, `1.`, `: 定義`形式) +- 子ノード: `ListItemNode`の配列 + +#### `ListItemNode` + +- 親クラス: Node +- 属性: + - `level`: ネストレベル(1以上) + - `number`: 番号付きリストの番号(オプション) + - `item_type`: アイテムタイプ(`:ul_item`, `:ol_item`, `:dt`, `:dd`) +- 用途: リストアイテム +- 特徴: ネストしたリストや段落を子として持つことができる + +### 5. 
テーブルノード + +#### `TableNode` + +- 親クラス: Node +- 属性: + - `caption_node`: キャプション(CaptionNodeインスタンス) + - `table_type`: テーブルタイプ(`:table`, `:emtable`, `:imgtable`) + - `metric`: メトリック情報(幅設定など) +- 特別な構造: + - `header_rows`: ヘッダー行の配列 + - `body_rows`: ボディ行の配列 +- 用途: `//table`コマンドのテーブル +- メソッド: ヘッダーとボディの行を分けて管理 + +#### `TableRowNode` + +- 親クラス: Node +- 属性: + - `row_type`: 行タイプ(`:header`, `:body`) +- 用途: テーブルの行 +- 子ノード: `TableCellNode`の配列 + +#### `TableCellNode` + +- 親クラス: Node +- 属性: + - `cell_type`: セルタイプ(`:th`(ヘッダー)または `:td`(通常セル)) + - `colspan`, `rowspan`: セル結合情報(オプション) +- 用途: テーブルのセル +- 特徴: TextNodeやInlineNodeを子として持つ + +### 6. メディアノード + +#### `ImageNode` + +- 親クラス: Node +- 属性: + - `caption_node`: キャプション(CaptionNodeインスタンス) + - `metric`: メトリック情報(サイズ、スケール等) + - `image_type`: 画像タイプ(`:image`, `:indepimage`, `:numberlessimage`) +- 用途: `//image`, `//indepimage`コマンドの画像 +- 特徴: リーフノード +- 例: `//image[sample][キャプション][scale=0.8]` + +### 7. 特殊ブロックノード + +#### `BlockNode` + +- 親クラス: Node +- 属性: + - `block_type`: ブロックタイプ(`:quote`, `:read`, `:lead` など) + - `args`: 引数配列 + - `caption_node`: キャプション(CaptionNodeインスタンス、オプション) +- 用途: 汎用ブロックコンテナ(引用、読み込み等) +- 例: + - `//quote{ ... 
}` → block_type=":quote" + - `//read[ファイル名]` → block_type=":read", args=["ファイル名"] + +#### `ColumnNode` + +- 親クラス: Node +- 属性: + - `level`: コラムレベル(通常9) + - `label`: ラベル(ID)— インデックス対応完了 + - `caption_node`: キャプション(CaptionNodeインスタンス) + - `column_type`: コラムタイプ(`:column`) +- 用途: `//column`コマンドのコラム、`====[column]{id} タイトル`形式 +- 特徴: + - 見出しのような扱いだが、独立したコンテンツブロック + - `label`属性でIDを指定可能、`@{chapter|id}`で参照 + - AST::Indexerでインデックス処理される + +#### `MinicolumnNode` + +- 親クラス: Node +- 属性: + - `minicolumn_type`: ミニコラムタイプ(`:note`, `:memo`, `:tip`, `:info`, `:warning`, `:important`, `:caution` など) + - `caption_node`: キャプション(CaptionNodeインスタンス) +- 用途: `//note`, `//memo`, `//tip`などのミニコラム +- 特徴: 装飾的なボックス表示される小さなコンテンツブロック + +#### `EmbedNode` + +- 親クラス: Node +- 属性: + - `lines`: 埋め込みコンテンツの行配列 + - `arg`: 引数(単一行の場合) + - `embed_type`: 埋め込みタイプ(`:block`または`:inline`) +- 用途: 埋め込みコンテンツ(`//embed`, `//raw`など) +- 特徴: リーフノード、生のコンテンツをそのまま保持 + +#### `FootnoteNode` + +- 親クラス: Node +- 属性: + - `id`: 脚注ID + - `content`: 脚注内容 + - `footnote_type`: 脚注タイプ(`:footnote`または`:endnote`) +- 用途: `//footnote`コマンドの脚注定義 +- 特徴: + - ドキュメント内の脚注定義部分 + - AST::FootnoteIndexで統合処理(インライン参照とブロック定義) + - 重複ID問題と内容表示の改善完了 + +#### `TexEquationNode` + +- 親クラス: Node +- 属性: + - `label`: 数式ID(オプション) + - `caption_node`: キャプション(CaptionNodeインスタンス) + - `code`: LaTeX数式コード +- 用途: `//texequation`コマンドのLaTeX数式ブロック +- 特徴: + - ID付き数式への参照機能対応 + - LaTeX数式コードをそのまま保持 + - 数式インデックスで管理される + +### 8. 
特殊ノード + +#### `CaptionNode` + +- 親クラス: Node +- 特殊機能: + - ファクトリーメソッド `CaptionNode.parse(caption_text, location)` + - テキストとインライン要素の解析 +- 用途: キャプションでインライン要素とテキストを含む +- メソッド: + - `to_inline_text()`: マークアップを除いたプレーンテキスト変換(子ノードを再帰的に処理) + - `contains_inline?()`: インライン要素を含むかチェック + - `empty?()`: 空かどうかのチェック +- 例: `this is @{bold} caption` → TextNode + InlineNode + TextNode +- 設計方針: + - 常に構造化されたノード(children配列)として扱われる + - JSON出力では文字列としての`caption`フィールドを出力しない + - キャプションは構造を持つべきという設計原則を徹底 + +## 処理システム + +### Visitorパターン (`Visitor`) + +- 目的: ノードごとの処理メソッドを動的に決定 +- メソッド命名規則: `visit_#{node_type}`(例:`visit_headline`, `visit_paragraph`) +- メソッド名の決定: 各ノードの`visit_method_name()`メソッドが適切なシンボルを返す +- 主要メソッド: + - `visit(node)`: ノードの`visit_method_name()`を呼び出して適切なvisitメソッドを決定し実行 + - `visit_all(nodes)`: 複数のノードを訪問して結果の配列を返す +- 例: `HeadlineNode`に対して`visit_headline(node)`が呼ばれる +- 実装の詳細: + - ノードの`visit_method_name()`がCamelCaseからsnake_caseへの変換を行う + - クラス名から`Node`サフィックスを除去して`visit_`プレフィックスを追加 + +### インデックス系システム (`Indexer`) + +- 目的: ASTノードから各種インデックスを生成 +- 対応要素: + - HeadlineNode: 見出しインデックス + - ColumnNode: コラムインデックス + - ImageNode, TableNode, ListNode: 各種図表インデックス + +### 脚注インデックス (`FootnoteIndex`) + +- 目的: AST専用の脚注管理システム +- 特徴: + - インライン参照とブロック定義の統合処理 + - 重複ID問題の解決 + - 従来のBook::FootnoteIndexとの互換性保持 + +### 6. データ構造 (`BlockData`) + +#### `BlockData` + + +- 定義: `Data.define`を使用したイミュータブルなデータ構造 +- 目的: ブロックコマンドの情報をカプセル化し、IO読み取りとブロック処理の責務を分離 +- パラメータ: + - `name` [Symbol]: ブロックコマンド名(例:`:list`, `:note`, `:table`) + - `args` [Array]: コマンドライン引数(デフォルト: `[]`) + - `lines` [Array]: ブロック内のコンテンツ行(デフォルト: `[]`) + - `nested_blocks` [Array]: ネストされたブロックコマンド(デフォルト: `[]`) + - `location` [SnapshotLocation]: エラー報告用のソース位置情報 +- 主要メソッド: + - `nested_blocks?()`: ネストされたブロックを持つかどうかを判定 + - `line_count()`: 行数を返す + - `content?()`: コンテンツ行を持つかどうかを判定 + - `arg(index)`: 指定されたインデックスの引数を安全に取得 +- 使用例: + - Compilerがブロックを読み取り、BlockDataインスタンスを作成 + - BlockProcessorがBlockDataを受け取り、適切なASTノードを生成 +- 特徴: イミュータブルな設計により、データの一貫性と予測可能性を保証 + +### 7. 
リスト処理アーキテクチャ + +リスト処理は複数のコンポーネントが協調して動作します。詳細は [doc/ast_list_processing.md](./ast_list_processing.md) を参照してください。 + +#### `ListParser` + +- 目的: Re:VIEW記法のリストを解析 +- 責務: + - 生テキスト行からリスト項目を抽出 + - ネストレベルの判定 + - 継続行の収集 +- データ構造: + - `ListItemData`: `Struct.new`で定義されたリスト項目データ + - `type`: 項目タイプ(`:ul_item`, `:ol_item`, `:dt`, `:dd`) + - `level`: ネストレベル(デフォルト: 1) + - `content`: 項目内容 + - `continuation_lines`: 継続行の配列(デフォルト: `[]`) + - `metadata`: メタデータハッシュ(デフォルト: `{}`) + - `with_adjusted_level(new_level)`: レベルを調整した新しいインスタンスを返す + +#### `NestedListAssembler` + +- 目的: 解析されたデータから実際のAST構造を組み立て +- 対応機能: + - 6レベルまでの深いネスト対応 + - 非対称・不規則パターンの処理 + - リストタイプの混在対応(番号付き・箇条書き・定義リスト) +- 主要メソッド: + - `build_nested_structure(items, list_type)`: ネスト構造の構築 + - `build_unordered_list(items)`: 箇条書きリストの構築 + - `build_ordered_list(items)`: 番号付きリストの構築 + +#### `ListProcessor` + +- 目的: リスト処理全体の調整 +- 責務: + - ListParserとNestedListAssemblerの協調 + - コンパイラーへの統一的なインターフェース提供 +- 内部構成: + - `@parser`: ListParserインスタンス + - `@nested_list_assembler`: NestedListAssemblerインスタンス +- 公開アクセサー: + - `parser`: ListParserへのアクセス(読み取り専用) + - `nested_list_assembler`: NestedListAssemblerへのアクセス(読み取り専用) +- 主要メソッド: + - `process_unordered_list(f)`: 箇条書きリスト処理 + - `process_ordered_list(f)`: 番号付きリスト処理 + - `process_definition_list(f)`: 定義リスト処理 + - `parse_list_items(f, list_type)`: リスト項目の解析(テスト用) + - `build_list_from_items(items, list_type)`: 項目からリストノードを構築 + +#### `ListStructureNormalizer` + +- 目的: リスト構造の正規化と整合性保証 +- 責務: + - ネストされたリスト構造の整合性チェック + - 不正なネスト構造の修正 + - 空のリストノードの除去 + +#### `ListItemNumberingProcessor` + +- 目的: 番号付きリストの番号管理 +- 責務: + - 連番の割り当て + - ネストレベルに応じた番号の管理 + - カスタム開始番号のサポート + +### 8. インライン要素レンダラー (`InlineElementRenderer`) + +- 目的: LaTeXレンダラーからインライン要素処理を分離 +- 特徴: + - 保守性とテスタビリティの向上 + - メソッド名の統一(`render_inline_xxx`形式) + - コラム参照機能の完全実装 + +### 9. 
JSON シリアライゼーション (`JSONSerializer`) + +- Options クラス: シリアライゼーション設定 + - `simple_mode`: 簡易モード(基本属性のみ) + - `include_location`: 位置情報を含める + - `include_original_text`: 元テキストを含める +- 主要メソッド: + - `serialize(node, options)`: ASTをJSON形式に変換 + - `deserialize(json_data)`: JSONからASTを復元 +- 用途: AST構造の保存、デバッグ、ツール連携 +- CaptionNode処理: + - JSON出力では文字列としての`caption`フィールドを出力しない + - 常に`caption_node`として構造化されたノードを出力 + - デシリアライゼーション時は後方互換性のため文字列も受け入れ可能 + +### 10. コンパイラー (`Compiler`) + +- 目的: Re:VIEWコンテンツからASTを生成 +- 連携コンポーネント: + - `InlineProcessor`: インライン要素の処理 + - `BlockProcessor`: ブロック要素の処理 + - `ListProcessor`: リスト構造の処理(ListParser、NestedListAssemblerと協調) +- パフォーマンス機能: コンパイル時間の計測とトラッキング +- 主要メソッド: `compile_to_ast(chapter)`: チャプターからASTを生成 + +## 使用例とパターン + +### 1. 基本的なAST構造例 +``` +DocumentNode +├── HeadlineNode (level=1) +│ └── caption_node: CaptionNode +│ └── TextNode (content="Chapter Title") +├── ParagraphNode +│ ├── TextNode (content="This is ") +│ ├── InlineNode (inline_type="b") +│ │ └── TextNode (content="bold") +│ └── TextNode (content=" text.") +└── CodeBlockNode (lang="ruby", code_type="list") + ├── CodeLineNode + │ └── TextNode (content="puts 'Hello'") + └── CodeLineNode + └── TextNode (content="end") +``` + +### 2. リーフノードの特徴 +以下のノードは子ノードを持たない(リーフノード): +- `TextNode`: プレーンテキスト +- `ReferenceNode`: 参照情報を持つテキスト(TextNodeのサブクラス) +- `ImageNode`: 画像参照 +- `EmbedNode`: 埋め込みコンテンツ + +### 3. 特殊な子ノード管理 +- `TableNode`: `header_rows`, `body_rows`配列で行を分類管理 +- `CodeBlockNode`: `CodeLineNode`の配列で行を管理 +- `CaptionNode`: テキストとインライン要素の混合コンテンツ +- `ListNode`: ネストしたリスト構造をサポート + +### 4. ノードの位置情報 (`SnapshotLocation`) +- すべてのノードは`location`属性でソースファイル内の位置を保持 +- デバッグやエラーレポートに使用 + +### 5. インライン要素の種類 +主要なインライン要素タイプ: +- テキスト装飾: `b`, `i`, `tt`, `u`, `strike` +- リンク: `href`, `link` +- 参照: `img`, `table`, `list`, `chap`, `hd`, `column` (コラム参照) +- 特殊: `fn` (脚注), `kw` (キーワード), `ruby` (ルビ) +- 数式: `m` (インライン数式) +- クロスチャプター参照: `@{chapter|id}` 形式 + +### 6. 
ブロック要素の種類 +主要なブロック要素タイプ: +- 基本: `quote`, `lead`, `flushright`, `centering` +- コード: `list`, `listnum`, `emlist`, `emlistnum`, `cmd`, `source` +- 表: `table`, `emtable`, `imgtable` +- メディア: `image`, `indepimage` +- コラム: `note`, `memo`, `tip`, `info`, `warning`, `important`, `caution` + +## 実装上の注意点 + +1. ノードの設計原則: + - ブランチノードは`Node`を継承し、子ノードを持てる + - リーフノードは`LeafNode`を継承し、子ノードを持てない + - 同じノードで`content`と`children`を混在させない + - `to_inline_text()`メソッドを適切にオーバーライドする + +2. 循環参照の回避: 親子関係の管理で循環参照が発生しないよう注意 + +3. データ・クラス構造: + - 中間表現はイミュータブルなデータクラス(`Data.define`)、ノードはミュータブルな通常クラスという使い分け + - リーフノードのサブクラスは子ノード配列を持たない、という使い分け + +4. 拡張性: 新しいノードタイプの追加が容易な構造 + - Visitorパターンによる処理の分離 + - `visit_method_name()`による動的なメソッドディスパッチ + +5. 互換性: 既存のBuilder/Compilerシステムとの互換性維持 + +6. CaptionNodeの一貫性: キャプションは常に構造化ノード(CaptionNode)として扱い、文字列として保持しない + +7. イミュータブル設計: `BlockData`などのデータ構造は`Data.define`を使用し、予測可能性と一貫性を保証 + +このASTシステムにより、Re:VIEWはテキスト形式から構造化されたデータに変換し、HTML、PDF、EPUB等の様々な出力形式に対応できるようになっています。 diff --git a/doc/ast_node.md b/doc/ast_node.md new file mode 100644 index 000000000..789de9181 --- /dev/null +++ b/doc/ast_node.md @@ -0,0 +1,602 @@ +# Re:VIEW AST::Node Overview + +## Overview + +Re:VIEW's AST (Abstract Syntax Tree) is a structured node tree of Re:VIEW format text that can be converted to various output formats. + +## Basic Design Patterns + +1. Visitor Pattern: Uses Visitor pattern for processing AST nodes +2. Composite Pattern: Node structure with parent-child relationships +3. Factory Pattern: Creation of CaptionNode, etc. +4. 
Serialization: Saving and restoring AST in JSON format + +## Base Class: `AST::Node` + +### Main Attributes +- `location`: Location information in source file (file name, line number) +- `parent`: Parent node (Node instance) +- `children`: Array of child nodes +- `type`: Node type (string) +- `id`: ID (if applicable) +- `content`: Content (if applicable) +- `original_text`: Original text + +### Main Methods +- `add_child(child)`, `remove_child(child)`, `replace_child(old_child, new_child)`, `insert_child(idx, *nodes)`: Child node management +- `leaf_node?()`: Determines if it's a leaf node +- `reference_node?()`: Determines if it's a reference node +- `id?()`: Determines if it has an ID +- `add_attribute(key, value)`, `attribute?(key)`: Attribute management +- `visit_method_name()`: Returns method name as symbol for use in Visitor pattern +- `to_inline_text()`: Returns text representation without markup (raises exception for branch nodes, overridden in subclasses) +- `to_h`, `to_json`: Basic JSON serialization +- `serialize_to_hash(options)`: Extended serialization + +### Design Principles +- Branch nodes: All node classes not inheriting from `LeafNode`. Can have child nodes (`ParagraphNode`, `InlineNode`, etc.) +- Leaf nodes: Inherit from `LeafNode`, cannot have child nodes (`TextNode`, `ImageNode`, etc.) 
+- `LeafNode` has `content` attribute, but subclasses can define their own attributes +- Do not mix `content` and `children` in the same node + - Leaf nodes also have `children`, but always return an empty array (never `nil`) + +## Base Class: `AST::LeafNode` + +### Overview +- Parent class: Node +- Purpose: Base class for terminal nodes that cannot have children +- Features: + - Has `content` attribute (always string, default is empty string) + - Raises error when attempting to add child nodes + - `leaf_node?` method returns `true` + +### Main Methods +- `leaf_node?()`: Always returns `true` +- `children`: Always returns empty array +- `add_child(child)`: Raises error (cannot have children) +- `to_inline_text()`: Returns `content` + +### Classes Inheriting from LeafNode +- `TextNode`: Plain text (and its subclass `ReferenceNode`) +- `ImageNode`: Images (but has `id`, `caption_node`, `metric` instead of `content`) +- `TexEquationNode`: LaTeX equations +- `EmbedNode`: Embedded content +- `FootnoteNode`: Footnote definitions + +## Node Class Hierarchy + +``` +AST::Node (base class) +├── [Branch nodes] - Can have child nodes +│ ├── DocumentNode # Document root +│ ├── HeadlineNode # Headings (=, ==, ===) +│ ├── ParagraphNode # Paragraph text +│ ├── InlineNode # Inline elements (@{}, @{}, etc.) +│ ├── CaptionNode # Caption (text + inline elements) +│ ├── ListNode # List (ul, ol, dl) +│ │ └── ListItemNode # List item +│ ├── TableNode # Table +│ │ ├── TableRowNode # Table row +│ │ └── TableCellNode # Table cell +│ ├── CodeBlockNode # Code block +│ │ └── CodeLineNode # Code line +│ ├── BlockNode # Generic block (//quote, //read, etc.) +│ ├── ColumnNode # Column (====[column]{id}) +│ └── MinicolumnNode # Mini-column (//note, //memo, etc.) +│ +└── LeafNode (base class for leaf nodes) - Cannot have child nodes + ├── TextNode # Plain text + │ └── ReferenceNode # Text node with reference information + ├── ImageNode # Image (//image, //indepimage, etc.) 
+ ├── FootnoteNode # Footnote definition (//footnote) + ├── TexEquationNode # LaTeX equation block (//texequation) + └── EmbedNode # Embedded content (//embed, //raw) +``` + +### Node Classification + +#### Structure Nodes (Containers) +- `DocumentNode`, `HeadlineNode`, `ParagraphNode`, `ListNode`, `TableNode`, `CodeBlockNode`, `BlockNode`, `ColumnNode`, `MinicolumnNode` + +#### Content Nodes (Leaves) +- `TextNode`, `ReferenceNode`, `ImageNode`, `FootnoteNode`, `TexEquationNode`, `EmbedNode` + +#### Special Nodes +- `InlineNode` (contains text but is an inline element) +- `CaptionNode` (mixed text and inline elements) +- `ReferenceNode` (subclass of TextNode, holds reference information) +- `ListItemNode`, `TableRowNode`, `TableCellNode`, `CodeLineNode` (specific to certain parent nodes) + +## Node Class Details + +### 1. Document Structure Nodes + +#### `DocumentNode` + +- Parent class: Node +- Attributes: + - `title`: Document title + - `chapter`: Related chapter +- Purpose: Root node of AST, represents entire document +- Example: One entire chapter file +- Features: Usually has HeadlineNode, ParagraphNode, BlockNode, etc. as children + +#### `HeadlineNode` + +- Parent class: Node +- Attributes: + - `level`: Heading level (1-6) + - `label`: Label (optional) + - `caption_node`: Caption (CaptionNode instance) +- Purpose: `=`, `==`, `===` format headings +- Examples: + - `= Chapter Title` → level=1, caption_node=CaptionNode + - `=={label} Section Title` → level=2, label="label", caption_node=CaptionNode +- Methods: `to_s`: String representation for debugging + +#### `ParagraphNode` + +- Parent class: Node +- Purpose: Regular paragraph text +- Features: Contains TextNode and InlineNode as children +- Example: Regular text paragraph, text within lists + +### 2. 
Text Content Nodes + +#### `TextNode` + +- Parent class: LeafNode +- Attributes: + - `content`: Text content (string) +- Purpose: Represents plain text +- Features: Leaf node (no children) +- Example: String in paragraph, string in inline element + +#### `ReferenceNode` + +- Parent class: TextNode +- Attributes: + - `content`: Display text (inherited) + - `ref_id`: Reference ID (main reference target) + - `context_id`: Context ID (chapter ID, etc., optional) + - `resolved`: Whether reference is resolved + - `resolved_data`: Structured resolved data (ResolvedData) +- Purpose: Used as child node of reference inline elements (`@<img>{}`, `@<table>{}`, `@<list>{}`, etc.) +- Features: + - Subclass of TextNode, holds reference information + - Immutable design (creates new instance when resolving reference) + - Displays reference ID when unresolved, generates appropriate reference text when resolved +- Main methods: + - `resolved?()`: Determines if reference is resolved + - `with_resolved_data(data)`: Returns new resolved instance +- Example: `@<img>{sample-image}` → ReferenceNode(ref_id: "sample-image") + +#### `InlineNode` + +- Parent class: Node +- Attributes: + - `inline_type`: Inline element type (string) + - `args`: Argument array +- Purpose: Inline elements (`@<b>{}`, `@<code>{}`, etc.) +- Examples: + - `@<b>{bold}` → inline_type="b", args=["bold"] + - `@<href>{https://example.com,link}` → inline_type="href", args=["https://example.com", "link"] +- Features: Often contains TextNode as children + +### 3. Code Block Nodes + +#### `CodeBlockNode` + +- Parent class: Node +- Attributes: + - `lang`: Programming language (optional) + - `caption_node`: Caption (CaptionNode instance) + - `line_numbers`: Line number display flag + - `code_type`: Code block type (`:list`, `:emlist`, `:listnum`, etc.) + - `original_text`: Original code text +- Purpose: Code blocks like `//list`, `//emlist`, `//listnum` +- Features: Has `CodeLineNode` children +- Methods: + - `original_lines()`: Original text line array + - `processed_lines()`: Processed text line array + +#### `CodeLineNode` + +- Parent class: Node +- Attributes: + - `line_number`: Line number (optional) + - `original_text`: Original text +- Purpose: Each line in code block +- Features: Can include inline elements (Re:VIEW notation can be used) +- Example: Notation like `@<b>{emphasis}` in code + +### 4. 
List Nodes + +#### `ListNode` + +- Parent class: Node +- Attributes: + - `list_type`: List type (`:ul` (bulleted), `:ol` (ordered), `:dl` (definition)) + - `olnum_start`: Starting number for ordered list (optional) +- Purpose: Lists (bulleted `*`, ordered `1.`, and definition `: term` formats) +- Children: Array of `ListItemNode` + +#### `ListItemNode` + +- Parent class: Node +- Attributes: + - `level`: Nesting level (1 or higher) + - `number`: Number in ordered list (optional) + - `item_type`: Item type (`:ul_item`, `:ol_item`, `:dt`, `:dd`) +- Purpose: List items +- Features: Can have nested lists and paragraphs as children + +### 5. Table Nodes + +#### `TableNode` + +- Parent class: Node +- Attributes: + - `caption_node`: Caption (CaptionNode instance) + - `table_type`: Table type (`:table`, `:emtable`, `:imgtable`) + - `metric`: Metric information (width settings, etc.) +- Special structure: + - `header_rows`: Array of header rows + - `body_rows`: Array of body rows +- Purpose: Tables from `//table` command +- Methods: Manages header and body rows separately + +#### `TableRowNode` + +- Parent class: Node +- Attributes: + - `row_type`: Row type (`:header`, `:body`) +- Purpose: Table row +- Children: Array of `TableCellNode` + +#### `TableCellNode` + +- Parent class: Node +- Attributes: + - `cell_type`: Cell type (`:th` (header) or `:td` (regular cell)) + - `colspan`, `rowspan`: Cell merge information (optional) +- Purpose: Table cell +- Features: Has TextNode and InlineNode as children + +### 6. Media Nodes + +#### `ImageNode` + +- Parent class: LeafNode +- Attributes: + - `caption_node`: Caption (CaptionNode instance) + - `metric`: Metric information (size, scale, etc.) + - `image_type`: Image type (`:image`, `:indepimage`, `:numberlessimage`) +- Purpose: Images from `//image`, `//indepimage` commands +- Features: Leaf node +- Example: `//image[sample][Caption][scale=0.8]` + +### 7. 
Special Block Nodes + +#### `BlockNode` + +- Parent class: Node +- Attributes: + - `block_type`: Block type (`:quote`, `:read`, `:lead`, etc.) + - `args`: Argument array + - `caption_node`: Caption (CaptionNode instance, optional) +- Purpose: Generic block container (quotes, reads, etc.) +- Examples: + - `//quote{ ... }` → block_type=":quote" + - `//read[filename]` → block_type=":read", args=["filename"] + +#### `ColumnNode` + +- Parent class: Node +- Attributes: + - `level`: Column level (usually 9) + - `label`: Label (ID) — indexing complete + - `caption_node`: Caption (CaptionNode instance) + - `column_type`: Column type (`:column`) +- Purpose: Column from `//column` command, `====[column]{id} Title` format +- Features: + - Treated like heading but independent content block + - Can specify ID with `label` attribute, referenced with `@{chapter|id}` + - Indexed by AST::Indexer + +#### `MinicolumnNode` + +- Parent class: Node +- Attributes: + - `minicolumn_type`: Mini-column type (`:note`, `:memo`, `:tip`, `:info`, `:warning`, `:important`, `:caution`, etc.) + - `caption_node`: Caption (CaptionNode instance) +- Purpose: Mini-columns like `//note`, `//memo`, `//tip` +- Features: Small content blocks displayed in decorative boxes + +#### `EmbedNode` + +- Parent class: Node +- Attributes: + - `lines`: Array of embedded content lines + - `arg`: Argument (for single line) + - `embed_type`: Embed type (`:block` or `:inline`) +- Purpose: Embedded content (`//embed`, `//raw`, etc.) 
+- Features: Leaf node, preserves raw content as is + +#### `FootnoteNode` + +- Parent class: Node +- Attributes: + - `id`: Footnote ID + - `content`: Footnote content + - `footnote_type`: Footnote type (`:footnote` or `:endnote`) +- Purpose: Footnote definition from `//footnote` command +- Features: + - Footnote definition part in document + - Integrated processing with AST::FootnoteIndex (inline references and block definitions) + - Duplicate ID issue and content display improvements complete + +#### `TexEquationNode` + +- Parent class: Node +- Attributes: + - `label`: Equation ID (optional) + - `caption_node`: Caption (CaptionNode instance) + - `code`: LaTeX equation code +- Purpose: LaTeX equation block from `//texequation` command +- Features: + - Reference function for equations with ID + - Preserves LaTeX equation code as is + - Managed by equation index + +### 8. Special Nodes + +#### `CaptionNode` + +- Parent class: Node +- Special features: + - Factory method `CaptionNode.parse(caption_text, location)` + - Parsing text and inline elements +- Purpose: Contains inline elements and text in captions +- Methods: + - `to_inline_text()`: Plain text conversion without markup (recursively processes children) + - `contains_inline?()`: Checks if it contains inline elements + - `empty?()`: Checks if empty +- Example: `this is @<b>{bold} caption` → TextNode + InlineNode + TextNode +- Design policy: + - Always treated as structured node (children array) + - Does not output string `caption` field in JSON output + - Enforces design principle that captions should have structure + +## Processing Systems + +### Visitor Pattern (`Visitor`) + +- Purpose: Dynamically determine processing method for each node +- Method naming convention: `visit_#{node_type}` (e.g., `visit_headline`, `visit_paragraph`) +- Method name determination: Each node's `visit_method_name()` method returns appropriate symbol +- Main methods: + - `visit(node)`: Calls node's `visit_method_name()` to determine
and execute appropriate visit method + - `visit_all(nodes)`: Visits multiple nodes and returns array of results +- Example: `visit_headline(node)` is called for `HeadlineNode` +- Implementation details: + - Node's `visit_method_name()` converts from CamelCase to snake_case + - Removes `Node` suffix from class name and adds `visit_` prefix + +### Index Systems (`Indexer`) + +- Purpose: Generate various indexes from AST nodes +- Supported elements: + - HeadlineNode: Heading index + - ColumnNode: Column index + - ImageNode, TableNode, ListNode: Various figure/table indexes + +### Footnote Index (`FootnoteIndex`) + +- Purpose: AST-specific footnote management system +- Features: + - Integrated processing of inline references and block definitions + - Resolution of duplicate ID issues + - Maintains compatibility with traditional Book::FootnoteIndex + +### 6. Data Structures (`BlockData`) + +#### `BlockData` + +- Definition: Keyword-initialized data structure defined with `Struct.new(..., keyword_init: true)` +- Purpose: Encapsulates block command information, separating IO reading from block processing responsibilities +- Parameters: + - `name` [Symbol]: Block command name (e.g., `:list`, `:note`, `:table`) + - `args` [Array]: Command line arguments (default: `[]`) + - `lines` [Array]: Content lines within block (default: `[]`) + - `nested_blocks` [Array]: Nested block commands (default: `[]`) + - `location` [SnapshotLocation]: Source location information for error reporting +- Main methods: + - `nested_blocks?()`: Determines if it has nested blocks + - `line_count()`: Returns number of lines + - `content?()`: Determines if it has content lines + - `arg(index)`: Safely retrieves argument at specified index +- Usage example: + - Compiler reads blocks and creates BlockData instances + - BlockProcessor receives BlockData and generates appropriate AST nodes +- Features: A single value object carries all information about a block; instances are not frozen, so treat them as read-only after creation to keep processing consistent and predictable + +### 7.
List Processing Architecture + +List processing involves multiple components working together. See [doc/ast_list_processing.md](./ast_list_processing.md) for details. + +#### `ListParser` + +- Purpose: Parse Re:VIEW list notation +- Responsibilities: + - Extract list items from raw text lines + - Determine nesting levels + - Collect continuation lines +- Data structure: + - `ListItemData`: List item data defined with `Struct.new` + - `type`: Item type (`:ul_item`, `:ol_item`, `:dt`, `:dd`) + - `level`: Nesting level (default: 1) + - `content`: Item content + - `continuation_lines`: Array of continuation lines (default: `[]`) + - `metadata`: Metadata hash (default: `{}`) + - `with_adjusted_level(new_level)`: Returns new instance with adjusted level + +#### `NestedListAssembler` + +- Purpose: Assemble actual AST structure from parsed data +- Supported features: + - Deep nesting up to 6 levels + - Handling asymmetric and irregular patterns + - Mixed list types (ordered, unordered, definition lists) +- Main methods: + - `build_nested_structure(items, list_type)`: Build nested structure + - `build_unordered_list(items)`: Build unordered list + - `build_ordered_list(items)`: Build ordered list + +#### `ListProcessor` + +- Purpose: Coordinate entire list processing +- Responsibilities: + - Coordinate ListParser and NestedListAssembler + - Provide unified interface to compiler +- Internal components: + - `@parser`: ListParser instance + - `@nested_list_assembler`: NestedListAssembler instance +- Public accessors: + - `parser`: Access to ListParser (read-only) + - `nested_list_assembler`: Access to NestedListAssembler (read-only) +- Main methods: + - `process_unordered_list(f)`: Process unordered list + - `process_ordered_list(f)`: Process ordered list + - `process_definition_list(f)`: Process definition list + - `parse_list_items(f, list_type)`: Parse list items (for testing) + - `build_list_from_items(items, list_type)`: Build list node from items + +#### 
`ListStructureNormalizer` + +- Purpose: Normalize list structure and ensure consistency +- Responsibilities: + - Check consistency of nested list structures + - Fix invalid nesting structures + - Remove empty list nodes + +#### `ListItemNumberingProcessor` + +- Purpose: Manage numbers for ordered lists +- Responsibilities: + - Assign sequential numbers + - Manage numbers according to nesting level + - Support custom starting numbers + +### 8. Inline Element Renderer (`InlineElementRenderer`) + +- Purpose: Separate inline element processing from LaTeX renderer +- Features: + - Improved maintainability and testability + - Unified method naming (`render_inline_xxx` format) + - Full implementation of column reference functionality + +### 9. JSON Serialization (`JSONSerializer`) + +- Options class: Serialization settings + - `simple_mode`: Simple mode (basic attributes only) + - `include_location`: Include location information + - `include_original_text`: Include original text +- Main methods: + - `serialize(node, options)`: Convert AST to JSON format + - `deserialize(json_data)`: Restore AST from JSON +- Usage: Save AST structure, debug, tool integration +- CaptionNode processing: + - Does not output string `caption` field in JSON output + - Always outputs structured node as `caption_node` + - Can accept strings during deserialization for backward compatibility + +### 10. Compiler (`Compiler`) + +- Purpose: Generate AST from Re:VIEW content +- Coordinated components: + - `InlineProcessor`: Process inline elements + - `BlockProcessor`: Process block elements + - `ListProcessor`: Process list structures (coordinates with ListParser, NestedListAssembler) +- Performance features: Compilation time measurement and tracking +- Main methods: `compile_to_ast(chapter)`: Generate AST from chapter + +## Usage Examples and Patterns + +### 1. 
Basic AST Structure Example +``` +DocumentNode +├── HeadlineNode (level=1) +│ └── caption_node: CaptionNode +│ └── TextNode (content="Chapter Title") +├── ParagraphNode +│ ├── TextNode (content="This is ") +│ ├── InlineNode (inline_type="b") +│ │ └── TextNode (content="bold") +│ └── TextNode (content=" text.") +└── CodeBlockNode (lang="ruby", code_type="list") + ├── CodeLineNode + │ └── TextNode (content="puts 'Hello'") + └── CodeLineNode + └── TextNode (content="end") +``` + +### 2. Leaf Node Features +The following nodes do not have children (leaf nodes): +- `TextNode`: Plain text +- `ReferenceNode`: Text with reference information (subclass of TextNode) +- `ImageNode`: Image reference +- `EmbedNode`: Embedded content + +### 3. Special Child Node Management +- `TableNode`: Manages rows classified in `header_rows`, `body_rows` arrays +- `CodeBlockNode`: Manages lines in `CodeLineNode` array +- `CaptionNode`: Mixed content of text and inline elements +- `ListNode`: Supports nested list structure + +### 4. Node Location Information (`SnapshotLocation`) +- All nodes hold position in source file with `location` attribute +- Used for debugging and error reporting + +### 5. Inline Element Types +Main inline element types: +- Text decoration: `b`, `i`, `tt`, `u`, `strike` +- Links: `href`, `link` +- References: `img`, `table`, `list`, `chap`, `hd`, `column` (column reference) +- Special: `fn` (footnote), `kw` (keyword), `ruby` (ruby) +- Math: `m` (inline math) +- Cross-chapter references: `@{chapter|id}` format + +### 6. Block Element Types +Main block element types: +- Basic: `quote`, `lead`, `flushright`, `centering` +- Code: `list`, `listnum`, `emlist`, `emlistnum`, `cmd`, `source` +- Tables: `table`, `emtable`, `imgtable` +- Media: `image`, `indepimage` +- Columns: `note`, `memo`, `tip`, `info`, `warning`, `important`, `caution` + +## Implementation Notes + +1. 
Node design principles: + - Branch nodes inherit from `Node` and can have children + - Leaf nodes inherit from `LeafNode` and cannot have children + - Do not mix `content` and `children` in same node + - Override `to_inline_text()` method appropriately + +2. Avoid circular references: Be careful not to create circular references when managing parent-child relationships + +3. Data/Class structure: + - Intermediate representations (e.g., `BlockData`, `ListItemData`) are lightweight `Struct`-based data classes, nodes use regular classes + - Leaf node subclasses don't have child node arrays + +4. Extensibility: Structure that makes adding new node types easy + - Separation of processing through Visitor pattern + - Dynamic method dispatch through `visit_method_name()` + +5. Compatibility: Maintain compatibility with existing Builder/Compiler system + +6. CaptionNode consistency: Always treat captions as structured nodes (CaptionNode), not as strings + +7. Data structure design: Data structures like `BlockData` are keyword-initialized `Struct`s; treat instances as read-only after creation to ensure predictability and consistency + +This AST system enables Re:VIEW to convert text format to structured data and support various output formats such as HTML, PDF, EPUB, etc. diff --git a/lib/review/ast.rb b/lib/review/ast.rb new file mode 100644 index 000000000..fadb86d55 --- /dev/null +++ b/lib/review/ast.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1.
# BlockContext - Scoped context for block processing
#
# Pairs the data read for one block command (//list, //image, //table, etc.)
# with the compiler that is processing it, so that nodes, captions and inline
# elements created for the block all use the block's start location.
#
# Main features:
# - Maintain and propagate block start location
# - Node creation within context
# - Inline processing under the block start location
# - Support for nested block processing
class BlockContext
  attr_reader :start_location, :compiler, :block_data

  # @param block_data [BlockData] Data read for the block; its +location+
  #   becomes the context's start location
  # @param compiler [Object] Compiler that owns this context
  def initialize(block_data:, compiler:)
    @block_data = block_data
    @start_location = block_data.location
    @compiler = compiler
  end

  # Create AST node within this context
  # Location defaults to the block start location
  #
  # @param node_class [Class] Node class to create
  # @param attrs [Hash] Node attributes (keyword arguments for +node_class.new+)
  # @return [AST::Node] Created node
  def create_node(node_class, **attrs)
    # Only fill in the location when the caller did not pass one explicitly
    attrs[:location] ||= @start_location
    node_class.new(**attrs)
  end

  # Create a new AST node, optionally configure it with a block, and append
  # it to the compiler's current node. The block runs BEFORE the node is
  # appended.
  #
  # @param node_class [Class] Node class to create
  # @param attrs [Hash] Node attributes
  # @yieldparam node [AST::Node] The freshly created node
  # @return [AST::Node] The created and appended node
  #
  # @example
  #   context.append_new_node(AST::ListNode, list_type: :ul) do |list_node|
  #     list_node.add_child(item1)
  #     list_node.add_child(item2)
  #   end
  def append_new_node(node_class, **attrs)
    node = create_node(node_class, **attrs)
    yield(node) if block_given?
    @compiler.add_child_to_current_node(node)
    node
  end

  # Process inline elements within this context
  # Runs the compiler's inline processor while the compiler's location is
  # temporarily overridden with the block start location
  #
  # @param text [String] Text to process
  # @param parent_node [AST::Node] Parent node to add inline elements to
  def process_inline_elements(text, parent_node)
    @compiler.with_temporary_location!(@start_location) do
      @compiler.inline_processor.parse_inline_elements(text, parent_node)
    end
  end

  # Process caption within this context
  # Generate caption using block start location
  #
  # @param args [Array, nil] Arguments array
  # @param caption_index [Integer, nil] Index of the caption inside +args+
  # @return [CaptionNode, nil] Caption node, or nil when +args+ or the index
  #   is missing, the index is negative or out of range, or the entry is nil
  def process_caption(args, caption_index)
    return nil unless args && caption_index && caption_index >= 0 && args.size > caption_index

    caption_text = args[caption_index]
    return nil if caption_text.nil?

    @compiler.build_caption_node(caption_text, caption_location: @start_location)
  end

  # Process nested blocks
  # Hands each nested block to the compiler's block processor while
  # +parent_node+ is the compiler's temporary current node
  #
  # @param parent_node [AST::Node] Parent node to add nested blocks to
  def process_nested_blocks(parent_node)
    return unless @block_data.nested_blocks?

    @compiler.with_temporary_ast_node!(parent_node) do
      @block_data.nested_blocks.each do |nested_block|
        @compiler.block_processor.process_block_command(nested_block)
      end
    end
  end

  # Integrated processing of structured content and nested blocks
  # Text lines are processed first, nested blocks afterwards
  #
  # @param parent_node [AST::Node] Parent node to add content to
  def process_structured_content_with_blocks(parent_node)
    if @block_data.content?
      @compiler.process_structured_content(parent_node, @block_data.lines)
    end

    process_nested_blocks(parent_node)
  end

  # Get block argument at index (delegates to the block data)
  #
  # @param index [Integer] Argument index
  # @return [String, nil] Argument value or nil
  def arg(index)
    @block_data.arg(index)
  end

  # Check if block has content
  #
  # @return [Boolean] Whether content exists
  def content?
    @block_data.content?
  end

  # Check if block has nested blocks
  #
  # @return [Boolean] Whether nested blocks exist
  def nested_blocks?
    @block_data.nested_blocks?
  end

  # Get block line count
  #
  # @return [Integer] Line count
  def line_count
    @block_data.line_count
  end

  # Get block content lines
  #
  # @return [Array] Array of content lines
  def lines
    @block_data.lines
  end

  # Get block name
  #
  # @return [Symbol] Block name
  def name
    @block_data.name
  end

  # Get block arguments
  #
  # @return [Array] Array of arguments
  def args
    @block_data.args
  end

  # Debug string representation
  #
  # Identifies the block (name, arguments, size) instead of returning a bare
  # placeholder; the compiler is left out to keep the output short.
  #
  # @return [String] Debug string
  def inspect
    "#<#{self.class} name=#{name} args=#{args.inspect} lines=#{line_count} nested=#{nested_blocks?}>"
  end
end
# Block command data structure for separating IO reading from block processing
#
# Encapsulates all information about a block command that has been read from
# input, including any nested block commands. It serves as the interface
# between Compiler (IO responsibility) and BlockProcessor (processing
# responsibility).
#
# Implemented as a keyword-initialized Struct, so members remain assignable
# after creation. nil passed for +args+, +lines+ or +nested_blocks+ is
# normalized to an empty array, and every reader below tolerates a member
# that was later reset to nil.
#
# @param name [Symbol] Block command name (e.g., :list, :note, :table)
# @param args [Array, nil] Parsed arguments from the command line
# @param lines [Array, nil] Content lines within the block
# @param nested_blocks [Array, nil] Any nested block commands found within this block
# @param location [SnapshotLocation] Source location information for error reporting
BlockData = Struct.new(:name, :args, :lines, :nested_blocks, :location, keyword_init: true) do
  def initialize(name:, location:, args: [], lines: [], nested_blocks: [])
    # Normalize nil collections once so downstream code can rely on arrays
    super(name: name, args: args || [], lines: lines || [], nested_blocks: nested_blocks || [], location: location)
  end

  # @return [Boolean] true when at least one nested block is present
  def nested_blocks?
    Array(nested_blocks).any?
  end

  # @return [Integer] number of content lines (0 when there are none)
  def line_count
    Array(lines).size
  end

  # @return [Boolean] true when at least one content line is present
  def content?
    Array(lines).any?
  end

  # Get argument at specified index safely
  #
  # @param index [Integer] argument index
  # @return [String, nil] argument value, or nil when args/index is missing,
  #   the index is negative, or the index is out of range
  def arg(index)
    return nil unless args && index && index >= 0

    args[index]
  end

  # @return [String] short debug representation (counts instead of full content)
  def inspect
    "#<#{self.class} name=#{name} args=#{args.inspect} lines=#{line_count} nested=#{Array(nested_blocks).size}>"
  end
end
+ @args = args || [] + @caption_node = caption_node + end + + def to_h + result = super.merge( + block_type: block_type + ) + result[:args] = args if args + result[:caption_node] = caption_node&.to_h if caption_node + result + end + + def self.deserialize_from_hash(hash) + block_type = hash['block_type'] ? hash['block_type'].to_sym : :quote + node = new( + location: ReVIEW::AST::JSONSerializer.restore_location(hash), + block_type: block_type, + args: hash['args'], + caption_node: deserialize_caption_from_hash(hash) + ) + if hash['children'] + hash['children'].each do |child_hash| + child = ReVIEW::AST::JSONSerializer.deserialize_from_hash(child_hash) + node.add_child(child) if child.is_a?(ReVIEW::AST::Node) + end + end + node + end + + private + + def serialize_properties(hash, options) + hash[:block_type] = block_type + hash[:args] = args if args + serialize_caption_to_hash(hash, options) + if children.any? + hash[:children] = children.map { |child| child.serialize_to_hash(options) } + end + hash + end + end + end +end diff --git a/lib/review/ast/block_processor.rb b/lib/review/ast/block_processor.rb new file mode 100644 index 000000000..9129e5b27 --- /dev/null +++ b/lib/review/ast/block_processor.rb @@ -0,0 +1,606 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'review/lineinput' +require 'stringio' +require 'review/ast' +require_relative 'block_data' +require_relative 'block_processor/code_block_structure' +require_relative 'block_processor/table_processor' +require_relative 'raw_content_parser' + +module ReVIEW + module AST + # BlockProcessor - Block command processing and AST building + # + # This class handles the conversion of Re:VIEW block commands to AST nodes, + # including code blocks, images, tables, lists, quotes, and minicolumns. 
+ # + # Responsibilities: + # - Process block commands (//list, //image, //table, etc.) + # - Build appropriate AST nodes for block elements + # - Handle block-specific parsing (table structure, list items, etc.) + # - Coordinate with inline processor for content within blocks + class BlockProcessor + # Configuration object for BlockProcessor + # This provides a clean API for configuring custom blocks and code blocks + class Configuration + def initialize(processor) + @processor = processor + end + + # Register a custom block handler + # @param command_name [Symbol] The block command name + # @param handler_method [Symbol] The handler method name + # @see BlockProcessor#register_block_handler + # @example + # config.register_block_handler(:custom_box, :build_custom_box_ast) + def register_block_handler(command_name, handler_method) + @processor.register_block_handler(command_name, handler_method) + end + + # Register a custom code block type + # @param command_name [Symbol] The code block command name + # @param config [Hash] Configuration options + # @see BlockProcessor#register_code_block_handler + # @example + # config.register_code_block(:python) + # config.register_code_block(:pythonnum, line_numbers: true) + def register_code_block(command_name, config = {}) + @processor.register_code_block_handler(command_name, config) + end + end + + @configuration_blocks = [] + + class << self + # Configure BlockProcessor with custom blocks and code blocks + # This method allows users to register custom handlers in a clean, + # declarative way without needing to override initialize. 
+ # + # @yield [config] Configuration block + # @yieldparam config [Configuration] Configuration object + # + # @example Register custom code blocks + # ReVIEW::AST::BlockProcessor.configure do |config| + # config.register_code_block(:python) + # config.register_code_block(:pythonnum, line_numbers: true) + # end + # + # @example Register custom block handlers + # ReVIEW::AST::BlockProcessor.configure do |config| + # config.register_block_handler(:custom_box, :build_custom_box_ast) + # end + # + # @example Register both types + # ReVIEW::AST::BlockProcessor.configure do |config| + # config.register_code_block(:python) + # config.register_block_handler(:custom_box, :build_custom_box_ast) + # end + def configure(&block) + @configuration_blocks << block if block + end + + # Get all registered configuration blocks (for testing) + # @return [Array] Array of configuration blocks + def configuration_blocks + @configuration_blocks.dup + end + + # Clear all registered configuration blocks (for testing) + # @return [void] + def clear_configuration! 
+ @configuration_blocks = [] + end + end + + def initialize(ast_compiler) + @ast_compiler = ast_compiler + @table_processor = TableProcessor.new(ast_compiler) + @dynamic_command_table = BLOCK_COMMAND_TABLE.dup + @dynamic_code_block_configs = CODE_BLOCK_CONFIGS.dup + + apply_configuration + end + + # Register a new block command handler + # @param command_name [Symbol] The block command name (e.g., :custom_block) + # @param handler_method [Symbol] The method name to handle this command + # @example + # register_block_handler(:custom_block, :build_custom_block_ast) + def register_block_handler(command_name, handler_method) + @dynamic_command_table[command_name] = handler_method + end + + # @return [Array] List of all registered command names + def registered_commands + @dynamic_command_table.keys + end + + # Register a custom code block type with its configuration + # + # @param command_name [Symbol] The code block command name (e.g., :python, :javascript) + # @param config [Hash] Configuration options + # @option config [Integer] :id_index Index of ID argument (optional, for //list-style blocks) + # @option config [Integer] :caption_index Index of caption argument (default: 0) + # @option config [Integer] :lang_index Index of language argument (default: 1) + # @option config [String] :default_lang Default language if lang_index not provided (default: command_name) + # @option config [Boolean] :line_numbers Whether to show line numbers (default: false) + # + # @example Register a simple code block + # register_code_block_handler(:python) + # # => caption_index: 0, lang_index: 1, default_lang: 'python' + # + # @example Register a code block with line numbers + # register_code_block_handler(:pythonnum, + # line_numbers: true, + # default_lang: 'python' + # ) + # + # @example Register a list-style code block with ID + # register_code_block_handler(:mylist, + # id_index: 0, + # caption_index: 1, + # lang_index: 2 + # ) + def register_code_block_handler(command_name, 
config = {}) + # Provide sensible defaults + default_config = { + caption_index: 0, + lang_index: 1, + default_lang: command_name.to_s + } + merged_config = default_config.merge(config) + + # Register the configuration + @dynamic_code_block_configs[command_name] = merged_config + + # Register the command handler to use build_code_block_ast + @dynamic_command_table[command_name] = :build_code_block_ast + + merged_config + end + + # Register a new code block configuration + # @param command_name [Symbol] The code block command name (e.g., :python) + # @param config [Hash] The configuration hash with keys like :id_index, :caption_index, :lang_index, :line_numbers, :default_lang + # @example + # register_code_block_config(:python, { id_index: 0, caption_index: 1, lang_index: 2 }) + def register_code_block_config(command_name, config) + @dynamic_code_block_configs[command_name] = config + end + + # @return [Hash] Hash of all registered code block configs + def registered_code_block_configs + @dynamic_code_block_configs.dup + end + + # Unified entry point - table-driven block processing + # Receives BlockData and calls corresponding method based on dynamic command table + def process_block_command(block_data) + handler_method = @dynamic_command_table[block_data.name] + + unless handler_method + location_info = block_data.location.format_for_error + raise CompileError, "Unknown block command: //#{block_data.name}#{location_info}" + end + + # Process block using Block-Scoped Compilation + @ast_compiler.with_block_context(block_data) do |context| + send(handler_method, context) + end + end + + private + + def apply_configuration + config = Configuration.new(self) + self.class.configuration_blocks.each do |block| + block.call(config) + end + end + + def build_code_block_ast(context) + config = @dynamic_code_block_configs[context.name] + unless config + raise CompileError, "Unknown code block type: #{context.name}#{context.start_location.format_for_error}" + end + + structure 
= CodeBlockStructure.from_context(context, config) + context.append_new_node(AST::CodeBlockNode, + id: structure.id, + caption_node: structure.caption_node, + lang: structure.lang, + line_numbers: structure.line_numbers, + code_type: structure.code_type, + original_text: structure.original_text) do |node| + if structure.content? + structure.lines.each_with_index do |line, index| + line_node = context.create_node(AST::CodeLineNode, + line_number: structure.numbered? ? index + 1 : nil, + original_text: line) + + context.process_inline_elements(line, line_node) + + node.add_child(line_node) + end + end + + context.process_nested_blocks(node) + end + end + + def build_image_ast(context) + caption_node = context.process_caption(context.args, 1) + + # Collect block content if present, otherwise use empty string + content = if context.content? + (context.lines.join("\n") + "\n").chomp + else + '' + end + + context.append_new_node(AST::ImageNode, + id: context.arg(0), + caption_node: caption_node, + metric: context.arg(2), + image_type: context.name, + content: content) + end + + def build_table_ast(context) + @table_processor.build_table_node(context) + end + + # Build list with support for both simple lines and //li blocks + def build_list_ast(context) + context.append_new_node(AST::ListNode, list_type: context.name) do |list_node| + # Process text content as simple list items + if context.content? 
+ context.lines.each do |line| + item_node = context.create_node(AST::ListItemNode, + content: line, + level: 1) + list_node.add_child(item_node) + end + end + + # Process nested blocks (including //li blocks) + context.process_nested_blocks(list_node) + end + end + + # Build individual list item with nested content support + def build_list_item_ast(context) + # Validate that //li is inside a list block + parent_node = @ast_compiler.current_ast_node + unless parent_node.is_a?(AST::ListNode) + raise CompileError, "//li must be inside //ul, //ol, or //dl block#{context.start_location.format_for_error}" + end + + context.append_new_node(AST::ListItemNode, level: 1) do |item_node| + # Process content using the same structured content processing as other blocks + # This handles paragraphs, nested lists, and block elements naturally + if context.content? + @ast_compiler.process_structured_content(item_node, context.lines) + end + + # Process nested blocks within this item + context.process_nested_blocks(item_node) + end + end + + # Build definition items (//dt or //dd) for definition lists + def build_definition_item_ast(context) + # Validate that //dt or //dd is inside a //dl block + parent_node = @ast_compiler.current_ast_node + unless parent_node.is_a?(AST::ListNode) && parent_node.list_type == :dl + raise CompileError, "//#{context.name} must be inside //dl block#{context.start_location.format_for_error}" + end + + context.append_new_node(AST::ListItemNode, level: 1, item_type: context.name) do |item_node| + # Process content + if context.content? 
# Builds a minicolumn node (no nesting of minicolumns allowed).
#
# Accepted argument forms:
#   //note[caption]{ ... }      - one argument: caption only
#   //note[id][caption]{ ... }  - two arguments: id and caption
#
# When any ancestor of the current AST node is already a MinicolumnNode the
# error is reported through the compiler and nothing is created.
#
# @param context [BlockContext] block context of the minicolumn command
# @return [Object, nil] result of context.append_new_node, or nil when nested
def build_minicolumn_ast(context)
  # Walk up the parent chain until we run out of nodes or hit a minicolumn.
  cursor = @ast_compiler.current_ast_node
  cursor = cursor.parent while cursor && !cursor.is_a?(AST::MinicolumnNode)
  if cursor
    @ast_compiler.error("minicolumn cannot be nested: //#{context.name}")
    # Log the error and keep compiling without creating the nested node.
    return
  end

  id, caption_index = context.args.length >= 2 ? [context.arg(0), 1] : [nil, 0]
  caption = context.process_caption(context.args, caption_index)

  context.append_new_node(AST::MinicolumnNode, minicolumn_type: context.name, id: id, caption_node: caption) do |minicolumn|
    context.process_structured_content_with_blocks(minicolumn)
  end
end
# Builds a generic BlockNode for the "complex" block commands.
#
# Caption position depends on the command:
#   //graph[id][command][caption]                     -> index 2
#   //bibpaper[id][caption]                           -> index 1
#   //doorquote, //point, //shoot, //term, //box, //insn -> index 0
# Any other command gets no caption node.
#
# @param context [BlockContext] block context of the command
# @return [Object] whatever context.append_new_node returns
def build_complex_block_ast(context)
  caption_positions = {
    graph: 2,
    bibpaper: 1,
    doorquote: 0, point: 0, shoot: 0, term: 0, box: 0, insn: 0
  }
  caption_index = caption_positions[context.name]
  caption = (context.process_caption(context.args, caption_index) if caption_index)

  context.append_new_node(AST::BlockNode, block_type: context.name, args: context.args, caption_node: caption) do |block_node|
    if context.nested_blocks?
      context.process_structured_content_with_blocks(block_node)
    elsif context.content?
      if %i[box insn].include?(context.name)
        # Line-based: every line (even an empty one) becomes its own
        # paragraph node so the original line structure is preserved.
        context.lines.each do |text|
          paragraph = context.create_node(AST::ParagraphNode)
          context.process_inline_elements(text, paragraph) unless text.empty?
          block_node.add_child(paragraph)
        end
      elsif %i[point shoot term].include?(context.name)
        # Paragraph-based: empty lines separate paragraphs.
        @ast_compiler.process_structured_content(block_node, context.lines)
      else
        # Default: inline processing of each line directly into the block.
        context.lines.each { |text| context.process_inline_elements(text, block_node) }
      end
    end
  end
end
# Parses the builder filter of an //embed block.
#
# Accepts forms such as "|html,latex|" or "html,latex": a leading and a
# trailing vertical bar are removed, all whitespace is dropped and the
# remainder is split on commas.
#
# @param arg [String, nil] raw first argument of the block
# @return [Array<String>, nil] builder names, or nil when no filter is given
def parse_embed_builders(arg)
  return nil if arg.nil? || arg.empty?

  without_leading_bar = arg.gsub(/^\s*\|/, '')
  without_bars = without_leading_bar.gsub(/\|\s*$/, '')
  names = without_bars.gsub(/\s/, '')

  names.empty? ? nil : names.split(',')
end
# Per-command argument layout for code blocks.
#
# Keys are read by CodeBlockStructure.from_context:
#   :id_index      - position of the id argument (absent: no id)
#   :caption_index - position of the caption argument
#   :lang_index    - position of the language argument
#   :default_lang  - language used when no language argument is given
#   :line_numbers  - true for numbered variants
CODE_BLOCK_CONFIGS = { # rubocop:disable Lint/UselessConstantScoping
  list: { id_index: 0, caption_index: 1, lang_index: 2 },
  listnum: { id_index: 0, caption_index: 1, lang_index: 2, line_numbers: true },
  emlist: { caption_index: 0, lang_index: 1 },
  emlistnum: { caption_index: 0, lang_index: 1, line_numbers: true },
  cmd: { caption_index: 0, default_lang: 'shell' },
  source: { caption_index: 0, lang_index: 1 }
}.freeze

# Maps a block command name to the builder method that creates its AST node.
BLOCK_COMMAND_TABLE = { # rubocop:disable Lint/UselessConstantScoping
  # Code blocks
  list: :build_code_block_ast,
  listnum: :build_code_block_ast,
  emlist: :build_code_block_ast,
  emlistnum: :build_code_block_ast,
  cmd: :build_code_block_ast,
  source: :build_code_block_ast,

  # Media blocks
  image: :build_image_ast,
  indepimage: :build_image_ast,
  numberlessimage: :build_image_ast,

  # Table blocks
  table: :build_table_ast,
  emtable: :build_table_ast,
  imgtable: :build_table_ast,

  # Simple list blocks (//ul, //ol, //dl commands)
  ul: :build_list_ast,
  ol: :build_list_ast,
  dl: :build_list_ast,

  # List item blocks (//li command for use within lists)
  li: :build_list_item_ast,

  # Definition list blocks (//dt and //dd for use within //dl)
  dt: :build_definition_item_ast,
  dd: :build_definition_item_ast,

  # Minicolumn blocks
  note: :build_minicolumn_ast,
  memo: :build_minicolumn_ast,
  tip: :build_minicolumn_ast,
  info: :build_minicolumn_ast,
  warning: :build_minicolumn_ast,
  important: :build_minicolumn_ast,
  caution: :build_minicolumn_ast,
  notice: :build_minicolumn_ast,

  # Column blocks
  column: :build_column_ast,

  # Reference blocks
  footnote: :build_footnote_ast,
  endnote: :build_footnote_ast,

  # Embed blocks
  embed: :build_embed_ast,
  raw: :build_raw_ast,

  # Quote and content blocks
  read: :build_quote_block_ast,
  quote: :build_quote_block_ast,
  blockquote: :build_quote_block_ast,
  lead: :build_quote_block_ast,
  centering: :build_quote_block_ast,
  flushright: :build_quote_block_ast,
  address: :build_quote_block_ast,
  talk: :build_quote_block_ast,

  # Complex blocks
  doorquote: :build_complex_block_ast,
  bibpaper: :build_complex_block_ast,
  graph: :build_complex_block_ast,
  box: :build_complex_block_ast,
  insn: :build_complex_block_ast,
  point: :build_complex_block_ast,
  # build_complex_block_ast handles :shoot next to :point and :term (caption
  # index and paragraph processing), so it is mapped here as well.
  shoot: :build_complex_block_ast,
  term: :build_complex_block_ast,

  # Control commands
  comment: :build_control_command_ast,
  olnum: :build_control_command_ast,
  blankline: :build_control_command_ast,
  noindent: :build_control_command_ast,
  pagebreak: :build_control_command_ast,
  firstlinenum: :build_control_command_ast,
  tsize: :build_control_command_ast,
  label: :build_control_command_ast,
  printendnotes: :build_control_command_ast,
  hr: :build_control_command_ast,
  bpo: :build_control_command_ast,
  parasep: :build_control_command_ast,
  beginchild: :build_control_command_ast,
  endchild: :build_control_command_ast,

  # Math blocks
  texequation: :build_tex_equation_ast
}.freeze
module ReVIEW
  module AST
    class BlockProcessor
      # Intermediate representation of a code block, extracted from a block
      # context before the actual CodeBlockNode is created.
      CodeBlockStructure = Struct.new(:id, :caption_node, :lang, :line_numbers, :code_type, :lines, :original_text, keyword_init: true) do
        # Reads id, caption and language from the argument positions named in
        # +config+ and captures the raw lines of the block.
        #
        # @param context [BlockContext] Block context
        # @param config [Hash] Code block configuration (:id_index, :caption_index,
        #   :lang_index, :default_lang, :line_numbers)
        # @return [CodeBlockStructure] Parsed code block structure
        def self.from_context(context, config)
          structure = new(
            id: context.arg(config[:id_index]),
            caption_node: context.process_caption(context.args, config[:caption_index]),
            lang: context.arg(config[:lang_index]) || config[:default_lang],
            line_numbers: config[:line_numbers] || false
          )
          # A block without a body is treated as having no lines.
          structure.lines = context.lines || []
          structure.original_text = structure.lines.join("\n")
          structure.code_type = context.name
          structure
        end

        # @return [Boolean] whether lines of this block are numbered
        def numbered?
          line_numbers
        end

        # @return [Boolean] true when the block has at least one line
        def content?
          !lines.empty?
        end

        # @return [String] caption as plain text, or '' without a caption
        def caption_text
          text = caption_node&.to_inline_text
          text || ''
        end
      end
    end
  end
end
# TableProcessor - Handles table-related block processing
#
# Converts the table block commands (//table, //emtable, //imgtable) into a
# TableNode populated with TableRowNode and TableCellNode children.
class TableProcessor
  # @param ast_compiler [Object] compiler providing #location, #chapter and #inline_processor
  def initialize(ast_compiler)
    @ast_compiler = ast_compiler
  end

  # Creates the TableNode for a table block and fills it from the block lines.
  #
  # //emtable has no id and takes its caption from arg 0; the other commands
  # use arg 0 as id and arg 1 as caption. //imgtable additionally carries a
  # metric (arg 2) and is the only command allowed to have no rows.
  #
  # @param context [BlockContext] Block context
  # @return [TableNode] Created table node
  # @raise [ReVIEW::CompileError] when a non-imgtable block has no content
  def build_table_node(context)
    emtable = context.name == :emtable
    attrs = {
      id: emtable ? nil : context.arg(0),
      caption_node: context.process_caption(context.args, emtable ? 0 : 1),
      table_type: context.name
    }
    attrs[:metric] = context.arg(2) if context.name == :imgtable

    context.append_new_node(AST::TableNode, **attrs) do |node|
      if context.content?
        process_content(node, context.lines, context.start_location)
      elsif context.name != :imgtable
        raise ReVIEW::CompileError, 'no rows in the table'
      end

      context.process_nested_blocks(node)
    end
  end

  # Parses the raw lines and adds header and body rows to the table node.
  #
  # @param table_node [TableNode] Table node to populate
  # @param lines [Array<String>] Content lines
  # @param block_location [SnapshotLocation] Block start location
  def process_content(table_node, lines, block_location)
    structure = TableStructure.from_lines(lines)

    header_rows, body_rows = build_rows_from_structure(structure, block_location)

    adjust_columns(header_rows + body_rows)

    process_and_add_rows(table_node, header_rows, body_rows)
  end

  # Splits one line into cells and builds the corresponding row node.
  # A single leading "." is removed from every cell.
  #
  # @param line [String] Line content
  # @param is_header [Boolean] Whether all cells should be header cells
  # @param first_cell_header [Boolean] Whether only first cell should be header
  # @param block_location [SnapshotLocation] Block start location
  # @return [TableRowNode] Created row node
  # @raise [CompileError] when the line yields no cells
  def create_row(line, block_location:, is_header: false, first_cell_header: false)
    cells = line.strip.split(row_separator_regexp).map { |s| s.sub(/\A\./, '') }
    if cells.empty?
      raise CompileError, "Invalid table row: empty line or no tab-separated cells#{block_location.format_for_error}"
    end

    row_node = create_node(AST::TableRowNode, row_type: is_header ? :header : :body)

    cells.each_with_index do |cell_content, index|
      header_cell = is_header || (first_cell_header && index.zero?)
      cell_node = create_node(AST::TableCellNode, cell_type: header_cell ? :th : :td)
      @ast_compiler.inline_processor.parse_inline_elements(cell_content, cell_node)
      row_node.add_child(cell_node)
    end

    row_node
  end

  private

  # @param structure [TableStructure] Table structure data
  # @param block_location [SnapshotLocation] Block start location
  # @return [Array] Two arrays: header rows and body rows
  def build_rows_from_structure(structure, block_location)
    header_rows = structure.header_lines.map do |line|
      create_row(line, is_header: true, block_location: block_location)
    end

    body_rows = structure.body_lines.map do |line|
      create_row(line, first_cell_header: structure.first_cell_header, block_location: block_location)
    end

    [header_rows, body_rows]
  end

  # @param table_node [TableNode] Table node to populate
  # @param header_rows [Array<TableRowNode>] Header rows
  # @param body_rows [Array<TableRowNode>] Body rows
  def process_and_add_rows(table_node, header_rows, body_rows)
    header_rows.each { |row| table_node.add_header_row(row) }
    body_rows.each { |row| table_node.add_body_row(row) }
  end

  # Gives every row the same number of cells: trailing cells without
  # children are dropped, then short rows are padded with empty cells.
  #
  # Padding cells take their type from the row, not from its first cell:
  # header rows are padded with :th, body rows with :td. Copying the first
  # cell's type would turn the padding of a first-cell-header body row
  # (first cell :th, remaining cells :td) into header cells.
  #
  # NOTE(review): relies on TableRowNode exposing the row_type it was
  # constructed with - confirm the reader exists.
  #
  # @param rows [Array<TableRowNode>] Rows to adjust
  def adjust_columns(rows)
    return if rows.empty?

    rows.each do |row|
      row.children.pop while row.children.last && row.children.last.children.empty?
    end

    max_cols = rows.map { |row| row.children.size }.max

    rows.each do |row|
      cell_type = row.row_type == :header ? :th : :td
      (max_cols - row.children.size).times do
        row.add_child(create_node(AST::TableCellNode, cell_type: cell_type))
      end
    end
  end

  # Chooses the cell separator from the book config key 'table_row_separator'.
  #
  # @return [Regexp] Separator pattern
  def row_separator_regexp
    config = @ast_compiler.chapter&.book&.config || {}

    case config['table_row_separator']
    when 'singletab'
      /\t/
    when 'spaces'
      /\s+/
    when 'verticalbar'
      /\s*\|\s*/
    else
      # Default: 'tabs' or nil - consecutive tabs treated as one separator
      /\t+/
    end
  end

  # @param node_class [Class] Node class to instantiate
  # @param attributes [Hash] Node attributes
  # @return [Node] Created node, located at the compiler's current location
  def create_node(node_class, **attributes)
    node_class.new(location: @ast_compiler.location, **attributes)
  end
end
module ReVIEW
  module AST
    class BlockProcessor
      class TableProcessor
        # Intermediate representation of a table: the raw lines split into
        # header and body around the first separator line.
        TableStructure = Struct.new(:header_lines, :body_lines, :first_cell_header, keyword_init: true) do
          # Without a separator line every line is a body line and the first
          # cell of each row is flagged as a header cell.
          #
          # @param lines [Array<String>] Raw table content lines
          # @return [TableStructure] Parsed table structure
          # @raise [ReVIEW::CompileError] If table is empty or invalid
          def self.from_lines(lines)
            validate_lines(lines)
            boundary = find_separator_index(lines)
            return new(header_lines: [], body_lines: lines, first_cell_header: true) if boundary.nil?

            new(
              header_lines: lines.take(boundary),
              body_lines: lines.drop(boundary + 1),
              first_cell_header: false
            )
          end

          class << self
            private

            # Rejects a missing/empty line list and a table that consists of
            # nothing but a separator line.
            #
            # @param lines [Array<String>] Content lines
            def validate_lines(lines)
              raise ReVIEW::CompileError, 'no rows in the table' if lines.nil? || lines.empty?

              return unless lines.length == 1 && find_separator_index(lines) == 0

              raise ReVIEW::CompileError, 'no rows in the table'
            end

            # A separator is a line starting with at least 12 characters
            # taken from "=", "-", "{" and "}".
            #
            # @param lines [Array<String>] Content lines
            # @return [Integer, nil] Separator index or nil if not found
            def find_separator_index(lines)
              lines.index do |text|
                text.match?(/\A[=-]{12}/) || text.match?(/\A[={}-]{12}/)
              end
            end
          end
        end
      end
    end
  end
end
# BookIndexer - Book-wide index management for AST-based processing
#
# Compiles every chapter of a book to an AST and runs AST::Indexer on it,
# then builds the book-level indexes (bibliography and chapter index).
# A failure in one chapter is reported with a warning and does not stop the
# remaining chapters.
class BookIndexer
  # @return [Object] the book given to the constructor
  attr_reader :book

  # @return [Hash] successfully built chapter indexers, keyed by chapter id
  attr_reader :chapter_indexers

  # Build book-wide indexes for cross-chapter references.
  # This is the main entry point for building indexes for an entire book.
  #
  # @param book [Object, nil] book to index
  # @return [BookIndexer, nil] the indexer, or nil when no book is given
  def self.build(book)
    return unless book

    indexer = new(book)
    indexer.build_all_chapter_indexes
    indexer
  end

  def initialize(book)
    @book = book
    # Previously never assigned, so the public reader always returned nil.
    @chapter_indexers = {}
  end

  # Build indexes for all chapters in the book, then the book-level indexes.
  def build_all_chapter_indexes
    @book.each_chapter { |chapter| build_chapter_index(chapter) }

    build_bibpaper_index_from_bib_file
    build_chapter_index_for_book
  end

  # Build index for a specific chapter using AST::Indexer.
  # Errors are downgraded to a warning so other chapters can still be indexed.
  #
  # @param chapter [Object] chapter to compile and index
  # @return [Object, nil] result of Indexer#build_indexes, or nil on failure
  def build_chapter_index(chapter)
    ast = compile_chapter_to_ast(chapter)

    indexer = AST::Indexer.new(chapter)
    result = indexer.build_indexes(ast)
    # Only indexers that finished successfully are recorded.
    @chapter_indexers[chapter.id] = indexer
    result
  rescue StandardError => e
    warn "Failed to build index for chapter #{chapter.id}: #{e.message}"
  end

  private

  # Compile chapter to AST; reference resolution is switched off here.
  def compile_chapter_to_ast(chapter)
    compiler = AST::Compiler.for_chapter(chapter)
    compiler.compile_to_ast(chapter, reference_resolution: false)
  end

  # Build bibpaper index from the bib file if the book has one.
  def build_bibpaper_index_from_bib_file
    return unless @book.bib_exist?

    # Wrap the bib file content in a Bib object so it can be compiled like a chapter.
    bib = ReVIEW::Book::Bib.new(file_content: @book.bib_content, book: @book)
    ast = compile_chapter_to_ast(bib)

    indexer = AST::Indexer.new(bib)
    indexer.build_indexes(ast)
  rescue StandardError => e
    warn "Failed to build bibpaper index: #{e.message}"
  end

  # Build chapter index for the book (chapters and parts) by asking the book
  # for its chapter_index.
  def build_chapter_index_for_book
    @book.chapter_index
  end
end
# Shared behaviour for nodes that carry a caption.
#
# The module supplies the caption_node reader; including classes store the
# caption in @caption_node. Including classes should also:
# - call serialize_caption_to_hash in serialize_properties
# - call deserialize_caption_from_hash in deserialize_from_hash
module Captionable
  # Class-level helpers added to every class that includes Captionable.
  module ClassMethods
    # Helper method to deserialize caption_node from hash
    # @param hash [Hash] hash containing caption data
    # @return [CaptionNode, nil] deserialized caption node or nil
    def deserialize_caption_from_hash(hash)
      # The serializer returns a pair; only its second element is used here.
      _first, node = ReVIEW::AST::JSONSerializer.deserialize_caption_fields(hash)
      node
    end
  end

  def self.included(base)
    base.extend(ClassMethods)
  end

  attr_reader :caption_node

  # Get caption text from caption_node
  # @return [String] caption text or empty string if no caption
  def caption_text
    text = caption_node&.to_inline_text
    text || ''
  end

  # Check if this node has a caption
  # @return [Boolean] true if caption_node exists
  def caption?
    !caption_node.nil?
  end

  # Helper method to serialize caption_node to hash.
  # The :caption_node key is only written when a caption is present.
  #
  # @param hash [Hash] hash to add caption_node to
  # @param options [JSONSerializer::Options] serialization options
  # @return [Hash] the modified hash
  def serialize_caption_to_hash(hash, options)
    node = caption_node
    hash[:caption_node] = node.serialize_to_hash(options) if node
    hash
  end
end
# Rebuilds a CodeBlockNode from its serialized hash form.
#
# Line numbering is read from 'numbered' or 'line_numbers' (false when both
# are absent). 'first_line_num' is restored as well: it is written by to_h
# and serialize_properties and accepted by the constructor, so dropping it
# here lost the value on a serialize/deserialize round trip.
#
# @param hash [Hash] string-keyed hash produced by serialization
# @return [CodeBlockNode] node with all deserializable children attached
def self.deserialize_from_hash(hash)
  node = new(
    location: ReVIEW::AST::JSONSerializer.restore_location(hash),
    id: hash['id'],
    caption_node: deserialize_caption_from_hash(hash),
    lang: hash['lang'],
    line_numbers: hash['numbered'] || hash['line_numbers'] || false,
    code_type: hash['code_type'],
    first_line_num: hash['first_line_num'],
    original_text: hash['original_text']
  )

  hash['children']&.each do |child_hash|
    child = ReVIEW::AST::JSONSerializer.deserialize_from_hash(child_hash)
    # Entries that do not deserialize to a node are skipped.
    node.add_child(child) if child.is_a?(ReVIEW::AST::Node)
  end

  node
end
options) + hash[:id] = id if id && !id.empty? + hash[:lang] = lang + serialize_caption_to_hash(hash, options) + hash[:line_numbers] = line_numbers + hash[:code_type] = code_type if code_type + hash[:first_line_num] = first_line_num if first_line_num + hash[:original_text] = original_text if original_text + hash[:children] = children.map { |child| child.serialize_to_hash(options) } if children&.any? + hash + end + end + end +end diff --git a/lib/review/ast/code_line_node.rb b/lib/review/ast/code_line_node.rb new file mode 100644 index 000000000..835d2a1de --- /dev/null +++ b/lib/review/ast/code_line_node.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require_relative 'node' + +module ReVIEW + module AST + # CodeLineNode - Represents a line in a code block + # + # A code line can contain text nodes and inline elements. + # Line numbers are tracked for numbered code blocks (listnum, emlistnum). 
# CodeLineNode - Represents a line in a code block
#
# Holds the raw text of the line and, optionally, its line number.
# Child nodes are kept in a list owned by this node.
class CodeLineNode < Node
  attr_reader :line_number, :original_text, :children

  # @param location [Object] source location, passed on to Node
  # @param line_number [Integer, nil] line number, nil when not numbered
  # @param original_text [String] raw text of the line
  def initialize(location:, line_number: nil, original_text: '', **kwargs)
    super(location: location, **kwargs)
    @line_number = line_number
    @original_text = original_text
    @children = []
  end

  # @return [Hash] hash form including line_number and original_text
  def to_h
    result = super
    result[:line_number] = line_number
    result[:original_text] = original_text
    result
  end

  # Serialized form; line_number is only written when present.
  def serialize_to_hash(options = nil)
    hash = super
    hash[:line_number] = line_number if line_number
    hash[:original_text] = original_text
    hash
  end

  # Rebuilds a CodeLineNode from its serialized hash form.
  #
  # A missing 'original_text' falls back to the constructor default ('')
  # instead of passing nil through.
  #
  # @param hash [Hash] string-keyed hash produced by serialization
  # @return [CodeLineNode] node with all deserializable children attached
  def self.deserialize_from_hash(hash)
    node = new(
      location: ReVIEW::AST::JSONSerializer.restore_location(hash),
      line_number: hash['line_number'],
      original_text: hash['original_text'] || ''
    )

    hash['children']&.each do |child_hash|
      child = ReVIEW::AST::JSONSerializer.deserialize_from_hash(child_hash)
      # Entries that do not deserialize to a node are skipped.
      node.add_child(child) if child.is_a?(ReVIEW::AST::Node)
    end

    node
  end
end
# Rebuilds a ColumnNode from its serialized hash form.
#
# 'auto_id' and 'column_number' are restored too: both are written by to_h
# and serialize_properties and accepted by the constructor, so omitting them
# here lost the values on a serialize/deserialize round trip.
#
# @param hash [Hash] string-keyed hash produced by serialization
# @return [ColumnNode] node with all deserializable children attached
def self.deserialize_from_hash(hash)
  node = new(
    location: ReVIEW::AST::JSONSerializer.restore_location(hash),
    level: hash['level'],
    label: hash['label'],
    caption_node: deserialize_caption_from_hash(hash),
    column_type: hash['column_type']&.to_sym,
    auto_id: hash['auto_id'],
    column_number: hash['column_number']
  )

  hash['children']&.each do |child_hash|
    child = ReVIEW::AST::JSONSerializer.deserialize_from_hash(child_hash)
    # Entries that do not deserialize to a node are skipped.
    node.add_child(child) if child.is_a?(ReVIEW::AST::Node)
  end

  node
end
+ +require 'optparse' +require 'stringio' +require 'review/book' +require 'review/ast/compiler' +require 'review/version' +require 'review/configure' +require 'review/loggable' +require 'review/logger' + +module ReVIEW + module AST + module Command + # Compile - AST-based compilation command + class Compile + include ReVIEW::Loggable + + class CompileError < StandardError; end + class FileNotFoundError < CompileError; end + class UnsupportedFormatError < CompileError; end + class MissingTargetError < CompileError; end + + # Exit status codes + EXIT_SUCCESS = 0 + EXIT_COMPILE_ERROR = 1 + EXIT_UNEXPECTED_ERROR = 2 + + attr_reader :options, :logger + + def initialize + @options = { + target: nil, + check_only: false, + verbose: false, + output_file: nil, + config_file: nil + } + @version_requested = false + @help_requested = false + + @logger = ReVIEW.logger + end + + def run(args) + parse_arguments(args) + + # --version or --help already handled + return EXIT_SUCCESS if @version_requested || @help_requested + + validate_options + compile + EXIT_SUCCESS + rescue CompileError => e + error_handler.handle(e) + EXIT_COMPILE_ERROR + rescue StandardError => e + error_handler.handle_unexpected(e) + EXIT_UNEXPECTED_ERROR + end + + private + + def parse_arguments(args) + parser = create_option_parser + parser.parse!(args) + + if args.empty? && !@help_requested && !@version_requested && !@options[:check_only] + raise CompileError, 'No input file specified. Use -h for help.' + end + + @input_file = args[0] unless args.empty? 
+ end + + def create_option_parser + OptionParser.new do |opts| + opts.banner = 'Usage: review-ast-compile --target FORMAT ' + opts.version = ReVIEW::VERSION + + opts.on('-t', '--target FORMAT', 'Output format (html, latex, idgxml, markdown) [required unless --check]') do |fmt| + @options[:target] = fmt + end + + opts.on('-o', '--output-file FILE', 'Output file (default: stdout)') do |file| + @options[:output_file] = file + end + + opts.on('--config FILE', '--yaml FILE', 'Configuration file (config.yml)') do |file| + @options[:config_file] = file + end + + opts.on('-c', '--check', 'Check only, no output') do + @options[:check_only] = true + end + + opts.on('-v', '--verbose', 'Verbose output') do + @options[:verbose] = true + end + + opts.on_tail('--version', 'Show version') do + puts opts.version + @version_requested = true + end + + opts.on_tail('-h', '--help', 'Show this help') do + puts opts + @help_requested = true + end + end + end + + def validate_options + # --check mode doesn't require --target + return if @options[:check_only] + + # --target is required for output generation + if @options[:target].nil? 
+ raise MissingTargetError, '--target option is required (use --target html or --target latex)' + end + end + + def compile + validate_input_file + + content = load_file(@input_file) + chapter = create_chapter(content) + ast = generate_ast(chapter) + + if @options[:check_only] + log("Syntax check passed: #{@input_file}") + else + output = render(ast, chapter) + output_content(output) + end + end + + def validate_input_file + unless @input_file + raise CompileError, 'No input file specified' + end + + unless File.exist?(@input_file) + raise FileNotFoundError, "Input file not found: #{@input_file}" + end + + unless File.readable?(@input_file) + raise CompileError, "Cannot read file: #{@input_file}" + end + end + + def load_file(path) + log("Loading: #{path}") + File.read(path) + rescue StandardError => e + raise CompileError, "Failed to read file: #{e.message}" + end + + def create_chapter(content) + config = load_configuration + + require 'review/i18n' + I18n.setup(config['language'] || 'ja') + + book_basedir = File.dirname(@input_file) + book = ReVIEW::Book::Base.new(book_basedir, config: config) + basename = File.basename(@input_file, '.*') + + chapter_number = find_chapter_number(book, basename) + + # If chapter number not found, try to extract from filename (e.g., ch03.re -> 3) + if chapter_number.nil? 
+ chapter_number = extract_chapter_number_from_filename(basename) + end + + chapter_number ||= 1 + + chapter = ReVIEW::Book::Chapter.new( + book, + chapter_number, + basename, + @input_file, + StringIO.new(content) + ) + + require 'review/ast/book_indexer' + ReVIEW::AST::BookIndexer.build(book) + + chapter + end + + def find_chapter_number(book, basename) + return nil unless book + + catalog_file = File.join(book.basedir, 'catalog.yml') + return nil unless File.exist?(catalog_file) + + begin + require 'yaml' + catalog = YAML.safe_load_file(catalog_file) + + if catalog['CHAPS'] + catalog['CHAPS'].each_with_index do |chapter_file, index| + # Remove extension and compare basename + catalog_basename = File.basename(chapter_file, '.*') + return index + 1 if catalog_basename == basename + end + end + rescue StandardError => e + log("Warning: Could not parse catalog.yml: #{e.message}") + end + + nil + end + + def extract_chapter_number_from_filename(basename) + # Try to extract chapter number from common filename patterns + case basename + when /^ch(?:ap)?(\d+)$/i # ch01, ch1, chap01, chap1, etc. + $1.to_i + when /^chapter(\d+)$/i # rubocop:disable Lint/DuplicateBranch -- chapter01, chapter1, etc. + $1.to_i + when /^(\d+)$/ # rubocop:disable Lint/DuplicateBranch -- 01, 1, etc. 
+ $1.to_i + else + log("Warning: Could not extract chapter number from filename '#{basename}', using fallback") + nil + end + end + + def generate_ast(chapter) + log('Generating AST...') + compiler = ReVIEW::AST::Compiler.for_chapter(chapter) + compiler.compile_to_ast(chapter) + rescue StandardError => e + raise CompileError, "AST generation failed: #{e.message}" + end + + def render(ast, chapter) + log("Rendering to #{@options[:target]}...") + + renderer_class = load_renderer(@options[:target]) + renderer = renderer_class.new(chapter) + renderer.render(ast) + rescue StandardError => e + raise CompileError, "Rendering failed: #{e.message}" + end + + def load_configuration + config_file = @options[:config_file] + + if config_file.nil? + default_config = File.join(File.dirname(@input_file), 'config.yml') + config_file = default_config if File.exist?(default_config) + end + + if config_file && File.exist?(config_file) + log("Loading configuration: #{config_file}") + begin + config = ReVIEW::Configure.create( + maker: 'ast-compile', + yamlfile: config_file + ) + rescue StandardError => e + raise CompileError, "Failed to load configuration: #{e.message}" + end + else + if @options[:config_file] + raise CompileError, "Configuration file not found: #{@options[:config_file]}" + end + + log('Using default configuration') + config = ReVIEW::Configure.values + end + + config + end + + def load_renderer(format) + case format + when 'html' + require 'review/renderer/html_renderer' + ReVIEW::Renderer::HtmlRenderer + when 'latex' + require 'review/renderer/latex_renderer' + ReVIEW::Renderer::LatexRenderer + when 'idgxml' + require 'review/renderer/idgxml_renderer' + ReVIEW::Renderer::IdgxmlRenderer + when 'markdown' + require 'review/renderer/markdown_renderer' + ReVIEW::Renderer::MarkdownRenderer + else + raise UnsupportedFormatError, "Unsupported format: #{format} (supported: html, latex, idgxml, markdown)" + end + end + + def output_content(content) + if @options[:output_file] 
+ log("Writing to: #{@options[:output_file]}") + File.write(@options[:output_file], content) + puts "Successfully generated: #{@options[:output_file]}" + else + log('Writing to: stdout') + print content + end + rescue StandardError => e + raise CompileError, "Failed to write output: #{e.message}" + end + + def generate_output_filename + basename = File.basename(@input_file, '.*') + ext = output_extension(@options[:target]) + "#{basename}#{ext}" + end + + def output_extension(format) + case format + when 'html' + '.html' + when 'latex' + '.tex' + when 'idgxml' + '.xml' + when 'markdown' + '.md' + else + '.txt' + end + end + + def log(message) + puts message if @options[:verbose] + end + + def error_handler + @error_handler ||= ErrorHandler.new(@options[:verbose], logger: @logger) + end + + # Internal class for error handling + class ErrorHandler + include ReVIEW::Loggable + + def initialize(verbose, logger:) + @verbose = verbose + @logger = logger + end + + def handle(err) + error err.message.to_s + case err + when FileNotFoundError + error 'Please check the file path and try again.' + when UnsupportedFormatError + error 'Supported formats: html, latex, idgxml' + when MissingTargetError + error 'Example: review-ast-compile --target html chapter1.re' + end + + if @verbose && err.backtrace + error "\nBacktrace:" + error err.backtrace.take(10).join("\n") + end + end + + def handle_unexpected(err) + error "Unexpected error occurred: #{err.class}" + error err.message + + if @verbose && err.backtrace + error "\nBacktrace:" + error err.backtrace.join("\n") + else + error "\nUse --verbose for more details." 
+ end + end + end + end + end + end +end diff --git a/lib/review/ast/command/epub_maker.rb b/lib/review/ast/command/epub_maker.rb new file mode 100644 index 000000000..49cef0196 --- /dev/null +++ b/lib/review/ast/command/epub_maker.rb @@ -0,0 +1,152 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'review/epubmaker' +require 'review/ast' +require 'review/renderer/html_renderer' + +module ReVIEW + module AST + module Command + # EpubMaker - EPUBMaker with AST Renderer support + # + # This class extends EPUBMaker to support both traditional Builder and new Renderer approaches. + # It automatically selects the appropriate processor based on configuration settings. + class EpubMaker < ReVIEW::EPUBMaker + def initialize + super + @processor_type = 'AST/Renderer' + end + + private + + # Override converter creation to use AST Renderer + def create_converter(book) + # Build indexes for all chapters to support cross-chapter references + # This must be done before rendering any chapter + require_relative('../book_indexer') + ReVIEW::AST::BookIndexer.build(book) + + # Create a wrapper that makes Renderer compatible with Converter interface + # Renderer will be created per chapter in the adapter + RendererConverterAdapter.new(book) + end + + # Override build_body to use AST Renderer instead of traditional Builder + # This is a complete override of the parent's build_body method, + # replacing only the converter creation part + def build_body(basetmpdir, yamlfile) + @precount = 0 + @bodycount = 0 + @postcount = 0 + + @manifeststr = '' + @ncxstr = '' + @tocdesc = [] + @img_graph = ReVIEW::ImgGraph.new(@config, 'html', path_name: '_review_graph') + + basedir = File.dirname(yamlfile) + base_path = Pathname.new(basedir) + book = ReVIEW::Book::Base.new(basedir, config: 
@config) + + # Use AST Renderer instead of traditional Builder + @converter = create_converter(book) + @compile_errors = nil + + book.parts.each do |part| + if part.name.present? + if part.file? + build_chap(part, base_path, basetmpdir, true) + else + htmlfile = "part_#{part.number}.#{@config['htmlext']}" + build_part(part, basetmpdir, htmlfile) + title = ReVIEW::I18n.t('part', part.number) + if part.name.strip.present? + title += ReVIEW::I18n.t('chapter_postfix') + part.name.strip + end + @htmltoc.add_item(0, htmlfile, title, chaptype: 'part') + write_buildlogtxt(basetmpdir, htmlfile, '') + end + end + + part.chapters.each do |chap| + build_chap(chap, base_path, basetmpdir, false) + end + end + check_compile_status + + begin + @img_graph.make_mermaid_images + rescue ApplicationError => e + error! e.message + end + @img_graph.cleanup_graphimg + end + end + + # Adapter to make Renderer compatible with Converter interface + class RendererConverterAdapter + def initialize(book) + @book = book + @config = book.config + @compile_errors = [] + end + + # Convert a chapter using the AST Renderer + def convert(filename, output_path) + chapter = find_chapter_or_part(filename) + return false unless chapter + + begin + # Compile chapter to AST using auto-detection for file format + compiler = ReVIEW::AST::Compiler.for_chapter(chapter) + ast_root = compiler.compile_to_ast(chapter) + + # Create renderer with current chapter + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + + # Render to HTML + html_output = renderer.render(ast_root) + + # Write output + File.write(output_path, html_output) + + true + rescue StandardError => e + @compile_errors << "#{filename}: #{e.message}" + if @config['debug'] + puts "AST Renderer Error in #{filename}: #{e.message}" + puts e.backtrace.first(5) + end + false + end + end + + # Compatibility method for error handling + attr_reader :compile_errors + + private + + def find_chapter_or_part(filename) + basename = File.basename(filename, 
'.*') + + chapter = @book.chapters.find { |ch| File.basename(ch.path, '.*') == basename } + return chapter if chapter + + @book.parts.each do |part| + if part.file? && File.basename(part.path, '.*') == basename + return part + end + end + + nil + end + end + end + end +end diff --git a/lib/review/ast/command/idgxml_maker.rb b/lib/review/ast/command/idgxml_maker.rb new file mode 100644 index 000000000..f79870901 --- /dev/null +++ b/lib/review/ast/command/idgxml_maker.rb @@ -0,0 +1,198 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'review/idgxmlmaker' +require 'review/ast' +require_relative '../book_indexer' +require 'review/renderer/idgxml_renderer' + +module ReVIEW + module AST + module Command + class IdgxmlMaker < ReVIEW::IDGXMLMaker + def initialize + super + @processor_type = 'AST/Renderer' + @renderer_adapter = nil + end + + private + + def build_body(basetmpdir, _yamlfile) + base_path = Pathname.new(@basedir) + book = @book || ReVIEW::Book::Base.new(@basedir, config: @config) + + if @config.dig('ast', 'debug') + puts "AST::Command::IdgxmlMaker: Using #{@processor_type} processor" + end + + ReVIEW::AST::BookIndexer.build(book) + + @renderer_adapter = create_converter(book) + @converter = @renderer_adapter + @compile_errors = false + + book.parts.each do |part| + if part.name.present? + if part.file? + build_chap(part, base_path, basetmpdir, true) + else + xmlfile = "part_#{part.number}.xml" + build_part(part, basetmpdir, xmlfile) + end + end + part.chapters.each do |chap| + build_chap(chap, base_path, basetmpdir, false) + end + end + + report_renderer_errors + end + + def build_chap(chap, base_path, basetmpdir, ispart) + filename = if ispart.present? 
+ chap.path + else + Pathname.new(chap.path).relative_path_from(base_path).to_s + end + id = File.basename(filename).sub(/\.re\Z/, '') + if @buildonly && !@buildonly.include?(id) + warn "skip #{id}.re" + return + end + + xmlfile = "#{id}.xml" + output_path = File.join(basetmpdir, xmlfile) + success = @converter.convert(filename, output_path) + if success + apply_filter(output_path) + else + @compile_errors = true + end + rescue StandardError => e + @compile_errors = true + error "compile error in #{filename} (#{e.class})" + error e.message + end + + def create_converter(book) + RendererConverterAdapter.new( + book, + img_math: @img_math, + img_graph: @img_graph + ) + end + + def report_renderer_errors + return unless @renderer_adapter&.any_errors? + + @compile_errors = true + summary = @renderer_adapter.compilation_error_summary + @logger.error(summary) if summary + end + end + + class RendererConverterAdapter + attr_reader :compile_errors_list + + def initialize(book, img_math:, img_graph:) + @book = book + @img_math = img_math + @img_graph = img_graph + @config = book.config + @logger = ReVIEW.logger + @compile_errors_list = [] + end + + def convert(filename, output_path) + chapter = find_chapter(filename) + unless chapter + record_error("#{filename}: chapter not found") + return false + end + + compiler = ReVIEW::AST::Compiler.for_chapter(chapter) + ast_root = compiler.compile_to_ast(chapter) + + renderer = ReVIEW::Renderer::IdgxmlRenderer.new(chapter) + inject_shared_resources(renderer) + + xml_output = renderer.render(ast_root) + File.write(output_path, xml_output) + + true + # rescue ReVIEW::CompileError, ReVIEW::SyntaxError, ReVIEW::AST::InlineTokenizeError => e + # handle_known_error(filename, e) + # false + # rescue StandardError => e + # handle_unexpected_error(filename, e) + # false + end + + def any_errors? + !@compile_errors_list.empty? + end + + def compilation_error_summary + return nil if @compile_errors_list.empty? 
+ + summary = ["Compilation errors occurred in #{@compile_errors_list.length} file(s):"] + @compile_errors_list.each_with_index do |error, i| + summary << " #{i + 1}. #{error}" + end + summary.join("\n") + end + + private + + def inject_shared_resources(renderer) + renderer.img_math = @img_math if @img_math + renderer.img_graph = @img_graph if @img_graph + end + + def find_chapter(filename) + basename = File.basename(filename, '.*') + + chapter = @book.chapters.find { |ch| File.basename(ch.path, '.*') == basename } + return chapter if chapter + + @book.parts_in_file.find { |part| File.basename(part.path, '.*') == basename } + end + + def handle_known_error(filename, error) + message = "#{filename}: #{error.class.name} - #{error.message}" + @compile_errors_list << message + @logger.error("Compilation error in #{filename}: #{error.message}") + if error.respond_to?(:location) && error.location + @logger.error(" at line #{error.location.lineno} in #{error.location.filename}") + end + log_backtrace(error) + end + + def handle_unexpected_error(filename, error) + message = "#{filename}: #{error.message}" + @compile_errors_list << message + @logger.error("AST Renderer Error in #{filename}: #{error.message}") + log_backtrace(error) + end + + def log_backtrace(error) + return unless @config.dig('ast', 'debug') + + @logger.debug('Backtrace:') + error.backtrace.first(10).each { |line| @logger.debug(" #{line}") } + end + + def record_error(message) + @compile_errors_list << message + @logger.error("AST Renderer Error: #{message}") + end + end + end + end +end diff --git a/lib/review/ast/command/pdf_maker.rb b/lib/review/ast/command/pdf_maker.rb new file mode 100644 index 000000000..478b52763 --- /dev/null +++ b/lib/review/ast/command/pdf_maker.rb @@ -0,0 +1,174 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. 
+# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'review/pdfmaker' +require 'review/ast' +require 'review/renderer/latex_renderer' + +module ReVIEW + module AST + module Command + # PdfMaker - PDFMaker with AST Renderer support + # + # This class extends PDFMaker to support both traditional Builder and new Renderer approaches. + # It automatically selects the appropriate processor based on configuration settings. + class PdfMaker < ReVIEW::PDFMaker + def initialize + super + @processor_type = nil + @compile_errors_list = [] + end + + # Override check_compile_status to provide detailed error information + def check_compile_status(ignore_errors) + # Check for errors in both main class and adapter + has_errors = @compile_errors || (@renderer_adapter && @renderer_adapter.any_errors?) + return unless has_errors + + # Set the compile_errors flag for parent class compatibility + @compile_errors = true + + # Output detailed error summary + if summary = compilation_error_summary + @logger.error summary + end + + super + end + + # Provide summary of all compilation errors + def compilation_error_summary + errors = @compile_errors_list.dup + errors.concat(@renderer_adapter.compile_errors_list) if @renderer_adapter + + return nil if errors.empty? + + summary = ["Compilation errors occurred in #{errors.length} file(s):"] + errors.each_with_index do |error, i| + summary << " #{i + 1}. 
#{error}" + end + summary.join("\n") + end + + private + + # Override converter creation to use Renderer when appropriate + def create_converter(book) + # Create a wrapper that makes Renderer compatible with Converter interface + # Renderer will be created per chapter in the adapter + @renderer_adapter = RendererConverterAdapter.new(book) + end + + # Override the converter creation point in build_pdf + # This method replaces the direct Converter.new call in the parent class + def make_input_files(book) + # Build indexes for all chapters to support cross-chapter references + # This must be done before rendering any chapter + require_relative('../book_indexer') + ReVIEW::AST::BookIndexer.build(book) + + @converter = create_converter(book) + + super + end + end + + # Adapter to make Renderer compatible with Converter interface + class RendererConverterAdapter + attr_reader :compile_errors_list + + def initialize(book) + @book = book + @config = book.config + @compile_errors = false + @compile_errors_list = [] + @logger = ReVIEW.logger + end + + def any_errors? + @compile_errors || !@compile_errors_list.empty? 
+ end + + # Convert a chapter using the AST Renderer + def convert(filename, output_path) + chapter = find_chapter(filename) + return false unless chapter + + begin + # AST environment uses AST::Indexer for indexing during rendering + # No need to call generate_indexes - AST::Indexer handles it in visit_document + + # Compile chapter to AST using auto-detection for file format + compiler = ReVIEW::AST::Compiler.for_chapter(chapter) + ast_root = compiler.compile_to_ast(chapter) + + # Create renderer with current chapter + renderer = ReVIEW::Renderer::LatexRenderer.new(chapter) + + # Render to LaTeX (AST::Indexer will handle indexing during this process) + latex_output = renderer.render(ast_root) + + # Write output + File.write(output_path, latex_output) + + true + rescue ReVIEW::CompileError, ReVIEW::SyntaxError, ReVIEW::AST::InlineTokenizeError => e + # These are known ReVIEW compilation errors - handle them specifically + error_message = "#{filename}: #{e.class.name} - #{e.message}" + @compile_errors_list << error_message + @compile_errors = true + + @logger.error "Compilation error in #{filename}: #{e.message}" + + # Show location information if available + if e.respond_to?(:location) && e.location + @logger.error " at line #{e.location.lineno} in #{e.location.filename}" + end + + # Show backtrace in debug mode + if @config['debug'] + @logger.debug('Backtrace:') + e.backtrace.first(10).each { |line| @logger.debug(" #{line}") } + end + + false + rescue StandardError => e + error_message = "#{filename}: #{e.message}" + @compile_errors_list << error_message + @compile_errors = true # Set flag for parent class compatibility + + # Always output error to user, not just in debug mode + @logger.error "AST Renderer Error in #{filename}: #{e.message}" + + # Show backtrace in debug mode + if @config['debug'] + @logger.debug('Backtrace:') + e.backtrace.first(10).each { |line| @logger.debug(" #{line}") } + end + + false + end + end + + private + + # Find chapter or part 
object by filename + def find_chapter(filename) + basename = File.basename(filename, '.*') + + # First check chapters + chapter = @book.chapters.find { |ch| File.basename(ch.path, '.*') == basename } + return chapter if chapter + + # Then check parts with content files + @book.parts_in_file.find { |part| File.basename(part.path, '.*') == basename } + end + end + end + end +end diff --git a/lib/review/ast/command/text_maker.rb b/lib/review/ast/command/text_maker.rb new file mode 100644 index 000000000..2789eb505 --- /dev/null +++ b/lib/review/ast/command/text_maker.rb @@ -0,0 +1,191 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'review/textmaker' +require 'review/ast' +require 'review/renderer/top_renderer' +require 'review/renderer/plaintext_renderer' + +module ReVIEW + module AST + module Command + # TextMaker - TEXTMaker with AST Renderer support + # + # This class extends TEXTMaker to support both traditional Builder and new Renderer approaches. + # It automatically selects the appropriate processor based on configuration settings. + class TextMaker < ReVIEW::TEXTMaker + def initialize + super + @processor_type = nil + @compile_errors_list = [] + end + + # Override check_compile_status to provide detailed error information + def check_compile_status(ignore_errors) + # Check for errors in both main class and adapter + has_errors = @compile_errors || (@renderer_adapter && @renderer_adapter.any_errors?) 
+ return unless has_errors + + # Set the compile_errors flag for parent class compatibility + @compile_errors = true + + # Output detailed error summary + if summary = compilation_error_summary + @logger.error summary + end + + super if defined?(super) + end + + # Provide summary of all compilation errors + def compilation_error_summary + errors = @compile_errors_list.dup + errors.concat(@renderer_adapter.compile_errors_list) if @renderer_adapter + + return nil if errors.empty? + + summary = ["Compilation errors occurred in #{errors.length} file(s):"] + errors.each_with_index do |error, i| + summary << " #{i + 1}. #{error}" + end + summary.join("\n") + end + + private + + # Override build_body to use Renderer + def build_body(basetmpdir, _yamlfile) + # Build indexes for all chapters to support cross-chapter references + # This must be done before rendering any chapter + require_relative('../book_indexer') + ReVIEW::AST::BookIndexer.build(@book) + + @converter = create_converter(@book) + + base_path = Pathname.new(@basedir) + @book.parts.each do |part| + if part.name.present? + if part.file? + build_chap(part, base_path, basetmpdir, true) + else + textfile = "part_#{part.number}.txt" + build_part(part, basetmpdir, textfile) + end + end + + part.chapters.each { |chap| build_chap(chap, base_path, basetmpdir, false) } + end + end + + # Create a converter that uses Renderer + def create_converter(book) + @renderer_adapter = RendererConverterAdapter.new(book, @plaintext) + end + end + + # Adapter to make Renderer compatible with Converter interface + class RendererConverterAdapter + attr_reader :compile_errors_list + + def initialize(book, plaintext) + @book = book + @config = book.config + @plaintext = plaintext + @compile_errors = false + @compile_errors_list = [] + @logger = ReVIEW.logger + end + + def any_errors? + @compile_errors || !@compile_errors_list.empty? 
+ end + + # Convert a chapter using the AST Renderer + def convert(filename, output_path) + chapter = find_chapter(filename) + return false unless chapter + + begin + # Compile chapter to AST using auto-detection for file format + compiler = ReVIEW::AST::Compiler.for_chapter(chapter) + ast_root = compiler.compile_to_ast(chapter) + + # Create renderer with current chapter + renderer = if @plaintext + ReVIEW::Renderer::PlaintextRenderer.new(chapter) + else + ReVIEW::Renderer::TopRenderer.new(chapter) + end + + # Render to text + text_output = renderer.render(ast_root) + + # Write output + File.write(output_path, text_output) + + true + rescue ReVIEW::CompileError, ReVIEW::SyntaxError, ReVIEW::AST::InlineTokenizeError => e + # These are known ReVIEW compilation errors - handle them specifically + error_message = "#{filename}: #{e.class.name} - #{e.message}" + @compile_errors_list << error_message + @compile_errors = true + + @logger.error "Compilation error in #{filename}: #{e.message}" + + # Show location information if available + if e.respond_to?(:location) && e.location + @logger.error " at line #{e.location.lineno} in #{e.location.filename}" + end + + # Show backtrace in debug mode + if @config['debug'] + @logger.debug('Backtrace:') + e.backtrace.first(10).each { |line| @logger.debug(" #{line}") } + end + + false + rescue StandardError => e + error_message = "#{filename}: #{e.message}" + @compile_errors_list << error_message + @compile_errors = true + + # Always output error to user, not just in debug mode + @logger.error "AST Renderer Error in #{filename}: #{e.message}" + + # Show first backtrace line to help identify the issue + if e.backtrace && !e.backtrace.empty? 
+ @logger.error " at #{e.backtrace.first}" + end + + # Show full backtrace in debug mode + if @config['debug'] + @logger.error('Full Backtrace:') + e.backtrace.first(20).each { |line| @logger.error(" #{line}") } + end + + false + end + end + + private + + # Find chapter or part object by filename + def find_chapter(filename) + basename = File.basename(filename, '.*') + + # First check chapters + chapter = @book.chapters.find { |ch| File.basename(ch.path, '.*') == basename } + return chapter if chapter + + # Then check parts with content files + @book.parts_in_file.find { |part| File.basename(part.path, '.*') == basename } + end + end + end + end +end diff --git a/lib/review/ast/compiler.rb b/lib/review/ast/compiler.rb new file mode 100644 index 000000000..d159b20db --- /dev/null +++ b/lib/review/ast/compiler.rb @@ -0,0 +1,546 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. 
+ +require 'review/ast' +require 'review/exception' +require 'review/loggable' +require 'review/lineinput' +require 'review/snapshot_location' +require_relative 'inline_processor' +require_relative 'block_processor' +require_relative 'block_data' +require_relative 'block_context' +require_relative 'compiler/block_reader' +require_relative 'list_processor' +require_relative 'footnote_node' +require_relative 'reference_resolver' +require_relative 'compiler/tsize_processor' +require_relative 'compiler/firstlinenum_processor' +require_relative 'compiler/noindent_processor' +require_relative 'compiler/olnum_processor' +require_relative 'compiler/list_structure_normalizer' +require_relative 'compiler/list_item_numbering_processor' +require_relative 'compiler/auto_id_processor' +require_relative 'headline_parser' + +module ReVIEW + module AST + # Compiler - Core AST compilation logic and coordination + # + # This class handles the main AST compilation flow, coordinating between + # inline and block processors to build complete AST structures from Re:VIEW content. + # + # Responsibilities: + # - Main AST compilation coordination + # - Headline and paragraph AST building + # - AST mode management and rendering coordination + # - Document structure management + class Compiler + MAX_HEADLINE_LEVEL = 6 + + # Factory method to create appropriate compiler based on file format + def self.for_chapter(chapter) + filename = chapter.respond_to?(:filename) ? 
chapter.filename : chapter.basename + + # Check file extension for format detection + if filename&.end_with?('.md') + require_relative('markdown_compiler') + MarkdownCompiler.new + else + # Default to Re:VIEW format + new + end + end + include Loggable + + def initialize + # AST related + @ast_root = nil + @current_ast_node = nil + + # Location tracking - initialize with default location + @current_location = SnapshotLocation.new(nil, 0) + + # Processors for specialized AST handling + @inline_processor = InlineProcessor.new(self) + @block_processor = BlockProcessor.new(self) + @list_processor = ListProcessor.new(self) + + @logger = ReVIEW.logger + + # Get config for debug output + @config = {} + + # Error accumulation flag (similar to HTMLBuilder's Compiler) + @compile_errors = false + + # Commands that preserve content as-is (matching ReVIEW::Compiler behavior) + @non_parsed_commands = %i[embed texequation graph] + + @post_processors = [ + # Post-process AST for tsize commands (must be before other processors) + 'ReVIEW::AST::Compiler::TsizeProcessor', + + # Post-process AST for firstlinenum commands + 'ReVIEW::AST::Compiler::FirstLineNumProcessor', + + # Post-process AST for noindent and olnum commands + 'ReVIEW::AST::Compiler::NoindentProcessor', + 'ReVIEW::AST::Compiler::OlnumProcessor', + + # Normalize list structures (process //beginchild and //endchild) + 'ReVIEW::AST::Compiler::ListStructureNormalizer', + + # Assign item numbers to ordered list items + 'ReVIEW::AST::Compiler::ListItemNumberingProcessor', + + # Generate auto_id for HeadlineNode (nonum/notoc/nodisp) and ColumnNode + 'ReVIEW::AST::Compiler::AutoIdProcessor' + ] + end + + attr_reader :ast_root, :current_ast_node, :chapter, :inline_processor, :block_processor, :list_processor + + def compile_to_ast(chapter, reference_resolution: true) + @chapter = chapter + # Create AST root with appropriate location + # For test compatibility, use a special calculation for line numbers + f = 
LineInput.from_string(@chapter.content) + + @ast_root = AST::DocumentNode.new( + location: SnapshotLocation.new(@chapter.basename, f.lineno + 1) + ) + @current_ast_node = @ast_root + + build_ast_from_chapter + + # Resolve references after AST building but before post-processing + # Skip if explicitly requested (e.g., during index building) + if reference_resolution + resolve_references + end + + execute_post_processes + + # Check for accumulated errors (similar to HTMLBuilder's Compiler) + if @compile_errors + raise CompileError, "#{chapter.basename} cannot be compiled." + end + + # Return the compiled AST + @ast_root + end + + def build_caption_node(raw_caption_text, caption_location:) + return nil if raw_caption_text.nil? || raw_caption_text.empty? + + caption_node = AST::CaptionNode.new(location: caption_location) + + begin + with_temporary_location!(caption_location) do + inline_processor.parse_inline_elements(raw_caption_text, caption_node) + end + rescue StandardError => e + raise CompileError, "Error processing caption '#{raw_caption_text}': #{e.message}#{caption_location.format_for_error}" + end + + caption_node + end + + # Helper methods that need to be accessible from processors + def location + @current_location + end + + # Force override current location - FOR TESTING ONLY + # This method bypasses normal location tracking and should only be used in tests + # @param location [SnapshotLocation] The location to force set + def force_override_location!(location) + @current_location = location + end + + # Update current location based on file input position + # @param file_input [LineInput] The file input object + def update_current_location(file_input) + @current_location = SnapshotLocation.new(@chapter.basename, file_input.lineno) + end + + # Override error method to accumulate errors (similar to HTMLBuilder's Compiler) + def error(msg, location: nil) + @compile_errors = true + super + end + + def add_child_to_current_node(node) + 
# Block-Scoped Compilation Support

# Wrap +block_data+ in a BlockContext bound to this compiler and yield it.
#
# @param block_data [BlockData] block data to process
# @yield [BlockContext] the context built for this block
# @return [Object] whatever the given block returns
def with_block_context(block_data)
  yield(BlockContext.new(block_data: block_data, compiler: self))
end

# Run the given block with +new_location+ as the current location; the
# previous location is put back afterwards, even if the block raises.
#
# @param new_location [SnapshotLocation] location to use inside the block
# @return [Object] whatever the given block returns
def with_temporary_location!(new_location)
  previous_location = @current_location
  @current_location = new_location
  yield
ensure
  @current_location = previous_location
end

# Run the given block with +new_node+ as the current AST node; the previous
# node is put back afterwards, even if the block raises.
#
# @param new_node [AST::Node] node that should receive children inside the block
# @return [Object] whatever the given block returns
def with_temporary_ast_node!(new_node)
  previous_node = @current_ast_node
  @current_ast_node = new_node
  yield
ensure
  @current_ast_node = previous_node
end

# Parse +lines+ as structured content (paragraphs and lists) and attach the
# resulting nodes to +parent_node+.
#
# The current node and current location are restored when the method
# finishes, including when one of the compile_* calls raises, so a failing
# block cannot leave the compiler attached to the wrong parent.
#
# @param parent_node [AST::Node] node that receives the parsed children
# @param lines [Array<String>, nil] content lines without line terminators
# @return [void]
def process_structured_content(parent_node, lines)
  return unless lines && lines.any?

  # Feed the lines through a LineInput so the list/paragraph compilers can
  # consume them exactly like top-level document input.
  content = lines.join("\n") + "\n"
  line_input = ReVIEW::LineInput.from_string(content)
  source_name = @chapter&.basename || 'block'

  with_temporary_ast_node!(parent_node) do
    with_temporary_location!(@current_location) do
      while line_input.next?
        # Line numbers here count from the start of the block content.
        @current_location = SnapshotLocation.new(source_name, line_input.lineno)

        case line_input.peek
        when /\A\s*\z/ # blank line
          line_input.gets # consume it without creating a node
        when /\A\s+\*+\s/ # unordered list (leading space required, ** nests)
          compile_ul_to_ast(line_input)
        when /\A\s+\d+\.\s/ # ordered list (leading space required)
          compile_ol_to_ast(line_input)
        when /\A\s+:\s/ # definition list (leading space required)
          compile_dl_to_ast(line_input)
        else
          compile_paragraph_to_ast(line_input)
        end
      end
    end
  end
end

# Read one block command (and its body, when it opens with "{") into a BlockData.
#
# @param f [LineInput] input positioned at, or just after, the command line
# @param initial_line [String, nil] the command line when the caller already read it
# @return [BlockData] name, args, content lines, nested blocks and start location
# @raise [CompileError] on end of input, a stray "//}", invalid command syntax,
#   or any other StandardError raised while reading (wrapped with location text)
def read_block_command(f, initial_line = nil)
  # Captured first so the BlockData records where the block started.
  block_start_location = @current_location

  line = initial_line || f.gets
  unless line
    raise CompileError, "Unexpected end of file while reading block command#{@current_location.format_for_error}"
  end

  # A terminator reaching this point has no matching opener.
  if line.start_with?('//}')
    raise CompileError, "Unexpected block terminator '//}' without opening block#{@current_location.format_for_error}"
  end

  command_match = line.match(%r{\A//([a-z]+)})
  unless command_match
    raise CompileError, "Invalid block command syntax: '#{line.strip}'#{@current_location.format_for_error}"
  end

  name = command_match[1].to_sym
  # Everything after the command name, minus trailing whitespace and the opening brace.
  args = parse_args(line.sub(%r{\A//[a-z]+}, '').rstrip.chomp('{'), name)

  lines, nested_blocks =
    if block_open?(line)
      read_block_with_nesting(f, name, block_start_location)
    else
      [[], []]
    end

  BlockData.new(
    name: name,
    args: args,
    lines: lines,
    nested_blocks: nested_blocks,
    location: block_start_location
  )
rescue CompileError
  # Already carries a message and location; pass it through untouched.
  raise
rescue StandardError => e
  raise CompileError, "Error reading block command: #{e.message}#{@current_location.format_for_error}"
end
# Read the body of an opened block by delegating to a BlockReader.
#
# @param f [LineInput] input positioned just after the opening line
# @param parent_command [Symbol] name of the block being read
# @param block_start_location [SnapshotLocation] where the block started
# @return [Array] the lines and nested blocks returned by BlockReader#read
def read_block_with_nesting(f, parent_command, block_start_location)
  BlockReader.new(
    compiler: self,
    file_input: f,
    parent_command: parent_command,
    start_location: block_start_location,
    preserve_whitespace: preserve_whitespace?(parent_command)
  ).read
end

private

# Walk the chapter content line by line and dispatch each line to the
# matching element compiler.
def build_ast_from_chapter
  f = LineInput.from_string(@chapter.content)

  while f.next?
    # Snapshot the line about to be handled (the input's counter plus one).
    @current_location = SnapshotLocation.new(@chapter.basename, f.lineno + 1)

    case f.peek
    when /\A\#@/, /\A\s*\z/
      # Preprocessor directives and blank lines are consumed without creating a node.
      f.gets
    when /\A=+[\[\s{]/
      compile_headline_to_ast(f.gets)
    when %r{\A//}
      compile_block_command_to_ast(f)
    when /\A\s+\*+\s/
      # Unordered list: leading space required; repeated "*" expresses nesting.
      compile_ul_to_ast(f)
    when /\A\s+\d+\.\s/
      # Ordered list: leading space required.
      compile_ol_to_ast(f)
    when /\A\s*:\s/
      # Definition list: leading space optional.
      compile_dl_to_ast(f)
    else
      compile_paragraph_to_ast(f)
    end
  end
end

# Gather consecutive text lines into one ParagraphNode under the current node.
# Reading stops at a block command, a preprocessor directive, or a blank line;
# nothing is added when no text line was gathered.
def compile_paragraph_to_ast(f)
  collected = []
  f.until_match(%r{\A//|\A\#@}) do |line|
    break if line.strip.empty?

    # Leading tabs survive; other surrounding whitespace is stripped.
    collected << strip_preserving_leading_tabs(line)
  end

  return if collected.empty?

  # The lines are joined with "\n", so the node keeps the source line breaks.
  paragraph = AST::ParagraphNode.new(location: location)
  inline_processor.parse_inline_elements(collected.join("\n"), paragraph)
  @current_ast_node.add_child(paragraph)
end
# Parse one headline line and create the node it describes.
#
# @param line [String] the raw headline line
# @return [Object, nil] nil when HeadlineParser.parse returns nothing
def compile_headline_to_ast(line)
  parsed = HeadlineParser.parse(line, location: location)
  return nil unless parsed

  caption = build_caption_node(parsed.caption, caption_location: location)
  parent = find_appropriate_parent_for_level(parsed.level)

  create_headline_node(parsed, caption, parent)
end

# Add a ColumnNode under +current_node+ and make it the current node, so
# content that follows is attached to the column.
def create_column_node(parse_result, caption_node, current_node)
  column = AST::ColumnNode.new(
    location: location,
    level: parse_result.level,
    label: parse_result.label,
    caption_node: caption_node,
    column_type: :column,
    inline_processor: inline_processor
  )
  current_node.add_child(column)
  @current_ast_node = column
end

# Route a parsed headline: column opener, closing tag, or regular headline.
def create_headline_node(parse_result, caption_node, current_node)
  return create_column_node(parse_result, caption_node, current_node) if parse_result.column?
  return handle_closing_tag(parse_result) if parse_result.closing_tag?

  create_regular_headline(parse_result, caption_node, current_node)
end

# Close the open column and step back to its parent (or the root when the
# column has no parent).
#
# @raise [ReVIEW::ApplicationError] for any tag other than "column", or when
#   the current node is not a ColumnNode
def handle_closing_tag(parse_result)
  tag_name = parse_result.closing_tag_name

  unless tag_name == 'column'
    raise ReVIEW::ApplicationError, "Unknown closing tag: /#{tag_name}#{@current_location.format_for_error}"
  end

  # Only a currently open column can be closed.
  unless @current_ast_node.is_a?(AST::ColumnNode)
    raise ReVIEW::ApplicationError, "column is not opened#{@current_location.format_for_error}"
  end

  @current_ast_node = @current_ast_node.parent || @ast_root
end

# Add a HeadlineNode under +current_node+; the current node is then reset to
# the document root.
def create_regular_headline(parse_result, caption_node, current_node)
  headline = AST::HeadlineNode.new(
    location: location,
    level: parse_result.level,
    label: parse_result.label,
    caption_node: caption_node,
    tag: parse_result.tag
  )
  current_node.add_child(headline)
  @current_ast_node = @ast_root
end

# Read the block command at the head of +f+ and hand it to the block processor.
def compile_block_command_to_ast(f)
  block_processor.process_block_command(read_block_command(f))
end

# Unordered list: delegated to the list processor.
def compile_ul_to_ast(f)
  list_processor.process_unordered_list(f)
end

# Ordered list: delegated to the list processor.
def compile_ol_to_ast(f)
  list_processor.process_ordered_list(f)
end

# Definition list: delegated to the list processor.
def compile_dl_to_ast(f)
  list_processor.process_definition_list(f)
end

# Choose the node a headline of +level+ should be attached to.
#
# Starting at the current node, climb towards the root and stop at the first
# node that reports a level lower than +level+. Nodes without a level, and
# nodes whose level is the same or higher, are stepped over. A node with no
# parent leads straight to the root.
#
# @param level [Integer] level of the headline being inserted
# @return [Object] the chosen parent (the document root if nothing else fits)
def find_appropriate_parent_for_level(level)
  candidate = @current_ast_node

  until candidate == @ast_root
    own_level = candidate.respond_to?(:level) ? candidate.level : nil
    break if own_level && own_level < level

    candidate = candidate.parent || @ast_root
  end

  candidate
end
# Run every registered post-processor, in order, against the AST root.
# Each entry of @post_processors is a class name resolved with Object.const_get.
def execute_post_processes
  @post_processors.each do |processor_name|
    Object.const_get(processor_name).process(@ast_root, chapter: @chapter, compiler: self)
  end
end

# Strip surrounding whitespace from +line+ but keep the tabs it starts with.
#
# The tab run is anchored with \A, so only tabs at the very start of the
# string count; tabs that follow an embedded newline are not mistaken for
# leading tabs.
#
# @param line [String] the line to process
# @return [String] leading tabs (if any) followed by the stripped line
def strip_preserving_leading_tabs(line)
  line[/\A\t+/].to_s + line.strip
end

# @param line [String] a block command line
# @return [Boolean] true when the line, ignoring trailing whitespace, ends with "{"
def block_open?(line)
  line.rstrip.end_with?('{')
end

# @param command [Symbol] block command name
# @return [Boolean] true when the command is listed in @non_parsed_commands
def preserve_whitespace?(command)
  @non_parsed_commands.include?(command)
end

# Split a bracketed argument string such as "[a][b]" into its arguments.
#
# Inside an argument, "\]" becomes "]" and "\\" becomes "\"; any other
# backslash sequence is kept unchanged.
#
# @param str [String] the text following the command name, without the opening brace
# @param _name [Symbol, nil] command name (unused)
# @return [Array<String>] the arguments; an empty array when +str+ is empty or
#   when any part of it is not a well-formed "[...]" group (no error is reported)
def parse_args(str, _name = nil)
  return [] if str.empty?

  # Loaded here because this block cannot see the file's top-level requires.
  require 'strscan'
  scanner = StringScanner.new(str)
  words = []

  until scanner.eos?
    word = scanner.scan(/(\[\]|\[.*?[^\\]\])/)
    # Anything that is not a bracket group invalidates the whole argument list.
    return [] unless word

    words << word[1..-2].gsub(/\\(.)/) do |escaped|
      ch = escaped[1]
      [']', '\\'].include?(ch) ? ch : escaped
    end
  end

  words
end

# Resolve references in the AST; does nothing when the chapter has no book.
# Emits a warning with the resolved and failed counts when any reference failed.
def resolve_references
  return unless @chapter.book

  result = ReferenceResolver.new(@chapter).resolve_references(@ast_root)

  warn "Reference resolution: #{result[:resolved]} resolved, #{result[:failed]} failed" if result[:failed] > 0
end
class AutoIdProcessor < PostProcessor
  private

  # Start both counters at zero, walk the tree from +node+, and return that
  # same root.
  def process_node(node)
    @ast_root = node
    @nonum_counter = 0
    @column_counter = 0
    visit(@ast_root)
    @ast_root
  end

  # Dispatch on node class; any other node just has its children visited.
  # NOTE(review): the original comment says this overrides `Visitor#visit` to
  # avoid NotImplementedError; the Visitor ancestry is not visible from here.
  def visit(node)
    case node
    when HeadlineNode then visit_headline(node)
    when ColumnNode then visit_column(node)
    when DocumentNode then visit_document(node)
    else
      visit_children(node)
      node
    end
  end

  # Headlines tagged nonum/notoc/nodisp that carry no label get a generated id.
  def visit_headline(node)
    assign_nonum_id(node) if needs_auto_id?(node) && !node.label

    visit_children(node)
    node
  end

  # Every column gets an id and a number taken from the running column counter.
  def visit_column(node)
    @column_counter += 1
    node.column_number = @column_counter
    node.auto_id = "column-#{@column_counter}"

    visit_children(node)
    node
  end

  def visit_document(node)
    visit_children(node)
    node
  end

  # Build "<chapter name>_nonum<N>"; the name falls back to 'test' when no
  # chapter (or no chapter name) is available.
  def assign_nonum_id(node)
    @nonum_counter += 1
    prefix = @chapter&.name || 'test'
    node.auto_id = "#{prefix}_nonum#{@nonum_counter}"
  end

  # True for a HeadlineNode flagged nonum, notoc or nodisp.
  def needs_auto_id?(node)
    return false unless node.is_a?(HeadlineNode)

    node.nonum? || node.notoc? || node.nodisp?
  end

  def visit_children(node)
    node.children.each { |child| visit(child) }
  end
end
module ReVIEW
  module AST
    class Compiler
      # BlockReader - Reads the body of one block command.
      #
      # Content lines are collected until the block's own "//}" is reached.
      # A line that starts another block command is handed back to the
      # compiler, which reads that nested block (including its terminator)
      # from the same input, so every "//}" seen here closes this block.
      class BlockReader
        # @param compiler [Object] must respond to #update_current_location and #read_block_command
        # @param file_input [Object] line source responding to #next? and #gets
        # @param parent_command [Symbol] name of the block being read (used in error text)
        # @param start_location [Object, nil] where the block started; must respond to
        #   #format_for_error when given
        # @param preserve_whitespace [Boolean] keep trailing whitespace on content lines
        def initialize(compiler:, file_input:, parent_command:, start_location:, preserve_whitespace:)
          @compiler = compiler
          @f = file_input
          @parent_command = parent_command
          @start_location = start_location
          @preserve_whitespace = preserve_whitespace
          @lines = []
          @nested_blocks = []
          @closed = false
        end

        # Read block content up to and including the closing "//}".
        #
        # @return [Array(Array<String>, Array<BlockData>)] content lines and nested blocks
        # @raise [CompileError] when the input ends before the block is closed
        def read
          while @f.next?
            process_line(read_line)
            break if @closed
          end

          validate_block_closed!
          [@lines, @nested_blocks]
        end

        private

        # Fetch the next line and move the compiler's location to it.
        def read_line
          line = @f.gets
          unless line
            location_info = @start_location ? @start_location.format_for_error : ''
            raise CompileError, "Unexpected end of file in block //#{@parent_command} started#{location_info}"
          end

          @compiler.update_current_location(@f)
          line
        end

        # Classify a line: terminator, nested command, directive, or content.
        def process_line(line)
          if closing_tag?(line)
            @closed = true
          elsif nested_block_command?(line)
            handle_nested_block(line)
          elsif preprocessor_directive?(line)
            # Preprocessor directives are dropped from the block content.
          else
            @lines << normalize_line(line)
          end
        end

        def closing_tag?(line)
          line.start_with?('//}')
        end

        def nested_block_command?(line)
          line.match?(%r{\A//[a-z]+})
        end

        # Let the compiler read the nested block; its errors are re-raised
        # with a note naming the enclosing block.
        def handle_nested_block(line)
          @nested_blocks << @compiler.read_block_command(@f, line)
        rescue CompileError => e
          raise CompileError, "#{e.message} (in nested block within //#{@parent_command})"
        end

        def preprocessor_directive?(line)
          /\A\#@/.match?(line)
        end

        # Remove the line terminator only (preserve mode) or all trailing whitespace.
        def normalize_line(line)
          @preserve_whitespace ? line.chomp : line.rstrip
        end

        def validate_block_closed!
          return if @closed

          location_info = @start_location ? @start_location.format_for_error : ''
          raise CompileError, "Unclosed block //#{@parent_command} started#{location_info}"
        end
      end
    end
  end
end
class FirstLineNumProcessor < PostProcessor
  private

  # Apply each //firstlinenum command found among +node+'s children to the
  # next CodeBlockNode that follows it, then delete the command nodes.
  # Children that are not //firstlinenum commands are processed recursively.
  def process_node(node)
    consumed = []

    node.children.each_with_index do |child, position|
      unless firstlinenum_command?(child)
        process_node(child)
        next
      end

      first_line = extract_firstlinenum_value(child)
      if first_line
        code_block = find_next_code_block(node.children, position + 1)
        apply_firstlinenum(code_block, first_line) if code_block
      end

      # The command node is removed whether or not it found a target.
      consumed << position
    end

    # Delete from the back so the remaining indices stay valid.
    consumed.reverse_each { |position| node.children.delete_at(position) }
  end

  # True for a BlockNode whose block type is :firstlinenum.
  def firstlinenum_command?(node)
    node.is_a?(BlockNode) && node.block_type == :firstlinenum
  end

  # Read the line number from the command's first argument.
  # @param firstlinenum_node [BlockNode] firstlinenum block node
  # @return [Integer, nil] the argument converted with #to_i; nil when absent
  def extract_firstlinenum_value(firstlinenum_node)
    first_arg = firstlinenum_node.args.first
    first_arg && first_arg.to_i
  end

  # Locate the first CodeBlockNode at or after +start_index+.
  # @param children [Array] sibling nodes
  # @param start_index [Integer] index to start searching from
  # @return [CodeBlockNode, nil] the node found, or nil
  def find_next_code_block(children, start_index)
    children.drop(start_index).find { |sibling| sibling.is_a?(CodeBlockNode) }
  end

  # Store the starting line number on the code block.
  # @param code_block [CodeBlockNode] code block node
  # @param value [Integer] starting line number
  def apply_firstlinenum(code_block, value)
    code_block.first_line_num = value
  end
end
# frozen_string_literal: true

# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
#
# This program is free software.
# You can distribute or modify this program under the terms of
# the GNU LGPL, Lesser General Public License version 2.1.

require 'review/ast/node'
require 'review/ast/list_node'
require_relative 'post_processor'

module ReVIEW
  module AST
    class Compiler
      # ListItemNumberingProcessor - Numbers the items of ordered lists
      #
      # Walks the whole AST; for every ordered list (ol) each ListItemNode
      # child receives an item_number equal to the list's start_number
      # (1 when unset) plus the child's position in the list.
      #
      # Usage:
      #   ListItemNumberingProcessor.process(ast_root)
      class ListItemNumberingProcessor < PostProcessor
        private

        # Number this node's items when it is an ordered list, then descend.
        def process_node(node)
          assign_item_numbers(node) if ordered_list_node?(node)

          node.children.each { |child| process_node(child) }
        end

        # True for a ListNode that reports itself as ordered.
        def ordered_list_node?(node)
          node.is_a?(ListNode) && node.ol?
        end

        # Positions count every child, so a child that is not a ListItemNode
        # still uses up a number.
        def assign_item_numbers(list_node)
          first_number = list_node.start_number || 1

          list_node.children.each_with_index do |item, offset|
            item.item_number = first_number + offset if item.is_a?(ListItemNode)
          end
        end
      end
    end
  end
end
# You can distribute or modify this program under the terms of
# the GNU LGPL, Lesser General Public License version 2.1.

require 'review/ast/node'
require 'review/ast/block_node'
require 'review/ast/list_node'
require 'review/ast/paragraph_node'
require 'review/ast/text_node'
require 'review/ast/inline_processor'
require_relative 'post_processor'

module ReVIEW
  module AST
    class Compiler
      # ListStructureNormalizer - Processes //beginchild and //endchild commands in AST
      #
      # This processor transforms the flat structure created by //beginchild and //endchild
      # into proper nested list structures. It also handles definition list paragraph splitting.
      #
      # Processing:
      # 1. Finds //beginchild and //endchild block pairs
      # 2. Moves nodes between them into the last list item
      # 3. Removes the //beginchild and //endchild block nodes
      # 4. Merges consecutive lists of the same type
      # 5. Splits definition list paragraphs into separate terms
      #
      # Execution Order (in AST::Compiler):
      # 1. OlnumProcessor - Sets start_number on ordered lists
      # 2. ListStructureNormalizer - Normalizes list structure (this class)
      # 3. ListItemNumberingProcessor - Assigns item_number to each list item
      #
      # This processor only handles structural transformations and does not deal with
      # item numbering. Item numbers are assigned later by ListItemNumberingProcessor
      # based on the normalized structure.
      #
      # Usage:
      #   ListStructureNormalizer.process(ast_root)
      class ListStructureNormalizer < PostProcessor
        private

        # Entry point called by PostProcessor#process.
        def process_node(node)
          normalize_node(node)
        end

        # Rebuild +node+'s children in place.
        #
        # The children are scanned left to right while remembering the most
        # recent non-empty list (last_list_context). A //beginchild moves the
        # nodes up to its matching //endchild into that list's last item; a
        # paragraph that looks like definition-list text and directly follows
        # a dl is folded into that dl. Everything else is normalized
        # recursively and kept. Adjacent lists of the same type are merged at
        # the end.
        #
        # @raise [ReVIEW::ApplicationError] for a //beginchild with no
        #   preceding list, or an //endchild with no open //beginchild
        def normalize_node(node)
          return if node.children.empty?

          normalized_children = []
          # Work on a copy; node.children itself is only replaced at the end.
          children = node.children.dup
          idx = 0
          last_list_context = nil

          while idx < children.size
            child = children[idx]

            if beginchild_block?(child)
              unless last_list_context
                raise ReVIEW::ApplicationError, "//beginchild is shown, but previous element isn't ul, ol, or dl"
              end

              # idx is advanced past the matching //endchild by the helper.
              nested_nodes, idx = extract_nested_child_sequence(children, idx, last_list_context)
              nested_nodes.each { |nested| normalize_node(nested) }
              nested_nodes.each { |nested| last_list_context[:item].add_child(nested) }
              # Inner //beginchild and //endchild markers were moved along with the
              # nested nodes; normalizing the item resolves them one level down.
              normalize_node(last_list_context[:item])
              last_list_context[:item] = last_list_context[:list_node].children.last
              next
            end

            if endchild_block?(child)
              raise ReVIEW::ApplicationError, "//endchild is shown, but any opened //beginchild doesn't exist"
            end

            if paragraph_node?(child) &&
               last_list_context &&
               last_list_context[:list_type] == :dl &&
               definition_paragraph?(child)
              # The paragraph is absorbed into the dl and is not kept as a child.
              transfer_definition_paragraph(last_list_context, child)
              last_list_context[:item] = last_list_context[:list_node].children.last
              idx += 1
              next
            end

            normalize_node(child)
            normalized_children << child
            # Becomes nil again unless this child is a non-empty list.
            last_list_context = last_list_context_for(child)
            idx += 1
          end

          node.children.replace(merge_consecutive_lists(normalized_children))
        end

        # Collect the nodes between the //beginchild at +begin_index+ and its
        # matching //endchild. The outer pair is dropped; nested marker nodes
        # stay in the collected sequence.
        #
        # @param children [Array] sibling nodes being scanned
        # @param begin_index [Integer] index of the opening //beginchild
        # @param initial_list_context [Hash, nil] context of the enclosing list,
        #   used only to seed the list types named in the error message
        # @return [Array] the collected nodes and the index just after the
        #   matching //endchild
        # @raise [ReVIEW::ApplicationError] when no matching //endchild exists
        def extract_nested_child_sequence(children, begin_index, initial_list_context = nil)
          collected = []
          depth = 1
          idx = begin_index + 1
          # Track list types for better error messages
          list_type_stack = initial_list_context ? [initial_list_context[:list_type]] : []

          while idx < children.size
            current = children[idx]

            if beginchild_block?(current)
              depth += 1
            elsif endchild_block?(current)
              depth -= 1
              if depth == 0
                idx += 1
                return [collected, idx]
              end
              # Pop from stack when we close a nested beginchild
              list_type_stack.pop unless list_type_stack.empty?
            end

            # Track list types as we encounter them
            if current.is_a?(ReVIEW::AST::ListNode) && current.children.any?
              list_type_stack.push(current.list_type)
            end

            collected << current
            idx += 1
          end

          # Reaching this point means the siblings ran out before the matching //endchild.
          # Generate error message with tracked list types
          if list_type_stack.empty?
            raise ReVIEW::ApplicationError, '//beginchild of dl,ol,ul misses //endchild'
          else
            # Reverse to show the order like Builder does (most recent first)
            types = list_type_stack.reverse.join(',')
            raise ReVIEW::ApplicationError, "//beginchild of #{types} misses //endchild"
          end
        end

        # True for a BlockNode whose block type is :beginchild.
        def beginchild_block?(node)
          node.is_a?(ReVIEW::AST::BlockNode) && node.block_type == :beginchild
        end

        # True for a BlockNode whose block type is :endchild.
        def endchild_block?(node)
          node.is_a?(ReVIEW::AST::BlockNode) && node.block_type == :endchild
        end

        def paragraph_node?(node)
          node.is_a?(ReVIEW::AST::ParagraphNode)
        end

        # True when any line of the paragraph's text starts, after optional
        # whitespace, with ":" or a tab.
        def definition_paragraph?(paragraph)
          text = paragraph_text(paragraph)
          text.lines.any? { |line| line =~ /\A\s*[:\t]/ }
        end

        # Describe +node+ as the current list context, or return nil when it
        # is not a list or has no items.
        #
        # @return [Hash, nil] the keys are :item (last item), :list_node and :list_type
        def last_list_context_for(node)
          return nil unless node.is_a?(ReVIEW::AST::ListNode) && node.children.any?

          {
            item: node.children.last,
            list_node: node,
            list_type: node.list_type
          }
        end

        # Fold each list into the preceding one when both have the same list
        # type; every other node is kept as-is.
        def merge_consecutive_lists(children)
          merged = []

          children.each do |child|
            if child.is_a?(ReVIEW::AST::ListNode) &&
               merged.last.is_a?(ReVIEW::AST::ListNode) &&
               merged.last.list_type == child.list_type
              # Merge the children from the second list into the first
              # Note: item_number will be assigned later by ListItemNumberingProcessor
              child.children.each do |item|
                merged.last.add_child(item)
              end
            else
              merged << child
            end
          end

          merged
        end

        # Fold a paragraph's text into the definition list described by +context+.
        #
        # A line starting with ":" (after optional whitespace) becomes a new
        # list item with that line as its term; any other non-blank line is
        # parsed as inline content and appended to the item currently being built.
        #
        # @param context [Hash] list context as built by last_list_context_for
        # @param paragraph [ParagraphNode] the paragraph being absorbed
        def transfer_definition_paragraph(context, paragraph)
          list_node = context[:list_node]
          current_item = context[:item]
          text = paragraph_text(paragraph)

          text.each_line do |line|
            stripped = line.strip
            next if stripped.empty?

            if line.lstrip.start_with?(':')
              term_text = line.sub(/\A\s*:\s*/, '').strip
              term_children = parse_inline_nodes(term_text)
              new_item = ReVIEW::AST::ListItemNode.new(location: paragraph.location, level: 1, term_children: term_children)
              list_node.add_child(new_item)
              current_item = new_item
            else
              inline_nodes = parse_inline_nodes(stripped)
              # Fall back to a plain text node when parsing produced nothing.
              inline_nodes = [ReVIEW::AST::TextNode.new(content: stripped)] if inline_nodes.empty?
              inline_nodes.each { |node| current_item.add_child(node) }
            end
          end

          context[:item] = list_node.children.last
        end

        # Concatenate the content of the paragraph's leaf children; children
        # that are not leaves contribute an empty string.
        def paragraph_text(paragraph)
          paragraph.children.map do |child|
            if child.leaf_node?
              child.content
            else
              ''
            end
          end.join
        end

        # Parse +text+ with the compiler's inline processor and return the
        # resulting nodes (an empty array for nil or empty text). A throwaway
        # ParagraphNode without a location serves as the parse target.
        def parse_inline_nodes(text)
          return [] if text.nil? || text.empty?

          temp_node = ReVIEW::AST::ParagraphNode.new(location: nil)
          @compiler.inline_processor.parse_inline_elements(text, temp_node)
          temp_node.children
        end
      end
    end
  end
end
class NoindentProcessor < PostProcessor
  private

  # Walk the children from last to first so a //noindent node can be deleted
  # without disturbing the indices still to be visited. Each //noindent marks
  # the next eligible sibling after it and is then removed; every other child
  # is processed recursively.
  def process_node(node)
    (node.children.length - 1).downto(0) do |position|
      entry = node.children[position]

      unless noindent_command?(entry)
        process_node(entry)
        next
      end

      recipient = find_next_target_node(node.children, position + 1)
      recipient.add_attribute(:noindent, true) if recipient

      # The command node is removed whether or not a recipient was found.
      node.children.delete_at(position)
    end
  end

  # True for a BlockNode whose block type is :noindent.
  def noindent_command?(node)
    node.is_a?(BlockNode) && node.block_type == :noindent
  end

  # First sibling at or after +start_index+ that accepts noindent, or nil.
  def find_next_target_node(children, start_index)
    children.drop(start_index).find { |sibling| target_node_for_noindent?(sibling) }
  end

  # Paragraphs accept noindent, and so do quote, lead, flushright and
  # flushleft blocks. Extend the list here to support more block types.
  def target_node_for_noindent?(node)
    return true if node.is_a?(ParagraphNode)

    node.is_a?(BlockNode) && %i[quote lead flushright flushleft].include?(node.block_type)
  end
end
require 'review/ast/node'
require 'review/ast/block_node'
require 'review/ast/list_node'
require_relative 'post_processor'

module ReVIEW
  module AST
    class Compiler
      # OlnumProcessor - Processes //olnum commands in AST
      #
      # Each //olnum block command sets the starting number of the next
      # ordered list among the siblings that follow it. The //olnum node is
      # always removed, even when no ordered list follows. A second pass
      # fills in olnum_start on ordered lists that no //olnum touched.
      #
      # Usage:
      #   OlnumProcessor.process(ast_root)
      class OlnumProcessor < PostProcessor
        private

        # Two passes over the tree: apply //olnum commands, then fill in defaults.
        def process_node(node)
          process_olnum(node)
          add_olnum_starts(node)
        end

        # Apply and remove the //olnum commands among +node+'s children;
        # children that are not //olnum commands are processed recursively.
        def process_olnum(node)
          consumed = []

          node.children.each_with_index do |child, position|
            unless olnum_command?(child)
              process_olnum(child)
              next
            end

            list = find_next_ordered_list(node.children, position + 1)
            if list
              number = extract_olnum_value(child)
              list.start_number = number
              # olnum_start records that the number came from an explicit //olnum.
              list.olnum_start = number
            end

            consumed << position
          end

          # Delete from the back so the remaining indices stay valid.
          consumed.reverse_each { |position| node.children.delete_at(position) }
        end

        # Give every ordered list without an olnum_start a value: its start
        # number when the items are numbered consecutively from it, otherwise 1.
        def add_olnum_starts(node)
          if ordered_list_node?(node) && node.olnum_start.nil?
            first_number = node.start_number || 1
            node.olnum_start = consecutively_numbered?(node, first_number) ? first_number : 1
          end

          node.children.each { |child| add_olnum_starts(child) }
        end

        # True when every ListItemNode child either has no number or carries
        # first_number plus its position; other children are ignored.
        def consecutively_numbered?(list_node, first_number)
          list_node.children.each_with_index.all? do |item, offset|
            next true unless item.is_a?(ListItemNode)

            number = item.number
            !number || number == first_number + offset
          end
        end

        # True for a BlockNode whose block type is :olnum.
        def olnum_command?(node)
          node.is_a?(BlockNode) && node.block_type == :olnum
        end

        # First ordered list at or after +start_index+, or nil.
        def find_next_ordered_list(children, start_index)
          children.drop(start_index).find { |sibling| ordered_list_node?(sibling) }
        end

        # True for a ListNode that reports itself as ordered.
        def ordered_list_node?(node)
          node.is_a?(ListNode) && node.ol?
        end

        # First argument converted with #to_i; 1 when there is no argument.
        def extract_olnum_value(olnum_node)
          (olnum_node.args.first || 1).to_i
        end
      end
    end
  end
end
+ # + # Post-processors are executed in order after AST construction to: + # - Apply control commands (tsize, firstlinenum, noindent, olnum) + # - Normalize structures (list nesting) + # - Generate metadata (auto IDs, item numbers) + class PostProcessor + def self.process(ast_root, chapter:, compiler:) + new(chapter: chapter, compiler: compiler).process(ast_root) + end + + def initialize(chapter:, compiler:) + @chapter = chapter + @compiler = compiler + end + + def process(ast_root) + process_node(ast_root) + end + + private + + def process_node(_node) + raise NotImplementedError + end + end + end + end +end diff --git a/lib/review/ast/compiler/tsize_processor.rb b/lib/review/ast/compiler/tsize_processor.rb new file mode 100644 index 000000000..349752121 --- /dev/null +++ b/lib/review/ast/compiler/tsize_processor.rb @@ -0,0 +1,134 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'review/ast/node' +require 'review/ast/block_node' +require 'review/ast/table_node' +require_relative 'post_processor' + +module ReVIEW + module AST + class Compiler + # TsizeProcessor - Processes //tsize commands in AST + # + # This processor finds //tsize block commands and applies column width + # information to the next TableNode. The //tsize block node itself is + # removed from the AST. 
+ # + # Usage: + # TsizeProcessor.process(ast_root, chapter: chapter) + class TsizeProcessor < PostProcessor + def initialize(chapter:, compiler:) + super + @target_format = determine_target_format(chapter) + end + + private + + # Determine target format for tsize processing from chapter's book config + # @param chapter [Chapter, nil] chapter object + # @return [String, nil] builder name or nil + def determine_target_format(chapter) + return nil unless chapter&.book&.config + + # Check if builder is specified in config + builder = chapter.book.config['builder'] + return builder if builder + + # If builder is not explicitly set, return nil + # This causes TsizeProcessor to apply all tsize commands (no filtering) + # which maintains backward compatibility + nil + end + + def process_node(node) + indices_to_remove = [] + + node.children.each_with_index do |child, idx| + if tsize_command?(child) + # Extract tsize value (considering target specification) + tsize_value = extract_tsize_value(child) + + if tsize_value + # Find the next TableNode + target_table = find_next_table(node.children, idx + 1) + if target_table + apply_tsize_to_table(target_table, tsize_value) + end + end + + # Mark tsize node for removal + indices_to_remove << idx + else + # Recursively process child nodes + process_node(child) + end + end + + # Remove marked nodes in reverse order to avoid index shifting + indices_to_remove.reverse_each do |idx| + node.children.delete_at(idx) + end + end + + def tsize_command?(node) + node.is_a?(BlockNode) && node.block_type == :tsize + end + + # Extract tsize value from tsize node, considering target specification + # @param tsize_node [BlockNode] tsize block node + # @return [String, nil] tsize value or nil if not applicable to target format + def extract_tsize_value(tsize_node) + arg = tsize_node.args.first + return nil unless arg + + # Parse target specification format: |latex,html|value + # Target names are multi-character words (latex, html, idgxml, etc.) 
+ # LaTeX column specs like |l|c|r| are NOT target specifications + # We distinguish by checking if the first part contains only builder names (words with 2+ chars) + if matched = arg.match(/\A\|([a-z]{2,}(?:\s*,\s*[a-z]{2,})*)\|(.*)/) + # This is a target specification like |latex,html|10,20,30 + targets = matched[1].split(',').map(&:strip) + value = matched[2] + + # Check if current format is in the target list + # If target_format is nil, we can't determine if this should be applied + # so we return nil (skip it) + return nil if @target_format.nil? + + return targets.include?(@target_format) ? value : nil + else + # Generic format (applies to all formats) + # This includes LaTeX column specs like |l|c|r| which should be used as-is + arg + end + end + + # Find the next TableNode in children array + # @param children [Array] array of child nodes + # @param start_index [Integer] index to start searching from + # @return [TableNode, nil] next TableNode or nil if not found + def find_next_table(children, start_index) + (start_index...children.length).each do |j| + node = children[j] + return node if node.is_a?(TableNode) + end + nil + end + + # Apply tsize specification to table node + # @param table_node [TableNode] table node to apply tsize to + # @param tsize_value [String] tsize specification string + def apply_tsize_to_table(table_node, tsize_value) + # Use TableNode's built-in tsize parsing method + table_node.parse_and_set_tsize(tsize_value) + end + end + end + end +end diff --git a/lib/review/ast/diff/html.rb b/lib/review/ast/diff/html.rb new file mode 100644 index 000000000..f57c18d35 --- /dev/null +++ b/lib/review/ast/diff/html.rb @@ -0,0 +1,145 @@ +# frozen_string_literal: true + +require 'nokogiri' +require 'diff/lcs' +require 'digest' +require_relative 'result' + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. 
+# You can distribute or modify this program under the terms of
+# the GNU LGPL, Lesser General Public License version 2.1.
+
+module ReVIEW
+  module AST
+    module Diff
+      # Html comparator for semantic HTML comparison
+      #
+      # Parses HTML, normalizes whitespace and attributes, tokenizes structure,
+      # and compares using hash-based comparison for efficiency.
+      #
+      # Equality is decided by the SHA1 signature of the token stream; the
+      # Diff::LCS change list is only used to render a readable diff.
+      class Html
+        # Elements whose descendant text nodes keep their whitespace untouched
+        SIGNIFICANT_WS = %w[pre textarea script style code].freeze
+        # Elements emitted as a single :void token (their children are not tokenized)
+        VOID_ELEMENTS = %w[area base br col embed hr img input link meta param source track wbr].freeze
+
+        # Intermediate data produced by #prepare for one side of the comparison
+        PreparedData = Struct.new(:tokens, :signature, :doc, keyword_init: true)
+
+        def initialize
+          # No options needed for HTML comparison
+        end
+
+        # Compare two HTML strings
+        # @param left [String] First HTML content
+        # @param right [String] Second HTML content
+        # @return [Result] Comparison result
+        def compare(left, right)
+          left_data = prepare(left)
+          right_data = prepare(right)
+
+          # sdiff works on the token arrays, so each change refers to one token
+          changes = ::Diff::LCS.sdiff(left_data[:tokens], right_data[:tokens])
+
+          Result.new(left_data[:signature], right_data[:signature], changes)
+        end
+
+        # Quick equality check
+        # @param left [String] First HTML content
+        # @param right [String] Second HTML content
+        # @return [Boolean] true if contents are equivalent
+        def equal?(left, right)
+          compare(left, right).equal?
+        end
+
+        # Get pretty diff output
+        # @param left [String] First HTML content
+        # @param right [String] Second HTML content
+        # @return [String] Formatted diff
+        def diff(left, right)
+          compare(left, right).pretty_diff
+        end
+
+        private
+
+        # Parse, canonicalize and tokenize one HTML string
+        # @param html [String] HTML content
+        # @return [PreparedData] tokens, their signature and the canonicalized document
+        def prepare(html)
+          doc = canonicalize(parse_html(html))
+          tokens = tokenize(doc)
+          signature = subtree_hash(tokens)
+
+          PreparedData.new(tokens: tokens, signature: signature, doc: doc)
+        end
+
+        # Parse the string with Nokogiri's HTML5 parser
+        def parse_html(html)
+          Nokogiri::HTML5.parse(html)
+        end
+
+        # Normalize the parsed document in place and return it:
+        # - comments are removed
+        # - text outside SIGNIFICANT_WS elements has whitespace runs collapsed
+        #   to one space and is stripped; text that becomes empty is removed
+        # - attribute names containing uppercase characters are re-added in lowercase
+        # - class values are split, de-duplicated and sorted; an empty class
+        #   attribute is dropped
+        def canonicalize(doc)
+          remove_comment!(doc)
+
+          doc.traverse do |node|
+            next unless node.text? || node.element?
+
+            if node.text?
+              # Whitespace is preserved when any ancestor is a SIGNIFICANT_WS element
+              preserve = node.ancestors.any? { |a| SIGNIFICANT_WS.include?(a.name) }
+              unless preserve
+                text = node.text.gsub(/\s+/, ' ').strip
+                if text.empty?
+                  node.remove
+                else
+                  node.content = text
+                end
+              end
+            elsif node.element?
+              node.attribute_nodes.each do |attr|
+                # Already lowercase: nothing to rename
+                next if attr.name == attr.name.downcase
+
+                node.delete(attr.name)
+                node[attr.name.downcase] = attr.value
+              end
+
+              if node['class']
+                classes = node['class'].split(/\s+/).reject(&:empty?).uniq.sort
+                if classes.empty?
+                  node.remove_attribute('class')
+                else
+                  node['class'] = classes.join(' ')
+                end
+              end
+            end
+          end
+
+          doc
+        end
+
+        # Remove every comment node from the document
+        def remove_comment!(doc)
+          doc.xpath('//comment()').remove
+        end
+
+        # Structured token array
+        # [:start, tag_name, [[attr, val], ...]] / [:end, tag_name] / [:void, tag_name, [[attr, val], ...]] / [:text, "content"]
+        #
+        # Walks the children of +node+ depth-first and appends tokens to +acc+.
+        # Attributes are sorted by name; nodes that are neither elements nor
+        # non-empty text are skipped.
+        # @param node [Object] node whose children are tokenized
+        # @param acc [Array] accumulator shared by the recursive calls
+        # @return [Array] acc
+        def tokenize(node, acc = [])
+          node.children.each do |n|
+            if n.element?
+              attrs = n.attribute_nodes.map { |a| [a.name, a.value] }.sort_by { |k, _| k }
+              if VOID_ELEMENTS.include?(n.name)
+                acc << [:void, n.name, attrs]
+              else
+                acc << [:start, n.name, attrs]
+                tokenize(n, acc)
+                acc << [:end, n.name]
+              end
+            elsif n.text?
+              t = n.text
+              next if t.nil? || t.empty?
+
+              acc << [:text, t]
+            end
+          end
+          acc
+        end
+
+        # SHA1 hex digest of the token stream.
+        # Fields of a token are joined with U+241F and tokens with U+241E;
+        # Array#join also flattens the nested [attr, val] pairs with U+241F.
+        def subtree_hash(tokens)
+          Digest::SHA1.hexdigest(tokens.map { |t| t.join("\u241F") }.join("\u241E"))
+        end
+      end
+    end
+  end
+end
diff --git a/lib/review/ast/diff/idgxml.rb b/lib/review/ast/diff/idgxml.rb
new file mode 100644
index 000000000..faf035709
--- /dev/null
+++ b/lib/review/ast/diff/idgxml.rb
@@ -0,0 +1,173 @@
+# frozen_string_literal: true
+
+require 'nokogiri'
+require 'diff/lcs'
+require 'digest'
+require_relative 'result'
+
+# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
+#
+# This program is free software.
+# You can distribute or modify this program under the terms of
+# the GNU LGPL, Lesser General Public License version 2.1.
+ +module ReVIEW + module AST + module Diff + # Idgxml comparator for semantic IDGXML comparison + # + # Handles IDGXML-specific features like InDesign namespaces (aid:, aid5:) + # and processing instructions while normalizing for comparison. + class Idgxml + # Elements where whitespace is significant + SIGNIFICANT_WS = %w[code pre].freeze + + # Self-closing elements (void elements) in IDGXML + VOID_ELEMENTS = %w[br label index].freeze + + PreparedData = Struct.new(:tokens, :signature, :doc, keyword_init: true) + + def initialize + # No options needed for IDGXML comparison + end + + # Compare two IDGXML strings + # @param left [String] First IDGXML content + # @param right [String] Second IDGXML content + # @return [Result] Comparison result + def compare(left, right) + left_data = prepare(left) + right_data = prepare(right) + + changes = ::Diff::LCS.sdiff(left_data[:tokens], right_data[:tokens]) + + Result.new(left_data[:signature], right_data[:signature], changes) + end + + # Quick equality check + # @param left [String] First IDGXML content + # @param right [String] Second IDGXML content + # @return [Boolean] true if contents are equivalent + def equal?(left, right) + compare(left, right).equal? + end + + # Get pretty diff output + # @param left [String] First IDGXML content + # @param right [String] Second IDGXML content + # @return [String] Formatted diff + def diff(left, right) + compare(left, right).pretty_diff + end + + private + + def prepare(idgxml) + doc = canonicalize(parse_xml(idgxml)) + tokens = tokenize(doc) + signature = subtree_hash(tokens) + + PreparedData.new(tokens: tokens, signature: signature, doc: doc) + end + + def parse_xml(idgxml) + # Wrap in a root element if not already wrapped + # IDGXML fragments may not have a single root + wrapped = "#{idgxml}" + Nokogiri::XML(wrapped) do |config| + config.noblanks.nonet + end + end + + def canonicalize(doc) + remove_comment!(doc) + + doc.traverse do |node| + next unless node.text? || node.element? 
|| node.processing_instruction? + + if node.text? + preserve = node.ancestors.any? { |a| SIGNIFICANT_WS.include?(a.name) } + unless preserve + # Normalize whitespace + text = node.text.gsub(/\s+/, ' ').strip + if text.empty? + node.remove + else + node.content = text + end + end + elsif node.element? + # Normalize attribute names to lowercase and sort + node.attribute_nodes.each do |attr| + # Keep namespace prefixes as-is (aid:, aid5:) + # Only normalize the local name part + next if attr.name == attr.name.downcase + + node.delete(attr.name) + node[attr.name.downcase] = attr.value + end + + # Normalize class attribute if present + if node['class'] + classes = node['class'].split(/\s+/).reject(&:empty?).uniq.sort + if classes.empty? + node.remove_attribute('class') + else + node['class'] = classes.join(' ') + end + end + elsif node.processing_instruction? + # Processing instructions like + # Normalize the content by sorting attributes + # This is important for IDGXML comparison + content = node.content + # Parse key="value" pairs and sort them + pairs = content.scan(/(\w+)="([^"]*)"/) + if pairs.any? + sorted_content = pairs.sort_by { |k, _v| k }.map { |k, v| %Q(#{k}="#{v}") }.join(' ') + node.content = sorted_content + end + end + end + + doc + end + + def remove_comment!(doc) + doc.xpath('//comment()').remove + end + + # Structured token array + # [:start, tag_name, [[attr, val], ...]] / [:end, tag_name] / [:void, tag_name, [[attr, val], ...]] / [:text, "content"] / [:pi, target, content] + def tokenize(node, acc = []) + node.children.each do |n| + if n.element? + attrs = n.attribute_nodes.map { |a| [a.name, a.value] }.sort_by { |k, _| k } + if VOID_ELEMENTS.include?(n.name) + acc << [:void, n.name, attrs] + else + acc << [:start, n.name, attrs] + tokenize(n, acc) + acc << [:end, n.name] + end + elsif n.text? + t = n.text + next if t.nil? || t.empty? + + acc << [:text, t] + elsif n.processing_instruction? 
+ # Include processing instructions in tokens + # Format: [:pi, target, content] + acc << [:pi, n.name, n.content] + end + end + acc + end + + def subtree_hash(tokens) + Digest::SHA1.hexdigest(tokens.map { |t| t.join("\u241F") }.join("\u241E")) + end + end + end + end +end diff --git a/lib/review/ast/diff/latex.rb b/lib/review/ast/diff/latex.rb new file mode 100644 index 000000000..f7251c339 --- /dev/null +++ b/lib/review/ast/diff/latex.rb @@ -0,0 +1,117 @@ +# frozen_string_literal: true + +require 'diff/lcs' +require_relative 'result' + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +module ReVIEW + module AST + module Diff + # Latex comparator with configurable normalization options + # + # Compares LaTeX strings with options to ignore whitespace differences, + # blank lines, and normalize command formatting. + class Latex + # @param ignore_whitespace [Boolean] Normalize whitespace for comparison + # @param ignore_blank_lines [Boolean] Remove blank lines before comparison + # @param ignore_paragraph_breaks [Boolean] Normalize paragraph breaks + # @param normalize_commands [Boolean] Normalize LaTeX command formatting + def initialize(ignore_whitespace: true, ignore_blank_lines: true, + ignore_paragraph_breaks: true, normalize_commands: true) + @ignore_whitespace = ignore_whitespace + @ignore_blank_lines = ignore_blank_lines + @ignore_paragraph_breaks = ignore_paragraph_breaks + @normalize_commands = normalize_commands + end + + # Compare two LaTeX strings + # @param left [String] First LaTeX content + # @param right [String] Second LaTeX content + # @return [Result] Comparison result + def compare(left, right) + normalized_left = normalize_latex(left) + normalized_right = normalize_latex(right) + + # Generate line-by-line diff + lines_left = normalized_left.split("\n") + lines_right = 
normalized_right.split("\n") + changes = ::Diff::LCS.sdiff(lines_left, lines_right) + + # For LaTeX, signatures are the normalized strings themselves + Result.new(normalized_left, normalized_right, changes) + end + + # Quick equality check + # @param left [String] First LaTeX content + # @param right [String] Second LaTeX content + # @return [Boolean] true if contents are equivalent + def equal?(left, right) + compare(left, right).equal? + end + + # Get pretty diff output + # @param left [String] First LaTeX content + # @param right [String] Second LaTeX content + # @return [String] Formatted diff + def diff(left, right) + compare(left, right).pretty_diff + end + + private + + # Normalize LaTeX string for comparison + def normalize_latex(latex) + return '' if latex.nil? || latex.empty? + + normalized = latex.dup + + # Handle paragraph breaks before removing blank lines + if @ignore_paragraph_breaks + # Normalize paragraph breaks (multiple newlines) to single newlines + normalized = normalized.gsub(/\n\n+/, "\n") + end + + if @ignore_blank_lines + # Remove blank lines (but preserve paragraph structure if configured) + lines = normalized.split("\n") + lines = lines.reject { |line| line.strip.empty? 
} + normalized = lines.join("\n") + end + + if @ignore_whitespace + # Normalize whitespace around commands + normalized = normalized.gsub(/\s*\\\s*/, '\\') + # Normalize multiple spaces + normalized = normalized.gsub(/\s+/, ' ') + # Remove leading/trailing whitespace from lines + lines = normalized.split("\n") + lines = lines.map(&:strip) + normalized = lines.join("\n") + # Remove leading/trailing whitespace + normalized = normalized.strip + end + + if @normalize_commands + # Normalize command spacing + normalized = normalized.gsub(/\\([a-zA-Z]+)\s*\{/, '\\\\\\1{') + # Normalize environment spacing + normalized = normalized.gsub(/\\(begin|end)\s*\{([^}]+)\}/, '\\\\\\1{\\2}') + # Add newlines around \begin{...} and \end{...} + # This makes diffs more readable by putting each environment on its own line + normalized = normalized.gsub(/([^\n])\\begin\{/, "\\1\n\\\\begin{") + normalized = normalized.gsub(/\\begin\{([^}]+)\}([^\n])/, "\\\\begin{\\1}\n\\2") + normalized = normalized.gsub(/([^\n])\\end\{/, "\\1\n\\\\end{") + normalized = normalized.gsub(/\\end\{([^}]+)\}([^\n])/, "\\\\end{\\1}\n\\2") + end + + normalized + end + end + end + end +end diff --git a/lib/review/ast/diff/markdown.rb b/lib/review/ast/diff/markdown.rb new file mode 100644 index 000000000..26755260e --- /dev/null +++ b/lib/review/ast/diff/markdown.rb @@ -0,0 +1,120 @@ +# frozen_string_literal: true + +require 'diff/lcs' +require_relative 'result' + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +module ReVIEW + module AST + module Diff + # Markdown comparator with configurable normalization options + # + # Compares Markdown strings with options to ignore whitespace differences, + # blank lines, and normalize formatting. 
+      class Markdown
+        # @param ignore_whitespace [Boolean] Normalize whitespace for comparison
+        # @param ignore_blank_lines [Boolean] Remove blank lines before comparison
+        # @param ignore_paragraph_breaks [Boolean] Normalize paragraph breaks
+        # @param normalize_headings [Boolean] Normalize heading formatting
+        # @param normalize_lists [Boolean] Normalize list formatting
+        def initialize(ignore_whitespace: true, ignore_blank_lines: true,
+                       ignore_paragraph_breaks: true, normalize_headings: true,
+                       normalize_lists: true)
+          @ignore_whitespace = ignore_whitespace
+          @ignore_blank_lines = ignore_blank_lines
+          @ignore_paragraph_breaks = ignore_paragraph_breaks
+          @normalize_headings = normalize_headings
+          @normalize_lists = normalize_lists
+        end
+
+        # Compare two Markdown strings
+        #
+        # Both sides are normalized first; the diff is computed line by line
+        # on the normalized text.
+        # @param left [String] First Markdown content
+        # @param right [String] Second Markdown content
+        # @return [Result] Comparison result
+        def compare(left, right)
+          normalized_left = normalize_markdown(left)
+          normalized_right = normalize_markdown(right)
+
+          # Generate line-by-line diff
+          lines_left = normalized_left.split("\n")
+          lines_right = normalized_right.split("\n")
+          changes = ::Diff::LCS.sdiff(lines_left, lines_right)
+
+          # For Markdown, signatures are the normalized strings themselves
+          Result.new(normalized_left, normalized_right, changes)
+        end
+
+        # Quick equality check
+        # @param left [String] First Markdown content
+        # @param right [String] Second Markdown content
+        # @return [Boolean] true if contents are equivalent
+        def equal?(left, right)
+          compare(left, right).equal?
+        end
+
+        # Get pretty diff output
+        # @param left [String] First Markdown content
+        # @param right [String] Second Markdown content
+        # @return [String] Formatted diff
+        def diff(left, right)
+          compare(left, right).pretty_diff
+        end
+
+        private
+
+        # Normalize Markdown string for comparison
+        #
+        # The enabled steps run in a fixed order: paragraph breaks, blank
+        # lines, whitespace, headings, lists.
+        # @param markdown [String, nil] Markdown content
+        # @return [String] normalized content ('' for nil or empty input)
+        def normalize_markdown(markdown)
+          return '' if markdown.nil? || markdown.empty?
+
+          normalized = markdown.dup
+
+          # Handle paragraph breaks before removing blank lines
+          if @ignore_paragraph_breaks
+            # Normalize paragraph breaks (multiple newlines) to double newlines
+            normalized = normalized.gsub(/\n\n+/, "\n\n")
+          end
+
+          if @ignore_blank_lines
+            # Remove every blank or whitespace-only line. This also removes the
+            # paragraph-break lines produced by the step above.
+            lines = normalized.split("\n")
+            lines = lines.reject { |line| line.strip.empty? }
+            normalized = lines.join("\n")
+          end
+
+          if @ignore_whitespace
+            # Normalize multiple spaces to single space
+            normalized = normalized.gsub(/[ \t]+/, ' ')
+            # Remove leading/trailing whitespace from lines
+            lines = normalized.split("\n")
+            lines = lines.map(&:strip)
+            normalized = lines.join("\n")
+            # Remove leading/trailing whitespace from entire content
+            normalized = normalized.strip
+          end
+
+          if @normalize_headings
+            # Normalize ATX-style headings (ensure space after #)
+            normalized = normalized.gsub(/^(#+)([^# \n])/, '\1 \2')
+            # Remove an optional closing sequence of #s. A closing sequence only
+            # counts when whitespace separates it from the heading text, so
+            # "# C#" keeps its final '#' and stays different from "# C".
+            # [ \t] (instead of \s) keeps the match on a single line, so a
+            # following line made only of #s is never swallowed.
+            normalized = normalized.gsub(/^(#+[ \t]+.+?)[ \t]+#+[ \t]*$/, '\1')
+          end
+
+          if @normalize_lists
+            # Normalize unordered list markers (* - +) to consistent marker (*)
+            normalized = normalized.gsub(/^(\s*)[-+]\s+/, '\1* ')
+            # Normalize list item spacing (ensure single space after marker)
+            normalized = normalized.gsub(/^(\s*[*\-+])\s+/, '\1 ')
+            normalized = normalized.gsub(/^(\s*\d+\.)\s+/, '\1 ')
+          end
+
+          normalized
+        end
+      end
+    end
+  end
+end
diff --git a/lib/review/ast/diff/node.rb b/lib/review/ast/diff/node.rb
new file mode 100644
index 000000000..6e01dffa0
--- /dev/null
+++ b/lib/review/ast/diff/node.rb
@@ -0,0 +1,323 @@
+# frozen_string_literal: true
+
+# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
+#
+# This program is free software.
+# You can distribute or modify this program under the terms of
+# the GNU LGPL, Lesser General Public License version 2.1.
+
+require_relative '../visitor'
+
+module ReVIEW
+  module AST
+    module Diff
+      # Compares two AST nodes for structural equivalence using the Visitor pattern
+      # (ignoring location information)
+      #
+      # The first tree is walked through the visitor; the matching node of the
+      # second tree, the current path and the result are kept in the instance
+      # variables @node2, @path and @result while the walk is in progress.
+      class Node < ReVIEW::AST::Visitor
+        # Result of AST node comparison
+        class Result
+          # @return [Array<String>] recorded differences, formatted as "path: message"
+          attr_reader :differences
+
+          def initialize
+            @differences = []
+          end
+
+          # Add a difference to the result
+          def add_difference(path, message)
+            @differences << "#{path}: #{message}"
+          end
+
+          # Check if the comparison was successful (no differences)
+          def equal?
+            @differences.empty?
+          end
+
+          # Get a human-readable summary of differences
+          def to_s
+            if equal?
+              'AST nodes are equivalent'
+            else
+              "AST nodes differ:\n " + @differences.join("\n ")
+            end
+          end
+        end
+
+        # Compare two AST nodes and return a Result
+        #
+        # Every call starts with a fresh Result and overwrites the state of a
+        # previous comparison.
+        #
+        # @param node1 [AST::Node] First node to compare
+        # @param node2 [AST::Node] Second node to compare
+        # @param path [String] Path to current node (for error messages)
+        # @return [Result] Result of comparison
+        def compare(node1, node2, path = 'root')
+          @node2 = node2
+          @path = path
+          @result = Result.new
+
+          compare_nodes(node1)
+
+          @result
+        end
+
+        private
+
+        # Override visit to handle two-node comparison
+        #
+        # Records a difference and stops descending when exactly one side is
+        # nil or when the classes differ; otherwise dispatches to the visit_*
+        # method for node1.
+        def compare_nodes(node1)
+          # Both should be nil or both should be non-nil
+          if node1.nil? && @node2.nil?
+            return
+          elsif node1.nil?
+            @result.add_difference(@path, "node1 is nil but node2 is #{@node2.class}")
+            return
+          elsif @node2.nil?
+            @result.add_difference(@path, "node1 is #{node1.class} but node2 is nil")
+            return
+          end
+
+          # Node types should match
+          # (instance_of? requires the exact same class; a subclass counts as a mismatch)
+          unless node1.instance_of?(@node2.class)
+            @result.add_difference(@path, "node types differ (#{node1.class} vs #{@node2.class})")
+            return
+          end
+
+          # Visit the node using the visitor pattern
+          visit(node1)
+        end
+
+        # Compare common attributes and recurse into children
+        def compare_common(node1, &block)
+          # Compare node-specific attributes if block is provided
+          yield if block
+
+          # Compare children recursively
+          compare_children(node1)
+        end
+
+        # Compare a specific attribute
+        # @param node1 [AST::Node] node from the first tree
+        # @param attr [Symbol] reader called on both node1 and @node2
+        # @param name [String] label used in the difference message
+        def compare_attr(node1, attr, name)
+          val1 = node1.send(attr)
+          val2 = @node2.send(attr)
+          return if val1 == val2
+
+          @result.add_difference(@path, "#{name} mismatch (#{val1.inspect} vs #{val2.inspect})")
+        end
+
+        # Compare children arrays
+        #
+        # A count mismatch is reported once and the children are then not
+        # compared individually.
+        def compare_children(node1)
+          children1 = node1.respond_to?(:children) ? node1.children : []
+          children2 = @node2.respond_to?(:children) ? @node2.children : []
+
+          if children1.size != children2.size
+            @result.add_difference(@path, "children count mismatch (#{children1.size} vs #{children2.size})")
+            return
+          end
+
+          children1.zip(children2).each_with_index do |(child1, child2), index|
+            # Save current state
+            saved_node2 = @node2
+            saved_path = @path
+
+            # Update state for child comparison
+            @node2 = child2
+            @path = "#{saved_path}[#{index}]"
+
+            compare_nodes(child1)
+
+            # Restore state
+            @node2 = saved_node2
+            @path = saved_path
+          end
+        end
+
+        # Compare two child nodes (for special children like caption_node)
+        # @param node1 [AST::Node, nil] child from the first tree
+        # @param node2 [AST::Node, nil] child from the second tree
+        # @param child_path [String] suffix appended to the current path as ".child_path"
+        def compare_child_node(node1, node2, child_path)
+          # Save current state
+          saved_node2 = @node2
+          saved_path = @path
+
+          # Update state for child comparison
+          @node2 = node2
+          @path = "#{saved_path}.#{child_path}"
+
+          compare_nodes(node1)
+
+          # Restore state
+          @node2 = saved_node2
+          @path = saved_path
+        end
+
+        # Visitor methods for each node type
+        #
+        # Each method compares the attributes listed in its block and then the
+        # children (through compare_common). Attributes guarded by
+        # "if a || b" are compared only when at least one side has a value.
+
+        def visit_document(node)
+          compare_common(node)
+        end
+
+        def visit_headline(node)
+          compare_common(node) do
+            compare_attr(node, :level, 'headline level')
+            compare_attr(node, :label, 'headline label')
+            compare_child_node(node.caption_node, @node2.caption_node, 'caption')
+          end
+        end
+
+        # Text nodes are compared by content only (children are not visited)
+        def visit_text(node)
+          compare_attr(node, :content, 'text content')
+        end
+
+        def visit_paragraph(node)
+          compare_common(node)
+        end
+
+        def visit_inline(node)
+          compare_common(node) do
+            compare_attr(node, :inline_type, 'inline type')
+            # args comparison can be lenient as they might be reconstructed differently
+          end
+        end
+
+        def visit_code_block(node)
+          compare_common(node) do
+            compare_attr(node, :id, 'code block id') if node.id || @node2.id
+            compare_attr(node, :lang, 'code block lang') if node.lang || @node2.lang
+            compare_attr(node, :line_numbers, 'code block line_numbers')
+            compare_child_node(node.caption_node, @node2.caption_node, 'caption') if node.caption_node || @node2.caption_node
+          end
+        end
+
+        def visit_code_line(node)
+          compare_common(node)
+        end
+
+        def visit_table(node)
+          compare_common(node) do
+            compare_attr(node, :id, 'table id') if node.id || @node2.id
+            compare_attr(node, :table_type, 'table type')
+            compare_attr(node, :metric, 'table metric') if node.metric || @node2.metric
+            compare_attr(node, :col_spec, 'table col_spec') if node.col_spec || @node2.col_spec
+            # cellwidth is an array
+            if node.cellwidth != @node2.cellwidth
+              @result.add_difference(@path, "table cellwidth mismatch (#{node.cellwidth.inspect} vs #{@node2.cellwidth.inspect})")
+            end
+            compare_child_node(node.caption_node, @node2.caption_node, 'caption') if node.caption_node || @node2.caption_node
+          end
+        end
+
+        def visit_table_row(node)
+          compare_common(node) do
+            compare_attr(node, :row_type, 'table row type')
+          end
+        end
+
+        def visit_table_cell(node)
+          compare_common(node)
+        end
+
+        def visit_image(node)
+          compare_common(node) do
+            compare_attr(node, :id, 'image id') if node.id || @node2.id
+            compare_attr(node, :metric, 'image metric') if node.metric || @node2.metric
+            compare_attr(node, :image_type, 'image type')
+            compare_child_node(node.caption_node, @node2.caption_node, 'caption') if node.caption_node || @node2.caption_node
+          end
+        end
+
+        # Only list_type is compared here, in addition to the children
+        def visit_list(node)
+          compare_common(node) do
+            compare_attr(node, :list_type, 'list type')
+          end
+        end
+
+        def visit_list_item(node)
+          compare_common(node) do
+            compare_attr(node, :level, 'list item level')
+            compare_attr(node, :item_type, 'list item type') if node.item_type || @node2.item_type
+            compare_attr(node, :number, 'list item number') if node.number || @node2.number
+
+            # Compare term_children for definition lists
+            if node.term_children&.any? || @node2.term_children&.any?
+              term_children1 = node.term_children || []
+              term_children2 = @node2.term_children || []
+
+              if term_children1.size == term_children2.size
+                term_children1.zip(term_children2).each_with_index do |(term1, term2), index|
+                  compare_child_node(term1, term2, "term[#{index}]")
+                end
+              else
+                @result.add_difference(@path, "term_children count mismatch (#{term_children1.size} vs #{term_children2.size})")
+              end
+            end
+          end
+        end
+
+        def visit_block(node)
+          compare_common(node) do
+            compare_attr(node, :block_type, 'block type')
+            # args is an array
+            if node.args != @node2.args
+              @result.add_difference(@path, "block args mismatch (#{node.args.inspect} vs #{@node2.args.inspect})")
+            end
+            compare_child_node(node.caption_node, @node2.caption_node, 'caption') if node.caption_node || @node2.caption_node
+          end
+        end
+
+        def visit_minicolumn(node)
+          compare_common(node) do
+            compare_attr(node, :minicolumn_type, 'minicolumn type')
+            compare_child_node(node.caption_node, @node2.caption_node, 'caption') if node.caption_node || @node2.caption_node
+          end
+        end
+
+        def visit_column(node)
+          compare_common(node) do
+            compare_attr(node, :level, 'column level')
+            compare_attr(node, :label, 'column label') if node.label || @node2.label
+            compare_attr(node, :column_type, 'column type') if node.column_type || @node2.column_type
+            compare_child_node(node.caption_node, @node2.caption_node, 'caption') if node.caption_node || @node2.caption_node
+          end
+        end
+
+        def visit_caption(node)
+          compare_common(node)
+        end
+
+        def visit_footnote(node)
+          compare_common(node) do
+            compare_attr(node, :id, 'footnote id')
+            compare_attr(node, :footnote_type, 'footnote type')
+          end
+        end
+
+        def visit_reference(node)
+          compare_common(node) do
+            compare_attr(node, :ref_id, 'reference ref_id')
+            compare_attr(node, :context_id, 'reference context_id')
+          end
+        end
+
+        def visit_embed(node)
+          compare_common(node) do
+            compare_attr(node, :embed_type, 'embed type')
+            compare_attr(node, :content, 'embed content')
+            # target_builders is an array - compare it
+            if node.target_builders != @node2.target_builders
+              @result.add_difference(@path, "target_builders mismatch (#{node.target_builders.inspect} vs #{@node2.target_builders.inspect})")
+            end
+          end
+        end
+
+        def visit_tex_equation(node)
+          compare_common(node) do
+            compare_attr(node, :id, 'tex equation id') if node.id || @node2.id
+            compare_attr(node, :content, 'tex equation content')
+            compare_child_node(node.caption_node, @node2.caption_node, 'caption') if node.caption_node || @node2.caption_node
+          end
+        end
+
+        def visit_markdown_html(node)
+          compare_common(node) do
+            compare_attr(node, :content, 'markdown html content')
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/review/ast/diff/result.rb b/lib/review/ast/diff/result.rb
new file mode 100644
index 000000000..ec7d3f43a
--- /dev/null
+++ b/lib/review/ast/diff/result.rb
@@ -0,0 +1,89 @@
+# frozen_string_literal: true
+
+# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
+#
+# This program is free software.
+# You can distribute or modify this program under the terms of
+# the GNU LGPL, Lesser General Public License version 2.1.
module ReVIEW
  module AST
    module Diff
      # Result of a diff comparison operation
      #
      # Holds the two content signatures and the raw change list produced by a
      # comparator, and offers equality checks plus a textual rendering of the
      # differences.
      class Result
        # @return [String] Signature/hash of left content
        attr_reader :left_signature

        # @return [String] Signature/hash of right content
        attr_reader :right_signature

        # @return [Array] Raw diff changes; each element must respond to
        #   +action+, +old_element+ and +new_element+
        attr_reader :changes

        # @param left_signature [String] Signature of left content
        # @param right_signature [String] Signature of right content
        # @param changes [Array, nil] change objects (nil is treated as empty)
        def initialize(left_signature, right_signature, changes)
          @left_signature = left_signature
          @right_signature = right_signature
          @changes = changes
        end

        # Check if contents are equal.
        #
        # Called without arguments it compares the two signatures. Called with
        # an argument it falls back to the standard Object#equal? identity
        # check, so code that relies on the built-in one-argument contract no
        # longer raises ArgumentError.
        #
        # @return [Boolean]
        def equal?(*other)
          return super unless other.empty?

          @left_signature == @right_signature
        end

        # Check if contents are different
        # @return [Boolean] true if signatures don't match
        def different?
          !equal?
        end

        # Alias for the zero-argument equal?
        # @return [Boolean]
        def same_hash?
          equal?
        end

        # Generate human-readable diff output.
        #
        # Unchanged entries (action '=') and unknown actions are skipped;
        # removals are prefixed with "- ", additions with "+ ", and a
        # replacement ('!') produces both lines.
        #
        # @return [String] Formatted diff, '' when the signatures match
        def pretty_diff
          return '' if equal?

          lines = Array(@changes).flat_map do |change|
            case change.action
            when '-' then ["- #{change.old_element.inspect}"]
            when '+' then ["+ #{change.new_element.inspect}"]
            when '!' then ["- #{change.old_element.inspect}", "+ #{change.new_element.inspect}"]
            else []
            end
          end
          lines.join("\n")
        end

        # Alias for pretty_diff
        # @return [String]
        def diff
          pretty_diff
        end
      end
    end
  end
end
module ReVIEW
  module AST
    # Root node type of the AST. Adds JSON (de)serialization of its children
    # on top of Node.
    class DocumentNode < Node
      # Rebuilds a DocumentNode from its hash form.
      #
      # Entries of hash['children'] are deserialized one by one; anything that
      # does not come back as a ReVIEW::AST::Node is dropped.
      def self.deserialize_from_hash(hash)
        document = new(location: ReVIEW::AST::JSONSerializer.restore_location(hash))
        (hash['children'] || []).each do |child_hash|
          restored = ReVIEW::AST::JSONSerializer.deserialize_from_hash(child_hash)
          next unless restored.is_a?(ReVIEW::AST::Node)

          document.add_child(restored)
        end
        document
      end

      private

      # Adds :children to +hash+ (only when there are any) and returns +hash+.
      def serialize_properties(hash, options)
        if children.any?
          hash[:children] = children.map { |child| child.serialize_to_hash(options) }
        end
        hash
      end
    end
  end
end
module ReVIEW
  module AST
    # Compiles chapter files to an AST and serializes the result through
    # JSONSerializer.
    class Dumper
      attr_reader :config, :serializer_options

      # @param config [Object, nil] configuration; defaults to ReVIEW::Configure.values
      # @param serializer_options [JSONSerializer::Options, nil] defaults to a fresh Options
      def initialize(config: nil, serializer_options: nil)
        @config = config || ReVIEW::Configure.values
        @serializer_options = serializer_options || JSONSerializer::Options.new
      end

      # Dumps a single file.
      #
      # The book is loaded from the file's own directory, so the working
      # directory is switched there for the duration of the dump.
      #
      # @param path [String] path to the chapter file (absolute or relative)
      # @return [Object] whatever JSONSerializer.serialize returns
      # @raise [FileNotFound] when +path+ does not exist
      def dump_file(path)
        raise FileNotFound, "file not found: #{path}" unless File.exist?(path)

        file_dir = File.dirname(File.expand_path(path))

        # Inside the chdir block the file sits in the current directory, so a
        # relative path with directory parts (e.g. "sub/ch01.re") would no
        # longer resolve. Absolute paths are passed through unchanged; relative
        # ones are reduced to their basename.
        chapter_path = File.absolute_path?(path) ? path : File.basename(path)

        Dir.chdir(file_dir) do
          book = ReVIEW::Book::Base.new('.', config: @config)

          # Build book-wide indexes for cross-chapter references
          require_relative('book_indexer')
          ReVIEW::AST::BookIndexer.build(book)

          dump_ast(chapter_path, book)
        end
      end

      # Dumps several files.
      #
      # @param paths [Array<String>]
      # @return [Hash] path => dump result, in the order given
      def dump_files(paths)
        paths.to_h { |path| [path, dump_file(path)] }
      end

      private

      # Compiles the chapter at +path+ within +book+ and serializes its AST.
      # Raises a RuntimeError when the compiler returns no AST root.
      def dump_ast(path, book)
        chap = ReVIEW::Book::Chapter.new(book, nil, File.basename(path), path)
        ast_root = ReVIEW::AST::Compiler.for_chapter(chap).compile_to_ast(chap)
        raise "Failed to generate AST for #{path}" unless ast_root

        ReVIEW::AST::JSONSerializer.serialize(ast_root, @serializer_options)
      end
    end
  end
end
module ReVIEW
  module AST
    # EmbedNode is a leaf node holding embedded content that is passed through
    # to specific builders.
    #
    # Attributes:
    # - content: the content to render
    # - embed_type: :block, :inline or :raw
    # - target_builders: Array of builder names (e.g., ["html", "latex"]),
    #   or nil meaning all builders
    class EmbedNode < LeafNode
      attr_reader :embed_type, :target_builders

      def initialize(location:, embed_type: :block, target_builders: nil, content: '', **kwargs)
        super(location: location, content: content, **kwargs)
        @embed_type = embed_type
        @target_builders = target_builders
      end

      # True when no target builders were specified, or when the list contains
      # the string form of +builder_name+.
      def targeted_for?(builder_name)
        @target_builders.nil? || @target_builders.include?(builder_name.to_s)
      end

      # Hash form: the superclass hash with embed_type, target_builders and
      # content set on it.
      def to_h
        super.merge!(
          embed_type: embed_type,
          target_builders: target_builders,
          content: content
        )
      end

      # Serializes to a hash with :type, optionally :location, and the
      # properties added by serialize_properties.
      def serialize_to_hash(options = nil)
        options ||= ReVIEW::AST::JSONSerializer::Options.new

        serialized = { type: self.class.name.split('::').last }
        serialized[:location] = location&.to_h if options.include_location
        serialize_properties(serialized, options)
        serialized
      end

      # Rebuilds an EmbedNode from its hash form. A missing embed_type falls
      # back to :inline, missing content to ''.
      def self.deserialize_from_hash(hash)
        raw_type = hash['embed_type']
        new(
          location: ReVIEW::AST::JSONSerializer.restore_location(hash),
          embed_type: raw_type.nil? ? :inline : raw_type.to_sym,
          target_builders: hash['target_builders'],
          content: hash['content'] || ''
        )
      end

      private

      # Writes embed_type always, target_builders and content only when set.
      def serialize_properties(hash, _options)
        hash[:embed_type] = embed_type
        if target_builders
          hash[:target_builders] = target_builders
        end
        if content
          hash[:content] = content
        end
        hash
      end
    end
  end
end
module ReVIEW
  module AST
    # AST-specific footnote index that handles both sides of a footnote:
    # inline references and block definitions. Whichever is seen first creates
    # the entry (and fixes its number); later sightings fill in missing data.
    #
    # Includes Enumerable (iterating over entries in insertion order), so
    # select/find/to_a etc. are available in addition to each and map.
    class FootnoteIndex
      include Enumerable

      # Internal footnote entry struct
      Entry = Struct.new(:id, :number, :content, :footnote_node) do
        # content defaults to '' when nil is given
        def initialize(id, number, content: nil, footnote_node: nil)
          super(id, number, content || '', footnote_node)
        end

        # Update entry with new information; nil / empty values are ignored so
        # existing data is never erased.
        def update(content: nil, footnote_node: nil)
          self.content = content if content && !content.empty?
          self.footnote_node = footnote_node if footnote_node
        end

        # Check if this entry has an AST footnote node
        def footnote_node?
          !footnote_node.nil?
        end

        # Returns footnote_node, for compatibility with index items that
        # expose caption_node.
        def caption_node
          footnote_node
        end
      end

      def initialize
        @entries = {}
        @counter = 0
      end

      # Add a new entry, or update the existing entry for +id+.
      # Numbers are assigned in order of first appearance, starting at 1.
      #
      # @return [Entry] the new or updated entry
      def add_or_update(id, content: nil, footnote_node: nil)
        if @entries.key?(id)
          @entries[id].update(content: content, footnote_node: footnote_node)
        else
          @counter += 1
          @entries[id] = Entry.new(id, @counter, content: content, footnote_node: footnote_node)
        end

        @entries[id]
      end

      # Get footnote entry by ID (nil when unknown)
      def [](id)
        @entries[id]
      end

      # Check if footnote exists
      def key?(id)
        @entries.key?(id)
      end

      # Get footnote number (nil when unknown)
      def number(id)
        @entries[id]&.number
      end

      # Get all footnote IDs
      def keys
        @entries.keys
      end

      # Get number of footnotes
      def size
        @entries.size
      end

      # Iterate over all entries; returns an Enumerator without a block.
      # Enumerable#map (and friends) are built on top of this.
      def each(&block)
        return enum_for(:each) unless block

        @entries.each_value(&block)
      end

      # Convert to a ReVIEW::Book::FootnoteIndex by adding every Entry to it.
      def to_book_index
        book_index = ReVIEW::Book::FootnoteIndex.new
        @entries.each_value { |entry| book_index.add_item(entry) }
        book_index
      end
    end
  end
end
module ReVIEW
  module AST
    # FootnoteNode represents a footnote definition in the AST.
    #
    # It stores the footnote ID and type; the footnote content is held as
    # child nodes.
    class FootnoteNode < Node
      attr_reader :id, :footnote_type

      # @param footnote_type [Symbol] :footnote (default) or :endnote
      def initialize(location:, id:, footnote_type: :footnote)
        super(location: location)
        @id = id
        @footnote_type = footnote_type
      end

      # Plain-text content: the to_inline_text of every child, concatenated.
      #
      # @return [String]
      def to_inline_text
        children.map(&:to_inline_text).join
      end

      # Hash form with type, location, id and children; footnote_type is
      # added (as a String) only when it is not the default :footnote.
      def to_h
        data = {
          type: self.class.name.split('::').last,
          location: location&.to_h,
          id: @id,
          children: children.map(&:to_h)
        }
        data[:footnote_type] = @footnote_type.to_s unless @footnote_type == :footnote
        data
      end

      # Rebuilds a FootnoteNode (including children) from its hash form.
      # Children that do not deserialize to a ReVIEW::AST::Node are dropped.
      def self.deserialize_from_hash(hash)
        footnote = new(
          location: ReVIEW::AST::JSONSerializer.restore_location(hash),
          id: hash['id'],
          footnote_type: (hash['footnote_type'] || :footnote).to_sym
        )
        (hash['children'] || []).each do |child_hash|
          restored = ReVIEW::AST::JSONSerializer.deserialize_from_hash(child_hash)
          next unless restored.is_a?(ReVIEW::AST::Node)

          footnote.add_child(restored)
        end
        footnote
      end

      private

      # Writes id, a non-default footnote_type, and children (when any).
      def serialize_properties(hash, options)
        hash[:id] = @id
        hash[:footnote_type] = @footnote_type.to_s unless @footnote_type == :footnote
        hash[:children] = children.map { |child| child.serialize_to_hash(options) } if children.any?
        hash
      end
    end
  end
end
module ReVIEW
  module AST
    # Headline node: level, optional label, optional tag (e.g. 'nonum'),
    # optional auto_id and a caption.
    class HeadlineNode < Node
      include Captionable

      attr_accessor :auto_id
      attr_reader :level, :label, :tag

      def initialize(location:, level: nil, label: nil, caption_node: nil, tag: nil, auto_id: nil, **kwargs)
        super(location: location, **kwargs)
        @level = level
        @label = label
        @caption_node = caption_node
        @tag = tag
        @auto_id = auto_id
      end

      # Check if headline has specific tag option
      def tag?(tag_name)
        @tag == tag_name
      end

      # Check for specific headline options
      def nonum?
        tag?('nonum')
      end

      def notoc?
        tag?('notoc')
      end

      def nodisp?
        tag?('nodisp')
      end

      # Hash form: the superclass hash plus level, label, caption_node and
      # tag; auto_id is added only when set.
      def to_h
        result = super.merge(
          level: level,
          label: label,
          caption_node: caption_node&.to_h,
          tag: tag
        )
        result[:auto_id] = auto_id if auto_id
        result
      end

      # Rebuilds a HeadlineNode from its hash form.
      #
      # tag and auto_id are restored as well: serialize_properties writes
      # both, so leaving them out made a serialize/deserialize round-trip
      # silently drop them.
      def self.deserialize_from_hash(hash)
        new(
          location: ReVIEW::AST::JSONSerializer.restore_location(hash),
          level: hash['level'],
          label: hash['label'],
          caption_node: deserialize_caption_from_hash(hash),
          tag: hash['tag'],
          auto_id: hash['auto_id']
        )
      end

      private

      # Writes level, label and the caption; tag and auto_id only when set.
      def serialize_properties(hash, options)
        hash[:level] = level
        hash[:label] = label
        serialize_caption_to_hash(hash, options)
        hash[:tag] = tag if tag
        hash[:auto_id] = auto_id if auto_id
        hash
      end
    end
  end
end
module ReVIEW
  module AST
    # HeadlineParser - Parses headline syntax and extracts components
    #
    # Extracted parts:
    # - Level (number of leading = characters)
    # - Tag ("[...]" placed directly after the = run, e.g. ==[column])
    # - Label ("{...}" before or after the caption)
    # - Caption text
    class HeadlineParser
      MAX_HEADLINE_LEVEL = 6

      # "=" run with an optional "[tag]" immediately after it
      PREFIX_PATTERN = /\A(=+)(?:\[(.+?)\])?/
      # old syntax: {label} Caption
      LEADING_LABEL_PATTERN = /\A\{([^}]+)\}\s*(.+)/
      # new syntax: Caption{label}
      TRAILING_LABEL_PATTERN = /\A(.+?)\{([^}]+)\}\s*\z/
      # caption part ending in an inline command opener such as "@<b>"
      INLINE_OPENER_PATTERN = /@<[^>]+>\s*\z/
      private_constant :PREFIX_PATTERN, :LEADING_LABEL_PATTERN,
                       :TRAILING_LABEL_PATTERN, :INLINE_OPENER_PATTERN

      # Parse result class with helper methods
      class ParseResult
        attr_reader :level, :tag, :label, :caption

        def initialize(level:, tag:, label:, caption:)
          @level = level
          @tag = tag
          @label = label
          @caption = caption
        end

        # Check if this is a column tag
        def column?
          @tag == 'column'
        end

        # Check if this is a closing tag (e.g., /column).
        # Always returns true or false, also when there is no tag.
        def closing_tag?
          @tag ? @tag.start_with?('/') : false
        end

        # Get the closing tag name without the leading '/'
        # Returns nil if not a closing tag
        def closing_tag_name
          return nil unless closing_tag?

          @tag[1..-1]
        end
      end

      # Parse headline line and return components
      #
      # @param line [String] headline line (e.g., "==[nonum]{label} Caption")
      # @param location [#lineno, #filename, nil] used only for error messages
      # @return [ParseResult, nil] parsed result or nil if not a headline
      # @raise [CompileError] when the level exceeds MAX_HEADLINE_LEVEL
      def self.parse(line, location:)
        new(line, location: location).parse
      end

      def initialize(line, location:)
        @line = line
        @location = location
      end

      # See HeadlineParser.parse
      def parse
        prefix = PREFIX_PATTERN.match(@line)
        return nil unless prefix

        level = prefix[1].size
        validate_level!(level)

        rest = @line[prefix.end(0)..-1].strip
        label, caption = extract_label_and_caption(rest)

        ParseResult.new(level: level, tag: prefix[2], label: label, caption: caption)
      end

      private

      # Raises CompileError (with line / file name when the location provides
      # them) if +level+ is deeper than MAX_HEADLINE_LEVEL.
      def validate_level!(level)
        return if level <= MAX_HEADLINE_LEVEL

        error_msg = "Invalid header: max headline level is #{MAX_HEADLINE_LEVEL}"
        error_msg += " at line #{@location.lineno}" if @location&.lineno
        error_msg += " in #{@location.filename}" if @location&.filename
        raise CompileError, error_msg
      end

      # Splits the text after the "=" prefix into [label, caption].
      #
      # @return [Array(String, String), Array(nil, String)]
      def extract_label_and_caption(text)
        leading = LEADING_LABEL_PATTERN.match(text)
        return [leading[1], leading[2].strip] if leading

        # A trailing {...} is a label only when it does not belong to inline
        # markup, i.e. the text before it does not end in "@<...>".
        trailing = TRAILING_LABEL_PATTERN.match(text)
        if trailing && !trailing[1].match?(INLINE_OPENER_PATTERN)
          return [trailing[2], trailing[1].strip]
        end

        # No label, or the braces are part of inline markup
        [nil, text]
      end
    end
  end
end
module ReVIEW
  module AST
    # Leaf node for an image, with optional id, caption, metric and an
    # image_type (default :image).
    class ImageNode < LeafNode
      include Captionable

      attr_reader :metric, :image_type

      def initialize(location:, id: nil, caption_node: nil, metric: nil, image_type: :image, content: '', **kwargs)
        super(location: location, id: id, content: content, **kwargs)
        @caption_node = caption_node
        @metric = metric
        @image_type = image_type
      end

      # Hash form: the superclass hash plus id / caption_node / metric when
      # present, and always image_type.
      def to_h
        data = super
        data[:id] = id if id?
        data[:caption_node] = caption_node.to_h if caption_node
        data[:metric] = metric if metric
        data[:image_type] = image_type
        data
      end

      # Serializes to a hash with :type, optionally :location, and the
      # properties added by serialize_properties (no children key is written).
      def serialize_to_hash(options = nil)
        options ||= ReVIEW::AST::JSONSerializer::Options.new

        serialized = { type: self.class.name.split('::').last }
        serialized[:location] = location&.to_h if options.include_location
        serialize_properties(serialized, options)
        serialized
      end

      # Rebuilds an ImageNode from its hash form. A missing image_type falls
      # back to :image, missing content to ''.
      def self.deserialize_from_hash(hash)
        raw_type = hash['image_type']
        new(
          location: ReVIEW::AST::JSONSerializer.restore_location(hash),
          id: hash['id'],
          caption_node: deserialize_caption_from_hash(hash),
          metric: hash['metric'],
          image_type: raw_type.nil? ? :image : raw_type.to_sym,
          content: hash['content'] || ''
        )
      end

      private

      # Writes id (when id? is true), the caption, metric (when set),
      # image_type (always) and non-empty content.
      def serialize_properties(hash, options)
        hash[:id] = id if id?
        serialize_caption_to_hash(hash, options)
        hash[:metric] = metric if metric
        hash[:image_type] = image_type
        unless content.nil? || content == false || content.empty?
          hash[:content] = content
        end
        hash
      end
    end
  end
end
# Binds the indexer to +chapter+ (and its book) and creates empty indexes
# and counters.
def initialize(chapter)
  super()
  @chapter = chapter
  @book = @chapter.book
  initialize_indexes
  initialize_counters
end

# Main index building method.
# Sets the chapter title if needed, visits the AST and publishes the indexes
# on the chapter. Does nothing for a nil/false root. Always returns self.
def build_indexes(ast_root)
  if ast_root
    # Extract and set chapter title from first level-1 headline
    extract_and_set_chapter_title(ast_root)
    visit(ast_root)
    set_indexes_on_chapter
  end

  self
end

# Get all indexes as a hash. The footnote and endnote indexes are converted
# with to_book_index; the others are returned as they are.
def indexes
  footnotes = @footnote_index.to_book_index
  endnotes = @endnote_index.to_book_index

  {
    list: @list_index,
    table: @table_index,
    equation: @equation_index,
    footnote: footnotes,
    endnote: endnotes,
    image: @image_index,
    icon: @icon_index,
    numberless_image: @numberless_image_index,
    indepimage: @indepimage_index,
    headline: @headline_index,
    column: @column_index,
    bibpaper: @bibpaper_index
  }
end

# Find index by type name. Unlike #indexes, the footnote/endnote indexes
# are returned unconverted.
#
# @raise [ArgumentError] for an unknown type
def index_for(type)
  key = type.to_sym
  known = {
    list: @list_index,
    table: @table_index,
    equation: @equation_index,
    footnote: @footnote_index,
    endnote: @endnote_index,
    image: @image_index,
    icon: @icon_index,
    numberless_image: @numberless_image_index,
    indepimage: @indepimage_index,
    headline: @headline_index,
    column: @column_index,
    bibpaper: @bibpaper_index
  }
  known.fetch(key) { raise ArgumentError, "Unknown index type: #{type}" }
end

# Sets the chapter's @title from the first level-1 headline, unless the
# chapter already has a non-empty title or no usable headline text exists.
def extract_and_set_chapter_title(ast_root)
  existing = @chapter.title
  return if existing && !existing.empty?

  headline = find_first_headline(ast_root, level: 1)
  return unless headline

  title = extract_text_from_caption(headline.caption_node)
  return unless title && !title.empty?

  @chapter.instance_variable_set(:@title, title)
end
private

# Visits the children of node's caption, if it has a caption.
def visit_caption_children(node)
  caption = node.caption_node
  return unless caption

  visit_all(caption.children)
end

# Publishes all indexes on the chapter through its ast_indexes= writer.
def set_indexes_on_chapter
  published = {
    footnote_index: @footnote_index,
    endnote_index: @endnote_index,
    list_index: @list_index,
    table_index: @table_index,
    equation_index: @equation_index,
    image_index: @image_index,
    icon_index: @icon_index,
    numberless_image_index: @numberless_image_index,
    indepimage_index: @indepimage_index,
    headline_index: @headline_index,
    column_index: @column_index,
    bibpaper_index: @bibpaper_index
  }
  @chapter.ast_indexes = published
end

# Footnote content is the node's inline text.
def extract_footnote_content(node)
  node.to_inline_text
end

# Creates one empty index per kind. Footnotes and endnotes use the
# AST-specific FootnoteIndex; the rest are ReVIEW::Book index classes.
def initialize_indexes
  @list_index = ReVIEW::Book::ListIndex.new
  @table_index = ReVIEW::Book::TableIndex.new
  @equation_index = ReVIEW::Book::EquationIndex.new
  @footnote_index = AST::FootnoteIndex.new
  @endnote_index = AST::FootnoteIndex.new
  @headline_index = ReVIEW::Book::HeadlineIndex.new(@chapter)
  @column_index = ReVIEW::Book::ColumnIndex.new
  @chapter_index = ReVIEW::Book::ChapterIndex.new
  @bibpaper_index = ReVIEW::Book::BibpaperIndex.new

  # The image-related indexes are created whether or not a book is present
  @image_index = ReVIEW::Book::ImageIndex.new(@chapter)
  @icon_index = ReVIEW::Book::IconIndex.new(@chapter)
  @numberless_image_index = ReVIEW::Book::NumberlessImageIndex.new(@chapter)
  @indepimage_index = ReVIEW::Book::IndepImageIndex.new(@chapter)
end

# Resets the section counter, the headline stack and the footnote/endnote
# cross-reference counters.
def initialize_counters
  @sec_counter = ReVIEW::SecCounter.new(6, @chapter) # 6 is max level

  @headline_stack = []
  @crossref = {
    footnote: {},
    endnote: {}
  }
end
# Container nodes below contribute nothing themselves; their children are visited.

def visit_document(node)
  visit_all(node.children)
end

def visit_paragraph(node)
  visit_all(node.children)
end

def visit_text(node)
  # Text nodes have no children and don't contribute to indexes
end

def visit_list(node)
  visit_all(node.children)
end

# Visits the item's term_children first (when there are any), then its children.
def visit_list_item(node)
  terms = node.term_children
  visit_all(terms) if terms&.any?
  visit_all(node.children)
end

def visit_caption(node)
  visit_all(node.children)
end

def visit_code_line(node)
  visit_all(node.children)
end

def visit_table_row(node)
  visit_all(node.children)
end

def visit_table_cell(node)
  visit_all(node.children)
end

def visit_reference(node)
  visit_all(node.children)
end

# Advances the section counter and, for level >= 2, registers the headline
# in the headline index under an id built from the stack of enclosing
# headline ids joined by '|'.
def visit_headline(node)
  check_id(node.label)
  @sec_counter.inc(node.level)

  if node.level >= 2
    cursor = node.level - 2
    depth = cursor + 1
    @headline_stack ||= []
    caption_text = extract_caption_text(node.caption_node)
    # the label wins over the caption text as the id segment
    @headline_stack[cursor] = (node.label || caption_text)
    # drop segments left over from deeper headlines
    @headline_stack = @headline_stack.take(depth) if @headline_stack.size > depth

    @headline_index.add_item(
      ReVIEW::Book::Index::Item.new(
        @headline_stack.join('|'),
        @sec_counter.number_list,
        caption_text,
        caption_node: node.caption_node
      )
    )

    visit_caption_children(node)
  end

  visit_all(node.children)
end

# Registers the column in the column index (id = label, or the caption text
# when there is no label), then visits caption and children.
def visit_column(node)
  caption_text = extract_caption_text(node.caption_node)
  item_id = node.label || caption_text

  check_id(node.label) if node.label

  @column_index.add_item(
    ReVIEW::Book::Index::Item.new(item_id, @column_index.size + 1, caption_text, caption_node: node.caption_node)
  )

  visit_caption_children(node)
  visit_all(node.children)
end
# Code blocks with an id are numbered in the list index; the caption is
# visited only in that case.
def visit_code_block(node)
  if node.id?
    check_id(node.id)
    @list_index.add_item(ReVIEW::Book::Index::Item.new(node.id, @list_index.size + 1))

    visit_caption_children(node)
  end

  visit_all(node.children)
end

# Tables with an id are numbered in the table index; an :imgtable is
# additionally registered in the indepimage index.
def visit_table(node)
  if node.id?
    check_id(node.id)
    caption_text = extract_caption_text(node.caption_node)
    @table_index.add_item(
      ReVIEW::Book::Index::Item.new(node.id, @table_index.size + 1, caption_text, caption_node: node.caption_node)
    )

    if node.table_type == :imgtable
      @indepimage_index.add_item(ReVIEW::Book::Index::Item.new(node.id, @indepimage_index.size + 1))
    end

    visit_caption_children(node)
  end

  visit_all(node.children)
end

# Images with an id are numbered in the image index.
def visit_image(node)
  if node.id?
    check_id(node.id)
    caption_text = extract_caption_text(node.caption_node)
    @image_index.add_item(
      ReVIEW::Book::Index::Item.new(node.id, @image_index.size + 1, caption_text, caption_node: node.caption_node)
    )

    visit_caption_children(node)
  end

  visit_all(node.children)
end

# Minicolumns are not indexed themselves; caption and children are visited.
def visit_minicolumn(node)
  visit_caption_children(node)
  visit_all(node.children)
end

# Embeds add nothing to the indexes; only their children are visited.
def visit_embed(node)
  case node.embed_type
  when :block
    # Block embeds hold raw, format-specific content; nothing is indexed here
  end

  visit_all(node.children)
end

# Registers a footnote/endnote definition: makes sure a cross-reference
# counter exists for the id and stores content and node in the matching index.
def visit_footnote(node)
  check_id(node.id)

  footnote_content = extract_footnote_content(node)

  case node.footnote_type
  when :footnote
    @crossref[:footnote][node.id] ||= 0
    @footnote_index.add_or_update(node.id, content: footnote_content, footnote_node: node)
  when :endnote
    @crossref[:endnote][node.id] ||= 0
    @endnote_index.add_or_update(node.id, content: footnote_content, footnote_node: node)
  end

  visit_all(node.children)
end
# Equations with an id are numbered in the equation index.
def visit_tex_equation(node)
  if node.id?
    check_id(node.id)
    caption_text = extract_caption_text(node.caption_node)
    @equation_index.add_item(
      ReVIEW::Book::Index::Item.new(node.id, @equation_index.size + 1, caption_text, caption_node: node.caption_node)
    )
  end

  visit_all(node.children)
end

# A 'bibpaper' block with at least two args (id, caption) is registered in
# the bibpaper index. Caption and children are visited for every block.
def visit_block(node)
  if node.block_type && node.block_type.to_s == 'bibpaper' && node.args.length >= 2
    bib_id, bib_caption = node.args[0], node.args[1]
    check_id(bib_id)
    @bibpaper_index.add_item(
      ReVIEW::Book::Index::Item.new(bib_id, @bibpaper_index.size + 1, bib_caption, caption_node: node.caption_node)
    )
  end

  visit_caption_children(node)
  visit_all(node.children)
end

# Handles inline elements that reference indexed items. Only :fn, :endnote,
# :bib, :eq and :icon do anything, and only when a first arg is present.
def visit_inline(node)
  target_id = node.args.first if %i[fn endnote bib eq icon].include?(node.inline_type)

  if target_id
    check_id(target_id)

    case node.inline_type
    when :fn
      # Count the reference; the entry's content is filled in by visit_footnote
      counts = @crossref[:footnote]
      counts[target_id] = (counts[target_id] || 0) + 1
      @footnote_index.add_or_update(target_id)
    when :endnote
      counts = @crossref[:endnote]
      counts[target_id] = (counts[target_id] || 0) + 1
      @endnote_index.add_or_update(target_id)
    when :bib
      # Add to index if not already present
      unless @bibpaper_index.key?(target_id)
        @bibpaper_index.add_item(ReVIEW::Book::Index::Item.new(target_id, @bibpaper_index.size + 1))
      end
    when :icon
      # Add icon to index if not already present
      unless @icon_index.key?(target_id)
        @icon_index.add_item(ReVIEW::Book::Index::Item.new(target_id, @icon_index.size + 1))
      end
    end
    # :eq only has its id checked
  end

  visit_all(node.children)
end
# Depth-first search for the first headline of a given level.
#
# The node itself is tested first; otherwise its children are searched in
# order and the search stops at the first hit. Nodes that do not respond to
# #children are treated as leaves.
#
# @param node [Node] root of the subtree to search
# @param level [Integer] headline level to look for
# @return [HeadlineNode, nil] the first matching headline, or nil if none
def find_first_headline(node, level:)
  if node.is_a?(HeadlineNode) && node.level == level
    node
  elsif node.respond_to?(:children)
    # lazy keeps the short-circuit: later siblings are not searched after a hit
    node.children.lazy.filter_map { |child| find_first_headline(child, level: level) }.first
  end
end
# Warn about IDs that use deprecated characters.
#
# One warning is emitted for every occurrence of a deprecated character
# (punctuation such as # % \ { } [ ] ~ / $ ' " | * ? & < > ` and any
# whitespace), plus one more if the ID starts with a dot. A nil ID is
# silently accepted.
#
# @param id [String, nil] the identifier to validate
def check_id(id)
  return unless id

  id.scan(%r![#%\\{}\[\]~/$'"|*?&<>`\s]!).each do |char|
    warn "deprecated ID: `#{char}` in `#{id}`"
  end

  warn "deprecated ID: `#{id}` begins from `.`" if id.start_with?('.')
end
# Rebuild an InlineNode and its children from a parsed-JSON hash.
#
# The inline type is read from 'element' (preferred) or 'inline_type' and
# converted to a Symbol, which is the form the inline processor uses when it
# builds nodes. 'target_chapter_id' and 'target_item_id' are written by
# #serialize_properties when present, so they are restored here as well;
# previously they were lost on a round trip.
#
# @param hash [Hash] string-keyed hash describing the node
# @return [InlineNode] the reconstructed node
def self.deserialize_from_hash(hash)
  inline_type = hash['element'] || hash['inline_type']
  node = new(
    location: ReVIEW::AST::JSONSerializer.restore_location(hash),
    inline_type: inline_type&.to_sym,
    args: hash['args'] || [],
    target_chapter_id: hash['target_chapter_id'],
    target_item_id: hash['target_item_id']
  )

  (hash['children'] || []).each do |child_hash|
    child = ReVIEW::AST::JSONSerializer.deserialize_from_hash(child_hash)
    # Plain strings (and nil) can come back from the serializer; only real nodes are attached.
    node.add_child(child) if child.is_a?(ReVIEW::AST::Node)
  end
  node
end
+ # + # Responsibilities: + # - Parse inline markup (@{content}) within text + # - Create appropriate AST nodes for different inline types + # - Handle nested inline elements + # - Process specialized inline formats (ruby, href, kw, etc.) + class InlineProcessor + # Default mapping of inline commands to handler methods + DEFAULT_INLINE_HANDLERS = { + embed: :create_inline_embed_ast_node, + ruby: :create_inline_ruby_ast_node, + href: :create_inline_href_ast_node, + kw: :create_inline_kw_ast_node, + img: :create_inline_ref_ast_node, + imgref: :create_inline_ref_ast_node, + list: :create_inline_ref_ast_node, + table: :create_inline_ref_ast_node, + eq: :create_inline_ref_ast_node, + fn: :create_inline_ref_ast_node, + endnote: :create_inline_ref_ast_node, + column: :create_inline_ref_ast_node, + w: :create_inline_ref_ast_node, + wb: :create_inline_ref_ast_node, + bib: :create_inline_ref_ast_node, + bibref: :create_inline_ref_ast_node, + hd: :create_inline_cross_ref_ast_node, + chap: :create_inline_cross_ref_ast_node, + chapref: :create_inline_cross_ref_ast_node, + title: :create_inline_cross_ref_ast_node, + sec: :create_inline_cross_ref_ast_node, + secref: :create_inline_cross_ref_ast_node, + sectitle: :create_inline_cross_ref_ast_node, + labelref: :create_inline_cross_ref_ast_node, + ref: :create_inline_cross_ref_ast_node, + raw: :create_inline_raw_ast_node + }.freeze + + def initialize(ast_compiler) + @ast_compiler = ast_compiler + @tokenizer = InlineTokenizer.new + # Copy the static table to allow runtime modifications + @inline_handlers = DEFAULT_INLINE_HANDLERS.dup + end + + # Parse inline elements and create AST nodes + def parse_inline_elements(str, parent_node) + return if str.empty? 
# Dispatch a parsed inline token to the handler registered for its command.
#
# The two reference handlers receive the command itself as their first
# argument (it becomes the reference type); every other registered handler
# receives only the content and the parent node. Commands without a
# registered handler fall back to #create_standard_inline_node.
#
# @param token [#command, #content] the inline token to convert
# @param parent_node [Node] node that receives the created child
def create_inline_ast_node_from_token(token, parent_node)
  name = token.command.to_sym
  body = token.content
  handler = @inline_handlers[name]

  case handler
  when nil, false
    create_standard_inline_node(name, body, parent_node)
  when :create_inline_ref_ast_node, :create_inline_cross_ref_ast_node
    send(handler, name, body, parent_node)
  else
    send(handler, body, parent_node)
  end
end
# Create the InlineNode for a ruby (furigana) annotation.
#
# The argument is either "base_text,ruby_text" or a single text. When a comma
# is present the argument is split at the first comma and both parts are
# stripped; otherwise the argument is used unchanged. Each resulting text
# becomes both an entry in the node's args and a TextNode child.
#
# @param arg [String] raw content of the inline element
# @param parent_node [Node] node that receives the created inline node
def create_inline_ruby_ast_node(arg, parent_node)
  texts = arg.include?(',') ? arg.split(',', 2).map(&:strip) : [arg]

  ruby_node = AST::InlineNode.new(
    location: @ast_compiler.location,
    inline_type: :ruby,
    args: texts
  )
  texts.each do |text|
    ruby_node.add_child(AST::TextNode.new(location: @ast_compiler.location, content: text))
  end

  parent_node.add_child(ruby_node)
end
# Create the InlineNode for a keyword element.
#
# The argument is either "keyword" or "keyword, supplement". With a comma the
# argument is split at the first comma and both parts are stripped; without
# one it is used unchanged. Each resulting text is stored in the node's args
# and also added as a TextNode child.
#
# @param arg [String] raw content of the inline element
# @param parent_node [Node] node that receives the created inline node
def create_inline_kw_ast_node(arg, parent_node)
  words = if arg.include?(',')
            keyword, supplement = arg.split(',', 2)
            [keyword.strip, supplement.strip]
          else
            [arg]
          end

  kw_node = AST::InlineNode.new(
    location: @ast_compiler.location,
    inline_type: :kw,
    args: words
  )
  words.each do |word|
    kw_node.add_child(AST::TextNode.new(location: @ast_compiler.location, content: word))
  end

  parent_node.add_child(kw_node)
end
# Create the InlineNode for a cross-reference element.
#
# For hd, sec, secref and sectitle an argument of the form "chapter|item" is
# split at the first pipe and both parts are stripped. For every other type,
# or when no pipe is present, the whole argument is the item ID and there is
# no chapter ID. The node gets a single ReferenceNode child.
#
# @param ref_type [Symbol] the inline command, stored as the node's inline_type
# @param arg [String] raw content of the inline element
# @param parent_node [Node] node that receives the created inline node
def create_inline_cross_ref_ast_node(ref_type, arg, parent_node)
  piped = %i[hd sec secref sectitle].include?(ref_type.to_sym) && arg.include?('|')
  chapter_id, item_id = piped ? arg.split('|', 2).map(&:strip) : [nil, arg]

  reference = AST::ReferenceNode.new(item_id, chapter_id, location: @ast_compiler.location)
  inline = AST::InlineNode.new(
    location: @ast_compiler.location,
    inline_type: ref_type,
    args: [chapter_id, item_id].compact, # chapter_id is nil when there is no chapter part
    target_chapter_id: chapter_id,
    target_item_id: item_id
  )
  inline.add_child(reference)

  parent_node.add_child(inline)
end
# Token structs produced by the tokenizer. Both are keyword-initialised
# Structs and report their kind through #type.

# A run of plain text that contains no inline markup.
TextToken = Struct.new(:content, keyword_init: true) do
  define_method(:type) { :text }
end

# An inline element written as @<command>{content} (or with fence delimiters).
# start_pos and end_pos are the positions recorded by the tokenizer for the
# element within the tokenized string.
InlineToken = Struct.new(:command, :content, :start_pos, :end_pos, keyword_init: true) do
  define_method(:type) { :inline }
end
regardless of their semantic meaning: + # + # ### Supported Escape Sequences + # - `\}` → `}` - Escape closing brace to include literal brace in content + # - `\\` → `\` - Escape backslash to include literal backslash in content + # - `\@` → `@` - Escape at-sign to include literal at-sign in content + # - `\{` → `\{` - Opening brace is NOT escaped (preserved as-is) + # - `\x` → `\x` - Other characters after backslash are preserved as-is + # + # ### Termination Rules + # - Brace elements: Terminated by the FIRST unescaped `}` character + # - Fence elements: Terminated by the matching fence delimiter + # - Line breaks are not allowed within inline elements + # + # ### No Automatic Brace Balancing + # - The tokenizer does NOT perform automatic brace balancing + # - Nested braces must be properly escaped using `\}` when they should be literal + # - This ensures consistent behavior across all inline element types + # + # ## Usage Examples + # + # ### LaTeX Math (closing braces must be escaped) + # ``` + # @<m>{\sum_{i=1\}^{n\} x_i} # Correct: produces \sum_{i=1}^{n} x_i + # @<m>{\sum_{i=1}^{n} x_i} # Wrong: terminates at first } + # ``` + # + # ### Unbalanced Code (escape literal closing braces) + # ``` + # @<code>{if (x > 0) { print("positive")} # Correct: an unescaped { is already literal + # @<code>{x = {\}} # Correct: literal } in output + # ``` + # + # ### JSON Strings (escape closing braces) + # ``` + # @<code>{JSON.parse("{\"key\": \"value\"\}")} # Correct: the closing brace is escaped + # ``` + # + # ## Error Handling + # - Unclosed inline elements raise InlineTokenizeError with location info + # - Invalid command names raise InlineTokenizeError + # - Line breaks within elements raise InlineTokenizeError + # - Nested fence syntax raises InlineTokenizeError for clarity + # + # Responsibilities: + # - Parse inline markup strings into tokens + # - Apply consistent escape sequence rules + # - Support multiple delimiter types (braces, fences) + # - Maintain position tracking for error reporting + # - Enforce consistent
# Split a string into plain-text and inline-element tokens.
#
# The string is scanned left to right for "@<name>" followed by one of the
# opening delimiters {, $ or |. Text between elements becomes TextToken
# objects; each element is parsed by #parse_inline_element_at. Command names
# are validated before parsing so that an invalid name is reported instead of
# being treated as text.
#
# @param str [String] the input string to tokenize
# @param location [SnapshotLocation, nil] stored for use in error messages
# @return [Array<TextToken, InlineToken>] tokens in source order
# @raise [ReVIEW::AST::InlineTokenizeError] if a command name is empty or
#   contains anything other than ASCII lowercase letters
def tokenize(str, location: nil)
  @location = location
  result = []
  cursor = 0

  until cursor >= str.length
    found = str.match(/@<([^>]*)>([{$|])/, cursor)

    if found.nil?
      # No further inline elements: the rest of the string is plain text.
      tail = str[cursor..-1]
      result << TextToken.new(content: tail) unless tail.empty?
      break
    end

    element_start = found.begin(0)
    if element_start > cursor
      leading = str[cursor...element_start]
      result << TextToken.new(content: leading) unless leading.empty?
    end

    name = found[1]
    if name.empty?
      raise ReVIEW::AST::InlineTokenizeError, "Invalid command name '#{name}': command name cannot be empty"
    end
    unless name.match?(/\A[a-z]+\z/)
      raise ReVIEW::AST::InlineTokenizeError, "Invalid command name '#{name}': only ASCII lowercase letters are allowed"
    end

    element = parse_inline_element_at(str, element_start)
    if element
      result << element
      cursor = element.end_pos
    else
      # Could not be parsed as an element: keep the matched prefix as text.
      result << TextToken.new(content: found[0])
      cursor = found.end(0)
    end
  end

  result
end
+ # + # ## Escape Processing Rules + # - `\}` → `}` (escaped closing brace becomes literal) + # - `\\` → `\` (escaped backslash becomes literal) + # - `\@` → `@` (escaped at-sign becomes literal) + # - `\{` → `\{` (opening brace preserved as-is, not escaped) + # - `\x` → `\x` (other chars after backslash preserved as-is) + # + # ## Termination + # - Always terminates at the FIRST unescaped `}` character + # - Does NOT perform automatic brace balancing + # - Line breaks within content raise an error + # + # @param str [String] The input string being parsed + # @param start_pos [Integer] Position after the opening '{' + # @param element_start [Integer] Position of the '@<' for error reporting + # @return [Array] Content and end position, or raises error + def parse_brace_content(str, start_pos, element_start = nil) + content = '' + pos = start_pos + + # Use provided element_start or calculate it + element_start ||= start_pos - 5 # fallback estimate + + while pos < str.length + char = str[pos] + + case char + when "\n", "\r" + # Line breaks are not allowed within inline elements + error_msg = 'Line breaks are not allowed within inline elements' + error_msg += format_location_info_simple(str, element_start) + raise ReVIEW::AST::InlineTokenizeError, error_msg + when '\\' + # Handle escaped character - implements consistent escape rules + if pos + 1 < str.length + next_char = str[pos + 1] + content += case next_char + when '}' + # \} → } : Escape closing brace (allows literal } in content) + '}' + when '\\' + # \\ → \ : Escape backslash (allows literal \ in content) + '\\' + when '@' + # \@ → @ : Escape at-sign (allows literal @ in content) + '@' + else + # \x → \x : Other characters are NOT escaped (preserve as-is) + # This includes \{ which remains \{ (opening brace not escaped) + char + next_char + end + pos += 2 + else + # Backslash at end of string - preserve as-is + content += char + pos += 1 + end + when '}' + # First unescaped } terminates the inline element 
# Build a short preview of an inline element for use in error messages.
#
# The preview starts at the element's "@<" and ends just after its closing
# delimiter when that delimiter lies within 50 characters of the start;
# otherwise it is cut at 50 characters (or at the end of the string). An
# ellipsis is appended when text was cut off and the preview does not already
# end with a closing delimiter.
#
# Fix: for fence syntax the closing delimiter is now searched using the
# captured fence character. The previous code searched for the whole
# "@<cmd>$" prefix, which never matched the closing fence, so fence previews
# always ran on to the length limit.
#
# @param str [String] the string being tokenized
# @param start_pos [Integer] position of the element's "@<"
# @return [String] the preview text
def extract_element_preview(str, start_pos)
  max_preview_length = 50
  limit = start_pos + max_preview_length

  fence = /\A@<[a-z]+>([$|])/.match(str[start_pos..-1])
  closing_pos =
    if fence
      # fence[1] is the delimiter itself; search after the opening fence.
      str.index(fence[1], start_pos + fence.end(0))
    else
      str.index('}', start_pos + 1)
    end

  preview_end =
    if closing_pos && closing_pos <= limit
      closing_pos + 1
    else
      [limit, str.length].min
    end

  preview = str[start_pos...preview_end].to_s
  preview += '...' if preview_end < str.length && !preview.match?(/[}$|]\z/)
  preview
end
# Rebuild a location object from a node hash.
#
# Returns nil when the hash has no 'location' entry, when that entry is not a
# Hash, or when either 'filename' or 'lineno' is missing from it.
#
# @param hash [Hash] string-keyed node hash
# @return [ReVIEW::SnapshotLocation, nil] the restored location, if any
def restore_location(hash)
  data = hash['location']
  return nil unless data.is_a?(Hash)

  file, line = data.values_at('filename', 'lineno')
  file && line ? ReVIEW::SnapshotLocation.new(file, line) : nil
end
# JSON Schema (draft-07) describing a serialized AST node.
#
# A node is an object with a required 'type' drawn from the node class names
# listed below, an optional 'location' with nullable 'filename' and 'lineno',
# and optional 'children' that are themselves nodes. Additional properties
# are allowed.
#
# @return [Hash] the schema as a string-keyed Hash
def json_schema
  node_types = %w[
    DocumentNode HeadlineNode ParagraphNode InlineNode TextNode
    CodeBlockNode ImageNode TableNode ListNode ListItemNode EmbedNode ColumnNode
  ]

  location_schema = {
    'type' => 'object',
    'properties' => {
      'filename' => { 'type' => ['string', 'null'] },
      'lineno' => { 'type' => ['integer', 'null'] }
    }
  }

  # 'items' refers back to the schema root, so children are validated recursively.
  children_schema = {
    'type' => 'array',
    'items' => { '$ref' => '#' }
  }

  {
    '$schema' => 'http://json-schema.org/draft-07/schema#',
    'title' => 'ReVIEW AST JSON Schema',
    'type' => 'object',
    'required' => ['type'],
    'properties' => {
      'type' => { 'type' => 'string', 'enum' => node_types },
      'location' => location_schema,
      'children' => children_schema
    },
    'additionalProperties' => true
  }
end
# Base class for AST nodes that never have children.
#
# A leaf node carries a content value (nil is normalised to an empty string)
# and always reports an empty children list. Attempts to add or remove
# children raise ArgumentError.
class LeafNode < Node
  attr_reader :content

  # @param location [Object] forwarded to Node#initialize
  # @param content [String, nil] the node's content; nil becomes ''
  # @param kwargs [Hash] any further options, forwarded to Node#initialize
  def initialize(location:, content: '', **kwargs)
    super(location: location, **kwargs)
    @content = content || ''
  end

  # @return [true] always, since this is a leaf
  def leaf_node?
    true
  end

  # @return [Array] always an empty array
  def children
    []
  end

  # Leaf nodes cannot have children.
  #
  # @raise [ArgumentError] always
  def add_child(_child)
    raise ArgumentError, "Cannot add children to leaf node #{self.class}"
  end

  # Leaf nodes have no children to remove, so this raises rather than
  # silently doing nothing.
  #
  # @raise [ArgumentError] always
  def remove_child(_child)
    raise ArgumentError, "Cannot remove children from leaf node #{self.class}"
  end

  # Inline-text form of the node (text without markup).
  #
  # @return [String] the node's content
  def to_inline_text
    content
  end
end
module ReVIEW
  module AST
    # A single entry of a list.
    #
    # Besides the children inherited from Node, an item keeps its nesting
    # +level+, an optional +number+, an optional +item_type+ (:dt / :dd, or nil
    # for a regular item) and +term_children+, the separately stored term
    # content used by definition lists.
    class ListItemNode < Node
      attr_reader :level, :number, :item_type, :term_children
      # Starts as nil in the constructor; writable from outside.
      attr_accessor :item_number

      # @param location [Object] source location, passed through to Node
      # @param level [Integer] nesting level
      # @param number [Integer, nil] item number, if any
      # @param item_type [Symbol, nil] :dt, :dd or nil
      # @param term_children [Array] nodes making up a definition term
      def initialize(location:, level: 1, number: nil, item_type: nil, term_children: [], **kwargs)
        super(location: location, **kwargs)
        @level, @number, @item_type, @term_children = level, number, item_type, term_children
        @item_number = nil
      end

      # @return [Hash] Node#to_h extended with level and, when present,
      #   number, item_type and term_children
      def to_h
        super.merge(level: level).tap do |result|
          result[:number] = number if number
          result[:item_type] = item_type if item_type
          result[:term_children] = term_children.map(&:to_h) if term_children.any?
        end
      end

      # @return [Boolean] true when this item is a definition term (:dt)
      def definition_term?
        item_type == :dt
      end

      # @return [Boolean] true when this item is a definition description (:dd)
      def definition_desc?
        item_type == :dd
      end

      # Rebuild an item from its string-keyed hash form.
      #
      # Term children are deserialized first (nil results dropped); regular
      # children are attached only when they deserialize to a Node.
      #
      # @param hash [Hash] string-keyed hash
      # @return [ListItemNode]
      def self.deserialize_from_hash(hash)
        serializer = ReVIEW::AST::JSONSerializer
        terms = (hash['term_children'] || []).map do |term_hash|
          serializer.deserialize_from_hash(term_hash)
        end.compact

        item = new(
          location: serializer.restore_location(hash),
          level: hash['level'] || 1,
          number: hash['number'],
          item_type: hash['item_type']&.to_sym,
          term_children: terms
        )

        (hash['children'] || []).each do |child_hash|
          child = serializer.deserialize_from_hash(child_hash)
          item.add_child(child) if child.is_a?(ReVIEW::AST::Node)
        end

        item
      end

      private

      # Write this node's properties into +hash+ and return it.
      # Key insertion order: children, term_children, level, number, item_type.
      def serialize_properties(hash, options)
        to_serialized = ->(nodes) { nodes.map { |node| node.serialize_to_hash(options) } }

        hash[:children] = to_serialized.call(children) if children.any?
        hash[:term_children] = to_serialized.call(term_children) if term_children.any?
        hash[:level] = level
        hash[:number] = number if number
        hash[:item_type] = item_type if item_type
        hash
      end
    end
  end
end
module ReVIEW
  module AST
    # A list container node whose children are expected to be list items.
    #
    # +list_type+ is :ul, :ol or :dl. +start_number+ is serialized only when
    # it is set and differs from 1. +olnum_start+ is kept on the node but is
    # not part of the serialized form.
    class ListNode < Node
      attr_reader :list_type
      attr_accessor :start_number, :olnum_start

      # @param location [Object] source location, passed through to Node
      # @param list_type [Symbol, nil] :ul, :ol or :dl
      # @param start_number [Integer, nil] first number of an ordered list
      # @param olnum_start [Object, nil] olnum starting value (original comment: used for IDGXML)
      def initialize(location:, list_type: nil, start_number: nil, olnum_start: nil, **kwargs)
        super(location: location, **kwargs)
        @list_type = list_type # :ul, :ol, :dl
        @start_number = start_number
        @olnum_start = olnum_start
      end

      # @return [Boolean] true for an ordered list
      def ol?
        list_type == :ol
      end

      # @return [Boolean] true for an unordered list
      def ul?
        list_type == :ul
      end

      # @return [Boolean] true for a definition list
      def dl?
        list_type == :dl
      end

      # @return [Hash] Node#to_h plus list_type and, when set and not 1, start_number
      def to_h
        result = super.merge(
          list_type: list_type
        )
        result[:start_number] = start_number if start_number && start_number != 1
        result
      end

      # Rebuild a list from its string-keyed hash form.
      #
      # 'start_number' is restored because serialize_properties emits it;
      # without this a serialize/deserialize round trip silently reset the
      # numbering of ordered lists. A missing 'list_type' yields nil (the
      # constructor default) instead of raising NoMethodError, matching how
      # ListItemNode treats its optional 'item_type'.
      #
      # @param hash [Hash] string-keyed hash
      # @return [ListNode]
      def self.deserialize_from_hash(hash)
        node = new(
          location: ReVIEW::AST::JSONSerializer.restore_location(hash),
          list_type: hash['list_type']&.to_sym,
          start_number: hash['start_number']
        )

        # Attach children, keeping only results that are real nodes.
        (hash['children'] || []).each do |child_hash|
          child = ReVIEW::AST::JSONSerializer.deserialize_from_hash(child_hash)
          node.add_child(child) if child.is_a?(ReVIEW::AST::Node)
        end
        node
      end

      private

      # Write this node's properties into +hash+ and return it.
      def serialize_properties(hash, options)
        hash[:list_type] = list_type
        hash[:start_number] = start_number if start_number && start_number != 1
        hash[:children] = children.map { |child| child.serialize_to_hash(options) } if children.any?
        hash
      end
    end
  end
end
module ReVIEW
  module AST
    # ListParser - turn list lines into structured item data.
    #
    # Handles the three list notations:
    # - unordered:  " * item"  (number of stars = nesting level)
    # - ordered:    " 1. item" (always level 1)
    # - definition: " : term"  (always level 1)
    #
    # The input object only needs a +while_match(regexp) { |line| ... }+ method
    # that yields consecutive lines for as long as they match.
    # Lines starting with "#@" are treated as comments and skipped.
    class ListParser
      # Item line patterns also accept comment lines so that a comment between
      # two items does not end the list.
      UNORDERED_ITEM = /\A\s+\*|\A\#@/
      ORDERED_ITEM = /\A\s+\d+\.|\A\#@/
      DEFINITION_ITEM = /\A\s*:|\A\#@/

      # Continuation lines: indented, and not themselves a new item.
      UNORDERED_CONTINUATION = /\A\s+(?!\*)\S/
      ORDERED_CONTINUATION = /\A\s+(?!\d+\.)\S/
      DEFINITION_CONTINUATION = /\A\s+(?!:)\S/

      COMMENT_LINE = /\A\#@/

      private_constant :UNORDERED_ITEM, :ORDERED_ITEM, :DEFINITION_ITEM,
                       :UNORDERED_CONTINUATION, :ORDERED_CONTINUATION, :DEFINITION_CONTINUATION,
                       :COMMENT_LINE

      # Parsed list item. level defaults to 1, continuation_lines to [] and
      # metadata to {} when not given.
      ListItemData = Struct.new(:type, :level, :content, :continuation_lines, :metadata, keyword_init: true) do
        def initialize(**args)
          super
          self.level ||= 1
          self.continuation_lines ||= []
          self.metadata ||= {}
        end

        # @param new_level [Integer] level the copy should have
        # @return [ListItemData] self when the level is unchanged, otherwise a
        #   new instance (sharing continuation_lines and metadata with self)
        def with_adjusted_level(new_level)
          return self if new_level == level

          ListItemData.new(
            type: type,
            level: new_level,
            content: content,
            continuation_lines: continuation_lines,
            metadata: metadata
          )
        end
      end

      # @param location_provider [Object, nil] stored but not used by the parser itself
      def initialize(location_provider = nil)
        @location_provider = location_provider
      end

      # @param f [#while_match] line input
      # @return [Array<ListItemData>] parsed unordered items
      def parse_unordered_list(f)
        collect_items(f, UNORDERED_ITEM, UNORDERED_CONTINUATION) { |line| parse_unordered_line(line) }
      end

      # @param f [#while_match] line input
      # @return [Array<ListItemData>] parsed ordered items
      def parse_ordered_list(f)
        collect_items(f, ORDERED_ITEM, ORDERED_CONTINUATION) { |line| parse_ordered_line(line) }
      end

      # @param f [#while_match] line input
      # @return [Array<ListItemData>] parsed definition items
      def parse_definition_list(f)
        collect_items(f, DEFINITION_ITEM, DEFINITION_CONTINUATION) { |line| parse_definition_line(line) }
      end

      private

      # Shared loop behind the three public parse methods.
      #
      # Consumes lines matching +item_pattern+, skips comment lines and lines
      # the block cannot parse (block returns nil), and attaches the
      # continuation lines that directly follow each accepted item.
      #
      # @yieldparam line [String] candidate item line
      # @yieldreturn [ListItemData, nil]
      # @return [Array<ListItemData>]
      def collect_items(f, item_pattern, continuation_pattern)
        items = []

        f.while_match(item_pattern) do |line|
          next if comment_line?(line)

          item_data = yield(line)
          next unless item_data

          item_data.continuation_lines = collect_continuation_lines(f, continuation_pattern)
          items << item_data
        end

        items
      end

      # @return [Array<String>] stripped lines consumed while they match +pattern+
      def collect_continuation_lines(f, pattern)
        lines = []
        f.while_match(pattern) { |cont| lines << cont.strip }
        lines
      end

      # @param line [String] input line
      # @return [ListItemData, nil] item with level = number of stars, or nil if the line does not match
      def parse_unordered_line(line)
        match = line.match(/\A(\s*)(\*+)\s*(.*)$/)
        return nil unless match

        stars = match[2].size

        ListItemData.new(
          type: :ul,
          level: stars,
          content: match[3].strip,
          metadata: { stars: stars, indent_spaces: match[1].length }
        )
      end

      # @param line [String] input line
      # @return [ListItemData, nil] level-1 item carrying the written number, or nil if the line does not match
      def parse_ordered_line(line)
        match = line.match(/\A(\s+)(\d+)\.\s*(.*)$/)
        return nil unless match

        num = match[2]

        # The digits are the item's actual number, never a nesting indicator.
        ListItemData.new(
          type: :ol,
          level: 1,
          content: match[3].strip,
          metadata: { number: num.to_i, number_string: num }
        )
      end

      # @param line [String] input line
      # @return [ListItemData, nil] level-1 item whose content is the term, or nil if the line does not match
      def parse_definition_line(line)
        match = line.match(/\A\s*:\s*(.*)$/)
        return nil unless match

        ListItemData.new(
          type: :dl,
          level: 1,
          content: match[1].strip,
          metadata: { is_term: true }
        )
      end

      # @param line [String] input line
      # @return [Boolean] true when the line starts with "#@"
      def comment_line?(line)
        COMMENT_LINE.match?(line)
      end
    end
  end
end
module ReVIEW
  module AST
    # ListProcessor - coordinates list handling for the AST compiler.
    #
    # It asks a ListParser for item data, hands the items to a
    # NestedListAssembler to build a list node, and attaches that node through
    # the compiler's add_child_to_current_node. Empty item lists produce nothing.
    class ListProcessor
      # @return [ListParser] parser used by this processor
      attr_reader :parser

      # @return [NestedListAssembler] assembler used by this processor
      attr_reader :nested_list_assembler

      # @param ast_compiler [Object] must respond to inline_processor, location
      #   and add_child_to_current_node
      def initialize(ast_compiler)
        @ast_compiler = ast_compiler
        @parser = ListParser.new(ast_compiler)
        @nested_list_assembler = NestedListAssembler.new(ast_compiler, ast_compiler.inline_processor)
      end

      # @param f [Object] line input handed to the parser
      def process_unordered_list(f)
        attach_built_list(@parser.parse_unordered_list(f)) do |items|
          @nested_list_assembler.build_unordered_list(items)
        end
      end

      # @param f [Object] line input handed to the parser
      def process_ordered_list(f)
        attach_built_list(@parser.parse_ordered_list(f)) do |items|
          @nested_list_assembler.build_ordered_list(items)
        end
      end

      # @param f [Object] line input handed to the parser
      def process_definition_list(f)
        attach_built_list(@parser.parse_definition_list(f)) do |items|
          @nested_list_assembler.build_definition_list(items)
        end
      end

      # Dispatch to the processing method for +list_type+.
      #
      # @param f [Object] line input handed to the parser
      # @param list_type [Symbol] :ul, :ol or :dl
      # @raise [CompileError] for any other list type
      def process_list(f, list_type)
        case list_type
        when :ul then process_unordered_list(f)
        when :ol then process_ordered_list(f)
        when :dl then process_definition_list(f)
        else raise_unknown_list_type(list_type)
        end
      end

      # Build a list node from already parsed items.
      #
      # @param items [Array] parsed items
      # @param list_type [Symbol] list type passed to the assembler
      # @return [Object] whatever the assembler's build_nested_structure returns
      def build_list_from_items(items, list_type)
        @nested_list_assembler.build_nested_structure(items, list_type)
      end

      # Parse items only, without building or attaching a node.
      #
      # @param f [Object] line input handed to the parser
      # @param list_type [Symbol] :ul, :ol or :dl
      # @return [Array] parsed items
      # @raise [CompileError] for any other list type
      def parse_list_items(f, list_type)
        case list_type
        when :ul then @parser.parse_unordered_list(f)
        when :ol then @parser.parse_ordered_list(f)
        when :dl then @parser.parse_definition_list(f)
        else raise_unknown_list_type(list_type)
        end
      end

      private

      # Build a node from +items+ via the block and attach it, unless there are no items.
      def attach_built_list(items)
        return if items.empty?

        add_to_ast(yield(items))
      end

      # Raise the unknown-list-type error, suffixed with the compiler's location text.
      def raise_unknown_list_type(list_type)
        location_info = @ast_compiler.location.format_for_error
        raise CompileError, "Unknown list type: #{list_type}#{location_info}"
      end

      # @param list_node [Object] node to attach to the compiler's current node
      def add_to_ast(list_node)
        @ast_compiler.add_child_to_current_node(list_node)
      end
    end
  end
end
module ReVIEW
  module AST
    class ListProcessor
      # NestedListAssembler - builds ListNode / ListItemNode trees from flat
      # parsed item data.
      #
      # Unordered and ordered lists are nested by item level: an item of level
      # N > 1 is placed in a nested list under the most recent item of level
      # N - 1. Definition lists are flat; the term goes into term_children and
      # each continuation line becomes a child of the item.
      class NestedListAssembler
        # @param location_provider [Object] must respond to location and error
        # @param inline_processor [Object] must respond to parse_inline_elements(text, node)
        def initialize(location_provider, inline_processor)
          @location_provider = location_provider
          @inline_processor = inline_processor
        end

        # Build a list of the given type from flat items.
        #
        # An empty item list yields an empty list node without checking the type.
        #
        # @param items [Array] parsed list items
        # @param list_type [Symbol] :ul, :ol or :dl
        # @return [ReVIEW::AST::ListNode]
        # @raise [ReVIEW::CompileError] for an unknown type with non-empty items
        def build_nested_structure(items, list_type)
          return create_list_node(list_type) if items.empty?

          case list_type
          when :ul then build_unordered_list(items)
          when :ol then build_ordered_list(items)
          when :dl then build_definition_list(items)
          else raise ReVIEW::CompileError, "Unknown list type: #{list_type}"
          end
        end

        # @param items [Array] parsed unordered items
        # @return [ReVIEW::AST::ListNode] root :ul node
        def build_unordered_list(items)
          create_list_node(:ul) { |root_list| build_proper_nested_structure(items, root_list, :ul) }
        end

        # The root's start_number is taken from the first item's metadata[:number] when present.
        #
        # @param items [Array] parsed ordered items
        # @return [ReVIEW::AST::ListNode] root :ol node
        def build_ordered_list(items)
          create_list_node(:ol) do |root_list|
            first_item = items.first
            first_number = first_item && first_item.metadata[:number]
            root_list.start_number = first_number if first_number

            build_proper_nested_structure(items, root_list, :ol)
          end
        end

        # @param items [Array] parsed definition items
        # @return [ReVIEW::AST::ListNode] root :dl node
        def build_definition_list(items)
          create_list_node(:dl) do |root_list|
            items.each do |item_data|
              # The term is handled by create_list_item_node; only the
              # definition lines become children here.
              entry = create_list_item_node(item_data) do |item_node|
                item_data.continuation_lines.each { |line| add_definition_content(item_node, line) }
              end
              root_list.add_child(entry)
            end
          end
        end

        private

        # Place every item under +root_list+, creating nested lists as needed.
        # A level that jumps by more than one is reported through the
        # location provider's error method and clamped to one level deeper.
        def build_proper_nested_structure(items, root_list, list_type)
          return if items.empty?

          lists_by_level = { 1 => root_list }
          last_level = 0

          items.each do |raw_item|
            level = raw_item.level
            if level - last_level > 1
              @location_provider.error('too many *.')
              level = last_level + 1
            end
            last_level = level

            adjusted = raw_item.with_adjusted_level(level)
            item_node = create_list_item_node(adjusted) do |node|
              add_all_content_to_item(node, adjusted)
            end

            if level == 1
              root_list.add_child(item_node)
              lists_by_level[1] = root_list
            else
              add_to_parent_list(item_node, level, list_type, lists_by_level)
            end
          end
        end

        # Attach +item_node+ to the nested list of the last item one level up,
        # creating that nested list on first use. Does nothing when there is
        # no parent list or the parent list has no items yet.
        #
        # @param item_node [ReVIEW::AST::ListItemNode] item to add
        # @param level [Integer] nesting level of the item
        # @param list_type [Symbol] type of the nested list
        # @param current_lists [Hash] level => list node currently open at that level
        def add_to_parent_list(item_node, level, list_type, current_lists)
          parent_list = current_lists[level - 1]
          return unless parent_list&.children&.any?

          owner_item = parent_list.children.last

          nested_list = owner_item.children.find do |child|
            child.is_a?(ReVIEW::AST::ListNode) && child.list_type == list_type
          end
          unless nested_list
            nested_list = create_list_node(list_type) { |list| owner_item.add_child(list) }
          end

          nested_list.add_child(item_node)
          current_lists[level] = nested_list
        end

        # Add the item's main content followed by each continuation line.
        def add_all_content_to_item(item_node, item_data)
          [item_data.content, *item_data.continuation_lines].each do |text|
            add_content_to_item(item_node, text)
          end
        end

        # Let the inline processor parse +content+ into +item_node+.
        def add_content_to_item(item_node, content)
          @inline_processor.parse_inline_elements(content, item_node)
        end

        # Add one definition line: a paragraph (parsed for inline markup) when
        # the text contains '@<', a plain text node otherwise.
        def add_definition_content(item_node, definition_content)
          child =
            if definition_content.include?('@<')
              paragraph = ReVIEW::AST::ParagraphNode.new(location: current_location)
              @inline_processor.parse_inline_elements(definition_content, paragraph)
              paragraph
            else
              ReVIEW::AST::TextNode.new(location: current_location, content: definition_content)
            end
          item_node.add_child(child)
        end

        # Parse a definition term through a throwaway paragraph.
        #
        # @return [Array] the children collected by the throwaway paragraph
        def process_definition_term_content(term_content)
          holder = ReVIEW::AST::ParagraphNode.new(location: current_location)
          @inline_processor.parse_inline_elements(term_content, holder)
          holder.children
        end

        # Create a ListNode at the current location, yield it if a block is given, and return it.
        def create_list_node(list_type)
          ReVIEW::AST::ListNode.new(location: current_location, list_type: list_type).tap do |node|
            yield(node) if block_given?
          end
        end

        # Create a ListItemNode from parsed data, yield it if a block is given, and return it.
        # Ordered items receive number:, definition items receive term_children:.
        def create_list_item_node(item_data)
          attributes = { location: current_location, level: item_data.level }

          case item_data.type
          when :ol
            attributes[:number] = item_data.metadata[:number]
          when :dl
            attributes[:term_children] = process_definition_term_content(item_data.content)
          end

          ReVIEW::AST::ListItemNode.new(**attributes).tap do |node|
            yield(node) if block_given?
          end
        end

        # @return [Object] the location provider's current location
        def current_location
          @location_provider.location
        end
      end
    end
  end
end
+ node + end + + # Get current location for node creation + # @return [SnapshotLocation] Current location + def current_location + @location_provider.location + end + end + end + end +end diff --git a/lib/review/ast/markdown_adapter.rb b/lib/review/ast/markdown_adapter.rb new file mode 100644 index 000000000..744fd07ff --- /dev/null +++ b/lib/review/ast/markdown_adapter.rb @@ -0,0 +1,904 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'review/ast' +require 'review/snapshot_location' +require 'review/exception' +require_relative 'markdown_html_node' +require_relative 'inline_tokenizer' + +module ReVIEW + module AST + # MarkdownAdapter - Adapter to convert Markly AST to Re:VIEW AST + # + # This class walks the Markly AST and creates corresponding + # Re:VIEW AST nodes. + class MarkdownAdapter + # ContextStack manages hierarchical context for AST node construction. + # It provides exception-safe context switching with automatic cleanup. + class ContextStack + def initialize(initial_context) + @stack = [initial_context] + end + + def current + @stack.last + end + + def push(node) + @stack.push(node) + end + + def pop + raise ReVIEW::CompileError, 'Cannot pop initial context from stack' if @stack.length <= 1 + + @stack.pop + end + + def with_context(node) + push(node) + yield + ensure + pop + end + + def depth + @stack.length + end + + def validate! + if @stack.any?(&:nil?) + raise ReVIEW::CompileError, 'Context corruption: nil found in stack' + end + end + + def empty? 
+ @stack.length <= 1 + end + + def find_all(klass) + @stack.find_all { |node| node.is_a?(klass) } + end + + def any?(klass) + @stack.any?(klass) + end + end + + # parse CodeBlock or other attributes to get id and caption + class AttributeParser + def parse(text) + # Ensure input is UTF-8 (Markly's fence_info returns ASCII-8BIT) + text = text.dup.force_encoding('UTF-8') if text.encoding == Encoding::ASCII_8BIT + + return nil unless text =~ /\A\s*\{([^}]+)\}\s*\z/ + + attrs = {} + attr_text = ::Regexp.last_match(1) + + # Extract ID: #id + if attr_text =~ /#([a-zA-Z0-9_-]+)/ + attrs[:id] = ::Regexp.last_match(1) + end + + # Extract caption attribute: caption="..." + if attr_text =~ /caption=["']([^"']+)["']/ + attrs[:caption] = ::Regexp.last_match(1) + end + + # Extract classes: .classname + # attrs[:classes] = attr_text.scan(/\.([a-zA-Z0-9_-]+)/).flatten + attrs.empty? ? nil : attrs + end + end + + # Placeholder for Re:VIEW inline notation marker (@<) + # Used to restore notation from MarkdownCompiler's preprocessing + REVIEW_NOTATION_PLACEHOLDER = '@@REVIEW_AT_LT@@' + + def initialize(compiler) + @compiler = compiler + @context = nil # Will be initialized in convert() + + # Initialize InlineTokenizer for processing Re:VIEW notation + @inline_tokenizer = InlineTokenizer.new + @attribute_parser = AttributeParser.new + end + + # Convert Markly document to Re:VIEW AST + # + # @param markly_doc [Markly::Node] Markly document root + # @param ast_root [DocumentNode] Re:VIEW AST root + # @param chapter [ReVIEW::Book::Chapter] Chapter context (required) + def convert(markly_doc, ast_root, chapter) + @ast_root = ast_root + @chapter = chapter + + # Initialize context stack with document root + @context = ContextStack.new(ast_root) + + begin + # Walk the Markly AST + walk_node(markly_doc) + + # Close any remaining open columns at the end of the document + close_all_columns + + # Validate final state + validate_final_state! 
+ rescue ReVIEW::CompileError + raise + rescue StandardError => e + raise ReVIEW::CompileError, "Markdown conversion failed: #{e.message}\n#{e.backtrace.join("\n")}" + end + end + + private + + # Recursively walk Markly nodes + def walk_node(cm_node) + # Process based on node type + case cm_node.type + when :document + # Process children + process_children(cm_node) + + when :header + process_heading(cm_node) + + when :paragraph + process_paragraph(cm_node) + + when :list + process_list(cm_node) + + when :list_item + process_list_item(cm_node) + + when :code_block + process_code_block(cm_node) + + when :blockquote + process_blockquote(cm_node) + + when :table + process_table(cm_node) + + when :table_row + process_table_row(cm_node) + + when :table_header + process_table_header(cm_node) + + when :table_cell + process_table_cell(cm_node) + + when :html_block, :html + process_html_block(cm_node) + + when :hrule + process_thematic_break(cm_node) + + when :footnote_definition + process_footnote_definition(cm_node) + + else # rubocop:disable Style/EmptyElse + # For inline elements and other types, delegate to inline processing + # This includes :text, :strong, :emph, :code, :link, :image, etc. + nil # Inline elements are processed within their parent context + end + end + + # Process children of a node + def process_children(cm_node) + cm_node.each do |child| + walk_node(child) + end + end + + # Process heading node + def process_heading(cm_node) + level = cm_node.header_level + + # Extract text content to check for column marker + heading_text = extract_text(cm_node) + + # Check if this is a column start marker: ### [column] Title or ### [column] + if heading_text =~ /\A\s*\[column\](.*)/ + title = $1.strip + title = nil if title.empty? 
+ + # Start a column with heading-based syntax + start_column_from_heading(cm_node, title, level) + else + # Auto-close columns if we encounter a heading at the same or higher level + auto_close_columns_for_heading(level) + + # Regular heading processing + # Create caption node with inline elements + caption_node = CaptionNode.new( + location: current_location(cm_node) + ) + process_inline_content(cm_node, caption_node) + + # Create headline node + headline = HeadlineNode.new( + location: current_location(cm_node), + level: level, + label: nil, # Markdown doesn't have explicit labels + caption_node: caption_node + ) + + add_node_to_current_context(headline) + end + end + + # Process paragraph node + def process_paragraph(cm_node) + # Check if this paragraph contains only an image + if standalone_image_paragraph?(cm_node) + process_standalone_image(cm_node) + return + end + + # Check if this is an attribute block for the previous table + para_text = extract_text(cm_node).strip + # Pattern: {#id caption="..."} + attrs = parse_attribute_block(para_text) + + # Check if this is an attribute block for the previous table + if attrs && @context.current.children.last.is_a?(TableNode) + # Apply attributes to the last table + last_table_node = @context.current.children.last + table_id = attrs[:id] + caption_text = attrs[:caption] + + # Build caption node if caption text is provided + caption_node = nil + if caption_text && !caption_text.empty? 
+ caption_node = CaptionNode.new(location: current_location(cm_node)) + caption_node.add_child(TextNode.new( + location: current_location(cm_node), + content: caption_text + )) + end + + # Update table attributes + last_table_node.update_attributes(id: table_id, caption_node: caption_node) + + return # Don't add this paragraph as a regular node + end + + # Regular paragraph processing + para = ParagraphNode.new( + location: current_location(cm_node) + ) + + # Process inline content + process_inline_content(cm_node, para) + + add_node_to_current_context(para) + end + + # Process list node + def process_list(cm_node) + list_node = ListNode.new( + location: current_location(cm_node), + list_type: cm_node.list_type == :ordered_list ? :ol : :ul, + start_number: cm_node.list_type == :ordered_list ? cm_node.list_start : nil + ) + + add_node_to_current_context(list_node) + + # Use unified context management with exception safety + @context.with_context(list_node) do + # Process list items + process_children(cm_node) + end + end + + # Process list item node + def process_list_item(cm_node) + item = ListItemNode.new( + location: current_location(cm_node) + ) + + add_node_to_current_context(item) + + # Use unified context management with exception safety + @context.with_context(item) do + cm_node.each_with_index do |child, idx| + if child.type == :paragraph && idx == 0 + # For the first paragraph in a list item, process inline content directly + process_inline_content(child, item) + else + # For other blocks, process normally + walk_node(child) + end + end + end + end + + # Process code block node + def process_code_block(cm_node) + code_info = cm_node.fence_info || '' + + # Parse language and attributes + # Pattern: ruby {#id caption="..."} + lang = nil + attrs = nil + + if code_info =~ /\A(\S+)\s+(.+)\z/ + lang = ::Regexp.last_match(1) + attr_text = ::Regexp.last_match(2) + attrs = parse_attribute_block(attr_text) + else + lang = code_info.strip + lang = nil if lang.empty? 
+ end + + # Extract ID and caption from attributes + code_id = attrs&.[](:id) + caption_text = attrs&.[](:caption) + + # Create caption node if caption text exists + caption_node = if caption_text && !caption_text.empty? + node = CaptionNode.new(location: current_location(cm_node)) + node.add_child(TextNode.new( + location: current_location(cm_node), + content: caption_text + )) + node + end + + # Use :list type if ID is present (numbered list), otherwise :emlist + code_type = code_id ? :list : :emlist + + # Restore Re:VIEW notation markers in code block content + code_content = cm_node.string_content + code_content = code_content.gsub(REVIEW_NOTATION_PLACEHOLDER, '@<') + + code_block = CodeBlockNode.new( + location: current_location(cm_node), + id: code_id, + lang: lang, + code_type: code_type, + caption_node: caption_node, + original_text: code_content + ) + + # Add code lines + code_content.each_line.with_index do |line, idx| + line_node = CodeLineNode.new( + location: current_location(cm_node, line_offset: idx), + original_text: line.chomp + ) + line_node.add_child(TextNode.new( + location: current_location(cm_node, line_offset: idx), + content: line.chomp + )) + code_block.add_child(line_node) + end + + add_node_to_current_context(code_block) + end + + # Process blockquote node + def process_blockquote(cm_node) + quote_node = BlockNode.new( + location: current_location(cm_node), + block_type: :quote + ) + + add_node_to_current_context(quote_node) + + # Use unified context management with exception safety + @context.with_context(quote_node) do + process_children(cm_node) + end + end + + # Process table node (GFM extension) + def process_table(cm_node) + table_node = TableNode.new( + location: current_location(cm_node) + ) + + add_node_to_current_context(table_node) + + # Use unified context management with exception safety + @context.with_context(table_node) do + process_children(cm_node) + end + + # Check if the last row contains only attribute block + # This 
happens when Markly includes the attribute line as part of the table + if table_node.body_rows.any? + last_row = table_node.body_rows.last + # Check if the last row has only one cell with attribute block + if last_row.children.length >= 1 + first_cell = last_row.children.first + # Extract text from all children of the cell + cell_text = first_cell.children.map do |child| + child.is_a?(TextNode) ? child.content : '' + end.join.strip + + attrs = parse_attribute_block(cell_text) + if attrs + # Remove the last row from children (body_rows is a filtered view) + table_node.children.delete(last_row) + + # Apply attributes to the table + table_id = attrs[:id] + caption_text = attrs[:caption] + + # Build caption node if caption text is provided + caption_node = nil + if caption_text && !caption_text.empty? + caption_node = CaptionNode.new(location: current_location(cm_node)) + caption_node.add_child(TextNode.new( + location: current_location(cm_node), + content: caption_text + )) + end + + # Update table attributes + table_node.update_attributes(id: table_id, caption_node: caption_node) + + # No need to track this table for next paragraph + return + end + end + end + end + + # Process table row node + def process_table_row(cm_node) + row_node = TableRowNode.new( + location: current_location(cm_node), + row_type: :body + ) + + @context.current.add_body_row(row_node) + + # Process cells + @context.with_context(row_node) do + process_children(cm_node) + end + end + + # Process table header node + def process_table_header(cm_node) + row_node = TableRowNode.new( + location: current_location(cm_node), + row_type: :header + ) + + @context.current.add_header_row(row_node) + + # Process cells + @context.with_context(row_node) do + process_children(cm_node) + end + end + + # Process table cell node + def process_table_cell(cm_node) + cell_type = if @context.current.is_a?(TableRowNode) && @context.current.row_type == :header + :th + else + :td + end + + cell_node = TableCellNode.new( 
+ location: current_location(cm_node), + cell_type: cell_type + ) + + # Process cell content + process_inline_content(cm_node, cell_node) + + add_node_to_current_context(cell_node) + end + + # Process HTML block + def process_html_block(cm_node) + html_content = cm_node.string_content.strip + + # Create MarkdownHtmlNode to analyze HTML content + html_node = MarkdownHtmlNode.new( + location: current_location(cm_node), + html_content: html_content, + html_type: detect_html_type(html_content) + ) + + # Check if this is a column marker + if html_node.column_end? + end_column(html_node) + else + # Regular HTML content - add to current context + embed_node = EmbedNode.new( + location: current_location(cm_node), + embed_type: :html, + content: html_content + ) + add_node_to_current_context(embed_node) + end + end + + # Process thematic break (horizontal rule) + def process_thematic_break(cm_node) + hr_node = BlockNode.new( + location: current_location(cm_node), + block_type: :hr + ) + + add_node_to_current_context(hr_node) + end + + # Process inline content within a node + def process_inline_content(cm_node, parent_node) + cm_node.each do |child| + process_inline_node(child, parent_node) + end + end + + # Process individual inline node + def process_inline_node(cm_node, parent_node) + case cm_node.type + when :text + text = cm_node.string_content + + # Restore Re:VIEW notation markers (@<) from placeholders + text = text.gsub(REVIEW_NOTATION_PLACEHOLDER, '@<') + + # Process Re:VIEW inline notation + # Use InlineTokenizer to properly parse @{id} with escape sequences + location = current_location(cm_node) + + begin + # Tokenize text for Re:VIEW inline notation + tokens = @inline_tokenizer.tokenize(text, location: location) + + # Process each token + tokens.each do |token| + case token.type + when :text + # Text token: create TextNode + parent_node.add_child(TextNode.new(location: location, content: token.content)) + + when :inline + # InlineToken: Re:VIEW inline notation 
@{id} + ref_type = token.command.to_sym + ref_id = token.content + + # Create ReferenceNode + reference_node = ReferenceNode.new(ref_id, nil, location: location) + + # Create InlineNode with reference type + inline_node = InlineNode.new(location: location, inline_type: ref_type, args: [ref_id]) + inline_node.add_child(reference_node) + + parent_node.add_child(inline_node) + end + end + rescue InlineTokenizeError => e + # If tokenization fails, add error message as comment and add original text + # This allows the document to continue processing + warn("Failed to parse inline notation: #{e.message}") + parent_node.add_child(TextNode.new(location: location, content: text)) + end + + when :strong + inline_node = InlineNode.new(location: current_location(cm_node), inline_type: :b, args: [extract_text(cm_node)]) + process_inline_content(cm_node, inline_node) + parent_node.add_child(inline_node) + + when :emph + inline_node = InlineNode.new(location: current_location(cm_node), inline_type: :i, args: [extract_text(cm_node)]) + process_inline_content(cm_node, inline_node) + parent_node.add_child(inline_node) + + when :code + # Restore Re:VIEW notation markers in inline code + code_content = cm_node.string_content + code_content = code_content.gsub(REVIEW_NOTATION_PLACEHOLDER, '@<') + + inline_node = InlineNode.new(location: current_location(cm_node), inline_type: :code, args: [code_content]) + inline_node.add_child(TextNode.new(location: current_location(cm_node), content: code_content)) + parent_node.add_child(inline_node) + + when :link + # Create href inline node + inline_node = InlineNode.new(location: current_location(cm_node), inline_type: :href, args: [cm_node.url, extract_text(cm_node)]) + process_inline_content(cm_node, inline_node) + parent_node.add_child(inline_node) + + when :image + # Create icon inline node (Re:VIEW's image inline) + inline_node = InlineNode.new(location: current_location(cm_node), inline_type: :icon, args: [cm_node.url]) + 
parent_node.add_child(inline_node) + + when :strikethrough + # GFM extension + inline_node = InlineNode.new(location: current_location(cm_node), inline_type: :del, args: [extract_text(cm_node)]) + process_inline_content(cm_node, inline_node) + parent_node.add_child(inline_node) + + when :softbreak + # Soft line break - convert to space + parent_node.add_child(TextNode.new(location: current_location(cm_node), content: ' ')) + + when :linebreak + # Hard line break - preserve as newline + parent_node.add_child(TextNode.new(location: current_location(cm_node), content: "\n")) + + when :footnote_reference + # Footnote reference [^id] parsed by Markly + # Get the actual footnote ID from the parent footnote definition + footnote_id = if cm_node.respond_to?(:parent_footnote_def) && cm_node.parent_footnote_def + cm_node.parent_footnote_def.string_content + else + cm_node.string_content # Fallback to reference number + end + + # Create ReferenceNode + reference_node = ReferenceNode.new(footnote_id, nil, location: current_location(cm_node)) + + # Create InlineNode with fn type + inline_node = InlineNode.new(location: current_location(cm_node), inline_type: :fn, args: [footnote_id]) + inline_node.add_child(reference_node) + + parent_node.add_child(inline_node) + + when :html_inline + # Inline HTML - store as text for now + parent_node.add_child(TextNode.new(location: current_location(cm_node), content: cm_node.string_content)) + + else + # Process any children + process_inline_content(cm_node, parent_node) + end + end + + # Extract text content from a node + def extract_text(cm_node) + text = '' + cm_node.each do |child| + text += case child.type + when :text, :code + child.string_content + else + extract_text(child) + end + end + text + end + + # Create location for current node + def current_location(cm_node, line_offset: 0) + # Try to use source position if available + line = if cm_node.respond_to?(:source_position) && cm_node.source_position + 
cm_node.source_position[:start_line] + line_offset + else + 1 + line_offset # Default to line 1 if source position not available + end + + SnapshotLocation.new(@chapter.basename, line) + end + + # Detect HTML type from content + def detect_html_type(html_content) + if html_content.strip.start_with?('') + :comment + elsif html_content.strip.start_with?('<') && html_content.strip.end_with?('>') + :tag + else + :block + end + end + + # Start a new column context from heading syntax + def start_column_from_heading(cm_node, title, level) + # Create caption node if title is provided + caption_node = if title && !title.empty? + node = CaptionNode.new(location: current_location(cm_node)) + node.add_child(TextNode.new( + location: current_location(cm_node), + content: title + )) + node + end + + # Create column node with level + column_node = ColumnNode.new( + location: current_location(cm_node), + caption_node: caption_node, + level: level + ) + + @context.push(column_node) + end + + # End current column context + def end_column(_html_node) + unless @context.current.is_a?(ColumnNode) + # Warning: /column without matching column + return + end + + column_node = @context.current + @context.pop + + @context.current.add_child(column_node) + end + + # Add node to current context (column or document) + def add_node_to_current_context(node) + current = @context.current + if current.nil? + raise ReVIEW::CompileError, "Internal error: No current context. Cannot add #{node.class} node." + end + + unless current.respond_to?(:add_child) + raise ReVIEW::CompileError, "Internal error: Current context #{current.class} doesn't support add_child." + end + + current.add_child(node) + end + + # Check if paragraph contains only a standalone image + def standalone_image_paragraph?(cm_node) + children = cm_node.to_a + return false if children.empty? 
+ + # Filter out softbreak and linebreak nodes (they're just formatting) + significant_children = children.reject { |c| %i[softbreak linebreak].include?(c.type) } + + # Pattern 1: Only image node + if significant_children.length == 1 + return significant_children.first.type == :image + end + + # Pattern 2: Image node followed by attribute block text + if significant_children.length == 2 + first = significant_children[0] + second = significant_children[1] + if first.type == :image && second.type == :text + # Check if the text is an attribute block + text_content = second.string_content.strip + return !parse_attribute_block(text_content).nil? + end + end + + false + end + + # Process standalone image as block-level ImageNode + def process_standalone_image(cm_node) + children = cm_node.to_a + # Filter out softbreak and linebreak nodes + significant_children = children.reject { |c| %i[softbreak linebreak].include?(c.type) } + + image_node = significant_children[0] # Get the image node + + # Check if there's an attribute block after the image (second child of paragraph) + # Pattern: ![alt](url){#id caption="..."} + attrs = nil + if significant_children.length == 2 && significant_children[1].type == :text + text_content = significant_children[1].string_content + attrs = parse_attribute_block(text_content) + end + + # Extract image information + image_id = attrs&.[](:id) || extract_image_id(image_node.url) + alt_text = extract_text(image_node) # Extract alt text from children + caption_text = attrs&.[](:caption) || alt_text + + caption_node = if caption_text && !caption_text.empty? 
+ node = CaptionNode.new(location: current_location(image_node)) + node.add_child(TextNode.new( + location: current_location(image_node), + content: caption_text + )) + node + end + + image_block = ImageNode.new( + location: current_location(image_node), + id: image_id, + caption_node: caption_node, + content: '', + image_type: :image + ) + + add_node_to_current_context(image_block) + end + + # Extract image ID from URL (remove extension if present) + def extract_image_id(url) + # Remove file extension for Re:VIEW compatibility + File.basename(url, '.*') + end + + # Process footnote definition from Markly + # Markly parses [^id]: content into :footnote_definition nodes + def process_footnote_definition(cm_node) + # Get footnote ID from Markly node's string_content + footnote_id = cm_node.string_content + + # Create FootnoteNode + footnote_node = FootnoteNode.new( + location: current_location(cm_node), + id: footnote_id, + footnote_type: :footnote + ) + + # Process footnote content (children of the footnote_definition node) + # Markly already parsed the content, including inline markup + @context.with_context(footnote_node) do + process_children(cm_node) + end + + add_node_to_current_context(footnote_node) + end + + # Auto-close columns when encountering a heading at the same or higher level + def auto_close_columns_for_heading(heading_level) + # Close columns that are at the same or lower level than the current heading + while @context.current.is_a?(ColumnNode) + column_node = @context.current + column_level = column_node.level + + # If the column was started at the same level or lower, close it + # (lower level number = higher heading, e.g., # is level 1, ## is level 2) + break if column_level && heading_level > column_level + + # Close the column + @context.pop + + # Add completed column to parent context + @context.current.add_child(column_node) + end + end + + # Close all remaining open columns + def close_all_columns + while @context.current.is_a?(ColumnNode) 
+ column_node = @context.current + @context.pop + + # Add completed column to parent context + @context.current.add_child(column_node) + end + end + + # Parse attribute block in the format {#id .class attr="value"} + # @param text [String] Text potentially containing attributes + # @return [Hash, nil] Hash of attributes or nil if not an attribute block + def parse_attribute_block(text) + @attribute_parser.parse(text) + end + + # Validate that final state is clean after conversion + def validate_final_state! + if @context.current != @ast_root + raise ReVIEW::CompileError, "Internal error: Context not properly restored. Expected to be at root but at #{@context.current.class}" + end + + # Check for unclosed columns + column_nodes = @context.find_all(ColumnNode) + unless column_nodes.empty? + raise ReVIEW::CompileError, "Internal error: #{column_nodes.length} unclosed column(s) remain" + end + + @context.validate! + end + end + end +end diff --git a/lib/review/ast/markdown_compiler.rb b/lib/review/ast/markdown_compiler.rb new file mode 100644 index 000000000..e34c70431 --- /dev/null +++ b/lib/review/ast/markdown_compiler.rb @@ -0,0 +1,111 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require_relative 'compiler' +require_relative 'markdown_adapter' +require_relative 'inline_tokenizer' + +# markly requires Ruby >= 3.1 +begin + require 'markly' +rescue LoadError + # markly is not available +end + +module ReVIEW + module AST + # MarkdownCompiler - Compiler for GitHub Flavored Markdown documents + # + # This class compiles Markdown documents to Re:VIEW AST using Markly + # for parsing and MarkdownAdapter for AST conversion. 
+ class MarkdownCompiler < Compiler + def initialize + super + @adapter = MarkdownAdapter.new(self) + end + + # Compile Markdown content to AST + # + # @param chapter [ReVIEW::Book::Chapter] Chapter context + # @param reference_resolution [Boolean] Whether to resolve references (default: true) + # @return [DocumentNode] The compiled AST root + def compile_to_ast(chapter, reference_resolution: true) + # Check if markly is available + unless defined?(Markly) + raise ReVIEW::CompileError, 'Markdown compilation requires markly gem, which is only available for Ruby >= 3.1. Please upgrade Ruby or use .re files instead.' + end + + @chapter = chapter + + # Create AST root + @ast_root = AST::DocumentNode.new( + location: SnapshotLocation.new(@chapter.basename, 1) + ) + @current_ast_node = @ast_root + + # Parse Markdown with Markly + # NOTE: tagfilter is removed to allow Re:VIEW inline notation @<cmd>{id} + extensions = %i[strikethrough table autolink] + + markdown_content = @chapter.content + + # Protect Re:VIEW inline notation from Markly's HTML parser + # Markly treats the "<cmd>" part of @<cmd>{id} as an HTML tag and removes it + # Replace @< with a temporary placeholder before parsing + markdown_content = markdown_content.gsub('@<', MarkdownAdapter::REVIEW_NOTATION_PLACEHOLDER) + + # Parse the Markdown content + markly_doc = Markly.parse( + markdown_content, + flags: Markly::FOOTNOTES, + extensions: extensions + ) + + # Convert Markly AST to Re:VIEW AST + @adapter.convert(markly_doc, @ast_root, @chapter) + + if reference_resolution + resolve_references + end + + @ast_root + end + + # Resolve references using ReferenceResolver + # This also builds indexes which sets chapter title + def resolve_references + # Skip reference resolution in test environments or when chapter lacks book context + return unless @chapter.book + + require_relative('reference_resolver') + resolver = ReferenceResolver.new(@chapter) + resolver.resolve_references(@ast_root) + end + + # Helper method to provide location information + 
def location + @current_location || SnapshotLocation.new(@chapter.basename, 1) + end + + # Add child to current node + def add_child_to_current_node(node) + @current_ast_node.add_child(node) + end + + # Push a new context node + def push_context(node) + @current_ast_node = node + end + + # Pop context node + def pop_context + @current_ast_node = @current_ast_node.parent || @ast_root + end + end + end +end diff --git a/lib/review/ast/markdown_html_node.rb b/lib/review/ast/markdown_html_node.rb new file mode 100644 index 000000000..e87137ff1 --- /dev/null +++ b/lib/review/ast/markdown_html_node.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require_relative 'node' + +module ReVIEW + module AST + # MarkdownHtmlNode - Node for HTML content in Markdown documents + # + # This node represents raw HTML content found in Markdown documents, + # including HTML comments and tags that may have special meaning + # for Re:VIEW processing (such as column markers). + class MarkdownHtmlNode < Node + attr_reader :html_content, :html_type + + # Initialize MarkdownHtmlNode + # + # @param location [SnapshotLocation] Source location + # @param html_content [String] Raw HTML content + # @param html_type [Symbol] Type of HTML content (:comment, :tag, :block) + def initialize(location:, html_content:, html_type: :block) + super(location: location) + @html_content = html_content + @html_type = html_type + end + + # Check if this is an HTML comment + # + # @return [Boolean] True if this is an HTML comment + def comment? + @html_type == :comment + end + + # Check if this is an HTML tag + # + # @return [Boolean] True if this is an HTML tag + def tag? 
+ @html_type == :tag + end + + # Extract content from HTML comment + # For comments like "" returns "column: Title" + # + # @return [String, nil] Comment content or nil if not a comment + def comment_content + return nil unless comment? + + # Remove HTML comment markers + content = @html_content.strip + if content.start_with?('') + content[4..-4].strip + else + content + end + end + + # Check if this is a column end comment + # Matches patterns like "" + # + # @return [Boolean] True if this is a column end comment + def column_end? + return false unless comment? + + content = comment_content + content&.match?(%r{\A\s*/column\s*\z}) + end + end + end +end diff --git a/lib/review/ast/minicolumn_node.rb b/lib/review/ast/minicolumn_node.rb new file mode 100644 index 000000000..625d51229 --- /dev/null +++ b/lib/review/ast/minicolumn_node.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +require_relative 'node' +require_relative 'caption_node' +require_relative 'captionable' + +module ReVIEW + module AST + # MinicolumnNode - Represents minicolumn blocks (note, memo, tip, etc.) 
+ class MinicolumnNode < Node + include Captionable + + attr_reader :minicolumn_type + + def initialize(location:, minicolumn_type: nil, caption_node: nil, **kwargs) + super(location: location, **kwargs) + @minicolumn_type = minicolumn_type # :note, :memo, :tip, :info, :warning, :important, :caution, :notice + @caption_node = caption_node + end + + def to_h + result = super.merge( + minicolumn_type: minicolumn_type + ) + result[:caption_node] = caption_node&.to_h if caption_node + result + end + + # Deserialize from hash + def self.deserialize_from_hash(hash) + node = new( + location: ReVIEW::AST::JSONSerializer.restore_location(hash), + minicolumn_type: hash['minicolumn_type'] || hash['column_type'], + caption_node: deserialize_caption_from_hash(hash) + ) + if hash['children'] + hash['children'].each do |child_hash| + child = ReVIEW::AST::JSONSerializer.deserialize_from_hash(child_hash) + node.add_child(child) if child.is_a?(ReVIEW::AST::Node) + end + end + node + end + + private + + def serialize_properties(hash, options) + hash[:minicolumn_type] = minicolumn_type + serialize_caption_to_hash(hash, options) + if children.any? + hash[:children] = children.map { |child| child.serialize_to_hash(options) } + end + hash + end + end + end +end diff --git a/lib/review/ast/node.rb b/lib/review/ast/node.rb new file mode 100644 index 000000000..58d087381 --- /dev/null +++ b/lib/review/ast/node.rb @@ -0,0 +1,174 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. 
+ +require 'json' +require_relative 'json_serializer' + +module ReVIEW + module AST + # Abstract base class for all AST nodes + # This class should not be instantiated directly - use specific subclasses instead + # + # Design principles: + # - Branch nodes (like ParagraphNode, InlineNode) inherit from Node and have children + # - Leaf nodes (like TextNode, ImageNode) inherit from LeafNode and cannot have children + # - LeafNode may have a content attribute, but subclasses can define their own data attributes + # - Never mix content and children in the same node + class Node + attr_reader :location, :type, :id, :original_text, :children + attr_accessor :parent + + def initialize(location:, type: nil, id: nil, original_text: nil, **_kwargs) + # Prevent direct instantiation of abstract base class (except in tests) + if self.instance_of?(ReVIEW::AST::Node) + raise StandardError, 'AST::Node is an abstract class and cannot be instantiated directly. Use a specific subclass instead.' + end + + @location = location + @children = [] + @parent = nil + @type = type + @id = id + @original_text = original_text + @attributes = {} + end + + def leaf_node? + false + end + + def reference_node? + false + end + + def add_child(child) + child.parent = self + @children << child + end + + def remove_child(child) + child.parent = nil + @children.delete(child) + end + + # Replace a child node with a new node + def replace_child(old_child, new_child) + index = @children.index(old_child) + return false unless index + + old_child.parent = nil + @children[index] = new_child + new_child.parent = self + true + end + + def insert_child(idx, *nodes) + nodes.each do |node| + node.parent = self + end + @children.insert(idx, *nodes) + end + + # Check if node has a non-empty id + def id? + @id && !@id.empty? 
+ end + + # Attribute management methods + def add_attribute(key, value) + @attributes[key] = value + end + + def attribute?(key) + @attributes.key?(key) + end + + # Return the visit method name for this node as a symbol. + # This is used by the Visitor pattern for method dispatch. + # + # @return [Symbol] The visit method symbol (e.g., :visit_headline) + # + # @example + # HeadlineNode.new.visit_method_name #=> :visit_headline + def visit_method_name + class_name = self.class.name.split('::').last + + # Convert CamelCase to snake_case and remove 'Node' suffix + method_name = class_name. + gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2'). + gsub(/([a-z\d])([A-Z])/, '\1_\2'). + downcase. + gsub(/_node$/, '') + + :"visit_#{method_name}" + end + + # Convert node to inline text representation (text without markup). + # This is used in inline contexts such as captions, headings, and footnotes. + # + # Default implementation for branch nodes (block elements): + # Block elements cannot be used in inline contexts, so this raises an error. + # Subclasses that can produce inline text should override this method. + # + # @return [String] The text content without markup + # @raise [ArgumentError] If block element is used in inline context + def to_inline_text + raise ArgumentError, "Block element #{self.class} cannot be used in inline context" + end + + # Basic JSON serialization for compatibility + def to_h + result = { + type: self.class.name.split('::').last, + location: location&.to_h, + children: children.map(&:to_h) + } + result[:node_type] = @type if @type && !@type.empty? + result[:id] = @id if @id && !@id.empty? 
+ result + end + + def to_json(*args) + to_h.to_json(*args) + end + + # Enhanced JSON serialization with options (using JSONSerializer) + def serialize_to_hash(options = nil) + options ||= JSONSerializer::Options.new + + # Start with type + hash = { + type: self.class.name.split('::').last + } + + # Include location information + if options.include_location + hash[:location] = location&.to_h + end + + # Call node-specific serialization + serialize_properties(hash, options) + + # Serialize child nodes if any + if children && children.any? + hash[:children] = children.map { |child| child.serialize_to_hash(options) } + end + + hash + end + + private + + # Override this method in subclasses to add node-specific properties + def serialize_properties(hash, _options) + # Base Node implementation - does nothing by default + hash + end + end + end +end diff --git a/lib/review/ast/paragraph_node.rb b/lib/review/ast/paragraph_node.rb new file mode 100644 index 000000000..a2aebddbd --- /dev/null +++ b/lib/review/ast/paragraph_node.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +require_relative 'node' + +module ReVIEW + module AST + class ParagraphNode < Node + # Convert paragraph content to inline text by joining children's inline text + # + # While ParagraphNode is a block element, in some contexts (like footnote indexing) + # we need to extract the text content. This method allows extracting the inline + # text from the paragraph's children. 
+ # + # @return [String] The inline text content + def to_inline_text + children.map(&:to_inline_text).join + end + + # Deserialize from hash + def self.deserialize_from_hash(hash) + node = new(location: ReVIEW::AST::JSONSerializer.restore_location(hash)) + if hash['children'] + hash['children'].each do |child_hash| + child = ReVIEW::AST::JSONSerializer.deserialize_from_hash(child_hash) + if child.is_a?(ReVIEW::AST::Node) + node.add_child(child) + elsif child.is_a?(String) + # Convert plain string to TextNode + node.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::AST::JSONSerializer.restore_location(hash), content: child)) + end + end + end + node + end + + private + + def serialize_properties(hash, options) + hash[:children] = children.map { |child| child.serialize_to_hash(options) } + hash + end + end + end +end diff --git a/lib/review/ast/raw_content_parser.rb b/lib/review/ast/raw_content_parser.rb new file mode 100644 index 000000000..0f27ba843 --- /dev/null +++ b/lib/review/ast/raw_content_parser.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +module ReVIEW + module AST + class RawContentParser + def self.parse(content) + new.parse(content) + end + + # Parse raw content for builder specification + # @param content [String, nil] + # @return [Array<(Array, String)>] builders + def parse(content) + return [nil, content] if content.nil? || content.empty? 
require_relative 'text_node'
require_relative 'resolved_data'

module ReVIEW
  module AST
    # ReferenceNode - node that holds reference information (used as a child of InlineNode)
    #
    # Placed as a child node of a reference-type InlineNode instead of a plain TextNode.
    # Instances are treated as immutable: resolving a reference builds a new
    # instance via #with_resolved_data instead of mutating the existing one.
    class ReferenceNode < TextNode
      attr_reader :ref_id, :context_id, :resolved_data

      # @param ref_id [String] reference ID (primary reference target)
      # @param context_id [String, nil] context ID (chapter ID, etc., optional)
      # @param location [SnapshotLocation, nil] location in source code
      # @param resolved_data [ResolvedData, nil] structured resolved data
      def initialize(ref_id, context_id = nil, location:, resolved_data: nil)
        # The text content is only used for debugging/display of the AST:
        # the resolved item_id when available, otherwise the original
        # (optionally "context|id"-qualified) reference ID.
        content = if resolved_data
                    resolved_data.item_id || ref_id
                  else
                    context_id ? "#{context_id}|#{ref_id}" : ref_id
                  end

        super(content: content, location: location)

        @ref_id = ref_id
        @context_id = context_id
        @resolved_data = resolved_data
      end

      # @return [Boolean] always true; lets callers detect reference nodes
      def reference_node?
        true
      end

      # Check if the reference has been resolved
      # @return [Boolean] true if resolved data is attached
      def resolved?
        !!@resolved_data
      end

      # Check if this is a cross-chapter reference
      # @return [Boolean] true if a context (chapter) ID was given
      def cross_chapter?
        !@context_id.nil?
      end

      # Return the full reference ID (prefixed with "context_id|" if present)
      # @return [String] full reference ID
      def full_ref_id
        @context_id ? "#{@context_id}|#{@ref_id}" : @ref_id
      end

      # Return a new ReferenceNode instance resolved with structured data
      # @param data [ResolvedData] structured resolved data
      # @return [ReferenceNode] new resolved instance (self is left untouched)
      def with_resolved_data(data)
        self.class.new(
          @ref_id,
          @context_id,
          resolved_data: data,
          location: @location
        )
      end

      # Node description string for debugging
      # @return [String] debug string containing the full reference ID and resolution status
      def to_s
        status = resolved? ? "resolved: #{@content}" : 'unresolved'
        "#<ReferenceNode {#{full_ref_id}} #{status}>"
      end

      # Hash representation including ReferenceNode-specific attributes.
      # Unlike #serialize_to_hash, the :location key is always present.
      # @return [Hash]
      def to_h
        result = {
          type: self.class.name.split('::').last,
          location: location_to_h
        }
        result[:content] = content if content
        result[:ref_id] = @ref_id
        result[:context_id] = @context_id if @context_id
        if @resolved_data
          # No caller-supplied options here, so serialize with the defaults
          options = ReVIEW::AST::JSONSerializer::Options.new
          result[:resolved_data] = @resolved_data.serialize_to_hash(options)
        end
        result
      end

      # Serialize to a hash honoring the given serializer options.
      # @param options [JSONSerializer::Options, nil] serialization options (defaults are used when nil)
      # @return [Hash]
      def serialize_to_hash(options = nil)
        options ||= ReVIEW::AST::JSONSerializer::Options.new

        hash = { type: self.class.name.split('::').last }
        hash[:location] = location_to_h if options.include_location

        # TextNode's content (inherited)
        hash[:content] = content if content

        # ReferenceNode-specific attributes
        hash[:ref_id] = @ref_id
        hash[:context_id] = @context_id if @context_id
        # Forward the caller's options so nested data (e.g. caption nodes)
        # is serialized with the same settings as this node.
        hash[:resolved_data] = @resolved_data.serialize_to_hash(options) if @resolved_data

        hash
      end

      # Rebuild a ReferenceNode from its serialized (string-keyed) hash form.
      # @param hash [Hash] hash with 'ref_id', optional 'context_id' and 'resolved_data'
      # @return [ReferenceNode]
      def self.deserialize_from_hash(hash)
        resolved_data = if hash['resolved_data']
                          ReVIEW::AST::ResolvedData.deserialize_from_hash(hash['resolved_data'])
                        end
        new(
          hash['ref_id'],
          hash['context_id'],
          location: ReVIEW::AST::JSONSerializer.restore_location(hash),
          resolved_data: resolved_data
        )
      end

      private

      # @return [Hash, nil] filename/lineno pair, or nil when no location is known
      def location_to_h
        return nil unless location

        {
          filename: location.filename,
          lineno: location.lineno
        }
      end
    end
  end
end
require_relative 'reference_node'
require_relative 'resolved_data'
require_relative 'inline_node'
require_relative 'indexer'
require_relative 'visitor'
require 'review/exception'

module ReVIEW
  module AST
    # ReferenceResolver - Specialized class for reference resolution
    #
    # Traverses ReferenceNodes contained in an AST and replaces each one with a
    # resolved copy carrying ResolvedData looked up from the chapter/book indexes.
    class ReferenceResolver < Visitor
      # Default mapping of reference types to resolver methods
      DEFAULT_RESOLVER_METHODS = {
        img: :resolve_image_ref,
        imgref: :resolve_image_ref,
        table: :resolve_table_ref,
        list: :resolve_list_ref,
        eq: :resolve_equation_ref,
        fn: :resolve_footnote_ref,
        endnote: :resolve_endnote_ref,
        column: :resolve_column_ref,
        chap: :resolve_chapter_ref,
        chapref: :resolve_chapter_ref_with_title,
        title: :resolve_chapter_title,
        hd: :resolve_headline_ref,
        sec: :resolve_section_ref,
        secref: :resolve_section_ref,
        sectitle: :resolve_section_ref,
        labelref: :resolve_label_ref,
        ref: :resolve_label_ref,
        w: :resolve_word_ref,
        wb: :resolve_word_ref,
        bib: :resolve_bib_ref,
        bibref: :resolve_bib_ref
      }.freeze

      # Index kinds searched by #resolve_label_ref, in priority order.
      # Each kind maps to the chapter's "<kind>_index" accessor and to the
      # ResolvedData factory method of the same name.
      LABEL_INDEX_KINDS = %i[image table list equation headline column].freeze
      private_constant :LABEL_INDEX_KINDS

      # @param chapter [Chapter] chapter whose AST will be resolved; its book is used for book-wide lookups
      def initialize(chapter)
        super()
        @chapter = chapter
        @book = chapter.book
        @resolver_methods = DEFAULT_RESOLVER_METHODS.dup
      end

      # Resolve every reference node reachable from the given AST root.
      # @param ast [Node] root node of the chapter AST
      # @return [Hash] counters in the form { resolved: Integer, failed: Integer }
      # @raise [CompileError] when a reference target cannot be found
      def resolve_references(ast)
        # Build indexes for current chapter from AST
        # This also sets chapter title for Markdown files
        build_indexes_from_ast(ast)

        @resolve_count = 0
        @error_count = 0

        visit(ast)

        { resolved: @resolve_count, failed: @error_count }
      end

      # Register a new reference type resolver
      # @param ref_type [Symbol] The reference type (e.g., :custom)
      # @param resolver_method [Symbol] The method name to handle this reference type
      # @example
      #   resolver.register_resolver_method(:custom, :resolve_custom_ref)
      def register_resolver_method(ref_type, resolver_method)
        @resolver_methods[ref_type.to_sym] = resolver_method
      end

      # @return [Array<Symbol>] List of all registered reference types
      def registered_reference_types
        @resolver_methods.keys
      end

      private

      # Visit the node's caption (when it has one) before its children.
      def visit_all_with_caption(node)
        visit(node.caption_node) if node.respond_to?(:caption_node) && node.caption_node
        visit_all(node.children)
      end

      def build_indexes_from_ast(ast)
        indexer = Indexer.new(@chapter)
        indexer.build_indexes(ast)
      end

      # Resolve ReferenceNode (ref_type taken from parent InlineNode) and
      # swap the resolved copy into the parent in place of the original node.
      # @param node [ReferenceNode] The reference node to resolve
      # @param ref_type [Symbol] The reference type (e.g., :img, :table, :list)
      # @return [Boolean] true when the resolver produced data
      # @raise [CompileError] when no resolver is registered for ref_type
      def resolve_node(node, ref_type)
        method_name = @resolver_methods[ref_type]
        raise CompileError, "Unknown reference type: #{ref_type}" unless method_name

        resolved_data = send(method_name, node)

        resolved_node = node.with_resolved_data(resolved_data)
        node.parent&.replace_child(node, resolved_node)

        !resolved_data.nil?
      end

      def visit_document(node)
        visit_all(node.children)
      end

      def visit_paragraph(node)
        visit_all(node.children)
      end

      # Plain text carries no references; nothing to do.
      def visit_text(node)
      end

      def visit_headline(node)
        visit_all_with_caption(node)
      end

      def visit_column(node)
        visit_all_with_caption(node)
      end

      def visit_code_block(node)
        visit_all_with_caption(node)
      end

      def visit_table(node)
        visit_all_with_caption(node)
      end

      def visit_image(node)
        visit_all_with_caption(node)
      end

      def visit_minicolumn(node)
        visit_all_with_caption(node)
      end

      def visit_embed(node)
        visit_all(node.children)
      end

      def visit_footnote(node)
        visit_all(node.children)
      end

      def visit_tex_equation(node)
        visit_all_with_caption(node)
      end

      def visit_block(node)
        visit_all_with_caption(node)
      end

      def visit_list(node)
        visit_all(node.children)
      end

      # List items may carry term children (presumably definition-list terms) in addition to children.
      def visit_list_item(node)
        visit_all(node.term_children) if node.term_children&.any?
        visit_all(node.children)
      end

      def visit_caption(node)
        visit_all(node.children)
      end

      def visit_code_line(node)
        visit_all(node.children)
      end

      def visit_table_row(node)
        visit_all(node.children)
      end

      def visit_table_cell(node)
        visit_all(node.children)
      end

      def visit_inline(node)
        visit_all(node.children)
      end

      # Resolve a single reference node and update the success/failure counters.
      def visit_reference(node)
        return if node.resolved?

        parent_inline = node.parent
        return unless parent_inline.is_a?(InlineNode)

        ref_type = parent_inline.inline_type.to_sym

        # Skip non-reference inline elements (decoration elements)
        # Only process elements that are registered as reference types
        return unless @resolver_methods.key?(ref_type)

        if resolve_node(node, ref_type)
          @resolve_count += 1
        else
          @error_count += 1
        end
      end

      # Generic method to resolve indexed item references (image, table, list, etc.)
      # Both the index method and ResolvedData factory method are automatically derived from item_type_label
      # (e.g., :image -> :image_index and ResolvedData.image)
      # @param node [ReferenceNode] The reference node containing ref_id and context_id
      # @param item_type_label [Symbol] Label for index method, factory method, and error messages (e.g., :image, :table, :list)
      # @return [ResolvedData] The resolved reference data
      # @raise [CompileError] when the target chapter or the item cannot be found
      def resolve_indexed_item_ref(node, item_type_label)
        index_method = :"#{item_type_label}_index"

        target_chapter = target_chapter_for(node)
        raise CompileError, "Chapter not found for #{item_type_label} reference: #{node.context_id}" unless target_chapter

        index = target_chapter.send(index_method)
        item = find_index_item(index, node.ref_id)
        raise CompileError, "#{item_type_label.to_s.capitalize} reference not found: #{node.full_ref_id}" unless item

        ResolvedData.send(item_type_label,
                          chapter_number: target_chapter.number,
                          item_number: index_item_number(item),
                          chapter_id: node.context_id,
                          item_id: node.ref_id,
                          chapter_type: chapter_type(target_chapter),
                          caption_node: item.caption_node)
      rescue ReVIEW::KeyError
        raise CompileError, "#{item_type_label.to_s.capitalize} reference not found: #{node.full_ref_id}"
      end

      def resolve_image_ref(node)
        resolve_indexed_item_ref(node, :image)
      end

      def resolve_table_ref(node)
        resolve_indexed_item_ref(node, :table)
      end

      def resolve_list_ref(node)
        resolve_indexed_item_ref(node, :list)
      end

      # Equations are looked up in the current chapter only.
      def resolve_equation_ref(node)
        item = find_index_item(@chapter.equation_index, node.ref_id)
        unless item
          raise CompileError, "Equation reference not found: #{node.ref_id}"
        end

        ResolvedData.equation(
          chapter_number: @chapter.number,
          item_number: index_item_number(item),
          item_id: node.ref_id,
          chapter_type: chapter_type(@chapter),
          caption_node: item.caption_node
        )
      rescue ReVIEW::KeyError
        raise CompileError, "Equation reference not found: #{node.ref_id}"
      end

      def resolve_footnote_ref(node)
        item = find_index_item(@chapter.footnote_index, node.ref_id)
        unless item
          raise CompileError, "Footnote reference not found: #{node.ref_id}"
        end

        # An index entry without an attached footnote node is treated as "not found"
        if item.respond_to?(:footnote_node?) && !item.footnote_node?
          raise CompileError, "Footnote reference not found: #{node.ref_id}"
        end

        item_number = item.respond_to?(:number) ? item.number : nil
        caption_node = item.respond_to?(:footnote_node) ? item.footnote_node : nil
        ResolvedData.footnote(
          item_number: item_number,
          item_id: node.ref_id,
          caption_node: caption_node
        )
      end

      def resolve_endnote_ref(node)
        item = find_index_item(@chapter.endnote_index, node.ref_id)
        unless item
          raise CompileError, "Endnote reference not found: #{node.ref_id}"
        end

        # NOTE(review): endnote items are checked with footnote_node? here but
        # read via caption_node below - confirm against the index item class.
        if item.respond_to?(:footnote_node?) && !item.footnote_node?
          raise CompileError, "Endnote reference not found: #{node.ref_id}"
        end

        item_number = item.respond_to?(:number) ? item.number : nil
        caption_node = item.respond_to?(:caption_node) ? item.caption_node : nil
        ResolvedData.endnote(
          item_number: item_number,
          item_id: node.ref_id,
          caption_node: caption_node
        )
      end

      def resolve_column_ref(node)
        target_chapter = target_chapter_for(node)
        raise CompileError, "Chapter not found for column reference: #{node.context_id}" unless target_chapter

        item = safe_column_fetch(target_chapter, node.ref_id)
        ResolvedData.column(
          chapter_number: target_chapter.number,
          item_number: index_item_number(item),
          chapter_id: node.context_id,
          item_id: node.ref_id,
          chapter_type: chapter_type(target_chapter),
          caption_node: item.caption_node
        )
      end

      # Resolve chapter references (for @<chap>, @<chapref>, @<title>)
      # These all resolve to the same ResolvedData::ChapterReference, but renderers
      # format them differently based on the inline type
      def resolve_chapter_ref(node)
        resolve_chapter_ref_common(node)
      end

      def resolve_chapter_ref_with_title(node)
        resolve_chapter_ref_common(node)
      end

      def resolve_chapter_title(node)
        resolve_chapter_ref_common(node)
      end

      def resolve_chapter_ref_common(node)
        chapter = find_chapter_by_id(node.ref_id)
        raise CompileError, "Chapter reference not found: #{node.ref_id}" unless chapter

        ResolvedData.chapter(
          chapter_number: chapter.number,
          chapter_id: node.ref_id,
          item_id: node.ref_id,
          chapter_title: chapter.title,
          chapter_type: chapter_type(chapter)
        )
      end

      def resolve_headline_ref(node)
        target_chapter = target_chapter_for(node)
        raise CompileError, "Chapter not found for headline reference: #{node.context_id}" if node.cross_chapter? && !target_chapter

        headline = find_index_item(target_chapter&.headline_index, node.ref_id)
        raise CompileError, "Headline not found: #{node.full_ref_id}" unless headline

        ResolvedData.headline(
          headline_number: headline.number,
          chapter_number: target_chapter&.number,
          chapter_id: node.context_id,
          item_id: node.ref_id,
          chapter_type: chapter_type(target_chapter),
          caption_node: headline.caption_node
        )
      end

      def resolve_section_ref(node)
        # Section references use the same data structure as headline references
        # Renderers will format appropriately (e.g., adding "節" for secref)
        resolve_headline_ref(node)
      end

      # Label references search multiple indexes of the current chapter
      # (in LABEL_INDEX_KINDS priority order) and return the ResolvedData for
      # the first index that contains the label.
      # @param node [ReferenceNode] The reference node containing ref_id
      # @return [ResolvedData] resolved data of the first matching kind
      # @raise [CompileError] when no index contains the label
      def resolve_label_ref(node)
        LABEL_INDEX_KINDS.each do |kind|
          index = @chapter.public_send(:"#{kind}_index")
          next unless index

          item = find_index_item(index, node.ref_id)
          return build_label_resolved_data(kind, item, node) if item
        end

        # TODO: Support for other labeled elements (note, memo, tip, etc.)
        # Currently there are no dedicated indexes for these elements,
        # so we need to add label_index in the future

        raise CompileError, "Label not found: #{node.ref_id}"
      end

      # Build the ResolvedData for a label found in the current chapter.
      # Headlines carry their number as headline_number; every other kind
      # uses the stringified item_number.
      def build_label_resolved_data(kind, item, node)
        common = {
          chapter_number: @chapter.number,
          item_id: node.ref_id,
          chapter_type: chapter_type(@chapter),
          caption_node: item.caption_node
        }

        if kind == :headline
          ResolvedData.headline(headline_number: item.number, **common)
        else
          ResolvedData.public_send(kind, item_number: index_item_number(item), **common)
        end
      end

      # @return [String, nil] the item's number as a string, or nil when unavailable
      def index_item_number(item)
        return unless item

        number = item.respond_to?(:number) ? item.number : nil
        number.nil? ? nil : number.to_s
      end

      # Look up an item by ID, returning nil (rather than raising) when the
      # index is missing or does not contain the ID.
      def find_index_item(index, id)
        return nil unless index

        begin
          index[id]
        rescue ReVIEW::KeyError
          nil
        end
      end

      def safe_column_fetch(chapter, column_id)
        raise CompileError, "Column reference not found: #{column_id}" unless chapter

        chapter.column(column_id)
      rescue ReVIEW::KeyError
        raise CompileError, "Column reference not found: #{column_id}"
      end

      def resolve_word_ref(node)
        dictionary = @book.config['dictionary'] || {}
        unless dictionary.key?(node.ref_id)
          raise CompileError, "word not bound: #{node.ref_id}"
        end

        ResolvedData.word(
          word_content: dictionary[node.ref_id],
          item_id: node.ref_id
        )
      end

      # Bibpapers are book-wide, so use @book.bibpaper_index instead of chapter index
      def resolve_bib_ref(node)
        item = find_index_item(@book.bibpaper_index, node.ref_id)
        raise CompileError, "unknown bib: #{node.ref_id}" unless item

        ResolvedData.bibpaper(
          item_number: index_item_number(item),
          item_id: node.ref_id,
          caption_node: item.caption_node
        )
      rescue ReVIEW::KeyError
        raise CompileError, "unknown bib: #{node.ref_id}"
      end

      # Get target chapter for a reference node
      # Returns the referenced chapter if context_id is present, otherwise current chapter
      # @param node [ReferenceNode] The reference node
      # @return [Chapter, nil] The target chapter, or nil when the referenced chapter is unknown
      def target_chapter_for(node)
        node.cross_chapter? ? find_chapter_by_id(node.context_id) : @chapter
      end

      # Find a chapter by ID via the book's chapter index, falling back to a
      # linear search of the book contents.
      def find_chapter_by_id(id)
        begin
          item = @book.chapter_index[id]
          return item.content if item
        rescue ReVIEW::KeyError
          # fall through to contents search
        end

        Array(@book.contents).find { |chap| chap.id == id }
      end

      # Determine chapter type based on chapter attributes
      # @param chapter [Chapter, nil] The chapter to check
      # @return [Symbol, nil] One of :chapter, :appendix, :part, :predef; nil when chapter is nil
      def chapter_type(chapter)
        return nil unless chapter

        if chapter.is_a?(ReVIEW::Book::Part)
          :part
        elsif chapter.on_predef?
          :predef
        elsif chapter.on_appendix?
          :appendix
        else
          :chapter
        end
      end
    end
  end
end
module ReVIEW
  module AST
    # ResolvedData - Immutable data structure holding resolved reference information
    #
    # This class contains structured data about resolved references,
    # separating the logical resolution (what is being referenced)
    # from the presentation (how it should be displayed).
    # Concrete reference kinds are nested subclasses; use the factory
    # methods below (ResolvedData.image, .table, ...) to build them.
    class ResolvedData
      attr_reader :chapter_number, :item_number, :chapter_id, :item_id
      attr_reader :chapter_title, :headline_number, :word_content
      attr_reader :caption_node, :chapter_type

      # Get caption text from caption_node
      # @return [String] Caption text, empty string if no caption_node
      def caption_text
        caption_node&.to_inline_text || ''
      end

      # @return [Boolean] true when a chapter_id was recorded for this reference
      #   (no comparison with the current chapter is performed here)
      def cross_chapter?
        !@chapter_id.nil?
      end

      # Check if the reference was successfully resolved
      # @return [Boolean] true if an item_number is set
      def exists?
        !@item_number.nil?
      end

      # Helper methods for chapter type checking
      # @return [Boolean] true if the referenced chapter is a regular chapter
      def chapter?
        @chapter_type == :chapter
      end

      # @return [Boolean] true if the referenced chapter is an appendix
      def appendix?
        @chapter_type == :appendix
      end

      # @return [Boolean] true if the referenced chapter is a part
      def part?
        @chapter_type == :part
      end

      # Value equality: same concrete class and the same attribute values.
      def ==(other)
        other.instance_of?(self.class) &&
          @chapter_number == other.chapter_number &&
          @item_number == other.item_number &&
          @chapter_id == other.chapter_id &&
          @item_id == other.item_id &&
          @caption_node == other.caption_node &&
          @chapter_title == other.chapter_title &&
          @headline_number == other.headline_number &&
          @word_content == other.word_content &&
          @chapter_type == other.chapter_type
      end

      alias_method :eql?, :==

      # Hash code consistent with #eql?, so equal instances behave as the same
      # Hash key / Set member. caption_node is deliberately left out: its
      # class may define #== without #hash, and omitting it can only cause
      # extra collisions, never unequal hashes for equal objects.
      # @return [Integer]
      def hash
        [
          self.class, @chapter_number, @item_number, @chapter_id, @item_id,
          @chapter_title, @headline_number, @word_content, @chapter_type
        ].hash
      end

      def to_s
        parts = ['#<ResolvedData']
        parts << "chapter=#{@chapter_number}" if @chapter_number
        parts << "item=#{@item_number}" if @item_number
        parts << "chapter_id=#{@chapter_id}" if @chapter_id
        parts << "item_id=#{@item_id}"
        parts << "type=#{@chapter_type}" if @chapter_type
        parts.join(' ') + '>'
      end

      # Serialize to hash
      # @param options [JSONSerializer::Options, nil] Serialization options
      # @return [Hash] Serialized hash representation
      def serialize_to_hash(options = nil)
        options ||= ReVIEW::AST::JSONSerializer::Options.new
        hash = { type: self.class.name.split('::').last }
        serialize_properties(hash, options)
        hash
      end

      # Serialize properties - to be overridden by subclasses
      # Only attributes that are set (truthy) are written.
      # @param hash [Hash] Hash to populate with properties
      # @param options [JSONSerializer::Options] Serialization options
      # @return [Hash] Populated hash
      def serialize_properties(hash, options)
        hash[:chapter_number] = @chapter_number if @chapter_number
        hash[:item_number] = @item_number if @item_number
        hash[:chapter_id] = @chapter_id if @chapter_id
        hash[:item_id] = @item_id if @item_id
        hash[:chapter_title] = @chapter_title if @chapter_title
        hash[:headline_number] = @headline_number if @headline_number
        hash[:word_content] = @word_content if @word_content
        hash[:chapter_type] = @chapter_type if @chapter_type
        hash[:caption_node] = @caption_node.serialize_to_hash(options) if @caption_node
        hash
      end

      # Dispatch deserialization to the nested subclass named by hash['type'].
      # @param hash [Hash, nil] Hash to deserialize from (string keys)
      # @return [ResolvedData, nil] Deserialized instance; nil when hash or its type is missing
      # @raise [StandardError] when the type does not name a nested ResolvedData class
      def self.deserialize_from_hash(hash)
        return nil unless hash

        type = hash['type']
        return nil unless type

        # Look the class up among this class's own constants only
        # (inherit = false), so a serialized type can never resolve to an
        # unrelated top-level constant. The rescue is scoped to the lookup so
        # NameErrors raised while deserializing are not misreported.
        klass = begin
          const_get(type, false)
        rescue NameError
          nil
        end
        unless klass.is_a?(Class) && klass.respond_to?(:deserialize_from_hash)
          raise StandardError, "Unknown ResolvedData type: #{type}"
        end

        klass.deserialize_from_hash(hash)
      end

      # Get the reference type for this resolved data
      # @return [Symbol] Reference type (e.g., :image, :table, :list)
      # @raise [NotImplementedError] unless overridden by the subclass
      def reference_type
        raise NotImplementedError, "#{self.class}#reference_type must be implemented"
      end

      # Factory methods for common reference types

      def self.image(chapter_number:, item_number:, item_id:, chapter_id: nil, chapter_type: nil, caption_node: nil)
        ImageReference.new(
          chapter_number: chapter_number,
          item_number: item_number,
          chapter_id: chapter_id,
          item_id: item_id,
          chapter_type: chapter_type,
          caption_node: caption_node
        )
      end

      def self.table(chapter_number:, item_number:, item_id:, chapter_id: nil, chapter_type: nil, caption_node: nil)
        TableReference.new(
          chapter_number: chapter_number,
          item_number: item_number,
          chapter_id: chapter_id,
          item_id: item_id,
          chapter_type: chapter_type,
          caption_node: caption_node
        )
      end

      def self.list(chapter_number:, item_number:, item_id:, chapter_id: nil, chapter_type: nil, caption_node: nil)
        ListReference.new(
          chapter_number: chapter_number,
          item_number: item_number,
          chapter_id: chapter_id,
          item_id: item_id,
          chapter_type: chapter_type,
          caption_node: caption_node
        )
      end

      def self.equation(chapter_number:, item_number:, item_id:, chapter_type: nil, caption_node: nil)
        EquationReference.new(
          chapter_number: chapter_number,
          item_number: item_number,
          item_id: item_id,
          chapter_type: chapter_type,
          caption_node: caption_node
        )
      end

      def self.footnote(item_number:, item_id:, caption_node: nil)
        FootnoteReference.new(
          item_number: item_number,
          item_id: item_id,
          caption_node: caption_node
        )
      end

      def self.endnote(item_number:, item_id:, caption_node: nil)
        EndnoteReference.new(
          item_number: item_number,
          item_id: item_id,
          caption_node: caption_node
        )
      end

      def self.chapter(chapter_number:, chapter_id:, item_id:, chapter_title: nil, caption_node: nil, chapter_type: nil)
        ChapterReference.new(
          chapter_number: chapter_number,
          chapter_id: chapter_id,
          item_id: item_id,
          chapter_title: chapter_title,
          caption_node: caption_node,
          chapter_type: chapter_type
        )
      end

      def self.headline(headline_number:, item_id:, chapter_id: nil, chapter_number: nil, chapter_type: nil, caption_node: nil)
        HeadlineReference.new(
          item_id: item_id,
          chapter_id: chapter_id,
          chapter_number: chapter_number,
          headline_number: headline_number, # Array format [1, 2, 3]
          chapter_type: chapter_type,
          caption_node: caption_node
        )
      end

      def self.word(word_content:, item_id:, caption_node: nil)
        WordReference.new(
          item_id: item_id,
          word_content: word_content,
          caption_node: caption_node
        )
      end

      def self.column(chapter_number:, item_number:, item_id:, chapter_id: nil, chapter_type: nil, caption_node: nil)
        ColumnReference.new(
          chapter_number: chapter_number,
          item_number: item_number,
          chapter_id: chapter_id,
          item_id: item_id,
          chapter_type: chapter_type,
          caption_node: caption_node
        )
      end

      def self.bibpaper(item_number:, item_id:, caption_node: nil)
        BibpaperReference.new(
          item_number: item_number,
          item_id: item_id,
          caption_node: caption_node
        )
      end
    end
  end
end

# Require nested class files
require_relative 'resolved_data/captioned_item_reference'
require_relative 'resolved_data/image_reference'
require_relative 'resolved_data/table_reference'
require_relative 'resolved_data/list_reference'
require_relative 'resolved_data/equation_reference'
require_relative 'resolved_data/footnote_reference'
require_relative 'resolved_data/endnote_reference'
require_relative 'resolved_data/chapter_reference'
require_relative 'resolved_data/headline_reference'
require_relative 'resolved_data/word_reference'
require_relative 'resolved_data/column_reference'
require_relative 'resolved_data/bibpaper_reference'
module ReVIEW
  module AST
    class ResolvedData
      # Resolved data for a bibliography reference: carries the item number,
      # the item ID and an optional caption node.
      class BibpaperReference < ResolvedData
        # @param item_number [Object] number of the referenced bibliography entry
        # @param item_id [String] ID of the referenced bibliography entry
        # @param caption_node [Object, nil] caption node of the entry, if any
        def initialize(item_number:, item_id:, caption_node: nil)
          super()
          @caption_node = caption_node
          @item_id = item_id
          @item_number = item_number
        end

        # @return [Symbol] always :bibpaper
        def reference_type
          :bibpaper
        end

        # Rebuild an instance from its serialized (string-keyed) hash form.
        # The caption node is only deserialized when the hash carries one.
        # @param hash [Hash] serialized representation
        # @return [BibpaperReference]
        def self.deserialize_from_hash(hash)
          serialized_caption = hash['caption_node']
          restored_caption = serialized_caption ? ReVIEW::AST::JSONSerializer.deserialize_from_hash(serialized_caption) : nil

          new(item_number: hash['item_number'], item_id: hash['item_id'], caption_node: restored_caption)
        end
      end
    end
  end
end
module ReVIEW
  module AST
    class ResolvedData
      # Shared base for references that carry a chapter number, an item number
      # and a caption (the pattern used by ImageReference, TableReference,
      # ListReference, EquationReference and ColumnReference).
      #
      # Note: no formatting happens here; the file's original comment assigns
      # formatting to the TextFormatter and Renderer classes.
      class CaptionedItemReference < ResolvedData
        # @param chapter_number [Object] number of the chapter containing the item
        # @param item_number [Object] number of the item within the chapter
        # @param item_id [String] ID of the referenced item
        # @param chapter_id [String, nil] chapter ID for cross-chapter references
        # @param chapter_type [Symbol, nil] kind of chapter (e.g. :chapter, :appendix)
        # @param caption_node [Object, nil] caption node of the item, if any
        def initialize(chapter_number:, item_number:, item_id:, chapter_id: nil, chapter_type: nil, caption_node: nil)
          super()
          @item_id = item_id
          @item_number = item_number
          @chapter_id = chapter_id
          @chapter_number = chapter_number
          @chapter_type = chapter_type
          @caption_node = caption_node
        end

        # Template method - every concrete subclass has to override it.
        # @return [String] The I18n key for the label (e.g., 'image', 'table', 'list')
        # @raise [NotImplementedError] when the subclass did not override it
        def label_key
          raise NotImplementedError, "#{self.class} must implement #label_key"
        end
      end
    end
  end
end
module ReVIEW
  module AST
    class ResolvedData
      # ChapterReference - represents chapter references (@<chap>, @<chapref>, @<title>)
      class ChapterReference < ResolvedData
        # @param chapter_number [Object] number of the referenced chapter
        # @param chapter_id [String] ID of the referenced chapter
        # @param item_id [String] ID used in the reference
        # @param chapter_title [String, nil] title of the referenced chapter
        # @param caption_node [Object, nil] caption node, if any
        # @param chapter_type [Symbol, nil] kind of chapter (e.g. :chapter, :appendix)
        def initialize(chapter_number:, chapter_id:, item_id:, chapter_title: nil, caption_node: nil, chapter_type: nil)
          super()
          @item_id = item_id
          @chapter_id = chapter_id
          @chapter_number = chapter_number
          @chapter_title = chapter_title
          @chapter_type = chapter_type
          @caption_node = caption_node
        end

        # @return [Symbol] always :chapter
        def reference_type
          :chapter
        end

        # Rebuild an instance from its serialized (string-keyed) hash form.
        # 'chapter_type' is stored as a string and converted back to a Symbol.
        # @param hash [Hash] serialized representation
        # @return [ChapterReference]
        def self.deserialize_from_hash(hash)
          serialized_caption = hash['caption_node']
          restored_caption = serialized_caption ? ReVIEW::AST::JSONSerializer.deserialize_from_hash(serialized_caption) : nil
          type_symbol = hash['chapter_type']&.to_sym

          new(
            chapter_number: hash['chapter_number'],
            chapter_id: hash['chapter_id'],
            item_id: hash['item_id'],
            chapter_title: hash['chapter_title'],
            caption_node: restored_caption,
            chapter_type: type_symbol
          )
        end
      end
    end
  end
end
require_relative 'captioned_item_reference'

module ReVIEW
  module AST
    class ResolvedData
      # Resolved data for a column reference.
      class ColumnReference < CaptionedItemReference
        # @return [String] I18n label key for columns
        def label_key
          'column'
        end

        # @return [Symbol] always :column
        def reference_type
          :column
        end

        # Rebuild an instance from its serialized (string-keyed) hash form.
        # 'chapter_type' is stored as a string and converted back to a Symbol.
        # @param hash [Hash] serialized representation
        # @return [ColumnReference]
        def self.deserialize_from_hash(hash)
          serialized_caption = hash['caption_node']
          restored_caption = serialized_caption ? ReVIEW::AST::JSONSerializer.deserialize_from_hash(serialized_caption) : nil

          new(
            chapter_number: hash['chapter_number'],
            item_number: hash['item_number'],
            item_id: hash['item_id'],
            chapter_id: hash['chapter_id'],
            chapter_type: hash['chapter_type']&.to_sym,
            caption_node: restored_caption
          )
        end
      end
    end
  end
end
module ReVIEW
  module AST
    class ResolvedData
      # Resolved data for an endnote reference: carries the item number,
      # the item ID and an optional caption node.
      class EndnoteReference < ResolvedData
        # @param item_number [Object] number of the referenced endnote
        # @param item_id [String] ID of the referenced endnote
        # @param caption_node [Object, nil] node holding the endnote content, if any
        def initialize(item_number:, item_id:, caption_node: nil)
          super()
          @caption_node = caption_node
          @item_id = item_id
          @item_number = item_number
        end

        # @return [Symbol] always :endnote
        def reference_type
          :endnote
        end

        # Rebuild an instance from its serialized (string-keyed) hash form.
        # The caption node is only deserialized when the hash carries one.
        # @param hash [Hash] serialized representation
        # @return [EndnoteReference]
        def self.deserialize_from_hash(hash)
          serialized_caption = hash['caption_node']
          restored_caption = serialized_caption ? ReVIEW::AST::JSONSerializer.deserialize_from_hash(serialized_caption) : nil

          new(item_number: hash['item_number'], item_id: hash['item_id'], caption_node: restored_caption)
        end
      end
    end
  end
end
require_relative 'captioned_item_reference'

module ReVIEW
  module AST
    class ResolvedData
      # Resolved data for an equation reference.
      class EquationReference < CaptionedItemReference
        # Equations take no chapter_id, so the constructor is narrowed and
        # always forwards chapter_id: nil to the base class.
        # @param chapter_number [Object] number of the chapter containing the equation
        # @param item_number [Object] number of the equation within the chapter
        # @param item_id [String] ID of the referenced equation
        # @param chapter_type [Symbol, nil] kind of chapter (e.g. :chapter, :appendix)
        # @param caption_node [Object, nil] caption node of the equation, if any
        def initialize(chapter_number:, item_number:, item_id:, chapter_type: nil, caption_node: nil)
          base_arguments = {
            chapter_number: chapter_number,
            item_number: item_number,
            item_id: item_id,
            chapter_id: nil,
            chapter_type: chapter_type,
            caption_node: caption_node
          }
          super(**base_arguments)
        end

        # @return [String] I18n label key for equations
        def label_key
          'equation'
        end

        # @return [Symbol] always :equation
        def reference_type
          :equation
        end

        # Rebuild an instance from its serialized (string-keyed) hash form.
        # 'chapter_type' is stored as a string and converted back to a Symbol.
        # @param hash [Hash] serialized representation
        # @return [EquationReference]
        def self.deserialize_from_hash(hash)
          serialized_caption = hash['caption_node']
          restored_caption = serialized_caption ? ReVIEW::AST::JSONSerializer.deserialize_from_hash(serialized_caption) : nil

          new(
            chapter_number: hash['chapter_number'],
            item_number: hash['item_number'],
            item_id: hash['item_id'],
            chapter_type: hash['chapter_type']&.to_sym,
            caption_node: restored_caption
          )
        end
      end
    end
  end
end
module ReVIEW
  module AST
    class ResolvedData
      # Resolved data for a footnote reference: carries the item number,
      # the item ID and an optional caption node.
      class FootnoteReference < ResolvedData
        # @param item_number [Object] number of the referenced footnote
        # @param item_id [String] ID of the referenced footnote
        # @param caption_node [Object, nil] node holding the footnote content, if any
        def initialize(item_number:, item_id:, caption_node: nil)
          super()
          @caption_node = caption_node
          @item_id = item_id
          @item_number = item_number
        end

        # @return [Symbol] always :footnote
        def reference_type
          :footnote
        end

        # Rebuild an instance from its serialized (string-keyed) hash form.
        # The caption node is only deserialized when the hash carries one.
        # @param hash [Hash] serialized representation
        # @return [FootnoteReference]
        def self.deserialize_from_hash(hash)
          serialized_caption = hash['caption_node']
          restored_caption = serialized_caption ? ReVIEW::AST::JSONSerializer.deserialize_from_hash(serialized_caption) : nil

          new(item_number: hash['item_number'], item_id: hash['item_id'], caption_node: restored_caption)
        end
      end
    end
  end
end

module ReVIEW
  module AST
    class ResolvedData
      # Resolved reference to a headline: stores the headline id and number
      # together with optional chapter information and a caption node.
      class HeadlineReference < ResolvedData
        class << self
          # Rebuild a HeadlineReference from its serialized (string-keyed) hash.
          # 'chapter_type' is converted to a Symbol when present.
          #
          # @param hash [Hash] serialized form; 'caption_node' is optional
          # @return [HeadlineReference]
          def deserialize_from_hash(hash)
            serialized_caption = hash['caption_node']
            caption = serialized_caption ? ReVIEW::AST::JSONSerializer.deserialize_from_hash(serialized_caption) : nil

            new(item_id: hash['item_id'],
                headline_number: hash['headline_number'],
                chapter_id: hash['chapter_id'],
                chapter_number: hash['chapter_number'],
                chapter_type: hash['chapter_type']&.to_sym,
                caption_node: caption)
          end
        end

        # Only item_id and headline_number are required; everything else
        # defaults to nil.
        def initialize(item_id:, headline_number:, chapter_id: nil, chapter_number: nil, chapter_type: nil, caption_node: nil)
          super()
          @item_id, @headline_number = item_id, headline_number
          @chapter_id, @chapter_number, @chapter_type = chapter_id, chapter_number, chapter_type
          @caption_node = caption_node
        end

        # @return [Symbol] reference kind
        def reference_type
          :headline
        end
      end
    end
  end
end

# === lib/review/ast/resolved_data/image_reference.rb (new file) ===
# frozen_string_literal: true

# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
#
# This program is free software.
# You can distribute or modify this program under the terms of
# the GNU LGPL, Lesser General Public License version 2.1.

require_relative 'captioned_item_reference'

module ReVIEW
  module AST
    class ResolvedData
      # Resolved reference to an image, built on CaptionedItemReference.
      class ImageReference < CaptionedItemReference
        class << self
          # Rebuild an ImageReference from its serialized (string-keyed) hash.
          # 'chapter_type' is converted to a Symbol when present.
          #
          # @param hash [Hash] serialized form; 'caption_node' is optional
          # @return [ImageReference]
          def deserialize_from_hash(hash)
            serialized_caption = hash['caption_node']
            caption = serialized_caption ? ReVIEW::AST::JSONSerializer.deserialize_from_hash(serialized_caption) : nil

            new(chapter_number: hash['chapter_number'],
                item_number: hash['item_number'],
                item_id: hash['item_id'],
                chapter_id: hash['chapter_id'],
                chapter_type: hash['chapter_type']&.to_sym,
                caption_node: caption)
          end
        end

        # @return [String] label key for this kind of reference
        def label_key
          'image'
        end

        # @return [Symbol] reference kind
        def reference_type
          :image
        end
      end
    end
  end
end

# === lib/review/ast/resolved_data/list_reference.rb (new file) ===
# frozen_string_literal: true

# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
#
# This program is free software.
# You can distribute or modify this program under the terms of
# the GNU LGPL, Lesser General Public License version 2.1.

require_relative 'captioned_item_reference'

module ReVIEW
  module AST
    class ResolvedData
      # Resolved reference to a list, built on CaptionedItemReference.
      class ListReference < CaptionedItemReference
        class << self
          # Rebuild a ListReference from its serialized (string-keyed) hash.
          # 'chapter_type' is converted to a Symbol when present.
          #
          # @param hash [Hash] serialized form; 'caption_node' is optional
          # @return [ListReference]
          def deserialize_from_hash(hash)
            serialized_caption = hash['caption_node']
            caption = serialized_caption ? ReVIEW::AST::JSONSerializer.deserialize_from_hash(serialized_caption) : nil

            new(chapter_number: hash['chapter_number'],
                item_number: hash['item_number'],
                item_id: hash['item_id'],
                chapter_id: hash['chapter_id'],
                chapter_type: hash['chapter_type']&.to_sym,
                caption_node: caption)
          end
        end

        # @return [String] label key for this kind of reference
        def label_key
          'list'
        end

        # @return [Symbol] reference kind
        def reference_type
          :list
        end
      end
    end
  end
end

# === lib/review/ast/resolved_data/table_reference.rb (new file) ===
# frozen_string_literal: true

# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
#
# This program is free software.
# You can distribute or modify this program under the terms of
# the GNU LGPL, Lesser General Public License version 2.1.

require_relative 'captioned_item_reference'

module ReVIEW
  module AST
    class ResolvedData
      # Resolved reference to a table, built on CaptionedItemReference.
      class TableReference < CaptionedItemReference
        class << self
          # Rebuild a TableReference from its serialized (string-keyed) hash.
          # 'chapter_type' is converted to a Symbol when present.
          #
          # @param hash [Hash] serialized form; 'caption_node' is optional
          # @return [TableReference]
          def deserialize_from_hash(hash)
            serialized_caption = hash['caption_node']
            caption = serialized_caption ? ReVIEW::AST::JSONSerializer.deserialize_from_hash(serialized_caption) : nil

            new(chapter_number: hash['chapter_number'],
                item_number: hash['item_number'],
                item_id: hash['item_id'],
                chapter_id: hash['chapter_id'],
                chapter_type: hash['chapter_type']&.to_sym,
                caption_node: caption)
          end
        end

        # @return [String] label key for this kind of reference
        def label_key
          'table'
        end

        # @return [Symbol] reference kind
        def reference_type
          :table
        end
      end
    end
  end
end

# === lib/review/ast/resolved_data/word_reference.rb (new file) ===
# frozen_string_literal: true

# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
#
# This program is free software.
# You can distribute or modify this program under the terms of
# the GNU LGPL, Lesser General Public License version 2.1.

module ReVIEW
  module AST
    class ResolvedData
      # Resolved reference to a word: stores the item id, the word content
      # and an optional caption node.
      class WordReference < ResolvedData
        class << self
          # Rebuild a WordReference from its serialized (string-keyed) hash.
          #
          # @param hash [Hash] serialized form; 'caption_node' is optional
          # @return [WordReference]
          def deserialize_from_hash(hash)
            serialized_caption = hash['caption_node']
            caption = serialized_caption ? ReVIEW::AST::JSONSerializer.deserialize_from_hash(serialized_caption) : nil

            new(item_id: hash['item_id'],
                word_content: hash['word_content'],
                caption_node: caption)
          end
        end

        # @param item_id [Object] id of the referenced word entry
        # @param word_content [Object] content stored for the word
        # @param caption_node [Object, nil] optional caption node
        def initialize(item_id:, word_content:, caption_node: nil)
          super()
          @item_id, @word_content, @caption_node = item_id, word_content, caption_node
        end

        # @return [Symbol] reference kind
        def reference_type
          :word
        end
      end
    end
  end
end

# === lib/review/ast/review_generator.rb (new file) ===
# frozen_string_literal: true

# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
#
# This program is free software.
# You can distribute or modify this program under the terms of
# the GNU LGPL, Lesser General Public License version 2.1.

require 'review/ast'
require_relative 'visitor'

module ReVIEW
  module AST
    # ReVIEWGenerator - Generate Re:VIEW text from AST nodes
    #
    # Every visit_* method returns the Re:VIEW markup (a String) for one node.
    # Inline and generic block nodes are dispatched dynamically to
    # visit_inline_<type> / visit_block_<type> when such a method exists.
    class ReVIEWGenerator < Visitor
      # Generate Re:VIEW text from AST root node
      # @param ast_root [AST::Node] root of the tree to render
      # @return [String] Re:VIEW markup
      def generate(ast_root)
        visit(ast_root)
      end

      private

      # Visit all children of a node and concatenate results
      # Uses parent's visit_all method for consistency
      # @param node [AST::Node] The parent node
      # @return [String] Concatenated text from all children
      def visit_children(node)
        visit_all(node.children).join
      end

      # Escape special characters for Re:VIEW inline markup
      # Prefixes every backslash and every closing brace with a backslash so
      # the content cannot terminate the surrounding @<op>{...} early.
      # @param text [String, nil] The text to escape (nil is treated as '')
      # @return [String] Escaped text safe for Re:VIEW inline markup
      def escape_inline_content(text)
        # Block form: the block's return value is inserted literally, so no
        # replacement-string backslash rules are involved.
        text.to_s.gsub(/[\\}]/) { |ch| "\\#{ch}" }
      end

      # Convert CaptionNode to Re:VIEW markup format
      # @param caption_node [CaptionNode, nil] The caption node to convert
      # @return [String] Re:VIEW markup string ('' when there is no caption)
      def caption_to_review_markup(caption_node)
        return '' if caption_node.nil? || caption_node.children.empty?

        caption_node.children.map { |child| render_node_as_review_markup(child) }.join
      end

      # Recursively render AST nodes as Re:VIEW markup text
      # This method is primarily used for rendering caption content where inline elements
      # need to be processed. For general node visiting, use the visit_* methods instead.
      # @param node [Node] The node to render
      # @return [String] Re:VIEW markup representation
      def render_node_as_review_markup(node)
        case node
        when ReVIEW::AST::TextNode
          node.content
        when ReVIEW::AST::InlineNode
          # Use the visit_inline_* methods for consistency
          visit(node)
        else
          node.leaf_node? ? node.content : ''
        end
      end

      def visit_document(node)
        visit_children(node)
      end

      # Format: ={label} caption, followed by a blank line and the children
      def visit_headline(node)
        text = '=' * (node.level || 1)
        text += "{#{node.label}}" if node.label && !node.label.empty?

        caption_text = caption_to_review_markup(node.caption_node)
        text += ' ' + caption_text unless caption_text.empty?

        text + "\n\n" + visit_children(node)
      end

      # Paragraphs whose content is blank are dropped entirely
      def visit_paragraph(node)
        content = visit_children(node)
        return '' if content.strip.empty?

        content + "\n\n"
      end

      def visit_text(node)
        node.content || ''
      end

      def visit_reference(node)
        # ReferenceNode inherits from TextNode and has content
        # Simply output the content (which is the ref_id or resolved item_id)
        node.content || ''
      end

      def visit_footnote(node)
        # FootnoteNode represents a footnote definition
        # Format: //footnote[id][content]
        content = visit_children(node).strip
        footnote_type = node.footnote_type == :endnote ? 'endnote' : 'footnote'
        "//#{footnote_type}[#{node.id}][#{content}]\n\n"
      end

      def visit_tex_equation(node)
        # TexEquationNode represents LaTeX equation blocks
        # Format: //texequation[id][caption]{content//}
        text = '//texequation'
        text += "[#{node.id}]" if node.id?
        caption_text = caption_to_review_markup(node.caption_node)
        text += "[#{caption_text}]" unless caption_text.empty?
        text += "{\n"
        text += node.content || ''
        text += "\n" unless node.content&.end_with?("\n")
        text + "//}\n\n"
      end

      def visit_inline(node)
        # Use dynamic method dispatch for extensibility
        # To add a new inline type, define a method: visit_inline_<type>(node)
        method_name = "visit_inline_#{node.inline_type}"
        if respond_to?(method_name, true)
          send(method_name, node)
        else
          # Default implementation for unknown inline types
          visit_inline_default(node)
        end
      end

      # Default implementation for inline elements
      # Uses children content or first arg as fallback
      def visit_inline_default(node)
        content = visit_children(node)
        # Use args as fallback if children are empty
        content = node.args.first.to_s if content.empty? && node.args.any?
        "@<#{node.inline_type}>{#{escape_inline_content(content)}}"
      end

      # Inline element: @<kw>{word, description}
      def visit_inline_kw(node)
        if node.args.size >= 2
          word = escape_inline_content(node.args[0])
          desc = escape_inline_content(node.args[1])
          "@<kw>{#{word}, #{desc}}"
        elsif node.args.size == 1
          word = escape_inline_content(node.args[0])
          "@<kw>{#{word}}"
        else
          content = escape_inline_content(visit_children(node))
          "@<kw>{#{content}}"
        end
      end

      # Inline element: @<ruby>{base, ruby_text}
      def visit_inline_ruby(node)
        base = escape_inline_content(node.args[0])
        if node.args.size >= 2
          ruby_text = escape_inline_content(node.args[1])
          "@<ruby>{#{base}, #{ruby_text}}"
        else
          "@<ruby>{#{base}}"
        end
      end

      # Inline element: @<href>{url, text}
      def visit_inline_href(node)
        url = node.args[0] || ''
        content = visit_children(node)
        if content.empty?
          "@<href>{#{url}}"
        else
          escaped_content = escape_inline_content(content)
          "@<href>{#{url}, #{escaped_content}}"
        end
      end

      # Code blocks: //list, //listnum (with id) or //emlist, //emlistnum (without)
      def visit_code_block(node)
        # Determine block type
        block_type = if node.id?
                       node.line_numbers ? 'listnum' : 'list'
                     else
                       node.line_numbers ? 'emlistnum' : 'emlist'
                     end

        # Build opening tag
        text = '//' + block_type
        text += "[#{node.id}]" if node.id?

        caption_text = caption_to_review_markup(node.caption_node)
        has_lang = node.lang && !node.lang.empty?
        has_caption = !caption_text.empty?

        # list/listnum: //list[id][caption][lang]
        # emlist/emlistnum: //emlist[caption][lang]
        # In both families the caption slot must be present (even when empty)
        # whenever a language follows it, so one rule covers every block type.
        text += "[#{caption_text}]" if has_caption || has_lang
        text += "[#{node.lang}]" if has_lang

        text += "{\n"

        # Add code lines from original_text or reconstruct from AST
        if node.original_text && !node.original_text.empty?
          text += node.original_text
        elsif node.children.any?
          text += node.children.map { |line_node| render_code_line(line_node) }.join("\n")
        end
        text += "\n" unless text.end_with?("\n")

        text + "//}\n\n"
      end

      # Reconstruct one code line from its AST children.
      # Inline nodes go through visit so they get the same escaping and
      # argument handling (kw, ruby, href, ...) as inline nodes elsewhere.
      # @param line_node [Node] a child of a code block node
      # @return [String] the markup for that line
      def render_code_line(line_node)
        return line_node.to_s unless line_node.children

        line_node.children.map do |child|
          case child
          when ReVIEW::AST::TextNode
            child.content
          when ReVIEW::AST::InlineNode
            visit(child)
          else
            child.to_s
          end
        end.join
      end

      def visit_list(node)
        case node.list_type
        when :ul
          visit_unordered_list(node)
        when :ol
          visit_ordered_list(node)
        when :dl
          visit_definition_list(node)
        else
          visit_children(node)
        end
      end

      def visit_list_item(node)
        # This should be handled by parent list type
        visit_children(node)
      end

      def visit_table(node)
        table_type = node.table_type || :table
        text = build_table_header(node, table_type)
        text += build_table_body(node.header_rows, node.body_rows)
        text + "//}\n\n"
      end

      # Format: //image[id][caption][metric]
      def visit_image(node)
        # Use image_type to determine the command (:image, :indepimage, :numberlessimage)
        image_command = node.image_type || :image
        text = "//#{image_command}[#{node.id || ''}]"

        caption_text = caption_to_review_markup(node.caption_node)
        has_metric = node.metric && !node.metric.empty?
        # The caption slot must be emitted (possibly empty) when a metric
        # follows; otherwise the metric would land in the caption position.
        text += "[#{caption_text}]" if !caption_text.empty? || has_metric
        text += "[#{node.metric}]" if has_metric
        text + "\n\n"
      end

      def visit_minicolumn(node)
        text = "//#{node.minicolumn_type}"

        caption_text = caption_to_review_markup(node.caption_node)
        text += "[#{caption_text}]" unless caption_text.empty?
        text += "{\n"

        # Handle children - they may be strings or nodes
        if node.children.any?
          content_lines = []
          node.children.each do |child|
            if child.is_a?(String)
              # Skip empty strings
              content_lines << child unless child.strip.empty?
            else
              content_lines << visit(child)
            end
          end
          if content_lines.any?
            text += content_lines.join("\n")
            text += "\n" unless text.end_with?("\n")
          end
        end

        text + "//}\n\n"
      end

      def visit_block(node)
        # Use dynamic method dispatch for extensibility
        # To add a new block type, define a method: visit_block_<type>(node)
        #
        # EXTENSION GUIDE: When adding new block types:
        # 1. Define a new method: visit_block_<blocktype>(node)
        # 2. For simple wrapper blocks (like quote, read, lead):
        #    - Use render_simple_wrapper_block(blocktype, visit_children(node))
        #    - Format: "//blocktype{\ncontent\n//}\n\n"
        # 3. For directive blocks (like pagebreak, hr):
        #    - Format: "//blocktype\n\n"
        # 4. For blocks with parameters and a body (like doorquote, graph):
        #    - Use render_args_wrapper_block(blocktype, node)
        # 5. For blocks with caption (like texequation):
        #    - Use caption_to_review_markup(node.caption_node)
        #    - Check node.id? for ID availability
        method_name = "visit_block_#{node.block_type}"
        if respond_to?(method_name, true)
          send(method_name, node)
        else
          # Default: just render children for unknown block types
          visit_children(node)
        end
      end

      # Simple wrapper block helper
      # Wraps content in //blocktype{ ... //}
      def render_simple_wrapper_block(block_type, content)
        text = "//#{block_type}{\n" + content
        text += "\n" unless content.end_with?("\n")
        text + "//}\n\n"
      end

      # Wrapper block with bracketed parameters helper
      # Renders //blocktype[arg1][arg2]...{ ... //}; the brackets are omitted
      # when node.args is empty.
      def render_args_wrapper_block(block_type, node)
        text = "//#{block_type}"
        text += "[#{node.args.join('][')}]" if node.args.any?
        text += "{\n"
        content = visit_children(node)
        text += content
        text += "\n" unless content.end_with?("\n")
        text + "//}\n\n"
      end

      # Block: //quote{ ... //}
      def visit_block_quote(node)
        render_simple_wrapper_block('quote', visit_children(node))
      end

      # Block: //read{ ... //}
      def visit_block_read(node)
        render_simple_wrapper_block('read', visit_children(node))
      end

      # Block: //lead{ ... //}
      def visit_block_lead(node)
        render_simple_wrapper_block('lead', visit_children(node))
      end

      # Block: //centering{ ... //}
      def visit_block_centering(node)
        render_simple_wrapper_block('centering', visit_children(node))
      end

      # Block: //flushright{ ... //}
      def visit_block_flushright(node)
        render_simple_wrapper_block('flushright', visit_children(node))
      end

      # Block: //comment{ ... //}
      def visit_block_comment(node)
        render_simple_wrapper_block('comment', visit_children(node))
      end

      # Block: //address{ ... //}
      def visit_block_address(node)
        render_simple_wrapper_block('address', visit_children(node))
      end

      # Block: //talk{ ... //}
      def visit_block_talk(node)
        render_simple_wrapper_block('talk', visit_children(node))
      end

      # Block: //blankline
      def visit_block_blankline(_node)
        "//blankline\n\n"
      end

      # Block: //noindent
      def visit_block_noindent(node)
        "//noindent\n" + visit_children(node)
      end

      # Block: //pagebreak
      def visit_block_pagebreak(_node)
        "//pagebreak\n\n"
      end

      # Block: //hr
      def visit_block_hr(_node)
        "//hr\n\n"
      end

      # Block: //parasep
      def visit_block_parasep(_node)
        "//parasep\n\n"
      end

      # Block: //bpo
      def visit_block_bpo(_node)
        "//bpo\n\n"
      end

      # Block: //printendnotes
      def visit_block_printendnotes(_node)
        "//printendnotes\n\n"
      end

      # Block: //beginchild
      def visit_block_beginchild(_node)
        "//beginchild\n\n"
      end

      # Block: //endchild
      def visit_block_endchild(_node)
        "//endchild\n\n"
      end

      # Block: //olnum[num]
      def visit_block_olnum(node)
        "//olnum[#{node.args.join(', ')}]\n\n"
      end

      # Block: //firstlinenum[num]
      def visit_block_firstlinenum(node)
        "//firstlinenum[#{node.args.join(', ')}]\n\n"
      end

      # Block: //tsize[...]
      def visit_block_tsize(node)
        "//tsize[#{node.args.join(', ')}]\n\n"
      end

      # Block: //label[id]
      def visit_block_label(node)
        "//label[#{node.args.first}]\n\n"
      end

      # Block: //footnote[id][content]
      def visit_block_footnote(node)
        content = visit_children(node)
        # Array#join always returns a String, so no nil fallback is needed
        "//footnote[#{node.args.join('][')}][#{content.strip}]\n\n"
      end

      # Block: //endnote[id][content]
      def visit_block_endnote(node)
        content = visit_children(node)
        "//endnote[#{node.args.join('][')}][#{content.strip}]\n\n"
      end

      # Block: //texequation[id][caption]{ ... //}
      def visit_block_texequation(node)
        text = '//texequation'
        caption_text = caption_to_review_markup(node.caption_node)
        text += "[#{node.id}]" if node.id
        text += "[#{caption_text}]" unless caption_text.empty?
        text += "{\n"
        content = visit_children(node)
        text += content
        text += "\n" unless content.end_with?("\n")
        text + "//}\n\n"
      end

      # Block: //doorquote[...]{ ... //}
      def visit_block_doorquote(node)
        render_args_wrapper_block('doorquote', node)
      end

      # Block: //bibpaper[...]{ ... //}
      def visit_block_bibpaper(node)
        render_args_wrapper_block('bibpaper', node)
      end

      # Block: //graph[...]{ ... //}
      def visit_block_graph(node)
        render_args_wrapper_block('graph', node)
      end

      # Block: //box[caption]{ ... //}
      # Only the first argument is emitted.
      def visit_block_box(node)
        text = '//box'
        text += "[#{node.args.first}]" if node.args.any?
        text += "{\n"
        content = visit_children(node)
        text += content
        text += "\n" unless content.end_with?("\n")
        text + "//}\n\n"
      end

      # :block embeds render as //embed[...]{ ... //}, :raw embeds as
      # //raw[...]{ ... //}; any other embed_type is rendered inline.
      def visit_embed(node)
        case node.embed_type
        when :block, :raw
          command = node.embed_type == :block ? 'embed' : 'raw'
          target = node.target_builders&.join(',') || ''
          text = "//#{command}[#{target}]{\n"
          text += node.content || ''
          text += "\n" unless text.end_with?("\n")
          text + "//}\n\n"
        else
          # Inline embed should be handled in inline context
          "@<embed>{#{node.content || ''}}"
        end
      end

      def visit_caption(node)
        visit_children(node)
      end

      # Format: =[column]{label} caption ... =[/column]
      def visit_column(node)
        marker = '=' * (node.level || 1)
        text = marker + '[column]'
        text += "{#{node.label}}" if node.label && !node.label.empty?
        caption_text = caption_to_review_markup(node.caption_node)
        text += " #{caption_text}" unless caption_text.empty?
        text += "\n\n"
        text += visit_children(node)
        text += "\n" unless text.end_with?("\n")
        text + marker + "[/column]\n\n"
      end

      # Children that are not ListItemNode are ignored
      def visit_unordered_list(node)
        text = ''
        node.children.each do |item|
          next unless item.is_a?(ReVIEW::AST::ListItemNode)

          level = item.level || 1
          marker = '*' * level
          text += format_list_item(marker, level, item)
        end
        text + (text.empty? ? '' : "\n")
      end

      # Uses item.number when set, otherwise the 1-based position among all children
      def visit_ordered_list(node)
        text = ''
        node.children.each_with_index do |item, index|
          next unless item.is_a?(ReVIEW::AST::ListItemNode)

          level = item.level || 1
          number = item.number || (index + 1)
          marker = "#{number}."
          text += format_list_item(marker, level, item)
        end
        text + (text.empty? ? '' : "\n")
      end

      def visit_definition_list(node)
        text = ''
        node.children.each do |item|
          next unless item.is_a?(ReVIEW::AST::ListItemNode)

          next unless item.term_children.any? || item.children.any?

          term = item.term_children.any? ? visit_all(item.term_children).join : ''
          text += " : #{term}\n"

          item.children.each do |defn|
            defn_text = visit(defn)
            # Remove trailing newlines from paragraph content in definition lists
            # to avoid creating blank lines between definition items
            defn_text = defn_text.sub(/\n+\z/, '') if defn.is_a?(ReVIEW::AST::ParagraphNode)
            text += "\t#{defn_text}\n" unless defn_text.strip.empty?
          end
        end
        text + (text.empty? ? '' : "\n")
      end

      # Format a list item with proper indentation
      # @param marker [String] list marker ('*', '**', '1.', ...)
      # @param _level [Integer] nesting level (unused: the marker already encodes it)
      # @param item [ListItemNode] the item to render
      # @return [String] the item line(s) followed by any nested lists
      def format_list_item(marker, _level, item)
        # For Re:VIEW format, all list items start with a single space
        indent = ' '

        # Separate nested lists from other content
        nested_lists, non_list_children = item.children.partition { |child| child.is_a?(ReVIEW::AST::ListNode) }

        # Process non-list content
        # Check if we have multiple TextNodes (possibly with InlineNodes in between)
        # which indicates continuation lines in the original markup
        text_node_count = non_list_children.count { |c| c.is_a?(ReVIEW::AST::TextNode) }

        if text_node_count > 1
          # Multiple text nodes indicate continuation lines
          # Process each child separately and join with newlines
          parts = []
          current_line = []

          non_list_children.each do |child|
            # Start a new line if we already have content
            if child.is_a?(ReVIEW::AST::TextNode) && current_line.any?
              # Join the current line and strip it
              parts << current_line.join.strip
              current_line = []
            end
            # Add the visited child to the current line (TextNode or InlineNode)
            current_line << visit(child)
          end

          # Don't forget the last line
          parts << current_line.join.strip if current_line.any?

          content = parts.first
          continuation = parts[1..].map { |part| " #{part}" }.join("\n")
          content += "\n" + continuation unless continuation.empty?
        else
          content = visit_all(non_list_children).join.strip
        end

        # Build the item text
        text = "#{indent}#{marker} #{content}\n"

        # Process nested lists separately
        nested_lists.each do |nested_list|
          nested_text = visit(nested_list)
          # Remove the trailing newline from nested list to avoid extra blank line
          text += nested_text.chomp
        end

        text
      end

      # Build table opening tag with type, ID, and caption
      # @param node [TableNode] The table node
      # @param table_type [Symbol] The table type (:table, :imgtable, etc.)
      # @return [String] Table opening tag with parameters
      def build_table_header(node, table_type)
        text = "//#{table_type}"
        text += "[#{node.id}]" if node.id?

        caption_text = caption_to_review_markup(node.caption_node)
        text += "[#{caption_text}]" unless caption_text.empty?
        text + "{\n"
      end

      # Build table body with header and body rows
      # @param header_rows [Array<RowNode>] Header row nodes
      # @param body_rows [Array<RowNode>] Body row nodes
      # @return [String] Formatted table rows with separator
      def build_table_body(header_rows, body_rows)
        lines = format_table_rows(header_rows)
        lines << ('-' * 12) if header_rows.any?
        lines.concat(format_table_rows(body_rows))

        return '' if lines.empty?

        lines.join("\n") + "\n"
      end

      # Format multiple table rows
      # @param rows [Array<RowNode>] Row nodes to format
      # @return [Array<String>] Formatted row strings
      def format_table_rows(rows)
        rows.map { |row| format_table_row(row) }
      end

      # Format a single table row
      # @param row [RowNode] Row node to format
      # @return [String] Tab-separated cell contents
      def format_table_row(row)
        row.children.map { |cell| render_cell_content(cell) }.join("\t")
      end

      # Render table cell content
      # @param cell [CellNode] Cell node to render
      # @return [String] Cell content or '.' for empty cells
      def render_cell_content(cell)
        content = cell.children.map { |child| visit(child) }.join
        # Empty cells should be represented with a dot in Re:VIEW syntax
        content.empty? ? '.' : content
      end
    end
  end
end

# === lib/review/ast/table_cell_node.rb (new file) ===
# frozen_string_literal: true

# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
#
# This program is free software.
# You can distribute or modify this program under the terms of
# the GNU LGPL, Lesser General Public License version 2.1.

require_relative 'node'

module ReVIEW
  module AST
    # TableCellNode - Represents a cell in a table
    #
    # A table cell can contain text nodes and inline elements.
    # Cells are separated by tabs in the original Re:VIEW syntax.
    #
    # The cell_type attribute determines whether this cell should be
    # rendered as a header cell (<th>) or data cell (<td>).
    class TableCellNode < Node
      attr_reader :children, :cell_type

      # @param location [Object] source location, passed on to Node
      # @param cell_type [Symbol] :th or :td (default :td)
      def initialize(location:, cell_type: :td, **kwargs)
        super
        @children = []
        @cell_type = cell_type # :th or :td
      end

      # Rebuild a cell from its serialized (string-keyed) hash.
      #
      # serialize_properties writes 'cell_type' only when it differs from :td,
      # so it is read back here with :td as the default; without this a
      # header cell (:th) would come back as a data cell.
      # @param hash [Hash] serialized form
      # @return [TableCellNode]
      def self.deserialize_from_hash(hash)
        node = new(
          location: ReVIEW::AST::JSONSerializer.restore_location(hash),
          cell_type: (hash['cell_type'] || :td).to_sym
        )
        (hash['children'] || []).each do |child_hash|
          child = ReVIEW::AST::JSONSerializer.deserialize_from_hash(child_hash)
          # Anything that did not deserialize to a node is skipped
          node.add_child(child) if child.is_a?(ReVIEW::AST::Node)
        end
        node
      end

      private

      # Adds cell_type to the serialized hash, omitting the default (:td)
      def serialize_properties(hash, options)
        super
        hash[:cell_type] = @cell_type if @cell_type != :td
        hash
      end
    end
  end
end

# === lib/review/ast/table_column_width_parser.rb (new file) ===
# frozen_string_literal: true

# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi
#
# This program is free software.
# You can distribute or modify this program under the terms of
# the GNU LGPL, Lesser General Public License version 2.1.
+ +module ReVIEW + module AST + # Parse tsize specification and generate column width information + # This class handles the logic from LATEXBuilder's tsize/separate_tsize methods + class TableColumnWidthParser + # Result struct for parse method + Result = Struct.new(:col_spec, :cellwidth) + + # Check if cellwidth is a fixed-width specification (contains '{') + # @param cellwidth [String] column width specification (e.g., "p{10mm}", "l", "c") + # @return [Boolean] true if fixed-width (contains braces) + def self.fixed_width?(cellwidth) + cellwidth && cellwidth.include?('{') + end + + # Initialize parser with tsize specification and column count + # @param tsize [String] tsize specification (e.g., "10,18,50" or "p{10mm}p{18mm}|p{50mm}") + # @param col_count [Integer] number of columns + def initialize(tsize, col_count) + raise ArgumentError, 'col_count must be positive' if col_count.nil? || col_count <= 0 + + @tsize = tsize + @col_count = col_count + end + + # Parse tsize specification and return result as Struct + # @return [Result] Result struct with col_spec and cellwidth + def parse + if @tsize.nil? || @tsize.empty? + default_spec + elsif simple_format? + parse_simple_format + else + parse_complex_format + end + end + + private + + # Generate default column specification + # @return [Result] Result struct with default values + def default_spec + Result.new( + '|' + ('l|' * @col_count), + ['l'] * @col_count + ) + end + + # Check if tsize is in simple format (e.g., "10,18,50") + # @return [Boolean] true if simple format + def simple_format? 
+ /\A[\d., ]+\Z/.match?(@tsize) + end + + # Parse simple format tsize (e.g., "10,18,50" means p{10mm},p{18mm},p{50mm}) + # @return [Result] Result struct with parsed values + def parse_simple_format + cellwidth = @tsize.split(/\s*,\s*/).map { |i| "p{#{i}mm}" } + col_spec = '|' + cellwidth.join('|') + '|' + + Result.new(col_spec, cellwidth) + end + + # Parse complex format tsize (e.g., "p{10mm}p{18mm}|p{50mm}") + # @return [Result] Result struct with parsed values + def parse_complex_format + cellwidth = separate_columns(@tsize) + Result.new(@tsize, cellwidth) + end + + # Parse tsize string into array of column specifications + # Example: "p{10mm}p{18mm}|p{50mm}" -> ["p{10mm}", "p{18mm}", "p{50mm}"] + # @param size [String] tsize specification + # @return [Array<String>] array of column specifications + def separate_columns(size) + columns = [] + current = +'' + in_brace = false + + size.each_char do |ch| + case ch + when '|' + # Skip pipe characters (table borders) + next + when '{' + in_brace = true + current << ch + when '}' + in_brace = false + current << ch + columns << current + current = +'' + else + if in_brace || current.empty? + current << ch + else + columns << current + current = ch.dup + end + end + end + + columns << current unless current.empty? 
# frozen_string_literal: true

require_relative 'node'
require_relative 'caption_node'
require_relative 'captionable'
require_relative 'json_serializer'

module ReVIEW
  module AST
    # AST node for a table.
    #
    # Rows are stored as children of this node. #header_rows and #body_rows are
    # views over those children, filtered by each row's row_type.
    class TableNode < Node
      include Captionable

      attr_accessor :col_spec, :cellwidth
      attr_reader :table_type, :metric

      # @param location [Object] source location, forwarded to Node#initialize
      # @param id [String, nil] table ID
      # @param caption_node [Object, nil] caption node
      # @param table_type [Symbol] :table, :emtable or :imgtable
      # @param metric [String, nil] metric specification
      # @param col_spec [String, nil] column specification string (e.g. "|l|c|r|")
      # @param cellwidth [Array, nil] column width specifications
      # @param kwargs [Hash] remaining options, forwarded to Node#initialize
      def initialize(location:, id: nil, caption_node: nil, table_type: :table, metric: nil, col_spec: nil, cellwidth: nil, **kwargs)
        super(location: location, id: id, **kwargs)
        @caption_node = caption_node
        @table_type = table_type
        @metric = metric
        @col_spec = col_spec
        @cellwidth = cellwidth
        # NOTE(review): these two ivars are never read in this class; the
        # header_rows/body_rows readers below are derived from @children.
        # Confirm nothing else inspects them before removing.
        @header_rows = []
        @body_rows = []
      end

      # @return [Array] children whose row_type is :header
      def header_rows
        @children.select { |row| row.row_type == :header }
      end

      # @return [Array] children whose row_type is :body
      def body_rows
        @children.select { |row| row.row_type == :body }
      end

      # Add a header row, inserting it before the first body row if one exists.
      #
      # @param row_node [Object] row with row_type :header
      # @raise [ArgumentError] if the row is not a header row
      def add_header_row(row_node)
        unless row_node.row_type == :header
          raise ArgumentError, "Expected header row (row_type: :header), got #{row_node.row_type}"
        end

        first_body_index = @children.index { |child| child.row_type == :body }
        if first_body_index
          insert_child(first_body_index, row_node)
        else
          add_child(row_node)
        end
      end

      # Append a body row.
      #
      # @param row_node [Object] row with row_type :body
      # @raise [ArgumentError] if the row is not a body row
      def add_body_row(row_node)
        unless row_node.row_type == :body
          raise ArgumentError, "Expected body row (row_type: :body), got #{row_node.row_type}"
        end

        add_child(row_node)
      end

      # Number of cells in the first row (header rows first, then body rows).
      #
      # @return [Integer] the cell count, or 1 when the table has no rows
      def column_count
        first_row = (header_rows + body_rows).first
        first_row&.children&.length || 1
      end

      # @return [String] default column specification (left-aligned with borders)
      def default_col_spec
        '|' + ('l|' * column_count)
      end

      # @return [Array<String>] default cellwidth array (all left-aligned)
      def default_cellwidth
        ['l'] * column_count
      end

      # Parse a tsize value and store the resulting col_spec and cellwidth.
      #
      # @param tsize_value [String] tsize specification
      def parse_and_set_tsize(tsize_value)
        # Loaded on first use rather than at file load time.
        require_relative('table_column_width_parser')
        result = TableColumnWidthParser.new(tsize_value, column_count).parse
        @col_spec = result.col_spec
        @cellwidth = result.cellwidth
      end

      # Update id and/or caption after creation; nil arguments leave the
      # current value untouched.
      #
      # @param id [String, nil] table ID
      # @param caption_node [Object, nil] caption node
      def update_attributes(id: nil, caption_node: nil)
        @id = id if id
        @caption_node = caption_node if caption_node
      end

      # @return [Hash] hash form with rows split into header_rows/body_rows
      def to_h
        result = super.merge(
          caption_node: caption_node&.to_h,
          table_type: table_type,
          header_rows: header_rows.map(&:to_h),
          body_rows: body_rows.map(&:to_h)
        )
        result[:metric] = metric if metric
        result[:col_spec] = col_spec if col_spec
        result[:cellwidth] = cellwidth if cellwidth
        result
      end

      # Serialize using header_rows/body_rows instead of a children field.
      #
      # @param options [JSONSerializer::Options, nil] serializer options
      # @return [Hash]
      def serialize_to_hash(options = nil)
        options ||= JSONSerializer::Options.new
        hash = { type: self.class.name.split('::').last }
        hash[:location] = location&.to_h if options.include_location

        hash[:id] = id if id && !id.empty?
        hash[:table_type] = table_type
        serialize_caption_to_hash(hash, options)
        hash[:header_rows] = header_rows.map { |row| row.serialize_to_hash(options) }
        hash[:body_rows] = body_rows.map { |row| row.serialize_to_hash(options) }
        hash[:metric] = metric if metric
        hash[:col_spec] = col_spec if col_spec
        hash[:cellwidth] = cellwidth if cellwidth

        hash
      end

      # Rebuild a TableNode from a string-keyed hash.
      #
      # col_spec and cellwidth are restored because serialize_to_hash emits
      # them, and table_type is converted to a Symbol to match the constructor
      # default.
      #
      # @param hash [Hash] string-keyed hash
      # @return [TableNode]
      def self.deserialize_from_hash(hash)
        node = new(
          location: ReVIEW::AST::JSONSerializer.restore_location(hash),
          id: hash['id'],
          caption_node: deserialize_caption_from_hash(hash),
          table_type: (hash['table_type'] || :table).to_sym,
          metric: hash['metric'],
          col_spec: hash['col_spec'],
          cellwidth: hash['cellwidth']
        )

        (hash['header_rows'] || []).each do |row_hash|
          row = ReVIEW::AST::JSONSerializer.deserialize_from_hash(row_hash)
          node.add_header_row(row) if row.is_a?(ReVIEW::AST::TableRowNode)
        end
        (hash['body_rows'] || []).each do |row_hash|
          row = ReVIEW::AST::JSONSerializer.deserialize_from_hash(row_hash)
          node.add_body_row(row) if row.is_a?(ReVIEW::AST::TableRowNode)
        end

        node
      end
    end
  end
end
module ReVIEW
  module AST
    # A single table row. Its children are the row's cell nodes, and row_type
    # says whether it is a :header or a :body row.
    class TableRowNode < Node
      # Allowed row types. Frozen so the list cannot be modified at runtime.
      ROW_TYPES = %i[header body].freeze

      attr_reader :children, :row_type

      # @param location [Object] source location
      # @param row_type [Symbol, String] :header or :body
      # @param kwargs [Hash] remaining options
      # @raise [ArgumentError] if row_type is not one of ROW_TYPES
      def initialize(location:, row_type: :body, **kwargs)
        # Bare super forwards location:, row_type: and **kwargs unchanged.
        super
        @children = []
        @row_type = row_type.to_sym

        validate_row_type
      end

      # Rebuild a row and its children from a string-keyed hash.
      #
      # @param hash [Hash] string-keyed hash
      # @return [TableRowNode]
      def self.deserialize_from_hash(hash)
        row_type = hash['row_type']&.to_sym || :body
        node = new(
          location: ReVIEW::AST::JSONSerializer.restore_location(hash),
          row_type: row_type
        )
        if hash['children']
          hash['children'].each do |child_hash|
            child = ReVIEW::AST::JSONSerializer.deserialize_from_hash(child_hash)
            # Anything that did not deserialize into an AST node is dropped.
            node.add_child(child) if child.is_a?(ReVIEW::AST::Node)
          end
        end
        node
      end

      private

      # Adds row_type (as a String) to the serialized properties.
      def serialize_properties(hash, options)
        super
        hash[:row_type] = @row_type.to_s
      end

      def validate_row_type
        return if ROW_TYPES.include?(row_type)

        raise ArgumentError, "invalid row_type in TableRowNode: `#{row_type}`"
      end
    end
  end
end
require_relative 'leaf_node'
require_relative 'caption_node'
require_relative 'captionable'

module ReVIEW
  module AST
    # Leaf node holding the body of a LaTeX equation block, for example:
    #
    #   //texequation[eq1][Caption]{
    #   E = mc^2
    #   //}
    #
    # Both the id and the caption are optional.
    class TexEquationNode < LeafNode
      include Captionable

      def initialize(location:, content:, id: nil, caption_node: nil)
        super(location: location, id: id, content: content)
        @caption_node = caption_node
      end

      # Debug representation showing the id and the caption node.
      def to_s
        "TexEquationNode(id: #{@id.inspect}, caption_node: #{@caption_node.inspect})"
      end

      # Hash form: the parent's hash plus id and caption_node when present.
      def to_h
        super.tap do |result|
          result[:id] = id if id?
          result[:caption_node] = caption_node.to_h if caption_node
        end
      end

      # Serialize as type, optional location, then the node's own properties.
      def serialize_to_hash(options = nil)
        options ||= ReVIEW::AST::JSONSerializer::Options.new

        serialized = { type: self.class.name.split('::').last }
        serialized[:location] = location&.to_h if options.include_location
        serialize_properties(serialized, options)
        serialized
      end

      # Rebuild the node from a string-keyed hash; missing content becomes ''.
      def self.deserialize_from_hash(hash)
        restored_location = ReVIEW::AST::JSONSerializer.restore_location(hash)
        new(
          location: restored_location,
          id: hash['id'],
          caption_node: deserialize_caption_from_hash(hash),
          content: hash['content'] || ''
        )
      end

      private

      # Writes id, caption and non-empty content into the hash, in that order.
      def serialize_properties(hash, options)
        hash[:id] = id if id?
        serialize_caption_to_hash(hash, options)
        hash[:content] = content unless !content || content.empty?
        hash
      end
    end
  end
end
require_relative 'leaf_node'

module ReVIEW
  module AST
    # Leaf node carrying plain text. Its hash forms never include a children
    # array.
    class TextNode < LeafNode
      # Hash form: type, location and, when non-empty, content.
      def to_h
        summary = {
          type: self.class.name.split('::').last,
          location: location_to_h
        }
        summary[:content] = @content unless !@content || @content.empty?
        summary
      end

      # Serialize as type, optional location, then content.
      def serialize_to_hash(options = nil)
        options ||= ReVIEW::AST::JSONSerializer::Options.new

        serialized = { type: self.class.name.split('::').last }
        serialized[:location] = location_to_h if options.include_location
        serialize_properties(serialized, options)
        serialized
      end

      # Rebuild the node from a string-keyed hash; missing content becomes ''.
      def self.deserialize_from_hash(hash)
        restored_location = ReVIEW::AST::JSONSerializer.restore_location(hash)
        new(location: restored_location, content: hash['content'] || '')
      end

      private

      # Adds content whenever it is set (an empty string is still written).
      def serialize_properties(hash, _options)
        hash[:content] = content if content
        hash
      end

      # Location reduced to filename and lineno, or nil without a location.
      def location_to_h
        { filename: location.filename, lineno: location.lineno } if location
      end
    end
  end
end
module ReVIEW
  module AST
    # Base visitor class for traversing AST nodes using the Visitor pattern.
    #
    # Each node names the method that handles it through #visit_method_name;
    # subclasses implement those methods (public or private).
    #
    # Usage:
    #   class MyVisitor < ReVIEW::AST::Visitor
    #     def visit_headline(node)
    #       # Process headline node
    #     end
    #   end
    #
    #   visitor = MyVisitor.new
    #   result = visitor.visit(ast_root)
    class Visitor
      # Visit a node and dispatch to the appropriate visit method.
      #
      # @param node [Object] the AST node to visit; must respond to
      #   #visit_method_name unless it is nil
      # @return [Object, nil] the result of the visit method, or nil for a nil node
      # @raise [NotImplementedError] if this visitor has no such method
      def visit(node)
        return nil if node.nil?

        method_name = node.visit_method_name

        # The second argument makes respond_to? see private visit methods too.
        unless respond_to?(method_name, true)
          # Interpolating the class itself (not Class#name) keeps the message
          # informative for anonymous classes, whose name is nil.
          raise NotImplementedError, "Visitor #{self.class} does not implement #{method_name} for #{node.class}"
        end

        send(method_name, node)
      end

      # Visit multiple nodes and return an array of results.
      #
      # @param nodes [Array, nil] AST nodes to visit
      # @return [Array] visit results, in order; [] when nodes is nil
      def visit_all(nodes)
        return [] unless nodes

        nodes.map { |node| visit(node) }
      end
    end
  end
end
# Whether this chapter's source file is listed in +contents+.
#
# A line matches when, after stripping surrounding whitespace, it equals the
# chapter id followed by the book's extension, ".re" or ".md".
#
# @param contents [Array<String>] file names, one per entry
# @return [Boolean]
def on_file?(contents)
  candidates = ["#{id}#{@book.ext}", "#{id}.re", "#{id}.md"]
  # Strip each entry once instead of rebuilding the stripped list per candidate.
  contents.any? { |entry| candidates.include?(entry.strip) }
end
module ReVIEW
  # Escaping helpers for the AST renderers, built on the CGI module.
  module HtmlEscapeUtils
    # Escape text for use as HTML content.
    #
    # @param str [Object] value to escape; converted with #to_s first
    # @return [String]
    def escape_content(str)
      text = str.to_s
      CGI.escapeHTML(text)
    end

    # URL-encode a value with CGI.escape.
    #
    # @param str [Object] value to encode; converted with #to_s first
    # @return [String]
    def escape_url(str)
      text = str.to_s
      CGI.escape(text)
    end
  end
end
require 'review/ast/visitor'
require 'review/exception'
require 'review/renderer/text_formatter'

module ReVIEW
  module Renderer
    # Error class for renderer-specific errors
    class RenderError < ReVIEW::ApplicationError; end

    # Base class for all AST renderers.
    #
    # Extends the visitor with rendering helpers. Subclasses implement the
    # visit methods for the node types they support (visit_document,
    # visit_headline, visit_paragraph, ...) and #format_type.
    #
    # Usage:
    #   class HtmlRenderer < ReVIEW::Renderer::Base
    #     def visit_headline(node)
    #       level = node.level
    #       caption = process_inline_content(node.caption)
    #       "<h#{level}>#{caption}</h#{level}>"
    #     end
    #   end
    #
    #   renderer = HtmlRenderer.new
    #   html_output = renderer.render(ast_root)
    class Base < ReVIEW::AST::Visitor
      attr_reader :text_formatter

      # Initialize the renderer with chapter context.
      #
      # @param chapter [ReVIEW::Book::Chapter, nil] chapter context; when it or
      #   its book is nil the config falls back to an empty hash
      def initialize(chapter)
        @chapter = chapter
        @book = chapter&.book
        @config = @book&.config || {}
        super()
        @text_formatter = ReVIEW::Renderer::TextFormatter.new(
          config: @config,
          chapter: @chapter
        )
      end

      # Render an AST node to the target format.
      #
      # @param ast_root [Object] the root AST node to render
      # @return [String] the rendered output
      def render(ast_root)
        post_process(visit(ast_root))
      end

      # Check if the caption should be positioned at the top for a given type.
      #
      # @param type [String] element type (e.g. 'image', 'table', 'list', 'equation')
      # @return [Boolean] true only when config['caption_position'][type] is 'top'
      def caption_top?(type)
        positions = config['caption_position']
        # Return a real boolean even when the setting is absent.
        positions ? positions[type] == 'top' : false
      end

      # Render all children of a node and join the results.
      #
      # @param node [Object] the parent node whose children should be rendered
      # @return [String] the joined rendered output of all children
      def render_children(node)
        node.children.map { |child| visit(child) }.join
      end

      # Get the format type for this renderer.
      # Subclasses must override this method to specify their format.
      #
      # @return [Symbol] format type (:html, :latex, :idgxml, :text, :top)
      # @raise [NotImplementedError] always, in this base class
      def format_type
        raise NotImplementedError, "#{self.class} must implement #format_type"
      end

      private

      attr_reader :config

      # Post-process the rendered result. Subclasses can override this to
      # perform final formatting, cleanup, or validation.
      #
      # @param result [Object] the result from visiting the AST
      # @return [String] the final rendered output
      def post_process(result)
        result.to_s
      end

      # Escape special characters for the target format. The base version
      # only converts to a String.
      #
      # @param str [Object] the value to escape
      # @return [String]
      def escape(str)
        str.to_s
      end

      # Default visit methods for common node types. These provide basic
      # fallback behavior that subclasses can override.

      def visit_text(node)
        escape(node.content.to_s)
      end

      def visit_inline(node)
        content = render_children(node)
        render_inline_element(node.inline_type, content, node)
      end

      # Render a specific inline element. The base version returns the
      # content unchanged.
      #
      # @param _type [String] the inline element type (e.g. 'b', 'i', 'code')
      # @param content [String] the content of the inline element
      # @param _node [Object] the original inline node
      # @return [String] the rendered inline element
      def render_inline_element(_type, content, _node = nil)
        content
      end

      # Visit a code block node by dispatching to visit_code_block_<code_type>.
      #
      # @param node [Object] the code block node
      # @return [String] the rendered code block
      # @raise [NotImplementedError] if no handler exists for the code type
      def visit_code_block(node)
        method_name = "visit_code_block_#{node.code_type}"
        unless respond_to?(method_name, true)
          raise NotImplementedError, "Unknown code block type: #{node.code_type}"
        end

        send(method_name, node)
      end

      # Visit a block node by dispatching to visit_block_<block_type>.
      #
      # @param node [Object] the block node
      # @return [String] the rendered block
      # @raise [NotImplementedError] if no handler exists for the block type
      def visit_block(node)
        method_name = "visit_block_#{node.block_type}"
        unless respond_to?(method_name, true)
          raise NotImplementedError, "Unknown block type: #{node.block_type}"
        end

        send(method_name, node)
      end

      # Parse the metric option for images and tables.
      #
      # Parameters carrying another builder's prefix are dropped, this
      # builder's prefix is removed, and unprefixed parameters are kept.
      #
      # @param type [String] builder type (e.g. 'latex', 'html')
      # @param metric [String, nil] metric string (e.g. 'latex::width=80mm,scale=0.5')
      # @return [String] processed metric string
      #
      # @example
      #   parse_metric('latex', 'latex::width=80mm') # => 'width=80mm'
      #   parse_metric('latex', 'scale=0.5')         # => 'scale=0.5'
      #   parse_metric('html', 'latex::width=80mm')  # => ''
      def parse_metric(type, metric)
        return '' if metric.nil? || metric.empty?

        # Compare the prefix as a literal string: it is built once and no
        # character in +type+ is treated as a regex metacharacter.
        own_prefix = "#{type}::"
        results = []
        metric.split(/,\s*/).each do |param|
          # Does the parameter carry any builder prefix (e.g. "latex::")?
          if /\A.+?::/.match?(param)
            next unless param.start_with?(own_prefix)

            param = param.delete_prefix(own_prefix)
          end
          results.push(handle_metric(param))
        end
        result_metric(results)
      end

      # Handle individual metric transformations. The base version returns
      # the string unchanged.
      #
      # @param str [String] metric string (e.g. 'scale=0.5')
      # @return [String] transformed metric string
      def handle_metric(str)
        str
      end

      # Combine metric results into the final string.
      #
      # @param array [Array<String>] metric strings
      # @return [String] the strings joined with ','
      def result_metric(array)
        array.join(',')
      end

      # Extract text content from a node.
      #
      # Strings are returned as is and nil becomes ''. For other nodes the
      # text of the children is concatenated; a node without children yields
      # its content when it is a leaf node, otherwise its #to_s.
      #
      # @param node [Object] the node to extract text from
      # @return [String] the extracted text content
      def extract_text(node)
        case node
        when String
          node
        when nil
          ''
        else
          if node.children&.any?
            node.children.map { |child| extract_text(child) }.join
          elsif node.leaf_node?
            node.content.to_s
          else
            node.to_s
          end
        end
      end
    end
  end
end
module ReVIEW
  module Renderer
    # FootnoteCollector - collects footnotes within a rendering context.
    #
    # Footnote nodes are stored together with their assigned numbers, in the
    # order they were added, so they can be output later.
    class FootnoteCollector
      include Enumerable

      # One collected footnote. +content+ is always nil when the entry is
      # created by #add.
      FootnoteEntry = Struct.new(:node, :number, :content, keyword_init: true)

      def initialize
        @footnotes = []
      end

      # Add a footnote to the collection.
      #
      # @param footnote_node [Object] the footnote AST node
      # @param footnote_number [Integer] the assigned footnote number
      def add(footnote_node, footnote_number)
        entry = FootnoteEntry.new(
          node: footnote_node,
          number: footnote_number,
          content: nil
        )
        @footnotes << entry
      end

      # @return [Integer] number of collected footnotes
      def size
        @footnotes.size
      end

      # Remove all collected footnotes.
      def clear
        @footnotes.clear
      end

      # Iterate over collected footnotes.
      #
      # @yield [FootnoteEntry] each footnote entry
      def each(&block)
        @footnotes.each(&block)
      end

      # @return [Array<Integer>] footnote numbers in insertion order
      def numbers
        @footnotes.map(&:number)
      end

      # Hash representation for debugging/serialization.
      #
      # @return [Hash] size, numbers and a short summary of every footnote
      def to_h
        {
          size: size,
          numbers: numbers,
          footnotes: @footnotes.map { |entry| entry_summary(entry) }
        }
      end

      # String representation for debugging.
      #
      # @return [String]
      def to_s
        if @footnotes.empty?
          'FootnoteCollector[empty]'
        else
          numbers_str = numbers.join(', ')
          "FootnoteCollector[#{size} footnotes: #{numbers_str}]"
        end
      end

      private

      # Summary of one entry: number, node id and up to 50 characters of the
      # node's inline text.
      def entry_summary(entry)
        node = entry.node
        # to_s guards against a node whose to_inline_text returns nil.
        preview_text = node.respond_to?(:to_inline_text) ? node.to_inline_text.to_s : ''
        {
          number: entry.number,
          id: node.id,
          content_preview: preview_text.slice(0, 50)
        }
      end
    end
  end
end
module ReVIEW
  module Renderer
    module Html
      # Context object for inline element rendering. It exposes the config,
      # book and chapter, and reaches the renderer only through an
      # InlineRenderProxy.
      #
      # Escaping helpers come from the included HTMLUtils and HtmlEscapeUtils
      # modules.
      class InlineContext
        include ReVIEW::HTMLUtils
        include ReVIEW::HtmlEscapeUtils

        attr_reader :config, :book, :chapter, :img_math

        def initialize(config:, book:, chapter:, renderer:, img_math: nil)
          @config = config
          @book = book
          @chapter = chapter
          # Wrap the renderer in a proxy to limit access to it.
          @render_proxy = InlineRenderProxy.new(renderer)
          @img_math = img_math
        end

        # File extension for HTML output, with a leading dot.
        def extname
          suffix = config['htmlext'] || 'html'
          ".#{suffix}"
        end

        def epub3?
          config['epubversion'].to_i == 3
        end

        def math_format
          config['math_format'] || 'mathjax'
        end

        def chapter_number(chapter_id)
          book.chapter_index.number(chapter_id)
        end

        def chapter_title(chapter_id)
          book.chapter_index.title(chapter_id)
        end

        def chapter_display_string(chapter_id)
          book.chapter_index.display_string(chapter_id)
        end

        def chapter_link_enabled?
          config['chapterlink']
        end

        def footnote_number(fn_id)
          chapter.footnote(fn_id).number
        end

        # Build the <img> tag for an icon; a leading "./" is removed from the
        # image path.
        def build_icon_html(icon_id)
          path = chapter.image(icon_id).path.sub(%r{\A\./}, '')
          %Q(<img src="#{path}" alt="[#{icon_id}]" />)
        end

        def bibpaper_number(bib_id)
          chapter.bibpaper(bib_id).number
        end

        # Build a link into the bibliography page; the bib file's ".re"
        # suffix is replaced by the HTML extension.
        def build_bib_reference_link(bib_id, number)
          bib_file = book.bib_file.gsub(/\.re\Z/, extname)
          %Q(<a href="#{bib_file}#bib-#{normalize_id(bib_id)}">[#{number}]</a>)
        end

        # Whether config['secnolevel'] (default 0) reaches the section level
        # of +n+, where the level is 1 plus the number of components in +n+
        # (an Array, or a value whose #to_s is split on '.').
        def over_secnolevel?(n)
          depth = n.is_a?(::Array) ? n.size : n.to_s.split('.').size
          (config['secnolevel'] || 0) >= 1 + depth
        end

        def render_children(node)
          @render_proxy.render_children(node)
        end

        def text_formatter
          @render_proxy.text_formatter
        end
      end
    end
  end
end
(1 + n.size) : (1 + n.to_s.split('.').size) + secnolevel >= section_level + end + + def render_children(node) + @render_proxy.render_children(node) + end + + def text_formatter + @render_proxy.text_formatter + end + end + end + end +end diff --git a/lib/review/renderer/html/inline_element_handler.rb b/lib/review/renderer/html/inline_element_handler.rb new file mode 100644 index 000000000..f7c4ea36f --- /dev/null +++ b/lib/review/renderer/html/inline_element_handler.rb @@ -0,0 +1,606 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'digest' +require 'review/htmlutils' +require 'review/html_escape_utils' + +module ReVIEW + module Renderer + module Html + # Inline element handler for HTML rendering + # Uses InlineContext for shared logic + class InlineElementHandler + include ReVIEW::HTMLUtils + include ReVIEW::HtmlEscapeUtils + include ReVIEW::Loggable + + def initialize(inline_context) + @ctx = inline_context + @img_math = @ctx.img_math + @logger = ReVIEW.logger + end + + def render_inline_b(_type, content, _node) + %Q(<b>#{content}</b>) + end + + def render_inline_strong(_type, content, _node) + %Q(<strong>#{content}</strong>) + end + + def render_inline_i(_type, content, _node) + %Q(<i>#{content}</i>) + end + + def render_inline_em(_type, content, _node) + %Q(<em>#{content}</em>) + end + + def render_inline_code(_type, content, _node) + %Q(<code class="inline-code tt">#{content}</code>) + end + + def render_inline_tt(_type, content, _node) + %Q(<code class="tt">#{content}</code>) + end + + def render_inline_ttb(_type, content, _node) + %Q(<code class="tt"><b>#{content}</b></code>) + end + + def render_inline_tti(_type, content, _node) + %Q(<code class="tt"><i>#{content}</i></code>) + end + + def render_inline_kbd(_type, content, _node) + 
%Q(<kbd>#{content}</kbd>) + end + + def render_inline_samp(_type, content, _node) + %Q(<samp>#{content}</samp>) + end + + def render_inline_var(_type, content, _node) + %Q(<var>#{content}</var>) + end + + def render_inline_sup(_type, content, _node) + %Q(<sup>#{content}</sup>) + end + + def render_inline_sub(_type, content, _node) + %Q(<sub>#{content}</sub>) + end + + def render_inline_del(_type, content, _node) + %Q(<del>#{content}</del>) + end + + def render_inline_ins(_type, content, _node) + %Q(<ins>#{content}</ins>) + end + + def render_inline_u(_type, content, _node) + %Q(<u>#{content}</u>) + end + + def render_inline_br(_type, _content, _node) + '<br />' + end + + def render_inline_bou(_type, content, _node) + %Q(<span class="bou">#{content}</span>) + end + + def render_inline_ami(_type, content, _node) + %Q(<span class="ami">#{content}</span>) + end + + def render_inline_big(_type, content, _node) + %Q(<big>#{content}</big>) + end + + def render_inline_small(_type, content, _node) + %Q(<small>#{content}</small>) + end + + def render_inline_balloon(_type, content, _node) + %Q(<span class="balloon">#{content}</span>) + end + + def render_inline_cite(_type, content, _node) + %Q(<cite>#{content}</cite>) + end + + def render_inline_dfn(_type, content, _node) + %Q(<dfn>#{content}</dfn>) + end + + def render_inline_chap(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + chapter_num = @ctx.text_formatter.format_chapter_number_full(data.chapter_number, data.chapter_type) + build_chapter_link(data.item_id, chapter_num) + end + + def render_inline_chapref(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? 
+ raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + display_str = @ctx.text_formatter.format_reference(:chapter, data) + build_chapter_link(data.item_id, display_str) + end + + def render_inline_title(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + title = data.chapter_title || '' + build_chapter_link(data.item_id, title) + end + + def render_inline_fn(_type, _content, node) + # Footnote reference + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + build_footnote_link(data.item_id, data.item_number) + end + + def render_inline_kw(_type, content, node) + if node.args.length >= 2 + build_keyword_with_index(node.args[0], alt: node.args[1].strip) + elsif node.args.length == 1 + build_keyword_with_index(node.args[0]) + else + build_keyword_with_index(content) + end + end + + def render_inline_idx(_type, content, node) + index_str = node.args.first || content + content + build_index_comment(index_str) + end + + def render_inline_hidx(_type, _content, node) + index_str = node.args.first + build_index_comment(index_str) + end + + def render_inline_href(_type, _content, node) + args = node.args + if args.length >= 2 + url = args[0] + text = escape_content(args[1]) + if url.start_with?('#') + build_anchor_link(url[1..-1], text) + else + build_external_link(url, text) + end + elsif args.length >= 1 + url = args[0] + escaped_url = escape_content(url) + if url.start_with?('#') + build_anchor_link(url[1..-1], escaped_url) + else + build_external_link(url, escaped_url) + end + else + content + end + end + + def render_inline_ruby(_type, content, node) + if node.args.length >= 2 + build_ruby(node.args[0], 
node.args[1]) + else + content + end + end + + def render_inline_raw(_type, _content, node) + node.targeted_for?('html') ? (node.content || '') : '' + end + + def render_inline_embed(_type, _content, node) + node.targeted_for?('html') ? (node.content || '') : '' + end + + def render_inline_abbr(_type, content, _node) + %Q(<abbr>#{content}</abbr>) + end + + def render_inline_acronym(_type, content, _node) + %Q(<acronym>#{content}</acronym>) + end + + def render_inline_list(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + text = @ctx.text_formatter.format_reference_text(:list, data) + wrap_reference_with_html(text, data, 'listref') + end + + def render_inline_table(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + text = @ctx.text_formatter.format_reference_text(:table, data) + wrap_reference_with_html(text, data, 'tableref') + end + + def render_inline_img(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? 
+ raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + text = @ctx.text_formatter.format_reference_text(:image, data) + wrap_reference_with_html(text, data, 'imgref') + end + + def render_inline_comment(_type, content, _node) + if @ctx.config['draft'] + %Q(<span class="draft-comment">#{content}</span>) + else + '' + end + end + + def render_inline_w(_type, content, _node) + # Content should already be resolved by ReferenceResolver + content + end + + def render_inline_wb(_type, content, _node) + # Content should already be resolved by ReferenceResolver + %Q(<b>#{content}</b>) + end + + def render_inline_dtp(_type, content, _node) + "<?dtp #{content} ?>" + end + + def render_inline_recipe(_type, content, _node) + %Q(<span class="recipe">「#{content}」</span>) + end + + def render_inline_uchar(_type, content, _node) + %Q(&#x#{content};) + end + + def render_inline_tcy(_type, content, _node) + style = 'tcy' + if content.size == 1 && content.match(/[[:ascii:]]/) + style = 'upright' + end + %Q(<span class="#{style}">#{content}</span>) + end + + def render_inline_pageref(_type, content, _node) + # Page reference is unsupported in HTML + content + end + + def render_inline_icon(_type, content, node) + # Icon is an image reference + id = node.args.first || content + begin + @ctx.build_icon_html(id) + rescue ReVIEW::KeyError, NoMethodError + warn "image not bound: #{id}" + %Q(<pre>missing image: #{id}</pre>) + end + end + + def render_inline_bib(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + bib_id = data.item_id + bib_number = data.item_number + @ctx.build_bib_reference_link(bib_id, bib_number) + end + + def render_inline_endnote(_type, _content, node) + # Endnote reference + ref_node = node.children.first + unless ref_node.reference_node? 
&& ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + build_endnote_link(data.item_id, data.item_number) + end + + def render_inline_m(_type, content, node) + # Math/equation rendering + # Get raw string from node args (content is already escaped) + str = node.args.first || content + + # Use 'equation' class like HTMLBuilder + case @ctx.config['math_format'] + when 'mathml' + begin + require 'math_ml' + require 'math_ml/symbol/character_reference' + rescue LoadError + app_error 'not found math_ml' + return %Q(<span class="equation">#{escape(str)}</span>) + end + parser = MathML::LaTeX::Parser.new(symbol: MathML::Symbol::CharacterReference) + # parser.parse returns MathML::Math object, need to convert to string + %Q(<span class="equation">#{parser.parse(str, nil)}</span>) + when 'mathjax' + %Q(<span class="equation">\\( #{str.gsub('<', '\lt{}').gsub('>', '\gt{}').gsub('&', '&')} \\)</span>) + when 'imgmath' + unless @img_math + app_error 'ImgMath not initialized' + return %Q(<span class="equation">#{escape(str)}</span>) + end + + math_str = '$' + str + '$' + key = Digest::SHA256.hexdigest(str) + img_path = @img_math.defer_math_image(math_str, key) + %Q(<span class="equation"><img src="#{img_path}" class="math_gen_#{key}" alt="#{escape(str)}" /></span>) + else + %Q(<span class="equation">#{escape(str)}</span>) + end + end + + def render_inline_sec(_type, _content, node) + # Section number reference: @<sec>{id} or @<sec>{chapter|id} + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + n = data.headline_number + chapter_num = @ctx.text_formatter.format_chapter_number_short(data.chapter_number, data.chapter_type) + + # Build full section number including chapter number + full_number = if n.present? && chapter_num && !chapter_num.to_s.empty? 
&& @ctx.over_secnolevel?(n) + ([chapter_num] + n).join('.') + else + '' + end + + if @ctx.config['chapterlink'] && full_number.present? + # Get target chapter ID for link + chapter_id = data.chapter_id || @ctx.chapter.id + anchor = 'h' + full_number.tr('.', '-') + %Q(<a href="#{chapter_id}#{@ctx.extname}##{anchor}">#{full_number}</a>) + else + full_number + end + end + + def render_inline_secref(type, content, node) + render_inline_hd(type, content, node) + end + + def render_inline_labelref(_type, content, node) + # Label reference: @<labelref>{id} + # This should match HTMLBuilder's inline_labelref behavior + idref = node.target_item_id || content + marker = @ctx.text_formatter.format_label_marker(idref) + %Q(<a target='#{escape_content(idref)}'>「#{escape_content(marker)}」</a>) + end + + def render_inline_ref(type, content, node) + render_inline_labelref(type, content, node) + end + + def render_inline_eq(_type, _content, node) + # Equation reference + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + text = @ctx.text_formatter.format_reference_text(:equation, data) + wrap_reference_with_html(text, data, 'eqref') + end + + def render_inline_hd(_type, _content, node) + # Headline reference: @<hd>{id} or @<hd>{chapter|id} + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + n = data.headline_number + chapter_num = @ctx.text_formatter.format_chapter_number_short(data.chapter_number, data.chapter_type) + + # Render caption with inline markup + caption_html = if data.caption_node + @ctx.render_children(data.caption_node) + else + data.caption_text + end + + # Build full section number including chapter number + full_number = if n.present? 
&& chapter_num && !chapter_num.to_s.empty? && @ctx.over_secnolevel?(n) + ([chapter_num] + n).join('.') + end + + str = @ctx.text_formatter.format_headline_quote(full_number, caption_html) + + if @ctx.config['chapterlink'] && full_number + # Get target chapter ID for link + chapter_id = data.chapter_id || @ctx.chapter.id + anchor = 'h' + full_number.tr('.', '-') + %Q(<a href="#{chapter_id}#{@ctx.extname}##{anchor}">#{str}</a>) + else + str + end + end + + def render_inline_column(_type, _content, node) + # Column reference: @<column>{id} or @<column>{chapter|id} + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + + # Render caption with inline markup + caption_html = if data.caption_node + @ctx.render_children(data.caption_node) + else + escape_content(data.caption_text) + end + + anchor = "column-#{data.item_number}" + column_text = @ctx.text_formatter.format_column_label(caption_html) + + if @ctx.config['chapterlink'] + chapter_id = data.chapter_id || @ctx.chapter.id + %Q(<a href="#{chapter_id}#{@ctx.extname}##{anchor}" class="columnref">#{column_text}</a>) + else + column_text + end + end + + def render_inline_sectitle(_type, _content, node) + # Section title reference + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? 
+ raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + + # Render caption with inline markup + title_html = if data.caption_node + @ctx.render_children(data.caption_node) + else + escape_content(data.caption_text) + end + + if @ctx.config['chapterlink'] + n = data.headline_number + chapter_num = @ctx.text_formatter.format_chapter_number_short(data.chapter_number, data.chapter_type) + full_number = ([chapter_num] + n).join('.') + anchor = 'h' + full_number.tr('.', '-') + + # Get target chapter ID for link + chapter_id = data.chapter_id || @ctx.chapter.id + %Q(<a href="#{chapter_id}#{@ctx.extname}##{anchor}">#{title_html}</a>) + else + title_html + end + end + + private + + def target_format?(format_name) + format_name.to_s == 'html' + end + + def build_index_comment(index_str) + %Q(<!-- IDX:#{escape_comment(index_str)} -->) + end + + def build_keyword_with_index(word, alt: nil) + escaped_word = escape_content(word) + + if alt && !alt.empty? + escaped_alt = escape_content(alt) + # Include alt text in visible content, but only word in IDX comment + text = "#{escaped_word} (#{escaped_alt})" + %Q(<b class="kw">#{text}</b><!-- IDX:#{escaped_word} -->) + else + %Q(<b class="kw">#{escaped_word}</b><!-- IDX:#{escaped_word} -->) + end + end + + def build_ruby(base, ruby_text) + %Q(<ruby>#{escape_content(base)}<rt>#{escape_content(ruby_text)}</rt></ruby>) + end + + def build_anchor_link(anchor_id, content, css_class: 'link') + %Q(<a href="##{normalize_id(anchor_id)}" class="#{css_class}">#{content}</a>) + end + + def build_external_link(url, content, css_class: 'link') + %Q(<a href="#{escape_content(url)}" class="#{css_class}">#{content}</a>) + end + + def build_footnote_link(fn_id, number) + if @ctx.epub3? 
+ %Q(<a id="fnb-#{normalize_id(fn_id)}" href="#fn-#{normalize_id(fn_id)}" class="noteref" epub:type="noteref">#{@ctx.text_formatter.format_footnote_mark(number)}</a>) + else + %Q(<a id="fnb-#{normalize_id(fn_id)}" href="#fn-#{normalize_id(fn_id)}" class="noteref">*#{number}</a>) + end + end + + def build_chapter_link(chapter_id, content) + if @ctx.chapter_link_enabled? + %Q(<a href="./#{chapter_id}#{@ctx.extname}">#{content}</a>) + else + content + end + end + + def build_endnote_link(endnote_id, number) + if @ctx.epub3? + %Q(<a id="endnoteb-#{normalize_id(endnote_id)}" href="#endnote-#{normalize_id(endnote_id)}" class="noteref" epub:type="noteref">#{@ctx.text_formatter.format_endnote_mark(number)}</a>) + else + %Q(<a id="endnoteb-#{normalize_id(endnote_id)}" href="#endnote-#{normalize_id(endnote_id)}" class="noteref">#{number}</a>) + end + end + + # Wrap reference text with HTML decoration (span and optional link) + # @param text [String] Plain text reference (e.g., "図1.1") + # @param data [ResolvedData] Resolved reference data + # @param css_class [String] CSS class name (e.g., 'imgref', 'tableref', 'listref', 'eqref') + # @return [String] HTML with span and optional link + def wrap_reference_with_html(text, data, css_class) + escaped_text = escape_content(text) + + return %Q(<span class="#{css_class}">#{escaped_text}</span>) unless @ctx.config['chapterlink'] + + # Build link with chapter_id and item_id + chapter_id = data.chapter_id || @ctx.chapter&.id + extname = ".#{@ctx.config['htmlext'] || 'html'}" + %Q(<span class="#{css_class}"><a href="./#{chapter_id}#{extname}##{normalize_id(data.item_id)}">#{escaped_text}</a></span>) + end + end + end + end +end diff --git a/lib/review/renderer/html_renderer.rb b/lib/review/renderer/html_renderer.rb new file mode 100644 index 000000000..6032d7304 --- /dev/null +++ b/lib/review/renderer/html_renderer.rb @@ -0,0 +1,1315 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This 
program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'review/renderer/base' +require 'review/ast/caption_node' +require 'review/htmlutils' +require 'review/textutils' +require 'review/html_escape_utils' +require 'review/highlighter' +require 'review/sec_counter' +require 'review/i18n' +require 'review/loggable' +require 'review/ast/indexer' +require 'review/ast/compiler' +require 'review/template' +require 'review/img_math' +require 'digest' +require_relative 'rendering_context' +require_relative 'html/inline_context' +require_relative 'html/inline_element_handler' + +module ReVIEW + module Renderer + class HtmlRenderer < Base + include ReVIEW::HTMLUtils + include ReVIEW::TextUtils + include ReVIEW::HtmlEscapeUtils + include ReVIEW::Loggable + + attr_reader :chapter, :book + + def initialize(chapter, img_math: nil) + super(chapter) + + # Initialize logger like HTMLBuilder for error handling + @logger = ReVIEW.logger + + # Initialize section counter like HTMLBuilder (handle nil chapter) + @sec_counter = @chapter ? 
SecCounter.new(5, @chapter) : nil + + # Initialize template variables like HTMLBuilder + @javascripts = [] + @body_ext = '' + + # Initialize ImgMath for equation image generation (like Builder) + # Accept shared instance or create new one + @img_math = img_math || ReVIEW::ImgMath.new(config) + + # Initialize RenderingContext for cleaner state management + @rendering_context = RenderingContext.new(:document) + + # Initialize HTML-specific inline context and inline element handler + @inline_context = Html::InlineContext.new(config: config, book: book, chapter: chapter, renderer: self, img_math: @img_math) + @inline_element_handler = Html::InlineElementHandler.new(@inline_context) + end + + # Format type for this renderer + # @return [Symbol] Format type :html + def format_type + :html + end + + def visit_document(node) + render_children(node) + end + + def visit_headline(node) + level = node.level + caption = render_caption_inline(node.caption_node) + + if node.nonum? || node.notoc? || node.nodisp? + # Use label if provided, otherwise use auto_id generated by Compiler + id = normalize_id(node.label || node.auto_id) + + spacing_before = level > 1 ? "\n" : '' + + if node.nodisp? + a_tag = %Q(<a id="#{id}" />) + %Q(#{spacing_before}#{a_tag}<h#{level} id="#{id}" hidden="true">#{caption}</h#{level}>\n) + elsif node.notoc? + %Q(#{spacing_before}<h#{level} id="#{id}" notoc="true">#{caption}</h#{level}>\n) + else + %Q(#{spacing_before}<h#{level} id="#{id}">#{caption}</h#{level}>\n) + end + else + prefix, anchor = headline_prefix(level) + + anchor_html = anchor ? %Q(<a id="h#{anchor}"></a>) : '' + secno_html = prefix ? %Q(<span class="secno">#{prefix}</span>) : '' + spacing_before = level > 1 ? 
"\n" : '' + + if node.label + label_id = normalize_id(node.label) + %Q(#{spacing_before}<h#{level} id="#{label_id}">#{anchor_html}#{secno_html}#{caption}</h#{level}>\n) + else + "#{spacing_before}<h#{level}>#{anchor_html}#{secno_html}#{caption}</h#{level}>\n" + end + end + end + + def visit_paragraph(node) + content = render_children(node) + content = join_paragraph_lines(content).strip + + # Check for noindent attribute + if node.attribute?(:noindent) + %Q(<p class="noindent">#{content}</p>\n) + else + "<p>#{content}</p>\n" + end + end + + # Join paragraph lines according to join_lines_by_lang setting + # This matches HTMLBuilder's join_lines_to_paragraph behavior + # + # @param content [String] paragraph content with newlines + # @return [String] processed content with lines joined appropriately + def join_paragraph_lines(content) + if config['join_lines_by_lang'] + # Split by newlines to get individual lines + lines = content.split("\n") + + # Add spaces between lines based on language rules + lazy = true + lang = config['language'] || 'ja' + 0.upto(lines.size - 2) do |n| + if add_space?(lines[n], lines[n + 1], lang, lazy) + lines[n] += ' ' + end + end + lines.join + else + # Default: just remove newlines (no space added) + content.gsub(/\n+/, '') + end + end + + def visit_list(node) + tag = case node.list_type + when :ul + 'ul' + when :ol + 'ol' + when :dl + 'dl' + else + raise NotImplementedError, "HTMLRenderer does not support list_type #{node.list_type}." 
+ end + + # Check for start_number for ordered lists + # Only output start attribute if it's not the default value (1) + start_attr = '' + if node.list_type == :ol && node.start_number && node.start_number != 1 + start_attr = %Q( start="#{node.start_number}") + end + + content = render_children(node) + # Format list items with proper line breaks like HTMLBuilder + formatted_content = content.gsub(%r{</li>(?=<li>)}, "</li>\n") + formatted_content = formatted_content.gsub(/<li>([^<]*)<ul>/, "<li>\\1<ul>\n") + formatted_content = formatted_content.gsub('</ul></li>', "</ul>\n</li>") + "<#{tag}#{start_attr}>\n#{formatted_content}\n</#{tag}>\n" + end + + def visit_list_item(node) + # Get parent list to determine list type + parent_list = node.parent + if parent_list && parent_list.list_type == :dl + # Definition list item - use term_children for term like LaTeXRenderer + term = if node.term_children&.any? + node.term_children.map { |child| visit(child) }.join + else + '' + end + + # Children contain the definition content + # Join all children into a single dd like HTMLBuilder does with join_lines_to_paragraph + if node.children.empty? 
+ # Only term, no definition - add empty dd like HTMLBuilder + "<dt>#{term}</dt><dd></dd>" + else + # Render all child content and join together + definition_parts = node.children.map { |child| visit(child) } + # Join multiple paragraphs/text into single dd content, removing <p> tags + definition_content = definition_parts.map { |part| part.gsub(%r{</?p[^>]*>}, '').strip }.join + "<dt>#{term}</dt><dd>#{definition_content}</dd>" + end + else + # Regular list item + content = render_children(node) + "<li>#{content}</li>" + end + end + + def visit_text(node) + escape_content(node.content.to_s) + end + + def visit_code_line(node) + # Process each line like HTMLBuilder - detab and preserve exact content + # Add newline like other renderers (LaTeX, Markdown, Top) do + line_content = render_children(node) + detab(line_content) + "\n" + end + + def visit_table(node) + # Check if this is an imgtable - handle as image like HTMLBuilder + if node.table_type == :imgtable + return render_imgtable(node) + end + + id_attr = node.id ? %Q( id="#{normalize_id(node.id)}") : '' + + # Process caption with proper context management + caption_html = if node.caption_node + @rendering_context.with_child_context(:caption) do |caption_context| + caption_content = render_caption_with_context(node.caption_node, caption_context) + # Generate table number like HTMLBuilder using chapter table index + table_number = if node.id + generate_table_header(node.id, caption_content) + else + # No ID - just use caption without numbering + caption_content + end + %Q(<p class="caption">#{table_number}</p> +) + end + else + '' + end + + # Process table content with table context + table_html = @rendering_context.with_child_context(:table) do |table_context| + # Process all table rows using visitor pattern with table context + all_rows = node.header_rows + node.body_rows + rows_html = all_rows.map { |row| visit_with_context(row, table_context) }.join("\n") + rows_html += "\n" unless rows_html.empty? 
+ + %Q(<table> +#{rows_html}</table>) + end + + %Q(<div#{id_attr} class="table"> +#{caption_html}#{table_html} +</div> +) + end + + def visit_table_row(node) + cells_html = render_children(node) + "<tr>#{cells_html}</tr>" + end + + def visit_table_cell(node) + content = render_children(node) + tag = node.cell_type == :th ? 'th' : 'td' + "<#{tag}>#{content}</#{tag}>" + end + + def visit_column(node) + # Use auto_id generated by Compiler for anchor + id_attr = node.label ? %Q( id="#{normalize_id(node.label)}") : '' + anchor_id = %Q(<a id="#{node.auto_id}"></a>) + + # HTMLBuilder uses h4 tag for column headers + caption_content = render_caption_inline(node.caption_node) + caption_html = if caption_content.empty? + node.label ? anchor_id : '' + elsif node.label + %Q(<h4#{id_attr}>#{anchor_id}#{caption_content}</h4>) + else + %Q(<h4>#{anchor_id}#{caption_content}</h4>) + end + + content = render_children(node) + + %Q(<div class="column">\n#{caption_html}#{content}</div>) + end + + def visit_minicolumn(node) + type = node.minicolumn_type.to_s + id_attr = node.id ? %Q( id="#{normalize_id(node.id)}") : '' + + caption_content = render_caption_inline(node.caption_node) + caption_html = caption_content.empty? ? '' : %Q(<p class="caption">#{caption_content}</p>\n) + + # Content already contains proper paragraph structure from ParagraphNode children + content_html = render_children(node) + + %Q(<div class="#{type}"#{id_attr}>\n#{caption_html}#{content_html}</div>\n) + end + + def visit_image(node) + id_attr = node.id ? 
%Q( id="#{normalize_id(node.id)}") : '' + + caption_node = node.caption_node + + # Process image with caption context management + if caption_node + @rendering_context.with_child_context(:caption) do |caption_context| + # Check if image is bound like HTMLBuilder does + if @chapter&.image_bound?(node.id) + image_image_html_with_context(node.id, caption_node, id_attr, caption_context, node.image_type) + else + # For dummy images, ImageNode doesn't have lines, so use empty array + image_dummy_html_with_context(node.id, caption_node, [], id_attr, caption_context, node.image_type) + end + end + elsif @chapter&.image_bound?(node.id) + # No caption, no special context needed + image_image_html(node.id, caption_node, id_attr, node.image_type) + else + image_dummy_html(node.id, caption_node, [], id_attr, node.image_type) + end + end + + # visit_block is now handled by Base renderer with dynamic method dispatch + # Individual visit_block_* methods delegate to existing render_*_block methods + + def visit_block_note(node) + render_note_block(node) + end + + def visit_block_memo(node) + render_memo_block(node) + end + + def visit_block_tip(node) + render_tip_block(node) + end + + def visit_block_info(node) + render_info_block(node) + end + + def visit_block_warning(node) + render_warning_block(node) + end + + def visit_block_important(node) + render_important_block(node) + end + + def visit_block_caution(node) + render_caution_block(node) + end + + def visit_block_notice(node) + render_notice_block(node) + end + + def visit_block_quote(node) + render_quote_block(node) + end + + def visit_block_blockquote(node) + render_quote_block(node) + end + + def visit_block_lead(node) + render_lead_block(node) + end + + def visit_block_comment(node) + render_comment_block(node) + end + + def visit_block_firstlinenum(node) + render_firstlinenum_block(node) + end + + def visit_block_blankline(_node) + '<p><br /></p>' + end + + def visit_block_pagebreak(_node) + %Q(<div 
class="pagebreak"></div>\n) + end + + def visit_block_label(node) + render_label_block(node) + end + + def visit_block_tsize(node) + render_tsize_block(node) + end + + def visit_block_printendnotes(node) + render_printendnotes_block(node) + end + + def visit_block_flushright(node) + render_flushright_block(node) + end + + def visit_block_centering(node) + render_centering_block(node) + end + + def visit_block_bibpaper(node) + render_bibpaper_block(node) + end + + def visit_tex_equation(node) + content = node.content + + math_format = config['math_format'] + + return render_texequation_body(content, math_format) unless node.id? + + id_attr = %Q( id="#{normalize_id(node.id)}") + caption_content = render_caption_inline(node.caption_node) + caption_text = caption_content.empty? ? nil : caption_content + caption_html = %Q(<p class="caption">#{text_formatter.format_caption('equation', get_chap, @chapter.equation(node.id).number, caption_text)}</p>\n) + + caption_top_html = caption_top?('equation') ? caption_html : '' + caption_bottom_html = caption_top?('equation') ? 
'' : caption_html + + equation_body_html = render_texequation_body(content, math_format) + + %Q(<div#{id_attr} class="caption-equation">\n#{caption_top_html}#{equation_body_html}#{caption_bottom_html}</div>\n) + end + + # Render equation body with appropriate format (matches HTMLBuilder's texequation_body) + def render_texequation_body(content, math_format) + result = %Q(<div class="equation">\n) + + math_content = render_math_format(content, math_format) + # Check if error case returned complete div (early return from helper) + return math_content if math_content.include?('</div>') + + result + math_content + "</div>\n" + end + + def render_math_format(content, math_format) + case math_format + when 'mathjax' + render_mathjax_format(content) + when 'mathml' + render_mathml_format(content) + when 'imgmath' + render_imgmath_format(content) + else + # Fallback: render as preformatted text + %Q(<pre>#{escape(content)}\n</pre>\n) + end + end + + # Render math content using MathJax (display mode with $$) + def render_mathjax_format(content) + "$$#{content.gsub('<', '\lt{}').gsub('>', '\gt{}').gsub('&', '&')}$$\n" + end + + def render_mathml_format(content) + begin + require 'math_ml' + require 'math_ml/symbol/character_reference' + rescue LoadError + app_error 'not found math_ml' + return %Q(<div class="equation">\n<pre>#{escape(content)}\n</pre>\n</div>\n) + end + parser = MathML::LaTeX::Parser.new(symbol: MathML::Symbol::CharacterReference) + # Add newline to content like HTMLBuilder does + # parser.parse returns MathML::Math object, need to convert to string + parser.parse(content + "\n", true).to_s + end + + def render_imgmath_format(content) + unless @img_math + app_error 'ImgMath not initialized' + return %Q(<div class="equation">\n<pre>#{escape(content)}\n</pre>\n</div>\n) + end + + fontsize = config['imgmath_options']['fontsize'].to_f + lineheight = config['imgmath_options']['lineheight'].to_f + math_str = 
"\\begin{equation*}\n\\fontsize{#{fontsize}}{#{lineheight}}\\selectfont\n#{content}\n\\end{equation*}\n" + key = Digest::SHA256.hexdigest(math_str) + + img_path = @img_math.defer_math_image(math_str, key) + %Q(<img src="#{img_path}" class="math_gen_#{key}" alt="#{escape(content)}" />\n) + end + + # Render AST to HTML body content only (without template). + # + # @param ast_root [Object] The root AST node to render + # @return [String] HTML body content only + def render_body(ast_root) + visit(ast_root) + end + + def layoutfile + # Determine layout file like HTMLBuilder + if config.maker == 'webmaker' + htmldir = 'web/html' + localfilename = 'layout-web.html.erb' + else + htmldir = 'html' + localfilename = 'layout.html.erb' + end + + htmlfilename = if config['htmlversion'] == 5 || config['htmlversion'].nil? + File.join(htmldir, 'layout-html5.html.erb') + else + File.join(htmldir, 'layout-xhtml1.html.erb') + end + + layout_file = File.join(@book.basedir || '.', 'layouts', localfilename) + + # Check for custom layout file + if File.exist?(layout_file) + # Respect safe mode like HTMLBuilder + if ENV['REVIEW_SAFE_MODE'].to_i & 4 > 0 + warn 'user\'s layout is prohibited in safe mode. ignored.' + layout_file = File.expand_path(htmlfilename, ReVIEW::Template::TEMPLATE_DIR) + end + else + # Use default template + layout_file = File.expand_path(htmlfilename, ReVIEW::Template::TEMPLATE_DIR) + end + + layout_file + end + + # Helper methods for references + def get_chap(chapter = @chapter) + if config['secnolevel'] && config['secnolevel'] > 0 && + !chapter.number.nil? && !chapter.number.to_s.empty? + if chapter.is_a?(ReVIEW::Book::Part) + return text_formatter.format_part_short(chapter) + else + return chapter.format_number(nil) + end + end + nil + end + + private + + # Generate a complete HTML document with template. 
+ def post_process(result) + @body = result + + # Set up template variables like HTMLBuilder + # Chapter title is already plain text (markup removed), just escape it + @title = escape_content(@chapter&.title || '') + @language = config['language'] || 'ja' + @stylesheets = config['stylesheet'] || [] + @next = @chapter&.next_chapter + @prev = @chapter&.prev_chapter + @next_title = @next ? escape_content(@next.title) : '' + @prev_title = @prev ? escape_content(@prev.title) : '' + + # Handle MathJax configuration like HTMLBuilder + if config['math_format'] == 'mathjax' + @javascripts.push(%Q(<script>MathJax = { tex: { inlineMath: [['\\\\(', '\\\\)']] }, svg: { fontCache: 'global' } };</script>)) + @javascripts.push(%Q(<script type="text/javascript" id="MathJax-script" async="true" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>)) + end + + # Render template + ReVIEW::Template.load(layoutfile).result(binding) + end + + def visit_code_block_emlist(node) + processed_content = format_code_content(node) + + code_block_wrapper( + node, + div_class: 'emlist-code', + pre_class: build_pre_class('emlist', node.lang), + content: processed_content, + caption_style: :top_bottom + ) + end + + def visit_code_block_emlistnum(node) + numbered_lines = format_emlistnum_content(node) + + code_block_wrapper( + node, + div_class: 'emlistnum-code', + pre_class: build_pre_class('emlist', node.lang), + content: numbered_lines, + caption_style: :top_bottom + ) + end + + def visit_code_block_list(node) + processed_content = format_code_content(node) + + code_block_wrapper( + node, + div_class: 'caption-code', + pre_class: build_pre_class('list', node.lang), + content: processed_content, + caption_style: :numbered + ) + end + + def visit_code_block_listnum(node) + numbered_lines = format_listnum_content(node) + + code_block_wrapper( + node, + div_class: 'code', + pre_class: build_pre_class('list', node.lang, with_highlight: false), + content: numbered_lines, + 
caption_style: :numbered + ) + end + + def visit_code_block_source(node) + processed_content = format_code_content(node) + + code_block_wrapper( + node, + div_class: 'source-code', + pre_class: 'source', + content: processed_content, + caption_style: :top_bottom + ) + end + + def visit_code_block_cmd(node) + processed_content = format_code_content(node, default_lang: 'shell-session') + + code_block_wrapper( + node, + div_class: 'cmd-code', + pre_class: 'cmd', + content: processed_content, + caption_style: :top_bottom + ) + end + + def code_block_wrapper(node, div_class:, pre_class:, content:, caption_style:) + id_attr = node.id ? %Q( id="#{normalize_id(node.id)}") : '' + + caption_top = render_code_caption(node, caption_style, :top) + caption_bottom = render_code_caption(node, caption_style, :bottom) + + %Q(<div#{id_attr} class="#{div_class}"> +#{caption_top}<pre class="#{pre_class}">#{content}</pre> +#{caption_bottom}</div> +) + end + + def render_code_caption(node, style, position) + caption_node = node.caption_node + return '' unless caption_node + + caption_content = render_caption_inline(caption_node) + return '' if caption_content.empty? + + case style + when :top_bottom + return '' unless position == :top ? caption_top?('list') : !caption_top?('list') + + %Q(<p class="caption">#{caption_content}</p> +) + when :numbered + return '' unless position == :top + + list_number = generate_list_header(node.id, caption_content) + %Q(<p class="caption">#{list_number}</p> +) + else + '' + end + end + + def build_pre_class(base_class, lang, with_highlight: true) + classes = [base_class] + classes << "language-#{lang}" if lang + classes << 'highlight' if with_highlight && highlight? + classes.join(' ') + end + + def format_code_content(node, default_lang: nil) + lang = node.lang || default_lang + + # Disable highlighting if code block contains inline elements (e.g., @<b>{}) + # to allow proper rendering of inline markup + if highlight? && !node.contains_inline? 
# Render the body of an //emlistnum block with line numbers.
#
# @param node [Object] code block node
# @return [String] highlighted or manually numbered code
def format_emlistnum_content(node)
  format_numbered_code_content(node)
end

# Render the body of a //listnum block with line numbers.
#
# @param node [Object] code block node
# @return [String] highlighted or manually numbered code
def format_listnum_content(node)
  format_numbered_code_content(node)
end

# Internal helper shared by the two numbered code block formatters, which were
# previously identical copies of each other.
#
# When highlighting is enabled and the block has no inline markup, the work is
# delegated to the highlighter with line numbering switched on. Otherwise the
# rendered children are split into lines and each one is prefixed with
# "<number>: ", the number right-aligned to two columns.
#
# @param node [Object] reads #lang, #first_line_num, #contains_inline?, #plain_text
# @return [String]
def format_numbered_code_content(node)
  lang = node.lang
  first_line_number = node.first_line_num || 1

  # Highlighting is skipped for blocks with inline elements so that the
  # inline markup is rendered instead of being treated as source text.
  if highlight? && !node.contains_inline?
    return highlight(body: node.plain_text, lexer: lang, format: 'html',
                     linenum: true, options: { linenostart: first_line_number })
  end

  lines = render_children(node).split("\n")
  lines.pop if lines.last && lines.last.empty?
  numbered = lines.each_with_index.map do |line, offset|
    "#{(first_line_number + offset).to_s.rjust(2)}: #{detab(line)}"
  end
  numbered.join("\n") + "\n"
end
# Build the numbered caption prefix for a //list block.
#
# @param id [String] list id looked up in the current chapter
# @param caption [String] rendered caption text
# @return [String] caption formatted by the text formatter
# @raise [NotImplementedError] when the chapter has no such list (or there is no chapter)
def generate_list_header(id, caption)
  entry = @chapter&.list(id)
  raise NotImplementedError, "no such list: #{id}" unless entry

  list_num = entry.number
  text_formatter.format_caption('list', @chapter&.number, list_num, caption)
end

# Render a reference node: formatted when resolved, raw content otherwise.
#
# @return [String]
def visit_reference(node)
  return format_resolved_reference(node.resolved_data) if node.resolved?

  # Resolution was skipped or disabled; fall back to the stored content.
  node.content || ''
end

# Turn resolved reference data into HTML.
#
# The text formatter supplies plain text; this method adds the HTML wrapper
# appropriate to the reference type.
#
# @param data [Object] resolved data (reads #reference_type)
# @return [String]
def format_resolved_reference(data)
  kind = data.reference_type
  label = text_formatter.format_reference(kind, data)

  # Reference types rendered as <span class="..."> with an optional link.
  span_classes = { image: 'imgref', table: 'tableref', list: 'listref', equation: 'eqref' }

  if span_classes.key?(kind)
    format_html_reference(label, data, span_classes[kind])
  elsif kind == :bibpaper
    %Q(<span class="bibref">#{label}</span>)
  elsif %i[chapter headline column word].include?(kind)
    escape_html(label)
  else
    # Remaining types (e.g. footnote, endnote) are returned untouched.
    label
  end
end
# Wrap reference text in a <span>, linking to the target when `chapterlink` is set.
#
# @param text [String] plain text to wrap
# @param data [Object] resolved reference data (reads #chapter_id, #item_id)
# @param css_class [String] class of the <span>
# @return [String] HTML markup
def format_html_reference(text, data, css_class)
  inner = text
  if config['chapterlink']
    # Prefer the chapter recorded on the reference; otherwise the current one.
    target_chapter = data.chapter_id || @chapter&.id
    suffix = ".#{config['htmlext'] || 'html'}"
    inner = %Q(<a href="./#{target_chapter}#{suffix}##{normalize_id(data.item_id)}">#{text}</a>)
  end
  %Q(<span class="#{css_class}">#{inner}</span>)
end

# Render a footnote definition block.
#
# Endnote definitions produce no output here (they are emitted by the
# printendnotes block). The children are rendered before that check, exactly
# as before. With `epubversion` 3 the EPUB3 markup is produced, including a
# back link only when `epubmaker.back_footnote` is configured.
#
# @return [String]
def visit_footnote(node)
  body = render_children(node)
  return '' if node.footnote_type == :endnote

  number = @chapter&.footnote(node.id)&.number || '??'
  anchor = normalize_id(node.id)

  unless config['epubversion'].to_i == 3
    return %Q(<div class="footnote" id="fn-#{anchor}"><p class="footnote">[<a href="#fnb-#{anchor}">*#{number}</a>] #{body}</p></div>)
  end

  backmark = if config['epubmaker'] && config['epubmaker']['back_footnote']
               %Q(<a href="#fnb-#{anchor}">#{text_formatter.format_footnote_backmark}</a>)
             else
               ''
             end
  %Q(<div class="footnote" epub:type="footnote" id="fn-#{anchor}"><p class="footnote">#{backmark}#{text_formatter.format_footnote_textmark(number)}#{body}</p></div>)
end

# Render an embed node; every embed type goes through process_raw_embed.
def visit_embed(node)
  process_raw_embed(node)
end

# Dispatch an inline element to its `render_inline_<type>` method.
#
# The inline element handler is asked first. If it does not respond to the
# method (private methods included), this renderer's own method is used.
# Whatever the chosen method returns is returned as-is.
#
# @raise [NotImplementedError] when neither object implements the method
def render_inline_element(type, content, node)
  renderer_name = "render_inline_#{type}"

  [@inline_element_handler, self].each do |target|
    return target.send(renderer_name, type, content, node) if target.respond_to?(renderer_name, true)
  end

  raise NotImplementedError, "Unknown inline element: #{type}"
end
# Render a //comment block; produces output only in draft mode.
#
# The first block argument (escaped) and the rendered body are joined with
# '<br />'. Returns '' when draft mode is off or nothing is left to show.
#
# @return [String]
def render_comment_block(node)
  return '' unless config['draft']

  parts = []
  heading = node.args.first
  parts << escape(heading) if heading && !heading.empty?

  if node.content && !node.content.empty?
    rendered = render_children(node)
    parts << rendered unless rendered.empty?
  end

  parts.empty? ? '' : %Q(<div class="draft-comment">#{parts.join('<br />')}</div>)
end

# Render a note-like block (note, memo, tip, ...) as <div class="TYPE">.
#
# @param node [Object] block node (reads #id and #caption_node)
# @param type [String] CSS class; also prefixes the "-header" caption class
# @return [String]
def render_callout_block(node, type)
  anchor = node.id ? %Q( id="#{normalize_id(node.id)}") : ''

  header = ''
  caption = render_caption_inline(node.caption_node)
  header = %Q(<div class="#{type}-header">#{caption}</div>) unless caption.empty?

  %Q(<div class="#{type}"#{anchor}>\n#{header}#{render_children(node)}</div>)
end

# Render a <a id="..."></a> anchor for a //label block.
def render_label_block(node)
  label = node.args.first
  label ? %Q(<a id="#{normalize_id(label)}"></a>) : ''
end

# //tsize produces no output in this renderer.
def render_tsize_block(_node)
  ''
end

# Render the endnotes collected for the current chapter.
#
# Returns '' when there is no chapter or no endnotes. Otherwise records that
# endnotes were shown (@shown_endnotes) and emits one <div class="endnote">
# per entry, with a back link only when `epubmaker.back_footnote` is set.
#
# @return [String]
def render_printendnotes_block(_node)
  return '' unless @chapter
  return '' unless @chapter.endnotes
  return '' if @chapter.endnotes.size == 0

  @shown_endnotes = true

  pieces = [%Q(<div class="endnotes">\n)]
  @chapter.endnotes.each do |en|
    back = ''
    if config['epubmaker'] && config['epubmaker']['back_footnote']
      back = %Q(<a href="#endnoteb-#{normalize_id(en.id)}">#{text_formatter.format_footnote_backmark}</a>)
    end
    body = render_children(en.footnote_node)
    mark = text_formatter.format_endnote_textmark(@chapter.endnote(en.id).number)
    pieces << %Q(<div class="endnote" id="endnote-#{normalize_id(en.id)}"><p class="endnote">#{back}#{mark}#{body}</p></div>\n)
  end
  pieces << %Q(</div>\n)
  pieces.join
end
class="center">)) + end + + def render_bibpaper_block(node) + # For BlockNode, id and caption are in args array like HTMLBuilder's bibpaper(lines, id, caption) + id = node.args[0] + caption_text = node.args[1] + + # Start div (puts in HTMLBuilder, so newline after) + result = %Q(<div class="bibpaper">\n) + + # Add anchor and number like HTMLBuilder's bibpaper_header + # bibpaper_header uses print for anchor, then puts for caption (with newline) + if id && @chapter + begin + bibpaper_number = @chapter.bibpaper(id).number + result += %Q(<a id="bib-#{normalize_id(id)}">[#{bibpaper_number}]</a> ) + rescue StandardError + # If bibpaper not found, use ?? like other references + result += %Q(<a id="bib-#{normalize_id(id)}">[??]</a> ) + end + end + + # Add caption as plain text (BlockNode doesn't have caption_node) + # HTMLBuilder uses puts " #{compile_inline(caption)}", so space before caption and newline after + if caption_text && !caption_text.empty? + result += escape_content(caption_text) + "\n" + end + + # Add content wrapped in <p> if present (like split_paragraph does) + # HTMLBuilder uses print for bibpaper_bibpaper, so no newline after + # Then puts '</div>' adds the closing tag with newline + content = render_children(node) + unless content.strip.empty? + # strip to remove paragraph newlines, match Builder's behavior + result += %Q(<p>#{content.strip}</p>) + end + + # Close div (puts in HTMLBuilder, so it's on the same line as </p>) + result + "</div>\n" + end + + def escape(str) + # Use EscapeUtils for consistency + escape_content(str.to_s) + end + + def headline_prefix(level) + return [nil, nil] unless @sec_counter + + @sec_counter.inc(level) + anchor = @sec_counter.anchor(level) + prefix = @sec_counter.prefix(level, config['secnolevel']) + [prefix, anchor] + end + + def image_image_html(id, caption_node, id_attr, image_type = :image) + caption_html = image_header_html(id, caption_node, image_type) + caption_present = !caption_html.empty? 
# Render the placeholder markup used when an image file is not available.
#
# @param id [String] image id
# @param caption_node [Object, nil] caption AST node
# @param lines [Array<String>] raw lines shown inside the placeholder <pre>
# @param id_attr [String] pre-built ` id="..."` attribute or ''
# @param image_type [Symbol] forwarded to image_header_html
# @return [String]
def image_dummy_html(id, caption_node, lines, id_attr, image_type = :image)
  caption_html = image_header_html(id, caption_node, image_type)
  build_dummy_image_html(caption_html, lines, id_attr)
end

# Same as image_dummy_html, but the caption is rendered with an explicit
# rendering context.
#
# @param caption_context [Object] context forwarded to image_header_html_with_context
# @return [String]
def image_dummy_html_with_context(id, caption_node, lines, id_attr, caption_context, image_type = :image)
  caption_html = image_header_html_with_context(id, caption_node, caption_context, image_type)
  build_dummy_image_html(caption_html, lines, id_attr)
end

# Internal helper: assemble the <div class="image"> placeholder. The two
# public entry points above previously duplicated this logic line for line.
#
# The caption goes above the <pre> only when caption_top?('image') holds and
# the caption is non-empty; otherwise it follows the <pre>.
#
# @param caption_html [String] rendered caption paragraph, possibly ''
# @param lines [Array<String>] raw lines; each one is escaped
# @param id_attr [String]
# @return [String]
def build_dummy_image_html(caption_html, lines, id_attr)
  # An empty placeholder still contains a single newline.
  pre_body = lines.empty? ? "\n" : "\n#{lines.map { |line| escape(line) }.join("\n")}\n"
  pre_html = %Q(<pre class="dummyimage">#{pre_body}</pre>\n)

  if caption_top?('image') && !caption_html.empty?
    %Q(<div#{id_attr} class="image">\n#{caption_html}#{pre_html}</div>\n)
  else
    %Q(<div#{id_attr} class="image">\n#{pre_html}#{caption_html}</div>\n)
  end
end
# Build the numbered caption prefix for a table.
#
# @param id [String] table id looked up in the current chapter
# @param caption [String] rendered caption text
# @return [String] caption formatted by the text formatter
# @raise [NotImplementedError] when the lookup raises ReVIEW::KeyError
def generate_table_header(id, caption)
  entry = @chapter.table(id)
  table_num = entry.number
  text_formatter.format_caption('table', @chapter.number, table_num, caption)
rescue ReVIEW::KeyError
  raise NotImplementedError, "no such table: #{id}"
end
+ unless @chapter&.image_bound?(id) + warn "image not bound: #{id}" + # For dummy images, use empty array for lines (no lines in TableNode) + return render_imgtable_dummy(id, caption_node, []) + end + + id_attr = id ? %Q( id="#{normalize_id(id)}") : '' + + # Generate table caption HTML if caption exists + caption_content = render_caption_inline(caption_node) + caption_html = if caption_content.empty? + '' + else + table_caption = generate_table_header(id, caption_content) + %Q(<p class="caption">#{table_caption}</p>\n) + end + + # Render image tag + begin + image_path = @chapter.image(id).path.sub(%r{\A\./}, '') + alt_text = escape(node.caption_text) + img_html = %Q(<img src="#{image_path}" alt="#{alt_text}" />\n) + + # Check caption positioning like HTMLBuilder (uses 'table' type for imgtable) + if caption_top?('table') && !caption_content.empty? + %Q(<div#{id_attr} class="imgtable image">\n#{caption_html}#{img_html}</div>\n) + else + %Q(<div#{id_attr} class="imgtable image">\n#{img_html}#{caption_html}</div>\n) + end + rescue ReVIEW::KeyError + app_error "no such table: #{id}" + end + end + + def render_imgtable_dummy(id, caption_node, lines) + id_attr = id ? %Q( id="#{normalize_id(id)}") : '' + + # Generate table caption HTML if caption exists + caption_content = render_caption_inline(caption_node) + caption_html = if caption_content.empty? + '' + else + table_caption = generate_table_header(id, caption_content) + %Q(<p class="caption">#{table_caption}</p>\n) + end + + # Generate dummy content like image_dummy_html + lines_content = if lines.empty? + "\n" + else + "\n" + lines.map { |line| escape(line) }.join("\n") + "\n" + end + + # Check caption positioning like HTMLBuilder + if caption_top?('table') && !caption_content.empty? 
# Rewrite void HTML tags found in raw embeds into XHTML self-closing form.
#
# `<hr>`, `<br>`, `<img ...>` and `<input ...>` become `<hr />`, `<br />`,
# `<img ... />` and `<input ... />`. Tags that are already self-closed are
# left alone. Previously `<hr />` and `<br />` were matched again and turned
# into `<hr / />` / `<br / />`; the negative lookbehind `(?<!/)` now skips any
# hr/br tag whose last character before `>` is a slash.
#
# @param content [String] raw HTML fragment
# @return [String] fragment with void tags self-closed
def ensure_xhtml_compliance(content)
  content.gsub(%r{<(hr|br)(\s[^>]*)?(?<!/)>}, '<\1\2 />').
    gsub(%r{<img([^>]*[^/])>}, '<img\1 />').
    gsub(%r{<input([^>]*[^/])>}, '<input\1 />')
end

# Builder compatibility - return target name for embed blocks
def target_name
  'html'
end

# Render the children of +node+ with +context+ as the current rendering context.
#
# The previous context is restored in an `ensure` clause, so an exception
# raised while rendering no longer leaves the temporary context installed.
#
# @return [String] rendered children
def render_children_with_context(node, context)
  old_context = @rendering_context
  @rendering_context = context
  begin
    render_children(node)
  ensure
    @rendering_context = old_context
  end
end

# Visit +node+ with +context+ as the current rendering context.
#
# The previous context is restored even when visiting raises.
#
# @return [Object] result of visit(node)
def visit_with_context(node, context)
  old_context = @rendering_context
  @rendering_context = context
  begin
    visit(node)
  ensure
    @rendering_context = old_context
  end
end
# Tell whether a section number is shallow enough to be displayed.
#
# The depth is the number of dot-separated components of `n.to_s`; it is
# compared with `secnolevel` (2 when unset).
#
# @param n [#to_s] section number such as "1.2"
# @return [Boolean]
def over_secnolevel?(n)
  depth = n.to_s.split('.').size
  (config['secnolevel'] || 2) >= depth
end

# Formatted number of the current chapter or part, or nil when numbering is
# disabled (`secnolevel` unset or not positive) or the chapter has no number.
#
# @return [String, nil]
def get_chap # rubocop:disable Naming/AccessorMethodName
  level = config['secnolevel']
  return nil unless level && level > 0
  return nil if chapter.number.nil? || chapter.number.to_s.empty?

  if chapter.is_a?(ReVIEW::Book::Part)
    text_formatter.format_part_short(chapter)
  else
    chapter.format_number(nil)
  end
end
+ +require 'digest/sha2' + +module ReVIEW + module Renderer + module Idgxml + # Inline element handler for IDGXML rendering + # Uses InlineContext for shared logic + class InlineElementHandler + include ReVIEW::HTMLUtils + include ReVIEW::Loggable + + def initialize(inline_context) + @ctx = inline_context + @img_math = @ctx.img_math + @logger = ReVIEW.logger + end + + # Basic formatting + # Note: content is already escaped by visit_text, so don't escape again + def render_inline_b(_type, content, _node) + %Q(<b>#{content}</b>) + end + + def render_inline_i(_type, content, _node) + %Q(<i>#{content}</i>) + end + + def render_inline_em(_type, content, _node) + %Q(<em>#{content}</em>) + end + + def render_inline_strong(_type, content, _node) + %Q(<strong>#{content}</strong>) + end + + def render_inline_tt(_type, content, _node) + %Q(<tt>#{content}</tt>) + end + + def render_inline_ttb(_type, content, _node) + %Q(<tt style='bold'>#{content}</tt>) + end + + def render_inline_ttbold(type, content, node) + render_inline_ttb(type, content, node) + end + + def render_inline_tti(_type, content, _node) + %Q(<tt style='italic'>#{content}</tt>) + end + + def render_inline_u(_type, content, _node) + %Q(<underline>#{content}</underline>) + end + + def render_inline_ins(_type, content, _node) + %Q(<ins>#{content}</ins>) + end + + def render_inline_del(_type, content, _node) + %Q(<del>#{content}</del>) + end + + def render_inline_sup(_type, content, _node) + %Q(<sup>#{content}</sup>) + end + + def render_inline_sub(_type, content, _node) + %Q(<sub>#{content}</sub>) + end + + def render_inline_ami(_type, content, _node) + %Q(<ami>#{content}</ami>) + end + + def render_inline_bou(_type, content, _node) + %Q(<bou>#{content}</bou>) + end + + def render_inline_keytop(_type, content, _node) + %Q(<keytop>#{content}</keytop>) + end + + # Code + def render_inline_code(_type, content, _node) + %Q(<tt type='inline-code'>#{content}</tt>) + end + + # Hints + def render_inline_hint(_type, 
# Render @<maru>{...} as a numeric character reference to a circled glyph.
#
# * digits      -> 9311 + value   (so "1" is U+2460)
# * one of A-Z  -> 9398 + offset  (so "A" is U+24B6)
# * one of a-z  -> 9392 + (codepoint - 65), i.e. "a" (97) is U+24D0
# * anything else is returned escaped and unchanged
#
# The former `rescue NoMethodError` fallbacks (`str[0] - 65`) were removed:
# `str` is a String that has just matched the pattern, so taking its first
# codepoint cannot raise, and the fallback expression itself is not valid for
# a String.
#
# @param content [String] used when the node carries no argument
# @param node [Object] inline node (reads #args)
# @return [String]
def render_inline_maru(_type, content, node)
  str = node.args.first || content

  if /\A\d+\Z/.match?(str)
    sprintf('&#x%x;', 9311 + str.to_i)
  elsif /\A[A-Z]\Z/.match?(str)
    sprintf('&#x%x;', 9398 + str.ord - 65)
  elsif /\A[a-z]\Z/.match?(str)
    sprintf('&#x%x;', 9392 + str.ord - 65)
  else
    escape(str)
  end
end
# Render @<href>{url[, label]} as an <a linkurl='...'> element.
#
# `\,` sequences in the arguments are turned back into commas and the values
# are stripped. With a single argument the URL doubles as the label; with no
# arguments the already rendered content is used for both.
#
# @param content [String] rendered content, used only when the node has no args
# @param node [Object] inline node (reads #args)
# @return [String]
def render_inline_href(_type, content, node)
  return %Q(<a linkurl='#{content}'>#{content}</a>) if node.args.empty?

  url, label = node.args.first(2).map { |arg| arg.gsub('\,', ',').strip }
  label ||= url
  %Q(<a linkurl='#{escape(url)}'>#{escape(label)}</a>)
end
+ raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + base_ref = @ctx.text_formatter.format_reference(:image, data) + "<span type='image'>#{base_ref}</span>" + end + + def render_inline_eq(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + base_ref = @ctx.text_formatter.format_reference(:equation, data) + "<span type='eq'>#{base_ref}</span>" + end + + def render_inline_imgref(type, content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + + # If no caption, fall back to render_inline_img + if data.caption_text.blank? + return render_inline_img(type, content, node) + end + + # Build reference with caption + base_ref = @ctx.text_formatter.format_reference(:image, data) + caption = @ctx.text_formatter.format_image_quote(data.caption_text) + "<span type='image'>#{base_ref}#{caption}</span>" + end + + # Column reference + def render_inline_column(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + + # Use caption_node to render inline elements if available + # For cross-chapter references, caption_node may not be available, so fall back to caption_text + compiled_caption = if data.caption_node + @ctx.render_caption_inline(data.caption_node) + else + escape(data.caption_text) + end + + column_text = @ctx.text_formatter.format_column_label(compiled_caption) + + if @ctx.chapter_link_enabled? 
# Internal helper: resolved data of the node's first child, or nil when the
# node has no children or the child is not a resolved reference.
def resolved_data_or_nil(node)
  ref_node = node.children.first
  return nil unless ref_node&.reference_node? && ref_node.resolved?

  ref_node.resolved_data
end

# Headline reference (@<hd>): formatted reference text, or the already
# rendered content when the reference is unavailable.
#
# @return [String]
def render_inline_hd(_type, content, node)
  data = resolved_data_or_nil(node)
  return content unless data

  @ctx.text_formatter.format_reference(:headline, data)
end

# Section number reference (@<sec>): "<chapter>.<section numbers>" or ''.
#
# The number is shown only when the headline has a number, the chapter has a
# short number, and the full number is within the configured `secnolevel`.
#
# The depth check is done on the joined "chapter.section..." string.
# `headline_number` is concatenated with an Array below, so it is Array-like.
# Handing it directly to over_secnolevel?, which counts the dot-separated
# parts of `n.to_s`, always yielded a depth of 1 and the limit never applied.
#
# @return [String]
def render_inline_sec(_type, _content, node)
  data = resolved_data_or_nil(node)
  return '' unless data

  n = data.headline_number
  chapter_num = @ctx.text_formatter.format_chapter_number_short(data.chapter_number, data.chapter_type)
  return '' unless n.present? && chapter_num && !chapter_num.empty?

  full_number = ([chapter_num] + n).join('.')
  @ctx.over_secnolevel?(full_number) ? full_number : ''
end

# Section title reference (@<sectitle>): the caption rendered from its AST
# node when available, otherwise the stored caption text. Falls back to the
# already rendered content when the reference is unavailable.
#
# @return [String]
def render_inline_sectitle(_type, content, node)
  data = resolved_data_or_nil(node)
  return content unless data

  caption_node = data.caption_node
  caption_node ? @ctx.render_caption_inline(caption_node) : data.caption_text
end
+ raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + title = data.chapter_title || '' + if @ctx.chapter_link_enabled? + %Q(<link href="#{data.item_id}">#{title}</link>) + else + title + end + end + + # Labels + def render_inline_labelref(_type, content, node) + # Get idref from node.args (raw, not escaped) + idref = node.args.first || content + marker = @ctx.text_formatter.format_label_marker(idref) + %Q(<ref idref='#{escape(idref)}'>「#{escape(marker)}」</ref>) + end + + def render_inline_ref(type, content, node) + render_inline_labelref(type, content, node) + end + + def render_inline_pageref(_type, content, node) + idref = node.args.first || content + %Q(<pageref idref='#{escape(idref)}'>●●</pageref>) + end + + # Icon (inline image) + def render_inline_icon(_type, content, node) + id = node.args.first || content + begin + %Q(<Image href="file://#{@ctx.chapter.image(id).path.sub(%r{\A\./}, '')}" type="inline" />) + rescue StandardError + '' + end + end + + # Balloon + def render_inline_balloon(_type, content, node) + # Content is already escaped and rendered from children + # Need to get raw text from node to process @maru markers + # Since InlineNode processes children first, we need raw args + if node.args.first + # Get raw string from args (not escaped yet) + str = node.args.first + processed = escape(str).gsub(/@maru\[(\d+)\]/) do + # $1 is the captured number string + number = $1 + # Generate maru character directly + if /\A\d+\Z/.match?(number) + sprintf('&#x%x;', 9311 + number.to_i) + else + "@maru[#{number}]" + end + end + %Q(<balloon>#{processed}</balloon>) + else + # Fallback: use content as-is + %Q(<balloon>#{content}</balloon>) + end + end + + # Unicode character + def render_inline_uchar(_type, content, node) + str = node.args.first || content + %Q(&#x#{str};) + end + + # Math + def render_inline_m(_type, content, node) + str = node.args.first || content + + if @ctx.math_format == 'imgmath' + 
require 'review/img_math' + @ctx.increment_texinlineequation + + math_str = '$' + str + '$' + key = Digest::SHA256.hexdigest(str) + @img_math ||= ReVIEW::ImgMath.new(@ctx.config) + img_path = @img_math.defer_math_image(math_str, key) + %Q(<inlineequation><Image href="file://#{img_path}" type="inline" /></inlineequation>) + else + counter_value = @ctx.increment_texinlineequation + %Q(<replace idref="texinline-#{counter_value}"><pre>#{escape(str)}</pre></replace>) + end + end + + # DTP processing instruction + def render_inline_dtp(_type, content, node) + str = node.args.first || content + "<?dtp #{str} ?>" + end + + # Break + # Returns a protected newline marker that will be preserved through paragraph + # and nolf processing, then restored to an actual newline in visit_document + def render_inline_br(_type, _content, _node) + "\x01IDGXML_INLINE_NEWLINE\x01" + end + + # Raw + def render_inline_raw(_type, _content, node) + if node.targeted_for?('idgxml') + # Convert \\n to actual newlines + (node.content || '').gsub('\\n', "\n") + else + '' + end + end + + def render_inline_embed(_type, _content, node) + if node.targeted_for?('idgxml') + # Convert \\n to actual newlines + (node.content || '').gsub('\\n', "\n") + else + '' + end + end + + # Comment + def render_inline_comment(_type, content, node) + if @ctx.draft_mode? 
+ str = node.args.first || content + %Q(<msg>#{escape(str)}</msg>) + else + '' + end + end + + # Recipe (FIXME placeholder) + def render_inline_recipe(_type, content, node) + id = node.args.first || content + %Q(<recipe idref="#{escape(id)}">[XXX]「#{escape(id)}」 p.XX</recipe>) + end + + # Alias for secref + def render_inline_secref(type, content, node) + render_inline_hd(type, content, node) + end + + private + + def escape(str) + @ctx.escape(str) + end + end + end + end +end diff --git a/lib/review/renderer/idgxml_renderer.rb b/lib/review/renderer/idgxml_renderer.rb new file mode 100644 index 000000000..00752bd43 --- /dev/null +++ b/lib/review/renderer/idgxml_renderer.rb @@ -0,0 +1,1832 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# +# == Newline Protection Markers +# +# This renderer uses special markers to protect certain newlines from being +# removed during paragraph joining and nolf (no-line-feed) processing: +# +# - IDGXML_INLINE_NEWLINE: Protects newlines from inline elements (@<br>{}, @<raw>{\n}) +# These newlines must be preserved in the final output as they are intentionally +# inserted by the user for formatting purposes. +# +# - IDGXML_PRE_NEWLINE: Protects newlines inside <pre> tags during nolf processing +# +# - IDGXML_LISTINFO_NEWLINE: Protects newlines inside <listinfo> tags +# +# - IDGXML_ENDNOTE_NEWLINE: Protects newlines inside <endnotes> blocks +# +# The markers are restored to actual newlines at the end of visit_document. 
+require 'review/htmlutils' +require 'review/textutils' +require 'review/sec_counter' +require 'review/ast/caption_node' +require 'review/ast/paragraph_node' +require 'review/i18n' +require 'review/loggable' +require 'digest/sha2' +require_relative 'base' +require_relative 'rendering_context' +require_relative 'idgxml/inline_context' +require_relative 'idgxml/inline_element_handler' + +module ReVIEW + module Renderer + class IdgxmlRenderer < Base + include ReVIEW::HTMLUtils + include ReVIEW::TextUtils + include ReVIEW::Loggable + + attr_reader :chapter, :book, :logger + attr_accessor :img_math, :img_graph + + def initialize(chapter) + super + + # Initialize logger for Loggable module + @logger = ReVIEW.logger + + I18n.setup(config['language'] || 'ja') + + # Initialize section counters like IDGXMLBuilder + @section = 0 + @subsection = 0 + @subsubsection = 0 + @subsubsubsection = 0 + @sec_counter = SecCounter.new(5, @chapter) if @chapter + + # Initialize table state + @tablewidth = nil + @table_id = nil + @col = 0 + @table_node_cellwidth = nil # Temporarily stores cellwidth from TableNode during table processing + + # Initialize equation counters + @texblockequation = 0 + @texinlineequation = 0 + + # Initialize ImgMath for math rendering + @img_math = nil + + # Initialize ImgGraph for graph rendering + @img_graph = nil + + # Initialize root element name + @rootelement = 'doc' + + # Get structuredxml setting + @secttags = config['structuredxml'] + + # Initialize RenderingContext + @rendering_context = RenderingContext.new(:document) + + # Initialize AST helpers + @ast_indexer = nil + @ast_compiler = nil + + # Initialize IDGXML-specific inline context and inline element handler + @inline_context = Idgxml::InlineContext.new( + config: config, + book: book, + chapter: chapter, + renderer: self, + img_math: @img_math + ) + @inline_element_handler = Idgxml::InlineElementHandler.new(@inline_context) + end + + # Format type for this renderer + # @return [Symbol] Format type 
:idgxml + def format_type + :idgxml + end + + # Increment texinlineequation counter and return new value + # Called from inline element handler via InlineContext + def increment_texinlineequation + @texinlineequation += 1 + end + + def visit_document(node) + # Check nolf mode (enabled by default for IDGXML) + # IDGXML format removes newlines between tags by default + nolf = config.key?('nolf') ? config['nolf'] : true + + # Output XML declaration and root element + output = [] + output << %Q(<?xml version="1.0" encoding="UTF-8"?>) + output << %Q(<#{@rootelement} xmlns:aid="http://ns.adobe.com/AdobeInDesign/4.0/">) + + # Render document content + content = render_children(node) + + # Close section tags if structuredxml is enabled + closing_tags = '' + if @secttags + closing_tags += '</sect4>' if @subsubsubsection > 0 + closing_tags += '</sect3>' if @subsubsection > 0 + closing_tags += '</sect2>' if @subsection > 0 + closing_tags += '</sect>' if @section > 0 + closing_tags += '</chapter>' + end + + # Combine all parts + output << content + output << closing_tags + output << "</#{@rootelement}>\n" + + result = output.join + + # Remove newlines between tags if nolf mode is enabled (default) + # But preserve newlines inside <pre> tags and listinfo tags + if nolf + # Protect newlines inside <pre> tags + result = result.gsub(%r{<pre>(.*?)</pre>}m) do |match| + match.gsub("\n", "\x01IDGXML_PRE_NEWLINE\x01") + end + + # Remove all newlines between tags and before closing tags + # This handles both >\n< and text\n< patterns + result = result.gsub(/\n+</, '<') + + # Restore newlines inside <pre> tags + result = result.gsub("\x01IDGXML_PRE_NEWLINE\x01", "\n") + end + + # Restore protected newlines from listinfo, inline elements, and endnotes + result = result.gsub("\x01IDGXML_LISTINFO_NEWLINE\x01", "\n") + result = result.gsub("\x01IDGXML_INLINE_NEWLINE\x01", "\n") + result.gsub("\x01IDGXML_ENDNOTE_NEWLINE\x01", "\n") + end + + def visit_headline(node) + # Skip nodisp headlines 
(display: no, TOC: yes) + return '' if node.nodisp? + + level = node.level + label = node.label + caption = render_children(node.caption_node) if node.caption_node + + result = [] + + # Close section tags as needed + closing = output_close_sect_tags(level) + result << closing if closing && !closing.empty? + + # Handle section tag opening for structuredxml mode + case level + when 1 + result << %Q(<chapter id="chap:#{@chapter.number}">) if @secttags + @section = 0 + @subsection = 0 + @subsubsection = 0 + @subsubsubsection = 0 + when 2 + @section += 1 + result << %Q(<sect id="sect:#{@chapter.number}.#{@section}">) if @secttags + @subsection = 0 + @subsubsection = 0 + @subsubsubsection = 0 + when 3 + @subsection += 1 + result << %Q(<sect2 id="sect:#{@chapter.number}.#{@section}.#{@subsection}">) if @secttags + @subsubsection = 0 + @subsubsubsection = 0 + when 4 + @subsubsection += 1 + result << %Q(<sect3 id="sect:#{@chapter.number}.#{@section}.#{@subsection}.#{@subsubsection}">) if @secttags + @subsubsubsection = 0 + when 5 + @subsubsubsection += 1 + result << %Q(<sect4 id="sect:#{@chapter.number}.#{@section}.#{@subsection}.#{@subsubsection}.#{@subsubsubsection}">) if @secttags + when 6 + # ignore level 6 + else + raise "caption level too deep or unsupported: #{level}" + end + + # Get headline prefix + prefix, _anchor = headline_prefix(level) if @sec_counter + + # Generate label attribute + label_attr = label.nil? ? '' : %Q( id="#{label}") + + # Generate TOC caption (without footnotes and tags) + toccaption = escape(caption.to_s.gsub(/@<fn>\{.+?\}/, '').gsub(/<[^>]+>/, '')) + + # Output title with DTP processing instruction + result << %Q(<title#{label_attr} aid:pstyle="h#{level}">#{prefix}#{caption}) + + result.join("\n") + "\n" + end + + def visit_paragraph(node) + content = render_children(node) + + # Join lines in paragraph by removing newlines (like join_lines in IDGXMLBuilder) + # Inline elements like @
{} and @{} use protected markers that are preserved + # unless join_lines_by_lang is explicitly enabled + content = if config['join_lines_by_lang'] + content.tr("\n", ' ') + else + content.delete("\n") + end + + # Handle noindent attribute + if node.attribute?(:noindent) + return %Q(

#{content}

) + end + + # Check for tab indentation (inlist attribute) + if content =~ /\A(\t+)/ + indent_level = $1.size + content_without_tabs = content.sub(/\A\t+/, '') + return %Q(

#{content_without_tabs}

) + end + + # Regular paragraph + "

#{content}

" + end + + def visit_text(node) + escape(node.content.to_s) + end + + def visit_reference(node) + if node.resolved? + format_resolved_reference(node.resolved_data) + else + # Reference resolution was skipped or disabled + # Return content as fallback + node.content || '' + end + end + + # Format resolved reference based on ResolvedData + # Uses TextFormatter for centralized text formatting + def format_resolved_reference(data) + plain_text = text_formatter.format_reference(data.reference_type, data) + # IDGXML is XML-based, so escape all text content + escape_html(plain_text) + end + + def visit_list(node) + case node.list_type + when :ul + visit_ul(node) + when :ol + visit_ol(node) + when :dl + visit_dl(node) + else + raise NotImplementedError, "IdgxmlRenderer does not support list_type #{node.list_type}" + end + end + + def visit_list_item(node) + # Should not be called directly; handled by parent list + raise NotImplementedError, 'List item processing should be handled by visit_list' + end + + # visit_code_block is now handled by Base renderer with dynamic method dispatch + # Aliases will be defined after the original methods + + def visit_code_line(node) + # Render children and detab + content = render_children(node) + detab(content, tabwidth) + end + + def visit_table(node) + # Handle imgtable specially + if node.table_type == :imgtable + return visit_imgtable(node) + end + + # Regular table processing + visit_regular_table(node) + end + + def visit_table_row(node) + # Should be handled by visit_table + raise NotImplementedError, 'Table row processing should be handled by visit_table' + end + + def visit_table_cell(node) + # Should be handled by visit_table + raise NotImplementedError, 'Table cell processing should be handled by visit_table' + end + + def visit_image(node) + image_type = node.image_type + + case image_type + when :indepimage, :numberlessimage + visit_indepimage(node) + else + visit_regular_image(node) + end + end + + def 
visit_minicolumn(node) + type = node.minicolumn_type.to_s + caption = render_children(node.caption_node) if node.caption_node + content = render_children(node) + + # notice uses -t suffix when caption is present + if type == 'notice' && caption && !caption.empty? + captionblock_with_content('notice-t', content, caption, 'notice-title') + else + # Content already contains

tags from paragraphs + captionblock_with_content(type, content, caption) + end + end + + def visit_column(node) + caption = render_children(node.caption_node) if node.caption_node + content = render_children(node) + + # Determine column type (empty string for regular column) + type = '' + + # Generate column output using auto_id from Compiler + id_attr = %Q(id="#{node.auto_id}") + + result = [] + result << "<#{type}column #{id_attr}>" + if caption + result << %Q(#{caption}) + end + result << content.chomp + result << "" + + result.join("\n") + "\n" + end + + # visit_block is now handled by Base renderer with dynamic method dispatch + # Individual block type visitors + + def visit_block_quote(node) + content = render_children(node) + "#{content}\n" + end + + def visit_block_lead(node) + content = render_children(node) + "#{content}\n" + end + + def visit_block_read(node) + content = render_children(node) + "#{content}\n" + end + + def visit_block_note(node) + caption = node.args.first + content = render_children(node) + captionblock('note', content, caption) + end + + def visit_block_memo(node) + caption = node.args.first + content = render_children(node) + captionblock('memo', content, caption) + end + + def visit_block_tip(node) + caption = node.args.first + content = render_children(node) + captionblock('tip', content, caption) + end + + def visit_block_info(node) + caption = node.args.first + content = render_children(node) + captionblock('info', content, caption) + end + + def visit_block_warning(node) + caption = node.args.first + content = render_children(node) + captionblock('warning', content, caption) + end + + def visit_block_important(node) + caption = node.args.first + content = render_children(node) + captionblock('important', content, caption) + end + + def visit_block_caution(node) + caption = node.args.first + content = render_children(node) + captionblock('caution', content, caption) + end + + def visit_block_planning(node) + caption = 
node.args.first + content = render_children(node) + captionblock('planning', content, caption) + end + + def visit_block_best(node) + caption = node.args.first + content = render_children(node) + captionblock('best', content, caption) + end + + def visit_block_security(node) + caption = node.args.first + content = render_children(node) + captionblock('security', content, caption) + end + + def visit_block_reference(node) + caption = node.args.first + content = render_children(node) + captionblock('reference', content, caption) + end + + def visit_block_link(node) + caption = node.args.first + content = render_children(node) + captionblock('link', content, caption) + end + + def visit_block_practice(node) + caption = node.args.first + content = render_children(node) + captionblock('practice', content, caption) + end + + def visit_block_expert(node) + caption = node.args.first + content = render_children(node) + captionblock('expert', content, caption) + end + + def visit_block_point(node) + caption = node.args.first + content = render_block_content_with_paragraphs(node) + if caption && !caption.empty? && node.caption_node + caption_with_inline = render_caption_inline(node.caption_node) + captionblock_with_content('point-t', content, caption_with_inline, 'point-title') + else + captionblock_with_content('point', content, nil) + end + end + + def visit_block_shoot(node) + caption = node.args.first + content = render_block_content_with_paragraphs(node) + if caption && !caption.empty? && node.caption_node + caption_with_inline = render_caption_inline(node.caption_node) + captionblock_with_content('shoot-t', content, caption_with_inline, 'shoot-title') + else + captionblock_with_content('shoot', content, nil) + end + end + + def visit_block_notice(node) + caption = node.args.first + content = render_block_content_with_paragraphs(node) + if caption && !caption.empty? 
&& node.caption_node + caption_with_inline = render_caption_inline(node.caption_node) + captionblock_with_content('notice-t', content, caption_with_inline, 'notice-title') + else + captionblock_with_content('notice', content, nil) + end + end + + def visit_block_term(node) + content = render_block_content_with_paragraphs(node) + captionblock_with_content('term', content, nil) + end + + def visit_block_insn(node) + visit_syntaxblock(node) + end + + def visit_block_box(node) + visit_syntaxblock(node) + end + + def visit_block_flushright(node) + content = render_children(node) + content.gsub('

', %Q(

)) + "\n" + end + + def visit_block_centering(node) + content = render_children(node) + content.gsub('

', %Q(

)) + "\n" + end + + def visit_block_rawblock(node) + visit_rawblock(node) + end + + def visit_block_comment(node) + visit_comment_block(node) + end + + def visit_block_noindent(_node) + '' + end + + def visit_block_blankline(_node) + "

\n" + end + + def visit_block_pagebreak(_node) + "\n" + end + + def visit_block_hr(_node) + "


\n" + end + + def visit_block_label(node) + label_id = node.args.first + %Q(
) + end + + content = render_children(node) + unless content.empty? + # Wrap content in

tag like Builder does with split_paragraph + content = content.strip + result << "

#{content}

" + end + + result << "\n" + result.join("\n") + end + + def visit_tex_equation(node) + @texblockequation += 1 + content = node.content + + result = [] + + if node.id? + result << '' + + # Render caption with inline elements + caption_node = node.caption_node + rendered_caption = caption_node ? render_children(caption_node) : '' + + # Generate caption + caption_str = %Q(
) + + result << caption_str if caption_top?('equation') + end + + # Handle math format + if config['math_format'] == 'imgmath' + # Initialize ImgMath if needed + unless @img_math + require 'review/img_math' + @img_math = ReVIEW::ImgMath.new(config) + end + + fontsize = config.dig('imgmath_options', 'fontsize').to_f + lineheight = config.dig('imgmath_options', 'lineheight').to_f + math_str = "\\begin{equation*}\n\\fontsize{#{fontsize}}{#{lineheight}}\\selectfont\n#{content}\n\\end{equation*}\n" + key = Digest::SHA256.hexdigest(math_str) + img_path = @img_math.defer_math_image(math_str, key) + result << '' + result << %Q() + result << '' + else + result << %Q(
#{content}
) + end + + if node.id? + result << caption_str unless caption_top?('equation') + result << '' + end + + result.join("\n") + "\n" + end + + def visit_embed(node) + # All embed types now use unified processing + process_raw_embed(node) + end + + def visit_footnote(_node) + # FootnoteNode is not rendered directly - it's just a definition + # The actual footnote output is generated by @{id} inline element + # Return empty string to indicate no output for this definition block + '' + end + + def render_list(node, list_type) + tag_name = list_tag_name(node, list_type) + + body = case list_type + when :ul + render_unordered_items(node) + when :ol + render_ordered_items(node) + when :dl + render_definition_items(node) + else + raise NotImplementedError, "IdgxmlRenderer does not support list_type #{list_type}" + end + + "<#{tag_name}>#{body}" + end + + def list_tag_name(node, list_type) + levels = node.children&.map { |item| item.respond_to?(:level) ? item.level : nil }&.compact + max_level = levels&.max || 1 + max_level > 1 ? "#{list_type}#{max_level}" : list_type.to_s + end + + def render_unordered_items(node) + node.children.map { |item| render_unordered_item(item) }.join + end + + def render_unordered_item(item) + content = render_list_item_body(item) + %Q(
  • #{content}
  • ) + end + + def render_ordered_items(node) + # num attribute: display number from source (start_number or item.number) + # olnum attribute: InDesign's internal counter (set by OlnumProcessor) + # + # OlnumProcessor analyzes the list during AST compilation and sets: + # - start_number: the first item's display number + # - olnum_start: the starting value for InDesign's counter + # - For //olnum[N] directive: olnum_start = N + # - For explicit numbering: olnum_start = 1 + + start_number = node.start_number || 1 + current_number = start_number + current_olnum = node.olnum_start || 1 + + items = node.children.map do |item| + # num: the display number (from source or calculated) + display_number = item.respond_to?(:number) && item.number ? item.number : current_number + + content = render_list_item_body(item) + rendered = %Q(
  • #{content}
  • ) + current_number += 1 + current_olnum += 1 + rendered + end + + items.join + end + + def render_definition_items(node) + node.children.map { |item| render_definition_item(item) }.join + end + + def render_definition_item(item) + term_content = render_inline_nodes(item.term_children) + + # Definition content handling: + # - Initial inline content (paragraphs) are joined together without

    tags + # - Block elements (lists) are rendered as-is + # - Paragraphs after block elements are wrapped in

    tags + definition_parts = [] + has_block_element = false + + item.children.each do |child| + if child.is_a?(ReVIEW::AST::ParagraphNode) + # Render paragraph content + content = render_children(child) + # Join lines in paragraph by removing newlines (like join_lines in Builder) + content = if config['join_lines_by_lang'] + content.tr("\n", ' ') + else + content.delete("\n") + end + + definition_parts << if has_block_element + # After a block element, wrap paragraphs in

    tags + "

    #{content}

    " + else + # Initial paragraphs are not wrapped + content + end + else + # Block element (list, etc.) + definition_parts << visit(child) + has_block_element = true + end + end + + definition_content = definition_parts.join + + if definition_content.empty? + %Q(
    #{term_content}
    ) + else + %Q(
    #{term_content}
    #{definition_content}
    ) + end + end + + def render_list_item_body(item) + parts = [] + inline_buffer = [] + + item.children.each do |child| + if inline_node?(child) + inline_buffer << visit(child) + else + unless inline_buffer.empty? + parts << format_inline_buffer(inline_buffer) + inline_buffer.clear + end + parts << visit(child) + end + end + + parts << format_inline_buffer(inline_buffer) unless inline_buffer.empty? + content = parts.compact.join + content.end_with?("\n") ? content.chomp : content + end + + def ast_compiler + @ast_compiler ||= ReVIEW::AST::Compiler.for_chapter(@chapter) + end + + def render_inline_element(type, content, node) + # Delegate to inline element handler + method_name = "render_inline_#{type}" + if @inline_element_handler.respond_to?(method_name, true) + @inline_element_handler.send(method_name, type, content, node) + else + raise NotImplementedError, "Unknown inline element: #{type}" + end + end + + # Helpers + + def get_chap(chapter = @chapter) + if config['secnolevel'] && config['secnolevel'] > 0 && + !chapter.number.nil? && !chapter.number.to_s.empty? + if chapter.is_a?(ReVIEW::Book::Part) + return text_formatter.format_part_short(chapter) + else + return chapter.format_number(nil) + end + end + nil + end + + def over_secnolevel?(n) + secnolevel = config['secnolevel'] || 2 + secnolevel >= n.to_s.split('.').size + end + + # Render inline elements from caption_node + # @param caption_node [CaptionNode] Caption node to render + # @return [String] Rendered inline elements + def render_caption_inline(caption_node) + content = caption_node ? render_children(caption_node) : '' + + if config['join_lines_by_lang'] + content.gsub(/\n+/, ' ') + else + content.delete("\n") + end + end + + # Escape for IDGXML (uses HTML escaping) + def escape(str) + escape_html(str.to_s) + end + + private + + def render_nodes(nodes) + return '' unless nodes && !nodes.empty? 
+ + nodes.map { |child| visit(child) }.join + end + + def render_inline_nodes(nodes) + return '' unless nodes && !nodes.empty? + + format_inline_buffer(nodes.map { |child| visit(child) }) + end + + def format_inline_buffer(buffer) + return '' if buffer.empty? + + content = buffer.join("\n") + if config['join_lines_by_lang'] + content.tr("\n", ' ') + else + content.delete("\n") + end + end + + def inline_node?(node) + node.is_a?(ReVIEW::AST::TextNode) || node.is_a?(ReVIEW::AST::InlineNode) + end + + # Close section tags based on level + def output_close_sect_tags(level) + return unless @secttags + + closing_tags = [] + closing_tags << '' if level <= 5 && @subsubsubsection > 0 + closing_tags << '' if level <= 4 && @subsubsection > 0 + closing_tags << '' if level <= 3 && @subsection > 0 + closing_tags << '' if level <= 2 && @section > 0 + + closing_tags.join + end + + # Get headline prefix + def headline_prefix(level) + return [nil, nil] unless @sec_counter + + @sec_counter.inc(level) + anchor = @sec_counter.anchor(level) + prefix = @sec_counter.prefix(level, config['secnolevel']) + [prefix, anchor] + end + + # Check caption position + def caption_top?(type) + config.dig('caption_position', type) == 'top' + end + + # Handle metric for IDGXML + def handle_metric(str) + k, v = str.split('=', 2) + %Q(#{k}="#{v.sub(/\A["']/, '').sub(/["']\Z/, '')}") + end + + def result_metric(array) + " #{array.join(' ')}" + end + + # Captionblock helper for minicolumns + def captionblock(type, content, caption, specialstyle = nil) + result = [] + result << "<#{type}>" + if caption && !caption.empty? + style = specialstyle || "#{type}-title" + result << %Q(#{caption}) + end + blocked_lines = split_paragraph_content(content) + result << blocked_lines.join.chomp + result << "" + result.join("\n") + "\n" + end + + # Captionblock helper for content that already contains

    tags + def captionblock_with_content(type, content, caption, specialstyle = nil) + result = [] + result << "<#{type}>" + if caption && !caption.empty? + style = specialstyle || "#{type}-title" + result << %Q(#{caption}) + end + # Content already contains

    tags, use as-is + result << content.chomp + result << "" + result.join + "\n" + end + + # Syntaxblock helper for special code blocks + def syntaxblock(type, content, caption) + result = [] + + captionstr = nil + if caption && !caption.empty? + titleopentag = %Q(caption aid:pstyle="#{type}-title") + titleclosetag = 'caption' + if type == 'insn' + titleopentag = %Q(floattitle type="insn") + titleclosetag = 'floattitle' + end + captionstr = %Q(<#{titleopentag}>#{caption}) + end + + result << "<#{type}>" + result << captionstr if caption_top?('list') && captionstr + result << content.chomp + result << captionstr if !caption_top?('list') && captionstr + result << "" + + result.join("\n") + "\n" + end + + # Split paragraph content (from TextUtils) + def split_paragraph_content(content) + # Split content by double newlines to create paragraphs + paragraphs = content.split(/\n\n+/) + paragraphs.map { |para| "

    #{para.strip}

    " } + end + + # Render block content with paragraph grouping + # Used for point/shoot/notice/term blocks + def render_block_content_with_paragraphs(node) + # Render children directly - inline elements are already parsed during AST construction + render_children(node) + end + + # Visit unordered list + def visit_ul(node) + render_list(node, :ul) + end + + # Visit ordered list + def visit_ol(node) + render_list(node, :ol) + end + + # Visit definition list + def visit_dl(node) + render_list(node, :dl) + end + + # Visit list code block + def visit_code_block_list(node) + result = [] + result << '' + + # Generate caption if present + caption_content = nil + if node.caption_node && node.id? + caption_content = render_children(node.caption_node) + list_header_output = generate_list_header(node.id, caption_content) + result << list_header_output if caption_top?('list') + end + + # Generate code content (already includes trailing newlines for each line) + code_content = generate_code_lines_body(node) + # Combine
    , code content, and 
    in a single string + result << "
    #{code_content}
    " + + # Add caption at bottom if configured + if caption_content && !caption_top?('list') + list_header_output = generate_list_header(node.id, caption_content) + result << list_header_output + end + + result << '
    ' + # Join without newlines (nolf mode), then add final newline + result.join + "\n" + end + + # Visit listnum code block + def visit_code_block_listnum(node) + result = [] + result << '' + + # Generate caption if present + caption_content = nil + if node.caption_node && node.id? + caption_content = render_children(node.caption_node) + list_header_output = generate_list_header(node.id, caption_content) + result << list_header_output if caption_top?('list') + end + + # Generate code content with line numbers (already includes trailing newlines for each line) + code_content = generate_listnum_body(node) + # Combine
    , code content, and 
    in a single string + result << "
    #{code_content}
    " + + # Add caption at bottom if configured + if caption_content && !caption_top?('list') + list_header_output = generate_list_header(node.id, caption_content) + result << list_header_output + end + + result << '
    ' + # Join without newlines (nolf mode), then add final newline + result.join + "\n" + end + + # Visit emlist code block + def visit_code_block_emlist(node) + caption_content = node.caption_node ? render_children(node.caption_node) : nil + quotedlist(node, 'emlist', caption_content) + end + + # Visit emlistnum code block + def visit_code_block_emlistnum(node) + caption_content = node.caption_node ? render_children(node.caption_node) : nil + quotedlist_with_linenum(node, 'emlistnum', caption_content) + end + + # Visit cmd code block + def visit_code_block_cmd(node) + caption_content = node.caption_node ? render_children(node.caption_node) : nil + quotedlist(node, 'cmd', caption_content) + end + + # Visit source code block + def visit_code_block_source(node) + result = [] + result << '' + + caption_content = node.caption_node ? render_children(node.caption_node) : nil + caption_content = nil if caption_content && caption_content.empty? + + if caption_top?('list') && caption_content + result << %Q(
    ) + end + + # Generate code content (already includes trailing newlines for each line) + code_content = generate_code_lines_body(node) + # Combine
    , code content, and 
    in a single string + result << "
    #{code_content}
    " + + if !caption_top?('list') && caption_content + result << %Q(
    ) + end + + result << '' + # Join without newlines (nolf mode), then add final newline + result.join + "\n" + end + + # Generate list header like IDGXMLBuilder + def generate_list_header(id, caption) + return '' unless caption && !caption.empty? + + %Q() + end + + # Generate code lines body like IDGXMLBuilder + def generate_code_lines_body(node) + lines = node.children.map { |line| visit(line) } + + result = [] + no = 1 + + lines.each do |line| + if config['listinfo'] + line_output = %Q() + (i + first_line_num).to_s.rjust(2) + ': ' + line, tabwidth) + + if config['listinfo'] + line_output = %Q() + + # Use present? like Builder to avoid empty caption tags + if caption_top?('list') && caption.present? + result << %Q() + end + + # Generate code content (already includes trailing newlines for each line) + code_content = generate_code_lines_body(node) + # Combine
    , code content, and 
    in a single string + # This matches IDGXMLBuilder behavior: print '
    '; print lines; puts '
    ' + result << "
    #{code_content}
    " + + if !caption_top?('list') && caption.present? + result << %Q(
    ) + end + + result << '' + # Join without newlines (nolf mode), then add final newline + result.join + "\n" + end + + # Quotedlist with line numbers + def quotedlist_with_linenum(node, css_class, caption) + result = [] + result << %Q() + + # Use present? like Builder to avoid empty caption tags + if caption_top?('list') && caption.present? + result << %Q() + end + + # Generate code content with line numbers (already includes trailing newlines for each line) + code_content = generate_listnum_body(node) + # Combine
    , code content, and 
    in a single string + result << "
    #{code_content}
    " + + if !caption_top?('list') && caption.present? + result << %Q(
    ) + end + + result << '' + # Join without newlines (nolf mode), then add final newline + result.join + "\n" + end + + # Visit regular table + def visit_regular_table(node) + @tablewidth = nil + if config['tableopt'] + pt_unit = config['pt_to_mm_unit'] + pt_unit = pt_unit.to_f if pt_unit + pt_unit = 1.0 if pt_unit.nil? || pt_unit == 0 + @tablewidth = config['tableopt'].split(',')[0].to_f / pt_unit + end + @col = 0 + + # Parse table rows + all_rows = node.header_rows + node.body_rows + rows_data = parse_table_rows_from_ast(all_rows) + + result = [] + result << '
    [#{bib_number}] #{caption_inline}#{text_formatter.format_caption_plain('equation', get_chap, @chapter.equation(node.id).number, rendered_caption)}#{caption_content}#{caption_content}#{text_formatter.format_caption_plain('list', get_chap, @chapter.list(id).number, caption)}#{caption}#{caption}#{caption}#{caption}
    ' + + caption_content = node.caption_node ? render_children(node.caption_node) : nil + + # Caption at top if configured + if caption_top?('table') && caption_content + result << generate_table_header(node.id, caption_content) + end + + # Generate tbody + result << if @tablewidth.nil? + '' + else + %Q() + end + + @table_id = node.id + + # Get cellwidth from TableNode (set by TsizeProcessor) for use in generate_table_rows + # This is a raw array of width specifications (e.g., ["10", "20", "30"] for simple format) + @table_node_cellwidth = node.cellwidth + + result << generate_table_rows(rows_data, node.header_rows.length) + + result << '' + + # Caption at bottom if configured + if !caption_top?('table') && caption_content + result << generate_table_header(node.id, caption_content) + end + + result << '
    ' + + result.join("\n") + "\n" + end + + # Parse table rows from AST + def parse_table_rows_from_ast(rows) + processed_rows = [] + + rows.each do |row_node| + cells = row_node.children.map do |cell_node| + render_children(cell_node) + end + + col_count = cells.length + @col = col_count if col_count > @col + + # Apply table width processing if enabled + if @tablewidth + cells = cells.map do |cell| + cell.gsub("\t.\t", "\tDUMMYCELLSPLITTER\t"). + gsub("\t..\t", "\t.\t"). + gsub(/\t\.\Z/, "\tDUMMYCELLSPLITTER"). + gsub(/\t\.\.\Z/, "\t."). + gsub(/\A\./, '') + end + end + + processed_rows << cells + end + + { rows: processed_rows } + end + + # Generate table header + def generate_table_header(id, caption) + return '' unless caption && !caption.empty? + + if id.nil? + %Q(#{caption}) + else + %Q(#{text_formatter.format_caption_plain('table', get_chap, @chapter.table(id).number, caption)}) + end + end + + # Generate table rows + def generate_table_rows(rows_data, header_count) + rows = rows_data[:rows] + + # Calculate cell widths + cellwidth = [] + if @tablewidth + if @table_node_cellwidth.nil? 
+ # No tsize specified - distribute width equally + @col.times { |n| cellwidth[n] = @tablewidth / @col } + else + # Extract numeric values from cellwidth specifications + # For simple format: ["p{10mm}", "p{20mm}", "p{30mm}"] -> ["10", "20", "30"] + # For IDGXML simple format: ["10", "20", "30"] (already numeric) + cellwidth = @table_node_cellwidth.map do |spec| + # Extract numeric part from p{Nmm} format or use as-is if already numeric + if /\A(\d+(?:\.\d+)?)\z/.match?(spec) + spec + elsif spec =~ /p\{(\d+(?:\.\d+)?)mm\}/ + $1 + else # rubocop:disable Style/EmptyElse + # Unknown format - use default + nil + end + end.compact + + totallength = 0 + cellwidth.size.times do |n| + cellwidth[n] = cellwidth[n].to_f / config['pt_to_mm_unit'] + totallength += cellwidth[n] + end + if cellwidth.size < @col + cw = (@tablewidth - totallength) / (@col - cellwidth.size) + (cellwidth.size..(@col - 1)).each { |i| cellwidth[i] = cw } + end + end + end + + result = [] + + # Output header rows if present + if header_count > 0 + header_count.times do |y| + if @tablewidth.nil? + result << %Q(#{rows.shift.join("\t")}) + else + i = 0 + rows.shift.each_with_index do |cell, x| + result << %Q(#{cell.sub('DUMMYCELLSPLITTER', '')}) + i += 1 + end + end + end + end + + # Output body rows + if @tablewidth + rows.each_with_index do |row, y| + i = 0 + row.each_with_index do |cell, x| + result << %Q(#{cell.sub('DUMMYCELLSPLITTER', '')}) + i += 1 + end + end + else + lastline = rows.pop + rows.each { |row| result << "#{row.join("\t")}" } + result << %Q(#{lastline.join("\t")}) if lastline + end + + result.join("\n") + end + + # Visit imgtable + def visit_imgtable(node) + caption_content = node.caption_node ? 
render_children(node.caption_node) : nil + + if @chapter.image_bound?(node.id) + metrics = parse_metric('idgxml', node.metric) + + result = [] + result << '' + + if caption_top?('table') && caption_content + result << generate_table_header(node.id, caption_content) + end + + result << %Q() + + if !caption_top?('table') && caption_content + result << generate_table_header(node.id, caption_content) + end + + result << '
    ' + + result.join("\n") + "\n" + else + # Fall back to image dummy + visit_image_dummy(node.id, caption_content, []) + end + end + + # Visit regular image + def visit_regular_image(node) + caption_content = node.caption_node ? render_children(node.caption_node) : nil + + if @chapter.image_bound?(node.id) + metrics = parse_metric('idgxml', node.metric) + + result = [] + result << '' + + if caption_top?('image') && caption_content + result << generate_image_header(node.id, caption_content) + end + + result << %Q() + + if !caption_top?('image') && caption_content + result << generate_image_header(node.id, caption_content) + end + + result << '' + + result.join("\n") + "\n" + else + # Fall back to dummy image + visit_image_dummy(node.id, caption_content, []) + end + end + + # Visit indepimage + def visit_indepimage(node) + caption_content = node.caption_node ? render_children(node.caption_node) : nil + caption_content = nil if caption_content && caption_content.empty? + metrics = parse_metric('idgxml', node.metric) + + result = [] + result << '' + + if caption_top?('image') && caption_content + result << %Q(#{caption_content}) + end + + begin + result << %Q() + rescue StandardError + # Image not found, but continue + end + + if !caption_top?('image') && caption_content + result << %Q(#{caption_content}) + end + + result << '' + + result.join("\n") + "\n" + end + + # Visit image dummy + def visit_image_dummy(id, caption, lines) + result = [] + result << '' + + if caption_top?('image') && caption + result << generate_image_header(id, caption) + end + + result << %Q(
    )
    +        lines.each do |line|
    +          result << detab(line, tabwidth)
    +          result << "\n"
    +        end
    +        result << '
    ' + + if !caption_top?('image') && caption + result << generate_image_header(id, caption) + end + + result << '' + + result.join("\n") + "\n" + end + + # Generate image header + def generate_image_header(id, caption) + return '' unless caption && !caption.empty? + + %Q(#{text_formatter.format_caption_plain('image', get_chap, @chapter.image(id).number, caption)}) + end + + # Visit rawblock + def visit_rawblock(node) + result = [] + no = 1 + + # Get lines from child TextNodes + lines = node.children.map { |child| child.is_a?(ReVIEW::AST::TextNode) ? child.content : '' } + lines.each do |line| + # Unescape HTML entities + unescaped = line.gsub('<', '<').gsub('>', '>').gsub('"', '"').gsub('&', '&') + result << unescaped + result << "\n" unless lines.length == no + no += 1 + end + + result.join + end + + # Visit comment block + def visit_comment_block(node) + return '' unless config['draft'] + + lines = [] + lines << escape(node.args.first) if node.args.first && !node.args.first.empty? + + # Process children as separate text lines (not as paragraphs) + if node.children && !node.children.empty? + node.children.each do |child| + lines << if child.is_a?(ReVIEW::AST::TextNode) + escape(child.content.to_s) + else + # For other node types, render normally + visit(child) + end + end + end + + return '' if lines.empty? + + str = lines.join("\n") + "#{str}" + end + + # Process raw embed + def process_raw_embed(node) + # Check if this embed is targeted for IDGXML + unless node.targeted_for?('idgxml') + return '' + end + + # Get content + content = node.content || '' + + # Process \n based on embed type + case node.embed_type + when :inline + # For inline raw/embed, convert literal \n to protected newline marker + content = content.gsub('\n', "\x01IDGXML_INLINE_NEWLINE\x01") + when :raw + # For raw blocks, convert \\n to actual newlines + content = content.gsub('\\n', "\n") + end + + # For block embeds, add trailing newline + node.embed_type == :block ? 
content + "\n" : content + end + + # Visit syntaxblock (box, insn) - processes lines with listinfo + def visit_syntaxblock(node) + type = node.block_type.to_s + + # Render caption if present + captionstr = nil + if node.caption_node + titleopentag = %Q(caption aid:pstyle="#{type}-title") + titleclosetag = 'caption' + if type == 'insn' + titleopentag = %Q(floattitle type="insn") + titleclosetag = 'floattitle' + end + # Use caption_node to render inline elements + caption_with_inline = render_caption_inline(node.caption_node) + captionstr = %Q(<#{titleopentag}>#{caption_with_inline}) + end + + result = [] + result << "<#{type}>" + + # Output caption at top if configured + result << captionstr if caption_top?('list') && captionstr + + # Process lines with listinfo + lines = extract_lines_from_node(node) + if config['listinfo'] && lines.any? + # Generate all listinfo entries as a single string (like IDGXMLBuilder's print/puts) + listinfo_output = lines.map.with_index do |line, i| + no = i + 1 + line_parts = [] + line_parts << %Q(' + # Always include line content (even if empty) followed by newline + # Protect newlines inside listinfo from nolf processing + line_parts << detab(line, tabwidth) + line_parts << "\x01IDGXML_LISTINFO_NEWLINE\x01" + line_parts << '' + line_parts.join + end.join + result << listinfo_output + else + lines_output = lines.map { |line| detab(line, tabwidth) + "\n" }.join + result << lines_output + end + + # Output caption at bottom if configured + result << captionstr if !caption_top?('list') && captionstr + + result << "" + result.join + "\n" + end + + # Extract lines from block node and process inline elements + def extract_lines_from_node(node) + # If node has ParagraphNode children (e.g., box/insn blocks), treat each as a separate line + if node.children.all?(AST::ParagraphNode) + # Each ParagraphNode represents one line - inline elements are already parsed + node.children.map { |para| render_children(para) } + else + # Fallback: render all 
children and split by newlines + full_content = render_children(node) + + # Split by newlines to get individual lines + # Keep empty lines (important for blank lines in the source) + lines = full_content.split("\n", -1) + + # Remove the last empty line if present (split always creates one at the end) + lines.pop if lines.last == '' + + lines + end + end + + def resolve_bibpaper_number(bib_id) + if @chapter + begin + return @chapter.bibpaper(bib_id).number + rescue StandardError + # Fallback to AST indexer if chapter lookup fails + end + end + + if @ast_indexer&.bibpaper_index + begin + return @ast_indexer.bibpaper_index.number(bib_id) + rescue StandardError + # fall through + end + end + + '??' + end + + # Parse tsize target specification like |idgxml|2 or |idgxml,html|2 + def parse_tsize_target(arg) + # Format: |target1,target2,...|value + if arg =~ /\A\|([^|]+)\|(.+)/ + targets = Regexp.last_match(1).split(',').map(&:strip) + value = Regexp.last_match(2) + [targets, value] + else + # No target specification (malformed) + [nil, arg] + end + end + + # Get tabwidth setting (default to 8) + def tabwidth + config['tabwidth'] || 8 + end + + # Graph generation helper methods (for non-mermaid graphs) + def system_graph_graphviz(_id, file_path, tf_path) + system("dot -Tpdf -o#{file_path} #{tf_path}") + end + + def system_graph_gnuplot(_id, file_path, content, tf_path) + File.open(tf_path, 'w') do |tf| + tf.puts <<~GNUPLOT + set terminal pdf + set output "#{file_path}" + #{content} + GNUPLOT + end + system("gnuplot #{tf_path}") + end + + def system_graph_blockdiag(_id, file_path, tf_path, command) + system("#{command} -Tpdf -o #{file_path} #{tf_path}") + end + end + end +end diff --git a/lib/review/renderer/inline_render_proxy.rb b/lib/review/renderer/inline_render_proxy.rb new file mode 100644 index 000000000..2f1c857e5 --- /dev/null +++ b/lib/review/renderer/inline_render_proxy.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, 
Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +module ReVIEW + module Renderer + # Shared proxy class that provides minimal interface to renderer for InlineContext classes. + # This proxy limits access to renderer methods, exposing only what's needed for inline rendering. + # + # This class is used by HTML, LaTeX, and IDGXML InlineContext classes to prevent + # tight coupling between InlineElementHandler and the full renderer interface. + # + # Common methods (always available): + # - render_children(node): Render all children of a node + # - text_formatter: Access to TextFormatter instance + # + # Optional methods (available if renderer supports them): + # - rendering_context: Current rendering context (for LaTeX footnote handling) + # - render_caption_inline(caption_node): Render caption with inline markup (for LaTeX/IDGXML) + # - increment_texinlineequation: Increment equation counter (for IDGXML math rendering) + class InlineRenderProxy + def initialize(renderer) + @renderer = renderer + end + + # Render all children of a node and join the results + # @param node [Object] The parent node whose children should be rendered + # @return [String] The joined rendered output of all children + def render_children(node) + @renderer.render_children(node) + end + + # Get TextFormatter instance from the renderer + # @return [ReVIEW::Renderer::TextFormatter] Text formatter instance + def text_formatter + @renderer.text_formatter + end + + # Get current rendering context (LaTeX-specific feature) + # @return [RenderingContext, nil] Current rendering context if available + def rendering_context + @renderer.rendering_context if @renderer.respond_to?(:rendering_context) + end + + # Render caption with inline markup (LaTeX/IDGXML-specific feature) + # @param caption_node [Object] Caption node to render + # @return [String, nil] Rendered caption 
if available + def render_caption_inline(caption_node) + if @renderer.respond_to?(:render_caption_inline) + @renderer.render_caption_inline(caption_node) + end + end + + # Increment inline equation counter (IDGXML-specific feature) + # @return [Integer, nil] Counter value if available + def increment_texinlineequation + if @renderer.respond_to?(:increment_texinlineequation) + @renderer.increment_texinlineequation + end + end + end + end +end diff --git a/lib/review/renderer/latex/inline_context.rb b/lib/review/renderer/latex/inline_context.rb new file mode 100644 index 000000000..8a3d9ae73 --- /dev/null +++ b/lib/review/renderer/latex/inline_context.rb @@ -0,0 +1,114 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'review/latexutils' +require_relative '../inline_render_proxy' + +module ReVIEW + module Renderer + module Latex + # Context for inline element rendering with business logic + # Used by InlineElementHandler + class InlineContext + include ReVIEW::LaTeXUtils + + attr_reader :config, :book, :chapter, :index_db, :index_mecab + + def initialize(config:, book:, chapter:, renderer:) + @config = config + @book = book + @chapter = chapter + # Automatically create proxy from renderer to limit access + @render_proxy = InlineRenderProxy.new(renderer) + # Initialize index support + initialize_index_support + end + + # Get current rendering context dynamically from renderer + # This ensures we always have the most up-to-date context, + # even when it changes during rendering (e.g., caption context) + def rendering_context + @render_proxy.rendering_context + end + + def chapter_link_enabled? + config['chapterlink'] + end + + def draft_mode? 
+ config['draft'] + end + + def over_secnolevel?(n) + secnolevel = config['secnolevel'] || 2 + secnolevel >= n.to_s.split('.').size + end + + def render_children(node) + @render_proxy.render_children(node) + end + + def render_caption_inline(caption_node) + @render_proxy.render_caption_inline(caption_node) + end + + def text_formatter + @render_proxy.text_formatter + end + + def bibpaper_number(bib_id) + if book.bibpaper_index.blank? + raise ReVIEW::KeyError, "unknown bib: #{bib_id}" + end + + book.bibpaper_index.number(bib_id) + end + + private + + # Initialize index support (database and MeCab) + def initialize_index_support + @index_db = {} + @index_mecab = nil + + return unless config['pdfmaker'] && config['pdfmaker']['makeindex'] + + # Load index dictionary file + if config['pdfmaker']['makeindex_dic'] + @index_db = load_idxdb(config['pdfmaker']['makeindex_dic']) + end + + return unless config['pdfmaker']['makeindex_mecab'] + + # Initialize MeCab for Japanese text indexing + begin + begin + require 'MeCab' + rescue LoadError + require 'mecab' + end + require 'nkf' + @index_mecab = MeCab::Tagger.new(config['pdfmaker']['makeindex_mecab_opts']) + rescue LoadError + # MeCab not available, will fall back to text-only indexing + end + end + + # Load index database from file + def load_idxdb(file) + table = {} + File.foreach(file) do |line| + key, value = *line.strip.split(/\t+/, 2) + table[key] = value + end + table + end + end + end + end +end diff --git a/lib/review/renderer/latex/inline_element_handler.rb b/lib/review/renderer/latex/inline_element_handler.rb new file mode 100644 index 000000000..802939693 --- /dev/null +++ b/lib/review/renderer/latex/inline_element_handler.rb @@ -0,0 +1,834 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. 
+ +require 'review/latexutils' + +module ReVIEW + module Renderer + module Latex + # Inline element handler for LaTeX rendering + # Uses InlineContext for shared logic + class InlineElementHandler + include ReVIEW::LaTeXUtils + + def initialize(inline_context) + @ctx = inline_context + @chapter = @ctx.chapter + @book = @ctx.book + @config = @ctx.config + # Initialize LaTeX character escaping + initialize_metachars(@config['texcommand']) + end + + def render_inline_b(_type, content, _node) + "\\reviewbold{#{content}}" + end + + def render_inline_i(_type, content, _node) + "\\reviewit{#{content}}" + end + + def render_inline_em(_type, content, _node) + "\\reviewem{#{content}}" + end + + def render_inline_tt(_type, content, _node) + "\\reviewtt{#{content}}" + end + + def render_inline_ttb(_type, content, _node) + "\\reviewttb{#{content}}" + end + + def render_inline_tti(_type, content, _node) + "\\reviewtti{#{content}}" + end + + def render_inline_code(_type, content, _node) + "\\reviewcode{#{content}}" + end + + def render_inline_u(_type, content, _node) + "\\reviewunderline{#{content}}" + end + + def render_inline_strong(_type, content, _node) + "\\reviewstrong{#{content}}" + end + + def render_inline_href(_type, content, node) + if node.args.length >= 2 + url = node.args[0] + text = node.args[1] + # Handle internal references (URLs starting with #) + if url.start_with?('#') + anchor = url.sub(/\A#/, '') + "\\hyperref[#{escape_latex(anchor)}]{#{escape_latex(text)}}" + elsif /\A[a-z]+:/.match?(url) + # External URL with scheme + "\\href{#{escape_url(url)}}{#{escape_latex(text)}}" + else + # Plain reference without scheme + "\\ref{#{escape_latex(url)}}" + end + else + # For single argument href, get raw text from first text child to avoid double escaping + raw_url = if node.children.first.leaf_node? 
+ node.children.first.content + else + raise NotImplementedError, "URL is invalid: #{content}" + end + # Handle internal references (URLs starting with #) + if raw_url.start_with?('#') + anchor = raw_url.sub(/\A#/, '') + "\\hyperref[#{escape_latex(anchor)}]{#{escape_latex(raw_url)}}" + elsif /\A[a-z]+:/.match?(raw_url) + # External URL with scheme + url_content = escape_url(raw_url) + "\\url{#{url_content}}" + else + # Plain reference without scheme + "\\ref{#{escape_latex(raw_url)}}" + end + end + end + + def render_inline_fn(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + footnote_number = data.item_number + + # Check if we need to use footnotetext mode + if @ctx.config['footnotetext'] + "\\footnotemark[#{footnote_number}]" + elsif @ctx.rendering_context.requires_footnotetext? + if data.caption_node + @ctx.rendering_context.collect_footnote(data.caption_node, footnote_number) + end + '\\protect\\footnotemark{}' + else + footnote_content = if data.caption_node + @ctx.render_children(data.caption_node).strip + else + escape(data.caption_text || '') + end + "\\footnote{#{footnote_content}}" + end + end + + # Render list reference + def render_inline_list(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + list_number = data.item_number + + chapter_num = @ctx.text_formatter.format_chapter_number_short(data.chapter_number, data.chapter_type) + if chapter_num && !chapter_num.empty? 
+ "\\reviewlistref{#{chapter_num}.#{list_number}}" + else + "\\reviewlistref{#{list_number}}" + end + end + + # Render listref reference (same as list) + def render_inline_listref(type, content, node) + render_inline_list(type, content, node) + end + + # Render table reference + def render_inline_table(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + table_number = data.item_number + # Use current chapter ID if chapter_id is not set in resolved_data + chapter_id = data.chapter_id || @chapter&.id + table_label = "table:#{chapter_id}:#{data.item_id}" + + short_num = @ctx.text_formatter.format_chapter_number_short(data.chapter_number, data.chapter_type) + if short_num && !short_num.empty? + "\\reviewtableref{#{short_num}.#{table_number}}{#{table_label}}" + else + "\\reviewtableref{#{table_number}}{#{table_label}}" + end + end + + # Render tableref reference (same as table) + def render_inline_tableref(type, content, node) + render_inline_table(type, content, node) + end + + # Render image reference + def render_inline_img(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + image_number = data.item_number + # Use current chapter ID if chapter_id is not set in resolved_data + chapter_id = data.chapter_id || @chapter&.id + image_label = "image:#{chapter_id}:#{data.item_id}" + + short_num = @ctx.text_formatter.format_chapter_number_short(data.chapter_number, data.chapter_type) + if short_num && !short_num.empty? 
+ "\\reviewimageref{#{short_num}.#{image_number}}{#{image_label}}" + else + "\\reviewimageref{#{image_number}}{#{image_label}}" + end + end + + # Render imgref reference (same as img) + def render_inline_imgref(type, content, node) + render_inline_img(type, content, node) + end + + # Render equation reference + def render_inline_eq(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + equation_number = data.item_number + + short_num = @ctx.text_formatter.format_chapter_number_short(data.chapter_number, data.chapter_type) + if short_num && !short_num.empty? + "\\reviewequationref{#{short_num}.#{equation_number}}" + else + "\\reviewequationref{#{equation_number}}" + end + end + + # Render eqref reference (same as eq) + def render_inline_eqref(type, content, node) + render_inline_eq(type, content, node) + end + + # Render same-chapter list reference + def render_same_chapter_list_reference(node) + list_ref = node.args.first.to_s + if @chapter && @ctx.chapter.list_index + begin + list_item = @ctx.chapter.list_index.number(list_ref) + if @ctx.chapter.number + chapter_num = @ctx.chapter.format_number(false) + "\\reviewlistref{#{chapter_num}.#{list_item}}" + else + "\\reviewlistref{#{list_item}}" + end + rescue ReVIEW::KeyError => e + raise NotImplementedError, "List reference failed for #{list_ref}: #{e.message}" + end + else + "\\ref{#{escape(list_ref)}}" + end + end + + # Render bibliography reference + def render_inline_bib(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? 
+ raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + bib_number = data.item_number + bib_id = data.item_id + "\\reviewbibref{[#{bib_number}]}{bib:#{bib_id}}" + end + + # Render bibref reference (same as bib) + def render_inline_bibref(type, content, node) + render_inline_bib(type, content, node) + end + + # Render same-chapter table reference + def render_same_chapter_table_reference(node) + table_ref = node.args.first.to_s + if @chapter && @ctx.chapter.table_index + begin + table_item = @ctx.chapter.table_index.number(table_ref) + table_label = "table:#{@ctx.chapter.id}:#{table_ref}" + if @ctx.chapter.number + chapter_num = @ctx.chapter.format_number(false) + "\\reviewtableref{#{chapter_num}.#{table_item}}{#{table_label}}" + else + "\\reviewtableref{#{table_item}}{#{table_label}}" + end + rescue ReVIEW::KeyError => e + raise NotImplementedError, "Table reference failed for #{table_ref}: #{e.message}" + end + else + "\\ref{#{escape(table_ref)}}" + end + end + + # Render same-chapter image reference + def render_same_chapter_image_reference(node) + image_ref = node.args.first.to_s + if @chapter && @ctx.chapter.image_index + begin + image_item = @ctx.chapter.image_index.number(image_ref) + image_label = "image:#{@ctx.chapter.id}:#{image_ref}" + if @ctx.chapter.number + chapter_num = @ctx.chapter.format_number(false) + "\\reviewimageref{#{chapter_num}.#{image_item}}{#{image_label}}" + else + "\\reviewimageref{#{image_item}}{#{image_label}}" + end + rescue ReVIEW::KeyError => e + raise NotImplementedError, "Image reference failed for #{image_ref}: #{e.message}" + end + else + # Don't escape underscores in ref labels + "\\ref{#{image_ref}}" + end + end + + # Render cross-chapter list reference + def render_cross_chapter_list_reference(node) + chapter_id, list_id = node.args + + # Find the target chapter + target_chapter = @ctx.book.contents&.detect { |chap| chap.id == chapter_id } + unless target_chapter + raise 
NotImplementedError, "Cross-chapter list reference failed: chapter '#{chapter_id}' not found" + end + + # Ensure the target chapter has list index + unless target_chapter.list_index + raise NotImplementedError, "Cross-chapter list reference failed: no list index for chapter '#{chapter_id}'" + end + + begin + list_item = target_chapter.list_index.number(list_id) + if target_chapter.number + chapter_num = target_chapter.format_number(false) + "\\reviewlistref{#{chapter_num}.#{list_item}}" + else + "\\reviewlistref{#{list_item}}" + end + rescue ReVIEW::KeyError => e + raise NotImplementedError, "Cross-chapter list reference failed for #{chapter_id}|#{list_id}: #{e.message}" + end + end + + # Render cross-chapter table reference + def render_cross_chapter_table_reference(node) + chapter_id, table_id = node.args + + # Find the target chapter + target_chapter = @ctx.book.contents&.detect { |chap| chap.id == chapter_id } + unless target_chapter + raise NotImplementedError, "Cross-chapter table reference failed: chapter '#{chapter_id}' not found" + end + + # Ensure the target chapter has table index + unless target_chapter.table_index + raise NotImplementedError, "Cross-chapter table reference failed: no table index for chapter '#{chapter_id}'" + end + + begin + table_item = target_chapter.table_index.number(table_id) + table_label = "table:#{chapter_id}:#{table_id}" + if target_chapter.number + chapter_num = target_chapter.format_number(false) + "\\reviewtableref{#{chapter_num}.#{table_item}}{#{table_label}}" + else + "\\reviewtableref{#{table_item}}{#{table_label}}" + end + rescue ReVIEW::KeyError => e + raise NotImplementedError, "Cross-chapter table reference failed for #{chapter_id}|#{table_id}: #{e.message}" + end + end + + # Render cross-chapter image reference + def render_cross_chapter_image_reference(node) + chapter_id, image_id = node.args + + # Find the target chapter + target_chapter = @ctx.book.contents&.detect { |chap| chap.id == chapter_id } + unless 
target_chapter + raise NotImplementedError, "Cross-chapter image reference failed: chapter '#{chapter_id}' not found" + end + + # Ensure the target chapter has image index + unless target_chapter.image_index + raise NotImplementedError, "Cross-chapter image reference failed: no image index for chapter '#{chapter_id}'" + end + + begin + image_item = target_chapter.image_index.number(image_id) + image_label = "image:#{chapter_id}:#{image_id}" + if target_chapter.number + chapter_num = target_chapter.format_number(false) + "\\reviewimageref{#{chapter_num}.#{image_item}}{#{image_label}}" + else + "\\reviewimageref{#{image_item}}{#{image_label}}" + end + rescue ReVIEW::KeyError => e + raise NotImplementedError, "Cross-chapter image reference failed for #{chapter_id}|#{image_id}: #{e.message}" + end + end + + # Render chapter number reference + def render_inline_chap(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + # Format chapter number to full form (e.g., "第1章", "付録A", "第II部") + chapter_num = @ctx.text_formatter.format_chapter_number_full(data.chapter_number, data.chapter_type) + "\\reviewchapref{#{chapter_num}}{chap:#{data.item_id}}" + end + + # Render chapter title reference + def render_inline_chapref(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? 
+ raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + display_str = @ctx.text_formatter.format_reference(:chapter, data) + "\\reviewchapref{#{escape(display_str)}}{chap:#{data.item_id}}" + end + + # Extract heading reference from node.args, handling ReferenceResolver's array splitting + # ReferenceResolver splits "ch02|ブロック命令" into ["ch02", "ブロック命令"] + # We need to join them back together to get the original format + # Build heading reference parts from resolved_data + # Returns [section_number, section_label, section_title] + def build_heading_reference_parts(data) + # Get headline_number array (e.g., [1, 2] for section 1.2) + headline_number = data.headline_number || [] + + # Get caption from caption_node + section_title = data.caption_text + + # Determine chapter context + if data.chapter_id && data.chapter_number + # Cross-chapter reference + short_chapter = @ctx.text_formatter.format_chapter_number_short(data.chapter_number, data.chapter_type) + chapter_prefix = short_chapter + elsif @chapter && @ctx.chapter.number + # Same chapter reference + short_chapter = @ctx.chapter.format_number(false) + chapter_prefix = short_chapter + else + # Reference without chapter number + short_chapter = '0' + chapter_prefix = '0' + end + + # Build section number for display + full_number_parts = [short_chapter] + headline_number + full_section_number = full_number_parts.join('.') + + # Check if we should show the number based on secnolevel + section_number = if short_chapter != '0' && @ctx.over_secnolevel?(full_section_number) + # Show full number with chapter: "2.1", "2.1.2", etc. 
+ full_section_number + else + # Without chapter number - use relative section number only + headline_number.join('.') + end + + # Generate label using chapter prefix and relative section number + relative_parts = headline_number.join('-') + section_label = "sec:#{chapter_prefix}-#{relative_parts}" + + [section_number, section_label, section_title] + end + + # Render heading reference + def render_inline_hd(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + section_number, section_label, section_title = build_heading_reference_parts(data) + "\\reviewsecref{「#{section_number} #{escape(section_title)}」}{#{section_label}}" + end + + # Render section reference + def render_inline_sec(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + section_number, section_label, _section_title = build_heading_reference_parts(data) + "\\reviewsecref{#{section_number}}{#{section_label}}" + end + + # Render section reference with full title + def render_inline_secref(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + section_number, section_label, section_title = build_heading_reference_parts(data) + "\\reviewsecref{「#{section_number} #{escape(section_title)}」}{#{section_label}}" + end + + # Render section title only + def render_inline_sectitle(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? 
+ raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + _section_number, section_label, section_title = build_heading_reference_parts(data) + "\\reviewsecref{#{escape(section_title)}}{#{section_label}}" + end + + # Render index entry + def render_inline_idx(_type, content, node) + return content unless node.args.first + + index_str = node.args.first + # Process hierarchical index like LATEXBuilder's index method + index_entry = process_index(index_str) + # Index entry like LATEXBuilder - content first, then index + "#{content}\\index{#{index_entry}}" + end + + # Render hidden index entry + def render_inline_hidx(_type, content, node) + return content unless node.args.first + + index_str = node.args.first + # Process hierarchical index like LATEXBuilder's index method + index_entry = process_index(index_str) + # Hidden index entry like LATEXBuilder - just output index, content is already rendered + "\\index{#{index_entry}}" + end + + # Process index string for hierarchical index entries (mendex/upmendex) + # This is a simplified version of LATEXBuilder's index method (latexbuilder.rb:1406-1427) + def process_index(str) + # Split by <<>> delimiter for hierarchical index entries + parts = str.split('<<>>') + + # Process each part and format for mendex + formatted_parts = parts.map { |item| format_index_item(item) } + + # Join hierarchical parts with '!' 
for mendex/upmendex + formatted_parts.join('!') + end + + # Format a single index item for mendex/upmendex + def format_index_item(item) + if ascii_only?(item) + format_ascii_index_item(item) + else + format_japanese_index_item(item) + end + end + + # Check if string contains only ASCII characters + def ascii_only?(str) + str =~ /\A[[:ascii:]]+\Z/ + end + + # Format ASCII-only index item + def format_ascii_index_item(item) + escaped_item = escape(item) + mendex_escaped = escape_index(escaped_item) + + # If no escaping was needed, just return the item + return item if mendex_escaped == item + + # Generate key@display format for proper sorting like LATEXBuilder (latexbuilder.rb:1418) + "#{escape_mendex_key(escape_index(item))}@#{escape_mendex_display(mendex_escaped)}" + end + + # Format Japanese (non-ASCII) index item with yomi reading + def format_japanese_index_item(item) + # Check dictionary first like LATEXBuilder (latexbuilder.rb:1411-1412) + index_db = @ctx.index_db + yomi = if index_db && index_db[item] + index_db[item] + else + # Generate yomi using MeCab like LATEXBuilder (latexbuilder.rb:1421-1422) + generate_yomi(item) + end + escaped_item = escape(item) + "#{escape_mendex_key(escape_index(yomi))}@#{escape_mendex_display(escape_index(escaped_item))}" + end + + # Generate yomi (reading) for Japanese text using MeCab + NKF like LATEXBuilder (latexbuilder.rb:1421) + def generate_yomi(text) + # If MeCab is available, use it to parse and generate reading + index_mecab = @ctx.index_mecab + if index_mecab + require 'nkf' + NKF.nkf('-w --hiragana', index_mecab.parse(text).force_encoding('UTF-8').chomp) + else + # Fallback: use the original text as-is if MeCab is unavailable + text + end + rescue LoadError, ArgumentError, TypeError, RuntimeError + # Fallback: use the original text as-is if processing fails + text + end + + # Render keyword notation + def render_inline_kw(_type, content, node) + if node.args.length >= 2 + term = escape(node.args[0]) + description = 
escape(node.args[1]) + "\\reviewkw{#{term}}(#{description})" + else + "\\reviewkw{#{content}}" + end + end + + # Render ruby notation + def render_inline_ruby(_type, content, node) + if node.args.length >= 2 + base_text = escape(node.args[0]) + ruby_text = escape(node.args[1]) + "\\ruby{#{base_text}}{#{ruby_text}}" + else + content + end + end + + # Render icon + def render_inline_icon(_type, content, node) + return content unless node.args.first + + icon_id = node.args.first + image_path = find_image_path(icon_id) + + if image_path + command = 'reviewicon' + "\\#{command}{#{image_path}}" + else + "\\verb|--[[path = #{icon_id} (not exist)]]--|" + end + end + + # Render ami notation + def render_inline_ami(_type, content, _node) + "\\reviewami{#{content}}" + end + + # Render bou notation + def render_inline_bou(_type, content, _node) + # Boudou (emphasis) + "\\reviewbou{#{content}}" + end + + # Render tcy notation (tate-chu-yoko: horizontal-in-vertical text) + def render_inline_tcy(_type, content, _node) + # Tate-chu-yoko (縦中横) for vertical typesetting + "\\reviewtcy{#{content}}" + end + + # Render balloon notation + def render_inline_balloon(_type, content, _node) + # Balloon annotation - content contains the balloon text + "\\reviewballoon{#{content}}" + end + + # Render mathematical expression + def render_inline_m(_type, content, node) + # Mathematical expressions - don't escape content + "$#{node.args.first || content}$" + end + + # Render superscript + def render_inline_sup(_type, content, _node) + "\\textsuperscript{#{content}}" + end + + # Render subscript + def render_inline_sub(_type, content, _node) + "\\textsubscript{#{content}}" + end + + # Render strikethrough + def render_inline_del(_type, content, _node) + "\\reviewstrike{#{content}}" + end + + # Render strikethrough (alias) + def render_inline_strike(type, content, node) + render_inline_del(type, content, node) + end + + # Render insert + def render_inline_ins(_type, content, _node) + 
"\\reviewinsert{#{content}}" + end + + # Render insert (alias) + def render_inline_insert(type, content, node) + render_inline_ins(type, content, node) + end + + # Render unicode character + def render_inline_uchar(_type, content, node) + # Unicode character handling like LATEXBuilder + if node.args.first + char_code = node.args.first + texcompiler = @ctx.config['texcommand'] + if texcompiler&.start_with?('platex') + # with otf package - use \UTF macro + "\\UTF{#{escape(char_code)}}" + else + # upLaTeX or other - convert to actual Unicode character + [char_code.to_i(16)].pack('U') + end + else + content + end + end + + # Render line break + def render_inline_br(_type, _content, _node) + "\\\\\n" + end + + # Render word expansion + def render_inline_w(_type, content, _node) + # Word expansion - pass through content + content + end + + # Render word expansion (bold) + def render_inline_wb(_type, content, _node) + # Word expansion - pass through content + content + end + + # Render raw content + def render_inline_raw(_type, _content, node) + node.targeted_for?('latex') ? (node.content || '') : '' + end + + # Render embedded content + def render_inline_embed(_type, _content, node) + node.targeted_for?('latex') ? (node.content || '') : '' + end + + # Render label reference + def render_inline_labelref(_type, content, node) + # Use resolved content from ReferenceResolver if available, + # otherwise fall back to legacy behavior + if content && !content.empty? + "\\textbf{#{escape(content)}}" + elsif node.args.first + ref_id = node.args.first + "\\ref{#{escape(ref_id)}}" + else + '' + end + end + + # Render reference (same as labelref) + def render_inline_ref(type, content, node) + render_inline_labelref(type, content, node) + end + + # Render inline comment + def render_inline_comment(_type, content, _node) + if @ctx.draft_mode? 
+ "\\pdfcomment{#{escape(content)}}" + else + '' + end + end + + # Render column reference + def render_inline_column(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + column_number = data.item_number + chapter_id = data.chapter_id || @ctx.chapter&.id + column_label = "column:#{chapter_id}:#{column_number}" + + # Render caption with inline markup + compiled_caption = if data.caption_node + @ctx.render_caption_inline(data.caption_node) + else + data.caption_text + end + column_text = @ctx.text_formatter.format_column_label(compiled_caption) + "\\reviewcolumnref{#{column_text}}{#{column_label}}" + end + + # Render endnote + def render_inline_endnote(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + endnote_content = escape(data.caption_text || '') + "\\endnote{#{endnote_content}}" + end + + # Render title reference (@{chapter_id}) + def render_inline_title(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + title = data.chapter_title || '' + if @ctx.chapter_link_enabled? 
+ "\\reviewchapref{#{escape(title)}}{chap:#{data.item_id}}" + else + escape(title) + end + end + + private + + # Find image path for icon + def find_image_path(icon_id) + @ctx.chapter&.image(icon_id)&.path + rescue StandardError + nil + end + end + end + end +end diff --git a/lib/review/renderer/latex_renderer.rb b/lib/review/renderer/latex_renderer.rb new file mode 100644 index 000000000..4ee3c3322 --- /dev/null +++ b/lib/review/renderer/latex_renderer.rb @@ -0,0 +1,1551 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'review/ast/caption_node' +require 'review/ast/table_column_width_parser' +require 'review/latexutils' +require 'review/sec_counter' +require 'review/i18n' +require 'review/textutils' +require_relative 'base' +require_relative 'rendering_context' +require_relative 'latex/inline_context' +require_relative 'latex/inline_element_handler' + +module ReVIEW + module Renderer + class LatexRenderer < Base + include ReVIEW::LaTeXUtils + include ReVIEW::TextUtils + + attr_reader :chapter, :book, :rendering_context + + def initialize(chapter) + super + + # For AST rendering, we need to set up indexing properly + # The indexing will be done when we process the AST + @ast_indexer = nil + @ast_compiler = nil + @list_structure_normalizer = nil + + I18n.setup(config['language'] || 'ja') + + # Initialize LaTeX character escaping + initialize_metachars(config['texcommand']) + + # Initialize section counter like LATEXBuilder + @sec_counter = SecCounter.new(5, @chapter) if @chapter + + # Initialize RenderingContext for cleaner state management + @rendering_context = RenderingContext.new(:document) + + # Initialize LaTeX-specific inline context and inline element handler + @inline_context = Latex::InlineContext.new( + config: config, + book: book, + chapter: 
chapter, + renderer: self + ) + @inline_element_handler = Latex::InlineElementHandler.new(@inline_context) + end + + # Format type for this renderer + # @return [Symbol] Format type :latex + def format_type + :latex + end + + def visit_document(node) + # Generate content with proper separation between document-level elements + content = render_document_children(node) + + # Wrap Part documents with reviewpart environment + if should_wrap_part_with_reviewpart? + content = "\\begin{reviewpart}\n" + content + "\\end{reviewpart}\n" + end + + # Add any remaining collected footnotetext commands + if @rendering_context.footnote_collector.any? + content += generate_footnotetext_from_collector(@rendering_context.footnote_collector) + @rendering_context.footnote_collector.clear + end + + # Ensure content ends with single newline if it contains content + # Remove all trailing newlines and add exactly one back + if content && !content.empty? + content.sub(/\n+\z/, '') + "\n" + else + content || '' + end + end + + def visit_headline(node) + level = node.level + caption = render_children(node.caption_node) if node.caption_node + + # Handle special headline options (nonum, notoc, nodisp) + # These do NOT increment the section counter (matching LATEXBuilder behavior) + if node.nodisp? + # nodisp: Only add TOC entry, no visible heading + return generate_toc_entry(level, caption) + elsif node.nonum? + # nonum: Unnumbered section that appears in TOC + return generate_nonum_headline(level, caption, node) + elsif node.notoc? 
+ # notoc: Unnumbered section that does NOT appear in TOC + return generate_notoc_headline(level, caption, node) + end + + # Update section counter like LATEXBuilder (only for regular numbered headlines) + if @sec_counter + @sec_counter.inc(level) + end + + # Regular headline processing + section_command = headline_name(level) + + # Format with exact newlines like LATEXBuilder to match expected format + result = [] + result << "\\#{section_command}{#{caption}}" + + # Add \addcontentsline for unnumbered sections within toclevel + # Match LATEXBuilder logic: only add to TOC if level is within toclevel + if (level > config['secnolevel'] || (@chapter.number.to_s.empty? && level > 1)) && + level <= config['toclevel'].to_i + # Get the base section name for TOC entry + toc_section_name = get_base_section_name(level) + result << "\\addcontentsline{toc}{#{toc_section_name}}{#{caption}}" + end + + # Generate labels like LATEXBuilder - add both automatic and custom labels + if level == 1 && @chapter + result << "\\label{chap:#{@chapter.id}}" + elsif @sec_counter && level >= 2 + # Generate section labels like LATEXBuilder (sec:x-y format) + anchor = @sec_counter.anchor(level) + result << "\\label{sec:#{anchor}}" + # Add custom label if specified (only for level > 1, matching LATEXBuilder) + if node.label && !node.label.empty? 
+ result << "\\label{#{escape(node.label)}}" + end + end + + result.join("\n") + "\n\n" + end + + def visit_paragraph(node) + content = render_children(node) + + # Check for noindent attribute + if node.attribute?(:noindent) + # Add \noindent command like LATEXBuilder + "\\noindent\n#{content}\n\n" + else + # Add double newline for paragraph separation (LaTeX standard) + "#{content}\n\n" + end + end + + def visit_text(node) + content = node.content.to_s + # Preserve newlines and escape content properly + # Don't escape newlines so they are preserved in the output + escape(content) + end + + # Process caption for code blocks with proper context management + # @param node [CodeBlockNode] The code block node + # @return [Array<String, Object>] [caption, caption_collector] + def process_code_block_caption(node) + caption = nil + caption_collector = nil + + if node.caption_node + @rendering_context.with_child_context(:caption) do |caption_context| + caption = render_children_with_context(node.caption_node, caption_context) + caption_collector = caption_context.footnote_collector + end + end + + [caption, caption_collector] + end + + # Add footnotetext commands from collector to result + # @param result [String] The rendered result + # @param caption_collector [Object] The footnote collector + # @return [String] Result with footnotetext commands appended + def append_footnotetext_from_collector(result, caption_collector) + if caption_collector && caption_collector.any? 
+ result += generate_footnotetext_from_collector(caption_collector) + caption_collector.clear + end + result + end + + # Visit list code block + def visit_code_block_list(node) + caption, caption_collector = process_code_block_caption(node) + content = render_children(node) + result = visit_list_block(node, content, caption) + append_footnotetext_from_collector(result, caption_collector) + end + + # Visit listnum code block (list with line numbers) + def visit_code_block_listnum(node) + caption, caption_collector = process_code_block_caption(node) + content = render_children(node) + result = visit_list_block(node, add_line_numbers(content, node), caption) + append_footnotetext_from_collector(result, caption_collector) + end + + # Visit emlist code block + def visit_code_block_emlist(node) + caption, caption_collector = process_code_block_caption(node) + content = render_children(node) + result = visit_emlist_block(node, content, caption) + append_footnotetext_from_collector(result, caption_collector) + end + + # Visit emlistnum code block (emlist with line numbers) + def visit_code_block_emlistnum(node) + caption, caption_collector = process_code_block_caption(node) + content = render_children(node) + result = visit_emlist_block(node, add_line_numbers(content, node), caption) + append_footnotetext_from_collector(result, caption_collector) + end + + # Visit cmd code block + def visit_code_block_cmd(node) + caption, caption_collector = process_code_block_caption(node) + content = render_children(node) + result = visit_cmd_block(node, content, caption) + append_footnotetext_from_collector(result, caption_collector) + end + + # Visit source code block + def visit_code_block_source(node) + caption, caption_collector = process_code_block_caption(node) + content = render_children(node) + result = visit_source_block(node, content, caption) + append_footnotetext_from_collector(result, caption_collector) + end + + def visit_code_line(node) + # Render children (TextNode and 
InlineNode) to process inline elements properly + content = render_children(node) + # Add proper newline for LaTeX code line formatting + "#{content}\n" + end + + def visit_table(node) + # Process caption with proper context management and collect footnotes + caption = nil + caption_collector = nil + + if node.caption_node + @rendering_context.with_child_context(:caption) do |caption_context| + caption = render_children_with_context(node.caption_node, caption_context) + # Save the collector for later processing + caption_collector = caption_context.footnote_collector + end + end + + table_type = node.table_type + + # Handle imgtable specially - it should be rendered as an image + if table_type == :imgtable + result = visit_imgtable(node, caption) + # Add collected footnotetext commands from caption context for imgtable + if caption_collector && caption_collector.any? + result += generate_footnotetext_from_collector(caption_collector) + caption_collector.clear + end + return result + end + + # Process table content with table context + table_context = nil + table_content = @rendering_context.with_child_context(:table) do |ctx| + table_context = ctx + + # Get column specification from TableNode (set by TsizeProcessor) + # or use default values if not set + col_spec = node.col_spec || node.default_col_spec + cellwidth = node.cellwidth || node.default_cellwidth + + # Store cellwidth temporarily for visit_table_cell_with_index to access + # This is needed because cell rendering happens in nested visitor calls + @current_table_cellwidth = cellwidth + + # Get all rows for processing + all_rows = node.header_rows + node.body_rows + + result = [] + + # Only output \begin{table} if caption is present (like LATEXBuilder) + if caption.present? + result << if node.id? + "\\begin{table}%%#{node.id}" + else + '\\begin{table}%%' + end + end + + # Process caption and label + if caption.present? 
+ # emtable uses reviewtablecaption* (with asterisk) + caption_command = table_type == :emtable ? 'reviewtablecaption*' : 'reviewtablecaption' + result << "\\#{caption_command}{#{caption}}" + end + + if node.id? + # Generate label like LATEXBuilder: table:chapter:id + # Don't escape underscores in labels - they're allowed in LaTeX label names + result << if @chapter + "\\label{table:#{@chapter.id}:#{node.id}}" + else + "\\label{table:test:#{node.id}}" + end + end + + result << "\\begin{reviewtable}{#{col_spec}}" + result << '\\hline' + + # Process all rows using visitor pattern with table context + all_rows.each do |row| + row_content = visit_with_context(row, table_context) + result << "#{row_content} \\\\ \\hline" + end + + result << '\\end{reviewtable}' + + # Only output \end{table} if caption is present (like LATEXBuilder) + if caption.present? + result << '\\end{table}' + end + + result.join("\n") + "\n" + end + + # Add collected footnotetext commands from caption context + if caption_collector && caption_collector.any? + table_content += generate_footnotetext_from_collector(caption_collector) + caption_collector.clear + end + + # Add collected footnotetext commands from table context + if table_context && table_context.footnote_collector.any? + table_content += generate_footnotetext_from_collector(table_context.footnote_collector) + table_context.footnote_collector.clear + end + + table_content.chomp + "\n\n" + end + + def visit_imgtable(node, caption) + # imgtable is rendered as table with image inside (like LATEXBuilder) + result = [] + + # Check if image is bound like LATEXBuilder does + unless node.id? && @chapter && @chapter.image_bound?(node.id) + # No ID or chapter, or image not bound - return dummy + result << '\\begin{reviewdummyimage}' + result << "% image not bound: #{node.id}" if node.id? 
+ result << '\\end{reviewdummyimage}' + return result.join("\n") + "\n" + end + + # Get image path - image is bound, so this should succeed + image_path = @chapter.image(node.id).path + + # Generate table structure with image like LATEXBuilder + # Start table environment if caption exists (line 911) + if caption && !caption.empty? + result << "\\begin{table}[h]%%#{node.id}" + + # Add caption and label at top if caption_top? + if caption_top?('table') + result << "\\reviewimgtablecaption{#{caption}}" + end + + # Add table label (line 919) - this needs table index + begin + result << "\\label{table:#{@chapter.id}:#{node.id}}" + rescue ReVIEW::KeyError + # If table lookup fails, still continue + end + end + + # Add image inside reviewimage environment (lines 937-949) + result << "\\begin{reviewimage}%%#{node.id}" + + # Parse metric option like LATEXBuilder + metrics = parse_metric('latex', node.metric) + command = 'reviewincludegraphics' + + # Use metric if provided, otherwise use default width + result << if metrics.present? + "\\#{command}[#{metrics}]{#{image_path}}" + else + "\\#{command}[width=\\maxwidth]{#{image_path}}" + end + + result << '\\end{reviewimage}' + + # Close table if caption exists + if caption.present? + # Add caption at bottom if not caption_top? 
+ unless caption_top?('table') + result << "\\reviewimgtablecaption{#{caption}}" + end + + result << '\\end{table}' + end + + result.join("\n") + "\n\n" + end + + def visit_table_row(node) + # Process all cells in the row using visitor pattern while maintaining table context + # Note: table context should already be set by visit_table + cells = node.children.map.with_index do |cell, col_index| + visit_table_cell_with_index(cell, col_index) + end + cells.join(' & ') + end + + def visit_table_cell(node) + # Fallback method if called without index + visit_table_cell_with_index(node, 0) + end + + def visit_table_cell_with_index(node, col_index) + # Process cell content while maintaining table context to collect footnotes + # Note: table context should already be set by visit_table + content = render_children(node) + + # Get cellwidth for this column from current table's cellwidth array + cellwidth = @current_table_cellwidth && @current_table_cellwidth[col_index] ? @current_table_cellwidth[col_index] : 'l' + + # Check if content contains line breaks (from @<br>{}) + # Like LATEXBuilder: use \newline{} for fixed-width cells (p{...}), otherwise use \shortstack + if /\\\\/.match?(content) + # Check if cellwidth is fixed-width format (contains `{`) + if AST::TableColumnWidthParser.fixed_width?(cellwidth) + # Fixed-width cell: replace \\\n with \newline{} + content = content.gsub("\\\\\n", '\\newline{}') + if node.cell_type == :th + "\\reviewth{#{content}}" + else + content + end + elsif node.cell_type == :th + # Non-fixed-width cell: use \shortstack[l] like LATEXBuilder does + "\\reviewth{\\shortstack[l]{#{content}}}" + else + "\\shortstack[l]{#{content}}" + end + elsif node.cell_type == :th + # No line breaks - standard formatting + "\\reviewth{#{content}}" + else + content + end + end + + def visit_image(node) + # Process caption with proper context management + caption = nil + caption_collector = nil + + if node.caption_node + 
@rendering_context.with_child_context(:caption) do |caption_context| + caption = render_children_with_context(node.caption_node, caption_context) + # Save the collector for later processing + caption_collector = caption_context.footnote_collector + end + end + + image_type = node.image_type + + result = case image_type + when :indepimage, :numberlessimage + visit_indepimage(node, caption) + else + visit_regular_image(node, caption) + end + + # Add collected footnotetext commands from caption context + if caption_collector && caption_collector.any? + result += generate_footnotetext_from_collector(caption_collector) + caption_collector.clear + end + + result + end + + def visit_regular_image(node, caption) + image_path = find_image_path(node.id) + + if image_path + render_existing_image(node, image_path, caption, with_label: true) + else + render_dummy_image(node, caption, double_escape_id: false, with_label: true) + end + end + + def visit_indepimage(node, caption) + image_path = find_image_path(node.id) + + if image_path + render_existing_indepimage(node, image_path, caption) + else + render_dummy_image(node, caption, double_escape_id: true, with_label: false) + end + end + + def visit_list(node) + case node.list_type + when :ul + # Unordered list - generate LaTeX itemize environment + items = node.children.map { |item| "\\item #{render_children(item)}" }.join("\n") + "\n\\begin{itemize}\n#{items}\n\\end{itemize}\n\n" + when :ol + # Ordered list - generate LaTeX enumerate environment + items = node.children.map { |item| "\\item #{render_children(item)}" }.join("\n") + + # Check if this list has start_number + if node.start_number && node.start_number != 1 + # Generate enumerate with setcounter for non-default start + start_num = node.start_number - 1 # LaTeX counter is 0-based + "\n\\begin{enumerate}\n\\setcounter{enumi}{#{start_num}}\n#{items}\n\\end{enumerate}\n\n" + else + "\n\\begin{enumerate}\n#{items}\n\\end{enumerate}\n\n" + end + when :dl + # Definition 
list - generate LaTeX description environment like LATEXBuilder + visit_definition_list(node) + else + raise NotImplementedError, "Unsupported list type: #{node.list_type}" + end + end + + def visit_list_item(node) + raise NotImplementedError, 'List item processing should be handled by visit_list, not as standalone items' + end + + # Visit quote block + def visit_block_quote(node) + content = render_children(node) + result = "\n\\begin{quote}\n#{content.chomp}\\end{quote}\n\n" + apply_noindent_if_needed(node, result) + end + + # Visit source block (code block without caption) + def visit_block_source(node) + content = render_children(node) + "\\begin{reviewcmd}\n#{content}\\end{reviewcmd}\n" + end + + # Visit lead block (lead paragraph) + def visit_block_lead(node) + content = render_children(node) + result = "\n\\begin{quotation}\n#{content.chomp}\\end{quotation}\n\n" + apply_noindent_if_needed(node, result) + end + + # Visit olnum block (set ordered list counter) + def visit_block_olnum(node) + # olnum is now handled as metadata in list processing + # If we encounter it here, it means there was no following ordered list + # In this case, we should still generate the setcounter command for compatibility + if node.args.first + num = node.args.first.to_i + "\\setcounter{enumi}{#{num - 1}}\n" + else + "\\setcounter{enumi}{0}\n" + end + end + + # Visit footnote block + def visit_block_footnote(node) + # Handle footnote blocks - generate \footnotetext LaTeX command + if node.args.length >= 2 + footnote_id = node.args[0] + footnote_content = escape(node.args[1]) + # Generate footnote number like LaTeXBuilder does + if @chapter && @chapter.footnote_index + begin + footnote_number = @chapter.footnote_index.number(footnote_id) + "\\footnotetext[#{footnote_number}]{#{footnote_content}}\n" + rescue ReVIEW::KeyError => e + raise NotImplementedError, "Footnote block processing failed for #{footnote_id}: #{e.message}" + end + else + raise NotImplementedError, 'Footnote 
processing requires chapter context but none provided' + end + else + raise NotImplementedError, 'Malformed footnote block: insufficient arguments' + end + end + + # Visit tsize block (table size control) + def visit_block_tsize(_node) + # tsize is now processed by TsizeProcessor during AST compilation + # The tsize block nodes are removed from AST by TsizeProcessor, + # so this case should not be reached. Return empty string for safety. + '' + end + + # Visit texequation block (mathematical equation) + def visit_block_texequation(node) + content = render_children(node) + # Handle mathematical equation blocks - output content directly + # without LaTeX environment wrapping since content is raw LaTeX math + content.strip.empty? ? '' : "\n#{content}\n\n" + end + + # Visit comment block + def visit_block_comment(node) + # Handle comment blocks - only output in draft mode + visit_comment_block(node) + end + + # Visit beginchild block (child nesting control) + def visit_block_beginchild(_node) + # Child nesting control commands - produce no output + '' + end + + # Visit endchild block (child nesting control) + def visit_block_endchild(_node) + # Child nesting control commands - produce no output + '' + end + + # Visit centering block (center alignment) + def visit_block_centering(node) + content = render_children(node) + "\n\\begin{center}\n#{content.chomp}\\end{center}\n\n" + end + + # Visit flushright block (right alignment) + def visit_block_flushright(node) + content = render_children(node) + "\n\\begin{flushright}\n#{content.chomp}\\end{flushright}\n\n" + end + + # Visit address block (similar to flushright) + def visit_block_address(node) + content = render_children(node) + "\n\\begin{flushright}\n#{content.chomp}\\end{flushright}\n\n" + end + + # Visit talk block (dialog/conversation) + def visit_block_talk(node) + content = render_children(node) + "#{content}\n" + end + + # Visit read block (reading material) + def visit_block_read(node) + content = 
render_children(node) + "\n\\begin{quotation}\n#{content.chomp}\\end{quotation}\n\n" + end + + # Visit blockquote block + def visit_block_blockquote(node) + content = render_children(node) + "\n\\begin{quote}\n#{content.chomp}\\end{quote}\n\n" + end + + # Visit printendnotes block (print collected endnotes) + def visit_block_printendnotes(_node) + "\n\\theendnotes\n\n" + end + + # Visit label block (create label) + def visit_block_label(node) + if node.args.first + label_id = node.args.first + "\\label{#{escape(label_id)}}\n" + else + '' + end + end + + # Visit blankline block (control command) + def visit_block_blankline(_node) + "\\par\\vspace{\\baselineskip}\\par\n\n" + end + + # Visit noindent block (control command) + def visit_block_noindent(_node) + '' + end + + # Visit pagebreak block (control command) + def visit_block_pagebreak(_node) + '' + end + + # Visit endnote block (control command) + def visit_block_endnote(_node) + '' + end + + # Visit hr block (control command) + def visit_block_hr(_node) + '' + end + + # Visit bpo block (control command) + def visit_block_bpo(_node) + '' + end + + # Visit parasep block (control command) + def visit_block_parasep(_node) + '' + end + + # Visit bibpaper block (bibliography paper) + def visit_block_bibpaper(node) + visit_bibpaper(node) + end + + def visit_minicolumn(node) + # Process caption with proper context management and collect footnotes + caption = nil + caption_collector = nil + + if node.caption_node + @rendering_context.with_child_context(:caption) do |caption_context| + caption = render_children_with_context(node.caption_node, caption_context) + # Save the collector for later processing + caption_collector = caption_context.footnote_collector + end + end + + content = render_children(node) + + env_name = case node.minicolumn_type.to_s + when 'note' + 'reviewnote' + when 'memo' + 'reviewmemo' + when 'tip' + 'reviewtip' + when 'info' + 'reviewinfo' + when 'warning' + 'reviewwarning' + when 'important' + 
'reviewimportant' + when 'caution' + 'reviewcaution' + when 'notice' + 'reviewnotice' + else + 'reviewcolumn' + end + + result = [] + result << if caption && !caption.empty? + "\\begin{#{env_name}}[#{caption}]" + else + "\\begin{#{env_name}}" + end + result << '' # blank line after begin + result << content.chomp + result << "\\end{#{env_name}}" + + output = result.join("\n") + "\n\n" + + # Add collected footnotetext commands from caption context + if caption_collector && caption_collector.any? + output += generate_footnotetext_from_collector(caption_collector) + caption_collector.clear + end + + output + end + + def visit_caption(node) + render_children(node) + end + + def visit_comment_block(node) + # block comment - only display in draft mode + return '' unless config['draft'] + + content_lines = [] + + # add argument if it exists + if node.args.first&.then { |arg| !arg.empty? } + content_lines << escape(node.args.first) + end + + # add body content + if node.content && !node.content.empty? + body_content = render_children(node) + content_lines << body_content unless body_content.empty? + end + + return '' if content_lines.empty? + + # use pdfcomment macro in LaTeX + content_str = content_lines.join('\\par ') + "\\pdfcomment{#{content_str}}\n" + end + + def visit_column(node) + caption = render_children(node.caption_node) if node.caption_node + + # Generate column label for hypertarget (using auto_id from Compiler) + column_label = generate_column_label(node, caption) + hypertarget = "\\hypertarget{#{column_label}}{}" + + # Process column content with :column context to collect footnotes + column_context = nil + content = @rendering_context.with_child_context(:column) do |ctx| + column_context = ctx + render_children_with_context(node, column_context) + end + + result = [] + result << '' # blank line before column + + # support Re:VIEW Version 3+ format only + caption_part = caption ? 
"[#{caption}#{hypertarget}]" : "[#{hypertarget}]" + result << "\\begin{reviewcolumn}#{caption_part}" + + # Add TOC entry if within toclevel + if node.level && caption && node.level <= config['toclevel'].to_i + toc_level = case node.level + when 1 + 'chapter' + when 2 + 'section' + when 3 + 'subsection' + when 4 + 'subsubsection' + else # rubocop:disable Lint/DuplicateBranch + 'subsection' # fallback + end + result << "\\addcontentsline{toc}{#{toc_level}}{#{caption}}" + end + + result << '' # blank line after header + result << content.chomp + result << '\\end{reviewcolumn}' + result << '' # blank line after column + + output = result.join("\n") + "\n" + + # Add collected footnotetext commands from column context + if column_context && column_context.footnote_collector.any? + output += generate_footnotetext_from_collector(column_context.footnote_collector) + column_context.footnote_collector.clear + end + + output + end + + def visit_embed(node) + # All embed types now use unified processing + process_raw_embed(node) + end + + # Code block type handlers + def visit_list_block(node, content, caption) + result = [] + result << '\\begin{reviewlistblock}' + + if caption && !caption.empty? + # Use LATEXBuilder logic for list caption with proper numbering + if node.id? 
+ # For list blocks with ID, generate numbered caption like LATEXBuilder + begin + list_item = @chapter.list(node.id) + list_num = list_item.number + chapter_num = @chapter.number + captionstr = "\\reviewlistcaption{#{text_formatter.format_caption('list', chapter_num, list_num, caption)}}" + result << captionstr + rescue ReVIEW::KeyError + raise NotImplementedError, "no such list: #{node.id}" + end + else + # For list blocks without ID, use simple caption + result << "\\reviewlistcaption{#{caption}}" + end + end + + result << '\\begin{reviewlist}' + result << content.chomp + result << '\\end{reviewlist}' + result << '\\end{reviewlistblock}' + result.join("\n") + "\n\n" + end + + def visit_emlist_block(_node, content, caption) + result = [] + result << '\\begin{reviewlistblock}' + + if caption && !caption.empty? + result << "\\reviewemlistcaption{#{caption}}" + end + + result << '\\begin{reviewemlist}' + result << content.chomp + result << '\\end{reviewemlist}' + + result << '\\end{reviewlistblock}' + result.join("\n") + "\n\n" + end + + def visit_cmd_block(_node, content, caption) + result = [] + result << '\\begin{reviewlistblock}' + + if caption && !caption.empty? + result << "\\reviewcmdcaption{#{caption}}" + end + + result << '\\begin{reviewcmd}' + result << content.chomp + result << '\\end{reviewcmd}' + result << '\\end{reviewlistblock}' + result.join("\n") + "\n\n" + end + + def visit_source_block(_node, content, caption) + result = [] + result << '\\begin{reviewlistblock}' + + if caption && !caption.empty? + result << "\\reviewsourcecaption{#{caption}}" + end + + result << '\\begin{reviewsource}' + result << content.chomp + result << '\\end{reviewsource}' + result << '\\end{reviewlistblock}' + result.join("\n") + "\n\n" + end + + def visit_tex_equation(node) + # Handle LaTeX mathematical equation blocks + # Output the LaTeX content directly without escaping since it's raw LaTeX + content = node.content + + if node.id? && node.caption? 
+ # Equation with ID and caption - use reviewequationblock like traditional compiler + equation_num = get_equation_number(node.id) + caption_content = render_children(node.caption_node) + result = [] + result << '\\begin{reviewequationblock}' + result << "\\reviewequationcaption{#{escape("式#{equation_num}: #{caption_content}")}}" + result << '\\begin{equation*}' + result << content + result << '\\end{equation*}' + result << '\\end{reviewequationblock}' + elsif node.id? + # Equation with ID only - still use reviewequationblock for consistency + equation_num = get_equation_number(node.id) + result = [] + result << '\\begin{reviewequationblock}' + result << "\\reviewequationcaption{#{escape("式#{equation_num}")}}" + result << '\\begin{equation*}' + result << content + result << '\\end{equation*}' + result << '\\end{reviewequationblock}' + else + # Equation without ID - use equation* environment (no numbering) + result = [] + result << '\\begin{equation*}' + result << content + result << '\\end{equation*}' + end + result.join("\n") + "\n\n" + end + + # Get equation number for texequation blocks + def get_equation_number(equation_id) + if @chapter && @chapter.equation_index + begin + equation_number = @chapter.equation_index.number(equation_id) + if @chapter.number + "#{@chapter.number}.#{equation_number}" + else + equation_number.to_s + end + rescue ReVIEW::KeyError + # Fallback if equation not found in index + '??' + end + else + '??' 
+ end + end + + def visit_bibpaper(node) + # Extract bibliography arguments + if node.args.length >= 2 + bib_id = node.args[0] + bib_caption = node.args[1] + + # Process content + content = render_children(node) + + # Generate bibliography entry like LATEXBuilder + result = [] + + # Header with number and caption + if @book.bibpaper_index + begin + bib_number = @book.bibpaper_index.number(bib_id) + result << "[#{bib_number}] #{escape(bib_caption)}" + rescue ReVIEW::KeyError => e + # Fallback if not found in index + warn "Bibpaper #{bib_id} not found in index: #{e.message}" if $DEBUG + result << "[??] #{escape(bib_caption)}" + end + elsif @ast_indexer && @ast_indexer.bibpaper_index + # Try to get from AST indexer if chapter index not available + begin + bib_number = @ast_indexer.bibpaper_index.number(bib_id) + result << "[#{bib_number}] #{escape(bib_caption)}" + rescue ReVIEW::KeyError + result << "[??] #{escape(bib_caption)}" + end + else + result << "[??] #{escape(bib_caption)}" + end + + # Add label for cross-references + result << "\\label{bib:#{escape(bib_id)}}" + result << '' + + # Add content - process paragraphs + result << if config['join_lines_by_lang'] + split_paragraph(content).join("\n\n") + else + content + end + + result.join("\n") + "\n" + else + raise NotImplementedError, 'Malformed bibpaper block: insufficient arguments' + end + end + + # Add line numbers to content like LATEXBuilder does + def add_line_numbers(content, node = nil) + lines = content.split("\n") + numbered_lines = [] + + # Use node.first_line_num if set, otherwise start from 1 + start_num = node&.first_line_num || 1 + + lines.each_with_index do |line, i| + next if line.strip.empty? 
&& i == lines.length - 1 # Skip last empty line + + numbered_lines << sprintf('%2d: %s', start_num + i, line) + end + + numbered_lines.join("\n") + end + + # Render footnote content for footnotetext + # This method processes the footnote node's children to properly handle + # inline markup like @<b>{text} within footnotes + def render_footnote_content(footnote_node) + render_children(footnote_node) + end + + # Render inline elements from caption_node + # @param caption_node [CaptionNode] Caption node to render + # @return [String] Rendered inline elements + def render_caption_inline(caption_node) + caption_node ? render_children(caption_node) : '' + end + + private + + # Get image path, returning nil if image doesn't exist + def find_image_path(id) + path = @chapter.image(id).path + path && !path.empty? ? path : nil + rescue StandardError + nil + end + + # Render existing image (for regular //image) + def render_existing_image(node, image_path, caption, with_label:) + result = [] + result << if node.id? + "\\begin{reviewimage}%%#{node.id}" + else + '\\begin{reviewimage}' + end + + metrics = parse_metric('latex', node.metric) + command = 'reviewincludegraphics' + + result << if metrics && !metrics.empty? + "\\#{command}[#{metrics}]{#{image_path}}" + else + "\\#{command}[width=\\maxwidth]{#{image_path}}" + end + + result << "\\reviewimagecaption{#{caption}}" if caption && !caption.empty? + + if with_label && node.id? + result << if @chapter + "\\label{image:#{@chapter.id}:#{node.id}}" + else + "\\label{image:test:#{node.id}}" + end + end + + result << '\\end{reviewimage}' + result.join("\n") + "\n" + end + + # Render existing indepimage (for //indepimage) + def render_existing_indepimage(node, image_path, caption) + result = [] + result << "\\begin{reviewimage}%%#{node.id}" + + if caption_top?('image') && caption && !caption.empty? 
+ caption_str = "\\reviewindepimagecaption{#{text_formatter.format_numberless_image}#{text_formatter.format_caption_prefix}#{caption}}" + result << caption_str + end + + metrics = parse_metric('latex', node.metric) + command = 'reviewincludegraphics' + + result << if metrics && !metrics.empty? + "\\#{command}[#{metrics}]{#{image_path}}" + else + "\\#{command}[width=\\maxwidth]{#{image_path}}" + end + + if !caption_top?('image') && caption && !caption.empty? + caption_str = "\\reviewindepimagecaption{#{text_formatter.format_numberless_image}#{text_formatter.format_caption_prefix}#{caption}}" + result << caption_str + end + + result << '\\end{reviewimage}' + result.join("\n") + "\n" + end + + # Render dummy image for missing images + def render_dummy_image(node, caption, double_escape_id:, with_label:) + result = [] + result << '\\begin{reviewdummyimage}' + + if node.id? + # For regular images: single escape, for indepimage: double escape (like Builder) + result << if double_escape_id + escape_latex("--[[path = #{escape_latex(node.id)} (not exist)]]--") + else + escape_latex("--[[path = #{node.id} (not exist)]]--") + end + end + + if with_label && node.id? + result << if @chapter + "\\label{image:#{@chapter.id}:#{node.id}}" + else + "\\label{image:test:#{node.id}}" + end + end + + if caption && !caption.empty? 
+ result << if double_escape_id + # indepimage uses reviewindepimagecaption + "\\reviewindepimagecaption{#{text_formatter.format_numberless_image}#{text_formatter.format_caption_prefix}#{caption}}" + else + # regular image uses reviewimagecaption + "\\reviewimagecaption{#{caption}}" + end + end + + result << '\\end{reviewdummyimage}' + result.join("\n") + "\n" + end + + def ast_compiler + @ast_compiler ||= ReVIEW::AST::Compiler.for_chapter(@chapter) + end + + # Render definition list with proper footnote handling + # Footnotes in definition terms require special handling in LaTeX: + # they must use \protect\footnotemark{} in the term and \footnotetext + # after the description environment + def visit_definition_list(node) + dl_context = nil + items_content = @rendering_context.with_child_context(:dl) do |ctx| + dl_context = ctx + # Temporarily set the renderer's context to the dl context + old_context = @rendering_context + @rendering_context = dl_context + + items = node.children.map do |item| + render_definition_item(item, dl_context) + end.join("\n") + + # Restore the previous context + @rendering_context = old_context + items + end + + # Build output + result = "\n\\begin{description}\n#{items_content}\n\\end{description}\n\n" + + # Add collected footnotetext commands from dt contexts (transferred to dl_context) + if dl_context && dl_context.footnote_collector.any? + result += generate_footnotetext_from_collector(dl_context.footnote_collector) + dl_context.footnote_collector.clear + end + + result + end + + # Render a single definition list item + def render_definition_item(item, dl_context) + # Render term with :dt context like LATEXBuilder does (latexbuilder.rb:361-382) + term = render_definition_term(item, dl_context) + + # Escape square brackets in terms like LATEXBuilder does + term = term.gsub('[', '\\lbrack{}').gsub(']', '\\rbrack{}') + + # Handle definition content (all children are definition content) + if item.children && !item.children.empty? 
+ definition_parts = item.children.map do |child| + result = visit(child) # Use visit instead of render_children for individual nodes + # Strip all trailing whitespace and newlines + # LATEXBuilder's dd() joins lines with "\n", so we need single newlines between paragraphs + result.rstrip + end + # Join with single newline to match LATEXBuilder dd() behavior (lines.map(&:chomp).join("\n")) + definition = definition_parts.join("\n") + + # Use exact LATEXBuilder format: \item[term] \mbox{} \\ + "\\item[#{term}] \\mbox{} \\\\\n#{definition}" + else + # No definition content - term only + "\\item[#{term}] \\mbox{} \\\\" + end + end + + # Render definition term with proper footnote collection + def render_definition_term(item, dl_context) + term = nil + dt_footnote_collector = nil + + @rendering_context.with_child_context(:dt) do |dt_context| + # Temporarily set renderer's context to dt context for term rendering + old_dt_context = @rendering_context + @rendering_context = dt_context + + term = if item.term_children&.any? + # Render term children (which contain inline elements) + item.term_children.map { |child| visit(child) }.join + else + '' + end + + @rendering_context = old_dt_context + + # Save dt_context's footnote collector to transfer footnotes to dl_context + dt_footnote_collector = dt_context.footnote_collector + end + + # Transfer footnotes from dt_context to dl_context + if dt_footnote_collector && dt_footnote_collector.any? + dt_footnote_collector.each do |entry| + dl_context.collect_footnote(entry.node, entry.number) + end + dt_footnote_collector.clear + end + + term + end + + # Generate LaTeX footnotetext commands from collected footnotes + # @param collector [FootnoteCollector] the footnote collector + # @return [String] LaTeX footnotetext commands + def generate_footnotetext_from_collector(collector) + return '' unless collector.any? 
+ + footnotetext_commands = [] + collector.each do |entry| + content = render_footnote_content(entry.node) + footnotetext_commands << "\\footnotetext[#{entry.number}]{#{content}}" + end + + footnotetext_commands.join("\n") + "\n" + end + + HEADLINE = { # rubocop:disable Lint/UselessConstantScoping + 1 => 'chapter', + 2 => 'section', + 3 => 'subsection', + 4 => 'subsubsection', + 5 => 'paragraph', + 6 => 'subparagraph' + }.freeze + + def headline_name(level) + name = if @chapter.is_a?(ReVIEW::Book::Part) && level == 1 + 'part' + else + HEADLINE[level] || raise(CompileError, "Unsupported headline level: #{level}. LaTeX only supports levels 1-6") + end + + if level > config['secnolevel'] || (@chapter.number.to_s.empty? && level > 1) + "#{name}*" + else + name + end + end + + def render_inline_element(type, content, node) + # Delegate to inline element handler + method_name = "render_inline_#{type}" + if @inline_element_handler.respond_to?(method_name, true) + @inline_element_handler.send(method_name, type, content, node) + else + raise NotImplementedError, "Unknown inline element: #{type}" + end + end + + def visit_reference(node) + if node.resolved? + format_resolved_reference(node.resolved_data) + else + # Reference resolution was skipped or disabled + # Return content as fallback + escape(node.content || '') + end + end + + # Format resolved reference based on ResolvedData + # Gets plain text from TextFormatter and wraps it with LaTeX markup + def format_resolved_reference(data) + # Get plain text from TextFormatter (no LaTeX markup) + plain_text = text_formatter.format_reference(data.reference_type, data) + + # Wrap with LaTeX-specific markup based on reference type + case data.reference_type + when :image, :table, :list + # For image/table/list, use \ref command + if data.cross_chapter? 
+ "\\ref{#{data.chapter_id}:#{data.item_id}}" + else + "\\ref{#{data.item_id}}" + end + when :equation + # For equation, use \ref command + "\\ref{#{data.item_id}}" + when :footnote + # For footnote, use \footnotemark command + "\\footnotemark[#{data.item_number}]" + when :bibpaper + # For bibliography, use \reviewbibref command + "\\reviewbibref{[#{data.item_number}]}{bib:#{data.item_id}}" + else + # For other types (chapter, headline, column, word, endnote), return escaped plain text + escape(plain_text) + end + end + + # Render document children with proper separation + def render_document_children(node) + results = [] + node.children.each_with_index do |child, _index| + result = visit(child) + next if result.nil? || result.empty? + + # Add proper separation after raw embeds + if child.is_a?(ReVIEW::AST::EmbedNode) && child.embed_type == :raw && !result.end_with?("\n") + result += "\n" + end + + results << result + end + + content = results.join + + # Post-process to fix consecutive minicolumn blocks spacing like LATEXBuilder's solve_nest + # When minicolumn blocks are consecutive, remove extra blank line between them + # Pattern: \end{reviewnote}\n\n\begin{reviewnote} should become \end{reviewnote}\n\begin{reviewnote} + content.gsub!(/\\end\{(reviewnote|reviewmemo|reviewtip|reviewinfo|reviewwarning|reviewimportant|reviewcaution|reviewnotice)\}\n\n\\begin\{(reviewnote|reviewmemo|reviewtip|reviewinfo|reviewwarning|reviewimportant|reviewcaution|reviewnotice)\}/, + "\\\\end{\\1}\n\\\\begin{\\2}") + + content + end + + # Render children with specific rendering context + def render_children_with_context(node, context) + old_context = @rendering_context + @rendering_context = context + result = render_children(node) + @rendering_context = old_context + result + end + + # Visit node with specific rendering context + def visit_with_context(node, context) + old_context = @rendering_context + @rendering_context = context + result = visit(node) + @rendering_context = 
old_context + result + end + + def visit_footnote(_node) + # FootnoteNode represents a footnote definition (//footnote[id][content]) + # In AST rendering, footnote definitions do not produce direct output. + # Instead, footnotes are rendered via: + # 1. @<fn>{id} inline references produce \footnotemark or \footnote + # 2. Collected footnotes (from captions/tables) are output as \footnotetext + # by the parent node (code_block, table, image) after processing + '' + end + + # Check caption position configuration + def caption_top?(type) + unless %w[top bottom].include?(config.dig('caption_position', type)) + # Default to top if not configured + return true + end + + config['caption_position'][type] != 'bottom' + end + + # This method calls super to use the base implementation, then applies LaTeX-specific logic + def parse_metric(type, metric) + s = super + # If use_original_image_size is enabled and result is empty and no metric provided + if config&.dig('pdfmaker', 'use_original_image_size') && s.empty? && !metric&.present? + return ' ' # pass empty space to \reviewincludegraphics to use original size + end + + s + end + + # Handle individual metric transformations (like scale to width conversion) + def handle_metric(str) + # Check if image_scale2width is enabled and metric is scale + if config&.dig('pdfmaker', 'image_scale2width') && str =~ /\Ascale=([\d.]+)\Z/ + return "width=#{$1}\\maxwidth" + end + + str + end + + # Apply noindent if the node has the noindent attribute + def apply_noindent_if_needed(node, content) + if node.attribute?(:noindent) + "\\noindent\n#{content}" + else + content + end + end + + # Check if Part document should be wrapped with reviewpart environment + def should_wrap_part_with_reviewpart? 
+ @chapter.is_a?(ReVIEW::Book::Part) + end + + # Generate TOC entry only (for nodisp headlines) + def generate_toc_entry(level, caption) + toc_type = case level + when 1 + 'chapter' + when 2 + 'section' + else + 'subsection' + end + "\\addcontentsline{toc}{#{toc_type}}{#{caption}}\n" + end + + # Generate unnumbered headline with TOC entry (for nonum headlines) + def generate_nonum_headline(level, caption, _node) + section_command = get_base_section_name(level) + '*' + + # Add TOC entry + toc_type = case level + when 1 + 'chapter' + when 2 + 'section' + else + 'subsection' + end + + "\\#{section_command}{#{caption}}\n\\addcontentsline{toc}{#{toc_type}}{#{caption}}\n\n" + end + + # Generate unnumbered headline without TOC entry (for notoc headlines) + def generate_notoc_headline(level, caption, _node) + section_command = get_base_section_name(level) + '*' + + "\\#{section_command}{#{caption}}\n\n" + end + + # Get base section name without star + def get_base_section_name(level) + if @chapter.is_a?(ReVIEW::Book::Part) && level == 1 + 'part' + else + HEADLINE[level] || raise(CompileError, "Unsupported headline level: #{level}") + end + end + + # Generate column label for hypertarget (matches LATEXBuilder behavior) + def generate_column_label(node, _caption) + # Use column_number directly instead of parsing auto_id + num = node.column_number || 'unknown' + + "column:#{@chapter&.id || 'unknown'}:#{num}" + end + + # Process //raw command with LATEXBuilder-compatible behavior + def process_raw_embed(node) + # Skip HTML embeds (from Markdown raw HTML) - they are not compatible with LaTeX + return '' if node.embed_type.to_s == 'html' + + # Check if this embed is targeted for LaTeX builder + unless node.targeted_for?('latex') + return '' + end + + # Get content + content = node.content || '' + + # Process \n based on embed type + case node.embed_type + when :inline, :raw + # For inline and raw embeds, convert \\n to actual newlines + content = content.gsub('\\n', "\n") + end 
+ + # For block embeds, add trailing newline + node.embed_type == :block ? content + "\n" : content + end + end + end +end diff --git a/lib/review/renderer/markdown_renderer.rb b/lib/review/renderer/markdown_renderer.rb new file mode 100644 index 000000000..380664176 --- /dev/null +++ b/lib/review/renderer/markdown_renderer.rb @@ -0,0 +1,1039 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'review/htmlutils' +require 'review/textutils' +require 'review/loggable' +require_relative 'base' + +module ReVIEW + module Renderer + class MarkdownRenderer < Base + include ReVIEW::HTMLUtils + include ReVIEW::TextUtils + include ReVIEW::Loggable + + def initialize(chapter) + super + @blank_seen = true + @ul_indent = 0 + @table_rows = [] + @table_header_count = 0 + @rendering_context = nil + end + + def target_name + 'markdown' + end + + def visit_document(node) + render_children(node) + end + + def visit_headline(node) + level = node.level + caption = render_caption_inline(node.caption_node) + + # Use Markdown # syntax + prefix = '#' * level + "#{prefix} #{caption}\n\n" + end + + def visit_paragraph(node) + # Render children with spacing between adjacent inline elements + content = render_children_with_inline_spacing(node) + return '' if content.empty? 
+ + lines = content.split("\n") + result = lines.join(' ') + + "#{result}\n\n" + end + + def visit_list(node) + result = +'' + + case node.list_type + when :ul + node.children.each do |item| + result += visit_list_item(item, :ul) + end + when :ol + node.children.each_with_index do |item, index| + result += visit_list_item(item, :ol, index + 1) + end + when :dl + node.children.each do |item| + result += visit_definition_item(item) + end + else + raise NotImplementedError, "MarkdownRenderer does not support list_type #{node.list_type}." + end + + result + "\n" + end + + def visit_list_item(node, type = :ul, number = nil) + # Separate text content from nested lists + text_content = +'' + nested_lists = +'' + + node.children.each do |child| + if child.class.name.include?('ListNode') + # This is a nested list - render it separately + nested_lists += visit(child) + else + # This is regular content + text_content += visit(child) + end + end + + text_content = text_content.chomp + + # Use the level attribute from the node for proper indentation + level = node.level || 1 + + result = case type + when :ul + # Calculate indent based on level (0-based indentation: level 1 = 0 spaces, level 2 = 2 spaces, etc.) + indent = ' ' * (level - 1) + "#{indent}* #{text_content}\n" + when :ol + # For ordered lists, also apply indentation based on level + indent = ' ' * (level - 1) + "#{indent}#{number}. #{text_content}\n" + end + + # Add any nested lists after the item + result += nested_lists + result + end + + def visit_item(node) + # Handle list items that come directly without parent list context + content = render_children(node).chomp + "* #{content}\n" + end + + def visit_definition_item(node) + # Check if term contains inline elements that render as ** (bold/strong) + # to avoid nesting issues like ****bold**** + term_has_bold = node.term_children&.any? 
do |child| + child.is_a?(ReVIEW::AST::InlineNode) && %i[b strong].include?(child.inline_type) + end + + # Handle definition term - use term_children (AST structure) + term = if node.term_children && !node.term_children.empty? + # Render term children (which contain inline elements) + node.term_children.map { |child| visit(child) }.join + else + '' # No term available + end + + # Handle definition content (all children are definition content) + definition_parts = node.children.map do |child| + visit(child) # Use visit instead of render_children for individual nodes + end + definition = definition_parts.join(' ').strip + + # Format term: if term contains bold inline elements, don't wrap in ** + if term_has_bold + # Term already has strong emphasis, use it as-is + "#{term}: #{definition}\n\n" + else + # Wrap plain term in bold + "**#{term}**: #{definition}\n\n" + end + end + + # Common code block rendering method used by all code block types + def render_code_block_common(node) + result = '' + lang = node.lang || '' + + # Add div wrapper with ID (if node has id) + if node.id && !node.id.empty? + list_id = normalize_id(node.id) + result += %Q(<div id="#{list_id}">\n\n) + end + + # Add caption if present + caption = render_caption_inline(node.caption_node) + if caption && !caption.empty? 
+ result += "**#{caption}**\n\n" + end + + # Generate fenced code block + result += "```#{lang}\n" + + # Handle line numbers if needed + if node.line_numbers + code_content = render_children(node).chomp + lines = code_content.split("\n") + first_line_number = (node.respond_to?(:first_line_number) && node.first_line_number) || 1 + + lines.each_with_index do |line, i| + line_num = (first_line_number + i).to_s.rjust(3) + result += "#{line_num}: #{line}\n" + end + else + code_content = render_children(node) + # Remove trailing newline if present to avoid double newlines + code_content = code_content.chomp if code_content.end_with?("\n") + result += code_content + result += "\n" + end + + result += "```\n\n" + + # Close div wrapper if added + if node.id && !node.id.empty? + result += "</div>\n\n" + end + + result + end + + # Individual code block type visitors that delegate to common method + def visit_code_block_list(node) + render_code_block_common(node) + end + + def visit_code_block_listnum(node) + render_code_block_common(node) + end + + def visit_code_block_emlist(node) + render_code_block_common(node) + end + + def visit_code_block_emlistnum(node) + render_code_block_common(node) + end + + def visit_code_block_cmd(node) + render_code_block_common(node) + end + + def visit_code_block_source(node) + render_code_block_common(node) + end + + def visit_code_line(node) + render_children(node) + "\n" + end + + def visit_table(node) + @table_rows = [] + @table_header_count = 0 + + # Add div wrapper with ID if present + id_attr = node.id ? %Q( id="#{normalize_id(node.id)}") : '' + result = "<div#{id_attr}>\n\n" + + # Add caption if present + caption = render_caption_inline(node.caption_node) + result += "**#{caption}**\n\n" unless caption.empty? + + # Process table content + render_children(node) + + # Generate markdown table + if @table_rows.any? 
+ result += generate_markdown_table + end + + result += "\n</div>\n\n" + result + end + + def visit_table_row(node) + cells = [] + node.children.each do |cell| + cell_content = render_children(cell).gsub('|', '\\|') + # Skip separator rows (rows that contain only dashes) + unless /^-+$/.match?(cell_content.strip) + cells << cell_content + end + end + + # Only add non-empty rows + if cells.any? { |cell| !cell.strip.empty? } + @table_rows << cells + @table_header_count = [@table_header_count, cells.length].max if @table_rows.length == 1 + end + '' + end + + def visit_table_cell(node) + render_children(node) + end + + def visit_image(node) + # Use node.id as the image path, get path from chapter if image is bound + image_path = begin + if @chapter&.image_bound?(node.id) + @chapter.image(node.id).path + else + node.id + end + rescue StandardError + # If image lookup fails (e.g., incomplete book structure), use node.id + node.id + end + + caption = render_caption_inline(node.caption_node) + + # Remove ./ prefix if present + image_path = image_path.sub(%r{\A\./}, '') + + # Generate markdown image syntax + "![#{caption}](#{image_path})\n\n" + end + + def visit_minicolumn(node) + result = +'' + + # Use HTML div for minicolumns as Markdown doesn't have native support + css_class = node.minicolumn_type.to_s + + result += %Q(<div class="#{css_class}">\n\n) + + caption = render_caption_inline(node.caption_node) + result += "**#{caption}**\n\n" unless caption.empty? 
+ + result += render_children(node) + result += "\n</div>\n\n" + + result + end + + # visit_block is now handled by Base renderer with dynamic method dispatch + + def visit_block_quote(node) + content = render_children(node).chomp + lines = content.split("\n") + quoted_lines = lines.map { |line| "> #{line}" } + "#{quoted_lines.join("\n")}\n\n" + end + + def visit_block_centering(node) + # Use HTML div for centering in Markdown + content = render_children(node) + "<div style=\"text-align: center;\">\n\n#{content}\n</div>\n\n" + end + + def visit_block_flushright(node) + # Use HTML div for right alignment in Markdown + content = render_children(node) + "<div style=\"text-align: right;\">\n\n#{content}\n</div>\n\n" + end + + def visit_block_captionblock(node) + # Use HTML div for caption blocks + result = %Q(<div class="captionblock">\n\n) + result += render_children(node) + result += "\n</div>\n\n" + result + end + + def visit_embed(node) + # Handle //raw and @<raw> commands with target builder specification + if node.targeted_for?('markdown') + content = node.content || '' + # Convert \n to actual newlines + content.gsub('\\n', "\n") + else + '' + end + end + + def visit_column(node) + result = +'' + + # Use HTML div for columns as Markdown doesn't have native support + css_class = node.column_type.to_s + + result += %Q(<div class="#{css_class}">\n\n) + + caption = render_caption_inline(node.caption_node) + result += "**#{caption}**\n\n" unless caption.empty? 
+ + result += render_children(node) + result += "\n</div>\n\n" + + result + end + + def visit_block_lead(node) + # Lead paragraphs - render as regular paragraphs in Markdown + render_children(node) + "\n" + end + + def visit_block_bibpaper(node) + # Bibliography entries - render as list items + result = +'' + + # Get ID and caption + bib_id = node.id || '' + caption = render_caption_inline(node.caption_node) + + # Format as markdown list item with ID + result += "* **[#{bib_id}]** #{caption}\n" unless caption.empty? + + # Add content if any + content = render_children(node) + result += " #{content.gsub("\n", "\n ")}\n" unless content.strip.empty? + + result + "\n" + end + + def visit_block_blankline(_node) + # Blank line directive - render as double newline + "\n\n" + end + + def visit_block_hr(_node) + # Horizontal rule - render as Markdown horizontal line + "---\n\n" + end + + def visit_tex_equation(node) + # LaTeX equation block - render as math code block + content = node.content.strip + result = +'' + + if node.id? && node.caption? + # With ID and caption + caption = render_caption_inline(node.caption_node) + result += "**#{caption}**\n\n" unless caption.empty? 
+ end + + # Render equation in display math mode ($$...$$) + result += "$$\n#{content}\n$$\n\n" + result + end + + def render_inline_element(type, content, node) + method_name = "render_inline_#{type}" + if respond_to?(method_name, true) + send(method_name, type, content, node) + else + # Fallback for unknown inline elements: render as plain text + # This allows graceful degradation for specialized elements + ReVIEW.logger.warn("Unknown inline element: @<#{type}>{...} - rendering as plain text") + content + end + end + + def render_caption_inline(caption_node) + return '' unless caption_node + + # Use inline spacing for captions as well + content = render_children_with_inline_spacing(caption_node) + # Join lines like visit_paragraph does + lines = content.split("\n") + lines.join(' ') + end + + def visit_footnote(node) + footnote_id = normalize_id(node.id) + content = render_children(node) + + # Use Markdown standard footnote definition notation + "[^#{footnote_id}]: #{content}\n\n" + end + + def visit_endnote(node) + # Endnote definition - treat similar to footnotes + endnote_id = node.id + content = render_children(node) + + "[^#{endnote_id}]: #{content}\n\n" + end + + def visit_block_label(node) + # Label definition for cross-references - render as HTML anchor + # Label ID is stored in args[0], not in node.id + label_id = node.args&.first + return '' unless label_id + + "<a id=\"#{normalize_id(label_id)}\"></a>\n\n" + end + + def visit_block_printendnotes(_node) + # Directive to print endnotes - in Markdown, endnotes are collected automatically + # Just output a horizontal rule or section marker + "---\n\n**Endnotes**\n\n" + end + + def visit_text(node) + node.content || '' + end + + def visit_reference(node) + node.content || '' + end + + def render_inline_b(_type, content, _node) + "**#{escape_asterisks(content)}**" + end + + def render_inline_strong(_type, content, _node) + "**#{escape_asterisks(content)}**" + end + + def render_inline_i(_type, content, 
_node) + "*#{escape_asterisks(content)}*" + end + + def render_inline_em(_type, content, _node) + "*#{escape_asterisks(content)}*" + end + + def render_inline_code(_type, content, _node) + "`#{content}`" + end + + def render_inline_tt(_type, content, _node) + "`#{content}`" + end + + def render_inline_ttb(_type, content, _node) + # Bold + monospace: **`content`** + "**`#{content}`**" + end + + def render_inline_ttbold(type, content, node) + # Alias for ttb + render_inline_ttb(type, content, node) + end + + def render_inline_tti(_type, content, _node) + # Italic + monospace: *`content`* + "*`#{content}`*" + end + + def render_inline_kbd(_type, content, _node) + "`#{content}`" + end + + def render_inline_samp(_type, content, _node) + "`#{content}`" + end + + def render_inline_var(_type, content, _node) + "*#{escape_asterisks(content)}*" + end + + def render_inline_sup(_type, content, _node) + "<sup>#{escape_content(content)}</sup>" + end + + def render_inline_sub(_type, content, _node) + "<sub>#{escape_content(content)}</sub>" + end + + def render_inline_del(_type, content, _node) + "~~#{content}~~" + end + + def render_inline_ins(_type, content, _node) + "<ins>#{escape_content(content)}</ins>" + end + + def render_inline_u(_type, content, _node) + "<u>#{escape_content(content)}</u>" + end + + def render_inline_br(_type, _content, _node) + "\n" + end + + def render_inline_raw(_type, _content, node) + node.targeted_for?('markdown') ? (node.content || '') : '' + end + + def render_inline_embed(_type, _content, node) + node.targeted_for?('markdown') ? (node.content || '') : '' + end + + def render_inline_chap(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? 
+ raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + chapter_num = text_formatter.format_chapter_number_full(data.chapter_number, data.chapter_type) + chapter_id = data.item_id + + # Generate HTML link (same as HtmlRenderer) + %Q(<a href="./#{chapter_id}.html">#{escape_content(chapter_num.to_s)}</a>) + end + + def render_inline_title(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + title = data.chapter_title || '' + chapter_id = data.item_id + + # Generate HTML link with title + %Q(<a href="./#{chapter_id}.html">#{escape_content(title)}</a>) + end + + def render_inline_chapref(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + display_str = text_formatter.format_reference(:chapter, data) + chapter_id = data.item_id + + # Generate HTML link with full chapter reference + %Q(<a href="./#{chapter_id}.html">#{escape_content(display_str)}</a>) + end + + def render_inline_list(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + text = text_formatter.format_reference_text(:list, data) + wrap_reference_with_html(text, data, 'listref') + end + + def render_inline_img(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? 
+ raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + text = text_formatter.format_reference_text(:image, data) + wrap_reference_with_html(text, data, 'imgref') + end + + def render_inline_icon(_type, content, node) + if node.args.first + image_path = node.args.first + image_path = image_path.sub(%r{\A\./}, '') + "![](#{image_path})" + else + "![](#{content})" + end + end + + def render_inline_table(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + text = text_formatter.format_reference_text(:table, data) + wrap_reference_with_html(text, data, 'tableref') + end + + def render_inline_fn(_type, _content, node) + ref_node = node.children.first + + # Handle both resolved and unresolved references + if ref_node.reference_node? && ref_node.resolved? + data = ref_node.resolved_data + fn_id = normalize_id(data.item_id) + elsif ref_node.reference_node? + # Unresolved reference - use the ref_id directly + fn_id = ref_node.ref_id + elsif node.args.any? + # Fallback to args if available + fn_id = node.args.first + end + + # Use Markdown standard footnote notation + "[^#{fn_id}]" + end + + def render_inline_endnote(_type, content, node) + # Endnote references - treat similar to footnotes + if node.args.first + endnote_id = node.args.first + "[^#{endnote_id}]" + else + "[^#{content}]" + end + end + + def render_inline_bib(_type, _content, node) + # Bibliography reference + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? 
+ raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + bib_number = data.item_number + # Format as [number] like other builders + "[#{bib_number}]" + end + + def render_inline_kw(_type, content, node) + if node.args.length >= 2 + word = node.args[0] + alt = node.args[1] + "**#{escape_asterisks(word)}** (#{escape_content(alt)})" + else + "**#{escape_asterisks(content)}**" + end + end + + def render_inline_bou(_type, content, _node) + "*#{escape_asterisks(content)}*" + end + + def render_inline_ami(_type, content, _node) + "*#{escape_asterisks(content)}*" + end + + def render_inline_href(_type, content, node) + args = node.args || [] + if args.length >= 2 + # @<href>{url,text} format + url = args[0] + text = args[1] + "[#{escape_content(text)}](#{url})" + elsif args.length == 1 + # @<href>{url} format - use URL as both text and href + url = args[0] + "[#{escape_content(url)}](#{url})" + else + # Fallback to content + "[#{escape_content(content)}](#{content})" + end + end + + def render_inline_ruby(_type, content, node) + if node.args.length >= 2 + base = node.args[0] + ruby = node.args[1] + "<ruby>#{escape_content(base)}<rt>#{escape_content(ruby)}</rt></ruby>" + else + escape_content(content) + end + end + + def render_inline_m(_type, content, _node) + "$$#{content}$$" + end + + def render_inline_idx(_type, content, _node) + escape_content(content) + end + + def render_inline_hidx(_type, _content, _node) + '' + end + + def render_inline_comment(_type, content, _node) + if @book&.config&.[]('draft') + "<!-- #{escape_content(content)} -->" + else + '' + end + end + + def render_inline_hd(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? 
+ raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + n = data.headline_number + chapter_num = text_formatter.format_chapter_number_short(data.chapter_number, data.chapter_type) + + # Render caption with inline markup + caption_html = if data.caption_node + render_children(data.caption_node) + else + escape_content(data.caption_text) + end + + # Build full section number + full_number = if n.present? && chapter_num && !chapter_num.to_s.empty? && over_secnolevel?(n) + ([chapter_num] + n).join('.') + end + + str = text_formatter.format_headline_quote(full_number, caption_html) + + # Generate HTML link if section number exists + if full_number + chapter_id = data.chapter_id || @chapter.id + anchor = 'h' + full_number.tr('.', '-') + %Q(<a href="#{chapter_id}.html##{anchor}">#{str}</a>) + else + str + end + end + + def render_inline_column(_type, _content, node) + # Column reference + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + + # Use TextFormatter to format column reference (e.g., "コラム「タイトル」") + text_formatter.format_reference_text(:column, data) + end + + def render_inline_sec(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + n = data.headline_number + chapter_num = text_formatter.format_chapter_number_short(data.chapter_number, data.chapter_type) + + # Build full section number + full_number = if n.present? && chapter_num && !chapter_num.to_s.empty? && over_secnolevel?(n) + ([chapter_num] + n).join('.') + else + '' + end + + # Generate HTML link if section number exists + if full_number.present? 
+ chapter_id = data.chapter_id || @chapter.id + anchor = 'h' + full_number.tr('.', '-') + %Q(<a href="#{chapter_id}.html##{anchor}">#{escape_content(full_number)}</a>) + else + escape_content(full_number) + end + end + + def render_inline_secref(_type, _content, node) + # secref is usually same as sec + render_inline_sec(nil, nil, node) + end + + def render_inline_sectitle(_type, _content, node) + ref_node = node.children.first + unless ref_node.reference_node? && ref_node.resolved? + raise 'BUG: Reference should be resolved at AST construction time' + end + + data = ref_node.resolved_data + + # Render title with inline markup + title_html = if data.caption_node + render_children(data.caption_node) + else + escape_content(data.caption_text) + end + + # Generate HTML link + n = data.headline_number + chapter_num = text_formatter.format_chapter_number_short(data.chapter_number, data.chapter_type) + full_number = ([chapter_num] + n).join('.') + anchor = 'h' + full_number.tr('.', '-') + chapter_id = data.chapter_id || @chapter.id + %Q(<a href="#{chapter_id}.html##{anchor}">#{title_html}</a>) + end + + def render_inline_labelref(_type, content, _node) + escape_content(content) + end + + def render_inline_ref(_type, content, _node) + escape_content(content) + end + + def render_inline_pageref(_type, content, _node) + escape_content(content) + end + + def render_inline_w(_type, content, _node) + # Dictionary lookup for word substitution + dictionary = @book&.config&.[]('dictionary') || {} + translated = dictionary[content] + escape_content(translated || "[missing word: #{content}]") + end + + def render_inline_wb(_type, content, _node) + # Dictionary lookup with bold formatting + dictionary = @book&.config&.[]('dictionary') || {} + word_content = dictionary[content] || "[missing word: #{content}]" + "**#{escape_asterisks(word_content)}**" + end + + def render_inline_uchar(_type, content, _node) + # Convert hex code to Unicode character + 
[content.to_i(16)].pack('U').force_encoding('UTF-8') + end + + # Helper methods + def escape_content(str) + escape(str) + end + + def escape_asterisks(str) + str.gsub('*', '\\*') + end + + private + + # Render children with spacing between adjacent inline elements + # This prevents Markdown parsing issues when inline elements are adjacent + # + # Rules: + # - Same type adjacent inlines are merged: @<b>{a}@<b>{b} → **ab** + # - Different type adjacent inlines get space: @<b>{a}@<i>{b} → **a** *b* + def render_children_with_inline_spacing(node) + return '' if node.children.empty? + + # Group consecutive inline nodes of the same type + groups = group_inline_nodes(node.children) + + result = +'' + prev_group_was_inline = false + + groups.each do |group| + if group[:type] == :inline_group + # Add space if previous group was also inline (but different type) + result += ' ' if prev_group_was_inline + + # Merge same-type inline nodes and render together + merged_content = group[:nodes].map { |n| render_children(n) }.join + inline_type = group[:inline_type] + + # Render the merged content as a single inline element + result += render_inline_element(inline_type, merged_content, group[:nodes].first) + + prev_group_was_inline = true + else + # Regular nodes (text, etc.) - just render normally + group[:nodes].each do |n| + result += visit(n) + end + prev_group_was_inline = false + end + end + + result + end + + # Group consecutive inline nodes by type + # Returns array of groups: [{type: :inline_group, inline_type: 'b', nodes: [...]}, ...] + def group_inline_nodes(children) + groups = [] + current_group = nil + + children.each do |child| + if child.is_a?(ReVIEW::AST::InlineNode) + inline_type = child.inline_type + + # Start new group if type changed or first inline + if current_group.nil? 
|| current_group[:type] != :inline_group || current_group[:inline_type] != inline_type + # Save previous group if exists + groups << current_group if current_group + + # Start new inline group + current_group = { + type: :inline_group, + inline_type: inline_type, + nodes: [child] + } + else + # Add to current group (same type) + current_group[:nodes] << child + end + else + # Non-inline node + # Save previous inline group if exists + if current_group && current_group[:type] == :inline_group + groups << current_group + current_group = nil + end + + # Start or continue regular node group + if current_group.nil? || current_group[:type] != :regular + groups << current_group if current_group + current_group = { type: :regular, nodes: [child] } + else + current_group[:nodes] << child + end + end + end + + # Don't forget the last group + groups << current_group if current_group + + groups + end + + def generate_markdown_table + return '' if @table_rows.empty? + + result = +'' + + # Header row + header = @table_rows.first + result += "| #{header.join(' | ')} |\n" + + # Separator row + separators = header.map { ':--' } + result += "| #{separators.join(' | ')} |\n" + + # Data rows + @table_rows[1..-1]&.each do |row| + # Pad row to match header length + padded_row = row + ([''] * (@table_header_count - row.length)) + result += "| #{padded_row.join(' | ')} |\n" + end + + result + end + + # Get text formatter for reference formatting + def text_formatter + @text_formatter ||= ReVIEW::TextUtils.new(@chapter) + end + + # Wrap reference with HTML span and link (same as HtmlRenderer) + def wrap_reference_with_html(text, data, css_class) + escaped_text = escape_content(text) + chapter_id = data.chapter_id || @chapter&.id + item_id = normalize_id(data.item_id) + + # Generate HTML with span and link + %Q(<span class="#{css_class}"><a href="./#{chapter_id}.html##{item_id}">#{escaped_text}</a></span>) + end + + # Check if section number should be displayed (based on secnolevel) + def 
over_secnolevel?(n) + secnolevel = config['secnolevel'] || 0 + # Section level = chapter level (1) + n.size + section_level = n.is_a?(::Array) ? (1 + n.size) : (1 + n.to_s.split('.').size) + secnolevel >= section_level + end + end + end +end diff --git a/lib/review/renderer/plaintext_renderer.rb b/lib/review/renderer/plaintext_renderer.rb new file mode 100644 index 000000000..7d4bcf15f --- /dev/null +++ b/lib/review/renderer/plaintext_renderer.rb @@ -0,0 +1,702 @@ +# frozen_string_literal: true + +# Copyright (c) 2025 Kenshi Muto, Masayoshi Takahashi +# +# This program is free software. +# You can distribute or modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. + +require 'review/textutils' +require 'review/loggable' +require_relative 'base' + +module ReVIEW + module Renderer + class PlaintextRenderer < Base + include ReVIEW::TextUtils + include ReVIEW::Loggable + + def initialize(chapter) + super + @blank_seen = true + @ol_num = nil + @logger = ReVIEW.logger + end + + # Format type for this renderer + # @return [Symbol] Format type :text + def format_type + :text + end + + def target_name + 'plaintext' + end + + def visit_document(node) + render_children(node) + end + + def visit_headline(node) + level = node.level + caption = render_caption_inline(node.caption_node) + + # Get headline prefix like PLAINTEXTBuilder + prefix = headline_prefix(level) + "#{prefix}#{caption}\n" + end + + def visit_paragraph(node) + content = render_children(node) + # Join lines to single paragraph like PLAINTEXTBuilder's join_lines_to_paragraph + lines = content.split("\n") + result = lines.join + "#{result}\n" + end + + def visit_list(node) + result = +'' + + case node.list_type + when :ul + node.children.each do |item| + result += visit_list_item(item, :ul) + end + when :ol + # Reset ol counter + @ol_num = node.start_number || 1 + node.children.each do |item| + result += visit_list_item(item, :ol) + @ol_num += 1 + end + @ol_num = nil + 
when :dl + node.children.each do |item| + result += visit_definition_item(item) + end + else + raise NotImplementedError, "PlaintextRenderer does not support list_type #{node.list_type}." + end + + "\n#{result}\n" + end + + def visit_list_item(node, type = :ul) + content = render_children(node) + # Remove paragraph newlines and join + text = content.gsub(/\n+/, ' ').strip + + case type + when :ul + "#{text}\n" + when :ol + "#{@ol_num} #{text}\n" + end + end + + def visit_definition_item(node) + # Handle definition term + term = if node.term_children && !node.term_children.empty? + node.term_children.map { |child| visit(child) }.join + else + '' + end + + # Handle definition content + definition_parts = node.children.map { |child| visit(child) } + definition = definition_parts.join.delete("\n") + + "#{term}\n#{definition}\n" + end + + # Numbered code block (listnum, emlistnum) + def render_numbered_code_block(node) + result = +'' + caption = render_caption_inline(node.caption_node) + lines_content = render_children(node) + + lines = lines_content.split("\n") + lines.pop if lines.last && lines.last.empty? + + first_line_number = node.first_line_num || 1 + + result += "\n" if caption_top?('list') && !caption.empty? + result += "#{caption}\n" if caption_top?('list') && !caption.empty? + result += "\n" if caption_top?('list') && !caption.empty? + + lines.each_with_index do |line, i| + result += "#{(i + first_line_number).to_s.rjust(2)}: #{detab(line)}\n" + end + + result += "\n" unless caption_top?('list') + result += "#{caption}\n" unless caption_top?('list') || caption.empty? + result += "\n" + + result + end + + # Regular code block (emlist, cmd, source, etc.) + def render_regular_code_block(node) + result = +'' + caption = render_caption_inline(node.caption_node) + lines_content = render_children(node) + + result += "\n" if caption_top?('list') && !caption.empty? + result += "#{caption}\n" if caption_top?('list') && !caption.empty? 
+ + lines_content.each_line do |line| + result += detab(line.chomp) + "\n" + end + + result += "#{caption}\n" unless caption_top?('list') || caption.empty? + result += "\n" + + result + end + + def visit_code_block_list(node) + result = +'' + caption = render_caption_inline(node.caption_node) + lines_content = render_children(node) + + result += "\n" if caption_top?('list') && !caption.empty? + result += generate_list_header(node.id, caption) + "\n" if caption_top?('list') && !caption.empty? + result += "\n" if caption_top?('list') && !caption.empty? + + lines_content.each_line do |line| + result += detab(line.chomp) + "\n" + end + + result += "\n" unless caption_top?('list') + result += generate_list_header(node.id, caption) + "\n" unless caption_top?('list') || caption.empty? + result += "\n" + + result + end + + def visit_code_block_listnum(node) + render_numbered_code_block(node) + end + + def visit_code_block_emlist(node) + render_regular_code_block(node) + end + + def visit_code_block_emlistnum(node) + render_numbered_code_block(node) + end + + def visit_code_block_cmd(node) + render_regular_code_block(node) + end + + def visit_code_block_source(node) + render_regular_code_block(node) + end + + def visit_code_line(node) + line_content = render_children(node) + # Add newline after each line + line_content + "\n" + end + + def visit_table(node) + result = +'' + + # Check if this is an imgtable + if node.table_type == :imgtable + return render_imgtable(node) + end + + # Add caption + caption = render_caption_inline(node.caption_node) + unless caption.empty? 
+ result += "\n" + result += if node.id + generate_table_header(node.id, caption) + "\n" + else + "#{caption}\n" + end + result += "\n" if caption_top?('table') + end + + # Process table rows + all_rows = node.header_rows + node.body_rows + all_rows.each do |row| + result += visit_table_row(row) + end + + result += "\n" unless caption_top?('table') + result += "\n" + + result + end + + def visit_table_row(node) + cells = node.children.map { |cell| render_children(cell) } + cells.join("\t") + "\n" + end + + def visit_table_cell(node) + render_children(node) + end + + def visit_image(node) + result = +'' + caption = render_caption_inline(node.caption_node) + + result += "\n" + if node.id && @chapter + result += "#{text_formatter.format_caption_plain('image', get_chap, @chapter.image(node.id).number, caption)}\n" + else + result += "図 #{caption}\n" unless caption.empty? + end + result += "\n" + + result + end + + def visit_minicolumn(node) + result = +'' + caption = render_caption_inline(node.caption_node) + + result += "\n" + result += "#{caption}\n" unless caption.empty? + result += render_children(node) + result += "\n" + + result + end + + def visit_column(node) + result = +'' + caption = render_caption_inline(node.caption_node) + + result += "\n" + result += "#{caption}\n" unless caption.empty? 
# Print every endnote collected in the current chapter, one per line,
# in the form "(number) content".
#
# @param _node [Object] the //printendnotes block node (unused)
# @return [String] rendered endnotes, or '' when there is no chapter
#   or the chapter has no endnotes
def visit_block_printendnotes(_node)
  return '' unless @chapter

  endnotes = @chapter.endnotes
  return '' if endnotes.size == 0

  output = +''
  # A missing content is printed as an empty string.
  endnotes.each { |note| output << "(#{note.number}) #{note.content || ''}\n" }
  output
end
# Render a //texequation block as plain text.
#
# The equation source is printed verbatim between blank lines. When the
# node has an id and a chapter is available, a numbered caption line is
# placed above or below the content according to caption_top?('equation').
#
# @param node [Object] equation node providing #id?, #id, #content and #caption_node
# @return [String] rendered text
def visit_tex_equation(node)
  # Build the caption line once; it was previously assembled in two
  # separate, identical branches.
  header = nil
  if node.id? && @chapter
    caption = render_caption_inline(node.caption_node)
    header = "#{text_formatter.format_caption_plain('equation', get_chap, @chapter.equation(node.id).number, caption)}\n"
  end
  caption_on_top = header && caption_top?('equation')

  result = +"\n"
  result << header if header && caption_on_top
  result << "#{node.content}\n"
  result << header if header && !caption_on_top
  result << "\n"
end
# Render an inline keyword as "word(alt)", or just the word when no
# alternative text is given.
#
# Tolerates a nil args list and a nil alternative, matching the nil guard
# that render_inline_href applies to its arguments.
#
# @param _type [Symbol] inline element type (unused)
# @param _content [String] rendered child content (unused)
# @param node [Object] inline node providing #args
# @return [String] rendered keyword
def render_inline_kw(_type, _content, node)
  word, alt = node.args || []
  return word || '' if alt.nil?

  "#{word}(#{alt.strip})"
end
# Render a bibliography reference as its item number.
#
# @param _type [Symbol] inline element type (unused)
# @param _content [String] rendered child content (unused)
# @param node [Object] inline node whose first child is the reference node
# @return [String] the bibliography item number
# @raise [RuntimeError] when the first child is missing, is not a
#   reference node, or has not been resolved
def render_inline_bib(_type, _content, node)
  ref_node = node.children.first
  # Safe navigation makes a missing child fail with the same diagnostic
  # as an unresolved reference instead of a NoMethodError on nil.
  unless ref_node&.reference_node? && ref_node.resolved?
    raise 'BUG: Reference should be resolved at AST construction time'
  end

  ref_node.resolved_data.item_number.to_s
end
# Render a caption node to a single line of text.
#
# The children are rendered and every newline is removed, so a caption
# that spans several source lines is joined without a separator.
#
# @param caption_node [Object, nil] caption node, or nil when absent
# @return [String] caption text without newlines ('' when there is no caption)
def render_caption_inline(caption_node)
  return '' unless caption_node

  # Same result as split("\n").join, without the intermediate array.
  render_children(caption_node).delete("\n")
end
# Tell whether a dotted section number is within the configured
# numbering depth.
#
# @param n [#to_s] section number such as 1 or "1.2.3"
# @param _chapter [Object] unused
# @return [Boolean] true when config['secnolevel'] (0 when unset) is at
#   least the number of dot-separated components in n
def over_secnolevel?(n, _chapter = @chapter)
  depth = n.to_s.split('.').size
  (config['secnolevel'] || 0) >= depth
end
module ReVIEW
  module Renderer
    # RenderingContext - one node in a chain of nested rendering scopes.
    #
    # Each context records its own type (e.g. :table, :caption), a link to
    # the enclosing context, and a FootnoteCollector of its own. Renderers
    # use the chain to decide whether footnotes need the footnotetext
    # treatment: that is the case when this context, or any context
    # enclosing it, has one of the FOOTNOTETEXT_REQUIRED_CONTEXTS types.
    class RenderingContext
      # Context types that require footnotetext instead of direct footnote
      FOOTNOTETEXT_REQUIRED_CONTEXTS = %i[table caption minicolumn column dt].freeze

      attr_reader :context_type, :parent_context, :footnote_collector

      # @param context_type [Symbol] type of this context
      # @param parent_context [RenderingContext, nil] enclosing context, nil for the root
      def initialize(context_type, parent_context = nil)
        @context_type = context_type
        @parent_context = parent_context
        @footnote_collector = FootnoteCollector.new
      end

      # Whether footnotes rendered in this context need footnotetext handling.
      # @return [Boolean] true when this context or any enclosing one requires it
      def requires_footnotetext?
        return true if footnotetext_context?

        parent_requires_footnotetext?
      end

      # Whether this context's own type requires footnotetext.
      # @return [Boolean]
      def footnotetext_context?
        FOOTNOTETEXT_REQUIRED_CONTEXTS.include?(@context_type)
      end

      # Build a child context of the given type and yield it to the block.
      # The child is not stored on this context.
      # @param child_type [Symbol] the type of child context
      # @yield [RenderingContext] the child context
      # @return [Object] the result of the block
      def with_child_context(child_type)
        yield(RenderingContext.new(child_type, self))
      end

      # Add a footnote to this context's collector.
      # @param footnote_node [AST::FootnoteNode] the footnote node
      # @param footnote_number [Integer] the footnote number
      def collect_footnote(footnote_node, footnote_number)
        @footnote_collector.add(footnote_node, footnote_number)
      end

      # Debug representation listing the context types from root to self.
      # @return [String]
      def to_s
        "RenderingContext[#{ancestors.map(&:context_type).join(' > ')}]"
      end

      # All contexts on the path from the root down to (and including) self.
      # @return [Array<RenderingContext>] contexts ordered root first
      def ancestors
        chain = []
        current = self
        while current
          chain.unshift(current)
          current = current.parent_context
        end
        chain
      end

      private

      # Whether any enclosing context requires footnotetext.
      # @return [Boolean] false when there is no parent
      def parent_requires_footnotetext?
        (@parent_context && @parent_context.requires_footnotetext?) || false
      end
    end
  end
end
# Format a numbered item's caption for HTML/LaTeX (e.g., "図1.1: キャプション").
#
# The label comes from I18n, the number from format_number_header, and
# the separator from format_caption_prefix. Going through that helper
# means a missing 'caption_prefix' entry falls back to a single space
# instead of leaking the raw key into the output.
#
# @param label_key [String] I18n key for the label (e.g., 'image', 'table', 'list')
# @param chapter_number [String, nil] chapter number
# @param item_number [Integer] item number within the chapter
# @param caption_text [String, nil] caption text
# @return [String] label and number, followed by separator and caption
#   when a non-empty caption is given
def format_caption(label_key, chapter_number, item_number, caption_text = nil)
  base = "#{I18n.t(label_key)}#{format_number_header(chapter_number, item_number)}"
  return base if caption_text.nil? || caption_text.empty?

  "#{base}#{format_caption_prefix}#{caption_text}"
end
# Format a reference as plain text, without format-specific decoration.
#
# Numbered kinds (:image, :table, :list, :equation) share
# format_numbered_reference_text with the kind name as I18n label key.
# The remaining kinds each dispatch to their own
# format_<kind>_reference_text helper, and :word returns the word
# content directly.
#
# @param type [Symbol] reference type
# @param data [ResolvedData] resolved reference data
# @return [String] plain text reference (e.g., "図1.1", "表2.3")
# @raise [ArgumentError] when type is not a known reference type
def format_reference_text(type, data)
  case type
  when :image, :table, :list, :equation
    format_numbered_reference_text(type.to_s, data)
  when :footnote, :endnote, :chapter, :headline, :column, :bibpaper
    # The helper name is built only from the symbols listed above.
    send("format_#{type}_reference_text", data)
  when :word
    data.word_content.to_s
  else
    raise ArgumentError, "Unknown reference type: #{type}"
  end
end
# Format chapter number without heading (short form, e.g., "1", "A", "I").
# Used for figure/table/list references where the format is "図2.1",
# not "図第2章.1".
#
# Only appendices need I18n: the first format directive found in the
# 'appendix' entry is registered as 'appendix_without_heading' and
# applied to the raw number. Every other chapter type returns the
# number unchanged.
#
# @param raw_number [Integer, nil] raw chapter number
# @param chapter_type [Symbol, nil] chapter type (:chapter, :appendix, :part, :predef)
# @return [String] short form chapter number ('' when raw_number is nil)
def format_chapter_number_short(raw_number, chapter_type)
  return '' unless raw_number
  return raw_number.to_s unless chapter_type == :appendix

  # to_s keeps a missing 'appendix' entry from crashing; '%s' is the
  # fallback directive.
  directive = I18n.get('appendix').to_s[/%\w{1,3}/] || '%s'
  I18n.update('appendix_without_heading' => directive)
  I18n.t('appendix_without_heading', raw_number)
end
# Format a quoted headline, with or without its section number.
#
# @param full_number [String, nil] full section number (e.g., "1.2.3"),
#   nil for an unnumbered headline
# @param caption_text [String] caption text, already rendered in the target format
# @return [String] the 'hd_quote' I18n text when a number is given,
#   otherwise 'hd_quote_without_number'
def format_headline_quote(full_number, caption_text)
  return I18n.t('hd_quote_without_number', caption_text) unless full_number

  I18n.t('hd_quote', [full_number, caption_text])
end
# Format equation reference
#
# Delegates to format_numbered_reference, the helper already used by the
# image, table and list references, instead of repeating its body.
#
# @param data [ResolvedData] resolved reference data
# @return [String] formatted reference without caption (e.g., "式3.1")
def format_equation_reference(data)
  # The third argument is unused by the helper.
  format_numbered_reference('equation', data, 'eqref')
end
# Format headline reference
#
# Numbered headlines are quoted as "<chapter>.<section numbers>" plus
# caption, unnumbered ones by caption alone. When there is neither a
# number nor a caption, the item id (or '') is returned.
#
# @param data [ResolvedData] resolved headline reference data
# @return [String] formatted reference
def format_headline_reference(data)
  caption = data.caption_text
  headline_numbers = Array(data.headline_number).compact

  unless headline_numbers.empty?
    # Use short form of chapter number for headline references
    chapter_number_short = format_chapter_number_short(data.chapter_number, data.chapter_type)
    number_parts = chapter_number_short.empty? ? headline_numbers : [chapter_number_short, *headline_numbers]
    return I18n.t('hd_quote', [number_parts.join('.'), caption])
  end

  # to_s lets a nil caption reach the item_id fallback instead of raising.
  return I18n.t('hd_quote_without_number', caption) unless caption.to_s.empty?

  data.item_id || ''
end
# Check whether a value's string form is an optionally negative decimal integer.
#
# @param value [#to_s] value to test
# @return [Boolean] true when value.to_s consists solely of an optional
#   leading '-' followed by one or more digits
def numeric_string?(value)
  /\A-?\d+\z/.match?(value.to_s)
end
# Format headline reference as plain text
#
# Numbered headlines are quoted as "<chapter>.<section numbers>" plus
# caption, unnumbered ones by caption alone. When there is neither a
# number nor a caption, the item id (or '') is returned.
#
# @param data [ResolvedData] resolved reference data
# @return [String] plain text reference
def format_headline_reference_text(data)
  caption = data.caption_text
  section_numbers = Array(data.headline_number).compact

  if section_numbers.empty?
    # to_s lets a nil caption reach the item_id fallback instead of raising.
    return data.item_id || '' if caption.to_s.empty?

    return I18n.t('hd_quote_without_number', caption)
  end

  # Use short form of chapter number for headline references
  chapter_number_short = format_chapter_number_short(data.chapter_number, data.chapter_type)
  section_numbers = [chapter_number_short, *section_numbers] unless chapter_number_short.empty?
  I18n.t('hd_quote', [section_numbers.join('.'), caption])
end
# Render a headline as a TOP-format heading line: a level marker,
# the prefix from generate_headline_prefix, then the caption.
#
# @param node [Object] headline node providing #level and #caption_node
# @return [String] heading line terminated by a newline
def visit_headline(node)
  depth = node.level
  title = render_caption_inline(node.caption_node)

  "■H#{depth}■#{generate_headline_prefix(depth)}#{title}\n"
end
+ + "#{content}\n" + end + + def visit_list(node) + result = +'' + + case node.list_type + when :ul + node.children.each do |item| + result += visit_unordered_list_item(item) + end + when :ol + node.children.each_with_index do |item, index| + result += visit_ordered_list_item(item, index + 1) + end + when :dl + node.children.each do |item| + result += visit_definition_item(item) + end + end + + result + end + + def visit_unordered_list_item(node) + # Separate text content from nested lists + text_content = +'' + nested_lists = +'' + + node.children.each do |child| + if child.class.name.include?('ListNode') + # This is a nested list - render it separately + nested_lists += visit(child) + else + # This is regular content + text_content += visit(child) + end + end + + text_content = text_content.chomp + + # Use level for nested indentation (TOP style uses tabs for each level) + level = node.level || 1 + indent = "\t" * (level - 1) + + result = "#{indent}●\t#{text_content}\n" + + # Add any nested lists after the item + result += nested_lists + result + end + + def visit_ordered_list_item(node, number) + # Separate text content from nested lists + text_content = +'' + nested_lists = +'' + + node.children.each do |child| + if child.class.name.include?('ListNode') + # This is a nested list - render it separately + nested_lists += visit(child) + else + # This is regular content + text_content += visit(child) + end + end + + text_content = text_content.chomp + + # Use level for nested indentation + level = node.level || 1 + indent = "\t" * (level - 1) + + result = "#{indent}#{number}\t#{text_content}\n" + + # Add any nested lists after the item + result += nested_lists + result + end + + def visit_item(node) + # Handle list items that come directly without parent list context + content = render_children(node).chomp + "●\t#{content}\n" + end + + def visit_definition_item(node) + # Handle definition term - use term_children (AST structure) + term = if node.term_children && 
!node.term_children.empty? + node.term_children.map { |child| visit(child) }.join + else + '' # No term available + end + + # Handle definition content (all children are definition content) + definition = if node.children && !node.children.empty? + node.children.map { |child| visit(child) }.join + end + + result = "#{term}☆\n" + result += "\t#{definition}\n" if definition + + result + end + + # Common code block rendering method used by all code block types + def render_code_block_common(node) + result = +'' + # Convert code_type to symbol if it's not already + code_type = node.code_type.to_sym + block_title = TITLES[code_type] || TITLES[:list] + + result += "\n" + result += "◆→開始:#{block_title}←◆\n" + + # Add caption if present + caption = render_caption_inline(node.caption_node) + unless caption.empty? + result += if node.id && (code_type == :list || code_type == :listnum) + # For list/listnum, use I18n formatting to match TOPBuilder + format_list_caption(node.id, caption) + elsif node.id + "■#{node.id}■#{caption}\n" + else + "■#{caption}\n" + end + result += "\n" + end + + # Add line numbers if needed + if node.line_numbers + code_content = render_children(node).chomp + lines = code_content.split("\n") + lines.each_with_index do |line, i| + line_num = (@first_line_number + i).to_s.rjust(2) + result += "#{line_num}: #{line}\n" + end + else + code_content = render_children(node) + # Remove trailing newline if present to avoid double newlines + code_content = code_content.chomp if code_content.end_with?("\n") + result += code_content + result += "\n" + end + + result += "\n" + result += "◆→終了:#{block_title}←◆\n" + result += "\n" + + result + end + + # Individual code block type visitors that delegate to common method + def visit_code_block_list(node) + render_code_block_common(node) + end + + def visit_code_block_listnum(node) + render_code_block_common(node) + end + + def visit_code_block_emlist(node) + render_code_block_common(node) + end + + def 
visit_code_block_emlistnum(node) + render_code_block_common(node) + end + + def visit_code_block_cmd(node) + render_code_block_common(node) + end + + def visit_code_block_source(node) + render_code_block_common(node) + end + + def visit_code_line(node) + render_children(node) + "\n" + end + + def visit_table(node) + result = +'' + @table_row_separator_count = 0 + + result += "\n" + result += "◆→開始:#{TITLES[:table]}←◆\n" + + # Add caption if present + caption = render_caption_inline(node.caption_node) + unless caption.empty? + result += if node.id + # Use I18n formatting to match TOPBuilder + format_table_caption(node.id, caption) + else + "■#{caption}\n" + end + result += "\n" + end + + # Process table content + result += render_children(node) + + result += "◆→終了:#{TITLES[:table]}←◆\n" + result += "\n" + + result + end + + def visit_table_row(node) + cells = [] + node.children.each do |cell| + cell_content = render_children(cell) + # Skip separator rows (rows that contain only dashes) + unless /^-+$/.match?(cell_content.strip) + cells << cell_content + end + end + + # Only process non-empty rows + return '' if cells.empty? || cells.all? { |cell| cell.strip.empty? } + + result = cells.join("\t") + "\n" + + # Add separator after header rows + @table_row_separator_count += 1 + # Check if this should be treated as header (simplified logic) + if @table_row_separator_count == 1 && should_add_table_separator? + result += "#{'-' * 12}\n" + end + + result + end + + def visit_table_cell(node) + content = render_children(node) + + # Apply bold formatting for header cells if configured (matches TOPBuilder) + if node.cell_type == :th && should_format_table_header? + "★#{content}☆" + else + content + end + end + + def visit_image(node) + result = +'' + + result += "\n" + result += "◆→開始:#{TITLES[:image]}←◆\n" + + # Add caption if present + caption = render_caption_inline(node.caption_node) + unless caption.empty? 
+ result += if node.id + # Use I18n formatting to match TOPBuilder + format_image_caption(node.id, caption) + else + "■#{caption}\n" + end + result += "\n" + end + + # Add image path with metrics + image_path = node.id + metrics = format_image_metrics(node) + result += "◆→#{image_path}#{metrics}←◆\n" + + result += "◆→終了:#{TITLES[:image]}←◆\n" + result += "\n" + + result + end + + def visit_minicolumn(node) + result = +'' + minicolumn_title = TITLES[node.minicolumn_type.to_sym] || node.minicolumn_type.to_s + + @minicolumn_stack.push(node.minicolumn_type) + + result += "\n" + result += "◆→開始:#{minicolumn_title}←◆\n" + + # Add caption if present + caption = render_caption_inline(node.caption_node) + unless caption.empty? + result += "■#{caption}\n" + result += "\n" + end + + result += render_children(node) + + result += "◆→終了:#{minicolumn_title}←◆\n" + result += "\n" + + @minicolumn_stack.pop + + result + end + + def visit_column(node) + result = +'' + caption = render_caption_inline(node.caption_node) + + result += "\n" + result += "#{caption}\n" unless caption.empty? 
+ result += render_children(node) + result += "\n" + + result + end + + # visit_block is now handled by Base renderer with dynamic method dispatch + + def visit_block_quote(node) + result = +'' + + result += "\n" + result += "◆→開始:#{TITLES[:quote]}←◆\n" + result += render_children(node) + result += "◆→終了:#{TITLES[:quote]}←◆\n" + result += "\n" + + result + end + + def visit_generic_block(node) + block_title = TITLES[node.block_type.to_sym] || node.block_type.to_s + result = +'' + + result += "\n" + result += "◆→開始:#{block_title}←◆\n" + result += render_children(node) + result += "◆→終了:#{block_title}←◆\n" + result += "\n" + + result + end + + # Block elements from todo-top.md + + def visit_block_lead(node) + result = +'' + result += "\n◆→開始:#{TITLES[:lead]}←◆\n" + result += render_children(node) + result += "◆→終了:#{TITLES[:lead]}←◆\n\n" + result + end + + alias_method :visit_block_read, :visit_block_lead + + def visit_block_flushright(node) + result = +'' + result += "\n◆→開始:#{TITLES[:flushright]}←◆\n" + result += render_children(node) + result += "◆→終了:#{TITLES[:flushright]}←◆\n\n" + result + end + + def visit_block_centering(node) + result = +'' + result += "\n◆→開始:#{TITLES[:centering]}←◆\n" + result += render_children(node) + result += "◆→終了:#{TITLES[:centering]}←◆\n\n" + result + end + + def visit_block_blankline(_node) + "\n" + end + + def visit_tex_equation(node) + result = +'' + result += "\n◆→開始:#{TITLES[:texequation]}←◆\n" + result += node.content if node.respond_to?(:content) + result += render_children(node) unless node.respond_to?(:content) + result += "\n◆→終了:#{TITLES[:texequation]}←◆\n\n" + result + end + + def visit_block_emtable(node) + result = +'' + @table_row_separator_count = 0 + + result += "\n" + result += "◆→開始:#{TITLES[:emtable]}←◆\n" + + # Add caption if present + caption = render_caption_inline(node.caption_node) + unless caption.empty? 
+ result += "■#{caption}\n" + result += "\n" + end + + # Process table content + result += render_children(node) + + result += "◆→終了:#{TITLES[:emtable]}←◆\n" + result += "\n" + + result + end + + def visit_block_imgtable(node) + result = +'' + + result += "\n" + result += "◆→開始:#{TITLES[:table]}←◆\n" + + # Add caption if present + caption = render_caption_inline(node.caption_node) + unless caption.empty? + result += if node.id + # Use I18n formatting to match TOPBuilder + format_table_caption(node.id, caption) + else + "■#{caption}\n" + end + result += "\n" + end + + # Add image path with metrics + image_path = node.id + metrics = format_image_metrics(node) + result += "◆→#{image_path}#{metrics}←◆\n" + + result += "◆→終了:#{TITLES[:table]}←◆\n" + result += "\n" + + result + end + + def render_inline_element(type, content, node) # rubocop:disable Metrics/CyclomaticComplexity + case type + when :b, :strong + "★#{content}☆" + when :i, :em + "▲#{content}☆" + when :code, :tt + "△#{content}☆" + when :ttb, :ttbold + "★#{content}☆◆→等幅フォント太字←◆" + when :tti + "▲#{content}☆◆→等幅フォントイタ←◆" + when :u + "@#{content}@◆→@〜@部分に下線←◆" + when :ami + "#{content}◆→DTP連絡:「#{content}」に網カケ←◆" + when :bou + "#{content}◆→DTP連絡:「#{content}」に傍点←◆" + when :keytop + "#{content}◆→キートップ#{content}←◆" + when :idx + "#{content}◆→索引項目:#{content}←◆" + when :hidx + "◆→索引項目:#{content}←◆" + when :balloon + "\t←#{content}" + when :m + "◆→TeX式ここから←◆#{content}◆→TeX式ここまで←◆" + when :ins + "◆→開始:挿入表現←◆#{content}◆→終了:挿入表現←◆" + when :del + "◆→開始:削除表現←◆#{content}◆→終了:削除表現←◆" + when :tcy + "◆→開始:回転←◆#{content}◆→終了:縦回転←◆" + when :maru + "#{content}◆→丸数字#{content}←◆" + when :hint + "◆→ヒントスタイルここから←◆#{content}◆→ヒントスタイルここまで←◆" + when :sup + "#{content}◆→DTP連絡:「#{content}」は上付き←◆" + when :sub + "#{content}◆→DTP連絡:「#{content}」は下付き←◆" + when :br + "\n" + when :href + render_href(node, content) + when :url # rubocop:disable Lint/DuplicateBranch + "△#{content}☆" + when :fn + render_footnote_ref(node, content) + when :ruby + 
render_ruby(node, content) + when :comment + render_comment(node, content) + when :raw + render_raw(node, content) + when :labelref + render_labelref(node, content) + when :pageref + render_pageref(node, content) + else + content + end + end + + def visit_footnote(node) + footnote_id = node.id + content = render_children(node).chomp + footnote_number = get_footnote_number(footnote_id) + + "【注#{footnote_number}】#{content}\n" + end + + def visit_text(node) + node.content || '' + end + + def visit_reference(node) + format_resolved_reference(node.resolved_data) + end + + private + + def generate_headline_prefix(level) + # Generate headline prefix based on chapter structure + # Similar to TOPBuilder's headline_prefix method + secnolevel = config['secnolevel'] || 2 + + if level > secnolevel || @chapter.nil? + return '' + end + + case level + when 1 + # Chapter level: just the chapter number + if @chapter.number + "#{@chapter.number} " + else + '' + end + when 2, 3, 4, 5, 6 + # Section levels: use counter from chapter + if @chapter.number + # Get section counter from chapter if available + # For now, return empty string as section counter needs proper implementation + # This matches the behavior of TOPBuilder which uses @sec_counter + end + '' + else + '' + end + end + + def should_add_table_separator? + # Add separator when th_bold is not enabled (matches TOPBuilder logic) + # TOPBuilder adds separator when: !@book.config['textmaker'] || !@book.config['textmaker']['th_bold'] + !config&.dig('textmaker', 'th_bold') + end + + def should_format_table_header? 
+ # Check config for header formatting + config&.dig('textmaker', 'th_bold') || false + end + + def format_image_metrics(node) + # Format image metrics if present + metrics = +'' + if node.metric + metrics = "、#{node.metric}" + end + metrics + end + + # Format list caption using I18n (matches TOPBuilder) + def format_list_caption(id, caption_text) + return "■#{caption_text}\n" unless @chapter + + begin + list_item = @chapter.list(id) + chapter_number = @chapter.number + item_number = list_item.number + + # Use TextFormatter to generate caption + formatted = text_formatter.format_caption_plain('list', chapter_number, item_number, caption_text) + "#{formatted}\n" + rescue KeyError, NoMethodError + # Fallback if list not found or chapter doesn't have list index + "■#{id}■#{caption_text}\n" + end + end + + # Format table caption using I18n (matches TOPBuilder) + def format_table_caption(id, caption_text) + return "■#{caption_text}\n" unless @chapter + + begin + table_item = @chapter.table(id) + chapter_number = @chapter.number + item_number = table_item.number + + # Use TextFormatter to generate caption + formatted = text_formatter.format_caption_plain('table', chapter_number, item_number, caption_text) + "#{formatted}\n" + rescue KeyError, NoMethodError + # Fallback if table not found or chapter doesn't have table index + "■#{id}■#{caption_text}\n" + end + end + + # Format image caption using I18n (matches TOPBuilder) + def format_image_caption(id, caption_text) + return "■#{caption_text}\n" unless @chapter + + begin + image_item = @chapter.image(id) + chapter_number = @chapter.number + item_number = image_item.number + + # Use TextFormatter to generate caption + formatted = text_formatter.format_caption_plain('image', chapter_number, item_number, caption_text) + "#{formatted}\n" + rescue KeyError, NoMethodError + # Fallback if image not found or chapter doesn't have image index + "■#{id}■#{caption_text}\n" + end + end + + def render_caption_inline(caption_node) + 
caption_node ? render_children(caption_node) : '' + end + + def render_href(node, content) + args = node.args || [] + if args.length >= 2 + url = args[0] + label = args[1] + "#{label}(△#{url}☆)" + else + "△#{content}☆" + end + end + + def render_footnote_ref(node, content) + args = node.args || [] + footnote_id = args.first || content + footnote_number = get_footnote_number(footnote_id) + "【注#{footnote_number}】" + end + + def render_ruby(node, content) + args = node.args || [] + if args.length >= 2 + base = args[0] + ruby = args[1] + "#{base}◆→DTP連絡:「#{base}」に「#{ruby}」とルビ←◆" + else + content + end + end + + def render_comment(_node, content) + # Only render in draft mode + if config['draft'] + "◆→#{content}←◆" + else + '' + end + end + + def render_raw(node, content) + args = node.args || [] + if args.any? + format = args.first + if format == 'top' + content + else + '' # Ignore raw content for other formats + end + else + content + end + end + + def render_labelref(node, content) + args = node.args || [] + label_id = args.first || content + "「◆→#{label_id}←◆」" + end + + def render_pageref(node, content) + args = node.args || [] + label_id = args.first || content + "●ページ◆→#{label_id}←◆" + end + + # Format resolved reference based on ResolvedData + # Gets plain text from TextFormatter and wraps it with TOP-specific markup + def format_resolved_reference(data) + # Get plain text from TextFormatter (no TOP markup) + plain_text = text_formatter.format_reference(data.reference_type, data) + + # Wrap with TOP-specific markup based on reference type + case data.reference_type + when :footnote + # For footnote, use 【注】 markup + number = data.item_number || data.item_id + "【注#{number}】" + when :endnote + # For endnote, use 【後注】 markup + number = data.item_number || data.item_id + "【後注#{number}】" + else + # For other types, return plain text as-is + plain_text + end + end + + def visit_block_label(_node) + # Labels are not rendered in TOP format + '' + end + + def 
visit_block_printendnotes(_node) + # Print all endnotes collected in the chapter + return '' unless @chapter + return '' if @chapter.endnotes.size == 0 + + result = +'' + @chapter.endnotes.each do |en| + # Format: (number) content + number = en.number + content_text = en.content || '' + result += "(#{number}) #{content_text}\n" + end + result + end + + def visit_block_bibpaper(node) + id = node.args[0] + caption_text = node.args[1] + + result = +'' + if id && @chapter + begin + bibpaper_number = @chapter.bibpaper(id).number + result += "[#{bibpaper_number}]" + rescue KeyError + result += "[#{id}]" + end + end + + # Render caption with inline elements if it has a caption_node + if node.respond_to?(:caption_node) && node.caption_node + caption = render_caption_inline(node.caption_node) + result += " #{caption}\n" + elsif caption_text + result += " #{caption_text}\n" + else + result += "\n" + end + + # Render body content + content = render_children(node) + result += "#{content}\n" unless content.strip.empty? + + result + end + + def visit_embed(node) + # Check if content should be output for this renderer + # TOP format accepts 'top' and 'text' as target builders + return '' unless node.targeted_for?('top') || node.targeted_for?('text') + + # Get content + content = node.content || '' + + # Process \n based on embed type + case node.embed_type + when :inline, :raw + # For inline and raw embeds, convert \\n to actual newlines + content = content.gsub('\\n', "\n") + end + + # For block embeds, add trailing newline + node.embed_type == :block ? 
content + "\n" : content + end + + def get_footnote_number(footnote_id) + # Use chapter's footnote numbering (matches TOPBuilder) + return 1 unless @chapter + + begin + footnote = @chapter.footnote(footnote_id) + footnote&.number || 1 + rescue KeyError + # Fallback if footnote not found + 1 + end + end + end + end +end diff --git a/lib/review/snapshot_location.rb b/lib/review/snapshot_location.rb index 17eb22a35..e9d971ed9 100644 --- a/lib/review/snapshot_location.rb +++ b/lib/review/snapshot_location.rb @@ -21,6 +21,21 @@ def string "#{@filename}:#{@lineno}" end + def to_h + { + filename: filename, + lineno: lineno + } + end + + # Format location information for error messages + # Returns a string like " at line 42 in chapter01.re" + def format_for_error + info = " at line #{@lineno}" + info += " in #{@filename}" if @filename + info + end + alias_method :to_s, :string def snapshot diff --git a/lib/review/textutils.rb b/lib/review/textutils.rb index 34904dfe9..bb2bc045a 100644 --- a/lib/review/textutils.rb +++ b/lib/review/textutils.rb @@ -10,6 +10,7 @@ # require 'nkf' require 'digest' +require 'unicode/eaw' module ReVIEW module TextUtils diff --git a/review.gemspec b/review.gemspec index 31d6d451a..c6b75c445 100644 --- a/review.gemspec +++ b/review.gemspec @@ -31,7 +31,9 @@ Gem::Specification.new do |gem| gem.add_dependency('rubyzip') gem.add_dependency('tty-logger') gem.add_development_dependency('chunky_png') + gem.add_development_dependency('diff-lcs') gem.add_development_dependency('math_ml') + gem.add_development_dependency('nokogiri') gem.add_development_dependency('playwright-runner') gem.add_development_dependency('pygments.rb') gem.add_development_dependency('rake') diff --git a/samples/debug-book/edge_cases_test.re b/samples/debug-book/edge_cases_test.re index 15f691735..151468b2b 100644 --- a/samples/debug-book/edge_cases_test.re +++ b/samples/debug-book/edge_cases_test.re @@ -277,6 +277,8 @@ class ValidationError extends Error { 
対策として@<list>{empty_and_special_cases}で示したnullチェックが重要。 //} +//blankline + == まとめ このエッジケーステストでは以下を検証: diff --git a/test/README.md b/test/README.md new file mode 100644 index 000000000..ca47af1cf --- /dev/null +++ b/test/README.md @@ -0,0 +1,179 @@ +# Re:VIEWプロジェクト テストファイル一覧 + +このドキュメントは、Re:VIEWプロジェクトのテストファイルの概要を説明します。 + +## テストファイルのカテゴリー + +### AST(抽象構文木)関連テスト + +| ファイル名 | 説明 | +|-----------|------| +| `test_ast_basic.rb` | AST基本ノード(Node、HeadlineNode)の作成と基本機能のテスト | +| `test_ast_inline.rb` | インライン要素(TextNode等)のAST表現のテスト | +| `test_ast_inline_structure.rb` | インライン構造の詳細なASTテスト | +| `test_ast_comprehensive_inline.rb` | インライン要素の包括的なASTテスト | +| `test_ast_embed.rb` | 埋め込み要素のASTテスト | +| `test_ast_lists.rb` | リスト要素のAST表現のテスト | +| `test_ast_comprehensive.rb` | AST機能の包括的なテスト | +| `test_ast_json_serialization.rb` | ASTのJSON形式へのシリアライズ機能のテスト | +| `test_ast_json_verification.rb` | ASTのJSON検証機能のテスト | +| `test_ast_review_generator.rb` | ASTからRe:VIEW形式への逆変換のテスト | +| `test_ast_bidirectional_conversion.rb` | ASTの双方向変換(Re:VIEW↔AST)のテスト | +| `test_ast_analyzer.rb` | AST解析機能のテスト | +| `test_ast_indexer.rb` | AST索引機能のテスト | +| `test_ast_indexer_pure.rb` | AST索引機能の単体テスト | +| `test_ast_structure_debug.rb` | AST構造のデバッグ機能のテスト | +| `test_full_ast_mode.rb` | 完全ASTモードのテスト | + +### ビルダー関連テスト + +| ファイル名 | 説明 | +|-----------|------| +| `test_builder.rb` | 基底Builderクラスの基本機能テスト | +| `test_htmlbuilder.rb` | HTML出力ビルダーの機能テスト | +| `test_latexbuilder.rb` | LaTeX出力ビルダーの機能テスト | +| `test_latexbuilder_v2.rb` | LaTeXビルダーのバージョン2機能テスト | +| `test_markdownbuilder.rb` | Markdown出力ビルダーの機能テスト | +| `test_plaintextbuilder.rb` | プレーンテキスト出力ビルダーのテスト | +| `test_idgxmlbuilder.rb` | InDesign XML出力ビルダーのテスト | +| `test_topbuilder.rb` | TOP(テキスト)出力ビルダーのテスト | +| `test_rstbuilder.rb` | reStructuredText出力ビルダーのテスト | +| `test_md2inaobuilder.rb` | Markdown→InDesign形式変換ビルダーのテスト | +| `test_indexbuilder.rb` | 索引ビルダーの機能テスト | +| `test_jsonbuilder.rb` | JSON出力ビルダーのテスト | + +### レンダラー関連テスト + +| ファイル名 | 説明 | +|-----------|------| +| 
`test_html_renderer.rb` | HTMLレンダラーのテスト | +| `test_latex_renderer.rb` | LaTeXレンダラーのテスト | +| `test_html_renderer_builder_comparison.rb` | HTMLレンダラーとビルダーの比較テスト | +| `test_latex_renderer_builder_comparison.rb` | LaTeXレンダラーとビルダーの比較テスト | + +### コンパイラ・パーサー関連テスト + +| ファイル名 | 説明 | +|-----------|------| +| `test_compiler.rb` | Re:VIEWソースコードのコンパイラ機能テスト | +| `test_preprocessor.rb` | プリプロセッサ(#@mapfile等)の機能テスト | +| `test_converter.rb` | 形式変換コンバーターのテスト | +| `test_list_parser.rb` | リスト要素のパーサーテスト | +| `test_caption_parser.rb` | キャプションパーサーのテスト | +| `test_caption_node.rb` | キャプションノードのテスト | +| `test_caption_inline_integration.rb` | キャプションとインライン要素の統合テスト | + +### 書籍構造関連テスト + +| ファイル名 | 説明 | +|-----------|------| +| `test_book.rb` | 書籍全体の構造と管理機能のテスト | +| `test_book_chapter.rb` | 章(Chapter)クラスの機能テスト | +| `test_book_part.rb` | 部(Part)クラスの機能テスト | +| `test_catalog.rb` | カタログ(章構成)機能のテスト | +| `test_index.rb` | 索引機能のテスト | + +### メーカー(生成器)関連テスト + +| ファイル名 | 説明 | +|-----------|------| +| `test_epubmaker.rb` | EPUB生成機能のテスト | +| `test_epub3maker.rb` | EPUB3生成機能のテスト | +| `test_pdfmaker.rb` | PDF生成機能のテスト | +| `test_makerhelper.rb` | メーカー共通ヘルパー機能のテスト | + +### コマンドラインツール関連テスト + +| ファイル名 | 説明 | +|-----------|------| +| `test_epubmaker_cmd.rb` | review-epubmakerコマンドのテスト | +| `test_pdfmaker_cmd.rb` | review-pdfmakerコマンドのテスト | +| `test_textmaker_cmd.rb` | review-textmakerコマンドのテスト | +| `test_idgxmlmaker_cmd.rb` | review-idgxmlmakerコマンドのテスト | +| `test_catalog_converter_cmd.rb` | カタログ変換コマンドのテスト | + +### ユーティリティ関連テスト + +| ファイル名 | 説明 | +|-----------|------| +| `test_textutils.rb` | テキスト処理ユーティリティのテスト | +| `test_htmlutils.rb` | HTML処理ユーティリティのテスト | +| `test_image_finder.rb` | 画像ファイル検索機能のテスト | +| `test_location.rb` | ソースコード位置情報のテスト | +| `test_lineinput.rb` | 行入力処理のテスト | +| `test_logger.rb` | ログ出力機能のテスト | +| `test_yamlloader.rb` | YAML設定ファイル読み込みのテスト | +| `test_template.rb` | テンプレート処理機能のテスト | +| `test_sec_counter.rb` | セクション番号カウンターのテスト | +| `test_zip_exporter.rb` | ZIP形式エクスポート機能のテスト | + +### 設定・国際化関連テスト + +| ファイル名 | 
説明 | +|-----------|------| +| `test_configure.rb` | 設定機能のテスト | +| `test_i18n.rb` | 国際化(多言語対応)機能のテスト | +| `test_extentions_hash.rb` | ハッシュ拡張機能のテスト | + +### その他の機能テスト + +| ファイル名 | 説明 | +|-----------|------| +| `test_review_ext.rb` | review-ext.rb拡張機能のテスト | +| `test_tocprinter.rb` | 目次出力機能のテスト | +| `test_htmltoc.rb` | HTML目次生成機能のテスト | +| `test_webtocprinter.rb` | Web用目次出力機能のテスト | +| `test_reviewheaderlistener.rb` | ヘッダー情報リスナーのテスト | +| `test_update.rb` | 更新機能のテスト | +| `test_img_math.rb` | 数式画像処理のテスト | +| `test_img_graph.rb` | グラフ画像処理のテスト | +| `test_project_integration.rb` | プロジェクト統合テスト | +| `test_dumper.rb` | ダンプ機能のテスト | +| `test_helper.rb` | テストヘルパー機能 | + +### ブロック・インライン処理関連テスト + +| ファイル名 | 説明 | +|-----------|------| +| `test_block_processor_inline.rb` | ブロック内インライン処理のテスト | +| `test_code_block_debug.rb` | コードブロックのデバッグ機能テスト | +| `test_code_block_inline_processing.rb` | コードブロック内インライン処理のテスト | +| `test_code_block_original_text.rb` | コードブロックの元テキスト保持機能のテスト | +| `test_original_text_integration.rb` | 元テキスト統合機能のテスト | +| `test_new_block_commands.rb` | 新しいブロックコマンドのテスト | +| `test_column_sections.rb` | コラムセクション機能のテスト | +| `test_list_ast_processor.rb` | リストAST処理のテスト | +| `test_nested_list_builder.rb` | ネストしたリストのビルド機能のテスト | + +## テストの実行方法 + +全テストを実行する場合: +```bash +bundle exec rake test +``` + +特定のテストファイルを実行する場合: +```bash +bundle exec ruby test/test_[ファイル名].rb +``` + +特定のパターンにマッチするテストを実行する場合: +```bash +bundle exec rake test[pattern] +``` + +## テストの構成 + +各テストファイルは基本的に以下の構造になっています: + +1. `test/test_helper.rb` を require +2. Test::Unit::TestCase を継承したテストクラスを定義 +3. `setup` メソッドで初期化処理 +4. 
# frozen_string_literal: true

# Test helper that turns a caption given as a String into a
# ReVIEW::AST::CaptionNode. nil and '' yield nil; an existing CaptionNode is
# returned unchanged.
class CaptionParserHelper
  # One-shot convenience wrapper around #parse.
  def self.parse(caption, location: nil, inline_processor: nil)
    helper = new(location: location, inline_processor: inline_processor)
    helper.parse(caption)
  end

  def initialize(location: nil, inline_processor: nil)
    @location = location
    @inline_processor = inline_processor
  end

  def parse(caption)
    if caption.nil? || caption == ''
      nil
    elsif caption.is_a?(ReVIEW::AST::CaptionNode)
      caption
    else
      parse_string(caption)
    end
  end

  private

  # Builds the CaptionNode. Inline markup ("@<...") is handed to the inline
  # processor when one was supplied; otherwise the whole caption becomes a
  # single TextNode child.
  def parse_string(caption)
    require 'review/ast/caption_node'
    require 'review/ast/text_node'

    ReVIEW::AST::CaptionNode.new(location: @location).tap do |caption_node|
      if @inline_processor && caption.include?('@<')
        @inline_processor.parse_inline_elements(caption, caption_node)
      else
        text_node = ReVIEW::AST::TextNode.new(location: @location, content: caption)
        caption_node.add_child(text_node)
      end
    end
  end
end
+ assert(result.same_hash?) + assert(!result.different?) + end + + def test_different_content + left = '# Heading 1' + right = '# Heading 2' + + result = @differ.compare(left, right) + assert(!result.equal?) + assert(result.different?) + end + + def test_normalize_whitespace + left = "# Heading\n\nParagraph text" + right = "# Heading \n\n Paragraph text " + + result = @differ.compare(left, right) + assert(result.equal?, 'Should normalize whitespace differences') + end + + def test_normalize_blank_lines + left = "# Heading\n\nParagraph" + right = "# Heading\n\n\n\nParagraph" + + result = @differ.compare(left, right) + assert(result.equal?, 'Should normalize multiple blank lines') + end + + def test_normalize_list_markers + left = "* Item 1\n* Item 2" + right = "- Item 1\n+ Item 2" + + result = @differ.compare(left, right) + assert(result.equal?, 'Should normalize list markers to *') + end + + def test_normalize_heading_spacing + left = '# Heading' + right = '#Heading' + + result = @differ.compare(left, right) + assert(result.equal?, 'Should normalize heading spacing') + end + + def test_normalize_heading_trailing_hashes + left = '# Heading' + right = '# Heading #' + + result = @differ.compare(left, right) + assert(result.equal?, 'Should remove trailing # from headings') + end + + def test_pretty_diff_output + left = "# Heading 1\n\nParagraph" + right = "# Heading 2\n\nParagraph" + + result = @differ.compare(left, right) + diff_output = result.pretty_diff + + assert_match(/Heading 1/, diff_output) + assert_match(/Heading 2/, diff_output) + end + + def test_quick_equality_check + left = '# Heading' + right = '# Heading ' + + assert(@differ.equal?(left, right), 'Should have quick equality check') + end + + def test_diff_method + left = 'Line 1' + right = 'Line 2' + + diff_output = @differ.diff(left, right) + assert(!diff_output.empty?, 'Should return diff output') + end + + def test_empty_strings + left = '' + right = '' + + result = @differ.compare(left, right) + 
assert(result.equal?) + end + + def test_nil_handling + left = nil + right = '' + + result = @differ.compare(left, right) + assert(result.equal?, 'nil and empty string should be equivalent') + end + + def test_complex_markdown_document + left = <<~MD + # Main Heading + + This is a paragraph with **bold** and *italic*. + + * List item 1 + * List item 2 + + ## Section + + Another paragraph. + MD + + right = <<~MD + # Main Heading + + This is a paragraph with **bold** and *italic*. + + - List item 1 + + List item 2 + + ##Section + + Another paragraph. + MD + + result = @differ.compare(left, right) + assert(result.equal?, 'Should handle complex documents with normalization') + end + + def test_code_blocks_preserved + left = <<~MD + ```ruby + def hello + puts "world" + end + ``` + MD + + right = <<~MD + ```ruby + def hello + puts "world" + end + ``` + MD + + result = @differ.compare(left, right) + assert(result.equal?) + end + + def test_disable_normalization_options + differ = ReVIEW::AST::Diff::Markdown.new( + ignore_whitespace: false, + ignore_blank_lines: false, + normalize_headings: false, + normalize_lists: false + ) + + left = '# Heading' + right = '#Heading' + + result = differ.compare(left, right) + assert(!result.equal?, 'Should not normalize when options disabled') + end +end diff --git a/test/ast/diff/test_node.rb b/test/ast/diff/test_node.rb new file mode 100644 index 000000000..0a89b8095 --- /dev/null +++ b/test/ast/diff/test_node.rb @@ -0,0 +1,164 @@ +# frozen_string_literal: true + +require_relative '../../test_helper' +require 'review/ast' +require 'review/ast/diff/node' + +class TestASTDiffNode < Test::Unit::TestCase + def setup + @comparator = ReVIEW::AST::Diff::Node.new + @location = ReVIEW::SnapshotLocation.new('test.re', 1) + end + + def test_compare_identical_text_nodes + node1 = ReVIEW::AST::TextNode.new(location: @location, content: 'Hello') + node2 = ReVIEW::AST::TextNode.new(location: @location, content: 'Hello') + + result = 
@comparator.compare(node1, node2) + assert_true(result.equal?) + assert_equal('AST nodes are equivalent', result.to_s) + end + + def test_compare_different_text_nodes + node1 = ReVIEW::AST::TextNode.new(location: @location, content: 'Hello') + node2 = ReVIEW::AST::TextNode.new(location: @location, content: 'World') + + result = @comparator.compare(node1, node2) + assert_false(result.equal?) + assert_match(/text content mismatch/, result.to_s) + end + + def test_compare_nil_nodes + result = @comparator.compare(nil, nil) + assert_true(result.equal?) + end + + def test_compare_nil_vs_non_nil + node1 = ReVIEW::AST::TextNode.new(location: @location, content: 'Hello') + result = @comparator.compare(node1, nil) + assert_false(result.equal?) + assert_match(/node2 is nil/, result.to_s) + end + + def test_compare_different_node_types + node1 = ReVIEW::AST::TextNode.new(location: @location, content: 'Hello') + node2 = ReVIEW::AST::ParagraphNode.new(location: @location) + + result = @comparator.compare(node1, node2) + assert_false(result.equal?) + assert_match(/node types differ/, result.to_s) + end + + def test_compare_headlines_with_same_attributes + caption1 = ReVIEW::AST::CaptionNode.new(location: @location) + caption1.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Title')) + + caption2 = ReVIEW::AST::CaptionNode.new(location: @location) + caption2.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Title')) + + node1 = ReVIEW::AST::HeadlineNode.new(location: @location, level: 2, label: 'intro', caption_node: caption1) + node2 = ReVIEW::AST::HeadlineNode.new(location: @location, level: 2, label: 'intro', caption_node: caption2) + + result = @comparator.compare(node1, node2) + assert_true(result.equal?) 
+ end + + def test_compare_headlines_with_different_levels + caption1 = ReVIEW::AST::CaptionNode.new(location: @location) + caption1.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Title')) + + caption2 = ReVIEW::AST::CaptionNode.new(location: @location) + caption2.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Title')) + + node1 = ReVIEW::AST::HeadlineNode.new(location: @location, level: 2, label: 'intro', caption_node: caption1) + node2 = ReVIEW::AST::HeadlineNode.new(location: @location, level: 3, label: 'intro', caption_node: caption2) + + result = @comparator.compare(node1, node2) + assert_false(result.equal?) + assert_match(/headline level mismatch/, result.to_s) + end + + def test_compare_nodes_with_children + para1 = ReVIEW::AST::ParagraphNode.new(location: @location) + para1.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Hello')) + para1.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'World')) + + para2 = ReVIEW::AST::ParagraphNode.new(location: @location) + para2.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Hello')) + para2.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'World')) + + result = @comparator.compare(para1, para2) + assert_true(result.equal?) + end + + def test_compare_nodes_with_different_child_count + para1 = ReVIEW::AST::ParagraphNode.new(location: @location) + para1.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Hello')) + + para2 = ReVIEW::AST::ParagraphNode.new(location: @location) + para2.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Hello')) + para2.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'World')) + + result = @comparator.compare(para1, para2) + assert_false(result.equal?) 
+ assert_match(/children count mismatch/, result.to_s) + end + + def test_compare_code_blocks_with_lang + code1 = ReVIEW::AST::CodeBlockNode.new(location: @location, id: 'sample', lang: 'ruby') + code2 = ReVIEW::AST::CodeBlockNode.new(location: @location, id: 'sample', lang: 'ruby') + + result = @comparator.compare(code1, code2) + assert_true(result.equal?) + end + + def test_compare_code_blocks_with_different_lang + code1 = ReVIEW::AST::CodeBlockNode.new(location: @location, id: 'sample', lang: 'ruby') + code2 = ReVIEW::AST::CodeBlockNode.new(location: @location, id: 'sample', lang: 'python') + + result = @comparator.compare(code1, code2) + assert_false(result.equal?) + assert_match(/code block lang mismatch/, result.to_s) + end + + def test_compare_inline_nodes + inline1 = ReVIEW::AST::InlineNode.new(location: @location, inline_type: 'b') + inline2 = ReVIEW::AST::InlineNode.new(location: @location, inline_type: 'b') + + result = @comparator.compare(inline1, inline2) + assert_true(result.equal?) + end + + def test_compare_inline_nodes_different_type + inline1 = ReVIEW::AST::InlineNode.new(location: @location, inline_type: 'b') + inline2 = ReVIEW::AST::InlineNode.new(location: @location, inline_type: 'i') + + result = @comparator.compare(inline1, inline2) + assert_false(result.equal?) + assert_match(/inline type mismatch/, result.to_s) + end + + def test_comparison_result_with_path + node1 = ReVIEW::AST::TextNode.new(location: @location, content: 'Hello') + node2 = ReVIEW::AST::TextNode.new(location: @location, content: 'World') + + result = @comparator.compare(node1, node2, 'custom.path') + assert_false(result.equal?) 
+ assert_match(/custom\.path/, result.to_s) + end + + def test_multiple_differences + para1 = ReVIEW::AST::ParagraphNode.new(location: @location) + para1.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Hello')) + para1.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'World')) + + para2 = ReVIEW::AST::ParagraphNode.new(location: @location) + para2.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Goodbye')) + para2.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Moon')) + + result = @comparator.compare(para1, para2) + assert_false(result.equal?) + # Should have 2 differences (one for each child) + assert_equal(2, result.differences.size) + end +end diff --git a/test/ast/test_ast_basic.rb b/test/ast/test_ast_basic.rb new file mode 100644 index 000000000..5fba05224 --- /dev/null +++ b/test/ast/test_ast_basic.rb @@ -0,0 +1,109 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast' +require 'review/htmlbuilder' +require 'review/compiler' +require 'review/book' +require 'review/book/chapter' + +class TestASTBasic < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + ReVIEW::I18n.setup(@config['language']) + end + + def test_ast_node_creation + node = ReVIEW::AST::ParagraphNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + assert_equal [], node.children + assert_nil(node.parent) + assert_equal nil, node.location.filename + assert_equal 0, node.location.lineno + end + + def test_headline_node + location = ReVIEW::SnapshotLocation.new(nil, 0) + node = ReVIEW::AST::HeadlineNode.new( + location: location, + level: 1, + label: 'test-label', + caption_node: CaptionParserHelper.parse('Test Headline', location: location) + ) + + hash = node.to_h + assert_equal 'HeadlineNode', hash[:type] + 
assert_equal 1, hash[:level] + assert_equal 'test-label', hash[:label] + expected_location = { filename: nil, lineno: 0 } + assert_equal({ children: [{ content: 'Test Headline', location: expected_location, type: 'TextNode' }], location: expected_location, type: 'CaptionNode' }, hash[:caption_node]) + assert_equal 'Test Headline', node.caption_text + end + + def test_paragraph_node + node = ReVIEW::AST::ParagraphNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + text_node = ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'This is a test paragraph.') + node.add_child(text_node) + + hash = node.to_h + assert_equal 'ParagraphNode', hash[:type] + # Check that the text content is in the children + assert_equal 1, hash[:children].size + assert_equal 'This is a test paragraph.', hash[:children][0][:content] + end + + def test_ast_compilation_basic + chapter_content = <<~EOB + = Test Chapter + + This is a test paragraph. + + == Section 1 + + Another paragraph here. + EOB + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = chapter_content + + compiler = ReVIEW::AST::Compiler.new + ast_result = compiler.compile_to_ast(chapter) + + assert_not_nil(ast_result) + assert_equal ReVIEW::AST::DocumentNode, ast_result.class + assert ast_result.children.any? 
+ + options = ReVIEW::AST::JSONSerializer::Options.new(pretty: true) + json_result = ReVIEW::AST::JSONSerializer.serialize(ast_result, options) + + parsed = JSON.parse(json_result) + assert parsed.is_a?(Hash) + assert_equal 'DocumentNode', parsed['type'] + assert parsed.key?('children') + end + + def test_json_output_format + location = ReVIEW::SnapshotLocation.new(nil, 0) + node = ReVIEW::AST::DocumentNode.new(location: location) + child_node = ReVIEW::AST::HeadlineNode.new( + location: location, + level: 1, + caption_node: CaptionParserHelper.parse('Test', location: location) + ) + + node.add_child(child_node) + + json_str = node.to_json + parsed = JSON.parse(json_str) + + assert_equal 'DocumentNode', parsed['type'] + assert_equal 1, parsed['children'].size + assert_equal 'HeadlineNode', parsed['children'][0]['type'] + assert_equal 1, parsed['children'][0]['level'] + expected_location = { 'filename' => nil, 'lineno' => 0 } + assert_equal({ 'children' => [{ 'content' => 'Test', 'location' => expected_location, 'type' => 'TextNode' }], 'location' => expected_location, 'type' => 'CaptionNode' }, parsed['children'][0]['caption_node']) + end +end diff --git a/test/ast/test_ast_bidirectional_conversion.rb b/test/ast/test_ast_bidirectional_conversion.rb new file mode 100644 index 000000000..07a2c80bd --- /dev/null +++ b/test/ast/test_ast_bidirectional_conversion.rb @@ -0,0 +1,383 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast' +require 'review/ast/compiler' +require 'review/ast/diff/node' +require 'review/ast/json_serializer' +require 'review/ast/review_generator' +require 'review/book' +require 'review/book/chapter' +require 'json' +require 'stringio' + +class TestASTBidirectionalConversion < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + @log_io = StringIO.new + ReVIEW.logger = 
ReVIEW::Logger.new(@log_io) + ReVIEW::I18n.setup(@config['language']) + @generator = ReVIEW::AST::ReVIEWGenerator.new + end + + def test_simple_round_trip_conversion + # Test if caption serialization is now fixed with CaptionNode.to_h method + + content = <<~EOB + = Test Chapter + + This is a simple paragraph. + EOB + + # Step 1: Re:VIEW -> AST + ast_root = compile_to_ast(content) + assert_not_nil(ast_root) + + # Step 2: AST -> JSON + json_string = ReVIEW::AST::JSONSerializer.serialize(ast_root) + assert_not_nil(json_string) + parsed_json = JSON.parse(json_string) + assert_equal 'DocumentNode', parsed_json['type'] + + # Step 3: JSON -> AST + regenerated_ast = ReVIEW::AST::JSONSerializer.deserialize(json_string) + assert_not_nil(regenerated_ast) + assert_equal 'ReVIEW::AST::DocumentNode', regenerated_ast.class.name + + # Step 4: AST -> Re:VIEW + regenerated_content = @generator.generate(regenerated_ast) + assert_not_nil(regenerated_content) + + # Verify basic structure is preserved + assert_match(/= Test Chapter/, regenerated_content) + assert_match(/This is a simple paragraph/, regenerated_content) + end + + def test_inline_elements_round_trip + # Caption serialization is now fixed + + content = <<~EOB + = Inline Test + + This has @<b>{bold} and @<i>{italic} text. 
+ EOB + + original_ast = compile_to_ast(content) + json_string = ReVIEW::AST::JSONSerializer.serialize(original_ast) + regenerated_ast = ReVIEW::AST::JSONSerializer.deserialize(json_string) + regenerated_content = @generator.generate(regenerated_ast) + + # Check that inline elements are preserved + assert_match(/@<b>\{bold\}/, regenerated_content) + assert_match(/@<i>\{italic\}/, regenerated_content) + end + + def test_list_round_trip + content = <<~EOB + = List Test + + * Item 1 + * Item 2 + ** Item 2-1 + ** Item 2-2 + * Item 3 + ** Item 3-1 + ** Item 3-2 + EOB + + original_ast = compile_to_ast(content) + json_string = ReVIEW::AST::JSONSerializer.serialize(original_ast) + regenerated_ast = ReVIEW::AST::JSONSerializer.deserialize(json_string) + regenerated_content = @generator.generate(regenerated_ast) + + # Check that list structure is preserved with nested items + assert_match(/\* Item 1/, regenerated_content) + assert_match(/\* Item 2/, regenerated_content) + assert_match(/\* Item 3/, regenerated_content) + + # For nested items, they might be rendered differently (e.g., as part of parent item) + # So let's check they exist in the content somehow + assert_match(/Item 2-1/, regenerated_content) + assert_match(/Item 2-2/, regenerated_content) + assert_match(/Item 3-1/, regenerated_content) + assert_match(/Item 3-2/, regenerated_content) + + # Verify that list items appear in order + _lines = regenerated_content.split("\n") + + # Find all occurrences of items in the content + item1_index = regenerated_content.index('Item 1') + item2_index = regenerated_content.index('Item 2') + item2_1_index = regenerated_content.index('Item 2-1') + item2_2_index = regenerated_content.index('Item 2-2') + item3_index = regenerated_content.index('Item 3') + item3_1_index = regenerated_content.index('Item 3-1') + item3_2_index = regenerated_content.index('Item 3-2') + + # Check all items were found + assert_not_nil(item1_index, 'Item 1 not found') + assert_not_nil(item2_index, 'Item 2 
not found') + assert_not_nil(item2_1_index, 'Item 2-1 not found') + assert_not_nil(item2_2_index, 'Item 2-2 not found') + assert_not_nil(item3_index, 'Item 3 not found') + assert_not_nil(item3_1_index, 'Item 3-1 not found') + assert_not_nil(item3_2_index, 'Item 3-2 not found') + + # Check correct ordering + assert item1_index < item2_index, 'Item 1 should come before Item 2' + assert item2_index < item2_1_index, 'Item 2 should come before Item 2-1' + assert item2_1_index < item2_2_index, 'Item 2-1 should come before Item 2-2' + assert item2_2_index < item3_index, 'Item 2-2 should come before Item 3' + assert item3_index < item3_1_index, 'Item 3 should come before Item 3-1' + assert item3_1_index < item3_2_index, 'Item 3-1 should come before Item 3-2' + end + + def test_code_block_round_trip + content = <<~EOB + = Code Test + + //list[sample][Sample @<b>{Code}][ruby]{ + puts "Hello" + def greet + puts "Hi" + end + //} + EOB + + original_ast = compile_to_ast(content) + json_string = ReVIEW::AST::JSONSerializer.serialize(original_ast) + regenerated_ast = ReVIEW::AST::JSONSerializer.deserialize(json_string) + regenerated_content = @generator.generate(regenerated_ast) + + # Check that code block structure is preserved + assert_match(%r{//list\[sample\]\[Sample @<b>\{Code\}\]}, regenerated_content) + assert_match(/puts "Hello"/, regenerated_content) + assert_match(/def greet/, regenerated_content) + end + + def test_table_round_trip + content = <<~EOB + = Table Test + + //table[table1][Sample Table]{ + Name Age + ------------ + Alice 25 + Bob 30 + //} + EOB + + original_ast = compile_to_ast(content) + json_string = ReVIEW::AST::JSONSerializer.serialize(original_ast) + regenerated_ast = ReVIEW::AST::JSONSerializer.deserialize(json_string) + regenerated_content = @generator.generate(regenerated_ast) + + # Check that table structure is preserved (caption will be JSON but table content should work) + assert_match(%r{//table\[table1\]}, regenerated_content) # ID should be 
preserved + assert_match(/Name\s+Age/, regenerated_content) + assert_match(/Alice\s+25/, regenerated_content) + assert_match(/Bob\s+30/, regenerated_content) + + # Verify table structure + assert_match(/------------/, regenerated_content) # Table separator + assert_match(%r{//\}}, regenerated_content) # Table end + end + + def test_image_round_trip + content = <<~EOB + = Image Test + + //image[sample_image][Sample Image @<b>{Caption}]{ + //} + EOB + + original_ast = compile_to_ast(content) + json_string = ReVIEW::AST::JSONSerializer.serialize(original_ast) + regenerated_ast = ReVIEW::AST::JSONSerializer.deserialize(json_string) + regenerated_content = @generator.generate(regenerated_ast) + + # Check that image structure is preserved + assert_match(%r{//image\[sample_image\]}, regenerated_content) # ID should be preserved + assert_match(/Sample Image/, regenerated_content) # Caption content should be preserved + assert_match(/@<b>\{Caption\}/, regenerated_content) # Inline elements in caption should be preserved + end + + def test_image_with_options_round_trip + content = <<~EOB + = Image with Options Test + + //image[scaled_image][Scaled Image][scale=0.5]{ + //} + EOB + + original_ast = compile_to_ast(content) + json_string = ReVIEW::AST::JSONSerializer.serialize(original_ast) + regenerated_ast = ReVIEW::AST::JSONSerializer.deserialize(json_string) + regenerated_content = @generator.generate(regenerated_ast) + + # Check that image with options structure is preserved + assert_match(%r{//image\[scaled_image\]}, regenerated_content) # ID should be preserved + assert_match(/Scaled Image/, regenerated_content) # Caption should be preserved + assert_match(/scale=0\.5/, regenerated_content) # Options should be preserved + end + + def test_complex_structure_round_trip + content = <<~EOB + = Complex Test + + This is a paragraph with @<b>{bold} text. + + 1. First item + 2. 
Second item with @<i>{italic} + + //list[code1][Code Example]{ + puts "Hello" + //} + + //table[data][Data Table]{ + Key Value + ---- + A 1 + //} + EOB + + original_ast = compile_to_ast(content) + json_string = ReVIEW::AST::JSONSerializer.serialize(original_ast) + regenerated_ast = ReVIEW::AST::JSONSerializer.deserialize(json_string) + regenerated_content = @generator.generate(regenerated_ast) + + # Verify multiple elements are preserved (skip headline check due to caption issue) + assert_match(/@<b>\{bold\}/, regenerated_content) + assert_match(/1\. First item/, regenerated_content) + assert_match(/@<i>\{italic\}/, regenerated_content) + assert_match(%r{//list\[code1\]}, regenerated_content) # Code block ID preserved + assert_match(%r{//table\[data\]}, regenerated_content) # Table ID preserved + assert_match(/puts "Hello"/, regenerated_content) # Code content preserved + assert_match(/Key\s+Value/, regenerated_content) # Table content preserved + end + + def test_json_structure_consistency + content = <<~EOB + = Structure Test + + Simple paragraph. + EOB + + # Test with default serialization options + original_ast = compile_to_ast(content) + + # Serialize and deserialize + options = ReVIEW::AST::JSONSerializer::Options.new + json = ReVIEW::AST::JSONSerializer.serialize(original_ast, options) + regenerated_ast = ReVIEW::AST::JSONSerializer.deserialize(json) + regenerated_content = @generator.generate(regenerated_ast) + + # Should produce similar Re:VIEW output + assert_match(/= Structure Test/, regenerated_content) + assert_match(/Simple paragraph/, regenerated_content) + end + + def test_basic_ast_serialization_works + # This test verifies that basic AST creation and JSON serialization works + content = 'Simple text paragraph.' 
+ + original_ast = compile_to_ast(content) + assert_not_nil(original_ast) + assert_equal 'ReVIEW::AST::DocumentNode', original_ast.class.name + + # Test JSON serialization + json_string = ReVIEW::AST::JSONSerializer.serialize(original_ast) + assert_not_nil(json_string) + parsed = JSON.parse(json_string) + assert_equal 'DocumentNode', parsed['type'] + + # Test JSON deserialization + regenerated_ast = ReVIEW::AST::JSONSerializer.deserialize(json_string) + assert_not_nil(regenerated_ast) + assert_equal 'ReVIEW::AST::DocumentNode', regenerated_ast.class.name + end + + # Test all .re files in samples/syntax-book and samples/debug-book directories + # The test verifies AST-level equivalence through roundtrip conversion: + # Original.re -> AST1 -> JSON -> AST2 -> Re:VIEW -> AST3 + # AST1 and AST3 should be structurally equivalent + def test_sample_files_roundtrip + sample_files = [ + 'samples/syntax-book/appA.re', + 'samples/syntax-book/bib.re', + 'samples/syntax-book/ch01.re', + 'samples/syntax-book/ch02.re', + 'samples/syntax-book/ch03.re', + 'samples/syntax-book/part2.re', + 'samples/syntax-book/pre01.re', + 'samples/debug-book/advanced_features.re', + 'samples/debug-book/comprehensive.re', + 'samples/debug-book/edge_cases_test.re', + 'samples/debug-book/extreme_features.re', + 'samples/debug-book/multicontent_test.re' + ] + + sample_files.each do |file_path| + next unless File.exist?(file_path) + + # Step 1: Re:VIEW -> AST1 + original_ast = compile_from_file(file_path) + assert_not_nil(original_ast, "Failed to compile #{file_path}") + + # Step 2: AST1 -> JSON + json_string = ReVIEW::AST::JSONSerializer.serialize(original_ast) + assert_not_nil(json_string, "Failed to serialize #{file_path}") + + # Step 3: JSON -> AST2 + regenerated_ast = ReVIEW::AST::JSONSerializer.deserialize(json_string) + assert_not_nil(regenerated_ast, "Failed to deserialize #{file_path}") + + # Step 4: AST2 -> Re:VIEW + regenerated_content = @generator.generate(regenerated_ast) + 
assert_not_nil(regenerated_content, "Failed to generate Re:VIEW from #{file_path}") + + # Step 5: Re:VIEW -> AST3 + begin + basename = File.basename(file_path, '.re') + reparsed_ast = compile_to_ast(regenerated_content, basename, file_path) + assert_not_nil(reparsed_ast, "Failed to reparse regenerated content from #{file_path}") + + # Step 6: Compare AST1 and AST3 for structural equivalence + assert_ast_equivalent(original_ast, reparsed_ast, "AST mismatch for #{file_path}") + rescue StandardError => e + flunk("Roundtrip failed for #{file_path}: #{e.message}") + end + end + end + + private + + def compile_to_ast(content, basename = 'test', file_path = 'test.re') + compiler = ReVIEW::AST::Compiler.new + chapter = ReVIEW::Book::Chapter.new(@book, 1, basename, file_path, StringIO.new(content)) + + # Skip reference resolution to avoid chapter lookup errors in isolated tests + compiler.compile_to_ast(chapter, reference_resolution: false) + end + + def compile_from_file(file_path) + content = File.read(file_path) + basename = File.basename(file_path, '.re') + compiler = ReVIEW::AST::Compiler.new + chapter = ReVIEW::Book::Chapter.new(@book, 1, basename, file_path, StringIO.new(content)) + + # Skip reference resolution to avoid chapter lookup errors in isolated tests + compiler.compile_to_ast(chapter, reference_resolution: false) + end + + # Compare two AST nodes for structural equivalence + # Ignores location information and focuses on node types, attributes, and structure + def assert_ast_equivalent(node1, node2, message = 'AST nodes are not equivalent') + comparator = ReVIEW::AST::Diff::Node.new + result = comparator.compare(node1, node2) + assert(result.equal?, "#{message}\n#{result}") + end +end diff --git a/test/ast/test_ast_code_block_node.rb b/test/ast/test_ast_code_block_node.rb new file mode 100644 index 000000000..9535b3261 --- /dev/null +++ b/test/ast/test_ast_code_block_node.rb @@ -0,0 +1,252 @@ +# frozen_string_literal: true + +require_relative 
'../test_helper' +require 'review/snapshot_location' +require 'review/ast/code_block_node' +require 'review/ast/paragraph_node' +require 'review/ast/text_node' +require 'review/ast/inline_node' +require 'review/ast/json_serializer' +require 'review/ast/compiler' +require 'review/ast/review_generator' +require 'review/configure' +require 'review/book' +require 'review/i18n' +require 'stringio' + +class TestASTCodeBlockNode < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + @location = create_test_location + ReVIEW::I18n.setup(@config['language']) + end + + def create_test_location + ReVIEW::SnapshotLocation.new('test.re', 5) + end + + def test_code_block_node_original_text_preservation + lines = ['puts @<b>{hello}', 'puts "world"'] + original_text = lines.join("\n") + + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + id: 'sample', + original_text: original_text + ) + + assert_equal original_text, code_block.original_text + assert_equal lines, code_block.original_lines + end + + def test_code_block_node_processed_lines + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + id: 'sample', + original_text: 'puts @<b>{hello}' + ) + + processed = code_block.processed_lines + assert_equal 0, processed.size + + original = code_block.original_lines + assert_equal 1, original.size + assert_equal 'puts @<b>{hello}', original[0] + end + + def test_original_lines_and_processed_lines + original_text = 'puts @<b>{hello}' + + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + original_text: original_text + ) + + assert_equal ['puts @<b>{hello}'], code_block.original_lines + + processed = code_block.processed_lines + assert_equal 0, processed.size + end + + def test_ast_node_to_review_syntax + generator = ReVIEW::AST::ReVIEWGenerator.new + + text_node = ReVIEW::AST::TextNode.new(location: @location, content: 'hello world') + 
assert_equal 'hello world', generator.generate(text_node) + + inline_node = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :b, args: ['bold text']) + assert_equal '@<b>{bold text}', generator.generate(inline_node) + end + + def test_code_block_with_ast_compiler_integration + source = <<~EOS + //list[sample][Sample Code]{ + puts @<b>{hello} + puts "world" + //} + EOS + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(source)) + + compiler = ReVIEW::AST::Compiler.new + ast_root = compiler.compile_to_ast(chapter) + + code_block = find_code_block_in_ast(ast_root) + assert_not_nil(code_block) + assert_instance_of(ReVIEW::AST::CodeBlockNode, code_block) + + assert_include(code_block.original_text, 'puts @<b>{hello}') + assert_include(code_block.original_text, 'puts "world"') + + original_lines = code_block.original_lines + assert_equal 2, original_lines.size + assert_equal 'puts @<b>{hello}', original_lines[0] + assert_equal 'puts "world"', original_lines[1] + end + + def test_render_ast_node_as_plain_text_with_text_node + text_node = ReVIEW::AST::TextNode.new(location: @location, content: 'hello world') + + result = render_ast_node_as_plain_text_helper(text_node) + assert_equal 'hello world', result + end + + def test_render_ast_node_as_plain_text_with_inline_node + text_node = ReVIEW::AST::TextNode.new(location: @location, content: 'bold text') + inline_node = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :b) + inline_node.add_child(text_node) + + result = render_ast_node_as_plain_text_helper(inline_node) + assert_equal '@<b>{bold text}', result + end + + def test_render_ast_node_as_plain_text_with_paragraph_containing_inline + paragraph = create_test_paragraph + + result = render_ast_node_as_plain_text_helper(paragraph) + assert_equal 'puts @<b>{hello}', result + end + + def test_render_ast_node_as_plain_text_with_complex_inline + bold_text = ReVIEW::AST::TextNode.new(location: @location, content: 'bold') + 
bold_inline = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :b) + bold_inline.add_child(bold_text) + + italic_text1 = ReVIEW::AST::TextNode.new(location: @location, content: 'italic ') + italic_inline = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :i) + italic_inline.add_child(italic_text1) + italic_inline.add_child(bold_inline) + + paragraph = ReVIEW::AST::ParagraphNode.new(location: @location) + paragraph.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'This is ')) + paragraph.add_child(italic_inline) + paragraph.add_child(ReVIEW::AST::TextNode.new(location: @location, content: ' text')) + + result = render_ast_node_as_plain_text_helper(paragraph) + assert_equal 'This is @<i>{italic @<b>{bold}} text', result + end + + def test_code_block_node_inheritance_from_base_node + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + original_text: 'test content' + ) + + assert_respond_to(code_block, :original_text) + assert_equal 'test content', code_block.original_text + + assert_respond_to(code_block, :location) + assert_respond_to(code_block, :children) + assert_equal @location, code_block.location + end + + def test_original_text_preservation + code_block1 = ReVIEW::AST::CodeBlockNode.new( + location: @location, + original_text: 'original content' + ) + assert_equal 'original content', code_block1.original_text + assert_equal ['original content'], code_block1.original_lines + + code_block2 = ReVIEW::AST::CodeBlockNode.new( + location: @location, + original_text: nil + ) + assert_nil(code_block2.original_text) + assert_equal [], code_block2.original_lines + end + + def test_serialize_properties_includes_original_text + caption_node = ReVIEW::AST::CaptionNode.new(location: @location) + caption_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Test Caption')) + + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + id: 'test', + caption_node: caption_node, + 
original_text: 'puts hello' + ) + + hash = {} + options = ReVIEW::AST::JSONSerializer::Options.new + + assert_nothing_raised do + code_block.send(:serialize_properties, hash, options) + end + + assert_equal 'test', hash[:id] + assert_instance_of(Hash, hash[:caption_node]) + assert_equal 'CaptionNode', hash[:caption_node][:type] + assert_equal 1, hash[:caption_node][:children].size + assert_equal 'TextNode', hash[:caption_node][:children][0][:type] + assert_equal 'Test Caption', hash[:caption_node][:children][0][:content] + end + + private + + def create_test_paragraph + text_node = ReVIEW::AST::TextNode.new(location: @location, content: 'hello') + inline_node = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :b) + inline_node.add_child(text_node) + + paragraph = ReVIEW::AST::ParagraphNode.new(location: @location) + paragraph.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'puts ')) + paragraph.add_child(inline_node) + + paragraph + end + + def find_code_block_in_ast(node) + return node if node.is_a?(ReVIEW::AST::CodeBlockNode) + + if node.children + node.children.each do |child| + result = find_code_block_in_ast(child) + return result if result + end + end + + nil + end + + # Helper method to render AST node as plain text (replacement for deleted method) + def render_ast_node_as_plain_text_helper(node) + case node + when ReVIEW::AST::TextNode + node.content + when ReVIEW::AST::InlineNode + content = node.children.map { |child| render_ast_node_as_plain_text_helper(child) }.join + "@<#{node.inline_type}>{#{content}}" + when ReVIEW::AST::ParagraphNode + node.children.map { |child| render_ast_node_as_plain_text_helper(child) }.join + else + if node.respond_to?(:children) + node.children.map { |child| render_ast_node_as_plain_text_helper(child) }.join + else + '' + end + end + end +end diff --git a/test/ast/test_ast_complex_integration.rb b/test/ast/test_ast_complex_integration.rb new file mode 100644 index 000000000..a85d618d1 --- 
/dev/null +++ b/test/ast/test_ast_complex_integration.rb @@ -0,0 +1,333 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast' +require 'review/ast/compiler' +require 'review/renderer/html_renderer' +require 'review/renderer/latex_renderer' +require 'review/configure' +require 'review/book' +require 'review/book/chapter' +require 'review/ast/json_serializer' +require 'json' + +class TestASTComplexIntegration < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 3 + @config['language'] = 'ja' + + @book = ReVIEW::Book::Base.new(config: @config) + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + ReVIEW::I18n.setup(@config['language']) + end + + def test_nested_structures_with_inline_elements + content = <<~EOB + = Complex Document Structure + + == Section with Lists and Code + + This paragraph has @<b>{bold} and @<i>{italic} text. + + === Nested Lists Test + + * First level item with @<code>{inline code} + * Second level with @<ruby>{漢字,かんじ} text + * Third level item + * Back to first level with @<href>{http://example.com,link} + + 1. Ordered list item with @<kw>{HTTP,Protocol} + 1. Nested ordered item + 1. Another nested item with @<tt>{typewriter} + 2. Second ordered item + + === Code Blocks with Complex Content + + //list[complex-code][Complex Code Example][ruby]{ + def process_data(input) + # Process with @<b>{bold} annotation + result = input.map { |item| transform(item) } + logger.info("Processed @<fn>{processing-note} items") + result + end + //} + + === Tables with Inline Elements + + //table[data-table][Sample Data]{ + Name Description Status + ---------------- + @<b>{Primary} Main data source @<i>{Active} + @<code>{Secondary} Backup source @<tt>{Standby} + //} + + == Multiple Block Types + + === Note Blocks + + //note[important-note][Important Notice]{ + This note contains @<b>{important} information with @<code>{code examples}. 
+ + * Nested list in note + * Another item with @<href>{http://docs.example.com,documentation} + //} + + === Embedded Blocks + + //embed[latex]{ + \\begin{equation} + E = mc^2 \\quad \\text{with @<i>{emphasis}} + \\end{equation} + //} + + === Column Blocks + + //column[side-info][Side Information]{ + This column has @<ruby>{専門,せんもん} terminology and @<kw>{API,Application Programming Interface}. + + //list[column-code][Code in Column][javascript]{ + const data = await fetch('/api/data'); + console.log("Fetched @<fn>{data-note} records"); + //} + //} + + == Cross-References and Footnotes + + See @<list>{complex-code} for implementation details. + Refer to @<table>{data-table} for data structure. + + This text has footnotes@<fn>{footnote1} and more references@<fn>{footnote2}. + + //footnote[footnote1][First footnote with @<b>{formatting}] + //footnote[footnote2][Second footnote with @<code>{code}] + //footnote[processing-note][Processing footnote] + //footnote[data-note][Data note] + EOB + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'complex', 'complex.re', StringIO.new(content)) + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter) + + assert_not_nil(ast_root, 'AST root should be created') + assert_equal('DocumentNode', ast_root.class.name.split('::').last) + + node_counts = count_node_types(ast_root) + + assert(node_counts['HeadlineNode'] >= 4, "Should have multiple headlines, got #{node_counts['HeadlineNode']}") + assert(node_counts['ParagraphNode'] >= 4, "Should have multiple paragraphs, got #{node_counts['ParagraphNode']}") + assert(node_counts['CodeBlockNode'] >= 2, "Should have multiple code blocks, got #{node_counts['CodeBlockNode']}") + assert(node_counts['TableNode'] >= 1, "Should have tables, got #{node_counts['TableNode']}") + assert(node_counts['InlineNode'] >= 10, "Should have many inline elements, got #{node_counts['InlineNode']}") + assert(node_counts['ListNode'] >= 1, "Should have lists, got 
#{node_counts['ListNode']}") + + html_renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_result = html_renderer.render(ast_root) + + assert(html_result.include?('<h1>'), 'Should have h1 tags') + assert(html_result.include?('<h2>'), 'Should have h2 tags') + assert(html_result.include?('<h3>'), 'Should have h3 tags') + assert(html_result.include?('<b>'), 'Should have bold tags') + assert(html_result.include?('<i>'), 'Should have italic tags') + assert(html_result.include?('<code'), 'Should have code tags') + assert(html_result.include?('<ul>'), 'Should have unordered lists') + assert(html_result.include?('<ol>'), 'Should have ordered lists') + assert(html_result.include?('<table>'), 'Should have tables') + assert(html_result.include?('<ruby>'), 'Should have ruby tags') + + latex_renderer = ReVIEW::Renderer::LatexRenderer.new(chapter) + latex_result = latex_renderer.render(ast_root) + + assert(latex_result.include?('\\chapter'), 'Should have chapter commands') + assert(latex_result.include?('\\section'), 'Should have section commands') + assert(latex_result.include?('\\subsection'), 'Should have subsection commands') + assert(latex_result.include?('\\textbf') || latex_result.include?('\\reviewbold'), 'Should have bold commands') + assert(latex_result.include?('\\textit') || latex_result.include?('\\reviewit'), 'Should have italic commands') + assert(latex_result.include?('\\begin{itemize}'), 'Should have itemize environments') + assert(latex_result.include?('\\begin{enumerate}'), 'Should have enumerate environments') + assert(latex_result.include?('\\begin{table}'), 'Should have table environments') + + inline_nodes = collect_inline_nodes(ast_root) + list_refs = inline_nodes.select { |node| node.inline_type == :list } + table_refs = inline_nodes.select { |node| node.inline_type == :table } + footnote_refs = inline_nodes.select { |node| node.inline_type == :fn } + + assert(list_refs.size >= 1, 'Should have list references') + assert(table_refs.size >= 1, 
'Should have table references') + assert(footnote_refs.size >= 2, 'Should have footnote references') + end + + def test_performance_with_large_complex_document + content = generate_large_complex_document(50) + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'large', 'large.re', StringIO.new) + chapter.content = content + + start_time = Time.now + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter) + ast_time = Time.now - start_time + + start_time = Time.now + html_renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_result = html_renderer.render(ast_root) + html_time = Time.now - start_time + + start_time = Time.now + latex_renderer = ReVIEW::Renderer::LatexRenderer.new(chapter) + latex_result = latex_renderer.render(ast_root) + latex_time = Time.now - start_time + + # Performance assertions (these are reasonable limits for CI) + assert(ast_time < 5.0, "AST compilation should be under 5 seconds, took #{ast_time}") + assert(html_time < 3.0, "HTML rendering should be under 3 seconds, took #{html_time}") + assert(latex_time < 3.0, "LaTeX rendering should be under 3 seconds, took #{latex_time}") + + assert(html_result.length > 10000, 'HTML output should be substantial') + assert(latex_result.length > 10000, 'LaTeX output should be substantial') + assert(html_result.include?('<h2>'), 'HTML should contain section headers') + assert(latex_result.include?('\\section'), 'LaTeX should contain section commands') + end + + def test_error_handling_with_malformed_content + malformed_content = <<~EOB + = Test Document + + This has unclosed @<b>{bold text + + //list[broken-list][Broken Code]{ + def broken_function + # Missing closing brace + //} + + === Missing Table End + + //table[broken-table][Test]{ + Header1 Header2 + ----------- + Data1 Data2 + # Missing //} + + Regular paragraph continues here. 
+ EOB + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'malformed', 'malformed.re', StringIO.new) + chapter.content = malformed_content + + ast_compiler = ReVIEW::AST::Compiler.new + + assert_raises(ReVIEW::AST::InlineTokenizeError) do + ast_compiler.compile_to_ast(chapter) + end + end + + def test_memory_usage_with_deep_nesting + content = generate_deeply_nested_document(10) + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'nested', 'nested.re', StringIO.new) + chapter.content = content + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter) + + max_depth = calculate_max_depth(ast_root) + assert(max_depth >= 5, "Should handle deep nesting, max depth: #{max_depth}") + + html_renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_result = html_renderer.render(ast_root) + + nested_ul_count = html_result.scan(/<ul[^>]*>/).length + assert(nested_ul_count >= 1, "Should have nested lists, found #{nested_ul_count}") + end + + private + + def count_node_types(node, counts = Hash.new(0)) + counts[node.class.name.split('::').last] += 1 + + if node.children + node.children.each { |child| count_node_types(child, counts) } + end + + counts + end + + def collect_inline_nodes(node, inline_nodes = []) + if node.class.name.include?('InlineNode') + inline_nodes << node + end + + if node.children + node.children.each { |child| collect_inline_nodes(child, inline_nodes) } + end + + inline_nodes + end + + def generate_large_complex_document(section_count) + content = "= Large Complex Document\n\n" + + (1..section_count).each do |i| + content += <<~SECTION + == Section #{i} + + This is section #{i} with @<b>{bold} and @<i>{italic} text. + It also contains @<code>{code_#{i}} and @<ruby>{漢字#{i},かんじ#{i}}. 
+ + === Subsection #{i}.1 + + * List item #{i}.1 with @<href>{http://example#{i}.com,link#{i}} + * List item #{i}.2 with @<kw>{Term#{i},Description#{i}} + * Nested item #{i}.2.1 + * Nested item #{i}.2.2 + + //list[code-#{i}][Code Example #{i}][ruby]{ + def method_#{i}(param) + # Processing with @<b>{annotation #{i}} + result = process(param) + puts "Result @<fn>{note-#{i}}: \#{result}" + end + //} + + //footnote[note-#{i}][Footnote #{i} with @<code>{code reference}] + + SECTION + end + + content + end + + def generate_deeply_nested_document(max_depth) + content = "= Deeply Nested Document\n\n" + + content += " * Level 1 item with @<b>{bold 1} text\n" + (2..max_depth).each do |level| + indent = ' ' * level + content += "#{indent}* Level #{level} item with @<code>{code_#{level}}\n" + end + + content += "\n== Section with Complex Nesting\n\n" + + (1..5).each do |level| + indent = ' ' * level + content += "#{indent}: Term #{level} with @<i>{italic #{level}}\n" + content += "#{indent} Definition #{level} with @<ruby>{漢字#{level},かんじ#{level}}\n" + end + + content + end + + def calculate_max_depth(node, current_depth = 0) + max_depth = current_depth + + if node.children + node.children.each do |child| + child_depth = calculate_max_depth(child, current_depth + 1) + max_depth = [max_depth, child_depth].max + end + end + + max_depth + end +end diff --git a/test/ast/test_ast_comprehensive.rb b/test/ast/test_ast_comprehensive.rb new file mode 100644 index 000000000..11a451fc8 --- /dev/null +++ b/test/ast/test_ast_comprehensive.rb @@ -0,0 +1,263 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast' +require 'review/ast/compiler' +require 'review/renderer/html_renderer' +require 'review/configure' +require 'review/book' +require 'review/book/chapter' + +class TestASTComprehensive < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['language'] = 'ja' + @book = 
ReVIEW::Book::Base.new(config: @config) + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + ReVIEW::I18n.setup(@config['language']) + end + + def test_code_blocks_ast_processing + content = <<~EOB + = Code Examples + + Normal list with ID: + + //list[sample][Sample Code][ruby]{ + puts "Hello, World!" + def greeting + "Hello" + end + //} + + Embedded list without ID: + + //emlist[Ruby Example][ruby]{ + puts "Embedded example" + //} + + Numbered list: + + //listnum[numbered][Numbered Example][python]{ + print("Hello") + print("World") + //} + + Command example: + + //cmd[Terminal Commands]{ + ls -la + cd /home + //} + EOB + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter) + + code_blocks = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::CodeBlockNode) } + assert_equal 4, code_blocks.size + + list_block = code_blocks.find { |n| n.id == 'sample' } + assert_not_nil(list_block) + assert_equal 'Sample Code', list_block.caption_text + assert_equal 'ruby', list_block.lang + assert_equal false, list_block.line_numbers + + emlist_block = code_blocks.find { |n| n.caption_text == 'Ruby Example' && n.id.nil? 
} + assert_not_nil(emlist_block) + assert_equal 'ruby', emlist_block.lang + + listnum_block = code_blocks.find { |n| n.id == 'numbered' } + assert_not_nil(listnum_block) + assert_equal true, listnum_block.line_numbers + + cmd_block = code_blocks.find { |n| n.lang == 'shell' } + assert_not_nil(cmd_block) + assert_equal 'Terminal Commands', cmd_block.caption_text + end + + def test_table_ast_processing + content = <<~EOB + = Tables + + //table[envvars][Environment Variables]{ + Name Meaning + ------------ + PATH Command directories + HOME User home directory + LANG Default locale + //} + + //emtable[Simple Table]{ + Col1 Col2 + A B + C D + //} + EOB + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter) + + table_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::TableNode) } + assert_equal 2, table_nodes.size + + main_table = table_nodes.find { |n| n.id == 'envvars' } + assert_not_nil(main_table) + assert_equal 'Environment Variables', main_table.caption_text + assert_equal 1, main_table.header_rows.size + assert_equal 3, main_table.body_rows.size + end + + def test_image_ast_processing + content = <<~EOB + = Images + + //image[diagram][System Diagram][scale=0.5]{ + ASCII art or description here + //} + + //indepimage[logo][Company Logo] + + EOB + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter) + + image_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ImageNode) } + assert_equal 2, image_nodes.size + + main_image = image_nodes.find { |n| n.id == 'diagram' } + assert_not_nil(main_image) + assert_equal 'System Diagram', main_image.caption_text + assert_equal 'scale=0.5', main_image.metric + + indep_image = image_nodes.find { |n| n.id == 'logo' } + assert_not_nil(indep_image) + 
assert_equal 'Company Logo', indep_image.caption_text + end + + def test_special_inline_elements_ast_processing + content = <<~EOB + = Special Inline Elements + + This paragraph contains @<ruby>{漢字,かんじ} with ruby annotation. + + Visit @<href>{https://example.com, Example Site} for more information. + + The @<kw>{HTTP, HyperText Transfer Protocol} is a protocol. + + Simple @<b>{bold} and @<code>{code} elements. + + Unicode character: @<uchar>{2603} (snowman). + EOB + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter) + + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + + ruby_para = paragraph_nodes[0] + ruby_node = ruby_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :ruby } + assert_not_nil(ruby_node) + assert_equal ['漢字', 'かんじ'], ruby_node.args + + href_para = paragraph_nodes[1] + href_node = href_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :href } + assert_not_nil(href_node) + assert_equal ['https://example.com', 'Example Site'], href_node.args + + kw_para = paragraph_nodes[2] + kw_node = kw_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :kw } + assert_not_nil(kw_node) + assert_equal ['HTTP', 'HyperText Transfer Protocol'], kw_node.args + + simple_para = paragraph_nodes[3] + bold_node = simple_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :b } + code_node = simple_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :code } + assert_not_nil(bold_node) + assert_not_nil(code_node) + + uchar_para = paragraph_nodes[4] + uchar_node = uchar_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :uchar } + assert_not_nil(uchar_node) + assert_equal ['2603'], uchar_node.args + end + + def test_comprehensive_output_compatibility + 
content = <<~EOB + = Comprehensive Test + + Intro with @<b>{bold} text. + + * List item with @<code>{code} + * Another item + + //list[example][Code Example]{ + puts "Hello" + //} + + //table[data][Data Table]{ + Name Value + ------------ + A 1 + B 2 + //} + + Text with @<ruby>{日本語,にほんご} and @<href>{http://example.com}. + + 1. Numbered item + 2. Another numbered item + + //quote{ + This is a quote with @<i>{italic} text. + //} + + Final paragraph. + EOB + + chapter_ast = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter_ast.content = content + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter_ast) + + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter_ast) + result_ast = renderer.render(ast_root) + + ['<h1>', '<ul>', '<ol>', '<table>', '<blockquote>'].each do |tag| + assert(result_ast.include?(tag), "AST/Renderer system should produce #{tag}") + end + + ['<b>', '<code', '<i>'].each do |tag| + assert(result_ast.include?(tag), "AST/Renderer system should produce #{tag}") + end + + assert_not_nil(ast_root, 'Should have AST root') + assert_equal(ReVIEW::AST::DocumentNode, ast_root.class) + + headline_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::HeadlineNode) } + assert_equal(1, headline_nodes.size, 'Should have one headline') + + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + assert(paragraph_nodes.size >= 3, 'Should have multiple paragraphs') + + list_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ListNode) } + assert_equal(2, list_nodes.size, 'Should have unordered and ordered lists') + + code_block_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::CodeBlockNode) } + assert_equal(1, code_block_nodes.size, 'Should have one code block') + + table_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::TableNode) } + assert_equal(1, table_nodes.size, 'Should have one table') + end +end diff --git 
a/test/ast/test_ast_comprehensive_inline.rb b/test/ast/test_ast_comprehensive_inline.rb new file mode 100644 index 000000000..1f9e5130a --- /dev/null +++ b/test/ast/test_ast_comprehensive_inline.rb @@ -0,0 +1,352 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast' +require 'review/ast/compiler' +require 'review/renderer/html_renderer' +require 'review/configure' +require 'review/book' +require 'review/book/chapter' + +class TestASTComprehensiveInline < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['language'] = 'ja' + @config['dictionary'] = { + 'glossary' => 'glossary', + 'abbreviations' => 'abbreviations' + } + @book = ReVIEW::Book::Base.new(config: @config) + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + ReVIEW::I18n.setup(@config['language']) + end + + def test_advanced_inline_elements_ast_processing + content = <<~EOB + = Advanced Inline Elements + + This paragraph tests @<b>{bold} text and @<i>{italic} text. + + Basic formatting: @<code>{code} and @<tt>{typewriter}. + + Ruby text: @<ruby>{漢字,かんじ} and @<kw>{HTTP,Protocol}. + + Links: @<href>{http://example.com,example} text. + + Simple inline elements without references. 
+ EOB + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter) + + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_result = renderer.render(ast_root) + + assert(html_result.include?('bold'), 'HTML should include bold content') + assert(html_result.include?('italic'), 'HTML should include italic content') + assert(html_result.include?('code'), 'HTML should include code content') + + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + + first_para = paragraph_nodes[0] + b_node = first_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :b } + i_node = first_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :i } + assert_not_nil(b_node) + assert_equal ['bold'], b_node.args + assert_not_nil(i_node) + assert_equal ['italic'], i_node.args + + second_para = paragraph_nodes[1] + code_node = second_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :code } + tt_node = second_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :tt } + assert_not_nil(code_node) + assert_not_nil(tt_node) + assert_equal ['code'], code_node.args + assert_equal ['typewriter'], tt_node.args + + third_para = paragraph_nodes[2] + ruby_node = third_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :ruby } + kw_node = third_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :kw } + assert_not_nil(ruby_node) + assert_not_nil(kw_node) + assert_equal ['漢字', 'かんじ'], ruby_node.args + assert_equal ['HTTP', 'Protocol'], kw_node.args + + fourth_para = paragraph_nodes[3] + href_node = fourth_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :href } + assert_not_nil(href_node) + assert_equal ['http://example.com', 'example'], href_node.args + end 
+ + def test_inline_elements_in_paragraphs_with_ast_renderer + content = <<~EOB + = Inline Elements Test + + This paragraph has @<b>{bold} and @<i>{italic} formatting. + + Another paragraph with @<code>{code} and @<tt>{typewriter} text. + + Special elements: @<ruby>{漢字,かんじ} and @<href>{http://example.com, Link}. + + Keywords: @<kw>{HTTP, Protocol} and formatting. + + Final paragraph with normal text. + EOB + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter) + + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_result = renderer.render(ast_root) + + assert(html_result.include?('bold'), 'HTML should include bold content') + assert(html_result.include?('italic'), 'HTML should include italic content') + assert(html_result.include?('code'), 'HTML should include code content') + assert(html_result.include?('typewriter'), 'HTML should include typewriter content') + assert(html_result.include?('漢字'), 'HTML should include ruby content') + assert(html_result.include?('example.com'), 'HTML should include href content') + assert(html_result.include?('HTTP'), 'HTML should include kw content') + + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + assert(paragraph_nodes.size >= 4, 'Should have multiple paragraphs processed via AST') + + inline_paragraphs = paragraph_nodes.select do |para| + para.children.any?(ReVIEW::AST::InlineNode) + end + assert(inline_paragraphs.size >= 3, 'Should have paragraphs with inline elements') + + all_inline_types = [] + inline_paragraphs.each do |para| + para.children.each do |child| + if child.is_a?(ReVIEW::AST::InlineNode) + all_inline_types << child.inline_type + end + end + end + + expected_types = %i[b i code tt ruby href kw] + expected_types.each do |type| + assert(all_inline_types.include?(type), "Should have inline type: #{type}") + end + 
end + + def test_ast_output_structure_verification + content = <<~EOB + = AST Structure Test + + This paragraph contains @<b>{bold} text and @<code>{code} elements. + + Another paragraph with @<href>{https://example.com, example link}. + + Final paragraph with normal text only. + EOB + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter) + + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_result = renderer.render(ast_root) + + assert(html_result.include?('<h1>'), 'HTML should contain h1 tag for headlines') + assert(html_result.include?('<p>'), 'HTML should contain p tag for paragraphs') + assert(html_result.include?('AST Structure Test'), 'HTML should include headline caption') + assert(html_result.include?('bold'), 'HTML should include inline content') + assert(html_result.include?('code'), 'HTML should include inline content') + assert(html_result.include?('example.com'), 'HTML should include href content') + + assert_not_nil(ast_root, 'Should have AST root') + assert_equal(ReVIEW::AST::DocumentNode, ast_root.class) + + headline_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::HeadlineNode) } + assert_equal(1, headline_nodes.size, 'Should have one headline') + assert_equal('AST Structure Test', headline_nodes.first.caption_text) + + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + assert_equal(3, paragraph_nodes.size, 'Should have three paragraphs') + + inline_paragraphs = paragraph_nodes.select do |para| + para.children.any?(ReVIEW::AST::InlineNode) + end + assert_equal(2, inline_paragraphs.size, 'Should have two paragraphs with inline elements') + end + + def test_raw_content_processing_with_embed_blocks + content = <<~EOB + = Raw Content Test + + Before embed block. 
+ + //embed[html]{ + <div class="custom">Raw HTML content</div> + <script>console.log('test');</script> + //} + + Middle paragraph with @<b>{bold} text. + + //embed[css]{ + .custom { color: red; } + //} + + After embed blocks. + EOB + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter) + + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_result = renderer.render(ast_root) + + assert(html_result.include?('Raw Content Test'), 'HTML should include headline') + assert(html_result.include?('Before embed block'), 'HTML should include content before embed') + assert(html_result.include?('After embed blocks'), 'HTML should include content after embed') + assert(html_result.include?('bold'), 'HTML should include inline content') + + assert_not_nil(ast_root, 'Should have AST root') + + embed_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::EmbedNode) } + assert_equal(2, embed_nodes.size, 'Should have two embed nodes') + + html_embed = embed_nodes.find { |n| n.target_builders&.include?('html') } + assert_not_nil(html_embed, 'Should have HTML embed node') + assert_equal(:block, html_embed.embed_type, 'Should be block embed type') + assert(html_embed.content.include?('custom'), 'Should contain custom class') + assert(html_embed.content.include?('console.log'), 'Should contain script') + + css_embed = embed_nodes.find { |n| n.target_builders&.include?('css') } + assert_not_nil(css_embed, 'Should have CSS embed node') + assert(css_embed.content.include?('color: red'), 'Should contain CSS rule') + + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + assert(paragraph_nodes.size >= 3, 'Should have multiple paragraphs') + + middle_para = paragraph_nodes.find do |para| + para.children.any? 
{ |child| child.is_a?(ReVIEW::AST::InlineNode) && child.inline_type == :b } + end + assert_not_nil(middle_para, 'Should have paragraph with bold inline element') + end + + def test_raw_single_command_processing + content = <<~EOB + = Raw Command Test + + Before raw command. + + //raw[|html|<div class="inline-raw">Inline raw content</div>] + + Middle paragraph with @<b>{bold} text. + + //raw[|latex|\\textbf{LaTeX raw content}] + + After raw commands. + EOB + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter) + + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_result = renderer.render(ast_root) + + assert(html_result.include?('Raw Command Test'), 'HTML should include headline') + assert(html_result.include?('Before raw command'), 'HTML should include before paragraph') + assert(html_result.include?('After raw commands'), 'HTML should include after paragraph') + assert(html_result.include?('bold'), 'HTML should include inline content') + + assert_not_nil(ast_root, 'Should have AST root') + + headline_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::HeadlineNode) } + assert_equal(1, headline_nodes.size, 'Should have one headline') + + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + assert(paragraph_nodes.size >= 3, 'Should have multiple paragraphs processed via AST') + + middle_para = paragraph_nodes.find do |para| + para.children.any? { |child| child.is_a?(ReVIEW::AST::InlineNode) && child.inline_type == :b } + end + assert_not_nil(middle_para, 'Should have paragraph with bold inline element') + + before_para = paragraph_nodes.find do |para| + para.children.any? 
{ |child| child.is_a?(ReVIEW::AST::TextNode) && child.content.include?('Before raw command') } + end + assert_not_nil(before_para, 'Should have before paragraph in AST') + + after_para = paragraph_nodes.find do |para| + para.children.any? { |child| child.is_a?(ReVIEW::AST::TextNode) && child.content.include?('After raw commands') } + end + assert_not_nil(after_para, 'Should have after paragraph in AST') + end + + def test_comprehensive_inline_compatibility + content = <<~EOB + = Comprehensive Inline Test + + Text with @<b>{bold}, @<i>{italic}, @<code>{code}, and @<ruby>{漢字,かんじ}. + + Advanced: @<href>{http://example.com, Link} and @<kw>{Term, Description}. + + Words: @<w>{glossary} and @<wb>{abbreviations}. + EOB + + chapter_ast = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter_ast.content = content + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(chapter_ast) + + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter_ast) + html_result_ast = renderer.render(ast_root) + + assert(html_result_ast.include?('bold'), 'HTML should include bold content') + assert(html_result_ast.include?('italic'), 'HTML should include italic content') + assert(html_result_ast.include?('code'), 'HTML should include code content') + assert(html_result_ast.include?('glossary'), 'HTML should include word expansion content') + + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + + inline_types = [] + paragraph_nodes.each do |para| + para.children.each do |child| + if child.is_a?(ReVIEW::AST::InlineNode) + inline_types << child.inline_type + end + end + end + + expected_types = %i[b i code ruby href kw w wb] + expected_types.each do |type| + assert(inline_types.include?(type), "Should have inline type: #{type}") + end + + simple_content = <<~EOB + = Simple Test + + Text with @<b>{bold} and @<i>{italic}. 
+ EOB + + chapter_simple = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter_simple.content = simple_content + + simple_ast = ast_compiler.compile_to_ast(chapter_simple) + simple_renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter_simple) + result_simple = simple_renderer.render(simple_ast) + + ['<b>', '<i>'].each do |tag| + assert(result_simple.include?(tag), "AST/Renderer system should produce #{tag}") + end + end +end diff --git a/test/ast/test_ast_dl_block.rb b/test/ast/test_ast_dl_block.rb new file mode 100644 index 000000000..cc801fcac --- /dev/null +++ b/test/ast/test_ast_dl_block.rb @@ -0,0 +1,281 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/configure' +require 'review/book' +require 'review/i18n' +require 'review/ast' +require 'review/ast/compiler' +require 'review/ast/block_processor' +require 'review/logger' + +class TestASTDlBlock < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + @compiler = ReVIEW::AST::Compiler.new + end + + def create_chapter(content) + ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + end + + def test_dl_with_dt_dd_blocks + input = <<~REVIEW + //dl{ + + //dt{ + API (Application Programming Interface) + //} + //dd{ + アプリケーションプログラミングインターフェースの略称。 + ソフトウェアコンポーネント同士が相互に機能を利用するための規約。 + + //list[api-example][API呼び出し例]{ + response = api.get('/users/123') + user_data = JSON.parse(response.body) + //} + + 詳細は公式ドキュメントを参照してください。 + //} + + //dt{ + REST (Representational State Transfer) + //} + //dd{ + RESTfulなWebサービスの設計原則。HTTPプロトコルを使用し、 + リソースをURIで識別します。 + + 主な特徴: + * ステートレス通信 + * 統一インターフェース + * キャッシュ可能 + * 階層的システム + + //table[rest-methods][RESTメソッド一覧]{ + メソッド 用途 冪等性 + ------------ + GET リソース取得 あり + POST リソース作成 なし + PUT リソース更新 あり + DELETE リソース削除 
あり + //} + //} + + //dt{ + JSON (JavaScript Object Notation) + //} + //dd{ + 軽量なデータ交換フォーマット。 + + //list[json-sample][JSONサンプル]{ + { + "name": "John Doe", + "age": 30, + "email": "john@example.com" + } + //} + //} + + //} + REVIEW + + ast = @compiler.compile_to_ast(create_chapter(input.strip)) + + assert_equal ReVIEW::AST::DocumentNode, ast.class + + list_node = ast.children.first + assert_equal ReVIEW::AST::ListNode, list_node.class + assert_equal :dl, list_node.list_type + + dt_items = list_node.children.select(&:definition_term?) + dd_items = list_node.children.select(&:definition_desc?) + + assert_equal 3, dt_items.size + assert_equal 3, dd_items.size + + api_dt = dt_items[0] + assert_equal ReVIEW::AST::ListItemNode, api_dt.class + assert api_dt.definition_term? + + api_dd = dd_items[0] + assert_equal ReVIEW::AST::ListItemNode, api_dd.class + assert api_dd.definition_desc? + + assert api_dd.children.size > 1 + + api_code_block = api_dd.children.find { |child| child.is_a?(ReVIEW::AST::CodeBlockNode) } + assert_not_nil(api_code_block) + assert_equal 'api-example', api_code_block.id + assert_equal 'API呼び出し例', api_code_block.caption_text + + rest_dd = dd_items[1] + assert_equal ReVIEW::AST::ListItemNode, rest_dd.class + assert rest_dd.definition_desc? + + rest_table = rest_dd.children.find { |child| child.is_a?(ReVIEW::AST::TableNode) } + assert_not_nil(rest_table) + assert_equal 'rest-methods', rest_table.id + assert_equal 'RESTメソッド一覧', rest_table.caption_text + + assert_equal 1, rest_table.header_rows.size + assert_equal 4, rest_table.body_rows.size + + json_dd = dd_items[2] + assert_equal ReVIEW::AST::ListItemNode, json_dd.class + assert json_dd.definition_desc? 
+ + json_code_block = json_dd.children.find { |child| child.is_a?(ReVIEW::AST::CodeBlockNode) } + assert_not_nil(json_code_block) + assert_equal 'json-sample', json_code_block.id + end + + def test_dl_with_multiple_dd + input = <<~REVIEW + //dl{ + + //dt{ + HTTP + //} + //dd{ + HyperText Transfer Protocolの略称。 + //} + //dd{ + Webブラウザとサーバー間の通信プロトコル。 + //} + //dd{ + ステートレスなリクエスト/レスポンス型のプロトコル。 + //} + + //dt{ + HTTPS + //} + //dd{ + HTTP over TLS/SSLの略称。暗号化された安全なHTTP通信。 + //} + + //} + REVIEW + + ast = @compiler.compile_to_ast(create_chapter(input.strip)) + + list_node = ast.children.first + assert_equal :dl, list_node.list_type + + dt_items = list_node.children.select(&:definition_term?) + dd_items = list_node.children.select(&:definition_desc?) + + assert_equal 2, dt_items.size + assert_equal 4, dd_items.size + + http_dt = dt_items[0] + assert http_dt.definition_term? + + # Check that we have 3 consecutive dd items for HTTP + assert dd_items[0].definition_desc? + assert dd_items[1].definition_desc? + assert dd_items[2].definition_desc? + + https_dt = dt_items[1] + assert https_dt.definition_term? + + assert dd_items[3].definition_desc? 
+ end + + def test_dl_empty + input = <<~REVIEW + //dl{ + //} + REVIEW + + ast = @compiler.compile_to_ast(create_chapter(input.strip)) + + list_node = ast.children.first + assert_equal :dl, list_node.list_type + assert_equal 0, list_node.children.size + end + + def test_dl_cannot_use_simple_text_lines + # This test documents that simple text lines in //dl blocks + # are treated as list items without proper term/definition structure + input = <<~REVIEW + //dl{ + API + Application Programming Interface + REST + Representational State Transfer + //} + REVIEW + + ast = @compiler.compile_to_ast(create_chapter(input.strip)) + + list_node = ast.children.first + assert_equal :dl, list_node.list_type + + # Each line becomes a separate list item (not what we want) + # This demonstrates why //dt and //dd blocks are needed for definition lists + # In the current implementation, all lines are treated as items + assert_equal 4, list_node.children.size + + # All items are simple text items (no proper term/definition structure) + # When using simple text lines instead of //dt and //dd blocks, + # empty ListItemNodes are created (without proper content parsing) + list_node.children.each do |item| + assert_equal ReVIEW::AST::ListItemNode, item.class + # Simple text lines in //dl blocks are not properly parsed into content, + # resulting in empty ListItemNodes. This demonstrates why //dt and //dd + # blocks are required for proper definition list structure. + # None of them have dt or dd type + assert_nil(item.item_type) + end + end + + def test_dl_with_nested_content + input = <<~REVIEW + //dl{ + + //dt{ + ネストしたリスト + //} + //dd{ + 定義内にさらにリストを含む例: + + * 項目1 + * 項目2 + ** サブ項目2.1 + ** サブ項目2.2 + + //note[メモ]{ + ネストしたリストは読みやすさを保ちながら + 複雑な情報を整理できます。 + //} + //} + + //} + REVIEW + + ast = @compiler.compile_to_ast(create_chapter(input.strip)) + + list_node = ast.children.first + assert_equal :dl, list_node.list_type + + dt_items = list_node.children.select(&:definition_term?) 
+ dd_items = list_node.children.select(&:definition_desc?) + + assert_equal 1, dt_items.size + assert_equal 1, dd_items.size + + dd_item = dd_items[0] + + ul_node = dd_item.children.find { |child| child.is_a?(ReVIEW::AST::ListNode) && child.list_type == :ul } + assert_not_nil(ul_node) + + note_node = dd_item.children.find { |child| child.is_a?(ReVIEW::AST::MinicolumnNode) } + assert_not_nil(note_node) + assert_equal :note, note_node.minicolumn_type + end +end diff --git a/test/ast/test_ast_embed.rb b/test/ast/test_ast_embed.rb new file mode 100644 index 000000000..7807b0eae --- /dev/null +++ b/test/ast/test_ast_embed.rb @@ -0,0 +1,180 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast' +require 'review/ast/compiler' +require 'review/configure' +require 'review/book' +require 'review/book/chapter' + +class TestASTEmbed < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + ReVIEW::I18n.setup(@config['language']) + end + + def test_embed_node_creation + node = ReVIEW::AST::EmbedNode.new( + location: ReVIEW::SnapshotLocation.new(nil, 0), + embed_type: :block, + target_builders: ['html'], + content: "content line 1\ncontent line 2" + ) + + hash = node.to_h + assert_equal 'EmbedNode', hash[:type] + assert_equal :block, hash[:embed_type] + assert_equal ['html'], hash[:target_builders] + assert_equal "content line 1\ncontent line 2", hash[:content] + end + + def test_embed_block_ast_processing + content = <<~EOB + = Chapter Title + + Normal paragraph before embed. + + //embed[html]{ + <div class="special"> + HTML content here + </div> + //} + + Paragraph after embed. 
+ EOB + + ast_root = compile_to_ast(content) + + embed_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::EmbedNode) } + assert_not_nil(embed_node, 'Should have embed node') + assert_equal :block, embed_node.embed_type + assert_equal ['html'], embed_node.target_builders + assert(embed_node.content.include?('<div class="special">'), 'Should contain div') + assert(embed_node.content.include?('HTML content here'), 'Should contain HTML content') + assert(embed_node.content.include?('</div>'), 'Should contain closing div') + end + + def test_embed_block_without_arg + content = <<~EOB + //embed{ + Raw content + No builder filter + //} + EOB + + ast_root = compile_to_ast(content) + + embed_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::EmbedNode) } + assert_not_nil(embed_node) + assert_equal :block, embed_node.embed_type + assert_nil(embed_node.target_builders, 'Should have no target builders (applies to all)') + assert(embed_node.content.include?('Raw content'), 'Should contain raw content') + assert(embed_node.content.include?('No builder filter'), 'Should contain no builder filter text') + end + + def test_inline_embed_ast_processing + content = <<~EOB + This paragraph has @<embed>{inline content} in it. + EOB + + ast_root = compile_to_ast(content) + + paragraph_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + assert_not_nil(paragraph_node) + + embed_node = paragraph_node.children.find { |n| n.is_a?(ReVIEW::AST::EmbedNode) } + assert_not_nil(embed_node, 'Should have inline embed node') + assert_equal :inline, embed_node.embed_type + assert_equal 'inline content', embed_node.content + end + + def test_inline_embed_with_builder_filter + content = <<~EOB + Text with @<embed>{|html|<strong>HTML only</strong>} content. 
+ EOB + + ast_root = compile_to_ast(content) + + paragraph_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + embed_node = paragraph_node.children.find { |n| n.is_a?(ReVIEW::AST::EmbedNode) } + + assert_not_nil(embed_node) + assert_equal :inline, embed_node.embed_type + assert_equal ['html'], embed_node.target_builders + assert_equal '<strong>HTML only</strong>', embed_node.content + end + + def test_embed_output_compatibility + content = <<~EOB + Normal text @<embed>{inline embed} more text. + + //embed[html]{ + <div>Block embed content</div> + //} + EOB + + ast_root = compile_to_ast(content) + + paragraph_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + block_embed_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::EmbedNode) && n.embed_type == :block } + + assert_not_nil(paragraph_node, 'Should have paragraph with inline embed') + assert_not_nil(block_embed_node, 'Should have block embed node') + + inline_embed = paragraph_node.children.find { |n| n.is_a?(ReVIEW::AST::EmbedNode) && n.embed_type == :inline } + assert_not_nil(inline_embed, 'Should have inline embed in paragraph') + assert_equal 'inline embed', inline_embed.content + + assert_equal ['html'], block_embed_node.target_builders + assert_equal '<div>Block embed content</div>', block_embed_node.content + end + + def test_mixed_content_with_embed + content = <<~EOB + = Chapter with Embeds + + This paragraph has @<b>{bold} and @<embed>{inline embed} elements. + + //embed[html]{ + <div class="example"> + <p>Some HTML content</p> + </div> + //} + + Another paragraph after the embed block. 
+ EOB + + ast_root = compile_to_ast(content) + + headline_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::HeadlineNode) } + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + embed_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::EmbedNode) } + + assert_not_nil(headline_node) + assert_equal 2, paragraph_nodes.size + assert_not_nil(embed_node) + assert_equal :block, embed_node.embed_type + + first_para = paragraph_nodes[0] + bold_node = first_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :b } + inline_embed_node = first_para.children.find { |n| n.is_a?(ReVIEW::AST::EmbedNode) && n.embed_type == :inline } + + assert_not_nil(bold_node) + assert_not_nil(inline_embed_node) + end + + private + + def compile_to_ast(content) + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + ast_compiler = ReVIEW::AST::Compiler.new + ast_compiler.compile_to_ast(chapter) + end +end diff --git a/test/ast/test_ast_html_diff.rb b/test/ast/test_ast_html_diff.rb new file mode 100644 index 000000000..507a3fd1d --- /dev/null +++ b/test/ast/test_ast_html_diff.rb @@ -0,0 +1,373 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/diff/html' + +class ASTHTMLDiffTest < Test::Unit::TestCase + def setup + @comparator = ReVIEW::AST::Diff::Html.new + end + + def test_same_html_same_hash + html1 = '<p>Hello World</p>' + html2 = '<p>Hello World</p>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_different_html_different_hash + html1 = '<p>Hello World</p>' + html2 = '<p>Hello World!</p>' + result = @comparator.compare(html1, html2) + assert_false(result.same_hash?) + end + + def test_whitespace_normalized + html1 = '<p>Hello World</p>' + html2 = '<p>Hello World</p>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) 
+ end + + def test_whitespace_preserved_in_pre + html1 = '<pre>Hello World</pre>' + html2 = '<pre>Hello World</pre>' + result = @comparator.compare(html1, html2) + assert_false(result.same_hash?) + end + + def test_comments_removed + # Comments are removed but text nodes remain separate + html1 = '<p>Hello</p><!-- comment --><p>World</p>' + html2 = '<p>Hello</p><p>World</p>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_class_attribute_sorted + html1 = '<div class="foo bar baz">test</div>' + html2 = '<div class="baz foo bar">test</div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_class_attribute_duplicates_removed + html1 = '<div class="foo bar foo baz">test</div>' + html2 = '<div class="bar baz foo">test</div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_empty_class_removed + html1 = '<div class="">test</div>' + html2 = '<div>test</div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_attribute_names_lowercased + html1 = '<div ID="test">content</div>' + html2 = '<div id="test">content</div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_void_elements + html1 = '<p>Line 1<br>Line 2</p>' + html2 = '<p>Line 1<br/>Line 2</p>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_img_void_element + html1 = '<img src="test.png" alt="test">' + html2 = '<img src="test.png" alt="test"/>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) 
+ end + + def test_diff_tokens_same_content + html1 = '<p>Hello</p>' + html2 = '<p>Hello</p>' + result = @comparator.compare(html1, html2) + changes = result.changes + assert_equal(0, changes.count { |c| c.action != '=' }) + end + + def test_diff_tokens_text_changed + html1 = '<p>Hello</p>' + html2 = '<p>Goodbye</p>' + result = @comparator.compare(html1, html2) + changes = result.changes + assert(changes.any? { |c| c.action == '!' }) + end + + def test_diff_tokens_element_added + html1 = '<p>Hello</p>' + html2 = '<p>Hello</p><p>World</p>' + result = @comparator.compare(html1, html2) + changes = result.changes + assert(changes.any? { |c| c.action == '+' }) + end + + def test_diff_tokens_element_removed + html1 = '<p>Hello</p><p>World</p>' + html2 = '<p>Hello</p>' + result = @comparator.compare(html1, html2) + changes = result.changes + assert(changes.any? { |c| c.action == '-' }) + end + + def test_pretty_diff_no_changes + html1 = '<p>Hello</p>' + html2 = '<p>Hello</p>' + result = @comparator.compare(html1, html2) + pretty = result.pretty_diff + assert_equal '', pretty + end + + def test_pretty_diff_with_changes + html1 = '<p>Hello</p>' + html2 = '<p>Goodbye</p>' + result = @comparator.compare(html1, html2) + pretty = result.pretty_diff + assert pretty.include?('Hello') + assert pretty.include?('Goodbye') + assert pretty.include?('-') + assert pretty.include?('+') + end + + def test_complex_html_structure + html1 = <<~HTML + <div class="container"> + <h1>Title</h1> + <p>Paragraph 1</p> + <ul> + <li>Item 1</li> + <li>Item 2</li> + </ul> + </div> + HTML + + html2 = <<~HTML + <div class="container"> + <h1>Title</h1> + <p>Paragraph 1</p> + <ul> + <li>Item 1</li> + <li>Item 2</li> + </ul> + </div> + HTML + + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) 
+ end + + def test_nested_elements_with_attributes + html1 = '<div id="outer" class="wrapper"><span class="inner" data-value="123">Text</span></div>' + html2 = '<div class="wrapper" id="outer"><span data-value="123" class="inner">Text</span></div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_significant_whitespace_in_textarea + html1 = '<textarea>Line 1\n Line 2\n Line 3</textarea>' + html2 = '<textarea>Line 1\nLine 2\nLine 3</textarea>' + result = @comparator.compare(html1, html2) + assert_false(result.same_hash?) + end + + def test_significant_whitespace_in_script + html1 = '<script>var x = 1; var y = 2;</script>' + html2 = '<script>var x = 1; var y = 2;</script>' + result = @comparator.compare(html1, html2) + assert_false(result.same_hash?) + end + + def test_significant_whitespace_in_style + html1 = '<style>body { margin: 0; padding: 0; }</style>' + html2 = '<style>body { margin: 0; padding: 0; }</style>' + result = @comparator.compare(html1, html2) + assert_false(result.same_hash?) + end + + def test_mixed_content + html1 = '<div>Text before <strong>bold text</strong> text after</div>' + html2 = '<div>Text before <strong>bold text</strong> text after</div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_empty_text_nodes_removed + html1 = '<div> <span>Text</span> </div>' + html2 = '<div><span>Text</span></div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_multiple_void_elements + html1 = '<div><br><hr><img src="test.png"></div>' + html2 = '<div><br/><hr/><img src="test.png"/></div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) 
+ end + + def test_attribute_order_normalized + html1 = '<div data-id="1" class="test" id="main">Content</div>' + html2 = '<div id="main" class="test" data-id="1">Content</div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_real_world_example_article + html1 = <<~HTML + <article> + <header> + <h1 class="title">My Article</h1> + <p class="meta"> + Published on 2024-01-01 + </p> + </header> + <section> + <p> + First paragraph. + </p> + <p> + Second paragraph with + <a href="link.html">a link</a> + . + </p> + </section> + </article> + HTML + + html2 = <<~HTML + <article> + <header> + <h1 class="title">My Article</h1> + <p class="meta">Published on 2024-01-01</p> + </header> + <section> + <p>First paragraph.</p><p>Second paragraph with <a href="link.html">a link</a>.</p> + </section> + </article> + HTML + + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_real_world_example_with_difference + html1 = <<~HTML + <article> + <h1>Title</h1> + <p>Original text.</p> + </article> + HTML + + html2 = <<~HTML + <article> + <h1>Title</h1> + <p>Modified text.</p> + </article> + HTML + + result = @comparator.compare(html1, html2) + assert_false(result.same_hash?) + pretty = result.pretty_diff + assert pretty.include?('Original') + assert pretty.include?('Modified') + end + + def test_newlines_normalized + html1 = "<p>\n\n\nHello\n\n\nWorld\n\n\n</p>" + html2 = '<p>Hello World</p>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_tabs_normalized + html1 = "<p>Hello\t\t\tWorld</p>" + html2 = '<p>Hello World</p>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_leading_trailing_whitespace + html1 = '<p> Hello World </p>' + html2 = '<p>Hello World</p>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) 
+ end + + def test_multiple_classes_with_whitespace + html1 = '<div class=" foo bar baz ">test</div>' + html2 = '<div class="bar baz foo">test</div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_nested_void_elements + html1 = '<div><p>Text<br>More<br>Lines</p></div>' + html2 = '<div><p>Text<br/>More<br/>Lines</p></div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_empty_attributes + html1 = '<input type="text" disabled>' + html2 = '<input disabled type="text">' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_multiple_attributes_sorted + html1 = '<div z="3" y="2" x="1" class="foo" id="main">test</div>' + html2 = '<div class="foo" id="main" x="1" y="2" z="3">test</div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_deeply_nested_structure + html1 = '<div><section><article><p><span>Text</span></p></article></section></div>' + html2 = '<div><section><article><p><span>Text</span></p></article></section></div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_self_closing_void_element_formats + html1 = '<meta charset="utf-8"><link rel="stylesheet" href="style.css">' + html2 = '<meta charset="utf-8"/><link rel="stylesheet" href="style.css"/>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_mixed_significant_whitespace + html1 = '<div><pre> code </pre><p> text </p></div>' + html2 = '<div><pre> code </pre><p>text</p></div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_data_attributes + html1 = '<div data-id="123" data-name="test">Content</div>' + html2 = '<div data-name="test" data-id="123">Content</div>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) 
+ end + + def test_complex_class_normalization + html1 = '<span class="a b a c b d">text</span>' + html2 = '<span class="a b c d">text</span>' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end + + def test_boolean_attributes + html1 = '<input type="checkbox" checked disabled readonly>' + html2 = '<input checked disabled readonly type="checkbox">' + result = @comparator.compare(html1, html2) + assert_true(result.same_hash?) + end +end diff --git a/test/ast/test_ast_idgxml_maker.rb b/test/ast/test_ast_idgxml_maker.rb new file mode 100644 index 000000000..7fe4f906c --- /dev/null +++ b/test/ast/test_ast_idgxml_maker.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'tmpdir' +require 'fileutils' +require 'review/ast/command/idgxml_maker' + +class ASTIdgxmlMakerTest < Test::Unit::TestCase + def setup + @tmpdir = Dir.mktmpdir + @old_pwd = Dir.pwd + end + + def teardown + Dir.chdir(@old_pwd) + FileUtils.rm_rf(@tmpdir) + end + + def test_builds_sample_book_with_renderer + if /mswin|mingw|cygwin/.match?(RUBY_PLATFORM) + omit('IDGXML build is not supported on Windows CI') + end + + config = prepare_samplebook(@tmpdir, 'sample-book/src', nil, 'config.yml') + output_dir = File.join(@tmpdir, "#{config['bookname']}-idgxml") + target_file = File.join(output_dir, 'ch01.xml') + + Dir.chdir(@tmpdir) do + maker = ReVIEW::AST::Command::IdgxmlMaker.new + maker.execute('config.yml') + end + + assert(File.exist?(target_file), 'Expected IDGXML output file to be generated') + + content = File.read(target_file) + assert_includes(content, '<doc xmlns:aid="http://ns.adobe.com/AdobeInDesign/4.0/">') + end +end diff --git a/test/ast/test_ast_indexer.rb b/test/ast/test_ast_indexer.rb new file mode 100644 index 000000000..875bb4df2 --- /dev/null +++ b/test/ast/test_ast_indexer.rb @@ -0,0 +1,543 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/indexer' +require 
'review/index_builder' +require 'review/book' +require 'review/book/chapter' +require 'review/ast/compiler' +require 'review/configure' + +class TestASTIndexer < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + + @chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test_chapter', 'test_chapter.re', StringIO.new) + ReVIEW::I18n.setup(@config['language']) + end + + def test_basic_index_building + source = <<~EOS + = Chapter Title + + Basic paragraph with text. + + //list[sample-code][Sample Code Caption][ruby]{ + puts "hello world" + //} + + //table[sample-table][Sample Table Caption]{ + Header 1 Header 2 + ------------ + Cell 1 Cell 2 + //} + + //image[sample-image][Sample Image Caption] + + Text with @<fn>{footnote1} and @<eq>{equation1}. + + //footnote[footnote1][Footnote content] + + //texequation[equation1]{ + E = mc^2 + //} + + //footnote[footnote1][Footnote content] + + //texequation[equation1]{ + E = mc^2 + //} + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + list_item = indexer.list_index['sample-code'] + assert_not_nil(list_item) + assert_equal 1, list_item.number + assert_equal 'sample-code', list_item.id + + assert_equal 1, indexer.table_index.size + table_item = indexer.table_index['sample-table'] + assert_not_nil(table_item) + assert_equal 1, table_item.number + assert_equal 'sample-table', table_item.id + assert_equal 'Sample Table Caption', table_item.caption_node&.to_inline_text + + assert_equal 1, indexer.image_index.size + image_item = indexer.image_index['sample-image'] + assert_not_nil(image_item) + assert_equal 1, image_item.number + assert_equal 'sample-image', image_item.id + assert_equal 'Sample Image Caption', image_item.caption_node&.to_inline_text + + assert_equal 
1, indexer.footnote_index.size + footnote_item = indexer.footnote_index['footnote1'] + assert_not_nil(footnote_item) + assert_equal 1, footnote_item.number + assert_equal 'footnote1', footnote_item.id + + equation_item = indexer.equation_index['equation1'] + assert_not_nil(equation_item) + assert_equal 'equation1', equation_item.id + end + + def test_headline_index_building + source = <<~EOS + = Chapter Title + + == Section 1{sec1} + + Basic text. + + == Section 2{sec2} + + More text. + + === Subsection 2.1{subsec21} + + Subsection content. + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_not_nil(indexer.headline_index) + assert indexer.headline_index.size >= 2 + + sec1_item = indexer.headline_index['sec1'] + assert_not_nil(sec1_item) + assert_equal 'sec1', sec1_item.id + assert_equal [1], sec1_item.number + + sec2_item = indexer.headline_index['sec2'] + assert_not_nil(sec2_item) + assert_equal 'sec2', sec2_item.id + assert_equal [2], sec2_item.number + + subsec_item = indexer.headline_index['sec2|subsec21'] + assert_not_nil(subsec_item) + assert_equal 'sec2|subsec21', subsec_item.id + assert_equal [2, 1], subsec_item.number + end + + def test_minicolumn_index_building + source = <<~EOS + = Chapter Title + + //note[Note Caption]{ + This is a note with @<fn>{note-footnote}. + //} + + //memo[Memo Caption]{ + This is a memo with @<bib>{bibitem1}. 
+ //} + + //footnote[note-footnote][Note footnote] + + //bibpaper[bibitem1][Bib item content] + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_equal 1, indexer.footnote_index.size + footnote_item = indexer.footnote_index['note-footnote'] + assert_not_nil(footnote_item) + assert_equal 'note-footnote', footnote_item.id + + assert_equal 1, indexer.bibpaper_index.size + bib_item = indexer.bibpaper_index['bibitem1'] + assert_not_nil(bib_item) + assert_equal 'bibitem1', bib_item.id + end + + def test_table_inline_elements + source = <<~EOS + = Chapter Title + + //table[inline-table][Table with inline elements]{ + Header @<b>{Bold} @<i>{Italic} Header + ------------ + Cell with @<fn>{table-fn} @<eq>{table-eq} + //} + + //footnote[table-fn][Table footnote] + + //texequation[table-eq]{ + x = y + //} + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_equal 1, indexer.table_index.size + table_item = indexer.table_index['inline-table'] + assert_not_nil(table_item) + + assert_equal 1, indexer.footnote_index.size + footnote_item = indexer.footnote_index['table-fn'] + assert_not_nil(footnote_item) + assert_equal 'table-fn', footnote_item.id + + assert_equal 1, indexer.equation_index.size + equation_item = indexer.equation_index['table-eq'] + assert_not_nil(equation_item) + assert_equal 'table-eq', equation_item.id + end + + def test_code_block_inline_elements + source = <<~EOS + = Chapter Title + + //list[code-with-inline][Code with inline elements][ruby]{ + puts @<b>{bold code} + # Comment with @<fn>{code-fn} + //} + + //footnote[code-fn][Footnote from code block] + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_equal 1, indexer.list_index.size + list_item = indexer.list_index['code-with-inline'] + 
assert_not_nil(list_item) + + assert_equal 1, indexer.footnote_index.size + footnote_item = indexer.footnote_index['code-fn'] + assert_not_nil(footnote_item) + assert_equal 'code-fn', footnote_item.id + end + + def test_empty_ast + test_chapter = ReVIEW::Book::Chapter.new(@book, 1, 'empty_test', 'empty_test.re', StringIO.new) + indexer = ReVIEW::AST::Indexer.new(test_chapter) + result = indexer.build_indexes(nil) + + assert_equal indexer, result + assert_equal 0, indexer.list_index.size + assert_equal 0, indexer.table_index.size + assert_equal 0, indexer.image_index.size + assert_equal 0, indexer.footnote_index.size + assert_equal 0, indexer.equation_index.size + end + + def test_indexes_method + source = <<~EOS + = Chapter Title + + //list[sample][Sample]{ + code + //} + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + indexes = indexer.indexes + assert_kind_of(Hash, indexes) + + expected_keys = %i[ + list table equation footnote endnote + image icon numberless_image indepimage + headline column bibpaper + ] + + expected_keys.each do |key| + assert indexes.key?(key), "Should contain #{key} index" + end + + assert_equal 1, indexes[:list].size + assert_not_nil(indexes[:list]['sample']) + end + + def test_id_validation_warnings + source = <<~EOS + = Chapter Title + + //list[invalid#id][Invalid ID with #]{ + code + //} + + //table[.starts_with_dot][ID starting with dot]{ + data + //} + + Text with @<fn>{space id} and @<eq>{id with$pecial}. 
+ + //footnote[space id][Footnote with space id] + + //texequation[id with$pecial]{ + z = 1 + //} + EOS + + original_stderr = $stderr + captured_stderr = StringIO.new + $stderr = captured_stderr + + begin + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + warnings = captured_stderr.string + assert_include(warnings, 'deprecated ID: `#` in `invalid#id`') + assert_include(warnings, 'deprecated ID: `.starts_with_dot` begins from `.`') + assert_include(warnings, 'deprecated ID: ` ` in `space id`') + assert_include(warnings, 'deprecated ID: `$` in `id with$pecial`') + ensure + $stderr = original_stderr + end + end + + def test_column_index_building + source = <<~EOS + = Chapter Title + + Regular paragraph. + + ===[column]{col1} Column Title + + Column content with @<fn>{col-footnote}. + + ===[/column] + + More content. + + //footnote[col-footnote][Column footnote content] + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_equal 1, indexer.column_index.size + column_item = indexer.column_index['col1'] + assert_not_nil(column_item) + assert_equal 'col1', column_item.id + assert_equal 'Column Title', column_item.caption_node&.to_inline_text + + assert_equal 1, indexer.footnote_index.size + footnote_item = indexer.footnote_index['col-footnote'] + assert_not_nil(footnote_item) + assert_equal 'col-footnote', footnote_item.id + end + + def test_endnote_index_building + source = <<~EOS + = Chapter Title + + Text with @<endnote>{endnote1} reference. 
+ + //endnote[endnote1][Endnote content here] + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_equal 1, indexer.endnote_index.size + endnote_item = indexer.endnote_index['endnote1'] + assert_not_nil(endnote_item) + assert_equal 1, endnote_item.number + assert_equal 'endnote1', endnote_item.id + end + + def test_icon_index_building + source = <<~EOS + = Chapter Title + + Text with @<icon>{user-icon} and @<icon>{settings-icon}. + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_equal 2, indexer.icon_index.size + + icon1 = indexer.icon_index['user-icon'] + assert_not_nil(icon1) + assert_equal 1, icon1.number + assert_equal 'user-icon', icon1.id + + icon2 = indexer.icon_index['settings-icon'] + assert_not_nil(icon2) + assert_equal 2, icon2.number + assert_equal 'settings-icon', icon2.id + end + + def test_imgtable_index_building + source = <<~EOS + = Chapter Title + + //imgtable[table-image][Table as Image Caption]{ + dummy content + //} + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_equal 1, indexer.table_index.size + table_item = indexer.table_index['table-image'] + assert_not_nil(table_item) + assert_equal 'table-image', table_item.id + + assert_equal 1, indexer.indepimage_index.size + indep_item = indexer.indepimage_index['table-image'] + assert_not_nil(indep_item) + assert_equal 'table-image', indep_item.id + end + + def test_bibpaper_block_index_building + source = <<~EOS + = Chapter Title + + Citation @<bib>{ref1} in text. 
+ + //bibpaper[ref1][Author Name, "Book Title", Publisher, 2024] + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_equal 1, indexer.bibpaper_index.size + bib_item = indexer.bibpaper_index['ref1'] + assert_not_nil(bib_item) + assert_equal 'ref1', bib_item.id + assert_equal 'Author Name, "Book Title", Publisher, 2024', bib_item.caption_node&.to_inline_text + end + + def test_caption_inline_elements + source = <<~EOS + = Chapter Title + + //list[code-id][Caption with @<fn>{cap-fn} and @<bib>{cap-bib}]{ + code content + //} + + //footnote[cap-fn][Caption footnote] + //bibpaper[cap-bib][Bibliography in caption] + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_equal 1, indexer.list_index.size + + assert_equal 1, indexer.footnote_index.size + footnote_item = indexer.footnote_index['cap-fn'] + assert_not_nil(footnote_item) + assert_equal 'cap-fn', footnote_item.id + + assert_equal 1, indexer.bibpaper_index.size + bib_item = indexer.bibpaper_index['cap-bib'] + assert_not_nil(bib_item) + assert_equal 'cap-bib', bib_item.id + end + + def test_headline_caption_inline_elements + source = <<~EOS + = Chapter Title + + =={sec1} Section with @<fn>{head-fn} in title + + Content here. 
+ + //footnote[head-fn][Headline footnote] + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_not_nil(indexer.headline_index['sec1']) + + assert_equal 1, indexer.footnote_index.size + footnote_item = indexer.footnote_index['head-fn'] + assert_not_nil(footnote_item) + assert_equal 'head-fn', footnote_item.id + end + + def test_index_for_method + source = <<~EOS + = Chapter Title + + //list[sample][Sample]{ + code + //} + + //table[tbl][Table]{ + data + //} + EOS + + ast_root = compile_to_ast(source) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_equal indexer.list_index, indexer.index_for(:list) + assert_equal indexer.table_index, indexer.index_for(:table) + assert_equal indexer.image_index, indexer.index_for(:image) + assert_equal indexer.footnote_index, indexer.index_for(:footnote) + assert_equal indexer.endnote_index, indexer.index_for(:endnote) + assert_equal indexer.equation_index, indexer.index_for(:equation) + assert_equal indexer.headline_index, indexer.index_for(:headline) + assert_equal indexer.column_index, indexer.index_for(:column) + assert_equal indexer.bibpaper_index, indexer.index_for(:bibpaper) + assert_equal indexer.icon_index, indexer.index_for(:icon) + assert_equal indexer.indepimage_index, indexer.index_for(:indepimage) + assert_equal indexer.numberless_image_index, indexer.index_for(:numberless_image) + + assert_raise(ArgumentError) do + indexer.index_for(:unknown_type) + end + end + + private + + def compile_to_ast(content) + @chapter.content = content + + @book.generate_indexes + @chapter.generate_indexes + + ast_compiler = ReVIEW::AST::Compiler.new + ast_compiler.compile_to_ast(@chapter) + end +end diff --git a/test/ast/test_ast_indexer_pure.rb b/test/ast/test_ast_indexer_pure.rb new file mode 100644 index 000000000..6bf5eff20 --- /dev/null +++ b/test/ast/test_ast_indexer_pure.rb @@ -0,0 +1,334 @@ +# 
frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/indexer' +require 'review/book' +require 'review/book/chapter' +require 'review/ast/compiler' + +class TestASTIndexerPure < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + + @chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test_chapter', 'test_chapter.re', StringIO.new) + ReVIEW::I18n.setup(@config['language']) + end + + def test_basic_index_building + source = <<~EOS + = Chapter Title + + Basic paragraph with text. + + //list[sample-code][Sample Code Caption][ruby]{ + puts "hello world" + //} + + //table[sample-table][Sample Table Caption]{ + Header 1 Header 2 + ------------ + Cell 1 Cell 2 + //} + + //image[sample-image][Sample Image Caption] + + Text with @<fn>{footnote1} and @<eq>{equation1}. + + //footnote[footnote1][Footnote content] + + //texequation[equation1]{ + E = mc^2 + //} + + //footnote[footnote1][Footnote content] + + //texequation[equation1]{ + E = mc^2 + //} + EOS + + @chapter.content = source + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + list_item = indexer.list_index['sample-code'] + assert_not_nil(list_item) + assert_equal 1, list_item.number + assert_equal 'sample-code', list_item.id + + assert_equal 1, indexer.table_index.size + table_item = indexer.table_index['sample-table'] + assert_not_nil(table_item) + assert_equal 1, table_item.number + assert_equal 'sample-table', table_item.id + assert_equal 'Sample Table Caption', table_item.caption_node&.to_inline_text + + assert_equal 1, indexer.image_index.size + image_item = indexer.image_index['sample-image'] + assert_not_nil(image_item) + assert_equal 1, image_item.number + 
assert_equal 'sample-image', image_item.id + assert_equal 'Sample Image Caption', image_item.caption_node&.to_inline_text + + assert_equal 1, indexer.footnote_index.size + footnote_item = indexer.footnote_index['footnote1'] + assert_not_nil(footnote_item) + assert_equal 1, footnote_item.number + assert_equal 'footnote1', footnote_item.id + + equation_item = indexer.equation_index['equation1'] + assert_not_nil(equation_item) + assert_equal 'equation1', equation_item.id + end + + def test_headline_index_building + source = <<~EOS + = Chapter Title + + =={sec1} Section 1 + + Basic text. + + =={sec2} Section 2 + + More text. + + ==={subsec21} Subsection 2.1 + + Subsection content. + EOS + + @chapter.content = source + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_not_nil(indexer.headline_index) + assert indexer.headline_index.size >= 2 + + sec1_item = indexer.headline_index['sec1'] + assert_not_nil(sec1_item) + assert_equal 'sec1', sec1_item.id + assert_equal [1], sec1_item.number + + sec2_item = indexer.headline_index['sec2'] + assert_not_nil(sec2_item) + assert_equal 'sec2', sec2_item.id + assert_equal [2], sec2_item.number + + subsec_item = indexer.headline_index['sec2|subsec21'] + assert_not_nil(subsec_item) + assert_equal 'sec2|subsec21', subsec_item.id + assert_equal [2, 1], subsec_item.number + end + + def test_minicolumn_index_building + source = <<~EOS + = Chapter Title + + //note[Note Caption]{ + This is a note with @<fn>{note-footnote}. + //} + + //memo[Memo Caption]{ + This is a memo with @<bib>{bibitem1}. 
+ //} + + //footnote[note-footnote][Note footnote] + + //bibpaper[bibitem1][Bib item content] + EOS + + @chapter.content = source + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_equal 1, indexer.footnote_index.size + footnote_item = indexer.footnote_index['note-footnote'] + assert_not_nil(footnote_item) + assert_equal 'note-footnote', footnote_item.id + + assert_equal 1, indexer.bibpaper_index.size + bib_item = indexer.bibpaper_index['bibitem1'] + assert_not_nil(bib_item) + assert_equal 'bibitem1', bib_item.id + end + + def test_table_inline_elements + source = <<~EOS + = Chapter Title + + //table[inline-table][Table with inline elements]{ + Header @<b>{Bold} @<i>{Italic} Header + ------------ + Cell with @<fn>{table-fn} @<eq>{table-eq} + //} + + //footnote[table-fn][Table footnote] + + //texequation[table-eq]{ + x = y + //} + EOS + + @chapter.content = source + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_equal 1, indexer.table_index.size + table_item = indexer.table_index['inline-table'] + assert_not_nil(table_item) + + assert_equal 1, indexer.footnote_index.size + footnote_item = indexer.footnote_index['table-fn'] + assert_not_nil(footnote_item) + assert_equal 'table-fn', footnote_item.id + + assert_equal 1, indexer.equation_index.size + equation_item = indexer.equation_index['table-eq'] + assert_not_nil(equation_item) + assert_equal 'table-eq', equation_item.id + end + + def test_code_block_inline_elements + source = <<~EOS + = Chapter Title + + //list[code-with-inline][Code with inline elements][ruby]{ + puts @<b>{bold code} + # Comment with @<fn>{code-fn} + //} + + //footnote[code-fn][Footnote from code block] + EOS + + @chapter.content = source + + ast_compiler = 
ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + assert_equal 1, indexer.list_index.size + list_item = indexer.list_index['code-with-inline'] + assert_not_nil(list_item) + + assert_equal 1, indexer.footnote_index.size + footnote_item = indexer.footnote_index['code-fn'] + assert_not_nil(footnote_item) + assert_equal 'code-fn', footnote_item.id + end + + def test_empty_ast + test_chapter = ReVIEW::Book::Chapter.new(@book, 1, 'empty_test', 'empty_test.re', StringIO.new) + indexer = ReVIEW::AST::Indexer.new(test_chapter) + result = indexer.build_indexes(nil) + + assert_equal indexer, result + assert_equal 0, indexer.list_index.size + assert_equal 0, indexer.table_index.size + assert_equal 0, indexer.image_index.size + assert_equal 0, indexer.footnote_index.size + assert_equal 0, indexer.equation_index.size + end + + def test_indexes_method + source = <<~EOS + = Chapter Title + + //list[sample][Sample]{ + code + //} + EOS + + @chapter.content = source + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + indexes = indexer.indexes + assert_kind_of(Hash, indexes) + + expected_keys = %i[ + list table equation footnote endnote + image icon numberless_image indepimage + headline column bibpaper + ] + + expected_keys.each do |key| + assert indexes.key?(key), "Should contain #{key} index" + end + + assert_equal 1, indexes[:list].size + assert_not_nil(indexes[:list]['sample']) + end + + def test_id_validation_warnings + source = <<~EOS + = Chapter Title + + //list[invalid#id][Invalid ID with #]{ + code + //} + + //table[.starts_with_dot][ID starting with dot]{ + data + //} + + Text with @<fn>{space id} and @<eq>{id with$pecial}. 
+ + //footnote[space id][Footnote with space id] + + //texequation[id with$pecial]{ + z = 1 + //} + EOS + + @chapter.content = source + + original_stderr = $stderr + captured_stderr = StringIO.new + $stderr = captured_stderr + + begin + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + indexer = ReVIEW::AST::Indexer.new(@chapter) + indexer.build_indexes(ast_root) + + warnings = captured_stderr.string + assert_include(warnings, 'deprecated ID: `#` in `invalid#id`') + assert_include(warnings, 'deprecated ID: `.starts_with_dot` begins from `.`') + assert_include(warnings, 'deprecated ID: ` ` in `space id`') + assert_include(warnings, 'deprecated ID: `$` in `id with$pecial`') + ensure + $stderr = original_stderr + end + end +end diff --git a/test/ast/test_ast_inline.rb b/test/ast/test_ast_inline.rb new file mode 100644 index 000000000..7f968d231 --- /dev/null +++ b/test/ast/test_ast_inline.rb @@ -0,0 +1,150 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast' +require 'review/ast/compiler' +require 'review/configure' +require 'review/book' +require 'review/book/chapter' + +class TestASTInline < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + ReVIEW::I18n.setup(@config['language']) + end + + def test_text_node_creation + node = ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Hello world') + + hash = node.to_h + assert_equal 'TextNode', hash[:type] + assert_equal 'Hello world', hash[:content] + end + + def test_inline_node_creation + node = ReVIEW::AST::InlineNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), + inline_type: :b, + args: ['bold text']) + + hash = node.to_h + assert_equal 'InlineNode', hash[:type] + assert_equal :b, 
hash[:inline_type] + assert_equal ['bold text'], hash[:args] + end + + def test_simple_inline_parsing + content = <<~EOB + This is @<b>{bold text} in a paragraph. + EOB + + ast_root = compile_to_ast(content) + # Check that paragraph node exists and has children + paragraph_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + assert_not_nil(paragraph_node) + assert(paragraph_node.children.any?, 'Paragraph should have inline children') + + # Check for text and inline nodes + text_nodes = paragraph_node.children.select { |n| n.is_a?(ReVIEW::AST::TextNode) } + inline_nodes = paragraph_node.children.select { |n| n.is_a?(ReVIEW::AST::InlineNode) } + + assert(text_nodes.any?, 'Should have text nodes') + assert(inline_nodes.any?, 'Should have inline nodes') + + # Check inline node details + bold_node = inline_nodes.find { |n| n.inline_type == :b } + assert_not_nil(bold_node, 'Should have bold inline node') + assert_equal :b, bold_node.inline_type + end + + def test_multiple_inline_elements + content = <<~EOB + Text with @<b>{bold} and @<i>{italic} elements. + EOB + + ast_root = compile_to_ast(content) + paragraph_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + assert_not_nil(paragraph_node) + + # Check for both bold and italic inline nodes + inline_nodes = paragraph_node.children.select { |n| n.is_a?(ReVIEW::AST::InlineNode) } + assert_equal 2, inline_nodes.size + + bold_node = inline_nodes.find { |n| n.inline_type == :b } + italic_node = inline_nodes.find { |n| n.inline_type == :i } + + assert_not_nil(bold_node, 'Should have bold inline node') + assert_not_nil(italic_node, 'Should have italic inline node') + assert_equal :b, bold_node.inline_type + assert_equal :i, italic_node.inline_type + end + + def test_inline_output_compatibility + content = <<~EOB + This is @<b>{bold} and @<code>{inline code} text. 
+ EOB + + ast_root = compile_to_ast(content) + + paragraph_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + assert_not_nil(paragraph_node, 'Should have paragraph node') + + # Check inline elements in AST + inline_nodes = paragraph_node.children.select { |n| n.is_a?(ReVIEW::AST::InlineNode) } + assert_equal(2, inline_nodes.size, 'Should have two inline elements') + + bold_node = inline_nodes.find { |n| n.inline_type == :b } + code_node = inline_nodes.find { |n| n.inline_type == :code } + + assert_not_nil(bold_node, 'Should have bold inline node') + assert_not_nil(code_node, 'Should have code inline node') + assert_equal(['bold'], bold_node.args) + assert_equal(['inline code'], code_node.args) + end + + def test_mixed_content_parsing + content = <<~EOB + = Chapter Title + + Normal paragraph with @<b>{bold text}. + + Another paragraph with @<code>{code} and @<i>{italic}. + EOB + + ast_root = compile_to_ast(content) + # Check headline + headline_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::HeadlineNode) } + assert_not_nil(headline_node) + assert_equal 'Chapter Title', headline_node.caption_text + + # Check paragraphs with inline elements + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + assert_equal 2, paragraph_nodes.size + + # First paragraph should have bold inline + first_para = paragraph_nodes[0] + bold_node = first_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :b } + assert_not_nil(bold_node) + + # Second paragraph should have code and italic inlines + second_para = paragraph_nodes[1] + code_node = second_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :code } + italic_node = second_para.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :i } + assert_not_nil(code_node) + assert_not_nil(italic_node) + end + + private + + def compile_to_ast(content) + chapter = ReVIEW::Book::Chapter.new(@book, 1, 
'test', 'test.re', StringIO.new) + chapter.content = content + + ast_compiler = ReVIEW::AST::Compiler.new + ast_compiler.compile_to_ast(chapter) + end +end diff --git a/test/ast/test_ast_inline_structure.rb b/test/ast/test_ast_inline_structure.rb new file mode 100644 index 000000000..a4b4e4339 --- /dev/null +++ b/test/ast/test_ast_inline_structure.rb @@ -0,0 +1,181 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast' +require 'review/ast/compiler' +require 'review/configure' +require 'review/book' +require 'review/book/chapter' + +class TestASTInlineStructure < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + ReVIEW::I18n.setup(@config['language']) + end + + def test_inline_element_ast_structure + content = <<~EOB + = Test Chapter + + Simple inline: @<b>{bold} and @<code>{code}. + + Ruby annotation: @<ruby>{漢字,かんじ}. + + References: @<href>{http://example.com, Link Text}. + + Keywords: @<kw>{Term, Description}. + + Heading ref: @<hd>{section}. + + Cross-refs: @<chap>{intro}, @<sec>{overview}. + + Word files: @<w>{words} and @<wb>{words2}. + + Complex ref: @<img>{figure1} and @<table>{data1}. 
+ EOB + + ast_root = compile_to_ast(content) + + assert_not_nil(ast_root) + assert_equal(ReVIEW::AST::DocumentNode, ast_root.class) + + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + assert_equal(8, paragraph_nodes.size) + + simple_para = paragraph_nodes[0] + bold_node = find_inline_node(simple_para, :b) + code_node = find_inline_node(simple_para, :code) + assert_not_nil(bold_node) + assert_not_nil(code_node) + assert_equal(['bold'], bold_node.args) + assert_equal(['code'], code_node.args) + + ruby_para = paragraph_nodes[1] + ruby_node = find_inline_node(ruby_para, :ruby) + assert_not_nil(ruby_node) + assert_equal(['漢字', 'かんじ'], ruby_node.args) + + href_para = paragraph_nodes[2] + href_node = find_inline_node(href_para, :href) + assert_not_nil(href_node) + assert_equal(['http://example.com', 'Link Text'], href_node.args) + + kw_para = paragraph_nodes[3] + kw_node = find_inline_node(kw_para, :kw) + assert_not_nil(kw_node) + assert_equal(['Term', 'Description'], kw_node.args) + + hd_para = paragraph_nodes[4] + hd_node = find_inline_node(hd_para, :hd) + assert_not_nil(hd_node) + assert_equal(['section'], hd_node.args) + + cross_para = paragraph_nodes[5] + chap_node = find_inline_node(cross_para, :chap) + sec_node = find_inline_node(cross_para, :sec) + assert_not_nil(chap_node) + assert_not_nil(sec_node) + assert_equal(['intro'], chap_node.args) + assert_equal(['overview'], sec_node.args) + + word_para = paragraph_nodes[6] + w_node = find_inline_node(word_para, :w) + wb_node = find_inline_node(word_para, :wb) + assert_not_nil(w_node) + assert_not_nil(wb_node) + assert_equal(['words'], w_node.args) + assert_equal(['words2'], wb_node.args) + + ref_para = paragraph_nodes[7] + img_node = find_inline_node(ref_para, :img) + table_node = find_inline_node(ref_para, :table) + assert_not_nil(img_node) + assert_not_nil(table_node) + assert_equal(['figure1'], img_node.args) + assert_equal(['data1'], table_node.args) + end + + def 
test_pipe_separated_inline_elements + content = <<~EOB + = Test Chapter + + Heading with chapter: @<hd>{chapter1|Introduction}. + + Image with chapter: @<img>{chap1|figure1}. + + List with chapter: @<list>{chap2|sample1}. + + Equation with chapter: @<eq>{chap3|formula1}. + + Table with chapter: @<table>{chap4|data1}. + EOB + + ast_root = compile_to_ast(content) + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + + hd_para = paragraph_nodes[0] + hd_node = find_inline_node(hd_para, :hd) + assert_not_nil(hd_node) + assert_equal(['chapter1', 'Introduction'], hd_node.args) + + img_para = paragraph_nodes[1] + img_node = find_inline_node(img_para, :img) + assert_not_nil(img_node) + assert_equal(['chap1', 'figure1'], img_node.args) + + list_para = paragraph_nodes[2] + list_node = find_inline_node(list_para, :list) + assert_not_nil(list_node) + assert_equal(['chap2', 'sample1'], list_node.args) + + eq_para = paragraph_nodes[3] + eq_node = find_inline_node(eq_para, :eq) + assert_not_nil(eq_node) + assert_equal(['chap3', 'formula1'], eq_node.args) + + table_para = paragraph_nodes[4] + table_node = find_inline_node(table_para, :table) + assert_not_nil(table_node) + assert_equal(['chap4', 'data1'], table_node.args) + end + + def test_newly_added_inline_commands + content = <<~EOB + = Test Chapter + + Label references: @<labelref>{label1} and @<ref>{label2}. 
+ EOB + + ast_root = compile_to_ast(content) + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + + label_para = paragraph_nodes[0] + labelref_node = find_inline_node(label_para, :labelref) + ref_node = find_inline_node(label_para, :ref) + assert_not_nil(labelref_node) + assert_not_nil(ref_node) + assert_equal(['label1'], labelref_node.args) + assert_equal(['label2'], ref_node.args) + end + + private + + def find_inline_node(paragraph, inline_type) + paragraph.children.find do |child| + child.is_a?(ReVIEW::AST::InlineNode) && child.inline_type == inline_type + end + end + + def compile_to_ast(content) + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + ast_compiler = ReVIEW::AST::Compiler.new + ast_compiler.compile_to_ast(chapter, reference_resolution: false) + end +end diff --git a/test/ast/test_ast_json_serialization.rb b/test/ast/test_ast_json_serialization.rb new file mode 100644 index 000000000..f50aaedc2 --- /dev/null +++ b/test/ast/test_ast_json_serialization.rb @@ -0,0 +1,855 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast' +require 'review/ast/json_serializer' +require 'review/ast/code_line_node' +require 'review/ast/table_row_node' +require 'review/ast/table_cell_node' +require 'json' + +class TestASTJSONSerialization < Test::Unit::TestCase + include ReVIEW + + def setup + @location = SnapshotLocation.new('test.re', 42) + end + + def test_basic_node_serialization + # Test with a concrete node class instead of abstract Node + node = AST::ParagraphNode.new(location: @location) + json = node.to_json + parsed = JSON.parse(json) + + assert_equal 'ParagraphNode', parsed['type'] + assert_equal 'test.re', parsed['location']['filename'] + assert_equal 42, parsed['location']['lineno'] + assert_equal [], parsed['children'] + end + + def test_headline_node_serialization + node = AST::HeadlineNode.new( + location: @location, + 
level: 1, + label: 'intro', + caption_node: CaptionParserHelper.parse('Introduction', location: @location) + ) + + json = node.to_json + parsed = JSON.parse(json) + + assert_equal 'HeadlineNode', parsed['type'] + assert_equal 1, parsed['level'] + assert_equal 'intro', parsed['label'] + expected_caption_node = { + 'children' => [{ 'content' => 'Introduction', + 'location' => { 'filename' => 'test.re', 'lineno' => 42 }, + 'type' => 'TextNode' }], + 'location' => { 'filename' => 'test.re', 'lineno' => 42 }, + 'type' => 'CaptionNode' + } + assert_equal expected_caption_node, parsed['caption_node'] + end + + def test_paragraph_with_inline_elements + para = AST::ParagraphNode.new(location: @location) + + # Add text node + text1 = AST::TextNode.new( + location: @location, + content: 'This is ' + ) + para.add_child(text1) + + # Add inline node + inline = AST::InlineNode.new( + location: @location, + inline_type: :b, + args: ['bold'] + ) + + inline_text = AST::TextNode.new( + location: @location, + content: 'bold' + ) + inline.add_child(inline_text) + + para.add_child(inline) + + # Add more text + text2 = AST::TextNode.new( + location: @location, + content: ' text.' 
+ ) + para.add_child(text2) + + json = para.to_json + parsed = JSON.parse(json) + + assert_equal 'ParagraphNode', parsed['type'] + assert_equal 3, parsed['children'].size + + # Check first text node + assert_equal 'TextNode', parsed['children'][0]['type'] + assert_equal 'This is ', parsed['children'][0]['content'] + + # Check inline node + inline_node = parsed['children'][1] + assert_equal 'InlineNode', inline_node['type'] + assert_equal 'b', inline_node['inline_type'] + assert_equal ['bold'], inline_node['args'] + assert_equal 1, inline_node['children'].size + assert_equal 'bold', inline_node['children'][0]['content'] + + # Check last text node + assert_equal 'TextNode', parsed['children'][2]['type'] + assert_equal ' text.', parsed['children'][2]['content'] + end + + def test_code_block_node_serialization + lines_text = "def hello\n puts \"world\"\nend" + node = AST::CodeBlockNode.new( + location: @location, + id: 'example', + caption_node: CaptionParserHelper.parse('Example Code', location: @location), + lang: 'ruby', + original_text: lines_text, + line_numbers: true + ) + + # Add code line nodes to represent the structure + ['def hello', ' puts "world"', 'end'].each_with_index do |line, index| + line_node = AST::CodeLineNode.new( + location: @location, + line_number: index + 1 + ) + line_node.add_child(AST::TextNode.new(location: @location, content: line)) + node.add_child(line_node) + end + + json = node.to_json + parsed = JSON.parse(json) + + assert_equal 'CodeBlockNode', parsed['type'] + assert_equal 'example', parsed['id'] + expected_caption = { + 'type' => 'CaptionNode', + 'location' => { 'filename' => 'test.re', 'lineno' => 42 }, + 'children' => [ + { + 'type' => 'TextNode', + 'content' => 'Example Code', + 'location' => { 'filename' => 'test.re', 'lineno' => 42 } + } + ] + } + assert_equal expected_caption, parsed['caption_node'] + assert_equal 'ruby', parsed['lang'] + assert_equal lines_text, parsed['original_text'] + assert_equal true, 
parsed['line_numbers'] + assert_equal 3, parsed['children'].size # Check we have 3 code line nodes + end + + def test_table_node_serialization + node = AST::TableNode.new( + location: @location, + id: 'data', + caption_node: CaptionParserHelper.parse('Sample Data', location: @location) + ) + + # Add header row + header_row = AST::TableRowNode.new(location: @location, row_type: :header) + ['Name', 'Age'].each do |cell_content| + cell = AST::TableCellNode.new(location: @location) + cell.add_child(AST::TextNode.new(location: @location, content: cell_content)) + header_row.add_child(cell) + end + node.add_header_row(header_row) + + # Add body rows + [['Alice', '25'], ['Bob', '30']].each do |row_data| + body_row = AST::TableRowNode.new(location: @location, row_type: :body) + row_data.each do |cell_content| + cell = AST::TableCellNode.new(location: @location) + cell.add_child(AST::TextNode.new(location: @location, content: cell_content)) + body_row.add_child(cell) + end + node.add_body_row(body_row) + end + + json = node.to_json + parsed = JSON.parse(json) + + assert_equal 'TableNode', parsed['type'] + assert_equal 'data', parsed['id'] + expected_caption = { + 'type' => 'CaptionNode', + 'location' => { 'filename' => 'test.re', 'lineno' => 42 }, + 'children' => [ + { + 'type' => 'TextNode', + 'content' => 'Sample Data', + 'location' => { 'filename' => 'test.re', 'lineno' => 42 } + } + ] + } + assert_equal expected_caption, parsed['caption_node'] + assert_equal 1, parsed['header_rows'].size # Check we have 1 header row + assert_equal 2, parsed['body_rows'].size # Check we have 2 body rows + end + + def test_list_node_serialization + list = AST::ListNode.new( + location: @location, + list_type: :ul + ) + + item1 = AST::ListItemNode.new( + location: @location, + level: 1 + ) + text1 = AST::TextNode.new(location: @location, content: 'First item') + item1.add_child(text1) + list.add_child(item1) + + item2 = AST::ListItemNode.new( + location: @location, + level: 1 + ) + text2 = 
AST::TextNode.new(location: @location, content: 'Second item') + item2.add_child(text2) + list.add_child(item2) + + json = list.to_json + parsed = JSON.parse(json) + + assert_equal 'ListNode', parsed['type'] + assert_equal 'ul', parsed['list_type'] + assert_equal 2, parsed['children'].size + # Check that text content is in the children of each list item + assert_equal 1, parsed['children'][0]['children'].size + assert_equal 'First item', parsed['children'][0]['children'][0]['content'] + assert_equal 1, parsed['children'][1]['children'].size + assert_equal 'Second item', parsed['children'][1]['children'][0]['content'] + end + + def test_embed_node_serialization + node = AST::EmbedNode.new( + location: @location, + embed_type: :block, + target_builders: ['html'], + content: "<div>HTML content</div>\n<p>Paragraph</p>" + ) + + json = node.to_json + parsed = JSON.parse(json) + + assert_equal 'EmbedNode', parsed['type'] + assert_equal 'block', parsed['embed_type'] + assert_equal ['html'], parsed['target_builders'] + assert_equal "<div>HTML content</div>\n<p>Paragraph</p>", parsed['content'] + end + + def test_document_node_serialization + doc = AST::DocumentNode.new( + location: @location + ) + + headline = AST::HeadlineNode.new( + location: @location, + level: 1, + caption_node: CaptionParserHelper.parse('Chapter 1', location: @location) + ) + doc.add_child(headline) + + para = AST::ParagraphNode.new( + location: @location + ) + para_text = AST::TextNode.new(location: @location, content: 'Test paragraph') + para.add_child(para_text) + doc.add_child(para) + + json = doc.to_json + parsed = JSON.parse(json) + + assert_equal 'DocumentNode', parsed['type'] + assert_equal 2, parsed['children'].size + assert_equal 'HeadlineNode', parsed['children'][0]['type'] + assert_equal 'ParagraphNode', parsed['children'][1]['type'] + end + + def test_custom_json_serializer_basic + node = AST::HeadlineNode.new( + location: @location, + level: 2, + caption_node: 
CaptionParserHelper.parse('Section Title', location: @location) + ) + + options = AST::JSONSerializer::Options.new + json = AST::JSONSerializer.serialize(node, options) + parsed = JSON.parse(json) + + assert_equal 'HeadlineNode', parsed['type'] + assert_equal 2, parsed['level'] + expected_caption = { + 'children' => [{ 'content' => 'Section Title', + 'location' => { 'filename' => 'test.re', 'lineno' => 42 }, + 'type' => 'TextNode' }], + 'location' => { 'filename' => 'test.re', 'lineno' => 42 }, + 'type' => 'CaptionNode' + } + assert_equal expected_caption, parsed['caption_node'] + end + + def test_custom_json_serializer_without_location + node = AST::HeadlineNode.new( + location: @location, + level: 2, + caption_node: CaptionParserHelper.parse('Section Title', location: @location) + ) + + options = AST::JSONSerializer::Options.new + options.include_location = false + + json = AST::JSONSerializer.serialize(node, options) + parsed = JSON.parse(json) + + assert_equal 'HeadlineNode', parsed['type'] + assert_equal 2, parsed['level'] + expected_caption = { + 'children' => [{ 'content' => 'Section Title', 'type' => 'TextNode' }], + 'type' => 'CaptionNode' + } + assert_equal expected_caption, parsed['caption_node'] + assert_nil(parsed['location']) + end + + def test_custom_json_serializer_compact + node = AST::HeadlineNode.new( + location: @location, + level: 2, + caption_node: CaptionParserHelper.parse('Section Title', location: @location) + ) + + options = AST::JSONSerializer::Options.new + options.pretty = false + options.include_location = false + + json = AST::JSONSerializer.serialize(node, options) + + # Compact JSON should not have newlines + assert_not_include(json, "\n") + + parsed = JSON.parse(json) + assert_equal 'HeadlineNode', parsed['type'] + end + + def test_json_schema_structure + schema = AST::JSONSerializer.json_schema + + assert_equal 'http://json-schema.org/draft-07/schema#', schema['$schema'] + assert_equal 'ReVIEW AST JSON Schema', schema['title'] + 
assert_equal 'object', schema['type'] + assert_include(schema['required'], 'type') + + # Check enum values for type + type_enum = schema['properties']['type']['enum'] + assert_include(type_enum, 'DocumentNode') + assert_include(type_enum, 'HeadlineNode') + assert_include(type_enum, 'ParagraphNode') + assert_include(type_enum, 'InlineNode') + end + + def test_complex_nested_structure + # Create a complex document structure + doc = AST::DocumentNode.new( + location: @location + ) + + # Add headline + headline = AST::HeadlineNode.new( + location: @location, + level: 1, + caption_node: CaptionParserHelper.parse('Introduction', location: @location) + ) + doc.add_child(headline) + + # Add paragraph with inline elements + para = AST::ParagraphNode.new( + location: @location + ) + + text1 = AST::TextNode.new( + location: @location, + content: 'This paragraph has ' + ) + para.add_child(text1) + + inline = AST::InlineNode.new( + location: @location, + inline_type: :code, + args: ['inline code'] + ) + + inline_text = AST::TextNode.new( + location: @location, + content: 'inline code' + ) + inline.add_child(inline_text) + para.add_child(inline) + + text2 = AST::TextNode.new( + location: @location, + content: ' elements.' 
+ ) + para.add_child(text2) + + doc.add_child(para) + + # Add code block + code = AST::CodeBlockNode.new( + location: @location, + id: 'example', + caption_node: CaptionParserHelper.parse('Code Example', location: @location), + lang: 'ruby', + original_text: 'puts "Hello, World!"' + ) + + # Add code line node + line_node = AST::CodeLineNode.new(location: @location) + line_node.add_child(AST::TextNode.new(location: @location, content: 'puts "Hello, World!"')) + code.add_child(line_node) + doc.add_child(code) + + # Serialize and verify + json = doc.to_json + parsed = JSON.parse(json) + + assert_equal 'DocumentNode', parsed['type'] + assert_equal 3, parsed['children'].size + + # Check headline + headline_json = parsed['children'][0] + assert_equal 'HeadlineNode', headline_json['type'] + assert_equal 1, headline_json['level'] + assert_equal({ 'children' => + [{ 'content' => 'Introduction', + 'location' => { 'filename' => 'test.re', 'lineno' => 42 }, + 'type' => 'TextNode' }], + 'location' => { 'filename' => 'test.re', 'lineno' => 42 }, + 'type' => 'CaptionNode' }, headline_json['caption_node']) + + # Check paragraph with inline elements + para_json = parsed['children'][1] + assert_equal 'ParagraphNode', para_json['type'] + assert_equal 3, para_json['children'].size + + inline_json = para_json['children'][1] + assert_equal 'InlineNode', inline_json['type'] + assert_equal 'code', inline_json['inline_type'] + assert_equal ['inline code'], inline_json['args'] + + # Check code block + code_json = parsed['children'][2] + assert_equal 'CodeBlockNode', code_json['type'] + assert_equal 'example', code_json['id'] + expected_caption = { + 'type' => 'CaptionNode', + 'location' => { 'filename' => 'test.re', 'lineno' => 42 }, + 'children' => [ + { + 'type' => 'TextNode', + 'content' => 'Code Example', + 'location' => { 'filename' => 'test.re', 'lineno' => 42 } + } + ] + } + assert_equal expected_caption, code_json['caption_node'] + assert_equal 'ruby', code_json['lang'] + 
assert_equal 'puts "Hello, World!"', code_json['original_text'] + assert_equal 1, code_json['children'].size # Check we have 1 code line node + end + + def test_include_location_option_with_true + # Test that location information is included when include_location is true (default) + paragraph = AST::ParagraphNode.new(location: @location) + text_node = AST::TextNode.new(location: @location, content: 'Test content') + paragraph.add_child(text_node) + + options = AST::JSONSerializer::Options.new(include_location: true) + json = AST::JSONSerializer.serialize(paragraph, options) + parsed = JSON.parse(json) + + # Check that location is included in parent node + assert_not_nil(parsed['location'], 'location should be included when include_location is true') + assert_equal 'test.re', parsed['location']['filename'] + assert_equal 42, parsed['location']['lineno'] + + # Check that location is included in child nodes + assert_equal 1, parsed['children'].size + child = parsed['children'][0] + assert_not_nil(child['location'], 'location should be included in child nodes when include_location is true') + assert_equal 'test.re', child['location']['filename'] + assert_equal 42, child['location']['lineno'] + end + + def test_include_location_option_with_false + # Test that location information is excluded when include_location is false + paragraph = AST::ParagraphNode.new(location: @location) + text_node = AST::TextNode.new(location: @location, content: 'Test content') + paragraph.add_child(text_node) + + options = AST::JSONSerializer::Options.new(include_location: false) + json = AST::JSONSerializer.serialize(paragraph, options) + parsed = JSON.parse(json) + + # Check that location is not included in parent node + assert_nil(parsed['location'], 'location should not be included when include_location is false') + + # Check that location is not included in child nodes + assert_equal 1, parsed['children'].size + child = parsed['children'][0] + assert_nil(child['location'], 'location 
should not be included in child nodes when include_location is false') + end + + def test_include_location_with_complex_tree + # Test include_location with a more complex node tree + headline = AST::HeadlineNode.new( + location: @location, + level: 1, + caption_node: CaptionParserHelper.parse('Test Headline', location: @location) + ) + + # Test with include_location = true + options_with_location = AST::JSONSerializer::Options.new(include_location: true) + json_with_location = AST::JSONSerializer.serialize(headline, options_with_location) + parsed_with_location = JSON.parse(json_with_location) + + assert_not_nil(parsed_with_location['location']) + assert_not_nil(parsed_with_location['caption_node']['location']) + caption_children = parsed_with_location['caption_node']['children'] + assert_equal 1, caption_children.size + assert_not_nil(caption_children[0]['location']) + + # Test with include_location = false + options_without_location = AST::JSONSerializer::Options.new(include_location: false) + json_without_location = AST::JSONSerializer.serialize(headline, options_without_location) + parsed_without_location = JSON.parse(json_without_location) + + assert_nil(parsed_without_location['location']) + assert_nil(parsed_without_location['caption_node']['location']) + caption_children = parsed_without_location['caption_node']['children'] + assert_equal 1, caption_children.size + assert_nil(caption_children[0]['location']) + end + + def test_footnote_node_serialization + # Create a footnote node with children + footnote = AST::FootnoteNode.new( + location: @location, + id: 'fn1', + footnote_type: :footnote + ) + + text = AST::TextNode.new( + location: @location, + content: 'This is a footnote text.' 
+ ) + footnote.add_child(text) + + # Serialize to JSON + json = footnote.to_json + parsed = JSON.parse(json) + + # Verify serialization + assert_equal 'FootnoteNode', parsed['type'] + assert_equal 'fn1', parsed['id'] + # footnote_type is omitted when it's :footnote (default) + assert_nil(parsed['footnote_type']) + assert_equal 1, parsed['children'].size + assert_equal 'TextNode', parsed['children'][0]['type'] + assert_equal 'This is a footnote text.', parsed['children'][0]['content'] + + # Test deserialization + deserialized = AST::JSONSerializer.deserialize(json) + assert_instance_of(AST::FootnoteNode, deserialized) + assert_equal 'fn1', deserialized.id + assert_equal :footnote, deserialized.footnote_type + assert_equal 1, deserialized.children.size + assert_equal 'This is a footnote text.', deserialized.children[0].content + end + + def test_footnote_node_endnote_serialization + # Create an endnote node + endnote = AST::FootnoteNode.new( + location: @location, + id: 'en1', + footnote_type: :endnote + ) + + text = AST::TextNode.new( + location: @location, + content: 'This is an endnote.' 
+ ) + endnote.add_child(text) + + # Serialize to JSON + json = endnote.to_json + parsed = JSON.parse(json) + + # Verify serialization - endnote type should be included + assert_equal 'FootnoteNode', parsed['type'] + assert_equal 'en1', parsed['id'] + assert_equal 'endnote', parsed['footnote_type'] + + # Test deserialization + deserialized = AST::JSONSerializer.deserialize(json) + assert_instance_of(AST::FootnoteNode, deserialized) + assert_equal 'en1', deserialized.id + assert_equal :endnote, deserialized.footnote_type + end + + def test_reference_node_unresolved_serialization + # Create an unresolved reference node + ref = AST::ReferenceNode.new( + 'img1', + nil, + location: @location + ) + + # Serialize to JSON + json = ref.to_json + parsed = JSON.parse(json) + + # Verify serialization + assert_equal 'ReferenceNode', parsed['type'] + assert_equal 'img1', parsed['content'] + assert_equal 'img1', parsed['ref_id'] + assert_nil(parsed['context_id']) + assert_nil(parsed['resolved_data']) + + # Test deserialization + deserialized = AST::JSONSerializer.deserialize(json) + assert_instance_of(AST::ReferenceNode, deserialized) + assert_equal 'img1', deserialized.ref_id + assert_nil(deserialized.context_id) + assert_nil(deserialized.resolved_data) + assert_equal false, deserialized.resolved? 
+ end + + def test_reference_node_with_context_serialization + # Create a cross-chapter reference node + ref = AST::ReferenceNode.new( + 'img1', + 'chapter2', + location: @location + ) + + # Serialize to JSON + json = ref.to_json + parsed = JSON.parse(json) + + # Verify serialization + assert_equal 'ReferenceNode', parsed['type'] + assert_equal 'chapter2|img1', parsed['content'] + assert_equal 'img1', parsed['ref_id'] + assert_equal 'chapter2', parsed['context_id'] + + # Test deserialization + deserialized = AST::JSONSerializer.deserialize(json) + assert_instance_of(AST::ReferenceNode, deserialized) + assert_equal 'img1', deserialized.ref_id + assert_equal 'chapter2', deserialized.context_id + assert_equal true, deserialized.cross_chapter? + end + + def test_reference_node_with_image_reference_serialization + # Create resolved image reference + caption_node = CaptionParserHelper.parse('Sample Image', location: @location) + resolved_data = AST::ResolvedData.image( + chapter_number: 1, chapter_type: :chapter, + item_number: '2', + item_id: 'img1', + caption_node: caption_node + ) + + ref = AST::ReferenceNode.new( + 'img1', + nil, + location: @location, + resolved_data: resolved_data + ) + + # Serialize to JSON + json = ref.to_json + parsed = JSON.parse(json) + + # Verify serialization + assert_equal 'ReferenceNode', parsed['type'] + assert_equal 'img1', parsed['ref_id'] + assert_not_nil(parsed['resolved_data']) + assert_equal 'ImageReference', parsed['resolved_data']['type'] + assert_equal 1, parsed['resolved_data']['chapter_number'] + assert_equal '2', parsed['resolved_data']['item_number'] + assert_equal 'img1', parsed['resolved_data']['item_id'] + assert_equal 'CaptionNode', parsed['resolved_data']['caption_node']['type'] + + # Test deserialization + deserialized = AST::JSONSerializer.deserialize(json) + assert_instance_of(AST::ReferenceNode, deserialized) + assert_equal true, deserialized.resolved? 
+ assert_instance_of(AST::ResolvedData::ImageReference, deserialized.resolved_data) + assert_equal 1, deserialized.resolved_data.chapter_number + assert_equal '2', deserialized.resolved_data.item_number + assert_equal 'img1', deserialized.resolved_data.item_id + assert_instance_of(AST::CaptionNode, deserialized.resolved_data.caption_node) + end + + def test_reference_node_with_table_reference_serialization + # Create resolved table reference + resolved_data = AST::ResolvedData.table( + chapter_number: 2, chapter_type: :chapter, + item_number: '1', + item_id: 'table1', + chapter_id: 'ch2' + ) + + ref = AST::ReferenceNode.new( + 'table1', + 'ch2', + location: @location, + resolved_data: resolved_data + ) + + json = ref.to_json + parsed = JSON.parse(json) + + assert_equal 'TableReference', parsed['resolved_data']['type'] + assert_equal 2, parsed['resolved_data']['chapter_number'] + assert_equal '1', parsed['resolved_data']['item_number'] + assert_equal 'ch2', parsed['resolved_data']['chapter_id'] + + # Test deserialization + deserialized = AST::JSONSerializer.deserialize(json) + assert_instance_of(AST::ResolvedData::TableReference, deserialized.resolved_data) + assert_equal 2, deserialized.resolved_data.chapter_number + end + + def test_reference_node_with_chapter_reference_serialization + # Create resolved chapter reference + resolved_data = AST::ResolvedData.chapter( + chapter_number: 3, chapter_type: :chapter, + chapter_id: 'ch3', + item_id: 'ch3', + chapter_title: 'Advanced Topics' + ) + + ref = AST::ReferenceNode.new( + 'ch3', + nil, + location: @location, + resolved_data: resolved_data + ) + + json = ref.to_json + parsed = JSON.parse(json) + + assert_equal 'ChapterReference', parsed['resolved_data']['type'] + assert_equal 3, parsed['resolved_data']['chapter_number'] + assert_equal 'ch3', parsed['resolved_data']['chapter_id'] + assert_equal 'Advanced Topics', parsed['resolved_data']['chapter_title'] + + # Test deserialization + deserialized = 
AST::JSONSerializer.deserialize(json) + assert_instance_of(AST::ResolvedData::ChapterReference, deserialized.resolved_data) + assert_equal 3, deserialized.resolved_data.chapter_number + assert_equal 'Advanced Topics', deserialized.resolved_data.chapter_title + end + + def test_reference_node_with_headline_reference_serialization + # Create resolved headline reference + caption_node = CaptionParserHelper.parse('Section Title', location: @location) + resolved_data = AST::ResolvedData.headline( + headline_number: [1, 2, 3], + item_id: 'sec123', + chapter_id: 'ch1', + chapter_number: 1, chapter_type: :chapter, + caption_node: caption_node + ) + + ref = AST::ReferenceNode.new( + 'sec123', + nil, + location: @location, + resolved_data: resolved_data + ) + + json = ref.to_json + parsed = JSON.parse(json) + + assert_equal 'HeadlineReference', parsed['resolved_data']['type'] + assert_equal [1, 2, 3], parsed['resolved_data']['headline_number'] + assert_equal 'sec123', parsed['resolved_data']['item_id'] + assert_equal 'ch1', parsed['resolved_data']['chapter_id'] + assert_equal 1, parsed['resolved_data']['chapter_number'] + + # Test deserialization + deserialized = AST::JSONSerializer.deserialize(json) + assert_instance_of(AST::ResolvedData::HeadlineReference, deserialized.resolved_data) + assert_equal [1, 2, 3], deserialized.resolved_data.headline_number + end + + def test_reference_node_with_footnote_reference_serialization + # Create resolved footnote reference + resolved_data = AST::ResolvedData.footnote( + item_number: 5, + item_id: 'fn5' + ) + + ref = AST::ReferenceNode.new( + 'fn5', + nil, + location: @location, + resolved_data: resolved_data + ) + + json = ref.to_json + parsed = JSON.parse(json) + + assert_equal 'FootnoteReference', parsed['resolved_data']['type'] + assert_equal 5, parsed['resolved_data']['item_number'] + assert_equal 'fn5', parsed['resolved_data']['item_id'] + + # Test deserialization + deserialized = AST::JSONSerializer.deserialize(json) + 
assert_instance_of(AST::ResolvedData::FootnoteReference, deserialized.resolved_data) + assert_equal 5, deserialized.resolved_data.item_number + end + + def test_reference_node_with_word_reference_serialization + # Create resolved word reference + resolved_data = AST::ResolvedData.word( + word_content: 'important term', + item_id: 'term1' + ) + + ref = AST::ReferenceNode.new( + 'term1', + nil, + location: @location, + resolved_data: resolved_data + ) + + json = ref.to_json + parsed = JSON.parse(json) + + assert_equal 'WordReference', parsed['resolved_data']['type'] + assert_equal 'important term', parsed['resolved_data']['word_content'] + assert_equal 'term1', parsed['resolved_data']['item_id'] + + # Test deserialization + deserialized = AST::JSONSerializer.deserialize(json) + assert_instance_of(AST::ResolvedData::WordReference, deserialized.resolved_data) + assert_equal 'important term', deserialized.resolved_data.word_content + end +end diff --git a/test/ast/test_ast_json_verification.rb b/test/ast/test_ast_json_verification.rb new file mode 100755 index 000000000..655664127 --- /dev/null +++ b/test/ast/test_ast_json_verification.rb @@ -0,0 +1,214 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review' +require 'review/ast/json_serializer' +require 'review/compiler' +require 'review/htmlbuilder' +require 'review/book' +require 'review/book/chapter' +require 'review/configure' +require 'json' +require 'stringio' +require 'fileutils' +require 'tmpdir' + +class ASTJSONVerificationTest < Test::Unit::TestCase + def setup + @fixtures_dir = File.join(__dir__, '..', '..', 'samples', 'debug-book') + @test_files = Dir.glob(File.join(@fixtures_dir, '*.re')).sort + + @tmpdir = Dir.mktmpdir('ast_json_verification') + @output_dir = @tmpdir + + ReVIEW::I18n.setup('ja') + + # Initialize Book and Config for real Chapter usage + @config = ReVIEW::Configure.values + @config['language'] = 'ja' + @book = 
ReVIEW::Book::Base.new(config: @config) + + @test_results = {} + end + + def teardown + FileUtils.rm_rf(@tmpdir) if @tmpdir && File.exist?(@tmpdir) + end + + def test_all_verification_files + @test_files.each do |file_path| + basename = File.basename(file_path, '.re') + content = File.read(file_path) + test_file_ast_compatibility(basename, content) + end + end + + def test_structure_consistency + # Test that AST compilation produces consistent JSON structure + @test_files.each do |file_path| + basename = File.basename(file_path, '.re') + content = File.read(file_path) + + ast_json = compile_to_json(content, 'ast') + ast_data = JSON.parse(ast_json) + + assert_equal 'DocumentNode', ast_data['type'], "AST mode should create DocumentNode for #{basename}" + assert ast_data.key?('children'), "AST mode should have children array for #{basename}" + + next unless content.strip.length > 50 # Arbitrary threshold for non-trivial content + + assert ast_data['children'].any?, "AST mode should have children for non-trivial content in #{basename}" + assert_nil(ast_data['error'], "AST compilation should not have errors for #{basename}: #{ast_data['error']}") + end + end + + def test_element_coverage + # Test that all major Re:VIEW elements are properly represented in JSON + coverage_test_file = File.join(@fixtures_dir, 'extreme_features.re') + content = File.read(coverage_test_file) + + ast_json = compile_to_json(content, 'ast') + ast_data = JSON.parse(ast_json) + + element_types = extract_all_element_types(ast_data) + + expected_types = %w[DocumentNode HeadlineNode ParagraphNode CodeBlockNode InlineNode TextNode] + # Optional types that may appear depending on content: TableNode ImageNode MinicolumnNode BlockNode + + expected_types.each do |expected_type| + assert element_types.include?(expected_type), "Expected element type #{expected_type} not found in AST JSON. 
Found types: #{element_types.join(', ')}" + end + end + + def test_inline_element_preservation + # Test that inline elements are properly preserved in AST mode + inline_test_file = File.join(@fixtures_dir, 'comprehensive.re') + content = File.read(inline_test_file) + + ast_json = compile_to_json(content, 'ast') + ast_data = JSON.parse(ast_json) + + ast_inline_count = count_element_type(ast_data, 'InlineNode') + assert ast_inline_count > 0, "AST mode should preserve inline structure. Found: #{ast_inline_count} inline nodes" + assert_nil(ast_data['error'], "AST compilation should not have errors: #{ast_data['error']}") + end + + def test_caption_node_usage + # Test that captions are represented as CaptionNode objects, not plain strings + # This is critical for AST/Renderer architecture + test_file = File.join(@fixtures_dir, 'comprehensive.re') + content = File.read(test_file) + + ast_json = compile_to_json(content, 'ast') + ast_data = JSON.parse(ast_json) + + captioned_nodes = find_nodes_with_captions(ast_data) + assert captioned_nodes.any?, 'Should find at least one node with caption' + + captioned_nodes.each do |node| + node_type = node['type'] + assert node.key?('caption_node'), "#{node_type} should have 'caption_node' field" + + caption_node = node['caption_node'] + assert_not_nil(caption_node, "#{node_type} caption_node should not be nil") + assert_equal 'CaptionNode', caption_node['type'], "#{node_type} caption_node should be CaptionNode" + + assert caption_node.key?('children'), 'CaptionNode should have children array' + assert caption_node['children'].is_a?(Array), 'CaptionNode children should be an array' + end + end + + private + + def test_file_ast_compatibility(basename, content) + json_output = compile_to_json(content, 'ast') + output_file = File.join(@output_dir, "#{basename}_ast.json") + File.write(output_file, json_output) + + begin + json_data = JSON.parse(json_output) + result = { + success: true, + json_data: json_data, + output_file: output_file, 
+ size: json_output.length, + children_count: json_data['children']&.length || 0, + has_error: json_data.key?('error') + } + rescue JSON::ParserError => e + result = { + success: false, + error: e.message, + output_file: output_file + } + end + + @test_results[basename] = { 'ast' => result } + + assert result[:success], "AST mode failed to produce valid JSON for #{basename}: #{result[:error]}" + + if result[:success] + if content.strip.length > 10 + assert result[:children_count] > 0, "AST mode produced empty content for #{basename}" + end + + assert !result[:has_error], "AST compilation had errors for #{basename}: #{result[:json_data]['error']}" + end + end + + def compile_to_json(content, mode, _config = nil) + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + + chapter.generate_indexes + @book.generate_indexes + + ast_compiler = ReVIEW::AST::Compiler.new + ast_result = ast_compiler.compile_to_ast(chapter) + + if ast_result + options = ReVIEW::AST::JSONSerializer::Options.new(pretty: true) + ReVIEW::AST::JSONSerializer.serialize(ast_result, options) + else + JSON.pretty_generate({ 'type' => 'DocumentNode', 'children' => [] }) + end + rescue StandardError => e + # Return error information in JSON format for debugging + JSON.pretty_generate({ + 'type' => 'DocumentNode', + 'children' => [], + 'error' => e.message, + 'mode' => mode + }) + end + + def extract_all_element_types(data, types = Set.new) + if data.is_a?(Hash) + types.add(data['type']) if data['type'] + data.each_value { |value| extract_all_element_types(value, types) } + elsif data.is_a?(Array) + data.each { |item| extract_all_element_types(item, types) } + end + types + end + + def count_element_type(data, target_type, count = 0) + if data.is_a?(Hash) + count += 1 if data['type'] == target_type + data.each_value { |value| count = count_element_type(value, target_type, count) } + elsif data.is_a?(Array) + data.each { |item| count = count_element_type(item, 
target_type, count) } + end + count + end + + def find_nodes_with_captions(data, nodes = []) + if data.is_a?(Hash) + nodes << data if data.key?('caption_node') + data.each_value { |value| find_nodes_with_captions(value, nodes) } + elsif data.is_a?(Array) + data.each { |item| find_nodes_with_captions(item, nodes) } + end + nodes + end +end diff --git a/test/ast/test_ast_line_break_handling.rb b/test/ast/test_ast_line_break_handling.rb new file mode 100644 index 000000000..cb3a462cc --- /dev/null +++ b/test/ast/test_ast_line_break_handling.rb @@ -0,0 +1,128 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review' +require 'review/ast' +require 'review/ast/compiler' +require 'review/configure' +require 'review/book' +require 'review/i18n' +require 'stringio' + +class TestASTLineBreakHandling < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + ReVIEW::I18n.setup(@config['language']) + end + + def create_chapter(content) + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + chapter + end + + def test_single_line_paragraph + content = 'これは一行のテストです。' + compiler = ReVIEW::AST::Compiler.new + ast_root = compiler.compile_to_ast(create_chapter(content)) + + # Should have one paragraph with one text node + assert_equal 1, ast_root.children.length + paragraph = ast_root.children.first + assert_instance_of(ReVIEW::AST::ParagraphNode, paragraph) + + assert_equal 1, paragraph.children.length + text_node = paragraph.children.first + assert_instance_of(ReVIEW::AST::TextNode, text_node) + assert_equal 'これは一行のテストです。', text_node.content + end + + def test_single_paragraph_with_line_break + content = "この文章は改行が含まれています。\nしかし同じ段落のはずです。" + compiler = ReVIEW::AST::Compiler.new + ast_root = 
compiler.compile_to_ast(create_chapter(content)) + + # Should have one paragraph with one text node + assert_equal 1, ast_root.children.length, 'Should have exactly one paragraph' + paragraph = ast_root.children.first + assert_instance_of(ReVIEW::AST::ParagraphNode, paragraph) + + assert_equal 1, paragraph.children.length, 'Paragraph should have exactly one text node' + text_node = paragraph.children.first + assert_instance_of(ReVIEW::AST::TextNode, text_node) + + # The key assertion: should preserve single line break, not double + expected_content = "この文章は改行が含まれています。\nしかし同じ段落のはずです。" + assert_equal expected_content, text_node.content, + 'Single line break should be preserved as single line break' + end + + def test_two_paragraphs_with_empty_line + content = "最初の段落です。\n\n次の段落です。" + compiler = ReVIEW::AST::Compiler.new + ast_root = compiler.compile_to_ast(create_chapter(content)) + + # Should have two paragraphs + assert_equal 2, ast_root.children.length, 'Should have exactly two paragraphs' + + # First paragraph + paragraph1 = ast_root.children[0] + assert_instance_of(ReVIEW::AST::ParagraphNode, paragraph1) + assert_equal 1, paragraph1.children.length + text1 = paragraph1.children.first + assert_instance_of(ReVIEW::AST::TextNode, text1) + assert_equal '最初の段落です。', text1.content + + # Second paragraph + paragraph2 = ast_root.children[1] + assert_instance_of(ReVIEW::AST::ParagraphNode, paragraph2) + assert_equal 1, paragraph2.children.length + text2 = paragraph2.children.first + assert_instance_of(ReVIEW::AST::TextNode, text2) + assert_equal '次の段落です。', text2.content + end + + def test_multiple_single_line_breaks + content = "行1\n行2\n行3" + compiler = ReVIEW::AST::Compiler.new + ast_root = compiler.compile_to_ast(create_chapter(content)) + + # Should have one paragraph + assert_equal 1, ast_root.children.length, 'Should have exactly one paragraph' + paragraph = ast_root.children.first + assert_instance_of(ReVIEW::AST::ParagraphNode, paragraph) + + assert_equal 1, 
paragraph.children.length + text_node = paragraph.children.first + assert_instance_of(ReVIEW::AST::TextNode, text_node) + + # Should preserve single line breaks + expected_content = "行1\n行2\n行3" + assert_equal expected_content, text_node.content, + 'Multiple single line breaks should be preserved' + end + + def test_mixed_single_and_double_line_breaks + content = "段落1の行1\n段落1の行2\n\n段落2の行1\n段落2の行2" + compiler = ReVIEW::AST::Compiler.new + ast_root = compiler.compile_to_ast(create_chapter(content)) + + # Should have two paragraphs + assert_equal 2, ast_root.children.length, 'Should have exactly two paragraphs' + + # First paragraph should preserve single line breaks + paragraph1 = ast_root.children[0] + text1 = paragraph1.children.first + assert_equal "段落1の行1\n段落1の行2", text1.content + + # Second paragraph should preserve single line breaks + paragraph2 = ast_root.children[1] + text2 = paragraph2.children.first + assert_equal "段落2の行1\n段落2の行2", text2.content + end +end diff --git a/test/ast/test_ast_lists.rb b/test/ast/test_ast_lists.rb new file mode 100644 index 000000000..3ec87f43e --- /dev/null +++ b/test/ast/test_ast_lists.rb @@ -0,0 +1,267 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast' +require 'review/ast/compiler' +require 'review/configure' +require 'review/book' +require 'review/book/chapter' + +class TestASTLists < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + ReVIEW::I18n.setup(@config['language']) + end + + def test_unordered_list_ast_processing + content = <<~EOB + = Chapter Title + + Before list. + + * First item + * Second item with @<b>{bold} + ** Nested item + * Third item + + After list. 
+ EOB + + ast_root = compile_to_ast(content) + list_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::ListNode) } + assert_not_nil(list_node, 'Should have list node') + assert_equal :ul, list_node.list_type + + assert_equal 3, list_node.children.size + + first_item = list_node.children[0] + assert_equal 1, first_item.level + + second_item = list_node.children[1] + assert_equal 1, second_item.level + bold_node = second_item.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :b } + assert_not_nil(bold_node) + + nested_list = second_item.children.find { |n| n.is_a?(ReVIEW::AST::ListNode) } + assert_not_nil(nested_list, 'Second item should have nested list') + assert_equal :ul, nested_list.list_type + assert_equal 1, nested_list.children.size + + nested_item = nested_list.children[0] + assert_equal 2, nested_item.level + + third_item = list_node.children[2] + assert_equal 1, third_item.level + end + + def test_ordered_list_ast_processing + content = <<~EOB + Numbered list: + + 1. First item + 2. Second item + 3. Third item with @<code>{code} + + End of list. + EOB + + ast_root = compile_to_ast(content) + list_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::ListNode) } + assert_not_nil(list_node) + assert_equal :ol, list_node.list_type + assert_equal 3, list_node.children.size + + first_item = list_node.children[0] + assert_equal 1, first_item.number + + third_item = list_node.children[2] + assert_equal 3, third_item.number + code_node = third_item.children.find { |n| n.is_a?(ReVIEW::AST::InlineNode) && n.inline_type == :code } + assert_not_nil(code_node) + end + + def test_definition_list_ast_processing + content = <<~EOB + Definition list: + + : Alpha + DEC の作っていた RISC CPU。 + 浮動小数点数演算が速い。 + : POWER + IBM とモトローラが共同製作した RISC CPU。 + 派生として POWER PC がある。 + + After definitions. 
+ EOB + + ast_root = compile_to_ast(content) + list_node = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::ListNode) } + assert_not_nil(list_node) + assert_equal :dl, list_node.list_type + assert_equal 2, list_node.children.size + + first_def = list_node.children[0] + assert_equal 1, first_def.level + assert(first_def.children.any?) + + second_def = list_node.children[1] + assert_equal 1, second_def.level + assert(second_def.children.any?) + end + + def test_list_output_compatibility + content = <<~EOB + Lists test: + + * Unordered item 1 + * Unordered item with @<b>{bold} text + + 1. Ordered item 1 + 2. Ordered item 2 + + : Term + Definition + + End. + EOB + + ast_root = compile_to_ast(content) + paragraph_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ParagraphNode) } + list_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ListNode) } + + assert_equal 2, paragraph_nodes.size + assert_equal 3, list_nodes.size + + ul_node = list_nodes.find { |n| n.list_type == :ul } + ol_node = list_nodes.find { |n| n.list_type == :ol } + dl_node = list_nodes.find { |n| n.list_type == :dl } + + assert_not_nil(ul_node) + assert_not_nil(ol_node) + assert_not_nil(dl_node) + + bold_item = ul_node.children.find do |item| + item.children.any? 
{ |child| child.is_a?(ReVIEW::AST::InlineNode) && child.inline_type == :b } + end + assert_not_nil(bold_item) + end + + def test_deep_nested_list_ast_processing + content = <<~EOB + = Deep Nesting Test + + * Level 1 Item A + ** Level 2 Item A1 + *** Level 3 Item A1a + *** Level 3 Item A1b + ** Level 2 Item A2 + * Level 1 Item B + ** Level 2 Item B1 + *** Level 3 Item B1a + **** Level 4 Item B1a-i + **** Level 4 Item B1a-ii + *** Level 3 Item B1b + ** Level 2 Item B2 + * Level 1 Item C + EOB + + ast_root = compile_to_ast(content) + main_list = ast_root.children.find { |n| n.is_a?(ReVIEW::AST::ListNode) } + assert_not_nil(main_list, 'Should have main list node') + assert_equal :ul, main_list.list_type + assert_equal 3, main_list.children.size + + item_a = main_list.children[0] + assert_equal 1, item_a.level + nested_list_a = item_a.children.find { |n| n.is_a?(ReVIEW::AST::ListNode) } + assert_not_nil(nested_list_a, 'Item A should have nested list') + assert_equal 2, nested_list_a.children.size + + item_a1 = nested_list_a.children[0] + assert_equal 2, item_a1.level + nested_list_a1 = item_a1.children.find { |n| n.is_a?(ReVIEW::AST::ListNode) } + assert_not_nil(nested_list_a1, 'Item A1 should have nested list') + assert_equal 2, nested_list_a1.children.size + + item_a1a = nested_list_a1.children[0] + item_a1b = nested_list_a1.children[1] + assert_equal 3, item_a1a.level + assert_equal 3, item_a1b.level + + item_b = main_list.children[1] + assert_equal 1, item_b.level + nested_list_b = item_b.children.find { |n| n.is_a?(ReVIEW::AST::ListNode) } + assert_not_nil(nested_list_b, 'Item B should have nested list') + + item_b1 = nested_list_b.children[0] + nested_list_b1 = item_b1.children.find { |n| n.is_a?(ReVIEW::AST::ListNode) } + assert_not_nil(nested_list_b1) + item_b1a = nested_list_b1.children[0] + nested_list_b1a = item_b1a.children.find { |n| n.is_a?(ReVIEW::AST::ListNode) } + assert_not_nil(nested_list_b1a, 'Should have Level 4 nesting') + assert_equal 2, 
nested_list_b1a.children.size + + item_b1a_i = nested_list_b1a.children[0] + item_b1a_ii = nested_list_b1a.children[1] + assert_equal 4, item_b1a_i.level + assert_equal 4, item_b1a_ii.level + end + + def test_mixed_nested_ordered_unordered_lists + content = <<~EOB + = Mixed List Types + + 1. Ordered Item 1 + 2. Ordered Item 2 + + * Unordered Item 1 + ** Nested unordered + *** Deep unordered + * Unordered Item 2 + ** Another nested + EOB + + ast_root = compile_to_ast(content) + list_nodes = ast_root.children.select { |n| n.is_a?(ReVIEW::AST::ListNode) } + assert_operator(list_nodes.size, :>=, 2, 'Should have multiple lists for different types') + + ol_nodes = list_nodes.select { |n| n.list_type == :ol } + ul_nodes = list_nodes.select { |n| n.list_type == :ul } + + assert_equal(1, ol_nodes.size, 'Should have one ordered list') + assert_equal(1, ul_nodes.size, 'Should have one unordered list') + + first_ol = ol_nodes[0] + assert_equal(2, first_ol.children.size, 'Ordered list should have 2 items') + + first_ul = ul_nodes[0] + assert_equal(2, first_ul.children.size, 'Unordered list should have 2 top-level items') + + first_ul_item = first_ul.children[0] + nested_ul = first_ul_item.children.find { |child| child.is_a?(ReVIEW::AST::ListNode) } + assert_not_nil(nested_ul, 'First unordered item should have nested list') + + nested_item = nested_ul.children[0] + deep_nested = nested_item.children.find { |child| child.is_a?(ReVIEW::AST::ListNode) } + assert_not_nil(deep_nested, 'Should have 3-level nesting') + assert_equal(3, deep_nested.children[0].level) + end + + private + + def compile_to_ast(content) + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + @book.generate_indexes + chapter.generate_indexes + + ast_compiler = ReVIEW::AST::Compiler.new + ast_compiler.compile_to_ast(chapter) + end +end diff --git a/test/ast/test_ast_review_generator.rb b/test/ast/test_ast_review_generator.rb new file mode 100644 index 
000000000..f35a65d52 --- /dev/null +++ b/test/ast/test_ast_review_generator.rb @@ -0,0 +1,607 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast' +require 'review/ast/review_generator' +require 'review/ast/code_line_node' +require 'review/ast/table_row_node' +require 'review/ast/table_cell_node' +require 'review/ast/reference_node' +require 'review/ast/footnote_node' + +class TestASTReVIEWGenerator < Test::Unit::TestCase + def setup + @generator = ReVIEW::AST::ReVIEWGenerator.new + @location = ReVIEW::SnapshotLocation.new('test.re', 1) + end + + def test_empty_document + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + result = @generator.generate(doc) + assert_equal '', result + end + + def test_headline + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + # Create caption node + caption_node = ReVIEW::AST::CaptionNode.new(location: @location) + caption_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Introduction')) + + headline = ReVIEW::AST::HeadlineNode.new( + location: @location, + level: 2, + label: 'intro', + caption_node: caption_node + ) + doc.add_child(headline) + + result = @generator.generate(doc) + assert_equal "=={intro} Introduction\n\n", result + end + + def test_paragraph_with_text + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + para = ReVIEW::AST::ParagraphNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + para.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Hello, world!')) + doc.add_child(para) + + result = @generator.generate(doc) + assert_equal "Hello, world!\n\n", result + end + + def test_inline_elements + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + para = ReVIEW::AST::ParagraphNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + 
para.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'This is ')) + + bold = ReVIEW::AST::InlineNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), inline_type: :b) + bold.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'bold')) + para.add_child(bold) + + para.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: ' text.')) + doc.add_child(para) + + result = @generator.generate(doc) + assert_equal "This is @<b>{bold} text.\n\n", result + end + + def test_code_block_with_id + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + # Create caption node + caption_node = ReVIEW::AST::CaptionNode.new(location: @location) + caption_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Hello Example')) + + code = ReVIEW::AST::CodeBlockNode.new( + location: @location, + id: 'hello', + caption_node: caption_node, + original_text: "def hello\n puts \"Hello\"\nend", + lang: 'ruby' + ) + + # Add code line nodes + ['def hello', ' puts "Hello"', 'end'].each do |line| + line_node = ReVIEW::AST::CodeLineNode.new(location: @location) + line_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: line)) + code.add_child(line_node) + end + + doc.add_child(code) + + result = @generator.generate(doc) + expected = <<~EOB + //list[hello][Hello Example][ruby]{ + def hello + puts "Hello" + end + //} + + EOB + assert_equal expected, result + end + + def test_code_block_without_id + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + code = ReVIEW::AST::CodeBlockNode.new( + location: @location, + original_text: 'echo "Hello"', + lang: 'sh' + ) + + # Add code line node + line_node = ReVIEW::AST::CodeLineNode.new(location: @location) + line_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'echo "Hello"')) + code.add_child(line_node) + + 
doc.add_child(code) + + result = @generator.generate(doc) + expected = <<~EOB + //emlist[][sh]{ + echo "Hello" + //} + + EOB + assert_equal expected, result + end + + def test_unordered_list + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + list = ReVIEW::AST::ListNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), list_type: :ul) + + item1 = ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 1) + item1.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'First item')) + list.add_child(item1) + + item2 = ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 1) + item2.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Second item')) + list.add_child(item2) + + doc.add_child(list) + + result = @generator.generate(doc) + expected = " * First item\n * Second item\n\n" + assert_equal expected, result + end + + def test_table + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + # Create caption node + caption_node = ReVIEW::AST::CaptionNode.new(location: @location) + caption_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Sample Table')) + + table = ReVIEW::AST::TableNode.new( + location: @location, + id: 'sample', + caption_node: caption_node + ) + + # Add header row + header_row = ReVIEW::AST::TableRowNode.new(location: @location, row_type: :header) + ['Name', 'Age'].each do |cell_content| + cell = ReVIEW::AST::TableCellNode.new(location: @location, cell_type: :th) + cell.add_child(ReVIEW::AST::TextNode.new(location: @location, content: cell_content)) + header_row.add_child(cell) + end + table.add_header_row(header_row) + + # Add body rows + [['Alice', '25'], ['Bob', '30']].each do |row_data| + body_row = ReVIEW::AST::TableRowNode.new(location: @location, row_type: :body) + row_data.each_with_index do |cell_content, 
index| + # First cell in body rows is typically a header (row header) + cell_type = index == 0 ? :th : :td + cell = ReVIEW::AST::TableCellNode.new(location: @location, cell_type: cell_type) + cell.add_child(ReVIEW::AST::TextNode.new(location: @location, content: cell_content)) + body_row.add_child(cell) + end + table.add_body_row(body_row) + end + + doc.add_child(table) + + result = @generator.generate(doc) + expected = <<~EOB + //table[sample][Sample Table]{ + Name Age + ------------ + Alice 25 + Bob 30 + //} + + EOB + assert_equal expected, result + end + + def test_image + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + # Create caption node + caption_node = ReVIEW::AST::CaptionNode.new(location: @location) + caption_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Sample Figure')) + + image = ReVIEW::AST::ImageNode.new( + location: @location, + id: 'figure1', + caption_node: caption_node + ) + doc.add_child(image) + + result = @generator.generate(doc) + assert_equal "//image[figure1][Sample Figure]\n\n", result + end + + def test_minicolumn + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + # Create caption node + caption_node = ReVIEW::AST::CaptionNode.new(location: @location) + caption_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Important Note')) + + minicolumn = ReVIEW::AST::MinicolumnNode.new( + location: @location, + minicolumn_type: :note, + caption_node: caption_node + ) + para = ReVIEW::AST::ParagraphNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + para.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'This is a note.')) + minicolumn.add_child(para) + doc.add_child(minicolumn) + + result = @generator.generate(doc) + expected = <<~EOB + //note[Important Note]{ + This is a note. 
+ + //} + + EOB + assert_equal expected, result + end + + def test_complex_document + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + # Headline with caption + h1_caption = ReVIEW::AST::CaptionNode.new(location: @location) + h1_caption.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Chapter 1')) + + h1 = ReVIEW::AST::HeadlineNode.new(location: @location, level: 1, caption_node: h1_caption) + doc.add_child(h1) + + # Paragraph with inline + para = ReVIEW::AST::ParagraphNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + para.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'This is ')) + code_inline = ReVIEW::AST::InlineNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), inline_type: :code) + code_inline.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'inline code')) + para.add_child(code_inline) + para.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: '.')) + h1.add_child(para) + + # Code block + code = ReVIEW::AST::CodeBlockNode.new( + location: @location, + id: 'example', + original_text: 'puts "Hello, Re:VIEW!"' + ) + + # Add code line node + line_node = ReVIEW::AST::CodeLineNode.new(location: @location) + line_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'puts "Hello, Re:VIEW!"')) + code.add_child(line_node) + + h1.add_child(code) + + result = @generator.generate(doc) + expected = <<~EOB + = Chapter 1 + + This is @<code>{inline code}. + + //list[example]{ + puts "Hello, Re:VIEW!" 
+ //} + + EOB + assert_equal expected, result + end + + def test_inline_with_args + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + para = ReVIEW::AST::ParagraphNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + # href with URL + href = ReVIEW::AST::InlineNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), inline_type: :href, args: ['https://example.com']) + para.add_child(href) + + doc.add_child(para) + + result = @generator.generate(doc) + assert_equal "@<href>{https://example.com}\n\n", result + end + + def test_ordered_list + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + list = ReVIEW::AST::ListNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), list_type: :ol) + + item1 = ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 1, number: 1) + item1.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'First')) + list.add_child(item1) + + item2 = ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 1, number: 2) + item2.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Second')) + list.add_child(item2) + + doc.add_child(list) + + result = @generator.generate(doc) + expected = " 1. First\n 2. 
Second\n\n" + assert_equal expected, result + end + + def test_definition_list + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + list = ReVIEW::AST::ListNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), list_type: :dl) + + item = ReVIEW::AST::ListItemNode.new( + location: ReVIEW::SnapshotLocation.new(nil, 0), + level: 1, + term_children: [ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Term')] + ) + item.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Definition of the term')) + list.add_child(item) + + doc.add_child(list) + + result = @generator.generate(doc) + expected = <<~EOB + : Term + Definition of the term + + EOB + assert_equal expected, result + end + + def test_empty_paragraph_skipped + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + # Non-empty paragraph + para1 = ReVIEW::AST::ParagraphNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + para1.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Content')) + doc.add_child(para1) + + # Empty paragraph (should be skipped) + para2 = ReVIEW::AST::ParagraphNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + doc.add_child(para2) + + # Another non-empty paragraph + para3 = ReVIEW::AST::ParagraphNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + para3.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'More content')) + doc.add_child(para3) + + result = @generator.generate(doc) + expected = <<~EOB + Content + + More content + + EOB + assert_equal expected, result + end + + def test_nested_unordered_list + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + list = ReVIEW::AST::ListNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), list_type: :ul) + + # First item with nested list + item1 = 
ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 1) + item1.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Item 1')) + + # Nested list + nested_list = ReVIEW::AST::ListNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), list_type: :ul) + nested_item1 = ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 2) + nested_item1.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Nested 1')) + nested_list.add_child(nested_item1) + + nested_item2 = ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 2) + nested_item2.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Nested 2')) + nested_list.add_child(nested_item2) + + item1.add_child(nested_list) + list.add_child(item1) + + # Second top-level item + item2 = ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 1) + item2.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Item 2')) + list.add_child(item2) + + doc.add_child(list) + + result = @generator.generate(doc) + expected = " * Item 1\n ** Nested 1\n ** Nested 2\n * Item 2\n\n" + assert_equal expected, result + end + + def test_nested_ordered_list + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + list = ReVIEW::AST::ListNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), list_type: :ol) + + # First item with nested list + item1 = ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 1, number: 1) + item1.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'First')) + + # Nested ordered list + nested_list = ReVIEW::AST::ListNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), list_type: :ol) + nested_item1 = 
ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 2, number: 1) + nested_item1.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Nested First')) + nested_list.add_child(nested_item1) + + item1.add_child(nested_list) + list.add_child(item1) + + # Second top-level item + item2 = ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 1, number: 2) + item2.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Second')) + list.add_child(item2) + + doc.add_child(list) + + result = @generator.generate(doc) + expected = " 1. First\n 1. Nested First\n 2. Second\n\n" + assert_equal expected, result + end + + def test_reference_node + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + para = ReVIEW::AST::ParagraphNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + # ReferenceNode is typically a child of InlineNode, but can also be standalone + reference = ReVIEW::AST::ReferenceNode.new('fig1', nil, location: @location) + para.add_child(reference) + + doc.add_child(para) + + result = @generator.generate(doc) + # ReferenceNode should output its content (the ref_id) + assert_equal "fig1\n\n", result + end + + def test_footnote_node + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + # FootnoteNode with content + footnote = ReVIEW::AST::FootnoteNode.new(location: @location, id: 'note1') + footnote.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'This is a footnote')) + + doc.add_child(footnote) + + result = @generator.generate(doc) + # FootnoteNode should be rendered as //footnote[id][content] + assert_equal "//footnote[note1][This is a footnote]\n\n", result + end + + # Edge case tests + def test_empty_list + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + list = 
ReVIEW::AST::ListNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), list_type: :ul) + doc.add_child(list) + + result = @generator.generate(doc) + # Empty list should produce empty string + assert_equal '', result + end + + def test_multiple_inline_elements + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + para = ReVIEW::AST::ParagraphNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + para.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Text with ')) + + bold = ReVIEW::AST::InlineNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), inline_type: :b) + bold.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'bold')) + para.add_child(bold) + + para.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: ' and ')) + + italic = ReVIEW::AST::InlineNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), inline_type: :i) + italic.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'italic')) + para.add_child(italic) + + para.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: ' and ')) + + code = ReVIEW::AST::InlineNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), inline_type: :code) + code.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'code')) + para.add_child(code) + + para.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: '.')) + + doc.add_child(para) + + result = @generator.generate(doc) + assert_equal "Text with @<b>{bold} and @<i>{italic} and @<code>{code}.\n\n", result + end + + def test_deeply_nested_list + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + list = ReVIEW::AST::ListNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), list_type: :ul) + + # Level 1 + item1 = 
ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 1) + item1.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Level 1')) + + # Level 2 + list2 = ReVIEW::AST::ListNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), list_type: :ul) + item2 = ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 2) + item2.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Level 2')) + + # Level 3 + list3 = ReVIEW::AST::ListNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), list_type: :ul) + item3 = ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 3) + item3.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Level 3')) + list3.add_child(item3) + + item2.add_child(list3) + list2.add_child(item2) + item1.add_child(list2) + list.add_child(item1) + doc.add_child(list) + + result = @generator.generate(doc) + assert_equal " * Level 1\n ** Level 2\n *** Level 3\n\n", result + end + + def test_code_block_without_original_text + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + # CodeBlockNode without original_text (reconstructed from AST) + code = ReVIEW::AST::CodeBlockNode.new( + location: @location, + id: 'sample', + lang: 'ruby' + ) + + # Add code line nodes + ['line 1', 'line 2', 'line 3'].each do |line| + line_node = ReVIEW::AST::CodeLineNode.new(location: @location) + line_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: line)) + code.add_child(line_node) + end + + doc.add_child(code) + + result = @generator.generate(doc) + expected = <<~EOB + //list[sample][][ruby]{ + line 1 + line 2 + line 3 + //} + + EOB + assert_equal expected, result + end + + def test_image_with_metric + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + caption_node = 
ReVIEW::AST::CaptionNode.new(location: @location) + caption_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Scaled Image')) + + image = ReVIEW::AST::ImageNode.new( + location: @location, + id: 'figure1', + caption_node: caption_node, + metric: 'scale=0.5' + ) + doc.add_child(image) + + result = @generator.generate(doc) + assert_equal "//image[figure1][Scaled Image][scale=0.5]\n\n", result + end + + def test_column + doc = ReVIEW::AST::DocumentNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + + caption_node = ReVIEW::AST::CaptionNode.new(location: @location) + caption_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Column Title')) + + column = ReVIEW::AST::ColumnNode.new( + location: @location, + level: 2, + caption_node: caption_node + ) + + para = ReVIEW::AST::ParagraphNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0)) + para.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Column content.')) + column.add_child(para) + + doc.add_child(column) + + result = @generator.generate(doc) + assert_equal "==[column] Column Title\n\nColumn content.\n\n==[/column]\n\n", result + end +end diff --git a/test/ast/test_ast_structure_debug.rb b/test/ast/test_ast_structure_debug.rb new file mode 100644 index 000000000..3fd86b6c2 --- /dev/null +++ b/test/ast/test_ast_structure_debug.rb @@ -0,0 +1,141 @@ +# frozen_string_literal: true + +# Debug test to understand AST structure issues with inline elements + +require_relative '../test_helper' +require 'review/ast/compiler' +require 'review/ast/json_serializer' +require 'review/book' +require 'review/book/chapter' +require 'json' + +class TestASTStructureDebug < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + + @chapter = 
ReVIEW::Book::Chapter.new(@book, 1, 'debug_chapter', 'debug_chapter.re', StringIO.new) + ReVIEW::I18n.setup(@config['language']) + end + + def test_minicolumn_ast_structure + source = <<~EOS + = Chapter Title + + //note[Note Caption]{ + This is a note with @<fn>{footnote1}. + //} + + //footnote[footnote1][Footnote in note] + EOS + + @chapter.content = source + + # Build AST without builder rendering + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + # Serialize AST to examine structure + json_str = ReVIEW::AST::JSONSerializer.serialize(ast_root) + ast = JSON.parse(json_str) + + # Find minicolumn node + minicolumn = ast['children'].find { |node| node['type'] == 'MinicolumnNode' } + assert_not_nil(minicolumn) + + # Check if inline elements are properly parsed + has_inline_node = minicolumn['children'].any? do |child| + child['type'] == 'InlineNode' || + (child['children'] && child['children'].any? { |grandchild| grandchild['type'] == 'InlineNode' }) + end + + assert_true(has_inline_node, 'Minicolumn should contain inline elements') + end + + def test_table_ast_structure + source = <<~EOS + = Chapter Title + + //table[test-table][Test Table]{ + Header @<b>{Bold} Normal Header + ------------ + Cell with @<fn>{table-fn} Normal Cell + //} + + //footnote[table-fn][Footnote in table] + EOS + + @chapter.content = source + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + # Serialize AST to examine structure + json_str = ReVIEW::AST::JSONSerializer.serialize(ast_root) + ast = JSON.parse(json_str) + + # Find table node + table = ast['children'].find { |node| node['type'] == 'TableNode' } + assert_not_nil(table) + + # Check actual table structure (header_rows vs headers) + table.keys.grep(/header|row/) + + # Verify table structure has header_rows and body_rows (correct AST structure) + assert_not_nil(table['header_rows'] || table['headers']) + assert_not_nil(table['body_rows'] || 
table['rows']) + + # Check for inline elements in table cells using correct structure + headers = table['header_rows'] || table['headers'] || [] + rows = table['body_rows'] || table['rows'] || [] + + headers.any? do |header| + header['children']&.any? { |cell| cell['type'] == 'InlineNode' } + end + + rows.any? do |row| + row['children']&.any? { |cell| cell['type'] == 'InlineNode' } + end + + # Table should have structure and may contain inline elements + assert_true(headers.any? || rows.any?, 'Table should have headers or rows') + # NOTE: Inline element check is optional as it depends on content + end + + def test_paragraph_ast_structure + source = <<~EOS + = Chapter Title + + This is a paragraph with @<fn>{footnote1} and @<b>{bold text}. + + //footnote[footnote1][Paragraph footnote] + EOS + + @chapter.content = source + + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + # Serialize AST to examine structure + json_str = ReVIEW::AST::JSONSerializer.serialize(ast_root) + ast = JSON.parse(json_str) + + # Find paragraph node + paragraph = ast['children'].find { |node| node['type'] == 'ParagraphNode' } + assert_not_nil(paragraph) + + # Verify paragraph contains inline elements + has_inline_elements = paragraph['children'].any? 
{ |child| child['type'] == 'InlineNode' } + assert_true(has_inline_elements, 'Paragraph should contain inline elements') + + # Verify specific inline elements exist + inline_types = paragraph['children'].select { |child| child['type'] == 'InlineNode' }.map { |node| node['inline_type'] } + assert_includes(inline_types, 'fn', 'Should contain footnote inline element') + assert_includes(inline_types, 'b', 'Should contain bold inline element') + end +end diff --git a/test/ast/test_auto_id_generation.rb b/test/ast/test_auto_id_generation.rb new file mode 100644 index 000000000..9d9ec997d --- /dev/null +++ b/test/ast/test_auto_id_generation.rb @@ -0,0 +1,275 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/compiler' +require 'review/renderer/html_renderer' +require 'review/book' +require 'stringio' + +class TestAutoIdGeneration < Test::Unit::TestCase + def setup + @book = ReVIEW::Book::Base.new(config: ReVIEW::Configure.values) + @config = @book.config + @compiler = ReVIEW::AST::Compiler.new + + ReVIEW::I18n.setup(@config['language']) + end + + def test_nonum_headline_auto_id_generation + content = <<~REVIEW + = Chapter + + ===[nonum] First Unnumbered + ===[nonum] Second Unnumbered + REVIEW + + chapter = create_chapter(content) + ast_root = @compiler.compile_to_ast(chapter) + + # Find nonum headlines + headlines = find_all_nodes(ast_root, ReVIEW::AST::HeadlineNode) + nonum_headlines = headlines.select(&:nonum?) 
+ + assert_equal 2, nonum_headlines.size, 'Should have 2 nonum headlines' + + # Verify auto_id is generated for both + assert_not_nil(nonum_headlines[0].auto_id, 'First nonum should have auto_id') + assert_not_nil(nonum_headlines[1].auto_id, 'Second nonum should have auto_id') + + # Verify auto_id format: chapter_name_nonumN + assert_match(/^test_nonum\d+$/, nonum_headlines[0].auto_id, 'First auto_id should match format') + assert_match(/^test_nonum\d+$/, nonum_headlines[1].auto_id, 'Second auto_id should match format') + + # Verify auto_ids are different (sequential) + assert_not_equal(nonum_headlines[0].auto_id, nonum_headlines[1].auto_id, + 'Each nonum headline should have unique auto_id') + end + + def test_notoc_headline_auto_id_generation + content = <<~REVIEW + = Chapter + + ===[notoc] First NotInTOC + ===[notoc] Second NotInTOC + REVIEW + + chapter = create_chapter(content) + ast_root = @compiler.compile_to_ast(chapter) + + headlines = find_all_nodes(ast_root, ReVIEW::AST::HeadlineNode) + notoc_headlines = headlines.select(&:notoc?) + + assert_equal 2, notoc_headlines.size + assert_not_nil(notoc_headlines[0].auto_id) + assert_not_nil(notoc_headlines[1].auto_id) + assert_not_equal(notoc_headlines[0].auto_id, notoc_headlines[1].auto_id) + end + + def test_nodisp_headline_auto_id_generation + content = <<~REVIEW + = Chapter + + ===[nodisp] Hidden Section + REVIEW + + chapter = create_chapter(content) + ast_root = @compiler.compile_to_ast(chapter) + + headlines = find_all_nodes(ast_root, ReVIEW::AST::HeadlineNode) + nodisp_headline = headlines.find(&:nodisp?) 
+ + assert_not_nil(nodisp_headline, 'Should find nodisp headline') + assert_not_nil(nodisp_headline.auto_id, 'Nodisp headline should have auto_id') + assert_match(/^test_nonum\d+$/, nodisp_headline.auto_id) + end + + def test_headline_with_label_no_auto_id + content = <<~REVIEW + = Chapter + + ===[nonum]{custom-label} Labeled Headline + REVIEW + + chapter = create_chapter(content) + ast_root = @compiler.compile_to_ast(chapter) + + headlines = find_all_nodes(ast_root, ReVIEW::AST::HeadlineNode) + labeled_headline = headlines.find { |h| h.label == 'custom-label' } + + assert_not_nil(labeled_headline, 'Should find labeled headline') + # When label is provided, auto_id should still be nil (not needed) + assert_nil(labeled_headline.auto_id, 'Labeled headline should not have auto_id') + end + + def test_mixed_nonum_headlines_sequential_numbering + content = <<~REVIEW + = Chapter + + ===[nonum] First + === Regular Section + ===[nonum] Second + ===[notoc] Third + REVIEW + + chapter = create_chapter(content) + ast_root = @compiler.compile_to_ast(chapter) + + headlines = find_all_nodes(ast_root, ReVIEW::AST::HeadlineNode) + special_headlines = headlines.select { |h| h.nonum? || h.notoc? || h.nodisp? 
} + + # All special headlines should have auto_id + assert_equal 3, special_headlines.size + special_headlines.each do |h| + assert_not_nil(h.auto_id, "Headline '#{h.caption_text}' should have auto_id") + end + + # Extract numbers from auto_ids + numbers = special_headlines.map { |h| h.auto_id.match(/\d+$/)[0].to_i } + + # Numbers should be sequential (1, 2, 3) + assert_equal [1, 2, 3], numbers, 'Auto_id numbers should be sequential' + end + + def test_column_auto_id_generation + content = <<~REVIEW + = Chapter + + ===[column] First Column + + Content + + ===[/column] + + ===[column] Second Column + + Content + + ===[/column] + REVIEW + + chapter = create_chapter(content) + ast_root = @compiler.compile_to_ast(chapter) + + columns = find_all_nodes(ast_root, ReVIEW::AST::ColumnNode) + + assert_equal 2, columns.size, 'Should have 2 columns' + + # Verify auto_id is generated for both + assert_not_nil(columns[0].auto_id, 'First column should have auto_id') + assert_not_nil(columns[1].auto_id, 'Second column should have auto_id') + + # Verify auto_id format: column-N + assert_equal 'column-1', columns[0].auto_id, 'First column auto_id should be column-1' + assert_equal 'column-2', columns[1].auto_id, 'Second column auto_id should be column-2' + end + + def test_column_with_label_still_has_auto_id + content = <<~REVIEW + = Chapter + + ===[column]{custom-col} Labeled Column + + Content + + ===[/column] + REVIEW + + chapter = create_chapter(content) + ast_root = @compiler.compile_to_ast(chapter) + + columns = find_all_nodes(ast_root, ReVIEW::AST::ColumnNode) + column = columns.first + + assert_not_nil(column, 'Should find column') + assert_equal 'custom-col', column.label, 'Column should have label' + # Columns ALWAYS get auto_id (used for anchor in HTML) + assert_equal 'column-1', column.auto_id, 'Column should have auto_id even with label' + end + + def test_html_renderer_uses_auto_id_for_nonum + content = <<~REVIEW + = Chapter + + ===[nonum] Unnumbered Section + + 
Content here. + REVIEW + + chapter = create_chapter(content) + chapter.generate_indexes + @book.generate_indexes + + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html = renderer.render_body(ast_root) + + # HTML should contain h3 with auto_id + assert_match(/<h3 id="test_nonum1">/, html, 'Should use auto_id in HTML id attribute') + end + + def test_html_renderer_uses_auto_id_for_column + content = <<~REVIEW + = Chapter + + ===[column] Test Column + + Column content. + + ===[/column] + REVIEW + + chapter = create_chapter(content) + chapter.generate_indexes + @book.generate_indexes + + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html = renderer.render_body(ast_root) + + # HTML should contain anchor with auto_id + assert_match(/<a id="column-1">/, html, 'Should use auto_id in column anchor') + end + + def test_html_renderer_multiple_nonum_unique_ids + content = <<~REVIEW + = Chapter + + ===[nonum] First + + ===[nonum] Second + + ===[nonum] Third + REVIEW + + chapter = create_chapter(content) + chapter.generate_indexes + @book.generate_indexes + + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html = renderer.render_body(ast_root) + + # Each should have unique ID + assert_match(/<h3 id="test_nonum1">/, html) + assert_match(/<h3 id="test_nonum2">/, html) + assert_match(/<h3 id="test_nonum3">/, html) + + # Verify no duplicate IDs + id_matches = html.scan(/id="test_nonum\d+"/) + assert_equal 3, id_matches.size + assert_equal 3, id_matches.uniq.size, 'All IDs should be unique' + end + + private + + def create_chapter(content) + ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + end + + # Recursively find all nodes of a specific type in the AST + def find_all_nodes(node, node_class, results = []) + results << node if node.is_a?(node_class) + node.children.each { |child| 
find_all_nodes(child, node_class, results) } if node.respond_to?(:children) + results + end +end diff --git a/test/ast/test_block_data.rb b/test/ast/test_block_data.rb new file mode 100644 index 000000000..747031046 --- /dev/null +++ b/test/ast/test_block_data.rb @@ -0,0 +1,117 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/block_data' +require 'review/snapshot_location' + +class TestBlockData < Test::Unit::TestCase + include ReVIEW::AST + + def setup + @location = ReVIEW::SnapshotLocation.new('test.re', 42) + end + + def test_basic_initialization + block_data = BlockData.new(name: :list, args: ['id', 'caption'], location: @location) + + assert_equal :list, block_data.name + assert_equal ['id', 'caption'], block_data.args + assert_equal [], block_data.lines + assert_equal [], block_data.nested_blocks + assert_equal @location, block_data.location + end + + def test_initialization_with_all_parameters + nested_block = BlockData.new(name: :note, args: ['warning'], location: @location) + + block_data = BlockData.new( + name: :minicolumn, + args: ['title'], + lines: ['content line 1', 'content line 2'], + nested_blocks: [nested_block], + location: @location + ) + + assert_equal :minicolumn, block_data.name + assert_equal ['title'], block_data.args + assert_equal ['content line 1', 'content line 2'], block_data.lines + assert_equal 1, block_data.nested_blocks.size + assert_equal nested_block, block_data.nested_blocks.first + assert_equal @location, block_data.location + end + + def test_nested_blocks + block_data = BlockData.new(name: :list, location: @location) + assert_false(block_data.nested_blocks?) + + nested_block = BlockData.new(name: :note, location: @location) + block_data_with_nested = BlockData.new( + name: :minicolumn, + nested_blocks: [nested_block], + location: @location + ) + assert_true(block_data_with_nested.nested_blocks?) 
+ end + + def test_line_count + block_data = BlockData.new(name: :list, location: @location) + assert_equal 0, block_data.line_count + + block_data_with_lines = BlockData.new( + name: :list, + lines: ['line1', 'line2', 'line3'], + location: @location + ) + assert_equal 3, block_data_with_lines.line_count + end + + def test_content + block_data = BlockData.new(name: :list, location: @location) + assert_false(block_data.content?) + + block_data_with_content = BlockData.new( + name: :list, + lines: ['content'], + location: @location + ) + assert_true(block_data_with_content.content?) + end + + def test_arg_method + block_data = BlockData.new( + name: :list, + args: ['id', 'caption', 'lang'], + location: @location + ) + + assert_equal 'id', block_data.arg(0) + assert_equal 'caption', block_data.arg(1) + assert_equal 'lang', block_data.arg(2) + + assert_nil(block_data.arg(3)) + assert_nil(block_data.arg(-1)) + assert_nil(block_data.arg(nil)) + end + + def test_arg_method_with_no_args + block_data = BlockData.new(name: :list, location: @location) + assert_nil(block_data.arg(0)) + end + + def test_inspect + block_data = BlockData.new( + name: :list, + args: ['id', 'caption'], + lines: ['line1', 'line2'], + nested_blocks: [BlockData.new(name: :note, location: @location)], + location: @location + ) + + inspect_str = block_data.inspect + assert_include(inspect_str, 'BlockData') + assert_include(inspect_str, 'name=list') + assert_include(inspect_str, 'args=["id", "caption"]') + assert_include(inspect_str, 'lines=2') + assert_include(inspect_str, 'nested=1') + end +end diff --git a/test/ast/test_block_processor_error_messages.rb b/test/ast/test_block_processor_error_messages.rb new file mode 100644 index 000000000..a12da63d3 --- /dev/null +++ b/test/ast/test_block_processor_error_messages.rb @@ -0,0 +1,87 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/compiler' +require 'review/configure' +require 'review/book' +require 
'review/i18n' + +class TestBlockProcessorErrorMessages < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + ReVIEW::I18n.setup(@config['language']) + end + + def test_unknown_block_command_error_message + content = "= Chapter\n\n//unknown_command{\ncontent\n//}" + + chapter = ReVIEW::Book::Chapter.new( + @book, + 1, + 'test', + 'sample.re', + StringIO.new(content) + ) + + error = assert_raises(ReVIEW::CompileError) do + compiler = ReVIEW::AST::Compiler.for_chapter(chapter) + compiler.compile_to_ast(chapter) + end + + # Verify error message contains expected information + assert_match(%r{Unknown block command: //unknown}, error.message) + assert_match(/at line 3/, error.message) + assert_match(/in sample\.re/, error.message) + end + + def test_invalid_table_row_error_message + content = "= Chapter\n\n//table[id][caption]{\n\n//}" + + chapter = ReVIEW::Book::Chapter.new( + @book, + 1, + 'test', + 'table_test.re', + StringIO.new(content) + ) + + error = assert_raises(ReVIEW::CompileError) do + compiler = ReVIEW::AST::Compiler.for_chapter(chapter) + compiler.compile_to_ast(chapter) + end + + # Verify error message contains expected information + assert_match(/Invalid table row: empty line or no tab-separated cells/, error.message) + assert_match(/at line 3/, error.message) + assert_match(/in table_test\.re/, error.message) + end + + # NOTE: The unknown code block type test is harder to trigger in practice + # since CODE_BLOCK_CONFIGS covers most common types, and it would require + # internal method access that changes based on implementation details. 
+ + def test_error_message_formatting + content = "= Chapter\n\n//invalid_block{\ncontent\n//}" + + chapter = ReVIEW::Book::Chapter.new( + @book, + 1, + 'test', + 'format_test.re', + StringIO.new(content) + ) + + error = assert_raises(ReVIEW::CompileError) do + compiler = ReVIEW::AST::Compiler.for_chapter(chapter) + compiler.compile_to_ast(chapter) + end + + # Test the general format of location info + assert_match(/at line 3/, error.message) + assert_match(/in \w+\.re/, error.message) + end +end diff --git a/test/ast/test_block_processor_inline.rb b/test/ast/test_block_processor_inline.rb new file mode 100644 index 000000000..76f5c5fd7 --- /dev/null +++ b/test/ast/test_block_processor_inline.rb @@ -0,0 +1,250 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/snapshot_location' +require 'review/ast/code_block_node' +require 'review/ast/paragraph_node' +require 'review/ast/text_node' +require 'review/ast/inline_node' +require 'review/ast/caption_node' +require 'review/ast/table_node' +require 'review/ast/image_node' +require 'review/ast/code_line_node' +require 'review/ast/compiler' + +class TestBlockProcessorInline < Test::Unit::TestCase + def setup + @location = ReVIEW::SnapshotLocation.new('test.re', 10) + compiler = ReVIEW::AST::Compiler.new + @inline_processor = compiler.inline_processor + end + + def test_code_block_node_original_text_attribute + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + original_text: 'test content' + ) + + assert_respond_to(code_block, :original_text) + assert_equal 'test content', code_block.original_text + end + + def test_code_block_node_original_text_method + code_block1 = ReVIEW::AST::CodeBlockNode.new( + location: @location, + original_text: 'original content' + ) + assert_equal 'original content', code_block1.original_text + assert_equal ['original content'], code_block1.original_lines + + code_block2 = ReVIEW::AST::CodeBlockNode.new( + location: @location, + 
original_text: "line1\nline2" + ) + assert_equal "line1\nline2", code_block2.original_text + assert_equal ['line1', 'line2'], code_block2.original_lines + end + + def test_original_and_processed_lines_methods + # Test original_lines and processed_lines methods + original_text = 'puts @<b>{hello}' + + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + original_text: original_text + ) + + # Create a code line with inline processing + line_node = ReVIEW::AST::CodeLineNode.new(location: @location) + text_node1 = ReVIEW::AST::TextNode.new(location: @location, content: 'puts ') + inline_node = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :b) + inline_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'hello')) + line_node.add_child(text_node1) + line_node.add_child(inline_node) + code_block.add_child(line_node) + + # Test original_lines (for builders that don't need inline processing) + assert_equal ['puts @<b>{hello}'], code_block.original_lines + + # Test processed_lines (for builders that need inline processing) + processed = code_block.processed_lines + assert_equal 1, processed.size + assert_equal 'puts @<b>{hello}', processed[0] + end + + def test_processed_lines_method + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + original_text: 'puts hello' + ) + + # Create a simple code line + line_node = ReVIEW::AST::CodeLineNode.new(location: @location) + text_node = ReVIEW::AST::TextNode.new(location: @location, content: 'puts hello') + line_node.add_child(text_node) + code_block.add_child(line_node) + + assert_respond_to(code_block, :processed_lines) + processed = code_block.processed_lines + assert_equal 1, processed.size + assert_equal 'puts hello', processed[0] + end + + # Caption tests + def test_code_block_with_simple_caption + # Test CodeBlockNode with simple text caption + caption_node = ReVIEW::AST::CaptionNode.new(location: @location) + 
caption_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Simple Caption')) + + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + caption_node: caption_node, + original_text: 'code line' + ) + + assert_not_nil(code_block.caption_text) + assert_instance_of(ReVIEW::AST::CaptionNode, code_block.caption_node) + assert_equal 'Simple Caption', code_block.caption_text + end + + def test_code_block_with_inline_caption + caption_markup_text = 'Code with @<b>{bold} text' + caption_node = CaptionParserHelper.parse( + caption_markup_text, + location: @location, + inline_processor: @inline_processor + ) + + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + caption_node: caption_node, + original_text: 'code line' + ) + + assert_not_nil(code_block.caption_text) + assert_instance_of(ReVIEW::AST::CaptionNode, code_block.caption_node) + assert_equal true, code_block.caption_node.contains_inline? + assert_equal 'Code with bold text', code_block.caption_text + end + + def test_table_node_with_caption + caption_node = ReVIEW::AST::CaptionNode.new(location: @location) + caption_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Table Caption')) + + table = ReVIEW::AST::TableNode.new( + location: @location, + caption_node: caption_node + ) + + assert_not_nil(table.caption_text) + assert_instance_of(ReVIEW::AST::CaptionNode, table.caption_node) + assert_equal 'Table Caption', table.caption_text + end + + def test_image_node_with_caption + caption = 'Figure @<i>{1}: Sample' + caption_node = CaptionParserHelper.parse( + caption, + location: @location, + inline_processor: @inline_processor + ) + + image = ReVIEW::AST::ImageNode.new( + location: @location, + id: 'fig1', + caption_node: caption_node + ) + + assert_instance_of(ReVIEW::AST::CaptionNode, image.caption_node) + assert_equal true, image.caption_node.contains_inline? 
+ assert_equal 'Figure 1: Sample', image.caption_text + end + + def test_caption_node_creation_directly + # Simple string + caption_node1 = CaptionParserHelper.parse('Simple text', location: @location) + assert_instance_of(ReVIEW::AST::CaptionNode, caption_node1) + assert_equal 'Simple text', caption_node1.to_inline_text + assert_equal 1, caption_node1.children.size + assert_instance_of(ReVIEW::AST::TextNode, caption_node1.children.first) + + # Nil caption + caption_node2 = CaptionParserHelper.parse(nil, location: @location) + assert_nil(caption_node2) + + # Empty string + caption_node3 = CaptionParserHelper.parse('', location: @location) + assert_nil(caption_node3) + + # Already a CaptionNode + existing_caption_node = ReVIEW::AST::CaptionNode.new(location: @location) + existing_caption_node.add_child(ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Existing')) + caption_node4 = CaptionParserHelper.parse(existing_caption_node, location: @location) + assert_equal existing_caption_node, caption_node4 + end + + def test_caption_with_multiple_nodes + caption = 'Text with @<b>{bold} content' + caption_node = CaptionParserHelper.parse( + caption, + location: @location, + inline_processor: @inline_processor + ) + + assert_instance_of(ReVIEW::AST::CaptionNode, caption_node) + assert_operator(caption_node.children.size, :>=, 1) + assert_equal true, caption_node.contains_inline? 
+ assert_equal 'Text with bold content', caption_node.to_inline_text + end + + def test_empty_caption_handling + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + caption_node: nil, + original_text: 'code' + ) + assert_nil(code_block.caption_node) + assert_equal('', code_block.caption_text) + + table = ReVIEW::AST::TableNode.new( + location: @location, + caption_node: nil + ) + assert_nil(table.caption_node) + assert_equal('', table.caption_text) + end + + def test_caption_markup_text_compatibility + caption_with_markup = 'Caption with @<b>{bold} and @<i>{italic}' + caption_node = CaptionParserHelper.parse( + caption_with_markup, + location: @location, + inline_processor: @inline_processor + ) + + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + caption_node: caption_node, + original_text: 'code' + ) + + assert_instance_of(ReVIEW::AST::CaptionNode, code_block.caption_node) + assert_equal true, code_block.caption_node.contains_inline? + assert_equal 'Caption with bold and italic', code_block.caption_text + end + + private + + def create_test_paragraph + # Create paragraph: puts @<b>{hello} + text_node = ReVIEW::AST::TextNode.new(location: @location, content: 'hello') + inline_node = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :b) + inline_node.add_child(text_node) + + paragraph = ReVIEW::AST::ParagraphNode.new(location: @location) + paragraph.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'puts ')) + paragraph.add_child(inline_node) + + paragraph + end +end diff --git a/test/ast/test_block_processor_integration.rb b/test/ast/test_block_processor_integration.rb new file mode 100644 index 000000000..aca57de34 --- /dev/null +++ b/test/ast/test_block_processor_integration.rb @@ -0,0 +1,303 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/compiler' +require 'review/ast/block_processor' +require 'review/ast/block_data' +require 'review/book' +require 
'review/book/chapter' +require 'stringio' + +class TestBlockProcessorIntegration < Test::Unit::TestCase + include ReVIEW + + def setup + @config = Configure.values + @config['language'] = 'ja' + @book = Book::Base.new + @book.config = @config + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + I18n.setup(@config['language']) + end + + def test_simple_block_processing + content = <<~EOB + = Test Chapter + + //list[example][サンプルコード]{ + def hello + puts "world" + end + //} + + 段落テキスト + EOB + + chapter = Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + compiler = AST::Compiler.new + ast = compiler.compile_to_ast(chapter) + + assert_equal AST::DocumentNode, ast.class + assert_equal 3, ast.children.size + + headline = ast.children[0] + assert_equal AST::HeadlineNode, headline.class + assert_equal 1, headline.level + + code_block = ast.children[1] + assert_equal AST::CodeBlockNode, code_block.class + assert_equal 'example', code_block.id + assert_equal :list, code_block.code_type + assert_equal 3, code_block.children.size # 3 lines of code + + paragraph = ast.children[2] + assert_equal AST::ParagraphNode, paragraph.class + end + + def test_nested_block_processing + content = <<~EOB + = Test Chapter + + //note[注意]{ + これは注意書きです。 + + //list[nested][ネストしたコード]{ + def nested_method + puts "nested" + end + //} + + 注意書きの続き。 + //} + EOB + + chapter = Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + compiler = AST::Compiler.new + ast = compiler.compile_to_ast(chapter) + + assert_equal AST::DocumentNode, ast.class + assert_equal 2, ast.children.size + + headline = ast.children[0] + assert_equal AST::HeadlineNode, headline.class + + minicolumn = ast.children[1] + assert_equal AST::MinicolumnNode, minicolumn.class + assert_equal :note, minicolumn.minicolumn_type + + assert(minicolumn.children.any?(AST::CodeBlockNode)) + + nested_code = minicolumn.children.find { |child| 
child.is_a?(AST::CodeBlockNode) } + assert_equal 'nested', nested_code.id + assert_equal :list, nested_code.code_type + end + + def test_multiple_nested_blocks + content = <<~EOB + //box[テストボックス]{ + ボックスの説明 + + //list[code1][最初のコード]{ + puts "first" + //} + + 中間テキスト + + //list[code2][二番目のコード]{ + puts "second" + //} + + 最後のテキスト + //} + EOB + + chapter = Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + compiler = AST::Compiler.new + ast = compiler.compile_to_ast(chapter) + + box_block = ast.children[0] + assert_equal AST::BlockNode, box_block.class + assert_equal :box, box_block.block_type + + code_blocks = box_block.children.select { |child| child.is_a?(AST::CodeBlockNode) } + assert_equal 2, code_blocks.size + assert_equal 'code1', code_blocks[0].id + assert_equal 'code2', code_blocks[1].id + end + + def test_block_error_handling_unclosed_block + content = <<~EOB + //list[example][サンプル]{ + def hello + puts "world" + # //} が欠けている + EOB + + chapter = Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + compiler = AST::Compiler.new + + assert_raise(CompileError) do + compiler.compile_to_ast(chapter) + end + end + + def test_block_error_handling_invalid_syntax + content = <<~EOB + //invalid_command_name{ + content + //} + EOB + + chapter = Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + compiler = AST::Compiler.new + + assert_raise(CompileError) do + compiler.compile_to_ast(chapter) + end + end + + def test_block_error_handling_nested_block_error + content = <<~EOB + //note[注意]{ + 正常なテキスト + + //list[broken][壊れたコード]{ + def method + # //} が欠けている - ネストエラー + + //} + EOB + + chapter = Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + compiler = AST::Compiler.new + + error = assert_raise(CompileError) do + compiler.compile_to_ast(chapter) + end + + assert_include(error.message.downcase, 'unclosed') + end + 
+ def test_image_block_processing + content = <<~EOB + //image[sample][サンプル画像][scale=0.5]{ + //} + EOB + + chapter = Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + compiler = AST::Compiler.new + ast = compiler.compile_to_ast(chapter) + + image_node = ast.children[0] + assert_equal AST::ImageNode, image_node.class + assert_equal 'sample', image_node.id + assert_equal 'scale=0.5', image_node.metric + assert_equal :image, image_node.image_type + end + + def test_table_block_processing + content = <<~EOB + //table[data][サンプルデータ]{ + 名前 年齢 + ------------ + Alice 25 + Bob 30 + //} + EOB + + chapter = Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + compiler = AST::Compiler.new + ast = compiler.compile_to_ast(chapter) + + table_node = ast.children[0] + assert_equal AST::TableNode, table_node.class + assert_equal 'data', table_node.id + assert_equal :table, table_node.table_type + assert_equal 1, table_node.header_rows.size + assert_equal 2, table_node.body_rows.size + end + + def test_minicolumn_with_structured_content + content = <<~EOB + //tip[ヒント]{ + 基本的なヒント + + * リスト項目1 + * リスト項目2 + + 追加説明 + //} + EOB + + chapter = Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + compiler = AST::Compiler.new + ast = compiler.compile_to_ast(chapter) + + tip_node = ast.children[0] + assert_equal AST::MinicolumnNode, tip_node.class + assert_equal :tip, tip_node.minicolumn_type + + assert(tip_node.children.any?(AST::ParagraphNode)) + assert(tip_node.children.any?(AST::ListNode)) + end + + def test_embed_block_processing + content = <<~EOB + //embed[html]{ + <div class="custom"> + <p>HTML content</p> + </div> + //} + EOB + + chapter = Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + compiler = AST::Compiler.new + ast = compiler.compile_to_ast(chapter) + + embed_node = ast.children[0] + assert_equal AST::EmbedNode, 
embed_node.class + assert_equal :block, embed_node.embed_type + assert_equal ['html'], embed_node.target_builders + assert(embed_node.content.lines.count >= 3, 'Should have at least 3 lines of content') + end + + def test_texequation_block_processing + content = <<~EOB + //texequation[eq1][数式]{ + E = mc^2 + //} + EOB + + chapter = Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + compiler = AST::Compiler.new + ast = compiler.compile_to_ast(chapter) + + equation_node = ast.children[0] + assert_equal AST::TexEquationNode, equation_node.class + assert_equal 'eq1', equation_node.id + assert_include(equation_node.content, 'E = mc^2') + end +end diff --git a/test/ast/test_block_processor_table_driven.rb b/test/ast/test_block_processor_table_driven.rb new file mode 100644 index 000000000..f6710c5ef --- /dev/null +++ b/test/ast/test_block_processor_table_driven.rb @@ -0,0 +1,176 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/compiler' +require 'review/ast/block_processor' +require 'review/ast/block_data' +require 'review/book' +require 'review/book/chapter' +require 'stringio' + +class TestBlockProcessorTableDriven < Test::Unit::TestCase + include ReVIEW + + def setup + @config = Configure.values + @config['language'] = 'ja' + @book = Book::Base.new + @book.config = @config + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + I18n.setup(@config['language']) + + @compiler = AST::Compiler.new + @processor = @compiler.block_processor + end + + def test_block_command_table_coverage + AST::BlockProcessor::BLOCK_COMMAND_TABLE.each do |command, method_name| + assert @processor.respond_to?(method_name, true), + "Handler method #{method_name} for command #{command} does not exist" + end + end + + def test_registered_commands + registered = @processor.registered_commands + + expected_commands = %i[list image table note embed texequation] + expected_commands.each do |cmd| + 
assert_include(registered, cmd, "Command #{cmd} should be registered by default") + end + end + + def test_dynamic_handler_registration + @processor.register_block_handler(:custom_test, :build_complex_block_ast) + + assert_include(@processor.registered_commands, :custom_test) + assert_equal :build_complex_block_ast, @processor.instance_variable_get(:@dynamic_command_table)[:custom_test] + end + + def test_custom_block_processing + @processor.register_block_handler(:custom_box, :build_complex_block_ast) + + content = <<~EOB + = Test Chapter + + //custom_box[title]{ + Custom content + //} + EOB + + chapter = Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + error = assert_raise(CompileError) do + @compiler.compile_to_ast(chapter) + end + + assert_include(error.message, 'Unknown block command: //custom') + end + + def test_unknown_command_error + location = SnapshotLocation.new('test.re', 1) + block_data = AST::BlockData.new( + name: :unknown_command, + args: [], + lines: [], + location: location + ) + + error = assert_raise(CompileError) do + @processor.send(:process_block_command, block_data) + end + + assert_include(error.message, 'Unknown block command: //unknown_command') + assert_include(error.message, 'test.re') + end + + def test_table_driven_vs_case_statement_equivalence + test_commands = %i[list image table note embed texequation box] + + test_commands.each do |command| + content = case command # rubocop:disable Style/HashLikeCase + when :list + <<~EOB + //list[test][テスト]{ + puts "test" + //} + EOB + when :image + <<~EOB + //image[test][テスト画像]{ + //} + EOB + when :table + <<~EOB + //table[test][テストテーブル]{ + Name Age + Alice 25 + //} + EOB + when :note + <<~EOB + //note[テスト注意]{ + 注意内容 + //} + EOB + when :embed + <<~EOB + //embed[html]{ + <div>test</div> + //} + EOB + when :texequation + <<~EOB + //texequation[eq1][数式]{ + E = mc^2 + //} + EOB + when :box + <<~EOB + //box[テストボックス]{ + ボックス内容 + //} + EOB + end + + chapter 
= Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new) + chapter.content = content + + assert_nothing_raised("Command #{command} should be processed without error") do + @compiler.compile_to_ast(chapter) + end + end + end + + def test_handler_method_existence + AST::BlockProcessor::BLOCK_COMMAND_TABLE.each do |command, handler| + assert @processor.respond_to?(handler, true), + "Handler method #{handler} for command //#{command} does not exist" + end + end + + def test_code_block_category_consistency + code_commands = %i[list listnum emlist emlistnum cmd source] + code_commands.each do |cmd| + assert_equal :build_code_block_ast, AST::BlockProcessor::BLOCK_COMMAND_TABLE[cmd], + "Code command #{cmd} should use build_code_block_ast handler" + end + end + + def test_minicolumn_category_consistency + minicolumn_commands = %i[note memo tip info warning important caution notice] + minicolumn_commands.each do |cmd| + assert_equal :build_minicolumn_ast, AST::BlockProcessor::BLOCK_COMMAND_TABLE[cmd], + "Minicolumn command #{cmd} should use build_minicolumn_ast handler" + end + end + + def test_extension_example + @processor.register_block_handler(:callout, :build_complex_block_ast) + + assert_include(@processor.registered_commands, :callout) + + assert_equal :build_complex_block_ast, @processor.instance_variable_get(:@dynamic_command_table)[:callout] + end +end diff --git a/test/ast/test_caption_inline_integration.rb b/test/ast/test_caption_inline_integration.rb new file mode 100644 index 000000000..cfd509e2a --- /dev/null +++ b/test/ast/test_caption_inline_integration.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/snapshot_location' +require 'review/ast/caption_node' +require 'review/ast/code_block_node' +require 'review/ast/text_node' +require 'review/ast/inline_node' + +class TestCaptionInlineIntegration < Test::Unit::TestCase + def setup + @location = ReVIEW::SnapshotLocation.new('test.re', 1) + end + + def 
test_simple_caption_behavior_in_code_block + # Test that simple captions become CaptionNode in CodeBlockNode + caption_node = ReVIEW::AST::CaptionNode.new(location: @location) + caption_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Simple Caption')) + + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + caption_node: caption_node + ) + + assert_equal 'Simple Caption', code_block.caption_text + assert_instance_of(ReVIEW::AST::CaptionNode, code_block.caption_node) + assert_equal 'Simple Caption', code_block.caption_text + end + + def test_caption_node_behavior_in_code_block + # Test that CaptionNode works correctly in CodeBlockNode + caption_node = ReVIEW::AST::CaptionNode.new(location: @location) + caption_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Caption with ')) + + inline_node = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :b) + inline_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'bold')) + caption_node.add_child(inline_node) + + caption_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: ' text')) + + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location, + caption_node: caption_node + ) + + assert_equal 'Caption with bold text', code_block.caption_text + assert_instance_of(ReVIEW::AST::CaptionNode, code_block.caption_node) + end + + def test_empty_caption_handling + # Test empty captions + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location + ) + + assert_equal('', code_block.caption_text) + end + + def test_nil_caption_handling + # Test when caption is not provided + code_block = ReVIEW::AST::CodeBlockNode.new( + location: @location + ) + + assert_equal('', code_block.caption_text) + end +end diff --git a/test/ast/test_caption_node.rb b/test/ast/test_caption_node.rb new file mode 100644 index 000000000..ab4421cbe --- /dev/null +++ b/test/ast/test_caption_node.rb @@ -0,0 +1,207 @@ +# 
frozen_string_literal: true + +require_relative '../test_helper' +require 'review/snapshot_location' +require 'review/ast/caption_node' +require 'review/ast/text_node' +require 'review/ast/inline_node' + +class TestCaptionNode < Test::Unit::TestCase + def setup + @location = ReVIEW::SnapshotLocation.new('test.re', 1) + end + + def test_caption_node_initialization + caption = ReVIEW::AST::CaptionNode.new(location: @location) + assert_instance_of(ReVIEW::AST::CaptionNode, caption) + assert_equal @location, caption.location + assert_empty(caption.children) + end + + def test_empty_caption + caption = ReVIEW::AST::CaptionNode.new(location: @location) + assert caption.empty? + assert_equal false, caption.contains_inline? + end + + def test_simple_text_caption + caption = ReVIEW::AST::CaptionNode.new(location: @location) + text_node = ReVIEW::AST::TextNode.new(location: @location, content: 'Simple caption') + caption.add_child(text_node) + + assert_equal false, caption.empty? + assert_equal false, caption.contains_inline? + + # Verify structure + assert_equal 1, caption.children.size + assert_instance_of(ReVIEW::AST::TextNode, caption.children[0]) + assert_equal 'Simple caption', caption.children[0].content + end + + def test_caption_with_inline_elements + caption = ReVIEW::AST::CaptionNode.new(location: @location) + + # Add text: "Caption with " + caption.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Caption with ')) + + # Add inline: @<b>{bold text} + inline_node = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :b) + inline_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'bold text')) + caption.add_child(inline_node) + + # Add more text: " content" + caption.add_child(ReVIEW::AST::TextNode.new(location: @location, content: ' content')) + + assert_equal false, caption.empty? + assert_equal true, caption.contains_inline? 
+ + # Verify structure: "Caption with @<b>{bold text} content" + assert_equal 3, caption.children.size + assert_instance_of(ReVIEW::AST::TextNode, caption.children[0]) + assert_equal 'Caption with ', caption.children[0].content + assert_instance_of(ReVIEW::AST::InlineNode, caption.children[1]) + assert_equal :b, caption.children[1].inline_type + assert_equal 1, caption.children[1].children.size + assert_equal 'bold text', caption.children[1].children[0].content + assert_instance_of(ReVIEW::AST::TextNode, caption.children[2]) + assert_equal ' content', caption.children[2].content + end + + def test_caption_with_nested_inline + caption = ReVIEW::AST::CaptionNode.new(location: @location) + + # Create: Text @<i>{italic @<b>{bold}} more + text1 = ReVIEW::AST::TextNode.new(location: @location, content: 'Text ') + caption.add_child(text1) + + # Create nested inline: @<i>{italic @<b>{bold}} + bold_text = ReVIEW::AST::TextNode.new(location: @location, content: 'bold') + bold_inline = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :b) + bold_inline.add_child(bold_text) + + italic_text = ReVIEW::AST::TextNode.new(location: @location, content: 'italic ') + italic_inline = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :i) + italic_inline.add_child(italic_text) + italic_inline.add_child(bold_inline) + caption.add_child(italic_inline) + + text2 = ReVIEW::AST::TextNode.new(location: @location, content: ' more') + caption.add_child(text2) + + assert_equal true, caption.contains_inline? 
+ + # Verify structure: "Text @<i>{italic @<b>{bold}} more" + assert_equal 3, caption.children.size + assert_instance_of(ReVIEW::AST::TextNode, caption.children[0]) + assert_equal 'Text ', caption.children[0].content + + # Check nested inline structure + assert_instance_of(ReVIEW::AST::InlineNode, caption.children[1]) + assert_equal :i, caption.children[1].inline_type + assert_equal 2, caption.children[1].children.size + assert_equal 'italic ', caption.children[1].children[0].content + + # Check inner inline + inner_inline = caption.children[1].children[1] + assert_instance_of(ReVIEW::AST::InlineNode, inner_inline) + assert_equal :b, inner_inline.inline_type + assert_equal 1, inner_inline.children.size + assert_equal 'bold', inner_inline.children[0].content + + assert_instance_of(ReVIEW::AST::TextNode, caption.children[2]) + assert_equal ' more', caption.children[2].content + end + + def test_caption_serialization_simple + caption = ReVIEW::AST::CaptionNode.new(location: @location) + text_node = ReVIEW::AST::TextNode.new(location: @location, content: 'Simple caption') + caption.add_child(text_node) + + # Simple text caption should serialize as children array for compatibility + result = caption.to_h + expected = { + type: 'CaptionNode', + location: { filename: 'test.re', lineno: 1 }, + children: [ + { + type: 'TextNode', + content: 'Simple caption', + location: { filename: 'test.re', lineno: 1 } + } + ] + } + assert_equal expected, result + end + + def test_caption_serialization_complex + caption = ReVIEW::AST::CaptionNode.new(location: @location) + caption.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Caption with ')) + + inline_node = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :b) + inline_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'bold')) + caption.add_child(inline_node) + + # Complex caption should serialize as node structure + result = caption.to_h + assert_instance_of(Hash, result) + 
assert_equal 'CaptionNode', result[:type] + assert_equal 2, result[:children].size + end + + def test_empty_whitespace_caption + caption = ReVIEW::AST::CaptionNode.new(location: @location) + text_node = ReVIEW::AST::TextNode.new(location: @location, content: ' ') + caption.add_child(text_node) + + # Whitespace-only caption should be considered empty + assert_equal true, caption.empty? + end + + def test_to_inline_text_simple + caption = ReVIEW::AST::CaptionNode.new(location: @location) + text_node = ReVIEW::AST::TextNode.new(location: @location, content: 'Simple caption') + caption.add_child(text_node) + + assert_equal 'Simple caption', caption.to_inline_text + end + + def test_to_inline_text_with_inline + caption = ReVIEW::AST::CaptionNode.new(location: @location) + caption.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Caption with ')) + + inline_node = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :b) + inline_node.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'bold text')) + caption.add_child(inline_node) + + caption.add_child(ReVIEW::AST::TextNode.new(location: @location, content: ' content')) + + # Markup should be removed: "Caption with @<b>{bold text} content" -> "Caption with bold text content" + assert_equal 'Caption with bold text content', caption.to_inline_text + end + + def test_to_inline_text_with_nested_inline + caption = ReVIEW::AST::CaptionNode.new(location: @location) + caption.add_child(ReVIEW::AST::TextNode.new(location: @location, content: 'Text ')) + + # Create nested inline: @<i>{italic @<b>{bold}} + bold_text = ReVIEW::AST::TextNode.new(location: @location, content: 'bold') + bold_inline = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :b) + bold_inline.add_child(bold_text) + + italic_text = ReVIEW::AST::TextNode.new(location: @location, content: 'italic ') + italic_inline = ReVIEW::AST::InlineNode.new(location: @location, inline_type: :i) + 
italic_inline.add_child(italic_text) + italic_inline.add_child(bold_inline) + caption.add_child(italic_inline) + + caption.add_child(ReVIEW::AST::TextNode.new(location: @location, content: ' more')) + + # Nested markup should be removed: "Text @<i>{italic @<b>{bold}} more" -> "Text italic bold more" + assert_equal 'Text italic bold more', caption.to_inline_text + end + + def test_to_inline_text_empty + caption = ReVIEW::AST::CaptionNode.new(location: @location) + assert_equal '', caption.to_inline_text + end +end diff --git a/test/ast/test_caption_parser.rb b/test/ast/test_caption_parser.rb new file mode 100644 index 000000000..5604b56ec --- /dev/null +++ b/test/ast/test_caption_parser.rb @@ -0,0 +1,85 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/snapshot_location' +require 'review/ast/caption_node' +require 'review/ast/text_node' +require 'review/ast/inline_node' +require 'review/ast/compiler' + +class TestCaptionParser < Test::Unit::TestCase + def setup + @location = ReVIEW::SnapshotLocation.new('test.re', 1) + end + + def test_parser_initialization + parser = CaptionParserHelper.new(location: @location) + assert_instance_of(CaptionParserHelper, parser) + end + + def test_parse_nil_returns_nil + parser = CaptionParserHelper.new(location: @location) + assert_nil(parser.parse(nil)) + end + + def test_parse_empty_string_returns_nil + parser = CaptionParserHelper.new(location: @location) + assert_nil(parser.parse('')) + end + + def test_parse_existing_caption_node_returns_same + parser = CaptionParserHelper.new(location: @location) + caption_node = ReVIEW::AST::CaptionNode.new(location: @location) + + result = parser.parse(caption_node) + assert_equal caption_node, result + end + + def test_parse_simple_string_without_inline_processor + parser = CaptionParserHelper.new(location: @location) + result = parser.parse('Simple Caption') + + assert_instance_of(ReVIEW::AST::CaptionNode, result) + assert_equal 1, result.children.size 
+ assert_instance_of(ReVIEW::AST::TextNode, result.children.first) + assert_equal 'Simple Caption', result.children.first.content + assert_equal 'Simple Caption', result.to_inline_text + end + + def test_parse_string_with_inline_markup_without_processor + parser = CaptionParserHelper.new(location: @location) + result = parser.parse('Caption with @<b>{bold}') + + assert_instance_of(ReVIEW::AST::CaptionNode, result) + assert_equal 1, result.children.size + assert_instance_of(ReVIEW::AST::TextNode, result.children.first) + assert_equal 'Caption with @<b>{bold}', result.children.first.content + assert_equal 'Caption with @<b>{bold}', result.to_inline_text + assert_equal false, result.contains_inline? + end + + def test_parse_with_inline_processor + # Create a real inline processor from AST::Compiler + compiler = ReVIEW::AST::Compiler.new + inline_processor = compiler.inline_processor + + parser = CaptionParserHelper.new( + location: @location, + inline_processor: inline_processor + ) + result = parser.parse('Caption with @<b>{bold}') + + assert_instance_of(ReVIEW::AST::CaptionNode, result) + assert_operator(result.children.size, :>=, 1) + assert_equal true, result.contains_inline? 
+ # Real inline processor parses the markup, so to_inline_text extracts text content + assert_match(/Caption with.*bold/, result.to_inline_text) + end + + def test_factory_method_delegates_to_parser + result = CaptionParserHelper.parse('Test Caption', location: @location) + + assert_instance_of(ReVIEW::AST::CaptionNode, result) + assert_equal 'Test Caption', result.to_inline_text + end +end diff --git a/test/ast/test_code_block_debug.rb b/test/ast/test_code_block_debug.rb new file mode 100644 index 000000000..cdcd0373d --- /dev/null +++ b/test/ast/test_code_block_debug.rb @@ -0,0 +1,289 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/compiler' +require 'review/ast/json_serializer' +require 'review/book' +require 'review/book/chapter' +require 'json' + +class TestCodeBlockDebug < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['language'] = 'ja' + + @book = ReVIEW::Book::Base.new(config: @config) + + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + + @chapter = ReVIEW::Book::Chapter.new(@book, 1, 'debug_chapter', 'debug_chapter.re', StringIO.new) + ReVIEW::I18n.setup(@config['language']) + end + + def test_code_block_ast_structure + source = <<~EOS + = Chapter Title + + //list[test-code][Test Code][ruby]{ + puts @<b>{bold code} + # Comment with @<fn>{code-fn} + //} + EOS + + @chapter.content = source + + # Build AST without builder rendering + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter, reference_resolution: false) + + # Serialize AST to examine structure + json_str = ReVIEW::AST::JSONSerializer.serialize(ast_root) + ast = JSON.parse(json_str) + + # === Code Block AST Structure === + result = JSON.pretty_generate(ast) + expected0 = <<~EXPECTED.chomp + { + "type": "DocumentNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 1 + }, + "children": [ + { + "type": "HeadlineNode", + 
"location": { + "filename": "debug_chapter.re", + "lineno": 1 + }, + "level": 1, + "label": null, + "caption_node": { + "type": "CaptionNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 1 + }, + "children": [ + { + "type": "TextNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 1 + }, + "content": "Chapter Title" + } + ] + } + }, + { + "type": "CodeBlockNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "id": "test-code", + "lang": "ruby", + "caption_node": { + "type": "CaptionNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "children": [ + { + "type": "TextNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "content": "Test Code" + } + ] + }, + "line_numbers": false, + "code_type": "list", + "original_text": "puts @<b>{bold code}\\n# Comment with @<fn>{code-fn}", + "children": [ + { + "type": "CodeLineNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "children": [ + { + "type": "TextNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "content": "puts " + }, + { + "type": "InlineNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "children": [ + { + "type": "TextNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "content": "bold code" + } + ], + "inline_type": "b", + "args": [ + "bold code" + ] + } + ], + "original_text": "puts @<b>{bold code}" + }, + { + "type": "CodeLineNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "children": [ + { + "type": "TextNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "content": "# Comment with " + }, + { + "type": "InlineNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "children": [ + { + "type": "ReferenceNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "content": "code-fn", + "ref_id": 
"code-fn" + } + ], + "inline_type": "fn", + "args": [ + "code-fn" + ], + "target_item_id": "code-fn" + } + ], + "original_text": "# Comment with @<fn>{code-fn}" + } + ] + } + ] + } + EXPECTED + assert_equal expected0, result + + # Find code block node + code_block = ast['children'].find { |node| node['type'] == 'CodeBlockNode' } + assert_not_nil(code_block) + + # === Code Block Children === + result = JSON.pretty_generate(code_block['children']) + expected = <<~EXPECTED.chomp + [ + { + "type": "CodeLineNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "children": [ + { + "type": "TextNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "content": "puts " + }, + { + "type": "InlineNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "children": [ + { + "type": "TextNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "content": "bold code" + } + ], + "inline_type": "b", + "args": [ + "bold code" + ] + } + ], + "original_text": "puts @<b>{bold code}" + }, + { + "type": "CodeLineNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "children": [ + { + "type": "TextNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "content": "# Comment with " + }, + { + "type": "InlineNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "children": [ + { + "type": "ReferenceNode", + "location": { + "filename": "debug_chapter.re", + "lineno": 3 + }, + "content": "code-fn", + "ref_id": "code-fn" + } + ], + "inline_type": "fn", + "args": [ + "code-fn" + ], + "target_item_id": "code-fn" + } + ], + "original_text": "# Comment with @<fn>{code-fn}" + } + ] + EXPECTED + assert_equal expected, result + end +end diff --git a/test/ast/test_code_block_original_text.rb b/test/ast/test_code_block_original_text.rb new file mode 100644 index 000000000..58935b876 --- /dev/null +++ b/test/ast/test_code_block_original_text.rb 
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+require_relative '../test_helper'
+require 'stringio'
+require 'review/ast/compiler'
+require 'review/book'
+require 'review/book/chapter'
+
+# Checks that a //list block compiled by ReVIEW::AST::Compiler keeps its raw
+# source text available through original_text, original_lines and
+# processed_lines, even when the lines contain inline markup.
+class TestCodeBlockOriginalText < Test::Unit::TestCase
+  # Builds a book configured for Japanese with a single empty chapter and
+  # points ReVIEW.logger at an in-memory buffer.
+  def setup
+    @config = ReVIEW::Configure.values
+    @config['secnolevel'] = 2
+    @config['language'] = 'ja'
+    @book = ReVIEW::Book::Base.new(config: @config)
+
+    @log_io = StringIO.new
+    ReVIEW.logger = ReVIEW::Logger.new(@log_io)
+
+    @chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test_chapter', 'test_chapter.re', StringIO.new)
+    ReVIEW::I18n.setup(@config['language'])
+  end
+
+  # Compiles a chapter holding one //list block and compares the three
+  # text accessors of the resulting code block node with the source lines.
+  def test_code_block_original_text_preservation
+    @chapter.content = <<~EOS
+      = Chapter Title
+
+      //list[test-code][Test Code][ruby]{
+      puts @<b>{bold code}
+      # Comment with @<fn>{code-fn}
+      normal line
+      //}
+
+      //footnote[code-fn][Code block footnote]
+    EOS
+
+    # Build the AST only; no builder/renderer is involved.
+    ast_root = ReVIEW::AST::Compiler.new.compile_to_ast(@chapter)
+
+    # Match on the node class itself rather than on a substring of its name,
+    # so an unrelated class whose name merely contains "CodeBlockNode" cannot
+    # be picked up by accident.
+    code_block = ast_root.children.find { |node| node.is_a?(ReVIEW::AST::CodeBlockNode) }
+    assert_not_nil(code_block)
+
+    expected_lines = [
+      'puts @<b>{bold code}',
+      '# Comment with @<fn>{code-fn}',
+      'normal line'
+    ]
+
+    # original_text is the block body joined by newlines, markup untouched.
+    assert_equal expected_lines.join("\n"), code_block.original_text
+
+    # original_lines exposes the same text split per source line.
+    assert_equal expected_lines, code_block.original_lines
+
+    # processed_lines (should reconstruct from AST) must give back the same lines.
+    assert_equal expected_lines, code_block.processed_lines
+  end
+end
diff --git a/test/ast/test_column_sections.rb b/test/ast/test_column_sections.rb
new file mode 100644
index 000000000..172cce995
--- /dev/null
+++ b/test/ast/test_column_sections.rb @@ -0,0 +1,214 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast' +require 'review/ast/compiler' +require 'review/ast/review_generator' +require 'review/book' +require 'review/book/chapter' + +class TestColumnSections < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @book = ReVIEW::Book::Base.new(config: @config) + @chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test_chapter', 'test_chapter.re', StringIO.new) + end + + def test_column_section + source = <<~EOS + = Chapter Title + + Regular paragraph content. + + ==[column] Column Title + + This is content inside a column. + + Another paragraph in the column. + + == Regular Section + + Back to regular content. + EOS + + @chapter.content = source + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + # Find column node + column_node = find_node_by_type(ast_root, ReVIEW::AST::ColumnNode) + assert_not_nil(column_node) + assert_equal(2, column_node.level) + assert_equal(:column, column_node.column_type) + + # Check caption + assert_not_nil(column_node.caption_text) + assert_equal('Column Title', column_node.caption_text) + + # Check that column has content as children + assert(column_node.children.any?, 'Column should have content as children') + + # Test round-trip conversion + generator = ReVIEW::AST::ReVIEWGenerator.new + result = generator.generate(ast_root) + assert_include(result, '==[column] Column Title') + assert_include(result, 'This is content inside a column.') + assert_include(result, 'Another paragraph in the column.') + end + + def test_column_with_label + source = <<~EOS + = Chapter Title + + ==[column]{col1} Column with Label + + Content of labeled column. 
+ EOS + + @chapter.content = source + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + # Find column node + column_node = find_node_by_type(ast_root, ReVIEW::AST::ColumnNode) + assert_not_nil(column_node) + assert_equal('col1', column_node.label) + assert_equal('Column with Label', column_node.caption_text) + + # Test round-trip conversion + generator = ReVIEW::AST::ReVIEWGenerator.new + result = generator.generate(ast_root) + assert_include(result, '==[column]{col1} Column with Label') + end + + def test_nested_column_levels + source = <<~EOS + = Chapter Title + + ==[column] Level 2 Column + + Content in level 2 column. + + ===[column] Level 3 Column + + Content in level 3 column. + EOS + + @chapter.content = source + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + # Find column nodes + column_nodes = find_all_nodes_by_type(ast_root, ReVIEW::AST::ColumnNode) + assert_equal(2, column_nodes.length) + + level2_column = column_nodes.find { |n| n.level == 2 } + level3_column = column_nodes.find { |n| n.level == 3 } + + assert_not_nil(level2_column) + assert_equal('Level 2 Column', level2_column.caption_text) + + assert_not_nil(level3_column) + assert_equal('Level 3 Column', level3_column.caption_text) + end + + def test_column_vs_regular_headline + source = <<~EOS + = Chapter Title + + == Regular Headline + + Regular content. + + ==[column] Column Headline + + Column content. + + == Another Regular Headline + + More regular content. 
+ EOS + + @chapter.content = source + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + # Find different node types + headline_nodes = find_all_nodes_by_type(ast_root, ReVIEW::AST::HeadlineNode) + column_nodes = find_all_nodes_by_type(ast_root, ReVIEW::AST::ColumnNode) + + # Should have 3 headlines (including the chapter title) and 1 column + assert_equal(3, headline_nodes.length) + assert_equal(1, column_nodes.length) + + # Check that regular headlines are HeadlineNode + regular_headlines = headline_nodes.select { |n| n.level == 2 } + assert_equal(2, regular_headlines.length) + + # Check that column is ColumnNode + column = column_nodes.first + assert_equal(2, column.level) + assert_equal('Column Headline', column.caption_text) + end + + def test_column_with_inline_elements + source = <<~EOS + = Chapter Title + + ==[column] Column with @<b>{Bold} Text + + Content with @<i>{italic} and @<code>{code}. + EOS + + @chapter.content = source + ast_compiler = ReVIEW::AST::Compiler.new + ast_root = ast_compiler.compile_to_ast(@chapter) + + # Find column node + column_node = find_node_by_type(ast_root, ReVIEW::AST::ColumnNode) + assert_not_nil(column_node) + + # Check that caption has inline elements processed + caption_text = column_node.caption_text + assert_include(caption_text, 'Bold') + + # Check that content has inline elements in children + assert(column_node.children.any?, 'Column should have content as children') + + # Test round-trip conversion + generator = ReVIEW::AST::ReVIEWGenerator.new + result = generator.generate(ast_root) + assert_include(result, '==[column]') + assert_include(result, '@<i>{italic}') + assert_include(result, '@<code>{code}') + end + + private + + def find_node_by_type(node, node_type) + return node if node.is_a?(node_type) + + if node.children + node.children.each do |child| + result = find_node_by_type(child, node_type) + return result if result + end + end + + nil + end + + def 
find_all_nodes_by_type(node, node_type) + results = [] + + results << node if node.is_a?(node_type) + + if node.children + node.children.each do |child| + results.concat(find_all_nodes_by_type(child, node_type)) + end + end + + results + end +end diff --git a/test/ast/test_compiler_error_messages.rb b/test/ast/test_compiler_error_messages.rb new file mode 100644 index 000000000..791fa7d0c --- /dev/null +++ b/test/ast/test_compiler_error_messages.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/compiler' +require 'review/configure' +require 'review/book' +require 'review/i18n' + +class TestCompilerErrorMessages < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new + @book.config = @config + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + ReVIEW::I18n.setup(@config['language']) + end + + def test_max_headline_level_error_with_location + content = "= Chapter\n\n======= Too Deep Headline" + + chapter = ReVIEW::Book::Chapter.new( + @book, + 1, + 'test', + 'deep_headline.re', + StringIO.new(content) + ) + + error = assert_raises(ReVIEW::CompileError) do + compiler = ReVIEW::AST::Compiler.for_chapter(chapter) + compiler.compile_to_ast(chapter) + end + + # Verify error message contains expected information + assert_match(/Invalid header: max headline level is 6/, error.message) + assert_match(/at line 3/, error.message) + assert_match(/in deep_headline\.re/, error.message) + end + + def test_unknown_block_command_with_location + content = "= Chapter\n\n//unknowncommand{\ncontent\n//}" + + chapter = ReVIEW::Book::Chapter.new( + @book, + 1, + 'test', + 'unknown_command.re', + StringIO.new(content) + ) + + error = assert_raises(ReVIEW::CompileError) do + compiler = ReVIEW::AST::Compiler.for_chapter(chapter) + compiler.compile_to_ast(chapter) + end + + # Verify error message contains expected information + 
assert_match(%r{Unknown block command: //unknowncommand}, error.message) + assert_match(/at line/, error.message) + assert_match(/in unknown_command\.re/, error.message) + end +end diff --git a/test/ast/test_context_stack.rb b/test/ast/test_context_stack.rb new file mode 100644 index 000000000..382c98abb --- /dev/null +++ b/test/ast/test_context_stack.rb @@ -0,0 +1,159 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/markdown_adapter' + +class TestContextStack < Test::Unit::TestCase + # Mock node class for testing + class MockNode + attr_reader :children + + def initialize(name) + @name = name + @children = [] + end + + def add_child(child) + @children << child + end + + def to_s + @name + end + end + + def setup + @root = MockNode.new('root') + @stack = ReVIEW::AST::MarkdownAdapter::ContextStack.new(@root) + end + + def test_initialize + assert_equal @root, @stack.current + assert_true(@stack.empty?) + end + + def test_push_and_pop + child = MockNode.new('child') + + @stack.push(child) + assert_equal child, @stack.current + assert_false(@stack.empty?) + assert_equal 2, @stack.depth + + @stack.pop + assert_equal @root, @stack.current + assert_true(@stack.empty?) + assert_equal 1, @stack.depth + end + + def test_with_context + child = MockNode.new('child') + result = nil + + @stack.with_context(child) do + result = @stack.current + end + + # Context should be restored after block + assert_equal child, result + assert_equal @root, @stack.current + end + + def test_with_context_exception_safety + child = MockNode.new('child') + + begin + @stack.with_context(child) do + raise 'Test error' + end + rescue StandardError + # Exception caught + end + + # Context should still be restored despite exception + assert_equal @root, @stack.current + assert_true(@stack.empty?) 
+ end + + def test_nested_contexts + child1 = MockNode.new('child1') + child2 = MockNode.new('child2') + child3 = MockNode.new('child3') + + @stack.with_context(child1) do + assert_equal child1, @stack.current + assert_equal 2, @stack.depth + + @stack.with_context(child2) do + assert_equal child2, @stack.current + assert_equal 3, @stack.depth + + @stack.with_context(child3) do + assert_equal child3, @stack.current + assert_equal 4, @stack.depth + end + + assert_equal child2, @stack.current + end + + assert_equal child1, @stack.current + end + + assert_equal @root, @stack.current + assert_true(@stack.empty?) + end + + def test_pop_from_empty_raises_error + assert_raise(ReVIEW::CompileError) do + @stack.pop + end + end + + def test_validate_success + assert_nothing_raised do + @stack.validate! + end + end + + def test_validate_nil_in_stack + child = MockNode.new('child') + @stack.push(child) + + # Manually corrupt the internal stack + internal_stack = @stack.instance_variable_get(:@stack) + internal_stack << nil + + assert_raise_message(/Context corruption: nil found in stack/) do + @stack.validate! + end + end + + def test_depth + assert_equal 1, @stack.depth + + child1 = MockNode.new('child1') + @stack.push(child1) + assert_equal 2, @stack.depth + + child2 = MockNode.new('child2') + @stack.push(child2) + assert_equal 3, @stack.depth + + @stack.pop + assert_equal 2, @stack.depth + + @stack.pop + assert_equal 1, @stack.depth + end + + def test_empty + assert_true(@stack.empty?) + + child = MockNode.new('child') + @stack.push(child) + assert_false(@stack.empty?) + + @stack.pop + assert_true(@stack.empty?) 
+ end +end diff --git a/test/ast/test_dumper.rb b/test/ast/test_dumper.rb new file mode 100644 index 000000000..3452d15fa --- /dev/null +++ b/test/ast/test_dumper.rb @@ -0,0 +1,126 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/dumper' +require 'tmpdir' +require 'json' + +class TestDumper < Test::Unit::TestCase + def setup + @tmpdir = Dir.mktmpdir + @config = ReVIEW::Configure.values + end + + def teardown + FileUtils.rm_rf(@tmpdir) + end + + def create_review_file(content, filename = 'test.re') + path = File.join(@tmpdir, filename) + File.write(path, content) + path + end + + def test_dump_ast + content = <<~REVIEW + = Test Chapter + + This is a test paragraph. + + //list[sample][Sample Code]{ + puts 'Hello, World!' + //} + REVIEW + + path = create_review_file(content) + dumper = ReVIEW::AST::Dumper.new + result = dumper.dump_file(path) + + json = JSON.parse(result) + assert_equal 'DocumentNode', json['type'] + assert_equal 3, json['children'].size + + # Check headline + assert_equal 'HeadlineNode', json['children'][0]['type'] + assert_equal 1, json['children'][0]['level'] + expected_caption_node = { + 'type' => 'CaptionNode', + 'location' => { 'filename' => 'test.re', 'lineno' => 1 }, + 'children' => [ + { + 'type' => 'TextNode', + 'content' => 'Test Chapter', + 'location' => { 'filename' => 'test.re', 'lineno' => 1 } + } + ] + } + assert_equal expected_caption_node, json['children'][0]['caption_node'] + + # Check paragraph + assert_equal 'ParagraphNode', json['children'][1]['type'] + + # Check code block + assert_equal 'CodeBlockNode', json['children'][2]['type'] + assert_equal 'sample', json['children'][2]['id'] + expected_caption = { + 'type' => 'CaptionNode', + 'location' => { 'filename' => 'test.re', 'lineno' => 5 }, + 'children' => [ + { + 'type' => 'TextNode', + 'content' => 'Sample Code', + 'location' => { 'filename' => 'test.re', 'lineno' => 5 } + } + ] + } + assert_equal expected_caption, 
json['children'][2]['caption_node'] + end + + def test_dump_with_compact_options + content = "= Test\n\nParagraph" + path = create_review_file(content) + + options = ReVIEW::AST::JSONSerializer::Options.new + options.pretty = false + options.include_location = false + + dumper = ReVIEW::AST::Dumper.new(serializer_options: options) + result = dumper.dump_file(path) + + # Should be compact JSON + assert_not_include(result, "\n") + + json = JSON.parse(result) + # Should not have location + assert_nil(json['location']) + end + + def test_dump_multiple_files + content1 = "= Chapter 1\n\nContent 1" + content2 = "= Chapter 2\n\nContent 2" + + path1 = create_review_file(content1, 'ch01.re') + path2 = create_review_file(content2, 'ch02.re') + + dumper = ReVIEW::AST::Dumper.new + results = dumper.dump_files([path1, path2]) + + assert_equal 2, results.size + assert_include(results, path1) + assert_include(results, path2) + + json1 = JSON.parse(results[path1]) + json2 = JSON.parse(results[path2]) + + # Check that both documents have headline children + assert_equal 'HeadlineNode', json1['children'][0]['type'] + assert_equal 'HeadlineNode', json2['children'][0]['type'] + end + + def test_dump_nonexistent_file + dumper = ReVIEW::AST::Dumper.new + assert_raise(ReVIEW::FileNotFound) do + dumper.dump_file('/nonexistent/file.re') + end + end +end diff --git a/test/ast/test_format_auto_detection.rb b/test/ast/test_format_auto_detection.rb new file mode 100644 index 000000000..05f875896 --- /dev/null +++ b/test/ast/test_format_auto_detection.rb @@ -0,0 +1,122 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/compiler' +require 'review/book' + +# Skip Markdown tests if Ruby < 3.1 (markly requires Ruby >= 3.1) +# Note: Some tests use Markdown format detection and compilation +if Gem::Version.new(RUBY_VERSION) < Gem::Version.new('3.1.0') + # Define empty test class to avoid load errors + class TestFormatAutoDetection < Test::Unit::TestCase + def 
test_skipped + omit('Markdown tests require Ruby >= 3.1') + end + end + return +end + +class TestFormatAutoDetection < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @book = ReVIEW::Book::Base.new('.', config: @config) + end + + def test_markdown_file_detection + # Test .md extension + chapter_md = create_chapter('test.md', '# Markdown heading') + compiler = ReVIEW::AST::Compiler.for_chapter(chapter_md) + + assert_instance_of(ReVIEW::AST::MarkdownCompiler, compiler) + end + + def test_review_file_detection + # Test .re extension (Re:VIEW format) + chapter_re = create_chapter('test.re', '= Re:VIEW heading') + compiler = ReVIEW::AST::Compiler.for_chapter(chapter_re) + + assert_instance_of(ReVIEW::AST::Compiler, compiler) + assert_not_instance_of(ReVIEW::AST::MarkdownCompiler, compiler) + end + + def test_unknown_extension_defaults_to_review + # Test unknown extension defaults to Re:VIEW format + chapter_unknown = create_chapter('test.txt', '= Some heading') + compiler = ReVIEW::AST::Compiler.for_chapter(chapter_unknown) + + assert_instance_of(ReVIEW::AST::Compiler, compiler) + assert_not_instance_of(ReVIEW::AST::MarkdownCompiler, compiler) + end + + def test_no_extension_defaults_to_review + # Test no extension defaults to Re:VIEW format + chapter_no_ext = create_chapter('test', '= Some heading') + compiler = ReVIEW::AST::Compiler.for_chapter(chapter_no_ext) + + assert_instance_of(ReVIEW::AST::Compiler, compiler) + assert_not_instance_of(ReVIEW::AST::MarkdownCompiler, compiler) + end + + def test_markdown_compilation_with_auto_detection + # Test that Markdown file actually compiles to AST + content = <<~MD + # Main Title + + This is a paragraph with **bold** text. 
+ + ## Subsection + + - List item 1 + - List item 2 + MD + + chapter = create_chapter('test.md', content) + compiler = ReVIEW::AST::Compiler.for_chapter(chapter) + ast = compiler.compile_to_ast(chapter) + + assert_not_nil(ast) + assert_instance_of(ReVIEW::AST::DocumentNode, ast) + assert(ast.children.size > 0) + + # Check that we get headline nodes + headlines = ast.children.select { |child| child.is_a?(ReVIEW::AST::HeadlineNode) } + assert_equal(2, headlines.size) + assert_equal(1, headlines[0].level) + assert_equal(2, headlines[1].level) + end + + def test_review_compilation_with_auto_detection + # Test that Re:VIEW file actually compiles to AST + content = <<~RE + = Main Title + + This is a paragraph with @<b>{bold} text. + + == Subsection + + * List item 1 + * List item 2 + RE + + chapter = create_chapter('test.re', content) + compiler = ReVIEW::AST::Compiler.for_chapter(chapter) + ast = compiler.compile_to_ast(chapter) + + assert_not_nil(ast) + assert_instance_of(ReVIEW::AST::DocumentNode, ast) + assert(ast.children.size > 0) + + # Check that we get headline nodes + headlines = ast.children.select { |child| child.is_a?(ReVIEW::AST::HeadlineNode) } + assert_equal(2, headlines.size) + assert_equal(1, headlines[0].level) + assert_equal(2, headlines[1].level) + end + + private + + def create_chapter(filename, content) + require 'stringio' + ReVIEW::Book::Chapter.new(@book, 1, filename, filename, StringIO.new(content)) + end +end diff --git a/test/ast/test_html_renderer.rb b/test/ast/test_html_renderer.rb new file mode 100644 index 000000000..830c8c749 --- /dev/null +++ b/test/ast/test_html_renderer.rb @@ -0,0 +1,600 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/compiler' +require 'review/ast/node' +require 'review/renderer/html_renderer' +require 'review/book' +require 'review/book/chapter' +require 'review/configure' +require 'review/i18n' + +class TestHtmlRenderer < Test::Unit::TestCase + def setup + @config = 
ReVIEW::Configure.values + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + + ReVIEW::I18n.setup('ja') + + @compiler = ReVIEW::AST::Compiler.new + end + + def test_headline_rendering + content = "= Test Chapter\n\nParagraph text.\n" + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + assert_match(%r{<h1>.*Test Chapter</h1>}, html_output) + assert_match(%r{<p>Paragraph text\.</p>}, html_output) + end + + def test_inline_elements + content = "= Chapter\n\nThis is @<b>{bold} and @<i>{italic} text.\n" + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + assert_match(%r{<b>bold</b>}, html_output) + assert_match(%r{<i>italic</i>}, html_output) + end + + def test_code_block + content = <<~REVIEW + = Chapter + + //list[sample][Sample Code][ruby]{ + puts "Hello World" + //} + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + assert_match(/<div id="sample" class="caption-code">/, html_output) + assert_match(%r{<p class="caption">リスト1\.1: Sample Code</p>}, html_output) + assert_match(%r{<pre class="list.*">puts "Hello World"\n</pre>}, html_output) + end + + def test_table_rendering + content = <<~REVIEW + = Chapter + + //table[sample][Sample Table]{ + Header1 Header2 + -------------------- + Cell1 Cell2 + //} + REVIEW + + 
chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + assert_match(/<div id="sample" class="table">/, html_output) + assert_match(%r{<p class="caption">表1\.1: Sample Table</p>}, html_output) + # No thead/tbody sections like HTMLBuilder + assert_no_match(/<thead>/, html_output) + assert_no_match(/<tbody>/, html_output) + # Header1 (above the fixture's dashed line) must render as <th>, Cell1 (below it) as <td> + assert_match(%r{<th>Header1</th>}, html_output) + assert_match(%r{<td>Cell1</td>}, html_output) + end + + def test_column_rendering + content = <<~REVIEW + = Chapter + + ===[column] Column Title + + Column content here. + + ===[/column] + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + assert_match(/<div class="column">/, html_output) + assert_match(/Column Title/, html_output) + assert_match(%r{<p>Column content here\.</p>}, html_output) + end + + def test_note_block + content = <<~REVIEW + = Chapter + + //note[Sample Note]{ + This is a note. 
+ //} + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + assert_match(/<div class="note">/, html_output) + assert_match(%r{<p class="caption">Sample Note</p>}, html_output) + # Note content should be wrapped in paragraph tags like HTMLBuilder + assert_match(%r{<p>This is a note\.</p>}, html_output) + end + + def test_text_escaping + content = "= Chapter\n\nText with <html> & \"quotes\".\n" + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + assert_match(/<html> & "quotes"/, html_output) + end + + def test_id_normalization + content = "={test-chapter} Test Chapter\n\nParagraph.\n" + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + # HtmlRenderer now uses fixed anchor IDs like HTMLBuilder + assert_match(%r{<h1 id="test-chapter">.*</h1>}, html_output) + # Chapter title should be present + assert_match(/Test Chapter/, html_output) + end + + def test_href_inline + content = "= Chapter\n\nVisit @<href>{https://example.com, Example Site}.\n" + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + assert_match(%r{<a 
href="https://example\.com".*>Example Site</a>}, html_output) + end + + def test_visit_embed_raw_basic + # Test basic //raw command without builder specification + embed = ReVIEW::AST::EmbedNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), + embed_type: :raw, + arg: 'Raw HTML content with <br> tag', + target_builders: nil, + content: 'Raw HTML content with <br> tag') + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new('')) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + result = renderer.visit(embed) + expected = 'Raw HTML content with <br /> tag' + + assert_equal expected, result + end + + def test_visit_embed_raw_html_targeted + # Test //raw command targeted for HTML + embed = ReVIEW::AST::EmbedNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), + embed_type: :raw, + arg: '|html|<div class="custom">HTML content</div>', + target_builders: ['html'], + content: '<div class="custom">HTML content</div>') + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new('')) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + result = renderer.visit(embed) + expected = '<div class="custom">HTML content</div>' + + assert_equal expected, result + end + + def test_visit_embed_raw_latex_targeted + # Test //raw command targeted for LaTeX (should output nothing) + embed = ReVIEW::AST::EmbedNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), + embed_type: :raw, + arg: '|latex|\\textbf{LaTeX content}', + target_builders: ['latex'], + content: '\\textbf{LaTeX content}') + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new('')) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + result = renderer.visit(embed) + expected = '' + + assert_equal expected, result + end + + def test_visit_embed_raw_multiple_builders + # Test //raw command targeted for multiple builders including HTML + embed = ReVIEW::AST::EmbedNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), + 
embed_type: :raw, + arg: '|html,latex|Content for both', + target_builders: ['html', 'latex'], + content: 'Content for both') + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new('')) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + result = renderer.visit(embed) + expected = 'Content for both' + + assert_equal expected, result + end + + def test_visit_embed_raw_inline + # Test inline @<raw> command + embed = ReVIEW::AST::EmbedNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), + embed_type: :inline, + arg: '|html|<span class="inline">HTML</span>', + target_builders: ['html'], + content: '<span class="inline">HTML</span>') + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new('')) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + result = renderer.visit(embed) + expected = '<span class="inline">HTML</span>' + + assert_equal expected, result + end + + def test_visit_embed_raw_newline_conversion + # Test \\n to newline conversion + embed = ReVIEW::AST::EmbedNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), + embed_type: :raw, + arg: 'Line 1\\nLine 2\\nLine 3', + target_builders: nil, + content: 'Line 1\\nLine 2\\nLine 3') + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new('')) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + result = renderer.visit(embed) + expected = "Line 1\nLine 2\nLine 3" + + assert_equal expected, result + end + + def test_visit_embed_raw_xhtml_compliance + # Test XHTML compliance for self-closing tags + embed = ReVIEW::AST::EmbedNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), + embed_type: :raw, + arg: '<hr><br><img src="test.png"><input type="text">', + target_builders: nil, + content: '<hr><br><img src="test.png"><input type="text">') + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new('')) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + result = 
renderer.visit(embed) + expected = '<hr /><br /><img src="test.png" /><input type="text" />' + + assert_equal expected, result + end + + def test_visit_list_definition + # Test definition list + list = ReVIEW::AST::ListNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), list_type: :dl) + + # First definition item + item1 = ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 1) + item1.parent = list # Set parent for list type detection + # Term goes to term_children + term1 = ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Alpha') + item1.term_children << term1 + # Definition goes to children + def1 = ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'RISC CPU made by DEC.') + item1.add_child(def1) + + # Second definition item + item2 = ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 1) + item2.parent = list # Set parent for list type detection + # Term goes to term_children + term2 = ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'POWER') + item2.term_children << term2 + # Definition goes to children + def2 = ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'RISC CPU made by IBM and Motorola.') + item2.add_child(def2) + + list.add_child(item1) + list.add_child(item2) + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new('')) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + result = renderer.visit(list) + + expected = "<dl>\n" + + '<dt>Alpha</dt><dd>RISC CPU made by DEC.</dd>' + + '<dt>POWER</dt><dd>RISC CPU made by IBM and Motorola.</dd>' + + "\n</dl>\n" + + assert_equal expected, result + end + + def test_visit_list_definition_single_child + # Test definition list with term only (no definition) + list = ReVIEW::AST::ListNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), list_type: :dl) + + item = 
ReVIEW::AST::ListItemNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), level: 1) + item.parent = list # Set parent for list type detection + # Term goes to term_children + term = ReVIEW::AST::TextNode.new(location: ReVIEW::SnapshotLocation.new(nil, 0), content: 'Term Only') + item.term_children << term + # No definition (children is empty) + + list.add_child(item) + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new('')) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + result = renderer.visit(list) + + expected = "<dl>\n" + + '<dt>Term Only</dt><dd></dd>' + + "\n</dl>\n" + + assert_equal expected, result + end + + def test_tex_equation_without_id_mathjax + # Test TexEquationNode without ID using MathJax + @config['math_format'] = 'mathjax' + @book.config = @config + + require 'review/ast/tex_equation_node' + equation = ReVIEW::AST::TexEquationNode.new( + location: nil, + id: nil, + content: 'E = mc^2' + ) + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new('')) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + result = renderer.visit(equation) + + # HTMLBuilder uses $$ for display mode + expected = "<div class=\"equation\">\n$$E = mc^2$$\n</div>\n" + + assert_equal expected, result + end + + def test_tex_equation_without_id_plain + # Test TexEquationNode without ID using plain text + @config['math_format'] = nil + @book.config = @config + + require 'review/ast/tex_equation_node' + equation = ReVIEW::AST::TexEquationNode.new( + location: nil, + id: nil, + content: 'E = mc^2' + ) + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new('')) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + result = renderer.visit(equation) + + # Fallback format wraps in div.equation and pre tags + expected = "<div class=\"equation\">\n<pre>E = mc^2\n</pre>\n</div>\n" + + assert_equal expected, result + end + + def test_tex_equation_with_id_and_caption_mathjax + # 
Test TexEquationNode with ID and caption using MathJax + @config['math_format'] = 'mathjax' + @book.config = @config + + content = <<~REVIEW + = Chapter + + //texequation[eq1][Einstein's Mass-Energy Equivalence]{ + E = mc^2 + //} + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + # Use caption-equation class like HTMLBuilder + assert_match(/<div id="eq1" class="caption-equation">/, html_output) + # Caption should use I18n.t('equation') and proper formatting + assert_match(%r{<p class="caption">式1\.1: Einstein's Mass-Energy Equivalence</p>}, html_output) + # MathJax uses $$ delimiters + assert_match(/\$\$E = mc\^2\$\$/, html_output) + end + + def test_tex_equation_with_id_only_mathjax + # Test TexEquationNode with ID only (no caption) using MathJax + @config['math_format'] = 'mathjax' + @book.config = @config + + content = <<~REVIEW + = Chapter + + //texequation[eq1]{ + \\int_{-\\infty}^{\\infty} e^{-x^2} dx = \\sqrt{\\pi} + //} + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + # Use caption-equation class like HTMLBuilder + assert_match(/<div id="eq1" class="caption-equation">/, html_output) + # Caption should show equation number only (with colon from format_number_header) + assert_match(%r{<p class="caption">式1\.1:</p>}, html_output) + # Check that equation content is present + assert_match(/\\int_/, html_output) + end + + def test_nest_ul + content = <<~EOS + = Chapter + + * UL1 + + //beginchild + + 1. UL1-OL1 + 2. 
UL1-OL2 + + * UL1-UL1 + * UL1-UL2 + + : UL1-DL1 + \tUL1-DD1 + : UL1-DL2 + \tUL1-DD2 + + //endchild + + * UL2 + + //beginchild + + UL2-PARA + + //endchild + EOS + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + # Verify that nested structure is present + assert_match(/<li>UL1/, html_output) + assert_match(/<li>UL2/, html_output) + assert_match(/<li>UL1-OL1/, html_output) + assert_match(/<li>UL1-UL1/, html_output) + assert_match(/<dt>UL1-DL1/, html_output) + end + + def test_nest_ol + content = <<~EOS + = Chapter + + 1. OL1 + + //beginchild + + 1. OL1-OL1 + 2. OL1-OL2 + + * OL1-UL1 + * OL1-UL2 + + : OL1-DL1 + \tOL1-DD1 + : OL1-DL2 + \tOL1-DD2 + + //endchild + + 2. OL2 + + //beginchild + + OL2-PARA + + //endchild + EOS + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + # Verify that nested structure is present + assert_match(/<li>OL1/, html_output) + assert_match(/<li>OL2/, html_output) + assert_match(/<li>OL1-OL1/, html_output) + assert_match(/<li>OL1-UL1/, html_output) + assert_match(/<dt>OL1-DL1/, html_output) + end + + def test_nest_dl + content = <<~EOS + = Chapter + + : DL1 + + //beginchild + + 1. DL1-OL1 + 2. 
DL1-OL2 + + * DL1-UL1 + * DL1-UL2 + + : DL1-DL1 + \tDL1-DD1 + : DL1-DL2 + \tDL1-DD2 + + //endchild + + : DL2 + \tDD2 + + //beginchild + + * DD2-UL1 + * DD2-UL2 + + DD2-PARA + + //endchild + EOS + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render(ast_root) + + # Verify that nested structure is present + assert_match(/<dt>DL1/, html_output) + assert_match(/<dt>DL2/, html_output) + assert_match(/<li>DL1-OL1/, html_output) + assert_match(/<li>DL1-UL1/, html_output) + assert_match(/<li>DD2-UL1/, html_output) + end +end diff --git a/test/ast/test_html_renderer_builder_comparison.rb b/test/ast/test_html_renderer_builder_comparison.rb new file mode 100644 index 000000000..43f3306b7 --- /dev/null +++ b/test/ast/test_html_renderer_builder_comparison.rb @@ -0,0 +1,426 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require_relative '../support/review/test/html_comparator' +require 'review/ast/diff/html' + +class TestHtmlRendererBuilderComparison < Test::Unit::TestCase + def setup + @converter = ReVIEW::Test::HtmlComparator.new + @comparator = ReVIEW::AST::Diff::Html.new + end + + def test_simple_paragraph_comparison + source = 'This is a simple paragraph.' + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? 
+ puts "Builder HTML: #{builder_html.inspect}" + puts "Renderer HTML: #{renderer_html.inspect}" + puts result.pretty_diff + end + + assert result.same_hash?, 'Simple paragraph should produce equivalent HTML' + end + + def test_headline_comparison + source = '= Chapter Title' + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts "Builder HTML: #{builder_html.inspect}" + puts "Renderer HTML: #{renderer_html.inspect}" + puts result.pretty_diff + end + + assert result.same_hash?, 'Headline should produce equivalent HTML' + end + + def test_inline_formatting_comparison + source = 'This has @<b>{bold} and @<i>{italic} and @<code>{code} text.' + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts "Builder HTML: #{builder_html.inspect}" + puts "Renderer HTML: #{renderer_html.inspect}" + puts result.pretty_diff + end + + assert result.same_hash?, 'Inline formatting should produce equivalent HTML' + end + + def test_code_block_comparison + source = <<~RE + //list[example][Code Example]{ + def hello + puts "Hello World" + end + //} + RE + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts "Builder HTML: #{builder_html.inspect}" + puts "Renderer HTML: #{renderer_html.inspect}" + puts result.pretty_diff + end + + assert result.same_hash? 
+ end + + def test_table_comparison + source = <<~RE + //table[sample][Sample Table]{ + Header 1 Header 2 + --------------------- + Data 1 Data 2 + Data 3 Data 4 + //} + RE + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts "Builder HTML: #{builder_html.inspect}" + puts "Renderer HTML: #{renderer_html.inspect}" + puts result.pretty_diff + end + + assert result.same_hash? + end + + def test_list_comparison + source = <<~RE + * First item + * Second item + * Third item + RE + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts "Builder HTML: #{builder_html.inspect}" + puts "Renderer HTML: #{renderer_html.inspect}" + puts result.pretty_diff + end + + assert result.same_hash? + end + + def test_note_block_comparison + source = <<~RE + //note[Note Title]{ + This is a note block. + //} + RE + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts "Builder HTML: #{builder_html.inspect}" + puts "Renderer HTML: #{renderer_html.inspect}" + puts result.pretty_diff + end + + assert result.same_hash? + end + + def test_complex_document_comparison + source = <<~RE + = Chapter Title + + This is a paragraph with @<b>{bold} text. 
+ + == Section Title + + Here's a list: + + * Item 1 + * Item 2 + + And a code block: + + //list[example][Example]{ + puts "Hello" + //} + + //table[data][Data Table]{ + Name Value + ---------------------- + A 1 + B 2 + //} + RE + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts 'Complex document differences found:' + puts "Builder HTML length: #{builder_html.length}" + puts "Renderer HTML length: #{renderer_html.length}" + puts "Builder HTML: #{builder_html.inspect}" + puts "Renderer HTML: #{renderer_html.inspect}" + puts result.pretty_diff + end + + assert result.same_hash? + end + + # Tests with actual Re:VIEW files from samples/syntax-book + def test_syntax_book_ch01 + file_path = File.join(__dir__, '../../samples/syntax-book/ch01.re') + source = File.read(file_path) + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts 'ch01.re differences found:' + puts "Builder HTML length: #{builder_html.length}" + puts "Renderer HTML length: #{renderer_html.length}" + puts result.pretty_diff + end + + assert result.same_hash?, 'ch01.re should produce equivalent HTML' + end + + def test_syntax_book_ch02 + book_dir = File.join(__dir__, '../../samples/syntax-book') + result = @converter.convert_chapter_with_book_context(book_dir, 'ch02') + + builder_html = result[:builder] + renderer_html = result[:renderer] + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? 
+ puts 'ch02.re differences found:' + puts "Builder HTML length: #{builder_html.length}" + puts "Renderer HTML length: #{renderer_html.length}" + puts result.pretty_diff + end + + assert result.same_hash?, 'ch02.re should produce equivalent HTML' + end + + def test_syntax_book_ch03 + file_path = File.join(__dir__, '../../samples/syntax-book/ch03.re') + source = File.read(file_path) + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts 'ch03.re differences found:' + puts "Builder HTML: #{builder_html}" + puts "Renderer HTML: #{renderer_html}" + puts result.pretty_diff + end + + assert result.same_hash?, 'ch03.re should produce equivalent HTML' + end + + def test_syntax_book_pre01 + file_path = File.join(__dir__, '../../samples/syntax-book/pre01.re') + source = File.read(file_path) + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts 'pre01.re differences found:' + puts "Builder HTML length: #{builder_html.length}" + puts "Renderer HTML length: #{renderer_html.length}" + puts result.pretty_diff + end + + assert result.same_hash?, 'pre01.re should produce equivalent HTML' + end + + def test_syntax_book_appA + file_path = File.join(__dir__, '../../samples/syntax-book/appA.re') + source = File.read(file_path) + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? 
+ puts 'appA.re differences found:' + puts "Builder HTML length: #{builder_html.length}" + puts "Renderer HTML length: #{renderer_html.length}" + puts result.pretty_diff + end + + assert result.same_hash?, 'appA.re should produce equivalent HTML' + end + + def test_syntax_book_part2 + file_path = File.join(__dir__, '../../samples/syntax-book/part2.re') + source = File.read(file_path) + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts 'part2.re differences found:' + puts "Builder HTML length: #{builder_html.length}" + puts "Renderer HTML length: #{renderer_html.length}" + puts result.pretty_diff + end + + assert result.same_hash?, 'part2.re should produce equivalent HTML' + end + + def test_syntax_book_bib + book_dir = File.join(__dir__, '../../samples/syntax-book') + result = @converter.convert_chapter_with_book_context(book_dir, 'bib') + + builder_html = result[:builder] + renderer_html = result[:renderer] + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts 'bib.re differences found:' + puts "Builder HTML length: #{builder_html.length}" + puts "Renderer HTML length: #{renderer_html.length}" + puts result.pretty_diff + end + + assert result.same_hash?, 'bib.re should produce equivalent HTML' + end + + # Tests with actual Re:VIEW files from samples/debug-book + def test_debug_book_advanced_features + file_path = File.join(__dir__, '../../samples/debug-book/advanced_features.re') + source = File.read(file_path) + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? 
+ puts 'advanced_features.re differences found:' + puts "Builder HTML length: #{builder_html.length}" + puts "Renderer HTML length: #{renderer_html.length}" + puts result.pretty_diff + end + + assert result.same_hash?, 'advanced_features.re should produce equivalent HTML' + end + + def test_debug_book_comprehensive + file_path = File.join(__dir__, '../../samples/debug-book/comprehensive.re') + source = File.read(file_path) + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts 'comprehensive.re differences found:' + puts "Builder HTML length: #{builder_html.length}" + puts "Renderer HTML length: #{renderer_html.length}" + puts result.pretty_diff + end + + assert result.same_hash?, 'comprehensive.re should produce equivalent HTML' + end + + def test_debug_book_edge_cases_test + file_path = File.join(__dir__, '../../samples/debug-book/edge_cases_test.re') + source = File.read(file_path) + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts 'edge_cases_test.re differences found:' + puts "Builder HTML length: #{builder_html.length}" + puts "Renderer HTML length: #{renderer_html.length}" + puts result.pretty_diff + end + + assert result.same_hash?, 'edge_cases_test.re should produce equivalent HTML' + end + + def test_debug_book_extreme_features + file_path = File.join(__dir__, '../../samples/debug-book/extreme_features.re') + source = File.read(file_path) + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? 
+ puts 'extreme_features.re differences found:' + puts "Builder HTML length: #{builder_html.length}" + puts "Renderer HTML length: #{renderer_html.length}" + puts result.pretty_diff + end + + assert result.same_hash?, 'extreme_features.re should produce equivalent HTML' + end + + def test_debug_book_multicontent_test + file_path = File.join(__dir__, '../../samples/debug-book/multicontent_test.re') + source = File.read(file_path) + + builder_html = @converter.convert_with_builder(source) + renderer_html = @converter.convert_with_renderer(source) + + result = @comparator.compare(builder_html, renderer_html) + + unless result.same_hash? + puts 'multicontent_test.re differences found:' + puts "Builder HTML length: #{builder_html.length}" + puts "Renderer HTML length: #{renderer_html.length}" + puts result.pretty_diff + end + + assert result.same_hash?, 'multicontent_test.re should produce equivalent HTML' + end +end diff --git a/test/ast/test_html_renderer_inline_elements.rb b/test/ast/test_html_renderer_inline_elements.rb new file mode 100644 index 000000000..b3c7b0ce0 --- /dev/null +++ b/test/ast/test_html_renderer_inline_elements.rb @@ -0,0 +1,602 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require_relative '../book_test_helper' +require 'review/ast/compiler' +require 'review/ast/node' +require 'review/renderer/html_renderer' +require 'review/book' +require 'review/book/chapter' +require 'review/configure' +require 'review/i18n' + +class TestHtmlRendererInlineElements < Test::Unit::TestCase + include BookTestHelper + + def setup + @config = ReVIEW::Configure.values + @config['language'] = 'ja' + @config['secnolevel'] = 2 + @book = ReVIEW::Book::Base.new(config: @config) + + ReVIEW::I18n.setup('ja') + + @compiler = ReVIEW::AST::Compiler.new + end + + def render_inline(content) + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = 
@compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + renderer.render(ast_root) + end + + # Basic text formatting + def test_inline_b + content = "= Chapter\n\nThis is @<b>{bold} text.\n" + output = render_inline(content) + assert_match(%r{<b>bold</b>}, output) + end + + def test_inline_strong + content = "= Chapter\n\nThis is @<strong>{strong} text.\n" + output = render_inline(content) + assert_match(%r{<strong>strong</strong>}, output) + end + + def test_inline_i + content = "= Chapter\n\nThis is @<i>{italic} text.\n" + output = render_inline(content) + assert_match(%r{<i>italic</i>}, output) + end + + def test_inline_em + content = "= Chapter\n\nThis is @<em>{emphasized} text.\n" + output = render_inline(content) + assert_match(%r{<em>emphasized</em>}, output) + end + + def test_inline_u + content = "= Chapter\n\nThis is @<u>{underlined} text.\n" + output = render_inline(content) + assert_match(%r{<u>underlined</u>}, output) + end + + def test_inline_del + content = "= Chapter\n\nThis is @<del>{deleted} text.\n" + output = render_inline(content) + assert_match(%r{<del>deleted</del>}, output) + end + + def test_inline_ins + content = "= Chapter\n\nThis is @<ins>{inserted} text.\n" + output = render_inline(content) + assert_match(%r{<ins>inserted</ins>}, output) + end + + # Code and monospace formatting + def test_inline_code + content = "= Chapter\n\nInline code: @<code>{var x = 10}\n" + output = render_inline(content) + assert_match(%r{<code class="inline-code tt">var x = 10</code>}, output) + end + + def test_inline_tt + content = "= Chapter\n\nMonospace: @<tt>{monospace}\n" + output = render_inline(content) + assert_match(%r{<code class="tt">monospace</code>}, output) + end + + def test_inline_ttb + content = "= Chapter\n\nBold monospace: @<ttb>{bold mono}\n" + output = render_inline(content) + assert_match(%r{<code class="tt"><b>bold mono</b></code>}, output) + end + + def test_inline_tti + content = "= Chapter\n\nItalic 
monospace: @<tti>{italic mono}\n" + output = render_inline(content) + assert_match(%r{<code class="tt"><i>italic mono</i></code>}, output) + end + + def test_inline_kbd + content = "= Chapter\n\nPress @<kbd>{Enter} key.\n" + output = render_inline(content) + assert_match(%r{<kbd>Enter</kbd>}, output) + end + + def test_inline_samp + content = "= Chapter\n\nOutput: @<samp>{sample output}\n" + output = render_inline(content) + assert_match(%r{<samp>sample output</samp>}, output) + end + + def test_inline_var + content = "= Chapter\n\nVariable: @<var>{variableName}\n" + output = render_inline(content) + assert_match(%r{<var>variableName</var>}, output) + end + + # Superscript and subscript + def test_inline_sup + content = "= Chapter\n\nE = mc@<sup>{2}\n" + output = render_inline(content) + assert_match(%r{mc<sup>2</sup>}, output) + end + + def test_inline_sub + content = "= Chapter\n\nH@<sub>{2}O\n" + output = render_inline(content) + assert_match(%r{H<sub>2</sub>O}, output) + end + + # Ruby annotation + def test_inline_ruby + content = "= Chapter\n\n@<ruby>{漢字, かんじ}\n" + output = render_inline(content) + # InlineElementRenderer outputs simple ruby without rp tags + assert_match(%r{<ruby>漢字<rt>かんじ</rt></ruby>}, output) + end + + def test_inline_ruby_without_annotation + content = "= Chapter\n\n@<ruby>{漢字}\n" + assert_nothing_raised do + output = render_inline(content) + assert_match(/漢字/, output) + assert_no_match(%r{<rt></rt>}, output) + end + end + + # Special Japanese formatting + def test_inline_bou + content = "= Chapter\n\n@<bou>{傍点}\n" + output = render_inline(content) + assert_match(%r{<span class="bou">傍点</span>}, output) + end + + def test_inline_ami + content = "= Chapter\n\n@<ami>{網掛け}\n" + output = render_inline(content) + assert_match(%r{<span class="ami">網掛け</span>}, output) + end + + def test_inline_tcy + content = "= Chapter\n\n縦中横@<tcy>{10}文字\n" + output = render_inline(content) + assert_match(%r{<span class="tcy">10</span>}, output) + end + + def 
test_inline_tcy_single_ascii + content = "= Chapter\n\n@<tcy>{A}文字\n" + output = render_inline(content) + assert_match(%r{<span class="upright">A</span>}, output) + end + + # Keywords and index + def test_inline_kw + content = "= Chapter\n\n@<kw>{キーワード, keyword}\n" + output = render_inline(content) + # Uses half-width parentheses and includes IDX comment + assert_match(%r{<b class="kw">キーワード \(keyword\)</b><!-- IDX:キーワード -->}, output) + end + + def test_inline_idx + content = "= Chapter\n\n@<idx>{索引項目}\n" + output = render_inline(content) + # idx displays the text and outputs an IDX comment (no anchor tag) + assert_match(/索引項目/, output) + assert_match(/<!-- IDX:索引項目 -->/, output) + end + + def test_inline_idx_hierarchical + content = "= Chapter\n\n@<idx>{親項目<<>>子項目}\n" + output = render_inline(content) + # Display text includes the full hierarchical path with <<>> + assert_match(/親項目<<>>子項目/, output) + # IDX comment preserves the <<>> delimiter (not escaped in HTML comments) + assert_match(/<!-- IDX:親項目<<>>子項目 -->/, output) + end + + def test_inline_hidx + content = "= Chapter\n\n@<hidx>{隠し索引}\n" + output = render_inline(content) + # hidx outputs only an IDX comment (no text, no anchor tag) + assert_match(/<!-- IDX:隠し索引 -->/, output) + # Text should not be displayed + refute_match(/>隠し索引</, output) + end + + def test_inline_hidx_hierarchical + content = "= Chapter\n\n@<hidx>{索引<<>>項目}\n" + output = render_inline(content) + # hidx outputs only an IDX comment with <<>> delimiter (no text, no anchor tag) + # Note: <<>> is not escaped in HTML comments + assert_match(/<!-- IDX:索引<<>>項目 -->/, output) + # Text should not be displayed + refute_match(/>索引/, output) + refute_match(/項目</, output) + end + + # Links + def test_inline_href + content = "= Chapter\n\n@<href>{https://example.com, Example}\n" + output = render_inline(content) + assert_match(%r{<a href="https://example\.com" class="link">Example</a>}, output) + end + + def test_inline_href_url_only + content = "= 
Chapter\n\n@<href>{https://example.com}\n" + output = render_inline(content) + assert_match(%r{<a href="https://example\.com" class="link">https://example\.com</a>}, output) + end + + def test_inline_href_internal_reference_with_label + content = "= Chapter\n\n@<href>{#anchor,Jump to anchor}\n" + output = render_inline(content) + assert_match(%r{<a href="#anchor" class="link">Jump to anchor</a>}, output) + end + + def test_inline_href_internal_reference_without_label + content = "= Chapter\n\n@<href>{#anchor}\n" + output = render_inline(content) + assert_match(%r{<a href="#anchor" class="link">#anchor</a>}, output) + end + + # Special characters + def test_inline_br + content = "= Chapter\n\nLine1@<br>{}Line2\n" + output = render_inline(content) + assert_match(%r{Line1<br />Line2}, output) + end + + def test_inline_uchar + content = "= Chapter\n\n@<uchar>{2764} is a heart.\n" + output = render_inline(content) + assert_match(/❤/, output) + end + + # HTML semantic elements + def test_inline_abbr + content = "= Chapter\n\n@<abbr>{HTML}\n" + output = render_inline(content) + assert_match(%r{<abbr>HTML</abbr>}, output) + end + + def test_inline_acronym + content = "= Chapter\n\n@<acronym>{NATO}\n" + output = render_inline(content) + assert_match(%r{<acronym>NATO</acronym>}, output) + end + + def test_inline_cite + content = "= Chapter\n\n@<cite>{Book Title}\n" + output = render_inline(content) + assert_match(%r{<cite>Book Title</cite>}, output) + end + + def test_inline_dfn + content = "= Chapter\n\n@<dfn>{definition}\n" + output = render_inline(content) + assert_match(%r{<dfn>definition</dfn>}, output) + end + + def test_inline_big + content = "= Chapter\n\n@<big>{large text}\n" + output = render_inline(content) + assert_match(%r{<big>large text</big>}, output) + end + + def test_inline_small + content = "= Chapter\n\n@<small>{small text}\n" + output = render_inline(content) + assert_match(%r{<small>small text</small>}, output) + end + + # Special formatting + def 
test_inline_recipe + content = "= Chapter\n\n@<recipe>{レシピ名}\n" + output = render_inline(content) + assert_match(%r{<span class="recipe">「レシピ名」</span>}, output) + end + + def test_inline_balloon + content = "= Chapter\n\n@<balloon>{吹き出し}\n" + output = render_inline(content) + assert_match(%r{<span class="balloon">吹き出し</span>}, output) + end + + def test_inline_dtp + content = "= Chapter\n\n@<dtp>{command}\n" + output = render_inline(content) + assert_match(/<\?dtp command \?>/, output) + end + + # Math + def test_inline_m + content = "= Chapter\n\n@<m>{E = mc^2}\n" + output = render_inline(content) + # InlineElementRenderer uses class="equation" like HTMLBuilder + assert_match(%r{<span class="equation">E = mc\^2</span>}, output) + end + + # Comments (draft mode) + def test_inline_comment_draft_mode + @config['draft'] = true + content = "= Chapter\n\nText @<comment>{draft comment} here.\n" + output = render_inline(content) + assert_match(%r{<span class="draft-comment">draft comment</span>}, output) + end + + def test_inline_comment_non_draft_mode + @config['draft'] = false + content = "= Chapter\n\nText @<comment>{draft comment} here.\n" + output = render_inline(content) + assert_no_match(/draft-comment/, output) + assert_no_match(/draft comment/, output) + end + + # Cross-references (basic tests) + def test_inline_list_reference + content = <<~REVIEW + = Chapter + + //list[sample][Sample]{ + code + //} + + See @<list>{sample}. + REVIEW + output = render_inline(content) + assert_match(/リスト1\.1/, output) + # Reference text is rendered but not wrapped in span by InlineElementRenderer + assert_match(/リスト1\.1/, output) + end + + def test_inline_table_reference + content = <<~REVIEW + = Chapter + + //table[sample][Sample]{ + A B + ----- + 1 2 + //} + + See @<table>{sample}. 
+ REVIEW + output = render_inline(content) + assert_match(/表1\.1/, output) + # Reference text is rendered but not wrapped in span by InlineElementRenderer + assert_match(/表1\.1/, output) + end + + def test_inline_img_reference + content = <<~REVIEW + = Chapter + + //image[sample][Sample Image]{ + //} + + See @<img>{sample}. + REVIEW + output = render_inline(content) + assert_match(/図1\.1/, output) + # Reference text is rendered but not wrapped in span by InlineElementRenderer + assert_match(/図1\.1/, output) + end + + # Footnote reference + def test_inline_fn + content = <<~REVIEW + = Chapter + + Text with footnote@<fn>{note1}. + + //footnote[note1][Footnote text here.] + REVIEW + output = render_inline(content) + assert_match(/<a id="fnb-note1" href="#fn-note1"/, output) + assert_match(/class="noteref"/, output) + end + + # Headline reference + def test_inline_hd + content = <<~REVIEW + = Chapter + + == Section Title + + See @<hd>{Section Title}. + REVIEW + output = render_inline(content) + # Should contain section reference + assert_match(/Section Title/, output) + end + + # Section reference + def test_inline_sec + content = <<~REVIEW + = Chapter + + == Section 1 + + See @<sec>{Section 1}. + REVIEW + output = render_inline(content) + assert_match(/1\.1/, output) + end + + def test_inline_sec_respects_secnolevel + @config['secnolevel'] = 1 + @config['chapterlink'] = true + + content = <<~REVIEW + = Chapter + + == Section 1 + + See @<sec>{Section 1}. + REVIEW + + output = render_inline(content) + assert_match(/Section 1/, output) + assert_no_match(/1\.1/, output) + assert_no_match(%r{href="\./test\.html#h1-1"}, output) + end + + # Column reference + def test_inline_column + content = <<~REVIEW + = Chapter + + ===[column] Column Title + + Column content. + + ===[/column] + + See @<column>{Column Title}. 
+ REVIEW + output = render_inline(content) + assert_match(/Column Title/, output) + end + + # Chapter reference + def test_inline_chap + # Use mktmpbookdir to create a proper book with chapters + mktmpbookdir('test.re' => "= Chapter Title\n\nSee @<chap>{test}.\n") do |_dir, book| + chapter = book.chapters[0] + chapter.generate_indexes + book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + output = renderer.render(ast_root) + # Should contain chapter number + assert_match(/第1章/, output) + end + end + + def test_inline_title + # Use mktmpbookdir to create a proper book with chapters + mktmpbookdir('test.re' => "= Chapter Title\n\nSee @<title>{test}.\n") do |_dir, book| + chapter = book.chapters[0] + chapter.generate_indexes + book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + output = renderer.render(ast_root) + assert_match(/Chapter Title/, output) + end + end + + # Page reference (unsupported in HTML) + def test_inline_pageref + content = "= Chapter\n\nSee @<pageref>{test}.\n" + output = render_inline(content) + # Should just output the content without error + assert_match(/test/, output) + end + + # Icon images + def test_inline_icon + content = "= Chapter\n\n@<icon>{sample}\n" + output = render_inline(content) + # Should attempt to reference image or show missing image + assert_match(/sample/, output) + end + + # Escaping special characters + def test_inline_escaping + content = "= Chapter\n\n@<b>{text with <html> & \"quotes\"}\n" + output = render_inline(content) + # Content is escaped once by visit_text, then rendered as-is + assert_match(%r{<b>text with <html> & "quotes"</b>}, output) + end + + # Raw inline content + def test_inline_raw_html + content = "= Chapter\n\nText @<raw>{|html|<span class=\"custom\">HTML</span>} here.\n" + output = render_inline(content) + assert_match(%r{<span 
class="custom">HTML</span>}, output) + end + + def test_inline_raw_other_format + content = "= Chapter\n\nText @<raw>{|latex|\\textbf{LaTeX}} here.\n" + output = render_inline(content) + # Should not output LaTeX content in HTML + assert_no_match(/textbf/, output) + end + + # Complex inline combinations + def test_inline_nested_formatting + content = "= Chapter\n\n@<b>{bold @<i>{and italic\\}}\n" + output = render_inline(content) + assert_match(%r{<b>bold <i>and italic</i></b>}, output) + end + + def test_inline_code_with_special_chars + content = "= Chapter\n\n@<code>{<tag> & \"value\"}\n" + output = render_inline(content) + # Content is escaped once by visit_text, then rendered as-is + assert_match(%r{<code class="inline-code tt"><tag> & "value"</code>}, output) + end + + # Bibliography reference (requires bib file setup) + def test_inline_bib_basic + mktmpbookdir('bib.re' => '//bibpaper[ref1][Reference Title]{Author Name, Publisher, 2020}') do |_dir, book| + chapter = ReVIEW::Book::Chapter.new(book, 1, 'test', 'test.re', StringIO.new("= Chapter\n\nReference @<bib>{ref1}.\n")) + chapter.generate_indexes + book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + output = renderer.render(ast_root) + # Should contain reference markup with the bibliography reference + assert_match(/ref1/, output) + end + end + + # Equation reference + def test_inline_eq_basic + content = <<~REVIEW + = Chapter + + //texequation[eq1]{ + E = mc^2 + //} + + See @<eq>{eq1}. + REVIEW + output = render_inline(content) + # Should contain equation reference + assert_match(/式1\.1/, output) + end + + # Endnote reference + def test_inline_endnote_basic + content = <<~REVIEW + = Chapter + + Text @<endnote>{note1}. 
+ + //endnote[note1][Endnote content] + REVIEW + output = render_inline(content) + # Should contain endnote reference markup + assert_match(/note1/, output) + end + + # Section title reference + def test_inline_sectitle_basic + content = <<~REVIEW + = Chapter + + == Section Title + + See @<sectitle>{Section Title}. + REVIEW + output = render_inline(content) + assert_match(/Section Title/, output) + end +end diff --git a/test/ast/test_html_renderer_join_lines_by_lang.rb b/test/ast/test_html_renderer_join_lines_by_lang.rb new file mode 100644 index 000000000..1ca31f70e --- /dev/null +++ b/test/ast/test_html_renderer_join_lines_by_lang.rb @@ -0,0 +1,114 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require_relative '../support/review/test/html_comparator' +require 'tmpdir' + +class TestHtmlRendererJoinLinesByLang < Test::Unit::TestCase + def test_join_lines_by_lang_disabled + Dir.mktmpdir do |dir| + setup_book(dir, join_lines_by_lang: false) + + File.write(File.join(dir, 'test.re'), <<~RE) += Test + +Japanese text +continues here + +English text +continues here + RE + + converter = ReVIEW::Test::HtmlComparator.new + result = converter.convert_chapter_with_book_context(dir, 'test') + + assert_equal result[:builder], result[:renderer], + 'Builder and Renderer should produce same output when join_lines_by_lang is disabled' + + # Without join_lines_by_lang, lines are joined without any separator + assert result[:builder].include?('Japanese textcontinues here'), + 'Lines should be joined without space when join_lines_by_lang is disabled' + end + end + + def test_join_lines_by_lang_enabled_japanese + Dir.mktmpdir do |dir| + setup_book(dir, join_lines_by_lang: true) + + File.write(File.join(dir, 'test.re'), <<~RE) += テスト + +これは日本語の文章です。 +複数行にわたっています。 + RE + + converter = ReVIEW::Test::HtmlComparator.new + result = converter.convert_chapter_with_book_context(dir, 'test') + + assert_equal result[:builder], result[:renderer], + 'Builder and Renderer 
should produce same output for Japanese text' + + # Japanese text should be joined without space + assert result[:builder].include?('これは日本語の文章です。複数行にわたっています。'), + 'Japanese lines should be joined without space' + end + end + + def test_join_lines_by_lang_enabled_english + Dir.mktmpdir do |dir| + setup_book(dir, join_lines_by_lang: true) + + File.write(File.join(dir, 'test.re'), <<~RE) += Test + +This is English text. +It spans multiple lines. + RE + + converter = ReVIEW::Test::HtmlComparator.new + result = converter.convert_chapter_with_book_context(dir, 'test') + + assert_equal result[:builder], result[:renderer], + 'Builder and Renderer should produce same output for English text' + + # English text should have space between lines + assert result[:builder].include?('This is English text. It spans multiple lines.'), + 'English lines should be joined with space' + end + end + + def test_join_lines_by_lang_mixed_content + Dir.mktmpdir do |dir| + setup_book(dir, join_lines_by_lang: true) + + File.write(File.join(dir, 'test.re'), <<~RE) += Test + +日本語とEnglish混在 +次の行です + RE + + converter = ReVIEW::Test::HtmlComparator.new + result = converter.convert_chapter_with_book_context(dir, 'test') + + assert_equal result[:builder], result[:renderer], + 'Builder and Renderer should produce same output for mixed content' + end + end + + private + + def setup_book(dir, join_lines_by_lang:) + config = { + 'bookname' => 'test', + 'language' => 'ja' + } + config['join_lines_by_lang'] = true if join_lines_by_lang + + File.write(File.join(dir, 'config.yml'), config.to_yaml) + File.write(File.join(dir, 'catalog.yml'), <<~YAML) + CHAPS: + - test.re + YAML + end +end diff --git a/test/ast/test_html_renderer_math.rb b/test/ast/test_html_renderer_math.rb new file mode 100644 index 000000000..0ac047607 --- /dev/null +++ b/test/ast/test_html_renderer_math.rb @@ -0,0 +1,388 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require 'review/ast/compiler' +require 
'review/ast/node' +require 'review/renderer/html_renderer' +require 'review/book' +require 'review/book/chapter' +require 'review/configure' +require 'review/i18n' +require 'tmpdir' + +class TestHtmlRendererMath < Test::Unit::TestCase + def setup + @config = ReVIEW::Configure.values + @config['language'] = 'ja' + @book = ReVIEW::Book::Base.new(config: @config) + + ReVIEW::I18n.setup('ja') + + @compiler = ReVIEW::AST::Compiler.new + end + + # Test for texequation block with mathjax format + def test_texequation_mathjax + @config['math_format'] = 'mathjax' + + content = <<~REVIEW + = Chapter + + //texequation{ + x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a} + //} + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + assert_match(/<div class="equation">/, html_output) + assert_match(/\$\$.*\\frac.*\$\$/, html_output) + assert_match(/x = \\frac\{-b \\pm \\sqrt\{b\^2 - 4ac\}\}\{2a\}/, html_output) + end + + # Test for texequation block with ID and caption using mathjax + def test_texequation_with_id_caption_mathjax + @config['math_format'] = 'mathjax' + + content = <<~REVIEW + = Chapter + + //texequation[quadratic][二次方程式の解の公式]{ + x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a} + //} + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + assert_match(/<div id="quadratic" class="caption-equation">/, html_output) + assert_match(%r{<p class="caption">式1\.1: 二次方程式の解の公式</p>}, html_output) + assert_match(/<div class="equation">/, html_output) + assert_match(/\$\$.*\\frac.*\$\$/, 
html_output) + end + + # Test for mathjax escaping of special characters + def test_texequation_mathjax_escaping + @config['math_format'] = 'mathjax' + + content = <<~REVIEW + = Chapter + + //texequation{ + a < b & c > d + //} + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + # Check that <, >, & are properly escaped for mathjax + assert_match(/\\lt\{\}/, html_output) + assert_match(/\\gt\{\}/, html_output) + assert_match(/&/, html_output) + # Verify that the equation content itself has escaped characters + assert_match(/\$\$a \\lt\{\} b & c \\gt\{\} d\$\$/, html_output) + end + + # Test for inline math with mathjax format + def test_inline_m_mathjax + @config['math_format'] = 'mathjax' + + content = <<~REVIEW + = Chapter + + Einstein's equation is @<m>{E = mc^2}. + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + assert_match(%r{<span class="equation">\\\\?\( E = mc\^2 \\\\?\)</span>}, html_output) + end + + # Test for inline math with mathjax escaping + def test_inline_m_mathjax_escaping + @config['math_format'] = 'mathjax' + + content = <<~REVIEW + = Chapter + + Test equation @<m>{a < b & c > d}. 
+ REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + assert_match(/\\lt\{\}/, html_output) + assert_match(/\\gt\{\}/, html_output) + assert_match(/&/, html_output) + end + + # Test for texequation with mathml format (requires math_ml gem) + def test_texequation_mathml + begin + require 'math_ml' + require 'math_ml/symbol/character_reference' + rescue LoadError + omit('math_ml gem not installed') + end + + @config['math_format'] = 'mathml' + + content = <<~REVIEW + = Chapter + + //texequation{ + E = mc^2 + //} + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + assert_match(/<div class="equation">/, html_output) + # MathML output contains <math> tags + assert_match(/<math/, html_output) + end + + # Test for inline math with mathml format + def test_inline_m_mathml + begin + require 'math_ml' + require 'math_ml/symbol/character_reference' + rescue LoadError + omit('math_ml gem not installed') + end + + @config['math_format'] = 'mathml' + + content = <<~REVIEW + = Chapter + + Einstein's equation is @<m>{E = mc^2}. 
+ REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + assert_match(/<span class="equation">/, html_output) + assert_match(/<math/, html_output) + end + + # Test for texequation with imgmath format (requires latex/dvipng) + def test_texequation_imgmath + # Check if latex and dvipng are available + unless system('which latex > /dev/null 2>&1') && system('which dvipng > /dev/null 2>&1') + omit('latex or dvipng not installed') + end + + Dir.mktmpdir do |tmpdir| + @config['math_format'] = 'imgmath' + @config['imagedir'] = tmpdir + @config['imgmath_options'] = { + 'fontsize' => 12, + 'lineheight' => 14.4, + 'format' => 'png' + } + + content = <<~REVIEW + = Chapter + + //texequation{ + E = mc^2 + //} + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + assert_match(/<div class="equation">/, html_output) + # Should contain img tag with math image + assert_match(%r{<img src=".*_review_math/_gen_.*\.png"}, html_output) + assert_match(/alt="E = mc\^2"/, html_output) + end + end + + # Test for inline math with imgmath format + def test_inline_m_imgmath + unless system('which latex > /dev/null 2>&1') && system('which dvipng > /dev/null 2>&1') + omit('latex or dvipng not installed') + end + + Dir.mktmpdir do |tmpdir| + @config['math_format'] = 'imgmath' + @config['imagedir'] = tmpdir + @config['imgmath_options'] = { + 'fontsize' => 12, + 'lineheight' => 14.4, + 'format' => 'png' + } + + content = <<~REVIEW + = Chapter + + Einstein's equation is @<m>{E = mc^2}. 
+ REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + assert_match(/<span class="equation">/, html_output) + assert_match(%r{<img src=".*_review_math/_gen_.*\.png"}, html_output) + end + end + + # Test for texequation with fallback (no math_format set) + def test_texequation_fallback + @config['math_format'] = nil + + content = <<~REVIEW + = Chapter + + //texequation{ + E = mc^2 + //} + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + assert_match(/<div class="equation">/, html_output) + # Should fall back to <pre> tag + assert_match(/<pre>E = mc\^2/, html_output) + end + + # Test for inline math with fallback + def test_inline_m_fallback + @config['math_format'] = nil + + content = <<~REVIEW + = Chapter + + Einstein's equation is @<m>{E = mc^2}. 
+ REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + assert_match(%r{<span class="equation">E = mc\^2</span>}, html_output) + end + + # Test for caption positioning (top/bottom) + def test_texequation_caption_top + @config['math_format'] = 'mathjax' + @config['caption_position'] = { 'equation' => 'top' } + + content = <<~REVIEW + = Chapter + + //texequation[einstein][アインシュタインの式]{ + E = mc^2 + //} + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + # Caption should appear before equation div + assert_match(%r{<p class="caption">.*アインシュタインの式</p>\s*<div class="equation">}m, html_output) + end + + # Test for caption positioning (bottom) + def test_texequation_caption_bottom + @config['math_format'] = 'mathjax' + @config['caption_position'] = { 'equation' => 'bottom' } + + content = <<~REVIEW + = Chapter + + //texequation[einstein][アインシュタインの式]{ + E = mc^2 + //} + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + # Caption should appear after equation div + assert_match(%r{</div>\s*<p class="caption">.*アインシュタインの式</p>}m, html_output) + end + + # Test for equation reference (@<eq>) + def test_equation_reference + @config['math_format'] = 'mathjax' + + content = <<~REVIEW + = Chapter + + 
//texequation[einstein][アインシュタインの式]{ + E = mc^2 + //} + + See @<eq>{einstein} for details. + REVIEW + + chapter = ReVIEW::Book::Chapter.new(@book, 1, 'test', 'test.re', StringIO.new(content)) + chapter.generate_indexes + @book.generate_indexes + ast_root = @compiler.compile_to_ast(chapter) + renderer = ReVIEW::Renderer::HtmlRenderer.new(chapter) + html_output = renderer.render_body(ast_root) + + # Check equation reference link (with chapterlink enabled, it includes file path) + assert_match(%r{<span class="eqref"><a href=".*#einstein">式1\.1</a></span>}, html_output) + end +end diff --git a/test/ast/test_idgxml_renderer.rb b/test/ast/test_idgxml_renderer.rb new file mode 100644 index 000000000..ec959036b --- /dev/null +++ b/test/ast/test_idgxml_renderer.rb @@ -0,0 +1,1549 @@ +# frozen_string_literal: true + +require_relative '../test_helper' +require_relative '../book_test_helper' +require 'review/ast/compiler' +require 'review/ast/book_indexer' +require 'review/renderer/idgxml_renderer' +require 'review/book' +require 'review/i18n' + +class IdgxmlRendererTest < Test::Unit::TestCase + include ReVIEW + include BookTestHelper + + def setup + @config = ReVIEW::Configure.values + @config['secnolevel'] = 2 + @config['tableopt'] = '10' + @config['builder'] = 'idgxml' # Set builder for tsize processing + @book = Book::Base.new(config: @config) + @log_io = StringIO.new + ReVIEW.logger = ReVIEW::Logger.new(@log_io) + @chapter = Book::Chapter.new(@book, 1, '-', nil, StringIO.new) + I18n.setup('ja') + end + + def compile_block(src, reference_resolution: true) + @chapter.content = src + compiler = ReVIEW::AST::Compiler.for_chapter(@chapter) + ast = compiler.compile_to_ast(@chapter, reference_resolution: reference_resolution) + renderer = ReVIEW::Renderer::IdgxmlRenderer.new(@chapter) + result = renderer.render(ast) + # Strip XML declaration and root doc tags to match expected output format + # Remove leading/trailing newlines but preserve spaces (for //raw blocks) + result = 
result.sub(/\A<\?xml[^>]+\?><doc[^>]*>/, '').sub(%r{</doc>\s*\z}, '') + result.gsub(/\A\n+/, '').gsub(/\n+\z/, '') + end + + def compile_inline(src, reference_resolution: true) + result = compile_block(src, reference_resolution: reference_resolution) + # For inline tests, also strip the paragraph tags if present + # Don't use .strip as it removes important whitespace like newlines from @<br>{} + result = result.sub(/\A<p>/, '').delete_suffix('</p>') if result.start_with?('<p>') + result + end + + def test_headline_level1 + actual = compile_block("={test} this is test.\n") + assert_equal %Q(<title id="test" aid:pstyle="h1">第1章 this is test.), actual + end + + def test_headline_level1_without_secno + @config['secnolevel'] = 0 + actual = compile_block("={test} this is test.\n") + assert_equal %Q(this is test.), actual + end + + def test_headline_level2 + actual = compile_block("=={test} this is test.\n") + assert_equal %Q(1.1 this is test.), actual + end + + def test_headline_level3 + actual = compile_block("==={test} this is test.\n") + assert_equal %Q(this is test.), actual + end + + def test_headline_level3_with_secno + @config['secnolevel'] = 3 + actual = compile_block("==={test} this is test.\n") + assert_equal %Q(1.0.1 this is test.), actual + end + + def test_headline_secttags + @config['structuredxml'] = true + actual = compile_block("= HEAD1\n== HEAD1-1\n\n=== HEAD1-1-1\n\n== HEAD1-2\n\n==== HEAD1-2-0-1\n\n===== HEAD1-2-0-1-1\n\n== HEAD1-3\n") + expected = '第1章 HEAD1' + + '1.1 HEAD1-1' + + 'HEAD1-1-1' + + '1.2 HEAD1-2' + + 'HEAD1-2-0-1' + + 'HEAD1-2-0-1-1' + + '1.3 HEAD1-3' + assert_equal expected, actual + end + + def test_label + actual = compile_block("//label[label_test]\n") + assert_equal %Q(