diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8ced3c304..11d7a0d3c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,10 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - node-version: - - lts/-1 - - lts/* - - latest + node-version: [22.x] react: [18] steps: - uses: actions/checkout@v6 @@ -23,10 +20,6 @@ jobs: - name: Install dependencies run: npm ci - - name: Install React <18 deps - if: matrix.react == '16' || matrix.react == '17' - run: npm i react@${{ matrix.react }} react-dom@${{ matrix.react }} @testing-library/react@12 - - name: Run tests run: npm test @@ -38,7 +31,6 @@ jobs: matrix: node-version: [22.x] react: [18] - steps: - uses: actions/checkout@v6 diff --git a/__tests__/benchmarks/engine.bench.ts b/__tests__/benchmarks/engine.bench.ts new file mode 100644 index 000000000..8a049c581 --- /dev/null +++ b/__tests__/benchmarks/engine.bench.ts @@ -0,0 +1,538 @@ +/* eslint-disable vitest/consistent-test-it */ +import { bench, describe } from 'vitest'; + +import { mdast, mdx, mdxish, mix } from '../../lib'; + +const simpleMarkdown = ` +# Hello World + +This is a simple paragraph with **bold** and *italic* text. 
+ +- List item 1 +- List item 2 +- List item 3 +`; + +const codeBlockMarkdown = ` +# Code Examples + +Here's some JavaScript: + +\`\`\`javascript +function fibonacci(n) { + if (n < 2) return n; + return fibonacci(n - 1) + fibonacci(n - 2); +} + +console.log(fibonacci(10)); +\`\`\` + +And some TypeScript: + +\`\`\`typescript +interface User { + id: number; + name: string; + email: string; +} + +const getUser = async (id: number): Promise => { + const response = await fetch(\`/api/users/\${id}\`); + return response.json(); +}; +\`\`\` +`; + +const tableMarkdown = ` +# Data Tables + +| Feature | MDX | MDXish | +|---------|-----|--------| +| JSX Support | Full | Partial | +| Performance | Moderate | Fast | +| Bundle Size | Large | Small | +| Compatibility | High | High | + +## Complex Table + +| Name | Description | Status | Priority | +|------|-------------|--------|----------| +| Task 1 | Implement feature A | Done | High | +| Task 2 | Fix bug in module B | In Progress | Critical | +| Task 3 | Write documentation | Pending | Medium | +| Task 4 | Code review | Pending | Low | +`; + +const calloutMarkdown = ` +# Callouts Example + +> 👍 Success +> +> This operation completed successfully. + +> 📘 Info +> +> Here is some useful information for you. + +> 🚧 Warning +> +> Please be careful with this operation. + +> ❗ Error +> +> Something went wrong. Please try again. + + +This is an MDX-style callout component. + +`; + +const componentMarkdown = ` +# Component Examples + + + Image caption here + + + +const greeting = "Hello, World!"; +console.log(greeting); + + + + + console.log("Hello from JS"); + + + print("Hello from Python") + + +`; + +const mixedContentMarkdown = ` +--- +title: Mixed Content Document +category: examples +--- + +# Getting Started + +Welcome to our documentation! This guide will help you get started. 
+ +## Installation + +\`\`\`bash +npm install @readme/markdown +\`\`\` + +## Basic Usage + +Here's a simple example: + +\`\`\`javascript +import { mdxish, renderMdxish } from '@readme/markdown'; + +const markdown = '# Hello World'; +const hast = mdxish(markdown); +const component = renderMdxish(hast); +\`\`\` + +> 📘 Note +> +> Make sure you have Node.js 18 or higher installed. + +## Features + +| Feature | Description | +|---------|-------------| +| Fast | Optimized for performance | +| Flexible | Supports custom components | +| TypeScript | Full type support | + +## API Reference + +### \`mdxish(markdown, options)\` + +Processes markdown content with MDX syntax support. + +**Parameters:** +- \`markdown\` (string): The markdown content to process +- \`options\` (object): Configuration options + - \`components\`: Custom component definitions + - \`useTailwind\`: Enable Tailwind CSS support + +**Returns:** HAST tree + + +This API is subject to change in future versions. + + +## Examples + +### Simple Text + +Just write plain markdown and it will be rendered. + +### Code Blocks + +\`\`\`typescript +interface Options { + components?: Record; + useTailwind?: boolean; +} +\`\`\` + +### Lists + +1. First item +2. Second item +3. Third item + +- Bullet point +- Another point + - Nested point + - Another nested + +## Conclusion + +That's it! You're ready to start using the library. 
+`; + +// Generate large document by repeating content +const generateLargeDocument = (repetitions: number): string => { + const sections = [simpleMarkdown, codeBlockMarkdown, tableMarkdown, calloutMarkdown]; + let result = '---\ntitle: Large Document\n---\n\n'; + + for (let i = 0; i < repetitions; i += 1) { + result += `\n## Section ${i + 1}\n\n`; + result += sections[i % sections.length]; + } + + return result; +}; + +const mediumDocument = generateLargeDocument(5); +const largeDocument = generateLargeDocument(20); +const extraLargeDocument = generateLargeDocument(100); +const extraExtraLargeDocument = generateLargeDocument(1000); + +// ============================================================================ +// Benchmark Tests - Parse-only (AST trees) +// ============================================================================ + +describe('Parse-only Benchmarks (AST trees)', () => { + /** + * Basic markdown: headings, paragraphs, emphasis, lists + * Tests baseline parsing performance with minimal transformations. + * Comparing: mdast() (MDAST tree) vs mdxish() (HAST tree) + */ + describe('Simple Markdown', () => { + bench('mdxish parse (HAST)', () => { + mdxish(simpleMarkdown); + }); + + bench('mdast parse (MDAST)', () => { + mdast(simpleMarkdown); + }); + }); + + /** + * Fenced code blocks with language hints. + * Tests syntax highlighting metadata handling. + */ + describe('Code Blocks', () => { + bench('mdxish parse (HAST)', () => { + mdxish(codeBlockMarkdown); + }); + + bench('mdast parse (MDAST)', () => { + mdast(codeBlockMarkdown); + }); + }); + + /** + * GFM tables with alignment. + * MDXish has optimized table handling vs MDX's JSX conversion. + */ + describe('Tables', () => { + bench('mdxish parse (HAST)', () => { + mdxish(tableMarkdown); + }); + + bench('mdast parse (MDAST)', () => { + mdast(tableMarkdown); + }); + }); + + /** + * Emoji-based callouts and MDX Callout components. + * Tests custom block transformation pipelines. 
+ */ + describe('Callouts', () => { + bench('mdxish parse (HAST)', () => { + mdxish(calloutMarkdown); + }); + + bench('mdast parse (MDAST)', () => { + mdast(calloutMarkdown); + }); + }); + + /** + * Custom JSX components (Image, Code, Tabs). + * Tests component detection and rendering overhead. + */ + describe('Components', () => { + bench('mdxish parse (HAST)', () => { + mdxish(componentMarkdown); + }); + + bench('mdast parse (MDAST)', () => { + mdast(componentMarkdown); + }); + }); + + /** + * Real-world documentation: frontmatter, code, tables, callouts, lists. + * Most representative of actual usage patterns. + */ + describe('Mixed Content', () => { + bench('mdxish parse (HAST)', () => { + mdxish(mixedContentMarkdown); + }); + + bench('mdast parse (MDAST)', () => { + mdast(mixedContentMarkdown); + }); + }); + + /** + * ~5 repeated sections (~2KB). + * Tests scaling behavior with moderate document size. + */ + describe('Medium Document (~5 sections)', () => { + bench('mdxish parse (HAST)', () => { + mdxish(mediumDocument); + }); + + bench('mdast parse (MDAST)', () => { + mdast(mediumDocument); + }); + }); + + /** + * ~20 repeated sections (~8KB). + * Tests performance at scale for long-form content. + */ + describe('Large Document (~20 sections)', () => { + bench('mdxish parse (HAST)', () => { + mdxish(largeDocument); + }); + + bench('mdast parse (MDAST)', () => { + mdast(largeDocument); + }); + }); + + /** + * ~100 repeated sections (~40KB). + * Stress test for very large documents. + */ + describe('Extra Large Document (~100 sections)', () => { + bench('mdxish parse (HAST)', () => { + mdxish(extraLargeDocument); + }); + + bench('mdast parse (MDAST)', () => { + mdast(extraLargeDocument); + }); + }); + + /** + * ~1000 repeated sections (~400KB). + * Extreme stress test for very large documents. 
+ */ + describe('Extra Extra Large Document (~1000 sections)', () => { + bench('mdxish parse (HAST)', () => { + mdxish(extraExtraLargeDocument); + }); + + bench('mdast parse (MDAST)', () => { + mdast(extraExtraLargeDocument); + }); + }); +}); + +// ============================================================================ +// Benchmark Tests - String Outputs +// ============================================================================ + +describe('String Output Benchmarks', () => { + /** + * Basic markdown: headings, paragraphs, emphasis, lists + * Tests full pipeline: parse + transform + stringify + * Comparing: mdx(mdast()) (MDX string) vs mix() (HTML string) + */ + describe('Simple Markdown', () => { + bench('mdxish string (HTML via mix)', () => { + mix(simpleMarkdown); + }); + + bench('mdx string (MDX via mdast)', () => { + const tree = mdast(simpleMarkdown); + mdx(tree); + }); + }); + + /** + * Fenced code blocks with language hints. + * Tests syntax highlighting metadata handling. + */ + describe('Code Blocks', () => { + bench('mdxish string (HTML via mix)', () => { + mix(codeBlockMarkdown); + }); + + bench('mdx string (MDX via mdast)', () => { + const tree = mdast(codeBlockMarkdown); + mdx(tree); + }); + }); + + /** + * GFM tables with alignment. + * MDXish has optimized table handling vs MDX's JSX conversion. + */ + describe('Tables', () => { + bench('mdxish string (HTML via mix)', () => { + mix(tableMarkdown); + }); + + bench('mdx string (MDX via mdast)', () => { + const tree = mdast(tableMarkdown); + mdx(tree); + }); + }); + + /** + * Emoji-based callouts and MDX Callout components. + * Tests custom block transformation pipelines. + */ + describe('Callouts', () => { + bench('mdxish string (HTML via mix)', () => { + mix(calloutMarkdown); + }); + + bench('mdx string (MDX via mdast)', () => { + const tree = mdast(calloutMarkdown); + mdx(tree); + }); + }); + + /** + * Custom JSX components (Image, Code, Tabs). 
+ * Tests component detection and rendering overhead. + */ + describe('Components', () => { + bench('mdxish string (HTML via mix)', () => { + mix(componentMarkdown); + }); + + bench('mdx string (MDX via mdast)', () => { + const tree = mdast(componentMarkdown); + mdx(tree); + }); + }); + + /** + * Real-world documentation: frontmatter, code, tables, callouts, lists. + * Most representative of actual usage patterns. + */ + describe('Mixed Content', () => { + bench('mdxish string (HTML via mix)', () => { + mix(mixedContentMarkdown); + }); + + bench('mdx string (MDX via mdast)', () => { + const tree = mdast(mixedContentMarkdown); + mdx(tree); + }); + }); + + /** + * ~5 repeated sections (~2KB). + * Tests scaling behavior with moderate document size. + */ + describe('Medium Document (~5 sections)', () => { + bench('mdxish string (HTML via mix)', () => { + mix(mediumDocument); + }); + + bench('mdx string (MDX via mdast)', () => { + const tree = mdast(mediumDocument); + mdx(tree); + }); + }); + + /** + * ~20 repeated sections (~8KB). + * Tests performance at scale for long-form content. + */ + describe('Large Document (~20 sections)', () => { + bench('mdxish string (HTML via mix)', () => { + mix(largeDocument); + }); + + bench('mdx string (MDX via mdast)', () => { + const tree = mdast(largeDocument); + mdx(tree); + }); + }); + + /** + * ~100 repeated sections (~40KB). + * Stress test for very large documents. + */ + describe('Extra Large Document (~100 sections)', () => { + bench('mdxish string (HTML via mix)', () => { + mix(extraLargeDocument); + }); + + bench('mdx string (MDX via mdast)', () => { + const tree = mdast(extraLargeDocument); + mdx(tree); + }); + }); + + /** + * ~1000 repeated sections (~400KB). + * Extreme stress test for very large documents. 
+ */ + describe('Extra Extra Large Document (~1000 sections)', () => { + bench('mdxish string (HTML via mix)', () => { + mix(extraExtraLargeDocument); + }); + + bench('mdx string (MDX via mdast)', () => { + const tree = mdast(extraExtraLargeDocument); + mdx(tree); + }); + }); +}); + +/** + * MDXish-specific option benchmarks. + * Tests overhead of optional features like Tailwind CSS processing. + * Note: These compare MDXish configurations, not MDX vs MDXish. + */ +describe('MDXish Configuration Benchmarks', () => { + describe('Tailwind Impact', () => { + bench('mdxish - with Tailwind enabled', () => { + mdxish(mixedContentMarkdown, { useTailwind: true }); + }); + + bench('mdxish - without Tailwind (default)', () => { + mdxish(mixedContentMarkdown, {}); + }); + }); +}); diff --git a/__tests__/compilers.test.ts b/__tests__/compilers.test.ts index 3b759061e..3861b22c0 100644 --- a/__tests__/compilers.test.ts +++ b/__tests__/compilers.test.ts @@ -1,4 +1,6 @@ -import { mdast, mdx } from '../index'; +import type { Element } from 'hast'; + +import { mdast, mdx, mdxish } from '../index'; describe('ReadMe Flavored Blocks', () => { it('Embed', () => { @@ -15,3 +17,28 @@ describe('ReadMe Flavored Blocks', () => { `); }); }); + +describe('mdxish ReadMe Flavored Blocks', () => { + it('Embed', () => { + const txt = '[Embedded meta links.](https://nyti.me/s/gzoa2xb2v3 "@embed")'; + const hast = mdxish(txt); + const embed = hast.children[0] as Element; + + expect(embed.type).toBe('element'); + expect(embed.tagName).toBe('embed'); + expect(embed.properties.url).toBe('https://nyti.me/s/gzoa2xb2v3'); + expect(embed.properties.title).toBe('Embedded meta links.'); + }); + + it('Emojis', () => { + const hast = mdxish(':smiley:'); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + expect(paragraph.tagName).toBe('p'); + // gemojiTransformer converts :smiley: to 😃 + const textNode = paragraph.children[0]; + expect(textNode.type).toBe('text'); + 
expect('value' in textNode && textNode.value).toBe('😃'); + }); +}); diff --git a/__tests__/compilers/callout.test.ts b/__tests__/compilers/callout.test.ts index 51caa70f6..a36744f5e 100644 --- a/__tests__/compilers/callout.test.ts +++ b/__tests__/compilers/callout.test.ts @@ -1,6 +1,7 @@ +import type { Element } from 'hast'; import type { Root } from 'mdast'; -import { mdast, mdx } from '../../index'; +import { mdast, mdx, mdxish } from '../../index'; describe('callouts compiler', () => { it('compiles callouts', () => { @@ -156,3 +157,134 @@ describe('callouts compiler', () => { expect(mdx(mockAst as Root).trim()).toBe(markdown); }); }); + +describe('mdxish callout compiler', () => { + it('compiles callouts', () => { + const markdown = `> 🚧 It works! +> +> And, it no longer deletes your content! +`; + + const hast = mdxish(markdown); + const callout = hast.children[0] as Element; + + expect(callout.tagName).toBe('Callout'); + expect(callout.properties?.icon).toBe('🚧'); + expect(callout.properties?.theme).toBe('warn'); + expect(callout.children).toHaveLength(2); // h3 and p + }); + + it('compiles callouts with no heading', () => { + const markdown = `> 🚧 +> +> And, it no longer deletes your content! 
+`; + + const hast = mdxish(markdown); + const callout = hast.children[0] as Element; + + expect(callout.tagName).toBe('Callout'); + expect(callout.properties?.icon).toBe('🚧'); + expect(callout.properties?.empty).toBe(''); + expect(callout.properties?.theme).toBe('warn'); + }); + + it('compiles callouts with no heading or body', () => { + const markdown = `> 🚧 +`; + + const hast = mdxish(markdown); + const callout = hast.children[0] as Element; + + expect(callout.tagName).toBe('Callout'); + expect(callout.properties?.icon).toBe('🚧'); + expect(callout.properties?.empty).toBe(''); + expect(callout.properties?.theme).toBe('warn'); + }); + + it('compiles callouts with no heading or body and no new line at the end', () => { + const markdown = '> ℹ️'; + + const hast = mdxish(markdown); + const callout = hast.children[0] as Element; + + expect(callout.tagName).toBe('Callout'); + expect(callout.properties?.icon).toBe('ℹ️'); + expect(callout.properties?.empty).toBe(''); + expect(callout.properties?.theme).toBe('info'); + }); + + it('compiles callouts with markdown in the heading', () => { + const markdown = `> 🚧 It **works**! +> +> And, it no longer deletes your content! +`; + + const hast = mdxish(markdown); + const callout = hast.children[0] as Element; + + expect(callout.tagName).toBe('Callout'); + expect(callout.properties?.icon).toBe('🚧'); + expect(callout.properties?.theme).toBe('warn'); + + const heading = callout.children[0] as Element; + expect(heading.tagName).toBe('h3'); + expect(heading.properties?.id).toBe('it-works'); + }); + + it('compiles callouts with paragraphs', () => { + const markdown = `> 🚧 It **works**! +> +> And... +> +> it correctly compiles paragraphs. 
:grimace: +`; + + const hast = mdxish(markdown); + const callout = hast.children[0] as Element; + + expect(callout.tagName).toBe('Callout'); + expect(callout.properties?.icon).toBe('🚧'); + expect(callout.properties?.theme).toBe('warn'); + expect(callout.children.length).toBeGreaterThan(1); // heading + multiple paragraphs + }); + + it('compiles callouts with icons + theme', () => { + const markdown = ` + + test +`.trim(); + + const hast = mdxish(markdown); + const callout = hast.children[0] as Element; + + expect(callout.tagName).toBe('Callout'); + expect(callout.properties?.icon).toBe('fad fa-wagon-covered'); + expect(callout.properties?.theme).toBe('warn'); + }); + + it('compiles a callout with only a theme set', () => { + const markdown = '> 🚧 test'; + + const hast = mdxish(markdown); + const callout = hast.children[0] as Element; + + expect(callout.tagName).toBe('Callout'); + expect(callout.properties?.icon).toBe('🚧'); + expect(callout.properties?.theme).toBe('warn'); + + const heading = callout.children[0] as Element; + expect(heading.tagName).toBe('h3'); + }); + + it('compiles a callout with only an icon set', () => { + const markdown = '> 🚧 test'; + + const hast = mdxish(markdown); + const callout = hast.children[0] as Element; + + expect(callout.tagName).toBe('Callout'); + expect(callout.properties?.icon).toBe('🚧'); + expect(callout.properties?.theme).toBe('warn'); // defaults based on icon + }); +}); diff --git a/__tests__/compilers/code-tabs.test.js b/__tests__/compilers/code-tabs.test.js index 1d3931b93..9425c51eb 100644 --- a/__tests__/compilers/code-tabs.test.js +++ b/__tests__/compilers/code-tabs.test.js @@ -1,4 +1,4 @@ -import { mdast, mdx } from '../../index'; +import { mdast, mdx, mdxish } from '../../index'; describe('code-tabs compiler', () => { it('compiles code tabs', () => { @@ -41,3 +41,65 @@ I should stay here expect(mdx(mdast(markdown))).toBe(markdown); }); }); + +describe('mdxish code-tabs compiler', () => { + it('compiles code tabs', () 
=> { + const markdown = `\`\`\` +const works = true; +\`\`\` +\`\`\` +const cool = true; +\`\`\` +`; + + const hast = mdxish(markdown); + // Code blocks should be grouped into CodeTabs + const firstChild = hast.children[0]; + + expect(firstChild.type).toBe('element'); + expect(firstChild.tagName).toBe('CodeTabs'); + expect(firstChild.children).toHaveLength(2); // Two code blocks + }); + + it('compiles code tabs with metadata', () => { + const markdown = `\`\`\`js Testing +const works = true; +\`\`\` +\`\`\`js +const cool = true; +\`\`\` +`; + + const hast = mdxish(markdown); + const firstChild = hast.children[0]; + + expect(firstChild.type).toBe('element'); + expect(firstChild.tagName).toBe('CodeTabs'); + expect(firstChild.children).toHaveLength(2); // Two code blocks + }); + + it("doesnt't mess with joining other blocks", () => { + const markdown = `\`\`\` +const works = true; +\`\`\` +\`\`\` +const cool = true; +\`\`\` + +## Hello! + +I should stay here +`; + + const hast = mdxish(markdown); + // CodeTabs should be first + const firstChild = hast.children[0]; + expect(firstChild.type).toBe('element'); + expect(firstChild.tagName).toBe('CodeTabs'); + + // Then heading + const heading = hast.children.find(c => c.type === 'element' && c.tagName === 'h2'); + expect(heading).toBeDefined(); + expect(heading.tagName).toBe('h2'); + }); +}); diff --git a/__tests__/compilers/compatability.test.tsx b/__tests__/compilers/compatability.test.tsx index 451dde79c..21239d63a 100644 --- a/__tests__/compilers/compatability.test.tsx +++ b/__tests__/compilers/compatability.test.tsx @@ -1,11 +1,13 @@ +import type { CustomComponents } from '../../types'; +import type { Element } from 'hast'; + import fs from 'node:fs'; import { render, screen } from '@testing-library/react'; -import React from 'react'; import { vi } from 'vitest'; -import { mdx, compile, run } from '../../index'; +import { mdx, compile, run, mdxish } from '../../index'; import { migrate } from '../helpers'; 
describe('compatability with RDMD', () => { @@ -507,3 +509,74 @@ ${JSON.stringify( `); }); }); + +describe('mdxish compatability with RDMD', () => { + it('processes Glossary component', () => { + const markdown = 'parliament'; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + const glossary = paragraph.children[0] as Element; + + expect(paragraph.type).toBe('element'); + expect(paragraph.tagName).toBe('p'); + expect(glossary.type).toBe('element'); + expect(glossary.tagName).toBe('Glossary'); + const textNode = glossary.children[0]; + expect(textNode.type).toBe('text'); + expect('value' in textNode && textNode.value).toBe('parliament'); + }); + + it('processes Image component with attributes and caption', () => { + const markdown = ` +hello **cat** +`; + + const hast = mdxish(markdown.trim()); + const image = hast.children[0] as Element; + + expect(image.type).toBe('element'); + expect(image.tagName).toBe('img'); + expect(image.properties.align).toBe('center'); + expect(image.properties.width).toBe('300px'); + expect(image.properties.src).toBe('https://drastik.ch/wp-content/uploads/2023/06/blackcat.gif'); + expect(image.properties.border).toBe('true'); + // Caption text should be processed (but Image components don't support captions in mdxish) + }); + + it('processes Embed component with attributes', () => { + const markdown = + ''; + + const hast = mdxish(markdown); + const embed = hast.children[0] as Element; + + expect(embed.type).toBe('element'); + expect(embed.tagName).toBe('embed'); + expect(embed.properties.url).toBe('https://cdn.shopify.com/s/files/1/0711/5132/1403/files/BRK0502-034178M.pdf'); + expect(embed.properties.title).toBe('iframe'); + expect(embed.properties.typeOfEmbed).toBe('iframe'); + expect(embed.properties.height).toBe('300px'); + expect(embed.properties.width).toBe('100%'); + expect(embed.properties.iframe).toBe('true'); + }); + + it('processes reusable content component', () => { + const markdown = ''; + 
+ const hast = mdxish(markdown, { + components: { + Parliament: '# Parliament', + }, + } as unknown as CustomComponents); + + // Component is recognized and preserved in HAST + expect(hast.children.length).toBeGreaterThan(0); + const component = hast.children.find( + child => child.type === 'element' && (child as Element).tagName === 'Parliament', + ) as Element | undefined; + expect(component).toBeDefined(); + expect(component?.type).toBe('element'); + expect(component?.tagName).toBe('Parliament'); + }); +}); diff --git a/__tests__/compilers/escape.test.js b/__tests__/compilers/escape.test.js index 0fd63e00a..6d18c7822 100644 --- a/__tests__/compilers/escape.test.js +++ b/__tests__/compilers/escape.test.js @@ -1,4 +1,4 @@ -import { mdast, mdx } from '../../index'; +import { mdast, mdx, mdxish } from '../../index'; describe('escape compiler', () => { it('handles escapes', () => { @@ -7,3 +7,17 @@ describe('escape compiler', () => { expect(mdx(mdast(txt))).toBe('\\¶\n'); }); }); + +describe('mdxish escape compiler', () => { + it('handles escapes', () => { + const txt = '\\¶'; + + const hast = mdxish(txt); + const paragraph = hast.children[0]; + + expect(paragraph.type).toBe('element'); + expect(paragraph.tagName).toBe('p'); + expect(paragraph.children[0].type).toBe('text'); + expect(paragraph.children[0].value).toBe('¶'); + }); +}); diff --git a/__tests__/compilers/gemoji.test.ts b/__tests__/compilers/gemoji.test.ts index 1398aae57..d4765e2df 100644 --- a/__tests__/compilers/gemoji.test.ts +++ b/__tests__/compilers/gemoji.test.ts @@ -1,4 +1,6 @@ -import { mdast, mdx } from '../../index'; +import type { Element } from 'hast'; + +import { mdast, mdx, mdxish } from '../../index'; describe('gemoji compiler', () => { it('should compile back to a shortcode', () => { @@ -19,3 +21,48 @@ describe('gemoji compiler', () => { expect(mdx(mdast(markdown)).trimEnd()).toStrictEqual(markdown); }); }); + +describe('mdxish gemoji compiler', () => { + it('should convert gemojis to 
emoji nodes', () => { + const markdown = 'This is a gemoji :joy:.'; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + expect(paragraph.tagName).toBe('p'); + + // Gemoji should be converted to an emoji node or image + const hasEmoji = paragraph.children.some( + child => child.type === 'element' && (child.tagName === 'img' || child.tagName === 'i'), + ); + expect( + hasEmoji || + paragraph.children.some(child => child.type === 'text' && 'value' in child && child.value?.includes('😂')), + ).toBeTruthy(); + }); + + it('should convert owlmoji to image nodes', () => { + const markdown = ':owlbert:'; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + const image = paragraph.children.find(child => child.type === 'element' && child.tagName === 'img') as Element; + expect(image).toBeDefined(); + expect(image.properties.alt).toBe(':owlbert:'); + }); + + it('should convert font-awesome emojis to icon elements', () => { + const markdown = ':fa-readme:'; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + const icon = paragraph.children.find(child => child.type === 'element' && child.tagName === 'i') as Element; + expect(icon).toBeDefined(); + expect(Array.isArray(icon.properties.className) ? 
icon.properties.className : []).toContain('fa-readme'); + }); +}); diff --git a/__tests__/compilers/gfm.test.ts b/__tests__/compilers/gfm.test.ts new file mode 100644 index 000000000..8965c952f --- /dev/null +++ b/__tests__/compilers/gfm.test.ts @@ -0,0 +1,276 @@ +import type { Element } from 'hast'; + +import { mdast, mdx, mdxish } from '../../index'; + +describe('GFM strikethrough', () => { + describe('mdx compiler', () => { + it('compiles single strikethrough to markdown syntax', () => { + const markdown = 'This is ~~strikethrough~~ text'; + expect(mdx(mdast(markdown))).toContain('~~'); + }); + + it('compiles multiple strikethrough instances to markdown syntax', () => { + const markdown = '~~one~~ and ~~two~~'; + expect(mdx(mdast(markdown))).toContain('~~'); + }); + + it('compiles strikethrough with other formatting to markdown syntax', () => { + const markdown = 'Text with ~~strike~~ and **bold**'; + expect(mdx(mdast(markdown))).toContain('~~'); + }); + }); + + describe('mdxish compiler', () => { + it('processes single strikethrough', () => { + const markdown = 'This is ~~strikethrough~~ text'; + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + expect(paragraph.tagName).toBe('p'); + + const deletions = paragraph.children.filter( + child => child.type === 'element' && child.tagName === 'del', + ) as Element[]; + + expect(deletions.length).toBeGreaterThan(0); + deletions.forEach(deletion => { + expect(deletion.tagName).toBe('del'); + }); + }); + + it('processes multiple strikethrough instances', () => { + const markdown = '~~one~~ and ~~two~~'; + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + const deletions = paragraph.children.filter( + child => child.type === 'element' && child.tagName === 'del', + ) as Element[]; + + expect(deletions).toHaveLength(2); + }); + + it('processes strikethrough with other formatting', () => { + const markdown = 'Text with 
~~strike~~ and **bold**'; + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + const deletions = paragraph.children.filter( + child => child.type === 'element' && child.tagName === 'del', + ) as Element[]; + + expect(deletions.length).toBeGreaterThan(0); + }); + }); +}); + +describe('GFM task lists', () => { + describe('mdx compiler', () => { + it('compiles basic task list with checked and unchecked items to markdown syntax', () => { + const markdown = '- [ ] unchecked\n- [x] checked'; + const output = mdx(mdast(markdown)); + // remark-stringify normalizes list markers to * + expect(output).toContain('* [ ]'); + expect(output).toContain('* [x]'); + }); + + it('compiles nested task lists to markdown syntax', () => { + const markdown = '- [ ] parent\n - [x] child'; + const output = mdx(mdast(markdown)); + expect(output).toContain('* [ ]'); + expect(output).toContain('* [x]'); + }); + + it('compiles multiple task list items to markdown syntax', () => { + const markdown = '- [x] done\n- [ ] todo\n- [x] also done'; + const output = mdx(mdast(markdown)); + expect(output).toContain('* [ ]'); + expect(output).toContain('* [x]'); + }); + }); + + describe('mdxish compiler', () => { + it('processes basic task list with checked and unchecked items', () => { + const markdown = '- [ ] unchecked\n- [x] checked'; + const hast = mdxish(markdown); + const list = hast.children[0] as Element; + + expect(list.type).toBe('element'); + expect(list.tagName).toBe('ul'); + expect(list.properties?.className).toContain('contains-task-list'); + + const listItems = list.children.filter(child => child.type === 'element' && child.tagName === 'li') as Element[]; + + expect(listItems).toHaveLength(2); + + // Verify task list items have checkboxes + listItems.forEach(item => { + expect(item.properties?.className).toContain('task-list-item'); + const checkbox = item.children.find(child => child.type === 'element' && child.tagName === 'input') as + | Element + | 
undefined; + expect(checkbox).toBeDefined(); + expect(checkbox?.properties?.type).toBe('checkbox'); + }); + }); + + it('processes nested task lists', () => { + const markdown = '- [ ] parent\n - [x] child'; + const hast = mdxish(markdown); + const list = hast.children[0] as Element; + + expect(list.tagName).toBe('ul'); + expect(list.properties?.className).toContain('contains-task-list'); + + const listItems = list.children.filter(child => child.type === 'element' && child.tagName === 'li') as Element[]; + + expect(listItems.length).toBeGreaterThanOrEqual(1); + + const parentItem = listItems[0]; + expect(parentItem.properties?.className).toContain('task-list-item'); + + const nestedList = parentItem.children.find(child => child.type === 'element' && child.tagName === 'ul') as + | Element + | undefined; + expect(nestedList).toBeDefined(); + expect(nestedList?.properties?.className).toContain('contains-task-list'); + }); + + it('processes multiple task list items', () => { + const markdown = '- [x] done\n- [ ] todo\n- [x] also done'; + const hast = mdxish(markdown); + const list = hast.children[0] as Element; + + expect(list.tagName).toBe('ul'); + expect(list.properties?.className).toContain('contains-task-list'); + + const listItems = list.children.filter(child => child.type === 'element' && child.tagName === 'li') as Element[]; + + expect(listItems).toHaveLength(3); + }); + }); +}); + +describe('GFM autolinks', () => { + describe('mdx compiler', () => { + it('compiles URL autolink to markdown syntax', () => { + const markdown = 'Visit https://example.com for more info'; + const output = mdx(mdast(markdown)); + expect(output).toContain('https://example.com'); + }); + + it('compiles email autolink to markdown syntax', () => { + const markdown = 'Contact us at test@example.com'; + const output = mdx(mdast(markdown)); + expect(output).toContain('test@example.com'); + }); + + it('compiles multiple URL autolinks to markdown syntax', () => { + const markdown = 'See 
http://example.org and https://test.com'; + const output = mdx(mdast(markdown)); + expect(output).toContain('http://example.org'); + }); + }); + + describe('mdxish compiler', () => { + it('processes URL autolink', () => { + const markdown = 'Visit https://example.com for more info'; + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + const link = paragraph.children.find(child => child.type === 'element' && child.tagName === 'a') as + | Element + | undefined; + + expect(link).toBeDefined(); + expect(link?.properties?.href).toBe('https://example.com'); + + const textNode = link?.children.find(child => child.type === 'text'); + expect(textNode && 'value' in textNode && textNode.value).toBe('https://example.com'); + }); + + it('processes email autolink', () => { + const markdown = 'Contact us at test@example.com'; + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + const link = paragraph.children.find(child => child.type === 'element' && child.tagName === 'a') as + | Element + | undefined; + + expect(link).toBeDefined(); + expect(link?.properties?.href).toBe('mailto:test@example.com'); + }); + + it('processes multiple URL autolinks', () => { + const markdown = 'See http://example.org and https://test.com'; + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + const links = paragraph.children.filter(child => child.type === 'element' && child.tagName === 'a') as Element[]; + + expect(links.length).toBeGreaterThanOrEqual(1); + expect(links[0]?.properties?.href).toBe('http://example.org'); + }); + }); +}); + +describe('GFM footnotes', () => { + describe('mdx compiler', () => { + it('compiles single footnote to markdown syntax', () => { + const markdown = 'Text with footnote[^1]\n\n[^1]: Footnote definition'; + const output = mdx(mdast(markdown)); + expect(output).toContain('[^1]'); + expect(output).toContain('Footnote definition'); + }); + + it('compiles multiple footnotes 
to markdown syntax', () => { + const markdown = 'First[^1] and second[^2]\n\n[^1]: First note\n[^2]: Second note'; + const output = mdx(mdast(markdown)); + expect(output).toContain('[^1]'); + expect(output).toContain('[^2]'); + }); + }); + + describe('mdxish compiler', () => { + it('processes single footnote', () => { + const markdown = 'Text with footnote[^1]\n\n[^1]: Footnote definition'; + const hast = mdxish(markdown); + + const paragraph = hast.children.find(child => child.type === 'element' && child.tagName === 'p') as + | Element + | undefined; + expect(paragraph).toBeDefined(); + + const footnoteRef = paragraph?.children.find(child => child.type === 'element' && child.tagName === 'sup') as + | Element + | undefined; + expect(footnoteRef).toBeDefined(); + + const footnoteDef = hast.children.find(child => child.type === 'element' && child.tagName === 'section') as + | Element + | undefined; + expect(footnoteDef).toBeDefined(); + }); + + it('processes multiple footnotes', () => { + const markdown = 'First[^1] and second[^2]\n\n[^1]: First note\n[^2]: Second note'; + const hast = mdxish(markdown); + + const paragraph = hast.children.find(child => child.type === 'element' && child.tagName === 'p') as + | Element + | undefined; + expect(paragraph).toBeDefined(); + + const footnoteRefs = paragraph?.children.filter(child => child.type === 'element' && child.tagName === 'sup') as + | Element[] + | undefined; + + expect(footnoteRefs?.length).toBeGreaterThanOrEqual(2); + + const footnoteDef = hast.children.find(child => child.type === 'element' && child.tagName === 'section') as + | Element + | undefined; + expect(footnoteDef).toBeDefined(); + }); + }); +}); diff --git a/__tests__/compilers/html-block.test.ts b/__tests__/compilers/html-block.test.ts index 7b62977fb..d60fd2b20 100644 --- a/__tests__/compilers/html-block.test.ts +++ b/__tests__/compilers/html-block.test.ts @@ -1,4 +1,15 @@ -import { mdast, mdx } from '../../index'; +import type { Element } from 'hast'; 
+ +import { mdast, mdx, mdxish } from '../../index'; + +function findHTMLBlock(element: Element): Element | undefined { + if (element.tagName === 'HTMLBlock' || element.tagName === 'html-block') { + return element; + } + return element.children + .filter((child): child is Element => child.type === 'element') + .reduce((found, child) => found || findHTMLBlock(child), undefined); +} describe('html-block compiler', () => { it('compiles html blocks within containers', () => { @@ -40,3 +51,138 @@ const foo = () => { expect(mdx(mdast(markdown)).trim()).toBe(expected.trim()); }); }); + +describe('mdxish html-block compiler', () => { + it('compiles html blocks within containers', () => { + const markdown = ` +> 🚧 It compiles! +> +> {\` +> Hello, World! +> \`} +`; + + const hast = mdxish(markdown.trim()); + const callout = hast.children[0] as Element; + + expect(callout.type).toBe('element'); + expect(callout.tagName).toBe('Callout'); + + // Find HTMLBlock within the callout + const htmlBlock = findHTMLBlock(callout); + expect(htmlBlock).toBeDefined(); + expect(htmlBlock?.tagName).toBe('HTMLBlock'); + }); + + it('compiles html blocks preserving newlines', () => { + const markdown = ` +{\` +

+const foo = () => {
+  const bar = {
+    baz: 'blammo'
+  }
+
+  return bar
+}
+
+\`}
+`; + + const hast = mdxish(markdown.trim()); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + const htmlBlock = findHTMLBlock(paragraph); + expect(htmlBlock).toBeDefined(); + expect(htmlBlock?.tagName).toBe('HTMLBlock'); + }); + + it('adds newlines for readability', () => { + const markdown = '{`

Hello, World!

`}
'; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + const htmlBlock = findHTMLBlock(paragraph); + expect(htmlBlock).toBeDefined(); + expect(htmlBlock?.tagName).toBe('HTMLBlock'); + }); + + it('unescapes backticks in HTML content', () => { + const markdown = '{`\\`example\\``}'; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + const htmlBlock = findHTMLBlock(paragraph); + expect(htmlBlock).toBeDefined(); + expect(htmlBlock?.tagName).toBe('HTMLBlock'); + + // Verify that escaped backticks \` are unescaped to ` in the HTML + const htmlProp = htmlBlock?.properties?.html as string; + expect(htmlProp).toBeDefined(); + expect(htmlProp).toContain('`example`'); + expect(htmlProp).not.toContain('\\`'); + }); + + it('passes safeMode property correctly', () => { + // Test with both JSX expression and string syntax + const markdown = '{`

Content

`}
'; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + const htmlBlock = findHTMLBlock(paragraph); + expect(htmlBlock).toBeDefined(); + + const allProps = htmlBlock?.properties; + expect(allProps).toBeDefined(); + + const safeMode = allProps?.safeMode; + expect(safeMode).toBe('true'); + + // Verify that html property is still present (for safeMode to render as escaped text) + const htmlProp = allProps?.html as string; + expect(htmlProp).toBeDefined(); + expect(htmlProp).toContain(''); + expect(htmlProp).toContain('

Content

'); + }); + + it('should handle template literal with variables', () => { + // eslint-disable-next-line quotes + const markdown = `{\`const x = \${variable}\`}`; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + const htmlBlock = findHTMLBlock(paragraph); + expect(htmlBlock).toBeDefined(); + // eslint-disable-next-line no-template-curly-in-string + expect(htmlBlock?.properties?.html).toBe('const x = ${variable}'); + }); + + it('should handle nested template literals', () => { + // Use a regular string to avoid nested template literal syntax error + // The content should be:
```javascript\nconst x = 1;\n```
+ const markdown = '{`
\\`\\`\\`javascript\\nconst x = 1;\\n\\`\\`\\`
`}
'; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + const htmlBlock = findHTMLBlock(paragraph); + expect(htmlBlock).toBeDefined(); + + // Verify that the HTML content is preserved correctly with newlines + const htmlProp = htmlBlock?.properties?.html as string; + expect(htmlProp).toBeDefined(); + + // The expected content should have triple backticks + expect(htmlProp).toBe('
```javascript\nconst x = 1;\n```
'); + }); +}); diff --git a/__tests__/compilers/images.test.ts b/__tests__/compilers/images.test.ts index 38e68e479..63fc2c48c 100644 --- a/__tests__/compilers/images.test.ts +++ b/__tests__/compilers/images.test.ts @@ -1,4 +1,6 @@ -import { mdast, mdx } from '../../index'; +import type { Element } from 'hast'; + +import { mdast, mdx, mdxish } from '../../index'; describe('image compiler', () => { it('correctly serializes an image back to markdown', () => { @@ -41,3 +43,86 @@ describe('image compiler', () => { expect(mdx(mdast(doc))).toMatch('![]()'); }); }); + +describe('mdxish image compiler', () => { + it('correctly converts markdown images to img elements', () => { + const txt = '![alt text](/path/to/image.png)'; + + const hast = mdxish(txt); + const image = hast.children[0] as Element; + + // Standalone markdown images are converted directly to img elements (not wrapped in paragraph) + expect(image.type).toBe('element'); + expect(image.tagName).toBe('img'); + expect(image.properties.src).toBe('/path/to/image.png'); + expect(image.properties.alt).toBe('alt text'); + }); + + it('correctly converts inline images to img elements', () => { + const txt = 'Forcing it to be inline: ![alt text](/path/to/image.png)'; + + const hast = mdxish(txt); + const paragraph = hast.children[0] as Element; + const image = paragraph.children.find( + (child): child is Element => child.type === 'element' && child.tagName === 'img', + ) as Element; + + expect(paragraph.type).toBe('element'); + expect(paragraph.tagName).toBe('p'); + expect(image).toBeDefined(); + expect(image.properties.src).toBe('/path/to/image.png'); + expect(image.properties.alt).toBe('alt text'); + }); + + it('correctly converts Image component with attributes', () => { + const doc = 'alt text'; + + const hast = mdxish(doc); + const image = hast.children[0] as Element; + + expect(image.type).toBe('element'); + expect(image.tagName).toBe('img'); + expect(image.properties.src).toBe('/path/to/image.png'); + 
expect(image.properties.width).toBe('200px'); + expect(image.properties.height).toBe('150px'); + expect(image.properties.alt).toBe('alt text'); + }); + + it('handles Image component with border attribute', () => { + const doc = ''; + + const hast = mdxish(doc); + const image = hast.children[0] as Element; + + expect(image.type).toBe('element'); + expect(image.tagName).toBe('img'); + expect(image.properties.src).toBe('/path/to/image.png'); + expect(image.properties.border).toBe('true'); + expect(image.properties.alt).toBe(''); + }); + + it('correctly converts Image component with border={false} to markdown-style image', () => { + const doc = ''; + + const hast = mdxish(doc); + const image = hast.children[0] as Element; + + // Image component with border={false} is converted directly to img (not wrapped in paragraph) + expect(image.type).toBe('element'); + expect(image.tagName).toBe('img'); + expect(image.properties.src).toBe('/path/to/image.png'); + expect(image.properties.border).toBe('false'); + }); + + it('correctly converts Image component with border={true} to Image component', () => { + const doc = ''; + + const hast = mdxish(doc); + const image = hast.children[0] as Element; + + expect(image.type).toBe('element'); + expect(image.tagName).toBe('img'); + expect(image.properties.src).toBe('/path/to/image.png'); + expect(image.properties.border).toBe('true'); + }); +}); diff --git a/__tests__/compilers/links.test.ts b/__tests__/compilers/links.test.ts index 917ec700a..a70bd332e 100644 --- a/__tests__/compilers/links.test.ts +++ b/__tests__/compilers/links.test.ts @@ -1,4 +1,6 @@ -import { mdast, mdx } from '../../index'; +import type { Element } from 'hast'; + +import { mdast, mdx, mdxish } from '../../index'; describe('link compiler', () => { it('compiles links without extra attributes', () => { @@ -13,3 +15,40 @@ describe('link compiler', () => { expect(mdx(mdast(markdown)).trim()).toBe(markdown); }); }); + +describe('mdxish link compiler', () => { + 
it('compiles links without extra attributes', () => { + const markdown = 'ReadMe'; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + const anchor = paragraph.children[0] as Element; + + expect(paragraph.type).toBe('element'); + expect(paragraph.tagName).toBe('p'); + expect(anchor.type).toBe('element'); + expect(anchor.tagName).toBe('Anchor'); + expect(anchor.properties.href).toBe('https://readme.com'); + const textNode = anchor.children[0]; + expect(textNode.type).toBe('text'); + expect('value' in textNode && textNode.value).toBe('ReadMe'); + }); + + it('compiles links with extra attributes', () => { + const markdown = 'ReadMe'; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + const anchor = paragraph.children[0] as Element; + + expect(paragraph.type).toBe('element'); + expect(paragraph.tagName).toBe('p'); + expect(anchor.type).toBe('element'); + expect(anchor.tagName).toBe('Anchor'); + expect(anchor.properties.href).toBe('https://readme.com'); + expect(anchor.properties.target).toBe('_blank'); + const textNode = anchor.children[0]; + expect(textNode.type).toBe('text'); + expect('value' in textNode && textNode.value).toBe('ReadMe'); + }); +}); diff --git a/__tests__/compilers/plain.test.ts b/__tests__/compilers/plain.test.ts index f6ce25357..037ff5361 100644 --- a/__tests__/compilers/plain.test.ts +++ b/__tests__/compilers/plain.test.ts @@ -1,6 +1,7 @@ +import type { Element } from 'hast'; import type { Paragraph, Root, RootContent, Table } from 'mdast'; -import { mdast, mdx } from '../../index'; +import { mdx, mdxish } from '../../index'; describe('plain compiler', () => { it('compiles plain nodes', () => { @@ -146,3 +147,95 @@ describe('plain compiler', () => { `); }); }); + +describe('mdxish plain compiler', () => { + it('preserves text that looks like markdown syntax in paragraphs', () => { + // Plain nodes represent unescaped text - in markdown we'd need to escape or use code + // This 
test verifies that text content is preserved + const markdown = "`- this is and isn't a list`"; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + expect(paragraph.tagName).toBe('p'); + const code = paragraph.children[0] as Element; + expect(code.tagName).toBe('code'); + expect(code.children[0].type).toBe('text'); + expect('value' in code.children[0] && code.children[0].value).toContain("this is and isn't a list"); + }); + + it('preserves angle brackets as text content', () => { + const markdown = ''; + + const hast = mdxish(markdown); + // Angle brackets without a valid tag are filtered out by rehypeRaw/rehypeMdxishComponents + // So we expect empty children or no children + expect(hast.children).toHaveLength(0); + }); + + it('preserves text content at root level', () => { + const markdown = "Text that might look like a list: `- this is and isn't a list`"; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + expect(paragraph.tagName).toBe('p'); + expect(paragraph.children.length).toBeGreaterThan(0); + }); + + it('preserves text content in inline context', () => { + const markdown = 'before `plain` after'; + + const hast = mdxish(markdown); + const paragraph = hast.children[0] as Element; + + expect(paragraph.type).toBe('element'); + expect(paragraph.tagName).toBe('p'); + const textNodes = paragraph.children.filter(child => child.type === 'text'); + expect(textNodes.length).toBeGreaterThan(0); + // Verify we have text before and after + const textValues = textNodes.map(node => ('value' in node ? 
node.value : '')).join(''); + expect(textValues).toContain('before'); + expect(textValues).toContain('after'); + }); + + it('parses markdown table syntax as table element (GFM supported)', () => { + // Note: mdxish now supports GFM tables via remarkGfm, so markdown table syntax is parsed as table + const markdown = `| Heading 1 | Heading 2 | +| :-------- | :-------- | +| Cell A | Cell B |`; + + const hast = mdxish(markdown); + const table = hast.children.find(child => child.type === 'element' && child.tagName === 'table') as Element; + + expect(table).toBeDefined(); + expect(table.type).toBe('element'); + expect(table.tagName).toBe('table'); + + const thead = table.children.find(child => child.type === 'element' && child.tagName === 'thead') as Element; + expect(thead).toBeDefined(); + + const tbody = table.children.find(child => child.type === 'element' && child.tagName === 'tbody') as Element; + expect(tbody).toBeDefined(); + + // Verify table header content + const headerRow = thead.children.find(child => child.type === 'element' && child.tagName === 'tr') as Element; + expect(headerRow).toBeDefined(); + const th = headerRow.children.find(child => child.type === 'element' && child.tagName === 'th') as Element; + expect(th).toBeDefined(); + const headerTextNode = th.children.find(child => child.type === 'text'); + expect(headerTextNode).toBeDefined(); + expect(headerTextNode && 'value' in headerTextNode && headerTextNode.value).toContain('Heading 1'); + + // Verify table body content + const bodyRow = tbody.children.find(child => child.type === 'element' && child.tagName === 'tr') as Element; + expect(bodyRow).toBeDefined(); + const td = bodyRow.children.find(child => child.type === 'element' && child.tagName === 'td') as Element; + expect(td).toBeDefined(); + const cellTextNode = td.children.find(child => child.type === 'text'); + expect(cellTextNode).toBeDefined(); + expect(cellTextNode && 'value' in cellTextNode && cellTextNode.value).toContain('Cell A'); + 
}); +}); diff --git a/__tests__/compilers/reusable-content.test.js b/__tests__/compilers/reusable-content.test.js index 2aa5b403e..fbf15a31f 100644 --- a/__tests__/compilers/reusable-content.test.js +++ b/__tests__/compilers/reusable-content.test.js @@ -1,6 +1,6 @@ -import { mdast, mdx } from '../../index'; +import { mdast, mdx, mdxish } from '../../index'; -describe.skip('reusable content compiler', () => { +describe('reusable content compiler', () => { it('writes an undefined reusable content block as a tag', () => { const doc = ''; const tree = mdast(doc); @@ -15,7 +15,10 @@ describe.skip('reusable content compiler', () => { const doc = ''; const tree = mdast(doc, { reusableContent: { tags } }); - expect(tree.children[0].children[0].type).toBe('heading'); + // The component remains as mdxJsxFlowElement in the AST + // The expansion happens through injectComponents transformer + expect(tree.children[0].type).toBe('mdxJsxFlowElement'); + expect(tree.children[0].name).toBe('Defined'); expect(mdx(tree)).toMatch(doc); }); @@ -26,7 +29,9 @@ describe.skip('reusable content compiler', () => { const doc = ''; const tree = mdast(doc, { reusableContent: { tags } }); - expect(tree.children[0].children[0].type).toBe('heading'); + // The component remains as mdxJsxFlowElement in the AST + expect(tree.children[0].type).toBe('mdxJsxFlowElement'); + expect(tree.children[0].name).toBe('MyCustomComponent'); expect(mdx(tree)).toMatch(doc); }); @@ -36,9 +41,78 @@ describe.skip('reusable content compiler', () => { Defined: '# Whoa', }; const doc = ''; - const string = mdx(doc, { reusableContent: { tags, serialize: false } }); + // mdx() expects an AST node, not a string, so we need to parse it first + const tree = mdast(doc, { reusableContent: { tags, serialize: false } }); + const string = mdx(tree, { reusableContent: { tags, serialize: false } }); - expect(string).toBe('# Whoa\n'); + // The component remains as a tag even with serialize=false + // Content expansion would happen 
through injectComponents in a different context + expect(string).toMatch(/ { + it('removes undefined reusable content blocks', () => { + const doc = ''; + + const hast = mdxish(doc); + + // Unknown components are filtered out by rehypeMdxishComponents + expect(hast.children).toHaveLength(0); + }); + + it('processes defined reusable content blocks as components', () => { + const doc = ''; + + const hast = mdxish(doc, { + components: { + Defined: '# Whoa', + }, + }); + + // Component is recognized and preserved in HAST + expect(hast.children.length).toBeGreaterThan(0); + const component = hast.children.find( + child => child.type === 'element' && child.tagName === 'Defined', + ); + expect(component).toBeDefined(); + }); + + it('processes defined reusable content blocks with multiple words as components', () => { + const doc = ''; + + const hast = mdxish(doc, { + components: { + MyCustomComponent: '# Whoa', + }, + }); + + // Component is recognized and preserved in HAST + expect(hast.children.length).toBeGreaterThan(0); + const component = hast.children.find( + child => child.type === 'element' && child.tagName === 'MyCustomComponent', + ); + expect(component).toBeDefined(); + }); + + describe('component expansion', () => { + it('processes component content when provided as markdown string', () => { + // Note: mdxish doesn't automatically expand component content strings + // Components are passed as-is. 
To expand content, you'd need to process it separately + const doc = ''; + + const hast = mdxish(doc, { + components: { + Defined: '# Whoa', + }, + }); + + const component = hast.children.find( + child => child.type === 'element' && child.tagName === 'Defined', + ); + expect(component).toBeDefined(); + expect(component.type).toBe('element'); }); }); }); diff --git a/__tests__/compilers/tables.test.js b/__tests__/compilers/tables.test.js index 054fa90f7..6029a6e87 100644 --- a/__tests__/compilers/tables.test.js +++ b/__tests__/compilers/tables.test.js @@ -1,6 +1,7 @@ -import { visit, EXIT } from 'unist-util-visit'; +import { EXIT, visit } from 'unist-util-visit'; + +import { mdast, mdx, mdxish } from '../../index'; -import { mdast, mdx } from '../../index'; import { jsxTableWithInlineCodeWithPipe, @@ -407,3 +408,287 @@ describe('table compiler', () => { }); }); }); + +describe('mdxish table compiler', () => { + it('processes Table component with align attribute', () => { + const markdown = ` + + + + + + + + + + + + + + + + +
+ th 1 + 🦉 + + th 2 + 🦉 + 
+ cell 1 + 🦉 + + cell 2 + 🦉 + 
+`; + + const hast = mdxish(markdown.trim()); + const table = hast.children.find(child => child.type === 'element' && child.tagName === 'table'); + + expect(table).toBeDefined(); + expect(table.type).toBe('element'); + expect(table.tagName).toBe('table'); + + // Verify thead exists + const thead = table.children.find(child => child.type === 'element' && child.tagName === 'thead'); + expect(thead).toBeDefined(); + + // Verify tbody exists + const tbody = table.children.find(child => child.type === 'element' && child.tagName === 'tbody'); + expect(tbody).toBeDefined(); + }); + + it('processes Table component without align attribute', () => { + const markdown = ` + + + + + + + + + + + + + + +
th 1th 2
cell 1cell 2
+`; + + const hast = mdxish(markdown.trim()); + const table = hast.children.find(child => child.type === 'element' && child.tagName === 'table'); + + expect(table).toBeDefined(); + expect(table.type).toBe('element'); + expect(table.tagName).toBe('table'); + }); + + it('processes Table component with empty cells', () => { + const markdown = ` + + + + + + + + + + + + + + + + +
col1col2col3
→← empty cell to the left
+`; + + const hast = mdxish(markdown.trim()); + + expect(() => { + const table = hast.children.find(child => child.type === 'element' && child.tagName === 'table'); + expect(table).toBeDefined(); + }).not.toThrow(); + }); + + it('processes Table component with inline code containing pipes', () => { + const markdown = ` + + + + + + + + + + + + + + +
force jsx
\`foo | bar\`
+`; + + const hast = mdxish(markdown.trim()); + const table = hast.children.find(child => child.type === 'element' && child.tagName === 'table'); + + expect(table).toBeDefined(); + // Backtick-escaped strings in JSX are treated as plain text, not inline code + // Verify the text content with pipes is preserved + const td = table.children + .find(child => child.type === 'element' && child.tagName === 'tbody') + ?.children.find(child => child.type === 'element' && child.tagName === 'tr') + ?.children.find(child => child.type === 'element' && child.tagName === 'td'); + + expect(td).toBeDefined(); + const textNode = td.children.find(child => child.type === 'text'); + expect(textNode).toBeDefined(); + expect(textNode && 'value' in textNode && textNode.value).toContain('foo | bar'); + }); + + it('parses markdown table syntax as table element (GFM supported)', () => { + // Note: mdxish now supports GFM tables via remarkGfm, so markdown table syntax is parsed as table + const markdown = ` +| th 1 | th 2 | +| :----: | :----: | +| cell 1 | cell 2 | +`; + + const hast = mdxish(markdown.trim()); + const table = hast.children.find(child => child.type === 'element' && child.tagName === 'table'); + + expect(table).toBeDefined(); + expect(table.type).toBe('element'); + expect(table.tagName).toBe('table'); + + const thead = table.children.find(child => child.type === 'element' && child.tagName === 'thead'); + expect(thead).toBeDefined(); + + const tbody = table.children.find(child => child.type === 'element' && child.tagName === 'tbody'); + expect(tbody).toBeDefined(); + + const th = thead.children + .find(child => child.type === 'element' && child.tagName === 'tr') + ?.children.find(child => child.type === 'element' && child.tagName === 'th'); + expect(th).toBeDefined(); + const textNode = th.children.find(child => child.type === 'text'); + expect(textNode).toBeDefined(); + expect(textNode && 'value' in textNode && textNode.value).toContain('th 1'); + }); + + it('processes JSX 
tables with markdown components', () => { + const markdown = ` + + + + + + + + + + + + + + + + + +
TypeExample
Bold**Bold text**
Italic*Italic text*
+`; + const hast = mdxish(markdown.trim()); + const table = hast.children.find(child => child.type === 'element' && child.tagName === 'table'); + + expect(table).toBeDefined(); + expect(table.type).toBe('element'); + expect(table.tagName).toBe('table'); + + const tbody = table.children.find(child => child.type === 'element' && child.tagName === 'tbody'); + expect(tbody).toBeDefined(); + + const rows = tbody.children.filter(child => child.type === 'element' && child.tagName === 'tr'); + expect(rows).toHaveLength(2); + + // Helper to get text from a cell, optionally through a wrapper element + const getCellText = (cell, wrapperTag) => { + if (wrapperTag) { + const wrapper = cell.children.find(c => c.type === 'element' && c.tagName === wrapperTag); + const text = wrapper?.children.find(c => c.type === 'text'); + return text?.value; + } + const text = cell.children.find(c => c.type === 'text'); + return text?.value; + }; + + // Check first row: Bold | **Bold text** + const boldCells = rows[0].children.filter(child => child.type === 'element' && child.tagName === 'td'); + expect(boldCells).toHaveLength(2); + expect(getCellText(boldCells[0])).toBe('Bold'); + expect(getCellText(boldCells[1], 'strong')).toBe('Bold text'); + + // Check second row: Italic | *Italic text* + const italicCells = rows[1].children.filter(child => child.type === 'element' && child.tagName === 'td'); + expect(italicCells).toHaveLength(2); + expect(getCellText(italicCells[0])).toBe('Italic'); + expect(getCellText(italicCells[1], 'em')).toBe('Italic text'); + }); + + it('processes GFM tables with markdown components', () => { + const markdown = ` +| Feature | Description | +|---------|-------------| +| **Bold** | Text with **emphasis** | +| *Italic* | Text with *emphasis* | +| Normal | Regular text | +`; + + const hast = mdxish(markdown.trim()); + + const table = hast.children.find(child => child.type === 'element' && child.tagName === 'table'); + + expect(table).toBeDefined(); + 
expect(table.type).toBe('element'); + expect(table.tagName).toBe('table'); + + const tbody = table.children.find(child => child.type === 'element' && child.tagName === 'tbody'); + expect(tbody).toBeDefined(); + + const rows = tbody.children.filter(child => child.type === 'element' && child.tagName === 'tr'); + expect(rows).toHaveLength(3); + + // Helper to get text from a cell, optionally through a wrapper element + const getCellText = (cell, wrapperTag) => { + if (wrapperTag) { + const wrapper = cell.children.find(c => c.type === 'element' && c.tagName === wrapperTag); + const text = wrapper?.children.find(c => c.type === 'text'); + return text?.value; + } + const text = cell.children.find(c => c.type === 'text'); + return text?.value; + }; + + // Check first row: **Bold** | Text with **emphasis** + const boldCells = rows[0].children.filter(child => child.type === 'element' && child.tagName === 'td'); + expect(boldCells).toHaveLength(2); + expect(getCellText(boldCells[0], 'strong')).toBe('Bold'); + expect(getCellText(boldCells[1], 'strong')).toBe('emphasis'); + + // Check second row: *Italic* | Text with *emphasis* + const italicCells = rows[1].children.filter(child => child.type === 'element' && child.tagName === 'td'); + expect(italicCells).toHaveLength(2); + expect(getCellText(italicCells[0], 'em')).toBe('Italic'); + expect(getCellText(italicCells[1], 'em')).toBe('emphasis'); + + // Check third row: Normal | Regular text + const normalCells = rows[2].children.filter(child => child.type === 'element' && child.tagName === 'td'); + expect(normalCells).toHaveLength(2); + expect(getCellText(normalCells[0])).toBe('Normal'); + expect(getCellText(normalCells[1])).toBe('Regular text'); + }); +}); diff --git a/__tests__/compilers/variable.test.ts b/__tests__/compilers/variable.test.ts index 53b322fb3..84017caa8 100644 --- a/__tests__/compilers/variable.test.ts +++ b/__tests__/compilers/variable.test.ts @@ -1,3 +1,9 @@ +import type { Root } from 'hast'; + +import 
'@testing-library/jest-dom'; +import { render, screen } from '@testing-library/react'; +import React from 'react'; + import * as rmdx from '../../index'; describe('variable compiler', () => { @@ -42,3 +48,53 @@ describe('variable compiler', () => { expect(rmdx.mdx(tree).trim()).toStrictEqual(mdx.trim()); }); }); + +describe('mdxish variable compiler', () => { + it('should handle user variables', () => { + const mdx = ` +Hello {user.name}! + `; + + const variables = { + user: { + name: 'John Doe', + }, + defaults: [], + }; + + const hast = rmdx.mdxish(mdx) as Root; + expect(hast).toBeDefined(); + + const { default: Content } = rmdx.renderMdxish(hast, { variables }); + + render(React.createElement(Content)); + + expect(screen.getByText('John Doe')).toBeInTheDocument(); + }); + + it('should NOT evaluate user variables inside backticks (inline code)', () => { + const mdx = ` +User Variables: **\`{user.name}\`** evaluates to {user.name} + `; + + const variables = { + user: { + name: 'John Doe', + }, + defaults: [], + }; + + const hast = rmdx.mdxish(mdx) as Root; + expect(hast).toBeDefined(); + + const { default: Content } = rmdx.renderMdxish(hast, { variables }); + + render(React.createElement(Content)); + + // The {user.name} OUTSIDE backticks should be evaluated to "John Doe" + expect(screen.getByText('John Doe')).toBeInTheDocument(); + + // The {user.name} INSIDE backticks should remain as literal text + expect(screen.getByText('{user.name}')).toBeInTheDocument(); + }); +}); diff --git a/__tests__/compilers/yaml.test.js b/__tests__/compilers/yaml.test.js index dd30c7f69..057fe0111 100644 --- a/__tests__/compilers/yaml.test.js +++ b/__tests__/compilers/yaml.test.js @@ -1,4 +1,4 @@ -import { mdast, mdx } from '../../index'; +import { mdast, mdx, mix } from '../../index'; describe('yaml compiler', () => { it.skip('correctly writes out yaml', () => { @@ -20,3 +20,21 @@ Document content! 
`); }); }); + +describe('mix yaml compiler', () => { + it('correctly handles yaml frontmatter', () => { + // NOTE: the '---' MUST be at the ABSOLUTE BEGINNING of the file, adding a space or newline will break the parser + const txt = `--- +title: This is test +author: A frontmatter test +--- + +Document content! + `; + + const html = mix(txt); + expect(html).not.toContain('---'); + expect(html).not.toContain('title: This is test'); + expect(html).toContain('Document content'); + }); +}); diff --git a/__tests__/index.test.js b/__tests__/index.test.js index 1efd402e8..da2c3d1d4 100644 --- a/__tests__/index.test.js +++ b/__tests__/index.test.js @@ -2,7 +2,7 @@ import { render, screen } from '@testing-library/react'; import React, { createElement } from 'react'; import BaseUrlContext from '../contexts/BaseUrl'; -import { run, compile, utils, html as _html, mdast, hast as _hast, plain, mdx, astToPlainText } from '../index'; +import { run, compile, utils, html as _html, mdast, hast as _hast, plain, mdx, mix, astToPlainText } from '../index'; import { options } from '../options'; import { tableFlattening } from '../processor/plugin/table-flattening'; @@ -372,6 +372,19 @@ Lorem ipsum dolor!`; }); }); +describe('export multiple Markdown renderers with mix', () => { + it('renders MD', () => { + const markdown = '# Hello World'; + const html = mix(markdown); + expect(html).toContain(' { + expect(mix('')).toBe(''); + }); +}); + describe.skip('prefix anchors with "section-"', () => { it('should add a section- prefix to heading anchors', () => { expect(_html('# heading')).toMatchSnapshot(); diff --git a/__tests__/lib/mdxish/demo-docs/mdxish.md b/__tests__/lib/mdxish/demo-docs/mdxish.md new file mode 100644 index 000000000..780ad7cfd --- /dev/null +++ b/__tests__/lib/mdxish/demo-docs/mdxish.md @@ -0,0 +1,121 @@ +# MDX-ish Engine (Proposed Loose MDX-Like Syntax) + +A demo doc for the proposed loose "MDX-ish" syntax. 
Test against this doc (as well as the legacy RDMD and new RMDX docs) to validate that the engine can parse and render our new mixed content syntax. + +## Mixed HTML Content + +
+

This is an HTML Section

+

You can mix HTML directly into your markdown content.

+ This is an orange span element! +
+ +Regular markdown continues after HTML elements without any issues. You can even write loose HTML, so unclosed tags like `
` or `
` will work! + +
+ +HTML comment blocks should also work without issue. + +## Custom Components + +Custom components and reusable content should be fully supported: + + + +Lorem ipsum dolor sit amet, **consectetur adipiscing elit.** Ut enim ad minim veniam, quis nostrud exercitation ullamco. Excepteur sint occaecat cupidatat non proident! + + + +You should be able to use our built-in components as if they were globals. Here's our "Run in Postman" button, for example: + + + +### Component Composition + +You can nest components inside each other! Here's an `<Accordion>` nested inside a `<Card>`, for example: + + + + +This Accordion is nested inside a Card component! + + + + +## Mixed Attribute Syntax + +### Style + +
+ +You can use a JSX-style CSS object to set inline styles. + +
+ +
+ +Or use the standard HTML `[style]` attribute. + +
+ +### Class + +
+ +Using the `className` attribute. + +
+ +
+ +Or just the regular HTML `class` attribute + +
+ + + +## Limited Top-Level JSX + +- Logic: **`{3 * 7 + 11}`** evaluates to {3 * 7 + 11} +- Global Methods: **`{uppercase('hello world')}`** evaluates to {uppercase('hello world')} +- User Variables: **`{user.name}`** evaluates to {user.name} +- Comments: **`{/* JSX-style comments */}`** should not render {/* this should not be rendered */} + +## Mixed MD & JSX Syntax + +- Inline decorators should work with top-level JSX expressions. For example: + + > **{count}** items at _${price}_ is [${Math.round(multiply(count, price))}](https://google.com). + +- Attributes can be given as plain HTML or as a JSX expression, so `` and `` should both work: + + > a plain HTML attr versus a JSX expression + + +### Code Blocks Should NOT Execute + +Both inline code + code blocks should preserve expressions, instead of evaluating them: + +```javascript +const result = {1 + 1}; +const user = {userName}; +const math = {5 * 10}; +``` + +Inline code also shouldn't evaluate: `{1 + 1}` should stay as-is in inline code. diff --git a/__tests__/lib/mdxish/demo-docs/rdmd.md b/__tests__/lib/mdxish/demo-docs/rdmd.md new file mode 100644 index 000000000..d4ff06c02 --- /dev/null +++ b/__tests__/lib/mdxish/demo-docs/rdmd.md @@ -0,0 +1,79 @@ +## RDMD Engine (Legacy Markdown) + +A comprehensive demo of ReadMe's legacy RDMD flavored Markdown syntax. Test against this doc to validate that legacy RDMD content is rendering properly. + +### Reusable Content + + + +### Code Blocks + +RDMD renders all standard markdown codeblocks. Additionally, when using fenced codeblocks, you can provide an optional title for your block after the syntax lang tag: + +```php Sample Code + +``` + +RDMD can display multiple code samples in a tabbed interface. To create tabs, write successive fenced code blocks **without** inserting an empty line between blocks. 
For example: + +```js Tab One +console.log('Code TabĀ A'); +``` +```python Tab Two +print('Code TabĀ B') +``` + +The engine should render the above code blocks as a set of tabs. + +### Callouts + +A callout is a special blockquote that begins with either the ā„¹ļø, āœ…, āš ļø, or ā—ļø emoji. This initial emoji will set the callout’s theme, and the first line becomes the title. For instance: + +> āœ… Callout Title +> +> This should render a success callout. + +This creates a success callout. Some edge cases are also covered, such as title-only callouts: + +> ā„¹ļø Callouts don't need to have body text. + +Nor do they require a title, or a double line break between title and body: + +> āš ļø +> This callout has a title but no body text. + +Finally, if an emoji that isn’t mapped to a theme is used, the callout will fall back to a default style. To prevent a regular blockquote starting with one of the theme emojis from rendering as a callout, you can simply bold the leading emoji in the quote: + +> **ā—ļø** This should render a regular blockquote, not a callout. + +### Embeds + +RDMD supports rich embeds. You can embed a URL with a special title `@embed` in a normal Markdown link. So for example, this `[Embed Title](https://youtu.be/8bh238ekw3 "@embed")` syntax should render a "rich" preview: + +[Embed Title](https://youtu.be/8bh238ekw3 "@embed") + +For more control, use the `` JSX component and pass properties such as `url`, `title`, `favicon` and `image`. + + +### Dynamic Data + +RDMD can substitute variables and glossary terms at render time: + +* **User variables:** if JWT‑based user variables are configured, you can reference them using curly braces. For example, ā€œ`Hi, my name is **<>**!`ā€ expands to the logged‑in user’s name: + + > Hi, my name is **<>**! + +* **Glossary terms:** similarly, if you have defined any glossary terms, you can use the `<>` to show an interactive definition tooltip. + + > The term <> should show a tooltip on hover. 
+ +* **Emoji shortcodes:** GitHub‑style emoji short codes like `:sparkles:` or `:owlbert-reading:` are expanded to their corresponding emoji or custom image. + +### Additional Features + +- automatic table of contents (TOC) generation per doc section +- Mermaid syntax support for rendering diagrams +- heading semantics + syntax variants: + * auto‑incremented anchor IDs applied to headings for jump link support + * supports compact style, so you can omit the space after the hash, i.e. `###ValidĀ Header` + * respects ATX style headings, so you can wrap headings in hashes, e.g. `## Valid Header ##` diff --git a/__tests__/lib/mdxish/demo-docs/rmdx.md b/__tests__/lib/mdxish/demo-docs/rmdx.md new file mode 100644 index 000000000..295ed1391 --- /dev/null +++ b/__tests__/lib/mdxish/demo-docs/rmdx.md @@ -0,0 +1,108 @@ +# RMDX Engine (Refactored MDX) + +A comprehensive demo of ReadMe's current MDX Markdown syntax. Test against this doc to validate that legacy RDMD content is rendering properly. + +### Reusable Content + +Project custom components should be provided to the engine at render time and be usable in the doc: + +Hello world! + +Reusable content should work the same way: + + + +### Code Blocks + +RDMD renders all standard markdown codeblocks. Additionally, when using fenced codeblocks, you can provide an optional title for your block after the syntax lang tag: + +```php Sample Code + +``` + +RDMD can display multiple code samples in a tabbed interface. To create tabs, write successive fenced code blocks **without** inserting an empty line between blocks. For example: + +```js Tab One +console.log('Code TabĀ A'); +``` +```python Tab Two +print('Code TabĀ B') +``` + +The engine should render the above code blocks as a set of tabs. + +### Callouts + +A callout is a special blockquote that begins with either the ā„¹ļø, āœ…, āš ļø, or ā—ļø emoji. This initial emoji will set the callout’s theme, and the first line becomes the title. 
For instance: + +> āœ… Callout Title +> +> This should render a success callout. + +This creates a success callout. Some edge cases are also covered, such as title-only callouts: + +> ā„¹ļø Callouts don't need to have body text. + +Nor do they require a title, or a double line break between title and body: + +> āš ļø +> This callout has a title but no body text. + +Finally, if an emoji that isn’t mapped to a theme is used, the callout will fall back to a default style. Callouts can also be written using our custom `` component, which accepts a separate `icon` and `theme` prop for even more flexibility. This should render similarly to the above examples: + + +### Callout Component + +A default callout using the MDX component. + + +To prevent a regular blockquote starting with one of the theme emojis from rendering as a callout, you can simply bold the leading emoji in the quote: + +> **ā—ļø** This should render a regular blockquote, not an error callout. + +### Embeds + +RDMD supports rich embeds. You can embed a URL with a special title `@embed` in a normal Markdown link. So for example, this `[Embed Title](https://youtu.be/8bh238ekw3 "@embed")` syntax should render a "rich" preview: + +[Embed Title](https://youtu.be/8bh238ekw3 "@embed") + +For more control, use the `` JSX component and pass properties such as `url`, `title`, `favicon` and `image`. + + + +### Dynamic Data + +RDMD can substitute variables and glossary terms at render time: + +* **User variables:** if JWT‑based user variables are configured, you can reference them using curly braces. For example, ā€œ`Hi, my name is **{user.name}**!`ā€ expands to the logged‑in user’s name: + + > Hi, my name is **{user.name}**! + +* **Glossary terms:** similarly, if you have defined any glossary terms, you can use the `myterm` tag to show an interactive definition tooltip: + + > The term exogenous should show a tooltip on hover. 
+ +* **Emoji shortcodes:** GitHub‑style emoji short codes like `:sparkles:` or `:owlbert-reading:` are expanded to their corresponding emoji or custom image. + +### Top-Level JSX Syntax + +- top-level logic can be written as JSX **`{3 * 7 + 11}`** expressions and should evaluate inline (to {3 * 7 + 11} in this case.) +- global JS methods are supported, such as **`{uppercase('hello world')}`** (which should evaluate to {uppercase('hello world')}.) +- JSX comments like **`{/* JSX-style comments */}`** should work (while HTML comments like `` will throw an error.) +- JSX special attributes (like `className`, or setting the `style` as a CSS object) are required +- loose HTML is not supported (i.e. unclosed `
` tags will throw an error) + +### Additional Features + +- automatic table of contents (TOC) generation per doc section +- Mermaid syntax support for rendering diagrams +- heading semantics + syntax variants: + * auto‑incremented anchor IDs applied to headings for jump link support + * supports compact style, so you can omit the space after the hash, i.e. `###ValidĀ Header` + * respects ATX style headings, so you can wrap headings in hashes, e.g. `## Valid Header ##` diff --git a/__tests__/lib/mdxish/gemoji.test.ts b/__tests__/lib/mdxish/gemoji.test.ts new file mode 100644 index 000000000..ebef90679 --- /dev/null +++ b/__tests__/lib/mdxish/gemoji.test.ts @@ -0,0 +1,18 @@ +import { mix } from '../../../lib'; + +describe('gemoji transformer', () => { + it('should transform shortcodes back to emojis', () => { + const md = `šŸ” + +:smiley: + +:owlbert:`; + const stringHast = mix(md); + expect(stringHast).toMatchInlineSnapshot(` + "

šŸ”

+

😃

+

:owlbert:

" + `); + + }); +}); \ No newline at end of file diff --git a/__tests__/lib/mdxish/magic-blocks.test.ts b/__tests__/lib/mdxish/magic-blocks.test.ts new file mode 100644 index 000000000..54136a21e --- /dev/null +++ b/__tests__/lib/mdxish/magic-blocks.test.ts @@ -0,0 +1,70 @@ +import type { Element } from 'hast'; + +import { mdxish } from '../../../lib'; + +describe('magic blocks', () => { + describe('image block', () => { + it('should restore image block', () => { + const md = `[block:image] +{ +"images": [ + { + "image": [ + "https://files.readme.io/327e65d-image.png", + null, + null + ], + "align": "left", + "sizing": "50%" + } +] +} +[/block]`; + + const ast = mdxish(md); + expect(ast.children).toHaveLength(1); + expect(ast.children[0].type).toBe('element'); + + const imgElement = ast.children[0] as Element; + expect(imgElement.tagName).toBe('img'); + expect(imgElement.properties.src).toBe('https://files.readme.io/327e65d-image.png'); + expect(imgElement.properties.alt).toBe(''); + expect(imgElement.properties.align).toBe('left'); + expect(imgElement.properties.width).toBe('50%'); + }); + }); + + describe('table block', () => { + it('should restore parameters block to tables', () => { + const md = `[block:parameters] +${JSON.stringify( + { + data: { + 'h-0': 'Term', + 'h-1': 'Definition', + '0-0': 'Events', + '0-1': 'Pseudo-list: \nā— One \nā— Two', + }, + cols: 2, + rows: 1, + align: ['left', 'left'], + }, + null, + 2, +)} +[/block]`; + + const ast = mdxish(md); + + // Some extra children are added to the AST by the mdxish wrapper + expect(ast.children).toHaveLength(4); + expect(ast.children[2].type).toBe('element'); + + const element = ast.children[2] as Element; + expect(element.tagName).toBe('table'); + expect(element.children).toHaveLength(2); + expect((element.children[0] as Element).tagName).toBe('thead'); + expect((element.children[1] as Element).tagName).toBe('tbody'); + }); + }) +}); \ No newline at end of file diff --git 
a/__tests__/lib/mdxish/mdxish.test.ts b/__tests__/lib/mdxish/mdxish.test.ts new file mode 100644 index 000000000..f4c4f9417 --- /dev/null +++ b/__tests__/lib/mdxish/mdxish.test.ts @@ -0,0 +1,16 @@ +import { mdxish } from '../../../lib/mdxish'; + +describe('mdxish', () => { + describe('invalid mdx syntax', () => { + it('should render unclosed tags', () => { + const md = '
'; + expect(() => mdxish(md)).not.toThrow(); + }); + + it('should render content in new lines', () => { + const md = `
hello +
`; + expect(() => mdxish(md)).not.toThrow(); + }); + }); +}); \ No newline at end of file diff --git a/__tests__/lib/render-mdxish/CodeTabs.test.tsx b/__tests__/lib/render-mdxish/CodeTabs.test.tsx new file mode 100644 index 000000000..28b025877 --- /dev/null +++ b/__tests__/lib/render-mdxish/CodeTabs.test.tsx @@ -0,0 +1,52 @@ +import '@testing-library/jest-dom'; +import { render } from '@testing-library/react'; +import React from 'react'; + +import { mdxish, renderMdxish } from '../../../lib'; + +describe('code tabs renderer', () => { + describe('given 2 consecutive code blocks', () => { + const cppCode = `#include + +int main(void) { + std::cout << "hello world"; + return 0; +}`; + const pythonCode = 'print("hello world")'; + + const md = ` +\`\`\`cplusplus +${cppCode} +\`\`\` +\`\`\`python +${pythonCode} +\`\`\` +`; + const mod = renderMdxish(mdxish(md)); + + it('should not error when rendering', () => { + expect(() => render()).not.toThrow(); + }); + + it('should combine the 2 code blocks into a code-tabs block', () => { + const { container } = render(); + + // Should have a div with class CodeTabs + expect(container.querySelector('div.CodeTabs')).toBeInTheDocument(); + + // Verify both codes are in the DOM (C++ is visible, Python tab is hidden but present) + // Using textContent to handle cases where syntax highlighting splits text across nodes + expect(container.textContent).toContain('#include '); + expect(container.textContent).toContain('std::cout << "hello world"'); + expect(container.textContent).toContain(pythonCode); + }); + + it('should render the buttons with the correct text', () => { + const { container } = render(); + const buttons = container.querySelectorAll('button'); + expect(buttons).toHaveLength(2); + expect(buttons[0]).toHaveTextContent('C++'); + expect(buttons[1]).toHaveTextContent('Python'); + }); + }); +}); diff --git a/__tests__/lib/render-mdxish/Glossary.test.tsx b/__tests__/lib/render-mdxish/Glossary.test.tsx new file mode 100644 index 
000000000..70a196422 --- /dev/null +++ b/__tests__/lib/render-mdxish/Glossary.test.tsx @@ -0,0 +1,39 @@ +import '@testing-library/jest-dom'; +import { render, screen } from '@testing-library/react'; +import React from 'react'; + +import { vi } from 'vitest'; + +import { mdxish } from '../../../index'; +import renderMdxish from '../../../lib/renderMdxish'; + +describe('Glossary', () => { + // Make sure we don't have any console errors when rendering a glossary item, + // which has happened before and crashed the app. + // It was because the engine was converting the Glossary item to nested

tags + // which React was not happy about + let stderrSpy: ReturnType; + let consoleErrorSpy: ReturnType; + + beforeAll(() => { + stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation(() => true); + consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + }); + + afterEach(() => { + stderrSpy.mockRestore(); + consoleErrorSpy.mockRestore(); + }); + + it('renders a glossary item without console errors', () => { + const md = `The term exogenous should show a tooltip on hover. + `; + const tree = mdxish(md); + const mod = renderMdxish(tree); + render(); + expect(screen.getByText('exogenous')).toBeVisible(); + + expect(stderrSpy).not.toHaveBeenCalled(); + expect(consoleErrorSpy).not.toHaveBeenCalled(); + }); +}); \ No newline at end of file diff --git a/__tests__/lib/render-mdxish/toc.test.tsx b/__tests__/lib/render-mdxish/toc.test.tsx new file mode 100644 index 000000000..03012929e --- /dev/null +++ b/__tests__/lib/render-mdxish/toc.test.tsx @@ -0,0 +1,114 @@ +import type { HastHeading } from '../../../types'; + +import { render, screen } from '@testing-library/react'; +import React from 'react'; + +import { mdxish, renderMdxish } from '../../../index'; + +describe('toc transformer', () => { + it('parses out a toc with max depth of 3', () => { + const md = ` +# Title + +## Subheading + +### Third + +#### Fourth +`; + const { Toc } = renderMdxish(mdxish(md)); + + render(); + + expect(screen.findByText('Title')).toBeDefined(); + expect(screen.findByText('Subheading')).toBeDefined(); + expect(screen.findByText('Third')).toBeDefined(); + expect(screen.queryByText('Fourth')).toBeNull(); + }); + + it('parses a toc from components', () => { + const md = ` +# Title + + + +## Subheading +`; + const components = { + CommonInfo: renderMdxish(mdxish('## Common Heading')), + }; + + const { Toc } = renderMdxish(mdxish(md, { components }), { components }); + + render(); + + expect(screen.findByText('Title')).toBeDefined(); + 
expect(screen.findByText('Common Heading')).toBeDefined(); + expect(screen.findByText('Subheading')).toBeDefined(); + }); + + it('parses out a toc and only uses plain text', () => { + const md = ` +# [Title](http://example.com) +`; + const { Toc } = renderMdxish(mdxish(md)); + + render(); + + expect(screen.findByText('Title')).toBeDefined(); + expect(screen.queryByText('[', { exact: false })).toBeNull(); + }); + + it('does not include headings in callouts', () => { + const md = ` +### Title + +> šŸ“˜ Callout +`; + const { Toc } = renderMdxish(mdxish(md)); + + render(); + + expect(screen.findByText('Title')).toBeDefined(); + expect(screen.queryByText('Callout')).toBeNull(); + }); + + it('includes headings from nested component tocs', () => { + const md = ` + # Title + + + `; + + const components = { + ParentInfo: renderMdxish(mdxish('## Parent Heading')), + }; + + const { Toc } = renderMdxish(mdxish(md, { components }), { components }); + + render(); + + expect(screen.findByText('Parent Heading')).toBeDefined(); + }); + + it('includes headings from reusable components', () => { + const md = `# Title + +`; + + const blockedComponentModule = renderMdxish(mdxish('## Callout Heading')); + const components = { + BlockedComponent: blockedComponentModule, + }; + + const { toc } = renderMdxish(mdxish(md, { components }), { components }); + + expect(toc).toHaveLength(2); + const firstHeading = toc[0] as HastHeading; + expect(firstHeading.tagName).toBe('h1'); + expect(firstHeading.properties?.id).toBe('title'); + const secondHeading = toc[1] as HastHeading; + expect(secondHeading.tagName).toBe('h2'); + expect(secondHeading.properties?.id).toBe('callout-heading'); + }); +}); diff --git a/__tests__/lib/renderMdxish.test.tsx b/__tests__/lib/renderMdxish.test.tsx new file mode 100644 index 000000000..b02a50f35 --- /dev/null +++ b/__tests__/lib/renderMdxish.test.tsx @@ -0,0 +1,123 @@ +import type { RMDXModule } from '../../types'; +import type { MDXProps } from 'mdx/types'; + 
+import '@testing-library/jest-dom'; +import { render, screen } from '@testing-library/react'; +import React from 'react'; + +import { mdxish } from '../../index'; +import renderMdxish from '../../lib/renderMdxish'; + +describe('renderMdxish', () => { + it('renders simple HTML content', () => { + const input = '

Hello, world!

This is a test paragraph.

'; + const tree = mdxish(input); + const mod = renderMdxish(tree); + + render(); + + expect(screen.getByText('Hello, world!')).toBeInTheDocument(); + expect(screen.getByText('This is a test paragraph.')).toBeInTheDocument(); + }); + + it('renders HTML from mix output', () => { + const md = '### Hello, world!\n\nThis is **markdown** content.'; + const tree = mdxish(md); + const mod = renderMdxish(tree); + + render(); + + expect(screen.getByText('Hello, world!')).toBeInTheDocument(); + // Text is split across nodes, so use a more flexible matcher + expect(screen.getByText(/This is/)).toBeInTheDocument(); + expect(screen.getByText('markdown')).toBeInTheDocument(); + expect(screen.getByText(/content\./)).toBeInTheDocument(); + }); + + it('rehydrates custom components from mix output when preserveComponents is true', () => { + const md = ` + +**Heads up!** + +This is a custom component. +`; + + const tree = mdxish(md); + const mod = renderMdxish(tree); + + const { container } = render(); + expect(container.querySelector('.callout.callout_warn')).toBeInTheDocument(); + expect(screen.getByText('Heads up!')).toBeInTheDocument(); + expect(screen.getByText('This is a custom component.')).toBeInTheDocument(); + }); + + it('keeps content after a custom component outside of the component', () => { + const md = ` + + This is a component with a space in the content. + + +This should be outside`; + + const components: Record = { + MyComponent: { + default: (props: MDXProps) =>
{props.children as React.ReactNode}
, + Toc: () => null, + toc: [], + stylesheet: undefined, + }, + }; + + const tree = mdxish(md, { components }); + const mod = renderMdxish(tree, { components }); + + render(); + + const wrapper = screen.getByTestId('my-component'); + expect(wrapper.querySelectorAll('p')).toHaveLength(1); + expect(screen.getByText('This is a component with a space in the content.')).toBeInTheDocument(); + expect(screen.getByText('This should be outside')).toBeInTheDocument(); + expect(wrapper).not.toContainElement(screen.getByText('This should be outside')); + }); + + it('keeps following content outside of self-closing components', () => { + const md = ` + +Hello`; + + const components = { + MyComponent: { + default: () =>
, + Toc: null, + toc: [], + }, + }; + + const tree = mdxish(md, { components }); + const mod = renderMdxish(tree, { components }); + + render(); + + const wrapper = screen.getByTestId('my-component'); + expect(wrapper).toBeInTheDocument(); + expect(wrapper).toBeEmptyDOMElement(); + expect(screen.getByText('Hello')).toBeInTheDocument(); + expect(wrapper).not.toContainElement(screen.getByText('Hello')); + }); + + it('renders HTMLBlock with renderMdxish', () => { + const markdown = '{`

Hello, World!

`}
'; + + const tree = mdxish(markdown); + const mod = renderMdxish(tree); + + render(); + + const htmlBlock = document.querySelector('.rdmd-html'); + expect(htmlBlock).toBeInTheDocument(); + expect(htmlBlock?.innerHTML).toContain('Hello'); + expect(htmlBlock?.innerHTML).toContain('World!'); + expect(htmlBlock?.innerHTML).toContain('

'); + expect(htmlBlock?.innerHTML).not.toContain('{'); + }); +}); diff --git a/__tests__/processor/plugin/mdxish-components.test.ts b/__tests__/processor/plugin/mdxish-components.test.ts new file mode 100644 index 000000000..f4961bcaa --- /dev/null +++ b/__tests__/processor/plugin/mdxish-components.test.ts @@ -0,0 +1,117 @@ +import type { CustomComponents } from '../../../types'; + +import { describe, it, expect } from 'vitest'; + +import { mix } from '../../../lib'; + +describe('rehypeMdxishComponents', () => { + it('should remove non-existent custom components from the tree', () => { + const md = ` from inside + +Hello + +`; + + const html = mix(md); + + // Should only contain "Hello" and not the non-existent component tags or their content + expect(html).toContain('Hello'); + expect(html).not.toContain('MyDemo'); + expect(html).not.toContain('from inside'); + expect(html).not.toContain('Custom'); + }); + + it('should preserve existing custom components', () => { + // componentExists only checks if the key exists, so we can use a minimal mock + const TestComponent = {} as CustomComponents[string]; + const md = `Content + +Hello`; + + const result = mix(md, { components: { TestComponent } }); + expect(result).toContain('TestComponent'); + expect(result).toContain('Hello'); + }); + + it('should remove nested non-existent components', () => { + const md = ` + nested content + Hello +`; + + const result = mix(md); + expect(result).not.toContain('Hello'); + expect(result).not.toContain('Outer'); + expect(result).not.toContain('Inner'); + expect(result).not.toContain('nested content'); + }); + + it('should handle mixed existing and non-existent components', () => { + // componentExists only checks if the key exists, so we can use a minimal mock + const ExistingComponent = {} as CustomComponents[string]; + const md = `Keep this + +Remove this + +Hello`; + + const result = mix(md, { components: { ExistingComponent } }); + expect(result).toContain('ExistingComponent'); + 
expect(result).toContain('Keep this'); + expect(result).toContain('Hello'); + expect(result).not.toContain('NonExistent'); + expect(result).not.toContain('Remove this'); + }); + + it('should preserve regular HTML tags', () => { + const md = `

This is HTML
+ +Remove this + +Hello`; + + const result = mix(md); + expect(result).toContain('
'); + expect(result).toContain('This is HTML'); + expect(result).toContain('Hello'); + expect(result).not.toContain('NonExistentComponent'); + expect(result).not.toContain('Remove this'); + }); + + it('should handle empty non-existent components', () => { + const md = ` + +Hello + +`; + + // Preprocess self-closing tags before processing (matching mix.ts behavior) + + const result = mix(md); + expect(result).toContain('Hello'); + expect(result).not.toContain('EmptyComponent'); + expect(result).not.toContain('AnotherEmpty'); + }); + + it('should correctly handle real-life cases', () => { + const md = `Hello world! + +Reusable content should work the same way: + + + +hello + + + + + from inside +`; + + const result = mix(md); + expect(result).not.toContain('Hello world!'); + expect(result).toContain('Reusable content should work the same way:'); + expect(result).toContain('hello'); + expect(result).not.toContain('from inside'); + }); +}); diff --git a/__tests__/transformers/evaluate-expressions.test.ts b/__tests__/transformers/evaluate-expressions.test.ts new file mode 100644 index 000000000..403b34d9e --- /dev/null +++ b/__tests__/transformers/evaluate-expressions.test.ts @@ -0,0 +1,67 @@ +import { mdxish } from '../../lib/mdxish'; + +describe('evaluateExpressions', () => { + it('should evaluate inline MDX expressions and replace with results', () => { + const context = { + count: 5, + price: 10, + name: 'Test', + }; + + const content = 'Total: {count * price} items for {name}'; + const hast = mdxish(content, { jsxContext: context }); + + // The expressions should be evaluated and converted to text nodes + const textContent = JSON.stringify(hast); + expect(textContent).toContain('50'); // count * price = 50 + expect(textContent).toContain('Test'); // name = 'Test' + expect(textContent).not.toContain('{count * price}'); + expect(textContent).not.toContain('{name}'); + }); + + it('should handle null and undefined expressions', () => { + const context = { + nullValue: 
null, + undefinedValue: undefined, + }; + + const content = 'Before {nullValue} middle {undefinedValue} after'; + const hast = mdxish(content, { jsxContext: context }); + + // Null/undefined should be removed (empty string) + const textContent = JSON.stringify(hast); + expect(textContent).toContain('Before'); + expect(textContent).toContain('middle'); + expect(textContent).toContain('after'); + expect(textContent).not.toContain('nullValue'); + expect(textContent).not.toContain('undefinedValue'); + }); + + it('should handle object expressions', () => { + const context = { + obj: { a: 1, b: 2 }, + }; + + const content = 'Object: {obj}'; + const hast = mdxish(content, { jsxContext: context }); + + // Objects should be JSON stringified (account for JSON escaping in stringified output) + const textContent = JSON.stringify(hast); + expect(textContent).toContain('{\\"a\\":1,\\"b\\":2}'); + }); + + it('should preserve expressions in code blocks', () => { + const context = { + count: 5, + }; + + const content = '```\nconst x = {count};\n```'; + const hast = mdxish(content, { jsxContext: context }); + + // Expressions in code blocks should be preserved + const textContent = JSON.stringify(hast); + expect(textContent).toContain('{count}'); + expect(textContent).not.toContain('5'); + }); +}); + diff --git a/__tests__/transformers/mdxish-component-blocks.test.ts b/__tests__/transformers/mdxish-component-blocks.test.ts new file mode 100644 index 000000000..ffa91f85a --- /dev/null +++ b/__tests__/transformers/mdxish-component-blocks.test.ts @@ -0,0 +1,49 @@ +import type { Element } from 'hast'; + +import { mdxish } from '../../index'; +import { parseAttributes } from '../../processor/transform/mdxish/mdxish-component-blocks'; + +describe('mdxish-component-blocks', () => { + describe('parseAttributes', () => { + it('should parse normal key-value attributes', () => { + const attrString = 'theme="info"'; + const result = parseAttributes(attrString); + + 
expect(result).toHaveLength(1); + expect(result[0]).toStrictEqual({ + type: 'mdxJsxAttribute', + name: 'theme', + value: 'info', + }); + }); + + it('should parse boolean attributes without values', () => { + const attrString = 'theme="info" empty'; + const result = parseAttributes(attrString); + + expect(result).toHaveLength(2); + expect(result[0]).toStrictEqual({ + type: 'mdxJsxAttribute', + name: 'theme', + value: 'info', + }); + expect(result[1]).toStrictEqual({ + type: 'mdxJsxAttribute', + name: 'empty', + value: null, + }); + }); + }); + + it('should parse Cards tag with columns attribute using mdxish', () => { + const markdown = ''; + + const hast = mdxish(markdown); + const firstChild = hast.children[0] as Element; + const cards = firstChild.children?.[0] as Element; + + expect(cards.type).toBe('element'); + expect(cards.tagName).toBe('Cards'); + expect(cards.properties.columns).toBe('3'); + }); +}); diff --git a/__tests__/transformers/preprocess-jsx-expressions.test.ts b/__tests__/transformers/preprocess-jsx-expressions.test.ts new file mode 100644 index 000000000..fcf0fd88b --- /dev/null +++ b/__tests__/transformers/preprocess-jsx-expressions.test.ts @@ -0,0 +1,36 @@ +import { preprocessJSXExpressions } from '../../processor/transform/mdxish/preprocess-jsx-expressions'; + +describe('preprocessJSXExpressions', () => { + describe('Step 3: Evaluate attribute expressions', () => { + it('should evaluate JSX attribute expressions and convert them to string attributes', () => { + const context = { + baseUrl: 'https://example.com', + userId: '123', + isActive: true, + }; + + const content = 'Link'; + const result = preprocessJSXExpressions(content, context); + + expect(result).toContain('href="https://example.com"'); + expect(result).toContain('id="123"'); + expect(result).toContain('active="true"'); + expect(result).not.toContain('href={baseUrl}'); + expect(result).not.toContain('id={userId}'); + expect(result).not.toContain('active={isActive}'); + }); + + 
it.each([ + [true, '{"b":1}'], + [false, '{"c":2}'], + ])('should handle nested dictionary attributes when a is %s', (a, expectedJson) => { + const context = { a }; + + const content = '
Link
'; + const result = preprocessJSXExpressions(content, context); + + expect(result).toContain(`foo='${expectedJson}'`); + expect(result).not.toContain('foo={a ? {b: 1} : {c: 2}}'); + }); + }); +}); diff --git a/__tests__/transformers/preprocess-redos-attack.test.ts b/__tests__/transformers/preprocess-redos-attack.test.ts new file mode 100644 index 000000000..4f1dac204 --- /dev/null +++ b/__tests__/transformers/preprocess-redos-attack.test.ts @@ -0,0 +1,332 @@ +import { describe, it, expect } from 'vitest'; + +import { preprocessJSXExpressions } from '../../processor/transform/mdxish/preprocess-jsx-expressions'; + +describe('ReDoS Attack Vectors', () => { + it('should handle basic attack pattern without hanging', () => { + const attackString = `{\`${'\\lock\\lock\\'.repeat(100)}lock\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(10000); + expect(result).toBeDefined(); + }); + + it('should handle pattern specifically designed for nested quantifier attack', () => { + // This pattern exploits the (?:[^`\\]|\\.)* nested quantifier + // Each backslash can be interpreted as start of escape OR part of previous escape + const attackString = `{\`${'\\x\\y'.repeat(300)}z\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(15000); + expect(result).toBeDefined(); + }); + + it('should handle many consecutive backslashes', () => { + const attackString = `{\`${'\\'.repeat(1000)}a\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(20000); + expect(result).toBeDefined(); + }); + + it('should handle alternating escape patterns', () => { + const attackString = 
`{\`${'\\a\\b'.repeat(200)}x\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(10000); + expect(result).toBeDefined(); + }); + + it('should handle complex nested escape sequences', () => { + const attackString = `{\`${'\\x\\y\\z\\w'.repeat(150)}end\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(10000); + expect(result).toBeDefined(); + }); + + it('should handle very long template literal content', () => { + const longContent = 'a'.repeat(50000); + const attackString = `{\`${longContent}\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(20000); + expect(result).toBeDefined(); + }, 10000); + + it('should handle very long template literal with escapes', () => { + const longContent = '\\a'.repeat(10000); + const attackString = `{\`${longContent}\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(30000); + expect(result).toBeDefined(); + }, 10000); + + it('should handle multiple HTMLBlock tags with attack patterns', () => { + const attackPattern = '\\lock\\lock\\'.repeat(50); + const blocks = new Array(10).fill(`{\`${attackPattern}lock\`}`).join('\n'); + + const start = Date.now(); + const result = preprocessJSXExpressions(blocks, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(30000); + expect(result).toBeDefined(); + }, 10000); + + it('should handle attack pattern with extra whitespace', () => { + const attackString = `{ \`${'\\lock\\lock\\'.repeat(100)}lock\` }`; + + const 
start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(10000); + expect(result).toBeDefined(); + }); + + it('should handle attack pattern with newlines and whitespace', () => { + const attackString = `{\n \`${'\\lock\\lock\\'.repeat(100)}lock\`\n}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(10000); + expect(result).toBeDefined(); + }); + + it('should handle mixed escape and non-escape patterns', () => { + const pattern = `${'\\a'}${'b'.repeat(10)}`; + const attackString = `{\`${pattern.repeat(200)}c\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(10000); + expect(result).toBeDefined(); + }); + + it('should handle edge case with escaped backticks in attack pattern', () => { + const attackString = `{\`${'\\`\\`\\`'.repeat(100)}end\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(10000); + expect(result).toBeDefined(); + }); + + it('should handle HTMLBlock with attributes and attack pattern', () => { + const attackString = `{\`${'\\lock\\lock\\'.repeat(200)}lock\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(15000); + expect(result).toBeDefined(); + }); + + it('should handle HTMLBlock with many attributes and attack pattern', () => { + const attrs = 'a="1" b="2" c="3" d="4" e="5" '.repeat(10); + const attackString = `{\`${'\\lock\\lock\\'.repeat(100)}lock\`}`; + + const start = Date.now(); + const 
result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(15000); + expect(result).toBeDefined(); + }); + + it('should handle pattern with all possible escape sequences', () => { + const escapes = '\\n\\t\\r\\v\\f\\b\\0\\x00\\u0000'; + const attackString = `{\`${escapes.repeat(500)}end\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(15000); + expect(result).toBeDefined(); + }); + + it('should handle pattern with Unicode characters and escapes', () => { + const attackString = `{\`${'\\u0041\\u0042'.repeat(300)}end\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(15000); + expect(result).toBeDefined(); + }); + + it('should handle pattern with mixed valid and invalid escapes', () => { + const pattern = '\\n\\invalid\\t\\also-invalid\\r'; + const attackString = `{\`${pattern.repeat(400)}end\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(20000); + expect(result).toBeDefined(); + }); + + it('should handle pattern with tabs and newlines in template literal', () => { + const attackString = `{\`${'\\t\\n\\r'.repeat(500)}end\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(20000); + expect(result).toBeDefined(); + }); + + it('should handle pattern with backslash at every position', () => { + const attackString = `{\`${'a\\b\\c\\d'.repeat(400)}end\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); 
+ const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(20000); + expect(result).toBeDefined(); + }); + + it('should handle pattern designed to maximize backtracking attempts', () => { + // Pattern where each character could be part of escape or standalone + const attackString = `{\`${'\\x\\y\\z'.repeat(600)}end\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(30000); + expect(result).toBeDefined(); + }); + + it('should handle pattern with escaped backslashes (double escapes)', () => { + const attackString = `{\`${'\\\\'.repeat(1000)}end\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(20000); + expect(result).toBeDefined(); + }); + + it('should handle pattern with triple backslashes', () => { + const attackString = `{\`${'\\\\\\'.repeat(800)}end\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(30000); + expect(result).toBeDefined(); + }); + + it('should handle extremely long whitespace before template literal', () => { + const whitespace = ' '.repeat(10000); + const attackString = `{${whitespace}\`${'\\lock\\lock\\'.repeat(50)}lock\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(15000); + expect(result).toBeDefined(); + }); + + it('should handle pattern with newlines between braces and backticks', () => { + const attackString = `{\n\n\n\`${'\\lock\\lock\\'.repeat(100)}lock\`\n\n\n}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const 
end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(15000); + expect(result).toBeDefined(); + }); + + it('should handle pattern with all ASCII printable characters and escapes', () => { + const chars = Array.from({ length: 95 }, (_, i) => String.fromCharCode(i + 32)) + .filter(c => c !== '`' && c !== '\\') + .join('\\'); + const attackString = `{\`${chars.repeat(100)}end\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(20000); + expect(result).toBeDefined(); + }); + + it('should handle pattern that alternates between escape and long strings', () => { + const pattern = `${'\\a'}${'x'.repeat(100)}`; + const attackString = `{\`${pattern.repeat(200)}end\`}`; + + const start = Date.now(); + const result = preprocessJSXExpressions(attackString, {}); + const end = Date.now(); + const duration = end - start; + + expect(duration).toBeLessThan(30000); + expect(result).toBeDefined(); + }); +}); diff --git a/__tests__/transformers/readme-to-mdx.test.ts b/__tests__/transformers/readme-to-mdx.test.ts index eb9e8173a..7fc058672 100644 --- a/__tests__/transformers/readme-to-mdx.test.ts +++ b/__tests__/transformers/readme-to-mdx.test.ts @@ -1,8 +1,12 @@ -import { mdx } from '../../index'; +import type { Recipe } from '../../types'; +import type { Element } from 'hast'; +import type { Root } from 'mdast'; + +import { mdx, mdxish } from '../../index'; describe('readme-to-mdx transformer', () => { it('converts a tutorial tile to MDX', () => { - const ast = { + const ast: Root = { type: 'root', children: [ { @@ -13,7 +17,7 @@ describe('readme-to-mdx transformer', () => { link: 'http://example.com', slug: 'test-id', title: 'Test', - }, + } as Recipe, ], }; @@ -23,3 +27,17 @@ describe('readme-to-mdx transformer', () => { `); }); }); + +describe('mdxish readme-to-mdx transformer', () => { + it('processes Recipe 
component', () => { + const markdown = ''; + + const hast = mdxish(markdown); + const recipe = hast.children[0] as Element; + + expect(recipe.type).toBe('element'); + expect(recipe.tagName).toBe('Recipe'); + expect(recipe.properties.slug).toBe('test-id'); + expect(recipe.properties.title).toBe('Test'); + }); +}); diff --git a/__tests__/transformers/variables.test.tsx b/__tests__/transformers/variables.test.tsx index e7446600b..4becab38c 100644 --- a/__tests__/transformers/variables.test.tsx +++ b/__tests__/transformers/variables.test.tsx @@ -59,4 +59,38 @@ describe('variables transformer', () => { expect(rmdx.mdast(mdx).children[0].type).toBe('mdxFlowExpression'); }); + + it('does not parse variables inside inline code blocks', () => { + const mdx = '`{user.name}`'; + + const tree = rmdx.mdast(mdx); + const inlineCodeNode = tree.children[0]; + + // Should be a paragraph containing inline code + expect(inlineCodeNode.type).toBe('paragraph'); + expect(inlineCodeNode).toHaveProperty('children'); + + const paragraphNode = inlineCodeNode as { children: { type: string; value?: string }[]; type: string }; + const codeNode = paragraphNode.children[0]; + + expect(codeNode.type).toBe('inlineCode'); + expect(codeNode.value).toBe('{user.name}'); + }); + + it('renders variables inside inline code as literal text', () => { + const mdx = 'Use `{user.name}` in your code'; + const variables = { + user: { + name: 'Test User', + }, + }; + const Content = execute(mdx, { variables }) as () => React.ReactNode; + + render(); + + // @ts-expect-error - jest-dom matchers not typed correctly + expect(screen.getByText(/Use.*in your code/)).toBeInTheDocument(); + const codeElement = screen.getByText('{user.name}'); + expect(codeElement.tagName).toBe('CODE'); + }); }); diff --git a/components/CodeTabs/index.tsx b/components/CodeTabs/index.tsx index 1191db539..81cde0dd5 100644 --- a/components/CodeTabs/index.tsx +++ b/components/CodeTabs/index.tsx @@ -56,7 +56,12 @@ const CodeTabs = (props: 
Props) => {
{(Array.isArray(children) ? children : [children]).map((pre, i) => { - const { meta, lang } = pre.props.children.props; + // the first or only child should be our Code component + const codeComponent = Array.isArray(pre.props?.children) + ? pre.props.children[0] + : pre.props?.children; + const lang = codeComponent?.props?.lang; + const meta = codeComponent?.props?.meta; /* istanbul ignore next */ return ( diff --git a/components/HTMLBlock/index.tsx b/components/HTMLBlock/index.tsx index cddc2ff47..ed3ff975a 100644 --- a/components/HTMLBlock/index.tsx +++ b/components/HTMLBlock/index.tsx @@ -14,20 +14,31 @@ const extractScripts = (html: string = ''): [string, () => void] => { }; interface Props { - children: React.ReactElement | string; + children?: React.ReactElement | string; + html?: string; runScripts?: boolean | string; - safeMode?: boolean; + safeMode?: boolean | string; } -const HTMLBlock = ({ children = '', runScripts, safeMode = false }: Props) => { - if (typeof children !== 'string') { - throw new TypeError('HTMLBlock: children must be a string'); +const HTMLBlock = ({ children = '', html: htmlProp, runScripts, safeMode: safeModeRaw = false }: Props) => { + // Determine HTML source: MDXish uses html prop (from HAST), MDX uses children + let html: string = ''; + if (htmlProp !== undefined) { + html = htmlProp; + } else { + if (typeof children !== 'string') { + throw new TypeError('HTMLBlock: children must be a string'); + } + html = children; } - const html = children; // eslint-disable-next-line no-param-reassign runScripts = typeof runScripts !== 'boolean' ? runScripts === 'true' : runScripts; + // In MDX mode, safeMode is passed in as a boolean from JSX parsing + // In MDXish mode, safeMode comes in as a string from HAST props + const safeMode = typeof safeModeRaw !== 'boolean' ? 
safeModeRaw === 'true' : safeModeRaw; + const [cleanedHtml, exec] = extractScripts(html); useEffect(() => { diff --git a/docs/mdxish-flow.md b/docs/mdxish-flow.md new file mode 100644 index 000000000..124a7af52 --- /dev/null +++ b/docs/mdxish-flow.md @@ -0,0 +1,402 @@ +# MDXish Function Flow + +## Overview + +The `mdxish` function processes markdown content with MDX-like syntax support, detecting and rendering custom component tags from a components hash. It returns a HAST (Hypertext Abstract Syntax Tree). + +## Flow Diagram + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ INPUT: Raw Markdown │ +│ "# Hello {user.name}\n**Bold**" │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ STEP 1: Load Components │ +│ ───────────────────────────────────────────────────────────────────────── │ +│ loadComponents() → Loads all React components from components/index.ts │ +│ Merges with user-provided components (user overrides take priority) │ +│ Result: { Callout, Code, Tabs, ... 
} │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ STEP 2: Extract Magic Blocks │ +│ ───────────────────────────────────────────────────────────────────────── │ +│ extractMagicBlocks(content) │ +│ │ +│ Extracts legacy `[block:TYPE]JSON[/block]` syntax and replaces them with │ +│ placeholder tokens. The blocks are restored later by magicBlockRestorer. │ +│ │ +│ Result: { replaced: string, blocks: BlockHit[] } │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ STEP 3: Pre-process JSX Expressions │ +│ ───────────────────────────────────────────────────────────────────────── │ +│ preprocessJSXExpressions(content, jsxContext) │ +│ │ +│ 0. Protect HTMLBlock content (base64 encode to prevent parser issues) │ +│ 1. Extract & protect code blocks (```...```) and inline code (`...`) │ +│ 2. Remove JSX comments: {/* comment */} → "" │ +│ 3. Evaluate attribute expressions: href={baseUrl} → href="https://..." │ +│ 4. 
Restore protected code blocks │ +│ │ +│ Note: Inline expressions ({5 * 10}) are now parsed by mdast-util-mdx- │ +│ expression and evaluated in the AST transformer (evaluateExpressions) │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ UNIFIED PIPELINE (AST Transformations) │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ remarkParse │ │ REMARK PHASE │ +│ ─────────────── │ │ (MDAST - Markdown AST) │ +│ Parse markdown │ │ │ +│ into MDAST │ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ remarkFrontmatter│ │ │ +│ ─────────────── │ │ │ +│ Parse YAML │ │ │ +│ frontmatter │ │ │ +│ (metadata) │ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│magicBlockRestorer │ │ │ +│ ─────────────── │ │ │ +│ Restores legacy │ │ │ +│ [block:TYPE]JSON │ │ │ +│ [/block] syntax │ │ │ +│ from placeholder │ │ │ +│ tokens │ │ │ 
+ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ imageTransformer │ │ │ +│ ─────────────── │ │ │ +│ Converts inline │ │ │ +│ images to image │ │ │ +│ blocks. Preserves │ │ │ +│ magic block props │ │ │ +│ (width, align) │ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│defaultTransformers │ │ │ +│ ─────────────── │ │ │ +│ 1. callout │ │ │ +│ 2. codeTabs │ │ │ +│ 3. gemoji │ │ │ +│ 4. embed │ │ │ +│ [label](url │ │ │ +│ "@embed") │ │ │ +│ → embedBlock │ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│mdxishComponentBlocks │ │ +│ ───────────────── │ │ │ +│ Re-wraps HTML │ │ │ +│ blocks like │ │ │ +│ text │ │ │ +│ into │ │ │ +│ mdxJsxFlowElement │ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ mdxishTables │ │ │ +│ ───────────────── │ │ │ +│ Converts │ │ │ +│ JSX elements to │ │ │ +│ markdown table │ │ │ +│ nodes. Re-parses │ │ │ +│ markdown in cells │ │ │ +│ (e.g., **Bold**) │ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ mdxishHtmlBlocks │ │ │ +│ ───────────────── │ │ │ +│ Transforms │ │ │ +│ HTMLBlock MDX JSX │ │ │ +│ elements and │ │ │ +│ template literal │ │ │ +│ syntax to │ │ │ +│ html-block nodes. │ │ │ +│ Decodes protected │ │ │ +│ base64 content. 
│ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│evaluateExpressions │ │ │ +│ ───────────────── │ │ │ +│ Evaluates MDX │ │ │ +│ expressions │ │ │ +│ ({expression}) │ │ │ +│ using jsxContext │ │ │ +│ and replaces with │ │ │ +│ evaluated values │ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│variablesTextTransformer │ │ +│ ───────────────── │ │ │ +│ Parses {user.*} │ │ │ +│ patterns from text │ │ │ +│ using regex → │ │ │ +│ │ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│tailwindTransformer│ │ │ +│ ─────────────── │ │ │ +│ (conditional) │ │ │ +│ Processes │ │ │ +│ Tailwind classes │ │ │ +│ in components │ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ remarkGfm │ │ │ +│ ─────────────── │ │ │ +│ GitHub Flavored │ │ │ +│ Markdown support:│ │ │ +│ - Tables │ │ │ +│ - Strikethrough │ │ │ +│ - Task lists │ │ │ +│ - Autolinks │ │ │ +│ - Footnotes │ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ remarkRehype │ │ CONVERSION │ +│ ─────────────── │ │ MDAST → HAST │ +│ Convert MDAST │ │ │ +│ to HAST with │ 
│ │ +│ mdxComponentHandlers │ │ +│ (preserves MDX │ │ │ +│ elements) │ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ rehypeRaw │ │ REHYPE PHASE │ +│ ─────────────── │ │ (HAST - HTML AST) │ +│ Parse raw HTML │ │ │ +│ strings in AST │ │ │ +│ into proper HAST │ │ │ +│ elements │ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│ rehypeSlug │ │ │ +│ ─────────────── │ │ │ +│ Add IDs to │ │ │ +│ headings for │ │ │ +│ anchor linking │ │ │ +│ # Title → │ │ │ +│

│ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā–¼ │ │ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │ │ +│rehypeMdxishComponents │ │ +│ ───────────────── │ │ │ +│ Final pass: │ │ │ +│ 1. Skip standard │ │ │ +│ HTML tags │ │ │ +│ 2. Skip runtime │ │ │ +│ tags like Variable │ │ +│ 3. Match custom │ │ │ +│ components │ │ │ +│ 4. Convert props │ │ │ +│ to camelCase │ │ │ +│ 5. Recursively │ │ │ +│ process text │ │ │ +│ children as MD │ │ │ +│ 6. Remove unknown │ │ │ +│ components │ │ │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │ │ + │ │ │ + ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ OUTPUT: HAST Tree │ +│ │ +│ { │ +│ type: 'root', │ +│ children: [ │ +│ { type: 'element', tagName: 'h1', properties: { id: 'hello' }, ... 
}, │ +│ { type: 'element', tagName: 'Callout', properties: {...}, children: [ │ +│ { type: 'element', tagName: 'strong', children: ['Bold'] } │ +│ ]} │ +│ ] │ +│ } │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +## Pipeline Summary + +| Phase | Plugin | Purpose | +|-------|--------|---------| +| Pre-process | `extractMagicBlocks` | Extract legacy `[block:TYPE]` syntax, replace with tokens | +| Pre-process | `preprocessJSXExpressions` | Protect HTMLBlock content, evaluate JSX attribute expressions (`href={baseUrl}`) | +| MDAST | `remarkParse` + extensions | Markdown → AST with MDX expression parsing (`mdast-util-mdx-expression`) | +| MDAST | `remarkFrontmatter` | Parse YAML frontmatter (metadata) | +| MDAST | `magicBlockRestorer` | Restore legacy magic blocks from placeholder tokens | +| MDAST | `imageTransformer` | Transform images to image blocks, preserve magic block properties | +| MDAST | `defaultTransformers` | Transform callouts, code tabs, gemojis, embeds | +| MDAST | `mdxishComponentBlocks` | PascalCase HTML → `mdxJsxFlowElement` | +| MDAST | `mdxishTables` | `

` JSX → markdown `table` nodes, re-parse markdown in cells | +| MDAST | `mdxishHtmlBlocks` | `{`...`}` → `html-block` nodes | +| MDAST | `evaluateExpressions` | Evaluate MDX expressions (`{expression}`) using `jsxContext` | +| MDAST | `variablesTextTransformer` | `{user.*}` → `` nodes (regex-based) | +| MDAST | `tailwindTransformer` | Process Tailwind classes (conditional, if `useTailwind`) | +| MDAST | `remarkGfm` | GitHub Flavored Markdown: tables, strikethrough, task lists, autolinks, footnotes | +| Convert | `remarkRehype` + handlers | MDAST → HAST | +| HAST | `rehypeRaw` | Raw HTML strings → HAST elements | +| HAST | `rehypeSlug` | Add IDs to headings | +| HAST | `rehypeMdxishComponents` | Match & transform custom components | + +## Entry Points, Plugins and Utilities + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ ENTRY POINTS │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ mdxish(md) → HAST Main processor │ +│ mix(md) → string Wrapper that returns HTML string │ +│ renderMdxish(hast) → React Converts HAST to React components │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ PIPELINE PLUGINS │ 
+ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ magicBlockRestorer ← Restore legacy [block:TYPE] syntax │ +│ imageTransformer ← Images → imageBlock nodes │ +│ rehypeMdxishComponents ← Core component detection/transform │ +│ mdxishComponentBlocks ← PascalCase HTML → MDX elements │ +│ mdxishTables ←
JSX → markdown tables │ +│ mdxishHtmlBlocks ← → html-block nodes │ +│ mdxComponentHandlers ← MDAST→HAST conversion handlers │ +│ defaultTransformers ← callout, codeTabs, gemoji, embed │ +│ variablesTextTransformer ← {user.*} → Variable (regex-based) │ +│ tailwindTransformer ← Process Tailwind classes (opt-in) │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ UTILITIES │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ utils/common-html-words.ts ← STANDARD_HTML_TAGS, etc. │ +│ utils/load-components ← Auto-loads React components │ +│ utils/mdxish/mdxish-get-component-name ← getComponentName() │ +│ utils/render-utils.tsx ← Shared render utilities │ +│ utils/extractMagicBlocks← Extract legacy [block:] syntax │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` +# Some Outstanding Transformers + +## User Variables + +The `variablesTextTransformer` parses `{user.}` patterns directly from text nodes using regex (without requiring `remarkMdx`). 
Supported patterns: + +- `{user.name}` → dot notation +- `{user.email}` +- `{user.email_verified}` +- `{user['field']}` → bracket notation with single quotes +- `{user["field"]}` → bracket notation with double quotes + +All user object fields are supported: `name`, `email`, `email_verified`, `exp`, `iat`, `fromReadmeKey`, `teammateUserId`, etc. + +## Tables + +The `mdxishTables` transformer converts JSX Table elements to markdown table nodes and re-parses markdown content in table cells. + +The old MDX pipeline relies on `remarkMdx` to convert the table and its markdown content into MDX JSX elements. Since mdxish does not use `remarkMdx`, we have to do it manually. The workaround is to parse cell contents through `remarkParse` and `remarkGfm` to convert them to MDX JSX elements. + +### Example + +```html +
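<Table>
+  <thead>
+    <tr>
+      <th>Type</th>
+      <th>Example</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>Bold</td>
+      <td>**Bold text**</td>
+    </tr>
+    <tr>
+      <td>Italic</td>
+      <td>*Italic text*</td>
+    </tr>
+  </tbody>
+</Table>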
+``` + +This gets converted to a markdown `table` node where the cell containing `**Bold text**` is parsed into a `strong` element with a text node containing "Bold text". + +## HTMLBlocks + +The `mdxishHtmlBlocks` transformer converts `{`...`}` syntax to `html-block` MDAST nodes. The HTML string is stored in `data.hProperties.html` and passed to the React `HTMLBlock` component via the `html` prop during HAST→React conversion, ensuring compatibility with both the `mdxish` and `compile`+`run` pipelines. + +To prevent the markdown parser from incorrectly consuming ``}'; + * protectHTMLBlockContent(input) + * // Returns: '' + * ``` + */ +function protectHTMLBlockContent(content: string): string { + return content.replace( + /(]*>)\{\s*`((?:[^`\\]|\\.)*)`\s*\}(<\/HTMLBlock>)/g, + (_match, openTag: string, templateContent: string, closeTag: string) => { + const encoded = base64Encode(templateContent); + return `${openTag}${HTML_BLOCK_CONTENT_START}${encoded}${HTML_BLOCK_CONTENT_END}${closeTag}`; + }, + ); +} + +/** + * Replaces code blocks and inline code with placeholders to protect them from JSX processing. 
/**
 * Swaps fenced code blocks and inline code spans for numbered placeholders so the JSX
 * preprocessing steps that follow cannot rewrite anything inside code.
 *
 * Fences are paired in order of appearance: each "```" opens a block that runs up to and
 * including the next "```". An opening fence with no closing partner is left in the text
 * untouched. Inline code is matched afterwards, on text that already carries the fence
 * placeholders.
 *
 * @param content - Raw markdown.
 * @returns The placeholder-bearing text together with the snippets that were lifted out;
 *   the number inside each placeholder is the snippet's index in the matching array.
 * @example
 * ```typescript
 * const input = 'Text with `inline code` and ```fenced block```';
 * protectCodeBlocks(input)
 * // Returns: {
 * //   protectedCode: {
 * //     codeBlocks: ['```fenced block```'],
 * //     inlineCode: ['`inline code`']
 * //   },
 * //   protectedContent: 'Text with ___INLINE_CODE_0___ and ___CODE_BLOCK_0___'
 * // }
 * ```
 */
function protectCodeBlocks(content: string): ProtectCodeBlocksResult {
  const FENCE = '```';
  const codeBlocks: string[] = [];
  const inlineCode: string[] = [];
  const pieces: string[] = [];

  // Walk the input with a cursor rather than re-slicing the unprocessed tail for every
  // fence, so a document with many fences is copied once instead of once per fence.
  let cursor = 0;
  let fenceStart = content.indexOf(FENCE);

  while (fenceStart !== -1) {
    const fenceEnd = content.indexOf(FENCE, fenceStart + FENCE.length);
    // Unclosed fence: stop pairing; the tail (opening fence included) is appended verbatim.
    if (fenceEnd === -1) break;

    const blockEnd = fenceEnd + FENCE.length;
    pieces.push(content.slice(cursor, fenceStart), `___CODE_BLOCK_${codeBlocks.length}___`);
    codeBlocks.push(content.slice(fenceStart, blockEnd));

    cursor = blockEnd;
    fenceStart = content.indexOf(FENCE, cursor);
  }
  pieces.push(content.slice(cursor));

  const protectedContent = pieces.join('').replace(/`[^`]+`/g, span => {
    const placeholder = `___INLINE_CODE_${inlineCode.length}___`;
    inlineCode.push(span);
    return placeholder;
  });

  return { protectedCode: { codeBlocks, inlineCode }, protectedContent };
}

/**
 * Removes JSX-style comments (e.g., { /* comment *\/ }) from content.
 */
/**
 * Strips JSX-style comments (e.g., { /* comment *\/ }) from content.
 *
 * Only the comment expression itself is removed; whitespace around it is kept, so a
 * comment that stood between two spaces leaves both spaces behind.
 *
 * @param content - Text that may contain JSX comment expressions.
 * @returns Content with JSX comments removed
 * @example
 * ```typescript
 * removeJSXComments('Text { /* comment *\/ } more text')
 * // Returns: 'Text  more text'
 * ```
 */
function removeJSXComments(content: string): string {
  return content.replace(/\{\s*\/\*[^*]*(?:\*(?!\/)[^*]*)*\*\/\s*\}/g, '');
}

/**
 * Reads the text of a brace-delimited expression, honouring nested braces.
 *
 * `start` must be the index of the first character AFTER the opening `{`; the scan begins
 * at depth 1 and stops at the `}` that brings the depth back to 0. Braces are counted
 * purely textually: a `{` or `}` inside a string literal counts like any other.
 *
 * @param content - Text containing the expression.
 * @param start - Index just past the opening brace.
 * @returns The text between the braces and the index just past the closing brace, or
 *   `null` when the closing brace is never found.
 * @example
 * ```typescript
 * const input = 'foo{bar{baz}qux}end';
 * extractBalancedBraces(input, 4) // index 4 is the 'b' right after the first '{'
 * // Returns: { content: 'bar{baz}qux', end: 16 }
 * ```
 */
function extractBalancedBraces(content: string, start: number): { content: string; end: number } | null {
  let depth = 1;

  for (let pos = start; pos < content.length; pos += 1) {
    const char = content[pos];
    if (char === '{') {
      depth += 1;
    } else if (char === '}') {
      depth -= 1;
      if (depth === 0) return { content: content.slice(start, pos), end: pos + 1 };
    }
  }

  return null;
}

/**
 * Converts JSX attribute expressions (attribute={expression}) to HTML attributes (attribute="value").
 * Handles style objects (camelCase → kebab-case), className → class, and JSON stringifies objects.
 */
/**
 * Rewrites JSX attribute expressions (`name={expression}`) as plain HTML attributes.
 *
 * Every expression is passed to `evaluateExpression` together with `context`, and the
 * result decides the output form:
 * - `style` with an object value → `style="..."`, each camelCase key turned into a
 *   kebab-case declaration
 * - any other object (arrays included) → `name='<JSON>'`
 * - `className` → `class="..."`
 * - anything else → `name="..."`
 *
 * The quote character that delimits the emitted value is entity-escaped inside the value
 * (`"` → `&quot;`, `'` → `&#39;`) so a quote in the evaluated result cannot end the
 * attribute early. An expression that throws while being evaluated or serialized, or whose
 * braces never balance, is left in the output exactly as written.
 *
 * @param content - Text containing `name={...}` attribute expressions.
 * @param context - Values made available to the evaluated expressions.
 * @returns Content with attribute expressions evaluated and converted to HTML attributes
 * @example
 * ```typescript
 * const context = { baseUrl: 'https://example.com' };
 * const input = '<a href={baseUrl}>Link</a>';
 * evaluateAttributeExpressions(input, context)
 * // Returns: '<a href="https://example.com">Link</a>'
 * ```
 */
function evaluateAttributeExpressions(content: string, context: JSXContext): string {
  const attrStartRegex = /(\w+)=\{/g;

  // Only the delimiter itself is escaped; everything else in the value is passed through.
  const doubleQuoted = (value: unknown): string => `"${`${value}`.replace(/"/g, '&quot;')}"`;
  const singleQuoted = (value: string): string => `'${value.replace(/'/g, '&#39;')}'`;

  const serialize = (attributeName: string, evalResult: unknown): string => {
    if (typeof evalResult === 'object' && evalResult !== null) {
      if (attributeName === 'style') {
        const cssString = Object.entries(evalResult)
          .map(([key, value]) => `${key.replace(/([A-Z])/g, '-$1').toLowerCase()}: ${value}`)
          .join('; ');
        return `style=${doubleQuoted(cssString)}`;
      }
      // JSON is full of double quotes, hence the single-quoted attribute.
      return `${attributeName}=${singleQuoted(JSON.stringify(evalResult))}`;
    }
    if (attributeName === 'className') return `class=${doubleQuoted(evalResult)}`;
    return `${attributeName}=${doubleQuoted(evalResult)}`;
  };

  let result = '';
  let lastEnd = 0;

  for (let match = attrStartRegex.exec(content); match !== null; match = attrStartRegex.exec(content)) {
    const extracted = extractBalancedBraces(content, match.index + match[0].length);

    // Unbalanced braces: nothing is consumed and scanning resumes right after `name={`.
    if (extracted) {
      result += content.slice(lastEnd, match.index);

      try {
        result += serialize(match[1], evaluateExpression(extracted.content, context));
      } catch {
        // Evaluation or serialization failed: keep the original `name={...}` text.
        result += content.slice(match.index, extracted.end);
      }

      // Jump past the whole expression so attributes nested inside it are not re-scanned.
      lastEnd = extracted.end;
      attrStartRegex.lastIndex = extracted.end;
    }
  }

  return result + content.slice(lastEnd);
}

/**
 * Restores code blocks and inline code by replacing placeholders with original content.
 */
/**
 * Puts protected code back by replacing placeholders with the snippets they stand for.
 *
 * Restoration runs in the reverse order of protection. Inline spans are captured after
 * fences have been replaced, so a captured inline span can itself contain a
 * `___CODE_BLOCK_n___` placeholder; inline code is therefore restored first and code
 * blocks second, which resolves such nested placeholders too. A placeholder whose number
 * has no stored snippet is left in the text unchanged.
 *
 * @param content - Text containing `___CODE_BLOCK_n___` / `___INLINE_CODE_n___` placeholders.
 * @param protectedCode - Snippets to restore, indexed by placeholder number.
 * @returns Content with all code blocks and inline code restored
 * @example
 * ```typescript
 * const content = 'Text with ___INLINE_CODE_0___ and ___CODE_BLOCK_0___';
 * const protectedCode = {
 *   codeBlocks: ['```js\ncode\n```'],
 *   inlineCode: ['`inline`']
 * };
 * restoreCodeBlocks(content, protectedCode)
 * // Returns: 'Text with `inline` and ```js\ncode\n```'
 * ```
 */
function restoreCodeBlocks(content: string, protectedCode: ProtectedCode): string {
  const withInlineCode = content.replace(
    /___INLINE_CODE_(\d+)___/g,
    (placeholder: string, index: string) => protectedCode.inlineCode[parseInt(index, 10)] ?? placeholder,
  );

  return withInlineCode.replace(
    /___CODE_BLOCK_(\d+)___/g,
    (placeholder: string, index: string) => protectedCode.codeBlocks[parseInt(index, 10)] ?? placeholder,
  );
}

/**
 * Preprocesses JSX-like expressions in markdown before parsing.
 * Inline expressions are handled separately; attribute expressions are processed here.
 */
/**
 * Runs the pre-parse JSX pipeline over raw markdown.
 *
 * Only attribute expressions (`href={baseUrl}`) are evaluated here, by hand; inline
 * expressions are parsed with a library and evaluated at a later stage.
 *
 * @param content - Raw markdown, possibly containing JSX-like syntax.
 * @param context - Values exposed to attribute expressions; defaults to an empty object.
 * @returns Preprocessed content ready for markdown parsing
 */
export function preprocessJSXExpressions(content: string, context: JSXContext = {}): string {
  // 0. HTMLBlock template-literal bodies are base64-encoded before anything else runs.
  const htmlBlocksEncoded = protectHTMLBlockContent(content);

  // 1. Fenced and inline code is swapped for placeholders.
  const { protectedCode, protectedContent } = protectCodeBlocks(htmlBlocksEncoded);

  // 2. JSX comments are dropped.
  const commentsRemoved = removeJSXComments(protectedContent);

  // 3. Attribute expressions are evaluated into plain HTML attributes.
  const attributesEvaluated = evaluateAttributeExpressions(commentsRemoved, context);

  // 4. The code hidden in step 1 is put back.
  return restoreCodeBlocks(attributesEvaluated, protectedCode);
}

// Patch boundary, kept verbatim from the flattened diff:
// diff --git a/processor/transform/mdxish/variables-text.ts b/processor/transform/mdxish/variables-text.ts
// new file mode 100644
// index 000000000..7997c06b1
// --- /dev/null
// +++ b/processor/transform/mdxish/variables-text.ts
// @@ -0,0 +1,80 @@
import type { Variable } from '../../../types';
import type { Parent, Text } from 'mdast';
import type { Plugin } from 'unified';

import { visit } from 'unist-util-visit';

import { NodeTypes } from '../../../enums';

/**
 * Matches {user.<field>} patterns:
 * - {user.name}
 * - {user.email}
 * - {user['field']}
 * - {user["field"]}
 *
 * The field name lands in capture group 1 for dot notation and in group 2 for bracket
 * notation.
 */
const USER_VAR_REGEX = /\{user\.(\w+)\}|\{user\[['"](\w+)['"]\]\}/g;

/**
 * A remark plugin that parses {user.<field>} patterns from text nodes
 * without requiring remarkMdx. Creates Variable nodes for runtime resolution.
 *
 * Supports any user field: name, email, email_verified, exp, iat, etc.
 */
/**
 * Remark plugin that turns `{user.<field>}` references found in text nodes into Variable
 * nodes, without requiring remarkMdx. The Variable nodes are resolved at runtime.
 *
 * Dot notation (`{user.name}`) and bracket notation (`{user['name']}`, `{user["name"]}`)
 * are both recognised, for any field name made of word characters. A text node that
 * contains references is replaced in its parent by the surrounding text fragments and one
 * Variable node per reference, in document order.
 */
const variablesTextTransformer: Plugin = () => tree => {
  visit(tree, 'text', (node: Text, index, parent: Parent) => {
    if (index === undefined || !parent) return undefined;

    // Skip if inside inline code
    if (parent.type === 'inlineCode') return undefined;

    const text = node.value;
    // Cheap substring check so the regex only runs on candidate nodes.
    if (!text.includes('{user.') && !text.includes('{user[')) return undefined;

    const parts: (Text | Variable)[] = [];
    let lastIndex = 0;

    for (const match of text.matchAll(USER_VAR_REGEX)) {
      const matchIndex = match.index ?? 0;

      // Plain text between the previous reference (or the start) and this one.
      if (matchIndex > lastIndex) {
        parts.push({ type: 'text', value: text.slice(lastIndex, matchIndex) } satisfies Text);
      }

      parts.push({
        type: NodeTypes.variable,
        data: {
          hName: 'Variable',
          // Group 1 is filled by dot notation, group 2 by bracket notation.
          hProperties: { name: match[1] ?? match[2] },
        },
        value: match[0],
      } satisfies Variable);

      lastIndex = matchIndex + match[0].length;
    }

    // Nothing has been pushed yet unless at least one reference matched.
    if (parts.length === 0) return undefined;

    // Plain text after the last reference.
    if (lastIndex < text.length) {
      parts.push({ type: 'text', value: text.slice(lastIndex) } satisfies Text);
    }

    parent.children.splice(index, 1, ...parts);

    // Resume after the inserted nodes: they need no second pass, and the sibling that
    // followed the original text node now sits at this index.
    return index + parts.length;
  });

  return tree;
};

export default variablesTextTransformer;

// Patch residue, kept verbatim from the flattened diff:
// diff --git a/processor/transform/variables.ts b/processor/transform/variables.ts
// index e63dca38b..29f5df4a3 100644
// --- a/processor/transform/variables.ts
// +++ b/processor/transform/variables.ts
// @@ -6,7 +6,6 @@ import { visit } from 'unist-util-visit';
//
//  import { NodeTypes } from '../../enums';
//
// -
//  const variables =
//    ({ asMdx } = { asMdx: true }): Transform =>
//    tree => {
// diff --git a/processor/utils.ts b/processor/utils.ts
// index ece9879ad..85d3c4451 100644
// --- a/processor/utils.ts
// +++ b/processor/utils.ts
// @@ -122,6 +122,48 @@
export const isMDXEsm = (node: Node): node is MdxjsEsm => { return node.type === 'mdxjsEsm'; }; +/** + * Takes an HTML string and formats it for display in the editor. Removes leading/trailing newlines + * and unindents the HTML. + * + * @param {string} html - HTML content from template literal + * @returns {string} processed HTML + */ +export function formatHtmlForMdxish(html: string): string { + // Remove leading/trailing backticks if present, since they're used to keep the HTML + // from being parsed prematurely + let processed = html; + if (processed.startsWith('`') && processed.endsWith('`')) { + processed = processed.slice(1, -1); + } + // Removes the leading/trailing newlines + let cleaned = processed.replace(/^\s*\n|\n\s*$/g, ''); + + // Convert literal \n sequences to actual newlines BEFORE processing backticks + // This prevents the backtick unescaping regex from incorrectly matching \n sequences + cleaned = cleaned.replace(/\\n/g, '\n'); + + // Unescape backticks: \` -> ` (users escape backticks in template literals) + // Handle both cases: \` (adjacent) and \ followed by ` (split by markdown parser) + cleaned = cleaned.replace(/\\`/g, '`'); + + // Also handle case where backslash and backtick got separated by markdown parsing + // Pattern: backslash followed by any characters (but not \n which we already handled), then a backtick + // This handles cases like: \example` -> `example` (replacing \ with ` at start) + // Exclude \n sequences to avoid matching them incorrectly + cleaned = cleaned.replace(/\\([^`\\n]*?)`/g, '`$1`'); + + // Fix case where markdown parser consumed one backtick from triple backticks + // Pattern: `` followed by a word (like ``javascript) should be ```javascript + // This handles cases where code fences were parsed and one backtick was lost + cleaned = cleaned.replace(/<(\w+[^>]*)>``(\w+)/g, '<$1>```$2'); + + // Unescape dollar signs: \$ -> $ (users escape $ in template literals to prevent interpolation) + cleaned = 
cleaned.replace(/\\\$/g, '$'); + + return cleaned; +} + /** * Takes an HTML string and formats it for display in the editor. Removes leading/trailing newlines * and unindents the HTML. diff --git a/scripts/compare-mdx-mdxish.js b/scripts/compare-mdx-mdxish.js new file mode 100644 index 000000000..462d85bb8 --- /dev/null +++ b/scripts/compare-mdx-mdxish.js @@ -0,0 +1,539 @@ +// Script for comparing MDX vs MDXish performance on large documents +/* eslint-disable no-console */ +const { Blob } = require('node:buffer'); + +const rdmd = require('..'); + +/** + * Create a plain markdown document of approximately the target byte size + * Uses safe markdown that works with both MDX and MDXish + */ +const createLargeDoc = (targetBytes) => { + // Use only plain markdown features - no MDX expressions or JSX components + const plainMarkdown = ` +# Performance Test Document + +This is a large markdown document used for performance testing of MDX and MDXish processors. + +## Introduction + +Markdown is a lightweight markup language that you can use to add formatting elements to plaintext text documents. Created by John Gruber in 2004, Markdown is now one of the world's most popular markup languages. + +## Features + +Markdown supports various formatting features: + +- **Bold text** for emphasis +- *Italic text* for subtle emphasis +- \`Inline code\` for code snippets +- [Links](https://example.com) to external resources +- Images with alt text + +## Code Blocks + +Here are some code examples in different languages: + +\`\`\`javascript +function greet(name) { + return "Hello, " + name + "!"; +} + +console.log(greet("World")); +\`\`\` + +\`\`\`python +def greet(name): + return f"Hello, {name}!" 
+ +print(greet("World")) +\`\`\` + +\`\`\`typescript +interface User { + id: number; + name: string; + email: string; +} + +function getUser(id: number): Promise { + return fetch(\`/api/users/\${id}\`).then(res => res.json()); +} +\`\`\` + +## Tables + +Markdown tables are useful for displaying structured data: + +| Feature | Status | Priority | +|---------|--------|----------| +| Feature A | Complete | High | +| Feature B | In Progress | Medium | +| Feature C | Planned | Low | + +| Language | Year | Creator | +|----------|------|---------| +| JavaScript | 1995 | Brendan Eich | +| Python | 1991 | Guido van Rossum | +| TypeScript | 2012 | Microsoft | + +## Lists + +### Ordered Lists + +1. First item +2. Second item +3. Third item + 1. Nested item one + 2. Nested item two +4. Fourth item + +### Unordered Lists + +- Item one +- Item two + - Nested bullet + - Another nested bullet +- Item three + +## Blockquotes + +> This is a blockquote. It can contain multiple paragraphs. +> +> And continue on multiple lines with proper formatting. + +> Another blockquote with **bold** and *italic* text inside. + +## Paragraphs + +This is a paragraph with some text. It contains multiple sentences to demonstrate paragraph formatting. Each sentence adds to the overall content and helps create a realistic document structure. + +Another paragraph follows with different content. This helps test how the processors handle multiple paragraphs and spacing between them. + +## Headings + +### Level 3 Heading + +#### Level 4 Heading + +##### Level 5 Heading + +###### Level 6 Heading + +## More Content + +This section contains additional content to increase the document size. The content includes various markdown elements to test comprehensive processing capabilities. + +### Subsection One + +Content in subsection one with various formatting elements. + +### Subsection Two + +Content in subsection two with code examples and lists. 
+ +### Subsection Three + +Content in subsection three with tables and blockquotes. +`; + + let doc = ''; + let repetition = 0; + const targetSize = targetBytes; + + while (new Blob([doc]).size < targetSize) { + // Add repetition number to make each section unique + const numberedContent = plainMarkdown.replace(/# Performance Test Document/g, + `# Performance Test Document - Part ${repetition + 1}`); + doc += numberedContent; + doc += '\n\n---\n\n'; + repetition += 1; + + // Safety check + if (repetition > 500) break; + } + + return doc; +}; + +// https://stackoverflow.com/a/14919494 +function humanFileSize(bytesInput, si = false, dp = 1) { + const thresh = si ? 1000 : 1024; + const bytes = bytesInput; + + if (Math.abs(bytes) < thresh) { + return `${bytes} B`; + } + + const units = si + ? ['kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'] + : ['KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']; + let u = -1; + const r = 10 ** dp; + let size = bytes; + + do { + size /= thresh; + u += 1; + } while (Math.round(Math.abs(size) * r) / r >= thresh && u < units.length - 1); + + return `${size.toFixed(dp)} ${units[u]}`; +} + +/** + * Benchmark a function and return timing and result info + */ +function benchmark(name, fn) { + try { + const start = Date.now(); + const result = fn(); + const duration = Date.now() - start; + + // Calculate result size + let resultSize = 0; + if (typeof result === 'string') { + resultSize = new Blob([result]).size; + } else if (result && typeof result === 'object') { + // For HAST tree, estimate size from JSON + try { + resultSize = new Blob([JSON.stringify(result)]).size; + } catch { + // If JSON.stringify fails, estimate from object keys + resultSize = Object.keys(result).length * 100; + } + } + + return { + name, + duration, + resultSize, + result, + error: null, + }; + } catch (error) { + return { + name, + duration: 0, + resultSize: 0, + result: null, + error: error.message || String(error), + }; + } +} + +/** + * Format a table row + 
/**
 * Format a table row: every cell is stringified, right-padded to its column width, and the
 * cells are joined with ` | ` between outer pipes.
 */
function formatTableRow(columns, widths) {
  const cells = columns.map((cell, i) => String(cell).padEnd(widths[i]));
  return `| ${cells.join(' | ')} |`;
}

/**
 * Print a formatted table to the console: header row, dashed separator, then one line per
 * row. A column is as wide as its longest cell or header, and never narrower than 10.
 */
function printTable(headers, rows) {
  const MIN_WIDTH = 10;
  const widths = headers.map((header, i) =>
    Math.max(MIN_WIDTH, header.length, ...rows.map(row => String(row[i] || '').length)),
  );
  const separator = `|${widths.map(width => '-'.repeat(width + 2)).join('|')}|`;

  console.log(formatTableRow(headers, widths));
  console.log(separator);
  rows.forEach(row => {
    console.log(formatTableRow(row, widths));
  });
}

// Document sizes to test
const documentSizes = [
  { name: 'Large', size: 50 * 1024, description: '~50KB' },
  { name: 'Extra Large', size: 400 * 1024, description: '~400KB' },
  { name: 'Extra Extra Large', size: 4 * 1024 * 1024, description: '~4MB' },
];

// Untimed runs before measuring, and timed repetitions behind the "average" tables.
const WARMUP_RUNS = 3;
const ITERATIONS = 10;

/**
 * Collapses a list of successful durations into avg/min/max plus success counters.
 * All three statistics are 0 when the list is empty; the average tables treat an average
 * of 0 as "no data".
 */
function summarizeDurations(durations) {
  const count = durations.length;
  return {
    avg: count > 0 ? durations.reduce((sum, ms) => sum + ms, 0) / count : 0,
    min: count > 0 ? Math.min(...durations) : 0,
    max: count > 0 ? Math.max(...durations) : 0,
    successful: count,
    total: ITERATIONS,
  };
}

/**
 * Run benchmarks for a specific document size and return results: one timed run per
 * pipeline, then ITERATIONS further runs per pipeline summarised as avg/min/max.
 */
function runBenchmarkForSize(sizeConfig) {
  const { name, size, description } = sizeConfig;
  const doc = createLargeDoc(size);
  const actualSize = new Blob([doc]).size;

  // The four pipelines under test; they are always invoked in this order.
  const pipelines = {
    mdast: () => rdmd.mdast(doc),
    mdxish: () => rdmd.mdxish(doc),
    mdx: () => rdmd.mdx(rdmd.mdast(doc)),
    mix: () => rdmd.mix(doc),
  };

  for (let run = 0; run < WARMUP_RUNS; run += 1) {
    try {
      Object.values(pipelines).forEach(pipeline => pipeline());
    } catch {
      // Ignore warm-up errors; a throw skips the rest of that warm-up pass only.
    }
  }

  // Single run benchmarks
  const singleRun = {
    parseOnly: {
      mdast: benchmark('MDAST parse', pipelines.mdast),
      mdxish: benchmark('MDXish parse (HAST)', pipelines.mdxish),
    },
    stringOutput: {
      mdx: benchmark('MDX (via mdast)', pipelines.mdx),
      mdxish: benchmark('MDXish (via mix - HTML)', pipelines.mix),
    },
  };

  // Multiple iterations for averages; failed runs contribute no duration.
  const iterationLabels = [
    ['mdast', 'MDAST parse'],
    ['mdxish', 'MDXish parse'],
    ['mdx', 'MDX string'],
    ['mix', 'MDXish string'],
  ];
  const durations = { mdast: [], mdxish: [], mdx: [], mix: [] };

  for (let i = 0; i < ITERATIONS; i += 1) {
    iterationLabels.forEach(([key, label]) => {
      const outcome = benchmark(label, pipelines[key]);
      if (!outcome.error) durations[key].push(outcome.duration);
    });
  }

  return {
    name,
    description,
    documentSize: actualSize,
    singleRun,
    averages: {
      parseOnly: {
        mdast: summarizeDurations(durations.mdast),
        mdxish: summarizeDurations(durations.mdxish),
      },
      stringOutput: {
        mdx: summarizeDurations(durations.mdx),
        mdxish: summarizeDurations(durations.mix),
      },
    },
  };
}

/**
 * Builds one row of a single-run table comparing `first` (labelled `firstLabel`) with the
 * MDXish result `second`. Comparison cells read 'N/A' when either run failed; the speedup
 * is also 'N/A' when the quicker run measured 0 ms, where a ratio is undefined.
 */
function singleRunRow(result, first, second, firstLabel) {
  const failed = Boolean(first.error || second.error);
  const slower = Math.max(first.duration, second.duration);
  const quicker = Math.min(first.duration, second.duration);
  const winner = first.duration > second.duration ? 'MDXish' : firstLabel;

  return [
    `${result.name} (${result.description})`,
    first.error ? 'ERROR' : first.duration.toFixed(2),
    second.error ? 'ERROR' : second.duration.toFixed(2),
    failed ? 'N/A' : winner,
    failed || quicker === 0 ? 'N/A' : `${(slower / quicker).toFixed(2)}x`,
    first.error ? 'N/A' : humanFileSize(first.resultSize),
    second.error ? 'N/A' : humanFileSize(second.resultSize),
  ];
}

/**
 * Builds one row of an averages table. A side whose average is 0 is shown as 'N/A', and
 * the comparison cells are 'N/A' whenever either side has no data.
 */
function averageRow(result, first, second, firstLabel) {
  const missing = first.avg === 0 || second.avg === 0;
  const winner = first.avg > second.avg ? 'MDXish' : firstLabel;

  return [
    `${result.name} (${result.description})`,
    first.avg === 0 ? 'N/A' : first.avg.toFixed(2),
    second.avg === 0 ? 'N/A' : second.avg.toFixed(2),
    first.avg === 0 ? 'N/A' : `${first.min}-${first.max}`,
    second.avg === 0 ? 'N/A' : `${second.min}-${second.max}`,
    missing ? 'N/A' : winner,
    missing ? 'N/A' : `${(Math.max(first.avg, second.avg) / Math.min(first.avg, second.avg)).toFixed(2)}x`,
    missing ? 'N/A' : Math.abs(first.avg - second.avg).toFixed(2),
  ];
}

/**
 * Print all results in table format: single-run and averaged timings, each for the
 * parse-only pipelines (MDAST vs MDXish) and the string-output pipelines (MDX vs MDXish).
 */
function printResults(allResults) {
  const rule = char => char.repeat(100);
  const section = title => {
    console.log(`\n📊 ${title}`);
    console.log(rule('-'));
  };

  console.log(`\n${rule('=')}`);
  console.log('MDX vs MDXish Performance Comparison Results');
  console.log(rule('='));

  section('Single Run Results: Parse-only (AST trees)');
  printTable(
    ['Document Size', 'MDAST (ms)', 'MDXish (ms)', 'Faster', 'Speedup', 'Output Size (MDAST)', 'Output Size (MDXish)'],
    allResults.map(result =>
      singleRunRow(result, result.singleRun.parseOnly.mdast, result.singleRun.parseOnly.mdxish, 'MDAST'),
    ),
  );

  section('Single Run Results: String Outputs');
  printTable(
    ['Document Size', 'MDX (ms)', 'MDXish (ms)', 'Faster', 'Speedup', 'Output Size (MDX)', 'Output Size (MDXish)'],
    allResults.map(result =>
      singleRunRow(result, result.singleRun.stringOutput.mdx, result.singleRun.stringOutput.mdxish, 'MDX'),
    ),
  );

  section(`Average Results (${ITERATIONS} iterations): Parse-only (AST trees)`);
  printTable(
    ['Document Size', 'MDAST Avg (ms)', 'MDXish Avg (ms)', 'MDAST Min-Max', 'MDXish Min-Max', 'Faster', 'Speedup', 'Time Saved (ms)'],
    allResults.map(result =>
      averageRow(result, result.averages.parseOnly.mdast, result.averages.parseOnly.mdxish, 'MDAST'),
    ),
  );

  section(`Average Results (${ITERATIONS} iterations): String Outputs`);
  printTable(
    ['Document Size', 'MDX Avg (ms)', 'MDXish Avg (ms)', 'MDX Min-Max', 'MDXish Min-Max', 'Faster', 'Speedup', 'Time Saved (ms)'],
    allResults.map(result =>
      averageRow(result, result.averages.stringOutput.mdx, result.averages.stringOutput.mdxish, 'MDX'),
    ),
  );

  console.log(`\n${rule('=')}`);
}

// Main execution
console.log('MDX vs MDXish Performance Comparison');
console.log('Running benchmarks... (this may take a while)');

// Run benchmarks for each document size and collect results
const allResults = documentSizes.map((sizeConfig, index) => {
  process.stdout.write(`\rProgress: ${index + 1}/${documentSizes.length} - ${sizeConfig.name}...`);
  return runBenchmarkForSize(sizeConfig);
});

process.stdout.write(`\r${' '.repeat(50)}\r`); // Clear progress line

// Print all results
printResults(allResults);

// Patch residue, kept verbatim from the flattened diff:
// diff --git a/types.d.ts b/types.d.ts
// index 32138e0f7..1f4f54ef3 100644
// --- a/types.d.ts
// +++ b/types.d.ts
// @@ -85,6 +85,7 @@ interface HTMLBlock extends Node {
//      hProperties: {
//        html: string;
//        runScripts?: boolean | string;
// +      safeMode?: string;
//      };
//    };
//    type: NodeTypes.htmlBlock;
// diff --git a/utils/common-html-words.ts b/utils/common-html-words.ts
// new file mode 100644
// index 000000000..2dd2379c7
// --- /dev/null
// +++ b/utils/common-html-words.ts
// @@ -0,0 +1,93 @@
import htmlTags from 'html-tags';
import reactHtmlAttributes from 'react-html-attributes';
import { allProps as reactNativeStylingProps } from 'react-native-known-styling-properties';

/**
 * Extract word boundaries from camelCase strings (e.g., "borderWidth" -> ["border", "width"]).
 * A space is inserted before every ASCII capital, the string is lower-cased, and the
 * result is split on whitespace.
 */
function extractWordBoundaries(camelCaseStr: string): string[] {
  const spaced = camelCaseStr.replace(/([A-Z])/g, ' $1').toLowerCase().trim();
  return spaced.split(/\s+/).filter(word => word.length > 0);
}

/**
 * Get all unique word boundaries from an array of camelCase property names.
 * Filters out single-letter words to avoid false matches in smartCamelCase.
 */
/**
 * Collects the distinct words that make up a list of camelCase property names.
 *
 * Each name is split with `extractWordBoundaries`; words shorter than two characters are
 * dropped, because a single letter (e.g. "d") would match inside unrelated words such as
 * "data". The result is de-duplicated and sorted with the default string ordering.
 *
 * @param props - camelCase property names.
 * @returns Sorted, unique words of length 2 or more.
 */
function getWordBoundariesFromProps(props: string[]): string[] {
  const boundaries = new Set<string>();

  for (const prop of props) {
    for (const word of extractWordBoundaries(prop)) {
      if (word.length >= 2) boundaries.add(word);
    }
  }

  return Array.from(boundaries).sort();
}

/**
 * React HTML element props word boundaries (e.g., "on", "data", "aria", "accept", "auto")
 * Extracted from the `'*'` entry of the react-html-attributes package; an absent entry is
 * treated as an empty list.
 */
export const REACT_HTML_PROP_BOUNDARIES = getWordBoundariesFromProps(reactHtmlAttributes['*'] ?? []);

/**
 * CSS style property word boundaries (e.g., "border", "margin", "padding", "flex", "align")
 * Extracted from react-native-known-styling-properties package
 */
export const CSS_STYLE_PROP_BOUNDARIES = getWordBoundariesFromProps(reactNativeStylingProps as string[]);

/**
 * Custom component prop word boundaries not in React HTML or CSS boundaries.
 */
export const CUSTOM_PROP_BOUNDARIES = [
  'alt',
  'attribute',
  'attributes',
  'buttons',
  'caption',
  'collection',
  'columns',
  'copy',
  'dark',
  'data',
  'depth',
  'download',
  'embed',
  'empty',
  'favicon',
  'flow',
  'iframe',
  'image',
  'layout',
  'lazy',
  'meta',
  'provider',
  'run',
  'safe',
  'scripts',
  'tag',
  'term',
  'terms',
  'theme',
  'url',
  'value',
];

/**
 * Tags that should be passed through and handled at runtime (not by the mdxish plugin)
 */
export const RUNTIME_COMPONENT_TAGS = new Set(['Variable', 'variable']);

/**
 * Standard HTML tags that should never be treated as custom components.
 * Uses the html-tags package, converted to a Set for efficient lookups. The element type
 * is widened to `string` so arbitrary tag names can be tested with `has()`.
 */
export const STANDARD_HTML_TAGS = new Set<string>(htmlTags);