diff --git a/scripts/notionClient.test.ts b/scripts/notionClient.test.ts index 0b0a58f1..e1a86b45 100644 --- a/scripts/notionClient.test.ts +++ b/scripts/notionClient.test.ts @@ -38,7 +38,7 @@ vi.mock("chalk", () => ({ describe("notionClient", () => { let mockClient: any; - let mockN2M: any; + let notionToMarkdownInstances: any[]; let originalEnv: NodeJS.ProcessEnv; let consoleMocks: ReturnType; @@ -71,12 +71,6 @@ describe("notionClient", () => { blocks: { children: { list: vi.fn() }, append: vi.fn(), delete: vi.fn() }, }; - // Create mock NotionToMarkdown - mockN2M = { - pageToMarkdown: vi.fn(), - toMarkdownString: vi.fn(), - }; - // Set up constructor mocks - create a proper constructor function const MockClientClass = vi.fn().mockImplementation(function ( this: any, @@ -85,11 +79,39 @@ describe("notionClient", () => { return mockClient; }); + notionToMarkdownInstances = []; + const MockNotionToMarkdownClass = vi.fn().mockImplementation(function ( this: any, config: any ) { - return mockN2M; + const instance: any = { + pageToMarkdown: vi.fn(), + toMarkdownString: vi.fn(), + customTransformers: {} as Record, + }; + + instance.blockToMarkdown = vi.fn(async function (this: any, block: any) { + const transformer = this.customTransformers?.[block.type]; + + if (transformer) { + throw new Error("paragraph transformer recursion"); + } + + return `default:${block.id ?? ""}`; + }); + + instance.setCustomTransformer = vi.fn(function ( + this: any, + type: string, + transformer: unknown + ) { + this.customTransformers[type] = transformer; + return this; + }); + + notionToMarkdownInstances.push(instance); + return instance; }); // Replace the Client and NotionToMarkdown with our mocks @@ -176,6 +198,63 @@ describe("notionClient", () => { // Assert expect(DATABASE_ID).toBe("exported-database-id"); }); + + it("should register a spacer transformer for empty paragraph blocks", async () => { + await import("./notionClient"); + + expect(notionToMarkdownInstances.length).toBe(1); + + const [primaryN2M] = notionToMarkdownInstances; + + expect(primaryN2M.setCustomTransformer).toHaveBeenCalledWith( + "paragraph", + expect.any(Function) + ); + + const transformer = primaryN2M.setCustomTransformer.mock.calls.find( + (call: any[]) => call[0] === "paragraph" + )?.[1]; + + expect(typeof transformer).toBe("function"); + + const emptyParagraph = { + id: "empty", + type: "paragraph", + has_children: false, + paragraph: { + rich_text: [], + }, + } as any; + + const spacerResult = await transformer(emptyParagraph); + expect(typeof spacerResult).toBe("string"); + expect(spacerResult).toContain("notion-spacer"); + expect(primaryN2M.blockToMarkdown).not.toHaveBeenCalled(); + + const populatedParagraph = { + id: "content", + type: "paragraph", + has_children: false, + paragraph: { + rich_text: [ + { + type: "text", + text: { content: "Hello" }, + plain_text: "Hello", + }, + ], + }, + } as any; + + primaryN2M.blockToMarkdown.mockClear(); + + const markdownResult = await transformer(populatedParagraph); + expect(primaryN2M.blockToMarkdown).toHaveBeenCalledWith( + populatedParagraph + ); + expect(markdownResult).toBe(`default:${populatedParagraph.id}`); + expect(primaryN2M.customTransformers.paragraph).toBe(transformer); + }); }); describe("enhancedNotion.databasesQuery", () => { diff --git a/scripts/notionClient.ts b/scripts/notionClient.ts index f442b92a..72967d5a 100644 --- a/scripts/notionClient.ts +++ b/scripts/notionClient.ts @@ -1,6 +1,10 @@ import dotenv from "dotenv"; import { Client } from "@notionhq/client"; import { NotionToMarkdown } from "notion-to-md"; +import type { + BlockObjectResponse, + ParagraphBlockObjectResponse, +} from "@notionhq/client/build/src/api-endpoints"; import chalk from "chalk"; import { perfTelemetry } from "./perfTelemetry"; import { @@ -8,6 +12,10 @@ import { setCircuitBreakerCheck, } from "./notion-fetch/requestScheduler"; +type MarkdownBlock = Awaited< + ReturnType["blockToMarkdown"]> +>; + dotenv.config(); if (!process.env.NOTION_API_KEY) { @@ -250,6 +258,78 @@ const notion = new Client({ const n2m = new NotionToMarkdown({ notionClient: notion }); +type BlockToMarkdown = InstanceType["blockToMarkdown"]; +const defaultParagraphToMarkdown = n2m.blockToMarkdown.bind( + n2m +) as BlockToMarkdown; + +const NOTION_SPACER_HTML = + ''; + +function hasVisibleParagraphContent( + block: ParagraphBlockObjectResponse +): boolean { + const richText = block.paragraph?.rich_text; + if (!Array.isArray(richText) || richText.length === 0) { + return false; + } + + return richText.some((item) => { + if (item.type === "text") { + const content = item.text?.content ?? item.plain_text ?? ""; + return content.trim().length > 0; + } + + if (item.type === "equation") { + return (item.equation?.expression ?? "").trim().length > 0; + } + + const plainText = item.plain_text ?? ""; + return plainText.trim().length > 0; + }); +} + +const paragraphTransformer: BlockToMarkdown = async (block) => { + const paragraphBlock = block as ParagraphBlockObjectResponse; + + if (paragraphBlock?.type !== "paragraph") { + return defaultParagraphToMarkdown(block as BlockObjectResponse); + } + + const hasChildren = paragraphBlock.has_children === true; + const hasContent = hasVisibleParagraphContent(paragraphBlock); + + if (!hasChildren && !hasContent) { + return NOTION_SPACER_HTML as MarkdownBlock; + } + + const customTransformers = ( + n2m as unknown as { + customTransformers?: Record; + } + ).customTransformers; + + let previousParagraphTransformer: BlockToMarkdown | undefined; + + if (customTransformers) { + previousParagraphTransformer = customTransformers.paragraph; + + if (previousParagraphTransformer) { + delete customTransformers.paragraph; + } + } + + try { + return defaultParagraphToMarkdown(paragraphBlock as BlockObjectResponse); + } finally { + if (customTransformers && previousParagraphTransformer) { + customTransformers.paragraph = previousParagraphTransformer; + } + } +}; + +n2m.setCustomTransformer("paragraph", paragraphTransformer); + export const DATABASE_ID = resolvedDatabaseId; // For v5 API compatibility - export data source ID diff --git a/scripts/test-utils/mocks.ts b/scripts/test-utils/mocks.ts index 04394f18..0912c81c 100644 --- a/scripts/test-utils/mocks.ts +++ b/scripts/test-utils/mocks.ts @@ -27,6 +27,8 @@ export const createMockNotionClient = () => ({ export const createMockNotionToMarkdown = () => ({ pageToMarkdown: vi.fn(), toMarkdownString: vi.fn(), + setCustomTransformer: vi.fn(), + blockToMarkdown: vi.fn(), }); /** diff --git a/src/css/custom.css b/src/css/custom.css index 4e0ec226..89c1f313 100644 --- a/src/css/custom.css +++ b/src/css/custom.css @@ -314,3 +314,21 @@ details { [data-theme="dark"] details { background-color: rgba(229, 228, 226, 0.15) !important; } + +/* Preserve intentional gaps from Notion empty paragraph spacers */ +:root { + --notion-paragraph-gap-base: var(--ifm-paragraph-margin-bottom, 1.25rem); + --notion-spacer-gap: clamp( + calc(var(--notion-paragraph-gap-base) * 0.55), + calc(var(--notion-paragraph-gap-base) * 0.65 + 0.2rem), + calc(var(--notion-paragraph-gap-base) * 0.75 + 0.25rem) + ); +} + +.notion-spacer { + display: block; + width: 100%; + height: var(--notion-spacer-gap); + margin: 0; + pointer-events: none; +}