From d023dc696f3ad117d905a7870242ebebe8bda2db Mon Sep 17 00:00:00 2001 From: luandro Date: Wed, 5 Nov 2025 15:55:37 -0300 Subject: [PATCH 1/3] fix(notion): refine spacer transformer and spacing --- scripts/notionClient.test.ts | 56 ++++++++++++++++++++++++++++++++++ scripts/notionClient.ts | 58 ++++++++++++++++++++++++++++++++++++ scripts/test-utils/mocks.ts | 2 ++ src/css/custom.css | 18 +++++++++++ 4 files changed, 134 insertions(+) diff --git a/scripts/notionClient.test.ts b/scripts/notionClient.test.ts index 0b0a58f1..1bc875c6 100644 --- a/scripts/notionClient.test.ts +++ b/scripts/notionClient.test.ts @@ -75,6 +75,8 @@ describe("notionClient", () => { mockN2M = { pageToMarkdown: vi.fn(), toMarkdownString: vi.fn(), + setCustomTransformer: vi.fn(), + blockToMarkdown: vi.fn().mockResolvedValue({ parent: "", children: [] }), }; // Set up constructor mocks - create a proper constructor function @@ -176,6 +178,60 @@ describe("notionClient", () => { // Assert expect(DATABASE_ID).toBe("exported-database-id"); }); + + it("should register a spacer transformer for empty paragraph blocks", async () => { + await import("./notionClient"); + + expect(mockN2M.setCustomTransformer).toHaveBeenCalledWith( + "paragraph", + expect.any(Function) + ); + + const transformer = mockN2M.setCustomTransformer.mock.calls.find( + (call) => call[0] === "paragraph" + )?.[1]; + + expect(typeof transformer).toBe("function"); + + const emptyParagraph = { + id: "empty", + type: "paragraph", + has_children: false, + paragraph: { + rich_text: [], + }, + } as any; + + const spacerResult = await transformer(emptyParagraph); + expect(spacerResult).toEqual({ + parent: expect.stringContaining("notion-spacer"), + children: [], + }); + expect(mockN2M.blockToMarkdown).not.toHaveBeenCalled(); + + const populatedParagraph = { + id: "content", + type: "paragraph", + has_children: false, + paragraph: { + rich_text: [ + { + type: "text", + text: { content: "Hello" }, + plain_text: "Hello", + }, + ], + }, + } as any; + + mockN2M.blockToMarkdown.mockClear(); + const expectedMarkdown = { parent: "Hello", children: [] }; + mockN2M.blockToMarkdown.mockResolvedValueOnce(expectedMarkdown); + + const markdownResult = await transformer(populatedParagraph); + expect(mockN2M.blockToMarkdown).toHaveBeenCalledWith(populatedParagraph); + expect(markdownResult).toBe(expectedMarkdown); + }); }); describe("enhancedNotion.databasesQuery", () => { diff --git a/scripts/notionClient.ts b/scripts/notionClient.ts index f442b92a..e77d4bf8 100644 --- a/scripts/notionClient.ts +++ b/scripts/notionClient.ts @@ -1,6 +1,10 @@ import dotenv from "dotenv"; import { Client } from "@notionhq/client"; import { NotionToMarkdown } from "notion-to-md"; +import type { + BlockObjectResponse, + ParagraphBlockObjectResponse, +} from "@notionhq/client/build/src/api-endpoints"; import chalk from "chalk"; import { perfTelemetry } from "./perfTelemetry"; import { @@ -8,6 +12,10 @@ import { setCircuitBreakerCheck, } from "./notion-fetch/requestScheduler"; +type MarkdownBlock = Awaited< + ReturnType["blockToMarkdown"]> +>; + dotenv.config(); if (!process.env.NOTION_API_KEY) { @@ -249,6 +257,56 @@ const notion = new Client({ }); const n2m = new NotionToMarkdown({ notionClient: notion }); +type BlockToMarkdown = InstanceType["blockToMarkdown"]; +const defaultParagraphToMarkdown = n2m.blockToMarkdown.bind( + n2m +) as BlockToMarkdown; + +const NOTION_SPACER_HTML = + ''; + +function hasVisibleParagraphContent( + block: ParagraphBlockObjectResponse +): boolean { + const richText = block.paragraph?.rich_text; + if (!Array.isArray(richText) || richText.length === 0) { + return false; + } + + return richText.some((item) => { + if (item.type === "text") { + const content = item.text?.content ?? item.plain_text ?? ""; + return content.trim().length > 0; + } + + if (item.type === "equation") { + return (item.equation?.expression ?? "").trim().length > 0; + } + + const plainText = item.plain_text ?? ""; + return plainText.trim().length > 0; + }); +} + +n2m.setCustomTransformer("paragraph", async (block) => { + const paragraphBlock = block as ParagraphBlockObjectResponse; + + if (paragraphBlock?.type !== "paragraph") { + return defaultParagraphToMarkdown(block as BlockObjectResponse); + } + + const hasChildren = paragraphBlock.has_children === true; + const hasContent = hasVisibleParagraphContent(paragraphBlock); + + if (!hasChildren && !hasContent) { + return { + parent: NOTION_SPACER_HTML, + children: [], + } as MarkdownBlock; + } + + return defaultParagraphToMarkdown(paragraphBlock as BlockObjectResponse); +}); export const DATABASE_ID = resolvedDatabaseId; diff --git a/scripts/test-utils/mocks.ts b/scripts/test-utils/mocks.ts index 04394f18..0912c81c 100644 --- a/scripts/test-utils/mocks.ts +++ b/scripts/test-utils/mocks.ts @@ -27,6 +27,8 @@ export const createMockNotionClient = () => ({ export const createMockNotionToMarkdown = () => ({ pageToMarkdown: vi.fn(), toMarkdownString: vi.fn(), + setCustomTransformer: vi.fn(), + blockToMarkdown: vi.fn(), }); /** diff --git a/src/css/custom.css b/src/css/custom.css index 4e0ec226..89c1f313 100644 --- a/src/css/custom.css +++ b/src/css/custom.css @@ -314,3 +314,21 @@ details { [data-theme="dark"] details { background-color: rgba(229, 228, 226, 0.15) !important; } + +/* Preserve intentional gaps from Notion empty paragraph spacers */ +:root { + --notion-paragraph-gap-base: var(--ifm-paragraph-margin-bottom, 1.25rem); + --notion-spacer-gap: clamp( + calc(var(--notion-paragraph-gap-base) * 0.55), + calc(var(--notion-paragraph-gap-base) * 0.65 + 0.2rem), + calc(var(--notion-paragraph-gap-base) * 0.75 + 0.25rem) + ); +} + +.notion-spacer { + display: block; + width: 100%; + height: var(--notion-spacer-gap); + margin: 0; + pointer-events: none; +} From 7fcea7c8117e1ae00abd28e16f683cc0e9df4ee8 Mon Sep 17 00:00:00 2001 From: luandro Date: Wed, 5 Nov 2025 23:12:50 -0300 Subject: [PATCH 2/3] fix(notion): prevent recursive paragraph fallback --- scripts/notionClient.test.ts | 59 ++++++++++++++++++++++-------------- scripts/notionClient.ts | 17 ++++++----- 2 files changed, 46 insertions(+), 30 deletions(-) diff --git a/scripts/notionClient.test.ts b/scripts/notionClient.test.ts index 1bc875c6..b3678125 100644 --- a/scripts/notionClient.test.ts +++ b/scripts/notionClient.test.ts @@ -38,7 +38,7 @@ vi.mock("chalk", () => ({ describe("notionClient", () => { let mockClient: any; - let mockN2M: any; + let notionToMarkdownInstances: any[]; let originalEnv: NodeJS.ProcessEnv; let consoleMocks: ReturnType; @@ -71,14 +71,6 @@ describe("notionClient", () => { blocks: { children: { list: vi.fn() }, append: vi.fn(), delete: vi.fn() }, }; - // Create mock NotionToMarkdown - mockN2M = { - pageToMarkdown: vi.fn(), - toMarkdownString: vi.fn(), - setCustomTransformer: vi.fn(), - blockToMarkdown: vi.fn().mockResolvedValue({ parent: "", children: [] }), - }; - // Set up constructor mocks - create a proper constructor function const MockClientClass = vi.fn().mockImplementation(function ( this: any, @@ -87,11 +79,30 @@ describe("notionClient", () => { return mockClient; }); + notionToMarkdownInstances = []; + const MockNotionToMarkdownClass = vi.fn().mockImplementation(function ( this: any, config: any ) { - return mockN2M; + const instance: any = { + pageToMarkdown: vi.fn(), + toMarkdownString: vi.fn(), + customTransformers: {} as Record, + blockToMarkdown: vi.fn().mockResolvedValue(""), + }; + + instance.setCustomTransformer = vi.fn(function ( + this: any, + type: string, + transformer: unknown + ) { + this.customTransformers[type] = transformer; + return this; + }); + + notionToMarkdownInstances.push(instance); + return instance; }); // Replace the Client and NotionToMarkdown with our mocks @@ -182,13 +193,17 @@ describe("notionClient", () => { it("should register a spacer transformer for empty paragraph blocks", async () => { await import("./notionClient"); - expect(mockN2M.setCustomTransformer).toHaveBeenCalledWith( + expect(notionToMarkdownInstances.length).toBeGreaterThanOrEqual(2); + + const [primaryN2M, fallbackN2M] = notionToMarkdownInstances; + + expect(primaryN2M.setCustomTransformer).toHaveBeenCalledWith( "paragraph", expect.any(Function) ); - const transformer = mockN2M.setCustomTransformer.mock.calls.find( - (call) => call[0] === "paragraph" + const transformer = primaryN2M.setCustomTransformer.mock.calls.find( + (call: any[]) => call[0] === "paragraph" )?.[1]; expect(typeof transformer).toBe("function"); @@ -203,11 +218,9 @@ describe("notionClient", () => { } as any; const spacerResult = await transformer(emptyParagraph); - expect(spacerResult).toEqual({ - parent: expect.stringContaining("notion-spacer"), - children: [], - }); - expect(mockN2M.blockToMarkdown).not.toHaveBeenCalled(); + expect(typeof spacerResult).toBe("string"); + expect(spacerResult).toContain("notion-spacer"); + expect(fallbackN2M.blockToMarkdown).not.toHaveBeenCalled(); const populatedParagraph = { id: "content", @@ -224,12 +237,14 @@ describe("notionClient", () => { }, } as any; - mockN2M.blockToMarkdown.mockClear(); - const expectedMarkdown = { parent: "Hello", children: [] }; - mockN2M.blockToMarkdown.mockResolvedValueOnce(expectedMarkdown); + fallbackN2M.blockToMarkdown.mockClear(); + const expectedMarkdown = "Hello"; + fallbackN2M.blockToMarkdown.mockResolvedValueOnce(expectedMarkdown); const markdownResult = await transformer(populatedParagraph); - expect(mockN2M.blockToMarkdown).toHaveBeenCalledWith(populatedParagraph); + expect(fallbackN2M.blockToMarkdown).toHaveBeenCalledWith( + populatedParagraph + ); expect(markdownResult).toBe(expectedMarkdown); }); }); diff --git a/scripts/notionClient.ts b/scripts/notionClient.ts index e77d4bf8..282fe1ef 100644 --- a/scripts/notionClient.ts +++ b/scripts/notionClient.ts @@ -257,9 +257,11 @@ const notion = new Client({ }); const n2m = new NotionToMarkdown({ notionClient: notion }); +const paragraphFallbackN2M = new NotionToMarkdown({ notionClient: notion }); + type BlockToMarkdown = InstanceType["blockToMarkdown"]; -const defaultParagraphToMarkdown = n2m.blockToMarkdown.bind( - n2m +const defaultParagraphToMarkdown = paragraphFallbackN2M.blockToMarkdown.bind( + paragraphFallbackN2M ) as BlockToMarkdown; const NOTION_SPACER_HTML = @@ -288,7 +290,7 @@ function hasVisibleParagraphContent( }); } -n2m.setCustomTransformer("paragraph", async (block) => { +const paragraphTransformer: BlockToMarkdown = async (block) => { const paragraphBlock = block as ParagraphBlockObjectResponse; if (paragraphBlock?.type !== "paragraph") { @@ -299,14 +301,13 @@ n2m.setCustomTransformer("paragraph", async (block) => { const hasContent = hasVisibleParagraphContent(paragraphBlock); if (!hasChildren && !hasContent) { - return { - parent: NOTION_SPACER_HTML, - children: [], - } as MarkdownBlock; + return NOTION_SPACER_HTML as MarkdownBlock; } return defaultParagraphToMarkdown(paragraphBlock as BlockObjectResponse); -}); +}; + +n2m.setCustomTransformer("paragraph", paragraphTransformer); export const DATABASE_ID = resolvedDatabaseId; From 5e76356946730492214fa59d496e4865ee24839b Mon Sep 17 00:00:00 2001 From: luandro Date: Wed, 5 Nov 2025 23:38:50 -0300 Subject: [PATCH 3/3] fix(scripts): avoid recursive paragraph fallback --- scripts/notionClient.test.ts | 28 ++++++++++++++++++---------- scripts/notionClient.ts | 29 +++++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/scripts/notionClient.test.ts b/scripts/notionClient.test.ts index b3678125..e1a86b45 100644 --- a/scripts/notionClient.test.ts +++ b/scripts/notionClient.test.ts @@ -88,10 +88,19 @@ describe("notionClient", () => { const instance: any = { pageToMarkdown: vi.fn(), toMarkdownString: vi.fn(), - customTransformers: {} as Record, - blockToMarkdown: vi.fn().mockResolvedValue(""), + customTransformers: {} as Record, }; + instance.blockToMarkdown = vi.fn(async function (this: any, block: any) { + const transformer = this.customTransformers?.[block.type]; + + if (transformer) { + throw new Error("paragraph transformer recursion"); + } + + return `default:${block.id ?? ""}`; + }); + instance.setCustomTransformer = vi.fn(function ( this: any, type: string, @@ -193,9 +202,9 @@ describe("notionClient", () => { it("should register a spacer transformer for empty paragraph blocks", async () => { await import("./notionClient"); - expect(notionToMarkdownInstances.length).toBeGreaterThanOrEqual(2); + expect(notionToMarkdownInstances.length).toBe(1); - const [primaryN2M, fallbackN2M] = notionToMarkdownInstances; + const [primaryN2M] = notionToMarkdownInstances; expect(primaryN2M.setCustomTransformer).toHaveBeenCalledWith( "paragraph", @@ -220,7 +229,7 @@ describe("notionClient", () => { const spacerResult = await transformer(emptyParagraph); expect(typeof spacerResult).toBe("string"); expect(spacerResult).toContain("notion-spacer"); - expect(fallbackN2M.blockToMarkdown).not.toHaveBeenCalled(); + expect(primaryN2M.blockToMarkdown).not.toHaveBeenCalled(); const populatedParagraph = { id: "content", @@ -237,15 +246,14 @@ describe("notionClient", () => { }, } as any; - fallbackN2M.blockToMarkdown.mockClear(); - const expectedMarkdown = "Hello"; - fallbackN2M.blockToMarkdown.mockResolvedValueOnce(expectedMarkdown); + primaryN2M.blockToMarkdown.mockClear(); const markdownResult = await transformer(populatedParagraph); - expect(fallbackN2M.blockToMarkdown).toHaveBeenCalledWith( + expect(primaryN2M.blockToMarkdown).toHaveBeenCalledWith( populatedParagraph ); - expect(markdownResult).toBe(expectedMarkdown); + expect(markdownResult).toBe(`default:${populatedParagraph.id}`); + expect(primaryN2M.customTransformers.paragraph).toBe(transformer); }); }); diff --git a/scripts/notionClient.ts b/scripts/notionClient.ts index 282fe1ef..72967d5a 100644 --- a/scripts/notionClient.ts +++ b/scripts/notionClient.ts @@ -257,11 +257,10 @@ const notion = new Client({ }); const n2m = new NotionToMarkdown({ notionClient: notion }); -const paragraphFallbackN2M = new NotionToMarkdown({ notionClient: notion }); type BlockToMarkdown = InstanceType["blockToMarkdown"]; -const defaultParagraphToMarkdown = paragraphFallbackN2M.blockToMarkdown.bind( - paragraphFallbackN2M +const defaultParagraphToMarkdown = n2m.blockToMarkdown.bind( + n2m ) as BlockToMarkdown; const NOTION_SPACER_HTML = @@ -304,7 +303,29 @@ const paragraphTransformer: BlockToMarkdown = async (block) => { return NOTION_SPACER_HTML as MarkdownBlock; } - return defaultParagraphToMarkdown(paragraphBlock as BlockObjectResponse); + const customTransformers = ( + n2m as unknown as { + customTransformers?: Record; + } + ).customTransformers; + + let previousParagraphTransformer: BlockToMarkdown | undefined; + + if (customTransformers) { + previousParagraphTransformer = customTransformers.paragraph; + + if (previousParagraphTransformer) { + delete customTransformers.paragraph; + } + } + + try { + return defaultParagraphToMarkdown(paragraphBlock as BlockObjectResponse); + } finally { + if (customTransformers && previousParagraphTransformer) { + customTransformers.paragraph = previousParagraphTransformer; + } + } }; n2m.setCustomTransformer("paragraph", paragraphTransformer);