From 7e2b4847420438b822a7094ea9dd3a2798f69d45 Mon Sep 17 00:00:00 2001 From: luandro Date: Sat, 11 Oct 2025 19:38:49 +0700 Subject: [PATCH] feat(notion-fetch): preserve notion spacers --- scripts/notion-fetch/generateBlocks.ts | 25 ++++++++++- scripts/notionClient.test.ts | 44 ++++++++++++++++++++ scripts/notionClient.ts | 57 ++++++++++++++++++++++++-- src/components/DocSpacer/index.tsx | 29 +++++++++++++ src/theme/MDXComponents/index.tsx | 8 ++++ 5 files changed, 159 insertions(+), 4 deletions(-) create mode 100644 src/components/DocSpacer/index.tsx create mode 100644 src/theme/MDXComponents/index.tsx diff --git a/scripts/notion-fetch/generateBlocks.ts b/scripts/notion-fetch/generateBlocks.ts index 7e597c61..e32d0a41 100644 --- a/scripts/notion-fetch/generateBlocks.ts +++ b/scripts/notion-fetch/generateBlocks.ts @@ -26,6 +26,8 @@ import { convertCalloutToAdmonition, isCalloutBlock } from "./calloutProcessor"; import { fetchNotionBlocks } from "../fetchNotionData"; import { EmojiProcessor } from "./emojiProcessor"; +const DOC_SPACER_COMPONENT = "<DocSpacer />"; + // Enhanced image handling utilities for robust processing interface ImageProcessingResult { success: boolean; @@ -244,6 +246,27 @@ async function logImageFailure(logEntry: any): Promise<void> { await imageLogWriting; } +function trimEdgeDocSpacers(content: string): string { + if (!content) { + return content; + } + + const lines = content.split("\n"); + + while (lines.length && lines[0].trim() === DOC_SPACER_COMPONENT) { + lines.shift(); + } + + while ( + lines.length && + lines[lines.length - 1].trim() === DOC_SPACER_COMPONENT + ) { + lines.pop(); + } + + return lines.join("\n"); +} + /** * Post-process markdown to ensure no broken image references remain */ @@ -1552,7 +1575,7 @@ export async function generateBlocks(pages, progressCallback) { ); // Remove duplicate title heading if it exists // The first H1 heading often duplicates the title in Notion exports - let contentBody = markdownString.parent; + let contentBody = 
trimEdgeDocSpacers(markdownString.parent); // Find the first H1 heading pattern at the beginning of the content const firstH1Regex = /^\s*# (.+?)(?:\n|$)/; diff --git a/scripts/notionClient.test.ts b/scripts/notionClient.test.ts index b9494ffe..6c294145 100644 --- a/scripts/notionClient.test.ts +++ b/scripts/notionClient.test.ts @@ -74,6 +74,7 @@ describe("notionClient", () => { mockN2M = { pageToMarkdown: vi.fn(), toMarkdownString: vi.fn(), + setCustomTransformer: vi.fn(() => mockN2M), }; // Set up constructor mocks @@ -159,6 +160,49 @@ describe("notionClient", () => { // Assert expect(DATABASE_ID).toBe("exported-database-id"); }); + + it("should register a paragraph transformer that emits DocSpacer for empty blocks", async () => { + await import("./notionClient"); + + expect(mockN2M.setCustomTransformer).toHaveBeenCalledWith( + "paragraph", + expect.any(Function) + ); + + const transformer = mockN2M.setCustomTransformer.mock.calls[0][1]; + + const emptyBlock = { + type: "paragraph", + paragraph: { rich_text: [] }, + has_children: false, + }; + + const populatedBlock = { + type: "paragraph", + paragraph: { + rich_text: [ + { + type: "text", + plain_text: "Hello", + text: { content: "Hello" }, + }, + ], + }, + has_children: false, + }; + + const nestedBlock = { + type: "paragraph", + paragraph: { rich_text: [] }, + has_children: true, + }; + + await expect(transformer(emptyBlock as any)).resolves.toBe( + "<DocSpacer />" + ); + await expect(transformer(populatedBlock as any)).resolves.toBeUndefined(); + await expect(transformer(nestedBlock as any)).resolves.toBeUndefined(); + }); }); describe("enhancedNotion.databasesQuery", () => { diff --git a/scripts/notionClient.ts b/scripts/notionClient.ts index 38c70ee8..a7023545 100644 --- a/scripts/notionClient.ts +++ b/scripts/notionClient.ts @@ -2,6 +2,7 @@ import dotenv from "dotenv"; import { Client } from "@notionhq/client"; import { NotionToMarkdown } from "notion-to-md"; import chalk from "chalk"; +import type { 
RichTextItemResponse } from "@notionhq/client/build/src/api-endpoints"; dotenv.config(); @@ -15,9 +16,7 @@ const resolvedDatabaseId = process.env.DATABASE_ID ?? process.env.NOTION_DATABASE_ID; if (!resolvedDatabaseId) { - throw new Error( - "DATABASE_ID is not defined in the environment variables." - ); + throw new Error("DATABASE_ID is not defined in the environment variables."); } process.env.DATABASE_ID = resolvedDatabaseId; @@ -40,6 +39,58 @@ const notion = new Client({ const n2m = new NotionToMarkdown({ notionClient: notion }); +const DOC_SPACER_COMPONENT = "<DocSpacer />"; + +const hasVisibleRichText = (items: RichTextItemResponse[] = []): boolean => + items.some((item) => { + if (!item) { + return false; + } + + if (typeof item.plain_text === "string" && item.plain_text.trim().length) { + return true; + } + + if (item.type === "text") { + return Boolean(item.text?.content?.trim()); + } + + if (item.type === "equation") { + return Boolean(item.equation?.expression?.trim()); + } + + if (item.type === "mention") { + return Boolean(item.plain_text?.trim()); + } + + return false; + }); + +n2m.setCustomTransformer("paragraph", async (block) => { + if (block.type !== "paragraph") { + return undefined; + } + + const paragraph = block.paragraph; + if (!paragraph) { + return undefined; + } + + if (block.has_children) { + return undefined; + } + + const richText = Array.isArray(paragraph.rich_text) + ? 
paragraph.rich_text + : []; + + if (hasVisibleRichText(richText)) { + return undefined; + } + + return DOC_SPACER_COMPONENT; +}); + export const DATABASE_ID = resolvedDatabaseId; /** diff --git a/src/components/DocSpacer/index.tsx b/src/components/DocSpacer/index.tsx new file mode 100644 index 00000000..170c2ed3 --- /dev/null +++ b/src/components/DocSpacer/index.tsx @@ -0,0 +1,29 @@ +import React from "react"; + +type DocSpacerSize = "sm" | "md" | "lg"; + +const SIZE_TO_REM: Record<DocSpacerSize, string> = { + sm: "0.5rem", + md: "1rem", + lg: "1.5rem", +}; + +export interface DocSpacerProps { + size?: DocSpacerSize; +} + +export default function DocSpacer({ size = "md" }: DocSpacerProps) { + const height = SIZE_TO_REM[size] ?? SIZE_TO_REM.md; + + return ( +