diff --git a/scripts/notion-fetch/generateBlocks.ts b/scripts/notion-fetch/generateBlocks.ts
index 7e597c61..e32d0a41 100644
--- a/scripts/notion-fetch/generateBlocks.ts
+++ b/scripts/notion-fetch/generateBlocks.ts
@@ -26,6 +26,8 @@ import { convertCalloutToAdmonition, isCalloutBlock } from "./calloutProcessor";
import { fetchNotionBlocks } from "../fetchNotionData";
import { EmojiProcessor } from "./emojiProcessor";
+// Marker text that trimEdgeDocSpacers() compares against each trimmed edge line.
+// NOTE(review): this is the empty string, so only blank edge lines match —
+// confirm whether the DocSpacer MDX tag was intended as the value.
+const DOC_SPACER_COMPONENT = "";
+
// Enhanced image handling utilities for robust processing
interface ImageProcessingResult {
success: boolean;
@@ -244,6 +246,27 @@ async function logImageFailure(logEntry: any): Promise {
await imageLogWriting;
}
+/**
+ * Strip spacer marker lines from the top and bottom of a markdown body.
+ *
+ * The text is split on "\n"; leading and trailing lines whose trimmed content
+ * equals DOC_SPACER_COMPONENT are discarded and the remainder is re-joined
+ * with "\n". Lines between the first and last kept line are left untouched.
+ *
+ * @param content - Markdown text to clean up.
+ * @returns The text without edge marker lines; falsy input is returned as-is.
+ */
+function trimEdgeDocSpacers(content: string): string {
+  if (!content) {
+    return content;
+  }
+
+  const isSpacer = (row: string): boolean =>
+    row.trim() === DOC_SPACER_COMPONENT;
+
+  const rows = content.split("\n");
+  let first = 0;
+  let end = rows.length;
+
+  // Walk inwards from both ends instead of mutating the array.
+  while (first < end && isSpacer(rows[first])) {
+    first += 1;
+  }
+  while (end > first && isSpacer(rows[end - 1])) {
+    end -= 1;
+  }
+
+  return rows.slice(first, end).join("\n");
+}
+
/**
* Post-process markdown to ensure no broken image references remain
*/
@@ -1552,7 +1575,7 @@ export async function generateBlocks(pages, progressCallback) {
);
// Remove duplicate title heading if it exists
// The first H1 heading often duplicates the title in Notion exports
- let contentBody = markdownString.parent;
+ let contentBody = trimEdgeDocSpacers(markdownString.parent);
// Find the first H1 heading pattern at the beginning of the content
const firstH1Regex = /^\s*# (.+?)(?:\n|$)/;
diff --git a/scripts/notionClient.test.ts b/scripts/notionClient.test.ts
index b9494ffe..6c294145 100644
--- a/scripts/notionClient.test.ts
+++ b/scripts/notionClient.test.ts
@@ -74,6 +74,7 @@ describe("notionClient", () => {
mockN2M = {
pageToMarkdown: vi.fn(),
toMarkdownString: vi.fn(),
+ setCustomTransformer: vi.fn(() => mockN2M),
};
// Set up constructor mocks
@@ -159,6 +160,49 @@ describe("notionClient", () => {
// Assert
expect(DATABASE_ID).toBe("exported-database-id");
});
+
+ it("should register a paragraph transformer that emits DocSpacer for empty blocks", async () => {
+ // Importing the module runs its top-level code, which registers the transformer.
+ await import("./notionClient");
+
+ expect(mockN2M.setCustomTransformer).toHaveBeenCalledWith(
+ "paragraph",
+ expect.any(Function)
+ );
+
+ // Take the callback argument from the first recorded setCustomTransformer call.
+ const transformer = mockN2M.setCustomTransformer.mock.calls[0][1];
+
+ // Paragraph with no rich text and no children.
+ const emptyBlock = {
+ type: "paragraph",
+ paragraph: { rich_text: [] },
+ has_children: false,
+ };
+
+ // Paragraph that carries visible text.
+ const populatedBlock = {
+ type: "paragraph",
+ paragraph: {
+ rich_text: [
+ {
+ type: "text",
+ plain_text: "Hello",
+ text: { content: "Hello" },
+ },
+ ],
+ },
+ has_children: false,
+ };
+
+ // Paragraph without text of its own but with child blocks.
+ const nestedBlock = {
+ type: "paragraph",
+ paragraph: { rich_text: [] },
+ has_children: true,
+ };
+
+ // NOTE(review): the test title says the transformer "emits DocSpacer", yet the
+ // expected value below is the empty string — confirm the intended spacer markup.
+ await expect(transformer(emptyBlock as any)).resolves.toBe(
+ ""
+ );
+ // undefined is expected for paragraphs with text or with children.
+ await expect(transformer(populatedBlock as any)).resolves.toBeUndefined();
+ await expect(transformer(nestedBlock as any)).resolves.toBeUndefined();
+ });
});
describe("enhancedNotion.databasesQuery", () => {
diff --git a/scripts/notionClient.ts b/scripts/notionClient.ts
index 38c70ee8..a7023545 100644
--- a/scripts/notionClient.ts
+++ b/scripts/notionClient.ts
@@ -2,6 +2,7 @@ import dotenv from "dotenv";
import { Client } from "@notionhq/client";
import { NotionToMarkdown } from "notion-to-md";
import chalk from "chalk";
+import type { RichTextItemResponse } from "@notionhq/client/build/src/api-endpoints";
dotenv.config();
@@ -15,9 +16,7 @@ const resolvedDatabaseId =
process.env.DATABASE_ID ?? process.env.NOTION_DATABASE_ID;
if (!resolvedDatabaseId) {
- throw new Error(
- "DATABASE_ID is not defined in the environment variables."
- );
+ throw new Error("DATABASE_ID is not defined in the environment variables.");
}
process.env.DATABASE_ID = resolvedDatabaseId;
@@ -40,6 +39,58 @@ const notion = new Client({
const n2m = new NotionToMarkdown({ notionClient: notion });
+// Value the custom "paragraph" transformer returns for childless paragraphs
+// that have no visible rich text.
+// NOTE(review): this is the empty string, so the transformer returns "" for
+// such blocks — confirm whether the DocSpacer MDX tag was intended as the value.
+const DOC_SPACER_COMPONENT = "";
+
+/**
+ * Report whether any rich-text item carries non-whitespace content.
+ *
+ * An item counts as visible when its `plain_text` is a non-blank string, or
+ * when the type-specific payload is non-blank: `text.content` for "text",
+ * `equation.expression` for "equation", `plain_text` for "mention".
+ * Falsy entries and any other item type are ignored.
+ *
+ * @param items - Rich-text items of a block; defaults to an empty list.
+ * @returns true as soon as one visible item is found, otherwise false.
+ */
+const hasVisibleRichText = (items: RichTextItemResponse[] = []): boolean => {
+  for (const entry of items) {
+    if (!entry) {
+      continue;
+    }
+
+    const plain = entry.plain_text;
+    if (typeof plain === "string" && plain.trim().length > 0) {
+      return true;
+    }
+
+    switch (entry.type) {
+      case "text":
+        if (entry.text?.content?.trim()) {
+          return true;
+        }
+        break;
+      case "equation":
+        if (entry.equation?.expression?.trim()) {
+          return true;
+        }
+        break;
+      case "mention":
+        if (entry.plain_text?.trim()) {
+          return true;
+        }
+        break;
+      default:
+        break;
+    }
+  }
+
+  return false;
+};
+
+// Custom "paragraph" transformer: returns DOC_SPACER_COMPONENT for childless
+// paragraphs without visible rich text, and undefined in every other case.
+n2m.setCustomTransformer("paragraph", async (block) => {
+  // Bail out for non-paragraphs, missing payloads, and paragraphs with children.
+  if (block.type !== "paragraph" || !block.paragraph || block.has_children) {
+    return undefined;
+  }
+
+  const candidate = block.paragraph.rich_text;
+  const richText = Array.isArray(candidate) ? candidate : [];
+
+  return hasVisibleRichText(richText) ? undefined : DOC_SPACER_COMPONENT;
+});
+
export const DATABASE_ID = resolvedDatabaseId;
/**
diff --git a/src/components/DocSpacer/index.tsx b/src/components/DocSpacer/index.tsx
new file mode 100644
index 00000000..170c2ed3
--- /dev/null
+++ b/src/components/DocSpacer/index.tsx
@@ -0,0 +1,29 @@
+import React from "react";
+
+/** Size keys accepted by the spacer. */
+type DocSpacerSize = "sm" | "md" | "lg";
+
+/**
+ * CSS length used for each spacer size.
+ * `Record` needs both type arguments; keying it by DocSpacerSize makes the
+ * compiler reject a missing or misspelled size.
+ */
+const SIZE_TO_REM: Record<DocSpacerSize, string> = {
+  sm: "0.5rem",
+  md: "1rem",
+  lg: "1.5rem",
+};
+
+/** Props accepted by the DocSpacer component. */
+export interface DocSpacerProps {
+ /** Size key for the spacer; the component defaults it to "md" when omitted. */
+ size?: DocSpacerSize;
+}
+
+/**
+ * Empty block element that adds vertical space between pieces of content.
+ *
+ * @param props.size - Size key looked up in SIZE_TO_REM; defaults to "md".
+ * @returns A single aria-hidden `div` whose height is the resolved length.
+ */
+export default function DocSpacer({
+  size = "md",
+}: DocSpacerProps): React.ReactElement {
+  // Fall back to the medium gap if `size` is not a key of SIZE_TO_REM at runtime.
+  const height = SIZE_TO_REM[size] ?? SIZE_TO_REM.md;
+
+  return (
+    <div
+      aria-hidden="true"
+      style={{
+        display: "block",
+        width: "100%",
+        height,
+      }}
+    />
+  );
+}
diff --git a/src/theme/MDXComponents/index.tsx b/src/theme/MDXComponents/index.tsx
new file mode 100644
index 00000000..f7020fdd
--- /dev/null
+++ b/src/theme/MDXComponents/index.tsx
@@ -0,0 +1,8 @@
+import React from "react";
+import MDXComponents from "@theme-original/MDXComponents";
+import DocSpacer from "@site/src/components/DocSpacer";
+
+// Original theme MDX components with DocSpacer registered alongside them.
+const components = {
+  ...MDXComponents,
+  DocSpacer,
+};
+
+export default components;