Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion scripts/notion-fetch/generateBlocks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import { convertCalloutToAdmonition, isCalloutBlock } from "./calloutProcessor";
import { fetchNotionBlocks } from "../fetchNotionData";
import { EmojiProcessor } from "./emojiProcessor";

const DOC_SPACER_COMPONENT = "<DocSpacer />";

// Enhanced image handling utilities for robust processing
interface ImageProcessingResult {
success: boolean;
Expand Down Expand Up @@ -244,6 +246,27 @@ async function logImageFailure(logEntry: any): Promise<void> {
await imageLogWriting;
}

/**
 * Strip spacer-component lines from the very start and very end of a markdown
 * document, leaving spacers that sit between real content untouched.
 *
 * Blank lines around an edge spacer do not shield it: the markdown may end
 * with a trailing newline or separate blocks with empty lines, so the scan
 * looks past blank lines when searching each edge for spacers. Content that
 * has no edge spacer is returned unchanged (edge blank lines are preserved).
 *
 * @param content - Markdown text to clean up.
 * @param spacer - Exact (trimmed) line that marks a spacer. Defaults to the
 *   module-level `DOC_SPACER_COMPONENT`.
 * @returns The content without leading/trailing spacer lines.
 */
function trimEdgeDocSpacers(
  content: string,
  spacer: string = DOC_SPACER_COMPONENT
): string {
  // Nothing to trim; an empty marker would otherwise match every blank line.
  if (!content || !spacer) {
    return content;
  }

  const lines = content.split("\n");
  // trim() also discards a trailing "\r" left behind by CRLF line endings.
  const isSpacer = (line: string): boolean => line.trim() === spacer;
  const isBlank = (line: string): boolean => line.trim() === "";

  // Leading edge: walk through blank/spacer lines and cut just after the last
  // spacer found before the first line of real content.
  let start = 0;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? "";
    if (isSpacer(line)) {
      start = i + 1;
    } else if (!isBlank(line)) {
      break;
    }
  }

  // Trailing edge: same walk from the bottom, cutting at the earliest spacer
  // found after the last line of real content.
  let end = lines.length;
  for (let i = lines.length - 1; i >= start; i--) {
    const line = lines[i] ?? "";
    if (isSpacer(line)) {
      end = i;
    } else if (!isBlank(line)) {
      break;
    }
  }

  return lines.slice(start, end).join("\n");
}

/**
* Post-process markdown to ensure no broken image references remain
*/
Expand Down Expand Up @@ -1552,7 +1575,7 @@ export async function generateBlocks(pages, progressCallback) {
);
// Remove duplicate title heading if it exists
// The first H1 heading often duplicates the title in Notion exports
let contentBody = markdownString.parent;
let contentBody = trimEdgeDocSpacers(markdownString.parent);

// Find the first H1 heading pattern at the beginning of the content
const firstH1Regex = /^\s*# (.+?)(?:\n|$)/;
Expand Down
44 changes: 44 additions & 0 deletions scripts/notionClient.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ describe("notionClient", () => {
mockN2M = {
pageToMarkdown: vi.fn(),
toMarkdownString: vi.fn(),
setCustomTransformer: vi.fn(() => mockN2M),
};

// Set up constructor mocks
Expand Down Expand Up @@ -159,6 +160,49 @@ describe("notionClient", () => {
// Assert
expect(DATABASE_ID).toBe("exported-database-id");
});

// Verifies that importing the client module registers a custom "paragraph"
// transformer and that the transformer only emits the spacer component for
// paragraphs with no text and no children.
it("should register a paragraph transformer that emits DocSpacer for empty blocks", async () => {
  await import("./notionClient");

  expect(mockN2M.setCustomTransformer).toHaveBeenCalledWith(
    "paragraph",
    expect.any(Function)
  );

  // Look the registration up by block type instead of assuming it was the
  // first setCustomTransformer call, so registering other transformers
  // earlier in the module does not make this test exercise the wrong one.
  const paragraphCall = mockN2M.setCustomTransformer.mock.calls.find(
    (call: unknown[]) => call[0] === "paragraph"
  );
  expect(paragraphCall).toBeDefined();

  const transformer = paragraphCall[1];

  // No rich text and no children: should become a spacer.
  const emptyBlock = {
    type: "paragraph",
    paragraph: { rich_text: [] },
    has_children: false,
  };

  // Visible text: should be left to the default rendering.
  const populatedBlock = {
    type: "paragraph",
    paragraph: {
      rich_text: [
        {
          type: "text",
          plain_text: "Hello",
          text: { content: "Hello" },
        },
      ],
    },
    has_children: false,
  };

  // Empty text but nested children: must not be replaced by a spacer.
  const nestedBlock = {
    type: "paragraph",
    paragraph: { rich_text: [] },
    has_children: true,
  };

  await expect(transformer(emptyBlock as any)).resolves.toBe(
    "<DocSpacer />"
  );
  await expect(transformer(populatedBlock as any)).resolves.toBeUndefined();
  await expect(transformer(nestedBlock as any)).resolves.toBeUndefined();
});
});

describe("enhancedNotion.databasesQuery", () => {
Expand Down
57 changes: 54 additions & 3 deletions scripts/notionClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import dotenv from "dotenv";
import { Client } from "@notionhq/client";
import { NotionToMarkdown } from "notion-to-md";
import chalk from "chalk";
import type { RichTextItemResponse } from "@notionhq/client/build/src/api-endpoints";

dotenv.config();

Expand All @@ -15,9 +16,7 @@ const resolvedDatabaseId =
process.env.DATABASE_ID ?? process.env.NOTION_DATABASE_ID;

if (!resolvedDatabaseId) {
throw new Error(
"DATABASE_ID is not defined in the environment variables."
);
throw new Error("DATABASE_ID is not defined in the environment variables.");
}

process.env.DATABASE_ID = resolvedDatabaseId;
Expand All @@ -40,6 +39,58 @@ const notion = new Client({

const n2m = new NotionToMarkdown({ notionClient: notion });

const DOC_SPACER_COMPONENT = "<DocSpacer />";

/**
 * Report whether any rich-text item carries non-whitespace content.
 *
 * An item counts as visible when its `plain_text` has non-whitespace
 * characters, or, failing that, when its type-specific payload does
 * (`text.content` for text items, `equation.expression` for equations,
 * `plain_text` for mentions). Missing items and other item types are
 * treated as invisible.
 *
 * @param items - Rich-text items to inspect; defaults to an empty list.
 * @returns `true` as soon as one visible item is found, otherwise `false`.
 */
const hasVisibleRichText = (items: RichTextItemResponse[] = []): boolean => {
  for (const entry of items) {
    if (!entry) {
      continue;
    }

    const plain = entry.plain_text;
    if (typeof plain === "string" && plain.trim().length > 0) {
      return true;
    }

    // Fall back to the payload specific to the item's type.
    let visible: boolean;
    switch (entry.type) {
      case "text":
        visible = Boolean(entry.text?.content?.trim());
        break;
      case "equation":
        visible = Boolean(entry.equation?.expression?.trim());
        break;
      case "mention":
        visible = Boolean(entry.plain_text?.trim());
        break;
      default:
        visible = false;
    }

    if (visible) {
      return true;
    }
  }

  return false;
};

// Render paragraphs that have no visible text and no children as the spacer
// component. Every other case resolves to undefined; presumably that makes
// notion-to-md fall back to its default paragraph rendering (confirm against
// the library's custom-transformer contract).
n2m.setCustomTransformer("paragraph", async (block) => {
  // Not a usable paragraph, or one whose nested children must be kept.
  if (block.type !== "paragraph" || !block.paragraph || block.has_children) {
    return undefined;
  }

  // Tolerate a missing or malformed rich_text payload by treating it as empty.
  const candidateItems = block.paragraph.rich_text;
  const richText = Array.isArray(candidateItems) ? candidateItems : [];

  return hasVisibleRichText(richText) ? undefined : DOC_SPACER_COMPONENT;
});

export const DATABASE_ID = resolvedDatabaseId;

/**
Expand Down
29 changes: 29 additions & 0 deletions src/components/DocSpacer/index.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import React from "react";

/** Named spacer sizes accepted by the DocSpacer component. */
type DocSpacerSize = "sm" | "md" | "lg";

/** CSS height (in rem units) used for each named spacer size. */
const SIZE_TO_REM: Record<DocSpacerSize, string> = {
sm: "0.5rem",
md: "1rem",
lg: "1.5rem",
};

/** Props for the DocSpacer component. */
export interface DocSpacerProps {
/** Spacer height preset; the component falls back to "md" when omitted. */
size?: DocSpacerSize;
}

/**
 * Empty, full-width block that adds vertical whitespace to a document.
 *
 * The element is marked `aria-hidden` with a presentational role, as it
 * carries no content of its own.
 *
 * @param props.size - Height preset; defaults to "md". A value missing from
 *   the size table also falls back to the "md" height.
 */
export default function DocSpacer({ size = "md" }: DocSpacerProps) {
  const spacerStyle = {
    height: SIZE_TO_REM[size] ?? SIZE_TO_REM.md,
    width: "100%",
    margin: 0,
  };

  return <div aria-hidden="true" role="presentation" style={spacerStyle} />;
}
8 changes: 8 additions & 0 deletions src/theme/MDXComponents/index.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import React from "react";
import MDXComponents from "@theme-original/MDXComponents";
import DocSpacer from "@site/src/components/DocSpacer";

// Extend the original theme's MDX component map with DocSpacer so the
// <DocSpacer /> tag can be used under that name in MDX content.
const mdxComponentsWithDocSpacer = {
  ...MDXComponents,
  DocSpacer,
};

export default mdxComponentsWithDocSpacer;
Loading