diff --git a/scripts/notion-fetch/contentSanitizer.test.ts b/scripts/notion-fetch/contentSanitizer.test.ts
index 5a1a10f7..5f2cf17b 100644
--- a/scripts/notion-fetch/contentSanitizer.test.ts
+++ b/scripts/notion-fetch/contentSanitizer.test.ts
@@ -121,4 +121,42 @@ describe("contentSanitizer", () => {
expect(result).toBe("[tag](#tag)");
});
});
+
+ describe("restoreSoftLineBreaks", () => {
+ it("should convert single newlines between text into <br /> elements", () => {
+ const input = "First line\nSecond line";
+ const result = scriptModule.restoreSoftLineBreaks(input);
+ expect(result).toBe("First line<br />\nSecond line");
+ });
+
+ it("should leave paragraph breaks (double newlines) untouched", () => {
+ const input = "First paragraph\n\nSecond paragraph";
+ const result = scriptModule.restoreSoftLineBreaks(input);
+ expect(result).toBe(input);
+ });
+
+ it("should ignore newlines that start markdown list items", () => {
+ const input = "Intro text\n- list item";
+ const result = scriptModule.restoreSoftLineBreaks(input);
+ expect(result).toBe(input);
+ });
+
+ it("should ignore newlines before numbered list items", () => {
+ const input = "Intro text\n1. First item";
+ const result = scriptModule.restoreSoftLineBreaks(input);
+ expect(result).toBe(input);
+ });
+
+ it("should not modify content inside fenced code blocks", () => {
+ const input = "```js\nconst x = 1;\nconst y = 2;\n```\nOutside";
+ const result = scriptModule.restoreSoftLineBreaks(input);
+ expect(result).toBe(input);
+ });
+
+ it("should normalize unicode line separators into <br /> line breaks", () => {
+ const input = "Line one\u2028Line two";
+ const result = scriptModule.restoreSoftLineBreaks(input);
+ expect(result).toBe("Line one<br />\nLine two");
+ });
+ });
});
diff --git a/scripts/notion-fetch/contentSanitizer.ts b/scripts/notion-fetch/contentSanitizer.ts
index 153fc0d6..6efddf9e 100644
--- a/scripts/notion-fetch/contentSanitizer.ts
+++ b/scripts/notion-fetch/contentSanitizer.ts
@@ -105,3 +105,69 @@ export function sanitizeMarkdownContent(content: string): string {
return content;
}
+
+/**
+ * Restores intentional soft line breaks (Shift+Enter in Notion) by converting single
+ * newlines within paragraphs into `<br />` elements while avoiding structural markdown lines.
+ */
+export function restoreSoftLineBreaks(content: string): string {
+ if (!content) return content;
+
+ const codeBlocks: string[] = [];
+ const codeSpans: string[] = [];
+
+ const blockPlaceholder = (index: number) =>
+ `__SOFTBREAK_CODEBLOCK_${index}__`;
+ const spanPlaceholder = (index: number) => `__SOFTBREAK_CODESPAN_${index}__`;
+
+ // Protect fenced blocks and inline code so formatting is left untouched
+ let transformed = content.replace(/```[\s\S]*?```/g, (match) => {
+ codeBlocks.push(match);
+ return blockPlaceholder(codeBlocks.length - 1);
+ });
+
+ transformed = transformed.replace(/`[^`\n]*`/g, (match) => {
+ codeSpans.push(match);
+ return spanPlaceholder(codeSpans.length - 1);
+ });
+
+ // Normalize uncommon Unicode line separators that Notion may emit
+ transformed = transformed.replace(/[\u2028\u2029]/g, "\n");
+
+ transformed = transformed.replace(
+ /(?<=\S)\n(?=\S)/g,
+ (newline, offset, full) => {
+ const nextLine = full.slice(offset + newline.length);
+ const trimmedNextLine = nextLine.replace(/^[ \t]+/, "");
+
+ const before = full.slice(0, offset);
+ const prevLine = before.slice(before.lastIndexOf("\n") + 1);
+ const trimmedPrevLine = prevLine.trim();
+
+ // Skip markdown constructs that should remain as new lines
+ if (
+ /^([-*+>#|<])/.test(trimmedNextLine) ||
+ /^\d+[.)]/.test(trimmedNextLine) ||
+ /^```/.test(trimmedPrevLine) ||
+ /^---$/.test(trimmedPrevLine) ||
+ trimmedPrevLine.startsWith("__SOFTBREAK_CODEBLOCK_")
+ ) {
+ return newline;
+ }
+
+ return "<br />\n";
+ }
+ );
+
+ // Restore masked code sections
+ transformed = transformed.replace(
+ /__SOFTBREAK_CODEBLOCK_(\d+)__/g,
+ (_m, i) => codeBlocks[Number(i)]
+ );
+ transformed = transformed.replace(
+ /__SOFTBREAK_CODESPAN_(\d+)__/g,
+ (_m, i) => codeSpans[Number(i)]
+ );
+
+ return transformed;
+}
diff --git a/scripts/notion-fetch/generateBlocks.test.ts b/scripts/notion-fetch/generateBlocks.test.ts
index ae7ca469..fcb49e84 100644
--- a/scripts/notion-fetch/generateBlocks.test.ts
+++ b/scripts/notion-fetch/generateBlocks.test.ts
@@ -69,6 +69,7 @@ vi.mock("./imageProcessor", () => ({
vi.mock("./utils", () => ({
sanitizeMarkdownContent: vi.fn((content) => content),
+ restoreSoftLineBreaks: vi.fn((content) => content),
compressImageToFileWithFallback: vi.fn(),
detectFormatFromBuffer: vi.fn(() => "jpeg"),
formatFromContentType: vi.fn(() => "jpeg"),
diff --git a/scripts/notion-fetch/generateBlocks.ts b/scripts/notion-fetch/generateBlocks.ts
index 7e597c61..985a1aea 100644
--- a/scripts/notion-fetch/generateBlocks.ts
+++ b/scripts/notion-fetch/generateBlocks.ts
@@ -13,6 +13,7 @@ import chalk from "chalk";
import { processImage } from "./imageProcessor";
import {
sanitizeMarkdownContent,
+ restoreSoftLineBreaks,
compressImageToFileWithFallback,
detectFormatFromBuffer,
formatFromContentType,
@@ -1550,6 +1551,9 @@ export async function generateBlocks(pages, progressCallback) {
markdownString.parent = sanitizeMarkdownContent(
markdownString.parent
);
+ markdownString.parent = restoreSoftLineBreaks(
+ markdownString.parent
+ );
// Remove duplicate title heading if it exists
// The first H1 heading often duplicates the title in Notion exports
let contentBody = markdownString.parent;
diff --git a/scripts/notion-fetch/utils.ts b/scripts/notion-fetch/utils.ts
index bdfb9bb0..d65228bd 100644
--- a/scripts/notion-fetch/utils.ts
+++ b/scripts/notion-fetch/utils.ts
@@ -4,8 +4,11 @@ import os from "node:os";
import chalk from "chalk";
import { compressImage } from "./imageCompressor";
-// Re-export sanitize so callers have a single utils entrypoint
-export { sanitizeMarkdownContent } from "./contentSanitizer";
+// Re-export sanitize helpers so callers have a single utils entrypoint
+export {
+ sanitizeMarkdownContent,
+ restoreSoftLineBreaks,
+} from "./contentSanitizer";
// Fail-open toggle: defaults to true unless explicitly set to 'false'
export const SOFT_FAIL: boolean =