Skip to content

Commit 6fdc509

Browse files
committed
feat: regex plugins can have functions, and skip code blocks
1 parent e4a8795 commit 6fdc509

File tree

4 files changed

+206
-27
lines changed

4 files changed

+206
-27
lines changed

src/config/configuration.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,20 @@ export type IPlugin = {
4545
// simple regex replacements on the markdown output
4646
regexMarkdownModifications?: IRegexMarkdownModification[];
4747

48-
// allow a plugin to perform an async operation before it can deliver its operations
48+
// Allow a plugin to perform an async operation at the start of docu-notion.
49+
// Notice that the plugin itself is given, so you can add things to it.
4950
init?(plugin: IPlugin): Promise<void>;
5051
};
5152

5253
export type IRegexMarkdownModification = {
54+
// Should match on markdown that you want to replace
5355
regex: RegExp;
54-
output: string;
56+
// Based on that regex, the replacementPattern will be used to replace the matched text
57+
replacementPattern?: string;
58+
// Instead of a pattern, you can use this if you have to ask a server somewhere for help in getting the new markdown
59+
getReplacement?(s: string): Promise<string>;
60+
61+
// If the output is creating things like react elements, you can import their definitions here
5562
imports?: string[];
5663
};
5764

src/plugins/embedTweaks.spec.ts

Lines changed: 146 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { NotionBlock } from "../config/configuration";
1+
import { IPlugin, NotionBlock } from "../config/configuration";
22
import { setLogLevel } from "../log";
33
import { blocksToMarkdown } from "../TestRun";
44
import {
@@ -93,3 +93,148 @@ test("gif", async () => {
9393
`![](https://en.wikipedia.org/wiki/GIF#/media/File:Rotating_earth_(large).gif)`
9494
);
9595
});
96+
97+
test("tweaks are not applied inside code blocks", async () => {
98+
setLogLevel("verbose");
99+
const p: IPlugin = {
100+
name: "test",
101+
regexMarkdownModifications: [
102+
{
103+
regex: /find/,
104+
replacementPattern: `found`,
105+
},
106+
],
107+
};
108+
const config = { plugins: [p] };
109+
const result = await blocksToMarkdown(config, [
110+
{
111+
type: "code",
112+
code: {
113+
caption: [],
114+
rich_text: [
115+
{
116+
type: "text",
117+
text: {
118+
content: "don't find me",
119+
link: null,
120+
},
121+
annotations: {
122+
bold: false,
123+
italic: false,
124+
strikethrough: false,
125+
underline: false,
126+
code: false,
127+
color: "default",
128+
},
129+
plain_text: "don't find me",
130+
href: null,
131+
},
132+
],
133+
language: "",
134+
},
135+
} as unknown as NotionBlock,
136+
{
137+
type: "paragraph",
138+
paragraph: {
139+
rich_text: [
140+
{
141+
type: "text",
142+
text: { content: "find this", link: null },
143+
annotations: {
144+
bold: false,
145+
italic: false,
146+
strikethrough: false,
147+
underline: false,
148+
code: true,
149+
color: "default",
150+
},
151+
plain_text: "find this",
152+
href: null,
153+
},
154+
],
155+
},
156+
} as unknown as NotionBlock,
157+
]);
158+
// we should not change the code one
159+
expect(result.trim()).toContain("don't find me");
160+
// but we should change the non-code block one
161+
expect(result.trim()).toContain("found this");
162+
});
163+
164+
test("simplest possible", async () => {
165+
setLogLevel("verbose");
166+
const p: IPlugin = {
167+
name: "test",
168+
regexMarkdownModifications: [
169+
{
170+
regex: /find/,
171+
replacementPattern: `found`,
172+
},
173+
],
174+
};
175+
const config = { plugins: [p] };
176+
const result = await blocksToMarkdown(config, [
177+
{
178+
type: "paragraph",
179+
paragraph: {
180+
rich_text: [
181+
{
182+
type: "text",
183+
text: { content: "find this", link: null },
184+
annotations: {
185+
bold: false,
186+
italic: false,
187+
strikethrough: false,
188+
underline: false,
189+
code: true,
190+
color: "default",
191+
},
192+
plain_text: "find this",
193+
href: null,
194+
},
195+
],
196+
},
197+
} as unknown as NotionBlock,
198+
]);
199+
200+
expect(result.trim()).toContain("found this");
201+
});
202+
203+
test("use match in output", async () => {
204+
setLogLevel("verbose");
205+
const p: IPlugin = {
206+
name: "test",
207+
regexMarkdownModifications: [
208+
{
209+
regex: /(find)/,
210+
replacementPattern: `found $1`,
211+
},
212+
],
213+
};
214+
const config = { plugins: [p] };
215+
const result = await blocksToMarkdown(config, [
216+
{
217+
type: "paragraph",
218+
paragraph: {
219+
rich_text: [
220+
{
221+
type: "text",
222+
text: { content: "find this", link: null },
223+
annotations: {
224+
bold: false,
225+
italic: false,
226+
strikethrough: false,
227+
underline: false,
228+
code: true,
229+
color: "default",
230+
},
231+
plain_text: "find this",
232+
href: null,
233+
},
234+
],
235+
},
236+
} as unknown as NotionBlock,
237+
]);
238+
239+
expect(result.trim()).toContain("found find");
240+
});

src/plugins/embedTweaks.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ export const gifEmbed: IPlugin = {
66
{
77
// I once saw a gif coming from Notion that wasn't a full
88
// url, which wouldn't work, hence the "http" requirement
9-
regex: /\[.*\]\((http.*(\.(gif|GIF)))\)/gm,
10-
output: `![]($1)`,
9+
regex: /\[.*\]\((http.*(\.(gif|GIF)))\)/,
10+
replacementPattern: `![]($1)`,
1111
},
1212
],
1313
};
@@ -16,32 +16,32 @@ export const imgurGifEmbed: IPlugin = {
1616
name: "imgur",
1717
regexMarkdownModifications: [
1818
{
19-
regex: /\[.*\]\((.*imgur\.com\/.*)\)/gm, // imgur.com
19+
regex: /\[.*\]\((.*imgur\.com\/.*)\)/, // imgur.com
2020
// imgur links to gifs need a .gif at the end, but the url they give you doesn't have one.
21-
output: `![]($1.gif)`,
21+
replacementPattern: `![]($1.gif)`,
2222
},
2323
],
2424
};
2525
export const youtubeEmbed: IPlugin = {
2626
name: "youtube",
2727
regexMarkdownModifications: [
2828
{
29-
regex: /\[.*\]\((.*youtube\.com\/watch.*)\)/gm, //youtube.com/watch
29+
regex: /\[.*\]\((.*youtube\.com\/watch.*)\)/, //youtube.com/watch
3030
imports: [`import ReactPlayer from "react-player";`],
31-
output: `<ReactPlayer controls url="$1" />`,
31+
replacementPattern: `<ReactPlayer controls url="$1" />`,
3232
},
3333
],
3434
};
3535
export const vimeoEmbed: IPlugin = {
3636
name: "vimeo",
3737
regexMarkdownModifications: [
3838
{
39-
regex: /\[.*\]\((https:\/\/.*vimeo.*)\)/gm,
39+
regex: /\[.*\]\((https:\/\/.*vimeo.*)\)/,
4040
// we used to have the following, but the above should handle both the player and non-player urls.
4141
//regex: /\[.*\]\((.*player\.vimeo.*)\)/gm, // player.vimeo
4242

4343
imports: [`import ReactPlayer from "react-player";`],
44-
output: `<ReactPlayer controls url="$1" />`,
44+
replacementPattern: `<ReactPlayer controls url="$1" />`,
4545
},
4646
],
4747
};

src/transform.ts

Lines changed: 43 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ export async function getMarkdownFromNotionBlocks(
5656
//console.log("markdown after link fixes", markdown);
5757

5858
// simple regex-based tweaks. These are usually related to docusaurus
59-
const { imports, body } = doTransformsOnMarkdown(config, markdown);
59+
const { imports, body } = await doTransformsOnMarkdown(config, markdown);
6060

6161
// console.log("markdown after regex fixes", markdown);
6262
// console.log("body after regex", body);
@@ -81,7 +81,10 @@ function doNotionBlockTransforms(
8181
}
8282
}
8383

84-
function doTransformsOnMarkdown(config: IDocuNotionConfig, input: string) {
84+
async function doTransformsOnMarkdown(
85+
config: IDocuNotionConfig,
86+
input: string
87+
) {
8588
const regexMods: IRegexMarkdownModification[] = config.plugins
8689
.filter(plugin => !!plugin.regexMarkdownModifications)
8790
.map(plugin => {
@@ -92,26 +95,50 @@ function doTransformsOnMarkdown(config: IDocuNotionConfig, input: string) {
9295
})
9396
.flat();
9497

98+
// regex that matches markdown code blocks
99+
const codeBlocks = /```.*\n[\s\S]*?\n```/;
100+
95101
let body = input;
96102
//console.log("body before regex: " + body);
97103
let match;
98104
const imports = new Set<string>();
99105

100106
// eslint-disable-next-line @typescript-eslint/no-unused-vars
101-
regexMods.forEach(mod => {
102-
//verbose(`Trying [${mod.name}]`);
103-
while ((match = mod.regex.exec(input)) !== null) {
104-
const string = match[0];
105-
const url = match[1];
106-
verbose(
107-
`[${(mod as any).name}] ${string} --> ${mod.output.replace("$1", url)}`
108-
);
109-
body = body.replace(string, mod.output.replace("$1", url));
110-
// add any library imports
111-
mod.imports?.forEach(imp => imports.add(imp));
107+
for (const mod of regexMods) {
108+
let replacement = undefined;
109+
// regex.exec is stateful, so we don't want to mess up the plugin's use of its own regex, so we clone it.
110+
// we also add the "g" flag to make sure we get all matches
111+
const regex = new RegExp(`${codeBlocks.source}|(${mod.regex.source})`, "g");
112+
let count = 0;
113+
while ((match = regex.exec(input)) !== null) {
114+
if (match[0]) {
115+
const original = match[0];
116+
if (original.startsWith("```") && original.endsWith("```")) {
117+
continue; // code block
118+
}
119+
if (mod.getReplacement) {
120+
replacement = await mod.getReplacement(original);
121+
} else if (mod.replacementPattern) {
122+
console.log(`mod.replacementPattern.replace("$1", ${match[2]}`);
123+
replacement = mod.replacementPattern.replace("$1", match[2]);
124+
}
125+
if (replacement !== undefined) {
126+
verbose(`[${(mod as any).name}] ${original} --> ${replacement}`);
127+
128+
const precedingPart = body.substring(0, match.index); // ?
129+
const partStartingFromThisMatch = body.substring(match.index); // ?
130+
body =
131+
precedingPart +
132+
partStartingFromThisMatch.replace(original, replacement);
133+
// add any library imports
134+
mod.imports?.forEach(imp => imports.add(imp));
135+
}
136+
}
112137
}
113-
});
114-
return { body, imports: [...imports].join("\n") };
138+
}
139+
console.log("body after regex: " + body);
140+
const uniqueImports = [...new Set(imports)];
141+
return { body, imports: [...uniqueImports].join("\n") };
115142
}
116143

117144
async function doNotionToMarkdown(
@@ -148,7 +175,7 @@ function doLinkFixes(
148175
// The key to understanding this `while` is that linkRegExp actually has state, and
149176
// each call to exec() gives you the next match. https://stackoverflow.com/a/1520853/723299
150177
while ((match = linkRegExp.exec(markdownToSearch)) !== null) {
151-
const originalLinkMarkdown = match[0]; // ?
178+
const originalLinkMarkdown = match[0];
152179

153180
verbose(
154181
`Checking to see if a plugin wants to modify "${originalLinkMarkdown}" `

0 commit comments

Comments
 (0)