From c7b537b26a08fa818bf04e592ef38856f2681e76 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 27 Mar 2026 14:35:39 -0700 Subject: [PATCH] More sophisticated flanking rules for math With this change, `$` that's preceded by word chars can't open, and followed by word chars can't close. This reduces "false positives" on existing markdown documents like this: vim /etc/systemd/system/$UNIT.$TYPE ^-----^ currently parsed as latex math I've checked a few other commonmark-based markdown flavors that accept LaTeX math, to see what they did. The rule I added is supposed to be the same as the one used in VSCode's markdown-it plugin and Discourse's math plugin: - https://github.com/microsoft/vscode-markdown-it-katex/blob/efd01d8/src/index.ts#L22-L32 - https://github.com/discourse/discourse/blob/41f62aa/plugins/discourse-math/assets/javascripts/lib/discourse-markdown/discourse-math.js#L46 Full disclosure: I know of a few implementations that don't do this: - https://github.com/kivikakk/comrak/blob/0d4a9ca/src/parser/inlines.rs#L2104 (narrow special case for digits after the closer) - https://github.com/classeur/markdown-it-mathjax/blob/c1e34d4/markdown-it-mathjax.js#L80 (same narrow special case for digits) - https://github.com/jupyterlab/jupyterlab/blob/61a2db4/packages/rendermime/src/latex.ts#L17 (very permissive; even `1 $ 2 $ 3` thinks the `2` is in a math span) --- .../src/Commonmark/Extensions/Math.hs | 12 ++++++++++-- commonmark-extensions/test/math.md | 15 +++++++++++++++ commonmark/src/Commonmark/Inlines.hs | 1 + 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/commonmark-extensions/src/Commonmark/Extensions/Math.hs b/commonmark-extensions/src/Commonmark/Extensions/Math.hs index a5a7c7d..ca94d9d 100644 --- a/commonmark-extensions/src/Commonmark/Extensions/Math.hs +++ b/commonmark-extensions/src/Commonmark/Extensions/Math.hs @@ -15,11 +15,16 @@ import Commonmark.Html import Text.Parsec import Data.Text (Text) import qualified Data.Text as T 
+import qualified Data.Map.Strict as M mathSpec :: (Monad m, IsBlock il bl, IsInline il, HasMath il) => SyntaxSpec m il bl mathSpec = mempty { syntaxInlineParsers = [withAttributes parseMath] + -- A very odd workaround. Is there a better way to: + -- 1. Get the precedingTokTypes map populated with info about '$' + -- 2. Have the math spans otherwise follow the same precedence as code spans, not emphasis + , syntaxFormattingSpecs = [FormattingSpec '$' False False Nothing Nothing '$'] } class HasMath a where @@ -38,13 +43,16 @@ instance (HasMath i, Monoid i) => HasMath (WithSourceMap i) where parseMath :: (Monad m, HasMath a) => InlineParser m a parseMath = try $ do - symbol '$' + Tok _ pos _ <- symbol '$' display <- (True <$ symbol '$') <|> (False <$ notFollowedBy whitespace) contents <- try $ untokenize <$> pDollarsMath 0 + st <- getState + let isPrecededByWordChars = M.lookup pos (precedingTokTypes st) == Just WordChars + isFollowedByWordChars <- (==) WordChars . maybe LineEnd tokType <$> optionMaybe (lookAhead anyTok) let isWs c = c == ' ' || c == '\t' || c == '\r' || c == '\n' if display then displayMath contents <$ symbol '$' - else if T.null contents || isWs (T.last contents) + else if T.null contents || isWs (T.last contents) || isPrecededByWordChars || isFollowedByWordChars -- don't allow math to end with SPACE + $ then mzero else return $ inlineMath contents diff --git a/commonmark-extensions/test/math.md b/commonmark-extensions/test/math.md index f8d6dc3..6ba5d05 100644 --- a/commonmark-extensions/test/math.md +++ b/commonmark-extensions/test/math.md @@ -87,3 +87,18 @@ $bc$ .

\(b<a>c\)

```````````````````````````````` + +Inline math openers can't be immediately preceded by +alphanumeric characters: +```````````````````````````````` example +Expect$Nothing$ +. +

Expect$Nothing$

+```````````````````````````````` + +And inline math closers can't be immediately followed by them: +```````````````````````````````` example +./install.sh $PLATFORM-$VERSION +. +

./install.sh $PLATFORM-$VERSION

+```````````````````````````````` diff --git a/commonmark/src/Commonmark/Inlines.hs b/commonmark/src/Commonmark/Inlines.hs index 1988666..59d58d3 100644 --- a/commonmark/src/Commonmark/Inlines.hs +++ b/commonmark/src/Commonmark/Inlines.hs @@ -26,6 +26,7 @@ module Commonmark.Inlines , pLinkTitle , pEscaped , pEscapedSymbol + , precedingTokTypes , processEmphasis , processBrackets , pBacktickSpan