Skip to content

Commit 9e93bd5

Browse files
authored
👌 fix quadratic complexity in reference parser (#367)
ports: markdown-it/markdown-it@de814ca
1 parent da45c87 commit 9e93bd5

File tree

6 files changed

+170
-103
lines changed

6 files changed

+170
-103
lines changed

docs/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
".*_NodeType",
5353
".*Literal.*",
5454
".*_Result",
55+
".*_State",
5556
"EnvType",
5657
"Path",
5758
"Ellipsis",

markdown_it/helpers/parse_link_destination.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,15 @@
66

77

88
class _Result:
9-
__slots__ = ("lines", "ok", "pos", "str")
9+
__slots__ = ("ok", "pos", "str")
1010

1111
def __init__(self) -> None:
1212
self.ok = False
1313
self.pos = 0
14-
self.lines = 0
1514
self.str = ""
1615

1716

1817
def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result:
19-
lines = 0
2018
start = pos
2119
result = _Result()
2220

@@ -80,7 +78,6 @@ def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result:
8078
return result
8179

8280
result.str = unescapeAll(string[start:pos])
83-
result.lines = lines
8481
result.pos = pos
8582
result.ok = True
8683
return result

markdown_it/helpers/parse_link_title.py

Lines changed: 47 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,58 +3,73 @@
33
from ..common.utils import charCodeAt, unescapeAll
44

55

6-
class _Result:
7-
__slots__ = ("lines", "ok", "pos", "str")
6+
class _State:
7+
__slots__ = ("can_continue", "marker", "ok", "pos", "str")
88

99
def __init__(self) -> None:
1010
self.ok = False
11+
"""if `true`, this is a valid link title"""
12+
self.can_continue = False
13+
"""if `true`, this link can be continued on the next line"""
1114
self.pos = 0
12-
self.lines = 0
15+
"""if `ok`, it's the position of the first character after the closing marker"""
1316
self.str = ""
17+
"""if `ok`, it's the unescaped title"""
18+
self.marker = 0
19+
"""expected closing marker character code"""
1420

1521
def __str__(self) -> str:
1622
return self.str
1723

1824

19-
def parseLinkTitle(string: str, pos: int, maximum: int) -> _Result:
20-
lines = 0
21-
start = pos
22-
result = _Result()
25+
def parseLinkTitle(
26+
string: str, start: int, maximum: int, prev_state: _State | None = None
27+
) -> _State:
28+
"""Parse link title within `string` in the [start, maximum) range,
29+
or continue a previous parse if `prev_state` is given (the state returned by the previous call).
30+
"""
31+
pos = start
32+
state = _State()
2333

24-
if pos >= maximum:
25-
return result
34+
if prev_state is not None:
35+
# this is a continuation of a previous parseLinkTitle call on the next line,
36+
# used in reference links only
37+
state.str = prev_state.str
38+
state.marker = prev_state.marker
39+
else:
40+
if pos >= maximum:
41+
return state
2642

27-
marker = charCodeAt(string, pos)
43+
marker = charCodeAt(string, pos)
2844

29-
# /* " */ /* ' */ /* ( */
30-
if marker != 0x22 and marker != 0x27 and marker != 0x28:
31-
return result
45+
# /* " */ /* ' */ /* ( */
46+
if marker != 0x22 and marker != 0x27 and marker != 0x28:
47+
return state
3248

33-
pos += 1
49+
start += 1
50+
pos += 1
51+
52+
# if opening marker is "(", switch it to closing marker ")"
53+
if marker == 0x28:
54+
marker = 0x29
3455

35-
# if opening marker is "(", switch it to closing marker ")"
36-
if marker == 0x28:
37-
marker = 0x29
56+
state.marker = marker
3857

3958
while pos < maximum:
4059
code = charCodeAt(string, pos)
41-
if code == marker:
42-
title = string[start + 1 : pos]
43-
title = unescapeAll(title)
44-
result.pos = pos + 1
45-
result.lines = lines
46-
result.str = title
47-
result.ok = True
48-
return result
49-
elif code == 0x28 and marker == 0x29: # /* ( */ /* ) */
50-
return result
51-
elif code == 0x0A:
52-
lines += 1
60+
if code == state.marker:
61+
state.pos = pos + 1
62+
state.str += unescapeAll(string[start:pos])
63+
state.ok = True
64+
return state
65+
elif code == 0x28 and state.marker == 0x29: # /* ( */ /* ) */
66+
return state
5367
elif code == 0x5C and pos + 1 < maximum: # /* \ */
5468
pos += 1
55-
if charCodeAt(string, pos) == 0x0A:
56-
lines += 1
5769

5870
pos += 1
5971

60-
return result
72+
# no closing marker found, but this link title may continue on the next line (for references)
73+
state.can_continue = True
74+
state.str += unescapeAll(string[start:pos])
75+
return state

markdown_it/rules_block/reference.py

Lines changed: 86 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) ->
1111
"entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent
1212
)
1313

14-
lines = 0
1514
pos = state.bMarks[startLine] + state.tShift[startLine]
1615
maximum = state.eMarks[startLine]
1716
nextLine = startLine + 1
@@ -22,51 +21,9 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) ->
2221
if state.src[pos] != "[":
2322
return False
2423

25-
# Simple check to quickly interrupt scan on [link](url) at the start of line.
26-
# Can be useful in practice: https://github.com/markdown-it/markdown-it/issues/54
27-
while pos < maximum:
28-
# /* ] */ /* \ */ /* : */
29-
if state.src[pos] == "]" and state.src[pos - 1] != "\\":
30-
if pos + 1 == maximum:
31-
return False
32-
if state.src[pos + 1] != ":":
33-
return False
34-
break
35-
pos += 1
36-
37-
endLine = state.lineMax
38-
39-
# jump line-by-line until empty one or EOF
40-
terminatorRules = state.md.block.ruler.getRules("reference")
24+
string = state.src[pos : maximum + 1]
4125

42-
oldParentType = state.parentType
43-
state.parentType = "reference"
44-
45-
while nextLine < endLine and not state.isEmpty(nextLine):
46-
# this would be a code block normally, but after paragraph
47-
# it's considered a lazy continuation regardless of what's there
48-
if state.sCount[nextLine] - state.blkIndent > 3:
49-
nextLine += 1
50-
continue
51-
52-
# quirk for blockquotes, this line should already be checked by that rule
53-
if state.sCount[nextLine] < 0:
54-
nextLine += 1
55-
continue
56-
57-
# Some tags can terminate paragraph without empty line.
58-
terminate = False
59-
for terminatorRule in terminatorRules:
60-
if terminatorRule(state, nextLine, endLine, True):
61-
terminate = True
62-
break
63-
64-
if terminate:
65-
break
66-
67-
nextLine += 1
68-
69-
string = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
26+
# string = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
7027
maximum = len(string)
7128

7229
labelEnd = None
@@ -79,11 +36,20 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) ->
7936
labelEnd = pos
8037
break
8138
elif ch == 0x0A: # /* \n */
82-
lines += 1
39+
if (lineContent := getNextLine(state, nextLine)) is not None:
40+
string += lineContent
41+
maximum = len(string)
42+
nextLine += 1
8343
elif ch == 0x5C: # /* \ */
8444
pos += 1
85-
if pos < maximum and charCodeAt(string, pos) == 0x0A:
86-
lines += 1
45+
if (
46+
pos < maximum
47+
and charCodeAt(string, pos) == 0x0A
48+
and (lineContent := getNextLine(state, nextLine)) is not None
49+
):
50+
string += lineContent
51+
maximum = len(string)
52+
nextLine += 1
8753
pos += 1
8854

8955
if (
@@ -97,7 +63,10 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) ->
9763
while pos < maximum:
9864
ch = charCodeAt(string, pos)
9965
if ch == 0x0A:
100-
lines += 1
66+
if (lineContent := getNextLine(state, nextLine)) is not None:
67+
string += lineContent
68+
maximum = len(string)
69+
nextLine += 1
10170
elif isSpace(ch):
10271
pass
10372
else:
@@ -106,28 +75,30 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) ->
10675

10776
# [label]: destination 'title'
10877
# ^^^^^^^^^^^ parse this
109-
res = state.md.helpers.parseLinkDestination(string, pos, maximum)
110-
if not res.ok:
78+
destRes = state.md.helpers.parseLinkDestination(string, pos, maximum)
79+
if not destRes.ok:
11180
return False
11281

113-
href = state.md.normalizeLink(res.str)
82+
href = state.md.normalizeLink(destRes.str)
11483
if not state.md.validateLink(href):
11584
return False
11685

117-
pos = res.pos
118-
lines += res.lines
86+
pos = destRes.pos
11987

12088
# save cursor state, we may need to roll back later
12189
destEndPos = pos
122-
destEndLineNo = lines
90+
destEndLineNo = nextLine
12391

12492
# [label]: destination 'title'
12593
# ^^^ skipping those spaces
12694
start = pos
12795
while pos < maximum:
12896
ch = charCodeAt(string, pos)
12997
if ch == 0x0A:
130-
lines += 1
98+
if (lineContent := getNextLine(state, nextLine)) is not None:
99+
string += lineContent
100+
maximum = len(string)
101+
nextLine += 1
131102
elif isSpace(ch):
132103
pass
133104
else:
@@ -136,15 +107,23 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) ->
136107

137108
# [label]: destination 'title'
138109
# ^^^^^^^ parse this
139-
res = state.md.helpers.parseLinkTitle(string, pos, maximum)
140-
if pos < maximum and start != pos and res.ok:
141-
title = res.str
142-
pos = res.pos
143-
lines += res.lines
110+
titleRes = state.md.helpers.parseLinkTitle(string, pos, maximum, None)
111+
while titleRes.can_continue:
112+
if (lineContent := getNextLine(state, nextLine)) is None:
113+
break
114+
string += lineContent
115+
pos = maximum
116+
maximum = len(string)
117+
nextLine += 1
118+
titleRes = state.md.helpers.parseLinkTitle(string, pos, maximum, titleRes)
119+
120+
if pos < maximum and start != pos and titleRes.ok:
121+
title = titleRes.str
122+
pos = titleRes.pos
144123
else:
145124
title = ""
146125
pos = destEndPos
147-
lines = destEndLineNo
126+
nextLine = destEndLineNo
148127

149128
# skip trailing spaces up to the end of the line
150129
while pos < maximum:
@@ -158,7 +137,7 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) ->
158137
# but it could still be a valid reference if we roll back
159138
title = ""
160139
pos = destEndPos
161-
lines = destEndLineNo
140+
nextLine = destEndLineNo
162141
while pos < maximum:
163142
ch = charCodeAt(string, pos)
164143
if not isSpace(ch):
@@ -181,7 +160,7 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) ->
181160
if "references" not in state.env:
182161
state.env["references"] = {}
183162

184-
state.line = startLine + lines + 1
163+
state.line = nextLine
185164

186165
# note, this is not part of markdown-it JS, but is useful for renderers
187166
if state.md.options.get("inline_definitions", False):
@@ -210,6 +189,47 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) ->
210189
}
211190
)
212191

213-
state.parentType = oldParentType
214-
215192
return True
193+
194+
195+
def getNextLine(state: StateBlock, nextLine: int) -> None | str:
196+
endLine = state.lineMax
197+
198+
if nextLine >= endLine or state.isEmpty(nextLine):
199+
# empty line or end of input
200+
return None
201+
202+
isContinuation = False
203+
204+
# this would be a code block normally, but after paragraph
205+
# it's considered a lazy continuation regardless of what's there
206+
if state.is_code_block(nextLine):
207+
isContinuation = True
208+
209+
# quirk for blockquotes, this line should already be checked by that rule
210+
if state.sCount[nextLine] < 0:
211+
isContinuation = True
212+
213+
if not isContinuation:
214+
terminatorRules = state.md.block.ruler.getRules("reference")
215+
oldParentType = state.parentType
216+
state.parentType = "reference"
217+
218+
# Some tags can terminate paragraph without empty line.
219+
terminate = False
220+
for terminatorRule in terminatorRules:
221+
if terminatorRule(state, nextLine, endLine, True):
222+
terminate = True
223+
break
224+
225+
state.parentType = oldParentType
226+
227+
if terminate:
228+
# terminated by another block
229+
return None
230+
231+
pos = state.bMarks[nextLine] + state.tShift[nextLine]
232+
maximum = state.eMarks[nextLine]
233+
234+
# maximum + 1 explicitly includes the newline
235+
return state.src[pos : maximum + 1]

markdown_it/rules_inline/image.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def image(state: StateInline, silent: bool) -> bool:
6666

6767
# [link]( <href> "title" )
6868
# ^^^^^^^ parsing link title
69-
res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax)
69+
res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax, None)
7070
if pos < max and start != pos and res.ok:
7171
title = res.str
7272
pos = res.pos

0 commit comments

Comments
 (0)