Skip to content

Commit c6b3e47

Browse files
committed
Add more URL patterns to make sure only valid URLs are matched
1 parent aa6f970 commit c6b3e47

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

plugins/link_announcer.py

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,16 @@
77
from cloudbot import hook
88
from cloudbot.hook import Priority, Action
99

10+
# Building blocks for the path / query / fragment parts of url_re.
#
# Percent-encoded octet such as "%2F". Only upper-case hex digits are listed;
# url_re is compiled with re.IGNORECASE, so lower-case digits match as well.
ENCODED_CHAR = r"%[A-F0-9]{2}"
11+
# One character of a path segment. Inside the class, "*-." is a range covering
# "*", "+", ",", "-" and "."; together with the explicit "&" and "'" this is
# the earlier inline "&-." range minus "(" and ")". Parentheses are matched
# separately through no_parens() so that they must be balanced.
# NOTE: the value is a bare alternation ("[...]|%XX"), so it has to be wrapped
# in a group before a quantifier is applied to it.
PATH_SEG_CHARS = r"[A-Za-z0-9!$&'*-.:;=@_~\u00A0-\U0010FFFD]|" + ENCODED_CHAR
12+
# Query characters: everything allowed in a path segment, plus "/".
QUERY_CHARS = PATH_SEG_CHARS + r"|/"
13+
# Fragments accept exactly the same characters as queries.
FRAG_CHARS = QUERY_CHARS
14+
15+
16+
def no_parens(pattern):
    """Extend *pattern* so that parentheses are only accepted as a closed group.

    The returned regex fragment matches either

    * one occurrence of ``pattern`` (which itself must not match ``(`` or
      ``)``), or
    * a ``(`` ... ``)`` run whose interior consists of ``pattern`` matches
      and/or further parenthesis characters.

    A dangling ``(`` or ``)`` therefore never matches on its own, which keeps
    trailing punctuation such as ``"...baz.ext)"`` out of a matched URL.

    :param pattern: regex source for a single allowed unit; it may be a bare
        alternation (``a|b``) because it is only ever placed inside a group.
    :return: regex source wrapped in a non-capturing group, safe to quantify
        or concatenate directly.
    """
    # Only non-capturing groups are used: a capturing group under ``*`` would
    # just retain its last iteration and would silently change the result of
    # ``findall()`` / ``groups()`` on every regex this fragment is embedded in.
    return r"(?:{0}|\((?:{0}|[()])*\))".format(pattern)
18+
19+
1020
# This will match any URL, blacklist removed and abstracted to a priority/halting system
1121
url_re = re.compile(
1222
r"""
@@ -30,11 +40,11 @@
3040
3141
(?::\d*)? # port
3242
33-
(?:/(?:[A-Za-z0-9!$&-.:;=@_~\u00A0-\U0010FFFD]|%[A-F0-9]{2})*)* # Path segment
43+
(?:/(?:""" + no_parens(PATH_SEG_CHARS) + r""")*)*(?<![.,?!\]]) # Path segment
3444
35-
(?:\?(?:[A-Za-z0-9!$&-;=@_~\u00A0-\U0010FFFD]|%[A-F0-9]{2})*)? # Query
45+
(?:\?(?:""" + no_parens(QUERY_CHARS) + r""")*(?<![.,?!\]]))? # Query
3646
37-
(?:\#(?:[A-Za-z0-9!$&-;=@_~\u00A0-\U0010FFFD]|%[A-F0-9]{2})*)? # Fragment
47+
(?:\#(?:""" + no_parens(FRAG_CHARS) + r""")*(?<![.,?!\]]))? # Fragment
3848
""",
3949
re.IGNORECASE | re.VERBOSE
4050
)

tests/plugin_tests/test_link_announcer.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131

3232
FAILS = (
3333
"http://",
34+
"http://.",
35+
"http://..",
3436
"http://?",
3537
"http://??",
3638
"http://??/",
@@ -50,6 +52,9 @@
5052
":// should fail",
5153
"http://foo.bar/foo(bar)baz quux",
5254
"ftps://foo.bar/",
55+
"https://foo.bar/baz.ext)",
56+
"https://foo.bar/test.",
57+
"https://foo.bar/test(test",
5358
)
5459

5560
SEARCH = (

0 commit comments

Comments
 (0)