# Summary (free text before the first "diff --git" header is ignored by patch tools):
#
# * regexploit/ast/sre.py: add UnsupportedSreOpException; SreOpParser.parse_op
#   raises it when the parser has no from_<op name> method for the given op.
# * regexploit/bin/regexploit-python-env: wrap parse_sre in try/except Exception,
#   print the failing pattern and continue with the next hooked regex.
# * regexploit/bin/regexploit_python_ast.py: catch UnsupportedSreOpException next
#   to re.error, print the failing pattern and continue with the next regex.
# * tests/test_at.py: expect UnsupportedSreOpException for a++b, a?+b and a*+b.
#
# NOTE(review): this patch was recovered from a copy whose line breaks and
# indentation had been collapsed. Record boundaries were rebuilt to satisfy every
# hunk's @@ line counts; the leading whitespace of context lines is inferred from
# Python structure -- verify against the target tree before applying.
diff --git a/regexploit/ast/sre.py b/regexploit/ast/sre.py
index 675eacb..f793691 100644
--- a/regexploit/ast/sre.py
+++ b/regexploit/ast/sre.py
@@ -15,6 +15,10 @@
 SreOp = Tuple[SreConstant, SreOpData]
 
 
+class UnsupportedSreOpException(Exception):
+    pass
+
+
 class SreOpParser:
     def __init__(self):
         self._groups = {}
@@ -24,6 +28,8 @@ def parse_sre(self, pattern: str, flags: int = 0):
         return self.sequence_or_singleton(sre_parse.parse(pattern, flags))
 
     def parse_op(self, op: SreConstant, data: SreOpData):
+        if not hasattr(self, f"from_{op.name}"):
+            raise UnsupportedSreOpException(f"Unsupported SRE op: {op.name}")
         return getattr(self, f"from_{op.name}")(data)
 
     def sequence_or_singleton(self, ops: List[SreOp]):
diff --git a/regexploit/bin/regexploit-python-env b/regexploit/bin/regexploit-python-env
index 6f0fae2..cadf73e 100755
--- a/regexploit/bin/regexploit-python-env
+++ b/regexploit/bin/regexploit-python-env
@@ -39,9 +39,13 @@ def main():
             hooked_regex: regexploit.hook.CompiledRegex
             for hooked_regex in regexploit.hook.get_and_clear_regexes():
                 output.next()
-                parsed = SreOpParser().parse_sre(
-                    hooked_regex.pattern, hooked_regex.flags
-                )
+                try:
+                    parsed = SreOpParser().parse_sre(
+                        hooked_regex.pattern, hooked_regex.flags
+                    )
+                except Exception as e:
+                    print(f"Error parsing regex {hooked_regex.pattern} from {p}: {e}")
+                    continue
                 for redos in find(parsed):
                     if redos.starriness > 2:
                         output.record(
diff --git a/regexploit/bin/regexploit_python_ast.py b/regexploit/bin/regexploit_python_ast.py
index b7a0d25..7dde83c 100644
--- a/regexploit/bin/regexploit_python_ast.py
+++ b/regexploit/bin/regexploit_python_ast.py
@@ -6,7 +6,7 @@
 import traceback
 import warnings
 
-from regexploit.ast.sre import SreOpParser
+from regexploit.ast.sre import SreOpParser, UnsupportedSreOpException
 from regexploit.bin.files import file_generator
 from regexploit.languages.python_node_visitor import PythonNodeVisitor
 from regexploit.output.text import TextOutput
@@ -31,6 +31,9 @@ def handle_file(filename: str, output: TextOutput):
             parsed = SreOpParser().parse_sre(regex.pattern, regex.flags)
         except re.error:
             continue  # We will have many strings which aren't actually regexes
+        except UnsupportedSreOpException as e:
+            print(f"Error parsing regex {regex.pattern} from {filename}: {e}")
+            continue
         try:
             output.next()
             for redos in find(parsed):
diff --git a/tests/test_at.py b/tests/test_at.py
index 8aafec8..a9b9ed8 100644
--- a/tests/test_at.py
+++ b/tests/test_at.py
@@ -1,7 +1,7 @@
 import pytest
 
 from regexploit.ast.at import EndOfString
-from regexploit.ast.sre import SreOpParser
+from regexploit.ast.sre import SreOpParser, UnsupportedSreOpException
 
 
 def from_regex(pattern: str):
@@ -60,3 +60,16 @@ def test_real():
     dollar = EndOfString()
     dollar.set_character(from_regex(r"-\d+(\s*\s*\s*)").elements)
     assert dollar.character == from_regex(r"[\s]")
+
+
+@pytest.mark.parametrize(
+    "r",
+    [
+        r"a++b",
+        r"a?+b",
+        r"a*+b",
+    ],
+)
+def test_unsupported_op(r):
+    with pytest.raises(UnsupportedSreOpException):
+        from_regex(r)