diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 69ce18175..ff2eb1cda 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -66,7 +66,7 @@ jobs: libxml2-utils locales pkg-config polkitd procps python3 python3-apt python3-distutils-extra python3-gi python3-launchpadlib python3-psutil python3-pyqt5 python3-pytest python3-pytest-cov python3-setuptools - python3-systemd python3-zstandard valgrind xterm + python3-systemd python3-yaql python3-zstandard valgrind xterm - uses: actions/checkout@v4 - name: Enable German locale run: sed -i 's/^# de_DE/de_DE/g' /etc/locale.gen && locale-gen @@ -102,7 +102,7 @@ jobs: libxml2-utils locales pkg-config polkitd python3 python3-apt python3-distutils-extra python3-gi python3-launchpadlib python3-psutil python3-pyqt5 python3-pytest python3-pytest-cov python3-setuptools - python3-systemd python3-zstandard valgrind xterm + python3-systemd python3-yaql python3-zstandard valgrind xterm - uses: actions/checkout@v4 - name: Enable German locale run: sudo sed -i 's/^# de_DE/de_DE/g' /etc/locale.gen && sudo locale-gen diff --git a/apport/report.py b/apport/report.py index f40ded803..99f48d17c 100644 --- a/apport/report.py +++ b/apport/report.py @@ -201,7 +201,7 @@ def _command_output( ) -def _check_bug_pattern(report, pattern): +def _check_bug_pattern(report, pattern, *, yaql_engine=None): """Check if given report matches the given bug pattern XML DOM node. Return the bug URL on match, otherwise None. @@ -210,41 +210,87 @@ def _check_bug_pattern(report, pattern): return None for c in pattern.childNodes: - # regular expression condition - if c.nodeType == xml.dom.Node.ELEMENT_NODE and c.nodeName == "re": + if c.nodeType != xml.dom.Node.ELEMENT_NODE: + continue + if c.nodeName not in ("re", "yaql"): + continue + try: + key = c.attributes["key"].nodeValue + except KeyError: + continue + if key not in report: + return None + c.normalize() + if not c.hasChildNodes() or c.childNodes[0].nodeType != xml.dom.Node.TEXT_NODE: + continue + + if c.nodeName == "re": + # regular expression condition + regexp = c.childNodes[0].nodeValue + v = report[key] + if isinstance(v, problem_report.CompressedValue): + v = v.get_value() + regexp = regexp.encode("UTF-8") + elif isinstance(v, bytes): + regexp = regexp.encode("UTF-8") try: - key = c.attributes["key"].nodeValue - except KeyError: + re_c = re.compile(regexp) + except (re.error, TypeError, ValueError): continue - if key not in report: + if not re_c.search(v): + return None + elif c.nodeName == "yaql": + if yaql_engine is None: return None - c.normalize() - if c.hasChildNodes() and c.childNodes[0].nodeType == xml.dom.Node.TEXT_NODE: - regexp = c.childNodes[0].nodeValue - v = report[key] - if isinstance(v, problem_report.CompressedValue): - v = v.get_value() - regexp = regexp.encode("UTF-8") - elif isinstance(v, bytes): - regexp = regexp.encode("UTF-8") + import yaql # pylint: disable=import-outside-toplevel + + v = report[key] + if isinstance(v, problem_report.CompressedValue): + v = v.get_value() + expression = yaql_engine(c.childNodes[0].nodeValue) + try: + fmt = c.attributes["format"].nodeValue + except KeyError: + # Only JSON and YAML are supported for now. Since it is + # possible to parse JSON with a YAML parser, make it the + # default. For JSON, specifying format="json" will result in + # faster parsing though. + fmt = "yaml" + if fmt == "json": + import json # pylint: disable=import-outside-toplevel + try: - re_c = re.compile(regexp) - except (re.error, TypeError, ValueError): - continue - if not re_c.search(v): + data = json.loads(v) + except json.JSONDecodeError: return None + elif fmt == "yaml": + import yaml # pylint: disable=import-outside-toplevel + + try: + data = yaml.safe_load(v) + except yaml.YAMLError: + return None + else: + apport.logging.warning("unsupported format for yaql: %s", fmt) + return None + + try: + if not expression.evaluate(data=data): + return None + except yaql.language.exceptions.YaqlException: + return None return pattern.attributes["url"].nodeValue -def _check_bug_patterns(report, patterns): +def _check_bug_patterns(report, patterns, *, yaql_engine=None): try: dom = xml.dom.minidom.parseString(patterns) except (xml.parsers.expat.ExpatError, UnicodeEncodeError): return None for pattern in dom.getElementsByTagName("pattern"): - url = _check_bug_pattern(report, pattern) + url = _check_bug_pattern(report, pattern, yaql_engine=yaql_engine) if url: return url @@ -1242,7 +1288,14 @@ def search_bug_patterns(self, url: str | None) -> str | None: if "404 Not Found" in patterns: return None - url = _check_bug_patterns(self, patterns) + try: + import yaql # pylint: disable=import-outside-toplevel + + yaql_engine = yaql.factory.YaqlFactory().create() + except ImportError: + yaql_engine = None + + url = _check_bug_patterns(self, patterns, yaql_engine=yaql_engine) if url: return url diff --git a/tests/integration/test_report.py b/tests/integration/test_report.py index 64aa3b7fc..61b3a05bc 100644 --- a/tests/integration/test_report.py +++ b/tests/integration/test_report.py @@ -4,6 +4,7 @@ import atexit import grp +import importlib import io import os import pathlib @@ -19,6 +20,8 @@ from typing import IO from unittest.mock import MagicMock +import pytest + import apport.packaging import apport.report import problem_report @@ -1196,6 +1199,137 @@ def test_search_bug_patterns(self) -> None: "gracefully handles nonexisting URL domain", ) + @pytest.mark.skipif( + not importlib.util.find_spec("yaql"), + reason="YAQL Python module is not installed", + ) + def test_search_bug_patterns_yaql_installed(self) -> None: + self._test_search_bug_patterns_yaql(yaql_installed=True) + + def test_search_bug_patterns_yaql_not_installed(self) -> None: + with unittest.mock.patch.dict(sys.modules, {"yaql": None}, clear=False): + self._test_search_bug_patterns_yaql(yaql_installed=False) + + def _test_search_bug_patterns_yaql(self, yaql_installed: bool) -> None: + # create some test patterns + patterns = textwrap.dedent( + """\ + <?xml version="1.0"?> + <patterns> + <pattern url="http://bugtracker.net/bugs/1"> + <re key="Package">subiquity</re> + <yaql key="ProbeData" format="json"> + isDict($) and $.get("blockdev", {}).values().count() > 3 + </yaql> + </pattern> + <pattern url="http://bugtracker.net/bugs/2"> + <re key="Package">subiquity</re> + <yaql key="ProbeData"> + isDict($) and $.get("blockdev", {}).values().where( + isDict($) and $.get("ID_PART_TABLE_TYPE") = "unsupported" + ).any() + </yaql> + </pattern> + <pattern url="http://bugtracker.net/bugs/3"> + <re key="Package">subiquity</re> + <yaql key="CurtinPartitioningConfig" format="yaml"> + not $.storage.config.where( + $.get("path") = "/" + ).any() + </yaql> + </pattern> + </patterns>""" + ).encode() + + reports = [] + r = apport.report.Report() + r["Package"] = "subiquity" + r["ProbeData"] = ( + """ { + "blockdev": { + "/dev/sda": null, "/dev/sda1": null, + "/dev/sda2": null, "/dev/sda3": null + } + } """ + ) + + reports.append(r) + + r = apport.report.Report() + r["Package"] = "subiquity" + r["ProbeData"] = ( + '{"blockdev": {"/dev/sda1": {"ID_PART_TABLE_TYPE": "unsupported"}}}' + ) + reports.append(r) + + r = apport.report.Report() + r["Package"] = "subiquity" + r["CurtinPartitioningConfig"] = textwrap.dedent( + """ + storage: + config: + - type: mount + path: /home + """ + ) + reports.append(r) + + r = apport.report.Report() + r["Package"] = "subiquity" + r["ProbeData"] = ( + """ { + "blockdev": { + "/dev/sda": null, "/dev/sda1": null, + } + } """ + ) + reports.append(r) + + r = apport.report.Report() + r["Package"] = "subiquity" + r["ProbeData"] = "{}" + reports.append(r) + + r = apport.report.Report() + r["Package"] = "subiquity" + r["ProbeData"] = "{" # <- invalid JSON + reports.append(r) + + if yaql_installed: + expected_values = [ + ("http://bugtracker.net/bugs/1", None), + ("http://bugtracker.net/bugs/2", None), + ("http://bugtracker.net/bugs/3", None), + (None, "does not match any pattern"), + (None, "does not match empty JSON"), + (None, "does not match invalid JSON"), + ] + else: + expected_values = [ + (None, "does not match without a YAQL engine") for _ in reports + ] + + with tempfile.NamedTemporaryFile(prefix="apport-") as bug_pattern: + bug_pattern.write(patterns) + bug_pattern.flush() + pattern_url = f"file://{bug_pattern.name}" + + for report, (expected_value, reason) in zip(reports, expected_values): + self.assertEqual( + expected_value, report.search_bug_patterns(pattern_url), reason + ) + # Also test with compressed values (only for those who have ProbeData) + if "ProbeData" in report: + compressed = report.copy() + compressed["ProbeData"] = problem_report.CompressedValue( + report["ProbeData"].encode("utf-8") + ) + self.assertEqual( + expected_value, + compressed.search_bug_patterns(pattern_url), + reason, + ) + def test_add_hooks_info(self) -> None: # TODO: Split into separate test cases # pylint: disable=too-many-statements