From e97565f689459a1884815b428032be5dd23251b8 Mon Sep 17 00:00:00 2001 From: Olivier Gayot Date: Mon, 24 Mar 2025 12:25:44 +0100 Subject: [PATCH] report: add support for YAQL-based bug patterns Bug patterns have historically only supported executing regular expressions. This is ok for log files and simple strings, but when we have access to more structured data (i.e., JSON or YAML) in bug reports, having the ability to find elements in such files is very useful. In addition to regular expressions, we now support YAQL expressions in bug patterns. They can be specified using the following syntax: subiquity [ YAQL expression ] The format attribute can optionally be specified and will control the parser used. Supported values are "yaml" (the default) and "json". The YAML parser can parse JSON but is slightly slower, thus specifying format="json" is recommended for JSON files. Patterns having tags will be considered non-matches if the yaql python module is not installed. --- .github/workflows/ci.yaml | 4 +- apport/report.py | 97 +++++++++++++++++----- tests/integration/test_report.py | 134 +++++++++++++++++++++++++++++++ 3 files changed, 211 insertions(+), 24 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 69ce18175..ff2eb1cda 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -66,7 +66,7 @@ jobs: libxml2-utils locales pkg-config polkitd procps python3 python3-apt python3-distutils-extra python3-gi python3-launchpadlib python3-psutil python3-pyqt5 python3-pytest python3-pytest-cov python3-setuptools - python3-systemd python3-zstandard valgrind xterm + python3-systemd python3-yaql python3-zstandard valgrind xterm - uses: actions/checkout@v4 - name: Enable German locale run: sed -i 's/^# de_DE/de_DE/g' /etc/locale.gen && locale-gen @@ -102,7 +102,7 @@ jobs: libxml2-utils locales pkg-config polkitd python3 python3-apt python3-distutils-extra python3-gi python3-launchpadlib python3-psutil python3-pyqt5 python3-pytest python3-pytest-cov python3-setuptools - python3-systemd python3-zstandard valgrind xterm + python3-systemd python3-yaql python3-zstandard valgrind xterm - uses: actions/checkout@v4 - name: Enable German locale run: sudo sed -i 's/^# de_DE/de_DE/g' /etc/locale.gen && sudo locale-gen diff --git a/apport/report.py b/apport/report.py index f40ded803..99f48d17c 100644 --- a/apport/report.py +++ b/apport/report.py @@ -201,7 +201,7 @@ def _command_output( ) -def _check_bug_pattern(report, pattern): +def _check_bug_pattern(report, pattern, *, yaql_engine=None): """Check if given report matches the given bug pattern XML DOM node. Return the bug URL on match, otherwise None. @@ -210,41 +210,87 @@ def _check_bug_pattern(report, pattern): return None for c in pattern.childNodes: - # regular expression condition - if c.nodeType == xml.dom.Node.ELEMENT_NODE and c.nodeName == "re": + if c.nodeType != xml.dom.Node.ELEMENT_NODE: + continue + if c.nodeName not in ("re", "yaql"): + continue + try: + key = c.attributes["key"].nodeValue + except KeyError: + continue + if key not in report: + return None + c.normalize() + if not c.hasChildNodes() or c.childNodes[0].nodeType != xml.dom.Node.TEXT_NODE: + continue + + if c.nodeName == "re": + # regular expression condition + regexp = c.childNodes[0].nodeValue + v = report[key] + if isinstance(v, problem_report.CompressedValue): + v = v.get_value() + regexp = regexp.encode("UTF-8") + elif isinstance(v, bytes): + regexp = regexp.encode("UTF-8") try: - key = c.attributes["key"].nodeValue - except KeyError: + re_c = re.compile(regexp) + except (re.error, TypeError, ValueError): continue - if key not in report: + if not re_c.search(v): + return None + elif c.nodeName == "yaql": + if yaql_engine is None: return None - c.normalize() - if c.hasChildNodes() and c.childNodes[0].nodeType == xml.dom.Node.TEXT_NODE: - regexp = c.childNodes[0].nodeValue - v = report[key] - if isinstance(v, problem_report.CompressedValue): - v = v.get_value() - regexp = regexp.encode("UTF-8") - elif isinstance(v, bytes): - regexp = regexp.encode("UTF-8") + import yaql # pylint: disable=import-outside-toplevel + + v = report[key] + if isinstance(v, problem_report.CompressedValue): + v = v.get_value() + expression = yaql_engine(c.childNodes[0].nodeValue) + try: + fmt = c.attributes["format"].nodeValue + except KeyError: + # Only JSON and YAML are supported for now. Since it is + # possible to parse JSON with a YAML parser, make it the + # default. For JSON, specifying format="json" will result in + # faster parsing though. + fmt = "yaml" + if fmt == "json": + import json # pylint: disable=import-outside-toplevel + try: - re_c = re.compile(regexp) - except (re.error, TypeError, ValueError): - continue - if not re_c.search(v): + data = json.loads(v) + except json.JSONDecodeError: return None + elif fmt == "yaml": + import yaml # pylint: disable=import-outside-toplevel + + try: + data = yaml.safe_load(v) + except yaml.YAMLError: + return None + else: + apport.logging.warning("unsupported format for yaql: %s", fmt) + return None + + try: + if not expression.evaluate(data=data): + return None + except yaql.language.exceptions.YaqlException: + return None return pattern.attributes["url"].nodeValue -def _check_bug_patterns(report, patterns): +def _check_bug_patterns(report, patterns, *, yaql_engine=None): try: dom = xml.dom.minidom.parseString(patterns) except (xml.parsers.expat.ExpatError, UnicodeEncodeError): return None for pattern in dom.getElementsByTagName("pattern"): - url = _check_bug_pattern(report, pattern) + url = _check_bug_pattern(report, pattern, yaql_engine=yaql_engine) if url: return url @@ -1242,7 +1288,14 @@ def search_bug_patterns(self, url: str | None) -> str | None: if "404 Not Found" in patterns: return None - url = _check_bug_patterns(self, patterns) + try: + import yaql # pylint: disable=import-outside-toplevel + + yaql_engine = yaql.factory.YaqlFactory().create() + except ImportError: + yaql_engine = None + + url = _check_bug_patterns(self, patterns, yaql_engine=yaql_engine) if url: return url diff --git a/tests/integration/test_report.py b/tests/integration/test_report.py index 64aa3b7fc..61b3a05bc 100644 --- a/tests/integration/test_report.py +++ b/tests/integration/test_report.py @@ -4,6 +4,7 @@ import atexit import grp +import importlib import io import os import pathlib @@ -19,6 +20,8 @@ from typing import IO from unittest.mock import MagicMock +import pytest + import apport.packaging import apport.report import problem_report @@ -1196,6 +1199,137 @@ def test_search_bug_patterns(self) -> None: "gracefully handles nonexisting URL domain", ) + @pytest.mark.skipif( + not importlib.util.find_spec("yaql"), + reason="YAQL Python module is not installed", + ) + def test_search_bug_patterns_yaql_installed(self) -> None: + self._test_search_bug_patterns_yaql(yaql_installed=True) + + def test_search_bug_patterns_yaql_not_installed(self) -> None: + with unittest.mock.patch.dict(sys.modules, {"yaql": None}, clear=False): + self._test_search_bug_patterns_yaql(yaql_installed=False) + + def _test_search_bug_patterns_yaql(self, yaql_installed: bool) -> None: + # create some test patterns + patterns = textwrap.dedent( + """\ + <?xml version="1.0"?> + <patterns> + <pattern url="http://bugtracker.net/bugs/1"> + <re key="Package">subiquity</re> + <yaql key="ProbeData" format="json"> + isDict($) and $.get("blockdev", {}).values().count() > 3 + </yaql> + </pattern> + <pattern url="http://bugtracker.net/bugs/2"> + <re key="Package">subiquity</re> + <yaql key="ProbeData"> + isDict($) and $.get("blockdev", {}).values().where( + isDict($) and $.get("ID_PART_TABLE_TYPE") = "unsupported" + ).any() + </yaql> + </pattern> + <pattern url="http://bugtracker.net/bugs/3"> + <re key="Package">subiquity</re> + <yaql key="CurtinPartitioningConfig" format="yaml"> + not $.storage.config.where( + $.get("path") = "/" + ).any() + </yaql> + </pattern> + </patterns>""" + ).encode() + + reports = [] + r = apport.report.Report() + r["Package"] = "subiquity" + r["ProbeData"] = ( + """ { + "blockdev": { + "/dev/sda": null, "/dev/sda1": null, + "/dev/sda2": null, "/dev/sda3": null + } + } """ + ) + + reports.append(r) + + r = apport.report.Report() + r["Package"] = "subiquity" + r["ProbeData"] = ( + '{"blockdev": {"/dev/sda1": {"ID_PART_TABLE_TYPE": "unsupported"}}}' + ) + reports.append(r) + + r = apport.report.Report() + r["Package"] = "subiquity" + r["CurtinPartitioningConfig"] = textwrap.dedent( + """ + storage: + config: + - type: mount + path: /home + """ + ) + reports.append(r) + + r = apport.report.Report() + r["Package"] = "subiquity" + r["ProbeData"] = ( + """ { + "blockdev": { + "/dev/sda": null, "/dev/sda1": null, + } + } """ + ) + reports.append(r) + + r = apport.report.Report() + r["Package"] = "subiquity" + r["ProbeData"] = "{}" + reports.append(r) + + r = apport.report.Report() + r["Package"] = "subiquity" + r["ProbeData"] = "{" # <- invalid JSON + reports.append(r) + + if yaql_installed: + expected_values = [ + ("http://bugtracker.net/bugs/1", None), + ("http://bugtracker.net/bugs/2", None), + ("http://bugtracker.net/bugs/3", None), + (None, "does not match any pattern"), + (None, "does not match empty JSON"), + (None, "does not match invalid JSON"), + ] + else: + expected_values = [ + (None, "does not match without a YAQL engine") for _ in reports + ] + + with tempfile.NamedTemporaryFile(prefix="apport-") as bug_pattern: + bug_pattern.write(patterns) + bug_pattern.flush() + pattern_url = f"file://{bug_pattern.name}" + + for report, (expected_value, reason) in zip(reports, expected_values): + self.assertEqual( + expected_value, report.search_bug_patterns(pattern_url), reason + ) + # Also test with compressed values (only for those who have ProbeData) + if "ProbeData" in report: + compressed = report.copy() + compressed["ProbeData"] = problem_report.CompressedValue( + report["ProbeData"].encode("utf-8") + ) + self.assertEqual( + expected_value, + compressed.search_bug_patterns(pattern_url), + reason, + ) + def test_add_hooks_info(self) -> None: # TODO: Split into separate test cases # pylint: disable=too-many-statements