diff --git a/README.md b/README.md index 7f3bb8d..1a574af 100644 --- a/README.md +++ b/README.md @@ -456,13 +456,13 @@ options: * xml-deprecated-qweb-directive - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/test_module/website_templates.xml#L7 Deprecated QWeb directive `t-esc-options`. Use `t-options` instead - - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/test_module/website_templates.xml#L37 Deprecated QWeb directive `t-field-options`. Use `t-options` instead + - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/test_module/website_templates.xml#L42 Deprecated QWeb directive `t-field-options`. Use `t-options` instead * xml-deprecated-qweb-directive-15 - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/odoo18_module/views/deprecated_qweb_directives15.xml#L6 Deprecated QWeb directive `t-esc`. Use `t-out` instead - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/odoo18_module/views/deprecated_qweb_directives15.xml#L7 Deprecated QWeb directive `t-raw`. Use `t-out` instead - - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/odoo18_module/views/deprecated_qweb_directives15.xml#L13 Deprecated QWeb directive `t-esc`. Use `t-out` instead + - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/odoo18_module/views/deprecated_qweb_directives15.xml#L15 Deprecated QWeb directive `t-esc`. Use `t-out` instead * xml-deprecated-tree-attribute @@ -523,14 +523,14 @@ options: * xml-not-valid-char-link - - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/test_module/website_templates.xml#L59 The resource in in src/href contains a not valid character - - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/test_module/website_templates.xml#L61 The resource in in src/href contains a not valid character + - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/test_module/website_templates.xml#L64 The resource in in src/href contains a not valid character + - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/test_module/website_templates.xml#L66 The resource in in src/href contains a not valid character * xml-oe-structure-missing-id - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/test_module/website_templates.xml#L9 Consider removing the class `oe_structure` or adding a proper id to the tag. The id must contain `oe_structure` - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/test_module/website_templates.xml#L13 Consider removing the class `oe_structure` or adding a proper id to the tag. The id must contain `oe_structure` - - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/test_module/website_templates.xml#L41 Consider removing the class `oe_structure` or adding a proper id to the tag. The id must contain `oe_structure` + - https://github.com/OCA/odoo-pre-commit-hooks/blob/v0.2.20/test_repo/test_module/website_templates.xml#L46 Consider removing the class `oe_structure` or adding a proper id to the tag. The id must contain `oe_structure` * xml-record-missing-id diff --git a/src/oca_pre_commit_hooks/checks_odoo_module_xml.py b/src/oca_pre_commit_hooks/checks_odoo_module_xml.py index 4cbf2b9..1b7ac07 100644 --- a/src/oca_pre_commit_hooks/checks_odoo_module_xml.py +++ b/src/oca_pre_commit_hooks/checks_odoo_module_xml.py @@ -135,8 +135,16 @@ def update_node(self, manifest_data): "first_tag_lineno": first_tag_lineno, } ) + manifest_data.pop("_tag_locator", None) return node + def _get_tag_locator(self, manifest_data): + locator = manifest_data.get("_tag_locator") + if locator is None: + locator = node_xml.XMLStartTagLocator(manifest_data["filename"], manifest_data["node"]) + manifest_data["_tag_locator"] = locator + return locator + def __init__(self, manifest_datas, module_name, enable, disable, module_version, autofix): super().__init__(enable, disable, module_name, module_version, autofix) self.manifest_datas = manifest_datas or [] @@ -446,16 +454,16 @@ def visit_xml_record(self, manifest_data, record): line=record.sourceline, ) if self.autofix: - node_content = node_xml.NodeContent(manifest_data["filename"], record) - pattern = rb'\bid\s*=\s*(?P["\'])(?P' + re.escape(record_id.encode()) + rb")(?P=q)" - content_node2 = re.sub( - pattern, rb"id=\g" + xmlid_name.encode() + rb"\g", node_content.content_node, count=1 + locator = self._get_tag_locator(manifest_data) + content = locator.rewrite_start_tag( + locator.content, + record, + attr_value_replacements={"id": xmlid_name.encode()}, ) - if content_node2 != node_content.content_node: - # Modify the record attrib to propagate the change to other checks + if content != locator.content: record.attrib["id"] = xmlid_name - node_content.content_node = content_node2 - utils.perform_fix(manifest_data["filename"], bytes(node_content)) + utils.perform_fix(manifest_data["filename"], content) + self.update_node(manifest_data) first_attr = record.keys()[0] if first_attr != "id" and self.is_message_enabled("xml-id-position-first", manifest_data["disabled_checks"]): @@ -471,63 +479,14 @@ def visit_xml_record(self, manifest_data, record): def autofix_id_position_first(self, node, first_attr, manifest_data): attrs = dict(node.attrib) - node_content = node_xml.NodeContent(manifest_data["filename"], node) - # Build regex pattern to match the tag with all its known attributes - # sourceline is the last line of the last attribute, so we need to search backwards - tag_name = re.escape(node.tag) - - # Create a pattern that matches all known attributes in any order - # Each attribute: attrname="attrvalue" with optional whitespace - attr_patterns = [] - # Use the first attribute spaces since that id will be the new first attribute - keys = [f"spaces_before_{first_attr}", "id"] - for attr_name, attr_value in attrs.items(): - escaped_name = re.escape(attr_name) - escaped_value = re.escape(attr_value) - # Match attribute with flexible whitespace and quotes - attr_patterns.append( - rf'(?P\s*)(?P<{attr_name}>{escaped_name}\s*=\s*(?P["\'])({escaped_value})(?P=quote_{attr_name}))' - ) - if attr_name == "id": - # skip the first key "id" because is already added - continue - if attr_name == first_attr: - # Use the same spaces_before_id for the first attribute - # - # - keys.extend(["spaces_before_id", attr_name]) - continue - keys.extend([f"spaces_before_{attr_name}", attr_name]) - # Pattern for the complete opening tag - # - # Using DOTALL to match across lines - attrs_regex = r"".join(attr_patterns) - pattern = ( - rf"(?P<{tag_name})" # Opening tag with space - rf"{attrs_regex}" # All attributes with whitespace between them - rf"(?P\s*(/?)>)" # Optional self-closing and closing > - ) - - # Search with multiline and dotall flags - match = re.search(pattern, node_content.content_node.decode(), re.DOTALL | re.MULTILINE) - if match: - keys = [f"open_{node.tag}"] + keys + [f"close_{node.tag}"] - match_dict = match.groupdict() - recreate = "".join(match_dict[k] for k in keys) - original = match.group() - content_node2 = node_content.content_node.replace(original.encode(), recreate.encode(), 1) - if content_node2 != node_content.content_node: - # Modify the record attrib to propagate the change to other checks - id_value = attrs.pop("id") - node.attrib.clear() - new_attrs = {"id": id_value, **attrs} - node.attrib.update(new_attrs) - node_content.content_node = content_node2 - utils.perform_fix(manifest_data["filename"], bytes(node_content)) + locator = self._get_tag_locator(manifest_data) + content = locator.rewrite_start_tag(locator.content, node, first_attr="id") + if content != locator.content: + id_value = attrs.pop("id") + node.attrib.clear() + node.attrib.update({"id": id_value, **attrs}) + utils.perform_fix(manifest_data["filename"], content) + self.update_node(manifest_data) @utils.only_required_for_checks("xml-view-dangerous-replace-low-priority", "xml-deprecated-tree-attribute") def visit_xml_record_view(self, manifest_data, record): @@ -721,21 +680,20 @@ def check_xml_double_quotes_py(self): ): # Process text {} node_content = node_xml.NodeContent(manifest_data["filename"], elem) - if b""" not in node_content.content_node: - continue - self.register_error( - code="xml-double-quotes-py", - message='Escaped double quotes " for python code detected', - info=f"Use single quote instead: `{new_py_code}`", - filepath=manifest_data["filename_short"], - line=elem.sourceline, - ) - during2 = node_content.content_node.replace(b""", b"'") - if self.autofix and during2 != node_content.content_node: - # Modify the xml node to propagate the change to other checks - elem.text = new_py_code - node_content.content_node = during2 - utils.perform_fix(manifest_data["filename"], bytes(node_content)) + if b""" in node_content.content_node: + self.register_error( + code="xml-double-quotes-py", + message='Escaped double quotes " for python code detected', + info=f"Use single quote instead: `{new_py_code}`", + filepath=manifest_data["filename_short"], + line=elem.sourceline, + ) + during2 = node_content.content_node.replace(b""", b"'") + if self.autofix and during2 != node_content.content_node: + # Modify the xml node to propagate the change to other checks + elem.text = new_py_code + node_content.content_node = during2 + utils.perform_fix(manifest_data["filename"], bytes(node_content)) for attr_name, attr_value in elem.attrib.items(): # Process attributes @@ -877,6 +835,8 @@ def check_xml_deprecated_qweb_directives_15(self): for manifest_data in self.manifest_datas: if not self.is_message_enabled("xml-deprecated-qweb-directive-15", manifest_data["disabled_checks"]): continue + locator = self._get_tag_locator(manifest_data) if self.autofix else None + replacements = [] for node in self.xpath_qweb_deprecated15(manifest_data["node"]): node_attrs = set(node.attrib) node_attrs_deprecated = node_attrs & self.qweb_deprecated_directives15 @@ -887,19 +847,20 @@ def check_xml_deprecated_qweb_directives_15(self): line=node.sourceline, ) if self.autofix and "t-out" not in node_attrs: - # TODO: add autofix test - # if t-out already exists, skip autofix - attr_deprecated = next(iter(node_attrs_deprecated)) - value_deprecated = node.attrib.get(attr_deprecated) - node_content = node_xml.NodeContent(manifest_data["filename"], node) - pattern = rb"(?P\b)" + re.escape(attr_deprecated).encode() + rb'(?P\s*=\s*["\'])' - content_node2 = re.sub(pattern, rb"\gt-out\g", node_content.content_node, count=1) - if content_node2 != node_content.content_node: - # Modify the record attrib to propagate the change to other checks - node_content.content_node = content_node2 - node.attrib.pop(attr_deprecated) - node.attrib["t-out"] = value_deprecated - utils.perform_fix(manifest_data["filename"], bytes(node_content)) + # If t-out already exists, skip autofix to avoid clobbering another value. + attr_deprecated = next(attr for attr in node.attrib if attr in node_attrs_deprecated) + attr_span = locator.get_attr(node, attr_deprecated) + if not attr_span: + continue + replacements.append((attr_span.name_start, attr_span.name_end, b"t-out")) + value_deprecated = node.attrib.pop(attr_deprecated) + node.attrib["t-out"] = value_deprecated + if replacements: + content = locator.content + for start, end, new_name in reversed(replacements): + content = content[:start] + new_name + content[end:] + utils.perform_fix(manifest_data["filename"], content) + self.update_node(manifest_data) @utils.only_required_for_checks("xml-xpath-translatable-item") def check_xml_xpath(self): diff --git a/src/oca_pre_commit_hooks/node_xml.py b/src/oca_pre_commit_hooks/node_xml.py index a1726cf..004cd69 100644 --- a/src/oca_pre_commit_hooks/node_xml.py +++ b/src/oca_pre_commit_hooks/node_xml.py @@ -1,3 +1,219 @@ +# Based on https://github.com/mitsuhiko/sloppy-xml-py +from dataclasses import dataclass + + +@dataclass(frozen=True) +class XMLAttributeSpan: + name: str + value_start: int + value_end: int + name_start: int + name_end: int + attr_end: int + + +@dataclass(frozen=True) +class XMLStartTag: + tag: str + start: int + name_end: int + end: int + line: int + attrs: tuple[XMLAttributeSpan, ...] + + def get_attr(self, attr_name): + for attr in self.attrs: + if attr.name == attr_name: + return attr + return None + + +class XMLStartTagLocator: + """Locate opening tags and attribute spans without reformatting the file.""" + + def __init__(self, filename, tree): + self.filename = filename + with open(filename, "rb") as f_content: + self.content = f_content.read() + self.tags = self._scan_start_tags(self.content) + self._element_tags = {} + self._map_tree(tree) + + @staticmethod + def _consume_until(content, start, needle): + end = content.find(needle, start) + if end == -1: + return len(content) + return end + len(needle) + + @classmethod + def _scan_start_tags(cls, content): # noqa: C901 pylint: disable=too-complex + tags = [] + i = 0 + line = 1 + content_len = len(content) + + while i < content_len: + byte = content[i : i + 1] + if byte == b"\n": + line += 1 + i += 1 + continue + if byte != b"<": + i += 1 + continue + + if content.startswith(b"") + line += content[i:new_i].count(b"\n") + i = new_i + continue + if content.startswith(b"") + line += content[i:new_i].count(b"\n") + i = new_i + continue + if content.startswith(b"") + line += content[i:new_i].count(b"\n") + i = new_i + continue + if content.startswith(b"") + line += content[i:new_i].count(b"\n") + i = new_i + continue + if content.startswith(b"") + line += content[i:new_i].count(b"\n") + i = new_i + continue + + tag_line = line + tag_start = i + i += 1 + name_start = i + while i < content_len and content[i : i + 1] not in b" \t\r\n/>": + i += 1 + tag_name = content[name_start:i].decode("utf-8", errors="replace") + tag_name_end = i + attrs = [] + + while i < content_len: + while i < content_len and content[i : i + 1] in b" \t\r\n": + if content[i : i + 1] == b"\n": + line += 1 + i += 1 + if i >= content_len: + break + if content.startswith(b"/>", i): + i += 2 + break + if content[i : i + 1] == b">": + i += 1 + break + + attr_name_start = i + while i < content_len and content[i : i + 1] not in b" \t\r\n=/>": + i += 1 + attr_name_end = i + attr_name = content[attr_name_start:attr_name_end].decode("utf-8", errors="replace") + + while i < content_len and content[i : i + 1] in b" \t\r\n": + if content[i : i + 1] == b"\n": + line += 1 + i += 1 + if i < content_len and content[i : i + 1] == b"=": + i += 1 + while i < content_len and content[i : i + 1] in b" \t\r\n": + if content[i : i + 1] == b"\n": + line += 1 + i += 1 + + if i >= content_len or content[i : i + 1] not in (b'"', b"'"): + continue + quote = content[i : i + 1] + i += 1 + value_start = i + while i < content_len: + if content[i : i + 1] == b"\n": + line += 1 + if content[i : i + 1] == quote: + value_end = i + i += 1 + break + i += 1 + + attrs.append(XMLAttributeSpan(attr_name, value_start, value_end, attr_name_start, attr_name_end, i)) + + tags.append(XMLStartTag(tag_name, tag_start, tag_name_end, i, tag_line, tuple(attrs))) + + return tags + + def _map_tree(self, tree): + tag_iter = iter(self.tags) + for element in tree.getroot().iter(): + if not isinstance(getattr(element, "tag", None), str): + continue + tag_info = next(tag_iter, None) + if tag_info is None: + break + self._element_tags[tree.getpath(element)] = tag_info + + def get_tag(self, element): + return self._element_tags.get(element.getroottree().getpath(element)) + + def get_attr(self, element, attr_name): + tag_info = self.get_tag(element) + if not tag_info: + return None + return tag_info.get_attr(attr_name) + + def rewrite_start_tag( + self, content, element, attr_name_replacements=None, attr_value_replacements=None, first_attr=None + ): + tag_info = self.get_tag(element) + if not tag_info: + return content + + attr_name_replacements = attr_name_replacements or {} + attr_value_replacements = attr_value_replacements or {} + attrs = list(tag_info.attrs) + if not attrs: + return content + + open_part = content[tag_info.start : tag_info.name_end] + cursor = tag_info.name_end + spaces = [] + attr_chunks = {} + + for attr in attrs: + spaces.append(content[cursor : attr.name_start]) + chunk = bytearray(content[attr.name_start : attr.attr_end]) + if attr.name in attr_name_replacements: + new_name = attr_name_replacements[attr.name] + chunk[: len(attr.name)] = new_name + if attr.name in attr_value_replacements: + value_rel_start = attr.value_start - attr.name_start + value_rel_end = attr.value_end - attr.name_start + chunk[value_rel_start:value_rel_end] = attr_value_replacements[attr.name] + attr_chunks[attr.name] = bytes(chunk) + cursor = attr.attr_end + + close_part = content[cursor : tag_info.end] + ordered_attrs = attrs + if first_attr: + target_attr = next((attr for attr in attrs if attr.name == first_attr), None) + if target_attr and attrs[0].name != first_attr: + ordered_attrs = [target_attr] + [attr for attr in attrs if attr.name != first_attr] + + rebuilt = open_part + for idx, attr in enumerate(ordered_attrs): + rebuilt += spaces[idx] + attr_chunks[attr.name] + rebuilt += close_part + return content[: tag_info.start] + rebuilt + content[tag_info.end :] + + class NodeContent: """Represents the content and metadata of an XML node.""" diff --git a/test_repo/odoo18_module/views/deprecated_qweb_directives15.xml b/test_repo/odoo18_module/views/deprecated_qweb_directives15.xml index 8e1e855..d989026 100644 --- a/test_repo/odoo18_module/views/deprecated_qweb_directives15.xml +++ b/test_repo/odoo18_module/views/deprecated_qweb_directives15.xml @@ -1,6 +1,6 @@ - + diff --git a/tests/test_checks.py b/tests/test_checks.py index 65a413a..9b863ca 100644 --- a/tests/test_checks.py +++ b/tests/test_checks.py @@ -35,7 +35,7 @@ "xml-dangerous-qweb-replace-low-priority": 9, "xml-deprecated-data-node": 8, "xml-deprecated-openerp-node": 4, - "xml-deprecated-qweb-directive-15": 3, + "xml-deprecated-qweb-directive-15": 4, "xml-deprecated-qweb-directive": 2, "xml-deprecated-tree-attribute": 3, "xml-double-quotes-py": 3, @@ -258,7 +258,8 @@ def test_autofix(self): with open(t_out, "rb") as f: content = f.read() - assert b"t-out" not in content, "The deprecated t-out was previously fixed" + assert content.count(b"t-esc") > 1, "The deprecated t-esc was previously fixed" + assert content.count(b"t-raw") > 1, "The deprecated t-raw was previously fixed" self.checks_run(self.file_paths, autofix=True, no_exit=True, no_verbose=False) @@ -343,4 +344,6 @@ def test_autofix(self): with open(t_out, "rb") as f: content = f.read() - assert b"t-out" in content, "The deprecated t-out was not fixed" + # comments contain 1 valid deprecated + assert content.count(b"t-esc") == 1, "The deprecated t-esc was not fixed" + assert content.count(b"t-raw") == 1, "The deprecated t-esc was not fixed" diff --git a/tests/test_node_xml.py b/tests/test_node_xml.py new file mode 100644 index 0000000..455e35d --- /dev/null +++ b/tests/test_node_xml.py @@ -0,0 +1,95 @@ +import shutil +import tempfile +from contextlib import contextmanager +from pathlib import Path +from typing import Iterator + +from lxml import etree + +from oca_pre_commit_hooks import checks_odoo_module, node_xml + +REPO_ROOT = Path(__file__).resolve().parent.parent + + +@contextmanager +def temporary_module_copy(module_path: str) -> Iterator[Path]: + module_src = REPO_ROOT / module_path + with tempfile.TemporaryDirectory(dir=REPO_ROOT) as tmp_dir: + module_dst = Path(tmp_dir) / module_src.name + shutil.copytree(module_src, module_dst) + yield module_dst + + +def test_xml_start_tag_locator_multiline_qweb_directives(): + xml_path = REPO_ROOT / "test_repo/odoo18_module/views/deprecated_qweb_directives15.xml" + tree = etree.parse(str(xml_path)) + locator = node_xml.XMLStartTagLocator(str(xml_path), tree) + + nodes = tree.xpath("/odoo//template//*[@t-esc or @t-raw]") + attrs = [("t-esc" if "t-esc" in node.attrib else "t-raw") for node in nodes] + spans = [locator.get_attr(node, attr_name) for node, attr_name in zip(nodes, attrs, strict=True)] + + assert [locator.content[span.name_start : span.name_end] for span in spans] == [ + b"t-esc", + b"t-raw", + b"t-esc", + b"t-esc", + ] + # pylint: disable=protected-access # TODO: Check how to fix it instead of disable it + assert [locator._element_tags[tree.getpath(node)].line for node in nodes] == [6, 7, 13, 19] + + +def test_xml_deprecated_qweb_directive_15_autofix_preserves_format(): + with temporary_module_copy("test_repo/odoo18_module") as module_dst: + checks_odoo_module.run( + [str(module_dst / "__manifest__.py")], + enable={"xml-deprecated-qweb-directive-15"}, + no_exit=True, + autofix=True, + ) + + xml_content = (module_dst / "views" / "deprecated_qweb_directives15.xml").read_text() + assert xml_content.count("t-esc") == 1 + assert xml_content.count("t-raw") == 1 + assert xml_content.count("t-out") == 5 + assert '' in xml_content + assert '' in xml_content + assert 'Name ' in xml_content + assert ( + '

Line Template:

' in xml_content + ) + + +def test_xml_id_position_first_autofix_preserves_template_layout(): + with temporary_module_copy("test_repo/test_module") as module_dst: + checks_odoo_module.run( + [str(module_dst / "__openerp__.py")], + enable={"xml-id-position-first"}, + no_exit=True, + autofix=True, + ) + + xml_content = (module_dst / "website_templates.xml").read_text() + assert ( + '' in xml_content + ) + + +def test_xml_record_id_autofixes_preserve_menuitem_layout(): + with temporary_module_copy("test_repo/broken_module") as module_dst: + checks_odoo_module.run( + [str(module_dst / "__openerp__.py")], + enable={"xml-id-position-first", "xml-redundant-module-name"}, + no_exit=True, + autofix=True, + ) + + xml_content = (module_dst / "model_view_odoo2.xml").read_text() + assert "" in xml_content + assert '