diff --git a/ext/dom/node.c b/ext/dom/node.c
index 105f4cb73a018..c48737b376ef7 100644
--- a/ext/dom/node.c
+++ b/ext/dom/node.c
@@ -2081,6 +2081,113 @@ PHP_METHOD(DOMNode, lookupNamespaceURI)
 }
 /* }}} end dom_node_lookup_namespace_uri */
 
+/* Temporarily turns every attribute that php_dom_ns_is_fast() matches against
+ * the xmlns token into an entry on the element's nsDef list. The attribute is
+ * unlinked from node->properties (but not freed) and remembered in the new
+ * declaration's _private field. "links" records what dom_unlink_ns_decls()
+ * needs to revert this: under the key <node address> the number of
+ * declarations pushed onto nsDef, and under the key <node address | 1> the
+ * original node->ns when it gets replaced below. Non-elements are untouched. */
+static void dom_relink_ns_decls_element(HashTable *links, xmlNodePtr node)
+{
+	if (node->type == XML_ELEMENT_NODE) {
+		for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
+			if (php_dom_ns_is_fast((const xmlNode *) attr, php_dom_ns_is_xmlns_magic_token)) {
+				xmlNsPtr ns = xmlMalloc(sizeof(*ns));
+				if (!ns) {
+					return;
+				}
+
+				zval *zv = zend_hash_index_lookup(links, (zend_ulong) node);
+				if (Z_ISNULL_P(zv)) {
+					ZVAL_LONG(zv, 1);
+				} else {
+					Z_LVAL_P(zv)++;
+				}
+
+				bool should_free;
+				xmlChar *attr_value = php_libxml_attr_value(attr, &should_free);
+
+				memset(ns, 0, sizeof(*ns));
+				ns->type = XML_LOCAL_NAMESPACE;
+				ns->href = should_free ? attr_value : xmlStrdup(attr_value);
+				ns->prefix = attr->ns->prefix ? xmlStrdup(attr->name) : NULL;
+				ns->next = node->nsDef;
+				node->nsDef = ns;
+
+				/* Unlink the attribute from its siblings, but leave its own prev/next
+				 * pointers intact: the loop still needs attr->next, and
+				 * dom_unlink_ns_decls() uses both to splice it back in. */
+				ns->_private = attr;
+				if (attr->prev) {
+					attr->prev->next = attr->next;
+				} else {
+					node->properties = attr->next;
+				}
+				if (attr->next) {
+					attr->next->prev = attr->prev;
+				}
+			}
+		}
+
+		if (node->ns && !node->ns->prefix) { // TODO: is this complete?
+			/* Workaround for the behaviour where xmlSearchNs() can return the current namespace */
+			zend_hash_index_add_new_ptr(links, (zend_ulong) node | 1, node->ns);
+			node->ns = xmlSearchNs(node->doc, node, NULL);
+		}
+	}
+}
+
+/* Applies dom_relink_ns_decls_element() to root and to each node that
+ * php_dom_next_in_tree_order() yields below it. A NULL root is a no-op. */
+static void dom_relink_ns_decls(HashTable *links, xmlNodePtr root)
+{
+	if (!root) {
+		return;
+	}
+
+	dom_relink_ns_decls_element(links, root);
+
+	xmlNodePtr base = root;
+	xmlNodePtr node = base->children;
+	while (node != NULL) {
+		dom_relink_ns_decls_element(links, node);
+		node = php_dom_next_in_tree_order(node, base);
+	}
+}
+
+/* Reverts everything recorded in "links" by dom_relink_ns_decls_element():
+ * restores the saved node->ns pointers, pops the temporary declarations off
+ * nsDef, relinks their attributes into node->properties and frees the
+ * declarations. */
+static void dom_unlink_ns_decls(HashTable *links)
+{
+	ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(links, zend_ulong h, zval *data) {
+		if (h & 1) {
+			xmlNodePtr node = (xmlNodePtr) (h ^ 1);
+			node->ns = Z_PTR_P(data);
+		} else {
+			xmlNodePtr node = (xmlNodePtr) h;
+			while (Z_LVAL_P(data)-- > 0) {
+				xmlNsPtr ns = node->nsDef;
+				node->nsDef = node->nsDef->next;
+
+				xmlAttrPtr attr = ns->_private;
+				if (attr->prev) {
+					attr->prev->next = attr;
+				} else {
+					node->properties = attr;
+				}
+				if (attr->next) {
+					attr->next->prev = attr;
+				}
+
+				xmlFreeNs(ns);
+			}
+		}
+	} ZEND_HASH_FOREACH_END();
+}
+
 static int dom_canonicalize_node_parent_lookup_cb(void *user_data, xmlNodePtr node, xmlNodePtr parent)
 {
 	xmlNodePtr root = user_data;
@@ -2136,7 +2243,23 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
 
 	docp = nodep->doc;
 
-	if (! docp) {
+	HashTable links;
+	bool modern = php_dom_follow_spec_node(nodep);
+	if (modern) {
+		xmlNodePtr root = nodep;
+		while (root->parent) {
+			root = root->parent;
+		}
+
+		if (UNEXPECTED(root->type != XML_DOCUMENT_NODE && root->type != XML_HTML_DOCUMENT_NODE)) {
+			php_dom_throw_error_with_message(HIERARCHY_REQUEST_ERR, "Canonicalization can only happen on nodes attached to a document.", /* strict */ true);
+			RETURN_THROWS();
+		}
+
+		zend_hash_init(&links, 0, NULL, NULL, false);
+		dom_relink_ns_decls(&links, xmlDocGetRootElement(docp));
+	} else if (!docp) {
+		/* Note: not triggerable with modern DOM */
 		zend_throw_error(NULL, "Node must be associated with a document");
 		RETURN_THROWS();
 	}
@@ -2158,12 +2281,12 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
 		if (!tmp) {
 			/* if mode == 0 then $xpath arg is 3, if mode == 1 then $xpath is 4 */
 			zend_argument_value_error(3 + mode, "must have a \"query\" key");
-			RETURN_THROWS();
+			goto clean_links;
 		}
 		if (Z_TYPE_P(tmp) != IS_STRING) {
 			/* if mode == 0 then $xpath arg is 3, if mode == 1 then $xpath is 4 */
 			zend_argument_type_error(3 + mode, "\"query\" option must be a string, %s given", zend_zval_value_name(tmp));
-			RETURN_THROWS();
+			goto clean_links;
 		}
 
 		xquery = Z_STRVAL_P(tmp);
@@ -2195,7 +2318,7 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
 			}
 			xmlXPathFreeContext(ctxp);
 			zend_throw_error(NULL, "XPath query did not return a nodeset");
-			RETURN_THROWS();
+			goto clean_links;
 		}
 	}
 
@@ -2264,6 +2387,13 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
-			RETURN_LONG(bytes);
+			/* RETVAL, not RETURN: execution must still reach clean_links below. */
+			RETVAL_LONG(bytes);
 		}
 	}
+
+clean_links:
+	if (modern) {
+		dom_unlink_ns_decls(&links);
+		zend_hash_destroy(&links);
+	}
 }
 /* }}} */
 
diff --git a/ext/dom/tests/canonicalization.phpt b/ext/dom/tests/canonicalization.phpt
index 4183b7cd41edc..fcd9b207bc24b 100644
--- a/ext/dom/tests/canonicalization.phpt
+++ b/ext/dom/tests/canonicalization.phpt
@@ -21,32 +21,46 @@ $dom = new 
DOMDocument();
 $dom->loadXML($xml);
 $doc = $dom->documentElement->firstChild;
+$newDom = Dom\XMLDocument::createFromString($xml);
+$newDoc = $newDom->documentElement->firstChild;
+$counter = 0;
+
+function check($doc, $newDoc, ...$args) { // echo $doc's C14N() output and require $newDoc's to be identical
+    global $counter;
+    $counter++; // 1-based index of this check() call, reported on mismatch
+    echo $doc->C14N(...$args)."\n\n";
+    if ($doc->C14N(...$args) !== $newDoc->C14N(...$args)) {
+        var_dump($doc->C14N(...$args), $newDoc->C14N(...$args));
+        throw new Error("mismatch: $counter");
+    }
+}
+
 /* inclusive/without comments first child element of doc element is context. */
-echo $doc->C14N()."\n\n";
+check($doc, $newDoc);
 
 /* exclusive/without comments first child element of doc element is context. */
-echo $doc->c14N(TRUE)."\n\n";
+check($doc, $newDoc, TRUE);
 
 /* inclusive/with comments first child element of doc element is context. */
-echo $doc->C14N(FALSE, TRUE)."\n\n";
+check($doc, $newDoc, FALSE, TRUE);
 
 /* exclusive/with comments first child element of doc element is context. */
-echo $doc->C14N(TRUE, TRUE)."\n\n";
+check($doc, $newDoc, TRUE, TRUE);
 
 /* exclusive/without comments using xpath query. */
-echo $doc->c14N(TRUE, FALSE, array('query'=>'(//. | //@* | //namespace::*)'))."\n\n";
+check($doc, $newDoc, TRUE, FALSE, array('query'=>'(//. | //@* | //namespace::*)'));
 
 /* exclusive/without comments first child element of doc element is context.
    using xpath query with registered namespace.
    test namespace prefix is also included. */
-echo $doc->c14N(TRUE, FALSE,
+check($doc, $newDoc, TRUE, FALSE,
                 array('query'=>'(//a:contain | //a:bar | .//namespace::*)',
                       'namespaces'=>array('a'=>'http://www.example.com/ns/foo')),
-                array('test'))."\n\n";
+                array('test'));
 
 /* exclusive/without comments first child element of doc element is context.
test namespace prefix is also included */ -echo $doc->C14N(TRUE, FALSE, NULL, array('test')); +check($doc, $newDoc, TRUE, FALSE, NULL, array('test')); ?> --EXPECT-- diff --git a/ext/dom/tests/modern/xml/canonicalize_unattached.phpt b/ext/dom/tests/modern/xml/canonicalize_unattached.phpt new file mode 100644 index 0000000000000..cec5f1085757b --- /dev/null +++ b/ext/dom/tests/modern/xml/canonicalize_unattached.phpt @@ -0,0 +1,20 @@ +--TEST-- +Canonicalize unattached node should fail +--EXTENSIONS-- +dom +--FILE-- +'); +$child = $d->documentElement->firstChild; +$child->remove(); + +try { + $child->C14N(); +} catch (Dom\DOMException $e) { + echo $e->getMessage(), "\n"; +} + +?> +--EXPECT-- +Canonicalization can only happen on nodes attached to a document. diff --git a/ext/dom/tests/modern/xml/gh20444.phpt b/ext/dom/tests/modern/xml/gh20444.phpt new file mode 100644 index 0000000000000..b3a77e3f13edb --- /dev/null +++ b/ext/dom/tests/modern/xml/gh20444.phpt @@ -0,0 +1,43 @@ +--TEST-- +GH-20444 (Dom\XMLDocument::C14N() seems broken compared to DOMDocument::C14N()) +--EXTENSIONS-- +dom +--FILE-- + + + + abc + +EOF; + +$d = \Dom\XMLDocument::createFromString($xml); +var_dump($d->C14N(true)); + +$xml = << + + + + 123 + + +EOF; + +$d = \Dom\XMLDocument::createFromString($xml); +var_dump($d->C14N()); + +?> +--EXPECT-- +string(128) " + + abc +" +string(134) " + + + 123 + +"