diff --git a/go.mod b/go.mod index bf941d4..acd006c 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/FINTLabs/fint-model go 1.13 require ( - github.com/antchfx/xpath v1.1.2 // indirect + github.com/antchfx/xpath v1.3.6 // indirect github.com/antchfx/xquery v0.0.0-20180515051857-ad5b8c7a47b0 github.com/google/go-github v17.0.0+incompatible github.com/google/go-querystring v1.0.0 // indirect diff --git a/go.sum b/go.sum index da5dfff..f6fba0a 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,6 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/antchfx/xpath v1.1.2 h1:YziPrtM0gEJBnhdUGxYcIVYXZ8FXbtbovxOi+UW/yWQ= -github.com/antchfx/xpath v1.1.2/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= +github.com/antchfx/xpath v1.3.6 h1:s0y+ElRRtTQdfHP609qFu0+c6bglDv20pqOViQjjdPI= +github.com/antchfx/xpath v1.3.6/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= github.com/antchfx/xquery v0.0.0-20180515051857-ad5b8c7a47b0 h1:JaCC8jz0zdMLk2m+qCCVLLLM/PL93p84w4pK3aJWj60= github.com/antchfx/xquery v0.0.0-20180515051857-ad5b8c7a47b0/go.mod h1:LzD22aAzDP8/dyiCKFp31He4m2GPjl0AFyzDtZzUu9M= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY= @@ -11,6 +11,7 @@ github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASu github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= @@ -23,8 +24,6 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20191204025024-5ee1b9f4859a h1:+HHJiFUXVOIS9mr1ThqkQD1N8vpFCfCShqADBM12KTc= -golang.org/x/net v0.0.0-20191204025024-5ee1b9f4859a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= @@ -41,8 +40,6 @@ golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9sn golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= diff --git a/vendor/github.com/antchfx/xpath/.travis.yml b/vendor/github.com/antchfx/xpath/.travis.yml deleted file mode 100644 index 6b63957..0000000 --- a/vendor/github.com/antchfx/xpath/.travis.yml +++ /dev/null @@ -1,12 +0,0 @@ -language: go - -go: - - 1.6 - - 1.9 - - '1.10' - -install: - - go get github.com/mattn/goveralls - -script: - - $HOME/gopath/bin/goveralls -service=travis-ci \ No newline at end of file diff --git a/vendor/github.com/antchfx/xpath/README.md b/vendor/github.com/antchfx/xpath/README.md index bb91694..733c4c8 100644 --- a/vendor/github.com/antchfx/xpath/README.md +++ b/vendor/github.com/antchfx/xpath/README.md @@ -1,14 +1,13 @@ -XPath -==== +# XPath + [![GoDoc](https://godoc.org/github.com/antchfx/xpath?status.svg)](https://godoc.org/github.com/antchfx/xpath) [![Coverage Status](https://coveralls.io/repos/github/antchfx/xpath/badge.svg?branch=master)](https://coveralls.io/github/antchfx/xpath?branch=master) -[![Build Status](https://travis-ci.org/antchfx/xpath.svg?branch=master)](https://travis-ci.org/antchfx/xpath) +[![Build Status](https://github.com/antchfx/xpath/actions/workflows/testing.yml/badge.svg)](https://github.com/antchfx/xpath/actions/workflows/testing.yml) [![Go Report Card](https://goreportcard.com/badge/github.com/antchfx/xpath)](https://goreportcard.com/report/github.com/antchfx/xpath) XPath is Go package provides selecting nodes from XML, HTML or other documents using XPath expression. -Implementation -=== +# Implementation - [htmlquery](https://github.com/antchfx/htmlquery) - an XPath query package for HTML document @@ -16,8 +15,7 @@ Implementation - [jsonquery](https://github.com/antchfx/jsonquery) - an XPath query package for JSON document -Supported Features -=== +# Supported Features #### The basic XPath patterns. @@ -49,7 +47,7 @@ Supported Features - `a/b` : For each node matching a, add the nodes matching b to the result. -- `a//b` : For each node matching a, add the descendant nodes matching b to the result. +- `a//b` : For each node matching a, add the descendant nodes matching b to the result. - `//b` : Returns elements in the entire document matching b. @@ -57,24 +55,28 @@ Supported Features - `(a, b, c)` : Evaluates each of its operands and concatenates the resulting sequences, in order, into a single result sequence +- `(a/b)` : Selects all matches nodes as grouping set. -#### Node Axes +#### Node Axes - `child::*` : The child axis selects children of the current node. + - `child::node()`: Selects all the children of the context node. + - `child::text()`: Selects all text node children of the context node. + - `descendant::*` : The descendant axis selects descendants of the current node. It is equivalent to '//'. - `descendant-or-self::*` : Selects descendants including the current node. -- `attribute::*` : Selects attributes of the current element. It is equivalent to @* +- `attribute::*` : Selects attributes of the current element. It is equivalent to @\* - `following-sibling::*` : Selects nodes after the current node. - `preceding-sibling::*` : Selects nodes before the current node. -- `following::*` : Selects the first matching node following in document order, excluding descendants. +- `following::*` : Selects the first matching node following in document order, excluding descendants. -- `preceding::*` : Selects the first matching node preceding in document order, excluding ancestors. +- `preceding::*` : Selects the first matching node preceding in document order, excluding ancestors. - `parent::*` : Selects the parent if it matches. The '..' pattern from the core is equivalent to 'parent::node()'. @@ -86,27 +88,27 @@ Supported Features #### Expressions - The gxpath supported three types: number, boolean, string. +The gxpath supported three types: number, boolean, string. - `path` : Selects nodes based on the path. - `a = b` : Standard comparisons. - * a = b True if a equals b. - * a != b True if a is not equal to b. - * a < b True if a is less than b. - * a <= b True if a is less than or equal to b. - * a > b True if a is greater than b. - * a >= b True if a is greater than or equal to b. + - `a = b` : True if a equals b. + - `a != b` : True if a is not equal to b. + - `a < b` : True if a is less than b. + - `a <= b` : True if a is less than or equal to b. + - `a > b` : True if a is greater than b. + - `a >= b` : True if a is greater than or equal to b. - `a + b` : Arithmetic expressions. - * `- a` Unary minus - * a + b Add - * a - b Substract - * a * b Multiply - * a div b Divide - * a mod b Floating point mod, like Java. + - `- a` Unary minus + - `a + b` : Addition + - `a - b` : Subtraction + - `a * b` : Multiplication + - `a div b` : Division + - `a mod b` : Modulus (division remainder) - `a or b` : Boolean `or` operation. @@ -116,55 +118,50 @@ Supported Features - `fun(arg1, ..., argn)` : Function calls: -| Function | Supported | -| --- | --- | -`boolean()`| ✓ | -`ceiling()`| ✓ | -`choose()`| ✗ | -`concat()`| ✓ | -`contains()`| ✓ | -`count()`| ✓ | -`current()`| ✗ | -`document()`| ✗ | -`element-available()`| ✗ | -`ends-with()`| ✓ | -`false()`| ✓ | -`floor()`| ✓ | -`format-number()`| ✗ | -`function-available()`| ✗ | -`generate-id()`| ✗ | -`id()`| ✗ | -`key()`| ✗ | -`lang()`| ✗ | -`last()`| ✓ | -`local-name()`| ✓ | -`name()`| ✓ | -`namespace-uri()`| ✓ | -`normalize-space()`| ✓ | -`not()`| ✓ | -`number()`| ✓ | -`position()`| ✓ | -`round()`| ✓ | -`starts-with()`| ✓ | -`string()`| ✓ | -`string-length()`| ✓ | -`substring()`| ✓ | -`substring-after()`| ✓ | -`substring-before()`| ✓ | -`sum()`| ✓ | -`system-property()`| ✗ | -`translate()`| ✓ | -`true()`| ✓ | -`unparsed-entity-url()` | ✗ | - -Changelogs -=== - -2019-03-19 -- optimize XPath `|` operation performance. [#33](https://github.com/antchfx/xpath/issues/33). Tips: suggest split into multiple subquery if you have a lot of `|` operations. - -2019-01-29 -- improvement `normalize-space` function. [#32](https://github.com/antchfx/xpath/issues/32) - -2018-12-07 -- supports XPath 2.0 Sequence expressions. [#30](https://github.com/antchfx/xpath/pull/30) by [@minherz](https://github.com/minherz). \ No newline at end of file +| Function | Supported | +| ----------------------- | --------- | +| `boolean()` | ✓ | +| `ceiling()` | ✓ | +| `choose()` | ✗ | +| `concat()` | ✓ | +| `contains()` | ✓ | +| `count()` | ✓ | +| `current()` | ✗ | +| `document()` | ✗ | +| `element-available()` | ✗ | +| `ends-with()` | ✓ | +| `false()` | ✓ | +| `floor()` | ✓ | +| `format-number()` | ✗ | +| `function-available()` | ✗ | +| `generate-id()` | ✗ | +| `id()` | ✗ | +| `key()` | ✗ | +| `lang()` | ✗ | +| `last()` | ✓ | +| `local-name()` | ✓ | +| `lower-case()`[^1] | ✓ | +| `matches()` | ✓ | +| `name()` | ✓ | +| `namespace-uri()` | ✓ | +| `normalize-space()` | ✓ | +| `not()` | ✓ | +| `number()` | ✓ | +| `position()` | ✓ | +| `replace()` | ✓ | +| `reverse()` | ✓ | +| `round()` | ✓ | +| `starts-with()` | ✓ | +| `string()` | ✓ | +| `string-join()`[^1] | ✓ | +| `string-length()` | ✓ | +| `substring()` | ✓ | +| `substring-after()` | ✓ | +| `substring-before()` | ✓ | +| `sum()` | ✓ | +| `system-property()` | ✗ | +| `translate()` | ✓ | +| `true()` | ✓ | +| `unparsed-entity-url()` | ✗ | + +[^1]: XPath-2.0 expression diff --git a/vendor/github.com/antchfx/xpath/build.go b/vendor/github.com/antchfx/xpath/build.go index 74f266b..7172608 100644 --- a/vendor/github.com/antchfx/xpath/build.go +++ b/vendor/github.com/antchfx/xpath/build.go @@ -7,43 +7,54 @@ import ( type flag int -const ( - noneFlag flag = iota - filterFlag -) +var flagsEnum = struct { + None flag + SmartDesc flag + PosFilter flag + Filter flag + Condition flag +}{ + None: 0, + SmartDesc: 1, + PosFilter: 2, + Filter: 4, + Condition: 8, +} + +type builderProp int + +var builderProps = struct { + None builderProp + PosFilter builderProp + HasPosition builderProp + HasLast builderProp + NonFlat builderProp +}{ + None: 0, + PosFilter: 1, + HasPosition: 2, + HasLast: 4, + NonFlat: 8, +} // builder provides building an XPath expressions. type builder struct { - depth int - flag flag + parseDepth int firstInput query } // axisPredicate creates a predicate to predicating for this axis node. func axisPredicate(root *axisNode) func(NodeNavigator) bool { - // get current axix node type. - typ := ElementNode - switch root.AxeType { - case "attribute": - typ = AttributeNode - case "self", "parent": - typ = allNode - default: - switch root.Prop { - case "comment": - typ = CommentNode - case "text": - typ = TextNode - // case "processing-instruction": - // typ = ProcessingInstructionNode - case "node": - typ = allNode - } - } nametest := root.LocalName != "" || root.Prefix != "" predicate := func(n NodeNavigator) bool { - if typ == n.NodeType() || typ == allNode || typ == TextNode { + if root.typeTest == n.NodeType() || root.typeTest == allNode { if nametest { + type namespaceURL interface { + NamespaceURL() string + } + if ns, ok := n.(namespaceURL); ok && root.hasNamespaceURI { + return root.LocalName == n.LocalName() && root.namespaceURI == ns.NamespaceURL() + } if root.LocalName == n.LocalName() && root.Prefix == n.Prefix() { return true } @@ -57,69 +68,88 @@ func axisPredicate(root *axisNode) func(NodeNavigator) bool { return predicate } -// processAxisNode processes a query for the XPath axis node. -func (b *builder) processAxisNode(root *axisNode) (query, error) { +// processAxis processes a query for the XPath axis node. +func (b *builder) processAxis(root *axisNode, flags flag, props *builderProp) (query, error) { var ( - err error - qyInput query - qyOutput query - predicate = axisPredicate(root) + err error + qyInput query + qyOutput query ) + b.firstInput = nil + predicate := axisPredicate(root) if root.Input == nil { qyInput = &contextQuery{} + *props = builderProps.None } else { - if root.AxeType == "child" && (root.Input.Type() == nodeAxis) { - if input := root.Input.(*axisNode); input.AxeType == "descendant-or-self" { - var qyGrandInput query - if input.Input != nil { - qyGrandInput, _ = b.processNode(input.Input) - } else { - qyGrandInput = &contextQuery{} + inputFlags := flagsEnum.None + if (flags & flagsEnum.Filter) == 0 { + if root.AxisType == "child" && (root.Input.Type() == nodeAxis) { + if input := root.Input.(*axisNode); input.AxisType == "descendant-or-self" { + var qyGrandInput query + if input.Input != nil { + qyGrandInput, err = b.processNode(input.Input, flagsEnum.SmartDesc, props) + if err != nil { + return nil, err + } + } else { + qyGrandInput = &contextQuery{} + } + qyOutput = &descendantQuery{name: root.LocalName, Input: qyGrandInput, Predicate: predicate, Self: false} + *props |= builderProps.NonFlat + return qyOutput, nil } - qyOutput = &descendantQuery{Input: qyGrandInput, Predicate: predicate, Self: true} - return qyOutput, nil + } + if root.AxisType == "descendant" || root.AxisType == "descendant-or-self" { + inputFlags |= flagsEnum.SmartDesc } } - qyInput, err = b.processNode(root.Input) + + qyInput, err = b.processNode(root.Input, inputFlags, props) if err != nil { return nil, err } } - switch root.AxeType { + switch root.AxisType { case "ancestor": - qyOutput = &ancestorQuery{Input: qyInput, Predicate: predicate} + qyOutput = &ancestorQuery{name: root.LocalName, Input: qyInput, Predicate: predicate} + *props |= builderProps.NonFlat case "ancestor-or-self": - qyOutput = &ancestorQuery{Input: qyInput, Predicate: predicate, Self: true} + qyOutput = &ancestorQuery{name: root.LocalName, Input: qyInput, Predicate: predicate, Self: true} + *props |= builderProps.NonFlat case "attribute": - qyOutput = &attributeQuery{Input: qyInput, Predicate: predicate} + qyOutput = &attributeQuery{name: root.LocalName, Input: qyInput, Predicate: predicate} case "child": - filter := func(n NodeNavigator) bool { - v := predicate(n) - switch root.Prop { - case "text": - v = v && n.NodeType() == TextNode - case "node": - v = v && (n.NodeType() == ElementNode || n.NodeType() == TextNode) - case "comment": - v = v && n.NodeType() == CommentNode - } - return v + if (*props & builderProps.NonFlat) == 0 { + qyOutput = &childQuery{name: root.LocalName, Input: qyInput, Predicate: predicate} + } else { + qyOutput = &cachedChildQuery{name: root.LocalName, Input: qyInput, Predicate: predicate} } - qyOutput = &childQuery{Input: qyInput, Predicate: filter} case "descendant": - qyOutput = &descendantQuery{Input: qyInput, Predicate: predicate} + if (flags & flagsEnum.SmartDesc) != flagsEnum.None { + qyOutput = &descendantOverDescendantQuery{name: root.LocalName, Input: qyInput, MatchSelf: false, Predicate: predicate} + } else { + qyOutput = &descendantQuery{name: root.LocalName, Input: qyInput, Predicate: predicate} + } + *props |= builderProps.NonFlat case "descendant-or-self": - qyOutput = &descendantQuery{Input: qyInput, Predicate: predicate, Self: true} + if (flags & flagsEnum.SmartDesc) != flagsEnum.None { + qyOutput = &descendantOverDescendantQuery{name: root.LocalName, Input: qyInput, MatchSelf: true, Predicate: predicate} + } else { + qyOutput = &descendantQuery{name: root.LocalName, Input: qyInput, Predicate: predicate, Self: true} + } + *props |= builderProps.NonFlat case "following": qyOutput = &followingQuery{Input: qyInput, Predicate: predicate} + *props |= builderProps.NonFlat case "following-sibling": qyOutput = &followingQuery{Input: qyInput, Predicate: predicate, Sibling: true} case "parent": qyOutput = &parentQuery{Input: qyInput, Predicate: predicate} case "preceding": qyOutput = &precedingQuery{Input: qyInput, Predicate: predicate} + *props |= builderProps.NonFlat case "preceding-sibling": qyOutput = &precedingQuery{Input: qyInput, Predicate: predicate, Sibling: true} case "self": @@ -127,63 +157,211 @@ func (b *builder) processAxisNode(root *axisNode) (query, error) { case "namespace": // haha,what will you do someting?? default: - err = fmt.Errorf("unknown axe type: %s", root.AxeType) + err = fmt.Errorf("unknown axe type: %s", root.AxisType) return nil, err } return qyOutput, nil } +func canBeNumber(q query) bool { + if q.ValueType() != xpathResultType.Any { + return q.ValueType() == xpathResultType.Number + } + return true +} + // processFilterNode builds query for the XPath filter predicate. -func (b *builder) processFilterNode(root *filterNode) (query, error) { - b.flag |= filterFlag +func (b *builder) processFilter(root *filterNode, flags flag, props *builderProp) (query, error) { + first := (flags & flagsEnum.Filter) == 0 - qyInput, err := b.processNode(root.Input) + qyInput, err := b.processNode(root.Input, (flags | flagsEnum.Filter), props) if err != nil { return nil, err } - qyCond, err := b.processNode(root.Condition) + firstInput := b.firstInput + + var propsCond builderProp + cond, err := b.processNode(root.Condition, flags, &propsCond) if err != nil { return nil, err } - qyOutput := &filterQuery{Input: qyInput, Predicate: qyCond} - return qyOutput, nil + + // Checking whether is number + if canBeNumber(cond) || ((propsCond & (builderProps.HasPosition | builderProps.HasLast)) != 0) { + propsCond |= builderProps.HasPosition + flags |= flagsEnum.PosFilter + } + + if root.Input.Type() != nodeFilter { + *props &= ^builderProps.PosFilter + } + + if (propsCond & builderProps.HasPosition) != 0 { + *props |= builderProps.PosFilter + } + + if (propsCond & builderProps.HasPosition) != builderProps.None { + if (propsCond & builderProps.HasLast) != 0 { + // https://github.com/antchfx/xpath/issues/76 + // https://github.com/antchfx/xpath/issues/78 + if qyFunc, ok := cond.(*functionQuery); ok { + switch qyFunc.Input.(type) { + case *filterQuery: + cond = &lastFuncQuery{Input: qyFunc.Input} + case *groupQuery: + cond = &lastFuncQuery{Input: qyFunc.Input} + } + } + } + } + + merge := (qyInput.Properties() & queryProps.Merge) != 0 + if first && firstInput != nil { + if merge && ((*props & builderProps.PosFilter) != 0) { + var ( + rootQuery = &contextQuery{} + parent query + ) + switch axisQuery := firstInput.(type) { + case *ancestorQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *attributeQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *childQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *cachedChildQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *descendantQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *followingQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *precedingQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *parentQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *selfQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *groupQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *descendantOverDescendantQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + } + b.firstInput = nil + child := &filterQuery{Input: qyInput, Predicate: cond, NoPosition: false} + if parent != nil { + return &mergeQuery{Input: parent, Child: child}, nil + } + return child, nil + } + b.firstInput = nil + } + + resultQuery := &filterQuery{ + Input: qyInput, + Predicate: cond, + NoPosition: (propsCond & builderProps.HasPosition) == 0, + } + return resultQuery, nil } // processFunctionNode processes query for the XPath function node. -func (b *builder) processFunctionNode(root *functionNode) (query, error) { +func (b *builder) processFunction(root *functionNode, props *builderProp) (query, error) { + // Reset builder props + *props = builderProps.None + var qyOutput query switch root.FuncName { + case "lower-case": + arg, err := b.processNode(root.Args[0], flagsEnum.None, props) + if err != nil { + return nil, err + } + qyOutput = &functionQuery{Func: lowerCaseFunc(arg)} case "starts-with": - arg1, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - arg2, err := b.processNode(root.Args[1]) + arg2, err := b.processNode(root.Args[1], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: startwithFunc(arg1, arg2)} + qyOutput = &functionQuery{Func: startwithFunc(arg1, arg2)} case "ends-with": - arg1, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - arg2, err := b.processNode(root.Args[1]) + arg2, err := b.processNode(root.Args[1], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: endwithFunc(arg1, arg2)} + qyOutput = &functionQuery{Func: endwithFunc(arg1, arg2)} case "contains": - arg1, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - arg2, err := b.processNode(root.Args[1]) + arg2, err := b.processNode(root.Args[1], flagsEnum.None, props) if err != nil { return nil, err } - - qyOutput = &functionQuery{Input: b.firstInput, Func: containsFunc(arg1, arg2)} + qyOutput = &functionQuery{Func: containsFunc(arg1, arg2)} + case "matches": + //matches(string , pattern) + if len(root.Args) != 2 { + return nil, errors.New("xpath: matches function must have two parameters") + } + var ( + arg1, arg2 query + err error + ) + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { + return nil, err + } + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { + return nil, err + } + // Issue #92, testing the regular expression before. + if q, ok := arg2.(*constantQuery); ok { + if _, err = getRegexp(q.Val.(string)); err != nil { + return nil, fmt.Errorf("matches() got error. %v", err) + } + } + qyOutput = &functionQuery{Func: matchesFunc(arg1, arg2)} case "substring": //substring( string , start [, length] ) if len(root.Args) < 2 { @@ -193,18 +371,18 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { arg1, arg2, arg3 query err error ) - if arg1, err = b.processNode(root.Args[0]); err != nil { + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { return nil, err } - if arg2, err = b.processNode(root.Args[1]); err != nil { + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { return nil, err } if len(root.Args) == 3 { - if arg3, err = b.processNode(root.Args[2]); err != nil { + if arg3, err = b.processNode(root.Args[2], flagsEnum.None, props); err != nil { return nil, err } } - qyOutput = &functionQuery{Input: b.firstInput, Func: substringFunc(arg1, arg2, arg3)} + qyOutput = &functionQuery{Func: substringFunc(arg1, arg2, arg3)} case "substring-before", "substring-after": //substring-xxxx( haystack, needle ) if len(root.Args) != 2 { @@ -214,35 +392,56 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { arg1, arg2 query err error ) - if arg1, err = b.processNode(root.Args[0]); err != nil { + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { return nil, err } - if arg2, err = b.processNode(root.Args[1]); err != nil { + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { return nil, err } qyOutput = &functionQuery{ - Input: b.firstInput, - Func: substringIndFunc(arg1, arg2, root.FuncName == "substring-after"), + Func: substringIndFunc(arg1, arg2, root.FuncName == "substring-after"), } case "string-length": // string-length( [string] ) if len(root.Args) < 1 { return nil, errors.New("xpath: string-length function must have at least one parameter") } - arg1, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: stringLengthFunc(arg1)} + qyOutput = &functionQuery{Func: stringLengthFunc(arg1)} case "normalize-space": - if len(root.Args) == 0 { - return nil, errors.New("xpath: normalize-space function must have at least one parameter") + var arg node + if len(root.Args) > 0 { + arg = root.Args[0] + } else { + arg = newAxisNode("self", allNode, "", "", "", nil) } - argQuery, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(arg, flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: argQuery, Func: normalizespaceFunc} + qyOutput = &functionQuery{Func: normalizespaceFunc(arg1)} + case "replace": + //replace( string , string, string ) + if len(root.Args) != 3 { + return nil, errors.New("xpath: replace function must have three parameters") + } + var ( + arg1, arg2, arg3 query + err error + ) + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { + return nil, err + } + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { + return nil, err + } + if arg3, err = b.processNode(root.Args[2], flagsEnum.None, props); err != nil { + return nil, err + } + qyOutput = &functionQuery{Func: replaceFunc(arg1, arg2, arg3)} case "translate": //translate( string , string, string ) if len(root.Args) != 3 { @@ -252,157 +451,182 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { arg1, arg2, arg3 query err error ) - if arg1, err = b.processNode(root.Args[0]); err != nil { + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { return nil, err } - if arg2, err = b.processNode(root.Args[1]); err != nil { + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { return nil, err } - if arg3, err = b.processNode(root.Args[2]); err != nil { + if arg3, err = b.processNode(root.Args[2], flagsEnum.None, props); err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: translateFunc(arg1, arg2, arg3)} + qyOutput = &functionQuery{Func: translateFunc(arg1, arg2, arg3)} case "not": if len(root.Args) == 0 { return nil, errors.New("xpath: not function must have at least one parameter") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: argQuery, Func: notFunc} + qyOutput = &functionQuery{Func: notFunc(argQuery)} case "name", "local-name", "namespace-uri": - inp := b.firstInput if len(root.Args) > 1 { return nil, fmt.Errorf("xpath: %s function must have at most one parameter", root.FuncName) } + var ( + arg query + err error + ) if len(root.Args) == 1 { - argQuery, err := b.processNode(root.Args[0]) + arg, err = b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - inp = argQuery } - f := &functionQuery{Input: inp} switch root.FuncName { case "name": - f.Func = nameFunc + qyOutput = &functionQuery{Func: nameFunc(arg)} case "local-name": - f.Func = localNameFunc + qyOutput = &functionQuery{Func: localNameFunc(arg)} case "namespace-uri": - f.Func = namespaceFunc + qyOutput = &functionQuery{Func: namespaceFunc(arg)} } - qyOutput = f case "true", "false": val := root.FuncName == "true" qyOutput = &functionQuery{ - Input: b.firstInput, Func: func(_ query, _ iterator) interface{} { return val }, } case "last": - qyOutput = &functionQuery{Input: b.firstInput, Func: lastFunc} + qyOutput = &functionQuery{Input: b.firstInput, Func: lastFunc()} + *props |= builderProps.HasLast case "position": - qyOutput = &functionQuery{Input: b.firstInput, Func: positionFunc} + qyOutput = &functionQuery{Input: b.firstInput, Func: positionFunc()} + *props |= builderProps.HasPosition case "boolean", "number", "string": - inp := b.firstInput + var inp query if len(root.Args) > 1 { return nil, fmt.Errorf("xpath: %s function must have at most one parameter", root.FuncName) } if len(root.Args) == 1 { - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } inp = argQuery } - f := &functionQuery{Input: inp} switch root.FuncName { case "boolean": - f.Func = booleanFunc + qyOutput = &functionQuery{Func: booleanFunc(inp)} case "string": - f.Func = stringFunc + qyOutput = &functionQuery{Func: stringFunc(inp)} case "number": - f.Func = numberFunc + qyOutput = &functionQuery{Func: numberFunc(inp)} } - qyOutput = f case "count": - //if b.firstInput == nil { - // return nil, errors.New("xpath: expression must evaluate to node-set") - //} if len(root.Args) == 0 { return nil, fmt.Errorf("xpath: count(node-sets) function must with have parameters node-sets") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: argQuery, Func: countFunc} + qyOutput = &functionQuery{Func: countFunc(argQuery)} case "sum": if len(root.Args) == 0 { return nil, fmt.Errorf("xpath: sum(node-sets) function must with have parameters node-sets") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: argQuery, Func: sumFunc} + qyOutput = &functionQuery{Func: sumFunc(argQuery)} case "ceiling", "floor", "round": if len(root.Args) == 0 { return nil, fmt.Errorf("xpath: ceiling(node-sets) function must with have parameters node-sets") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - f := &functionQuery{Input: argQuery} switch root.FuncName { case "ceiling": - f.Func = ceilingFunc + qyOutput = &functionQuery{Func: ceilingFunc(argQuery)} case "floor": - f.Func = floorFunc + qyOutput = &functionQuery{Func: floorFunc(argQuery)} case "round": - f.Func = roundFunc + qyOutput = &functionQuery{Func: roundFunc(argQuery)} } - qyOutput = f case "concat": if len(root.Args) < 2 { return nil, fmt.Errorf("xpath: concat() must have at least two arguments") } var args []query for _, v := range root.Args { - q, err := b.processNode(v) + q, err := b.processNode(v, flagsEnum.None, props) if err != nil { return nil, err } args = append(args, q) } - qyOutput = &functionQuery{Input: b.firstInput, Func: concatFunc(args...)} + qyOutput = &functionQuery{Func: concatFunc(args...)} + case "reverse": + if len(root.Args) == 0 { + return nil, fmt.Errorf("xpath: reverse(node-sets) function must with have parameters node-sets") + } + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) + if err != nil { + return nil, err + } + qyOutput = &transformFunctionQuery{Input: argQuery, Func: reverseFunc} + case "string-join": + if len(root.Args) != 2 { + return nil, fmt.Errorf("xpath: string-join(node-sets, separator) function requires node-set and argument") + } + input, err := b.processNode(root.Args[0], flagsEnum.None, props) + if err != nil { + return nil, err + } + arg1, err := b.processNode(root.Args[1], flagsEnum.None, props) + if err != nil { + return nil, err + } + qyOutput = &functionQuery{Func: stringJoinFunc(input, arg1)} default: return nil, fmt.Errorf("not yet support this function %s()", root.FuncName) } return qyOutput, nil } -func (b *builder) processOperatorNode(root *operatorNode) (query, error) { - left, err := b.processNode(root.Left) +func (b *builder) processOperator(root *operatorNode, props *builderProp) (query, error) { + var ( + leftProp builderProp + rightProp builderProp + ) + + left, err := b.processNode(root.Left, flagsEnum.None, &leftProp) if err != nil { return nil, err } - right, err := b.processNode(root.Right) + right, err := b.processNode(root.Right, flagsEnum.None, &rightProp) if err != nil { return nil, err } + *props = leftProp | rightProp + var qyOutput query switch root.Op { - case "+", "-", "div", "mod": // Numeric operator - var exprFunc func(interface{}, interface{}) interface{} + case "+", "-", "*", "div", "mod": // Numeric operator + var exprFunc func(iterator, interface{}, interface{}) interface{} switch root.Op { case "+": exprFunc = plusFunc case "-": exprFunc = minusFunc + case "*": + exprFunc = mulFunc case "div": exprFunc = divFunc case "mod": @@ -433,38 +657,48 @@ func (b *builder) processOperatorNode(root *operatorNode) (query, error) { } qyOutput = &booleanQuery{Left: left, Right: right, IsOr: isOr} case "|": + *props |= builderProps.NonFlat qyOutput = &unionQuery{Left: left, Right: right} } return qyOutput, nil } -func (b *builder) processNode(root node) (q query, err error) { - if b.depth = b.depth + 1; b.depth > 1024 { +func (b *builder) processNode(root node, flags flag, props *builderProp) (q query, err error) { + if b.parseDepth = b.parseDepth + 1; b.parseDepth > 1024 { err = errors.New("the xpath expressions is too complex") return } - + *props = builderProps.None switch root.Type() { case nodeConstantOperand: n := root.(*operandNode) q = &constantQuery{Val: n.Val} case nodeRoot: - q = &contextQuery{Root: true} + q = &absoluteQuery{} case nodeAxis: - q, err = b.processAxisNode(root.(*axisNode)) + q, err = b.processAxis(root.(*axisNode), flags, props) b.firstInput = q case nodeFilter: - q, err = b.processFilterNode(root.(*filterNode)) + q, err = b.processFilter(root.(*filterNode), flags, props) + b.firstInput = q case nodeFunction: - q, err = b.processFunctionNode(root.(*functionNode)) + q, err = b.processFunction(root.(*functionNode), props) case nodeOperator: - q, err = b.processOperatorNode(root.(*operatorNode)) + q, err = b.processOperator(root.(*operatorNode), props) + case nodeGroup: + q, err = b.processNode(root.(*groupNode).Input, flagsEnum.None, props) + if err != nil { + return + } + q = &groupQuery{Input: q} + b.firstInput = q } + b.parseDepth-- return } // build builds a specified XPath expressions expr. -func build(expr string) (q query, err error) { +func build(expr string, namespaces map[string]string) (q query, err error) { defer func() { if e := recover(); e != nil { switch x := e.(type) { @@ -477,7 +711,8 @@ func build(expr string) (q query, err error) { } } }() - root := parse(expr) + root := parse(expr, namespaces) b := &builder{} - return b.processNode(root) + props := builderProps.None + return b.processNode(root, flagsEnum.None, &props) } diff --git a/vendor/github.com/antchfx/xpath/cache.go b/vendor/github.com/antchfx/xpath/cache.go new file mode 100644 index 0000000..31a2b33 --- /dev/null +++ b/vendor/github.com/antchfx/xpath/cache.go @@ -0,0 +1,80 @@ +package xpath + +import ( + "regexp" + "sync" +) + +type loadFunc func(key interface{}) (interface{}, error) + +const ( + defaultCap = 65536 +) + +// The reason we're building a simple capacity-resetting loading cache (when capacity reached) instead of using +// something like github.com/hashicorp/golang-lru is primarily due to (not wanting to create) external dependency. +// Currently this library has 0 external dep (other than go sdk), and supports go 1.6, 1.9, and 1.10 (and later). +// Creating external lib dependencies (plus their transitive dependencies) would make things hard if not impossible. +// We expect under most circumstances, the defaultCap is big enough for any long running services that use this +// library if their xpath regexp cardinality is low. However, in extreme cases when the capacity is reached, we +// simply reset the cache, taking a small subsequent perf hit (next to nothing considering amortization) in trade +// of more complex and less performant LRU type of construct. +type loadingCache struct { + sync.RWMutex + cap int + load loadFunc + m map[interface{}]interface{} + reset int +} + +// NewLoadingCache creates a new instance of a loading cache with capacity. Capacity must be >= 0, or +// it will panic. Capacity == 0 means the cache growth is unbounded. +func NewLoadingCache(load loadFunc, capacity int) *loadingCache { + if capacity < 0 { + panic("capacity must be >= 0") + } + return &loadingCache{cap: capacity, load: load, m: make(map[interface{}]interface{})} +} + +func (c *loadingCache) get(key interface{}) (interface{}, error) { + c.RLock() + v, found := c.m[key] + c.RUnlock() + if found { + return v, nil + } + v, err := c.load(key) + if err != nil { + return nil, err + } + c.Lock() + if c.cap > 0 && len(c.m) >= c.cap { + c.m = map[interface{}]interface{}{key: v} + c.reset++ + } else { + c.m[key] = v + } + c.Unlock() + return v, nil +} + +var ( + // RegexpCache is a loading cache for string -> *regexp.Regexp mapping. It is exported so that in rare cases + // client can customize load func and/or capacity. + RegexpCache = defaultRegexpCache() +) + +func defaultRegexpCache() *loadingCache { + return NewLoadingCache( + func(key interface{}) (interface{}, error) { + return regexp.Compile(key.(string)) + }, defaultCap) +} + +func getRegexp(pattern string) (*regexp.Regexp, error) { + exp, err := RegexpCache.get(pattern) + if err != nil { + return nil, err + } + return exp.(*regexp.Regexp), nil +} diff --git a/vendor/github.com/antchfx/xpath/func.go b/vendor/github.com/antchfx/xpath/func.go index a2f0dce..ffbee65 100644 --- a/vendor/github.com/antchfx/xpath/func.go +++ b/vendor/github.com/antchfx/xpath/func.go @@ -4,11 +4,26 @@ import ( "errors" "fmt" "math" - "regexp" "strconv" "strings" + "sync" + "unicode" ) +// Defined an interface of stringBuilder that compatible with +// strings.Builder(go 1.10) and bytes.Buffer(< go 1.10) +type stringBuilder interface { + WriteRune(r rune) (n int, err error) + WriteString(s string) (int, error) + Reset() + Grow(n int) + String() string +} + +var builderPool = sync.Pool{New: func() interface{} { + return newStringBuilder() +}} + // The XPath function list. func predicate(q query) func(NodeNavigator) bool { @@ -22,74 +37,83 @@ func predicate(q query) func(NodeNavigator) bool { } // positionFunc is a XPath Node Set functions position(). -func positionFunc(q query, t iterator) interface{} { - var ( - count = 1 - node = t.Current() - ) - test := predicate(q) - for node.MoveToPrevious() { - if test(node) { - count++ +func positionFunc() func(query, iterator) interface{} { + return func(q query, t iterator) interface{} { + var ( + count = 1 + node = t.Current().Copy() + ) + test := predicate(q) + for node.MoveToPrevious() { + if test(node) { + count++ + } } + return float64(count) } - return float64(count) } // lastFunc is a XPath Node Set functions last(). -func lastFunc(q query, t iterator) interface{} { - var ( - count = 0 - node = t.Current() - ) - node.MoveToFirst() - test := predicate(q) - for { - if test(node) { - count++ - } - if !node.MoveToNext() { - break +func lastFunc() func(query, iterator) interface{} { + return func(q query, t iterator) interface{} { + var ( + count = 0 + node = t.Current().Copy() + ) + test := predicate(q) + node.MoveToFirst() + for { + if test(node) { + count++ + } + if !node.MoveToNext() { + break + } } + return float64(count) } - return float64(count) } // countFunc is a XPath Node Set functions count(node-set). -func countFunc(q query, t iterator) interface{} { - var count = 0 - test := predicate(q) - switch typ := q.Evaluate(t).(type) { - case query: - for node := typ.Select(t); node != nil; node = typ.Select(t) { - if test(node) { - count++ +func countFunc(arg query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + var count = 0 + q := functionArgs(arg) + test := predicate(q) + switch typ := q.Evaluate(t).(type) { + case query: + for node := typ.Select(t); node != nil; node = typ.Select(t) { + if test(node) { + count++ + } } } + return float64(count) } - return float64(count) } // sumFunc is a XPath Node Set functions sum(node-set). -func sumFunc(q query, t iterator) interface{} { - var sum float64 - switch typ := q.Evaluate(t).(type) { - case query: - for node := typ.Select(t); node != nil; node = typ.Select(t) { - if v, err := strconv.ParseFloat(node.Value(), 64); err == nil { - sum += v +func sumFunc(arg query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + var sum float64 + switch typ := functionArgs(arg).Evaluate(t).(type) { + case query: + for node := typ.Select(t); node != nil; node = typ.Select(t) { + if v, err := strconv.ParseFloat(node.Value(), 64); err == nil { + sum += v + } } + case float64: + sum = typ + case string: + v, err := strconv.ParseFloat(typ, 64) + if err != nil { + panic(errors.New("sum() function argument type must be a node-set or number")) + } + sum = v } - case float64: - sum = typ - case string: - v, err := strconv.ParseFloat(typ, 64) - if err != nil { - panic(errors.New("sum() function argument type must be a node-set or number")) - } - sum = v + return sum } - return sum } func asNumber(t iterator, o interface{}) float64 { @@ -97,7 +121,7 @@ func asNumber(t iterator, o interface{}) float64 { case query: node := typ.Select(t) if node == nil { - return float64(0) + return math.NaN() } if v, err := strconv.ParseFloat(node.Value(), 64); err == nil { return v @@ -106,70 +130,100 @@ func asNumber(t iterator, o interface{}) float64 { return typ case string: v, err := strconv.ParseFloat(typ, 64) - if err != nil { - panic(errors.New("ceiling() function argument type must be a node-set or number")) + if err == nil { + return v } - return v } - return 0 + return math.NaN() } // ceilingFunc is a XPath Node Set functions ceiling(node-set). -func ceilingFunc(q query, t iterator) interface{} { - val := asNumber(t, q.Evaluate(t)) - return math.Ceil(val) +func ceilingFunc(arg query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + val := asNumber(t, functionArgs(arg).Evaluate(t)) + // if math.IsNaN(val) { + // panic(errors.New("ceiling() function argument type must be a valid number")) + // } + return math.Ceil(val) + } } // floorFunc is a XPath Node Set functions floor(node-set). -func floorFunc(q query, t iterator) interface{} { - val := asNumber(t, q.Evaluate(t)) - return math.Floor(val) +func floorFunc(arg query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + val := asNumber(t, functionArgs(arg).Evaluate(t)) + return math.Floor(val) + } } // roundFunc is a XPath Node Set functions round(node-set). -func roundFunc(q query, t iterator) interface{} { - val := asNumber(t, q.Evaluate(t)) - //return math.Round(val) - return round(val) +func roundFunc(arg query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + val := asNumber(t, functionArgs(arg).Evaluate(t)) + //return math.Round(val) + return round(val) + } } // nameFunc is a XPath functions name([node-set]). -func nameFunc(q query, t iterator) interface{} { - v := q.Select(t) - if v == nil { - return "" - } - ns := v.Prefix() - if ns == "" { - return v.LocalName() +func nameFunc(arg query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + var v NodeNavigator + if arg == nil { + v = t.Current() + } else { + v = arg.Clone().Select(t) + if v == nil { + return "" + } + } + ns := v.Prefix() + if ns == "" { + return v.LocalName() + } + return ns + ":" + v.LocalName() } - return ns + ":" + v.LocalName() } // localNameFunc is a XPath functions local-name([node-set]). -func localNameFunc(q query, t iterator) interface{} { - v := q.Select(t) - if v == nil { - return "" +func localNameFunc(arg query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + var v NodeNavigator + if arg == nil { + v = t.Current() + } else { + v = arg.Clone().Select(t) + if v == nil { + return "" + } + } + return v.LocalName() } - return v.LocalName() } // namespaceFunc is a XPath functions namespace-uri([node-set]). -func namespaceFunc(q query, t iterator) interface{} { - v := q.Select(t) - if v == nil { - return "" - } - // fix about namespace-uri() bug: https://github.com/antchfx/xmlquery/issues/22 - // TODO: In the next version, add NamespaceURL() to the NodeNavigator interface. - type namespaceURL interface { - NamespaceURL() string - } - if f, ok := v.(namespaceURL); ok { - return f.NamespaceURL() +func namespaceFunc(arg query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + var v NodeNavigator + if arg == nil { + v = t.Current() + } else { + // Get the first node in the node-set if specified. + v = arg.Clone().Select(t) + if v == nil { + return "" + } + } + // fix about namespace-uri() bug: https://github.com/antchfx/xmlquery/issues/22 + // TODO: In the next version, add NamespaceURL() to the NodeNavigator interface. + type namespaceURL interface { + NamespaceURL() string + } + if f, ok := v.(namespaceURL); ok { + return f.NamespaceURL() + } + return v.Prefix() } - return v.Prefix() } func asBool(t iterator, v interface{}) bool { @@ -179,7 +233,7 @@ func asBool(t iterator, v interface{}) bool { case *NodeIterator: return v.MoveNext() case bool: - return bool(v) + return v case float64: return v != 0 case string: @@ -216,31 +270,40 @@ func asString(t iterator, v interface{}) string { } // booleanFunc is a XPath functions boolean([node-set]). -func booleanFunc(q query, t iterator) interface{} { - v := q.Evaluate(t) - return asBool(t, v) +func booleanFunc(arg1 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + v := functionArgs(arg1).Evaluate(t) + return asBool(t, v) + } } // numberFunc is a XPath functions number([node-set]). -func numberFunc(q query, t iterator) interface{} { - v := q.Evaluate(t) - return asNumber(t, v) +func numberFunc(arg1 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + v := functionArgs(arg1).Evaluate(t) + return asNumber(t, v) + } } // stringFunc is a XPath functions string([node-set]). -func stringFunc(q query, t iterator) interface{} { - v := q.Evaluate(t) - return asString(t, v) +func stringFunc(arg1 query) func(query, iterator) interface{} { + return func(q query, t iterator) interface{} { + if arg1 == nil { + return t.Current().Value() + } + v := functionArgs(arg1).Evaluate(t) + return asString(t, v) + } } // startwithFunc is a XPath functions starts-with(string, string). func startwithFunc(arg1, arg2 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var ( m, n string ok bool ) - switch typ := arg1.Evaluate(t).(type) { + switch typ := functionArgs(arg1).Evaluate(t).(type) { case string: m = typ case query: @@ -252,7 +315,7 @@ func startwithFunc(arg1, arg2 query) func(query, iterator) interface{} { default: panic(errors.New("starts-with() function argument type must be string")) } - n, ok = arg2.Evaluate(t).(string) + n, ok = functionArgs(arg2).Evaluate(t).(string) if !ok { panic(errors.New("starts-with() function argument type must be string")) } @@ -262,12 +325,12 @@ func startwithFunc(arg1, arg2 query) func(query, iterator) interface{} { // endwithFunc is a XPath functions ends-with(string, string). func endwithFunc(arg1, arg2 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var ( m, n string ok bool ) - switch typ := arg1.Evaluate(t).(type) { + switch typ := functionArgs(arg1).Evaluate(t).(type) { case string: m = typ case query: @@ -279,7 +342,7 @@ func endwithFunc(arg1, arg2 query) func(query, iterator) interface{} { default: panic(errors.New("ends-with() function argument type must be string")) } - n, ok = arg2.Evaluate(t).(string) + n, ok = functionArgs(arg2).Evaluate(t).(string) if !ok { panic(errors.New("ends-with() function argument type must be string")) } @@ -289,13 +352,12 @@ func endwithFunc(arg1, arg2 query) func(query, iterator) interface{} { // containsFunc is a XPath functions contains(string or @attr, string). func containsFunc(arg1, arg2 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var ( m, n string ok bool ) - - switch typ := arg1.Evaluate(t).(type) { + switch typ := functionArgs(arg1).Evaluate(t).(type) { case string: m = typ case query: @@ -308,7 +370,7 @@ func containsFunc(arg1, arg2 query) func(query, iterator) interface{} { panic(errors.New("contains() function argument type must be string")) } - n, ok = arg2.Evaluate(t).(string) + n, ok = functionArgs(arg2).Evaluate(t).(string) if !ok { panic(errors.New("contains() function argument type must be string")) } @@ -317,35 +379,77 @@ func containsFunc(arg1, arg2 query) func(query, iterator) interface{} { } } -var ( - regnewline = regexp.MustCompile(`[\r\n\t]`) - regseqspace = regexp.MustCompile(`\s{2,}`) -) +// matchesFunc is an XPath function that tests a given string against a regexp pattern. +// Note: does not support https://www.w3.org/TR/xpath-functions-31/#func-matches 3rd optional `flags` argument; if +// needed, directly put flags in the regexp pattern, such as `(?i)^pattern$` for `i` flag. +func matchesFunc(arg1, arg2 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + var s string + switch typ := functionArgs(arg1).Evaluate(t).(type) { + case string: + s = typ + case query: + node := typ.Select(t) + if node == nil { + return "" + } + s = node.Value() + } + var pattern string + var ok bool + if pattern, ok = functionArgs(arg2).Evaluate(t).(string); !ok { + panic(errors.New("matches() function second argument type must be string")) + } + re, err := getRegexp(pattern) + if err != nil { + panic(fmt.Errorf("matches() function second argument is not a valid regexp pattern, err: %s", err.Error())) + } + return re.MatchString(s) + } +} // normalizespaceFunc is XPath functions normalize-space(string?) -func normalizespaceFunc(q query, t iterator) interface{} { - var m string - switch typ := q.Evaluate(t).(type) { - case string: - m = typ - case query: - node := typ.Select(t) - if node == nil { - return "" +func normalizespaceFunc(arg1 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + var m string + switch typ := functionArgs(arg1).Evaluate(t).(type) { + case string: + m = typ + case query: + node := typ.Select(t) + if node == nil { + return "" + } + m = node.Value() + } + var b = builderPool.Get().(stringBuilder) + b.Grow(len(m)) + + runeStr := []rune(strings.TrimSpace(m)) + l := len(runeStr) + for i := range runeStr { + r := runeStr[i] + isSpace := unicode.IsSpace(r) + if !(isSpace && (i+1 < l && unicode.IsSpace(runeStr[i+1]))) { + if isSpace { + r = ' ' + } + b.WriteRune(r) + } } - m = node.Value() + result := b.String() + b.Reset() + builderPool.Put(b) + + return result } - m = strings.TrimSpace(m) - m = regnewline.ReplaceAllString(m, " ") - m = regseqspace.ReplaceAllString(m, " ") - return m } // substringFunc is XPath functions substring function returns a part of a given string. func substringFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var m string - switch typ := arg1.Evaluate(t).(type) { + switch typ := functionArgs(arg1).Evaluate(t).(type) { case string: m = typ case query: @@ -358,33 +462,50 @@ func substringFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { var start, length float64 var ok bool - - if start, ok = arg2.Evaluate(t).(float64); !ok { - panic(errors.New("substring() function first argument type must be int")) - } else if start < 1 { - panic(errors.New("substring() function first argument type must be >= 1")) + if start, ok = functionArgs(arg2).Evaluate(t).(float64); !ok { + panic(errors.New("substring() function first argument type must be number")) } - start-- - if arg3 != nil { - if length, ok = arg3.Evaluate(t).(float64); !ok { - panic(errors.New("substring() function second argument type must be int")) + // fix https://github.com/antchfx/xpath/issues/109 + start = math.Round(start) + if start > float64(len(m)) { + return "" + } + if arg3 == nil { + if start <= 0 { + return m } + return m[int(start)-1:] + } + + if length, ok = functionArgs(arg3).Evaluate(t).(float64); !ok { + panic(errors.New("substring() function second argument type must be number")) } - if (len(m) - int(start)) < int(length) { - panic(errors.New("substring() function start and length argument out of range")) + length = math.Round(length) + if length <= 0 { + return "" } - if length > 0 { - return m[int(start):int(length+start)] + if length > float64(len(m)) { + length = float64(len(m)) } - return m[int(start):] + if start < 0 { + length = length - math.Abs(start) + if length <= 1 { + return "" + } + return m[:int(length-1)] + } + if start == 0 { + return m[:int(length-1)] + } + return m[int(start-1):int(length+start-1)] } } // substringIndFunc is XPath functions substring-before/substring-after function returns a part of a given string. func substringIndFunc(arg1, arg2 query, after bool) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var str string - switch v := arg1.Evaluate(t).(type) { + switch v := functionArgs(arg1).Evaluate(t).(type) { case string: str = v case query: @@ -395,7 +516,7 @@ func substringIndFunc(arg1, arg2 query, after bool) func(query, iterator) interf str = node.Value() } var word string - switch v := arg2.Evaluate(t).(type) { + switch v := functionArgs(arg2).Evaluate(t).(type) { case string: word = v case query: @@ -423,8 +544,8 @@ func substringIndFunc(arg1, arg2 query, after bool) func(query, iterator) interf // stringLengthFunc is XPATH string-length( [string] ) function that returns a number // equal to the number of characters in a given string. func stringLengthFunc(arg1 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - switch v := arg1.Evaluate(t).(type) { + return func(_ query, t iterator) interface{} { + switch v := functionArgs(arg1).Evaluate(t).(type) { case string: return float64(len(v)) case query: @@ -440,12 +561,12 @@ func stringLengthFunc(arg1 query) func(query, iterator) interface{} { // translateFunc is XPath functions translate() function returns a replaced string. func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - str := asString(t, arg1.Evaluate(t)) - src := asString(t, arg2.Evaluate(t)) - dst := asString(t, arg3.Evaluate(t)) + return func(_ query, t iterator) interface{} { + str := asString(t, functionArgs(arg1).Evaluate(t)) + src := asString(t, functionArgs(arg2).Evaluate(t)) + dst := asString(t, functionArgs(arg3).Evaluate(t)) - var replace []string + replace := make([]string, 0, len(src)) for i, s := range src { d := "" if i < len(dst) { @@ -457,16 +578,38 @@ func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { } } +// replaceFunc is XPath functions replace() function returns a replaced string. +func replaceFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + str := asString(t, functionArgs(arg1).Evaluate(t)) + src := asString(t, functionArgs(arg2).Evaluate(t)) + dst := asString(t, functionArgs(arg3).Evaluate(t)) + e, err := getRegexp(src) + if err != nil { + panic(fmt.Errorf("replace() function second argument is not a valid regexp pattern, err: %s", err.Error())) + } + + // replace all $i to ${i} for golang regexp.Expand + for idx := e.NumSubexp(); idx > 0; idx-- { + dst = strings.ReplaceAll(dst, fmt.Sprintf("$%d", idx), fmt.Sprintf("${%d}", idx)) + } + + return e.ReplaceAllString(str, dst) + } +} + // notFunc is XPATH functions not(expression) function operation. -func notFunc(q query, t iterator) interface{} { - switch v := q.Evaluate(t).(type) { - case bool: - return !v - case query: - node := v.Select(t) - return node == nil - default: - return false +func notFunc(arg1 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + switch v := functionArgs(arg1).Evaluate(t).(type) { + case bool: + return !v + case query: + node := v.Select(t) + return node == nil + default: + return false + } } } @@ -474,19 +617,92 @@ func notFunc(q query, t iterator) interface{} { // strings and returns the resulting string. // concat( string1 , string2 [, stringn]* ) func concatFunc(args ...query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - var a []string + return func(_ query, t iterator) interface{} { + b := builderPool.Get().(stringBuilder) for _, v := range args { + v = functionArgs(v) + switch v := v.Evaluate(t).(type) { case string: - a = append(a, v) + b.WriteString(v) case query: node := v.Select(t) if node != nil { - a = append(a, node.Value()) + b.WriteString(node.Value()) } } } - return strings.Join(a, "") + result := b.String() + b.Reset() + builderPool.Put(b) + + return result + } +} + +// https://github.com/antchfx/xpath/issues/43 +func functionArgs(q query) query { + if _, ok := q.(*functionQuery); ok { + return q + } + return q.Clone() +} + +func reverseFunc(q query, t iterator) func() NodeNavigator { + var list []NodeNavigator + for { + node := q.Select(t) + if node == nil { + break + } + list = append(list, node.Copy()) + } + i := len(list) + return func() NodeNavigator { + if i <= 0 { + return nil + } + i-- + node := list[i] + return node + } +} + +// string-join is a XPath Node Set functions string-join(node-set, separator). +func stringJoinFunc(q, arg1 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + var separator string + switch v := functionArgs(arg1).Evaluate(t).(type) { + case string: + separator = v + case query: + node := v.Select(t) + if node != nil { + separator = node.Value() + } + } + + q = functionArgs(q) + test := predicate(q) + var parts []string + switch v := q.Evaluate(t).(type) { + case string: + return v + case query: + for node := v.Select(t); node != nil; node = v.Select(t) { + if test(node) { + parts = append(parts, node.Value()) + } + } + } + return strings.Join(parts, separator) + } +} + +// lower-case is XPATH function that converts a string to lower case. +func lowerCaseFunc(arg1 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + v := functionArgs(arg1).Evaluate(t) + return strings.ToLower(asString(t, v)) } } diff --git a/vendor/github.com/antchfx/xpath/func_go110.go b/vendor/github.com/antchfx/xpath/func_go110.go index 500880f..d6ca451 100644 --- a/vendor/github.com/antchfx/xpath/func_go110.go +++ b/vendor/github.com/antchfx/xpath/func_go110.go @@ -2,8 +2,15 @@ package xpath -import "math" +import ( + "math" + "strings" +) func round(f float64) int { return int(math.Round(f)) } + +func newStringBuilder() stringBuilder { + return &strings.Builder{} +} diff --git a/vendor/github.com/antchfx/xpath/func_pre_go110.go b/vendor/github.com/antchfx/xpath/func_pre_go110.go index 043616b..335141f 100644 --- a/vendor/github.com/antchfx/xpath/func_pre_go110.go +++ b/vendor/github.com/antchfx/xpath/func_pre_go110.go @@ -2,7 +2,10 @@ package xpath -import "math" +import ( + "bytes" + "math" +) // math.Round() is supported by Go 1.10+, // This method just compatible for version <1.10. @@ -13,3 +16,7 @@ func round(f float64) int { } return int(f + math.Copysign(0.5, f)) } + +func newStringBuilder() stringBuilder { + return &bytes.Buffer{} +} diff --git a/vendor/github.com/antchfx/xpath/go.mod b/vendor/github.com/antchfx/xpath/go.mod new file mode 100644 index 0000000..6745c56 --- /dev/null +++ b/vendor/github.com/antchfx/xpath/go.mod @@ -0,0 +1,3 @@ +module github.com/antchfx/xpath + +go 1.14 diff --git a/vendor/github.com/antchfx/xpath/operator.go b/vendor/github.com/antchfx/xpath/operator.go index 308d3cb..2820152 100644 --- a/vendor/github.com/antchfx/xpath/operator.go +++ b/vendor/github.com/antchfx/xpath/operator.go @@ -1,40 +1,11 @@ package xpath import ( - "fmt" - "reflect" "strconv" ) // The XPath number operator function list. -// valueType is a return value type. -type valueType int - -const ( - booleanType valueType = iota - numberType - stringType - nodeSetType -) - -func getValueType(i interface{}) valueType { - v := reflect.ValueOf(i) - switch v.Kind() { - case reflect.Float64: - return numberType - case reflect.String: - return stringType - case reflect.Bool: - return booleanType - default: - if _, ok := i.(query); ok { - return nodeSetType - } - } - panic(fmt.Errorf("xpath unknown value type: %v", v.Kind())) -} - type logical func(iterator, string, interface{}, interface{}) bool var logicalFuncs = [][]logical{ @@ -163,7 +134,30 @@ func cmpNodeSetString(t iterator, op string, m, n interface{}) bool { } func cmpNodeSetNodeSet(t iterator, op string, m, n interface{}) bool { - return false + a := m.(query) + b := n.(query) + for { + x := a.Select(t) + if x == nil { + return false + } + + y := b.Select(t) + if y == nil { + return false + } + + for { + if cmpStringStringF(op, x.Value(), y.Value()) { + return true + } + if y = b.Select(t); y == nil { + break + } + } + // reset + b.Evaluate(t) + } } func cmpStringNumeric(t iterator, op string, m, n interface{}) bool { @@ -205,91 +199,90 @@ func cmpBooleanBoolean(t iterator, op string, m, n interface{}) bool { // eqFunc is an `=` operator. func eqFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "=", m, n) } // gtFunc is an `>` operator. func gtFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, ">", m, n) } // geFunc is an `>=` operator. func geFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, ">=", m, n) } // ltFunc is an `<` operator. func ltFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "<", m, n) } // leFunc is an `<=` operator. func leFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "<=", m, n) } // neFunc is an `!=` operator. func neFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "!=", m, n) } // orFunc is an `or` operator. var orFunc = func(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "or", m, n) } -func numericExpr(m, n interface{}, cb func(float64, float64) float64) float64 { - typ := reflect.TypeOf(float64(0)) - a := reflect.ValueOf(m).Convert(typ) - b := reflect.ValueOf(n).Convert(typ) - return cb(a.Float(), b.Float()) +func numericExpr(t iterator, m, n interface{}, cb func(float64, float64) float64) float64 { + a := asNumber(t, m) + b := asNumber(t, n) + return cb(a, b) } // plusFunc is an `+` operator. -var plusFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { +var plusFunc = func(t iterator, m, n interface{}) interface{} { + return numericExpr(t, m, n, func(a, b float64) float64 { return a + b }) } // minusFunc is an `-` operator. -var minusFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { +var minusFunc = func(t iterator, m, n interface{}) interface{} { + return numericExpr(t, m, n, func(a, b float64) float64 { return a - b }) } // mulFunc is an `*` operator. -var mulFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { +var mulFunc = func(t iterator, m, n interface{}) interface{} { + return numericExpr(t, m, n, func(a, b float64) float64 { return a * b }) } // divFunc is an `DIV` operator. -var divFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { +var divFunc = func(t iterator, m, n interface{}) interface{} { + return numericExpr(t, m, n, func(a, b float64) float64 { return a / b }) } // modFunc is an 'MOD' operator. -var modFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { +var modFunc = func(t iterator, m, n interface{}) interface{} { + return numericExpr(t, m, n, func(a, b float64) float64 { return float64(int(a) % int(b)) }) } diff --git a/vendor/github.com/antchfx/xpath/parse.go b/vendor/github.com/antchfx/xpath/parse.go index fb9abe3..5393125 100644 --- a/vendor/github.com/antchfx/xpath/parse.go +++ b/vendor/github.com/antchfx/xpath/parse.go @@ -6,6 +6,7 @@ import ( "fmt" "strconv" "unicode" + "unicode/utf8" ) // A XPath expression token type. @@ -65,11 +66,13 @@ const ( nodeOperator nodeVariable nodeConstantOperand + nodeGroup ) type parser struct { - r *scanner - d int + r *scanner + d int + namespaces map[string]string } // newOperatorNode returns new operator node OperatorNode. @@ -83,15 +86,20 @@ func newOperandNode(v interface{}) node { } // newAxisNode returns new axis node AxisNode. -func newAxisNode(axeTyp, localName, prefix, prop string, n node) node { - return &axisNode{ +func newAxisNode(axisType string, typeTest NodeType, localName, prefix, prop string, n node, opts ...func(p *axisNode)) node { + a := axisNode{ nodeType: nodeAxis, + typeTest: typeTest, LocalName: localName, Prefix: prefix, - AxeType: axeTyp, + AxisType: axisType, Prop: prop, Input: n, } + for _, o := range opts { + o(&a) + } + return &a } // newVariableNode returns new variable node VariableNode. @@ -104,6 +112,10 @@ func newFilterNode(n, m node) node { return &filterNode{nodeType: nodeFilter, Input: n, Condition: m} } +func newGroupNode(n node) node { + return &groupNode{nodeType: nodeGroup, Input: n} +} + // newRootNode returns a root node. func newRootNode(s string) node { return &rootNode{nodeType: nodeRoot, slash: s} @@ -218,8 +230,9 @@ Loop: } // RelationalExpr ::= AdditiveExpr | RelationalExpr '<' AdditiveExpr | RelationalExpr '>' AdditiveExpr -// | RelationalExpr '<=' AdditiveExpr -// | RelationalExpr '>=' AdditiveExpr +// +// | RelationalExpr '<=' AdditiveExpr +// | RelationalExpr '>=' AdditiveExpr func (p *parser) parseRelationalExpr(n node) node { opnd := p.parseAdditiveExpr(n) Loop: @@ -264,7 +277,8 @@ Loop: } // MultiplicativeExpr ::= UnaryExpr | MultiplicativeExpr MultiplyOperator(*) UnaryExpr -// | MultiplicativeExpr 'div' UnaryExpr | MultiplicativeExpr 'mod' UnaryExpr +// +// | MultiplicativeExpr 'div' UnaryExpr | MultiplicativeExpr 'mod' UnaryExpr func (p *parser) parseMultiplicativeExpr(n node) node { opnd := p.parseUnaryExpr(n) Loop: @@ -298,7 +312,7 @@ func (p *parser) parseUnaryExpr(n node) node { return opnd } -// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr +// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr func (p *parser) parseUnionExpr(n node) node { opnd := p.parsePathExpr(n) Loop: @@ -325,7 +339,7 @@ func (p *parser) parsePathExpr(n node) node { opnd = p.parseRelativeLocationPath(opnd) case itemSlashSlash: p.next() - opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", "", "", "", opnd)) + opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", allNode, "", "", "", opnd)) } } else { opnd = p.parseLocationPath(nil) @@ -342,7 +356,7 @@ func (p *parser) parseFilterExpr(n node) node { return opnd } -// Predicate ::= '[' PredicateExpr ']' +// Predicate ::= '[' PredicateExpr ']' func (p *parser) parsePredicate(n node) node { p.skipItem(itemLBracket) opnd := p.parseExpression(n) @@ -362,7 +376,7 @@ func (p *parser) parseLocationPath(n node) (opnd node) { case itemSlashSlash: p.next() opnd = newRootNode("//") - opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", "", "", "", opnd)) + opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", allNode, "", "", "", opnd)) default: opnd = p.parseRelativeLocationPath(n) } @@ -378,7 +392,7 @@ Loop: switch p.r.typ { case itemSlashSlash: p.next() - opnd = newAxisNode("descendant-or-self", "", "", "", opnd) + opnd = newAxisNode("descendant-or-self", allNode, "", "", "", opnd) case itemSlash: p.next() default: @@ -390,30 +404,33 @@ Loop: // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep func (p *parser) parseStep(n node) (opnd node) { - axeTyp := "child" // default axes value. if p.r.typ == itemDot || p.r.typ == itemDotDot { if p.r.typ == itemDot { - axeTyp = "self" + opnd = newAxisNode("self", allNode, "", "", "", n) } else { - axeTyp = "parent" + opnd = newAxisNode("parent", allNode, "", "", "", n) } p.next() - opnd = newAxisNode(axeTyp, "", "", "", n) if p.r.typ != itemLBracket { return opnd } } else { + axisType := "child" // default axes value. switch p.r.typ { case itemAt: + axisType = "attribute" p.next() - axeTyp = "attribute" case itemAxe: - axeTyp = p.r.name + axisType = p.r.name p.next() case itemLParens: return p.parseSequence(n) } - opnd = p.parseNodeTest(n, axeTyp) + matchType := ElementNode + if axisType == "attribute" { + matchType = AttributeNode + } + opnd = p.parseNodeTest(n, axisType, matchType) } for p.r.typ == itemLBracket { opnd = newFilterNode(opnd, p.parsePredicate(opnd)) @@ -437,8 +454,8 @@ func (p *parser) parseSequence(n node) (opnd node) { return opnd } -// NodeTest ::= NameTest | nodeType '(' ')' | 'processing-instruction' '(' Literal ')' -func (p *parser) parseNodeTest(n node, axeTyp string) (opnd node) { +// NodeTest ::= NameTest | nodeType '(' ')' | 'processing-instruction' '(' Literal ')' +func (p *parser) parseNodeTest(n node, axeTyp string, matchType NodeType) (opnd node) { switch p.r.typ { case itemName: if p.r.canBeFunc && isNodeType(p.r) { @@ -456,7 +473,19 @@ func (p *parser) parseNodeTest(n node, axeTyp string) (opnd node) { p.next() } p.skipItem(itemRParens) - opnd = newAxisNode(axeTyp, name, "", prop, n) + switch prop { + case "comment": + matchType = CommentNode + case "text": + matchType = TextNode + case "processing-instruction": + case "node": + matchType = allNode + default: + matchType = RootNode + } + + opnd = newAxisNode(axeTyp, matchType, name, "", prop, n) } else { prefix := p.r.prefix name := p.r.name @@ -464,10 +493,19 @@ func (p *parser) parseNodeTest(n node, axeTyp string) (opnd node) { if p.r.name == "*" { name = "" } - opnd = newAxisNode(axeTyp, name, prefix, "", n) + opnd = newAxisNode(axeTyp, matchType, name, prefix, "", n, func(a *axisNode) { + if prefix != "" && p.namespaces != nil { + if ns, ok := p.namespaces[prefix]; ok { + a.hasNamespaceURI = true + a.namespaceURI = ns + } else { + panic(fmt.Sprintf("prefix %s not defined.", prefix)) + } + } + }) } case itemStar: - opnd = newAxisNode(axeTyp, "", "", "", n) + opnd = newAxisNode(axeTyp, matchType, "", "", "", n) p.next() default: panic("expression must evaluate to a node-set") @@ -492,6 +530,9 @@ func (p *parser) parsePrimaryExpr(n node) (opnd node) { case itemLParens: p.next() opnd = p.parseExpression(n) + if opnd.Type() != nodeConstantOperand { + opnd = newGroupNode(opnd) + } p.skipItem(itemRParens) case itemName: if p.r.canBeFunc && !isNodeType(p.r) { @@ -523,11 +564,11 @@ func (p *parser) parseMethod(n node) node { } // Parse parsing the XPath express string expr and returns a tree node. -func parse(expr string) node { +func parse(expr string, namespaces map[string]string) node { r := &scanner{text: expr} r.nextChar() r.nextItem() - p := &parser{r: r} + p := &parser{r: r, namespaces: namespaces} return p.parseExpression(nil) } @@ -555,17 +596,20 @@ func (o *operatorNode) String() string { // axisNode holds a location step. type axisNode struct { nodeType - Input node - Prop string // node-test name.[comment|text|processing-instruction|node] - AxeType string // name of the axes.[attribute|ancestor|child|....] - LocalName string // local part name of node. - Prefix string // prefix name of node. + Input node + Prop string // node-test name.[comment|text|processing-instruction|node] + AxisType string // name of the axis.[attribute|ancestor|child|....] + LocalName string // local part name of node. + Prefix string // prefix name of node. + namespaceURI string // namespace URI of node + hasNamespaceURI bool // if namespace URI is set (can be "") + typeTest NodeType } func (a *axisNode) String() string { var b bytes.Buffer - if a.AxeType != "" { - b.Write([]byte(a.AxeType + "::")) + if a.AxisType != "" { + b.Write([]byte(a.AxisType + "::")) } if a.Prefix != "" { b.Write([]byte(a.Prefix + ":")) @@ -587,6 +631,16 @@ func (o *operandNode) String() string { return fmt.Sprintf("%v", o.Val) } +// groupNode holds a set of node expression +type groupNode struct { + nodeType + Input node +} + +func (g *groupNode) String() string { + return fmt.Sprintf("%s", g.Input) +} + // filterNode holds a condition filter. type filterNode struct { nodeType @@ -638,6 +692,7 @@ type scanner struct { pos int curr rune + currSize int typ itemType strval string // text value at current pos numval float64 // number value at current pos @@ -647,10 +702,18 @@ type scanner struct { func (s *scanner) nextChar() bool { if s.pos >= len(s.text) { s.curr = rune(0) + s.currSize = 1 return false } - s.curr = rune(s.text[s.pos]) - s.pos++ + + r, size := rune(s.text[s.pos]), 1 + if r >= 0x80 { // handle multi-byte runes + r, size = utf8.DecodeRuneInString(s.text[s.pos:]) + } + + s.curr = r + s.currSize = size + s.pos += size return true } @@ -805,31 +868,36 @@ func (s *scanner) scanNumber() float64 { func (s *scanner) scanString() string { var ( - c = 0 end = s.curr ) s.nextChar() - i := s.pos - 1 + i := s.pos - s.currSize + c := s.currSize for s.curr != end { if !s.nextChar() { panic(errors.New("xpath: scanString got unclosed string")) } - c++ + c += s.currSize } + c -= 1 s.nextChar() return s.text[i : i+c] } func (s *scanner) scanName() string { var ( - c int - i = s.pos - 1 + c = s.currSize - 1 + i = s.pos - s.currSize ) + + // Detect current rune size + for isName(s.curr) { - c++ if !s.nextChar() { + c += s.currSize break } + c += s.currSize } return s.text[i : i+c] } diff --git a/vendor/github.com/antchfx/xpath/query.go b/vendor/github.com/antchfx/xpath/query.go index afeb890..8c5535e 100644 --- a/vendor/github.com/antchfx/xpath/query.go +++ b/vendor/github.com/antchfx/xpath/query.go @@ -5,8 +5,47 @@ import ( "fmt" "hash/fnv" "reflect" + "strconv" ) +// The return type of the XPath expression. +type resultType int + +var xpathResultType = struct { + Boolean resultType + // A numeric value + Number resultType + String resultType + // A node collection. + NodeSet resultType + // Any of the XPath node types. + Any resultType +}{ + Boolean: 0, + Number: 1, + String: 2, + NodeSet: 3, + Any: 4, +} + +type queryProp int + +var queryProps = struct { + None queryProp + Position queryProp + Count queryProp + Cached queryProp + Reverse queryProp + Merge queryProp +}{ + None: 0, + Position: 1, + Count: 2, + Cached: 4, + Reverse: 8, + Merge: 16, +} + type iterator interface { Current() NodeNavigator } @@ -20,12 +59,15 @@ type query interface { Evaluate(iterator) interface{} Clone() query + + // ValueType returns the value type of the current query. + ValueType() resultType + + Properties() queryProp } // nopQuery is an empty query that always return nil for any query. -type nopQuery struct { - query -} +type nopQuery struct{} func (nopQuery) Select(iterator) NodeNavigator { return nil } @@ -33,21 +75,23 @@ func (nopQuery) Evaluate(iterator) interface{} { return nil } func (nopQuery) Clone() query { return nopQuery{} } +func (nopQuery) ValueType() resultType { return xpathResultType.NodeSet } + +func (nopQuery) Properties() queryProp { + return queryProps.Merge | queryProps.Position | queryProps.Count | queryProps.Cached +} + // contextQuery is returns current node on the iterator object query. type contextQuery struct { count int - Root bool // Moving to root-level node in the current context iterator. } -func (c *contextQuery) Select(t iterator) (n NodeNavigator) { - if c.count == 0 { - c.count++ - n = t.Current().Copy() - if c.Root { - n.MoveToRoot() - } +func (c *contextQuery) Select(t iterator) NodeNavigator { + if c.count > 0 { + return nil } - return n + c.count++ + return t.Current().Copy() } func (c *contextQuery) Evaluate(iterator) interface{} { @@ -56,12 +100,54 @@ func (c *contextQuery) Evaluate(iterator) interface{} { } func (c *contextQuery) Clone() query { - return &contextQuery{count: 0, Root: c.Root} + return &contextQuery{} +} + +func (c *contextQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (c *contextQuery) Properties() queryProp { + return queryProps.Merge | queryProps.Position | queryProps.Count | queryProps.Cached +} + +type absoluteQuery struct { + count int +} + +func (a *absoluteQuery) Select(t iterator) (n NodeNavigator) { + if a.count > 0 { + return + } + a.count++ + n = t.Current().Copy() + n.MoveToRoot() + return +} + +func (a *absoluteQuery) Evaluate(t iterator) interface{} { + a.count = 0 + return a +} + +func (a *absoluteQuery) Clone() query { + return &absoluteQuery{} +} + +func (a *absoluteQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (a *absoluteQuery) Properties() queryProp { + return queryProps.Merge | queryProps.Position | queryProps.Count | queryProps.Cached } // ancestorQuery is an XPath ancestor node query.(ancestor::*|ancestor-self::*) type ancestorQuery struct { + name string iterator func() NodeNavigator + table map[uint64]bool + pos int Self bool Input query @@ -69,32 +155,44 @@ type ancestorQuery struct { } func (a *ancestorQuery) Select(t iterator) NodeNavigator { + if a.table == nil { + a.table = make(map[uint64]bool) + } + for { if a.iterator == nil { node := a.Input.Select(t) if node == nil { return nil } + // Reset position for a new input context node + a.pos = 0 first := true + node = node.Copy() a.iterator = func() NodeNavigator { - if first && a.Self { + if first { first = false - if a.Predicate(node) { + if a.Self && a.Predicate(node) { return node } } for node.MoveToParent() { - if !a.Predicate(node) { - continue + if a.Predicate(node) { + return node } - return node } return nil } } - if node := a.iterator(); node != nil { - return node + for node := a.iterator(); node != nil; node = a.iterator() { + node_id := getHashCode(node.Copy()) + if _, ok := a.table[node_id]; !ok { + a.table[node_id] = true + // Increase position for each matched node in current input context + a.pos++ + return node + } } a.iterator = nil } @@ -103,6 +201,8 @@ func (a *ancestorQuery) Select(t iterator) NodeNavigator { func (a *ancestorQuery) Evaluate(t iterator) interface{} { a.Input.Evaluate(t) a.iterator = nil + // Reset the table when re-evaluating to ensure clean state + a.table = nil return a } @@ -111,11 +211,27 @@ func (a *ancestorQuery) Test(n NodeNavigator) bool { } func (a *ancestorQuery) Clone() query { - return &ancestorQuery{Self: a.Self, Input: a.Input.Clone(), Predicate: a.Predicate} + return &ancestorQuery{name: a.name, Self: a.Self, Input: a.Input.Clone(), Predicate: a.Predicate} +} + +func (a *ancestorQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (a *ancestorQuery) Properties() queryProp { + return queryProps.Position | queryProps.Count | queryProps.Cached | queryProps.Merge | queryProps.Reverse +} + +// position returns the ordinal of the current matched node within the axis +// traversal for the current input context node. This is required so numeric +// predicates like [1] or [2] on the ancestor axis resolve in axis order. +func (a *ancestorQuery) position() int { + return a.pos } // attributeQuery is an XPath attribute node query.(@*) type attributeQuery struct { + name string iterator func() NodeNavigator Input query @@ -161,11 +277,20 @@ func (a *attributeQuery) Test(n NodeNavigator) bool { } func (a *attributeQuery) Clone() query { - return &attributeQuery{Input: a.Input.Clone(), Predicate: a.Predicate} + return &attributeQuery{name: a.name, Input: a.Input.Clone(), Predicate: a.Predicate} +} + +func (a *attributeQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (a *attributeQuery) Properties() queryProp { + return queryProps.Merge } // childQuery is an XPath child node query.(child::*) type childQuery struct { + name string posit int iterator func() NodeNavigator @@ -215,7 +340,15 @@ func (c *childQuery) Test(n NodeNavigator) bool { } func (c *childQuery) Clone() query { - return &childQuery{Input: c.Input.Clone(), Predicate: c.Predicate} + return &childQuery{name: c.name, Input: c.Input.Clone(), Predicate: c.Predicate} +} + +func (c *childQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (c *childQuery) Properties() queryProp { + return queryProps.Merge } // position returns a position of current NodeNavigator. @@ -223,10 +356,78 @@ func (c *childQuery) position() int { return c.posit } +type cachedChildQuery struct { + name string + posit int + iterator func() NodeNavigator + + Input query + Predicate func(NodeNavigator) bool +} + +func (c *cachedChildQuery) Select(t iterator) NodeNavigator { + for { + if c.iterator == nil { + c.posit = 0 + node := c.Input.Select(t) + if node == nil { + return nil + } + node = node.Copy() + first := true + c.iterator = func() NodeNavigator { + for { + if (first && !node.MoveToChild()) || (!first && !node.MoveToNext()) { + return nil + } + first = false + if c.Predicate(node) { + return node + } + } + } + } + + if node := c.iterator(); node != nil { + c.posit++ + return node + } + c.iterator = nil + } +} + +func (c *cachedChildQuery) Evaluate(t iterator) interface{} { + c.Input.Evaluate(t) + c.iterator = nil + return c +} + +func (c *cachedChildQuery) position() int { + return c.posit +} + +func (c *cachedChildQuery) Test(n NodeNavigator) bool { + return c.Predicate(n) +} + +func (c *cachedChildQuery) Clone() query { + return &childQuery{name: c.name, Input: c.Input.Clone(), Predicate: c.Predicate} +} + +func (c *cachedChildQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (c *cachedChildQuery) Properties() queryProp { + return queryProps.Merge +} + // descendantQuery is an XPath descendant node query.(descendant::* | descendant-or-self::*) type descendantQuery struct { + name string iterator func() NodeNavigator posit int + level int Self bool Input query @@ -242,38 +443,32 @@ func (d *descendantQuery) Select(t iterator) NodeNavigator { return nil } node = node.Copy() - level := 0 - positmap := make(map[int]int) + d.level = 0 first := true d.iterator = func() NodeNavigator { - if first && d.Self { + if first { first = false - if d.Predicate(node) { - d.posit = 1 - positmap[level] = 1 + if d.Self && d.Predicate(node) { return node } } for { if node.MoveToChild() { - level++ - positmap[level] = 0 + d.level = d.level + 1 } else { for { - if level == 0 { + if d.level == 0 { return nil } if node.MoveToNext() { break } node.MoveToParent() - level-- + d.level = d.level - 1 } } if d.Predicate(node) { - positmap[level]++ - d.posit = positmap[level] return node } } @@ -281,6 +476,7 @@ func (d *descendantQuery) Select(t iterator) NodeNavigator { } if node := d.iterator(); node != nil { + d.posit++ return node } d.iterator = nil @@ -302,8 +498,20 @@ func (d *descendantQuery) position() int { return d.posit } +func (d *descendantQuery) depth() int { + return d.level +} + func (d *descendantQuery) Clone() query { - return &descendantQuery{Self: d.Self, Input: d.Input.Clone(), Predicate: d.Predicate} + return &descendantQuery{name: d.name, Self: d.Self, Input: d.Input.Clone(), Predicate: d.Predicate} +} + +func (d *descendantQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (d *descendantQuery) Properties() queryProp { + return queryProps.Merge } // followingQuery is an XPath following node query.(following::*|following-sibling::*) @@ -384,6 +592,14 @@ func (f *followingQuery) Clone() query { return &followingQuery{Input: f.Input.Clone(), Sibling: f.Sibling, Predicate: f.Predicate} } +func (f *followingQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (f *followingQuery) Properties() queryProp { + return queryProps.Merge +} + func (f *followingQuery) position() int { return f.posit } @@ -465,6 +681,14 @@ func (p *precedingQuery) Clone() query { return &precedingQuery{Input: p.Input.Clone(), Sibling: p.Sibling, Predicate: p.Predicate} } +func (p *precedingQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (p *precedingQuery) Properties() queryProp { + return queryProps.Merge | queryProps.Reverse +} + func (p *precedingQuery) position() int { return p.posit } @@ -497,6 +721,14 @@ func (p *parentQuery) Clone() query { return &parentQuery{Input: p.Input.Clone(), Predicate: p.Predicate} } +func (p *parentQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (p *parentQuery) Properties() queryProp { + return queryProps.Position | queryProps.Count | queryProps.Cached | queryProps.Merge +} + func (p *parentQuery) Test(n NodeNavigator) bool { return p.Predicate(n) } @@ -533,11 +765,22 @@ func (s *selfQuery) Clone() query { return &selfQuery{Input: s.Input.Clone(), Predicate: s.Predicate} } +func (s *selfQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (s *selfQuery) Properties() queryProp { + return queryProps.Merge +} + // filterQuery is an XPath query for predicate filter. type filterQuery struct { - Input query - Predicate query - posit int + Input query + Predicate query + NoPosition bool + + posit int + positmap map[int]int } func (f *filterQuery) do(t iterator) bool { @@ -551,8 +794,8 @@ func (f *filterQuery) do(t iterator) bool { pt := getNodePosition(f.Input) return int(val.Float()) == pt default: - if q, ok := f.Predicate.(query); ok { - return q.Select(t) != nil + if f.Predicate != nil { + return f.Predicate.Select(t) != nil } } return false @@ -563,26 +806,33 @@ func (f *filterQuery) position() int { } func (f *filterQuery) Select(t iterator) NodeNavigator { - + if f.positmap == nil { + f.positmap = make(map[int]int) + } for { node := f.Input.Select(t) if node == nil { - return node + return nil } node = node.Copy() t.Current().MoveTo(node) if f.do(t) { - f.posit++ + // fix https://github.com/antchfx/htmlquery/issues/26 + // Calculate and keep the each of matching node's position in the same depth. + level := getNodeDepth(f.Input) + f.positmap[level]++ + f.posit = f.positmap[level] return node } - f.posit = 0 } } func (f *filterQuery) Evaluate(t iterator) interface{} { f.Input.Evaluate(t) + // Reset the position map when re-evaluating to ensure clean state + f.positmap = nil return f } @@ -590,8 +840,17 @@ func (f *filterQuery) Clone() query { return &filterQuery{Input: f.Input.Clone(), Predicate: f.Predicate.Clone()} } -// functionQuery is an XPath function that call a function to returns -// value of current NodeNavigator node. +func (f *filterQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (f *filterQuery) Properties() queryProp { + return (queryProps.Position | f.Input.Properties()) & (queryProps.Reverse | queryProps.Merge) +} + +// functionQuery is an XPath function that returns a computed value for +// the Evaluate call of the current NodeNavigator node. Select call isn't +// applicable for functionQuery. type functionQuery struct { Input query // Node Set Func func(query, iterator) interface{} // The xpath function. @@ -608,9 +867,56 @@ func (f *functionQuery) Evaluate(t iterator) interface{} { } func (f *functionQuery) Clone() query { + if f.Input == nil { + return &functionQuery{Func: f.Func} + } return &functionQuery{Input: f.Input.Clone(), Func: f.Func} } +func (f *functionQuery) ValueType() resultType { + return xpathResultType.Any +} + +func (f *functionQuery) Properties() queryProp { + return queryProps.Merge +} + +// transformFunctionQuery diffs from functionQuery where the latter computes a scalar +// value (number,string,boolean) for the current NodeNavigator node while the former +// (transformFunctionQuery) performs a mapping or transform of the current NodeNavigator +// and returns a new NodeNavigator. It is used for non-scalar XPath functions such as +// reverse(), remove(), subsequence(), unordered(), etc. +type transformFunctionQuery struct { + Input query + Func func(query, iterator) func() NodeNavigator + iterator func() NodeNavigator +} + +func (f *transformFunctionQuery) Select(t iterator) NodeNavigator { + if f.iterator == nil { + f.iterator = f.Func(f.Input, t) + } + return f.iterator() +} + +func (f *transformFunctionQuery) Evaluate(t iterator) interface{} { + f.Input.Evaluate(t) + f.iterator = nil + return f +} + +func (f *transformFunctionQuery) Clone() query { + return &transformFunctionQuery{Input: f.Input.Clone(), Func: f.Func} +} + +func (f *transformFunctionQuery) ValueType() resultType { + return xpathResultType.Any +} + +func (f *transformFunctionQuery) Properties() queryProp { + return queryProps.Merge +} + // constantQuery is an XPath constant operand. type constantQuery struct { Val interface{} @@ -628,6 +934,49 @@ func (c *constantQuery) Clone() query { return c } +func (c *constantQuery) ValueType() resultType { + return getXPathType(c.Val) +} + +func (c *constantQuery) Properties() queryProp { + return queryProps.Position | queryProps.Count | queryProps.Cached | queryProps.Merge +} + +type groupQuery struct { + posit int + + Input query +} + +func (g *groupQuery) Select(t iterator) NodeNavigator { + node := g.Input.Select(t) + if node == nil { + return nil + } + g.posit++ + return node +} + +func (g *groupQuery) Evaluate(t iterator) interface{} { + return g.Input.Evaluate(t) +} + +func (g *groupQuery) Clone() query { + return &groupQuery{Input: g.Input.Clone()} +} + +func (g *groupQuery) ValueType() resultType { + return g.Input.ValueType() +} + +func (g *groupQuery) Properties() queryProp { + return queryProps.Position +} + +func (g *groupQuery) position() int { + return g.posit +} + // logicalQuery is an XPath logical expression. type logicalQuery struct { Left, Right query @@ -636,15 +985,6 @@ type logicalQuery struct { } func (l *logicalQuery) Select(t iterator) NodeNavigator { - // When a XPath expr is logical expression. - node := t.Current().Copy() - val := l.Evaluate(t) - switch val.(type) { - case bool: - if val.(bool) == true { - return node - } - } return nil } @@ -658,11 +998,19 @@ func (l *logicalQuery) Clone() query { return &logicalQuery{Left: l.Left.Clone(), Right: l.Right.Clone(), Do: l.Do} } +func (l *logicalQuery) ValueType() resultType { + return xpathResultType.Boolean +} + +func (l *logicalQuery) Properties() queryProp { + return queryProps.Merge +} + // numericQuery is an XPath numeric operator expression. type numericQuery struct { Left, Right query - Do func(interface{}, interface{}) interface{} + Do func(iterator, interface{}, interface{}) interface{} } func (n *numericQuery) Select(t iterator) NodeNavigator { @@ -672,13 +1020,21 @@ func (n *numericQuery) Select(t iterator) NodeNavigator { func (n *numericQuery) Evaluate(t iterator) interface{} { m := n.Left.Evaluate(t) k := n.Right.Evaluate(t) - return n.Do(m, k) + return n.Do(t, m, k) } func (n *numericQuery) Clone() query { return &numericQuery{Left: n.Left.Clone(), Right: n.Right.Clone(), Do: n.Do} } +func (n *numericQuery) ValueType() resultType { + return xpathResultType.Number +} + +func (n *numericQuery) Properties() queryProp { + return queryProps.Merge +} + type booleanQuery struct { IsOr bool Left, Right query @@ -750,6 +1106,8 @@ func (b *booleanQuery) Select(t iterator) NodeNavigator { } func (b *booleanQuery) Evaluate(t iterator) interface{} { + n := t.Current().Copy() + m := b.Left.Evaluate(t) left := asBool(t, m) if b.IsOr && left { @@ -757,6 +1115,8 @@ func (b *booleanQuery) Evaluate(t iterator) interface{} { } else if !b.IsOr && !left { return false } + + t.Current().MoveTo(n) m = b.Right.Evaluate(t) return asBool(t, m) } @@ -765,6 +1125,14 @@ func (b *booleanQuery) Clone() query { return &booleanQuery{IsOr: b.IsOr, Left: b.Left.Clone(), Right: b.Right.Clone()} } +func (b *booleanQuery) ValueType() resultType { + return xpathResultType.Boolean +} + +func (b *booleanQuery) Properties() queryProp { + return queryProps.Merge +} + type unionQuery struct { Left, Right query iterator func() NodeNavigator @@ -822,13 +1190,205 @@ func (u *unionQuery) Clone() query { return &unionQuery{Left: u.Left.Clone(), Right: u.Right.Clone()} } +func (u *unionQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (u *unionQuery) Properties() queryProp { + return queryProps.Merge +} + +type lastFuncQuery struct { + buffer []NodeNavigator + counted bool + + Input query +} + +func (q *lastFuncQuery) Select(t iterator) NodeNavigator { + return nil +} + +func (q *lastFuncQuery) Evaluate(t iterator) interface{} { + if !q.counted { + for { + node := q.Input.Select(t) + if node == nil { + break + } + q.buffer = append(q.buffer, node.Copy()) + } + q.counted = true + } + return float64(len(q.buffer)) +} + +func (q *lastFuncQuery) Clone() query { + return &lastFuncQuery{Input: q.Input.Clone()} +} + +func (q *lastFuncQuery) ValueType() resultType { + return xpathResultType.Number +} + +func (q *lastFuncQuery) Properties() queryProp { + return queryProps.Merge +} + +type descendantOverDescendantQuery struct { + name string + level int + posit int + currentNode NodeNavigator + + Input query + MatchSelf bool + Predicate func(NodeNavigator) bool +} + +func (d *descendantOverDescendantQuery) moveToFirstChild() bool { + if d.currentNode.MoveToChild() { + d.level++ + return true + } + return false +} + +func (d *descendantOverDescendantQuery) moveUpUntilNext() bool { + for !d.currentNode.MoveToNext() { + d.level-- + if d.level == 0 { + return false + } + d.currentNode.MoveToParent() + } + return true +} + +func (d *descendantOverDescendantQuery) Select(t iterator) NodeNavigator { + for { + if d.level == 0 { + node := d.Input.Select(t) + if node == nil { + return nil + } + d.currentNode = node.Copy() + d.posit = 0 + if d.MatchSelf && d.Predicate(d.currentNode) { + d.posit = 1 + return d.currentNode + } + d.moveToFirstChild() + } else if !d.moveUpUntilNext() { + continue + } + for ok := true; ok; ok = d.moveToFirstChild() { + if d.Predicate(d.currentNode) { + d.posit++ + return d.currentNode + } + } + } +} + +func (d *descendantOverDescendantQuery) Evaluate(t iterator) interface{} { + d.Input.Evaluate(t) + return d +} + +func (d *descendantOverDescendantQuery) Clone() query { + return &descendantOverDescendantQuery{Input: d.Input.Clone(), Predicate: d.Predicate, MatchSelf: d.MatchSelf} +} + +func (d *descendantOverDescendantQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (d *descendantOverDescendantQuery) Properties() queryProp { + return queryProps.Merge +} + +func (d *descendantOverDescendantQuery) position() int { + return d.posit +} + +type mergeQuery struct { + Input query + Child query + + iterator func() NodeNavigator +} + +func (m *mergeQuery) Select(t iterator) NodeNavigator { + for { + if m.iterator == nil { + root := m.Input.Select(t) + if root == nil { + return nil + } + m.Child.Evaluate(t) + root = root.Copy() + t.Current().MoveTo(root) + var list []NodeNavigator + for node := m.Child.Select(t); node != nil; node = m.Child.Select(t) { + list = append(list, node.Copy()) + } + i := 0 + m.iterator = func() NodeNavigator { + if i >= len(list) { + return nil + } + result := list[i] + i++ + return result + } + } + + if node := m.iterator(); node != nil { + return node + } + m.iterator = nil + } +} + +func (m *mergeQuery) Evaluate(t iterator) interface{} { + m.Input.Evaluate(t) + return m +} + +func (m *mergeQuery) Clone() query { + return &mergeQuery{Input: m.Input.Clone(), Child: m.Child.Clone()} +} + +func (m *mergeQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (m *mergeQuery) Properties() queryProp { + return queryProps.Position | queryProps.Count | queryProps.Cached | queryProps.Merge +} + func getHashCode(n NodeNavigator) uint64 { var sb bytes.Buffer switch n.NodeType() { case AttributeNode, TextNode, CommentNode: - sb.WriteString(fmt.Sprintf("%s=%s", n.LocalName(), n.Value())) - if n.MoveToParent() { - sb.WriteString(n.LocalName()) + sb.WriteString(n.LocalName()) + sb.WriteByte('=') + sb.WriteString(n.Value()) + // https://github.com/antchfx/htmlquery/issues/25 + d := 1 + for n.MoveToPrevious() { + d++ + } + sb.WriteByte('-') + sb.WriteString(strconv.Itoa(d)) + for n.MoveToParent() { + d = 1 + for n.MoveToPrevious() { + d++ + } + sb.WriteByte('-') + sb.WriteString(strconv.Itoa(d)) } case ElementNode: sb.WriteString(n.Prefix() + n.LocalName()) @@ -836,18 +1396,20 @@ func getHashCode(n NodeNavigator) uint64 { for n.MoveToPrevious() { d++ } - sb.WriteString(fmt.Sprintf("-%d", d)) + sb.WriteByte('-') + sb.WriteString(strconv.Itoa(d)) for n.MoveToParent() { d = 1 for n.MoveToPrevious() { d++ } - sb.WriteString(fmt.Sprintf("-%d", d)) + sb.WriteByte('-') + sb.WriteString(strconv.Itoa(d)) } } h := fnv.New64a() - h.Write([]byte(sb.String())) + h.Write(sb.Bytes()) return h.Sum64() } @@ -860,3 +1422,30 @@ func getNodePosition(q query) int { } return 1 } + +func getNodeDepth(q query) int { + type Depth interface { + depth() int + } + if count, ok := q.(Depth); ok { + return count.depth() + } + return 0 +} + +func getXPathType(i interface{}) resultType { + v := reflect.ValueOf(i) + switch v.Kind() { + case reflect.Float64: + return xpathResultType.Number + case reflect.String: + return xpathResultType.String + case reflect.Bool: + return xpathResultType.Boolean + default: + if _, ok := i.(query); ok { + return xpathResultType.NodeSet + } + } + panic(fmt.Errorf("xpath unknown value type: %v", v.Kind())) +} diff --git a/vendor/github.com/antchfx/xpath/xpath.go b/vendor/github.com/antchfx/xpath/xpath.go index d6c9912..04bbe8d 100644 --- a/vendor/github.com/antchfx/xpath/xpath.go +++ b/vendor/github.com/antchfx/xpath/xpath.go @@ -2,6 +2,7 @@ package xpath import ( "errors" + "fmt" ) // NodeType represents a type of XPath node. @@ -83,13 +84,13 @@ func (t *NodeIterator) Current() NodeNavigator { // MoveNext moves Navigator to the next match node. func (t *NodeIterator) MoveNext() bool { n := t.query.Select(t) - if n != nil { - if !t.node.MoveTo(n) { - t.node = n.Copy() - } - return true + if n == nil { + return false } - return false + if !t.node.MoveTo(n) { + t.node = n.Copy() + } + return true } // Select selects a node set using the specified XPath expression. @@ -140,10 +141,13 @@ func Compile(expr string) (*Expr, error) { if expr == "" { return nil, errors.New("expr expression is nil") } - qy, err := build(expr) + qy, err := build(expr, nil) if err != nil { return nil, err } + if qy == nil { + return nil, fmt.Errorf(fmt.Sprintf("undeclared variable in XPath expression: %s", expr)) + } return &Expr{s: expr, q: qy}, nil } @@ -155,3 +159,18 @@ func MustCompile(expr string) *Expr { } return exp } + +// CompileWithNS compiles an XPath expression string, using given namespaces map. +func CompileWithNS(expr string, namespaces map[string]string) (*Expr, error) { + if expr == "" { + return nil, errors.New("expr expression is nil") + } + qy, err := build(expr, namespaces) + if err != nil { + return nil, err + } + if qy == nil { + return nil, fmt.Errorf(fmt.Sprintf("undeclared variable in XPath expression: %s", expr)) + } + return &Expr{s: expr, q: qy}, nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 13d2bcd..0220d4c 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,4 +1,4 @@ -# github.com/antchfx/xpath v1.1.2 +# github.com/antchfx/xpath v1.3.6 github.com/antchfx/xpath # github.com/antchfx/xquery v0.0.0-20180515051857-ad5b8c7a47b0 github.com/antchfx/xquery/xml