Skip to content

Commit 141f5aa

Browse files
committed
(PDB-5161) engine/parse-dot-query: strictly parse fields
Replace ad-hoc parsing with parse-field. To do so add :indexes? and :matches? options to allow treating segments that would otherwise be recognized as match()es or array indexes as literal named fields, allowing us to maintain the previous behavior for now.
1 parent 039eea1 commit 141f5aa

File tree

2 files changed

+53
-26
lines changed

2 files changed

+53
-26
lines changed

src/puppetlabs/puppetdb/query_eng/engine.clj

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1631,29 +1631,49 @@
16311631
(instance? ArrayBinaryExpression node)
16321632
(instance? ArrayRegexExpression node)))
16331633

1634-
(defn path->nested-map
1635-
"Given path [a b c] and value d, produce {a {b {c d}}}"
1636-
[path value]
1637-
(reduce #(hash-map (parse/maybe-strip-escaped-quotes %2) %1)
1638-
(rseq (conj (vec path) value))))
1639-
16401634
(defn parse-dot-query
16411635
"Transforms a dotted query into a JSON structure appropriate
16421636
for comparison in the database."
16431637
[{:keys [field value] :as node} state]
1644-
(let [[column & path] (map parse/maybe-strip-escaped-quotes
1645-
(parse/dotted-query->path field))]
1646-
(if (some #(re-matches #"^\d+$" %) path)
1647-
;; This path is taken for match() operators that match indexes,
1648-
;; i.e. facts.foo.match("\d+") because by this point they'll be
1649-
;; plain integer path components.
1650-
{:node (assoc node :value ["?" "?"] :field column :array-in-path true)
1651-
;; https://www.postgresql.org/docs/11/arrays.html#ARRAYS-INPUT
1652-
:state (reduce conj state [(jdbc/strs->db-array path)
1638+
;; node is JsonContainsExpression
1639+
(let [[column & path] (parse/parse-field field 0 {:indexes? false
1640+
:matches? false})
1641+
;; FIXME: this is a conflation - one fix might be to parse all
1642+
;; fields *once* at the start of query processing, and then
1643+
;; pass that representation all the way through.
1644+
;;
1645+
;; The handling of match() wrt fact_paths complicates that
1646+
;; because right now we don't have any (efficient) way of
1647+
;; knowing all the types of the path elements we retrieve from
1648+
;; the path_array, i.e. for ["foo" "5" "bar"], is "5" a custom
1649+
;; fact name, or an array lookup? The situation could be
1650+
;; different if we pushed the path lookups further down, and
1651+
;; didn't insinuate them via AST. At the moment, we preserve
1652+
;; backward compatibility by just treating everything as a
1653+
;; named field by setting indexes? and matches? above.
1654+
maybe-index? #(re-matches #"[0-9]+" (:name %))]
1655+
(if (some maybe-index? path)
1656+
;; If this matches, then the path component might be an array
1657+
;; access, e.g. via the database-generated paths created by
1658+
;; maybe-add-match-function-filter like foo.5.bar, and so we
1659+
;; have to be conservative and set array-in-path so that we'll
1660+
;; eventually use the right json operators (e.g. #> instead of
1661+
;; @>).
1662+
{:node (assoc node
1663+
:value ["?" "?"]
1664+
:field (:name column)
1665+
:array-in-path true)
1666+
:state (reduce conj state [(jdbc/strs->db-array (map :name path))
16531667
(su/munge-jsonb-for-storage value)])}
1654-
{:node (assoc node :value "?" :field column :array-in-path false)
1655-
:state (conj state (su/munge-jsonb-for-storage
1656-
(path->nested-map path value)))})))
1668+
{:node (assoc node
1669+
:value "?"
1670+
:field (:name column)
1671+
:array-in-path false)
1672+
:state (conj state
1673+
(su/munge-jsonb-for-storage
1674+
;; Convert path [a b c] and value d to {a {b {c d}}}
1675+
(reduce (fn [result name] (hash-map name result))
1676+
(cons value (reverse (map :name path))))))})))
16571677

16581678
(defn parse-dot-query-with-array-elements
16591679
"Transforms a dotted query into a JSON structure appropriate

src/puppetlabs/puppetdb/query_eng/parse.clj

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
(defn- index-or-name
5656
"Returns an ::indexed-field-part segment if s is of the form name[digits],
5757
otherwise a ::named-field-part segment."
58-
[s]
58+
[s indexes?]
5959
(if-let [[_ n i] (re-matches #"(?s:(.+)\[(\d+)\])" s)]
6060
;; Must be Integer, not Long to avoid pg errors like this:
6161
;; "ERROR: operator does not exist: jsonb -> bigint"
@@ -65,7 +65,11 @@
6565
(defn- parse-field-components
6666
"Parses the components of a dotted query field that come after the
6767
first, and conjoins a map describing each one onto the result."
68-
[^String s offset result]
68+
[^String s offset
69+
{:keys [indexes? matches?]
70+
:or {indexes? true matches? true}
71+
:as opts}
72+
result]
6973
(let [field-m (re-matcher field-rx s)
7074
match-m (re-matcher match-rx s)
7175
qfield-m (re-matcher quoted-field-rx s)]
@@ -94,7 +98,7 @@
9498
{:kind ::invalid-field-component
9599
:field s
96100
:offset i}))
97-
(find-at match-m i)
101+
(and matches? (find-at match-m i))
98102
(recur (.end match-m)
99103
(conj result {:kind ::match-field-part
100104
:pattern (.group match-m 1)}))
@@ -103,10 +107,12 @@
103107
;; hopefully a bit easier to follow.
104108
(find-at qfield-m i)
105109
(recur (.end qfield-m)
106-
(conj result (index-or-name (.group qfield-m 1))))
110+
(conj result (index-or-name (.group qfield-m 1)
111+
indexes?)))
107112
(find-at field-m i)
108113
(recur (.end field-m)
109-
(conj result (index-or-name (.group field-m))))
114+
(conj result (index-or-name (.group field-m)
115+
indexes?)))
110116
;; Probably currently unreachable
111117
:else (throw
112118
(ex-info (format "Don't recognize AST field component at character %d: %s"
@@ -119,8 +125,9 @@
119125
"Parses an AST field like \"certname\", \"facts.partition[3]\" and
120126
returns a vector of the field components as maps. The first
121127
component will always be a ::named-field-part."
122-
([s] (parse-field s 0))
123-
([s offset]
128+
([s] (parse-field s 0 {}))
129+
([s offset opts]
130+
(assert (string? s))
124131
(when (= offset (count s))
125132
(throw (ex-info "Empty AST field" {:kind ::invalid-field :field s})))
126133
(let [field-m (re-matcher top-field-rx s)]
@@ -132,7 +139,7 @@
132139
:offset 0})))
133140
;; Q: OK to disallow an initial quoted-field?
134141
(let [result [{:kind ::named-field-part :name (.group field-m)}]]
135-
(parse-field-components s (.end field-m) result)))))
142+
(parse-field-components s (.end field-m) opts result)))))
136143

137144
(defn- quote-path-name-for-field-str [s]
138145
(when (re-matches #"(?s:.*\..*\\)" s)

0 commit comments

Comments
 (0)