Skip to content

Commit 957f533

Browse files
authored
Merge pull request #3510 from rbrw/pdb-5161-parse-ast-query-fields
(PDB-5161) Parse AST query fields (dotted projections, etc.)
2 parents f89c4c6 + 17b4fad commit 957f533

File tree

9 files changed

+523
-229
lines changed

9 files changed

+523
-229
lines changed

src/puppetlabs/puppetdb/jdbc.clj

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
(:import
1414
(com.zaxxer.hikari HikariDataSource HikariConfig)
1515
(java.sql Connection SQLException SQLTransientConnectionException)
16-
(java.util.concurrent TimeUnit)))
16+
(java.util.concurrent TimeUnit)
17+
(org.postgresql.util PGobject)))
1718

1819
(def ^:dynamic *db* nil)
1920

@@ -392,6 +393,26 @@
392393
[s]
393394
(str "\"" (escape-double-quotes s) "\""))
394395

396+
(defn string->text-array-literal
397+
"Escape string s for inclusion in a postgres text[] literal,
398+
e.g. \"foo\\\"bar\" becomes the \"foo\\\\\\\"bar\" in
399+
'{\"foo\\\\\\\"bar\"}'"
400+
;; https://www.postgresql.org/docs/11/arrays.html#ARRAYS-INPUT
401+
;; https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS
402+
[s]
403+
(assert (string? s))
404+
(str \" (str/replace s "\\" "\\\\") \"))
405+
406+
(defn strs->db-array
407+
[strs]
408+
(assert (every? string? strs))
409+
;; https://www.postgresql.org/docs/11/arrays.html#ARRAYS-INPUT
410+
(let [quoted (map string->text-array-literal strs)]
411+
(doto (PGobject.)
412+
(.setType "text[]")
413+
(.setValue (str \{ (str/join \, quoted) \})))))
414+
415+
;; Q: move/replace?
395416
(defn create-json-path-extraction
396417
"Given a base json field and a path of keys to traverse, construct the proper
397418
SQL query of the form base->'key'->'key2'..."

src/puppetlabs/puppetdb/query_eng/engine.clj

Lines changed: 202 additions & 109 deletions
Large diffs are not rendered by default.
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
(ns puppetlabs.puppetdb.query-eng.parse
2+
"AST parsing"
3+
(:require
4+
[clojure.string :as str])
5+
(:import
6+
(java.util.regex Matcher)))
7+
8+
(def ^:private warn-on-reflection-orig *warn-on-reflection*)
9+
(set! *warn-on-reflection* true)
10+
11+
;; Q: could even be stricter?
12+
(def top-field-rx
13+
"Syntax of the first component of a field path, e.g. \"facts\" for
14+
facts.os."
15+
#"^[a-zA-Z][_0-9a-zA-Z]*\??(?=\.|$)")
16+
17+
(def field-rx
18+
"Syntax of an unquoted field component, e.g. the second component of
19+
foo.bar.baz. Includes all characters up to the next dot, or the end
20+
of the field. Taken in conjunction with the current quoted field
21+
syntax, there is no way to represent a field component that contains
22+
a dot and ends in a backslash, e.g. a fact named \"foo.bar\\\" since
23+
given the dot, it has to be quoted, but quoted fields can't end in a
24+
backslash right now (cf. quoted-field-rx)."
25+
#"^[^.]+(?=\.|$)")
26+
27+
(def quoted-field-rx
28+
"Syntax of a quoted field component, e.g. the second component of
29+
foo.\"bar\".baz. It must begin with a double quote, and ends at the
30+
next double quote that is not preceded by a backslash and is
31+
followed by either a dot, or the end of the field. There is
32+
currently no way to represent a field component that contains a dot
33+
and ends in a backslash, e.g. a fact named \"foo.bar\\\". It has to
34+
be quoted, given the dot, but as just mentioned, quoted fields can't
35+
end in a backslash."
36+
#"(?s:^\"(.*?[^\\])\"(?=\.|$))")
37+
38+
(def match-rx
39+
"Syntax of a match(regex) field component, e.g. the second component
40+
of foo.match(\"bar?\").baz. It must begin with match, open paren,
41+
double quote, and ends at the next double-quote, close-paren that is
42+
not preceded by a backslash and is followed by either a dot, or the
43+
end of the field. The regex then, has essentially the same syntax
44+
as a double quoted field. And similarly, there is currently not way
45+
to specify a match regular expression that ends in a backslash."
46+
#"(?s:^match\(\"(.*[^\\])\"\)(?=\.|$))")
47+
48+
(defn- find-at
49+
"Sets the start of m's region to i and then returns the result of a
50+
find. This allows ^ to match at i."
51+
[^Matcher m i]
52+
(.region m i (.regionEnd m))
53+
(.find m))
54+
55+
(defn- index-or-name
56+
"Returns an ::indexed-field-part segment if s is of the form name[digits],
57+
otherwise a ::named-field-part segment."
58+
[s indexes?]
59+
(if-let [[_ n i] (re-matches #"(?s:(.+)\[(\d+)\])" s)]
60+
;; Must be Integer, not Long to avoid pg errors like this:
61+
;; "ERROR: operator does not exist: jsonb -> bigint"
62+
{:kind ::indexed-field-part :name n :index (Integer/valueOf ^String i)}
63+
{:kind ::named-field-part :name s}))
64+
65+
(defn- parse-field-components
66+
"Parses the components of a dotted query field that come after the
67+
first, and conjoins a map describing each one onto the result."
68+
[^String s offset
69+
{:keys [indexes? matches?]
70+
:or {indexes? true matches? true}
71+
:as opts}
72+
result]
73+
(let [field-m (re-matcher field-rx s)
74+
match-m (re-matcher match-rx s)
75+
qfield-m (re-matcher quoted-field-rx s)]
76+
(loop [i offset
77+
result result]
78+
(if (= i (count s))
79+
result
80+
(do
81+
;; Q: can this case ever match now?
82+
(when-not (= \. (nth s i))
83+
(throw
84+
(ex-info (format "AST field component at character %d does not begin with a dot: %s"
85+
i (pr-str s))
86+
{:kind ::invalid-field-component
87+
:field s
88+
:offset i})))
89+
(let [i (inc i)]
90+
;; Assumes that this ordering produces no aliasing, which is
91+
;; true right now because all the patterns should be mutually
92+
;; exclusive.
93+
(cond
94+
(.startsWith s "\"\"" i)
95+
(throw
96+
(ex-info (format "Empty AST field component at character %d: %s"
97+
i (pr-str s))
98+
{:kind ::invalid-field-component
99+
:field s
100+
:offset i}))
101+
(and matches? (find-at match-m i))
102+
(recur (.end match-m)
103+
(conj result {:kind ::match-field-part
104+
:pattern (.group match-m 1)}))
105+
;; We could handle indexing as an option in the field
106+
;; regexes, but we don't for now, so that the code's
107+
;; hopefully a bit easier to follow.
108+
(find-at qfield-m i)
109+
(recur (.end qfield-m)
110+
(conj result (index-or-name (.group qfield-m 1)
111+
indexes?)))
112+
(find-at field-m i)
113+
(recur (.end field-m)
114+
(conj result (index-or-name (.group field-m)
115+
indexes?)))
116+
;; Probably currently unreachable
117+
:else (throw
118+
(ex-info (format "Don't recognize AST field component at character %d: %s"
119+
i (pr-str s))
120+
{:kind ::invalid-field-component
121+
:field s
122+
:offset i})))))))))
123+
124+
(defn parse-field
125+
"Parses an AST field like \"certname\", \"facts.partition[3]\" and
126+
returns a vector of the field components as maps. The first
127+
component will always be a ::named-field-part."
128+
([s] (parse-field s 0 {}))
129+
([s offset opts]
130+
(assert (string? s))
131+
(when (= offset (count s))
132+
(throw (ex-info "Empty AST field" {:kind ::invalid-field :field s})))
133+
(let [field-m (re-matcher top-field-rx s)]
134+
(when-not (find-at field-m offset)
135+
(throw
136+
(ex-info (str "First component of AST field is invalid: " (pr-str s))
137+
{:kind ::invalid-field-component
138+
:field s
139+
:offset 0})))
140+
;; Q: OK to disallow an initial quoted-field?
141+
(let [result [{:kind ::named-field-part :name (.group field-m)}]]
142+
(parse-field-components s (.end field-m) opts result)))))
143+
144+
(defn- quote-path-name-for-field-str [s]
145+
(when (re-matches #"(?s:.*\..*\\)" s)
146+
;; Currently no way to represent a string including a dot that
147+
;; ends in a backslash.
148+
(throw (ex-info (str "AST has no way to quote a path segment including a dot and ending in backslash: "
149+
(pr-str s))
150+
{:kind ::unquotable-field-segment
151+
:name s})))
152+
(if (str/index-of s \.)
153+
(str \" s \")
154+
s))
155+
156+
(defn path-names->field-str
157+
"Returns a properly quoted AST field string (dotted path) for the
158+
given names (only handles names, not ::indexed-field-part
159+
or ::match-field-part expressions). Throws an exception if any name
160+
cannot be quoted, since AST's current quoting syntax is
161+
incomplete (e.g. cannot represent a name that contains a dot and
162+
ends in backslash."
163+
[names]
164+
(str/join \. (map quote-path-name-for-field-str names)))
165+
166+
(set! *warn-on-reflection* warn-on-reflection-orig)

src/puppetlabs/puppetdb/scf/storage_utils.clj

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -244,12 +244,6 @@
244244
(hcore/raw
245245
(format "%s = ANY(?)" (first (hfmt/format column)))))
246246

247-
(defn json-contains
248-
[field array-in-path]
249-
(if array-in-path
250-
(hcore/raw (format "%s #> ? = ?" field))
251-
(hcore/raw (format "%s @> ?" field))))
252-
253247
(defn jsonb-path-binary-expression
254248
"Produce a predicate that compares against nested value with op and checks the
255249
existence of a (presumably) top-level value. The existence check is necessary
@@ -382,42 +376,3 @@
382376
(log/info (trs "Analyzing small tables"))
383377
(apply jdbc/do-commands-outside-txn
384378
(map #(str "analyze " %) small-tables)))
385-
386-
(defn handle-quoted-path-segment
387-
[v]
388-
(loop [result []
389-
[s & splits] v]
390-
(let [s-len (count s)]
391-
(if (and (str/ends-with? s "\"")
392-
(not (= s-len 1))
393-
(or (<= s-len 2) (not (= (nth s (- s-len 2)) \\))))
394-
[(str/join "." (conj result s)) splits]
395-
(recur (conj result s) splits)))))
396-
397-
(defn dotted-query->path
398-
[string]
399-
(loop [[s & splits :as v] (str/split string #"\.")
400-
result []]
401-
(if (nil? s)
402-
result
403-
(let [s-len (count s)]
404-
(if (and (str/starts-with? s "\"")
405-
(or (= s-len 1)
406-
(or (not (str/ends-with? s "\""))
407-
(and (str/ends-with? s "\"")
408-
(>= s-len 2)
409-
(= (nth s (- s-len 2)) \\)))))
410-
(let [[x xs] (handle-quoted-path-segment v)]
411-
(recur xs (conj result x)))
412-
(recur splits (conj result s)))))))
413-
414-
(defn expand-array-access-in-path
415-
"Given a path like [\"a\" \"b[0]\" \"c\"], expand the [0] to get
416-
[\"a\" \"b\" 0 \"c\"]"
417-
[path]
418-
(mapcat (fn [el]
419-
(let [[[_ field index-str]] (re-seq #"^(.*)\[(\d+)\]$" el)]
420-
(if index-str
421-
[field (Integer/parseInt index-str)]
422-
[el])))
423-
path))

src/puppetlabs/puppetdb/utils.clj

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -285,24 +285,6 @@
285285
(apply assoc m new-kvs)
286286
m)))
287287

288-
(defn parse-matchfields
289-
[s]
290-
(str/replace s #"match\((\".*\")\)" "$1"))
291-
292-
(defn parse-indexing
293-
[s]
294-
(str/replace s #"\[(\d+)\]" ".$1"))
295-
296-
(defn split-indexing
297-
[path]
298-
(flatten
299-
(for [s path]
300-
(if (re-find #"\[\d+\]$" s)
301-
(-> s
302-
(str/split #"(?=\[\d+\]$)")
303-
(update 1 #(Integer/parseInt (subs % 1 (dec (count %))))))
304-
s))))
305-
306288
(defn regex-quote
307289
[s]
308290
(when (and (string? s) (re-find #"\\E" s))

src/puppetlabs/puppetdb/utils/string_formatter.clj

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,6 @@
1717
(interpose "\\E\\\\E\\Q"))
1818
["\\E"])))
1919

20-
(defn maybe-strip-escaped-quotes
21-
[s]
22-
(if (and (> (count s) 1)
23-
(string/starts-with? s "\"")
24-
(string/ends-with? s "\""))
25-
(subs s 1 (dec (count s)))
26-
s))
27-
2820
(defn quoted
2921
[s]
3022
(str "'" s "'"))

test/puppetlabs/puppetdb/pql/parser_test.clj

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
(ns puppetlabs.puppetdb.pql.parser-test
2-
(:require [clojure.test :refer :all]
3-
[clojure.string :as str]
4-
[instaparse.core :as insta]
5-
[puppetlabs.puppetdb.scf.storage-utils :as sutils]
6-
[puppetlabs.puppetdb.pql :refer [parse]]))
2+
(:require
3+
[clojure.string :as str]
4+
[clojure.test :refer :all]
5+
[instaparse.core :as insta]
6+
[puppetlabs.puppetdb.pql :refer [parse]]))
77

88
;; These tests are ordered the same as in the EBNF file, so one can
99
;; develop the expressions and tests side-by-side.
@@ -563,18 +563,18 @@
563563
""))
564564

565565
(testing "field"
566-
(are [in] (= (parse in :start :field)
567-
(vec (concat [:field] (sutils/dotted-query->path in))))
568-
"certname"
569-
"value"
570-
"field_underscore"
571-
"facts.operatingsystem.κόσμε"
572-
"facts.\"quoted field\".foo"
573-
"facts.\"field.with.dot\".foo"
574-
"facts.\"field-with-dash\".foo"
575-
"trusted.authenticated"
576-
"parameters.😁"
577-
"latest_report?")
566+
(doseq [[field expected]
567+
[["certname" [:field "certname"]]
568+
["value" [:field "value"]]
569+
["field_underscore" [:field "field_underscore"]]
570+
["facts.operatingsystem.κόσμε" [:field "facts" "operatingsystem" "κόσμε"]]
571+
["facts.\"quoted field\".foo" [:field "facts" "\"quoted field\"" "foo"]]
572+
["facts.\"field.with.dot\".foo" [:field "facts" "\"field.with.dot\"" "foo"]]
573+
["facts.\"field-with-dash\".foo" [:field "facts" "\"field-with-dash\"" "foo"]]
574+
["trusted.authenticated" [:field "trusted" "authenticated"]]
575+
["parameters.😁" [:field "parameters" "😁"]]
576+
["latest_report?" [:field "latest_report?"]]]]
577+
(is (= expected (parse field :start :field))))
578578

579579
(are [in] (insta/failure? (insta/parse parse in :start :field))
580580
"'asdf'"

0 commit comments

Comments
 (0)