|
| 1 | +(ns puppetlabs.puppetdb.query-eng.parse |
| 2 | + "AST parsing" |
| 3 | + (:require |
| 4 | + [clojure.string :as str]) |
| 5 | + (:import |
| 6 | + (java.util.regex Matcher))) |
| 7 | + |
| 8 | +(def ^:private warn-on-reflection-orig *warn-on-reflection*) |
| 9 | +(set! *warn-on-reflection* true) |
| 10 | + |
| 11 | +;; Q: could even be stricter? |
| 12 | +(def top-field-rx |
| 13 | + "Syntax of the first component of a field path, e.g. \"facts\" for |
| 14 | + facts.os." |
| 15 | + #"^[a-zA-Z][_0-9a-zA-Z]*\??(?=\.|$)") |
| 16 | + |
| 17 | +(def field-rx |
| 18 | + "Syntax of an unquoted field component, e.g. the second component of |
| 19 | + foo.bar.baz. Includes all characters up to the next dot, or the end |
| 20 | + of the field. Taken in conjunction with the current quoted field |
| 21 | + syntax, there is no way to represent a field component that contains |
| 22 | + a dot and ends in a backslash, e.g. a fact named \"foo.bar\\\" since |
| 23 | + given the dot, it has to be quoted, but quoted fields can't end in a |
| 24 | + backslash right now (cf. quoted-field-rx)." |
| 25 | + #"^[^.]+(?=\.|$)") |
| 26 | + |
| 27 | +(def quoted-field-rx |
| 28 | + "Syntax of a quoted field component, e.g. the second component of |
| 29 | + foo.\"bar\".baz. It must begin with a double quote, and ends at the |
| 30 | + next double quote that is not preceded by a backslash and is |
| 31 | + followed by either a dot, or the end of the field. There is |
| 32 | + currently no way to represent a field component that contains a dot |
| 33 | + and ends in a backslash, e.g. a fact named \"foo.bar\\\". It has to |
| 34 | + be quoted, given the dot, but as just mentioned, quoted fields can't |
| 35 | + end in a backslash." |
| 36 | + #"(?s:^\"(.*?[^\\])\"(?=\.|$))") |
| 37 | + |
| 38 | +(def match-rx |
| 39 | + "Syntax of a match(regex) field component, e.g. the second component |
| 40 | + of foo.match(\"bar?\").baz. It must begin with match, open paren, |
| 41 | + double quote, and ends at the next double-quote, close-paren that is |
| 42 | + not preceded by a backslash and is followed by either a dot, or the |
| 43 | + end of the field. The regex then, has essentially the same syntax |
| 44 | + as a double quoted field. And similarly, there is currently not way |
| 45 | + to specify a match regular expression that ends in a backslash." |
| 46 | + #"(?s:^match\(\"(.*[^\\])\"\)(?=\.|$))") |
| 47 | + |
| 48 | +(defn- find-at |
| 49 | + "Sets the start of m's region to i and then returns the result of a |
| 50 | + find. This allows ^ to match at i." |
| 51 | + [^Matcher m i] |
| 52 | + (.region m i (.regionEnd m)) |
| 53 | + (.find m)) |
| 54 | + |
| 55 | +(defn- index-or-name |
| 56 | + "Returns an ::indexed-field-part segment if s is of the form name[digits], |
| 57 | + otherwise a ::named-field-part segment." |
| 58 | + [s indexes?] |
| 59 | + (if-let [[_ n i] (re-matches #"(?s:(.+)\[(\d+)\])" s)] |
| 60 | + ;; Must be Integer, not Long to avoid pg errors like this: |
| 61 | + ;; "ERROR: operator does not exist: jsonb -> bigint" |
| 62 | + {:kind ::indexed-field-part :name n :index (Integer/valueOf ^String i)} |
| 63 | + {:kind ::named-field-part :name s})) |
| 64 | + |
| 65 | +(defn- parse-field-components |
| 66 | + "Parses the components of a dotted query field that come after the |
| 67 | + first, and conjoins a map describing each one onto the result." |
| 68 | + [^String s offset |
| 69 | + {:keys [indexes? matches?] |
| 70 | + :or {indexes? true matches? true} |
| 71 | + :as opts} |
| 72 | + result] |
| 73 | + (let [field-m (re-matcher field-rx s) |
| 74 | + match-m (re-matcher match-rx s) |
| 75 | + qfield-m (re-matcher quoted-field-rx s)] |
| 76 | + (loop [i offset |
| 77 | + result result] |
| 78 | + (if (= i (count s)) |
| 79 | + result |
| 80 | + (do |
| 81 | + ;; Q: can this case ever match now? |
| 82 | + (when-not (= \. (nth s i)) |
| 83 | + (throw |
| 84 | + (ex-info (format "AST field component at character %d does not begin with a dot: %s" |
| 85 | + i (pr-str s)) |
| 86 | + {:kind ::invalid-field-component |
| 87 | + :field s |
| 88 | + :offset i}))) |
| 89 | + (let [i (inc i)] |
| 90 | + ;; Assumes that this ordering produces no aliasing, which is |
| 91 | + ;; true right now because all the patterns should be mutually |
| 92 | + ;; exclusive. |
| 93 | + (cond |
| 94 | + (.startsWith s "\"\"" i) |
| 95 | + (throw |
| 96 | + (ex-info (format "Empty AST field component at character %d: %s" |
| 97 | + i (pr-str s)) |
| 98 | + {:kind ::invalid-field-component |
| 99 | + :field s |
| 100 | + :offset i})) |
| 101 | + (and matches? (find-at match-m i)) |
| 102 | + (recur (.end match-m) |
| 103 | + (conj result {:kind ::match-field-part |
| 104 | + :pattern (.group match-m 1)})) |
| 105 | + ;; We could handle indexing as an option in the field |
| 106 | + ;; regexes, but we don't for now, so that the code's |
| 107 | + ;; hopefully a bit easier to follow. |
| 108 | + (find-at qfield-m i) |
| 109 | + (recur (.end qfield-m) |
| 110 | + (conj result (index-or-name (.group qfield-m 1) |
| 111 | + indexes?))) |
| 112 | + (find-at field-m i) |
| 113 | + (recur (.end field-m) |
| 114 | + (conj result (index-or-name (.group field-m) |
| 115 | + indexes?))) |
| 116 | + ;; Probably currently unreachable |
| 117 | + :else (throw |
| 118 | + (ex-info (format "Don't recognize AST field component at character %d: %s" |
| 119 | + i (pr-str s)) |
| 120 | + {:kind ::invalid-field-component |
| 121 | + :field s |
| 122 | + :offset i}))))))))) |
| 123 | + |
| 124 | +(defn parse-field |
| 125 | + "Parses an AST field like \"certname\", \"facts.partition[3]\" and |
| 126 | + returns a vector of the field components as maps. The first |
| 127 | + component will always be a ::named-field-part." |
| 128 | + ([s] (parse-field s 0 {})) |
| 129 | + ([s offset opts] |
| 130 | + (assert (string? s)) |
| 131 | + (when (= offset (count s)) |
| 132 | + (throw (ex-info "Empty AST field" {:kind ::invalid-field :field s}))) |
| 133 | + (let [field-m (re-matcher top-field-rx s)] |
| 134 | + (when-not (find-at field-m offset) |
| 135 | + (throw |
| 136 | + (ex-info (str "First component of AST field is invalid: " (pr-str s)) |
| 137 | + {:kind ::invalid-field-component |
| 138 | + :field s |
| 139 | + :offset 0}))) |
| 140 | + ;; Q: OK to disallow an initial quoted-field? |
| 141 | + (let [result [{:kind ::named-field-part :name (.group field-m)}]] |
| 142 | + (parse-field-components s (.end field-m) opts result))))) |
| 143 | + |
| 144 | +(defn- quote-path-name-for-field-str [s] |
| 145 | + (when (re-matches #"(?s:.*\..*\\)" s) |
| 146 | + ;; Currently no way to represent a string including a dot that |
| 147 | + ;; ends in a backslash. |
| 148 | + (throw (ex-info (str "AST has no way to quote a path segment including a dot and ending in backslash: " |
| 149 | + (pr-str s)) |
| 150 | + {:kind ::unquotable-field-segment |
| 151 | + :name s}))) |
| 152 | + (if (str/index-of s \.) |
| 153 | + (str \" s \") |
| 154 | + s)) |
| 155 | + |
| 156 | +(defn path-names->field-str |
| 157 | + "Returns a properly quoted AST field string (dotted path) for the |
| 158 | + given names (only handles names, not ::indexed-field-part |
| 159 | + or ::match-field-part expressions). Throws an exception if any name |
| 160 | + cannot be quoted, since AST's current quoting syntax is |
| 161 | + incomplete (e.g. cannot represent a name that contains a dot and |
| 162 | + ends in backslash." |
| 163 | + [names] |
| 164 | + (str/join \. (map quote-path-name-for-field-str names))) |
| 165 | + |
| 166 | +(set! *warn-on-reflection* warn-on-reflection-orig) |
0 commit comments