diff --git a/project.clj b/project.clj index 7b62ea0..06910da 100755 --- a/project.clj +++ b/project.clj @@ -5,7 +5,7 @@ {"install-for-building-drivers" ["with-profile" "+install-for-building-drivers" "install"]} :dependencies - [[com.google.apis/google-api-services-bigquery "v2-rev20190917-1.30.3"]] + [[com.google.apis/google-api-services-bigquery "v2-rev20200523-1.30.9"]] :profiles {:provided @@ -18,7 +18,6 @@ {:auto-clean true :aot :all} - :uberjar {:auto-clean true :aot :all diff --git a/resources/metabase-plugin.yaml b/resources/metabase-plugin.yaml index 6c4856f..f7bb2bd 100755 --- a/resources/metabase-plugin.yaml +++ b/resources/metabase-plugin.yaml @@ -12,26 +12,18 @@ driver: - google - sql connection-properties: - - name: project-id - display-name: Project ID - placeholder: praxis-beacon-120871 - required: true - - name: client-id - display-name: Client ID - placeholder: 1201327674725-y6ferb0feo1hfssr7t40o4aikqll46d4.apps.googleusercontent.com - required: true - - name: client-secret - display-name: Client Secret - placeholder: dJNi4utWgMzyIFo2JbnsK6Np - required: true - - name: auth-code - display-name: Auth Code - placeholder: 4/HSk-KtxkSzTt61j5zcbee2Rmm5JHkRFbL5gD5lgkXek + - name: "service-account-json" + display-name: "Service account JSON file" + type: "textFile" required: true - name: use-jvm-timezone - display-name: Use JVM Time Zone + display-name: "Use JVM Time Zone" + type: boolean default: false + - name: "include-user-id-and-hash" + display-name: "Include User ID and query hash" type: boolean + default: true init: - step: load-namespace namespace: metabase.driver.bigquery-alt diff --git a/src/metabase/driver/bigquery_alt.clj b/src/metabase/driver/bigquery_alt.clj old mode 100755 new mode 100644 index a72912e..81384d7 --- a/src/metabase/driver/bigquery_alt.clj +++ b/src/metabase/driver/bigquery_alt.clj @@ -1,37 +1,37 @@ (ns metabase.driver.bigquery_alt - (:require [clojure - [set :as set] - [string :as str]] - [clojure.tools.logging :as log] - [metabase - [driver :as driver] - [util :as u]] - [metabase.driver.bigquery_alt - [common :as bigquery_alt.common] - [query-processor :as bigquery_alt.qp]] - [metabase.driver.google :as google] - [metabase.driver.sql.util.unprepare :as unprepare] - [metabase.query-processor - [store :as qp.store] - [error-type :as error-type] - [timezone :as qp.timezone] - [util :as qputil]] - [metabase.util.schema :as su] + (:require [clojure.set :as set] + [clojure.string :as str] + [clojure.tools.logging :as log] + [medley.core :as m] + [metabase.driver :as driver] + [metabase.driver.bigquery_alt.common :as bigquery_alt.common] + [metabase.driver.bigquery_alt.params :as bigquery_alt.params] + [metabase.driver.bigquery_alt.query-processor :as bigquery_alt.qp] + [metabase.driver.google :as google] + [metabase.query-processor.error-type :as error-type] + [metabase.query-processor.store :as qp.store] + [metabase.query-processor.timezone :as qp.timezone] + [metabase.query-processor.util :as qputil] + [metabase.util :as u] + [metabase.util.i18n :refer [tru]] + [metabase.util.schema :as su] [schema.core :as s]) (:import com.google.api.client.googleapis.auth.oauth2.GoogleCredential com.google.api.client.http.HttpRequestInitializer [com.google.api.services.bigquery Bigquery Bigquery$Builder BigqueryScopes] - [com.google.api.services.bigquery.model QueryRequest QueryResponse Table TableCell TableFieldSchema TableList TableList$Tables TableReference TableRow TableSchema] + [com.google.api.services.bigquery.model GetQueryResultsResponse QueryRequest QueryResponse Table TableCell TableFieldSchema TableList DatasetList DatasetList$Datasets DatasetReference TableList$Tables TableReference TableRow TableSchema] - java.util.Collections)) + java.util.Collections)) (driver/register! :bigquery_alt, :parent #{:google :sql}) + ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Client | ;;; +----------------------------------------------------------------------------------------------------------------+ + (defn- ^Bigquery credential->client [^GoogleCredential credential] (.build (doto (Bigquery$Builder. google/http-transport @@ -49,11 +49,15 @@ (def ^:private ^{:arglists '([database])} ^Bigquery database->client (comp credential->client database->credential)) +(defn find-project-id + "Select the user specified project-id or the one from the credential, in the case of a service account" + [project-id ^GoogleCredential credential] + (or project-id + (.getServiceAccountProjectId credential))) ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Sync | ;;; +----------------------------------------------------------------------------------------------------------------+ - (defn- ^TableList list-tables "Fetch a page of Tables. By default, fetches the first page; page size is 50. For cases when more than 50 Tables are present, you may fetch subsequent pages by specifying the `page-token`; the token for the next page is returned with a @@ -62,48 +66,57 @@ (list-tables database nil)) ([{{:keys [project-id]} :details, :as database}, ^String dataset-id] - (list-tables (database->client database) project-id dataset-id nil)) + (let [db-client (database->client database) + project-id (find-project-id project-id (database->credential database))] + (list-tables (database->client database) project-id dataset-id nil))) ([{{:keys [project-id]} :details, :as database}, ^String dataset-id, ^String page-token-or-nil] - (list-tables (database->client database) project-id dataset-id page-token-or-nil)) + (list-tables (database->client database) (find-project-id project-id (database->credential database)) dataset-id page-token-or-nil)) ([^Bigquery client, ^String project-id, ^String dataset-id, ^String page-token-or-nil] {:pre [client (seq project-id) (seq dataset-id)]} (google/execute (u/prog1 (.list (.tables client) project-id dataset-id) - (.setPageToken <> page-token-or-nil))))) - + (.setPageToken <> page-token-or-nil))))) (defn- ^DatasetList list-datasets - ([{{:keys [project-id]} :details, :as database}] - {:pre [database (seq project-id)]} - (google/execute (u/prog1 (.list (.datasets (database->client database)) project-id))))) + ([database] + (list-datasets database nil)) + + ([{{:keys [project-id]} :details, :as database}, ^String page-token-or-nil] + (list-datasets (database->client database) (find-project-id project-id (database->credential database)) page-token-or-nil)) + ([^Bigquery client, ^String project-id, ^String page-token-or-nil] + {:pre [client (seq project-id)]} + (google/execute (u/prog1 (.list (.datasets client) project-id) + (.setPageToken <> page-token-or-nil))))) (defmethod driver/describe-database :bigquery_alt [_ database] ;; first page through all the 50-table pages until we stop getting "next page tokens" - (let [datasets (loop [datasets [], ^DatasetList dataset-list (list-datasets database)] - (let [datasets (concat datasets (.getDatasets dataset-list))] - (for [^DatasetList$Datasets dataset datasets - :let [^DatasetReference datasetref (.getDatasetReference dataset)]] - (.getDatasetId datasetref))))] + (let [datasets (loop [datasets [], ^DatasetList dataset-list (list-datasets database)] + (let [datasets (concat datasets (loop [bq_datasets [], ^DatasetList dataset-list (list-datasets database)] + (let [bq_datasets (concat bq_datasets (.getDatasets dataset-list))] + (if-let [next-page-token (.getNextPageToken dataset-list)] + (recur bq_datasets (list-datasets database next-page-token)) + bq_datasets))))] + (for [^DatasetList$Datasets dataset datasets + :let [^DatasetReference datasetref (.getDatasetReference dataset)]] + (.getDatasetId datasetref))))] (let [tables (loop [tables [], dataset_id (first datasets), dataset_list (next datasets)] (let [tables (concat tables (loop [dataset_tables [], ^TableList table-list (list-tables database dataset_id)] - (let [dataset_tables (concat dataset_tables (.getTables table-list))] - (if-let [next-page-token (.getNextPageToken table-list)] - (recur dataset_tables (list-tables database dataset_id next-page-token)) - dataset_tables))))] - (if (and (seq dataset_list)) - (recur tables (first dataset_list) (next dataset_list)) - tables)) - )] + (let [dataset_tables (concat dataset_tables (.getTables table-list))] + (if-let [next-page-token (.getNextPageToken table-list)] + (recur dataset_tables (list-tables database dataset_id next-page-token)) + dataset_tables))))] + (if (and (seq dataset_list)) + (recur tables (first dataset_list) (next dataset_list)) + tables)))] ;; after that convert the results to MB format - {:tables (set (for [^TableList$Tables table tables - :let [^TableReference tableref (.getTableReference table)]] - {:schema (.getDatasetId tableref), :name (.getTableId tableref)}))}))) - + {:tables (set (for [^TableList$Tables table tables + :let [^TableReference tableref (.getTableReference table)]] + {:schema (.getDatasetId tableref), :name (.getTableId tableref)}))}))) (defmethod driver/can-connect? :bigquery_alt [_ details-map] @@ -113,7 +126,7 @@ (s/defn get-table :- Table ([{{:keys [project-id]} :details, :as database} dataset-id table-id] - (get-table (database->client database) project-id dataset-id table-id)) + (get-table (database->client database) (find-project-id project-id (database->credential database)) dataset-id table-id)) ([client :- Bigquery, project-id :- su/NonBlankString, dataset-id :- su/NonBlankString, table-id :- su/NonBlankString] (google/execute (.get (.tables client) project-id dataset-id table-id)))) @@ -134,33 +147,52 @@ (s/defn ^:private table-schema->metabase-field-info [schema :- TableSchema] - (for [^TableFieldSchema field (.getFields schema)] - {:name (.getName field) - :database-type (.getType field) - :base-type (bigquery-type->base-type (.getType field))})) + (for [[idx ^TableFieldSchema field] (m/indexed (.getFields schema))] + {:name (.getName field) + :database-type (.getType field) + :base-type (bigquery-type->base-type (.getType field)) + :database-position idx})) (defmethod driver/describe-table :bigquery_alt [_ database {dataset-id :schema, table-name :name}] - {:schema dataset-id + {:schema dataset-id :name table-name :fields (set (table-schema->metabase-field-info (.getSchema (get-table database dataset-id table-name))))}) - ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Running Queries | ;;; +----------------------------------------------------------------------------------------------------------------+ (def ^:private ^:const ^Integer query-timeout-seconds 60) +(def ^:private ^:dynamic ^Long max-results-per-page + "Maximum number of rows to return per page in a query." + 20000) + +(def ^:private ^:dynamic page-callback + "Callback to execute when a new page is retrieved, used for testing" + nil) + +(defprotocol GetJobComplete + "A Clojure protocol for the .getJobComplete method on disparate Google BigQuery results" + (get-job-complete [this] "Call .getJobComplete on a BigQuery API response")) + +(extend-protocol GetJobComplete + com.google.api.services.bigquery.model.QueryResponse + (get-job-complete [this] (.getJobComplete ^QueryResponse this)) + + com.google.api.services.bigquery.model.GetQueryResultsResponse + (get-job-complete [this] (.getJobComplete ^GetQueryResultsResponse this))) + (defn do-with-finished-response "Impl for `with-finished-response`." {:style/indent 1} - [^QueryResponse response, f] + [response f] ;; 99% of the time by the time this is called `.getJobComplete` will return `true`. On the off chance it doesn't, ;; wait a few seconds for the job to finish. (loop [remaining-timeout (double query-timeout-seconds)] (cond - (.getJobComplete response) + (get-job-complete response) (f response) (pos? remaining-timeout) @@ -180,13 +212,50 @@ [[response-binding response] & body] `(do-with-finished-response ~response - (fn [~(vary-meta response-binding assoc :tag 'com.google.api.services.bigquery.model.QueryResponse)] + (fn [~response-binding] ~@body))) +(defn- ^GetQueryResultsResponse get-query-results + [^Bigquery client ^String project-id ^String job-id ^String location ^String page-token] + (when page-callback + (page-callback)) + (let [request (doto (.getQueryResults (.jobs client) project-id job-id) + (.setMaxResults max-results-per-page) + (.setPageToken page-token) + (.setLocation location))] + (google/execute request))) + +(defn- ^GetQueryResultsResponse execute-bigquery + ([{{:keys [project-id]} :details, :as database} sql parameters] + (execute-bigquery (database->client database) (find-project-id project-id (database->credential database)) sql parameters)) + + ([^Bigquery client ^String project-id ^String sql parameters] + {:pre [client (seq project-id) (seq sql)]} + (try + (let [request (doto (QueryRequest.) + (.setTimeoutMs (* query-timeout-seconds 1000)) + ;; if the query contains a `#legacySQL` directive then use legacy SQL instead of standard SQL + (.setUseLegacySql (str/includes? (str/lower-case sql) "#legacysql")) + (.setQuery sql) + (bigquery_alt.params/set-parameters! parameters)) + query-response ^QueryResponse (google/execute (.query (.jobs client) project-id request)) + job-ref (.getJobReference query-response) + location (.getLocation job-ref) + job-id (.getJobId job-ref) + proj-id (.getProjectId job-ref)] + (with-finished-response [_ query-response] + (get-query-results client proj-id job-id location nil))) + (catch Throwable e + (throw (ex-info (tru "Error executing query") + {:type error-type/invalid-query, :sql sql, :parameters parameters})))))) + (defn- post-process-native - "Parse results of a BigQuery query." - [^QueryResponse resp] - (with-finished-response [response resp] + "Parse results of a BigQuery query. `respond` is the same function passed to + `metabase.driver/execute-reducible-query`, and has the signature + + (respond results-metadata rows)" + [database respond ^GetQueryResultsResponse resp] + (with-finished-response [^GetQueryResultsResponse response resp] (let [^TableSchema schema (.getSchema response) @@ -194,44 +263,41 @@ (doall (for [^TableFieldSchema field (.getFields schema) :let [column-type (.getType field) + column-mode (.getMode field) method (get-method bigquery_alt.qp/parse-result-of-type column-type)]] - (partial method column-type bigquery_alt.common/*bigquery-timezone-id*))) + (partial method column-type column-mode bigquery_alt.common/*bigquery-timezone-id*))) columns (for [column (table-schema->metabase-field-info schema)] (-> column (set/rename-keys {:base-type :base_type}) - (dissoc :database-type)))] - {:columns (map (comp u/qualified-name :name) columns) - :cols columns - :rows (for [^TableRow row (.getRows response)] - (for [[^TableCell cell, parser] (partition 2 (interleave (.getF row) parsers))] - (when-let [v (.getV cell)] - ;; There is a weird error where everything that *should* be NULL comes back as an Object. - ;; See https://jira.talendforge.org/browse/TBD-1592 - ;; Everything else comes back as a String luckily so we can proceed normally. - (when-not (= (class v) Object) - (parser v)))))}))) - -(defn- ^QueryResponse execute-bigquery - ([{{:keys [project-id]} :details, :as database} query-string] - (execute-bigquery (database->client database) project-id query-string)) - - ([^Bigquery client, ^String project-id, ^String query-string] - {:pre [client (seq project-id) (seq query-string)]} - (let [request (doto (QueryRequest.) - (.setTimeoutMs (* query-timeout-seconds 1000)) - ;; if the query contains a `#legacySQL` directive then use legacy SQL instead of standard SQL - (.setUseLegacySql (str/includes? (str/lower-case query-string) "#legacysql")) - (.setQuery query-string))] - (google/execute (.query (.jobs client) project-id request))))) - -(defn- process-native* [database query-string] + (dissoc :database-type :database-position)))] + (respond + {:cols columns} + (letfn [(fetch-page [^GetQueryResultsResponse response] + (lazy-cat + (.getRows response) + (when-let [next-page-token (.getPageToken response)] + (with-finished-response [next-resp (get-query-results (database->client database) + (.getProjectId (.getJobReference response)) + (.getJobId (.getJobReference response)) + next-page-token)] + (fetch-page next-resp)))))] + (for [^TableRow row (fetch-page response)] + (for [[^TableCell cell, parser] (partition 2 (interleave (.getF row) parsers))] + (when-let [v (.getV cell)] + ;; There is a weird error where everything that *should* be NULL comes back as an Object. + ;; See https://jira.talendforge.org/browse/TBD-1592 + ;; Everything else comes back as a String luckily so we can proceed normally. + (when-not (= (class v) Object) + (parser v)))))))))) + +(defn- process-native* [respond database sql parameters] {:pre [(map? database) (map? (:details database))]} ;; automatically retry the query if it times out or otherwise fails. This is on top of the auto-retry added by ;; `execute` (letfn [(thunk [] - (post-process-native (execute-bigquery database query-string)))] + (post-process-native database respond (execute-bigquery database sql parameters)))] (try (thunk) (catch Throwable e @@ -243,26 +309,33 @@ (qp.timezone/system-timezone-id) "UTC")) - -(defmethod driver/execute-query :bigquery_alt - [driver {{sql :query, params :params, :keys [table-name mbql?]} :native, :as outer-query}] +(defmethod driver/execute-reducible-query :bigquery_alt + ;; TODO - it doesn't actually cancel queries the way we'd expect + [_ {{sql :query, :keys [params]} :native, :as outer-query} _ respond] (let [database (qp.store/database)] (binding [bigquery_alt.common/*bigquery-timezone-id* (effective-query-timezone-id database)] (log/tracef "Running BigQuery query in %s timezone" bigquery_alt.common/*bigquery-timezone-id*) - (let [sql (str "-- " (qputil/query->remark outer-query) "\n" (if (seq params) - (unprepare/unprepare driver (cons sql params)) - sql))] - (process-native* database sql))))) + (let [sql (if (get-in database [:details :include-user-id-and-hash] true) + (str "-- " (qputil/query->remark :bigquery outer-query) "\n" sql) + sql)] + (process-native* respond database sql params))))) ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Other Driver Method Impls | ;;; +----------------------------------------------------------------------------------------------------------------+ -(defmethod driver/supports? [:bigquery_alt :expressions] [_ _] false) + +(defmethod driver/supports? [:bigquery_alt :percentile-aggregations] [_ _] false) + +(defmethod driver/supports? [:bigquery_alt :expressions] [_ _] true) (defmethod driver/supports? [:bigquery_alt :foreign-keys] [_ _] true) ;; BigQuery is always in UTC (defmethod driver/db-default-timezone :bigquery_alt [_ _] "UTC") + +(defmethod driver/db-start-of-week :bigquery_alt + [_] + :sunday) diff --git a/src/metabase/driver/bigquery_alt/common.clj b/src/metabase/driver/bigquery_alt/common.clj old mode 100755 new mode 100644 diff --git a/src/metabase/driver/bigquery_alt/params.clj b/src/metabase/driver/bigquery_alt/params.clj new file mode 100644 index 0000000..58e6200 --- /dev/null +++ b/src/metabase/driver/bigquery_alt/params.clj @@ -0,0 +1,77 @@ +(ns metabase.driver.bigquery_alt.params + (:require [clojure.tools.logging :as log] + [java-time :as t] + [metabase.util.date-2 :as u.date]) + (:import [com.google.api.services.bigquery.model QueryParameter QueryParameterType QueryParameterValue QueryRequest])) + +(defn- param-type ^QueryParameterType [^String type-name] + (doto (QueryParameterType.) + (.setType type-name))) + +(defn- param-value ^QueryParameterValue [v] + (doto (QueryParameterValue.) + (.setValue (str v)))) + +(defn- param [type-name v] + (doto (QueryParameter.) + (.setParameterType (param-type type-name)) + (.setParameterValue (param-value v)))) + +(defmulti ^:private ->QueryParameter + {:arglists '(^QueryParameter [v])} + class) + +(defmethod ->QueryParameter :default + [v] + (param "STRING" v)) + +;; See https://cloud.google.com/spanner/docs/data-types for type mappings + +;; `nil` still has to be given a type (this determines the type it comes back as in cases like `["SELECT ?" nil]`) -- +;; AFAIK this only affects native queries because `NULL` is usually spliced into the compiled SQL directly in MBQL +;; queries. Unfortunately we don't know the actual type we should set here so `STRING` is going to have to do for now. +;; This shouldn't really matter anyways since `WHERE field = NULL` generally doesn't work (we have to do `WHERE FIELD +;; IS NULL` instead) +(defmethod ->QueryParameter nil + [_] + (doto (QueryParameter.) + (.setParameterType (param-type "STRING")) + (.setParameterValue (doto (QueryParameterValue.) + (.setValue nil))))) + +(defmethod ->QueryParameter String [v] (param "STRING" v)) +(defmethod ->QueryParameter Boolean [v] (param "BOOL" v)) +(defmethod ->QueryParameter Integer [v] (param "INT64" v)) +(defmethod ->QueryParameter Long [v] (param "INT64" v)) +(defmethod ->QueryParameter Short [v] (param "INT64" v)) +(defmethod ->QueryParameter Byte [v] (param "INT64" v)) +(defmethod ->QueryParameter clojure.lang.BigInt [v] (param "INT64" v)) +(defmethod ->QueryParameter Float [v] (param "FLOAT64" v)) +(defmethod ->QueryParameter Double [v] (param "FLOAT64" v)) +(defmethod ->QueryParameter java.math.BigDecimal [v] (param "FLOAT64" v)) + +(defmethod ->QueryParameter java.time.LocalDate [t] (param "DATE" (u.date/format t))) +(defmethod ->QueryParameter java.time.LocalDateTime [t] (param "DATETIME" (u.date/format t))) +(defmethod ->QueryParameter java.time.LocalTime [t] (param "TIME" (u.date/format t))) +(defmethod ->QueryParameter java.time.OffsetTime [t] (param "TIME" (u.date/format + (t/local-time + (t/with-offset-same-instant t (t/zone-offset 0)))))) +(defmethod ->QueryParameter java.time.OffsetDateTime [t] (param "TIMESTAMP" (u.date/format t))) +(defmethod ->QueryParameter java.time.ZonedDateTime [t] (param "TIMESTAMP" (u.date/format + (t/offset-date-time + (t/with-zone-same-instant t (t/zone-id "UTC")))))) + +(defn- query-parameter ^QueryParameter [value] + (let [param (->QueryParameter value)] + (log/tracef "Set parameter ^%s %s -> %s" (some-> value class .getCanonicalName) (pr-str value) (pr-str param)) + param)) + +(defn set-parameters! + "Set the `parameters` (i.e., values for `?` positional placeholders in the SQL) for a `query` request. Equivalent to + JDBC `.setObject()` and the like." + ^QueryRequest [^QueryRequest query parameters] + (if (seq parameters) + (doto query + (.setParameterMode "POSITIONAL") + (.setQueryParameters (apply list (map query-parameter parameters)))) + query)) diff --git a/src/metabase/driver/bigquery_alt/query_processor.clj b/src/metabase/driver/bigquery_alt/query_processor.clj old mode 100755 new mode 100644 index 1247c03..68104c1 --- a/src/metabase/driver/bigquery_alt/query_processor.clj +++ b/src/metabase/driver/bigquery_alt/query_processor.clj @@ -1,30 +1,25 @@ (ns metabase.driver.bigquery_alt.query-processor (:require [clojure.string :as str] [clojure.tools.logging :as log] - [honeysql - [core :as hsql] - [format :as hformat] - [helpers :as h]] + [honeysql.core :as hsql] + [honeysql.format :as hformat] + [honeysql.helpers :as h] [java-time :as t] - [metabase - [driver :as driver] - [util :as u]] + [metabase.driver :as driver] [metabase.driver.sql :as sql] [metabase.driver.sql.parameters.substitution :as sql.params.substitution] [metabase.driver.sql.query-processor :as sql.qp] [metabase.driver.sql.util.unprepare :as unprepare] [metabase.mbql.util :as mbql.u] - [metabase.models - [field :refer [Field]] - [table :as table]] - [metabase.query-processor - [error-type :as error-type] - [store :as qp.store]] - [metabase.util - [date-2 :as u.date] - [honeysql-extensions :as hx] - [i18n :refer [tru]] - [schema :as su]] + [metabase.models.field :refer [Field]] + [metabase.models.setting :as setting] + [metabase.models.table :as table] + [metabase.query-processor.error-type :as error-type] + [metabase.query-processor.store :as qp.store] + [metabase.util :as u] + [metabase.util.date-2 :as u.date] + [metabase.util.honeysql-extensions :as hx] + [metabase.util.i18n :refer [tru]] [schema.core :as s] [toucan.db :as db]) (:import [java.time LocalDate LocalDateTime LocalTime OffsetDateTime OffsetTime ZonedDateTime] @@ -55,28 +50,38 @@ (defmulti parse-result-of-type "Parse the values that come back in results of a BigQuery query based on their column type." - {:arglists '([column-type timezone-id v])} - (fn [column-type _ _] column-type)) + {:arglists '([column-type column-mode timezone-id v])} + (fn [column-type _ _ _] column-type)) + +(defn- parse-value + [column-mode v parse-fn] + ;; For results from a query like `SELECT [1,2]`, BigQuery sets the column-mode to `REPEATED` and wraps the column in an ArrayList, + ;; with ArrayMap entries, like: `ArrayList(ArrayMap("v", 1), ArrayMap("v", 2))` + (if (= "REPEATED" column-mode) + (for [result v + ^java.util.Map$Entry entry result] + (parse-fn (.getValue entry))) + (parse-fn v))) (defmethod parse-result-of-type :default - [_ _ v] - v) + [_ column-mode _ v] + (parse-value column-mode v identity)) (defmethod parse-result-of-type "BOOLEAN" - [_ _ v] - (Boolean/parseBoolean v)) + [_ column-mode _ v] + (parse-value column-mode v #(Boolean/parseBoolean %))) (defmethod parse-result-of-type "FLOAT" - [_ _ v] - (Double/parseDouble v)) + [_ column-mode _ v] + (parse-value column-mode v #(Double/parseDouble %))) (defmethod parse-result-of-type "INTEGER" - [_ _ v] - (Long/parseLong v)) + [_ column-mode _ v] + (parse-value column-mode v #(Long/parseLong %))) (defmethod parse-result-of-type "NUMERIC" - [_ _ v] - (bigdec v)) + [_ column-mode _ v] + (parse-value column-mode v bigdec)) (defn- parse-timestamp-str [timezone-id s] ;; Timestamp strings either come back as ISO-8601 strings or Unix timestamps in µs, e.g. "1.3963104E9" @@ -86,26 +91,27 @@ (u.date/parse s timezone-id))) (defmethod parse-result-of-type "DATE" - [_ timezone-id s] - (parse-timestamp-str timezone-id s)) + [_ column-mode timezone-id v] + (parse-value column-mode v (partial parse-timestamp-str timezone-id))) (defmethod parse-result-of-type "DATETIME" - [_ timezone-id s] - (parse-timestamp-str timezone-id s)) + [_ column-mode timezone-id v] + (parse-value column-mode v (partial parse-timestamp-str timezone-id))) (defmethod parse-result-of-type "TIMESTAMP" - [_ timezone-id s] - (parse-timestamp-str timezone-id s)) + [_ column-mode timezone-id v] + (parse-value column-mode v (partial parse-timestamp-str timezone-id))) (defmethod parse-result-of-type "TIME" - [_ timezone-id s] - (u.date/parse s timezone-id)) + [_ column-mode timezone-id v] + (parse-value column-mode v (fn [v] (u.date/parse v timezone-id)))) ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | SQL Driver Methods | ;;; +----------------------------------------------------------------------------------------------------------------+ + (def ^:private temporal-type-hierarchy (-> (make-hierarchy) (derive :date :temporal-type) @@ -166,10 +172,10 @@ (if (contains? (meta x) :bigquery_alt/temporal-type) (:bigquery_alt/temporal-type (meta x)) (mbql.u/match-one x - [:field-id id] (temporal-type (qp.store/field id)) - [:field-literal _ base-type] (base-type->temporal-type base-type)))) + [:field-id id] (temporal-type (qp.store/field id)) + [:field-literal _ base-type] (base-type->temporal-type base-type)))) -(defn- with-temporal-type [x new-type] +(defn- with-temporal-type {:style/indent 0} [x new-type] (if (= (temporal-type x) new-type) x (vary-meta x assoc :bigquery_alt/temporal-type new-type))) @@ -183,7 +189,7 @@ (defn- throw-unsupported-conversion [from to] (throw (ex-info (tru "Cannot convert a {0} to a {1}" from to) - {:type error-type/invalid-query}))) + {:type error-type/invalid-query}))) (defmethod ->temporal-type [:date LocalTime] [_ t] (throw-unsupported-conversion "time" "date")) (defmethod ->temporal-type [:date OffsetTime] [_ t] (throw-unsupported-conversion "time" "date")) @@ -241,7 +247,7 @@ bigquery-type (do (log/tracef "Coercing %s (temporal type = %s) to %s" (binding [*print-meta* true] (pr-str x)) (pr-str (temporal-type x)) bigquery-type) - (with-temporal-type (hx/cast bigquery-type (sql.qp/->honeysql :bigquery x)) target-type)) + (with-temporal-type (hx/cast bigquery-type (sql.qp/->honeysql :bigquery_alt x)) target-type)) :else x)))) @@ -250,7 +256,6 @@ [target-type [_ t unit]] [:absolute-datetime (->temporal-type target-type t) unit]) - (def ^:private temporal-type->supported-units {:timestamp #{:microsecond :millisecond :second :minute :hour :day} :datetime #{:microsecond :millisecond :second :minute :hour :day :week :month :quarter :year} @@ -287,44 +292,105 @@ [target-type trunc-form] (map->TruncForm (update trunc-form :hsql-form (partial ->temporal-type target-type)))) - (defn- trunc "Generate a SQL call an appropriate truncation function, depending on the temporal type of `expr`." [unit hsql-form] (TruncForm. hsql-form unit)) +(def ^:private valid-date-extract-units + #{:dayofweek :day :dayofyear :week :isoweek :month :quarter :year :isoyear}) + +(def ^:private valid-time-extract-units + #{:microsecond :millisecond :second :minute :hour}) (defn- extract [unit expr] - (with-temporal-type (hsql/call :extract unit (->temporal-type :timestamp expr)) nil)) + (condp = (temporal-type expr) + :time + (do + (assert (valid-time-extract-units unit) + (tru "Cannot extract {0} from a TIME field" unit)) + (recur unit (with-temporal-type (hsql/call :timestamp (hsql/call :datetime "1970-01-01" expr)) + :timestamp))) + + ;; timestamp and date both support extract() + :date + (do + (assert (valid-date-extract-units unit) + (tru "Cannot extract {0} from a DATE field" unit)) + (with-temporal-type (hsql/call :extract unit expr) nil)) + + :timestamp + (do + (assert (or (valid-date-extract-units unit) + (valid-time-extract-units unit)) + (tru "Cannot extract {0} from a DATETIME or TIMESTAMP" unit)) + (with-temporal-type (hsql/call :extract unit expr) nil)) + + ;; for datetimes or anything without a known temporal type, cast to timestamp and go from there + (recur unit (->temporal-type :timestamp expr)))) (defmethod sql.qp/date [:bigquery_alt :minute] [_ _ expr] (trunc :minute expr)) (defmethod sql.qp/date [:bigquery_alt :minute-of-hour] [_ _ expr] (extract :minute expr)) (defmethod sql.qp/date [:bigquery_alt :hour] [_ _ expr] (trunc :hour expr)) (defmethod sql.qp/date [:bigquery_alt :hour-of-day] [_ _ expr] (extract :hour expr)) (defmethod sql.qp/date [:bigquery_alt :day] [_ _ expr] (trunc :day expr)) -(defmethod sql.qp/date [:bigquery_alt :day-of-week] [_ _ expr] (extract :dayofweek expr)) (defmethod sql.qp/date [:bigquery_alt :day-of-month] [_ _ expr] (extract :day expr)) (defmethod sql.qp/date [:bigquery_alt :day-of-year] [_ _ expr] (extract :dayofyear expr)) -(defmethod sql.qp/date [:bigquery_alt :week] [_ _ expr] (trunc :week expr)) -;; ; BigQuery's impl of `week` uses 0 for the first week; we use 1 -(defmethod sql.qp/date [:bigquery_alt :week-of-year] [_ _ expr] (-> (extract :week expr) hx/inc)) (defmethod sql.qp/date [:bigquery_alt :month] [_ _ expr] (trunc :month expr)) (defmethod sql.qp/date [:bigquery_alt :month-of-year] [_ _ expr] (extract :month expr)) (defmethod sql.qp/date [:bigquery_alt :quarter] [_ _ expr] (trunc :quarter expr)) (defmethod sql.qp/date [:bigquery_alt :quarter-of-year] [_ _ expr] (extract :quarter expr)) (defmethod sql.qp/date [:bigquery_alt :year] [_ _ expr] (trunc :year expr)) +(defmethod sql.qp/date [:bigquery_alt :day-of-week] + [_ _ expr] + (sql.qp/adjust-day-of-week :bigquery_alt (extract :dayofweek expr))) + +(defmethod sql.qp/date [:bigquery_alt :week] + [_ _ expr] + (trunc (keyword (format "week(%s)" (name (setting/get-keyword :start-of-week)))) expr)) + (doseq [[unix-timestamp-type bigquery-fn] {:seconds :timestamp_seconds - :milliseconds :timestamp_millis}] - (defmethod sql.qp/unix-timestamp->timestamp [:bigquery_alt unix-timestamp-type] + :milliseconds :timestamp_millis + :microseconds :timestamp_micros}] + (defmethod sql.qp/unix-timestamp->honeysql [:bigquery_alt unix-timestamp-type] [_ _ expr] - (vary-meta (hsql/call bigquery-fn expr) assoc :bigquery_alt/temporal-type :timestamp))) + (with-temporal-type (hsql/call bigquery-fn expr) :timestamp))) + +(defmethod sql.qp/->float :bigquery_alt + [_ value] + (hx/cast :float64 value)) + +(defmethod sql.qp/->honeysql [:bigquery_alt :regex-match-first] + [driver [_ arg pattern]] + (hsql/call :regexp_extract (sql.qp/->honeysql driver arg) (sql.qp/->honeysql driver pattern))) + +(defn- percentile->quantile + [x] + (loop [x (double x) + power (int 0)] + (if (zero? (- x (Math/floor x))) + [(Math/round x) (Math/round (Math/pow 10 power))] + (recur (* 10 x) (inc power))))) + +(defmethod sql.qp/->honeysql [:bigquery_alt :percentile] + [driver [_ arg p]] + (let [[offset quantiles] (percentile->quantile p)] + (hsql/raw (format "APPROX_QUANTILES(%s, %s)[OFFSET(%s)]" + (hformat/to-sql (sql.qp/->honeysql driver arg)) + quantiles + offset)))) + +(defmethod sql.qp/->honeysql [:bigquery_alt :median] + [driver [_ arg]] + (sql.qp/->honeysql driver [:percentile arg 0.5])) ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Query Processor | ;;; +----------------------------------------------------------------------------------------------------------------+ + (defn- should-qualify-identifier? "Should we qualify an Identifier with the dataset name? @@ -348,17 +414,15 @@ ;; Only qualify Field identifiers that are qualified by a Table. (e.g. don't qualify stuff inside `CREATE TABLE` ;; DDL statements) (and (= identifier-type :field) - (>= (count components) 2)) + (>= (count components) 2)) true)) - (defmethod sql.qp/->honeysql [:bigquery_alt (class Field)] [driver field] (let [parent-method (get-method sql.qp/->honeysql [:sql (class Field)]) identifier (parent-method driver field)] (with-temporal-type identifier (temporal-type field)))) - (defmethod sql.qp/->honeysql [:bigquery_alt Identifier] [_ {:keys [identifier-type components], :as identifier}] (cond-> identifier @@ -367,11 +431,9 @@ (cons table more))) (and (= identifier-type :field) - (> (count components) 2)) + (> (count components) 2)) (update :components (fn [[table & more]] - more)) - )) - + more)))) (doseq [clause-type [:datetime-field :field-literal :field-id]] (defmethod sql.qp/->honeysql [:bigquery_alt clause-type] @@ -379,21 +441,13 @@ (let [hsql-form ((get-method sql.qp/->honeysql [:sql clause-type]) driver clause)] (with-temporal-type hsql-form (temporal-type clause))))) -(defmethod sql.qp/->honeysql [:bigquery :relative-datetime] +(defmethod sql.qp/->honeysql [:bigquery_alt :relative-datetime] [driver clause] ;; wrap the parent method, converting the result if `clause` itself is typed (let [t (temporal-type clause)] (cond->> ((get-method sql.qp/->honeysql [:sql :relative-datetime]) driver clause) t (->temporal-type t)))) - -(s/defn ^:private honeysql-form->sql :- s/Str - [driver, honeysql-form :- su/Map] - (let [[sql & args :as sql+args] (sql.qp/format-honeysql driver honeysql-form)] - (if (seq args) - (unprepare/unprepare driver sql+args) - sql))) - ;; From the dox: Fields must contain only letters, numbers, and underscores, start with a letter or underscore, and be ;; at most 128 characters long. (defmethod driver/format-custom-field-name :bigquery_alt @@ -403,22 +457,17 @@ (str/replace #"(^\d)" "_$1"))] (subs replaced-str 0 (min 128 (count replaced-str))))) -;; These provide implementations of `->honeysql` that prevent HoneySQL from converting forms to prepared statement -;; parameters (`?` symbols) -(defmethod sql.qp/->honeysql [:bigquery_alt String] - [_ s] - (hx/literal s)) - -(defmethod sql.qp/->honeysql [:bigquery_alt Boolean] - [_ bool] - (hsql/raw (if bool "TRUE" "FALSE"))) - ;; See: ;; -;; * https://cloud.google.com/bigquery_alt/docs/reference/standard-sql/timestamp_functions -;; * https://cloud.google.com/bigquery_alt/docs/reference/standard-sql/time_functions -;; * https://cloud.google.com/bigquery_alt/docs/reference/standard-sql/date_functions -;; * https://cloud.google.com/bigquery_alt/docs/reference/standard-sql/datetime_functions +;; * https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions +;; * https://cloud.google.com/bigquery/docs/reference/standard-sql/time_functions +;; * https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions +;; * https://cloud.google.com/bigquery/docs/reference/standard-sql/datetime_functions + +(defmethod unprepare/unprepare-value [:bigquery_alt String] + [_ s] + ;; escape single-quotes like Cam's String -> Cam\'s String + (str \' (str/replace s "'" "\\\\'") \')) (defmethod unprepare/unprepare-value [:bigquery_alt LocalTime] [_ t] @@ -446,24 +495,29 @@ [_ t] (format "timestamp \"%s %s\"" (u.date/format-sql (t/local-date-time t)) (.getId (t/zone-id t)))) - (defmethod sql.qp/field->identifier :bigquery_alt [_ {table-id :table_id, field-name :name, :as field}] ;; TODO - Making a DB call for each field to fetch its Table is inefficient and makes me cry, but this method is ;; currently only used for SQL params so it's not a huge deal at this point ;; ;; TODO - we should make sure these are in the QP store somewhere and then could at least batch the calls - (let [table-name (db/select-one-field :name table/Table :id (u/get-id table-id))] + (let [table-name (db/select-one-field :name table/Table :id (u/the-id table-id))] (with-temporal-type (hx/identifier :field table-name field-name) (temporal-type field)))) (defmethod sql.qp/apply-top-level-clause [:bigquery_alt :breakout] - [driver _ honeysql-form {breakouts :breakout, fields :fields}] + [driver _ honeysql-form {breakouts :breakout, fields :fields, :as query}] (-> honeysql-form ;; Group by all the breakout fields. ;; ;; Unlike other SQL drivers, BigQuery requires that we refer to Fields using the alias we gave them in the ;; `SELECT` clause, rather than repeating their definitions. - ((partial apply h/group) (map (partial sql.qp/field-clause->alias driver) breakouts)) + ((partial apply h/group) (for [breakout breakouts + :let [alias (or (sql.qp/field-clause->alias driver breakout) + (throw (ex-info (tru "Error compiling SQL: breakout does not have an alias") + {:type error-type/qp + :breakout breakout + :query query})))]] + alias)) ;; Add fields form only for fields that weren't specified in :fields clause -- we don't want to include it ;; twice, or HoneySQL will barf ((partial apply h/merge-select) (for [field-clause breakouts @@ -490,8 +544,8 @@ (u/prog1 (into [clause-type] (map (partial ->temporal-type target-type) (cons f args))) - (when (not= [clause (meta clause)] [<> (meta <>)]) - (log/tracef "Coerced -> %s" (binding [*print-meta* true] (pr-str <>)))))) + (when (not= [clause (meta clause)] [<> (meta <>)]) + (log/tracef "Coerced -> %s" (binding [*print-meta* true] (pr-str <>)))))) clause)) (doseq [filter-type [:between := :!= :> :>= :< :<=]] @@ -506,6 +560,7 @@ ;;; | Other Driver / SQLDriver Method Implementations | ;;; +----------------------------------------------------------------------------------------------------------------+ + (defn- interval [amount unit] (hsql/raw (format "INTERVAL %d %s" (int amount) (name unit)))) @@ -515,7 +570,7 @@ ;; the first place (throw (ex-info (tru "Invalid query: you cannot add a {0} to a {1} column." (name unit) (name t-type)) - {:type error-type/invalid-query})))) + {:type error-type/invalid-query})))) ;; We can coerce the HoneySQL form this wraps to whatever we want and generate the appropriate SQL. ;; Thus for something like filtering against a relative datetime @@ -549,7 +604,7 @@ (let [current-type (temporal-type (:hsql-form add-interval-form))] (when (#{[:date :time] [:time :date]} [current-type target-type]) (throw (ex-info (tru "It doesn''t make sense to convert between DATEs and TIMEs!") - {:type error-type/invalid-query})))) + {:type error-type/invalid-query})))) (map->AddIntervalForm (update add-interval-form :hsql-form (partial ->temporal-type target-type)))) (defmethod sql.qp/add-interval-honeysql-form :bigquery_alt @@ -565,13 +620,15 @@ {table-name :name} (some-> source-table-id qp.store/table)] (assert (seq dataset-id)) (binding [sql.qp/*query* (assoc outer-query :dataset-id dataset-id)] - {:query (->> outer-query - (sql.qp/build-honeysql-form driver) - (honeysql-form->sql driver)) - :table-name (or table-name - (when source-query - sql.qp/source-query-alias)) - :mbql? true}))) + (let [[sql & params] (->> outer-query + (sql.qp/mbql->honeysql driver) + (sql.qp/format-honeysql driver))] + {:query sql + :params params + :table-name (or table-name + (when source-query + sql.qp/source-query-alias)) + :mbql? true})))) (defrecord ^:private CurrentMomentForm [t] hformat/ToSql @@ -604,7 +661,7 @@ [driver t] (sql/->prepared-substitution driver (t/offset-date-time t (t/local-time 0) (t/zone-offset 0)))) -(defmethod sql.params.substitution/->replacement-snippet-info [:bigquery FieldFilter] +(defmethod sql.params.substitution/->replacement-snippet-info [:bigquery_alt FieldFilter] [driver {:keys [field], :as field-filter}] (let [field-temporal-type (temporal-type field) parent-method (get-method sql.params.substitution/->replacement-snippet-info [:sql FieldFilter])