From 81eec87234274e55345bcd2cd136be736d0ff0f1 Mon Sep 17 00:00:00 2001 From: Brian Luczkiewicz Date: Wed, 21 Oct 2020 21:56:59 -0400 Subject: [PATCH 1/9] port to 0.36.7 --- src/metabase/driver/bigquery_alt.clj | 88 ++++++---- src/metabase/driver/bigquery_alt/common.clj | 0 .../driver/bigquery_alt/query_processor.clj | 166 ++++++++++++------ 3 files changed, 158 insertions(+), 96 deletions(-) mode change 100755 => 100644 src/metabase/driver/bigquery_alt.clj mode change 100755 => 100644 src/metabase/driver/bigquery_alt/common.clj mode change 100755 => 100644 src/metabase/driver/bigquery_alt/query_processor.clj diff --git a/src/metabase/driver/bigquery_alt.clj b/src/metabase/driver/bigquery_alt.clj old mode 100755 new mode 100644 index a72912e..55e8b55 --- a/src/metabase/driver/bigquery_alt.clj +++ b/src/metabase/driver/bigquery_alt.clj @@ -2,31 +2,33 @@ (:require [clojure [set :as set] [string :as str]] - [clojure.tools.logging :as log] + [clojure.tools.logging :as log] + [medley.core :as m] [metabase [driver :as driver] [util :as u]] [metabase.driver.bigquery_alt [common :as bigquery_alt.common] + [params :as bigquery_alt.params] [query-processor :as bigquery_alt.qp]] - [metabase.driver.google :as google] - [metabase.driver.sql.util.unprepare :as unprepare] + [metabase.driver.google :as google] [metabase.query-processor - [store :as qp.store] [error-type :as error-type] + [store :as qp.store] [timezone :as qp.timezone] [util :as qputil]] - [metabase.util.schema :as su] + [metabase.util.schema :as su] [schema.core :as s]) (:import com.google.api.client.googleapis.auth.oauth2.GoogleCredential com.google.api.client.http.HttpRequestInitializer [com.google.api.services.bigquery Bigquery Bigquery$Builder BigqueryScopes] - [com.google.api.services.bigquery.model QueryRequest QueryResponse Table TableCell TableFieldSchema TableList TableList$Tables TableReference TableRow TableSchema] + [com.google.api.services.bigquery.model QueryRequest QueryResponse Table TableCell TableFieldSchema TableList DatasetList DatasetList$Datasets DatasetReference TableList$Tables TableReference TableRow TableSchema] - java.util.Collections)) + java.util.Collections)) + +(driver/register! :bigquery, :parent #{:google :sql}) -(driver/register! :bigquery_alt, :parent #{:google :sql}) ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Client | @@ -49,11 +51,15 @@ (def ^:private ^{:arglists '([database])} ^Bigquery database->client (comp credential->client database->credential)) +(defn find-project-id + "Select the user specified project-id or the one from the credential, in the case of a service account" + [project-id ^GoogleCredential credential] + (or project-id + (.getServiceAccountProjectId credential))) ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Sync | ;;; +----------------------------------------------------------------------------------------------------------------+ - (defn- ^TableList list-tables "Fetch a page of Tables. By default, fetches the first page; page size is 50. For cases when more than 50 Tables are present, you may fetch subsequent pages by specifying the `page-token`; the token for the next page is returned with a @@ -145,7 +151,6 @@ :name table-name :fields (set (table-schema->metabase-field-info (.getSchema (get-table database dataset-id table-name))))}) - ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Running Queries | ;;; +----------------------------------------------------------------------------------------------------------------+ @@ -184,8 +189,11 @@ ~@body))) (defn- post-process-native - "Parse results of a BigQuery query." - [^QueryResponse resp] + "Parse results of a BigQuery query. `respond` is the same function passed to + `metabase.driver/execute-reducible-query`, and has the signature + + (respond results-metadata rows)" + [respond ^QueryResponse resp] (with-finished-response [response resp] (let [^TableSchema schema (.getSchema response) @@ -194,44 +202,46 @@ (doall (for [^TableFieldSchema field (.getFields schema) :let [column-type (.getType field) + column-mode (.getMode field) method (get-method bigquery_alt.qp/parse-result-of-type column-type)]] - (partial method column-type bigquery_alt.common/*bigquery-timezone-id*))) + (partial method column-type column-mode bigquery_alt.common/*bigquery-timezone-id*))) columns (for [column (table-schema->metabase-field-info schema)] (-> column (set/rename-keys {:base-type :base_type}) - (dissoc :database-type)))] - {:columns (map (comp u/qualified-name :name) columns) - :cols columns - :rows (for [^TableRow row (.getRows response)] - (for [[^TableCell cell, parser] (partition 2 (interleave (.getF row) parsers))] - (when-let [v (.getV cell)] - ;; There is a weird error where everything that *should* be NULL comes back as an Object. - ;; See https://jira.talendforge.org/browse/TBD-1592 - ;; Everything else comes back as a String luckily so we can proceed normally. - (when-not (= (class v) Object) - (parser v)))))}))) + (dissoc :database-type :database-position)))] + (respond + {:cols columns} + (for [^TableRow row (.getRows response)] + (for [[^TableCell cell, parser] (partition 2 (interleave (.getF row) parsers))] + (when-let [v (.getV cell)] + ;; There is a weird error where everything that *should* be NULL comes back as an Object. + ;; See https://jira.talendforge.org/browse/TBD-1592 + ;; Everything else comes back as a String luckily so we can proceed normally. + (when-not (= (class v) Object) + (parser v))))))))) (defn- ^QueryResponse execute-bigquery - ([{{:keys [project-id]} :details, :as database} query-string] - (execute-bigquery (database->client database) project-id query-string)) + ([{{:keys [project-id]} :details, :as database} sql parameters] + (execute-bigquery (database->client database) (find-project-id project-id (database->credential database)) sql parameters)) - ([^Bigquery client, ^String project-id, ^String query-string] - {:pre [client (seq project-id) (seq query-string)]} + ([^Bigquery client ^String project-id ^String sql parameters] + {:pre [client (seq project-id) (seq sql)]} (let [request (doto (QueryRequest.) (.setTimeoutMs (* query-timeout-seconds 1000)) ;; if the query contains a `#legacySQL` directive then use legacy SQL instead of standard SQL - (.setUseLegacySql (str/includes? (str/lower-case query-string) "#legacysql")) - (.setQuery query-string))] + (.setUseLegacySql (str/includes? (str/lower-case sql) "#legacysql")) + (.setQuery sql) + (bigquery_alt.params/set-parameters! parameters))] (google/execute (.query (.jobs client) project-id request))))) -(defn- process-native* [database query-string] +(defn- process-native* [respond database sql parameters] {:pre [(map? database) (map? (:details database))]} ;; automatically retry the query if it times out or otherwise fails. This is on top of the auto-retry added by ;; `execute` (letfn [(thunk [] - (post-process-native (execute-bigquery database query-string)))] + (post-process-native respond (execute-bigquery database sql parameters)))] (try (thunk) (catch Throwable e @@ -243,22 +253,22 @@ (qp.timezone/system-timezone-id) "UTC")) - -(defmethod driver/execute-query :bigquery_alt - [driver {{sql :query, params :params, :keys [table-name mbql?]} :native, :as outer-query}] +(defmethod driver/execute-reducible-query :bigquery_alt + ;; TODO - it doesn't actually cancel queries the way we'd expect + [driver {{sql :query, :keys [params table-name mbql?]} :native, :as outer-query} _ respond] (let [database (qp.store/database)] (binding [bigquery_alt.common/*bigquery-timezone-id* (effective-query-timezone-id database)] (log/tracef "Running BigQuery query in %s timezone" bigquery_alt.common/*bigquery-timezone-id*) - (let [sql (str "-- " (qputil/query->remark outer-query) "\n" (if (seq params) - (unprepare/unprepare driver (cons sql params)) - sql))] - (process-native* database sql))))) + (let [sql (str "-- " (qputil/query->remark :bigquery outer-query) "\n" sql)] + (process-native* respond database sql params))))) ;;; +----------------------------------------------------------------------------------------------------------------+ ;;; | Other Driver Method Impls | ;;; +----------------------------------------------------------------------------------------------------------------+ +(defmethod driver/supports? [:bigquery_alt :percentile-aggregations] [_ _] false) + (defmethod driver/supports? [:bigquery_alt :expressions] [_ _] false) (defmethod driver/supports? [:bigquery_alt :foreign-keys] [_ _] true) diff --git a/src/metabase/driver/bigquery_alt/common.clj b/src/metabase/driver/bigquery_alt/common.clj old mode 100755 new mode 100644 diff --git a/src/metabase/driver/bigquery_alt/query_processor.clj b/src/metabase/driver/bigquery_alt/query_processor.clj old mode 100755 new mode 100644 index 1247c03..7369173 --- a/src/metabase/driver/bigquery_alt/query_processor.clj +++ b/src/metabase/driver/bigquery_alt/query_processor.clj @@ -23,8 +23,7 @@ [metabase.util [date-2 :as u.date] [honeysql-extensions :as hx] - [i18n :refer [tru]] - [schema :as su]] + [i18n :refer [tru]]] [schema.core :as s] [toucan.db :as db]) (:import [java.time LocalDate LocalDateTime LocalTime OffsetDateTime OffsetTime ZonedDateTime] @@ -55,28 +54,38 @@ (defmulti parse-result-of-type "Parse the values that come back in results of a BigQuery query based on their column type." - {:arglists '([column-type timezone-id v])} - (fn [column-type _ _] column-type)) + {:arglists '([column-type column-mode timezone-id v])} + (fn [column-type _ _ _] column-type)) + +(defn- parse-value + [column-mode v parse-fn] + ;; For results from a query like `SELECT [1,2]`, BigQuery sets the column-mode to `REPEATED` and wraps the column in an ArrayList, + ;; with ArrayMap entries, like: `ArrayList(ArrayMap("v", 1), ArrayMap("v", 2))` + (if (= "REPEATED" column-mode) + (for [result v + ^java.util.Map$Entry entry result] + (parse-fn (.getValue entry))) + (parse-fn v))) (defmethod parse-result-of-type :default - [_ _ v] - v) + [_ column-mode _ v] + (parse-value column-mode v identity)) (defmethod parse-result-of-type "BOOLEAN" - [_ _ v] - (Boolean/parseBoolean v)) + [_ column-mode _ v] + (parse-value column-mode v #(Boolean/parseBoolean %))) (defmethod parse-result-of-type "FLOAT" - [_ _ v] - (Double/parseDouble v)) + [_ column-mode _ v] + (parse-value column-mode v #(Double/parseDouble %))) (defmethod parse-result-of-type "INTEGER" - [_ _ v] - (Long/parseLong v)) + [_ column-mode _ v] + (parse-value column-mode v #(Long/parseLong %))) (defmethod parse-result-of-type "NUMERIC" - [_ _ v] - (bigdec v)) + [_ column-mode _ v] + (parse-value column-mode v bigdec)) (defn- parse-timestamp-str [timezone-id s] ;; Timestamp strings either come back as ISO-8601 strings or Unix timestamps in µs, e.g. "1.3963104E9" @@ -86,20 +95,20 @@ (u.date/parse s timezone-id))) (defmethod parse-result-of-type "DATE" - [_ timezone-id s] - (parse-timestamp-str timezone-id s)) + [_ column-mode timezone-id v] + (parse-value column-mode v (partial parse-timestamp-str timezone-id))) (defmethod parse-result-of-type "DATETIME" - [_ timezone-id s] - (parse-timestamp-str timezone-id s)) + [_ column-mode timezone-id v] + (parse-value column-mode v (partial parse-timestamp-str timezone-id))) (defmethod parse-result-of-type "TIMESTAMP" - [_ timezone-id s] - (parse-timestamp-str timezone-id s)) + [_ column-mode timezone-id v] + (parse-value column-mode v (partial parse-timestamp-str timezone-id))) (defmethod parse-result-of-type "TIME" - [_ timezone-id s] - (u.date/parse s timezone-id)) + [_ column-mode timezone-id v] + (parse-value column-mode v (fn [v] (u.date/parse v timezone-id)))) ;;; +----------------------------------------------------------------------------------------------------------------+ @@ -169,7 +178,7 @@ [:field-id id] (temporal-type (qp.store/field id)) [:field-literal _ base-type] (base-type->temporal-type base-type)))) -(defn- with-temporal-type [x new-type] +(defn- with-temporal-type {:style/indent 0} [x new-type] (if (= (temporal-type x) new-type) x (vary-meta x assoc :bigquery_alt/temporal-type new-type))) @@ -287,15 +296,42 @@ [target-type trunc-form] (map->TruncForm (update trunc-form :hsql-form (partial ->temporal-type target-type)))) - (defn- trunc "Generate a SQL call an appropriate truncation function, depending on the temporal type of `expr`." [unit hsql-form] (TruncForm. hsql-form unit)) +(def ^:private valid-date-extract-units + #{:dayofweek :day :dayofyear :week :isoweek :month :quarter :year :isoyear}) + +(def ^:private valid-time-extract-units + #{:microsecond :millisecond :second :minute :hour}) (defn- extract [unit expr] - (with-temporal-type (hsql/call :extract unit (->temporal-type :timestamp expr)) nil)) + (condp = (temporal-type expr) + :time + (do + (assert (valid-time-extract-units unit) + (tru "Cannot extract {0} from a TIME field" unit)) + (recur unit (with-temporal-type (hsql/call :timestamp (hsql/call :datetime "1970-01-01" expr)) + :timestamp))) + + ;; timestamp and date both support extract() + :date + (do + (assert (valid-date-extract-units unit) + (tru "Cannot extract {0} from a DATE field" unit)) + (with-temporal-type (hsql/call :extract unit expr) nil)) + + :timestamp + (do + (assert (or (valid-date-extract-units unit) + (valid-time-extract-units unit)) + (tru "Cannot extract {0} from a DATETIME or TIMESTAMP" unit)) + (with-temporal-type (hsql/call :extract unit expr) nil)) + + ;; for datetimes or anything without a known temporal type, cast to timestamp and go from there + (recur unit (->temporal-type :timestamp expr)))) (defmethod sql.qp/date [:bigquery_alt :minute] [_ _ expr] (trunc :minute expr)) (defmethod sql.qp/date [:bigquery_alt :minute-of-hour] [_ _ expr] (extract :minute expr)) @@ -316,9 +352,37 @@ (doseq [[unix-timestamp-type bigquery-fn] {:seconds :timestamp_seconds :milliseconds :timestamp_millis}] - (defmethod sql.qp/unix-timestamp->timestamp [:bigquery_alt unix-timestamp-type] + (defmethod sql.qp/unix-timestamp->honeysql [:bigquery_alt unix-timestamp-type] [_ _ expr] - (vary-meta (hsql/call bigquery-fn expr) assoc :bigquery_alt/temporal-type :timestamp))) + (with-temporal-type (hsql/call bigquery-fn expr) :timestamp))) + +(defmethod sql.qp/->float :bigquery_alt + [_ value] + (hx/cast :float64 value)) + +(defmethod sql.qp/->honeysql [:bigquery_alt :regex-match-first] + [driver [_ arg pattern]] + (hsql/call :regexp_extract (sql.qp/->honeysql driver arg) (sql.qp/->honeysql driver pattern))) + +(defn- percentile->quantile + [x] + (loop [x (double x) + power (int 0)] + (if (zero? (- x (Math/floor x))) + [(Math/round x) (Math/round (Math/pow 10 power))] + (recur (* 10 x) (inc power))))) + +(defmethod sql.qp/->honeysql [:bigquery_alt :percentile] + [driver [_ arg p]] + (let [[offset quantiles] (percentile->quantile p)] + (hsql/raw (format "APPROX_QUANTILES(%s, %s)[OFFSET(%s)]" + (hformat/to-sql (sql.qp/->honeysql driver arg)) + quantiles + offset)))) + +(defmethod sql.qp/->honeysql [:bigquery_alt :median] + [driver [_ arg]] + (sql.qp/->honeysql driver [:percentile arg 0.5])) ;;; +----------------------------------------------------------------------------------------------------------------+ @@ -386,14 +450,6 @@ (cond->> ((get-method sql.qp/->honeysql [:sql :relative-datetime]) driver clause) t (->temporal-type t)))) - -(s/defn ^:private honeysql-form->sql :- s/Str - [driver, honeysql-form :- su/Map] - (let [[sql & args :as sql+args] (sql.qp/format-honeysql driver honeysql-form)] - (if (seq args) - (unprepare/unprepare driver sql+args) - sql))) - ;; From the dox: Fields must contain only letters, numbers, and underscores, start with a letter or underscore, and be ;; at most 128 characters long. (defmethod driver/format-custom-field-name :bigquery_alt @@ -403,22 +459,17 @@ (str/replace #"(^\d)" "_$1"))] (subs replaced-str 0 (min 128 (count replaced-str))))) -;; These provide implementations of `->honeysql` that prevent HoneySQL from converting forms to prepared statement -;; parameters (`?` symbols) -(defmethod sql.qp/->honeysql [:bigquery_alt String] - [_ s] - (hx/literal s)) - -(defmethod sql.qp/->honeysql [:bigquery_alt Boolean] - [_ bool] - (hsql/raw (if bool "TRUE" "FALSE"))) - ;; See: ;; -;; * https://cloud.google.com/bigquery_alt/docs/reference/standard-sql/timestamp_functions -;; * https://cloud.google.com/bigquery_alt/docs/reference/standard-sql/time_functions -;; * https://cloud.google.com/bigquery_alt/docs/reference/standard-sql/date_functions -;; * https://cloud.google.com/bigquery_alt/docs/reference/standard-sql/datetime_functions +;; * https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions +;; * https://cloud.google.com/bigquery/docs/reference/standard-sql/time_functions +;; * https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions +;; * https://cloud.google.com/bigquery/docs/reference/standard-sql/datetime_functions + +(defmethod unprepare/unprepare-value [:bigquery_alt String] + [_ s] + ;; escape single-quotes like Cam's String -> Cam\'s String + (str \' (str/replace s "'" "\\\\'") \')) (defmethod unprepare/unprepare-value [:bigquery_alt LocalTime] [_ t] @@ -446,7 +497,6 @@ [_ t] (format "timestamp \"%s %s\"" (u.date/format-sql (t/local-date-time t)) (.getId (t/zone-id t)))) - (defmethod sql.qp/field->identifier :bigquery_alt [_ {table-id :table_id, field-name :name, :as field}] ;; TODO - Making a DB call for each field to fetch its Table is inefficient and makes me cry, but this method is @@ -565,13 +615,15 @@ {table-name :name} (some-> source-table-id qp.store/table)] (assert (seq dataset-id)) (binding [sql.qp/*query* (assoc outer-query :dataset-id dataset-id)] - {:query (->> outer-query - (sql.qp/build-honeysql-form driver) - (honeysql-form->sql driver)) - :table-name (or table-name - (when source-query - sql.qp/source-query-alias)) - :mbql? true}))) + (let [[sql & params] (->> outer-query + (sql.qp/build-honeysql-form driver) + (sql.qp/format-honeysql driver))] + {:query sql + :params params + :table-name (or table-name + (when source-query + sql.qp/source-query-alias)) + :mbql? true})))) (defrecord ^:private CurrentMomentForm [t] hformat/ToSql From f55c45b02c6a0d52422deed01eb6e04639d9c772 Mon Sep 17 00:00:00 2001 From: Brian Luczkiewicz Date: Wed, 21 Oct 2020 22:28:28 -0400 Subject: [PATCH 2/9] upgrade schema sync to newer metabase scheme --- src/metabase/driver/bigquery_alt.clj | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/metabase/driver/bigquery_alt.clj b/src/metabase/driver/bigquery_alt.clj index 55e8b55..5fd3468 100644 --- a/src/metabase/driver/bigquery_alt.clj +++ b/src/metabase/driver/bigquery_alt.clj @@ -140,10 +140,11 @@ (s/defn ^:private table-schema->metabase-field-info [schema :- TableSchema] - (for [^TableFieldSchema field (.getFields schema)] - {:name (.getName field) - :database-type (.getType field) - :base-type (bigquery-type->base-type (.getType field))})) + (for [[idx ^TableFieldSchema field] (m/indexed (.getFields schema))] + {:name (.getName field) + :database-type (.getType field) + :base-type (bigquery-type->base-type (.getType field)) + :database-position idx})) (defmethod driver/describe-table :bigquery_alt [_ database {dataset-id :schema, table-name :name}] From cce5406cf134c160634a123767b6b92916d5dc4e Mon Sep 17 00:00:00 2001 From: Brian Luczkiewicz Date: Wed, 21 Oct 2020 22:28:33 -0400 Subject: [PATCH 3/9] enable expressions --- src/metabase/driver/bigquery_alt.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/metabase/driver/bigquery_alt.clj b/src/metabase/driver/bigquery_alt.clj index 5fd3468..7bc52ce 100644 --- a/src/metabase/driver/bigquery_alt.clj +++ b/src/metabase/driver/bigquery_alt.clj @@ -270,7 +270,7 @@ (defmethod driver/supports? [:bigquery_alt :percentile-aggregations] [_ _] false) -(defmethod driver/supports? [:bigquery_alt :expressions] [_ _] false) +(defmethod driver/supports? [:bigquery_alt :expressions] [_ _] true) (defmethod driver/supports? [:bigquery_alt :foreign-keys] [_ _] true) From 21af15c4e215571d19ca32c14e04afe8f5690b86 Mon Sep 17 00:00:00 2001 From: Brian Luczkiewicz Date: Fri, 23 Oct 2020 23:07:40 -0400 Subject: [PATCH 4/9] fix a few remaining refs to :bigquery --- src/metabase/driver/bigquery_alt.clj | 4 ++-- src/metabase/driver/bigquery_alt/query_processor.clj | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/metabase/driver/bigquery_alt.clj b/src/metabase/driver/bigquery_alt.clj index 7bc52ce..5362297 100644 --- a/src/metabase/driver/bigquery_alt.clj +++ b/src/metabase/driver/bigquery_alt.clj @@ -27,7 +27,7 @@ TableList$Tables TableReference TableRow TableSchema] java.util.Collections)) -(driver/register! :bigquery, :parent #{:google :sql}) +(driver/register! :bigquery_alt, :parent #{:google :sql}) ;;; +----------------------------------------------------------------------------------------------------------------+ @@ -260,7 +260,7 @@ (let [database (qp.store/database)] (binding [bigquery_alt.common/*bigquery-timezone-id* (effective-query-timezone-id database)] (log/tracef "Running BigQuery query in %s timezone" bigquery_alt.common/*bigquery-timezone-id*) - (let [sql (str "-- " (qputil/query->remark :bigquery outer-query) "\n" sql)] + (let [sql (str "-- " (qputil/query->remark :bigquery_alt outer-query) "\n" sql)] (process-native* respond database sql params))))) diff --git a/src/metabase/driver/bigquery_alt/query_processor.clj b/src/metabase/driver/bigquery_alt/query_processor.clj index 7369173..4cb974d 100644 --- a/src/metabase/driver/bigquery_alt/query_processor.clj +++ b/src/metabase/driver/bigquery_alt/query_processor.clj @@ -250,7 +250,7 @@ bigquery-type (do (log/tracef "Coercing %s (temporal type = %s) to %s" (binding [*print-meta* true] (pr-str x)) (pr-str (temporal-type x)) bigquery-type) - (with-temporal-type (hx/cast bigquery-type (sql.qp/->honeysql :bigquery x)) target-type)) + (with-temporal-type (hx/cast bigquery-type (sql.qp/->honeysql :bigquery_alt x)) target-type)) :else x)))) @@ -443,7 +443,7 @@ (let [hsql-form ((get-method sql.qp/->honeysql [:sql clause-type]) driver clause)] (with-temporal-type hsql-form (temporal-type clause))))) -(defmethod sql.qp/->honeysql [:bigquery :relative-datetime] +(defmethod sql.qp/->honeysql [:bigquery_alt :relative-datetime] [driver clause] ;; wrap the parent method, converting the result if `clause` itself is typed (let [t (temporal-type clause)] @@ -656,7 +656,7 @@ [driver t] (sql/->prepared-substitution driver (t/offset-date-time t (t/local-time 0) (t/zone-offset 0)))) -(defmethod sql.params.substitution/->replacement-snippet-info [:bigquery FieldFilter] +(defmethod sql.params.substitution/->replacement-snippet-info [:bigquery_alt FieldFilter] [driver {:keys [field], :as field-filter}] (let [field-temporal-type (temporal-type field) parent-method (get-method sql.params.substitution/->replacement-snippet-info [:sql FieldFilter]) From e10d60227c47c234b0bca1ffefcd2989df9d2840 Mon Sep 17 00:00:00 2001 From: Brian Luczkiewicz Date: Fri, 23 Oct 2020 23:07:54 -0400 Subject: [PATCH 5/9] add missing file --- src/metabase/driver/bigquery_alt/params.clj | 77 +++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 src/metabase/driver/bigquery_alt/params.clj diff --git a/src/metabase/driver/bigquery_alt/params.clj b/src/metabase/driver/bigquery_alt/params.clj new file mode 100644 index 0000000..58e6200 --- /dev/null +++ b/src/metabase/driver/bigquery_alt/params.clj @@ -0,0 +1,77 @@ +(ns metabase.driver.bigquery_alt.params + (:require [clojure.tools.logging :as log] + [java-time :as t] + [metabase.util.date-2 :as u.date]) + (:import [com.google.api.services.bigquery.model QueryParameter QueryParameterType QueryParameterValue QueryRequest])) + +(defn- param-type ^QueryParameterType [^String type-name] + (doto (QueryParameterType.) + (.setType type-name))) + +(defn- param-value ^QueryParameterValue [v] + (doto (QueryParameterValue.) + (.setValue (str v)))) + +(defn- param [type-name v] + (doto (QueryParameter.) + (.setParameterType (param-type type-name)) + (.setParameterValue (param-value v)))) + +(defmulti ^:private ->QueryParameter + {:arglists '(^QueryParameter [v])} + class) + +(defmethod ->QueryParameter :default + [v] + (param "STRING" v)) + +;; See https://cloud.google.com/spanner/docs/data-types for type mappings + +;; `nil` still has to be given a type (this determines the type it comes back as in cases like `["SELECT ?" nil]`) -- +;; AFAIK this only affects native queries because `NULL` is usually spliced into the compiled SQL directly in MBQL +;; queries. Unfortunately we don't know the actual type we should set here so `STRING` is going to have to do for now. +;; This shouldn't really matter anyways since `WHERE field = NULL` generally doesn't work (we have to do `WHERE FIELD +;; IS NULL` instead) +(defmethod ->QueryParameter nil + [_] + (doto (QueryParameter.) + (.setParameterType (param-type "STRING")) + (.setParameterValue (doto (QueryParameterValue.) + (.setValue nil))))) + +(defmethod ->QueryParameter String [v] (param "STRING" v)) +(defmethod ->QueryParameter Boolean [v] (param "BOOL" v)) +(defmethod ->QueryParameter Integer [v] (param "INT64" v)) +(defmethod ->QueryParameter Long [v] (param "INT64" v)) +(defmethod ->QueryParameter Short [v] (param "INT64" v)) +(defmethod ->QueryParameter Byte [v] (param "INT64" v)) +(defmethod ->QueryParameter clojure.lang.BigInt [v] (param "INT64" v)) +(defmethod ->QueryParameter Float [v] (param "FLOAT64" v)) +(defmethod ->QueryParameter Double [v] (param "FLOAT64" v)) +(defmethod ->QueryParameter java.math.BigDecimal [v] (param "FLOAT64" v)) + +(defmethod ->QueryParameter java.time.LocalDate [t] (param "DATE" (u.date/format t))) +(defmethod ->QueryParameter java.time.LocalDateTime [t] (param "DATETIME" (u.date/format t))) +(defmethod ->QueryParameter java.time.LocalTime [t] (param "TIME" (u.date/format t))) +(defmethod ->QueryParameter java.time.OffsetTime [t] (param "TIME" (u.date/format + (t/local-time + (t/with-offset-same-instant t (t/zone-offset 0)))))) +(defmethod ->QueryParameter java.time.OffsetDateTime [t] (param "TIMESTAMP" (u.date/format t))) +(defmethod ->QueryParameter java.time.ZonedDateTime [t] (param "TIMESTAMP" (u.date/format + (t/offset-date-time + (t/with-zone-same-instant t (t/zone-id "UTC")))))) + +(defn- query-parameter ^QueryParameter [value] + (let [param (->QueryParameter value)] + (log/tracef "Set parameter ^%s %s -> %s" (some-> value class .getCanonicalName) (pr-str value) (pr-str param)) + param)) + +(defn set-parameters! + "Set the `parameters` (i.e., values for `?` positional placeholders in the SQL) for a `query` request. Equivalent to + JDBC `.setObject()` and the like." + ^QueryRequest [^QueryRequest query parameters] + (if (seq parameters) + (doto query + (.setParameterMode "POSITIONAL") + (.setQueryParameters (apply list (map query-parameter parameters)))) + query)) From fe99c22a5ba97be7f46fc1dc2e219a637afc5993 Mon Sep 17 00:00:00 2001 From: Brian Luczkiewicz Date: Fri, 23 Oct 2020 23:32:29 -0400 Subject: [PATCH 6/9] update for v0.37.0.1 --- src/metabase/driver/bigquery_alt.clj | 100 +++++++++++++----- .../driver/bigquery_alt/query_processor.clj | 14 ++- 2 files changed, 83 insertions(+), 31 deletions(-) diff --git a/src/metabase/driver/bigquery_alt.clj b/src/metabase/driver/bigquery_alt.clj index 5362297..607d1ad 100644 --- a/src/metabase/driver/bigquery_alt.clj +++ b/src/metabase/driver/bigquery_alt.clj @@ -22,7 +22,7 @@ (:import com.google.api.client.googleapis.auth.oauth2.GoogleCredential com.google.api.client.http.HttpRequestInitializer [com.google.api.services.bigquery Bigquery Bigquery$Builder BigqueryScopes] - [com.google.api.services.bigquery.model QueryRequest QueryResponse Table TableCell TableFieldSchema TableList + [com.google.api.services.bigquery.model GetQueryResultsResponse QueryRequest QueryResponse Table TableCell TableFieldSchema TableList DatasetList DatasetList$Datasets DatasetReference TableList$Tables TableReference TableRow TableSchema] java.util.Collections)) @@ -158,15 +158,34 @@ (def ^:private ^:const ^Integer query-timeout-seconds 60) +(def ^:private ^:dynamic ^Long max-results-per-page + "Maximum number of rows to return per page in a query." + 20000) + +(def ^:private ^:dynamic page-callback + "Callback to execute when a new page is retrieved, used for testing" + nil) + +(defprotocol GetJobComplete + "A Clojure protocol for the .getJobComplete method on disparate Google BigQuery results" + (get-job-complete [this] "Call .getJobComplete on a BigQuery API response")) + +(extend-protocol GetJobComplete + com.google.api.services.bigquery.model.QueryResponse + (get-job-complete [this] (.getJobComplete ^QueryResponse this)) + + com.google.api.services.bigquery.model.GetQueryResultsResponse + (get-job-complete [this] (.getJobComplete ^GetQueryResultsResponse this))) + (defn do-with-finished-response "Impl for `with-finished-response`." {:style/indent 1} - [^QueryResponse response, f] + [response f] ;; 99% of the time by the time this is called `.getJobComplete` will return `true`. On the off chance it doesn't, ;; wait a few seconds for the job to finish. (loop [remaining-timeout (double query-timeout-seconds)] (cond - (.getJobComplete response) + (get-job-complete response) (f response) (pos? remaining-timeout) @@ -186,16 +205,44 @@ [[response-binding response] & body] `(do-with-finished-response ~response - (fn [~(vary-meta response-binding assoc :tag 'com.google.api.services.bigquery.model.QueryResponse)] + (fn [~response-binding] ~@body))) +(defn- ^GetQueryResultsResponse get-query-results + [^Bigquery client ^String project-id ^String job-id ^String page-token] + (when page-callback + (page-callback)) + (let [request (doto (.getQueryResults (.jobs client) project-id job-id) + (.setMaxResults max-results-per-page) + (.setPageToken page-token))] + (google/execute request))) + +(defn- ^GetQueryResultsResponse execute-bigquery + ([{{:keys [project-id]} :details, :as database} sql parameters] + (execute-bigquery (database->client database) (find-project-id project-id (database->credential database)) sql parameters)) + + ([^Bigquery client ^String project-id ^String sql parameters] + {:pre [client (seq project-id) (seq sql)]} + (let [request (doto (QueryRequest.) + (.setTimeoutMs (* query-timeout-seconds 1000)) + ;; if the query contains a `#legacySQL` directive then use legacy SQL instead of standard SQL + (.setUseLegacySql (str/includes? (str/lower-case sql) "#legacysql")) + (.setQuery sql) + (bigquery_alt.params/set-parameters! parameters)) + query-response ^QueryResponse (google/execute (.query (.jobs client) project-id request)) + job-ref (.getJobReference query-response) + job-id (.getJobId job-ref) + proj-id (.getProjectId job-ref)] + (with-finished-response [_ query-response] + (get-query-results client proj-id job-id nil))))) + (defn- post-process-native "Parse results of a BigQuery query. `respond` is the same function passed to `metabase.driver/execute-reducible-query`, and has the signature (respond results-metadata rows)" - [respond ^QueryResponse resp] - (with-finished-response [response resp] + [database respond ^GetQueryResultsResponse resp] + (with-finished-response [^GetQueryResultsResponse response resp] (let [^TableSchema schema (.getSchema response) @@ -214,35 +261,30 @@ (dissoc :database-type :database-position)))] (respond {:cols columns} - (for [^TableRow row (.getRows response)] - (for [[^TableCell cell, parser] (partition 2 (interleave (.getF row) parsers))] - (when-let [v (.getV cell)] + (letfn [(fetch-page [^GetQueryResultsResponse response] + (lazy-cat + (.getRows response) + (when-let [next-page-token (.getPageToken response)] + (with-finished-response [next-resp (get-query-results (database->client database) + (.getProjectId (.getJobReference response)) + (.getJobId (.getJobReference response)) + next-page-token)] + (fetch-page next-resp)))))] + (for [^TableRow row (fetch-page response)] + (for [[^TableCell cell, parser] (partition 2 (interleave (.getF row) parsers))] + (when-let [v (.getV cell)] ;; There is a weird error where everything that *should* be NULL comes back as an Object. ;; See https://jira.talendforge.org/browse/TBD-1592 ;; Everything else comes back as a String luckily so we can proceed normally. - (when-not (= (class v) Object) - (parser v))))))))) - -(defn- ^QueryResponse execute-bigquery - ([{{:keys [project-id]} :details, :as database} sql parameters] - (execute-bigquery (database->client database) (find-project-id project-id (database->credential database)) sql parameters)) - - ([^Bigquery client ^String project-id ^String sql parameters] - {:pre [client (seq project-id) (seq sql)]} - (let [request (doto (QueryRequest.) - (.setTimeoutMs (* query-timeout-seconds 1000)) - ;; if the query contains a `#legacySQL` directive then use legacy SQL instead of standard SQL - (.setUseLegacySql (str/includes? (str/lower-case sql) "#legacysql")) - (.setQuery sql) - (bigquery_alt.params/set-parameters! parameters))] - (google/execute (.query (.jobs client) project-id request))))) + (when-not (= (class v) Object) + (parser v)))))))))) (defn- process-native* [respond database sql parameters] {:pre [(map? database) (map? (:details database))]} ;; automatically retry the query if it times out or otherwise fails. This is on top of the auto-retry added by ;; `execute` (letfn [(thunk [] - (post-process-native respond (execute-bigquery database sql parameters)))] + (post-process-native database respond (execute-bigquery database sql parameters)))] (try (thunk) (catch Throwable e @@ -256,7 +298,7 @@ (defmethod driver/execute-reducible-query :bigquery_alt ;; TODO - it doesn't actually cancel queries the way we'd expect - [driver {{sql :query, :keys [params table-name mbql?]} :native, :as outer-query} _ respond] + [_ {{sql :query, :keys [params]} :native, :as outer-query} _ respond] (let [database (qp.store/database)] (binding [bigquery_alt.common/*bigquery-timezone-id* (effective-query-timezone-id database)] (log/tracef "Running BigQuery query in %s timezone" bigquery_alt.common/*bigquery-timezone-id*) @@ -277,3 +319,7 @@ ;; BigQuery is always in UTC (defmethod driver/db-default-timezone :bigquery_alt [_ _] "UTC") + +(defmethod driver/db-start-of-week :bigquery_alt + [_] + :sunday) diff --git a/src/metabase/driver/bigquery_alt/query_processor.clj b/src/metabase/driver/bigquery_alt/query_processor.clj index 4cb974d..1048644 100644 --- a/src/metabase/driver/bigquery_alt/query_processor.clj +++ b/src/metabase/driver/bigquery_alt/query_processor.clj @@ -16,6 +16,7 @@ [metabase.mbql.util :as mbql.u] [metabase.models [field :refer [Field]] + [setting :as setting] [table :as table]] [metabase.query-processor [error-type :as error-type] @@ -338,18 +339,23 @@ (defmethod sql.qp/date [:bigquery_alt :hour] [_ _ expr] (trunc :hour expr)) (defmethod sql.qp/date [:bigquery_alt :hour-of-day] [_ _ expr] (extract :hour expr)) (defmethod sql.qp/date [:bigquery_alt :day] [_ _ expr] (trunc :day expr)) -(defmethod sql.qp/date [:bigquery_alt :day-of-week] [_ _ expr] (extract :dayofweek expr)) (defmethod sql.qp/date [:bigquery_alt :day-of-month] [_ _ expr] (extract :day expr)) (defmethod sql.qp/date [:bigquery_alt :day-of-year] [_ _ expr] (extract :dayofyear expr)) -(defmethod sql.qp/date [:bigquery_alt :week] [_ _ expr] (trunc :week expr)) -;; ; BigQuery's impl of `week` uses 0 for the first week; we use 1 -(defmethod sql.qp/date [:bigquery_alt :week-of-year] [_ _ expr] (-> (extract :week expr) hx/inc)) (defmethod sql.qp/date [:bigquery_alt :month] [_ _ expr] (trunc :month expr)) (defmethod sql.qp/date [:bigquery_alt :month-of-year] [_ _ expr] (extract :month expr)) (defmethod sql.qp/date [:bigquery_alt :quarter] [_ _ expr] (trunc :quarter expr)) (defmethod sql.qp/date [:bigquery_alt :quarter-of-year] [_ _ expr] (extract :quarter expr)) (defmethod sql.qp/date [:bigquery_alt :year] [_ _ expr] (trunc :year expr)) +(defmethod sql.qp/date [:bigquery_alt :day-of-week] + [_ _ expr] + (sql.qp/adjust-day-of-week :bigquery_alt (extract :dayofweek expr))) + +(defmethod sql.qp/date [:bigquery_alt :week] + [_ _ expr] + (trunc (keyword (format "week(%s)" (name (setting/get-keyword :start-of-week)))) expr)) + + (doseq [[unix-timestamp-type bigquery-fn] {:seconds :timestamp_seconds :milliseconds :timestamp_millis}] (defmethod sql.qp/unix-timestamp->honeysql [:bigquery_alt unix-timestamp-type] From 00fd198e759e631883d30cacdf0ec4b8de5260ab Mon Sep 17 00:00:00 2001 From: Abdullah Date: Wed, 17 Feb 2021 11:22:08 -0500 Subject: [PATCH 7/9] ported changes from master --- project.clj | 3 +- resources/metabase-plugin.yaml | 21 ---- src/metabase/driver/bigquery_alt.clj | 119 +++++++++--------- .../driver/bigquery_alt/query_processor.clj | 79 ++++++------ 4 files changed, 102 insertions(+), 120 deletions(-) diff --git a/project.clj b/project.clj index 7b62ea0..06910da 100755 --- a/project.clj +++ b/project.clj @@ -5,7 +5,7 @@ {"install-for-building-drivers" ["with-profile" "+install-for-building-drivers" "install"]} :dependencies - [[com.google.apis/google-api-services-bigquery "v2-rev20190917-1.30.3"]] + [[com.google.apis/google-api-services-bigquery "v2-rev20200523-1.30.9"]] :profiles {:provided @@ -18,7 +18,6 @@ {:auto-clean true :aot :all} - :uberjar {:auto-clean true :aot :all diff --git a/resources/metabase-plugin.yaml b/resources/metabase-plugin.yaml index 6c4856f..758dc3a 100755 --- a/resources/metabase-plugin.yaml +++ b/resources/metabase-plugin.yaml @@ -11,27 +11,6 @@ driver: parent: - google - sql - connection-properties: - - name: project-id - display-name: Project ID - placeholder: praxis-beacon-120871 - required: true - - name: client-id - display-name: Client ID - placeholder: 1201327674725-y6ferb0feo1hfssr7t40o4aikqll46d4.apps.googleusercontent.com - required: true - - name: client-secret - display-name: Client Secret - placeholder: dJNi4utWgMzyIFo2JbnsK6Np - required: true - - name: auth-code - display-name: Auth Code - placeholder: 4/HSk-KtxkSzTt61j5zcbee2Rmm5JHkRFbL5gD5lgkXek - required: true - - name: use-jvm-timezone - display-name: Use JVM Time Zone - default: false - type: boolean init: - step: load-namespace namespace: metabase.driver.bigquery-alt diff --git a/src/metabase/driver/bigquery_alt.clj b/src/metabase/driver/bigquery_alt.clj index 607d1ad..66892d3 100644 --- a/src/metabase/driver/bigquery_alt.clj +++ b/src/metabase/driver/bigquery_alt.clj @@ -1,22 +1,19 @@ (ns metabase.driver.bigquery_alt - (:require [clojure - [set :as set] - [string :as str]] + (:require [clojure.set :as set] + [clojure.string :as str] [clojure.tools.logging :as log] [medley.core :as m] - [metabase - [driver :as driver] - [util :as u]] - [metabase.driver.bigquery_alt - [common :as bigquery_alt.common] - [params :as bigquery_alt.params] - [query-processor :as bigquery_alt.qp]] + [metabase.driver :as driver] + [metabase.driver.bigquery_alt.common :as bigquery_alt.common] + [metabase.driver.bigquery_alt.params :as bigquery_alt.params] + [metabase.driver.bigquery_alt.query-processor :as bigquery_alt.qp] [metabase.driver.google :as google] - [metabase.query-processor - [error-type :as error-type] - [store :as qp.store] - [timezone :as qp.timezone] - [util :as qputil]] + [metabase.query-processor.error-type :as error-type] + [metabase.query-processor.store :as qp.store] + [metabase.query-processor.timezone :as qp.timezone] + [metabase.query-processor.util :as qputil] + [metabase.util :as u] + [metabase.util.i18n :refer [tru]] [metabase.util.schema :as su] [schema.core :as s]) (:import com.google.api.client.googleapis.auth.oauth2.GoogleCredential @@ -34,6 +31,7 @@ ;;; | Client | ;;; +----------------------------------------------------------------------------------------------------------------+ + (defn- ^Bigquery credential->client [^GoogleCredential credential] (.build (doto (Bigquery$Builder. google/http-transport @@ -68,48 +66,46 @@ (list-tables database nil)) ([{{:keys [project-id]} :details, :as database}, ^String dataset-id] - (list-tables (database->client database) project-id dataset-id nil)) + (let [db-client (database->client database) + project-id (find-project-id project-id (database->credential database))] + (list-tables (database->client database) project-id dataset-id nil))) ([{{:keys [project-id]} :details, :as database}, ^String dataset-id, ^String page-token-or-nil] - (list-tables (database->client database) project-id dataset-id page-token-or-nil)) + (list-tables (database->client database) (find-project-id project-id (database->credential database)) dataset-id page-token-or-nil)) ([^Bigquery client, ^String project-id, ^String dataset-id, ^String page-token-or-nil] {:pre [client (seq project-id) (seq dataset-id)]} (google/execute (u/prog1 (.list (.tables client) project-id dataset-id) - (.setPageToken <> page-token-or-nil))))) - + (.setPageToken <> page-token-or-nil))))) (defn- ^DatasetList list-datasets ([{{:keys [project-id]} :details, :as database}] - {:pre [database (seq project-id)]} - (google/execute (u/prog1 (.list (.datasets (database->client database)) project-id))))) - + {:pre [database (find-project-id project-id (database->credential database))]} + (google/execute (u/prog1 (.list (.datasets (database->client database)) (find-project-id project-id (database->credential database))))))) (defmethod driver/describe-database :bigquery_alt [_ database] ;; first page through all the 50-table pages until we stop getting "next page tokens" - (let [datasets (loop [datasets [], ^DatasetList dataset-list (list-datasets database)] - (let [datasets (concat datasets (.getDatasets dataset-list))] - (for [^DatasetList$Datasets dataset datasets - :let [^DatasetReference datasetref (.getDatasetReference dataset)]] - (.getDatasetId datasetref))))] + (let [datasets (loop [datasets [], ^DatasetList dataset-list (list-datasets database)] + (let [datasets (concat datasets (.getDatasets dataset-list))] + (for [^DatasetList$Datasets dataset datasets + :let [^DatasetReference datasetref (.getDatasetReference dataset)]] + (.getDatasetId datasetref))))] (let [tables (loop [tables [], dataset_id (first datasets), dataset_list (next datasets)] (let [tables (concat tables (loop [dataset_tables [], ^TableList table-list (list-tables database dataset_id)] - (let [dataset_tables (concat dataset_tables (.getTables table-list))] - (if-let [next-page-token (.getNextPageToken table-list)] - (recur dataset_tables (list-tables database dataset_id next-page-token)) - dataset_tables))))] - (if (and (seq dataset_list)) - (recur tables (first dataset_list) (next dataset_list)) - tables)) - )] + (let [dataset_tables (concat dataset_tables (.getTables table-list))] + (if-let [next-page-token (.getNextPageToken table-list)] + (recur dataset_tables (list-tables database dataset_id next-page-token)) + dataset_tables))))] + (if (and (seq dataset_list)) + (recur tables (first dataset_list) (next dataset_list)) + tables)))] ;; after that convert the results to MB format - {:tables (set (for [^TableList$Tables table tables - :let [^TableReference tableref (.getTableReference table)]] - {:schema (.getDatasetId tableref), :name (.getTableId tableref)}))}))) - + {:tables (set (for [^TableList$Tables table tables + :let [^TableReference tableref (.getTableReference table)]] + {:schema (.getDatasetId tableref), :name (.getTableId tableref)}))}))) (defmethod driver/can-connect? :bigquery_alt [_ details-map] @@ -119,7 +115,7 @@ (s/defn get-table :- Table ([{{:keys [project-id]} :details, :as database} dataset-id table-id] - (get-table (database->client database) project-id dataset-id table-id)) + (get-table (database->client database) (find-project-id project-id (database->credential database)) dataset-id table-id)) ([client :- Bigquery, project-id :- su/NonBlankString, dataset-id :- su/NonBlankString, table-id :- su/NonBlankString] (google/execute (.get (.tables client) project-id dataset-id table-id)))) @@ -148,7 +144,7 @@ (defmethod driver/describe-table :bigquery_alt [_ database {dataset-id :schema, table-name :name}] - {:schema dataset-id + {:schema dataset-id :name table-name :fields (set (table-schema->metabase-field-info (.getSchema (get-table database dataset-id table-name))))}) @@ -209,12 +205,13 @@ ~@body))) (defn- ^GetQueryResultsResponse get-query-results - [^Bigquery client ^String project-id ^String job-id ^String page-token] + [^Bigquery client ^String project-id ^String job-id ^String location ^String page-token] (when page-callback (page-callback)) (let [request (doto (.getQueryResults (.jobs client) project-id job-id) (.setMaxResults max-results-per-page) - (.setPageToken page-token))] + (.setPageToken page-token) + (.setLocation location))] (google/execute request))) (defn- ^GetQueryResultsResponse execute-bigquery @@ -223,18 +220,23 @@ ([^Bigquery client ^String project-id ^String sql parameters] {:pre [client (seq project-id) (seq sql)]} - (let [request (doto (QueryRequest.) - (.setTimeoutMs (* query-timeout-seconds 1000)) + (try + (let [request (doto (QueryRequest.) + (.setTimeoutMs (* query-timeout-seconds 1000)) ;; if the query contains a `#legacySQL` directive then use legacy SQL instead of standard SQL - (.setUseLegacySql (str/includes? (str/lower-case sql) "#legacysql")) - (.setQuery sql) - (bigquery_alt.params/set-parameters! parameters)) - query-response ^QueryResponse (google/execute (.query (.jobs client) project-id request)) - job-ref (.getJobReference query-response) - job-id (.getJobId job-ref) - proj-id (.getProjectId job-ref)] - (with-finished-response [_ query-response] - (get-query-results client proj-id job-id nil))))) + (.setUseLegacySql (str/includes? (str/lower-case sql) "#legacysql")) + (.setQuery sql) + (bigquery_alt.params/set-parameters! parameters)) + query-response ^QueryResponse (google/execute (.query (.jobs client) project-id request)) + job-ref (.getJobReference query-response) + location (.getLocation job-ref) + job-id (.getJobId job-ref) + proj-id (.getProjectId job-ref)] + (with-finished-response [_ query-response] + (get-query-results client proj-id job-id location nil))) + (catch Throwable e + (throw (ex-info (tru "Error executing query") + {:type error-type/invalid-query, :sql sql, :parameters parameters})))))) (defn- post-process-native "Parse results of a BigQuery query. `respond` is the same function passed to @@ -276,8 +278,8 @@ ;; There is a weird error where everything that *should* be NULL comes back as an Object. ;; See https://jira.talendforge.org/browse/TBD-1592 ;; Everything else comes back as a String luckily so we can proceed normally. - (when-not (= (class v) Object) - (parser v)))))))))) + (when-not (= (class v) Object) + (parser v)))))))))) (defn- process-native* [respond database sql parameters] {:pre [(map? database) (map? (:details database))]} @@ -302,7 +304,9 @@ (let [database (qp.store/database)] (binding [bigquery_alt.common/*bigquery-timezone-id* (effective-query-timezone-id database)] (log/tracef "Running BigQuery query in %s timezone" bigquery_alt.common/*bigquery-timezone-id*) - (let [sql (str "-- " (qputil/query->remark :bigquery_alt outer-query) "\n" sql)] + (let [sql (if (get-in database [:details :include-user-id-and-hash] true) + (str "-- " (qputil/query->remark :bigquery outer-query) "\n" sql) + sql)] (process-native* respond database sql params))))) @@ -310,6 +314,7 @@ ;;; | Other Driver Method Impls | ;;; +----------------------------------------------------------------------------------------------------------------+ + (defmethod driver/supports? [:bigquery_alt :percentile-aggregations] [_ _] false) (defmethod driver/supports? [:bigquery_alt :expressions] [_ _] true) diff --git a/src/metabase/driver/bigquery_alt/query_processor.clj b/src/metabase/driver/bigquery_alt/query_processor.clj index 1048644..68104c1 100644 --- a/src/metabase/driver/bigquery_alt/query_processor.clj +++ b/src/metabase/driver/bigquery_alt/query_processor.clj @@ -1,30 +1,25 @@ (ns metabase.driver.bigquery_alt.query-processor (:require [clojure.string :as str] [clojure.tools.logging :as log] - [honeysql - [core :as hsql] - [format :as hformat] - [helpers :as h]] + [honeysql.core :as hsql] + [honeysql.format :as hformat] + [honeysql.helpers :as h] [java-time :as t] - [metabase - [driver :as driver] - [util :as u]] + [metabase.driver :as driver] [metabase.driver.sql :as sql] [metabase.driver.sql.parameters.substitution :as sql.params.substitution] [metabase.driver.sql.query-processor :as sql.qp] [metabase.driver.sql.util.unprepare :as unprepare] [metabase.mbql.util :as mbql.u] - [metabase.models - [field :refer [Field]] - [setting :as setting] - [table :as table]] - [metabase.query-processor - [error-type :as error-type] - [store :as qp.store]] - [metabase.util - [date-2 :as u.date] - [honeysql-extensions :as hx] - [i18n :refer [tru]]] + [metabase.models.field :refer [Field]] + [metabase.models.setting :as setting] + [metabase.models.table :as table] + [metabase.query-processor.error-type :as error-type] + [metabase.query-processor.store :as qp.store] + [metabase.util :as u] + [metabase.util.date-2 :as u.date] + [metabase.util.honeysql-extensions :as hx] + [metabase.util.i18n :refer [tru]] [schema.core :as s] [toucan.db :as db]) (:import [java.time LocalDate LocalDateTime LocalTime OffsetDateTime OffsetTime ZonedDateTime] @@ -116,6 +111,7 @@ ;;; | SQL Driver Methods | ;;; +----------------------------------------------------------------------------------------------------------------+ + (def ^:private temporal-type-hierarchy (-> (make-hierarchy) (derive :date :temporal-type) @@ -176,8 +172,8 @@ (if (contains? (meta x) :bigquery_alt/temporal-type) (:bigquery_alt/temporal-type (meta x)) (mbql.u/match-one x - [:field-id id] (temporal-type (qp.store/field id)) - [:field-literal _ base-type] (base-type->temporal-type base-type)))) + [:field-id id] (temporal-type (qp.store/field id)) + [:field-literal _ base-type] (base-type->temporal-type base-type)))) (defn- with-temporal-type {:style/indent 0} [x new-type] (if (= (temporal-type x) new-type) @@ -193,7 +189,7 @@ (defn- throw-unsupported-conversion [from to] (throw (ex-info (tru "Cannot convert a {0} to a {1}" from to) - {:type error-type/invalid-query}))) + {:type error-type/invalid-query}))) (defmethod ->temporal-type [:date LocalTime] [_ t] (throw-unsupported-conversion "time" "date")) (defmethod ->temporal-type [:date OffsetTime] [_ t] (throw-unsupported-conversion "time" "date")) @@ -260,7 +256,6 @@ [target-type [_ t unit]] [:absolute-datetime (->temporal-type target-type t) unit]) - (def ^:private temporal-type->supported-units {:timestamp #{:microsecond :millisecond :second :minute :hour :day} :datetime #{:microsecond :millisecond :second :minute :hour :day :week :month :quarter :year} @@ -315,7 +310,7 @@ (assert (valid-time-extract-units unit) (tru "Cannot extract {0} from a TIME field" unit)) (recur unit (with-temporal-type (hsql/call :timestamp (hsql/call :datetime "1970-01-01" expr)) - :timestamp))) + :timestamp))) ;; timestamp and date both support extract() :date @@ -355,9 +350,9 @@ [_ _ expr] (trunc (keyword (format "week(%s)" (name (setting/get-keyword :start-of-week)))) expr)) - (doseq [[unix-timestamp-type bigquery-fn] {:seconds :timestamp_seconds - :milliseconds :timestamp_millis}] + :milliseconds :timestamp_millis + :microseconds :timestamp_micros}] (defmethod sql.qp/unix-timestamp->honeysql [:bigquery_alt unix-timestamp-type] [_ _ expr] (with-temporal-type (hsql/call bigquery-fn expr) :timestamp))) @@ -395,6 +390,7 @@ ;;; | Query Processor | ;;; +----------------------------------------------------------------------------------------------------------------+ + (defn- should-qualify-identifier? "Should we qualify an Identifier with the dataset name? @@ -418,17 +414,15 @@ ;; Only qualify Field identifiers that are qualified by a Table. (e.g. don't qualify stuff inside `CREATE TABLE` ;; DDL statements) (and (= identifier-type :field) - (>= (count components) 2)) + (>= (count components) 2)) true)) - (defmethod sql.qp/->honeysql [:bigquery_alt (class Field)] [driver field] (let [parent-method (get-method sql.qp/->honeysql [:sql (class Field)]) identifier (parent-method driver field)] (with-temporal-type identifier (temporal-type field)))) - (defmethod sql.qp/->honeysql [:bigquery_alt Identifier] [_ {:keys [identifier-type components], :as identifier}] (cond-> identifier @@ -437,11 +431,9 @@ (cons table more))) (and (= identifier-type :field) - (> (count components) 2)) + (> (count components) 2)) (update :components (fn [[table & more]] - more)) - )) - + more)))) (doseq [clause-type [:datetime-field :field-literal :field-id]] (defmethod sql.qp/->honeysql [:bigquery_alt clause-type] @@ -509,17 +501,23 @@ ;; currently only used for SQL params so it's not a huge deal at this point ;; ;; TODO - we should make sure these are in the QP store somewhere and then could at least batch the calls - (let [table-name (db/select-one-field :name table/Table :id (u/get-id table-id))] + (let [table-name (db/select-one-field :name table/Table :id (u/the-id table-id))] (with-temporal-type (hx/identifier :field table-name field-name) (temporal-type field)))) (defmethod sql.qp/apply-top-level-clause [:bigquery_alt :breakout] - [driver _ honeysql-form {breakouts :breakout, fields :fields}] + [driver _ honeysql-form {breakouts :breakout, fields :fields, :as query}] (-> honeysql-form ;; Group by all the breakout fields. ;; ;; Unlike other SQL drivers, BigQuery requires that we refer to Fields using the alias we gave them in the ;; `SELECT` clause, rather than repeating their definitions. - ((partial apply h/group) (map (partial sql.qp/field-clause->alias driver) breakouts)) + ((partial apply h/group) (for [breakout breakouts + :let [alias (or (sql.qp/field-clause->alias driver breakout) + (throw (ex-info (tru "Error compiling SQL: breakout does not have an alias") + {:type error-type/qp + :breakout breakout + :query query})))]] + alias)) ;; Add fields form only for fields that weren't specified in :fields clause -- we don't want to include it ;; twice, or HoneySQL will barf ((partial apply h/merge-select) (for [field-clause breakouts @@ -546,8 +544,8 @@ (u/prog1 (into [clause-type] (map (partial ->temporal-type target-type) (cons f args))) - (when (not= [clause (meta clause)] [<> (meta <>)]) - (log/tracef "Coerced -> %s" (binding [*print-meta* true] (pr-str <>)))))) + (when (not= [clause (meta clause)] [<> (meta <>)]) + (log/tracef "Coerced -> %s" (binding [*print-meta* true] (pr-str <>)))))) clause)) (doseq [filter-type [:between := :!= :> :>= :< :<=]] @@ -562,6 +560,7 @@ ;;; | Other Driver / SQLDriver Method Implementations | ;;; +----------------------------------------------------------------------------------------------------------------+ + (defn- interval [amount unit] (hsql/raw (format "INTERVAL %d %s" (int amount) (name unit)))) @@ -571,7 +570,7 @@ ;; the first place (throw (ex-info (tru "Invalid query: you cannot add a {0} to a {1} column." (name unit) (name t-type)) - {:type error-type/invalid-query})))) + {:type error-type/invalid-query})))) ;; We can coerce the HoneySQL form this wraps to whatever we want and generate the appropriate SQL. ;; Thus for something like filtering against a relative datetime @@ -605,7 +604,7 @@ (let [current-type (temporal-type (:hsql-form add-interval-form))] (when (#{[:date :time] [:time :date]} [current-type target-type]) (throw (ex-info (tru "It doesn''t make sense to convert between DATEs and TIMEs!") - {:type error-type/invalid-query})))) + {:type error-type/invalid-query})))) (map->AddIntervalForm (update add-interval-form :hsql-form (partial ->temporal-type target-type)))) (defmethod sql.qp/add-interval-honeysql-form :bigquery_alt @@ -622,7 +621,7 @@ (assert (seq dataset-id)) (binding [sql.qp/*query* (assoc outer-query :dataset-id dataset-id)] (let [[sql & params] (->> outer-query - (sql.qp/build-honeysql-form driver) + (sql.qp/mbql->honeysql driver) (sql.qp/format-honeysql driver))] {:query sql :params params From 7c05dcc3338f2bba3e433afcedea91b5ca871db8 Mon Sep 17 00:00:00 2001 From: Abdullah Date: Thu, 18 Feb 2021 11:55:34 -0500 Subject: [PATCH 8/9] added service account connection-properties --- resources/metabase-plugin.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/resources/metabase-plugin.yaml b/resources/metabase-plugin.yaml index 758dc3a..f7bb2bd 100755 --- a/resources/metabase-plugin.yaml +++ b/resources/metabase-plugin.yaml @@ -11,6 +11,19 @@ driver: parent: - google - sql + connection-properties: + - name: "service-account-json" + display-name: "Service account JSON file" + type: "textFile" + required: true + - name: use-jvm-timezone + display-name: "Use JVM Time Zone" + type: boolean + default: false + - name: "include-user-id-and-hash" + display-name: "Include User ID and query hash" + type: boolean + default: true init: - step: load-namespace namespace: metabase.driver.bigquery-alt From c98a238b099b61f265092b929cc86b84233ebf51 Mon Sep 17 00:00:00 2001 From: Abdullah Date: Mon, 8 Mar 2021 22:23:42 -0500 Subject: [PATCH 9/9] get paginated databases --- src/metabase/driver/bigquery_alt.clj | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/metabase/driver/bigquery_alt.clj b/src/metabase/driver/bigquery_alt.clj index 66892d3..81384d7 100644 --- a/src/metabase/driver/bigquery_alt.clj +++ b/src/metabase/driver/bigquery_alt.clj @@ -79,16 +79,27 @@ (.setPageToken <> page-token-or-nil))))) (defn- ^DatasetList list-datasets - ([{{:keys [project-id]} :details, :as database}] - {:pre [database (find-project-id project-id (database->credential database))]} - (google/execute (u/prog1 (.list (.datasets (database->client database)) (find-project-id project-id (database->credential database))))))) + ([database] + (list-datasets database nil)) + + ([{{:keys [project-id]} :details, :as database}, ^String page-token-or-nil] + (list-datasets (database->client database) (find-project-id project-id (database->credential database)) page-token-or-nil)) + + ([^Bigquery client, ^String project-id, ^String page-token-or-nil] + {:pre [client (seq project-id)]} + (google/execute (u/prog1 (.list (.datasets client) project-id) + (.setPageToken <> page-token-or-nil))))) (defmethod driver/describe-database :bigquery_alt [_ database] ;; first page through all the 50-table pages until we stop getting "next page tokens" (let [datasets (loop [datasets [], ^DatasetList dataset-list (list-datasets database)] - (let [datasets (concat datasets (.getDatasets dataset-list))] + (let [datasets (concat datasets (loop [bq_datasets [], ^DatasetList dataset-list (list-datasets database)] + (let [bq_datasets (concat bq_datasets (.getDatasets dataset-list))] + (if-let [next-page-token (.getNextPageToken dataset-list)] + (recur bq_datasets (list-datasets database next-page-token)) + bq_datasets))))] (for [^DatasetList$Datasets dataset datasets :let [^DatasetReference datasetref (.getDatasetReference dataset)]] (.getDatasetId datasetref))))]