From 6fd7db9406269b73260ebef73d9dc45b086c82db Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 23 Nov 2020 15:28:19 -0800 Subject: [PATCH 01/90] feat: tf gcs-bq-ingest module sets bq permissions ... on multiple projects fixup! --- .../gcs_ocn_bq_ingest_function/README.md | 3 ++- .../gcs_ocn_bq_ingest_function/main.tf | 16 ++++++++++++++++ .../gcs_ocn_bq_ingest_function/variables.tf | 7 ++++++- udfs/tests/.gitignore | 1 + 4 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 udfs/tests/.gitignore diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md index 1e42b1966..d5859d5cb 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md @@ -28,6 +28,7 @@ documented [here](../gcs_ocn_bq_ingest_function/README.md) | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | app\_id | Application Name | `any` | n/a | yes | +| bigquery\_project\_ids | Project IDs to grant bigquery Admin / Job user for the data ingester account | `list(string)` | `[]` | no | | cloudfunctions\_source\_bucket | GCS bucket to store Cloud Functions Source | `any` | n/a | yes | | data\_ingester\_sa | Service Account Email responsible for ingesting data to BigQuery | `any` | n/a | yes | | destination\_regex | A [Python Regex with named capturing groups](https://docs.python.org/3/howto/regex.html#non-capturing-and-named-groups) for destination `dataset`, `table`, (optional: `partition`, `batch`) | `string` | `""` | no | @@ -36,7 +37,7 @@ documented [here](../gcs_ocn_bq_ingest_function/README.md) | input\_prefix | GCS prefix to watch for new files in input\_bucket | `any` | `null` | no | | job\_prefix | Prefix for BigQuery 
Job IDs | `string` | `""` | no | | max\_batch\_bytes | Max bytes for BigQuery Load job | `string` | `""` | no | -| project\_id | GCP Project ID | `any` | n/a | yes | +| project\_id | GCP Project ID containing cloud function, and input bucket | `any` | n/a | yes | | region | GCP region in which to deploy cloud function | `string` | `"us-central1"` | no | | success\_filename | Filename to trigger a load of a prefix | `string` | `""` | no | | use\_pubsub\_notifications | Setting this to true will use Pub/Sub notifications By default we will use Cloud Functions Event direct notifications. See https://cloud.google.com/storage/docs/pubsub-notifications. | `bool` | `false` | no | diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf index 204d9bb42..cd62642ea 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf @@ -92,6 +92,22 @@ module "data_ingester_service_account" { ] } +# Grant the ingester service account permissions to run load jobs and mutate +# data in the target project +resource "google_project_iam_binding" "ingester_bq_job_user" { + for_each = toset(concat(var.bigquery_project_ids, [var.project_id])) + project = each.key + members = [module.data_ingester_service_account.iam_email] + role = "roles/bigquery.jobUser" +} + +resource "google_project_iam_binding" "ingester_bq_admin" { + for_each = toset(concat(var.bigquery_project_ids, [var.project_id])) + project = each.key + members = [module.data_ingester_service_account.iam_email] + role = "roles/bigquery.admin" +} + # Allow the GCS service account to publish notification for new objects to the # notification topic. 
resource "google_pubsub_topic_iam_binding" "gcs_publisher" { diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf index cd5e162bd..e68139b52 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. variable "project_id" { - description = "GCP Project ID" + description = "GCP Project ID containing cloud function, and input bucket" } variable "app_id" { @@ -74,3 +74,8 @@ variable "use_pubsub_notifications" { default = false } +variable "bigquery_project_ids" { + description = "Additional project IDs to grant bigquery Admin / Job user for the data ingester account" + type = list(string) + default = [] +} diff --git a/udfs/tests/.gitignore b/udfs/tests/.gitignore new file mode 100644 index 000000000..c18dd8d83 --- /dev/null +++ b/udfs/tests/.gitignore @@ -0,0 +1 @@ +__pycache__/ From d2f00ce903707828b3fc39b8ee02e62cfa7ca259 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 23 Nov 2020 15:56:00 -0800 Subject: [PATCH 02/90] fixup roles --- .../gcs_ocn_bq_ingest_function/README.md | 3 +-- .../gcs_ocn_bq_ingest_function/main.tf | 15 ++++----------- .../gcs_ocn_bq_ingest_function/outputs.tf | 5 +++++ .../gcs_ocn_bq_ingest_function/variables.tf | 3 ++- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md index d5859d5cb..f1acab548 100644 --- 
a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md @@ -6,7 +6,6 @@ for event based ingest of GCS data to BigQuery described [here](../README.md). Note that by default all environment variables for the cloud function will be empty deferring to the defaults implemented in the function and documented [here](../gcs_ocn_bq_ingest_function/README.md) - ## Requirements | Name | Version | @@ -28,7 +27,7 @@ documented [here](../gcs_ocn_bq_ingest_function/README.md) | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | app\_id | Application Name | `any` | n/a | yes | -| bigquery\_project\_ids | Project IDs to grant bigquery Admin / Job user for the data ingester account | `list(string)` | `[]` | no | +| bigquery\_project\_ids | Additional project IDs to grant bigquery Admin / Job user for the data ingester account | `list(string)` | `[]` | no | | cloudfunctions\_source\_bucket | GCS bucket to store Cloud Functions Source | `any` | n/a | yes | | data\_ingester\_sa | Service Account Email responsible for ingesting data to BigQuery | `any` | n/a | yes | | destination\_regex | A [Python Regex with named capturing groups](https://docs.python.org/3/howto/regex.html#non-capturing-and-named-groups) for destination `dataset`, `table`, (optional: `partition`, `batch`) | `string` | `""` | no | diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf index cd62642ea..faf9b3b82 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf @@ -36,6 +36,7 @@ module "bucket" { } resource 
"google_storage_notification" "notification" { + depends_on = [google_pubsub_topic_iam_binding.gcs_publisher] count = var.use_pubsub_notifications ? 1 : 0 bucket = module.bucket.bucket object_name_prefix = var.input_prefix @@ -88,24 +89,16 @@ module "data_ingester_service_account" { names = [var.data_ingester_sa, ] project_roles = [ "${var.project_id}=>roles/bigquery.jobUser", - "${var.project_id}=>roles/bigquery.dataEditor", ] } -# Grant the ingester service account permissions to run load jobs and mutate -# data in the target project -resource "google_project_iam_binding" "ingester_bq_job_user" { - for_each = toset(concat(var.bigquery_project_ids, [var.project_id])) - project = each.key - members = [module.data_ingester_service_account.iam_email] - role = "roles/bigquery.jobUser" -} - +# Grant the ingester service account permissions to mutate data in +# target project(s) resource "google_project_iam_binding" "ingester_bq_admin" { for_each = toset(concat(var.bigquery_project_ids, [var.project_id])) project = each.key members = [module.data_ingester_service_account.iam_email] - role = "roles/bigquery.admin" + role = "roles/bigquery.dataEditor" } # Allow the GCS service account to publish notification for new objects to the diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/outputs.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/outputs.tf index 8ba2f4025..e34d2d0f4 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/outputs.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/outputs.tf @@ -16,3 +16,8 @@ output "cloud-function" { value = google_cloudfunctions_function.gcs_to_bq } +output "data-ingester-sa" { + description = "data ingester service account email created as cloud function identity" + value = module.data_ingester_service_account.email +} + diff --git 
a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf index e68139b52..0452e9769 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf @@ -75,7 +75,8 @@ variable "use_pubsub_notifications" { } variable "bigquery_project_ids" { - description = "Additional project IDs to grant bigquery Admin / Job user for the data ingester account" + description = "Additional project IDs to grant bigquery Admin for the data ingester account" type = list(string) default = [] } + From 65d9515cbe75abf5c7123844aa3f5c192ff60a80 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 23 Nov 2020 16:00:30 -0800 Subject: [PATCH 03/90] fixup dockerfil ci check --- tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci b/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci index eb12bd903..5cd40aa1e 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci +++ b/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci @@ -1,4 +1,4 @@ FROM python:3.8-slim COPY requirements.txt requirements-dev.txt ./ -RUN pip3 install -r requirements-dev.txt +RUN pip3 install --no-cache-dir -r requirements-dev.txt ENTRYPOINT ["pytest"] From 7fdffd7539cfc47542fbaa48429a3d8395b2eb1a Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 24 Nov 2020 10:54:34 -0800 Subject: [PATCH 04/90] docs: add note on unicode delimiters --- .../cloud_functions/gcs_event_based_ingest/README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/cloud_functions/gcs_event_based_ingest/README.md 
b/tools/cloud_functions/gcs_event_based_ingest/README.md index 9fda82d39..529e17939 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/README.md @@ -133,6 +133,17 @@ The result of merging these would be: This configuration system gives us the ability to DRY up common defaults but override them at whatever level is appropriate as new cases come up. +### Note on Delimiters: Use Unicode +For CSV loads the `fieldDelimiter` in load.json to external.json should be +specified as a unicode character _not_ a hexidecimal character as hexidecimal +characters will confuse python's `json.load` function. +For example ctrl-P should be specified as: +```json +{ + "fieldDelimiter": "\u0010" +} +``` + #### Transformation SQL In some cases we may need to perform transformations on the files in GCS before they can be loaded to BigQuery. This is handled by query on an From 537f05d1ef78f14a43a8124ac91ea10777c12239 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 30 Nov 2020 12:27:09 -0800 Subject: [PATCH 05/90] fix: update nested values in configs --- .../gcs_event_based_ingest/.flake8 | 2 +- .../gcs_ocn_bq_ingest/main.py | 43 ++++++++-- .../test_gcs_ocn_bq_ingest.py | 83 +++++++++++++++++++ 3 files changed, 118 insertions(+), 10 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/.flake8 b/tools/cloud_functions/gcs_event_based_ingest/.flake8 index dafc87320..732e2a9fc 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/.flake8 +++ b/tools/cloud_functions/gcs_event_based_ingest/.flake8 @@ -1,6 +1,6 @@ [flake8] max-line-length = 110 ignore = E731,W504,I001,W503,E402 -exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,.eggs,*.egg,node_modules,.venv +exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,.eggs,*.egg,node_modules,.venv,.terraform # format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s diff --git 
a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index d05b771db..20d4d7604 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -17,6 +17,8 @@ """Background Cloud Function for loading data from GCS to BigQuery. """ import collections +import collections.abc +import copy import json import os import pathlib @@ -152,9 +154,8 @@ def main(event: Dict, context): # pylint: disable=unused-argument default_query_config = bigquery.QueryJobConfig() default_query_config.use_legacy_sql = False default_query_config.labels = labels - bq_client = bigquery.Client( - client_info=CLIENT_INFO, - default_query_job_config=default_query_config) + bq_client = bigquery.Client(client_info=CLIENT_INFO, + default_query_job_config=default_query_config) print(f"looking for {gsurl}_config/bq_transform.sql") external_query_sql = read_gcs_file_if_exists( @@ -308,10 +309,8 @@ def handle_duplicate_notification(bkt: storage.Bucket, success_created_unix_timestamp = success_blob.time_created.timestamp() claim_blob: storage.Blob = bkt.blob( - success_blob.name.replace( - SUCCESS_FILENAME, - f"_claimed_{success_created_unix_timestamp}") - ) + success_blob.name.replace(SUCCESS_FILENAME, + f"_claimed_{success_created_unix_timestamp}")) try: claim_blob.upload_from_string("", if_generation_match=0) except google.api_core.exceptions.PreconditionFailed as err: @@ -379,9 +378,9 @@ def _get_parent_config(path): config_q.append(json.loads(config)) parts.pop() - merged_config = dict() + merged_config: Dict = {} while config_q: - merged_config.update(config_q.popleft()) + recursive_update(merged_config, config_q.popleft(), in_place=True) print(f"merged_config: {merged_config}") return bigquery.LoadJobConfig.from_api_repr({"load": merged_config}) @@ -549,3 +548,29 @@ def removesuffix(in_str: str, suffix: str) 
-> str: if suffix and in_str.endswith(suffix): return in_str[:-len(suffix)] return in_str[:] + + +def recursive_update( + original: Dict, + update: Dict, + in_place: bool = False +): + """ + return a recursively updated dictionary. + + Note, lists will be completely overwritten by value in update if there is a + conflict. + + original: (dict) the base dictionary + update: (dict) the dictionary of updates to apply on original + in_place: (bool) if true then original will be mutated in place else a new + dictionary as a result of the update will be returned. + """ + out = original if in_place else copy.deepcopy(original) + + for key, value in update.items(): + if isinstance(value, dict): + out[key] = recursive_update(out.get(key, {}), value) + else: + out[key] = value + return out diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py index f3e02a50b..6f983d22d 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py @@ -119,3 +119,86 @@ def test_default_destination_regex(test_input: str, ]) def test_flattend2dlist(test_input, expected): assert gcs_ocn_bq_ingest.main.flatten2dlist(test_input) == expected + + +@pytest.mark.parametrize( + "original, update, expected", + [ + # yapf: disable + ( # empty original + {}, + { + "a": 1 + }, + { + "a": 1 + } + ), + ( # empty update + { + "a": 1 + }, + {}, + { + "a": 1 + }), + ( # basic update of top-level key + { + "a": 1 + }, + { + "a": 2 + }, + { + "a": 2 + }), + ( # update of list + { + "a": [1] + }, + { + "a": [2] + }, + { + "a": [2] + }), + ( # update of nested key + { + "a": { + "b": 1 + } + }, + { + "a": { + "b": 2 + } + }, + { + "a": { + "b": 2 + } + }), + ( # don't drop keys that only appear in original + { + "a": { + 
"b": 1, + "c": 2 + }, + "d": 3 + }, + { + "a": { + "b": 4 + }, + }, + { + "a": { + "b": 4, + "c": 2 + }, + "d": 3 + }), + # yapf: enable + ]) +def test_recursive_update(original, update, expected): + assert gcs_ocn_bq_ingest.main.recursive_update(original, update) == expected From 125ca9f47661cdef47ca76298ab361cd2ecafc35 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 30 Nov 2020 13:13:01 -0800 Subject: [PATCH 06/90] chore: improve error message for wrong external table name (#200) --- .../gcs_ocn_bq_ingest/main.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index d05b771db..32316593e 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -249,8 +249,17 @@ def external_query( # pylint: disable=too-many-arguments while time.monotonic() - start_poll_for_errors < WAIT_FOR_JOB_SECONDS: job.reload() if job.errors: - raise RuntimeError( - f"query job {job.job_id} failed quickly: {job.errors}") + msg = f"query job {job.job_id} failed quickly: {job.errors}" + for err in job.errors: + # BQ gives confusing warning about missing dataset if the + # external query refers to the wrong external table name. + # In this case we can give the end user a little more context. + if "missing dataset" in err.get("message", ""): + raise RuntimeError( + "External queries must select from the external table " + "named 'temp_ext'. This error may be due to specifying" + "the wrong name for the external table. 
" + msg) + raise RuntimeError(msg) time.sleep(JOB_POLL_INTERVAL_SECONDS) From 02458b81f3c599f4925966345586fe2d646cc52b Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 25 Nov 2020 12:03:29 -0800 Subject: [PATCH 07/90] fix: external configs not found in parent dirs --- .../gcs_event_based_ingest/.flake8 | 2 +- .../gcs_ocn_bq_ingest/main.py | 40 ++++++++++--------- .../gcs_event_based_ingest/tests/conftest.py | 29 +++++++++++++- .../test_gcs_ocn_bq_ingest_it.py | 24 +++++++++++ 4 files changed, 74 insertions(+), 21 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/.flake8 b/tools/cloud_functions/gcs_event_based_ingest/.flake8 index dafc87320..732e2a9fc 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/.flake8 +++ b/tools/cloud_functions/gcs_event_based_ingest/.flake8 @@ -1,6 +1,6 @@ [flake8] max-line-length = 110 ignore = E731,W504,I001,W503,E402 -exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,.eggs,*.egg,node_modules,.venv +exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,.eggs,*.egg,node_modules,.venv,.terraform # format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index d05b771db..aab5e8410 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -37,7 +37,10 @@ MAX_SOURCE_URIS_PER_LOAD = 10**4 DEFAULT_EXTERNAL_TABLE_DEFINITION = { - "sourceFormat": "CSV", + # The default must be a self describing data format + # because autodetecting CSV /JSON schemas is likely to not match + # expectations / assumptions of the transformation query. 
+ "sourceFormat": "PARQUET", } DEFAULT_JOB_LABELS = { @@ -152,18 +155,18 @@ def main(event: Dict, context): # pylint: disable=unused-argument default_query_config = bigquery.QueryJobConfig() default_query_config.use_legacy_sql = False default_query_config.labels = labels - bq_client = bigquery.Client( - client_info=CLIENT_INFO, - default_query_job_config=default_query_config) + bq_client = bigquery.Client(client_info=CLIENT_INFO, + default_query_job_config=default_query_config) - print(f"looking for {gsurl}_config/bq_transform.sql") + print("looking for bq_transform.sql") external_query_sql = read_gcs_file_if_exists( gcs_client, f"{gsurl}_config/bq_transform.sql") - print(f"external_query_sql = {external_query_sql}") if not external_query_sql: - external_query_sql = look_for_transform_sql(gcs_client, gsurl) + external_query_sql = look_for_config_in_parents(gcs_client, gsurl, + "bq_transform.sql") if external_query_sql: print("EXTERNAL QUERY") + print(f"found external query:\n{external_query_sql}") external_query(gcs_client, bq_client, gsurl, external_query_sql, dest_table_ref, create_job_id_prefix(dest_table_ref, batch_id)) @@ -217,15 +220,19 @@ def external_query( # pylint: disable=too-many-arguments """ external_table_config = read_gcs_file_if_exists( gcs_client, f"{gsurl}_config/external.json") + if not external_table_config: + external_table_config = look_for_config_in_parents( + gcs_client, gsurl, "external.json") if external_table_config: external_table_def = json.loads(external_table_config) else: print(f"Falling back to default CSV external table." 
- f" {gsurl}/_config/external.json not found.") + f" {gsurl}_config/external.json not found.") external_table_def = DEFAULT_EXTERNAL_TABLE_DEFINITION external_table_def["sourceUris"] = flatten2dlist( get_batches_for_prefix(gcs_client, gsurl)) + print(f"external table def = {json.dumps(external_table_config, indent=2)}") external_config = bigquery.ExternalConfig.from_api_repr(external_table_def) job_config = bigquery.QueryJobConfig( table_definitions={"temp_ext": external_config}, use_legacy_sql=False) @@ -308,10 +315,8 @@ def handle_duplicate_notification(bkt: storage.Bucket, success_created_unix_timestamp = success_blob.time_created.timestamp() claim_blob: storage.Blob = bkt.blob( - success_blob.name.replace( - SUCCESS_FILENAME, - f"_claimed_{success_created_unix_timestamp}") - ) + success_blob.name.replace(SUCCESS_FILENAME, + f"_claimed_{success_created_unix_timestamp}")) try: claim_blob.upload_from_string("", if_generation_match=0) except google.api_core.exceptions.PreconditionFailed as err: @@ -333,16 +338,15 @@ def _get_parent_config_file(storage_client, config_filename, bucket, path): f"gs://{bucket}/{config_path}") -def look_for_transform_sql(storage_client: storage.Client, - gsurl: str) -> Optional[str]: - """look in parent directories for _config/bq_transform.sql""" - config_filename = "bq_transform.sql" +def look_for_config_in_parents(storage_client: storage.Client, gsurl: str, + config_filename: str) -> Optional[str]: + """look in parent directories for _config/config_filename""" blob: storage.Blob = storage.Blob.from_string(gsurl) bucket_name = blob.bucket.name obj_path = blob.name parts = removesuffix(obj_path, "/").split("/") - def _get_parent_query(path): + def _get_parent_config(path): return _get_parent_config_file(storage_client, config_filename, bucket_name, path) @@ -350,7 +354,7 @@ def _get_parent_query(path): while parts: if config: return config - config = _get_parent_query("/".join(parts)) + config = _get_parent_config("/".join(parts)) 
parts.pop() return config diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index c0ae3f8ab..4121ba3fc 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -134,6 +134,30 @@ def teardown(): return data_objs[-1] +@pytest.fixture(scope="function") +@pytest.mark.usefixtures("gcs_bucket", "dest_dataset", "dest_table") +def gcs_data_under_sub_dirs(request, gcs_bucket, dest_dataset, + dest_table) -> storage.blob.Blob: + data_objs = [] + for test_file in ["part-m-00000", "part-m-00001", "_SUCCESS"]: + data_obj: storage.blob.Blob = gcs_bucket.blob("/".join([ + f"{dest_dataset.project}.{dest_dataset.dataset_id}", + dest_table.table_id, "foo", "bar", "baz", test_file + ])) + data_obj.upload_from_filename( + os.path.join(TEST_DIR, "resources", "test-data", "nation", + test_file)) + data_objs.append(data_obj) + + def teardown(): + for do in data_objs: + if do.exists: + do.delete() + + request.addfinalizer(teardown) + return data_objs[-1] + + @pytest.fixture(scope="function") @pytest.mark.usefixtures("gcs_bucket", "dest_dataset", "dest_table") def gcs_truncating_load_config(request, gcs_bucket, dest_dataset, @@ -188,7 +212,7 @@ def gcs_external_config(request, gcs_bucket, dest_dataset, dest_table) -> List[storage.blob.Blob]: config_objs = [] sql_obj = gcs_bucket.blob("/".join([ - dest_dataset.dataset_id, + f"{dest_dataset.project}.{dest_dataset.dataset_id}", dest_table.table_id, "_config", "bq_transform.sql", @@ -198,7 +222,8 @@ def gcs_external_config(request, gcs_bucket, dest_dataset, sql_obj.upload_from_string(sql) config_obj = gcs_bucket.blob("/".join([ - dest_dataset.dataset_id, dest_table.table_id, "_config", "external.json" + f"{dest_dataset.project}.{dest_dataset.dataset_id}", + dest_table.table_id, "_config", "external.json" ])) with open(os.path.join(TEST_DIR, "resources", diff --git 
a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py index a5a81b949..44a5e717a 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py @@ -209,6 +209,30 @@ def test_load_job_partitioned(bq, gcs_partitioned_data, bq_wait_for_rows(bq, dest_partitioned_table, expected_num_rows) +@pytest.mark.IT +def test_look_for_config_in_parents(bq, gcs_data_under_sub_dirs, + gcs_external_config, dest_dataset, + dest_table, mock_env): + """test discovery of configuration files for external query in parent + _config paths. + """ + if not all((blob.exists() for blob in gcs_external_config)): + raise google.cloud.exceptions.NotFound("config objects must exist") + if not gcs_data_under_sub_dirs.exists(): + raise google.cloud.exceptions.NotFound("test data objects must exist") + test_event = { + "attributes": { + "bucketId": gcs_data_under_sub_dirs.bucket.name, + "objectId": gcs_data_under_sub_dirs.name + } + } + gcs_ocn_bq_ingest.main.main(test_event, None) + test_data_file = os.path.join(TEST_DIR, "resources", "test-data", "nation", + "part-m-00001") + expected_num_rows = sum(1 for _ in open(test_data_file)) + bq_wait_for_rows(bq, dest_table, expected_num_rows) + + def bq_wait_for_rows(bq_client: bigquery.Client, table: bigquery.Table, expected_num_rows: int): """ From 1525102768c4df9f9b48b6498a583d66fed5dc36 Mon Sep 17 00:00:00 2001 From: pdunn Date: Tue, 1 Dec 2020 20:32:41 +0000 Subject: [PATCH 08/90] time series UDFs (#198) * time series UDFs * code review changes Co-authored-by: Ryan McDowell --- udfs/community/README.md | 67 ++++++++++++++++++++++ udfs/community/linear_interpolate.sql | 30 ++++++++++ udfs/community/test_cases.yaml | 38 +++++++++++- 
udfs/community/ts_gen_keyed_timestamps.sql | 46 +++++++++++++++ udfs/community/ts_linear_interpolate.sql | 67 ++++++++++++++++++++++ udfs/community/ts_tumble.sql | 30 ++++++++++ 6 files changed, 277 insertions(+), 1 deletion(-) create mode 100644 udfs/community/linear_interpolate.sql create mode 100644 udfs/community/ts_gen_keyed_timestamps.sql create mode 100644 udfs/community/ts_linear_interpolate.sql create mode 100644 udfs/community/ts_tumble.sql diff --git a/udfs/community/README.md b/udfs/community/README.md index a8badc8e5..3107a3b9d 100644 --- a/udfs/community/README.md +++ b/udfs/community/README.md @@ -21,6 +21,7 @@ SELECT bqutil.fn.int(1.684) * [int](#intv-any-type) * [json_typeof](#json_typeofjson-string) * [last_day](#lastdaydt-date) +* [linear_interpolate](#linear_interpolate) * [median](#medianarr-any-type) * [nlp_compromise_number](#nlp_compromise_numberstr-string) * [nlp_compromise_people](#nlp_compromise_peoplestr-string) @@ -30,6 +31,9 @@ SELECT bqutil.fn.int(1.684) * [random_int](#random_intmin-any-type-max-any-type) * [random_value](#random_valuearr-any-type) * [translate](#translateexpression-string-characters_to_replace-string-characters_to_substitute-string) +* [ts_gen_keyed_timestamps](#ts_gen_keyed_timestamps) +* [ts_linear_interpolate](#ts_linear_interpolate) +* [ts_tumble](#ts_tumble) * [typeof](#typeofinput-any-type) * [url_keys](#url_keysquery-string) * [url_param](#url_paramquery-string-p-string) @@ -230,6 +234,22 @@ results: | 1987-12-31 | 1998-09-30 | 2020-02-29 | 2019-02-28 | +### [linear_interpolate(pos INT64, prev STRUCT, next STRUCT)](linear_interpolate.sql) +Interpolate the current positions value from the preceding and folllowing coordinates + +```sql +SELECT + bqutil.fn.linear_interpolate(2, STRUCT(0 AS x, 0.0 AS y), STRUCT(10 AS x, 10.0 AS y)), + bqutil.fn.linear_interpolate(2, STRUCT(0 AS x, 0.0 AS y), STRUCT(20 AS x, 10.0 AS y)) +``` + +results: + +| f0_ | f1_ | +|-----|-----| +| 2.0 | 1.0 | + + ### [median(arr ANY 
TYPE)](median.sql) Get the median of an array of numbers. @@ -344,6 +364,53 @@ SELECT bqutil.fn.translate('mint tea', 'inea', 'osin') most tin ``` +### [ts_gen_keyed_timestamps(keys ARRAY, tumble_seconds INT64, min_ts TIMESTAMP, max_ts TIMESTAMP](ts_gen_keyed_timestamps.sql) +Generate a timestamp array associated with each key + +```sql +SELECT * +FROM + UNNEST(bqutil.fn.ts_gen_keyed_timestamps(['abc', 'def'], 60, TIMESTAMP '2020-01-01 00:30:00', TIMESTAMP '2020-01-01 00:31:00)) +``` + +| series_key | tumble_val +|------------|-------------------------| +| abc | 2020-01-01 00:30:00 UTC | +| def | 2020-01-01 00:30:00 UTC | +| abc | 2020-01-01 00:31:00 UTC | +| def | 2020-01-01 00:31:00 UTC | + + +### [ts_linear_interpolate(pos TIMESTAMP, prev STRUCT(x TIMESTAMP, y FLOAT6), next STRUCT(x TIMESTAMP, y FLOAT64))](ts_linear_interpolation.sql) +Interpolate the positions value using timestamp seconds as the x-axis + +```sql +select bqutil.fn.ts_linear_interpolate( + TIMESTAMP '2020-01-01 00:30:00', + STRUCT(TIMESTAMP '2020-01-01 00:29:00' AS x, 1.0 AS y), + STRUCT(TIMESTAMP '2020-01-01 00:31:00' AS x, 3.0 AS y) +) +``` + +| f0_ | +|-----| +| 2.0 | + + +### [ts_tumble(input_ts TIMESTAMP, tumble_seconds INT64)](ts_tumble.sql) +Calculate the [tumbling window](https://cloud.google.com/dataflow/docs/concepts/streaming-pipelines#tumbling-windows) the input_ts belongs in + +```sql +SELECT + fn.ts_tumble(TIMESTAMP '2020-01-01 00:17:30', 900) AS min_15, + fn.ts_tumble(TIMESTAMP '2020-01-01 00:17:30', 600) AS min_10, + fn.ts_tumble(TIMESTAMP '2020-01-01 00:17:30', 60) As min_1 +``` + +| min_15 | min_10 | | +|-------------------------|-------------------------|-------------------------| +| 2020-01-01 00:15:00 UTC | 2020-01-01 00:10:00 UTC | 2020-01-01 00:17:00 UTC | + ### [typeof(input ANY TYPE)](typeof.sql) diff --git a/udfs/community/linear_interpolate.sql b/udfs/community/linear_interpolate.sql new file mode 100644 index 000000000..4dc54229d --- /dev/null +++ 
b/udfs/community/linear_interpolate.sql @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- linear_interpolate: +-- Input: +-- pos: the position on x axis for the independent variable +-- prev: the x,y coordinate of the preceding value +-- next: the x,y coordinate of the following value +-- Output: the interpolated y value +CREATE OR REPLACE FUNCTION fn.linear_interpolate(pos INT64, prev STRUCT, next STRUCT) +RETURNS FLOAT64 AS ( + CASE + WHEN pos IS NULL OR prev IS NULL OR next IS NULL THEN NULL + ELSE + (next.y - prev.y) / (next.x - prev.x) * (pos - prev.x) + prev.y + END +); diff --git a/udfs/community/test_cases.yaml b/udfs/community/test_cases.yaml index 067a27fcc..2f5e80d64 100644 --- a/udfs/community/test_cases.yaml +++ b/udfs/community/test_cases.yaml @@ -293,4 +293,40 @@ percentage_difference: - test: input: CAST(1.0 AS FLOAT64), CAST(1000000000 AS INT64) expected_output: CAST(2.0 AS FLOAT64) - +linear_interpolate: + - test: + input: CAST(2 AS INT64), STRUCT(CAST(1 AS INT64) AS x, CAST(1.0 AS FLOAT64) AS y), STRUCT(CAST(3 AS INT64) AS x, CAST(3.0 AS FLOAT64) AS y) + expected_output: CAST(2.0 AS FLOAT64) + - test: + input: CAST(3 AS INT64), STRUCT(CAST(1 AS INT64) AS x, CAST(1.0 AS FLOAT64) AS y), STRUCT(CAST(4 AS INT64) AS x, CAST(4.0 AS FLOAT64) AS y) + expected_output: CAST(3.0 AS FLOAT64) +ts_lin_interpolate: + - test: + input: CAST('2020-01-01 00:15:00' AS TIMESTAMP), 
STRUCT(CAST('2020-01-01 00:00:00' AS TIMESTAMP) AS x, CAST(1.0 AS FLOAT64)), STRUCT(CAST('2020-01-01 00:30:00' AS TIMESTAMP) AS x, CAST(3.0 AS FLOAT64)) + expected_output: CAST(2.0 AS FLOAT64) + - test: + input: CAST('2020-01-01 00:15:00' AS TIMESTAMP), STRUCT(CAST('2020-01-01 00:00:00' AS TIMESTAMP) AS x, CAST(1.0 AS FLOAT64)), STRUCT(CAST('2020-01-01 02:30:00' AS TIMESTAMP) AS x, CAST(3.0 AS FLOAT64)) + expected_output: CAST(1.2 AS FLOAT64) +ts_tumble: + - test: + input: CAST('2020-01-01 00:17:30' AS TIMESTAMP), CAST(900 AS INT64) + expected_output: CAST('2020-01-01 00:15:00' AS TIMESTAMP) + - test: + input: CAST('2020-01-01 00:17:30' AS TIMESTAMP), CAST(600 AS INT64) + expected_output: CAST('2020-01-01 00:10:00' AS TIMESTAMP) + - test: + input: CAST('2020-01-01 00:17:30' AS TIMESTAMP), CAST(300 AS INT64) + expected_output: CAST('2020-01-01 00:15:00' AS TIMESTAMP) + - test: + input: CAST('2020-01-01 00:17:30' AS TIMESTAMP), CAST(60 AS INT64) + expected_output: CAST('2020-01-01 00:17:00' AS TIMESTAMP) + - test: + input: CAST('2020-01-01 00:17:30' AS TIMESTAMP), CAST(0 AS INT64) + expected_output: (NULL) +ts_gen_keyed_timestamps: + - test: + input: ARRAY['abc'], CAST(60 AS INT64), CAST('2020-01-01 00:30:00' AS TIMESTAMP), CAST('2020-01-01 00:31:00' AS TIMESTAMP) + expected_output: ([STRUCT(CAST('abc' AS STRING) AS series_key, CAST('2020-01-01 00:30:00' AS TIMESTAMP) AS tumble_val), STRUCT(CAST('abc' AS STRING) AS series_key, CAST('2020-01-01 00:31:00' AS TIMESTAMP) AS tumble_val)]) + - test: + input: ARRAY['abc', 'def'], CAST(60 AS INT64), CAST('2020-01-01 00:30:00' AS TIMESTAMP), CAST('2020-01-01 00:30:30' AS TIMESTAMP) + expected_output: ([STRUCT(CAST('abc' AS STRING) AS series_key, CAST('2020-01-01 00:30:00' AS TIMESTAMP) AS tumble_val), STRUCT(CAST('def' AS STRING) AS series_key, CAST('2020-01-01 00:30:00' AS TIMESTAMP) AS tumble_val)]) diff --git a/udfs/community/ts_gen_keyed_timestamps.sql b/udfs/community/ts_gen_keyed_timestamps.sql new file mode 100644 
index 000000000..45e549521 --- /dev/null +++ b/udfs/community/ts_gen_keyed_timestamps.sql @@ -0,0 +1,46 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +* Generate an array of key-timestamp structs with the specified min, max and interval timeseries +* Example Usage: +* SELECT * +* FROM UNNEST(bqutil.fn.ts_gen_keyed_timestamp(['abc'], 900, '2020-01-01', '2020-01-02') a +* LEFT JOIN dataset.table ON a.series_key = a.key AND a.tumble_val = b.timestamp +*/ + +-- ts_gen_keyed_timestamps: +-- Input: +-- keys: strings that are cross joined with the generated timestamps +-- tumble_seconds: the windowing interval for each generated timestamp +-- min_ts: the inclusive lower bound for the generated timestamps, normalized by the tumble_seconds +-- max_ts: the inclusive upper bound for the generated timestamps, normalized by the tumble_seconds +-- Output: An array of generated timestamps for each key - ARRAY> +CREATE OR REPLACE FUNCTION fn.ts_gen_keyed_timestamps(keys ARRAY, tumble_seconds INT64, min_ts TIMESTAMP, max_ts Timestamp) +RETURNS ARRAY> AS (( + SELECT ARRAY_AGG(x) + FROM ( + SELECT series_key, tumble_val + FROM UNNEST( + GENERATE_TIMESTAMP_ARRAY( + bqutil.fn.ts_tumble(min_ts, tumble_seconds), + bqutil.fn.ts_tumble(max_ts, tumble_seconds), + INTERVAL tumble_seconds SECOND + ) + ) AS tumble_val + CROSS JOIN UNNEST(keys) AS series_key + ) x +)); diff --git a/udfs/community/ts_linear_interpolate.sql 
b/udfs/community/ts_linear_interpolate.sql new file mode 100644 index 000000000..9be441166 --- /dev/null +++ b/udfs/community/ts_linear_interpolate.sql @@ -0,0 +1,67 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +* wrap fn.linear_interpolate to handle time series interpolation +* +* Example usage: use value if exists, otherwise attempt linear interpolation, else fill with zero +* +* WITH tbl AS ( +* SELECT 'abc' key, CAST('2021-01-01' AS TIMESTAMP) ts, 1 value, STRUCT(CAST('2021-01-01' AS TIMESTAMP) AS x, 1 AS y) coord +* UNION ALL +* SELECT 'abc', CAST('2021-01-02' AS TIMESTAMP), null, null +* UNION ALL +* SELECT 'abc', CAST('2021-01-03' AS TIMESTAMP), 3, STRUCT(CAST('2021-01-03' AS TIMESTAMP) AS x, 3 AS y) +* UNION ALL +* SELECT 'abc', CAST('2021-01-04' AS TIMESTAMP), null, null +* ) +* SELECT +* *, +* COALESCE(coord.y, +* fn.ts_lin_interpolate( +* ts, +* LAST_VALUE(coord IGNORE NULLS) +* OVER (PARTITION BY key +* ORDER BY unix_seconds(ts) ASC +* RANGE BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), +* FIRST_VALUE(coord IGNORE NULLS) +* OVER (PARTITION BY key +* ORDER BY unix_seconds(ts) ASC +* RANGE BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) +* ), +* 0 +* ) AS intrp +* FROM tbl +*/ + +-- ts_linear_interpolate: +-- Input: +-- pos: the independent variable of a linear interpolation, represented as a TIMESTAMP +-- prev: the x,y coordinate of the preceding value, where the x-coordinate is a TIMESTAMP +-- 
next: the x,y coordinate of the following value, where the x-coordinate is a TIMESTAMP +-- Output: the interpolated y value +CREATE OR REPLACE FUNCTION fn.ts_linear_interpolate(pos TIMESTAMP, prev STRUCT, next STRUCT) +RETURNS FLOAT64 AS ( + CASE + WHEN pos IS NULL OR prev IS NULL OR next IS NULL THEN NULL + ELSE + bqutil.fn.linear_interpolate( + UNIX_SECONDS(pos), + STRUCT(UNIX_SECONDS(prev.x) AS x, prev.y AS y), + STRUCT(UNIX_SECONDS(next.x) AS x, next.y AS y) + ) + END +); diff --git a/udfs/community/ts_tumble.sql b/udfs/community/ts_tumble.sql new file mode 100644 index 000000000..263002202 --- /dev/null +++ b/udfs/community/ts_tumble.sql @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +-- ts_tumble: +-- Input: +-- input_ts: timestamp to be divided into a [tumble window](https://cloud.google.com/dataflow/docs/reference/sql/streaming-extensions#tumble) +-- tumble_seconds: size of the tumble window in seconds +-- Output: the starting TIMESTAMP of the tumble winow the input_ts belongs to +CREATE OR REPLACE FUNCTION fn.ts_tumble(input_ts TIMESTAMP, tumble_seconds INT64) +RETURNS TIMESTAMP +AS ( + IF ( + tumble_seconds > 0, + TIMESTAMP_SECONDS(DIV(UNIX_SECONDS(input_ts), tumble_seconds) * tumble_seconds), + NULL + ) +); From cfe8f8ce90bfa47151f60808a7a231af8c639180 Mon Sep 17 00:00:00 2001 From: Daniel De Leo Date: Tue, 1 Dec 2020 18:07:50 -0500 Subject: [PATCH 09/90] Adding helper assets for JMeter performance testing on BigQuery (#203) * Adding helper assets for JMeter performance testing on BigQuery * Adding trailing new lines and removing commented code Co-authored-by: Ryan McDowell --- performance_testing/jmeter/README.md | 128 +++++++ .../jmeter/bigquery_http_sampler.jmx | 338 ++++++++++++++++++ .../jmeter/bigquery_jdbc_sampler.jmx | 269 ++++++++++++++ .../jmeter/cancel_running_jobs.py | 38 ++ .../jmeter/http_sampler_results.sql | 32 ++ .../jmeter/jdbc_sampler_results.sql | 27 ++ .../jmeter/run_jmeter_http_sampler.sh | 44 +++ .../jmeter/run_jmeter_jdbc_sampler.sh | 44 +++ .../jmeter/test_queries/complex_selects.csv | 101 ++++++ .../jmeter/test_queries/medium_selects.csv | 101 ++++++ .../jmeter/test_queries/simple_selects.csv | 101 ++++++ 11 files changed, 1223 insertions(+) create mode 100644 performance_testing/jmeter/README.md create mode 100644 performance_testing/jmeter/bigquery_http_sampler.jmx create mode 100644 performance_testing/jmeter/bigquery_jdbc_sampler.jmx create mode 100644 performance_testing/jmeter/cancel_running_jobs.py create mode 100644 performance_testing/jmeter/http_sampler_results.sql create mode 100644 performance_testing/jmeter/jdbc_sampler_results.sql create mode 100755 
performance_testing/jmeter/run_jmeter_http_sampler.sh create mode 100755 performance_testing/jmeter/run_jmeter_jdbc_sampler.sh create mode 100644 performance_testing/jmeter/test_queries/complex_selects.csv create mode 100644 performance_testing/jmeter/test_queries/medium_selects.csv create mode 100644 performance_testing/jmeter/test_queries/simple_selects.csv diff --git a/performance_testing/jmeter/README.md b/performance_testing/jmeter/README.md new file mode 100644 index 000000000..a7f855fdf --- /dev/null +++ b/performance_testing/jmeter/README.md @@ -0,0 +1,128 @@ +# Using JMeter for BigQuery Performance Testing + +## Before You Start + +Make sure you've completed the following prerequisite steps before running the +provided JMeter test plans + +* Install + [Java 8+ Oracle JDK](https://www.oracle.com/java/technologies/javase/javase-jdk8-downloads.html) + from Oracle page +* Download the + [Simba BigQuery JDBC Driver](https://cloud.google.com/bigquery/providers/simba-drivers) +* Download the latest + [JMeter Binary](https://jmeter.apache.org/download_jmeter.cgi) + +## Which JMeter Test Plan Do I Use? + +### [bigquery_jdbc_sampler.jmx](bigquery_jdbc_sampler.jmx) (Runs queries using JDBC driver) + +#### Pros + +* **Long-running job polling** - The JDBC request sampler is necessary for + tests where queries run longer than 4 minutes and where a consistent + concurrency level must be maintained. The JDBC driver will poll the query + job until it is finished before submitting a new query, ensuring that JMeter + active threads exactly match active BigQuery query jobs. +* **Simpler query format** - The JDBC request sampler does not require you to + form a JSON configuration object to submit the query to the API. This + eliminates JSON errors as a source of problems. + * Unescaped double quotes are allowed in SQL queries - You do not have to + escape double quotes in your SQL queries as is required in the HTTP + sampler. 
+ +#### Cons + +* **JDBC overhead latency** - The JDBC driver has some overhead latency + associated with it versus directly calling the REST API. Use the + BigQuery-provided + [INFORMATION_SCHEMA.JOBS_BY*](https://cloud.google.com/bigquery/docs/information-schema-jobs) + view to exclusively measure query runtime without any other latencies like + network. +* **BigQuery job labels unsupported** - You cannot currently set labels for + jobs submitted by the JDBC driver. In order to get a similar effect to + labeling, you'll need to include something like a JSON object in a comment + in each query, that can be parsed when querying the + [INFORMATION_SCHEMA.JOBS_BY*](https://cloud.google.com/bigquery/docs/information-schema-jobs) + view. +* **Response rows must be returned** - The JDBC driver does not support an + option to return 0 results. The MaxResults JDBC config should therefore be + set to 1, since the default setting of 0 instructs the JDBC driver to return + all rows. + +### [bigquery_http_sampler.jmx](bigquery_http_sampler.jmx) (Runs queries using REST API) + +#### Pros + +* **Fully configurable job options, including job labels** - The HTTP request + sampler allows you to specify the raw JSON request body which can include + any supported BigQuery options. In particular, it's very useful to include + query labels, since these will be present in the + [jobs metadata schema](https://cloud.google.com/bigquery/docs/information-schema-jobs#schema) + in the labels field. +* **Faster Performance** - Since JMeter is making REST calls directly to the + BigQuery API, the performance is faster than having to invoke BigQuery API + via the Java JDBC driver. + +#### Cons + +* **Default 1 hour maximum lifetime for access tokens** - The HTTP request + sampler uses an access token (which you provide as a command-line parameter + at startup) to authenticate with BigQuery. The default maximum lifetime of a + Google access token is 1 hour (3,600 seconds). 
However, you can extend the + maximum lifetime to 12 hours by + [modifying the organization policy](https://cloud.google.com/resource-manager/docs/organization-policy/restricting-service-accounts#extend_oauth_ttl). + JMeter calls to BigQuery APIs will start failing if your JMeter test runs + longer than your access token’s maximum lifetime. +* **JSON body configuration** - You need to configure the API request payload + using JSON, and the JSON object configuration is easy to break. A stray + quote or a missing comma can make your query fail in ways that are hard to + troubleshoot. + * **Queries must have all double quotes escaped** - Since the SQL queries + you pass to JMeter are values inside the HTTP request JSON body, you + must escape all double quotes that appear in the SQL query with a + backslash. ( e.g. SELECT \”Hello World\” ) +* **4min Max Timeout** - If a query runs for longer than 4 minutes, it can + appear to be done. If you intend to use JMeter's data to characterize the + runtime of your queries, this is a critical consideration. The results will + be wrong if you have queries that are long-running. + +## Running the JMeter Test Plan + +The JMeter test plans provided in this repo are designed to be run with very few +modifications. You should first test-run them this way before adding in more +changes to simplify troubleshooting if any issues are encountered. + +### [run_jmeter_jdbc_sampler.sh](run_jmeter_jdbc_sampler.sh) (**Runs bigquery_jdbc_sampler.jmx**) + +1. Replace the bash script placeholders with your own values, depending on + whether you use JDBC or HTTP as shown below: + * `-Jproject_id=`*YOUR_PROJECT* + * `-Juser.classpath=`*/path/to/your/SimbaJDBCDriverforGoogleBigQuery* +1. 
Ensure proper authentication is set up for either service account or user + account authentication: + * Service account authentication: \ + `export GOOGLE_APPLICATION_CREDENTIALS=`*/path/to/your/private_key.json* + * User account authentication: \ + `gcloud auth application-default login` +1. Run the bash helper script to begin the JMeter test + * `bash run_jmeter_jdbc_sampler.sh` + +### [run_jmeter_http_sampler.sh](run_jmeter_http_sampler.sh) (**Runs bigquery_http_sampler.jmx**) + +1. Replace the bash script placeholders shown below with your own values: + * `-Jproject_id=`*YOUR_PROJECT* +1. Ensure proper authentication is set up + * Service account authentication: \ + `gcloud auth activate-service-account + --key-file=`*/path/to/your/private_key.json* + * User account authentication: \ + `gcloud auth login` +1. Run the bash helper script to begin the JMeter test + * `bash run_jmeter_http_sampler.sh` + +## Inspecting the JMeter Test Plans + +The best method of viewing and understand the JMeter test plans is to open then in JMeter's GUI mode as shown below: +* `./apache-jmeter-5.3/bin/jmeter -t bigquery_jdbc_sampler.jmx` +* `./apache-jmeter-5.3/bin/jmeter -t bigquery_http_sampler.jmx` diff --git a/performance_testing/jmeter/bigquery_http_sampler.jmx b/performance_testing/jmeter/bigquery_http_sampler.jmx new file mode 100644 index 000000000..31bfca048 --- /dev/null +++ b/performance_testing/jmeter/bigquery_http_sampler.jmx @@ -0,0 +1,338 @@ + + + + + + false + false + + + + + + + + + + Authorization + Bearer ${__P(token)} + + + Content-Type + application/json + + + + + + continue + + false + ${__P(num_loops,0)} + + ${__P(simple_num_users,1)} + ${__P(ramp_time)} + 1504864705000 + 1504864705000 + true + ${__P(thread_duration,3600)} + + true + + + + true + + + + false + { + "kind": "bigquery#QueryRequest", + "useQueryCache": false, + "useLegacySql": false, + "timeoutMs":21600000, + "query": "${simple_query}", + "labels": {"jmeter_id": "${simple_id}", "run_id": 
"${__P(run_id)}"}, + "maxResults": 1 +} + + + = + + + + bigquery.googleapis.com + + https + + /bigquery/v2/projects/${__P(project_id)}/queries + POST + true + false + true + false + + + 21600000 + + + + true + + saveConfig + + + true + true + true + + true + true + true + true + false + true + true + false + false + false + true + false + false + false + true + 0 + true + true + true + true + true + true + + + ${__P(error_csv_path)} + + + + \t + + ${__P(simple_csv_path)} + false + false + true + shareMode.all + false + + + + + + + continue + + false + ${__P(num_loops,0)} + + ${__P(medium_num_users,1)} + ${__P(ramp_time)} + 1504864705000 + 1504864705000 + true + ${__P(thread_duration,3600)} + + true + + + + true + + + + false + { + "kind": "bigquery#QueryRequest", + "useQueryCache": false, + "useLegacySql": false, + "timeoutMs":21600000, + "query": "${medium_query}", + "labels": {"jmeter_id": "${medium_id}", "run_id": "${__P(run_id)}"}, + "maxResults": 1 +} + + + = + + + + bigquery.googleapis.com + + https + + /bigquery/v2/projects/${__P(project_id)}/queries + POST + true + false + true + false + + + 21600000 + + + + true + + saveConfig + + + true + true + true + + true + true + true + true + false + true + true + false + false + false + true + false + false + false + true + 0 + true + true + true + true + true + true + + + ${__P(error_csv_path)} + + + + \t + + ${__P(medium_csv_path)} + false + false + true + shareMode.all + false + + + + + + + continue + + false + ${__P(num_loops,0)} + + ${__P(complex_num_users,1)} + ${__P(ramp_time)} + 1504864705000 + 1504864705000 + true + ${__P(thread_duration,3600)} + + true + + + + true + + + + false + { + "kind": "bigquery#QueryRequest", + "useQueryCache": false, + "useLegacySql": false, + "timeoutMs":21600000, + "query": "${complex_query}", + "labels": {"jmeter_id": "${complex_id}", "run_id": "${__P(run_id)}"}, + "maxResults": 1 +} + + + = + + + + bigquery.googleapis.com + + https + + 
/bigquery/v2/projects/${__P(project_id)}/queries + POST + true + false + true + false + + + 21600000 + + + + true + + saveConfig + + + true + true + true + + true + true + true + true + false + true + true + false + false + false + true + false + false + false + true + 0 + true + true + true + true + true + true + + + ${__P(error_csv_path)} + + + + \t + + ${__P(complex_csv_path)} + false + false + true + shareMode.all + false + + + + + + + + diff --git a/performance_testing/jmeter/bigquery_jdbc_sampler.jmx b/performance_testing/jmeter/bigquery_jdbc_sampler.jmx new file mode 100644 index 000000000..0b0e9887b --- /dev/null +++ b/performance_testing/jmeter/bigquery_jdbc_sampler.jmx @@ -0,0 +1,269 @@ + + + + + + false + false + + + + + + + + true + + 5000 + + bq_pool + jdbc:bigquery://https://www.googleapis.com/bigquery/v2:443;OAuthType=3;ProjectId=${__P(project_id)};Timeout=3600;useQueryCache=0;MaxResults=1; + com.simba.googlebigquery.jdbc42.Driver + + true + + 0 + false + 10000 + DEFAULT + 60000 + + + + + continue + + false + ${__P(num_loops,0)} + + ${__P(simple_num_users,1)} + ${__P(ramp_time)} + true + ${__P(thread_duration,3600)} + + true + + + + bq_pool + /*${__P(run_id)},${simple_id}*/ ${simple_query} + + + -1 + Select Statement + Store as String + 0 + + + + + + ${__P(simple_csv_path)} + + + false + \t + false + true + false + shareMode.all + + + + true + + saveConfig + + + true + true + true + + true + true + true + true + false + true + true + false + false + false + true + false + false + false + true + 0 + true + true + true + true + true + true + + + ${__P(error_csv_path)} + + + + + + continue + + false + ${__P(num_loops,0)} + + ${__P(medium_num_users,1)} + ${__P(ramp_time)} + true + ${__P(thread_duration,3600)} + + true + + + + bq_pool + /*${__P(run_id)},${medium_id}*/ ${medium_query} + + + -1 + Select Statement + Store as String + 0 + + + + + + ${__P(medium_csv_path)} + + + false + \t + false + true + false + shareMode.group + + + + true + + saveConfig + 
+ + true + true + true + + true + true + true + true + false + true + true + false + false + false + true + false + false + false + true + 0 + true + true + true + true + true + true + + + ${__P(error_csv_path)} + + + + + + continue + + false + ${__P(num_loops,0)} + + ${__P(complex_num_users,1)} + ${__P(ramp_time)} + true + ${__P(thread_duration,3600)} + + true + + + + bq_pool + /*${__P(run_id)},${complex_id}*/ ${complex_query} + + + -1 + Select Statement + Store as String + 0 + + + + + + ${__P(complex_csv_path)} + + + false + \t + false + true + false + shareMode.group + + + + true + + saveConfig + + + true + true + true + + true + true + true + true + false + true + true + false + false + false + true + false + false + false + true + 0 + true + true + true + true + true + true + + + ${__P(error_csv_path)} + + + + + + + diff --git a/performance_testing/jmeter/cancel_running_jobs.py b/performance_testing/jmeter/cancel_running_jobs.py new file mode 100644 index 000000000..e645fd642 --- /dev/null +++ b/performance_testing/jmeter/cancel_running_jobs.py @@ -0,0 +1,38 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from argparse import ArgumentParser +from google.cloud import bigquery + + +def cancel_jobs(client): + for job in client.list_jobs(all_users=True, state_filter="RUNNING"): + client.cancel_job(job.job_id, location='us') + + +def get_cmd_line_args(): + parser = ArgumentParser() + parser.add_argument( + '--project_id', + help='Project in which all running BigQuery jobs will be cancelled.') + return parser.parse_args() + + +def main(): + args = get_cmd_line_args() + cancel_jobs(bigquery.Client(project=args.project_id)) + + +if __name__ == '__main__': + main() diff --git a/performance_testing/jmeter/http_sampler_results.sql b/performance_testing/jmeter/http_sampler_results.sql new file mode 100644 index 000000000..fd228272f --- /dev/null +++ b/performance_testing/jmeter/http_sampler_results.sql @@ -0,0 +1,32 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +SELECT + SPLIT(labels[OFFSET(1)].value, '_')[OFFSET(0)] AS complexity, + COUNT(1) +FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT +WHERE + DATE(creation_time) = CURRENT_DATE() -- Partitioning column + AND project_id = 'YOUR_PROJECT' -- Clustering column + AND ARRAY_LENGTH(labels) > 0 + AND EXISTS ( + SELECT * + FROM UNNEST(labels) AS labels + WHERE + labels.key = 'run_id' + AND labels.value = 'jmeter_http_test' + ) +GROUP BY 1 diff --git a/performance_testing/jmeter/jdbc_sampler_results.sql b/performance_testing/jmeter/jdbc_sampler_results.sql new file mode 100644 index 000000000..5889f7b66 --- /dev/null +++ b/performance_testing/jmeter/jdbc_sampler_results.sql @@ -0,0 +1,27 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +SELECT +-- SPLIT(TRIM(SPLIT(query, '*/')[OFFSET(0)],'/*'))[OFFSET(1)] AS query_id, + SPLIT(SPLIT(TRIM(SPLIT(query, '*/')[OFFSET(0)],'/*'))[OFFSET(1)], '_')[OFFSET(0)] AS complexity, + COUNT(1) +FROM + `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT +WHERE + DATE(creation_time) = CURRENT_DATE() -- Partitioning column + AND project_id = 'YOUR_PROJECT' -- Clustering column + AND SPLIT(TRIM(SPLIT(query, '*/')[OFFSET(0)],'/*'))[OFFSET(0)] = 'jmeter_jdbc_test' +GROUP BY 1 diff --git a/performance_testing/jmeter/run_jmeter_http_sampler.sh b/performance_testing/jmeter/run_jmeter_http_sampler.sh new file mode 100755 index 000000000..30368255c --- /dev/null +++ b/performance_testing/jmeter/run_jmeter_http_sampler.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License + +######################################################################### +# Make sure you run the following gcloud auth command +# if you're not using a service account to authenticate: +# +# gcloud auth login +# +# If you are using a service account, run the following gcloud auth command +# after specifying the path to your service account private key. 
+# +# gcloud auth activate-service-account --key-file=/path/to/your/private_key.json +# +######################################################################### + +apache-jmeter-5.3/bin/jmeter -n \ +-t bigquery_http_sampler.jmx \ +-Jproject_id=YOUR_PROJECT \ +-Jtoken=$(gcloud auth print-access-token) \ +-Jsimple_csv_path=test_queries/simple_selects.csv \ +-Jmedium_csv_path=test_queries/medium_selects.csv \ +-Jcomplex_csv_path=test_queries/complex_selects.csv \ +-Jerror_csv_path=errors.csv \ +-Jsimple_num_users=6 \ +-Jmedium_num_users=3 \ +-Jcomplex_num_users=1 \ +-Jnum_loops=-1 \ +-Jrun_id=jmeter_http_test \ +-Jthread_duration=10 \ +-Jramp_time=0; diff --git a/performance_testing/jmeter/run_jmeter_jdbc_sampler.sh b/performance_testing/jmeter/run_jmeter_jdbc_sampler.sh new file mode 100755 index 000000000..95763442c --- /dev/null +++ b/performance_testing/jmeter/run_jmeter_jdbc_sampler.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License + +######################################################################### +# Make sure you run the following gcloud auth command +# if you're not using a service account to authenticate: +# +# gcloud auth application-default login +# +# If you are using a service account, uncomment the export command below +# and specify the path to your service account private key. 
+# +# export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/private_key.json +# +######################################################################### + +apache-jmeter-5.3/bin/jmeter -n \ +-t bigquery_jdbc_sampler.jmx \ +-Jproject_id=YOUR_PROJECT \ +-Juser.classpath=/path/to/your/SimbaJDBCDriverforGoogleBigQuery \ +-Jsimple_csv_path=test_queries/simple_selects.csv \ +-Jmedium_csv_path=test_queries/medium_selects.csv \ +-Jcomplex_csv_path=test_queries/complex_selects.csv \ +-Jerror_csv_path=errors.csv \ +-Jsimple_num_users=6 \ +-Jmedium_num_users=3 \ +-Jcomplex_num_users=1 \ +-Jnum_loops=-1 \ +-Jrun_id=jmeter_jdbc_test \ +-Jthread_duration=10 \ +-Jramp_time=0; diff --git a/performance_testing/jmeter/test_queries/complex_selects.csv b/performance_testing/jmeter/test_queries/complex_selects.csv new file mode 100644 index 000000000..0f5b3f042 --- /dev/null +++ b/performance_testing/jmeter/test_queries/complex_selects.csv @@ -0,0 +1,101 @@ +complex_id complex_query +complex_00 SELECT 'some complex query 00'; +complex_01 SELECT 'some complex query 01'; +complex_02 SELECT 'some complex query 02'; +complex_03 SELECT 'some complex query 03'; +complex_04 SELECT 'some complex query 04'; +complex_05 SELECT 'some complex query 05'; +complex_06 SELECT 'some complex query 06'; +complex_07 SELECT 'some complex query 07'; +complex_08 SELECT 'some complex query 08'; +complex_09 SELECT 'some complex query 09'; +complex_10 SELECT 'some complex query 10'; +complex_11 SELECT 'some complex query 11'; +complex_12 SELECT 'some complex query 12'; +complex_13 SELECT 'some complex query 13'; +complex_14 SELECT 'some complex query 14'; +complex_15 SELECT 'some complex query 15'; +complex_16 SELECT 'some complex query 16'; +complex_17 SELECT 'some complex query 17'; +complex_18 SELECT 'some complex query 18'; +complex_19 SELECT 'some complex query 19'; +complex_20 SELECT 'some complex query 20'; +complex_21 SELECT 'some complex query 21'; +complex_22 SELECT 'some complex query 22'; 
+complex_23 SELECT 'some complex query 23'; +complex_24 SELECT 'some complex query 24'; +complex_25 SELECT 'some complex query 25'; +complex_26 SELECT 'some complex query 26'; +complex_27 SELECT 'some complex query 27'; +complex_28 SELECT 'some complex query 28'; +complex_29 SELECT 'some complex query 29'; +complex_30 SELECT 'some complex query 30'; +complex_31 SELECT 'some complex query 31'; +complex_32 SELECT 'some complex query 32'; +complex_33 SELECT 'some complex query 33'; +complex_34 SELECT 'some complex query 34'; +complex_35 SELECT 'some complex query 35'; +complex_36 SELECT 'some complex query 36'; +complex_37 SELECT 'some complex query 37'; +complex_38 SELECT 'some complex query 38'; +complex_39 SELECT 'some complex query 39'; +complex_40 SELECT 'some complex query 40'; +complex_41 SELECT 'some complex query 41'; +complex_42 SELECT 'some complex query 42'; +complex_43 SELECT 'some complex query 43'; +complex_44 SELECT 'some complex query 44'; +complex_45 SELECT 'some complex query 45'; +complex_46 SELECT 'some complex query 46'; +complex_47 SELECT 'some complex query 47'; +complex_48 SELECT 'some complex query 48'; +complex_49 SELECT 'some complex query 49'; +complex_50 SELECT 'some complex query 50'; +complex_51 SELECT 'some complex query 51'; +complex_52 SELECT 'some complex query 52'; +complex_53 SELECT 'some complex query 53'; +complex_54 SELECT 'some complex query 54'; +complex_55 SELECT 'some complex query 55'; +complex_56 SELECT 'some complex query 56'; +complex_57 SELECT 'some complex query 57'; +complex_58 SELECT 'some complex query 58'; +complex_59 SELECT 'some complex query 59'; +complex_60 SELECT 'some complex query 60'; +complex_61 SELECT 'some complex query 61'; +complex_62 SELECT 'some complex query 62'; +complex_63 SELECT 'some complex query 63'; +complex_64 SELECT 'some complex query 64'; +complex_65 SELECT 'some complex query 65'; +complex_66 SELECT 'some complex query 66'; +complex_67 SELECT 'some complex query 67'; +complex_68 SELECT 
'some complex query 68'; +complex_69 SELECT 'some complex query 69'; +complex_70 SELECT 'some complex query 70'; +complex_71 SELECT 'some complex query 71'; +complex_72 SELECT 'some complex query 72'; +complex_73 SELECT 'some complex query 73'; +complex_74 SELECT 'some complex query 74'; +complex_75 SELECT 'some complex query 75'; +complex_76 SELECT 'some complex query 76'; +complex_77 SELECT 'some complex query 77'; +complex_78 SELECT 'some complex query 78'; +complex_79 SELECT 'some complex query 79'; +complex_80 SELECT 'some complex query 80'; +complex_81 SELECT 'some complex query 81'; +complex_82 SELECT 'some complex query 82'; +complex_83 SELECT 'some complex query 83'; +complex_84 SELECT 'some complex query 84'; +complex_85 SELECT 'some complex query 85'; +complex_86 SELECT 'some complex query 86'; +complex_87 SELECT 'some complex query 87'; +complex_88 SELECT 'some complex query 88'; +complex_89 SELECT 'some complex query 89'; +complex_90 SELECT 'some complex query 90'; +complex_91 SELECT 'some complex query 91'; +complex_92 SELECT 'some complex query 92'; +complex_93 SELECT 'some complex query 93'; +complex_94 SELECT 'some complex query 94'; +complex_95 SELECT 'some complex query 95'; +complex_96 SELECT 'some complex query 96'; +complex_97 SELECT 'some complex query 97'; +complex_98 SELECT 'some complex query 98'; +complex_99 SELECT 'some complex query 99'; diff --git a/performance_testing/jmeter/test_queries/medium_selects.csv b/performance_testing/jmeter/test_queries/medium_selects.csv new file mode 100644 index 000000000..59cdabe62 --- /dev/null +++ b/performance_testing/jmeter/test_queries/medium_selects.csv @@ -0,0 +1,101 @@ +medium_id medium_query +medium_00 SELECT 'some medium complexity query 00'; +medium_01 SELECT 'some medium complexity query 01'; +medium_02 SELECT 'some medium complexity query 02'; +medium_03 SELECT 'some medium complexity query 03'; +medium_04 SELECT 'some medium complexity query 04'; +medium_05 SELECT 'some medium complexity 
query 05'; +medium_06 SELECT 'some medium complexity query 06'; +medium_07 SELECT 'some medium complexity query 07'; +medium_08 SELECT 'some medium complexity query 08'; +medium_09 SELECT 'some medium complexity query 09'; +medium_10 SELECT 'some medium complexity query 10'; +medium_11 SELECT 'some medium complexity query 11'; +medium_12 SELECT 'some medium complexity query 12'; +medium_13 SELECT 'some medium complexity query 13'; +medium_14 SELECT 'some medium complexity query 14'; +medium_15 SELECT 'some medium complexity query 15'; +medium_16 SELECT 'some medium complexity query 16'; +medium_17 SELECT 'some medium complexity query 17'; +medium_18 SELECT 'some medium complexity query 18'; +medium_19 SELECT 'some medium complexity query 19'; +medium_20 SELECT 'some medium complexity query 20'; +medium_21 SELECT 'some medium complexity query 21'; +medium_22 SELECT 'some medium complexity query 22'; +medium_23 SELECT 'some medium complexity query 23'; +medium_24 SELECT 'some medium complexity query 24'; +medium_25 SELECT 'some medium complexity query 25'; +medium_26 SELECT 'some medium complexity query 26'; +medium_27 SELECT 'some medium complexity query 27'; +medium_28 SELECT 'some medium complexity query 28'; +medium_29 SELECT 'some medium complexity query 29'; +medium_30 SELECT 'some medium complexity query 30'; +medium_31 SELECT 'some medium complexity query 31'; +medium_32 SELECT 'some medium complexity query 32'; +medium_33 SELECT 'some medium complexity query 33'; +medium_34 SELECT 'some medium complexity query 34'; +medium_35 SELECT 'some medium complexity query 35'; +medium_36 SELECT 'some medium complexity query 36'; +medium_37 SELECT 'some medium complexity query 37'; +medium_38 SELECT 'some medium complexity query 38'; +medium_39 SELECT 'some medium complexity query 39'; +medium_40 SELECT 'some medium complexity query 40'; +medium_41 SELECT 'some medium complexity query 41'; +medium_42 SELECT 'some medium complexity query 42'; +medium_43 SELECT 'some 
medium complexity query 43'; +medium_44 SELECT 'some medium complexity query 44'; +medium_45 SELECT 'some medium complexity query 45'; +medium_46 SELECT 'some medium complexity query 46'; +medium_47 SELECT 'some medium complexity query 47'; +medium_48 SELECT 'some medium complexity query 48'; +medium_49 SELECT 'some medium complexity query 49'; +medium_50 SELECT 'some medium complexity query 50'; +medium_51 SELECT 'some medium complexity query 51'; +medium_52 SELECT 'some medium complexity query 52'; +medium_53 SELECT 'some medium complexity query 53'; +medium_54 SELECT 'some medium complexity query 54'; +medium_55 SELECT 'some medium complexity query 55'; +medium_56 SELECT 'some medium complexity query 56'; +medium_57 SELECT 'some medium complexity query 57'; +medium_58 SELECT 'some medium complexity query 58'; +medium_59 SELECT 'some medium complexity query 59'; +medium_60 SELECT 'some medium complexity query 60'; +medium_61 SELECT 'some medium complexity query 61'; +medium_62 SELECT 'some medium complexity query 62'; +medium_63 SELECT 'some medium complexity query 63'; +medium_64 SELECT 'some medium complexity query 64'; +medium_65 SELECT 'some medium complexity query 65'; +medium_66 SELECT 'some medium complexity query 66'; +medium_67 SELECT 'some medium complexity query 67'; +medium_68 SELECT 'some medium complexity query 68'; +medium_69 SELECT 'some medium complexity query 69'; +medium_70 SELECT 'some medium complexity query 70'; +medium_71 SELECT 'some medium complexity query 71'; +medium_72 SELECT 'some medium complexity query 72'; +medium_73 SELECT 'some medium complexity query 73'; +medium_74 SELECT 'some medium complexity query 74'; +medium_75 SELECT 'some medium complexity query 75'; +medium_76 SELECT 'some medium complexity query 76'; +medium_77 SELECT 'some medium complexity query 77'; +medium_78 SELECT 'some medium complexity query 78'; +medium_79 SELECT 'some medium complexity query 79'; +medium_80 SELECT 'some medium complexity query 80'; 
+medium_81 SELECT 'some medium complexity query 81'; +medium_82 SELECT 'some medium complexity query 82'; +medium_83 SELECT 'some medium complexity query 83'; +medium_84 SELECT 'some medium complexity query 84'; +medium_85 SELECT 'some medium complexity query 85'; +medium_86 SELECT 'some medium complexity query 86'; +medium_87 SELECT 'some medium complexity query 87'; +medium_88 SELECT 'some medium complexity query 88'; +medium_89 SELECT 'some medium complexity query 89'; +medium_90 SELECT 'some medium complexity query 90'; +medium_91 SELECT 'some medium complexity query 91'; +medium_92 SELECT 'some medium complexity query 92'; +medium_93 SELECT 'some medium complexity query 93'; +medium_94 SELECT 'some medium complexity query 94'; +medium_95 SELECT 'some medium complexity query 95'; +medium_96 SELECT 'some medium complexity query 96'; +medium_97 SELECT 'some medium complexity query 97'; +medium_98 SELECT 'some medium complexity query 98'; +medium_99 SELECT 'some medium complexity query 99'; diff --git a/performance_testing/jmeter/test_queries/simple_selects.csv b/performance_testing/jmeter/test_queries/simple_selects.csv new file mode 100644 index 000000000..11eabe283 --- /dev/null +++ b/performance_testing/jmeter/test_queries/simple_selects.csv @@ -0,0 +1,101 @@ +simple_id simple_query +simple_00 SELECT 'some simple query 00'; +simple_01 SELECT 'some simple query 01'; +simple_02 SELECT 'some simple query 02'; +simple_03 SELECT 'some simple query 03'; +simple_04 SELECT 'some simple query 04'; +simple_05 SELECT 'some simple query 05'; +simple_06 SELECT 'some simple query 06'; +simple_07 SELECT 'some simple query 07'; +simple_08 SELECT 'some simple query 08'; +simple_09 SELECT 'some simple query 09'; +simple_10 SELECT 'some simple query 10'; +simple_11 SELECT 'some simple query 11'; +simple_12 SELECT 'some simple query 12'; +simple_13 SELECT 'some simple query 13'; +simple_14 SELECT 'some simple query 14'; +simple_15 SELECT 'some simple query 15'; +simple_16 SELECT 
'some simple query 16'; +simple_17 SELECT 'some simple query 17'; +simple_18 SELECT 'some simple query 18'; +simple_19 SELECT 'some simple query 19'; +simple_20 SELECT 'some simple query 20'; +simple_21 SELECT 'some simple query 21'; +simple_22 SELECT 'some simple query 22'; +simple_23 SELECT 'some simple query 23'; +simple_24 SELECT 'some simple query 24'; +simple_25 SELECT 'some simple query 25'; +simple_26 SELECT 'some simple query 26'; +simple_27 SELECT 'some simple query 27'; +simple_28 SELECT 'some simple query 28'; +simple_29 SELECT 'some simple query 29'; +simple_30 SELECT 'some simple query 30'; +simple_31 SELECT 'some simple query 31'; +simple_32 SELECT 'some simple query 32'; +simple_33 SELECT 'some simple query 33'; +simple_34 SELECT 'some simple query 34'; +simple_35 SELECT 'some simple query 35'; +simple_36 SELECT 'some simple query 36'; +simple_37 SELECT 'some simple query 37'; +simple_38 SELECT 'some simple query 38'; +simple_39 SELECT 'some simple query 39'; +simple_40 SELECT 'some simple query 40'; +simple_41 SELECT 'some simple query 41'; +simple_42 SELECT 'some simple query 42'; +simple_43 SELECT 'some simple query 43'; +simple_44 SELECT 'some simple query 44'; +simple_45 SELECT 'some simple query 45'; +simple_46 SELECT 'some simple query 46'; +simple_47 SELECT 'some simple query 47'; +simple_48 SELECT 'some simple query 48'; +simple_49 SELECT 'some simple query 49'; +simple_50 SELECT 'some simple query 50'; +simple_51 SELECT 'some simple query 51'; +simple_52 SELECT 'some simple query 52'; +simple_53 SELECT 'some simple query 53'; +simple_54 SELECT 'some simple query 54'; +simple_55 SELECT 'some simple query 55'; +simple_56 SELECT 'some simple query 56'; +simple_57 SELECT 'some simple query 57'; +simple_58 SELECT 'some simple query 58'; +simple_59 SELECT 'some simple query 59'; +simple_60 SELECT 'some simple query 60'; +simple_61 SELECT 'some simple query 61'; +simple_62 SELECT 'some simple query 62'; +simple_63 SELECT 'some simple query 63'; 
+simple_64 SELECT 'some simple query 64'; +simple_65 SELECT 'some simple query 65'; +simple_66 SELECT 'some simple query 66'; +simple_67 SELECT 'some simple query 67'; +simple_68 SELECT 'some simple query 68'; +simple_69 SELECT 'some simple query 69'; +simple_70 SELECT 'some simple query 70'; +simple_71 SELECT 'some simple query 71'; +simple_72 SELECT 'some simple query 72'; +simple_73 SELECT 'some simple query 73'; +simple_74 SELECT 'some simple query 74'; +simple_75 SELECT 'some simple query 75'; +simple_76 SELECT 'some simple query 76'; +simple_77 SELECT 'some simple query 77'; +simple_78 SELECT 'some simple query 78'; +simple_79 SELECT 'some simple query 79'; +simple_80 SELECT 'some simple query 80'; +simple_81 SELECT 'some simple query 81'; +simple_82 SELECT 'some simple query 82'; +simple_83 SELECT 'some simple query 83'; +simple_84 SELECT 'some simple query 84'; +simple_85 SELECT 'some simple query 85'; +simple_86 SELECT 'some simple query 86'; +simple_87 SELECT 'some simple query 87'; +simple_88 SELECT 'some simple query 88'; +simple_89 SELECT 'some simple query 89'; +simple_90 SELECT 'some simple query 90'; +simple_91 SELECT 'some simple query 91'; +simple_92 SELECT 'some simple query 92'; +simple_93 SELECT 'some simple query 93'; +simple_94 SELECT 'some simple query 94'; +simple_95 SELECT 'some simple query 95'; +simple_96 SELECT 'some simple query 96'; +simple_97 SELECT 'some simple query 97'; +simple_98 SELECT 'some simple query 98'; +simple_99 SELECT 'some simple query 99'; From d951ebfbc5a0336c861038f5247590f3936186a6 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 2 Dec 2020 14:14:33 -0800 Subject: [PATCH 10/90] feat: bq project env-var Add an environment variable to support overriding the default project for the BigQuery Client. By default this will be the project in which the cloud function is deployed. 
--- tools/cloud_functions/gcs_event_based_ingest/README.md | 6 ++++-- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md | 1 + .../gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/README.md index 9fda82d39..c976c21c7 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/README.md @@ -37,8 +37,10 @@ better fit your naming convention on GCS. Your regex must include [Python Regex with named capturing groups](https://docs.python.org/3/howto/regex.html#non-capturing-and-named-groups) for destination `dataset`, and `table`. Note, that `dataset` can optionally, explicitly specify destination project -(i.e. `gs://${BUCKET}/project_id.dataset_id/table/....`) otherwise the default -project will be inferred from Application Default Credential (the project in +(i.e. `gs://${BUCKET}/project_id.dataset_id/table/....`) alternatively, +one can set the `BQ_PROJECT` environment variable to override the +default target project for datasets at the function level. The default behavior is to +infer the project from Application Default Credential (the project in which the Cloud Function is running, or the ADC configured in Google Cloud SDK if invoked locally). This is useful in scenarios where a single deployment of the Cloud Function is responsible for ingesting data into BigQuery tables in diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md index c86dceea4..5e30a1c4b 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md @@ -31,6 +31,7 @@ following default behavior. 
| `DESTINATION_REGEX` | A [Python Regex with named capturing groups](https://docs.python.org/3/howto/regex.html#non-capturing-and-named-groups) for `dataset`, `table`, (optional: `partition` or `yyyy`, `mm`, `dd`, `hh`, `batch`) | `MAX_BATCH_BYTES` | Max bytes for BigQuery Load job | `15000000000000` ([15 TB](https://cloud.google.com/bigquery/quotas#load_jobs)| | `JOB_PREFIX` | Prefix for BigQuery Job IDs | `gcf-ingest-` | +| `BQ_PROJECT` | Default BQ project to use if not specified in dataset capturing group | Project where Cloud Function is deployed | ## Implementation notes diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index d05b771db..aa27422f9 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -112,7 +112,7 @@ def main(event: Dict, context): # pylint: disable=unused-argument prefix_to_load = removesuffix(object_id, SUCCESS_FILENAME) gsurl = f"gs://{bucket_id}/{prefix_to_load}" gcs_client = storage.Client(client_info=CLIENT_INFO) - project = gcs_client.project + project = os.getenv("BQ_PROJECT", gcs_client.project) bkt = cached_get_bucket(gcs_client, bucket_id) success_blob: storage.Blob = bkt.blob(object_id) handle_duplicate_notification(bkt, success_blob, gsurl) From cf23f1ba4e152c6a410a05153fc28002426157c7 Mon Sep 17 00:00:00 2001 From: Ryan den Otter Date: Fri, 4 Dec 2020 14:09:59 -0700 Subject: [PATCH 11/90] Move utility methods into a utils module Change the tests to use new utils module. 
--- .../gcs_ocn_bq_ingest/main.py | 462 +---------------- .../gcs_ocn_bq_ingest/utils.py | 477 ++++++++++++++++++ .../gcs_event_based_ingest/tests/conftest.py | 4 +- .../test_gcs_ocn_bq_ingest.py | 5 +- 4 files changed, 488 insertions(+), 460 deletions(-) create mode 100644 tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 6025122d1..8f4ff3d64 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -16,46 +16,19 @@ # limitations under the License. """Background Cloud Function for loading data from GCS to BigQuery. """ -import collections -import collections.abc -import copy -import json import os -import pathlib import re -import time -from typing import Any, Deque, Dict, List, Optional, Tuple +from typing import Dict -import cachetools import google.api_core.client_info -import google.api_core.exceptions import google.cloud.exceptions from google.cloud import bigquery, storage -# https://cloud.google.com/bigquery/quotas#load_jobs -# 15TB per BQ load job (soft limit). -DEFAULT_MAX_BATCH_BYTES = str(15 * 10**12) -# 10,000 GCS URIs per BQ load job. -MAX_SOURCE_URIS_PER_LOAD = 10**4 - -DEFAULT_EXTERNAL_TABLE_DEFINITION = { - # The default must be a self describing data format - # because autodetecting CSV /JSON schemas is likely to not match - # expectations / assumptions of the transformation query. 
- "sourceFormat": "PARQUET", -} - -DEFAULT_JOB_LABELS = { - "component": "event-based-gcs-ingest", - "cloud-function-name": os.getenv("FUNCTION_NAME"), -} - -BASE_LOAD_JOB_CONFIG = { - "sourceFormat": "CSV", - "fieldDelimiter": ",", - "writeDisposition": "WRITE_APPEND", - "labels": DEFAULT_JOB_LABELS, -} +from .utils import (parse_notification, SUCCESS_FILENAME, removesuffix, + cached_get_bucket, handle_duplicate_notification, + DEFAULT_JOB_LABELS, read_gcs_file_if_exists, + look_for_config_in_parents, external_query, + create_job_id_prefix, load_batches) # yapf: disable DEFAULT_DESTINATION_REGEX = ( @@ -70,28 +43,9 @@ ) # yapf: enable -# Will wait up to this polling for errors before exiting -# This is to check if job fail quickly, not to assert it succeed. -# This may not be honored if longer than cloud function timeout. -# https://cloud.google.com/functions/docs/concepts/exec#timeout -# One might consider lowering this to 1-2 seconds to lower the -# upper bound of expected execution time to stay within the free tier. -# https://cloud.google.com/functions/pricing#free_tier -WAIT_FOR_JOB_SECONDS = int(os.getenv("WAIT_FOR_JOB_SECONDS", "5")) - -# Use caution when lowering the job polling rate. -# Keep in mind that many concurrent executions of this cloud function should not -# violate the 300 concurrent requests or 100 request per second. 
-# https://cloud.google.com/bigquery/quotas#all_api_requests -JOB_POLL_INTERVAL_SECONDS = 1 - -SUCCESS_FILENAME = os.getenv("SUCCESS_FILENAME", "_SUCCESS") - CLIENT_INFO = google.api_core.client_info.ClientInfo( user_agent="google-pso-tool/bq-severless-loader") -DEFAULT_JOB_PREFIX = "gcf-ingest-" - def main(event: Dict, context): # pylint: disable=unused-argument """entry point for background cloud function for event driven GCS to @@ -177,407 +131,3 @@ def main(event: Dict, context): # pylint: disable=unused-argument print("LOAD_JOB") load_batches(gcs_client, bq_client, gsurl, dest_table_ref, create_job_id_prefix(dest_table_ref, batch_id)) - - -def create_job_id_prefix(dest_table_ref: bigquery.TableReference, - batch_id: Optional[str]): - """Create job id prefix with a consistent naming convention. - The naming conventions is as follows: - gcf-ingest----- - Parts that are not inferrable from the GCS path with have a 'None' - placeholder. This naming convention is crucial for monitoring the system. 
- Note, gcf-ingest- can be overridden with environment variable JOB_PREFIX - - Examples: - - Non-partitioned Non batched tables: - - gs://${BUCKET}/tpch/lineitem/_SUCCESS - - gcf-ingest-tpch-lineitem-None-None- - Non-partitioned batched tables: - - gs://${BUCKET}/tpch/lineitem/batch000/_SUCCESS - - gcf-ingest-tpch-lineitem-None-batch000- - Partitioned Batched tables: - - gs://${BUCKET}/tpch/lineitem/$20201031/batch000/_SUCCESS - - gcf-ingest-tpch-lineitem-20201031-batch000- - """ - table_partition = dest_table_ref.table_id.split("$") - if len(table_partition) < 2: - # If there is no partition put a None placeholder - table_partition.append("None") - return f"{os.getenv('JOB_PREFIX', DEFAULT_JOB_PREFIX)}" \ - f"{dest_table_ref.dataset_id}-" \ - f"{'-'.join(table_partition)}-" \ - f"{batch_id}-" - - -def external_query( # pylint: disable=too-many-arguments - gcs_client: storage.Client, bq_client: bigquery.Client, gsurl: str, - query: str, dest_table_ref: bigquery.TableReference, - job_id_prefix: str): - """Load from query over external table from GCS. - - This hinges on a SQL query defined in GCS at _config/bq_transform.sql and - an external table definition _config/external.json (otherwise will assume - CSV external table) - """ - external_table_config = read_gcs_file_if_exists( - gcs_client, f"{gsurl}_config/external.json") - if not external_table_config: - external_table_config = look_for_config_in_parents( - gcs_client, gsurl, "external.json") - if external_table_config: - external_table_def = json.loads(external_table_config) - else: - print(f"Falling back to default CSV external table." 
- f" {gsurl}_config/external.json not found.") - external_table_def = DEFAULT_EXTERNAL_TABLE_DEFINITION - - external_table_def["sourceUris"] = flatten2dlist( - get_batches_for_prefix(gcs_client, gsurl)) - print(f"external table def = {json.dumps(external_table_config, indent=2)}") - external_config = bigquery.ExternalConfig.from_api_repr(external_table_def) - job_config = bigquery.QueryJobConfig( - table_definitions={"temp_ext": external_config}, use_legacy_sql=False) - - # Note, dest_table might include a partition decorator. - rendered_query = query.format( - dest_dataset=dest_table_ref.dataset_id, - dest_table=dest_table_ref.table_id, - ) - - job: bigquery.QueryJob = bq_client.query( - rendered_query, - job_config=job_config, - job_id_prefix=job_id_prefix, - ) - - print(f"started asynchronous query job: {job.job_id}") - - start_poll_for_errors = time.monotonic() - # Check if job failed quickly - while time.monotonic() - start_poll_for_errors < WAIT_FOR_JOB_SECONDS: - job.reload() - if job.errors: - raise RuntimeError( - f"query job {job.job_id} failed quickly: {job.errors}") - time.sleep(JOB_POLL_INTERVAL_SECONDS) - - -def flatten2dlist(arr: List[List[Any]]) -> List[Any]: - """Flatten list of lists to flat list of elements""" - return [j for i in arr for j in i] - - -def load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id_prefix): - """orchestrate 1 or more load jobs based on number of URIs and total byte - size of objects at gsurl""" - batches = get_batches_for_prefix(gcs_client, gsurl) - load_config = construct_load_job_config(gcs_client, gsurl) - load_config.labels = DEFAULT_JOB_LABELS - batch_count = len(batches) - - jobs: List[bigquery.LoadJob] = [] - for batch_num, batch in enumerate(batches): - print(load_config.to_api_repr()) - job: bigquery.LoadJob = bq_client.load_table_from_uri( - batch, - dest_table_ref, - job_config=load_config, - job_id_prefix=f"{job_id_prefix}{batch_num}-of-{batch_count}-", - ) - - print(f"started asyncronous 
bigquery load job with id: {job.job_id} for" - f" {gsurl}") - jobs.append(job) - - start_poll_for_errors = time.monotonic() - # Check if job failed quickly - while time.monotonic() - start_poll_for_errors < WAIT_FOR_JOB_SECONDS: - # Check if job failed quickly - for job in jobs: - job.reload() - if job.errors: - raise RuntimeError( - f"load job {job.job_id} failed quickly: {job.errors}") - time.sleep(JOB_POLL_INTERVAL_SECONDS) - - -def handle_duplicate_notification(bkt: storage.Bucket, - success_blob: storage.Blob, gsurl: str): - """ - Need to handle potential duplicate Pub/Sub notifications. - To achieve this we will drop an empty "claimed" file that indicates - an invocation of this cloud function has picked up the success file - with a certain creation timestamp. This will support republishing the - success file as a mechanism of re-running the ingestion while avoiding - duplicate ingestion due to multiple Pub/Sub messages for a success file - with the same creation time. - """ - success_blob.reload() - success_created_unix_timestamp = success_blob.time_created.timestamp() - - claim_blob: storage.Blob = bkt.blob( - success_blob.name.replace(SUCCESS_FILENAME, - f"_claimed_{success_created_unix_timestamp}")) - try: - claim_blob.upload_from_string("", if_generation_match=0) - except google.api_core.exceptions.PreconditionFailed as err: - raise RuntimeError( - f"The prefix {gsurl} appears to already have been claimed for " - f"{gsurl}{SUCCESS_FILENAME} with created timestamp" - f"{success_created_unix_timestamp}." - "This means that another invocation of this cloud function has" - "claimed the ingestion of this batch." 
- "This may be due to a rare duplicate delivery of the Pub/Sub " - "storage notification.") from err - - -def _get_parent_config_file(storage_client, config_filename, bucket, path): - config_dir_name = "_config" - parent_path = pathlib.Path(path).parent - config_path = parent_path / config_dir_name / config_filename - return read_gcs_file_if_exists(storage_client, - f"gs://{bucket}/{config_path}") - - -def look_for_config_in_parents(storage_client: storage.Client, gsurl: str, - config_filename: str) -> Optional[str]: - """look in parent directories for _config/config_filename""" - blob: storage.Blob = storage.Blob.from_string(gsurl) - bucket_name = blob.bucket.name - obj_path = blob.name - parts = removesuffix(obj_path, "/").split("/") - - def _get_parent_config(path): - return _get_parent_config_file(storage_client, config_filename, - bucket_name, path) - - config = None - while parts: - if config: - return config - config = _get_parent_config("/".join(parts)) - parts.pop() - return config - - -def construct_load_job_config(storage_client: storage.Client, - gsurl: str) -> bigquery.LoadJobConfig: - """ - merge dictionaries for loadjob.json configs in parent directories. - The configs closest to gsurl should take precedence. 
- """ - config_filename = "load.json" - blob: storage.Blob = storage.Blob.from_string(gsurl) - bucket_name = blob.bucket.name - obj_path = blob.name - parts = removesuffix(obj_path, "/").split("/") - - def _get_parent_config(path): - return _get_parent_config_file(storage_client, config_filename, - bucket_name, path) - - config_q: Deque[Dict[str, Any]] = collections.deque() - config_q.append(BASE_LOAD_JOB_CONFIG) - while parts: - config = _get_parent_config("/".join(parts)) - if config: - config_q.append(json.loads(config)) - parts.pop() - - merged_config: Dict = {} - while config_q: - recursive_update(merged_config, config_q.popleft(), in_place=True) - print(f"merged_config: {merged_config}") - return bigquery.LoadJobConfig.from_api_repr({"load": merged_config}) - - -def get_batches_for_prefix(gcs_client: storage.Client, - prefix_path: str, - ignore_subprefix="_config/", - ignore_file=SUCCESS_FILENAME) -> List[List[str]]: - """ - This function creates batches of GCS uris for a given prefix. - This prefix could be a table prefix or a partition prefix inside a - table prefix. - returns an Array of their batches - (one batch has an array of multiple GCS uris) - """ - batches = [] - blob: storage.Blob = storage.Blob.from_string(prefix_path) - bucket_name = blob.bucket.name - prefix_name = blob.name - - prefix_filter = f"{prefix_name}" - bucket = cached_get_bucket(gcs_client, bucket_name) - blobs = list(bucket.list_blobs(prefix=prefix_filter, delimiter="/")) - - cumulative_bytes = 0 - max_batch_size = int(os.getenv("MAX_BATCH_BYTES", DEFAULT_MAX_BATCH_BYTES)) - batch: List[str] = [] - for blob in blobs: - # API returns root prefix also. Which should be ignored. - # Similarly, the _SUCCESS file should be ignored. - # Finally, anything in the _config/ prefix should be ignored. 
- if (blob.name - not in {f"{prefix_name}/", f"{prefix_name}/{ignore_file}"} - or blob.name.startswith(f"{prefix_name}/{ignore_subprefix}")): - if blob.size == 0: # ignore empty files - print(f"ignoring empty file: gs://{bucket}/{blob.name}") - continue - cumulative_bytes += blob.size - - # keep adding until we reach threshold - if cumulative_bytes <= max_batch_size or len( - batch) > MAX_SOURCE_URIS_PER_LOAD: - batch.append(f"gs://{bucket_name}/{blob.name}") - else: - batches.append(batch.copy()) - batch.clear() - batch.append(f"gs://{bucket_name}/{blob.name}") - cumulative_bytes = blob.size - - # pick up remaining files in the final batch - if len(batch) > 0: - batches.append(batch.copy()) - batch.clear() - - if len(batches) > 1: - print(f"split into {len(batches)} load jobs.") - elif len(batches) == 1: - print("using single load job.") - else: - raise RuntimeError("No files to load!") - return batches - - -def parse_notification(notification: dict) -> Tuple[str, str]: - """valdiates notification payload - Args: - notification(dict): Pub/Sub Storage Notification - https://cloud.google.com/storage/docs/pubsub-notifications - Or Cloud Functions direct trigger - https://cloud.google.com/functions/docs/tutorials/storage - with notification schema - https://cloud.google.com/storage/docs/json_api/v1/objects#resource - Returns: - tuple of bucketId and objectId attributes - Raises: - KeyError if the input notification does not contain the expected - attributes. - """ - if notification.get("kind") == "storage#object": - # notification is GCS Object reosource from Cloud Functions trigger - # https://cloud.google.com/storage/docs/json_api/v1/objects#resource - return notification["bucket"], notification["name"] - if notification.get("attributes"): - # notification is Pub/Sub message. 
- try: - attributes = notification["attributes"] - return attributes["bucketId"], attributes["objectId"] - except KeyError: - raise RuntimeError( - "Issue with Pub/Sub message, did not contain expected" - f"attributes: 'bucketId' and 'objectId': {notification}" - ) from KeyError - raise RuntimeError( - "Cloud Function recieved unexpected trigger:\n" - f"{notification}\n" - "This function only supports direct Cloud Functions" - "Background Triggers or Pub/Sub storage notificaitons" - "as described in the following links:\n" - "https://cloud.google.com/storage/docs/pubsub-notifications\n" - "https://cloud.google.com/functions/docs/tutorials/storage") - - -# cache lookups against GCS API for 1 second as buckets / objects have update -# limit of once per second and we might do several of the same lookup during -# the functions lifetime. This should improve performance by eliminating -# unnecessary API calls. The lookups on bucket and objects in this function -# should not be changing during the function's lifetime as this would lead to -# non-deterministic results with or without this cache. 
-# https://cloud.google.com/storage/quotas -@cachetools.cached(cachetools.TTLCache(maxsize=1024, ttl=1)) -def read_gcs_file(gcs_client: storage.Client, gsurl: str) -> str: - """ - Read a GCS object as a string - - Args: - gcs_client: GCS client - gsurl: GCS URI for object to read in gs://bucket/path/to/object format - Returns: - str - """ - blob = storage.Blob.from_string(gsurl) - return blob.download_as_bytes(client=gcs_client).decode('UTF-8') - - -def read_gcs_file_if_exists(gcs_client: storage.Client, - gsurl: str) -> Optional[str]: - """return string of gcs object contents or None if the object does not exist - """ - try: - return read_gcs_file(gcs_client, gsurl) - except google.cloud.exceptions.NotFound: - return None - - -# Cache bucket lookups (see reasoning in comment above) -@cachetools.cached(cachetools.TTLCache(maxsize=1024, ttl=1)) -def cached_get_bucket( - gcs_client: storage.Client, - bucket_id: str, -) -> storage.Bucket: - """get storage.Bucket object by bucket_id string if exists or raise - google.cloud.exceptions.NotFound.""" - return gcs_client.get_bucket(bucket_id) - - -def dict_to_bq_schema(schema: List[Dict]) -> List[bigquery.SchemaField]: - """Converts a list of dicts to list of bigquery.SchemaField for use with - bigquery client library. Dicts must contain name and type keys. - The dict may optionally contain a mode key.""" - default_mode = "NULLABLE" - return [ - bigquery.SchemaField( - x["name"], - x["type"], - mode=x.get("mode") if x.get("mode") else default_mode) - for x in schema - ] - - -# To be added to built in str in python 3.9 -# https://www.python.org/dev/peps/pep-0616/ -def removesuffix(in_str: str, suffix: str) -> str: - """removes suffix from a string.""" - # suffix='' should not call self[:-0]. - if suffix and in_str.endswith(suffix): - return in_str[:-len(suffix)] - return in_str[:] - - -def recursive_update( - original: Dict, - update: Dict, - in_place: bool = False -): - """ - return a recursively updated dictionary. 
- - Note, lists will be completely overwritten by value in update if there is a - conflict. - - original: (dict) the base dictionary - update: (dict) the dictionary of updates to apply on original - in_place: (bool) if true then original will be mutated in place else a new - dictionary as a result of the update will be returned. - """ - out = original if in_place else copy.deepcopy(original) - - for key, value in update.items(): - if isinstance(value, dict): - out[key] = recursive_update(out.get(key, {}), value) - else: - out[key] = value - return out diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py new file mode 100644 index 000000000..e1e8df7f2 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -0,0 +1,477 @@ +# Copyright 2020 Google LLC. +# This software is provided as-is, without warranty or representation +# for any use or purpose. +# Your use of it is subject to your agreement with Google. + +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Contains utility methods used by the BQIngest process +""" +import os +import collections +import collections.abc +import copy +import json +import pathlib +import time +from typing import Any, Deque, Dict, List, Optional, Tuple + +import cachetools +import google.api_core.exceptions +import google.api_core.client_info +import google.cloud.exceptions +from google.cloud import bigquery, storage + +# Will wait up to this polling for errors before exiting +# This is to check if job fail quickly, not to assert it succeed. +# This may not be honored if longer than cloud function timeout. +# https://cloud.google.com/functions/docs/concepts/exec#timeout +# One might consider lowering this to 1-2 seconds to lower the +# upper bound of expected execution time to stay within the free tier. +# https://cloud.google.com/functions/pricing#free_tier +WAIT_FOR_JOB_SECONDS = int(os.getenv("WAIT_FOR_JOB_SECONDS", "5")) + +DEFAULT_EXTERNAL_TABLE_DEFINITION = { + # The default must be a self describing data format + # because autodetecting CSV /JSON schemas is likely to not match + # expectations / assumptions of the transformation query. + "sourceFormat": "PARQUET", +} + +# Use caution when lowering the job polling rate. +# Keep in mind that many concurrent executions of this cloud function should not +# violate the 300 concurrent requests or 100 request per second. +# https://cloud.google.com/bigquery/quotas#all_api_requests +JOB_POLL_INTERVAL_SECONDS = 1 + +DEFAULT_JOB_LABELS = { + "component": "event-based-gcs-ingest", + "cloud-function-name": os.getenv("FUNCTION_NAME"), +} + +BASE_LOAD_JOB_CONFIG = { + "sourceFormat": "CSV", + "fieldDelimiter": ",", + "writeDisposition": "WRITE_APPEND", + "labels": DEFAULT_JOB_LABELS, +} + +# https://cloud.google.com/bigquery/quotas#load_jobs +# 15TB per BQ load job (soft limit). +DEFAULT_MAX_BATCH_BYTES = str(15 * 10**12) + +# 10,000 GCS URIs per BQ load job. 
+MAX_SOURCE_URIS_PER_LOAD = 10**4
+
+SUCCESS_FILENAME = os.getenv("SUCCESS_FILENAME", "_SUCCESS")
+
+DEFAULT_JOB_PREFIX = "gcf-ingest-"
+
+
+def create_job_id_prefix(dest_table_ref: bigquery.TableReference,
+                         batch_id: Optional[str]):
+    """Create job id prefix with a consistent naming convention.
+    The naming convention is as follows:
+    gcf-ingest-<dataset>-<table>-<partition>-<batch>-
+    Parts that are not inferrable from the GCS path will have a 'None'
+    placeholder. This naming convention is crucial for monitoring the system.
+    Note, gcf-ingest- can be overridden with environment variable JOB_PREFIX
+
+    Examples:
+
+    Non-partitioned Non batched tables:
+    - gs://${BUCKET}/tpch/lineitem/_SUCCESS
+    - gcf-ingest-tpch-lineitem-None-None-
+    Non-partitioned batched tables:
+    - gs://${BUCKET}/tpch/lineitem/batch000/_SUCCESS
+    - gcf-ingest-tpch-lineitem-None-batch000-
+    Partitioned Batched tables:
+    - gs://${BUCKET}/tpch/lineitem/$20201031/batch000/_SUCCESS
+    - gcf-ingest-tpch-lineitem-20201031-batch000-
+    """
+    table_partition = dest_table_ref.table_id.split("$")
+    if len(table_partition) < 2:
+        # If there is no partition put a None placeholder
+        table_partition.append("None")
+    return f"{os.getenv('JOB_PREFIX', DEFAULT_JOB_PREFIX)}" \
+           f"{dest_table_ref.dataset_id}-" \
+           f"{'-'.join(table_partition)}-" \
+           f"{batch_id}-"
+
+
+def external_query(  # pylint: disable=too-many-arguments
+        gcs_client: storage.Client, bq_client: bigquery.Client, gsurl: str,
+        query: str, dest_table_ref: bigquery.TableReference,
+        job_id_prefix: str):
+    """Load from query over external table from GCS.
+ + This hinges on a SQL query defined in GCS at _config/bq_transform.sql and + an external table definition _config/external.json (otherwise will assume + CSV external table) + """ + external_table_config = read_gcs_file_if_exists( + gcs_client, f"{gsurl}_config/external.json") + if not external_table_config: + external_table_config = look_for_config_in_parents( + gcs_client, gsurl, "external.json") + if external_table_config: + external_table_def = json.loads(external_table_config) + else: + print(f"Falling back to default CSV external table." + f" {gsurl}_config/external.json not found.") + external_table_def = DEFAULT_EXTERNAL_TABLE_DEFINITION + + external_table_def["sourceUris"] = flatten2dlist( + get_batches_for_prefix(gcs_client, gsurl)) + print(f"external table def = {json.dumps(external_table_config, indent=2)}") + external_config = bigquery.ExternalConfig.from_api_repr(external_table_def) + job_config = bigquery.QueryJobConfig( + table_definitions={"temp_ext": external_config}, use_legacy_sql=False) + + # Note, dest_table might include a partition decorator. 
+ rendered_query = query.format( + dest_dataset=dest_table_ref.dataset_id, + dest_table=dest_table_ref.table_id, + ) + + job: bigquery.QueryJob = bq_client.query( + rendered_query, + job_config=job_config, + job_id_prefix=job_id_prefix, + ) + + print(f"started asynchronous query job: {job.job_id}") + + start_poll_for_errors = time.monotonic() + # Check if job failed quickly + while time.monotonic() - start_poll_for_errors < WAIT_FOR_JOB_SECONDS: + job.reload() + if job.errors: + raise RuntimeError( + f"query job {job.job_id} failed quickly: {job.errors}") + time.sleep(JOB_POLL_INTERVAL_SECONDS) + + +def flatten2dlist(arr: List[List[Any]]) -> List[Any]: + """Flatten list of lists to flat list of elements""" + return [j for i in arr for j in i] + + +def load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id_prefix): + """orchestrate 1 or more load jobs based on number of URIs and total byte + size of objects at gsurl""" + batches = get_batches_for_prefix(gcs_client, gsurl) + load_config = construct_load_job_config(gcs_client, gsurl) + load_config.labels = DEFAULT_JOB_LABELS + batch_count = len(batches) + + jobs: List[bigquery.LoadJob] = [] + for batch_num, batch in enumerate(batches): + print(load_config.to_api_repr()) + job: bigquery.LoadJob = bq_client.load_table_from_uri( + batch, + dest_table_ref, + job_config=load_config, + job_id_prefix=f"{job_id_prefix}{batch_num}-of-{batch_count}-", + ) + + print(f"started asyncronous bigquery load job with id: {job.job_id} for" + f" {gsurl}") + jobs.append(job) + + start_poll_for_errors = time.monotonic() + # Check if job failed quickly + while time.monotonic() - start_poll_for_errors < WAIT_FOR_JOB_SECONDS: + # Check if job failed quickly + for job in jobs: + job.reload() + if job.errors: + raise RuntimeError( + f"load job {job.job_id} failed quickly: {job.errors}") + time.sleep(JOB_POLL_INTERVAL_SECONDS) + + +def handle_duplicate_notification(bkt: storage.Bucket, + success_blob: storage.Blob, gsurl: str): + """ 
+ Need to handle potential duplicate Pub/Sub notifications. + To achieve this we will drop an empty "claimed" file that indicates + an invocation of this cloud function has picked up the success file + with a certain creation timestamp. This will support republishing the + success file as a mechanism of re-running the ingestion while avoiding + duplicate ingestion due to multiple Pub/Sub messages for a success file + with the same creation time. + """ + success_blob.reload() + success_created_unix_timestamp = success_blob.time_created.timestamp() + + claim_blob: storage.Blob = bkt.blob( + success_blob.name.replace(SUCCESS_FILENAME, + f"_claimed_{success_created_unix_timestamp}")) + try: + claim_blob.upload_from_string("", if_generation_match=0) + except google.api_core.exceptions.PreconditionFailed as err: + raise RuntimeError( + f"The prefix {gsurl} appears to already have been claimed for " + f"{gsurl}{SUCCESS_FILENAME} with created timestamp" + f"{success_created_unix_timestamp}." + "This means that another invocation of this cloud function has" + "claimed the ingestion of this batch." 
+ "This may be due to a rare duplicate delivery of the Pub/Sub " + "storage notification.") from err + + +def _get_parent_config_file(storage_client, config_filename, bucket, path): + config_dir_name = "_config" + parent_path = pathlib.Path(path).parent + config_path = parent_path / config_dir_name / config_filename + return read_gcs_file_if_exists(storage_client, + f"gs://{bucket}/{config_path}") + + +def look_for_config_in_parents(storage_client: storage.Client, gsurl: str, + config_filename: str) -> Optional[str]: + """look in parent directories for _config/config_filename""" + blob: storage.Blob = storage.Blob.from_string(gsurl) + bucket_name = blob.bucket.name + obj_path = blob.name + parts = removesuffix(obj_path, "/").split("/") + + def _get_parent_config(path): + return _get_parent_config_file(storage_client, config_filename, + bucket_name, path) + + config = None + while parts: + if config: + return config + config = _get_parent_config("/".join(parts)) + parts.pop() + return config + + +def construct_load_job_config(storage_client: storage.Client, + gsurl: str) -> bigquery.LoadJobConfig: + """ + merge dictionaries for loadjob.json configs in parent directories. + The configs closest to gsurl should take precedence. 
+ """ + config_filename = "load.json" + blob: storage.Blob = storage.Blob.from_string(gsurl) + bucket_name = blob.bucket.name + obj_path = blob.name + parts = removesuffix(obj_path, "/").split("/") + + def _get_parent_config(path): + return _get_parent_config_file(storage_client, config_filename, + bucket_name, path) + + config_q: Deque[Dict[str, Any]] = collections.deque() + config_q.append(BASE_LOAD_JOB_CONFIG) + while parts: + config = _get_parent_config("/".join(parts)) + if config: + config_q.append(json.loads(config)) + parts.pop() + + merged_config: Dict = {} + while config_q: + recursive_update(merged_config, config_q.popleft(), in_place=True) + print(f"merged_config: {merged_config}") + return bigquery.LoadJobConfig.from_api_repr({"load": merged_config}) + + +def get_batches_for_prefix(gcs_client: storage.Client, + prefix_path: str, + ignore_subprefix="_config/", + ignore_file=SUCCESS_FILENAME) -> List[List[str]]: + """ + This function creates batches of GCS uris for a given prefix. + This prefix could be a table prefix or a partition prefix inside a + table prefix. + returns an Array of their batches + (one batch has an array of multiple GCS uris) + """ + batches = [] + blob: storage.Blob = storage.Blob.from_string(prefix_path) + bucket_name = blob.bucket.name + prefix_name = blob.name + + prefix_filter = f"{prefix_name}" + bucket = cached_get_bucket(gcs_client, bucket_name) + blobs = list(bucket.list_blobs(prefix=prefix_filter, delimiter="/")) + + cumulative_bytes = 0 + max_batch_size = int(os.getenv("MAX_BATCH_BYTES", DEFAULT_MAX_BATCH_BYTES)) + batch: List[str] = [] + for blob in blobs: + # API returns root prefix also. Which should be ignored. + # Similarly, the _SUCCESS file should be ignored. + # Finally, anything in the _config/ prefix should be ignored. 
+ if (blob.name + not in {f"{prefix_name}/", f"{prefix_name}/{ignore_file}"} + or blob.name.startswith(f"{prefix_name}/{ignore_subprefix}")): + if blob.size == 0: # ignore empty files + print(f"ignoring empty file: gs://{bucket}/{blob.name}") + continue + cumulative_bytes += blob.size + + # keep adding until we reach threshold + if cumulative_bytes <= max_batch_size or len( + batch) > MAX_SOURCE_URIS_PER_LOAD: + batch.append(f"gs://{bucket_name}/{blob.name}") + else: + batches.append(batch.copy()) + batch.clear() + batch.append(f"gs://{bucket_name}/{blob.name}") + cumulative_bytes = blob.size + + # pick up remaining files in the final batch + if len(batch) > 0: + batches.append(batch.copy()) + batch.clear() + + if len(batches) > 1: + print(f"split into {len(batches)} load jobs.") + elif len(batches) == 1: + print("using single load job.") + else: + raise RuntimeError("No files to load!") + return batches + + +def parse_notification(notification: dict) -> Tuple[str, str]: + """valdiates notification payload + Args: + notification(dict): Pub/Sub Storage Notification + https://cloud.google.com/storage/docs/pubsub-notifications + Or Cloud Functions direct trigger + https://cloud.google.com/functions/docs/tutorials/storage + with notification schema + https://cloud.google.com/storage/docs/json_api/v1/objects#resource + Returns: + tuple of bucketId and objectId attributes + Raises: + KeyError if the input notification does not contain the expected + attributes. + """ + if notification.get("kind") == "storage#object": + # notification is GCS Object reosource from Cloud Functions trigger + # https://cloud.google.com/storage/docs/json_api/v1/objects#resource + return notification["bucket"], notification["name"] + if notification.get("attributes"): + # notification is Pub/Sub message. 
+ try: + attributes = notification["attributes"] + return attributes["bucketId"], attributes["objectId"] + except KeyError: + raise RuntimeError( + "Issue with Pub/Sub message, did not contain expected" + f"attributes: 'bucketId' and 'objectId': {notification}" + ) from KeyError + raise RuntimeError( + "Cloud Function recieved unexpected trigger:\n" + f"{notification}\n" + "This function only supports direct Cloud Functions" + "Background Triggers or Pub/Sub storage notificaitons" + "as described in the following links:\n" + "https://cloud.google.com/storage/docs/pubsub-notifications\n" + "https://cloud.google.com/functions/docs/tutorials/storage") + + +# cache lookups against GCS API for 1 second as buckets / objects have update +# limit of once per second and we might do several of the same lookup during +# the functions lifetime. This should improve performance by eliminating +# unnecessary API calls. The lookups on bucket and objects in this function +# should not be changing during the function's lifetime as this would lead to +# non-deterministic results with or without this cache. 
+# https://cloud.google.com/storage/quotas +@cachetools.cached(cachetools.TTLCache(maxsize=1024, ttl=1)) +def read_gcs_file(gcs_client: storage.Client, gsurl: str) -> str: + """ + Read a GCS object as a string + + Args: + gcs_client: GCS client + gsurl: GCS URI for object to read in gs://bucket/path/to/object format + Returns: + str + """ + blob = storage.Blob.from_string(gsurl) + return blob.download_as_bytes(client=gcs_client).decode('UTF-8') + + +def read_gcs_file_if_exists(gcs_client: storage.Client, + gsurl: str) -> Optional[str]: + """return string of gcs object contents or None if the object does not exist + """ + try: + return read_gcs_file(gcs_client, gsurl) + except google.cloud.exceptions.NotFound: + return None + + +# Cache bucket lookups (see reasoning in comment above) +@cachetools.cached(cachetools.TTLCache(maxsize=1024, ttl=1)) +def cached_get_bucket( + gcs_client: storage.Client, + bucket_id: str, +) -> storage.Bucket: + """get storage.Bucket object by bucket_id string if exists or raise + google.cloud.exceptions.NotFound.""" + return gcs_client.get_bucket(bucket_id) + + +def dict_to_bq_schema(schema: List[Dict]) -> List[bigquery.SchemaField]: + """Converts a list of dicts to list of bigquery.SchemaField for use with + bigquery client library. Dicts must contain name and type keys. + The dict may optionally contain a mode key.""" + default_mode = "NULLABLE" + return [ + bigquery.SchemaField( + x["name"], + x["type"], + mode=x.get("mode") if x.get("mode") else default_mode) + for x in schema + ] + + +# To be added to built in str in python 3.9 +# https://www.python.org/dev/peps/pep-0616/ +def removesuffix(in_str: str, suffix: str) -> str: + """removes suffix from a string.""" + # suffix='' should not call self[:-0]. + if suffix and in_str.endswith(suffix): + return in_str[:-len(suffix)] + return in_str[:] + + +def recursive_update(original: Dict, update: Dict, in_place: bool = False): + """ + return a recursively updated dictionary. 
+ + Note, lists will be completely overwritten by value in update if there is a + conflict. + + original: (dict) the base dictionary + update: (dict) the dictionary of updates to apply on original + in_place: (bool) if true then original will be mutated in place else a new + dictionary as a result of the update will be returned. + """ + out = original if in_place else copy.deepcopy(original) + + for key, value in update.items(): + if isinstance(value, dict): + out[key] = recursive_update(out.get(key, {}), value) + else: + out[key] = value + return out diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index 4121ba3fc..4adf3ba43 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -21,7 +21,7 @@ import pytest from google.cloud import bigquery, storage -import gcs_ocn_bq_ingest.main +import gcs_ocn_bq_ingest.utils TEST_DIR = os.path.realpath(os.path.dirname(__file__)) LOAD_JOB_POLLING_TIMEOUT = 10 # seconds @@ -93,7 +93,7 @@ def teardown(): def dest_table(request, bq, mock_env, dest_dataset) -> bigquery.Table: with open(os.path.join(TEST_DIR, "resources", "nation_schema.json")) as schema_file: - schema = gcs_ocn_bq_ingest.main.dict_to_bq_schema( + schema = gcs_ocn_bq_ingest.utils.dict_to_bq_schema( json.load(schema_file)) table = bigquery.Table( diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py index 6f983d22d..2a7e8896e 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py @@ -19,6 +19,7 @@ import pytest import gcs_ocn_bq_ingest.main +import gcs_ocn_bq_ingest.utils 
COMPILED_DEFAULT_DENTINATION_REGEX = re.compile( gcs_ocn_bq_ingest.main.DEFAULT_DESTINATION_REGEX) @@ -118,7 +119,7 @@ def test_default_destination_regex(test_input: str, ([["foo"], [], ["bar", "baz"]], ["foo", "bar", "baz"]), ]) def test_flattend2dlist(test_input, expected): - assert gcs_ocn_bq_ingest.main.flatten2dlist(test_input) == expected + assert gcs_ocn_bq_ingest.utils.flatten2dlist(test_input) == expected @pytest.mark.parametrize( @@ -201,4 +202,4 @@ def test_flattend2dlist(test_input, expected): # yapf: enable ]) def test_recursive_update(original, update, expected): - assert gcs_ocn_bq_ingest.main.recursive_update(original, update) == expected + assert gcs_ocn_bq_ingest.utils.recursive_update(original, update) == expected From b52d29123d411270f874845570da4193499434c6 Mon Sep 17 00:00:00 2001 From: Ryan den Otter Date: Fri, 4 Dec 2020 14:29:57 -0700 Subject: [PATCH 12/90] Fix sorting issues --- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py | 10 +++++----- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 8f4ff3d64..dec77b8ab 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -24,11 +24,11 @@ import google.cloud.exceptions from google.cloud import bigquery, storage -from .utils import (parse_notification, SUCCESS_FILENAME, removesuffix, - cached_get_bucket, handle_duplicate_notification, - DEFAULT_JOB_LABELS, read_gcs_file_if_exists, - look_for_config_in_parents, external_query, - create_job_id_prefix, load_batches) +from .utils import (DEFAULT_JOB_LABELS, SUCCESS_FILENAME, cached_get_bucket, + create_job_id_prefix, external_query, + handle_duplicate_notification, load_batches, + look_for_config_in_parents, 
parse_notification, + read_gcs_file_if_exists, removesuffix) # yapf: disable DEFAULT_DESTINATION_REGEX = ( diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py index e1e8df7f2..db99b839b 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -16,18 +16,18 @@ # limitations under the License. """Contains utility methods used by the BQIngest process """ -import os import collections import collections.abc import copy import json +import os import pathlib import time from typing import Any, Deque, Dict, List, Optional, Tuple import cachetools -import google.api_core.exceptions import google.api_core.client_info +import google.api_core.exceptions import google.cloud.exceptions from google.cloud import bigquery, storage From 3276214178f2562f0f7ef1b365d3becc42e6d941 Mon Sep 17 00:00:00 2001 From: Ryan den Otter Date: Fri, 4 Dec 2020 16:52:23 -0700 Subject: [PATCH 13/90] Move out constants into their own file Change import pattern --- .../gcs_ocn_bq_ingest/constants.py | 83 +++++++++++++++++++ .../gcs_ocn_bq_ingest/main.py | 61 +++++--------- .../gcs_ocn_bq_ingest/utils.py | 81 +++++------------- .../test_gcs_ocn_bq_ingest.py | 43 ++++------ 4 files changed, 140 insertions(+), 128 deletions(-) create mode 100644 tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py new file mode 100644 index 000000000..eefcc9f52 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py @@ -0,0 +1,83 @@ +# Copyright 2020 Google LLC. +# This software is provided as-is, without warranty or representation +# for any use or purpose. 
+# Your use of it is subject to your agreement with Google. + +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configurations for Cloud Function for loading data from GCS to BigQuery. +""" +import os + +import google.api_core.client_info +import google.cloud.exceptions + +# Will wait up to this polling for errors before exiting +# This is to check if job fail quickly, not to assert it succeed. +# This may not be honored if longer than cloud function timeout. +# https://cloud.google.com/functions/docs/concepts/exec#timeout +# One might consider lowering this to 1-2 seconds to lower the +# upper bound of expected execution time to stay within the free tier. +# https://cloud.google.com/functions/pricing#free_tier +WAIT_FOR_JOB_SECONDS = int(os.getenv("WAIT_FOR_JOB_SECONDS", "5")) + +DEFAULT_EXTERNAL_TABLE_DEFINITION = { + # The default must be a self describing data format + # because autodetecting CSV /JSON schemas is likely to not match + # expectations / assumptions of the transformation query. + "sourceFormat": "PARQUET", +} + +# Use caution when lowering the job polling rate. +# Keep in mind that many concurrent executions of this cloud function should not +# violate the 300 concurrent requests or 100 request per second. 
+# https://cloud.google.com/bigquery/quotas#all_api_requests +JOB_POLL_INTERVAL_SECONDS = 1 + +DEFAULT_JOB_LABELS = { + "component": "event-based-gcs-ingest", + "cloud-function-name": os.getenv("FUNCTION_NAME"), +} + +BASE_LOAD_JOB_CONFIG = { + "sourceFormat": "CSV", + "fieldDelimiter": ",", + "writeDisposition": "WRITE_APPEND", + "labels": DEFAULT_JOB_LABELS, +} + +# https://cloud.google.com/bigquery/quotas#load_jobs +# 15TB per BQ load job (soft limit). +DEFAULT_MAX_BATCH_BYTES = str(15 * 10**12) + +# 10,000 GCS URIs per BQ load job. +MAX_SOURCE_URIS_PER_LOAD = 10**4 + +SUCCESS_FILENAME = os.getenv("SUCCESS_FILENAME", "_SUCCESS") + +DEFAULT_JOB_PREFIX = "gcf-ingest-" + +# yapf: disable +DEFAULT_DESTINATION_REGEX = ( + r"^(?P[\w\-\._0-9]+)/" # dataset (required) + r"(?P[\w\-_0-9]+)/?" # table name (required) + r"(?P\$[0-9]+)?/?" # partition decorator (optional) + r"(?P[0-9]{4})?/?" # partition year (yyyy) (optional) + r"(?P[0-9]{2})?/?" # partition month (mm) (optional) + r"(?P
[0-9]{2})?/?" # partition day (dd) (optional) + r"(?P[0-9]{2})?/?" # partition hour (hh) (optional) + r"(?P[\w\-_0-9]+)?/" # batch id (optional) +) +# yapf: enable + +CLIENT_INFO = google.api_core.client_info.ClientInfo( + user_agent="google-pso-tool/bq-severless-loader") diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index dec77b8ab..6e81e2a1d 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -20,31 +20,9 @@ import re from typing import Dict -import google.api_core.client_info -import google.cloud.exceptions from google.cloud import bigquery, storage -from .utils import (DEFAULT_JOB_LABELS, SUCCESS_FILENAME, cached_get_bucket, - create_job_id_prefix, external_query, - handle_duplicate_notification, load_batches, - look_for_config_in_parents, parse_notification, - read_gcs_file_if_exists, removesuffix) - -# yapf: disable -DEFAULT_DESTINATION_REGEX = ( - r"^(?P[\w\-\._0-9]+)/" # dataset (required) - r"(?P
[\w\-_0-9]+)/?" # table name (required) - r"(?P\$[0-9]+)?/?" # partition decorator (optional) - r"(?P[0-9]{4})?/?" # partition year (yyyy) (optional) - r"(?P[0-9]{2})?/?" # partition month (mm) (optional) - r"(?P
[0-9]{2})?/?" # partition day (dd) (optional) - r"(?P[0-9]{2})?/?" # partition hour (hh) (optional) - r"(?P[\w\-_0-9]+)?/" # batch id (optional) -) -# yapf: enable - -CLIENT_INFO = google.api_core.client_info.ClientInfo( - user_agent="google-pso-tool/bq-severless-loader") +from . import constants, utils def main(event: Dict, context): # pylint: disable=unused-argument @@ -54,27 +32,28 @@ def main(event: Dict, context): # pylint: disable=unused-argument # Set by Cloud Function Execution Environment # https://cloud.google.com/functions/docs/env-var destination_regex = os.getenv("DESTINATION_REGEX", - DEFAULT_DESTINATION_REGEX) + constants.DEFAULT_DESTINATION_REGEX) dest_re = re.compile(destination_regex) - bucket_id, object_id = parse_notification(event) + bucket_id, object_id = utils.parse_notification(event) # Exit eagerly if not a success file. # we can improve this with pub/sub message filtering once it supports # a hasSuffix filter function (we can filter on hasSuffix successfile name) # https://cloud.google.com/pubsub/docs/filtering - if not object_id.endswith(f"/{SUCCESS_FILENAME}"): + if not object_id.endswith(f"/{constants.SUCCESS_FILENAME}"): print( - f"No-op. This notification was not for a {SUCCESS_FILENAME} file.") + f"No-op. This notification was not for a {constants.SUCCESS_FILENAME} file." 
+ ) return - prefix_to_load = removesuffix(object_id, SUCCESS_FILENAME) + prefix_to_load = utils.removesuffix(object_id, constants.SUCCESS_FILENAME) gsurl = f"gs://{bucket_id}/{prefix_to_load}" - gcs_client = storage.Client(client_info=CLIENT_INFO) + gcs_client = storage.Client(client_info=constants.CLIENT_INFO) project = os.getenv("BQ_PROJECT", gcs_client.project) - bkt = cached_get_bucket(gcs_client, bucket_id) + bkt = utils.cached_get_bucket(gcs_client, bucket_id) success_blob: storage.Blob = bkt.blob(object_id) - handle_duplicate_notification(bkt, success_blob, gsurl) + utils.handle_duplicate_notification(bkt, success_blob, gsurl) destination_match = dest_re.match(object_id) if not destination_match: @@ -95,7 +74,7 @@ def main(event: Dict, context): # pylint: disable=unused-argument if not partition and any(part_list): partition = '$' + ''.join(part_list) batch_id = destination_details.get('batch') - labels = DEFAULT_JOB_LABELS + labels = constants.DEFAULT_JOB_LABELS labels["bucket"] = bucket_id if batch_id: @@ -111,23 +90,23 @@ def main(event: Dict, context): # pylint: disable=unused-argument default_query_config = bigquery.QueryJobConfig() default_query_config.use_legacy_sql = False default_query_config.labels = labels - bq_client = bigquery.Client(client_info=CLIENT_INFO, + bq_client = bigquery.Client(client_info=constants.CLIENT_INFO, default_query_job_config=default_query_config) print("looking for bq_transform.sql") - external_query_sql = read_gcs_file_if_exists( + external_query_sql = utils.read_gcs_file_if_exists( gcs_client, f"{gsurl}_config/bq_transform.sql") if not external_query_sql: - external_query_sql = look_for_config_in_parents(gcs_client, gsurl, - "bq_transform.sql") + external_query_sql = utils.look_for_config_in_parents( + gcs_client, gsurl, "bq_transform.sql") if external_query_sql: print("EXTERNAL QUERY") print(f"found external query:\n{external_query_sql}") - external_query(gcs_client, bq_client, gsurl, external_query_sql, - 
dest_table_ref, - create_job_id_prefix(dest_table_ref, batch_id)) + utils.external_query( + gcs_client, bq_client, gsurl, external_query_sql, dest_table_ref, + utils.create_job_id_prefix(dest_table_ref, batch_id)) return print("LOAD_JOB") - load_batches(gcs_client, bq_client, gsurl, dest_table_ref, - create_job_id_prefix(dest_table_ref, batch_id)) + utils.load_batches(gcs_client, bq_client, gsurl, dest_table_ref, + utils.create_job_id_prefix(dest_table_ref, batch_id)) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py index db99b839b..5ec878c8a 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -31,50 +31,7 @@ import google.cloud.exceptions from google.cloud import bigquery, storage -# Will wait up to this polling for errors before exiting -# This is to check if job fail quickly, not to assert it succeed. -# This may not be honored if longer than cloud function timeout. -# https://cloud.google.com/functions/docs/concepts/exec#timeout -# One might consider lowering this to 1-2 seconds to lower the -# upper bound of expected execution time to stay within the free tier. -# https://cloud.google.com/functions/pricing#free_tier -WAIT_FOR_JOB_SECONDS = int(os.getenv("WAIT_FOR_JOB_SECONDS", "5")) - -DEFAULT_EXTERNAL_TABLE_DEFINITION = { - # The default must be a self describing data format - # because autodetecting CSV /JSON schemas is likely to not match - # expectations / assumptions of the transformation query. - "sourceFormat": "PARQUET", -} - -# Use caution when lowering the job polling rate. -# Keep in mind that many concurrent executions of this cloud function should not -# violate the 300 concurrent requests or 100 request per second. 
-# https://cloud.google.com/bigquery/quotas#all_api_requests -JOB_POLL_INTERVAL_SECONDS = 1 - -DEFAULT_JOB_LABELS = { - "component": "event-based-gcs-ingest", - "cloud-function-name": os.getenv("FUNCTION_NAME"), -} - -BASE_LOAD_JOB_CONFIG = { - "sourceFormat": "CSV", - "fieldDelimiter": ",", - "writeDisposition": "WRITE_APPEND", - "labels": DEFAULT_JOB_LABELS, -} - -# https://cloud.google.com/bigquery/quotas#load_jobs -# 15TB per BQ load job (soft limit). -DEFAULT_MAX_BATCH_BYTES = str(15 * 10**12) - -# 10,000 GCS URIs per BQ load job. -MAX_SOURCE_URIS_PER_LOAD = 10**4 - -SUCCESS_FILENAME = os.getenv("SUCCESS_FILENAME", "_SUCCESS") - -DEFAULT_JOB_PREFIX = "gcf-ingest-" +from . import constants def create_job_id_prefix(dest_table_ref: bigquery.TableReference, @@ -102,7 +59,7 @@ def create_job_id_prefix(dest_table_ref: bigquery.TableReference, if len(table_partition) < 2: # If there is no partition put a None placeholder table_partition.append("None") - return f"{os.getenv('JOB_PREFIX', DEFAULT_JOB_PREFIX)}" \ + return f"{os.getenv('JOB_PREFIX', constants.DEFAULT_JOB_PREFIX)}" \ f"{dest_table_ref.dataset_id}-" \ f"{'-'.join(table_partition)}-" \ f"{batch_id}-" @@ -128,7 +85,7 @@ def external_query( # pylint: disable=too-many-arguments else: print(f"Falling back to default CSV external table." 
f" {gsurl}_config/external.json not found.") - external_table_def = DEFAULT_EXTERNAL_TABLE_DEFINITION + external_table_def = constants.DEFAULT_EXTERNAL_TABLE_DEFINITION external_table_def["sourceUris"] = flatten2dlist( get_batches_for_prefix(gcs_client, gsurl)) @@ -153,12 +110,13 @@ def external_query( # pylint: disable=too-many-arguments start_poll_for_errors = time.monotonic() # Check if job failed quickly - while time.monotonic() - start_poll_for_errors < WAIT_FOR_JOB_SECONDS: + while time.monotonic( + ) - start_poll_for_errors < constants.WAIT_FOR_JOB_SECONDS: job.reload() if job.errors: raise RuntimeError( f"query job {job.job_id} failed quickly: {job.errors}") - time.sleep(JOB_POLL_INTERVAL_SECONDS) + time.sleep(constants.JOB_POLL_INTERVAL_SECONDS) def flatten2dlist(arr: List[List[Any]]) -> List[Any]: @@ -171,7 +129,7 @@ def load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id_prefix): size of objects at gsurl""" batches = get_batches_for_prefix(gcs_client, gsurl) load_config = construct_load_job_config(gcs_client, gsurl) - load_config.labels = DEFAULT_JOB_LABELS + load_config.labels = constants.DEFAULT_JOB_LABELS batch_count = len(batches) jobs: List[bigquery.LoadJob] = [] @@ -190,14 +148,15 @@ def load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id_prefix): start_poll_for_errors = time.monotonic() # Check if job failed quickly - while time.monotonic() - start_poll_for_errors < WAIT_FOR_JOB_SECONDS: + while time.monotonic( + ) - start_poll_for_errors < constants.WAIT_FOR_JOB_SECONDS: # Check if job failed quickly for job in jobs: job.reload() if job.errors: raise RuntimeError( f"load job {job.job_id} failed quickly: {job.errors}") - time.sleep(JOB_POLL_INTERVAL_SECONDS) + time.sleep(constants.JOB_POLL_INTERVAL_SECONDS) def handle_duplicate_notification(bkt: storage.Bucket, @@ -215,14 +174,14 @@ def handle_duplicate_notification(bkt: storage.Bucket, success_created_unix_timestamp = success_blob.time_created.timestamp() claim_blob: 
storage.Blob = bkt.blob( - success_blob.name.replace(SUCCESS_FILENAME, + success_blob.name.replace(constants.SUCCESS_FILENAME, f"_claimed_{success_created_unix_timestamp}")) try: claim_blob.upload_from_string("", if_generation_match=0) except google.api_core.exceptions.PreconditionFailed as err: raise RuntimeError( f"The prefix {gsurl} appears to already have been claimed for " - f"{gsurl}{SUCCESS_FILENAME} with created timestamp" + f"{gsurl}{constants.SUCCESS_FILENAME} with created timestamp" f"{success_created_unix_timestamp}." "This means that another invocation of this cloud function has" "claimed the ingestion of this batch." @@ -276,7 +235,7 @@ def _get_parent_config(path): bucket_name, path) config_q: Deque[Dict[str, Any]] = collections.deque() - config_q.append(BASE_LOAD_JOB_CONFIG) + config_q.append(constants.BASE_LOAD_JOB_CONFIG) while parts: config = _get_parent_config("/".join(parts)) if config: @@ -290,10 +249,11 @@ def _get_parent_config(path): return bigquery.LoadJobConfig.from_api_repr({"load": merged_config}) -def get_batches_for_prefix(gcs_client: storage.Client, - prefix_path: str, - ignore_subprefix="_config/", - ignore_file=SUCCESS_FILENAME) -> List[List[str]]: +def get_batches_for_prefix( + gcs_client: storage.Client, + prefix_path: str, + ignore_subprefix="_config/", + ignore_file=constants.SUCCESS_FILENAME) -> List[List[str]]: """ This function creates batches of GCS uris for a given prefix. This prefix could be a table prefix or a partition prefix inside a @@ -311,7 +271,8 @@ def get_batches_for_prefix(gcs_client: storage.Client, blobs = list(bucket.list_blobs(prefix=prefix_filter, delimiter="/")) cumulative_bytes = 0 - max_batch_size = int(os.getenv("MAX_BATCH_BYTES", DEFAULT_MAX_BATCH_BYTES)) + max_batch_size = int( + os.getenv("MAX_BATCH_BYTES", constants.DEFAULT_MAX_BATCH_BYTES)) batch: List[str] = [] for blob in blobs: # API returns root prefix also. Which should be ignored. 
@@ -327,7 +288,7 @@ def get_batches_for_prefix(gcs_client: storage.Client, # keep adding until we reach threshold if cumulative_bytes <= max_batch_size or len( - batch) > MAX_SOURCE_URIS_PER_LOAD: + batch) > constants.MAX_SOURCE_URIS_PER_LOAD: batch.append(f"gs://{bucket_name}/{blob.name}") else: batches.append(batch.copy()) diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py index 2a7e8896e..712b380be 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py @@ -18,11 +18,12 @@ import pytest +import gcs_ocn_bq_ingest.constants import gcs_ocn_bq_ingest.main import gcs_ocn_bq_ingest.utils COMPILED_DEFAULT_DENTINATION_REGEX = re.compile( - gcs_ocn_bq_ingest.main.DEFAULT_DESTINATION_REGEX) + gcs_ocn_bq_ingest.constants.DEFAULT_DESTINATION_REGEX) @pytest.mark.parametrize( @@ -127,40 +128,31 @@ def test_flattend2dlist(test_input, expected): [ # yapf: disable ( # empty original - {}, - { + {}, { "a": 1 - }, - { + }, { "a": 1 - } - ), + }), ( # empty update { "a": 1 - }, - {}, - { + }, {}, { "a": 1 }), ( # basic update of top-level key { "a": 1 - }, - { + }, { "a": 2 - }, - { + }, { "a": 2 }), ( # update of list { "a": [1] - }, - { + }, { "a": [2] - }, - { + }, { "a": [2] }), ( # update of nested key @@ -168,13 +160,11 @@ def test_flattend2dlist(test_input, expected): "a": { "b": 1 } - }, - { + }, { "a": { "b": 2 } - }, - { + }, { "a": { "b": 2 } @@ -186,13 +176,11 @@ def test_flattend2dlist(test_input, expected): "c": 2 }, "d": 3 - }, - { + }, { "a": { "b": 4 }, - }, - { + }, { "a": { "b": 4, "c": 2 @@ -202,4 +190,5 @@ def test_flattend2dlist(test_input, expected): # yapf: enable ]) def test_recursive_update(original, update, expected): - assert 
gcs_ocn_bq_ingest.utils.recursive_update(original, update) == expected + assert gcs_ocn_bq_ingest.utils.recursive_update(original, + update) == expected From 117d91bfa51ae736496cae1f83e7c7d540a16086 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Fri, 4 Dec 2020 16:17:44 -0800 Subject: [PATCH 14/90] fixup! pylint --- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 6e81e2a1d..e6e7deaa9 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -22,7 +22,8 @@ from google.cloud import bigquery, storage -from . import constants, utils +# pylint in cloud build is being flaky about this import discovery. +from . import constants, utils # pylint: disable=no-name-in-module def main(event: Dict, context): # pylint: disable=unused-argument From 3770c84132d57ae33ceb4ee11bdace188a2a15b0 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Fri, 4 Dec 2020 16:20:47 -0800 Subject: [PATCH 15/90] fixup! fixup! gcb pylint issue --- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py index 5ec878c8a..434b423c7 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -31,7 +31,8 @@ import google.cloud.exceptions from google.cloud import bigquery, storage -from . import constants +# pylint in cloud build is being flaky about this import discovery. +from . 
import constants # pylint: disable=no-name-in-module def create_job_id_prefix(dest_table_ref: bigquery.TableReference, From 81bb167a6abd4db96589a81f0856247c5ad55154 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Fri, 4 Dec 2020 11:59:00 -0800 Subject: [PATCH 16/90] feat: sequencing with backlog publisher / subscriber * Restructures code into constants and exception modules * Implements Backlog Publisher / Subscriber algorithm for ordering incrementals * Implements basic integration tests for Publisher / Subscriber --- .../gcs_ocn_bq_ingest/README.md | 1 + .../gcs_ocn_bq_ingest/constants.py | 33 +- .../gcs_ocn_bq_ingest/exceptions.py | 52 +++ .../gcs_ocn_bq_ingest/main.py | 206 +++++---- .../gcs_ocn_bq_ingest/ordering.py | 180 ++++++++ .../gcs_ocn_bq_ingest/requirements.txt | 5 +- .../gcs_ocn_bq_ingest/utils.py | 418 ++++++++++++++---- .../gcs_event_based_ingest/pytest.ini | 1 + .../requirements-dev.txt | 2 +- .../gcs_event_based_ingest/requirements.txt | 5 +- .../gcs_event_based_ingest/tests/conftest.py | 168 ++++++- .../test_gcs_ocn_bq_ingest.py | 24 + .../test_gcs_ocn_bq_ingest_it.py | 16 +- .../gcs_ocn_bq_ingest/test_ordering_it.py | 141 ++++++ .../tests/resources/ordering_schema.json | 10 + .../resources/test-data/ordering/00/_SUCCESS | 0 .../resources/test-data/ordering/00/data.csv | 1 + .../resources/test-data/ordering/01/_SUCCESS | 0 .../resources/test-data/ordering/01/data.csv | 1 + .../resources/test-data/ordering/02/_SUCCESS | 0 .../resources/test-data/ordering/02/data.csv | 1 + 21 files changed, 1069 insertions(+), 196 deletions(-) create mode 100644 tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/exceptions.py create mode 100644 tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py create mode 100644 tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py create mode 100644 tools/cloud_functions/gcs_event_based_ingest/tests/resources/ordering_schema.json create mode 100644 
tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/00/_SUCCESS create mode 100644 tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/00/data.csv create mode 100644 tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/01/_SUCCESS create mode 100644 tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/01/data.csv create mode 100644 tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/02/_SUCCESS create mode 100644 tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/02/data.csv diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md index 5e30a1c4b..1252b1dda 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md @@ -32,6 +32,7 @@ following default behavior. | `MAX_BATCH_BYTES` | Max bytes for BigQuery Load job | `15000000000000` ([15 TB](https://cloud.google.com/bigquery/quotas#load_jobs)| | `JOB_PREFIX` | Prefix for BigQuery Job IDs | `gcf-ingest-` | | `BQ_PROJECT` | Default BQ project to use if not specified in dataset capturing group | Project where Cloud Function is deployed | +| `ORDERED_PER_TABLE` | Force jobs to be executed sequentially (rather than parallel) based on the backlog. 
This is the same as having an `ORDERME` file in every config directory | `False` | ## Implementation notes diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py index eefcc9f52..a0db05425 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py @@ -16,7 +16,9 @@ # limitations under the License. """Configurations for Cloud Function for loading data from GCS to BigQuery. """ +import distutils.util import os +import re import google.api_core.client_info import google.cloud.exceptions @@ -71,13 +73,42 @@ r"^(?P[\w\-\._0-9]+)/" # dataset (required) r"(?P
[\w\-_0-9]+)/?" # table name (required) r"(?P\$[0-9]+)?/?" # partition decorator (optional) - r"(?P[0-9]{4})?/?" # partition year (yyyy) (optional) + r"(?:" # [begin] yyyy/mm/dd/hh/ group (optional) + r"(?P[0-9]{4})/?" # partition year (yyyy) (optional) r"(?P[0-9]{2})?/?" # partition month (mm) (optional) r"(?P
[0-9]{2})?/?" # partition day (dd) (optional) r"(?P[0-9]{2})?/?" # partition hour (hh) (optional) + r")?" # [end]yyyy/mm/dd/hh/ group (optional) r"(?P[\w\-_0-9]+)?/" # batch id (optional) ) # yapf: enable +DESTINATION_REGEX = re.compile( + os.getenv("DESTINATION_REGEX", DEFAULT_DESTINATION_REGEX)) + CLIENT_INFO = google.api_core.client_info.ClientInfo( user_agent="google-pso-tool/bq-severless-loader") + +# Filename used to (re)start the backfill subscriber loop. +BACKFILL_FILENAME = "_BACKFILL" + +# When this file is uploaded the subscriber will start applying items in order +# off the backlog. This is meant to help scenarios where historical loads to GCS +# are parallelized but must be applied in order. One can drop a _HISTORYDONE +# file to indicate the entire history has been uploaded and it is safe to start +# applying items in the backlog in order. By default this will be empty and the +# backlog subscriber will not wait for any file and start applying the first +# items in the backlog. +START_BACKFILL_FILENAME = os.getenv("START_BACKFILL_FILENAME") + +# Filenames that cause cloud function to take action. +ACTION_FILENAMES = { + SUCCESS_FILENAME, + BACKFILL_FILENAME, + START_BACKFILL_FILENAME, +} + +RESTART_BUFFER_SECONDS = os.getenv("RESTART_BUFFER_SECONDS", 30) + +ORDER_ALL_JOBS = bool( + distutils.util.strtobool(os.getenv("ORDER_ALL_JOBS", "False"))) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/exceptions.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/exceptions.py new file mode 100644 index 000000000..908db717c --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/exceptions.py @@ -0,0 +1,52 @@ +# Copyright 2020 Google LLC. +# This software is provided as-is, without warranty or representation +# for any use or purpose. +# Your use of it is subject to your agreement with Google. 
+ +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Custom Exceptions of GCS event based ingest to BigQuery""" + + +class DuplicateNotificationException(Exception): + """Exception to indicate that the function was triggered twice for the same + event.""" + + +class BigQueryJobFailure(Exception): + """Exception to indicate that the function was triggered twice for the same + event.""" + + +class DestinationRegexMatchException(Exception): + """Exception to indicate that a success file did not match the destination + regex specified in the DESTINATION_REGEX environment variable (or the + default)""" + + +class UnexpectedTriggerException(Exception): + """Exception to indicate the cloud function was triggered with an unexpected + payload.""" + + +class BacklogException(Exception): + """Exception to indicate an issue with the backlog mechanics of this + function.""" + + +EXCEPTIONS_TO_REPORT = { + BigQueryJobFailure, + UnexpectedTriggerException, + DestinationRegexMatchException, + BacklogException, + DuplicateNotificationException, +} diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index e6e7deaa9..199ac47a8 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -17,97 +17,133 @@ """Background Cloud Function for loading data from GCS to BigQuery. 
""" import os -import re +import time from typing import Dict -from google.cloud import bigquery, storage - # pylint in cloud build is being flaky about this import discovery. -from . import constants, utils # pylint: disable=no-name-in-module +# pylint: disable=no-name-in-module +from google.cloud import bigquery, error_reporting, storage + +from . import constants, exceptions, ordering, utils +# Reuse GCP Clients across function invocations using globbals +# https://cloud.google.com/functions/docs/bestpractices/tips#use_global_variables_to_reuse_objects_in_future_invocations +# pylint: disable=global-statement +from .utils import apply + +ERROR_REPORTING_CLIENT = None + +BQ_CLIENT = None + +GCS_CLIENT = None def main(event: Dict, context): # pylint: disable=unused-argument """entry point for background cloud function for event driven GCS to BigQuery ingest.""" - # pylint: disable=too-many-locals - # Set by Cloud Function Execution Environment - # https://cloud.google.com/functions/docs/env-var - destination_regex = os.getenv("DESTINATION_REGEX", - constants.DEFAULT_DESTINATION_REGEX) - dest_re = re.compile(destination_regex) - - bucket_id, object_id = utils.parse_notification(event) - - # Exit eagerly if not a success file. - # we can improve this with pub/sub message filtering once it supports - # a hasSuffix filter function (we can filter on hasSuffix successfile name) - # https://cloud.google.com/pubsub/docs/filtering - if not object_id.endswith(f"/{constants.SUCCESS_FILENAME}"): - print( - f"No-op. This notification was not for a {constants.SUCCESS_FILENAME} file." 
- ) - return - - prefix_to_load = utils.removesuffix(object_id, constants.SUCCESS_FILENAME) - gsurl = f"gs://{bucket_id}/{prefix_to_load}" - gcs_client = storage.Client(client_info=constants.CLIENT_INFO) - project = os.getenv("BQ_PROJECT", gcs_client.project) - bkt = utils.cached_get_bucket(gcs_client, bucket_id) - success_blob: storage.Blob = bkt.blob(object_id) - utils.handle_duplicate_notification(bkt, success_blob, gsurl) - - destination_match = dest_re.match(object_id) - if not destination_match: - raise RuntimeError(f"Object ID {object_id} did not match regex:" - f" {destination_regex}") - destination_details = destination_match.groupdict() try: - dataset = destination_details['dataset'] - table = destination_details['table'] - except KeyError: - raise RuntimeError( - f"Object ID {object_id} did not match dataset and table in regex:" - f" {destination_regex}") from KeyError - partition = destination_details.get('partition') - year, month, day, hour = ( - destination_details.get(key, "") for key in ('yyyy', 'mm', 'dd', 'hh')) - part_list = (year, month, day, hour) - if not partition and any(part_list): - partition = '$' + ''.join(part_list) - batch_id = destination_details.get('batch') - labels = constants.DEFAULT_JOB_LABELS - labels["bucket"] = bucket_id - - if batch_id: - labels["batch-id"] = batch_id - - if partition: - dest_table_ref = bigquery.TableReference.from_string( - f"{dataset}.{table}{partition}", default_project=project) - else: - dest_table_ref = bigquery.TableReference.from_string( - f"{dataset}.{table}", default_project=project) - - default_query_config = bigquery.QueryJobConfig() - default_query_config.use_legacy_sql = False - default_query_config.labels = labels - bq_client = bigquery.Client(client_info=constants.CLIENT_INFO, - default_query_job_config=default_query_config) - - print("looking for bq_transform.sql") - external_query_sql = utils.read_gcs_file_if_exists( - gcs_client, f"{gsurl}_config/bq_transform.sql") - if not 
external_query_sql: - external_query_sql = utils.look_for_config_in_parents( - gcs_client, gsurl, "bq_transform.sql") - if external_query_sql: - print("EXTERNAL QUERY") - print(f"found external query:\n{external_query_sql}") - utils.external_query( - gcs_client, bq_client, gsurl, external_query_sql, dest_table_ref, - utils.create_job_id_prefix(dest_table_ref, batch_id)) - return - - print("LOAD_JOB") - utils.load_batches(gcs_client, bq_client, gsurl, dest_table_ref, - utils.create_job_id_prefix(dest_table_ref, batch_id)) + function_start_time = time.monotonic() + # pylint: disable=too-many-locals + + bucket_id, object_id = utils.parse_notification(event) + + basename_object_id = os.path.basename(object_id) + + # Exit eagerly if this is not a file to take action on. + if basename_object_id not in constants.ACTION_FILENAMES: + action_filenames = constants.ACTION_FILENAMES + if constants.START_BACKFILL_FILENAME is None: + action_filenames.remove(None) + print(f"No-op. This notification was not for a" + f"{action_filenames} file.") + return + + # Ignore success files in the backlog directory + if (basename_object_id == constants.SUCCESS_FILENAME + and "/_backlog/" in object_id): + print(f"No-op. This notification was for " + f"gs://{bucket_id}/{object_id} a" + f"{constants.SUCCESS_FILENAME} in a" + "/_backlog/ directory.") + return + + gcs_client = lazy_gcs_client() + bq_client = lazy_bq_client() + table_ref, batch = utils.gcs_path_to_table_ref_and_batch(object_id) + + enforce_ordering = (constants.ORDER_ALL_JOBS + or utils.look_for_config_in_parents( + gcs_client, f"gs://{bucket_id}/{object_id}", + "ORDERME") is not None) + + bkt: storage.Bucket = utils.cached_get_bucket(gcs_client, bucket_id) + event_blob: storage.Blob = bkt.blob(object_id) + + if enforce_ordering: + if (constants.START_BACKFILL_FILENAME and basename_object_id + == constants.START_BACKFILL_FILENAME): + # This will be the first backfill file. 
+ ordering.start_backfill_subscriber_if_not_running( + gcs_client, bkt, utils.get_table_prefix(object_id)) + return + if basename_object_id == constants.SUCCESS_FILENAME: + ordering.backlog_publisher(gcs_client, event_blob) + elif basename_object_id == constants.BACKFILL_FILENAME: + ordering.backlog_subscriber(gcs_client, bq_client, + lazy_error_reporting_client(), + event_blob, function_start_time) + else: # Default behavior submit job as soon as success file lands. + bkt = utils.cached_get_bucket(gcs_client, bucket_id) + success_blob: storage.Blob = bkt.blob(object_id) + utils.handle_duplicate_notification(success_blob) + apply( + gcs_client, + bq_client, + success_blob, + None, # None lock blob as there is no serialization required. + utils.create_job_id(table_ref, batch)) + # Unexpected exceptions will actually raise which may cause a cold restart. + except tuple(exceptions.EXCEPTIONS_TO_REPORT): + # We do this because we know these errors do not require a cold restart + # of the cloud function. + lazy_error_reporting_client().report_exception() + + +def lazy_error_reporting_client() -> error_reporting.Client: + """ + Return a error reporting client that may be shared between cloud function + invocations. + + https://cloud.google.com/functions/docs/monitoring/error-reporting + """ + global ERROR_REPORTING_CLIENT + if not ERROR_REPORTING_CLIENT: + ERROR_REPORTING_CLIENT = error_reporting.Client( + client_info=constants.CLIENT_INFO) + return ERROR_REPORTING_CLIENT + + +def lazy_bq_client() -> bigquery.Client: + """ + Return a BigQuery Client that may be shared between cloud function + invocations. 
+ """ + global BQ_CLIENT + if not BQ_CLIENT: + default_query_config = bigquery.QueryJobConfig() + default_query_config.use_legacy_sql = False + default_query_config.labels = constants.DEFAULT_JOB_LABELS + BQ_CLIENT = bigquery.Client( + client_info=constants.CLIENT_INFO, + default_query_job_config=default_query_config) + return BQ_CLIENT + + +def lazy_gcs_client() -> storage.Client: + """ + Return a BigQuery Client that may be shared between cloud function + invocations. + """ + global GCS_CLIENT + if not GCS_CLIENT: + GCS_CLIENT = storage.Client(client_info=constants.CLIENT_INFO) + return GCS_CLIENT diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py new file mode 100644 index 000000000..310eb1f52 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py @@ -0,0 +1,180 @@ +# Copyright 2020 Google LLC. +# This software is provided as-is, without warranty or representation +# for any use or purpose. +# Your use of it is subject to your agreement with Google. + +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Background Cloud Function for loading data from GCS to BigQuery. +""" +import os +import time +import traceback + +import google.api_core +import google.api_core.exceptions +# pylint in cloud build is being flaky about this import discovery. 
# pylint: disable=no-name-in-module
from google.cloud import bigquery, error_reporting, storage

from . import constants, exceptions, utils


def backlog_publisher(
    gcs_client: storage.Client,
    event_blob: storage.Blob,
):
    """Add a success file to the backlog and trigger backfill if necessary.

    Args:
        gcs_client: storage.Client to use for GCS operations.
        event_blob: storage.Blob the _SUCCESS blob whose batch should be
            queued in the backlog.
    """
    bkt = event_blob.bucket

    # Create an entry in _backlog for this table for this batch / success file
    backlog_blob = success_blob_to_backlog_blob(event_blob)
    backlog_blob.upload_from_string("", client=gcs_client)
    print(f"added gs://{backlog_blob.bucket.name}/{backlog_blob.name} "
          "to the backlog.")

    start_backfill = True
    table_prefix = utils.get_table_prefix(event_blob.name)
    # Optionally wait for a human-dropped start-backfill marker before
    # kicking off the subscriber loop for this table prefix.
    if constants.START_BACKFILL_FILENAME:
        start_backfill_blob = bkt.blob(
            f"{table_prefix}/{constants.START_BACKFILL_FILENAME}")
        start_backfill = start_backfill_blob.exists()

    if start_backfill:
        start_backfill_subscriber_if_not_running(gcs_client, bkt, table_prefix)


# pylint: disable=too-many-arguments,too-many-locals
def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client,
                       error_client: error_reporting.Client,
                       backfill_blob: storage.Blob,
                       function_start_time: float):
    """Pick up the table lock, poll BQ job id until completion and process next
    item in the backlog.

    Args:
        gcs_client: storage.Client for GCS operations.
        bq_client: bigquery.Client used to poll job state.
        error_client: error_reporting.Client for reporting failed jobs.
        backfill_blob: storage.Blob the _BACKFILL file that triggered this
            invocation.
        function_start_time: float monotonic timestamp taken at function
            entry; used to restart the loop before the CF timeout.
    """
    # We need to retrigger the backfill loop before the Cloud Functions
    # timeout.
    restart_time = function_start_time + (
        float(os.getenv("FUNCTION_TIMEOUT_SEC", "60")) -
        constants.RESTART_BUFFER_SECONDS)
    bkt = backfill_blob.bucket
    utils.handle_duplicate_notification(backfill_blob)
    table_prefix = utils.get_table_prefix(backfill_blob.name)
    last_job_done = False
    # we will poll for job completion this long in an individual iteration of
    # the while loop.
    polling_timeout = 5  # seconds
    lock_blob: storage.Blob = bkt.blob(f"{table_prefix}/_bqlock")
    if restart_time - polling_timeout < time.monotonic():
        raise EnvironmentError(
            "The Cloud Function timeout is too short for "
            "backlog subscriber to do its job. We recommend "
            "setting the timeout to 540 seconds or at least "
            "1 minute (Cloud Functions default).")
    while time.monotonic() < restart_time - polling_timeout:
        job_id = utils.read_gcs_file_if_exists(
            gcs_client, f"gs://{bkt.name}/{lock_blob.name}")
        if job_id:
            if job_id.startswith(
                    os.getenv('JOB_PREFIX', constants.DEFAULT_JOB_PREFIX)):
                try:
                    last_job_done = utils.wait_on_bq_job_id(
                        bq_client, job_id, polling_timeout)
                except (exceptions.BigQueryJobFailure,
                        google.api_core.exceptions.NotFound):
                    last_job_done = False
                    error_client.report(
                        f"previous BigQuery job: {job_id} failed or could not "
                        "be found. This will kill the backfill subscriber for "
                        f"the table prefix {table_prefix}. "
                        "Once the issue is dealt with by a human, the lock "
                        "file at: "
                        f"gs://{lock_blob.bucket.name}/{lock_blob.name} "
                        "should be manually removed and a new empty "
                        f"{constants.BACKFILL_FILENAME} "
                        "file uploaded to: "
                        f"gs://{lock_blob.bucket.name}/{table_prefix}"
                        "/_BACKFILL "
                        "to resume the backfill subscriber so it can "
                        "continue with the next item in the backlog.\n"
                        "Original Exception:\n"
                        f"{traceback.format_exc()}")
                    time.sleep(polling_timeout)
                    continue
            else:
                # A lock whose contents do not carry our job prefix was
                # placed by a human; back off until it is removed.
                print(f"sleeping for {polling_timeout} seconds because "
                      f"found manual lock gs://{bkt.name}/{lock_blob.name} "
                      "with contents:\n"
                      f"""{utils.read_gcs_file_if_exists(gcs_client,
                      f'gs://{lock_blob.bucket.name}/{lock_blob.name}')}""")
                time.sleep(polling_timeout)
                continue
        if last_job_done:
            utils.remove_oldest_backlog_item(gcs_client, bkt, table_prefix)
            last_job_done = False

        next_backlog_file = utils.get_next_backlog_item(gcs_client, bkt,
                                                        table_prefix)
        if not next_backlog_file:
            print(f"backlog is empty for gs://{bkt.name}/{table_prefix}. "
                  "backlog subscriber exiting.")
            utils.handle_bq_lock(gcs_client, lock_blob, None)
            return
        next_success_file: storage.Blob = bkt.blob(
            next_backlog_file.name.replace("/_backlog/", "/"))
        table_ref, batch = utils.gcs_path_to_table_ref_and_batch(
            next_success_file.name)
        if not next_success_file.exists():
            raise exceptions.BacklogException(
                "backlog contains "
                f"gs://{next_backlog_file.bucket}/{next_backlog_file.name} "
                "but the corresponding success file does not exist at: "
                f"gs://{next_success_file.bucket}/{next_success_file.name}")
        utils.apply(gcs_client, bq_client, next_success_file, lock_blob,
                    utils.create_job_id(table_ref, batch))
    # retrigger the subscriber loop by reposting the _BACKFILL file
    print("ran out of time, restarting backfill subscriber loop for: "
          f"gs://{bkt.name}/{table_prefix}")
    backfill_blob = bkt.blob(f"{table_prefix}/{constants.BACKFILL_FILENAME}")
    backfill_blob.upload_from_string("")


def start_backfill_subscriber_if_not_running(gcs_client: storage.Client,
                                             bkt: storage.Bucket,
                                             table_prefix: str):
    """Start the backfill subscriber if it is not already running for this
    table prefix.

    Creates a _BACKFILL file for the table prefix if it does not already
    exist. The if_generation_match=0 precondition makes the upload an atomic
    "create if absent" so at most one subscriber is triggered.
    """
    # Create a _BACKFILL file for this table if not exists
    backfill_blob = bkt.blob(f"{table_prefix}/{constants.BACKFILL_FILENAME}")
    try:
        backfill_blob.upload_from_string("",
                                         if_generation_match=0,
                                         client=gcs_client)
        print("triggered backfill with "
              f"gs://{backfill_blob.bucket.name}/{backfill_blob.name} "
              f"created at {backfill_blob.time_created}. exiting. ")
    except google.api_core.exceptions.PreconditionFailed:
        # Another invocation won the race; report who holds the trigger.
        backfill_blob.reload()
        print("backfill already in progress due to: "
              f"gs://{backfill_blob.bucket.name}/{backfill_blob.name} "
              f"created at {backfill_blob.time_created}. exiting.")


def success_blob_to_backlog_blob(success_blob: storage.Blob) -> storage.Blob:
    """Create a blob object that is a pointer to the input success blob in the
    backlog.
    """
    bkt = success_blob.bucket
    table_prefix = utils.get_table_prefix(success_blob.name)
    success_file_suffix = utils.removeprefix(success_blob.name,
                                             f"{table_prefix}/")
    return bkt.blob(f"{table_prefix}/_backlog/{success_file_suffix}")
import constants # pylint: disable=no-name-in-module - +from google.cloud import bigquery, storage -def create_job_id_prefix(dest_table_ref: bigquery.TableReference, - batch_id: Optional[str]): - """Create job id prefix with a consistent naming convention. - The naming conventions is as follows: - gcf-ingest----- - Parts that are not inferrable from the GCS path with have a 'None' - placeholder. This naming convention is crucial for monitoring the system. - Note, gcf-ingest- can be overridden with environment variable JOB_PREFIX - - Examples: - - Non-partitioned Non batched tables: - - gs://${BUCKET}/tpch/lineitem/_SUCCESS - - gcf-ingest-tpch-lineitem-None-None- - Non-partitioned batched tables: - - gs://${BUCKET}/tpch/lineitem/batch000/_SUCCESS - - gcf-ingest-tpch-lineitem-None-batch000- - Partitioned Batched tables: - - gs://${BUCKET}/tpch/lineitem/$20201031/batch000/_SUCCESS - - gcf-ingest-tpch-lineitem-20201031-batch000- - """ - table_partition = dest_table_ref.table_id.split("$") - if len(table_partition) < 2: - # If there is no partition put a None placeholder - table_partition.append("None") - return f"{os.getenv('JOB_PREFIX', constants.DEFAULT_JOB_PREFIX)}" \ - f"{dest_table_ref.dataset_id}-" \ - f"{'-'.join(table_partition)}-" \ - f"{batch_id}-" +from . import constants, exceptions # pylint: disable=no-name-in-module def external_query( # pylint: disable=too-many-arguments gcs_client: storage.Client, bq_client: bigquery.Client, gsurl: str, - query: str, dest_table_ref: bigquery.TableReference, - job_id_prefix: str): + query: str, dest_table_ref: bigquery.TableReference, job_id: str): """Load from query over external table from GCS. 
This hinges on a SQL query defined in GCS at _config/bq_transform.sql and @@ -104,7 +74,7 @@ def external_query( # pylint: disable=too-many-arguments job: bigquery.QueryJob = bq_client.query( rendered_query, job_config=job_config, - job_id_prefix=job_id_prefix, + job_id=job_id, ) print(f"started asynchronous query job: {job.job_id}") @@ -115,7 +85,7 @@ def external_query( # pylint: disable=too-many-arguments ) - start_poll_for_errors < constants.WAIT_FOR_JOB_SECONDS: job.reload() if job.errors: - raise RuntimeError( + raise exceptions.BigQueryJobFailure( f"query job {job.job_id} failed quickly: {job.errors}") time.sleep(constants.JOB_POLL_INTERVAL_SECONDS) @@ -125,23 +95,18 @@ def flatten2dlist(arr: List[List[Any]]) -> List[Any]: return [j for i in arr for j in i] -def load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id_prefix): +def load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id): """orchestrate 1 or more load jobs based on number of URIs and total byte size of objects at gsurl""" batches = get_batches_for_prefix(gcs_client, gsurl) load_config = construct_load_job_config(gcs_client, gsurl) load_config.labels = constants.DEFAULT_JOB_LABELS - batch_count = len(batches) jobs: List[bigquery.LoadJob] = [] - for batch_num, batch in enumerate(batches): + for batch in batches: print(load_config.to_api_repr()) job: bigquery.LoadJob = bq_client.load_table_from_uri( - batch, - dest_table_ref, - job_config=load_config, - job_id_prefix=f"{job_id_prefix}{batch_num}-of-{batch_count}-", - ) + batch, dest_table_ref, job_config=load_config, job_id=job_id) print(f"started asyncronous bigquery load job with id: {job.job_id} for" f" {gsurl}") @@ -155,41 +120,11 @@ def load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id_prefix): for job in jobs: job.reload() if job.errors: - raise RuntimeError( + raise exceptions.BigQueryJobFailure( f"load job {job.job_id} failed quickly: {job.errors}") time.sleep(constants.JOB_POLL_INTERVAL_SECONDS) -def 
handle_duplicate_notification(bkt: storage.Bucket, - success_blob: storage.Blob, gsurl: str): - """ - Need to handle potential duplicate Pub/Sub notifications. - To achieve this we will drop an empty "claimed" file that indicates - an invocation of this cloud function has picked up the success file - with a certain creation timestamp. This will support republishing the - success file as a mechanism of re-running the ingestion while avoiding - duplicate ingestion due to multiple Pub/Sub messages for a success file - with the same creation time. - """ - success_blob.reload() - success_created_unix_timestamp = success_blob.time_created.timestamp() - - claim_blob: storage.Blob = bkt.blob( - success_blob.name.replace(constants.SUCCESS_FILENAME, - f"_claimed_{success_created_unix_timestamp}")) - try: - claim_blob.upload_from_string("", if_generation_match=0) - except google.api_core.exceptions.PreconditionFailed as err: - raise RuntimeError( - f"The prefix {gsurl} appears to already have been claimed for " - f"{gsurl}{constants.SUCCESS_FILENAME} with created timestamp" - f"{success_created_unix_timestamp}." - "This means that another invocation of this cloud function has" - "claimed the ingestion of this batch." 
# To be added to built in str in python 3.9
# https://www.python.org/dev/peps/pep-0616/
def removeprefix(in_str: str, prefix: str) -> str:
    """Remove a string prefix if present; otherwise return the input."""
    if in_str.startswith(prefix):
        return in_str[len(prefix):]
    return in_str


def handle_duplicate_notification(blob_to_claim: storage.Blob):
    """Claim a blob so duplicate Pub/Sub notifications are processed once.

    Need to handle potential duplicate Pub/Sub notifications.
    To achieve this we will drop an empty "claimed" file that indicates
    an invocation of this cloud function has picked up the success file
    with a certain creation timestamp. This will support republishing the
    success file as a mechanism of re-running the ingestion while avoiding
    duplicate ingestion due to multiple Pub/Sub messages for a success file
    with the same creation time.

    Raises:
        exceptions.DuplicateNotificationException: if this blob's creation
            timestamp has already been claimed by another invocation.
    """
    blob_to_claim.reload()
    created_unix_timestamp = blob_to_claim.time_created.timestamp()

    basename = os.path.basename(blob_to_claim.name)
    claim_blob: storage.Blob = blob_to_claim.bucket.blob(
        blob_to_claim.name.replace(
            basename, f"_claimed_{basename}_created_at_"
            f"{created_unix_timestamp}"))
    try:
        # if_generation_match=0 makes this an atomic create-if-absent.
        claim_blob.upload_from_string("", if_generation_match=0)
    except google.api_core.exceptions.PreconditionFailed as err:
        raise exceptions.DuplicateNotificationException(
            f"gs://{blob_to_claim.bucket.name}/{blob_to_claim.name} appears "
            "to already have been claimed for created timestamp: "
            f"{created_unix_timestamp}. "
            "This means that another invocation of this cloud function has "
            "claimed the work to be done for this file. "
            "This may be due to a rare duplicate delivery of the Pub/Sub "
            "storage notification.") from err


def get_table_prefix(object_id: str) -> str:
    """Find the table prefix for an object_id based on the destination regex.

    Args:
        object_id: str object ID to parse
    Returns:
        str: table prefix
    Raises:
        exceptions.DestinationRegexMatchException: if the object id does not
            match the destination regex or its "table" capturing group.
    """
    match = constants.DESTINATION_REGEX.match(object_id)
    if not match:
        raise exceptions.DestinationRegexMatchException(
            f"could not determine table prefix for object id: {object_id} "
            "because it did not contain a match for destination_regex: "
            f"{constants.DESTINATION_REGEX.pattern}")
    table_group_index = match.re.groupindex.get("table")
    if table_group_index:
        # The end offset of the named "table" group marks the table-level
        # prefix within the object id.
        table_level_index = match.regs[table_group_index][1]
        return object_id[:table_level_index]
    raise exceptions.DestinationRegexMatchException(
        f"could not determine table prefix for object id: {object_id} "
        "because it did not contain a match for the table capturing group "
        f"in destination regex: {constants.DESTINATION_REGEX.pattern}")


def get_next_backlog_item(
    gcs_client: storage.Client,
    bkt: storage.Bucket,
    table_prefix: str,
) -> Optional[storage.Blob]:
    """
    Get next blob in the backlog if the backlog is not empty.

    Args:
        gcs_client: storage.Client
        bkt: storage.Bucket that this cloud function is ingesting data for.
        table_prefix: the prefix for the table whose backlog should be checked.

    Returns:
        storage.Blob: pointer to a SUCCESS file in the backlog, or None if
        the backlog is empty.
    """
    backlog_blobs = gcs_client.list_blobs(bkt,
                                          prefix=f"{table_prefix}/_backlog/")
    # Backlog items will be lexicographically sorted
    # https://cloud.google.com/storage/docs/json_api/v1/objects/list
    return next(iter(backlog_blobs), None)  # first item or None


def remove_oldest_backlog_item(
    gcs_client: storage.Client,
    bkt: storage.Bucket,
    table_prefix: str,
) -> bool:
    """
    Remove the oldest pointer in the backlog if the backlog is not empty.

    Args:
        gcs_client: storage.Client
        bkt: storage.Bucket that this cloud function is ingesting data for.
        table_prefix: the prefix for the table whose backlog should be checked.

    Returns:
        bool: True if we removed the oldest blob. False if the backlog was
        empty.
    """
    backlog_blobs = gcs_client.list_blobs(bkt,
                                          prefix=f"{table_prefix}/_backlog/")
    # Backlog items will be lexicographically sorted
    # https://cloud.google.com/storage/docs/json_api/v1/objects/list
    blob: storage.Blob
    for blob in backlog_blobs:
        blob.delete()
        return True  # Return after deleting first blob in the iterator
    return False


def wait_on_bq_job_id(bq_client: bigquery.Client,
                      job_id: str,
                      polling_timeout: int,
                      polling_interval: int = 1) -> bool:
    """
    Wait for a BigQuery Job ID to complete.

    Args:
        bq_client: bigquery.Client
        job_id: str the BQ job ID to wait on
        polling_timeout: int number of seconds to poll this job ID
        polling_interval: frequency to query the job state during polling
    Returns:
        bool: if the job ID has finished successfully. True if DONE without
        errors, False if RUNNING or PENDING
    Raises:
        exceptions.BigQueryJobFailure if the job failed.
        google.api_core.exceptions.NotFound if the job id cannot be found.
    """
    start_poll = time.monotonic()
    while time.monotonic() - start_poll < (polling_timeout - polling_interval):
        job: Union[bigquery.LoadJob,
                   bigquery.QueryJob] = bq_client.get_job(job_id)
        if job.state == "DONE":
            if job.errors:
                raise exceptions.BigQueryJobFailure(
                    f"BigQuery Job {job.job_id} failed during backfill with "
                    f"the following errors: {job.errors}")
            return True
        if job.state in {"RUNNING", "PENDING"}:
            print(f"waiting on BigQuery Job {job.job_id}")
            time.sleep(polling_interval)
    return False


def wait_on_gcs_blob(gcs_client: storage.Client,
                     wait_blob: storage.Blob,
                     polling_timeout: int,
                     polling_interval: int = 1) -> bool:
    """
    Wait for a GCS object to exist.

    Args:
        gcs_client: storage.Client
        wait_blob: storage.Blob the GCS object to wait on.
        polling_timeout: int number of seconds to poll for this object
        polling_interval: frequency to re-check existence during polling
    Returns:
        bool: True if the object came to exist within the polling timeout,
        False otherwise.
    """
    start_poll = time.monotonic()
    while time.monotonic() - start_poll < (polling_timeout - polling_interval):
        if wait_blob.exists(client=gcs_client):
            return True
        print(
            f"waiting on GCS file gs://{wait_blob.bucket.name}/{wait_blob.name}"
        )
        time.sleep(polling_interval)
    return False


def gcs_path_to_table_ref_and_batch(
        object_id) -> Tuple[bigquery.TableReference, Optional[str]]:
    """Extract a BigQuery table reference and batch id from a GCS object id.

    Args:
        object_id: str GCS object id to parse with the destination regex.
    Returns:
        Tuple[bigquery.TableReference, Optional[str]]: the destination table
        (with "$partition" decorator when a partition is inferable) and the
        batch id (None when the path carries no batch).
    Raises:
        RuntimeError: if the object id does not match the destination regex.
        exceptions.DestinationRegexMatchException: if dataset/table groups
            are missing from the match.
    """
    destination_match = constants.DESTINATION_REGEX.match(object_id)
    if not destination_match:
        raise RuntimeError(f"Object ID {object_id} did not match regex:"
                           f" {constants.DESTINATION_REGEX.pattern}")
    destination_details = destination_match.groupdict()
    try:
        dataset = destination_details['dataset']
        table = destination_details['table']
    except KeyError:
        raise exceptions.DestinationRegexMatchException(
            f"Object ID {object_id} did not match dataset and table in regex:"
            f" {constants.DESTINATION_REGEX.pattern}") from KeyError
    partition = destination_details.get('partition')
    # groupdict() yields None for named groups that did not participate in
    # the match; coerce to "" so ''.join below cannot fail on None.
    year, month, day, hour = (destination_details.get(key) or ""
                              for key in ('yyyy', 'mm', 'dd', 'hh'))
    part_list = (year, month, day, hour)
    if not partition and any(part_list):
        partition = '$' + ''.join(part_list)
    batch_id = destination_details.get('batch')
    # NOTE(review): this aliases (does not copy) the module-level
    # DEFAULT_JOB_LABELS, so adding "batch-id" mutates the shared constant.
    # It looks intentional (later job configs read DEFAULT_JOB_LABELS) --
    # TODO confirm before replacing with a copy.
    labels = constants.DEFAULT_JOB_LABELS

    if batch_id:
        labels["batch-id"] = batch_id

    default_project = os.getenv("BQ_PROJECT", os.getenv("GCP_PROJECT"))
    if partition:
        dest_table_ref = bigquery.TableReference.from_string(
            f"{dataset}.{table}{partition}", default_project=default_project)
    else:
        dest_table_ref = bigquery.TableReference.from_string(
            f"{dataset}.{table}", default_project=default_project)
    return dest_table_ref, batch_id


def create_job_id(dest_table_ref: bigquery.TableReference,
                  batch_id: Optional[str]):
    """Create a unique job id with a consistent naming convention.
    The naming convention is as follows:
    gcf-ingest-----
    Parts that are not inferrable from the GCS path will have a 'None'
    placeholder. This naming convention is crucial for monitoring the system.
    A trailing uuid4 guarantees uniqueness across retries.
    Note, gcf-ingest- can be overridden with environment variable JOB_PREFIX

    Examples:

    Non-partitioned Non batched tables:
      gs://${BUCKET}/tpch/lineitem/_SUCCESS
      gcf-ingest-tpch-lineitem-None-None-
    Non-partitioned batched tables:
      gs://${BUCKET}/tpch/lineitem/batch000/_SUCCESS
      gcf-ingest-tpch-lineitem-None-batch000-
    Partitioned Batched tables:
      gs://${BUCKET}/tpch/lineitem/$20201031/batch000/_SUCCESS
      gcf-ingest-tpch-lineitem-20201031-batch000-
    """
    table_partition = dest_table_ref.table_id.split("$")
    if len(table_partition) < 2:
        # If there is no partition put a None placeholder
        table_partition.append("None")
    return f"{os.getenv('JOB_PREFIX', constants.DEFAULT_JOB_PREFIX)}" \
           f"{dest_table_ref.dataset_id}-" \
           f"{'-'.join(table_partition)}-" \
           f"{batch_id}-{uuid.uuid4()}"


def handle_bq_lock(gcs_client: storage.Client, lock_blob: storage.Blob,
                   next_job_id: Optional[str]):
    """Reclaim the lock blob for the new job id (in-place) or delete the lock
    blob if next_job_id is None.

    Raises:
        exceptions.BacklogException: if the lock generation changed
            concurrently (another process touched the lock).
    """
    try:
        if next_job_id:
            if lock_blob.exists():
                lock_blob.upload_from_string(
                    next_job_id,
                    if_generation_match=lock_blob.generation,
                    client=gcs_client)
            else:  # This happens when submitting the first job in the backlog
                lock_blob.upload_from_string(next_job_id,
                                             if_generation_match=0,
                                             client=gcs_client)
        else:
            print("releasing lock at: "
                  f"gs://{lock_blob.bucket.name}/{lock_blob.name}")
            lock_blob.delete(
                if_generation_match=lock_blob.generation,
                client=gcs_client,
            )
    except google.api_core.exceptions.PreconditionFailed as err:
        raise exceptions.BacklogException(
            f"The lock at gs://{lock_blob.bucket.name}/{lock_blob.name} "
            "was changed by another process.") from err


def apply(
    gcs_client: storage.Client,
    bq_client: bigquery.Client,
    success_blob: storage.Blob,
    lock_blob: Optional[storage.Blob],
    job_id: str,
):
    """
    Apply an incremental batch to the target BigQuery table via an asynchronous
    load job or external query.

    Args:
        gcs_client: storage.Client
        bq_client: bigquery.Client
        success_blob: storage.Blob the success file whose batch should be
            applied.
        lock_blob: storage.Blob lock to reclaim for job_id (skipped if None).
        job_id: str job id to submit the load/query job under.
    """
    bkt = success_blob.bucket
    if lock_blob is not None:
        handle_bq_lock(gcs_client, lock_blob, job_id)
    dest_table_ref, _ = gcs_path_to_table_ref_and_batch(success_blob.name)
    gsurl = removesuffix(f"gs://{bkt.name}/{success_blob.name}",
                         constants.SUCCESS_FILENAME)
    print("looking for bq_transform.sql")
    # A transform SQL in _config (here or in a parent prefix) switches the
    # ingestion from a plain load job to an external query.
    external_query_sql = read_gcs_file_if_exists(
        gcs_client, f"{gsurl}_config/bq_transform.sql")
    if not external_query_sql:
        external_query_sql = look_for_config_in_parents(
            gcs_client, gsurl, "bq_transform.sql")
    if external_query_sql:
        print("EXTERNAL QUERY")
        print(f"found external query:\n{external_query_sql}")
        external_query(gcs_client, bq_client, gsurl, external_query_sql,
                       dest_table_ref, job_id)
        return

    print("LOAD_JOB")
    load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id)
integration test requiring cloud resouces (deselect with '-m "not IT"') + ORDERING: marks tests that test features related to ordering CLI: marks tests of CLI utilities addopts = --workers=auto diff --git a/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt b/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt index 7682e7da0..b86a61183 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt +++ b/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt @@ -7,4 +7,4 @@ mypy pylint pytest-parallel pytest-cov -google-cloud-pubsub +google-cloud-pubsub>=2.2.0 diff --git a/tools/cloud_functions/gcs_event_based_ingest/requirements.txt b/tools/cloud_functions/gcs_event_based_ingest/requirements.txt index c65fa4df4..7279c2550 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/requirements.txt +++ b/tools/cloud_functions/gcs_event_based_ingest/requirements.txt @@ -1,3 +1,4 @@ -google-cloud-bigquery>=2.2.0 -google-cloud-storage>=1.32.0 +google-cloud-bigquery>=2.6.0 +google-cloud-storage>=1.33.0 +google-cloud-error-reporting>=1.1.0 cachetools diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index 4adf3ba43..93a459d63 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -18,9 +18,11 @@ import uuid from typing import List +import google.api_core.exceptions import pytest -from google.cloud import bigquery, storage +from google.cloud import bigquery, error_reporting, storage +import gcs_ocn_bq_ingest.ordering import gcs_ocn_bq_ingest.utils TEST_DIR = os.path.realpath(os.path.dirname(__file__)) @@ -39,7 +41,12 @@ def gcs() -> storage.Client: return storage.Client() -@pytest.mark.usefixtures("gcs") +@pytest.fixture(scope="module") +def error() -> error_reporting.Client: + """GCS Client""" + return error_reporting.Client() + + 
@pytest.fixture def gcs_bucket(request, gcs) -> storage.bucket.Bucket: """GCS bucket for test artifacts""" @@ -60,16 +67,15 @@ def teardown(): return bucket -@pytest.mark.usefixtures("gcs_bucket") @pytest.fixture def mock_env(gcs, monkeypatch): """environment variable mocks""" # Infer project from ADC of gcs client. monkeypatch.setenv("GCP_PROJECT", gcs.project) monkeypatch.setenv("FUNCTION_NAME", "integration-test") + monkeypatch.setenv("FUNCTION_TIMEOUT_SEC", "120") -@pytest.mark.usefixtures("bq", "mock_env") @pytest.fixture def dest_dataset(request, bq, mock_env, monkeypatch): random_dataset = f"test_bq_ingest_gcf_{str(uuid.uuid4())[:8].replace('-','_')}" @@ -88,7 +94,6 @@ def teardown(): return dataset -@pytest.mark.usefixtures("bq", "mock_env", "dest_dataset") @pytest.fixture def dest_table(request, bq, mock_env, dest_dataset) -> bigquery.Table: with open(os.path.join(TEST_DIR, "resources", @@ -111,7 +116,6 @@ def teardown(): @pytest.fixture(scope="function") -@pytest.mark.usefixtures("gcs_bucket", "dest_dataset", "dest_table") def gcs_data(request, gcs_bucket, dest_dataset, dest_table) -> storage.blob.Blob: data_objs = [] @@ -135,7 +139,6 @@ def teardown(): @pytest.fixture(scope="function") -@pytest.mark.usefixtures("gcs_bucket", "dest_dataset", "dest_table") def gcs_data_under_sub_dirs(request, gcs_bucket, dest_dataset, dest_table) -> storage.blob.Blob: data_objs = [] @@ -151,7 +154,7 @@ def gcs_data_under_sub_dirs(request, gcs_bucket, dest_dataset, def teardown(): for do in data_objs: - if do.exists: + if do.exists(): do.delete() request.addfinalizer(teardown) @@ -159,7 +162,6 @@ def teardown(): @pytest.fixture(scope="function") -@pytest.mark.usefixtures("gcs_bucket", "dest_dataset", "dest_table") def gcs_truncating_load_config(request, gcs_bucket, dest_dataset, dest_table) -> storage.blob.Blob: config_obj: storage.blob.Blob = gcs_bucket.blob("/".join([ @@ -180,7 +182,6 @@ def teardown(): @pytest.fixture(scope="function") 
-@pytest.mark.usefixtures("gcs_bucket", "dest_dataset", "dest_table") def gcs_batched_data(request, gcs_bucket, dest_dataset, dest_table) -> List[storage.blob.Blob]: """ @@ -199,14 +200,13 @@ def gcs_batched_data(request, gcs_bucket, dest_dataset, def teardown(): for do in data_objs: - if do.exists: + if do.exists(): do.delete() request.addfinalizer(teardown) return [data_objs[-1], data_objs[-4]] -@pytest.mark.usefixtures("gcs_bucket", "dest_dataset", "dest_table") @pytest.fixture def gcs_external_config(request, gcs_bucket, dest_dataset, dest_table) -> List[storage.blob.Blob]: @@ -249,7 +249,7 @@ def gcs_external_config(request, gcs_bucket, dest_dataset, def teardown(): for do in config_objs: - if do.exists: + if do.exists(): do.delete() request.addfinalizer(teardown) @@ -257,7 +257,6 @@ def teardown(): @pytest.fixture(scope="function") -@pytest.mark.usefixtures("gcs_bucket", "dest_dataset", "dest_parttioned_table") def gcs_partitioned_data(request, gcs_bucket, dest_dataset, dest_partitioned_table) -> List[storage.blob.Blob]: data_objs = [] @@ -274,7 +273,8 @@ def gcs_partitioned_data(request, gcs_bucket, dest_dataset, def teardown(): for dobj in data_objs: - if dobj.exists: + # we expect some backfill files to be removed by the cloud function. + if dobj.exists(): dobj.delete() request.addfinalizer(teardown) @@ -282,7 +282,6 @@ def teardown(): @pytest.fixture(scope="function") -@pytest.mark.usefixtures("gcs_bucket", "dest_dataset", "dest_table") def dest_partitioned_table(request, bq: bigquery.Client, mock_env, dest_dataset) -> bigquery.Table: public_table: bigquery.Table = bq.get_table( @@ -335,3 +334,140 @@ def bq_wait_for_rows(bq_client: bigquery.Client, table: bigquery.Table, f"{table.project}.{table.dataset_id}.{table.table_id} to " f"reach {expected_num_rows} rows." 
f"last poll returned {actual_num_rows} rows.") + + +@pytest.fixture +def dest_ordered_update_table(request, bq, mock_env, + dest_dataset) -> bigquery.Table: + with open(os.path.join(TEST_DIR, "resources", + "ordering_schema.json")) as schema_file: + schema = gcs_ocn_bq_ingest.utils.dict_to_bq_schema( + json.load(schema_file)) + + table = bigquery.Table( + f"{os.environ.get('GCP_PROJECT')}.{dest_dataset.dataset_id}" + ".cf_test_ordering", + schema=schema, + ) + + table: bigquery.Table = bq.create_table(table) + # Our test query only updates so we need to populate the first row. + bq.load_table_from_json([{"id": 1, "alpha_update": ""}], table) + + def teardown(): + bq.delete_table(table, not_found_ok=True) + + request.addfinalizer(teardown) + return table + + +@pytest.fixture(scope="function") +def gcs_ordered_update_data( + request, gcs_bucket, dest_dataset, + dest_ordered_update_table) -> List[storage.blob.Blob]: + data_objs = [] + chunks = { + "00", + "01", + "02", + } + for chunk in chunks: + for test_file in ["data.csv", "_SUCCESS"]: + data_obj: storage.blob.Blob = gcs_bucket.blob("/".join([ + f"{dest_dataset.project}.{dest_dataset.dataset_id}", + dest_ordered_update_table.table_id, chunk, test_file + ])) + data_obj.upload_from_filename( + os.path.join(TEST_DIR, "resources", "test-data", "ordering", + chunk, test_file)) + data_objs.append(data_obj) + + def teardown(): + for dobj in data_objs: + if dobj.exists(): + dobj.delete() + + request.addfinalizer(teardown) + return list(filter(lambda do: do.name.endswith("_SUCCESS"), data_objs)) + + +@pytest.fixture(scope="function") +def gcs_backlog(request, gcs, gcs_bucket, + gcs_ordered_update_data) -> List[storage.blob.Blob]: + data_objs = [] + + for success_blob in gcs_ordered_update_data: + gcs_ocn_bq_ingest.ordering.backlog_publisher(gcs, success_blob) + backlog_blob = gcs_ocn_bq_ingest.ordering.success_blob_to_backlog_blob( + success_blob) + backlog_blob.upload_from_string("") + data_objs.append(backlog_blob) + + 
def teardown(): + for dobj in data_objs: + if dobj.exists(): + dobj.delete() + + request.addfinalizer(teardown) + return list(filter(lambda do: do.name.endswith("_SUCCESS"), data_objs)) + + +@pytest.fixture +def gcs_external_update_config(request, gcs_bucket, dest_dataset, + dest_ordered_update_table) -> storage.Blob: + config_objs = [] + sql_obj = gcs_bucket.blob("/".join([ + f"{dest_dataset.project}.{dest_dataset.dataset_id}", + dest_ordered_update_table.table_id, + "_config", + "bq_transform.sql", + ])) + + sql = """ + UPDATE {dest_dataset}.{dest_table} dest + SET alpha_update = CONCAT(dest.alpha_update, src.alpha_update) + FROM temp_ext src + WHERE dest.id = src.id + """ + sql_obj.upload_from_string(sql) + + config_obj = gcs_bucket.blob("/".join([ + f"{dest_dataset.project}.{dest_dataset.dataset_id}", + dest_ordered_update_table.table_id, "_config", "external.json" + ])) + + with open(os.path.join(TEST_DIR, "resources", + "ordering_schema.json")) as schema: + fields = json.load(schema) + config = { + "schema": { + "fields": fields + }, + "csvOptions": { + "allowJaggedRows": False, + "allowQuotedNewlines": False, + "encoding": "UTF-8", + "fieldDelimiter": "|", + "skipLeadingRows": 0, + }, + "sourceFormat": "CSV", + "sourceUris": ["REPLACEME"], + } + config_obj.upload_from_string(json.dumps(config)) + backfill_blob = gcs_bucket.blob("/".join([ + f"{dest_dataset.project}.{dest_dataset.dataset_id}", + dest_ordered_update_table.table_id, + gcs_ocn_bq_ingest.constants.BACKFILL_FILENAME + ])) + backfill_blob.upload_from_string("") + config_objs.append(sql_obj) + config_objs.append(config_obj) + config_objs.append(backfill_blob) + + def teardown(): + for do in config_objs: + if do.exists(): + do.delete() + + request.addfinalizer(teardown) + return backfill_blob diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py index 
712b380be..019fd848e 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py @@ -192,3 +192,27 @@ def test_flattend2dlist(test_input, expected): def test_recursive_update(original, update, expected): assert gcs_ocn_bq_ingest.utils.recursive_update(original, update) == expected + + +@pytest.mark.parametrize( + "test_input,expected", + [ + ( + "dataset/table/_SUCCESS", # flat + "dataset/table"), + ( + "dataset/table/$20201030/_SUCCESS", # partitioned + "dataset/table"), + ( + "dataset/table/$20201030/batch_id/_SUCCESS", # partitioned, batched + "dataset/table"), + ( + "dataset/table/batch_id/_SUCCESS", # batched (no partitioning) + "dataset/table"), + ("dataset/table/2020/01/02/03/batch_id/_SUCCESS", "dataset/table"), + ("project.dataset/table/2020/01/02/03/batch_id/_SUCCESS", + "project.dataset/table"), + ("dataset/table/_backlog/_BACKFILL", "dataset/table"), + ]) +def test_get_table_prefix(test_input, expected): + assert gcs_ocn_bq_ingest.utils.get_table_prefix(test_input) == expected diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py index 44a5e717a..8aadeb08b 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py @@ -14,6 +14,7 @@ """integration tests for gcs_ocn_bq_ingest""" import os import time +import unittest.mock import google.cloud.exceptions import pytest @@ -22,7 +23,7 @@ import gcs_ocn_bq_ingest.main TEST_DIR = os.path.realpath(os.path.dirname(__file__) + "/..") -LOAD_JOB_POLLING_TIMEOUT = 10 # seconds +LOAD_JOB_POLLING_TIMEOUT = 20 # seconds @pytest.mark.IT @@ -67,8 +68,8 @@ def 
test_gcf_event_schema(bq, gcs_data, dest_dataset, dest_table, mock_env): @pytest.mark.IT -def test_duplicate_notification(bq, gcs_data, dest_dataset, dest_table, - mock_env): +def test_duplicate_success_notification(bq, gcs_data, dest_dataset, dest_table, + mock_env): """tests behavior with two notifications for the same success file.""" if not gcs_data.exists(): raise EnvironmentError("test data objects must exist") @@ -79,12 +80,11 @@ def test_duplicate_notification(bq, gcs_data, dest_dataset, dest_table, } } gcs_ocn_bq_ingest.main.main(test_event, None) - did_second_invocation_raise = False - try: + with unittest.mock.patch.object(google.cloud.error_reporting.Client, + "report_exception") as mock_method: gcs_ocn_bq_ingest.main.main(test_event, None) - except RuntimeError: - did_second_invocation_raise = True - assert did_second_invocation_raise + + mock_method.assert_called_once() test_data_file = os.path.join(TEST_DIR, "resources", "test-data", "nation", "part-m-00001") diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py new file mode 100644 index 000000000..3608d6e08 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py @@ -0,0 +1,141 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""integration tests for the ordering behavior of backlog gcs_ocn_bq_ingest""" +import os +import queue +import time + +import pytest +from google.cloud import storage + +import gcs_ocn_bq_ingest.constants +import gcs_ocn_bq_ingest.main +import gcs_ocn_bq_ingest.ordering +import gcs_ocn_bq_ingest.utils + +TEST_DIR = os.path.realpath(os.path.dirname(__file__) + "/..") +LOAD_JOB_POLLING_TIMEOUT = 20 # seconds + + +@pytest.mark.IT +@pytest.mark.ORDERING +def test_backlog_publisher(gcs, gcs_bucket, gcs_partitioned_data, mock_env): + """Test basic functionality of backlog_publisher + Drop two success files. + Assert that both success files are added to backlog and backfill file + created. + Assert that that only one backfill file is not recreated. + """ + table_prefix = "" + # load each partition. + for gcs_data in gcs_partitioned_data: + if not gcs_data.exists(): + raise EnvironmentError("test data objects must exist") + if gcs_data.name.endswith(gcs_ocn_bq_ingest.constants.SUCCESS_FILENAME): + table_prefix = gcs_ocn_bq_ingest.utils.get_table_prefix( + gcs_data.name) + gcs_ocn_bq_ingest.ordering.backlog_publisher(gcs, gcs_data) + + expected_backlog_blobs = queue.Queue() + expected_backlog_blobs.put("/".join([ + table_prefix, "_backlog", "$2017041101", + gcs_ocn_bq_ingest.constants.SUCCESS_FILENAME + ])) + expected_backlog_blobs.put("/".join([ + table_prefix, "_backlog", "$2017041102", + gcs_ocn_bq_ingest.constants.SUCCESS_FILENAME + ])) + + for backlog_blob in gcs_bucket.list_blobs( + prefix=f"{table_prefix}/_backlog"): + assert backlog_blob.name == expected_backlog_blobs.get(block=False) + + backfill_blob: storage.Blob = gcs_bucket.blob( + f"{table_prefix}/{gcs_ocn_bq_ingest.constants.BACKFILL_FILENAME}") + assert backfill_blob.exists() + + +@pytest.mark.IT +@pytest.mark.ORDERING +def test_backlog_publisher_with_existing_backfill_file(gcs, gcs_bucket, + dest_dataset, + dest_partitioned_table, + gcs_partitioned_data, + mock_env): + """Test basic functionality of 
backlog_publisher when the backfill is + already running. It should not repost this backfill file. + """ + table_prefix = "/".join( + [dest_dataset.dataset_id, dest_partitioned_table.table_id]) + backfill_blob: storage.Blob = gcs_bucket.blob( + f"{table_prefix}/{gcs_ocn_bq_ingest.constants.BACKFILL_FILENAME}") + backfill_blob.upload_from_string("") + backfill_blob.reload() + original_backfill_blob_generation = backfill_blob.generation + table_prefix = "" + # load each partition. + for gcs_data in gcs_partitioned_data: + if not gcs_data.exists(): + raise EnvironmentError("test data objects must exist") + if gcs_data.name.endswith(gcs_ocn_bq_ingest.constants.SUCCESS_FILENAME): + table_prefix = gcs_ocn_bq_ingest.utils.get_table_prefix( + gcs_data.name) + gcs_ocn_bq_ingest.ordering.backlog_publisher(gcs, gcs_data) + + # Use of queue to test that list responses are returned in expected order. + expected_backlog_blobs = queue.Queue() + expected_backlog_blobs.put("/".join([ + table_prefix, "_backlog", "$2017041101", + gcs_ocn_bq_ingest.constants.SUCCESS_FILENAME + ])) + expected_backlog_blobs.put("/".join([ + table_prefix, "_backlog", "$2017041102", + gcs_ocn_bq_ingest.constants.SUCCESS_FILENAME + ])) + + for backlog_blob in gcs_bucket.list_blobs( + prefix=f"{table_prefix}/_backlog"): + assert backlog_blob.name == expected_backlog_blobs.get(block=False) + + backfill_blob.reload() + assert backfill_blob.generation == original_backfill_blob_generation + + +@pytest.mark.IT +@pytest.mark.ORDERING +def test_single_backlog_subscriber_in_order(bq, gcs, gcs_bucket, error, + dest_ordered_update_table, + gcs_ordered_update_data, + gcs_external_update_config, + gcs_backlog, mock_env): + """Test basic functionality of backlog subscriber. + Populate a backlog with 3 files that make updates where we can assert + that these jobs were applied in order. 
+ """ + gcs_ocn_bq_ingest.ordering.backlog_subscriber(gcs, bq, error, + gcs_external_update_config, + time.monotonic()) + backlog_blobs = gcs_bucket.list_blobs( + prefix=f"{gcs_ocn_bq_ingest.utils.get_table_prefix(gcs_external_update_config.name)}/_backlog/" + ) + assert backlog_blobs.num_results == 0, "backlog is not empty" + rows = bq.query("SELECT alpha_update FROM " + f"{dest_ordered_update_table.dataset_id}" + f".{dest_ordered_update_table.table_id}") + expected_num_rows = 1 + num_rows = 0 + for row in rows: + num_rows += 1 + assert row["alpha_update"] == "ABC", "incrementals not applied in order" + assert num_rows == expected_num_rows diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/ordering_schema.json b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/ordering_schema.json new file mode 100644 index 000000000..ea54a4eed --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/ordering_schema.json @@ -0,0 +1,10 @@ +[ + { + "name": "id", + "type": "INT64" + }, + { + "name": "alpha_update", + "type": "STRING" + } +] diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/00/_SUCCESS b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/00/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/00/data.csv b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/00/data.csv new file mode 100644 index 000000000..6b4f72558 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/00/data.csv @@ -0,0 +1 @@ +1|A diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/01/_SUCCESS b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/01/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git 
a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/01/data.csv b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/01/data.csv new file mode 100644 index 000000000..3b4f35bfc --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/01/data.csv @@ -0,0 +1 @@ +1|B diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/02/_SUCCESS b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/02/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/02/data.csv b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/02/data.csv new file mode 100644 index 000000000..ecf1eb9e0 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/02/data.csv @@ -0,0 +1 @@ +1|C From 3c798f76be98ed4fe8c7577d9911a5cac194b655 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 8 Dec 2020 18:52:02 -0800 Subject: [PATCH 17/90] fixup! 
mypy pylint --- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py index a0db05425..aa1e67f33 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py @@ -108,7 +108,7 @@ START_BACKFILL_FILENAME, } -RESTART_BUFFER_SECONDS = os.getenv("RESTART_BUFFER_SECONDS", 30) +RESTART_BUFFER_SECONDS = int(os.getenv("RESTART_BUFFER_SECONDS", "30")) ORDER_ALL_JOBS = bool( distutils.util.strtobool(os.getenv("ORDER_ALL_JOBS", "False"))) From c631150373d255ceb2666d47ed1e1613c9edd6ba Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 8 Dec 2020 18:54:18 -0800 Subject: [PATCH 18/90] fixup! flake8 --- tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index 93a459d63..a06d21b36 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -18,7 +18,6 @@ import uuid from typing import List -import google.api_core.exceptions import pytest from google.cloud import bigquery, error_reporting, storage From d5fabfa1497e64ce834725f776929d61a2adde80 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 8 Dec 2020 18:56:17 -0800 Subject: [PATCH 19/90] fixup! 
mypy tests --- tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index a06d21b36..46fe1d9ef 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -349,7 +349,7 @@ def dest_ordered_update_table(request, bq, mock_env, schema=schema, ) - table: bigquery.Table = bq.create_table(table) + table = bq.create_table(table) # Our test query only updates so we need to populate the first row. bq.load_table_from_json([{"id": 1, "alpha_update": ""}], table) From 1c26e2362a7c96e707eebb9201f02be77b0a2034 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 9 Dec 2020 11:31:15 -0800 Subject: [PATCH 20/90] support _config/*.sql for bq tranform sql --- .../gcs_event_based_ingest/.gitignore | 1 + .../gcs_ocn_bq_ingest/constants.py | 2 ++ .../gcs_ocn_bq_ingest/utils.py | 30 +++++++++++++++---- 3 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 tools/cloud_functions/gcs_event_based_ingest/.gitignore diff --git a/tools/cloud_functions/gcs_event_based_ingest/.gitignore b/tools/cloud_functions/gcs_event_based_ingest/.gitignore new file mode 100644 index 000000000..8ca3bf9ba --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/.gitignore @@ -0,0 +1 @@ +prof/ diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py index aa1e67f33..32f2238b1 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py @@ -112,3 +112,5 @@ ORDER_ALL_JOBS = bool( distutils.util.strtobool(os.getenv("ORDER_ALL_JOBS", "False"))) + +BQ_TRANSFORM_SQL="*.sql" \ No newline 
at end of file diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py index f38555846..7953f2358 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -19,6 +19,7 @@ import collections import collections.abc import copy +import fnmatch import json import os import pathlib @@ -42,7 +43,7 @@ def external_query( # pylint: disable=too-many-arguments query: str, dest_table_ref: bigquery.TableReference, job_id: str): """Load from query over external table from GCS. - This hinges on a SQL query defined in GCS at _config/bq_transform.sql and + This hinges on a SQL query defined in GCS at _config/*.sql and an external table definition _config/external.json (otherwise will assume CSV external table) """ @@ -126,11 +127,27 @@ def load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id): def _get_parent_config_file(storage_client, config_filename, bucket, path): + bkt = storage_client.lookup_bucket(bucket) config_dir_name = "_config" parent_path = pathlib.Path(path).parent - config_path = parent_path / config_dir_name / config_filename + config_path = parent_path / config_dir_name + config_file_path = config_path / config_filename + # Handle wild card (to support bq transform sql with different names). 
+ if "*" in config_filename: + matches: List[storage.Blob] = list(filter( + lambda blob: fnmatch.fnmatch(blob.name, config_filename), + bkt.list_blobs(prefix=config_path))) + if matches: + if len(matches) > 1: + raise RuntimeError( + f"Multiple matches for gs://{bucket}/{config_file_path}" + ) + return read_gcs_file_if_exists(storage_client, + f"gs://{bucket}/{matches[0].name}") + else: + return None return read_gcs_file_if_exists(storage_client, - f"gs://{bucket}/{config_path}") + f"gs://{bucket}/{config_file_path}") def look_for_config_in_parents(storage_client: storage.Client, gsurl: str, @@ -678,12 +695,13 @@ def apply( dest_table_ref, _ = gcs_path_to_table_ref_and_batch(success_blob.name) gsurl = removesuffix(f"gs://{bkt.name}/{success_blob.name}", constants.SUCCESS_FILENAME) - print("looking for bq_transform.sql") + print( + f"looking for a transformation tranformation sql file in parent _config.") external_query_sql = read_gcs_file_if_exists( - gcs_client, f"{gsurl}_config/bq_transform.sql") + gcs_client, f"{gsurl}_config/*.sql") if not external_query_sql: external_query_sql = look_for_config_in_parents(gcs_client, gsurl, - "bq_transform.sql") + "*.sql") if external_query_sql: print("EXTERNAL QUERY") print(f"found external query:\n{external_query_sql}") From d16fb1bd5614190bfe81910c507f2e9c85bcf586 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 9 Dec 2020 12:00:17 -0800 Subject: [PATCH 21/90] improve performance of wait_on_bq_job --- .../gcs_ocn_bq_ingest/constants.py | 6 ++--- .../gcs_ocn_bq_ingest/ordering.py | 11 ++++---- .../gcs_ocn_bq_ingest/utils.py | 27 ++++++++++--------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py index 32f2238b1..e7eb75ff5 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py +++ 
b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py @@ -23,14 +23,14 @@ import google.api_core.client_info import google.cloud.exceptions -# Will wait up to this polling for errors before exiting +# Will wait up to this long polling for errors in a bq job before exiting # This is to check if job fail quickly, not to assert it succeed. # This may not be honored if longer than cloud function timeout. # https://cloud.google.com/functions/docs/concepts/exec#timeout # One might consider lowering this to 1-2 seconds to lower the # upper bound of expected execution time to stay within the free tier. # https://cloud.google.com/functions/pricing#free_tier -WAIT_FOR_JOB_SECONDS = int(os.getenv("WAIT_FOR_JOB_SECONDS", "5")) +WAIT_FOR_JOB_SECONDS = int(os.getenv("WAIT_FOR_JOB_SECONDS", "1")) DEFAULT_EXTERNAL_TABLE_DEFINITION = { # The default must be a self describing data format @@ -113,4 +113,4 @@ ORDER_ALL_JOBS = bool( distutils.util.strtobool(os.getenv("ORDER_ALL_JOBS", "False"))) -BQ_TRANSFORM_SQL="*.sql" \ No newline at end of file +BQ_TRANSFORM_SQL = "*.sql" diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py index 310eb1f52..468915831 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py @@ -79,11 +79,12 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, "setting the timeout to 540 seconds or at least " "1 minute (Cloud Functions default).") while time.monotonic() < restart_time - polling_timeout: - job_id = utils.read_gcs_file_if_exists( + lock_contents = utils.read_gcs_file_if_exists( gcs_client, f"gs://{bkt.name}/{lock_blob.name}") - if job_id: - if job_id.startswith( + if lock_contents: + if lock_contents.startswith( os.getenv('JOB_PREFIX', constants.DEFAULT_JOB_PREFIX)): + job_id = 
lock_contents try: last_job_done = utils.wait_on_bq_job_id( bq_client, job_id, polling_timeout) @@ -110,9 +111,7 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, else: print(f"sleeping for {polling_timeout} seconds because" f"found manual lock gs://{bkt.name}/{lock_blob.name} with" - "contents:\n" - f"""{utils.read_gcs_file_if_exists(gcs_client, - f'gs://{lock_blob.bucket.name}/{lock_blob.name}')}""") + f"contents:\n {lock_contents}") time.sleep(polling_timeout) continue if last_job_done: diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py index 7953f2358..ae29f4b4f 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -55,10 +55,12 @@ def external_query( # pylint: disable=too-many-arguments if external_table_config: external_table_def = json.loads(external_table_config) else: - print(f"Falling back to default CSV external table." - f" {gsurl}_config/external.json not found.") + print(f" {gsurl}_config/external.json not found in parents of {gsurl}." + "Falling back to default PARQUET external table:\n" + f"{json.dumps(constants.DEFAULT_EXTERNAL_TABLE_DEFINITION)}") external_table_def = constants.DEFAULT_EXTERNAL_TABLE_DEFINITION + # This may cause an issue if >10,000 files. 
however, we external_table_def["sourceUris"] = flatten2dlist( get_batches_for_prefix(gcs_client, gsurl)) print(f"external table def = {json.dumps(external_table_config, indent=2)}") @@ -88,6 +90,8 @@ def external_query( # pylint: disable=too-many-arguments if job.errors: raise exceptions.BigQueryJobFailure( f"query job {job.job_id} failed quickly: {job.errors}") + if job.state == "DONE": + return time.sleep(constants.JOB_POLL_INTERVAL_SECONDS) @@ -134,18 +138,16 @@ def _get_parent_config_file(storage_client, config_filename, bucket, path): config_file_path = config_path / config_filename # Handle wild card (to support bq transform sql with different names). if "*" in config_filename: - matches: List[storage.Blob] = list(filter( - lambda blob: fnmatch.fnmatch(blob.name, config_filename), - bkt.list_blobs(prefix=config_path))) + matches: List[storage.Blob] = list( + filter(lambda blob: fnmatch.fnmatch(blob.name, config_filename), + bkt.list_blobs(prefix=config_path))) if matches: if len(matches) > 1: raise RuntimeError( - f"Multiple matches for gs://{bucket}/{config_file_path}" - ) + f"Multiple matches for gs://{bucket}/{config_file_path}") return read_gcs_file_if_exists(storage_client, f"gs://{bucket}/{matches[0].name}") - else: - return None + return None return read_gcs_file_if_exists(storage_client, f"gs://{bucket}/{config_file_path}") @@ -696,9 +698,10 @@ def apply( gsurl = removesuffix(f"gs://{bkt.name}/{success_blob.name}", constants.SUCCESS_FILENAME) print( - f"looking for a transformation tranformation sql file in parent _config.") - external_query_sql = read_gcs_file_if_exists( - gcs_client, f"{gsurl}_config/*.sql") + "looking for a transformation tranformation sql file in parent _config." 
+ ) + external_query_sql = read_gcs_file_if_exists(gcs_client, + f"{gsurl}_config/*.sql") if not external_query_sql: external_query_sql = look_for_config_in_parents(gcs_client, gsurl, "*.sql") From c627af029b8062168930c20b71861fefacb9c296 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 9 Dec 2020 17:22:03 -0800 Subject: [PATCH 22/90] wip --- .../gcs_event_based_ingest/ORDERING.md | 107 ++++++++++++++++++ .../gcs_event_based_ingest/README.md | 6 + .../gcs_ocn_bq_ingest/README.md | 11 +- .../gcs_ocn_bq_ingest/constants.py | 7 +- .../gcs_ocn_bq_ingest/main.py | 57 +++++++--- .../gcs_ocn_bq_ingest/ordering.py | 91 ++++++++------- .../test_gcs_ocn_bq_ingest.py | 20 ++++ .../gcs_ocn_bq_ingest/test_ordering_it.py | 2 +- 8 files changed, 243 insertions(+), 58 deletions(-) create mode 100644 tools/cloud_functions/gcs_event_based_ingest/ORDERING.md diff --git a/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md b/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md new file mode 100644 index 000000000..24a20fcd7 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md @@ -0,0 +1,107 @@ +# Ordering Batches +There are use cases where it is important for incremental batches get +applied in order rather than as soon as they are uploaded to GCS (which is the +default behavior of this solution). +1. When using External Query that performs DML other than insert only. +(e.g. an `UPDATE` assumes that prior batches have already been committed) +1. To ensure that there are not time gaps in the data (e.g. ensure that +2020/01/02 data is not committed to BigQuery before 2020/01/01, or similarly +that 00 hour is ingested before the 01 hour, etc.) 
+ +This Cloud Function supports serializing the submission of ingestion jobs to +BigQuery by using Google Cloud Storage's consistency guarantees to provide a +pessimistic lock on a table to prevent concurrent jobs and +[GCS Object.list](https://cloud.google.com/storage/docs/json_api/v1/objects/list) +lexicographic sorting of results to providing ordering gurantees. +The solution involves a table level `_backlog/` directory to keep track +of success files whose batches have not yet been committed to BigQuery and +a table level `_bqlock` file to keep track of what job is currently ingesting to +that table. This way we can make our Cloud Function idempotent by having all the +state stored in GCS so we can safely retrigger it to skirt the Cloud Functions +timeout. + +## Assumptions +This ordering solution assumes that you want to apply batches in lexicographic +order. This is usually the case because path names usually contain some sort of +date / hour information. + +## Enabling Ordering +### Environment Variable +Ordering can be enabled at the function level by setting the `ORDER_PER_TABLE` +environment variable to `"True"`. +### Config File +Ordering can be configured at any level of your naming convention (e.g. dataset +table or some sub-path) by placing a `_config/ORDERME` file. This can be helpful +in scenarios where your historical load can be processed safely in parallel but +incrementals must be ordered. +For example: +```text +gs://${BUCKET}/${DATASET}/${TABLE}/historical/_config/load.json +gs://${BUCKET}/${DATASET}/${TABLE}/incremental/_config/external.json +gs://${BUCKET}/${DATASET}/${TABLE}/incremental/_config/bq_transform.sql +gs://${BUCKET}/${DATASET}/${TABLE}/incremental/_config/ORDERME +``` + +## Dealing With Out of Order Publishing to GCS During Historical Load +In some use cases, there is a period where incrementals that must be applied in +order are uploaded in parallel (meaning their _SUCCESS files are expected to be +out of order). 
This typically happens during some historical backfill period. +This can be solved by setting the `START_BACKFILL_FILENAME` environment +variable to a file name that indicates that the parallel upload of historical +incrementals is complete (e.g. `_HISTORYDONE`). This will cause all success +files for a table to be added to the backlog until the `_HISTORYDONE` file is +dropped at the table level. At that point the backlog subscriber will begin +processing the batches in order. + +## Batch Failure Behavior +When ordering is enabled, if the BQ job to apply a batch failed, it is not safe +to continue to ingest the next batch. The Cloud Function will leave the +`_bqlock` file and stop trying to process the backlog. The Cloud function +will report an exception like this which should be alerted on as the ingestion +process for the table will be deadlocked until there is human intervention to +address the failed batch: +```text + f"previous BigQuery job: {job_id} failed or could not " + "be found. This will kill the backfill subscriber for " + f"the table prefix {table_prefix}." + "Once the issue is dealt with by a human, the lock" + "file at: " + f"gs://{lock_blob.bucket.name}/{lock_blob.name} " + "should be manually removed and a new empty _BACKFILL" + "file uploaded to:" + f"gs://{lock_blob.bucket.name}/{table_prefix}/_BACKFILL" + f"to resume the backfill subscriber so it can " + "continue with the next item in the backlog.\n" + "Original Exception:\n" + f"{traceback.format_exc()}") +``` + +## Ordering Mechanics Explained +We've treated ordering incremental commits to table as a variation on the +[Producer-Consumer Problem](https://en.wikipedia.org/wiki/Producer%E2%80%93consumer_problem) +Where we have multiple producers (each call of Backlog Publisher) and a single +Consumer (the Backlog Subscriber which is enforced to be a singleton per table +with a claim file). Our solution is to use GCS `_backlog` directory as our queue +and `_bqlock` as a mutex. 
+ +### Backlog Publisher +The Backlog Publisher has two responsibilities: +1. add incoming success files to a +table's `_backlog` so they are not "forgotten" by the ingestion system. +1. if there is a non-empty backlog start the backfill subscriber (if one is not +already running). This is accomplished by dropping a table level `_BACKFILL` file. + +### Backlog Subscriber +The Backlog Subscriber is responsible for keeping track of BigQuery jobs running +on a table and ensure that batches are committed in order. When the backlog is +not empty for a table the backlog subscriber should be running for that table. +It will either be polling a `RUNNING` BigQuery job for completion, or submitting +the next batch in the `_backlog`. + +The state of what BigQuery job is currently running on a table is kept in a +`_bqlock` file at the table prefix. + +In order to escape the maximum nine-minute (540s) Cloud Function Timeout, the +backfill subscriber will re-trigger itself by posting a new `_BACKFILL` file +until the `_backlog` for the table prefix is empty. When a new success file +arrives it is the reponsibility of the publisher to restart the subscriber. diff --git a/tools/cloud_functions/gcs_event_based_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/README.md index 2477cc4f5..5dcedf5c9 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/README.md @@ -389,6 +389,12 @@ In theory, one could set up Pub/Sub notifications from multiple GCS Buckets Pub/Sub topic so that data uploaded to any of these buckets could get automatically loaded to BigQuery by a single deployment of the Cloud Function. +## Ordering Guarantees +It is possible to configure the Cloud Function to apply incrementals in order if +this is crucial to your data integrity. This naturally comes with a performance +penalty as for a given table we cannot parallelize ingestion of batches. 
+The ordering behavior and options are described in detail in [ORDERING.md](ORDERING.md) + ## Backfill There are some cases where you may have data already copied to GCS according to the naming convention / with success files before the Object Change diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md index 1252b1dda..a1f417d7b 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md @@ -9,7 +9,8 @@ BigQuery Table. 1. [Pub/Sub Notification](https://cloud.google.com/storage/docs/pubsub-notifications) object finalize. 1. Cloud Function subscribes to notifications and ingests all the data into -BigQuery a directory once a `_SUCCESS` file arrives. +BigQuery from a GCS prefix once a `_SUCCESS` file arrives. The success file name +is configurable with environment variable. ## Deployment @@ -32,9 +33,13 @@ following default behavior. | `MAX_BATCH_BYTES` | Max bytes for BigQuery Load job | `15000000000000` ([15 TB](https://cloud.google.com/bigquery/quotas#load_jobs)| | `JOB_PREFIX` | Prefix for BigQuery Job IDs | `gcf-ingest-` | | `BQ_PROJECT` | Default BQ project to use if not specified in dataset capturing group | Project where Cloud Function is deployed | -| `ORDERED_PER_TABLE` | Force jobs to be executed sequentially (rather than parallel) based on the backlog. This is the same as having an `ORDERME` file in every config directory | `False` | - +| `ORDER_PER_TABLE`\* | Force jobs to be executed sequentially (rather than parallel) based on the backlog. This is the same as having an `ORDERME` file in every config directory | `False` | +| `START_BACKFILL_FILENAME`\*| Block submitting BigQuery Jobs for a table until this file is present at the table prefix. By default this will not happen. 
| `None` | +| `RESTART_BUFFER_SECONDS`\* | Buffer before Cloud Function timeout to leave before re-triggering the backfill subscriber | 30 | +\* only affect the behavior when ordering is enabled for a table. +See [ORDERING.md](../ORDERING.md) + ## Implementation notes 1. To support notifications based on a GCS prefix (rather than every object in the bucket), we chose to use manually diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py index e7eb75ff5..0936d3f14 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py @@ -72,6 +72,7 @@ DEFAULT_DESTINATION_REGEX = ( r"^(?P[\w\-\._0-9]+)/" # dataset (required) r"(?P
[\w\-_0-9]+)/?" # table name (required) + r"(?:historical|incremental)?/?" # break up hist v.s. inc to separate prefixes (optional) r"(?P\$[0-9]+)?/?" # partition decorator (optional) r"(?:" # [begin] yyyy/mm/dd/hh/ group (optional) r"(?P[0-9]{4})/?" # partition year (yyyy) (optional) @@ -110,7 +111,9 @@ RESTART_BUFFER_SECONDS = int(os.getenv("RESTART_BUFFER_SECONDS", "30")) -ORDER_ALL_JOBS = bool( - distutils.util.strtobool(os.getenv("ORDER_ALL_JOBS", "False"))) +ORDER_PER_TABLE = bool( + distutils.util.strtobool(os.getenv("ORDER_PER_TABLE", "False"))) BQ_TRANSFORM_SQL = "*.sql" + +ENSURE_SUBSCRIBER_SECONDS = 10 diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 199ac47a8..60e22c77b 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -48,7 +48,8 @@ def main(event: Dict, context): # pylint: disable=unused-argument basename_object_id = os.path.basename(object_id) - # Exit eagerly if this is not a file to take action on. + # Exit eagerly if this is not a file to take action on + # (e.g. a data, config, or lock file) if basename_object_id not in constants.ACTION_FILENAMES: action_filenames = constants.ACTION_FILENAMES if constants.START_BACKFILL_FILENAME is None: @@ -57,20 +58,11 @@ def main(event: Dict, context): # pylint: disable=unused-argument f"{action_filenames} file.") return - # Ignore success files in the backlog directory - if (basename_object_id == constants.SUCCESS_FILENAME - and "/_backlog/" in object_id): - print(f"No-op. 
This notification was for " - f"gs://{bucket_id}/{object_id} a" - f"{constants.SUCCESS_FILENAME} in a" - "/_backlog/ directory.") - return - gcs_client = lazy_gcs_client() bq_client = lazy_bq_client() table_ref, batch = utils.gcs_path_to_table_ref_and_batch(object_id) - enforce_ordering = (constants.ORDER_ALL_JOBS + enforce_ordering = (constants.ORDER_PER_TABLE or utils.look_for_config_in_parents( gcs_client, f"gs://{bucket_id}/{object_id}", "ORDERME") is not None) @@ -78,6 +70,38 @@ def main(event: Dict, context): # pylint: disable=unused-argument bkt: storage.Bucket = utils.cached_get_bucket(gcs_client, bucket_id) event_blob: storage.Blob = bkt.blob(object_id) + # For SUCCESS files in a backlog directory, ensure that subscriber is + # running. + if ( + basename_object_id == constants.SUCCESS_FILENAME + and "/_backlog/" in object_id + ): + print(f"This notification was for " + f"gs://{bucket_id}/{object_id} a" + f"{constants.SUCCESS_FILENAME} in a" + "/_backlog/ directory. Ensuring that subscriber is running.") + # Handle rare race condition where: + # 1. subscriber reads an empty backlog (before it can delete the + # _BACKFILL blob...) + # 2. a new item is added to the backlog (causing a separate function + # invocation) + # 3. In this new invocation we reach this point in the code path and + # start_subscriber_if_not_running sees the old _BACKFILL and does + # not create a new one. + # 4. The subscriber deletes the _BACKFILL blob and exits without + # processing the new item on the backlog from #2. 
+ backfill_blob = ordering.start_backfill_subscriber_if_not_running( + gcs_client, bkt, utils.get_table_prefix(object_id)) + + time.sleep(constants.ENSURE_SUBSCRIBER_SECONDS) + while not utils.wait_on_gcs_blob( + gcs_client, backfill_blob, constants.ENSURE_SUBSCRIBER_SECONDS + ): + backfill_blob =\ + ordering.start_backfill_subscriber_if_not_running( + gcs_client, bkt, utils.get_table_prefix(object_id)) + return + if enforce_ordering: if (constants.START_BACKFILL_FILENAME and basename_object_id == constants.START_BACKFILL_FILENAME): @@ -89,7 +113,6 @@ def main(event: Dict, context): # pylint: disable=unused-argument ordering.backlog_publisher(gcs_client, event_blob) elif basename_object_id == constants.BACKFILL_FILENAME: ordering.backlog_subscriber(gcs_client, bq_client, - lazy_error_reporting_client(), event_blob, function_start_time) else: # Default behavior submit job as soon as success file lands. bkt = utils.cached_get_bucket(gcs_client, bucket_id) @@ -102,10 +125,16 @@ def main(event: Dict, context): # pylint: disable=unused-argument None, # None lock blob as there is no serialization required. utils.create_job_id(table_ref, batch)) # Unexpected exceptions will actually raise which may cause a cold restart. - except tuple(exceptions.EXCEPTIONS_TO_REPORT): + except tuple(exceptions.EXCEPTIONS_TO_REPORT) as original_error: # We do this because we know these errors do not require a cold restart # of the cloud function. - lazy_error_reporting_client().report_exception() + try: + lazy_error_reporting_client().report_exception() + except Exception: # pylint: disable=broad-except + # This mostly handles the case where error reporting API is not + # enabled or IAM permissions did not allow us to report errors with + # error reporting API. 
+ raise original_error def lazy_error_reporting_client() -> error_reporting.Client: diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py index 468915831..2d4ade6be 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py @@ -19,6 +19,7 @@ import os import time import traceback +from typing import Optional import google.api_core import google.api_core.exceptions @@ -42,20 +43,12 @@ def backlog_publisher( print(f"added gs://{backlog_blob.bucket.name}/{backlog_blob.name} " "to the backlog.") - start_backfill = True table_prefix = utils.get_table_prefix(event_blob.name) - if constants.START_BACKFILL_FILENAME: - start_backfill_blob = bkt.blob( - f"{table_prefix}/{constants.START_BACKFILL_FILENAME}") - start_backfill = start_backfill_blob.exists() - - if start_backfill: - start_backfill_subscriber_if_not_running(gcs_client, bkt, table_prefix) + start_backfill_subscriber_if_not_running(gcs_client, bkt, table_prefix) # pylint: disable=too-many-arguments,too-many-locals def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, - error_client: error_reporting.Client, backfill_blob: storage.Blob, function_start_time: float): """Pick up the table lock, poll BQ job id until completion and process next item in the backlog. @@ -90,40 +83,47 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, bq_client, job_id, polling_timeout) except (exceptions.BigQueryJobFailure, google.api_core.exceptions.NotFound): - last_job_done = False - error_client.report( + raise exceptions.BigQueryJobFailure( f"previous BigQuery job: {job_id} failed or could not " "be found. This will kill the backfill subscriber for " - f"the table prefix {table_prefix}." 
- "Once the issue is dealt with by a human, the lock" + f"the table prefix: {table_prefix}." + "Once the issue is dealt with by a human, the lock " "file at: " f"gs://{lock_blob.bucket.name}/{lock_blob.name} " "should be manually removed and a new empty " - f"{constants.BACKFILL_FILENAME}" - "file uploaded to:" - f"gs://{lock_blob.bucket.name}/{table_prefix}/_BACKFILL" + f"{constants.BACKFILL_FILENAME} " + "file uploaded to: " + f"gs://{backfill_blob.bucket.name}/{table_prefix}" + "/_BACKFILL " f"to resume the backfill subscriber so it can " "continue with the next item in the backlog.\n" "Original Exception:\n" f"{traceback.format_exc()}") - time.sleep(polling_timeout) - continue else: print(f"sleeping for {polling_timeout} seconds because" f"found manual lock gs://{bkt.name}/{lock_blob.name} with" - f"contents:\n {lock_contents}") + f"contents:\n {lock_contents}. This will be an infinite" + "loop until the manual lock is released.") time.sleep(polling_timeout) continue if last_job_done: utils.remove_oldest_backlog_item(gcs_client, bkt, table_prefix) last_job_done = False + check_backlog_time = time.monotonic() next_backlog_file = utils.get_next_backlog_item(gcs_client, bkt, table_prefix) if not next_backlog_file: - print(f"backlog is empty for gs://{bkt.name}/{table_prefix}." - "baclog subscriber exiting.") + backfill_blob.delete(if_generation_match=backfill_blob.generation) + if time.monotonic() > check_backlog_time: + raise exceptions.BacklogException( + "Please check if the backlog is empty for " + f"gs://${bkt.name}/{table_prefix}/_backlog/" + "There was more than {}" + ) utils.handle_bq_lock(gcs_client, lock_blob, None) + print(f"backlog is empty for gs://{bkt.name}/{table_prefix}. 
" + "backlog subscriber exiting.") return next_success_file: storage.Blob = bkt.blob( next_backlog_file.name.replace("/_backlog/", "/")) @@ -144,28 +144,43 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, backfill_blob.upload_from_string("") -def start_backfill_subscriber_if_not_running(gcs_client: storage.Client, - bkt: storage.Bucket, - table_prefix: str): +def start_backfill_subscriber_if_not_running( + gcs_client: storage.Client, + bkt: storage.Bucket, + table_prefix: str +) -> Optional[storage.Blob]: """start the backfill subscriber if it is not already runnning for this table prefix. created a backfill file for the table prefix if not exists. """ - # Create a _BACKFILL file for this table if not exists - backfill_blob = bkt.blob(f"{table_prefix}/{constants.BACKFILL_FILENAME}") - try: - backfill_blob.upload_from_string("", - if_generation_match=0, - client=gcs_client) - print("triggered backfill with " - f"gs://{backfill_blob.bucket.name}/{backfill_blob.name} " - f"created at {backfill_blob.time_created}. exiting. ") - except google.api_core.exceptions.PreconditionFailed: - backfill_blob.reload() - print("backfill already in progress due to: " - f"gs://{backfill_blob.bucket.name}/{backfill_blob.name} " - f"created at {backfill_blob.time_created}. exiting.") + start_backfill = True + # Do not start subscriber until START_BACKFILL_FILENAME has been dropped + # at the table prefix. 
+ if constants.START_BACKFILL_FILENAME: + start_backfill_blob = bkt.blob( + f"{table_prefix}/{constants.START_BACKFILL_FILENAME}") + start_backfill = start_backfill_blob.exists() + + if start_backfill: + # Create a _BACKFILL file for this table if not exists + backfill_blob = bkt.blob(f"{table_prefix}/{constants.BACKFILL_FILENAME}") + try: + backfill_blob.upload_from_string("", + if_generation_match=0, + client=gcs_client) + print("triggered backfill with " + f"gs://{backfill_blob.bucket.name}/{backfill_blob.name} " + f"created at {backfill_blob.time_created}. exiting. ") + return backfill_blob + except google.api_core.exceptions.PreconditionFailed: + backfill_blob.reload() + print("backfill already in progress due to: " + f"gs://{backfill_blob.bucket.name}/{backfill_blob.name} " + f"created at {backfill_blob.time_created}. exiting.") + return backfill_blob + else: + return None def success_blob_to_backlog_blob(success_blob: storage.Blob) -> storage.Blob: diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py index 019fd848e..49f76389f 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py @@ -97,6 +97,26 @@ "hh": "03", "batch": "batch_id" }), + ("project.dataset/table/historical/2020/01/02/03/batch_id/_SUCCESS", { + "dataset": "project.dataset", + "table": "table", + "partition": None, + "yyyy": "2020", + "mm": "01", + "dd": "02", + "hh": "03", + "batch": "batch_id" + }), + ("project.dataset/table/incremental/2020/01/02/04/batch_id/_SUCCESS", { + "dataset": "project.dataset", + "table": "table", + "partition": None, + "yyyy": "2020", + "mm": "01", + "dd": "02", + "hh": "04", + "batch": "batch_id" + }), ]) def test_default_destination_regex(test_input: str, expected: 
Dict[str, Optional[str]]): diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py index 3608d6e08..c3cb23585 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py @@ -123,7 +123,7 @@ def test_single_backlog_subscriber_in_order(bq, gcs, gcs_bucket, error, Populate a backlog with 3 files that make updates where we can assert that these jobs were applied in order. """ - gcs_ocn_bq_ingest.ordering.backlog_subscriber(gcs, bq, error, + gcs_ocn_bq_ingest.ordering.backlog_subscriber(gcs, bq, gcs_external_update_config, time.monotonic()) backlog_blobs = gcs_bucket.list_blobs( From 35e26d9a8a1f82a847efdd9b09e118e7a25e6cdf Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 9 Dec 2020 18:13:42 -0800 Subject: [PATCH 23/90] fixup! 
handle race condition --- .../gcs_ocn_bq_ingest/constants.py | 2 +- .../gcs_ocn_bq_ingest/main.py | 48 +++++---------- .../gcs_ocn_bq_ingest/ordering.py | 59 +++++++++++++++++-- 3 files changed, 72 insertions(+), 37 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py index 0936d3f14..908d0e854 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py @@ -116,4 +116,4 @@ BQ_TRANSFORM_SQL = "*.sql" -ENSURE_SUBSCRIBER_SECONDS = 10 +ENSURE_SUBSCRIBER_SECONDS = 5 diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 60e22c77b..163fa0629 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -70,39 +70,21 @@ def main(event: Dict, context): # pylint: disable=unused-argument bkt: storage.Bucket = utils.cached_get_bucket(gcs_client, bucket_id) event_blob: storage.Blob = bkt.blob(object_id) - # For SUCCESS files in a backlog directory, ensure that subscriber is - # running. - if ( - basename_object_id == constants.SUCCESS_FILENAME - and "/_backlog/" in object_id - ): - print(f"This notification was for " - f"gs://{bucket_id}/{object_id} a" - f"{constants.SUCCESS_FILENAME} in a" - "/_backlog/ directory. Ensuring that subscriber is running.") - # Handle rare race condition where: - # 1. subscriber reads an empty backlog (before it can delete the - # _BACKFILL blob...) - # 2. a new item is added to the backlog (causing a separate function - # invocation) - # 3. In this new invocation we reach this point in the code path and - # start_subscriber_if_not_running sees the old _BACKFILL and does - # not create a new one. - # 4. 
The subscriber deletes the _BACKFILL blob and exits without - # processing the new item on the backlog from #2. - backfill_blob = ordering.start_backfill_subscriber_if_not_running( - gcs_client, bkt, utils.get_table_prefix(object_id)) - - time.sleep(constants.ENSURE_SUBSCRIBER_SECONDS) - while not utils.wait_on_gcs_blob( - gcs_client, backfill_blob, constants.ENSURE_SUBSCRIBER_SECONDS - ): - backfill_blob =\ - ordering.start_backfill_subscriber_if_not_running( - gcs_client, bkt, utils.get_table_prefix(object_id)) - return - if enforce_ordering: + # For SUCCESS files in a backlog directory, ensure that subscriber + # is running. + if ( + basename_object_id == constants.SUCCESS_FILENAME + and "/_backlog/" in object_id + ): + print(f"This notification was for " + f"gs://{bucket_id}/{object_id} a" + f"{constants.SUCCESS_FILENAME} in a" + "/_backlog/ directory. " + f"Watiting {constants.ENSURE_SUBSCRIBER_SECONDS} seconds to " + "ensure that subscriber is running.") + ordering.subscriber_monitor(gcs_client, bkt, object_id) + return if (constants.START_BACKFILL_FILENAME and basename_object_id == constants.START_BACKFILL_FILENAME): # This will be the first backfill file. @@ -111,9 +93,11 @@ def main(event: Dict, context): # pylint: disable=unused-argument return if basename_object_id == constants.SUCCESS_FILENAME: ordering.backlog_publisher(gcs_client, event_blob) + return elif basename_object_id == constants.BACKFILL_FILENAME: ordering.backlog_subscriber(gcs_client, bq_client, event_blob, function_start_time) + return else: # Default behavior submit job as soon as success file lands. 
bkt = utils.cached_get_bucket(gcs_client, bucket_id) success_blob: storage.Blob = bkt.blob(object_id) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py index 2d4ade6be..cbc21d3ac 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py @@ -115,12 +115,25 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, table_prefix) if not next_backlog_file: backfill_blob.delete(if_generation_match=backfill_blob.generation) - if time.monotonic() > check_backlog_time: - raise exceptions.BacklogException( - "Please check if the backlog is empty for " + if ( + check_backlog_time + constants.ENSURE_SUBSCRIBER_SECONDS < + time.monotonic() + ): + print( + "checking if the backlog is still empty for " f"gs://${bkt.name}/{table_prefix}/_backlog/" - "There was more than {}" + f"There was more than {constants.ENSURE_SUBSCRIBER_SECONDS}" + " seconds between listing items on the backlog and " + f"attempting to delete the {constants.BACKFILL_FILENAME}. " + "This should not happen often but is meant to alleviate a " + "race condition in the event that something caused the " + "delete operation was delayed or had to be retried for a " + "long time." ) + next_backlog_file = utils.get_next_backlog_item( + gcs_client, bkt, table_prefix) + if next_backlog_file: + continue utils.handle_bq_lock(gcs_client, lock_blob, None) print(f"backlog is empty for gs://{bkt.name}/{table_prefix}. 
" "backlog subscriber exiting.") @@ -192,3 +205,41 @@ def success_blob_to_backlog_blob(success_blob: storage.Blob) -> storage.Blob: success_file_suffix = utils.removeprefix(success_blob.name, f"{table_prefix}/") return bkt.blob(f"{table_prefix}/_backlog/{success_file_suffix}") + + +def subscriber_monitor( + gcs_client: storage.Client, + bkt: storage.Bucket, + object_id: str +): + """ + Monitor to handle a rare race condition where: + + 1. subscriber reads an empty backlog (before it can delete the + _BACKFILL blob...) + 2. a new item is added to the backlog (causing a separate + function invocation) + 3. In this new invocation we reach this point in the code path + and start_subscriber_if_not_running sees the old _BACKFILL + and does not create a new one. + 4. The subscriber deletes the _BACKFILL blob and exits without + processing the new item on the backlog from #2. + + We handle this by success file added to the backlog starts this monitoring + to wait constants.ENSURE_SUBSCRIBER_SECONDS before checking that the + backfill file exists. On the subscriber side we check if there was more time + than this between list backlog items and delete backfill calls. This way + we always handle this race condition either in this monitor or in the + subscriber itself. 
+ """ + backfill_blob = start_backfill_subscriber_if_not_running( + gcs_client, bkt, utils.get_table_prefix(object_id)) + + time.sleep(constants.ENSURE_SUBSCRIBER_SECONDS) + while not utils.wait_on_gcs_blob( + gcs_client, backfill_blob, constants.ENSURE_SUBSCRIBER_SECONDS + ): + backfill_blob = \ + start_backfill_subscriber_if_not_running( + gcs_client, bkt, utils.get_table_prefix(object_id)) + From 8c97f5a7efd9097cd891a1ba47ff057ae52b9fa7 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 9 Dec 2020 19:49:06 -0800 Subject: [PATCH 24/90] ordering docs and isort single line rule --- .../gcs_event_based_ingest/.isort.cfg | 1 + .../gcs_event_based_ingest/ORDERING.md | 36 +++- .../gcs_event_based_ingest/backfill.py | 4 +- .../gcs_ocn_bq_ingest/main.py | 34 ++-- .../gcs_ocn_bq_ingest/ordering.py | 45 +++-- .../gcs_ocn_bq_ingest/utils.py | 14 +- .../gcs_event_based_ingest/img/ordering.png | Bin 0 -> 77197 bytes .../ordered_backfill.py | 179 ++++++++++++++++++ .../gcs_event_based_ingest/tests/conftest.py | 54 +++++- .../test_gcs_ocn_bq_ingest.py | 3 +- .../test_gcs_ocn_bq_ingest_it.py | 33 ++++ 11 files changed, 357 insertions(+), 46 deletions(-) create mode 100644 tools/cloud_functions/gcs_event_based_ingest/img/ordering.png create mode 100644 tools/cloud_functions/gcs_event_based_ingest/ordered_backfill.py diff --git a/tools/cloud_functions/gcs_event_based_ingest/.isort.cfg b/tools/cloud_functions/gcs_event_based_ingest/.isort.cfg index ed7944aca..7b7b2d6f3 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/.isort.cfg +++ b/tools/cloud_functions/gcs_event_based_ingest/.isort.cfg @@ -1,3 +1,4 @@ [settings] src_paths=backfill.py,gcs_ocn_bq_ingest,test skip=terraform_module +force_single_line=True diff --git a/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md b/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md index 24a20fcd7..8a3dda5d8 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md +++ 
b/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md @@ -42,7 +42,7 @@ gs://${BUCKET}/${DATASET}/${TABLE}/incremental/_config/bq_transform.sql gs://${BUCKET}/${DATASET}/${TABLE}/incremental/_config/ORDERME ``` -## Dealing With Out of Order Publishing to GCS During Historical Load +## Dealing With Out-of-Order Publishing to GCS During Historical Load In some use cases, there is a period where incrementals that must be applied in order are uploaded in parallel (meaning their _SUCCESS files are expected to be out of order). This typically happens during some historical backfill period. @@ -82,7 +82,8 @@ We've treated ordering incremental commits to table as a variation on the Where we have multiple producers (each call of Backlog Publisher) and a single Consumer (the Backlog Subscriber which is enforced to be a singleton per table with a claim file). Our solution is to use GCS `_backlog` directory as our queue -and `_bqlock` as a mutex. +and `_bqlock` as a mutex. There is still a rare corner case of a race condition +that we handle as well. ### Backlog Publisher The Backlog Publisher has two responsibilities: @@ -105,3 +106,34 @@ In order to escape the maximum nine-minute (540s) Cloud Function Timeout, the backfill subscriber will re-trigger itself by posting a new `_BACKFILL` file until the `_backlog` for the table prefix is empty. When a new success file arrives it is the reponsibility of the publisher to restart the subscriber. + + +### Note on Handling Race Condition +we use subscribe_monitor to handle a rare race condition where: + +1. subscriber reads an empty backlog (before it can delete the + _BACKFILL blob...) +2. a new item is added to the backlog (causing a separate + function invocation) +3. In this new invocation we reach this point in the code path + and start_subscriber_if_not_running sees the old _BACKFILL + and does not create a new one. +4. 
The subscriber deletes the _BACKFILL blob and exits without + processing the new item on the backlog from #2. + +We handle this by the following: + +1. When success file added to the backlog starts this monitoring +to wait 10 seconds before checking that the backfill file exists. To catch if +the backfill file disappears when it should not. This might trigger an extra +loop of the backfill subscriber but this loop will not take any action and this +wasted compute is far better than dropping a batch of data. +1. On the subscriber side we check if there was more time +than 10 seconds between list backlog items and delete backfill calls. If so the +subscriber double checks that the backlog is still empty. This way +we always handle this race condition either in this monitor or in the +subscriber itself. + + +### Visualization of Ordering Triggers in the Cloud Function +![architecture](img/ordering.png) diff --git a/tools/cloud_functions/gcs_event_based_ingest/backfill.py b/tools/cloud_functions/gcs_event_based_ingest/backfill.py index f0a2ce415..3730074ee 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/backfill.py +++ b/tools/cloud_functions/gcs_event_based_ingest/backfill.py @@ -19,7 +19,9 @@ import os import pprint import sys -from typing import Dict, Iterator, List +from typing import Dict +from typing import Iterator +from typing import List import google.api_core.client_info from google.cloud import storage diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 163fa0629..3d349eeea 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -22,9 +22,14 @@ # pylint in cloud build is being flaky about this import discovery. # pylint: disable=no-name-in-module -from google.cloud import bigquery, error_reporting, storage - -from . 
import constants, exceptions, ordering, utils +from google.cloud import bigquery +from google.cloud import error_reporting +from google.cloud import storage + +from . import constants +from . import exceptions +from . import ordering +from . import utils # Reuse GCP Clients across function invocations using globbals # https://cloud.google.com/functions/docs/bestpractices/tips#use_global_variables_to_reuse_objects_in_future_invocations # pylint: disable=global-statement @@ -73,16 +78,15 @@ def main(event: Dict, context): # pylint: disable=unused-argument if enforce_ordering: # For SUCCESS files in a backlog directory, ensure that subscriber # is running. - if ( - basename_object_id == constants.SUCCESS_FILENAME - and "/_backlog/" in object_id - ): - print(f"This notification was for " - f"gs://{bucket_id}/{object_id} a" - f"{constants.SUCCESS_FILENAME} in a" - "/_backlog/ directory. " - f"Watiting {constants.ENSURE_SUBSCRIBER_SECONDS} seconds to " - "ensure that subscriber is running.") + if (basename_object_id == constants.SUCCESS_FILENAME + and "/_backlog/" in object_id): + print( + f"This notification was for " + f"gs://{bucket_id}/{object_id} a" + f"{constants.SUCCESS_FILENAME} in a" + "/_backlog/ directory. " + f"Watiting {constants.ENSURE_SUBSCRIBER_SECONDS} seconds to " + "ensure that subscriber is running.") ordering.subscriber_monitor(gcs_client, bkt, object_id) return if (constants.START_BACKFILL_FILENAME and basename_object_id @@ -95,8 +99,8 @@ def main(event: Dict, context): # pylint: disable=unused-argument ordering.backlog_publisher(gcs_client, event_blob) return elif basename_object_id == constants.BACKFILL_FILENAME: - ordering.backlog_subscriber(gcs_client, bq_client, - event_blob, function_start_time) + ordering.backlog_subscriber(gcs_client, bq_client, event_blob, + function_start_time) return else: # Default behavior submit job as soon as success file lands. 
bkt = utils.cached_get_bucket(gcs_client, bucket_id) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py index cbc21d3ac..dea38dbec 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py @@ -25,9 +25,12 @@ import google.api_core.exceptions # pylint in cloud build is being flaky about this import discovery. # pylint: disable=no-name-in-module -from google.cloud import bigquery, error_reporting, storage +from google.cloud import bigquery +from google.cloud import storage -from . import constants, exceptions, utils +from . import constants +from . import exceptions +from . import utils def backlog_publisher( @@ -115,10 +118,8 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, table_prefix) if not next_backlog_file: backfill_blob.delete(if_generation_match=backfill_blob.generation) - if ( - check_backlog_time + constants.ENSURE_SUBSCRIBER_SECONDS < - time.monotonic() - ): + if (check_backlog_time + constants.ENSURE_SUBSCRIBER_SECONDS - 2 < + time.monotonic()): print( "checking if the backlog is still empty for " f"gs://${bkt.name}/{table_prefix}/_backlog/" @@ -128,12 +129,16 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, "This should not happen often but is meant to alleviate a " "race condition in the event that something caused the " "delete operation was delayed or had to be retried for a " - "long time." - ) + "long time.") next_backlog_file = utils.get_next_backlog_item( gcs_client, bkt, table_prefix) if next_backlog_file: - continue + # The backfill file may have been deleted but the backlog is + # not empty. Retrigger the backfill subscriber loop by + # dropping a new backfill file. 
+ start_backfill_subscriber_if_not_running( + gcs_client, bkt, table_prefix) + return utils.handle_bq_lock(gcs_client, lock_blob, None) print(f"backlog is empty for gs://{bkt.name}/{table_prefix}. " "backlog subscriber exiting.") @@ -158,10 +163,8 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, def start_backfill_subscriber_if_not_running( - gcs_client: storage.Client, - bkt: storage.Bucket, - table_prefix: str -) -> Optional[storage.Blob]: + gcs_client: storage.Client, bkt: storage.Bucket, + table_prefix: str) -> Optional[storage.Blob]: """start the backfill subscriber if it is not already runnning for this table prefix. @@ -177,7 +180,8 @@ def start_backfill_subscriber_if_not_running( if start_backfill: # Create a _BACKFILL file for this table if not exists - backfill_blob = bkt.blob(f"{table_prefix}/{constants.BACKFILL_FILENAME}") + backfill_blob = bkt.blob( + f"{table_prefix}/{constants.BACKFILL_FILENAME}") try: backfill_blob.upload_from_string("", if_generation_match=0, @@ -207,11 +211,8 @@ def success_blob_to_backlog_blob(success_blob: storage.Blob) -> storage.Blob: return bkt.blob(f"{table_prefix}/_backlog/{success_file_suffix}") -def subscriber_monitor( - gcs_client: storage.Client, - bkt: storage.Bucket, - object_id: str -): +def subscriber_monitor(gcs_client: storage.Client, bkt: storage.Bucket, + object_id: str): """ Monitor to handle a rare race condition where: @@ -236,10 +237,8 @@ def subscriber_monitor( gcs_client, bkt, utils.get_table_prefix(object_id)) time.sleep(constants.ENSURE_SUBSCRIBER_SECONDS) - while not utils.wait_on_gcs_blob( - gcs_client, backfill_blob, constants.ENSURE_SUBSCRIBER_SECONDS - ): + while not utils.wait_on_gcs_blob(gcs_client, backfill_blob, + constants.ENSURE_SUBSCRIBER_SECONDS): backfill_blob = \ start_backfill_subscriber_if_not_running( gcs_client, bkt, utils.get_table_prefix(object_id)) - diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py 
b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py index ae29f4b4f..208189e39 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -25,7 +25,13 @@ import pathlib import time import uuid -from typing import Any, Deque, Dict, List, Optional, Tuple, Union +from typing import Any +from typing import Deque +from typing import Dict +from typing import List +from typing import Optional +from typing import Tuple +from typing import Union import cachetools import google.api_core @@ -33,9 +39,11 @@ import google.api_core.exceptions import google.cloud.exceptions # pylint in cloud build is being flaky about this import discovery. -from google.cloud import bigquery, storage +from google.cloud import bigquery +from google.cloud import storage -from . import constants, exceptions # pylint: disable=no-name-in-module +from . import constants # pylint: disable=no-name-in-module +from . 
import exceptions def external_query( # pylint: disable=too-many-arguments diff --git a/tools/cloud_functions/gcs_event_based_ingest/img/ordering.png b/tools/cloud_functions/gcs_event_based_ingest/img/ordering.png new file mode 100644 index 0000000000000000000000000000000000000000..0361ac97cf74a6cc64d60db7b6f6230694b727ac GIT binary patch literal 77197 zcmeFZg;$kd*Dj0*QWDZFjkJJt3P=mm-5?+>-635{OG~MAcWgQZ1nExc*mQH&*5`e{ z=lQ+oJLCKTXN)}pgM)WNegr-# z_E4pQgQI|xk$9=yh z(KGhiE3R>N@NL{}Wuw;ZZ(;Y%!RR!1bC=3R-=Q>=)s2k}dS)6?_HmB^^QlMr$4B7pTn1cM=`0&N zz4ZTBfGBi8=zp`UGlXY8^FCpECIJL&L-y$6mbCv^!!K;5#RD$wB0lI3Bq$i!+8uRm z`twVEh3~ys7%89e_d%c2Z88qyPG&y$_hw}#%myuL_^fjjPepuUv&03Yv&8ts2kG>B z3?M#nBs}}FzkgUhjI1{8qtj~fN-fgm({B$s$8Z+l{d>5go7i72(Dhn&=V1%CL5Q7) zEz0l719NQ1H${V3e3nPj1Bul%j=#rZg*+w0h`HW>FV*Uarlzr;D&ugmK8G_Y)~Zxd zFI`|b{*6vxcVM4oi-h!7UdVc2A6WxQ&9k9E8d0JZBrrkR8$$(bW|XIE4+4uc*@sGY z#=Z_3hT$?8Q$VdWsCfzhoQUXmplayOousY!uw}iB@dnFsV|*YYBBE7gkyE7CC`T^j zK^KPI71y62C({v(*A>5Ue!RiP;jkh$UA|hj1abWRgGR5BJ29Ri?pdJoj!T?;va%e+ zr}$Jl3X!Ipi0_Dv&;2N%GYm&OmR^xr>SG2P$8NplFQ51Ob9mmqWc)8aJwj*fpDK4; zZ}nHHu${-fwW>5PRnHo^J{wE|ze%6(n}Bn1;jo<(bU)dwHyuioygIj|mWiW#V-t|- zd$6!NF6d0_b2C+8Ht=}lmb{4DHtnlEcMsxYRQw;5uKR+Y(gi+B$3J_Lr(S$C`d!IP zao!`12ey4)ya(Y1-?TA^ad5#e`{*#Sm8G?AjyRfjh1)0>Yg_guvV53p@itg!zNQfJ zwdaz^J(S$NAduN`4F)gNuW#0i#}SdioldkyUMq>H10y7KZsl5AM9Y!v;6_{a zp$M3l(G3P#J;0n`P9h84ncf+mj)OzdR4^PbAmezy#!S0})o!I5*>w<%?rD0sBx2SR z4WV(_CA`{BV5*IqtFQe2#@x|p_;lM@!-igQKgX_=cD9mh#kDVXBv#m4mTO@sJtR+~ z6k6eZ%{<+B?pAg@$>>vU&rX|dHMaST&+%>ILbHx$OQ^NXAJ=M#18j}Etye)T6|T$9 z0_W{L@Jsu$aQaipoEp95CIhOHCXOovSv}H%&NmmR&?D5G(uFc3HfI;tn~RLMU3Oce zuk$3rZXO_H3U#rG)ik=0vFOyuCpqt&Aqe;uuV|nVsH{wmeSmkJw@M@Uqoe{rH_Z(w zXTBi=Uq=1MZI~e>vd(7@4N`^S#PTKMK=?5?O7t9Q-u|$-+Qwt7N)!uvFMY7UG8n#? 
z!X+TnA1`Az_MP?WEHT`GrhEo+Zwd|z>&$xdk_#8o==v1R<&Gap!fuQ}#8V;#CkQ*h zx;Ho4HB!^zF!}Gk0`5{jmoHiks84ZwPdC^xxjY<7>XNthudmVk=Y>qd!ovMHO*XSH zw`N_ltrrCKuxUc6d2Hw6W~v)|KT+=G>DFf#G?ZWl3ThOnD$mrAQ7yLz#f9N`=1+Qi z-y#I?ybAB!ZlItE4lsQ6-T7owNsUadi%qaZnz-8aKoEL#r0k9UR9M{1V#3T5L*b^y z=azNqG6yo0e*bA>*q|=}k@^EJ1h#08Ly{;Jn0ljtV`43re-XQqx#etuSo!z$1>@38Zjyh#FmJ?#Z=Z)|I^xdnRVTT9Bn(-Jj1LL12e%sxIt4 z3Sm>}8cSXT?|bbar^=HY2@EQl803P*6=KvMs_>yCB0i)jws&O@Q7Y`KRVv?(k3WQa zE}P0-Pw3&JG)zhmZZL5EOeYo=B!s}h&wSwMDN6x{FpfO^0DOjob8&T=DxcXR&uzEB zcChFhPa}gZ9faDI=W`prt$T2^u9E;4=vwD>XokY7?`Gnw;T${(Hr>cdp5N zn4MP4YJ!=GUg3s1hFVH_g#a;QQ6>(*uOmc})_L1hR@H90LZowrz?;&^b0i!62-xf8 z*0&pMYRMN`wYIfyp;oqcZr(q?Sb~6w5=O+?;Iw^`8qb_8^67%BCun8z!=h;#KUk?8 z@IKCAX4x^gkhzEd2;vhk&lF5PI}-(VHvh(v;@>j>Y}b`#cQc2Io2|L!tkX zbnMCiz09cMOY8!A$s*Yk{Ev%Mi9rW!maxGe^^B7(X#OJu;bnn&em!F1x6*zqN!R}W zabf9JVtDniMH3wZZBFjl1}y)Zbz^}s?QnyTm5n9XO_tsx*k+u^ZD77?4`erL$n5=) z>ch%I@5;`r%^b7oAkPvRdJ(q9f+N}PRzG0PhY5CNPGy*Iq4#1XgJu|aJo&e{j+lRI z2g%c1mr2vCpOt5<3_jJB#;^ZwKL%0!hDK>-6PsOjN|aaF_b^7sd{K5T_eog)5JHR3 zS0z5sL63L4|bjn+_d~1b9P|ic9M` zZ13OAs(=`W^7j@=RAttYnQMigRE;~)ZphXLD@)&--r_LK{wT+E(L?T7F>@y$o9iJ3wW40w4MvcrpnwI;P(xUH^#SIR;85zQz(*UpT(BPm+OOa< z#K7^WsL!;!E8l1%M-Ku|0$!xD;M1&&52R@;lYEmw>M?%bfDW9-C)C(y;6m^NOgR58 zFa)Ga&d+?n#X1!WrCV3QjXZAfk;!2hGxDFK^kag3Wk@Y&KGK8C z5US&sVPN~OjPaZre3E7*J6!5mf(E{!ND1CEFxe~%Ze3Pmk*3D{*&S~@&`@#cuj8Gx z3rjo{H$kx3kBjgpH}BE@wf(1mwyy$izJUtfD{5JyZO9Cl1}#lR{!y{kETE2L z;h7o#xvwu9NjiV?{MK7w?ED)kqu@q6VqoVg;Jrt`tPZcDhYt%v&)dIBjsZ(>-`t$s z@YDMMS(7WaZgn1{@|2oQSMa@G>s>tBomL6MXGy_jR256%zSYZ?2rDos!+g3h5PxH$Z(j!cAw##DzE}g|uRIr*v znK;Sbvpss2Juwx5aD&&MHN*Z&p$~aV{AQ)GV<MErhEs}tx6;uk~Th8yOYHjtBz+Gj(%1@u{JbdtYxVS_< zm0O$JT9fhvlHw8qwKZ6VAvL@L zwU6`DpVjZUV(DJbAo4XUW(vy|Yn#OrxJ{MG(Csw4LFKn6Sm=wjoiR8fd+V~)p|h3d zB<}Zj=@Jq6dhUriQXeVT`_&Q~oKp+RzI|!Wm@L+z15xJ5)T56stKBvyMk61|=acvx zzka9hMvm+ML>W1zBcCCtJd`fLz;4nVlPeQfc-2t0;ePzJuf}F)ZD+Fd zr~{KP&v~b$pW4!Lc5Vix3q!1j0p{Bk-ECW?1V+XJ&WtR_(FeeW|K6OXCkvn^0L ztafV{3km(AYS 
z4f1KcVz%=QjL;R@H(5WCb0(sK?p$`Ko?W!Wa9JsA&AI#Wvj(qhY%mQZ4#vGsRY!@A zUlu@Z&qu?%QfiKNG3mjGHR_-nh$LD?T>VIV`VC~`v)N{O7uColB*2?Yh5G#pT?|4q z@6Ue4X|_LC9n0%bLGjJ?Y!?M2Ak|Jg+|sfMSG2tzgQ=5mp00d&6cy*R#gW{FhYj_- zy~4Acu85B|2E}sQJUW%y z`6zNFS8kj~BNm8U;jqehd3T%oBehn)JqYcaSpG~^K2xT!cWeq5NqDWR8z}n{o7|6O zLHz5%kOn84tDL7@07)0%kvmxEpj9v8eY;^(0uRa$F|YK$)=Y%LD|WVTXJ2lFM$bivcn_GLb5yG=-Q}SsIG`&E@;|wj_>8m z_~y%J{TngdT8j}GFE6iLQU7e<5Ldr`dkswLEedbcMVCs~d}D2IcT|X4JENZa-A(=- z1UiNrMoXdE6PcVZ^3eKSVy)fcr1jja#h${^6uXCJ_sva#8~RoU1kff#Ne_q`nb zZ&=GgnUcH7;B(XkID2za?=(m}00A8t=q(vMbIaxv)E1)glO zcdm&$BQNzQL~M204W{JL7_?B0R`0aJO*v;6xY-=9F?`q8sW7dnaQOALV_QIZwOh^X z=3+XQK}Fqr*8gE+FOY$1VEWK;M96kmL{hnj76^UBlmf7FH~=WC0dlYRwF^v)CyF@G zwWCP`wEQhfQn#In)e4iIM??ZHW%8e&^RyyWGd$`IuCx3lt5sndeUuwb$gaj=es{oJ zZbV|$$tH3AUFmkc6rwJ3ak%_2bW!RfxipYnJpIJs$L|@9Sr2T^x7K*?bD4stGn8syMWn>i$h@~)Y)KMU?GN|PvXg~cp@r{$ z_^Xf$Fb4>IvrtCSUZ|M)1Y5vGH=>j6ikQ3Tn{I=Xevn)$zk(UF&;5;Kr1e~#^7gn@ ziC~?Z^he|E9|vt8JWYE)?N?Y&yO=$-jTV(iHiAMgSs9dbW43-JZFjM4&r)Sau1qc* zAVh;~%TT)2vXgD7iPb=ut+k#IT;!<;O{1$_!b2n%253alp!ao+hW^-7-?1}jF*SYmIv1=-ECIvKv3bg76Z|)jO(Ou6`65k2BP}+$ojW_8M$(zFyxP zx#h4o)L_u6D2iv%(F)?kB%2X`^q4SbXl|@XOFJBwfpTy5K8bJiq96wZ-M+DW-|vpS zBDbpg{Lb4Xn;(;Dbuw2bvENyX|EXY?Ea1;znA6i=<}Q+-Imjh;!f7)Ms$M-i0*-dv z6@g`iKg zaO=|wkmN;uA_M+d^Cgf-se&lgG+wMe5k8G$!1LgXE}_?)W7eurbN`Hfa5?Vo zbA5Iz9*%!2vv|Ha@^Gy`!PoR`muoajoEqh+a6lxvka%ft4AxKQIf`g!u9UOt<~+7vnplRg^6Lo=Fs4FQ}Oo?WtKE4Vi%|FK_qINCeR3{ z`?^%ZtR6IcYgJcV>R(Rh`yUdJ@Q({LO4DSCN6JkM#*y|ntbTq8SnF349q+g_Xo}t) zdsbHIywflWxqk*+NA3+&N@Sf(^1hcN%J`X&(ZlYxCME5fqP$qs%~&nXN-I$BXZ6(l zO*x~fmL3COW1z14NsLkNv46mtGPTqhfNm0{Q(kO!T@+B$G`G7Vc(H+fKpJS<4?PjD zo@01+pQGw+Rs|%>{m?q+DY#MixP&bmCvwq=1qG#GjlR#zh z0vYDG*RU#P3pI3JUMR#+(T=MLN#_G3N*~~|6-8Q=itK(rtrzcj+^^57ahcS$vFQ~U zdt1G3E?#=`z9@MhIw5B2(c*qwAS*V~Ky0FW%&`hn>uB5-|3C zL{!hn?>V)p-_7xG9-}|r(DO`?Fl*QDXG`n`Aabf=hd)A3DLfXCUO;>j?XF&=Dc2r! 
ztd9B6kOW(w^5sNlSjXC=fmQEhse$SKT>fa85tFl;e>EtVwF9g{bb%F@$nZlHK9p1> z+&5~YUk9H>5mzhu)|wBIvK{^;4qtSGVi(MkQWyi5O%Y0*%577Qn#FzE+^%`E(k2+| zvC;$uSl;;gBZ-{(ItMOIXoN&I?^%%JfLXt)Y9AjIj$G=;+acX7>`%%!{Kn_+<*_OJ ze^M=}t`Nj^BAvAdajxB5s%*c@3`D_^$q)?6eK+3lfP-jtVHV_U{rM_VszPr3ly^Rt zc?GCKAV>wsuEDXVK2uV5Mt@WPUM=AD;71YeN^?bJ89MuBtkJ8GDY%#Pn8+T)dSVNGRZwweacs1FdckIA4r!c9iwg56MPXw7~*;}Q!U5nMU06- zksF1hsaX*QV1cY|?tiH-9ZY>6AEq15RE=z-W=9fL7I@L(&|^$S7?{56et62FGtnPG zph)X|#k|e`D*c=0WXSS4DzLMVg~MkMpIfn5S~*(eoS_V%kJFq0-*nrIKG5SeiTqv< zl2y5_QHbn#fitX{2{(|kGmQp^X{Z9PK<{J2-}q+p2Y`a!5O!NO7M=cpFr1Squ^8aH={`}xl zsA9S0#cq0cRxSpq1=H4OLTn(4BkvdnhQeP%B?d>8?wG!L={)3|rAD*F0Nx9k<*}sE z3qo{$XC?)C>d5WS)#tfESGMV8VrlTtEVMWQu2)x%2TSz;%>u(p-2W$FB6dTr=_pm= zoyLTONgh9(DN>lx)ZBb3Mrh0nihKO%FU3S%rQ8Zdul}w>5vC5VASM4NfDtu(v(V_O z7-o1wD&nKNXmxV|JvFWq^@qog#&-fMNu@Rm`QvgX{_Mn^;eTKkmaNO;L1+YPt^N;X z9~(4#sENp4oot;N^O|%=;z!>ZfQuA9sdxx0fDfMmv~H+=4)%k!DjY#D2p}fkimHN+ zU(oxYH=csDnOZ7(Q>f!3xzH#iO~luSHaG&0i4N4EkIMlNO)1&^=+9zC{w(J5-xgEY zWtaPXSuiM}XXtpA;A#FmmsaS;{n?%lpVQ{21KluoYVq+|!RtP;+J7{CaF0_lY3=RZ zwxGB||IrY8eaC!}#(+qqPx+@le*J9tdttD)wkKt>H+PRzWrw@(Jw zO#{{Od+?yJlT9Ypj|+iSuOU|~*WvBO(fYtu31F9sFk=*wSkteBx=Oinr~k6F6uj`B z=j2HM?dcE0Apk7FCXCuT#Q_NPN4UuK<0&nC9 zL!XAU1y1E5zW1#33TZzx34)U}HL!@&Y5J%cbj}F+GMejDhCg)wg~{qL{Hdje`or;N zfy$_0)5_B1YvX0tM$)`aX?3PEdMHo_O63`j`#aytT)7XQ=Vts4A zdMDJ)Gekvyp+Idw`W+^UBliM2xJtrkUh1QInVITB`Y2+x-*i+kznBCA0gvQ1iEs=P zxJUo17eKSICcnuo=FL#1FJlO1=7=J{1I_nwN)tx$6wt$461<^I4tF=4PU$xyLLPp~rNu5_PUZzu%FQ5=R}ZnihhQ~{e@ zL3=FmTx^_GzbPwIVWOzcfVUKv`E$C#TV0zNOE#Fo1Y-N^)ZVFOy8@EyT_7r+k7_o7 zFQ?`W%8#^qWe~ofz)9`Rtmj!xc1!%hk8EL#woT$_Gd;F$dts>oq;I-13BY{6-XAV8 zT%IrXcn}^U0~AZ@{l;C}>X3c?dGyjQY#*(H{$Q*9N9(hu>-{zX5InPfT=i@o*L~*S zB%Yd4#~Y=xadh6NmEH9l!cOlK`cbC_F37)2%1{Ileu}v0!&F6yBI!@rwaO zCYF8}Q#oH*M(uJeKSCDnn@pVUSA{($fP-=^qFDXWM;uri#!6R~<^7W~$s1zFCi~?E zQjxols4)N_O}Seq<>O#rj}*B9rb_4YS04!|msVGE2%34XVtMS;@fLIoR9_M#`yZ#& z!w45Y-zx@4B;m-kJ9?fO%-4PK2R`zVbkT|%d<`hDDlbpABKX<7PBz_XgAW$Gzw4g? 
z>Pw7pAz*i!Q}$>rj1%vk6>(RZK;<0=~_3s*FfE zr+aubM~ZXS`A5c|V;%xNb?88FLB*81XJh2szA1llJuTZ$vWdjjoLNj`!<1A2Z}xjLh{jMLyq>jv=hrn40ez0k=vEYaL8IkKfd5_V@-+eysuxww$38It9|< z(&= z346=jzrVYg1~A>OEVB7vO2*D>n3a;tEZyDG%bt{ISL+Qv%_dk_z-4fA>eS-t%vT{u zfa@mO1w1i|_(-~Qo&_Mp{h|Q{bRGbt<_x5EIp2sWi+pV!D@^Sc5Dm}vTrAgd{b-mf zo!?nGN%JZ3AJRS%+wJZq0lS$=!vzdTZH;L-i z`$EU&SK({#5r0{lAfr4ceEv#LalFwLEnmCZ30bESP{|-JvOWl3{JB1`U?btT!ejC2 z{6OT+_lOx-sA5Lbn}j%8xo5pJ=%vp)?b`z#Y^sQ1RHYw6)e}(;noxa-qSg+y5rBjG z*hFa!(4u(V?^EkFM8=ATC=B$*r01#T2e9&C5(A}N$^b?)@2EF#tBwnqNjd>!`jz}B z2$V63nJv;Fi>5W{9*gC&((a3ADCQTicGJI@6-9JEa zggmJHzZjy(w+;*_Qc2yB)1*98x`~S^pZW-Qw%zk_0U!NhONAE-Etlujc|k!^|HA*d zkp#2*NAc$&mJ1mF!;6jZ=aD zQ$PC?a#tyL>Kuf|PQCx6220Xli_y8mxjb5r0NjfFOpOf_5r^p~ zy+)V53+SrZ-OVL7&%SS84WNZ+A@_HUCwsFRsQ5Q*Px|}f=mF_!2HHtp0|>GSB)Y1^ zFDxhkT>o>{@&3+xe6ikfqqC<+YNacJetjS*DIAZfKaO4zfChAQHX*OSyI!B!m>pDG zO?D0p$Z@>=@eJ@BNfsm7VWp*|pyyDx#%_@nB(i`cT9MwHsm`;W9)0XN(L}wg!d}&N zPzt$Y98BSglTBa@>JJ3988x6S;(czfl6N71{8G-9equROW%)f)#BM>*{pO)pyx1{lFs3EA)j!ADpRKUC$1}j!y$W>u^J4z7wtmr5euvpZidq8 z#lYP`yC8W2fW_td;~B=26de%_7T640m=l;Z)YTNz`SV)jzse=CN~{nV=mOe?A`FLK zJ%F(NYvaO-(lp0O z%yT2LQ!{=bEae(&E%(c#JnRud9^E>7*$Z*pC9biqCeT_)r&g$*eZf`xWcP-@tJJwQ zmB)@1a(kAF?F%|U6Tl%evWK3Jfsi_WjjST*GKHWYIxw$@o905%8 zG_O`NideZoHQx}ai(pK<`W;G>>Ug0Brm9Ba#`)#(#%T5{G}_N0kBPFI6uS{KDBU8K zW4Z{;OiQ_0`G@T$OY}gy8n(Z_$osWW(444xm`9pnxjI^+ClE#^Q1h5vkhfLtv1g6# zL6D?XxB-CXggj`%uxU|MpTyu{`GeC*Kz>5i3-)6+nk#d3vZZpmaDH~CUP>Y&%CX-& zcP&$R5yayH(7kO=giPA6;fcecUxe+PZriRKJa& zb@oP;5%2bIbJcFBQXV=6=wYqG zeGAxCYQf=)7!iu`*!^h6dny$_F_a8bKgp?WbR?=_NQWPO>G=2(Fm6CMYz zVJn;rz?|WztHTWf$=WM%=Cn-alMF&p(vG>*oK^_9w}!A+972C0LjMu@<`xRk;=Dd{t7q&tkH;b^lolux88?O^a18^ zTY7(UtcYhD*}&TXH#>;uct8&*u-zr-t6WKNWFJVC2h0Qaj~`)TI~~}Z0QGhnXX9P1 zM917dv>$%nPE%L^2Bsa9zQEX?ZjG*E0#GXQWM@*FQH_!4NMR}frk)_gL9HlxxhvUV zfzvG&2O{m3*SJBj@YG~j`b`dq5mG^n;%;KdOLC2@FQe3<5j1KRoW`A@6!c+lHkv)p z84pz7)FjKg1+g8I@Ys%>N3gXbLpw>CRKI`0)?R-6ydU(jBy5I+ppy$v1#yP{e-i#n z7MpK-S!Ia@AsC%L3qHVcbBz&1y~g!4|M8jqJ;-8{g*PgK!P}47UH5z{5aM 
zKo>#QhBFO7VENWm_tH!OFq-whcJWI ze!ZVmmbzRAmh@bF1O<}o2Uy!>wQaJP%FQ-2Xd9_UC6er=6cYn>i;Uo=7xRhkdw_Kn zFmYvl)>V5mJrYNApQvle&php!l!;NpywtNV1SNTY)tmN-g%J#&Xi1z9(f0G~;D;28 z%eOvRA(%YgohldK6HgoN>L3o>Y4=2ag62@v*n*~n*!wy1&{zd=1!rR_MvauR_IpTW zO@lGB&y$(RyCuqTJYBZnH9&_i9L9XXM zB9>1ebvmR*!@XrCIQw6_M~;Y$_J5Q*Ks2T7JNSPC#cUl2b_6v@%&P+jJ39lErwcu# z+#h{!L3TNsH}DMfq^3lY&FX=;vk#R=hM{&&0EP=@H8``S2Fd1xe^tmuP|)G$6)zee z3^IP2ju1=@8L!8jJyb=626ss4#JNo-Sa`_h+$H9`CVJfJff-`RpBX9fAe#>@C=4B(KPlXZ zeP1DjYbyPUdL|q^B9KN1pAA1YA0N49XAvba&1A6bpLgc0B0m8 zB(zghjJe~@yEK*H`9t*`v0E0Sr@AB^=fA)L#+48yvSk`ccqvD^?D2gjAbw zQ0Ry6ub#1xe+8W!GAP>f*AG4M5U&TG&mMnM%vAL~>|R;MK)rg^{Z?;?L{`V z(>0EXsAz|0nDwZeTi_e-IZpt4O&Muo9YuC(VQtnhwl!x`w8luwor}Vmh}o|+mo(}S zLOPPYkj->c-)=JW%pE?53+t)!lKhMV;f;Nu&>*1!@*)+ZEyKqL-ftYYCW;Gz3VQfd zA8@$&>eP@*Lkr1>tU!gU`4VI$sBYHpL}2<>j==(rVB5jU?mdZQD?CgTdqWjZh-gf> zoXq<%bSG$uPyGEC;C2dWgc<_aejj2{#<3zi>SP_d5Q|)iU~f6h6m(afZ;M#W61wX; zpr}}m>LAcTHg%heJb2c1u(TI|+=IuBH7_mg1j>#COCCVcFq-0?y?hMuJXf&4MY;1l zTzVjQY&CyY>2Bp2YOuTWZoDAhaTnRu2h;^K)UfQCTw_;gmp-s}bzie7y0S%jF7O;) zFa(^y+n~okvCQ4juZIEApC-~H9HJ?4kDS$gHx?wmIzweDE{X4GYT07H9B_o+Q3=R# zk-}9t-VeGZx_n*48zBxtI3=vV@R~H(TcG1oiL65r1Z)A`A}{5sj``cm+3AKBS6<>R zIAXOTJ4-$gIc(j4C-*ZOO4D*5jq%(5mgzGZBA$93+4_)6kgy|Vxw5Tn=@@y0y7KLy zawgkSrTH-VDcc0DvEcXNV32bF{b`VSQIm5qjvPMzKXXHSi%^64an`zaM573hsVY|Ca^8`{nXXgEe%!bvZ(JO)07U>!2z^bfqnD_Ly&FVvd zDR=KwB-`NE3Zb2uWd#+}<*p=y6MwcjWXxCh4QrZpC68S5vY)0gb?bfWMJAz@m4M~& znS(kZH5WnBfuZdgfxR6vWN}Hrq`IHYZ!2^1Z*U zZSpvk9mLGA@37w8WUCuSR+-LlW`1rbNb^vn%x^QuTrcSre_j3a*~-Di7JvTS3v};p zQovU)!8CggofW3&4Q8n5P^xUcYym{P$3E&*_A256uHrS{e3A~Rp+S{;;SDG9{M32<$OY6IZo}yV;~~Z)XQYzjn8R9R%Ae94A)hSfj3PEs0+l3hbr+${y0#>z)*zdY$9zFInnI?Oym%4)bcry4 z*3qAyD66#%Ph-ur&jaY77>lV5sLHw{s)1uI0$t`-WFj>+IIv7c6eNI|!ia*tR`V8F zvsVB)IRbbYm~)9GWY3oE41KxSX2m2&_3Db;RO82)HBoYw- zKv03EO{Km%FlCr19YZAx3jHz9fUYxy0MC0QF5*fdD+8o(gS{fnH~<=Zz}yd~DW(ey z-zGD41ZTCdw|E6zB>>D8k^9+h!rDO6Yw)|FFZG=WXpOoZ(V_O)vnxKOzPh;R-Mm#uPxk!^dLQJrjdcf1Cc->saQ}GD|fNQGR=YYaE0@HIc 
zDKGqUWfSDX7LcDbvApczcA*jlZuLc1sf&6MfOQRpV1gv7KoWo@Iv5q8znm;aq;CpwxZw>+e>F()kw#GjN3YbJr=n0qp*CR%^dX7Wk<-cKTCw^u>{4|ut? z2#Era$jB<8ef!YsrKPrCfOu1yE;B0aAJeU}u%&f_c8I}eNP@HcrJi<7`=AFlaKY>w zT@nM#RPs&HSL7d=5PdpD{oE$=X3yGBMOwe&9;5Kaw>EozG(F$Xsc_k2b-y^wH|>j6 zKUjaAAs8cWv${m{-$oJU8UPx(Ni)4V&N2KpwG+6=uI5vrGQM%UlH8_7|jOvgN zp5zbGUWP2qH=O$2+@Aj#Ho|2})B%9Xm)Od3P{M}FYED(Gi-H!dFMt#bk_P@nnnD$p z>@n)Q*fPNcOtqzTE9iSC`{?m!s-Ea{&^GNGGn7u^jt@69L5WbJ-<;E)lEgAg^;Fm^ zcCMaN4!Uw@?jDnn7WCgf4uI)rurf|@hIGIfToOOWMEnWp*MtD-bPp8c3PikR>7C(t z>AKRbUlHsv*_xVmCic{2CSioo&`~0D@?yz!j}1SAizNIXLqzH&1;ydt(`usyqG;&k z;H!>dQn!v%%6SX2ArvrYpbi5z6>Qj|9MR!7hV&9_W-75#t#lnTvaz+N zX@Ly~0Pbag48TH$iePbPxr61*bYQjY{M;`n?C);Wh+bHv%r##FqScbwc{`r|DK;|u zgEVR>&11m+t^s~4mg<#pa}tkE&11^_kLc2^aWKyMXxnk0#?TjbMf0I2EHMA^-~~6k z%+k)J-DpmaCZGLMJeUuS1Au6FuCyr5gEX=pE^Pr9(pdZDcE&a|Ha2Wcs4UBCWUVj+ zc;Ic09utk~r;nIYJ}`l=OGGwy z-5O;%TIsTq$rR4RZl;x8ONt~?c>?B_{SsjA1)=vhI2lUd9Z0Bji*{jMDA814D0rpl zH&AJscVoSy5ML5~k=2C(vh3;QcT_Yo0v;!60MwE}SupOp89oX|-@(gn(Ih-{sr#>9 zfMlRQ7T`*AWdFNU=S0|_bEjzbh)UX-Jdka4In7nd={NnIgg97vQF8hK-2nXu}wJ;+5{`}~-(uOFa@T3dykx!UVdKujvdLoK#@Gi-dbu?euu?%a%r1~%^qqKI>}^B27TZsXGP0t0EREq<)lcLiz! 
zrfa=gv4ZYYMC{pdroBtPwN^>2Icq0dauqhS@}N__32pdiv;1Q7^>=JATdDuV&hFqI zP2ZP|t)$8rMvt+ABO0nEuL?$(gOyl~rDdra#cK;@FyU$laY6P|SW6f!(_a`g!upeuD|;{K-#ckI#?V z2U7VQi6=kP6w^Gwgw2L!p$%Kc`F#M=;zt$?K_>#d*(a#6%`DEv-EE`O=5sNCvOWEB z3@TTE>aMq^TdLY>s`EaR&rf% zJ|?+fOd<EOeAeLUt8eB@WP+Pm?%aNIzpw;}S%pR;-O4KagaH81Mv8=0F!U z%LM~LliX4}P;EUos0BYt9ZaJMbYCdG0KM8c8uRtPL6`X=hhb|xMO1rwD<0#`rmkdT z85bZ}<)caYNGu*9TkVhLmBSjCg3P6UfmF^3T&$h|SjqnwK;@?LV{Qj6uQ7!hc7?tm zKO_3Gp;?|iFqB?t^CD%#<@jU^&2rNA`&@(bA`JpfrFNA?{098W51H0MnD?I!c+XF> zv<5Dp?&WWhAp%xvBhWS}Wh$v?V3VLVAPeuavO&KRa({W1-_q37F8%qrzWUkD^q@n@ z45r}+27P^fY&vNiy^Fn!24hT7r8@mNXl&r?obU4xJX~< zkScIpJ!PE@YivoA@2x}S`ea-aRmkfM<#7iUeW>n{hhet|NH8HO5lBD)M85zm%!mS#os%h_c}4~ zz=R_0^?7Ezao3UbTwVF@^XyY`iO?k`eSM!cm$kh;lUvbUSpN4LoU~1JJX!D~*deHb zA!7$)7SIR+j;{I`nuLyGl(i9i6bt zU{7t}I+>cVg=t&X*NgG)uxja6JeZ=lGxBvyh8y(Qe*^iXYqSD<4J@w!^E=dIFa;Ce zwSanVk11)@*#`qtdEFlJkwUBTcWf}aj!c0nXk2`z!J1iRF`@)WEt0idPz%9W*96HZ zZMkF)yz1Bb;>BnLL;aW{Yd_zptO0)q`XX`pT=#Rm?(g(gdK8~QZj@g$YLpbuSS5#i z_21RB+?{#}$Um9BOo)Y_(#Bva-E2*9oY{bjMa^$8FbW$z0@Z?~ANPkx&M-PldN`xw z1OjNk`XcS>H((CtnUE(&LX$gMSiVOJ5|0vKttSDIVG#A8@E~BwzV)1admiFzpj~6l09ca* zKto%>ukGwG4J5H8?akH>0B(auA&pnL(ZvR^N|A!#AtK;%RDiNh0L&@EW-0L9AkAk0 zQ?x;M=Y+n1!v>FB;e$e1wqn&6I}K)H zgez}fHo5N2h&44eja`6;62$o4H-qQAeEkHbQ7tnzsgb$Xu`>Zx9lD#aL_XhHZp-TC5VXcU6s?$gcvcmHN)1Z8DL>6 z(xb=755KLZ?10eK>!h-&#M@5BsI>>21o&RW-Ut`1)WxNP6gEOE0|zXu>QMn$yj zi=_pRr{Slyrt~;o>%&1_$Pn_R0cyhmm~}9T){I>OdN5v~xdY0i;5BFopfzd_d=FwA zc!o?Wz-IU)R?g4%W@Xb>IP1UxVDr5gxr%O?$~KL>+3yxQr)QdI%5t|M;ch4$sJW?M2;nDFY}CDNzR#T=r&U0Bc}& zxYXuf@b2_xh4bQk(N`m56}sA!dctp&|L`G&kKu-3b{B`^Z?|MF zt2`RT4A~$w>fN)xIH{whHa*aF@uBM7_$v<6-fX&h_v3e9z%20Upu&tv=9|LpNn8&v zmI`b{836?w%yi)8YFA%*lt;bM4P?v|@s$lKTK|!(NX%nP4Q2-=7r&KM);mthu5|Xn z`@j0^c)nkpYx!$24~%68AbfNOqw7-7>AK}&X@Z!NBv;X6SonFg24r}rY5N7 zk8U*=xbDxzZ~f{4*|WJCGB9eyT!&Q`;4|d^7h7)~R%O?H3lkzG0wUchBB6vJB`A$S zcM3{NiF8RQ(j`hOAl)ShY(iQ?y+E-gwogf(`iCq*2%KcZMxHgYNGlegRFd^y#kJvgRXB=OeF;Es+$Ep+f 
z(zH*noRZl*--cF%{X&sID#gxsr==&*5zI>*g1QOY%c*ae4}y12ff>Nbm+jbOajE&U zDoMy7@QRkag|rg^cmb#zZ3fN7FPbPg3XMGW*T^$DWf6L7=|&CAXE zoz%f)VF*l}#r%t7{r1omjG4;M&o847B|BXCkXf*A=X-L(IBjy~!eCiSmdW~46VN%? zRYGwnL@9)5;pKyExaI|1)>z23P`^7L!lFrhlqUHEz?%7#h__caqe)p$lv>{lX&iC) z{)R$=LRUgD3yQ$pZCd6B@|Tx z4&m4Cg;w9r=AfjZd$Z{TfBfILX9BJ117F$=h#})Zb0N26mWWoXBZTZNn|wqa*Cnr> zM0O?HgDnYz%7CLFi~v75%Y8WKK(*?a-?W(yU};I>ZVpcYpUVT$ylSdma#9c;ATV>U zHIg`jnw0t(CD-4KXMnzc2JO%fK;G33BBY@NqVEg5hHzT#wU#t!|1$diRiBxV3!hN< z`LT3pib-Mlr*3{Nz;QCrRDBT1NoQbUQ;acq*809Ma6j&5#!!v*yIDATI|j0xhp@5U zSp@#bXmG3t48LM~#N(mF1%GT7JQ7~$zKCPoY2RHN4h1@_S*}j$*XuyN-t-4d6fO-e zDFGaxBz(msYQ;tZmAre)7dJt+C;I$MKEKA)kX7bg&LzjSq17Sk12_sh7Fv_opZ!pn zn0a?MPZs1!qF#=LQ|thLSa}@ig`xq3JE6{i4`YbGe}PhmMau*KmhY~}ghBQ1&$Ok*Q>9JwDXS)0n)^6SgR5AbTPR&_e<8* z32D>|kX1?%8j1gS$83%9<1-^D9I2SjTMeEkBw@J(BoVe2j~6x)u+3G#QcHPlW%-dA z9fO@VsV-11a}m>xvD4nABQRfqMV;w$W*ql1N3Zf_x{Rt@z15(#QdE7KZ{pr^$pi8L0yCo0*+Oc2!?6Cy+4J-ae?^W)DZEpI<5Az3#R#rrV`XkkmqDaH+ z-SYCa`Esd-i)SCWOEbGg2NK|ClI$9MKj`aX4R>s!LjatmER~vp1`ZExjwi|4hf=h> z@1CF*_766F)fcW}ji{g*N6LB?IRKM`s<|8}w)qRbG|-)>+T=x?>pgr!=-Gyi_s-y* zahzgoaQ;b~X^$@36X}?{ZYip-i0$PfQ*Vx!Ee#yVRjBg3H>UM})$+39o$=J3C!BXs?uxpGz`w?( z!sakS33d4(30f8bp9`F~WQv=?M6?`$z(FI~ZuP@4p*1X%e>a;rG4w$A;pD)1++M~F=*JugU?%y|# z4}n}+0to#soe9My*POq;*V-Xe1tE(k?7xXx^BvRL_S{Yn!iREpV*joQ8Tg<|!>%Ea zQ2=pyz;NFuy)J~$NL+nESS_v@|gmPvZ(diP31{g^sRuNe(8>2f{=8kO|X?dJCocmWtO zcdyN6qyT?cKu4K=jOb31pq1M3{;2W(U`O)3a+&ZG;nwc?v_KX76Gd-9i>x%GYg z-J5C-FUe=_<2^LIa~)9|*3`jtr8v*ANq4C1`JU3|M0osdLCuQy#)f#+xiwS=Hr!Pt zuMGm@}$hn=n zl!)ot%Il12+n{`C`(G2AoWqRxyBcYeco>&HBPC}b{_tl+b5gATZYAV;E&cSKj<0tJ z|0sOG;X{HH;3d;85&428AZJek^e^6;4B!>9-Pu`JX)e3|8U3~hMC5!2yRdt&H++fN zANPQaBJvr@7+Hthe*hKq)eOrr{|E5~9d4%3v->#qsDd_#2Fq8crMyeG5Bl!6bCP~D zX}TOQ-VvujeXk6ZKz1VubVtY^GUM^FpZk7)Euk@1n2R;9Te0%=&2Qze1V7NL$9!`F z5fGHCJd?Gjfvb5>B>K{w5RyL=l}QQ#m(LHiSmJ0>;x$`<*9Ew@gl92sv^Fj zwi6bH>^2*92u&f zq?E07v9Yxcusffij$rLbeAhUc<0zPWYEKk4aEK4{G(Fk$Dpp=dAgIhiO5u%j 
z9@3UIL}R=xeqXUR_{u6TAkH-Qf~4e{M47nhIvXC51N=@D4;!F*(bsB85C8#D`9ix~ z94$mG>-vF;xes}F_M4+7`y>s&`5Q^IikITbAe`}%wTW6Bq2QEt3rRO$%k9jq%E&yw z)U8c!9(R{J_<+fAq{7Nbv{L)Obh{8efk0_{g&PXxp0u@6$}F0IYuo@DRJM@79>TnZTOYYOpQq$Yn#{FC*6PU#&TGCn!I> z3Y>j`n3Fl8c8k+-A+rlRbfdAOD2(3+MHl}F4`+eJs4ftY$PfL01!j}t#{)m3;K$n{ z*!-xM?GZ%$uQyEMUqSG^%Qa5+1F(1knqqE$qwY|B$^=~BSAJgBPo&Kej)oDuj-(HY zqot`~u?TBI&d&{5w%Rj>Pak*q_=Q`HB8C3K4 z$yyisrW&ii2~5%t?l~^JyO#42aqk0FwJ{=M&6*{!J_Df?Z=H{e6MQfyP1wsDpc_s@ zG-TM@@h5D(=2hni1moT#B_GW2Me>hQ#NEpbYMf*tzP<_2w^%^W<1mqpP<`}PluorTt6#1Va&3dncq<3HnZVe$bp6B;#t`GSY2@fM4Pv$rq_#eIM@_v4=J%JT&kOeAHiyOVqYU06Ijdlyv8AceSnjGRx&e4M1ZJzgz?`l24_ z4o`cs$?R4X9Zu~m+fjY*gJmRgDVp`P1ESR77RVyL0x>28E5y;fJou2(mDMbA!4$9e zbgk$Pe|6EsOh{F3tHA#p`!L7TWzR#=oC5ah=J9cmp;89qGrJnwg-tV3x5{N*OS&E+ z_M4^zj#D|RtoyUUOU*b2cdB$CYyRq!0(B_zyW#7fUbPy~14^a83OL_fq?FML91@R1!5S)aSr>H?-i~4> zo3^b=j<^Z?d5Th+)FJR_bEf`#Ev0bI0Hg!tN*nm_;$&K%oL_0ful&QX1yu<ofcj^Cd1p#s!wS|=BT)uGegpyL%dwJp$S*b}w_d3c zBnYVFX@q=z!-ePEcF*%E=XFGCNmp{lcl4)>#=DfegMysCiZ&%;;zg$(+Z;6{EfQua zJfE@v&nsG_0{X|CpywJ7alb-^<+(79i0971foYY6g)!TNJeD*RVwOh!5nY}55c2<2 zhAZD0x9d>sszQpy0fgyTAT-DQ2w4oO!@glVzqxs9zU5Tp^K(J8I3UQiHlkbM*7_Xq z#mPFMmkiJx*;@rMX11W6amBST1E!F@)#LweO6L>N{0C1_B*i4M6_q`~!9e0unF+#a^ z#vFrmZc))~@(D{bEJ;bDyIcK^(}s0!hJYjBf_7azw=Qbi-1Y6Xy9LE!j{=;TFsspe zlCPE5QpvtRajFPvJ5R&h>aQW{SHYVbanh)ms?9Q$BnDm5`vL=FZtnnGt zlIJci7A|f?1>!G)5S)niM_#+XDxh!)ymjXZwmBwPidV7vOdkRuM4J}99{*={{CGFq zn+94zdb(DMgLMOlPwzl|QW*^@I-iC9@TELLQd=duMOTRHop9(X`X57$zjn3R36@zV zIyZi0YN>_pU{Ach^xrvbygff0LUsX}$KDb!oIM7$3llkSs&ibu9S=NoD!h5WFJmff zzQ?;lQs0sD_3_yvj?a9F%RZ|FJ3vU2cZ(?_{6FpRaE@3%LMTzY%(w2b4Wcu&yZ?KX zAx(KQ4(=$~(7RlFhug+XP;(~O(U@sC`?X0F^e+|w>C0D&5b-&mic|2p32-pXlaV0p z^-E=@?_7YL!VXjf=Bgl~f0izqp!V`1PQql>7IM0fPRD)QSA!Hxq-GkiL|GZn6CiV)IKtH)PHN&s?* ztMKFb#=Q~t)svHbeLo-ICRoQ+PTY3}nQO;Smdto0fSBD!sFS^S&^{r z_r^Ty`+s^)F&NATmm`fs(u^DXgaTDhZ!8V4uP8BrFunQ$6~k{lQgLU5rRkO}XvU`4 z$yyTQe-nniaA;&){R#H`BscqwFVT&t<8>&5%R(RjnQr-uFKt>|>YI)JvEY`aLCZcW 
zQDdZF6$FJ0)^329!z5Vdt3VJ#znURRIpMs0#@$a_wZI-lsA&25aK>{xkn0_etV#{R0q=|rLi^0PHTYRQ z*BH)q+Pm+GE@dYC!mn4Bzoxemh2_8JJA>~jaxXbHx$L`zYoAzOOGec6C;OT4pIi0~ z_6_5_vhT0Ie&|sBax+r~Ke%ziUSq%VXk&cCz#$pCJK+HU?x<^M+IAjO*0+-nUufsO z>h}bEHNH2|b2~|LCe&_II+jM`BOE-$i@cty2U>ne+Ok)5{Vxc(4Zf+Rx-Ik=X!RxE z(^HCHczaaqqCd?9WS*P3hTjuu11GEOtjnXn*1X_~N==P!KVJ+rdhS~|(b>U&@6Y{%m8 z4aX};-E&(Wnu6xM^dv^!ckWQuwUbYW)KpwDjG*L=yn4+i!~O5LE0;+FR-o~okVPmn zr_>RVwBXFMTR2s{EnAF#rh^!zsnGIU=a~ykL9u zEJ^h%Mugpf$ z=(;6LQS#G=11f(l_4lbnt(7ae2S-Bsj#ctFU-U7X?O#PA>^zp*JlPqHLTMA8Gq}#r8&23c|58;v5S(}4F-tiQM@*&t+{-6S83^7e7<={mA9o*)$ z@5xau{`iX~NvFXp1$v}sO@wT$tb-|Rgfk|A3gs%8H*WbEVEdbw$YI?pw45J4uWx@2 zO@;@U-Ev1nI$TByum`Tf9&jOv)}$)Nm>+cuAAcisePzrsU3}+9iqoXssBTL-XDKwN z2cey=_UW#jKev8W?>__h-R`pvkHPhyu9E@zp9s&5`y=A*HHXX)?{SZMlXJz@M5Pn9 zi!RN$7O=0q59dkWVn*$L&iMaxDRKk_hNlTruCsfP9pBE?udWT}cKwi>B)qM`6jrh_ zG!y{FOE6C5gdTybmc=+N7s*#F(57*HtL>TQzOkw)>AqX>J?q;y9bvk~i`W>g4tm$W z(0NR(X({NAyGi}^d0h&D3MQ9BE_w%$SE0=qYWgMGlWgD9A`@`*z zY~vr}e8CWJ#4z0;h6^K-2$nWR3d0*|lRu8}M{&6kk*8xxUjNmc`j*&Ni+yree0^-x z$J{Nk^3CkSDfQB4H3ssc?y3F;IV#d?PjFzRIG|lyiwPlYyDBOU8tPyy1@Xa*_GtpI z;`%C2Dxn>%4044hGfd?ce5$7bpjrx|kNYeB^-Uqr@ng?OvN+RdR3*4nzD(&E%I6VT zFQC1QqW6reyhcmA-fOhEdgma}4CVXiQCNoHa|;*`35zD->06%bOsI)xA1%$P2pThv zrkq=zPgL{)XI;&7=!brf8U`T+@-lWHCAUs)jJeDDh!%TCa~c+o)3uWlFw&f{zsa2N zefsEl(ffUOFsKpV^+1z52kUpTSxQv$`oJYaAgOm0Tb(C^Nt3{BR(49se*Y9WYw3%X zJbR&Se5Nr{ILQUA1Xmvm`mq+o`9zkK+n=AUWj5OHm145o189Yd78@DLl{KUoasCF* zN^v*Wv{_lzlxuo!KfElpm~39yXamIu9LCSXS<{WlQ7UXvf85cwCqKnAdbV2(+nwK+{T9UK#lV{;mn-OP1QH}9!DQl zFrVW544#!5YS&#)@QC8Wd6Ceal{eU&@AxA&d-Qsh2Zv0Ie z`C7C!O=g{a1_w$m#>n3B1ubUCKmGQWYJNDLi}_B%_AhI@r~GO7Tv9k;ny>>38VU?= zz)BIt@3iL5ZTxtLY$<@a4cd5S>~}w$%JAG%Fr}${)+&e7EOHm0A?pJVZEby1{H+Ay z^4Bv@a5nP&$R!XSIUzZbQ{#EwL3)hwV1ljtyxK(-Zk~~R9BW-WED7AY2E=Mpr*L9H+*iP3@5;fy_7lW%2<;7<* zK=?eC{~Mi>j&*#Kqz9RlcK7IbXYmGhmr3Dp&=39NAgz`FI!a1Pg*})jktdkV#f!m1 zpXecSaW)vg6%Bz%7unOgn`Uiw?BTSdS!}9#jdkwb(@a=QZZ2 zENjmli3f6LmGNuV6)yerZt2wMKa0CKk$NhMN^%Y+!mA^F!{pKm&U 
zW+dG`8IZNGCT_8Nu{2&jZgd)1S+X{D8s*4M+vXocAfgMO5-UF{?ahi3wY)tZZvad0 zSn5c9F7p1Lu8!zM(6fIRE9q&E z`D3hs(YH^?SI1{Fdd_}v=zG}hhgpB~X(0TCoEyeiofW*#l;U?3H7d}Vog?G@bulkXkhA`?B=`A;(h95?CJJ?%~^O}n~`2ObRYc}zG0T1 z-@&I*iD<$XtIKO$`1BJNAifs#8{mu-&*nWyA*NG^9X0k<14fkKC`Q#EZOmpq#t2JQ z@Blolc6&b8eC#Cs1n>6|U*)JYQ)aOEfD}#PWWMa5v{`~SSzl=JJ22k5jpMy%OS%ALz zeiN);BCR_{_S*25ujy+z%af3_&#@U~r=O2GUni=iodEIAC;I-3wiTcj70)Ris8F_9x-ZT<#vgR0I;DfjF zV*4Ts(NINLMbD|?QROklfENP2W*w+LW7Idw<#dT+tIlOaHpJRD92*wZm6cziKl%8h zwH3#Swm1ZgFpEcwktF;W7J-#o`DX|Cn&1qE?aDGR&#lOadOIOIZmFRaK=Gyb0)Ff~5Zom`G1<;?EfyYQhV{N>TzQ zndK|I3~8)n9t{KkZ?dNHyAE?gyFUdEWi@A0e(5$BlgUMUma>8oiLzZX3O%1)Eh-bIDUF@s+R!_w%QSxNsT5pMK*u zs?%feInt`xZaDu?8XkNJJ=G%hZk67!UnX2PzH~V?+iMWHYPftr(rUKYh4eCrzLJqV z`zwEFGzT&xiYYB!4{+Y70)>c`-wo*iAp3Ih{UHgb^+NAAH7TQdkHE&QmjnM$fg2dT zZKYe93NN{@5$mF>2tSFJW39FQOes>^rr$TSHIo865EOcD^3@@q*31VRd1&?3vZTWs zVSOp@dr?GsZ^VKcCseX*irYhYo*z&Ts&IUt!}LoLlkj)eiFyV8=!(!(Deb zqXS?G{^NSZ;d#55r9rT&^skVGUy+8X#QU;C@sK>7a!keDPWL#BI`sOHR<4?A@KtYv zMB!rlZl7P47wFahN+uMRwk5Sy(qSypW)Cy-*a!Wz`I0YK@)2|ik&-99n>cO9g*}O+ zZUrJ*a+89N3$_q}_GL;3B<`-|M=IBQ2!*|j*_W4sQ7_3@#FC;NAL;nGWeGe#Kr?J~ z61H8jp&Xa!X0o_@Dj@N-98%ZQrG@uIk~lrD_?%e1uRGn+22RIAbYu$>((hHjO8tG6 zfTWc$qu-QZN6DR%nZ%xx`PlKR2ntxjOiC-a(LU0eTj?j;oqub#g3~L1DDa6zca^2J z`=hwjWue1iZA1r5E@wyx5BbrDo9YAUA9M=I!Crc%*DV96(G#fh$u6)ECeRjN*IyPr zfv-#~N7xB~4{Qu^#f>E#A7oR~@2bh73?<6zA_+YF=iUkC&)-d<%)_qpytKE%sP~&q zl@RN?RV>a0X>OJ4TTAZ-c<^5QJ4zAQpv4a_53EkZ40#7;RQm?buF!jm9iYZaV|SZy zXKU*lU1z-m1np-UBMke8|0t$~Ugyz$v_RP2*_6_rxy=;)K=zegb43@)pD>4IN$=Y3 zDCS5dh@P$N%0G9&Y6(w1upR%`*FMkyi+^#b>)D0zdG5;y{(6h0p3Tvjopzp6*GMex z5Ig1L`Paig+l&30HmCH2k)ia>%9k^y_iDETvF5(Hc}hq=eCxvj90vWDwX0n22jjS# z&&3Oglwn47c6VOTqL*jCiEMc;tP}=si^CM;`o@ln({=>oeRj~63!fzP8Z{hM4y7PU znOaglx$7O);jXP}HG9LAC1<#N@b};T*a7<}@851)HS=#+!?)_n)6iMs!JXs}Yp@}; zDXDcsh|tG_NtERmJje_D)nP_NUCcAGjeXB+q$H1j<1XPeT%7xI@;L|wcJ-bpvI%6A zupQ)YIG1QYxzB0A|3o?MwBU@}{KbpT^1iFFID}exVKV`jiAt)PDtbv^%>;LJDe>gM z?tc^TAeI;Qa(8xfF4!wYw5)%)nC!-rZsff2cbJzM_j*x=_T1+)m!cQt2^AC0HFq}_ 
zc5mXcwZ-#l&pv<5?vN@V-T=znS^ z%uisX^`YqSULTMsmy$deX(#lW^*)QIMC&R?Tr+Fh=2e{3R;Zme5K8yqNvU{BK+zR< z>m|{O(;%;s;=S}n*TuHY-PsFCCgEAHN|=7;b@Xh^eEh{?1{R~qxMeep4#j%kl!h3O_zdJ}vZh!^+6pOid}n)ZA-yV@o_GBnkvQS*7i0A- zx6Vhaj?Qz3RO5q%`mVQ-)0MYmgRi*D6n!sx8{Jd-k{12gCZ>KFLl|(4h4t1QJs7Z` zp!{3`y%OVgZAUF}zCJbPWc2=9KS>zo>~ChaxsGEbInDn4Zku8?<}h1>3i3?nHnVJ( z#?P3S!ZH+ozfK>ABaEetsZ;Jol zI-3}dLtyV)MxX4uMyRr8G!jBQPa~5!eO#%?t6N>-_li;H@%s<&2=k9O1eVD$Mr~hd zi5l4PV)T6~f8_23phKSGy4@TQpU*XWm^SoW$I@3^eT`qF-X?DdiI2xY)6rnO9-WeNE|A?A~g_LI&awE)`ZwCVYLo z-idZgn-l)eb4My$3V#0Ue^mZW4*$&-C7fV*)@%v=h-55k>Q7ySAU9UNRMuTWc_gYF zH2+e6GsgXX=6)}5V` zK+3lhXDRE3{EMCmFnO|F+I-ing)paGQNO5h8bj(02XX=$FvOjHiyWi&c0gEG-z8bK zGo}^0EQC@v8;;8rx))M28#bG=I3fxi{Nne$Akv1H`S!!MQ4U>& zR5hA2_D#7tUE4%BB@aqXFWiW)ZuDZ#zOKKewBOcx#lBA)F12~yfhx^)>Z!k@R_>A2d_3J9}(~aAo z#9V8pocqGjiTt?j5384%uyW))E{!$l!oWRhiJO605C?D{yOsZ&KkLbmi#Y!&*u`QE zq*P3DV7pdEv924zaQ%~D7kjv9d~9u@(u%H2hs)-)m)?iR;kXGZicEJM#*s9$S7Qf)05UQ=^z1VGw7c&ehhZ*(Pl9sSt4jmI-c<~ zHbi0_YLbAA;88nqiZ~0uW1PbD$n#8}?=c_L+z*3e%REMyS5MOwj;+rzk06aF@ER>V zRX#u4{vQ7}Ifx(-olaaIc8jD(?+nMGFTY+i+h4!x5%=m7l9nC2=VfqKBP8;>Ti=bl z`tJ8d6&X%)VgUN;nBArKQ@I9|>p$A-uilv7A|?H9v}{ zshh1%{!y4uSNb-@+m7-Pl;0GhLyhK=UV5l8xXp1uSuEcezl`6hvHHQL+CYV}m9lcB zORM8})k4W%+Uu=c{+d%ja|c(zC^=sIlF155O{JtHC-tSm|$S9vKw;jy@B+75aM}z}Uh0 zOxwcfAonJJc{CpUB;0Ssc7T-NXBsgdQX9}lR_L-{7~@6ucVpuFUink5#uv&bArcMv zS)t1L-QD9#0pXz$UFVzanO=LtyC~&t1!z?MUP@s6@Z-masx{p3$|Cr#^#~%KK`1U1 zH+YoYMF;;hY~168>>F(WBsK4RVFPyVWB1P@qx`|k2uCOrBEvu=1PJUVW0nQpCz z{oTI!riA*P50j&!I60)yYfk#(vJ=X-@v;46vWz6wU;`@Hz zdb5yxV!94!CMat*!4lMolO6K5q=_8jDJbC*ID8z^zY}7~+*v`~FgzWlD z9Rinlp^tQbHt^@mfjleWnm;p_PXx_N^*)@H8ZRtNw+X%e7YiWpyK9pB1yxUJ7Ccq| zDsA}He{-`v7%#E-p1zQ0g z1eJ5k*KewyhrRmV-<9!TvY-eavBoNM`WND<{Dhc-b``K%a94L1D_BkV%|f|V`^^%k zD&?x8_p#{n|MQp9i1oDMt2G8Jii-zIt!oyU&c@3gH5t!tvyLf3!q;js_!S(Pkmk#~ z6nU@|RpJ-Dwa`*vSAwk$-L2`%TJc|67dM_&@xD$KTZg#pKq>sPMb<-m ztPAQA)J8# z=ik$9SY~b=hBX_rL`XB8oVDI363?S(rk?58?mV!@k5!U|+x@JK)Q9ITlVJO8v}qK( 
z-|uTkJw0kY&-wdmPaJ!E9EVLnK=dGW_1>R;wFIa__olfj+H$8wTT<_-oakmMRQ+hH zeNZE`;z>Xueq(26IiP{>ny{_mQp_>pJ=QFLJfUp!rS3}iwJ}{qngkY}?`a8t3^yhy+JG6@?{6zJ$ClD2IE#;A4)i=AL>A-T8i6v+mUF_R(8p!0c}EW&)GUZ_{;ml`x` z)25I`E zM7Tlf3ZtX}2MG1bzL)vKdIXu~2f*Vh0%|j$YObi%H(L_iuc<)1ms*DO9{qBskz_2DVLv~?{@P+v&seFmS^nytabL#oaBcKEI^aQ z4`c)%p>>#vCH_Wf6zHf{R5cv`p}nbHlw9izs8Wa>BB6qJ_PgP9D;9`#ouC=%H33&w zrhy!S{_knm*?2w#YQR1vRX76(QNmr6#$Jw!DS@-0F`2f`otb@x16jV$bJdD1rTU(* z@lBLH54ggB`dD{%U;(V9hw6_Nl{@2(&D586{aIY}btwJzp{Uq*tHGzevgQTch;CiP z?`*Vlc>CleLZ|v=N*LWewF6Ky6`pbkkg{L}(x1h1aQ@bkcWW>0)mNCdTtO~e87ne%g83(=OUnY8NWBqr#SS#=e*KZ5@j*kTj}H+SJdp39^Nt&qrxNx^?F7Ef zX6ughB<$g50OBa~*tf><(7|i+uAcF*Y0$DVJL~g3SLeAq*I?{>raA3-x>;@Pbio=a z&H`H2A3(G1Bqq(Y>C(QC{fvS4uVhgdrI7+%*6q5z2)!Rj97LbwX#t(Mn+nus1a4~N zM4cGwSJ{3#A^FQYYUr8*!q>^jr0tx`u1)8L_vR+&C*G?Ss+P{ZjQBMSMSgZjz$)ra z)*Rr1h@w?3x~UZAy|%AiLayx!JscyKOeE%9dlIKVq-od9AA*`MzvV9`tMRfd6Cde2 z|CrswS031KdOSy!*279gwQF3t8o4nbcrhGhK^=;rGX=*LB~k8d`G6E6p(N^L@ag@| zZ;p$*l_AKaVi{5dq^>*6>6reUbAm@fqc=G?N+P2gyidEZiN&M%P?z;?6t&=r*XcKO zm72B3Eq}A@|0T}lKjB}6$qp^gc;B;iV}J3>lEoo{sAob{Mz1C2wEJowPzZ+6VppPR z+G`IN(4?4rf+#OAs<(%|M9&UR=8=k$vNF1sD3Y)=S#7V~M=ANGq~nc+-mOpyo*3v? 
z7(%UwW6AwMvu<+W{Cq9HSP7n&BD1uM7OvYUUaML& zL9;g=vL3N|Ueqf zNxJ+;4lUF9>BmY+G5hPI`M;h9IxKhuMxQU=zfi57pLi;HEaN_ogrZ3Phs?}O&TUP9 zES$J%LemdN!h!FwDCT$u#oOJ-m;0{T^U*cS&QHtwCtN?1TFJ41RPrzj#L^|x+zUim zQP-a(p~HK*%YiD9#ap8O!tLc|~VBlCzDRZ&4x+w6C-p zl&eh9_N@=!+ekPl2f1d0Z~=n4Bs}-lvBj3lM!iV@zOrCE8^$Tl%zo`AiSg;;boN4n z>fRcz%!635?K<_ZPg1x#J3Fll`uX~z7EmX_J3pLDUvKK+gaTbb&CZ1Rl9`{yl|II{ zr?(~+uK1~x4<=B|q?{j2+T$<}xNV)o5b`eW#}Oer%%9dIwZGdF3NA6}vxd{?5%diiu%&HUZTF zM}DgTw{gUMS>V62)q5KAL4hm#$B*k#ow2?jAWT*_Jlw1%1JMC4xXL+0^QaBb-lyVsbO^}#c^yY*c(R|bGMV2ohKtgZ9JQOFhomQvSi z_gg~onJ_VW`y_o|#{o8WbP;n*9*@(=*3Y<3qjZ8u|1-WxDO9p!qM{r+&4MJQuraQR zy8t0L-Isz$51Md1HD>EJ}(NGx@b|V#fVxAqtpp2B%!@PMT-uD zgonmvLU3>Olv5}h2S*p2_eGK{82%-<5DoK7L<&FNbl6o-e>I`xdmXqqAHZ?{sQyo) z4a$1HQ2)h?uP1JAUbxNdY~Yq{Yt!0r+s;w9l%I^~+vi%)FJ_aA3s9X}q`J#lL2Zq< zPvS87o@_5da6xUY6IMGcXcqdwVgq0;wCi+X01-uyoH`t zcixHcEFCwXDdY7j*E%8Pe!ezjytLDv7Axs}&Ww9C6RIJI&dB>dp9p;rXRk9W>?T#W zzL`a3em&=D2_}jc+oofyo>~yQ#ysY8^xdbgO&#(eX>=7JXl(=&4z5GwyfYkJf+elr ziI1_7KFyx}0@r?`*Sa&7$zN0iM#qyhNkezZ9p1}-g+SbFnKjrIhvI4J<9wh@5crke zJ_F!ruIH{N-vTY(w*Aix*sawQ%}e&%d=2-J5v^i*&^feTCz!eM>-y&Ww%hE27-lb) z2m|t}+$@rQe)?&hHj*gNlBo`N$GI*W>2tRAecODd&V3hVJI7}+CFqd&JOamrTaxtx zn?eiNgo;PQHqHDw49&&T&1HJtw5{igf<_oTX>~8 z^H@?+nm1lrn{sOc{mfI0#w))t=lTy&SW_5p-Gd|+vj|Hbe&j0J=Y^vx;noSBVLA`N zwO1y0FgGzgcATGzIXq5ZoS$w$s2EF)|LI90cd+UFh*S-rvASTn4ub^ryYF3BE8iNt%C;$)R`eXe@iPXtDam3LWz93wp(PGm4TY}8U z$^|3wPO47sC;^35$y%g?#YQJi^1?;wNu&h@qp(}=-h@R#CC*dfiq^b_8^6Es7>8KR zxl@V0%Ha<}(w=+H{l!=P+$)qA_aT^`k5@d=b;S6U0E`mA1 z@OAu2HJo&5+`2JrP(fAmJ@g4qbN|Cku8N$nVa!Y8GfZTI!tu5NQtb7{C~$bVLn8a| z+kJOhZ?FppJe+aAv3xk%QRcjxV_Z3{79Vl4(xG=ns}Ua=`m>nkqZBXCPjZvr_z&EyTAYL6w%zm|LZ8yEF@W6~ zta(29Gl45^qCELDP$CqA)$7+H&b%F^6rEi&*%gtVZb=^eU_P-r#t@ZxNkF!cgGK1>8F`Juq`eqiNtgV9&@zce1IU{l=2JUa_ zUg^J2J1whuQ5Ao~e0^7NRg6Thi|VxY9Whgu-cQH7ydZg&|#o_6dtx^E2MyvjDZ+S!jkHL zzwEn=`UBi1}6jtzDe_LL`@)s=e^c6B&2_MwAlR1oJCNC=x zp6L=xHum+d+$Po`ow%a&SNtm0d^ZNCI4H1D8EK?N+FAO<)k{9|VQx>|V-e>k)#)2V%L_c+ 
z5uIBu@>$Sae%2;7yeBwJfV63=`{M3W`WTy(Qz!1n)ceCyt)I!-D5CeN$WV8RZKfgW zf9A0D_3fX>Zu0Jwe)&~YqHwEbrI6z3)!oDTKwo)D5;v!L)wbs151q5d&9~eV=g(SG z%|x!0d`pcY=`LAz;NT$JVn-!1@h9OKW8P>kKIo4~NPWFk$d`qIz|iVj)YyIVq%VO& zOJcqfdx%AlnqdeusdA%K0wv9cspp#%xcoKMHmoq2SnBXgA`-NW4nAQbbaI1u6mFB^ zp{!Glw~^chBCMouVY^cM8+)v>lXFD;cyhci%y%UF<;uosKfwgHmwWIlGvC|SJeiw5 z9X41_Wj7VO8lE?&?(Lzhk)bk=kN1gE)mQu@FBMuYhMu*KQPOjwJSG{>uSBT_6bEYi z>C)j|=HnHVr@G%fRY%A|MM8wZ$6L{eg(vA$lFePbth9H);ze`%U9h7~;)PG994+dl zE_0ceM}dJrPOwqrQ>pz+gow`0(BH?)jN0>EJbJ&)F#+wIgW4bE=x!-KNjZ64JY?_q zWa6(VW0a(vp}EkJkhM`$GA41xpUqZDbz7I_x_lnDqJ($ z+QxRv*lzG-njg&xd64aa{|}HeOtUcrNf?QFaKc1KU6Sq|iyYWbPTo-^<|D@#qgC47 zLoIk<5IsmHO~e*y;Sw;>zBNV`pvZ74ey7uno&3}nyR`4hQR`bwBN-%$r88b@jwtxX zRnOd=asbij1>MBl!u*@68&c^lMf0t(C0CkQ3g5-#LX&B6{w(vMalwJd+aC=(ZM0n! zfuuGJmVxV#u{oSh?5!6UIm}a%Bpc6SgqM(4S zBZJ3uOekqk>Fnb^6m68@X(i$7bL*OVdQQdnw!>0exe7XNRBxh??D-)gK?6^K|6#`5 zqgh}VcfBLMpNr5R-uwQ#*Yuv_O5k5_h2IZu2av{G*2o^78_jWVrL`?}!P|aa=UJKI zoqPr1Uq@LgM0n$WQS}ynQAKOGFf~d@D%~L6-6<_fND0y)(n@!ybeDvHGz{I1bSvGB zl0!FsYtFgn{_gh=%$~jWTJL(}2_mEKg3y3NHoV@9?V^iKR^Ew&dV5$Fod!4U74 zl0W$w#=|I8t}pJ{B~frMnUTM(`&}P4tWLFJU-u@HybvUWk>_ro`{f~^aTvO93w7H| zc3i}@guzIK4(AW#e@aDL!IrF!PJpO)S&)?mxD}sk{X5^A1KCj z)5Vd4#uC>^K>X=M-LD@PC;XqhmYg50Pjm|8(dgp6hzq^M-H4yzIU)y3Yb1gI!t<9= zNIC0yhz7=;70Cl7z#WPi@kup|<{qDe*^dpK3X6lq509HO59=VbPs}ZDXV%a#hV{}8 z5#i(%VzvSK+WQrVEHL2&%9C?^mSQQT!HcjsCna0 zTqzv|$*!}IAd_pO&In55fH{jY|3Znoz-!I%Gj&Xb6DcGS+)0Gk-kQi5GB?L{%d7J- zEBx-wm{r6`o-9N+!paMT_kAaKeh;GxdjTlAZYy4090Y2dK5i8-;}?7K%mmo2+$b)t(tUDpf4hG?+27=c zKv;`$EL<&@yXHoAAR$-%GLmg=nbf9r|6DVL0!FH^icmlCWJTGH|0K5`DUcxCPkC?- z8X1GmDvfZ9nBe#E9J4o@z(bidYd)(km@Ja5kNmRJI?9yKD>#YvCvm^gngyya^1-I* z)9;T6-jUdNnFv-VYMU57lTU4kbs9f^KSjzJQx)xKuO$+&`}T{X-`tImOoZR#{7wz6 zBQ-hpyhJB$Y2IFS=&$(P$^M0uS81!G1wt5PBK z@VtC}jO6BWA{GRZXGKEJuM2R+@WE2yAQ-Q*Cym7-oxmdaT7@$r)*j@*Zb$I*iJnQWlyvD9@nfqK-x{!kHa#?1+rvI37sGS09;GUHAJ2LgcTFZ(W8 zdw6r9A*MbEE#o@9)%_m-p}Zq@YiN!(A&^@%9!8q@y)_kmD-WSYOz$6<5d04k{mSzh z4t>BcgrgI3)y<&P_i|dk^%OZ^_=FIA+Wc(&73zhIAhxtS 
z9vJ&x)7_Hr=fJgY=>dlnpA-Q0A&Zs@n+LnZ9mCZ52VQo@@b}mN8 zb95riM$Xr=@$F0<6-i>lB#Ik#)A=83He3?tKWALos-!@kUoyeUwN{(gUSIRwtDzPl zwnnJG!&X{t7ei{!A|wXz?O7syF>AVe5-6A61RN27MN^Ib8vdqRl`?f|T7j!T$aDx$ z>C6Dv4ba56ujUdt3=Hj*$(YG2N?%TsZj)j5$LDqWFB5bBE7V|GcFS~X&&}0<8VvsW z6-0Vz^LK3iry!VGKluu?z83@|<=IrWKRJFYu#7a~!a&Wk0Q4Xz96Af}&GNqVhq0Jq zYDhkXY;25Sh(L|gW>^QPC$=TKJ7 zJ^ZM|!DK4|Tl)eqKw};SRDqbst8Dd3qt;rxWt|7hY4m@#FRQA)TWIm;*7&W~SYQyT z@$ElTMQja)qlg=pXBav~+|qE-!Dx`^(k7^q!Wqv8lOT0^KhG}ehV(m2_*|a!CeSMBzhCi(pV(s9R{}Vfp=UH{ITnB@*u&CK zqW>>`;{_Q177UEYDQm1ve5bXW)3397SCI_lbMso8{g!ED&!0~HXPm4QiL?gB$?0?Z z1AIwexb=dC1^&9d)zv2;y1ref326Tx7NFW?p%ya4QWl&=$dMI`5d_raahl#%oA-cG zD+xOnX(b$a5b+FUbO7)}IkIsI!8vi>8<11!xI9*Q9`u_=YI>p0cYDtb@PgQ17AYl4 z2aotqVn%$Zc6f^j>FibN7xADPGyC>#O%G~bUweozeC-hd`iT?`+Q5?9bgv(ibfy`H zCNn{>^h1++^bG%kFc=@#g?IT3ab0{TuNj<4PS zVX*wE^1M3Ia3wkelAz1vF;Q$(ppV}Xo5MWm(7#*u?J&MWt5@*~QS*nYnl<_1^P}IJ z4&ft|E!xb}qO_S%9xN~K`EU!#VJh!~IYno*HEj_&w0~^+%`%PpTMzVbwC?-Q{DfTK zBjYAyVLpX(O^E8;$}_>g0MLWno*5tpY~Z)t8vxTT?@Wl`{K!UHLtc%c^Hyj!;y=p3s5cEZ-;m~@y#7hvv9?h zu)2?K3<7jWX%4zS;E`8X7KKXw2OWZ!_Ur89{sqJsru)S5+iE0`_CU5=ZjL*BMw?LE zah85D5#k&Kzl@v?-3r^_FK6AFJEPleKZVt{Df0H(`5s5FA`tLpRV#s3pSqMnjaIDp z`@m%YZPy(;`ut26mht-6B_TlVYGW3rhpo1p$2_QWiodF8WZNj%vPM{Q1r{^tA==QD zvI?l`_cGF;b^N|bCDs`@lsD>24t^hesSQl)a{|HkCYJ$#9uQxw2aUfZs=@jYU4TX? 
zGmvdwT^9IOT54{3G?~|ycY9FE!y#n{=oWl3VA~~o4xJ0`o|5hi6OW$ zSdWZkokkny2?k?eMlg)2v)Cmw;#;3Z3>;ub0YDi6sx&vX|BLifgNHJ8evf^K8G;M2 zMPX-Rdb%XNwlqUF_eSGiR3G=gJeVtW|EjV#*K_urjtq=%lDdxl7>sgx`p27lPVrAOn;kvpK%@lvrh2OO)~~Bim4~9WfTHllt<$uX&hJr zQncyJY2ZCbIz9R7Y=>^R-8*zCljVJNs?&aQLQaYGEdSV^6H!|lzuZqc*&h#e_&r6v zt$MQ~Zxi0hzg1e5q7i&M6b>1$tk(we3pP^pC*KYxg@EGV=Qx_+%sJ=b^Kh7T+V>pl zC-7C)G17v@fQNfKnX%2ldGl>tRn4-1WTu_93;^*1fHh_3_@z}D>wsQqcdWd8B`Rz| zVEc94H4WOYof351I@*rD*-;Tzd36D6jkA^oVhNSkBFsD=S=9}U4^-rffwOeK zno?B09HwVTg`Rs5YKImD`9c9ajEYU0>4(4*3uRp`GGq9`qCgoTu0CU(m8LVpnALoBoJQ%#n9rAC=zsIRbVH}*f-Z^R zSR+RB_s|Nk!<>^6lHMzTxEB0`R=QvWa8X5DU@!`CuMu0>-mm7%S<8S}E;}G>A&`+o zjfDLSQk@FELj-0R+RoYlbtf>Qu-*n3 z`y+z5LjvOwczRQBn~`dSmf?r=m%$OCQ8(|`FUBej8QJ-&ogf?C)Pl|iUh-6_s9t#H z0C>>oAs`Nw9)Y3VZ7|N?K4wb-pMD)Lu+_E`aM{2M=b7i*uBdcFN1w*-+`)+s2cgKG zFA$rr1KT2i#k-Shqt!I}uR80oEPc0P9h$<rHLOG4x#wV`d9KjX8>JqeJl@Gvq0?xBP z=OqTMrB=TDtM=@L8WXxdX3mce9L9h-65!V1FTLF&Au7h<}JWIQJS90DYA!o%) zb4|irpFPxIyIAg1h}ctH{B&9qu#mxKmv(b%a+pyF`Is*rWJo)0zN`fRgVsMLbJ+e% z5Bm(TeumaD2jR~Pr$SGbnuhTYx__7Azx+KZ+H$S)@k=yP@uEI6O7m~bq%%jDzH9F+|YEe)mxryMz%WEA8$Q zSE-RHq`F7_rHBYY;qyfAhoG6^R+fBTI(cFU=(wbF

+@+Z~d=8P?Ly`f2tw5vlyUPGx&p22qwk! z`_=!Ywqn^j!cTn9ng;kBebF{bLsF0y*i>gtgrjR+K)y!0MZ??7y*`re1>5q@WV003 zTUxnh>@wyb%rBl5bNpqfrnCI;mG{u;t*#5hbqFBg+6y~u{f9(KHwxS;%B!m#wojX) z)vIp4W2a6kjb(3&I-7cjo=t0QjvYk7(hBaPta{XY{YM+A*@-cw<>$7K8z|!Fr4-OPzaov|Fz=~V6VC{58KczX!A3vcL2*X|fBW)2 ziyse~D64)ljuWy9iFIx#B={6jf1W2=O*3x`E1~u0%Qgdao*0tNDl1ssk4&R3-EgK; zvRCVTH#p*fq~Cy!1LK_Rr}cC`J%{DUd>+^V4XNYBUQ$45s`fj71biL7emXs^pUepC z-*7T^^>@cxrYKg2>c$lc#Zguf=v>at;Ryg}-olFC{l%lCgi7kXPKUq~i1kA@Y+1=} zd&ZIz3K4iwzF8#ueG~w3$<7)7a<2;<`>#}+7lPozS)q5OT@eA`VS6FVz}Q`8@Oh7& zu9W{4G5dTK_1k9bBKnhH^lLsQORukqrHOR<_Egup#!F=%Us>%*)oHs8CB6h$s2Xly zF~WR=V`($pxjM@D-yBH`o+bvXStV7dNI{^0I`=);|tKZ>cT|rrIzp5LORtqEZd+q?z`x-(1%T+G9&-!R4Ak!>h zJcUie>6XBVVF|MZpCvhJqcBLnp#%wop?OPHiTW=;l`B%+kLOp4-)Yi*?Gu#hzHKS1 z=sa332G~`SG-1BMHXH!dbcXZ{Sopy{9gxqI0tT~s{DlsDYfsCET{3~!^uqa8B3awT z5wG4=gE)Q!EIpTcNJ#Z0g`GZ8XNokaXDy#@4+}aE%yWb?aUNR$IwCrtBMPk-gS#u1 zNHL}K*hxozJ~NCdHZ!w4{-g2HPlNRuiyF=PPk!JQ>7iW$xl(wM0Xaa7+qlhF z4uVbfhTz+1FkE4QciAoF0{^q{Jw1bctovE=FvwHQhLiP=5W{0)h9NEH!^zc;Mw8Oa zYx4f{rn1~`36le4O}1_NESoOHm1H52?~Lv)JEp(b!O(#~>N~!v94R)->5{lpbYOL^ zTB3nwXN|f(+jwF!Is?Bw%1?6me8ABtNRi+b4>sv#@pcebgZg|b zklpz^MQz@ZtKl!vjx#8_9iNe9=1|J$jY9Lw_5gEio~8AJd$2xx3jDfs4{{uPF@_e` zrxo9dzAix1(=SK+Kl1k)h?q~H#K)DPNG_qPAm9#yPS7++MQCdS@`Oc=8-Uo=B@vFU zB1Brl=;qY<^bf85P*@yCiA@{WV|eIP-EwE>o1X*|`=a(6uh3~_7PPS-pk9-Uu$2Rh z2>X{B78z3Tq+geN#ppTY;7Ny~B8wTwJJJ7BzG(?je-A@9J(@!Eba@Uyk0$)Yi0O)u zb2Ws)a0>5pzMIqG*`&g#V00N>;HwC3fch-BGJ`o*u3e>I!X~~Q)oA*(_P@8+QRknk zylcS1rT^evMEf5p7lQ@foeM|;?*;#E=Cc(q8sd8+7#PRxunt_*2eoyIBzal^6gm@HBjjxL^-U)e{H)V{f65o3t5Q?s zCk}SX#bjI^5nOb^$p5|f&li<=_st)2$L4z5_1#>q4(4}fAf5y4-FCUaj^U+AdX+J= zkt}3qTqXJ6{jTlJQA@)QwLD=h-Z*|W#0Dh!m@HL!rY;sy#~qW4V+EPH8jF4m#TpU? 
z&=4pl!vFjYoW_34VjdBhYv-NDYHgwKAM@m*61KKZ3Wnn{z2ANu)ukCuMsyJv0?Fwz ze+N~A4~?;ZFRP@OjTj&}bSFUFY9L0UodqC2YZLy~CZP{Ze9s=NHs7E6 zqUifC^Nd~c6EC*{ttg3Sa9U9;_SffNVTFxfpD8!-Un41 zs1%n=HjAi7YZgnjbE!mlFhG0PpT%#r{NLl17_op|Du6(X!ARZd@GJe0vOImT$%RYe zKT@X~k9{*AYF0`bDt-1#_wr8&rnO$DRk|c30I(xSTR*naB7qgAr4r8Il|`go2Vw#5 zg1Kmeh_jYKySC|0qunbFqjFK5TrB9b0K82vt!)G#Is_VD<`$Pxnwjf5Dg|+I@{2!|B&(rbsUo*;!dNhyz|Tgo?zT%Mb*hoAer4 z#}^aNeN?hU+2D#!IT11VFF7XBqw6L4cYgl-#$hqU7qTdwNQsAjY`_0s*M7kj&5 zRXiSCT(XML`8S1(IG^e*SaVH464O2K7^7aUE9;zy*PVA#Xdo8yAhO-`*+c@h<1Lxy z)5~z!ey5aqN+QyF=W&%OgN8!-&b*k@pD?{?P))OpS3GIq;^NYPi!>qQUvI`V0ka(j z?k^jK0POFMtYR_MlQwkCPNEnoHIe~!s z-MU}o4ZOXOJ7+Vb0SWM5?Fo$;{7-gjGH$24z|~L=f*e?&N;fhBUNN~7AYw1Y^nmI=VYWIZ~4j7H`l)3rSjM44jcG*HtyT(!9zS{-rj*ewIA7y1K6{K`@!pzNdBF(J)xi9KKc7Z)PX~nQZDfB&Huk? z5vOSc!Wg_{Uw}qLjtq>IB{e%+GtM2(RRsed6Z-0qYG-g10<_jbDd~qRjSYue-B(Mn z-3j1lefN{`>k#hgGL)k5l4MH_iD5N8@ds~(@wzgQJfn})9HkAV&_t!m=eMv<&K zu*`+U0cprVz9b;Lyklr(5Nqqc7NKFP)YdseMGo2AaN}>w%;I2~SGunI^=X3OzY;TZ zQvN_(j85=UO2mT^=>anbB#_E2#EO>H|M{z z&HnzX!Xwq|3?i6E!Jkk;EbWF`U{?tcO=>87<8$1h=@MyBPK$5q&WjT*l|;<4hOQ>8 zx2FGxJTHG42HPJnuG24s3#ttc?oVgEbaMq7pZiCzeivM55IOcokN(ha`G|NG1tYE0 zXpKeJ(e)pfpyUAhf13nPUdP}?4O>$a9M{FshR(_5JMTZLgQ5r)g1MNu46Z>(%DkY> zM7N!K+mPS9uh2RQ6h}k4;W{JH9<2fz5MbDPumT0BsQLN%qY@k|$OHsd5BT;|^rxb` z0$WP%v>;{|uyC6E)FBqTwRqp|_3f{1jTAFZ1(FNwerLKLgb5~`5DU2WTRgaHh5(o4 z5*;K^t?mz>%{Lyfz;@a9lZp&s2R#qtMB3GUP)9Uf?VCXEfug8gUaT|sQ^@;)j;bNGy)31(f&9(f)Pb?zsFTw7Kkom;D5FH zPs``VJY*zWg8sB`0*M(XAL(s~p*>SFhhd>~J|UYf-D3S=Txu#2n1orN8;(##$>_3< zfk3=J^2PZB6tyFoRQ;WBtqpNB`HY906C1YpBX87py_jT(^vfh+0$ zq+c|RXV#rcs8F+)2Fkn0&y(BPtti%d@VwMwmYuv@PbiQ@^H25oG-h<>Wt#8}5DE=ehcTQX$EyMX-ve!wn?wG+Ch9hN^UtegY zZ@#LMz`FvRK22{h4S{qod{Ju5m5Ukz#}MB3So~~xCipAH%slPxu;^h-w&xgYUAsX( z8K1b=4}bk?BuxixEAVWp*p2pw@iO6yfuW)JsVQ6L4@}TD;nX<{0#OU%WgwkPjIKu- zABW39F*q1_^jb^H=U6M~|2>Y6RT|DNUtoOr?rg4*LJmC%U0&R*N-k@L!73+W()D8Q*_?3oB@Kf^U3a zpm_}+UMeJ)1qIQ-!!e?AAeEmB8~n^@Vm?GKlWdn>+xpM$vX3pVahSK!m>3owaD0QPcdkC^{1 
z*!3FhG(F7$jln1Tf`NfySViLDC7g=oqEV{#1|W6bf?_ENWZjYgGcWFb`?v51FqztW z1PHyMyclt2G&HnR0M3VV4b8v=Pki48oQ)p{r`QJCU&mfSZzU_~^p@CE!rL4#gsW z=5CecvB%VOe>(gFhA&H{@0HB%}5&0>9n>XQy437 zA%oKnf$tzyFvf11Lzof_QMbxOwh7QBHS{|EJ+_bmhDw)9I3Art9goRR)c4h9;Z_*$ zoBG?rjz1_=UePU|~RA_XiFaR01xBr_(5oH?Oe8UV`RF9B?Tc z21@m2fc6z2L_%yC2;g2BvrAZ}r!8`NWPcXwO4IknFjlbLpxfrh5ghG|8T_{D5gh;v zQWgtjZ*=x%p(@v-SC$k1z(IXX&+SWKP-%%#5752_b8+Cx$`u3q^Q|Tr1D_c*bxiv@ zTeLiUMkmwCgNniG&$ir^YL%;(!XzHAH|aP_^dqKQ0V7}4{WD^#pM{%#s|2@~BE~gD zN8;q?^Syr95zu-{NGZFwu&`j^umilFR=`u;>(OGdw6uitGB2osPO|Fu1>kOEttKYE zsB^&PK)0C?&+&4%5?iFc$Mfm=Mw9#K0ye*%f*&zK|0$A|Of;1+3TMctNZ|Z|Mx%vb zmbDGOxF!PTu{~8U6D%V=y@UX#ZP>o!p`g}#zq|n{?iB%t0V{08)E{X@?HG>{D2O&Z zb%=rvP^x5{Rue`v!5x2&!!rPbGQNG{jC6wb`@U?Dq5k%{-aJTImWcCb%jU}}tS52+ zSGu%fj}2_MSYvB}xBtTe#M4D60x;O08%)~~P`3Q-Yf;wf9D{j2TwDzRYMKV)n(yNo zW!(_=c#&46w7A&-PWD%Jok~e}TU@^{n^6KvAR;2EuaNp6{?bvq!t(6t9|Q-qTpIm# zGAk3Pz9p6~d_j;vAA9irv@U%dlf9ee`L^rUBHMc>#N3zCENGPsl6okVBcD#Vo#zLb zn?{|PO*_o2^<5dI_-;(VKlJ!F-iWpDzlR1R#Dk81574Qyw7YBUAmtv?$S|Da?3?}c zCmq@b?mtdH(A&%)?I4_tToNz57exH$yZIJXhmJ#STxdAqH+Qm9b~CmB-ZtJXrS zMSHSvZ3AVc??wDxAcUckA6M>%KW9MiyZ{7AW{sy6`YmK=UXLTv_=-fA;k$0iiIUig@%zM5=Y2 zaFY6SdKrL72)%)MqlKaz^}22WHm}*~)*xW~rMBSoh8Bs3i0x9ag@`>pJwGMs!LQ`G zAIoUa>Y{2b@iX+If-|msZng=#`msfk<{Gw3*V+r-WR$e&qoVy5ZMs3jy0!)7T{8tG z@ik03jQn!V!^3oKvNx2GUdpR73NT#jf&;wGuCMPvuzld)y*UYt{p9ARW&zIS($yzQ zGt{cz;ySnn`t>&NoiP`3c2lxe@3Sl)MeRXJxk~q1YdgIZAshC`WDM!ENd;`XOJ)m| z09I-L4{Q<5Nt%^AAKcSOS&0!@e*#P&U|U?Vx7gwe)qNI(nW};l74R^SO3Jmm9*Cv- z6lOAbhC}o^rY99ECXP7%OBWw$xc9`VRj{viv+(_K> z?~j!H*C|+{djqCFff|-rd#j>79E{E#3X! 
z3TI{SsLskzsF$u4=g*M_k|c>;*w@rFoB`iG<$85!HyyEC^0a`Yxmcsu+iZgDwOkN8pa^e&{Xs=E!)jk3_aE9ma`@=OKm z+>UM@E@qpCZu}$`js@GR)@~r3CM|vf_Zyy=LWHqBy)_4m4Js5~Fe!HsfE=4fmAQ`{o7BRH`bG zC?Hs#C?T&S*ZZF8J%%6Qb5?qDFF{MNpvc$A-D)uZeDla3yRNN~B(mf7pH&2*ms*|7 zuzhPYoYTl&KADgyP$#T)oi*$dI#V~JRHC8&Btu@4-tjsRkgLU5BXq=QjY##pH_v%; zf4M)ag2-I&>vq&}5n?!nJrO*MZ(vqVl}H-ig|MA^C+ZvRPN>Fr4YioUwCOmg9!}g9 znhSXBiV%0+)2NdS$JVvcLwn(aoVJMEB=`GiCzP$s#9vBJbmDe<+~A(<`wpDKwudeH zLB}uH0;uvHyu*D288nRVS;G^RYwwd@dV99Kd z#tRfGz?GO7f1Lxw;vqC8^lQyi9?+GfPc98^iC1yC=?Kq1ZZ02GMt)r1&*sWvsTx#Yz+?e z5LtRSqyurc<7Yo?lxjb&5y}@+>q6E&P|tc=gM-C`_%}DP*CN_%QL$wTtVX)LxMj2e>O^bb-}#>%>>`;YfLjc?5*_sOTfE1u!Zi9#}Oh&yg=QS5bWM413)^08A5ke&G*uO=VC>KYp4p5}%^a{i3W!4MV^YwqS%#fPHCq{~<>0sIs1 zsFk%@t`U}7-D}c$a1{gHFVn3n)YgMQ0gw!;iPwN6xjbuX(7R&d_oWenE26Lwj7mO* z#(9-h6Zhhox94_}cA}mC$f5s-=dAa+(5y?3D{_n)SnMYDy1)_xE$lAN`x@T7yV!3S z`mM6c0*KYNYXWCV`tS}S$OZJsAx}bsI@s(VtHpXmB3F%HHgt;yZvKI}+VpfvXS*7A zmo9zfJa|9bpPm$40(we<+h76lQjKEpMO$_7Q4%-);Eo5x$k`r~aOZ7&FnJl@C7)2Q z8 zc14l)O)WMb8|}Wb4heD=cUd0Ngc=bzck3?-(Lw#`wd-CIsznGt%>!qH*2@1m&1-&! 
zK@gMNSAV#FZE@(;k7R~HC?io9f4A1?9x4>a9QdNfNzrQ@>d<@rT;Ifd&T^Tv)`OM6 zEy@DB_VCb(U4OA+ct9xEXj}w~h(ax z_GhOAK*k{qMTpvfcWpx~e9l8G2qU?e?2?9bt96!S8JZCW?;<(At^{;nug3L8TC=jJ zOEeND2%mj;dU;F*sAio?k!IGNGVYu%WUwX=c zCi=375*At63D+`Vrg?Ih$C2t=*%*4`0Nj&@7QYIqi0q)*=D|gH4vnYrW)O+>-?;OIjt(mKspT(=xL zCb50I;?v$-o!D(X=*^O~iIGaZxjbB)K4D$>q6@#uBG;hHjkqf-P_yOcj;%q4<9LxH zRG(vaSovx6{R?m~YHJ;>zGFuy&n@`b3*V1zUZG<+FwR+pG@%dvgDnRSh zM>#{TZ?udsN`1{GVJ!0Gw9CkcRig-@Crj)}C_Ew4cwB-*h`R<4(hi9!?fkS~I!pZ` zRCXEo^c9=h3&)+ni+6o<(DBC!&_mD|H2dtsphbG37F^i_#3F9a?{mf7h&d;_kptR3 zTu=P8YKO*%`JzF+E{OUQ)_MpoZkQ7od^vi*(rqy%xxXwi1AQqWabZKF^=^*w9R%2qAqN>Q< z{JR`O7X4ac+oR%vEFG%wycnGTfWuH;;3P7u=L`I$A=i-n`lrw-4(nG0)f>sLen`J$ z0ppxPiAwASdbPHLZJdy~Y?HR^zVpxVB<-8fwvP(E=eXr5qNI7z!1JTjkBPJ+dK*?x@accke@WE-DI{ioGTkK#U)?xnja-m z_-p0Rt(iMD)xK_dn+(5ft8Bgb3tfKf1dA`H`uFDf`X5&gmYUw#UelQmIU4fQD7UXC zCuYvnENcj`jr%$t;#@k=TUuQtS*CI8()G(Mw|Np88yoN0Cwheuo)p~cE{fC^W(!{s zDu27?G4QTK0rEvr(!r;e9cfJDN>okvLyFX=k#TK!TQzB-o?FBWG^`w*Tk)&3afb8z zk{=htuZPa$#i4^d>c7&*a8oQoL`k>Y895Nowgv;i_ble?s>aqbGWs^e^YZ1)ciX8~ zJ1knYvfD!#I7HWf21XMJx3#g=Ovu-cPKR-Xv-~3OLxMy-+zOrlN&y;gs{hTI_IKZZ zSVfxqy#c9IZ;%|w;{TR8DKc`i{eEU163HeaLW(c2J^L7!B1RvA z{SWQ#ic)@eENU`W*7lCDo9wqN)sOoE79JM6o!Z@gV>O@hKcdQef=G$0@;2LttChw* z>X6LSBCLK{8DF86helH@SS*ZpsXq{B_!WsRwm(=(v29pQE3Kg`w0k($$Gv#;GhV7hSOXJ@^ zJlK>-*wY|7qWog%$lvoq>W|Lst+)MG60luVlPjC;;zK7|DU&yRJegO><#p|Ju$zRoq4+$y^ML_Llu56RZ?dWnQ^(AuSb-}J;t({jb0PJx zLjUO$<&49&L08Rq^X!zq_TX`+|0WM#3-lzTB%xbuX7xak7VtTc&J55VBnBh?}ki$0=+2$$8jo>+L^y`tDaXB zMqL4`u**eguG^QzS*Lg4pFDZE_-$Cj)NvaT?qww6218-iVhyB`J>7=}6i*G{pSJis zxSSt~;9cysg^GJGG-9%7O?jiL`YbbO?O1u-UkmY}vs(MEdevw&%h@eu$lO}n%iM^$ z96XbY{#N|ayw2wR(@bGLB?*O{H76wSC&I!)Qw8ibo1fA#b|e}Wx_S|O%JQSw7GNJ+ zgsrD@o8!tHcwE|q|N4>bN=nh*j`p^+l+62A+5E1Rceb*t&tCDjw=5YD;!uQa`6J5x zyDM5w(>`v{L6vaZj7pivk>U#q-v0bAq$vk7=GE7d;~s+#dYE_vc4O5u@5T`y-e8O=XD4eH#s{ z{H6zU9Mezv4Pu_hV#*i1eSjGH%wvIShK2%|f#NA7bnZ>-O}UpjIf5n6+C*_XZZyiQthLa7~${P&2fEDLCxy(#7ih`s7idjLJjqXbh zf^B8kcGcmV{AFkOygTNtQ8I~vU{?`{w 
zIscSIDno{oZ98WTRb*ORE=~p+F35E7YUmO1U}i|waid|lzpG4ov#nm}WHAI|w2adS zW|5Iv)W4Yt9RJqz?E81(i56GuIxZ3pJWkx$%*|Dw7A9d=yC4F#Jsf2c@TvzCc2a() z`vk__qZ#W;e)da^Q74;EseB(fuZkKot(oMpcx@M+f^Kzpa<@f9%pbV7=YQ?p`&ooJ>k1o;h5vvITVvb%AQ*nIZTYXg|GCaH>F4J~Z+8nv2 z$H%)IVY?i=<)Q2QvwAs8{zXNkmuoUeqyk48d7ZAuaz%VT7yH`ICVyFnCQ;sIiN>Hk zCouY5u)LS(XV9kGwYoY!2`p*_=woAJ!??{!iU>`rr6~UYZ+xyp2f(vG3M6+&GcfYx zE~Gwxj=kgS4k2l<7Qg}h*&eXiYB`PN|UZ{R;hVJ}eCU z5W17?&CvCqr}9|~?TqQ8ad2?(Eey}UYQ{>bV7|KD$DIDC4MNT@&9-wb;vwh?P4{go zo#c)4cr-{GLnm9VPyq^#s3-I)1PYDu698K z7#@B3_k_;cUs>c}{>-!1N|TUWg#Wy#zKRPx8C>(!)KcXTgT~L8@XL|@ja80DvBh#J z?vtX!07xm9>k&E#8j^~~K&8soPpc+8`X%hBh3#a;)4ureXT<5S_}UIQ%61f;|4KT% zZk{|N=EjX8)s}Ct8x_k_Lm=VX`!T~qYHMK~gGNOvjYOefv_IpUh+^30@$T?(p-9d7 z`sT*N==E}`VxfxRPZbHaE&v!-Z92_R(uj1T3GkF1ZnM$@aTMbZwYp(cNhO zVD{w@^g)N4;VE9uy)aG%Ms+e!ioG_*0x4M0Sia-z{??$Q)l`GW#aANEoWK_TzzcYI z{ESq6lA};b)-zLY>s?16$jRB+qgS52lPTng%VWXbGnAAuRi#1GF63Y~tpa)fJcbJB z#~!K`j9^f-G<q@C zfs2u$uHAPcXDq}#=f`@YAzNZ<-!}M)?AZRVm^obzK3wpNZZ|b@ORE*WrF+4gI8MF2 zK>HhDM}WMQj|}*<6OCUmJyl&+9@HHRDf`hj?NuJctcA;RA3x{7{yZ!8E01GXjC+RP0B}q-aK6z#KTLj$h zIrr7_jsFdkg9S=MRb)ZWsoq8MOp%h>I4E~35lDmzFUM|}O9DRPTtpQ>yWa6}=`##bbU!y7W;P*`M)P(cZ z+HnVVNnK|(@|RLA=R_2Xp+v-j%o}r$TG3rMOWMyzOXhTK0C7|C$RBpSGn}HgII1j* z#E(NH=0UJC^4|Dz3=vt_;T<238Dj^(6Yt7~7uXxSJgzR`n537TK;4{@#bvrAd?1M> zqoy?eqdUkmWx;=&uBCYo9RXg~ekeYH`dG6#^enP@eEO`R1L^GPg}%Dev}djj1?D z=^{x5{)Nk39^Rpe-xhtXAX%+gmXwhZ?Hq{nc4pXVM(P~`IZ7kGzwe_^nVdfmSdQ4L zVZ?ahh8{r}^QE*NI296d7-;@t=_eW-4qRzl1lg15SQ*kVw;*C#6uC_amTpV!B7Y1?7i2X zYp%6seCB7ipTfMV$jqeI_t`WbSIP-f+A^x;TgP?$u56)lk9+Nw14jvu0*( zE^^$u+g*q59f#GgaoIi_U6&UpalUOJa4Uv41*tuwLP~Ub#~im)J!Aj#D+Rt&WOwnD zm+!O$oDdH@lb$kvN@d2x%3I03!|{`od%i`gq5K}{ET6;jQXM)&i7<)T+->1 zPj9DA-uw|cX?^o~WdKvtp`bzj!e_Co^OnrEtT?HpXLm$PZ=AIq^69khso<^-#%19{ z*p{449ey#)`1lgh*LqU_s3^^2dpE>QDN@YZ1-=|&y~K+2&z}Btshv8=M(vV<;?M$r+9iyib{R0dmOx}znAnBZS{2G2aZi9<~nO& ztlu6dcZ2aGaTBP_cYcp-)^3p zcUIGt@&2>)tu*IhyV!5#KvxR4VRYiRQhLQ&u0)anjolN0BMX*O5aV9CVjJX3HD 
zVr#59MZ{rx3^uIecw^#9nHbAcg0rK1mpYMVGK#lwlfexU$S7rJoE>G?t5Hzg8P=E6 zVmuBCPs7lTZ;6=I)GcOfj2Bk%;o>(wN!G6jW2KI}N5s~sF+HGf+=*O~cz=@G@7V0< zv0H+7&#^Urexd{;2|`;Xk*Y=dnZt8?ath?-cFIpCtdObsI8%~ov>$`_#wRR&oGiC9 zOkoWpQU1&QdxCcr4j)SyvwISq$a_^z35n?Z+Q?!Fe6Xwp#3PA^q&krmMruA>j;}E~ z<}K%$UnYU~#Lu2jrs`Ap{!ML5tl{{xr{sLcKm<5XtY5OK`jnqYfsvep7iP+0!0CbK zCaY-2MWU_9xyM;zp9KJh}3pWiVENt`DjS&@#Qv$^7f z5joGUK14V6g@vROsegs<99eO&U;gYiB;bKf3eRPAA~+{U4%KIP7HA!XgDH7(b7_b3 z>H{ilYF{g-$|MhD3#m9cUDYYi^N$dF^lV{R?0ud4;93_?S&3;!`h1Vh#y*$s$h%+b zj-=<$zi{e1*!L{995(hk*bz$J_!xaetDuzY{H{B9 zX80biu8-)hGCM2pq`dXsV^vHlZ7-q~gu!-H`5qRnt*_fJbm>~rNbL;Eg!~t3(yxvF z(y;%bJ)Vmo;LQHJyxkM16>(Y}vqz26=qM`SZZS^D?}P#Z?2Vq(D7>kzw$ZJm8y;8{QRB?<+I97v+xGZ& z%tAWmc(RvUq!TrtX-gCnL8PeX83CIb)i)&Sy96W%Xg+>+X{=0ADS}(=JC^|Q-HD1W zf`HbLIit;mH&q^+Cmm)_N|sxes~`>d3v?@SnPg7C?|)?i4&P*}MgjlPu7GF$pXxM&oMLPh5FbWu-vC0P-t2Ry*e6AXACS}t7| znFkNd^=e(H0hg`L)B(m(_;G>*>E-~gk8dC9<{QV0@Xn5!JJm(uS+jx^>zxThwSu!Ro%TkBQCq0l?PUeF6tp#$TGmBCc0N%^=zVqfmGOhg27+L6j#n$fj^FXx zLU$;jMTBJd#p)q-v%&N8fUC{X7U46~iZ6?0-=xv0SPI1Kq(d0#IAfoD^&-0d~%S zUH9gIdvtqW)_a5uA>QD8+c}y=%yI4@>)?y3HUNdi$4bf1UbqlKe8ERD3eie_$I}oN zpA;>uzTLQ?wMk#s`wK(#L#tW*Vh-`HCpt!V19kuzO0RA5l}UXHrz3~ zSB)m9vYWmb&z%|pH@?60X>36<+AA_D>S=-jd*RKOx2YB4yQLpIpa`WCr4@RlcMr`$ z-;{jyVMum5_fxncpVVHLrxRI-a-%2uq}&vT<|scv#dydQ5)jh_A*9p13mA1~H4Si~ zA_4E?N0kL_*1mlEZ{HbgCvbm!twhfdZ`f~CRQ+Cc5QzEmB z^RZCZcbM)W(E1lhkJp67`GtfQ8PqG5?`)1vDsPUZI*f#)1n$4LTl!Q&4c#2_{TbTS(YU}QQc1-o_jCtzX=AH3~jVh8n*i3G^^L6T5pMqza*Qn zNMn04R(5H+J<31vhHIS1I=ioD^x<-q+xo?Zy&^Hp{_*}&vrC(m5+eteYznCDcm8`h zS|xcN!VGn4ZC|zwFo&*inyNT~S+Srw*OxBy{7jfip87+0AFhjHK2#wTJ*qqgmZOgn zEsvvI&42zts$}cg<6-$mW|a$`j^b$yv(-8vi&47M>L|RJw^)1E!89kS(uv2ts3_eFpvYg-y8aDdwF}`dp5hRWKhwa z-YC;!F6Z9S9m=Y7vuUF*LH_fQlDRsB19a>CpgQ$1N#ECYVWY!B>H!lT%xkpqb+Udi zR`YS9iEqDPY0S#&$Z7gH|Bb=6`Ahj?R@T56qA_|hm^@^NfIMjec!BI$UX!z{s~B@J zop9(NIPG4`qxyp1;?udOiX=NHPL13Y^wuWRi8j2g*63px?(yZpoc&+Qyw~Zep zG4s(RT6~raw%XD@W)>>B*`nN?NYTrDBxJ?mU=C?oa@@F#)T<`k_`d(Ns!9twpIl>c 
z(UJ90TiN2|IcsK9px6-?rRv@w_y`k3<4%qflYh&f-EFzNRNlBpAAMgh{!L&LYvbE? zWT>?_`nI&%bX8+K9;<`dW6wY#cSl>1)L-mh_kz@uhg$ z)3Rk3)sZ1@1ew`9l)=IFP;Mi$nf=Ch=c|t}J_DQ?v5RkJpJm(KKF}LQH0pGc?QrSu z5LP-gJ?xvBUS0+7?HYmOEn{rnS1;Akv4qv23i=>;uH5V`!p9J-Ih0 zxpamkIurIvrL2KdZ=o!vdUG4ir`Q=&iZZJgpq%~NR-pi!mv&J41y|$1N$PT1zRU!j z^jf2L*UP`lcO_58%F=e|@CvI+WG#?$>s?F{LTWDFhjTyAxuP`Y=L_K&Ue7I4%+{CQ z%yQF?+sP=`vC^Ih+Q*C!-?rPrz7WZ8@koFiX*oP#`8iFIMbZ5>&5j8x8ggqnnVW+1 zVeGR)^?2@LL|S*mNzhh#Rh0kl!c0hF^w+dJWCFzSWu|f%d{`&Si3zX{LDH_MD7fFxLo4V3xAfU zxvIYz3i#(@5B!kRP{6ZaY&Y#8pyWA_8MW}~O`q6mrcAt<<=gq_+%E!F4R55h)0Ww0 zN56LIa-lgg-o;~QKyyX^UIi1J7lJjtv6Hd;@*{X%t7)NUQo#7Ac6CO5mbRtc5`5akrEJ*ufF!Cv8VQJP6${c0|H!+@9 z1%#K0k*eXC;L~tci!0?{)kjaeuT7<0b6t7s5zzS{vOSS=^NMSdU|p%S50CzJWPDCL zC(WR6n_5=;&u-ax2dh(KFpb}=GgQC!Kz=vLiy~^$5MFb1^~R>BRgGy7gz4_2RJ0UeI=e-*@jq6s zj#}1NA4xXWFySYTMF-OQB7%u9t4Gc>1K)k`GQAv1QZ9_f49?2E%*s%{VMV&FtlW>^ zM--vr`vg$yX}Y%$84sQp9!xn1r}fMwrb9J{hpH!zJRo{N?H%7EIl$Biz&eQp?w*)T z@Ak-vg^0eN^Ig$Wx~6&~GjiJSR4021d0dxEQ}}TA{4wU4eE?v`d#@m_x_F`7vTw%&+N0fkjP$8qU#yHyc+T4m?my*$UCQU>$-v zbMX@`1hLd>uR?-Z>P7SSy=N5C8vi^&o!tx_cU8iEl$~2Sp&vBe?W zf;Kl*%k)j>Ce^aTbCa$Dlz|#Tt89uqv!*q#m$(>iaD|-h;%@Gk6_O4Anq%0QL^fV- z<=Yy^kpM2Z9*Y~gE{^yuYW7ukaTIaI_a5ECEla4BiM!5SQr@66zwsi^r3+huUtu#*3hUJIuul@1Ds|CH z+b-39r8f`{dG0*Y6>ysuJQ2rL!`C8v{7}>eNCUO64c=@>j{j6c!2@A=xUS>tI|Lpj;u1J}KN`N=b7I|N?T^#mLcOWRN8NPHY+ zPZz~)yLe0|!3AL}WxTtn&BDqF=@`~@=EAaipZ$|E;TKlpAHIG;m|Mz) z=ndw2774#5nVlexW+$?nH7aA=w9g-yB6D)<9G32uTRKO}_@aIwgum#uC}!O-@21MT z*`JKkbaA9mftJi~s+uCF`CwQSEqY2TiWtcVBo#mfOAKGE*+} zH&U4^>Uz}OlUy$O?um3kwBcB&tc=dAzuVor#U8nr7paxzFr7JZf!THKu`Z~d6#6)h z(I%*tDv+@&565i%SPvpoPI~{a#uP~?oDa0jU7GHX+2;DY+I0C492VtWyTlvW(^UT8wmWYDhAO4NS;mBRBt~!KXBQcc`zsa)E}p^6!=ssnV5nHz2&+_moKHN7TPMW*x+bqn1&-!tOQS>e?^&YR+2=cLJ$|ekkjdTh zV`>baC;Q(DdH!$3+A1K%SrKn_M|x+tH<%3zf9pg_O9`^P#C(3LBqXHb<9zOQ{R=sZ4v8BO<$fG-kV6dKu4B z#sqR?)6LAXkM&Tv?X(<6b#5Os%X7bxEy4K=@3go$YCGatd|C<)VaJlyF&gXFwdVqZ zC&CkNF5nn%HA4T2BU1&B0+^H-4f|k0F+S=4_8C 
zsXamBw!TQ6s=LM|=40=uact>7#I7#sWgtw>rk^FDr>DomPp0&-7-bWkWobG7j=C;V zpxcYPvooq&}LqlRbtsih0U$q_7Q=0tT*p}$^%H4iLIk_cMgd6|h#?~C{CTO!2 z7X&!u=j8k?%B)6loR+Rp4Q6?VQ5tfZ+tFCxB!aBx-#=6#@G4sCE?-7eeZ~cyYVv-C z)9zuD&|nEc;ZC(gP2L3#Ko$W;;qRX*Eo^Y>KOSK7#lUHv@BGGhV#$xM?_79+pK|Bl zZ;Ru$!8imo>c%U|V+|{bQ;XOSMhy>NT8-M4JotF|JZGY&rn89|HckKU&qW1*Txm6Q zk1NBjpLhvMhr_HpRM>i_cP zG1}qKdyF{PH2vQ{c2{BC8ASZPwUC4Qd?I6-ADU40P+c+NtguZd5fd|P-e5TP`|DT8 z;OkMZm)mUKU~25woYXhtI1XE|;8D+Qgq!<(j&Z@s-x?9=*9`eC+VI2^pZq~L=*}m7 z05ln|(+{bo&*hOl+O=>t(SbC#Ubc%+@Ep*Zv~{L<7~m4mJ>*SU*G7+)ef z{adsQIR2!N2IQPt`K;H8td&tV+)NYQ(`>hwZN%{D{Es3q@6`(cB-DG%PBYQ}S_Ij5 z41>cixEgIOt^T~nI}-4xyrw~3XJor^mjJA2MH1X?0>QCgi%8!q-n)1I8xfH2^u%Px z)@r10ju1TtjH>@}0&e5}mpa#TN%L?0smYwU&(G|KcE(XMT)C1mhWKj}Oua`PFNvE= z3QS{!(~;seL`|X4K`E!#F+25 zf?Qz$MZ)p#WsUL6285P}rRf;=?@#5#a5`;wsrd-tSE-cY`k1pmo|9i^)QG;oR+ zZbElXUht1WEx`_A<(k0Mmppg7`Vt*foV z?Nsyz)T4g`GLPpNt5t;=7VjUx$fbJvLnyU>fB#)i$_(Mg6Q?MQI-*&$psUb#&w|@- zz`H>{3Y-lyNzh+*W(0yzCIBY=1%2lknDXcAFE`G!wC^2FKP-TReerEP2mm8I5h1_{ zqQMC*Li~Xv<+WTCBKgxb2_I=EF=%q`^=tZh4zwB1`_T0G`1H4L8Ml=I_w^V^Dkvla zPTxusalcyLJb}L9fRY2~=QMQO9aeA1I47*?19;@;fL(kA#qRPRhTfa^WQ8vEV8F3k zcl=!DqM-;-=)Vs273@j3LV8tvAG|(DM@a$;0%VR9yncR^75wP*6qB4ESU-E!j5@q` zlC?@>f_bs3zOJr&BF<>sR<_Rb-`f#K?qBaHaz8rB z+PF#Ylz&Q1{CDdV0sPmfiNt(0v3({6kg_;RDT30#D%+#1I|0ZD)7=#yV~G;=bU7hB z8+#{e60rWnhyj37tCBJABxPL+32b+og--E0VP`rYwE%*kH2DcMEwIM+W>+eYCsn1k zX>ePAO>-+YAEnv#m6k)|YxCd%@w? 
z+I;~X4&H|!U!UMJ>on!HReQz~FJLv|Rd4|9#x-md3Lojr0OcwhL=I0vL)W`Dh0uLD zxq+LhyV7=2W0}+?SE(@m<|P{*&7zB$Bq7)2Ig|6z zcswrfd>L*U=E{QDAj<0aLez7sYCn);v3`x@3_*Ur=q@4>ss))?)&x3wJi$s0Hg3r?wu?e@Y%6$5`AH>%`Gcpu%K4sgCm2I_!$onJ7f4a$jijC^ zxd&bYnskZVsY~G;gUx2_71atptTrAi)pOA(&5^*)%V}#A_bH;{*~a(;zIm z9nMi=jq>f1& zOA|hetFw;GXfzcqlxzG+;+T-zcQ04<1dDuZ6Oa#uh#g|ip(puL4Y=nG`4-rC%uv!z zFo)^~XjK_*HZ=ogIoizCiCR|@RV1{izqOnAum@RJ73Cpk5N%*=r0fH8W z3M#EL;_ZjOhL8}|2h0Dm8XJyOc?Bh4>kR#T=#s!1wPDqhM%N^W-GuWW6N{kH%b z74lp%`}3rvA7gu-D`sRO1kE~QRZ8!Z5o>4c!6uYjD*e=qrsB2_OPIi5^s^-Ecr&t5 zz-{C^cdHtdTMnxyX(A2oy=GK%Hd8B%-Ra87iuK)I(Fc}F$)KL~Zy$KQcb$~HWtbIY zmJ}hjssLQ_T>_ zy7KS}fc6v!%w&_jKfJ)OPSUyoJ%fDCTz682OtP8*A1+6!C=4}|9;gV^!n{&44e*xZ zG!0akeq>oeOrJ``t(fK=mqDG|z;+CBC+?7oDTyuHy6Zt%sC8#fUUgkz++?hC8Oxb0 z!IT9S=SWI6G)u<&@7#{w%6RGqPI7uuf@T4aQ67~;jKQ3goma~ubyiD;gjw!5@dZJ3Z}#e`uG1&yUC~Iz*12}%3jJ^a!#|tLk2Elx zwi%62q%tX2PGHPtlL z%JNEyF;v@=4m9N|Q+yWp@gcPUIg!9}q)kx*q@o?IW9FDPNvZdtU@4}7Yw*6=(_$ch zTQDS5r!Gu56x3(Ee}`?j16GHhQbritBDX3J)64JZ3Jo8%k)HT#i7|#rw;A>=*JoOj zXGgDIXu7I!OQzLqxx9tf_W+aUD|eVcs;5oQ#qZVY)Uq0CE81hdsb9r$ zU=?SZ($MRbj2EbR@=po58SadAKI5J!!voqQf2EiJEGKL({5hFj7DisuG#D8w*pW~$ z7dxLFFIXq<-iuURjr*vleaY(14msO}D@K0O6KvU#gPbtkKnw;bM+aFeGiSu`jgP*- zbY8zpT}aNF6=gt8O<7IZasF16z?<+$L-!qql$#yxWsp>z=aaasmeZBMW2ky56 zoje)y9iyR_2+X1lg}>;?*t|LHGS}p@8GC`Td|0XYIDxH~kIT@&qaEKI(aPtYindzU zOd~pSZ?0l(dA}CsA{JOIaTqgy#2`z`wT|b$qz1x=nt0-ZRD3p9SQalx?<%UfsnI#Z z>mlO6c&kY^w&dA~%dIONM24Qxtx@en7Ffk^HguS^y=@!5N<0D*2rOPQ;u^do4H-tR zYSRqPD7qvXj%C-Un9i(we6!k_?aULUH??G_lomC5i1LOsN2Y?OIzL-FqB)q7ONru9 zW}jPiGYw=~mDFC*GB#GUyfZ*^l$}Mf1cZJKsq4g7=UW~d+BVBO1gzkdM_;d%GgSu` zD89SXz?kG^_$%}4x*j?{HRoCO5*c@jRmUlYL4|W%%kei^7Q@UmAC6sCc3~4rEe9m- z&kKF&P@eL)jsF0uriUeh9_qA5n8 ztz(_qmo@g;w-$x=XU7&N8ynd>HFn}?+mvcW(?#6L8uzzSuJhfN*^zd5(~F)gjSBUh#J14d_zo16@I# znB0%?yXghnaZ24Nx@yx@WQ+2G#E$bkQ{uCKWa{?;RLn#1vS=k^c&y=Ahn9?hnMWDM zR@hM^%eFI0AgjE@p{=0c#`*har(@?U5EZODsdf~TyD4hKskf*dsrZM+d_%<}HVWam(9wGI5e0dfcR~)7FRNa)foTJ{#hsAI#^R|rdET=&dnrnuOhgH!^ 
zK`q8m_>7z}tBt_;rHm*(d7JBVnpwY2l8~B^atd$Bp|tS20SV1=W-HQg%voxdZzhmY5o0Xe1SY}9%_^vqdO6US`SNB)<-Y*cZ`d{vu5F;Kjg1ZC#d>7OSs6w z!Xs(A`?i22AVn@OO%OF025ehxA_!pBdaKK&407ri--c90oy>$1jM*;+cg z)E)be4}KbA7*;uUwH79{EIAJGxfy5|@`q8txVf*nT#+JR1!u{y%D_~fV5wdoGeb&j zhXd}IpOQ?TS&NR$irI4U(zWFrA5OE5qT#xuT|^`ujM5F0r7bR}*q0u!F`eqxNfn%G zZf1jA!j*B9L{-Nh15cr3AzS=r2cH0-Xr5TMXlCg+_8@OaMLPfE%i`>V=e-~%6M%#i z%bLF5vC~+Gf9*`u_kT4h6(Bjwc+q3m!u^!^&)CMH;rUa=9k zSkWfy0Fd8OHDs zN}jy1XtyXo3f$7P#9C1+##U^U1p6~}0F$>uFIfRz+mskCa6JZu9w;YY4N^RkBKAL1 zyl`I^LKu4ljAAqKZYkqGH-LLU3O?4bmdOkoYbt`Eu`{`Qyu$Br-7Zvq4Eu&^Ny61( z@HCoRy#IO)u(?q|G-=fDmPO3%CN+p*hzgWiQ~nMJWx@;!?VnCM4$(Db5VJEGp~fE} zNhdjoay7p!mWL--V54hoVXptg7hvb~BZVu%&YD$3;hn|?lOn~-lKvPqFi#+f``u2# z7jMFoH>oK{f`12ueqy0-&o39Bfh+zAm;;RP=yEz>HCT+gZG!8P z@Zj>h#x(H8{}h*&L?2zsJVgfbN-zTOf;etCHt3Ic@B6=AI{YdP8=a!IH{}4qAA?eJ z;y-6X6#{N%lVBUHQNc|jh*+RH^ytnmAnEcOs^$=Dnqr5KHKQ!5k3M96=?lNw{}3nu ziY@o7X*gfg9{6}N)42^BE&}dZI{l`1bi*7#eWO_vq8WeBL8RY15D)PL&L=N2J6BvV z)>=AZE~(kH0rdtLnP*&WSr`9EtuRL_1G|V5E@~MbUBpssJ;vt+9CGFyA79=&{&Qit ztDM|){`aWFace*X!_rh*L}Uz)f1b_JuNCArsQV>FfoKB=lG2@#xIy`(KAa6ayS+Ht*RJ)+<)7t+wVocE!ptmTCy?&eOK2Qa<-TFAsw)+E zijsSY)W(GAPq_|2yfnopp!+e96A<8;cPBok_r<6TOG#ZTcW%0#!=C^5a`|V`;DRJ6 z#Vw-X)?1g(l9N9Jf=U7?aXw4>eO+eDqWj`e1W2wa6@)h~>``!o$c^D~Ric!XXsE1{ zDeh~gZ=~mv{1SiQwa8pT2(sdo@CF4&D2eM?BgkeR|N14x+;$1UNP4aK@EEj|gR1n- zrmhQXTU$>mNv6JiJ3%kT^DB?X_SXrwcYTu=4jvzRpKxco(oTT$F5)2Nj|CLzHwWv& z!$5HiLDtWOLpr=y`OQvDgo1S1KCtTC^T1krH#;FZl9g6QK7@hy4uLz9xark)0-whd z^6K8l>*A_{n}XYSe|a>2nTz@wj0#@DH8?C6B@GG^H^z-*Fus@`_^awL;d>QTlm+Pm zyb22g>q686S8N&~(be9CaKmS^9hs=V0Av>gc;JR8JO?UYZK+>a9r320?$P1aEb!J~&7NBd+LWgCv6 zIB1^MNk1XgxYECD+N{t78;oc2{t;(>*N>p|a1{?}m_ZROJK0_a~oE z<-UJ+hI>r3tkItFq~;y5rZ?Y6hIUaBJWpTtpQ4WB>gJbe8s}#(fZWADL0J`0)-W3P ziWIy828N8f=M|I9QTp4FhhwMRc6UPCwHexjFDCwkocN0yZY0@T6S9wI-d$doVMVbi z1}stkUe!~autNSSm75B{Gj;vmIQr~cI`Onyku}A6U#7Cp_DPz%gfBihQp#35=OE(N zXUYG3Ec8Fr)&xdE+m%oqcv8AvWM}>~YIeDQkTP;)w=>Dtm6SLVU4hKh;E*e{}PB>Hmk(a=Qe%X-@7MSIJf>15qx>d4s8g|qd(&phM}uHq 
z!-Klq8z+b&#q-b{09O97TB7f2w}U!32n9s-JD*ssk*q4lh+iKvN zIFLcs#1U;}>8fZd!ky}tBl86L!Xwby9hY=V@rncaX^D6}9GNDQk}xP+t@2UAoPtkGr64Oi&S;R-ZKrMLb=ips$zQ_{pjqgxU*!+J*3

}e)8zfAP3Q2S zrefqZrF~e0H**O(?reHs%hMLw=#<1?mv8LMiIGy=-`v_#E#)x0*?{OZfZ>e{a}UQF zT6y9Q{&5Cug0hbAf1_odTF3jz-k5eW4ZWBfWmbD++x*1VC}%$?=4O6*4t9qFR&u2=fs^-1kYd2^&qR54(nC)!)&gTMdhej=d zxALI%I6bPHIzT>dLbf@eaB6WezrlCu6C+?&vVX1A5z-SB*z#9M z=RK~REDDAE)QB;iha$+wL|yCBk$nEh@Dt!}#ZFc``bq+S+r7fw?bTqY&oFua)IHhC zeYR5?*uU=0YZhod1NOgp+SW;4wT_N zuXSCW+L<$K3pyv;U!*StR59dGL?t01fy=6_x2AP|W@e_p_&zzd^b&do`TkXoy!7b_ zHGjM)YdZi*E&%UFZUZH$JxL5}*!z(}-<5!YxM+(KH0Yp30_nd@2u*~AlE1mMs ziToBPO>3N?O0L9d-iuSK_{_n<0eh;PeGDu8ORpXF2VW}kmFj`1;+){${11OCj|Nyc zI?qCaZZ}ckEwMEMHJEILweZ?%;|)Om%>5X)I@=lNNeRhJ@I|_=_#WK3jU|0!KYd|~ zoJQ!8Kg?Pvo0<&M_3JdJC<>&3p2TvLJK}Ts1)xq_xq~Mm6UU=>?KoRsG^VY!^|fy0 z2&BO!Hy7yNjgFE4a}Zg!GHo;P#|+Drw<81XUrDs~=iRfAl9gq=lZFBM3;f;9y))2s zD?G3HV+H}R%y0n-3@1x}gWc3@ zRCIG#Lta?hZog!jQfW)01FppQzf;JD%to{;_RQGN!sDlapZ zC%bYQ$Fp-)Xc7|$R7ErB*;9!^tCz|*HWGOU<4{?#lUYhuKmjAv|E7yFc(WtsLw~NC z2}`p(8YNj~E&ybdvR)Yb{7c4v6~aOa74qrz(mQc)Q-ltSWV~Vw`4D>f8pomkrdr1; z)IVRb`8@Mspg@0*qYX+Wfr5}$LW1ulFcqm(SW63-eg2;YYKtaDLd%TDTgzfH+bhS{ zCOiewBeCW9GZXbUsKgFF252?zS4ahYhdRVaMoHG8B7Nn;cLE*uXutNHbpRvqnsrKq zK-9F-K{2{|{FT{L;j3322a+f0eP8TreT6dhmm`#}3q!I>j*liA7~8^9{NFzd`SMi^ zSY|T5{m!uc%X7fWjPvcRT`WdMTKcs{p~Hjzm(O%$QyfsP+bcFdCJ*_cy&(l44cd^6 z+)`mQo6$1&Zn!=QUR%?Jb;D9uR~Lhen2b@%M%7J1>EnGUEBaJo;+S{ApkrO$L23ZA zW2H9!ZtU&!J^7tD2{V)ybG>Ot0#JKeDyBDF)lcaHX$)q8X*`T-Q_d{3^@BL<(7q+ zs}WEQL&HKRXw3)J|7PVuBk!N_m$OeA095`)3qn@3r)CZ=OqWHriGwW=sf;(^50^I_pL>p+2}HM$5c-B$bZ-Z zr9RD|cGJtt&u+)~Kv8=9C6wz(!R>2qYld})2hW2m3r=yXX1Waos5xY+^rhL@FMYbD zp0h>=wegU3_5HRsy`Q5|cm}8mRL5Q65OJCxxVX6>Ao=X%y<*2XL;1j2H$;?S)Ym&= zHbpKk4t`{0$s0>QFx$wVs3*u58Hk1q`TlO)+SDyPR^=DzP9o8TslX8OzgjLx@eMRu z^#d(rlk*>Ze0|*E`)M^!HD%9%!ALXg}1B`o;&2G6lu&jIjd;SZssI72HwHQA}!cUSuHaIL~ zCz)9b=g95%XF_@*bErA+QG#s)iZd&9E-iig7BZLat4#!?5{nj4xmT8K8Cr$tunz|)-&~d|Z>u(h1N`QX?Ti2>|UwOF( zYU<%EBG6Xz`m3PwE57rhpuOA_D%41LMK1h)v`J*CMPJg2!~gUQ+C)mOh2BiwMZ|&Yvun1Dh7+bg+aa^y|{F_ROIN0X2SYmybIR+L|P0m zUI9xZ0M>?yd^rl}O<{d5ive?O<|$UN9eRn+&yH5tY191c8vTJr8(@Zb-XnrnnCl5b 
z3P<3sE39Au@qX*o09#5^)*EX`85Of18Ax>n?F2yNE?hy!2Ii&CA{rI4hh6k&!5(K_ODVj zVf}4U!MD~_P+br89~EU~v&x-Ft|2#)f8X*Lk(Mk(QE7RySaBr56TB|e{og)94r?@4 z(?0!gZOvFi+D(v2_>*^eN8XVeC;a9^Ks+%M&KDw}2X=9(_d)d_^h5POJ!z>ID{IBfO;zwJ~PXkW8w9CEw zg?~CVnMkCL{(1f4qn9#YuJmkQ`HwKIGu9VaH?}0Zg77Kt2Hn#)7-W%hc=2A$Z`qwFYP>+7QeZtQ0`S|VaX%b1MC|*HOB>_fU3;VG_Pte+V)g!$ zOGNE*t4ZG5cB|&6jM7>^~*I8f#j6^x)E|sYJ@Lt{yKK)AgC#KpA>2_ z2+`+z{OVdIrj{iiD{!?z5UpkQ9Esh59G(M;SUgIV`fjGWj65DUC2S)K8$@tF zNx_TZ%|5B=&aumSjc3EALT|Ry zPy}{Y<3OXrke3 z+CCF9byp0kL7q=3HY_MaXb$?z3RoFtE)Te^x?Eft4{zOHyC&5>I(|*0>O%Gi zF@f3IEu%mZ+$Y{=&jJ8Md!uf>jmp~kW>_@jgrZI>-`0@c8eaChW2+GUD*94^kB|kfqrhmTVaS4QwrdZNDd%Q~Pal`|9}!XuYyw;6;@HXG>**hd@bOg;;+W{~ zG5$MP5dhMRev|CRkN!K}`^VpK0m1*X7hvcoS?+h;TUrV}27hi#D@zqg7(MyF0EE<- AP5=M^ literal 0 HcmV?d00001 diff --git a/tools/cloud_functions/gcs_event_based_ingest/ordered_backfill.py b/tools/cloud_functions/gcs_event_based_ingest/ordered_backfill.py new file mode 100644 index 000000000..87285a891 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/ordered_backfill.py @@ -0,0 +1,179 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" Command Line utility for backfilling gcs_ocn_bq_ingest cloud function when +ordering of incrementals is required +""" +import argparse +import concurrent.futures +import logging +import os +import pprint +import sys +from typing import Dict +from typing import Iterator +from typing import List + +import google.api_core.client_info +from google.cloud import storage + +import gcs_ocn_bq_ingest.main # pylint: disable=import-error + +CLIENT_INFO = google.api_core.client_info.ClientInfo( + user_agent="google-pso-tool/bq-severless-loader-cli") + +os.environ["FUNCTION_NAME"] = "backfill-cli" + + +def find_blobs_with_suffix( + gcs_client: storage.Client, + prefix: str, + suffix: str = "_SUCCESS", +) -> Iterator[storage.Blob]: + """ + Find GCS blobs with a given suffix. + + :param gcs_client: storage.Client + :param prefix: A GCS prefix to search i.e. gs://bucket/prefix/to/search + :param suffix: A suffix in blob name to match + :return: Iterable of blobs matching the suffix. + """ + prefix_blob: storage.Blob = storage.Blob.from_string(prefix) + # filter passes on scalability / laziness advantages of iterator. + return filter( + lambda blob: blob.name.endswith(suffix), + prefix_blob.bucket.list_blobs(client=gcs_client, + prefix=prefix_blob.name)) + + +def main(args: argparse.Namespace): + """main entry point for backfill CLI.""" + gcs_client: storage.Client = storage.Client(client_info=CLIENT_INFO) + pubsub_client = None + suffix = args.success_filename + if args.destination_regex: + os.environ["DESTINATION_REGEX"] = args.destination_regex + if args.mode == "NOTIFICATIONS": + if not args.pubsub_topic: + raise ValueError("when passing mode=NOTIFICATIONS" + "you must also pass pubsub_topic.") + # import is here because this utility can be used without + # google-cloud-pubsub dependency in LOCAL mode. 
+ # pylint: disable=import-outside-toplevel + from google.cloud import pubsub + pubsub_client = pubsub.PublisherClient() + + # These are all I/O bound tasks so use Thread Pool concurrency for speed. + with concurrent.futures.ThreadPoolExecutor() as executor: + future_to_gsurl = {} + for blob in find_blobs_with_suffix(gcs_client, args.gcs_path, suffix): + if pubsub_client: + # kwargs are message attributes + # https://googleapis.dev/python/pubsub/latest/publisher/index.html#publish-a-message + logging.info("sending pubsub message for: %s", + f"gs://{blob.bucket.name}/{blob.name}") + future_to_gsurl[executor.submit( + pubsub_client.publish, + args.pubsub_topic, + b'', # cloud function ignores message body + bucketId=blob.bucket.name, + objectId=blob.name, + _metaInfo="this message was submitted with " + "gcs_ocn_bq_ingest backfill.py utility" + )] = f"gs://{blob.bucket.name}/{blob.name}" + else: + logging.info("running cloud function locally for: %s", + f"gs://{blob.bucket.name}/{blob.name}") + future_to_gsurl[executor.submit( + gcs_ocn_bq_ingest.main.main, + { + "attributes": { + "bucketId": blob.bucket.name, + "objectId": blob.name + } + }, + None, + )] = f"gs://{blob.bucket.name}/{blob.name}" + exceptions: Dict[str, Exception] = dict() + for future in concurrent.futures.as_completed(future_to_gsurl): + gsurl = future_to_gsurl[future] + try: + future.result() + except Exception as err: # pylint: disable=broad-except + logging.error("Error processing %s: %s", gsurl, err) + exceptions[gsurl] = err + if exceptions: + raise RuntimeError("The following errors were encountered:\n" + + pprint.pformat(exceptions)) + + +def parse_args(args: List[str]) -> argparse.Namespace: + """argument parser for backfill CLI""" + parser = argparse.ArgumentParser( + description="utility to backfill success file notifications " + "or run the cloud function locally in concurrent threads.") + + parser.add_argument( + "--gcs-path", + "-p", + help="GCS path (e.g. 
gs://bucket/prefix/to/search/)to search for " + "existing _SUCCESS files", + required=True, + ) + + parser.add_argument( + "--mode", + "-m", + help="How to perform the backfill: LOCAL run cloud function main" + " method locally (in concurrent threads) or NOTIFICATIONS just push" + " notifications to Pub/Sub for a deployed version of the cloud function" + " to pick up. Default is NOTIFICATIONS.", + required=False, + type=str.upper, + choices=["LOCAL", "NOTIFICATIONS"], + default="NOTIFICATIONS", + ) + + parser.add_argument( + "--pubsub-topic", + "--topic", + "-t", + help="Pub/Sub notifications topic to post notifications for. " + "i.e. projects/{PROJECT_ID}/topics/{TOPIC_ID} " + "Required if using NOTIFICATIONS mode.", + required=False, + default=None, + ) + + parser.add_argument( + "--success-filename", + "-f", + help="Override the default success filename '_SUCCESS'", + required=False, + default="_SUCCESS", + ) + + parser.add_argument( + "--destination-regex", + "-r", + help="Override the default destination regex for determining BigQuery" + "destination based on information encoded in the GCS path of the" + "success file", + required=False, + default=None, + ) + return parser.parse_args(args) + + +if __name__ == "__main__": + main(parse_args(sys.argv)) diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index 46fe1d9ef..0f6bfbca2 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -19,7 +19,9 @@ from typing import List import pytest -from google.cloud import bigquery, error_reporting, storage +from google.cloud import bigquery +from google.cloud import error_reporting +from google.cloud import storage import gcs_ocn_bq_ingest.ordering import gcs_ocn_bq_ingest.utils @@ -470,3 +472,53 @@ def teardown(): request.addfinalizer(teardown) return backfill_blob + + 
+@pytest.mark.usefixtures("bq", "gcs_bucket", "dest_dataset", + "dest_partitioned_table") +@pytest.fixture +def gcs_external_partitioned_config( + request, bq, gcs_bucket, dest_dataset, + dest_partitioned_table) -> List[storage.blob.Blob]: + config_objs = [] + sql_obj = gcs_bucket.blob("/".join([ + dest_dataset.dataset_id, + dest_partitioned_table.table_id, + "_config", + "bq_transform.sql", + ])) + + sql = "INSERT {dest_dataset}.cf_test_nyc_311 SELECT * FROM temp_ext" + sql_obj.upload_from_string(sql) + + config_obj = gcs_bucket.blob("/".join([ + dest_dataset.dataset_id, dest_partitioned_table.table_id, "_config", + "external.json" + ])) + + public_table: bigquery.Table = bq.get_table( + bigquery.TableReference.from_string( + "bigquery-public-data.new_york_311.311_service_requests")) + config = { + "schema": public_table.to_api_repr()['schema'], + "csvOptions": { + "allowJaggedRows": False, + "allowQuotedNewlines": False, + "encoding": "UTF-8", + "fieldDelimiter": "|", + "skipLeadingRows": 0, + }, + "sourceFormat": "CSV", + "sourceUris": ["REPLACEME"], + } + config_obj.upload_from_string(json.dumps(config)) + config_objs.append(sql_obj) + config_objs.append(config_obj) + + def teardown(): + for do in config_objs: + if do.exists: + do.delete() + + request.addfinalizer(teardown) + return config_objs diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py index 49f76389f..e312351af 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py @@ -14,7 +14,8 @@ # limitations under the License. 
"""unit tests for gcs_ocn_bq_ingest""" import re -from typing import Dict, Optional +from typing import Dict +from typing import Optional import pytest diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py index 8aadeb08b..6459a206b 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py @@ -209,6 +209,39 @@ def test_load_job_partitioned(bq, gcs_partitioned_data, bq_wait_for_rows(bq, dest_partitioned_table, expected_num_rows) +@pytest.mark.IT +def test_external_query_partitioned(bq, gcs_partitioned_data, + gcs_external_partitioned_config, + dest_dataset, dest_partitioned_table, + mock_env): + """tests the basic external query ingrestion mechanics + with bq_transform.sql and external.json + """ + if not all((blob.exists() for blob in gcs_external_partitioned_config)): + raise google.cloud.exceptions.NotFound("config objects must exist") + + for blob in gcs_partitioned_data: + if not blob.exists(): + raise google.cloud.exceptions.NotFound( + "test data objects must exist") + test_event = { + "attributes": { + "bucketId": blob.bucket.name, + "objectId": blob.name + } + } + gcs_ocn_bq_ingest.main.main(test_event, None) + expected_num_rows = 0 + for part in [ + "$2017041101", + "$2017041102", + ]: + test_data_file = os.path.join(TEST_DIR, "resources", "test-data", + "nyc_311", part, "nyc_311.csv") + expected_num_rows += sum(1 for _ in open(test_data_file)) + bq_wait_for_rows(bq, dest_partitioned_table, expected_num_rows) + + @pytest.mark.IT def test_look_for_config_in_parents(bq, gcs_data_under_sub_dirs, gcs_external_config, dest_dataset, From 70d2d2b75e8d07fc13f59994b98c740d9a925a5e Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 9 Dec 2020 19:59:55 
-0800 Subject: [PATCH 25/90] docs --- .../gcs_event_based_ingest/README.md | 20 +++++++++------- .../gcs_ocn_bq_ingest/README.md | 21 ++++++++++++++++- .../gcs_ocn_bq_ingest/constants.py | 23 ++++++++++--------- 3 files changed, 44 insertions(+), 20 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/README.md index 5dcedf5c9..51b5b3a06 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/README.md @@ -21,14 +21,18 @@ By Default we try to read dataset, table, partition (or yyyy/mm/dd/hh) and batch id using the following python regex: ```python3 DEFAULT_DESTINATION_REGEX = ( - r"^(?P[\w\-\._0-9]+)/" # dataset (required) - r"(?P
[\w\-_0-9]+)/?" # table name (required) - r"(?P\$[0-9]+)?/?" # partition decortator (optional) - r"(?P[0-9]{4})?/?" # partition year (yyyy) (optional) - r"(?P[0-9]{2})?/?" # partition month (mm) (optional) - r"(?P
[0-9]{2})?/?" # partition day (dd) (optional) - r"(?P[0-9]{2})?/?" # partition hour (hh) (optional) - r"(?P[\w\-_0-9]+)?/" # batch id (optional) + r"^(?P[\w\-\._0-9]+)/" # dataset (required) + r"(?P
[\w\-_0-9]+)/?" # table name (required) + # break up historical v.s. incremental to separate prefixes (optional) + r"(?:historical|incremental)?/?" + r"(?P\$[0-9]+)?/?" # partition decorator (optional) + r"(?:" # [begin] yyyy/mm/dd/hh/ group (optional) + r"(?P[0-9]{4})/?" # partition year (yyyy) (optional) + r"(?P[0-9]{2})?/?" # partition month (mm) (optional) + r"(?P
[0-9]{2})?/?" # partition day (dd) (optional) + r"(?P[0-9]{2})?/?" # partition hour (hh) (optional) + r")?" # [end]yyyy/mm/dd/hh/ group (optional) + r"(?P[\w\-_0-9]+)?/" # batch id (optional) ) ``` you can see if this meets your needs in this [regex playground](https://regex101.com/r/5Y9TDh/2) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md index a1f417d7b..868d9e7bf 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md @@ -29,7 +29,7 @@ following default behavior. |-----------------------|---------------------------------------|----------------------------------------------| | `WAIT_FOR_JOB_SECONDS`| How long to wait before deciding BQ job did not fail quickly| `5` | | `SUCCESS_FILENAME` | Filename to trigger a load of a prefix| `_SUCCESS` | -| `DESTINATION_REGEX` | A [Python Regex with named capturing groups](https://docs.python.org/3/howto/regex.html#non-capturing-and-named-groups) for `dataset`, `table`, (optional: `partition` or `yyyy`, `mm`, `dd`, `hh`, `batch`) +| `DESTINATION_REGEX` | A [Python Regex with named capturing groups](https://docs.python.org/3/howto/regex.html#non-capturing-and-named-groups) for `dataset`, `table`, (optional: `partition` or `yyyy`, `mm`, `dd`, `hh`, `batch`) | (see below)| | `MAX_BATCH_BYTES` | Max bytes for BigQuery Load job | `15000000000000` ([15 TB](https://cloud.google.com/bigquery/quotas#load_jobs)| | `JOB_PREFIX` | Prefix for BigQuery Job IDs | `gcf-ingest-` | | `BQ_PROJECT` | Default BQ project to use if not specified in dataset capturing group | Project where Cloud Function is deployed | @@ -39,6 +39,24 @@ following default behavior. \* only affect the behavior when ordering is enabled for a table. 
See [ORDERING.md](../ORDERING.md) + +## Default Destination Regex +```python3 +DEFAULT_DESTINATION_REGEX = ( + r"^(?P[\w\-\._0-9]+)/" # dataset (required) + r"(?P
[\w\-_0-9]+)/?" # table name (required) + # break up historical v.s. incremental to separate prefixes (optional) + r"(?:historical|incremental)?/?" + r"(?P\$[0-9]+)?/?" # partition decorator (optional) + r"(?:" # [begin] yyyy/mm/dd/hh/ group (optional) + r"(?P[0-9]{4})/?" # partition year (yyyy) (optional) + r"(?P[0-9]{2})?/?" # partition month (mm) (optional) + r"(?P
[0-9]{2})?/?" # partition day (dd) (optional) + r"(?P[0-9]{2})?/?" # partition hour (hh) (optional) + r")?" # [end]yyyy/mm/dd/hh/ group (optional) + r"(?P[\w\-_0-9]+)?/" # batch id (optional) +) +``` ## Implementation notes 1. To support notifications based on a GCS prefix @@ -46,3 +64,4 @@ See [ORDERING.md](../ORDERING.md) configure Pub/Sub Notifications manually and use a Pub/Sub triggered Cloud Function. + diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py index 908d0e854..daa116dfe 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py @@ -70,17 +70,18 @@ # yapf: disable DEFAULT_DESTINATION_REGEX = ( - r"^(?P[\w\-\._0-9]+)/" # dataset (required) - r"(?P
[\w\-_0-9]+)/?" # table name (required) - r"(?:historical|incremental)?/?" # break up hist v.s. inc to separate prefixes (optional) - r"(?P\$[0-9]+)?/?" # partition decorator (optional) - r"(?:" # [begin] yyyy/mm/dd/hh/ group (optional) - r"(?P[0-9]{4})/?" # partition year (yyyy) (optional) - r"(?P[0-9]{2})?/?" # partition month (mm) (optional) - r"(?P
[0-9]{2})?/?" # partition day (dd) (optional) - r"(?P[0-9]{2})?/?" # partition hour (hh) (optional) - r")?" # [end]yyyy/mm/dd/hh/ group (optional) - r"(?P[\w\-_0-9]+)?/" # batch id (optional) + r"^(?P[\w\-\._0-9]+)/" # dataset (required) + r"(?P
[\w\-_0-9]+)/?" # table name (required) + # break up historical v.s. incremental to separate prefixes (optional) + r"(?:historical|incremental)?/?" + r"(?P\$[0-9]+)?/?" # partition decorator (optional) + r"(?:" # [begin] yyyy/mm/dd/hh/ group (optional) + r"(?P[0-9]{4})/?" # partition year (yyyy) (optional) + r"(?P[0-9]{2})?/?" # partition month (mm) (optional) + r"(?P
[0-9]{2})?/?" # partition day (dd) (optional) + r"(?P[0-9]{2})?/?" # partition hour (hh) (optional) + r")?" # [end]yyyy/mm/dd/hh/ group (optional) + r"(?P[\w\-_0-9]+)?/" # batch id (optional) ) # yapf: enable From 6ec3625144ffab3d2533db0430ef1ac06038c998 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 9 Dec 2020 20:06:25 -0800 Subject: [PATCH 26/90] fixup linters --- .../gcs_ocn_bq_ingest/README.md | 2 +- .../gcs_ocn_bq_ingest/main.py | 22 +++++++++---------- .../gcs_ocn_bq_ingest/ordering.py | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md index 868d9e7bf..e93b10056 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md @@ -15,7 +15,7 @@ is configurable with environment variable. ## Deployment The source for this Cloud Function can easily be reused to repeat this pattern -for many tables by using the accompanying terraform module (TODO). +for many tables by using the accompanying terraform module. This way we can reuse the tested source code for the Cloud Function. 
diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 3d349eeea..7a7fe8a28 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -98,20 +98,20 @@ def main(event: Dict, context): # pylint: disable=unused-argument if basename_object_id == constants.SUCCESS_FILENAME: ordering.backlog_publisher(gcs_client, event_blob) return - elif basename_object_id == constants.BACKFILL_FILENAME: + if basename_object_id == constants.BACKFILL_FILENAME: ordering.backlog_subscriber(gcs_client, bq_client, event_blob, function_start_time) return else: # Default behavior submit job as soon as success file lands. - bkt = utils.cached_get_bucket(gcs_client, bucket_id) - success_blob: storage.Blob = bkt.blob(object_id) - utils.handle_duplicate_notification(success_blob) - apply( - gcs_client, - bq_client, - success_blob, - None, # None lock blob as there is no serialization required. - utils.create_job_id(table_ref, batch)) + if basename_object_id == constants.SUCCESS_FILENAME: + utils.handle_duplicate_notification(event_blob) + apply( + gcs_client, + bq_client, + event_blob, + # None lock blob as there is no serialization required. + None, + utils.create_job_id(table_ref, batch)) # Unexpected exceptions will actually raise which may cause a cold restart. except tuple(exceptions.EXCEPTIONS_TO_REPORT) as original_error: # We do this because we know these errors do not require a cold restart @@ -122,7 +122,7 @@ def main(event: Dict, context): # pylint: disable=unused-argument # This mostly handles the case where error reporting API is not # enabled or IAM permissions did not allow us to report errors with # error reporting API. 
- raise original_error + raise original_error # pylint: disable=raise-missing-from def lazy_error_reporting_client() -> error_reporting.Client: diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py index dea38dbec..092a232da 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py @@ -85,7 +85,7 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, last_job_done = utils.wait_on_bq_job_id( bq_client, job_id, polling_timeout) except (exceptions.BigQueryJobFailure, - google.api_core.exceptions.NotFound): + google.api_core.exceptions.NotFound) as err: raise exceptions.BigQueryJobFailure( f"previous BigQuery job: {job_id} failed or could not " "be found. This will kill the backfill subscriber for " @@ -101,7 +101,7 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, f"to resume the backfill subscriber so it can " "continue with the next item in the backlog.\n" "Original Exception:\n" - f"{traceback.format_exc()}") + f"{traceback.format_exc()}") from err else: print(f"sleeping for {polling_timeout} seconds because" f"found manual lock gs://{bkt.name}/{lock_blob.name} with" From 2d0e5a8c5d8bb0c1478fefee95019fc95199cf6f Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 9 Dec 2020 20:08:55 -0800 Subject: [PATCH 27/90] fixup import style --- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 7a7fe8a28..2cf3bbd64 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ 
-33,7 +33,6 @@ # Reuse GCP Clients across function invocations using globbals # https://cloud.google.com/functions/docs/bestpractices/tips#use_global_variables_to_reuse_objects_in_future_invocations # pylint: disable=global-statement -from .utils import apply ERROR_REPORTING_CLIENT = None @@ -105,7 +104,7 @@ def main(event: Dict, context): # pylint: disable=unused-argument else: # Default behavior submit job as soon as success file lands. if basename_object_id == constants.SUCCESS_FILENAME: utils.handle_duplicate_notification(event_blob) - apply( + utils.apply( gcs_client, bq_client, event_blob, From 7cb00e46d7560e568a39f3f6114a4cb6cd1f0364 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 9 Dec 2020 20:10:50 -0800 Subject: [PATCH 28/90] typing isort single line exclusion --- tools/cloud_functions/gcs_event_based_ingest/.isort.cfg | 1 + tools/cloud_functions/gcs_event_based_ingest/backfill.py | 4 +--- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py | 1 + .../gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py | 8 +------- .../gcs_event_based_ingest/ordered_backfill.py | 4 +--- .../tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py | 3 +-- 6 files changed, 6 insertions(+), 15 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/.isort.cfg b/tools/cloud_functions/gcs_event_based_ingest/.isort.cfg index 7b7b2d6f3..6f72bca0f 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/.isort.cfg +++ b/tools/cloud_functions/gcs_event_based_ingest/.isort.cfg @@ -2,3 +2,4 @@ src_paths=backfill.py,gcs_ocn_bq_ingest,test skip=terraform_module force_single_line=True +single_line_exclusions=typing diff --git a/tools/cloud_functions/gcs_event_based_ingest/backfill.py b/tools/cloud_functions/gcs_event_based_ingest/backfill.py index 3730074ee..f0a2ce415 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/backfill.py +++ b/tools/cloud_functions/gcs_event_based_ingest/backfill.py @@ -19,9 +19,7 @@ import os import pprint import sys -from typing import 
Dict -from typing import Iterator -from typing import List +from typing import Dict, Iterator, List import google.api_core.client_info from google.cloud import storage diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 2cf3bbd64..776f61317 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -30,6 +30,7 @@ from . import exceptions from . import ordering from . import utils + # Reuse GCP Clients across function invocations using globbals # https://cloud.google.com/functions/docs/bestpractices/tips#use_global_variables_to_reuse_objects_in_future_invocations # pylint: disable=global-statement diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py index 208189e39..6fdc1192c 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -25,13 +25,7 @@ import pathlib import time import uuid -from typing import Any -from typing import Deque -from typing import Dict -from typing import List -from typing import Optional -from typing import Tuple -from typing import Union +from typing import Any, Deque, Dict, List, Optional, Tuple, Union import cachetools import google.api_core diff --git a/tools/cloud_functions/gcs_event_based_ingest/ordered_backfill.py b/tools/cloud_functions/gcs_event_based_ingest/ordered_backfill.py index 87285a891..ed0b1da79 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/ordered_backfill.py +++ b/tools/cloud_functions/gcs_event_based_ingest/ordered_backfill.py @@ -20,9 +20,7 @@ import os import pprint import sys -from typing import Dict -from typing import Iterator -from typing import List +from typing import Dict, 
Iterator, List import google.api_core.client_info from google.cloud import storage diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py index e312351af..49f76389f 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py @@ -14,8 +14,7 @@ # limitations under the License. """unit tests for gcs_ocn_bq_ingest""" import re -from typing import Dict -from typing import Optional +from typing import Dict, Optional import pytest From 0be46f97df9a7c4c4b4b1e6b60aed7941a3ec132 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 9 Dec 2020 20:13:59 -0800 Subject: [PATCH 29/90] fixup gcb no-name-in-module bug --- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py index 6fdc1192c..8144b4bae 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -37,7 +37,7 @@ from google.cloud import storage from . import constants # pylint: disable=no-name-in-module -from . import exceptions +from . 
import exceptions # pylint: disable=no-name-in-module def external_query( # pylint: disable=too-many-arguments From 9a0ee105cb18a81c4d869178189280acb6d97fcb Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Thu, 10 Dec 2020 14:12:12 -0800 Subject: [PATCH 30/90] add test of subscriber after subscriber exit --- .../gcs_ocn_bq_ingest/main.py | 3 +- .../gcs_ocn_bq_ingest/ordering.py | 9 +++-- .../gcs_ocn_bq_ingest/utils.py | 21 +++++----- .../gcs_event_based_ingest/tests/conftest.py | 37 ++++++++++++++++-- .../gcs_ocn_bq_ingest/test_ordering_it.py | 39 ++++++++++++++++--- .../resources/test-data/ordering/00/data.csv | 1 - .../resources/test-data/ordering/01/data.csv | 2 +- .../resources/test-data/ordering/02/data.csv | 2 +- .../test-data/ordering/{00 => 03}/_SUCCESS | 0 .../resources/test-data/ordering/03/data.csv | 1 + .../resources/test-data/ordering/04/_SUCCESS | 0 .../resources/test-data/ordering/04/data.csv | 1 + 12 files changed, 87 insertions(+), 29 deletions(-) delete mode 100644 tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/00/data.csv rename tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/{00 => 03}/_SUCCESS (100%) create mode 100644 tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/03/data.csv create mode 100644 tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/04/_SUCCESS create mode 100644 tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/04/data.csv diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 776f61317..fc415f94b 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -151,7 +151,8 @@ def lazy_bq_client() -> bigquery.Client: default_query_config.labels = 
constants.DEFAULT_JOB_LABELS BQ_CLIENT = bigquery.Client( client_info=constants.CLIENT_INFO, - default_query_job_config=default_query_config) + default_query_job_config=default_query_config, + project=os.getenv("BQ_PROJECT", os.getenv("GCP_PROJECT"))) return BQ_CLIENT diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py index 092a232da..4e188e5b0 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py @@ -36,7 +36,7 @@ def backlog_publisher( gcs_client: storage.Client, event_blob: storage.Blob, -): +) -> Optional[storage.Blob]: """add success files to the the backlog and trigger backfill if necessary""" bkt = event_blob.bucket @@ -47,7 +47,8 @@ def backlog_publisher( "to the backlog.") table_prefix = utils.get_table_prefix(event_blob.name) - start_backfill_subscriber_if_not_running(gcs_client, bkt, table_prefix) + return start_backfill_subscriber_if_not_running(gcs_client, bkt, + table_prefix) # pylint: disable=too-many-arguments,too-many-locals @@ -118,7 +119,7 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, table_prefix) if not next_backlog_file: backfill_blob.delete(if_generation_match=backfill_blob.generation) - if (check_backlog_time + constants.ENSURE_SUBSCRIBER_SECONDS - 2 < + if (check_backlog_time + constants.ENSURE_SUBSCRIBER_SECONDS < time.monotonic()): print( "checking if the backlog is still empty for " @@ -134,7 +135,7 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, gcs_client, bkt, table_prefix) if next_backlog_file: # The backfill file may have been deleted but the backlog is - # not empty. Retrigger the backfill subscriber loop by + # not empty. Re-trigger the backfill subscriber loop by # dropping a new backfill file. 
start_backfill_subscriber_if_not_running( gcs_client, bkt, table_prefix) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py index 8144b4bae..f3ac5dfc5 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -72,15 +72,16 @@ def external_query( # pylint: disable=too-many-arguments # Note, dest_table might include a partition decorator. rendered_query = query.format( - dest_dataset=dest_table_ref.dataset_id, + dest_dataset=f"`{dest_table_ref.project}`.{dest_table_ref.dataset_id}", dest_table=dest_table_ref.table_id, ) - job: bigquery.QueryJob = bq_client.query( - rendered_query, - job_config=job_config, - job_id=job_id, - ) + job: bigquery.QueryJob = bq_client.query(rendered_query, + job_config=job_config, + job_id=job_id, + project=os.getenv( + "BQ_PROJECT", + bq_client.project)) print(f"started asynchronous query job: {job.job_id}") @@ -259,12 +260,10 @@ def get_batches_for_prefix( batch.clear() if len(batches) > 1: - print(f"split into {len(batches)} load jobs.") - elif len(batches) == 1: - print("using single load job.") - else: + print(f"split into {len(batches)} batches.") + elif len(batches) < 1: raise google.api_core.exceptions.NotFound( - f"No files to load at gs://{bucket_name}/{prefix_path}!") + f"No files to load at {prefix_path}!") return batches diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index 0f6bfbca2..d8e877e0b 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -338,7 +338,7 @@ def bq_wait_for_rows(bq_client: bigquery.Client, table: bigquery.Table, @pytest.fixture -def dest_ordered_update_table(request, bq, mock_env, +def 
dest_ordered_update_table(request, gcs, gcs_bucket, bq, mock_env, dest_dataset) -> bigquery.Table: with open(os.path.join(TEST_DIR, "resources", "ordering_schema.json")) as schema_file: @@ -352,11 +352,31 @@ def dest_ordered_update_table(request, bq, mock_env, ) table = bq.create_table(table) - # Our test query only updates so we need to populate the first row. - bq.load_table_from_json([{"id": 1, "alpha_update": ""}], table) + + # Our test query only updates on a single row so we need to populate + # original row. + # This can be used to simulate an existing _bqlock from a prior run of the + # subscriber loop with a job that has succeeded. + job: bigquery.LoadJob = bq.load_table_from_json( + [{ + "id": 1, + "alpha_update": "" + }], + table, + job_id_prefix=gcs_ocn_bq_ingest.constants.DEFAULT_JOB_PREFIX) + + # The subscriber will be responsible for cleaning up this file. + bqlock_obj: storage.blob.Blob = gcs_bucket.blob("/".join([ + f"{dest_dataset.project}.{dest_dataset.dataset_id}", table.table_id, + "_bqlock" + ])) + + bqlock_obj.upload_from_string(job.job_id) def teardown(): bq.delete_table(table, not_found_ok=True) + if bqlock_obj.exists(): + bqlock_obj.delete() request.addfinalizer(teardown) return table @@ -367,10 +387,17 @@ def gcs_ordered_update_data( request, gcs_bucket, dest_dataset, dest_ordered_update_table) -> List[storage.blob.Blob]: data_objs = [] + older_success_blob: storage.blob.Blob = gcs_bucket.blob("/".join([ + f"{dest_dataset.project}.{dest_dataset.dataset_id}", + dest_ordered_update_table.table_id, "00", "_SUCCESS" + ])) + older_success_blob.upload_from_string("") + data_objs.append(older_success_blob) + chunks = { - "00", "01", "02", + "03", } for chunk in chunks: for test_file in ["data.csv", "_SUCCESS"]: @@ -397,6 +424,8 @@ def gcs_backlog(request, gcs, gcs_bucket, gcs_ordered_update_data) -> List[storage.blob.Blob]: data_objs = [] + # We will deal with the last incremental in the test itself to test the + # behavior of a new backlog 
subscriber. for success_blob in gcs_ordered_update_data: gcs_ocn_bq_ingest.ordering.backlog_publisher(gcs, success_blob) backlog_blob = gcs_ocn_bq_ingest.ordering.success_blob_to_backlog_blob( diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py index c3cb23585..aa0cc5a94 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py @@ -114,11 +114,11 @@ def test_backlog_publisher_with_existing_backfill_file(gcs, gcs_bucket, @pytest.mark.IT @pytest.mark.ORDERING -def test_single_backlog_subscriber_in_order(bq, gcs, gcs_bucket, error, - dest_ordered_update_table, - gcs_ordered_update_data, - gcs_external_update_config, - gcs_backlog, mock_env): +def test_backlog_subscriber_in_order(bq, gcs, gcs_bucket, error, dest_dataset, + dest_ordered_update_table, + gcs_ordered_update_data, + gcs_external_update_config, gcs_backlog, + mock_env): """Test basic functionality of backlog subscriber. Populate a backlog with 3 files that make updates where we can assert that these jobs were applied in order. 
@@ -130,6 +130,8 @@ def test_single_backlog_subscriber_in_order(bq, gcs, gcs_bucket, error, prefix=f"{gcs_ocn_bq_ingest.utils.get_table_prefix(gcs_external_update_config.name)}/_backlog/" ) assert backlog_blobs.num_results == 0, "backlog is not empty" + bqlock_blob: storage.Blob = gcs_bucket.blob("_bqlock") + assert not bqlock_blob.exists(), "_bqlock was not cleaned up" rows = bq.query("SELECT alpha_update FROM " f"{dest_ordered_update_table.dataset_id}" f".{dest_ordered_update_table.table_id}") @@ -137,5 +139,30 @@ def test_single_backlog_subscriber_in_order(bq, gcs, gcs_bucket, error, num_rows = 0 for row in rows: num_rows += 1 - assert row["alpha_update"] == "ABC", "incrementals not applied in order" + assert row["alpha_update"] == "ABC", "backlog not applied in order" + assert num_rows == expected_num_rows + + # Now we will test what happens when the publisher posts another batch after + # the backlog subscriber has exited. + data_obj: storage.Blob + for test_file in ["data.csv", "_SUCCESS"]: + data_obj = gcs_bucket.blob("/".join([ + f"{dest_dataset.project}.{dest_dataset.dataset_id}", + dest_ordered_update_table.table_id, "04", test_file + ])) + data_obj.upload_from_filename( + os.path.join(TEST_DIR, "resources", "test-data", "ordering", "04", + test_file)) + backfill_blob = gcs_ocn_bq_ingest.ordering.backlog_publisher(gcs, data_obj) + gcs_ocn_bq_ingest.ordering.backlog_subscriber(gcs, bq, backfill_blob, + time.monotonic()) + + rows = bq.query("SELECT alpha_update FROM " + f"{dest_ordered_update_table.dataset_id}" + f".{dest_ordered_update_table.table_id}") + expected_num_rows = 1 + num_rows = 0 + for row in rows: + num_rows += 1 + assert row["alpha_update"] == "ABCD", "new incremental not applied" assert num_rows == expected_num_rows diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/00/data.csv b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/00/data.csv deleted file mode 100644 index 
6b4f72558..000000000 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/00/data.csv +++ /dev/null @@ -1 +0,0 @@ -1|A diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/01/data.csv b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/01/data.csv index 3b4f35bfc..6b4f72558 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/01/data.csv +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/01/data.csv @@ -1 +1 @@ -1|B +1|A diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/02/data.csv b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/02/data.csv index ecf1eb9e0..3b4f35bfc 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/02/data.csv +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/02/data.csv @@ -1 +1 @@ -1|C +1|B diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/00/_SUCCESS b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/03/_SUCCESS similarity index 100% rename from tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/00/_SUCCESS rename to tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/03/_SUCCESS diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/03/data.csv b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/03/data.csv new file mode 100644 index 000000000..ecf1eb9e0 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/03/data.csv @@ -0,0 +1 @@ +1|C diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/04/_SUCCESS 
b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/04/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/04/data.csv b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/04/data.csv new file mode 100644 index 000000000..09b72c865 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/resources/test-data/ordering/04/data.csv @@ -0,0 +1 @@ +1|D From feb867ee1817b34821e33dae08796831d4cea280 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Thu, 10 Dec 2020 17:11:04 -0800 Subject: [PATCH 31/90] chores: tf updates, larger machine type, etc. --- .../gcs_event_based_ingest/cloudbuild.yaml | 2 +- .../gcs_ocn_bq_ingest/main.py | 1 - .../gcs_ocn_bq_ingest/ordering.py | 42 ++++++-- .../gcs_ocn_bq_ingest/utils.py | 20 ++-- .../requirements-dev.txt | 1 + .../gcs_ocn_bq_ingest_function/README.md | 9 +- .../gcs_ocn_bq_ingest_function/main.tf | 8 +- .../gcs_ocn_bq_ingest_function/variables.tf | 23 +--- .../gcs_ocn_bq_ingest/test_ordering_it.py | 102 +++++++++++++++--- 9 files changed, 144 insertions(+), 64 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index d1367b925..697e6d702 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -128,6 +128,6 @@ steps: - 'IT' id: 'integration-test' options: - machineType: 'N1_HIGHCPU_8' + machineType: 'N1_HIGHCPU_32' substitutions: '_BUILD_DIR': 'tools/cloud_functions/gcs_event_based_ingest' diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index fc415f94b..fe4efb903 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ 
b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -104,7 +104,6 @@ def main(event: Dict, context): # pylint: disable=unused-argument return else: # Default behavior submit job as soon as success file lands. if basename_object_id == constants.SUCCESS_FILENAME: - utils.handle_duplicate_notification(event_blob) utils.apply( gcs_client, bq_client, diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py index 4e188e5b0..0e88238b3 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py @@ -19,7 +19,7 @@ import os import time import traceback -from typing import Optional +from typing import Optional, Tuple import google.api_core import google.api_core.exceptions @@ -52,17 +52,19 @@ def backlog_publisher( # pylint: disable=too-many-arguments,too-many-locals -def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, +def backlog_subscriber(gcs_client: Optional[storage.Client], + bq_client: Optional[bigquery.Client], backfill_blob: storage.Blob, function_start_time: float): """Pick up the table lock, poll BQ job id until completion and process next item in the backlog. """ + gcs_client, bq_client = _get_clients_if_none(gcs_client, bq_client) # We need to retrigger the backfill loop before the Cloud Functions Timeout. 
restart_time = function_start_time + ( float(os.getenv("FUNCTION_TIMEOUT_SEC", "60")) - constants.RESTART_BUFFER_SECONDS) bkt = backfill_blob.bucket - utils.handle_duplicate_notification(backfill_blob) + utils.handle_duplicate_notification(gcs_client, backfill_blob) table_prefix = utils.get_table_prefix(backfill_blob.name) last_job_done = False # we will poll for job completion this long in an individual iteration of @@ -118,7 +120,8 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, next_backlog_file = utils.get_next_backlog_item(gcs_client, bkt, table_prefix) if not next_backlog_file: - backfill_blob.delete(if_generation_match=backfill_blob.generation) + backfill_blob.delete(if_generation_match=backfill_blob.generation, + client=gcs_client) if (check_backlog_time + constants.ENSURE_SUBSCRIBER_SECONDS < time.monotonic()): print( @@ -148,7 +151,7 @@ def backlog_subscriber(gcs_client: storage.Client, bq_client: bigquery.Client, next_backlog_file.name.replace("/_backlog/", "/")) table_ref, batch = utils.gcs_path_to_table_ref_and_batch( next_success_file.name) - if not next_success_file.exists(): + if not next_success_file.exists(client=gcs_client): raise exceptions.BacklogException( "backlog contains" f"gs://{next_backlog_file.bucket}/{next_backlog_file.name}" @@ -177,7 +180,7 @@ def start_backfill_subscriber_if_not_running( if constants.START_BACKFILL_FILENAME: start_backfill_blob = bkt.blob( f"{table_prefix}/{constants.START_BACKFILL_FILENAME}") - start_backfill = start_backfill_blob.exists() + start_backfill = start_backfill_blob.exists(client=gcs_client) if start_backfill: # Create a _BACKFILL file for this table if not exists @@ -192,7 +195,7 @@ def start_backfill_subscriber_if_not_running( f"created at {backfill_blob.time_created}. exiting. 
") return backfill_blob except google.api_core.exceptions.PreconditionFailed: - backfill_blob.reload() + backfill_blob.reload(client=gcs_client) print("backfill already in progress due to: " f"gs://{backfill_blob.bucket.name}/{backfill_blob.name} " f"created at {backfill_blob.time_created}. exiting.") @@ -243,3 +246,28 @@ def subscriber_monitor(gcs_client: storage.Client, bkt: storage.Bucket, backfill_blob = \ start_backfill_subscriber_if_not_running( gcs_client, bkt, utils.get_table_prefix(object_id)) + + +def _get_clients_if_none( + gcs_client: Optional[storage.Client], + bq_client: Optional[bigquery.Client] +) -> Tuple[storage.Client, bigquery.Client]: + """method to handle case where clients are None. + + This is a workaround to be able to run the backlog subscriber in a separate + process to facilitate some of our integration tests. Though it should be + harmless. + """ + print("instantiating missing clients in backlog subscriber this should only" + "happen during integration tests.") + if not gcs_client: + gcs_client = storage.Client(client_info=constants.CLIENT_INFO) + if not bq_client: + default_query_config = bigquery.QueryJobConfig() + default_query_config.use_legacy_sql = False + default_query_config.labels = constants.DEFAULT_JOB_LABELS + bq_client = bigquery.Client( + client_info=constants.CLIENT_INFO, + default_query_job_config=default_query_config, + project=os.getenv("BQ_PROJECT", os.getenv("GCP_PROJECT"))) + return gcs_client, bq_client diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py index f3ac5dfc5..c07f718cb 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -89,7 +89,7 @@ def external_query( # pylint: disable=too-many-arguments # Check if job failed quickly while time.monotonic( ) - start_poll_for_errors < 
constants.WAIT_FOR_JOB_SECONDS: - job.reload() + job.reload(client=bq_client) if job.errors: raise exceptions.BigQueryJobFailure( f"query job {job.job_id} failed quickly: {job.errors}") @@ -126,7 +126,7 @@ def load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id): ) - start_poll_for_errors < constants.WAIT_FOR_JOB_SECONDS: # Check if job failed quickly for job in jobs: - job.reload() + job.reload(client=bq_client) if job.errors: raise exceptions.BigQueryJobFailure( f"load job {job.job_id} failed quickly: {job.errors}") @@ -402,7 +402,8 @@ def recursive_update(original: Dict, update: Dict, in_place: bool = False): return out -def handle_duplicate_notification(blob_to_claim: storage.Blob): +def handle_duplicate_notification(gcs_client: storage.Client, + blob_to_claim: storage.Blob): """ Need to handle potential duplicate Pub/Sub notifications. To achieve this we will drop an empty "claimed" file that indicates @@ -412,7 +413,7 @@ def handle_duplicate_notification(blob_to_claim: storage.Blob): duplicate ingestion due to multiple Pub/Sub messages for a success file with the same creation time. 
""" - blob_to_claim.reload() + blob_to_claim.reload(client=gcs_client) created_unix_timestamp = blob_to_claim.time_created.timestamp() basename = os.path.basename(blob_to_claim.name) @@ -421,7 +422,9 @@ def handle_duplicate_notification(blob_to_claim: storage.Blob): basename, f"_claimed_{basename}_created_at_" f"{created_unix_timestamp}")) try: - claim_blob.upload_from_string("", if_generation_match=0) + claim_blob.upload_from_string("", + if_generation_match=0, + client=gcs_client) except google.api_core.exceptions.PreconditionFailed as err: raise exceptions.DuplicateNotificationException( f"gs://{blob_to_claim.bucket.name}/{blob_to_claim.name} appears" @@ -504,7 +507,7 @@ def remove_oldest_backlog_item( # https://cloud.google.com/storage/docs/json_api/v1/objects/list blob: storage.Blob for blob in backlog_blobs: - blob.delete() + blob.delete(client=gcs_client) return True # Return after deleteing first blob in the iterator return False @@ -651,7 +654,7 @@ def handle_bq_lock(gcs_client: storage.Client, lock_blob: storage.Blob, blob if next_job_id is None.""" try: if next_job_id: - if lock_blob.exists(): + if lock_blob.exists(client=gcs_client): lock_blob.upload_from_string( next_job_id, if_generation_match=lock_blob.generation, @@ -669,7 +672,7 @@ def handle_bq_lock(gcs_client: storage.Client, lock_blob: storage.Blob, ) except google.api_core.exceptions.PreconditionFailed as err: raise exceptions.BacklogException( - f"The lock at gs://{lock_blob.bucket.name}/{lock_blob.name}" + f"The lock at gs://{lock_blob.bucket.name}/{lock_blob.name} " f"was changed by another process.") from err @@ -692,6 +695,7 @@ def apply( lock_blob: storage.Blob job_id: str """ + handle_duplicate_notification(gcs_client, success_blob) bkt = success_blob.bucket if lock_blob is not None: handle_bq_lock(gcs_client, lock_blob, job_id) diff --git a/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt b/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt index 
b86a61183..f250ab6ee 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt +++ b/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt @@ -8,3 +8,4 @@ pylint pytest-parallel pytest-cov google-cloud-pubsub>=2.2.0 +pytest-repeat diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md index f1acab548..b347aceeb 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md @@ -27,24 +27,21 @@ documented [here](../gcs_ocn_bq_ingest_function/README.md) | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | app\_id | Application Name | `any` | n/a | yes | -| bigquery\_project\_ids | Additional project IDs to grant bigquery Admin / Job user for the data ingester account | `list(string)` | `[]` | no | +| bigquery\_project\_ids | Additional project IDs to grant bigquery Admin for the data ingester account | `list(string)` | `[]` | no | | cloudfunctions\_source\_bucket | GCS bucket to store Cloud Functions Source | `any` | n/a | yes | | data\_ingester\_sa | Service Account Email responsible for ingesting data to BigQuery | `any` | n/a | yes | -| destination\_regex | A [Python Regex with named capturing groups](https://docs.python.org/3/howto/regex.html#non-capturing-and-named-groups) for destination `dataset`, `table`, (optional: `partition`, `batch`) | `string` | `""` | no | +| environment\_variables | Environment variables to set on the cloud function. 
| `map(string)` | `{}` | no | | function\_source\_folder | Path to Cloud Function source | `string` | `"../gcs_event_based_ingest/gcs_ocn_bq_ingest/"` | no | | input\_bucket | GCS bucket to watch for new files | `any` | n/a | yes | | input\_prefix | GCS prefix to watch for new files in input\_bucket | `any` | `null` | no | -| job\_prefix | Prefix for BigQuery Job IDs | `string` | `""` | no | -| max\_batch\_bytes | Max bytes for BigQuery Load job | `string` | `""` | no | | project\_id | GCP Project ID containing cloud function, and input bucket | `any` | n/a | yes | | region | GCP region in which to deploy cloud function | `string` | `"us-central1"` | no | -| success\_filename | Filename to trigger a load of a prefix | `string` | `""` | no | | use\_pubsub\_notifications | Setting this to true will use Pub/Sub notifications By default we will use Cloud Functions Event direct notifications. See https://cloud.google.com/storage/docs/pubsub-notifications. | `bool` | `false` | no | -| wait\_for\_job\_seconds | How long to wait before deciding BQ job did not fail quickly | `string` | `""` | no | ## Outputs | Name | Description | |------|-------------| | cloud-function | instance of cloud function deployed by this module. 
| +| data-ingester-sa | data ingester service account email created as cloud function identity | diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf index faf9b3b82..16d7ce821 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf @@ -69,13 +69,7 @@ resource "google_cloudfunctions_function" "gcs_to_bq" { source_archive_bucket = var.cloudfunctions_source_bucket source_archive_object = google_storage_bucket_object.function_zip_object.name entry_point = "main" - environment_variables = { - WAIT_FOR_JOB_SECONDS = var.wait_for_job_seconds - SUCCESS_FILENAME = var.success_filename - DESTINATION_REGEX = var.destination_regex - MAX_BATCH_BYTES = var.max_batch_bytes - JOB_PREFIX = var.job_prefix - } + environment_variables = var.environment_variables event_trigger { event_type = var.use_pubsub_notifications ? "providers/cloud.pubsub/eventTypes/topic.publish" : "google.storage.object.finalize" resource = var.use_pubsub_notifications ? 
google_pubsub_topic.notification_topic[0].id : module.bucket.name diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf index 0452e9769..d26edee2e 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf @@ -36,27 +36,12 @@ variable "data_ingester_sa" { description = "Service Account Email responsible for ingesting data to BigQuery" } -variable "wait_for_job_seconds" { - description = "How long to wait before deciding BQ job did not fail quickly" - default = "" -} -variable "success_filename" { - description = "Filename to trigger a load of a prefix" - default = "" -} -variable "destination_regex" { - description = "A [Python Regex with named capturing groups](https://docs.python.org/3/howto/regex.html#non-capturing-and-named-groups) for destination `dataset`, `table`, (optional: `partition`, `batch`)" - default = "" -} -variable "max_batch_bytes" { - description = "Max bytes for BigQuery Load job" - default = "" +variable "environment_variables" { + description = "Environment variables to set on the cloud function." 
+ type = map(string) + default = {} } -variable "job_prefix" { - description = "Prefix for BigQuery Job IDs " - default = "" -} variable "region" { description = "GCP region in which to deploy cloud function" diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py index aa0cc5a94..197d4f92d 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. """integration tests for the ordering behavior of backlog gcs_ocn_bq_ingest""" +import multiprocessing import os import queue +import random import time import pytest +from google.cloud import bigquery from google.cloud import storage import gcs_ocn_bq_ingest.constants @@ -114,14 +117,17 @@ def test_backlog_publisher_with_existing_backfill_file(gcs, gcs_bucket, @pytest.mark.IT @pytest.mark.ORDERING -def test_backlog_subscriber_in_order(bq, gcs, gcs_bucket, error, dest_dataset, - dest_ordered_update_table, - gcs_ordered_update_data, - gcs_external_update_config, gcs_backlog, - mock_env): +def test_backlog_subscriber_in_order_with_new_batch_after_exit( + bq, gcs, gcs_bucket, dest_dataset, dest_ordered_update_table, + gcs_ordered_update_data, gcs_external_update_config, gcs_backlog, + mock_env): """Test basic functionality of backlog subscriber. Populate a backlog with 3 files that make updates where we can assert that these jobs were applied in order. + + To ensure that the subscriber cleans up properly after itself before exit, + we will drop a 4th batch after the subscriber has exited and assert that it + gets applied as expected. 
""" gcs_ocn_bq_ingest.ordering.backlog_subscriber(gcs, bq, gcs_external_update_config, @@ -144,16 +150,8 @@ def test_backlog_subscriber_in_order(bq, gcs, gcs_bucket, error, dest_dataset, # Now we will test what happens when the publisher posts another batch after # the backlog subscriber has exited. - data_obj: storage.Blob - for test_file in ["data.csv", "_SUCCESS"]: - data_obj = gcs_bucket.blob("/".join([ - f"{dest_dataset.project}.{dest_dataset.dataset_id}", - dest_ordered_update_table.table_id, "04", test_file - ])) - data_obj.upload_from_filename( - os.path.join(TEST_DIR, "resources", "test-data", "ordering", "04", - test_file)) - backfill_blob = gcs_ocn_bq_ingest.ordering.backlog_publisher(gcs, data_obj) + backfill_blob = _post_a_new_batch(gcs_bucket, dest_dataset, + dest_ordered_update_table) gcs_ocn_bq_ingest.ordering.backlog_subscriber(gcs, bq, backfill_blob, time.monotonic()) @@ -166,3 +164,77 @@ def test_backlog_subscriber_in_order(bq, gcs, gcs_bucket, error, dest_dataset, num_rows += 1 assert row["alpha_update"] == "ABCD", "new incremental not applied" assert num_rows == expected_num_rows + + +@pytest.mark.IT +@pytest.mark.ORDERING +@pytest.mark.repeat(5) +def test_backlog_subscriber_in_order_with_new_batch_while_running( + bq, gcs, gcs_bucket, dest_dataset, dest_ordered_update_table, + gcs_ordered_update_data, gcs_external_update_config: storage.Blob, + gcs_backlog, mock_env): + """Test functionality of backlog subscriber when new batches are added + before the subscriber is done finishing the existing backlog. + + Populate a backlog with 3 files that make updates where we can assert + that these jobs were applied in order. + In another process populate a fourth batch, and call the publisher. + """ + # Cannot pickle clients to another process so we need to recreate some + # objects without the client property. 
+ backfill_blob = storage.Blob.from_string( + f"gs://{gcs_external_update_config.bucket.name}/" + f"{gcs_external_update_config.name}") + dataset = bigquery.Dataset.from_string( + f"{dest_dataset.project}.{dest_dataset.dataset_id}") + table = bigquery.Table.from_string( + f"{dest_dataset.project}.{dest_dataset.dataset_id}." + f"{dest_ordered_update_table.table_id}") + bkt = storage.Bucket.from_string(f"gs://{gcs_bucket.name}") + + # Run subscriber w/ backlog and publisher w/ new batch in parallel. + with multiprocessing.Pool(processes=2) as pool: + res_subscriber = pool.apply_async( + gcs_ocn_bq_ingest.ordering.backlog_subscriber, + (None, None, backfill_blob, time.monotonic())) + # We run this test multiple times and sleep a random amount to simulate + # the next batch landing at a random time during the backfill. + time.sleep(random.uniform(0, 2)) + res_backlog_publisher = pool.apply_async(_post_a_new_batch, + (bkt, dataset, table)) + + # wait on each function to complete + res_subscriber.wait() + res_backlog_publisher.wait() + + backlog_blobs = gcs_bucket.list_blobs( + prefix=f"{gcs_ocn_bq_ingest.utils.get_table_prefix(gcs_external_update_config.name)}/_backlog/" + ) + assert backlog_blobs.num_results == 0, "backlog is not empty" + bqlock_blob: storage.Blob = gcs_bucket.blob("_bqlock") + assert not bqlock_blob.exists(), "_bqlock was not cleaned up" + rows = bq.query("SELECT alpha_update FROM " + f"{dest_ordered_update_table.dataset_id}" + f".{dest_ordered_update_table.table_id}") + expected_num_rows = 1 + num_rows = 0 + for row in rows: + num_rows += 1 + assert row["alpha_update"] == "ABCD", "backlog not applied in order" + assert num_rows == expected_num_rows + + +def _post_a_new_batch(gcs_bucket, dest_dataset, dest_ordered_update_table): + # We may run this in another process and cannot pickle client objects + gcs = storage.Client() + data_obj: storage.Blob + for test_file in ["data.csv", "_SUCCESS"]: + data_obj = gcs_bucket.blob("/".join([ + 
f"{dest_dataset.project}.{dest_dataset.dataset_id}", + dest_ordered_update_table.table_id, "04", test_file + ])) + data_obj.upload_from_filename(os.path.join(TEST_DIR, "resources", + "test-data", "ordering", + "04", test_file), + client=gcs) + return gcs_ocn_bq_ingest.ordering.backlog_publisher(gcs, data_obj) From 2218212d585eb078b5872e110608ad4cbf6aa5ba Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Thu, 10 Dec 2020 17:16:58 -0800 Subject: [PATCH 32/90] terraform fmt --- .../terraform_module/gcs_ocn_bq_ingest_function/variables.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf index d26edee2e..ca3073a0d 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf @@ -38,8 +38,8 @@ variable "data_ingester_sa" { variable "environment_variables" { description = "Environment variables to set on the cloud function." 
- type = map(string) - default = {} + type = map(string) + default = {} } From d528d851f390bdc71db4c4305ced0cf8c02755a4 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Fri, 11 Dec 2020 13:37:23 -0800 Subject: [PATCH 33/90] handle abandoned _BACKFILL and other review feedback --- .../gcs_event_based_ingest/ORDERING.md | 16 +- .../gcs_event_based_ingest/cloudbuild.yaml | 12 +- .../gcs_ocn_bq_ingest/exceptions.py | 1 - .../gcs_ocn_bq_ingest/main.py | 100 ++++++---- .../gcs_ocn_bq_ingest/ordering.py | 40 +++- .../gcs_ocn_bq_ingest/utils.py | 14 +- .../ordered_backfill.py | 177 ------------------ .../gcs_event_based_ingest/pytest.ini | 1 + .../requirements-dev.txt | 2 +- .../gcs_event_based_ingest/tests/conftest.py | 6 + .../test_gcs_ocn_bq_ingest.py | 48 +++++ .../test_gcs_ocn_bq_ingest_it.py | 6 - .../gcs_ocn_bq_ingest/test_ordering_it.py | 10 +- 13 files changed, 174 insertions(+), 259 deletions(-) delete mode 100644 tools/cloud_functions/gcs_event_based_ingest/ordered_backfill.py diff --git a/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md b/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md index 8a3dda5d8..c85020276 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md +++ b/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md @@ -44,8 +44,8 @@ gs://${BUCKET}/${DATASET}/${TABLE}/incremental/_config/ORDERME ## Dealing With Out-of-Order Publishing to GCS During Historical Load In some use cases, there is a period where incrementals that must be applied in -order are uploaded in parallel (meaning their _SUCCESS files are expected to be -out of order). This typically happens during some historical backfill period. +order are uploaded in parallel (meaning their `_SUCCESS` files are expected to +be out of order). This typically happens during some historical backfill period. 
This can be solved by setting the `START_BACKFILL_FILENAME` environment variable to a file name that indicates that the parallel upload of historical incrementals is complete (e.g. `_HISTORYDONE`). This will cause all success @@ -90,7 +90,8 @@ The Backlog Publisher has two responsibilities: 1. add incoming success files to a table's `_backlog` so they are not "forgotten" by the ingestion system. 1. if there is a non-empty backlog start the backfill subscriber (if one is not -already running). This is accomplished by dropping a table level `_BACKFILL` file. +already running). This is accomplished by dropping a table level `_BACKFILL` +file if it does not already exist. ### Backlog Subscriber The Backlog Subscriber is responsible for keeping track of BigQuery jobs running @@ -105,11 +106,11 @@ The state of what BigQuery job is currently running on a table is kept in a In order to escape the maximum nine-minute (540s) Cloud Function Timeout, the backfill subscriber will re-trigger itself by posting a new `_BACKFILL` file until the `_backlog` for the table prefix is empty. When a new success file -arrives it is the reponsibility of the publisher to restart the subscriber. +arrives it is the responsibility of the publisher to restart the subscriber. ### Note on Handling Race Condition -we use subscribe_monitor to handle a rare race condition where: +We use `subscribe_monitor` to handle a rare race condition where: 1. subscriber reads an empty backlog (before it can delete the _BACKFILL blob...) @@ -130,9 +131,8 @@ loop of the backfill subscriber but this loop will not take any action and this wasted compute is far better than dropping a batch of data. 1. On the subscriber side we check if there was more time than 10 seconds between list backlog items and delete backfill calls. If so the -subscriber double checks that the backlog is still empty. This way -we always handle this race condition either in this monitor or in the -subscriber itself. 
+subscriber double checks that the backlog is still empty. This way we always +handle this race condition either in this monitor or in the subscriber itself. ### Visualization of Ordering Triggers in the Cloud Function diff --git a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index 697e6d702..4a6d5b519 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -113,19 +113,21 @@ steps: - 'mypy-main' - 'mypy-tests' - 'terraform-fmt' + entrypoint: /bin/sh args: - - '-m' - - 'not IT' + - '-c' + # pip installing again to get GCB to recognize mocker from pytest-mock + - 'pip install -r requirements-dev.txt && python3 -m pytest -m "not IT"' id: 'unit-test' - name: 'gcr.io/$PROJECT_ID/gcs_event_based_ingest_ci' dir: '${_BUILD_DIR}' waitFor: - 'build-ci-image' - 'unit-test' + entrypoint: /bin/sh args: - - '--maxfail=1' - - '-m' - - 'IT' + - '-c' + - 'pip install -r requirements-dev.txt && python3 -m pytest -m IT' id: 'integration-test' options: machineType: 'N1_HIGHCPU_32' diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/exceptions.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/exceptions.py index 908db717c..a1126c22e 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/exceptions.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/exceptions.py @@ -48,5 +48,4 @@ class BacklogException(Exception): UnexpectedTriggerException, DestinationRegexMatchException, BacklogException, - DuplicateNotificationException, } diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index fe4efb903..73b7b9657 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ 
b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -18,7 +18,8 @@ """ import os import time -from typing import Dict +import traceback +from typing import Dict, Optional # pylint in cloud build is being flaky about this import discovery. # pylint: disable=no-name-in-module @@ -65,7 +66,6 @@ def main(event: Dict, context): # pylint: disable=unused-argument gcs_client = lazy_gcs_client() bq_client = lazy_bq_client() - table_ref, batch = utils.gcs_path_to_table_ref_and_batch(object_id) enforce_ordering = (constants.ORDER_PER_TABLE or utils.look_for_config_in_parents( @@ -75,43 +75,14 @@ def main(event: Dict, context): # pylint: disable=unused-argument bkt: storage.Bucket = utils.cached_get_bucket(gcs_client, bucket_id) event_blob: storage.Blob = bkt.blob(object_id) - if enforce_ordering: - # For SUCCESS files in a backlog directory, ensure that subscriber - # is running. - if (basename_object_id == constants.SUCCESS_FILENAME - and "/_backlog/" in object_id): - print( - f"This notification was for " - f"gs://{bucket_id}/{object_id} a" - f"{constants.SUCCESS_FILENAME} in a" - "/_backlog/ directory. " - f"Watiting {constants.ENSURE_SUBSCRIBER_SECONDS} seconds to " - "ensure that subscriber is running.") - ordering.subscriber_monitor(gcs_client, bkt, object_id) - return - if (constants.START_BACKFILL_FILENAME and basename_object_id - == constants.START_BACKFILL_FILENAME): - # This will be the first backfill file. - ordering.start_backfill_subscriber_if_not_running( - gcs_client, bkt, utils.get_table_prefix(object_id)) - return - if basename_object_id == constants.SUCCESS_FILENAME: - ordering.backlog_publisher(gcs_client, event_blob) - return - if basename_object_id == constants.BACKFILL_FILENAME: - ordering.backlog_subscriber(gcs_client, bq_client, event_blob, - function_start_time) - return - else: # Default behavior submit job as soon as success file lands. 
- if basename_object_id == constants.SUCCESS_FILENAME: - utils.apply( - gcs_client, - bq_client, - event_blob, - # None lock blob as there is no serialization required. - None, - utils.create_job_id(table_ref, batch)) + triage_event(gcs_client, bq_client, event_blob, function_start_time, + enforce_ordering) + # Unexpected exceptions will actually raise which may cause a cold restart. + except exceptions.DuplicateNotificationException: + print("recieved duplicate notification. this was handled gracefully." + f"{traceback.format_exc()}") + except tuple(exceptions.EXCEPTIONS_TO_REPORT) as original_error: # We do this because we know these errors do not require a cold restart # of the cloud function. @@ -124,6 +95,59 @@ def main(event: Dict, context): # pylint: disable=unused-argument raise original_error # pylint: disable=raise-missing-from +def triage_event(gcs_client: Optional[storage.Client], + bq_client: Optional[bigquery.Client], + event_blob: storage.Blob, + function_start_time: float, + enforce_ordering: bool = False): + """call the appropriate method based on the details of the trigger event + blob.""" + bkt = event_blob.bucket + basename_object_id = os.path.basename(event_blob.name) + table_ref, batch = utils.gcs_path_to_table_ref_and_batch(event_blob.name) + if enforce_ordering: + # For SUCCESS files in a backlog directory, ensure that subscriber + # is running. + if (basename_object_id == constants.SUCCESS_FILENAME + and "/_backlog/" in event_blob.name): + print(f"This notification was for " + f"gs://{bkt.name}/{event_blob.name} a" + f"{constants.SUCCESS_FILENAME} in a" + "/_backlog/ directory. " + f"Watiting {constants.ENSURE_SUBSCRIBER_SECONDS} seconds to " + "ensure that subscriber is running.") + ordering.subscriber_monitor(gcs_client, bkt, event_blob.name) + return + if (constants.START_BACKFILL_FILENAME + and basename_object_id == constants.START_BACKFILL_FILENAME): + # This will be the first backfill file. 
+ ordering.start_backfill_subscriber_if_not_running( + gcs_client, bkt, utils.get_table_prefix(event_blob.name)) + return + if basename_object_id == constants.SUCCESS_FILENAME: + ordering.backlog_publisher(gcs_client, event_blob) + return + if basename_object_id == constants.BACKFILL_FILENAME: + if (event_blob.name != f"{utils.get_table_prefix(event_blob.name)}/" + f"{constants.BACKFILL_FILENAME}"): + raise RuntimeError( + f"recieved notification for gs://{event_blob.bucket.name}/" + f"{event_blob.name}\n{constants.BACKFILL_FILENAME} files " + "are expected only at the table prefix level.") + ordering.backlog_subscriber(gcs_client, bq_client, event_blob, + function_start_time) + return + else: # Default behavior submit job as soon as success file lands. + if basename_object_id == constants.SUCCESS_FILENAME: + utils.apply( + gcs_client, + bq_client, + event_blob, + # None lock blob as there is no serialization required. + None, + utils.create_job_id(table_ref, batch)) + + def lazy_error_reporting_client() -> error_reporting.Client: """ Return a error reporting client that may be shared between cloud function diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py index 0e88238b3..b8ea9c323 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py @@ -14,8 +14,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Background Cloud Function for loading data from GCS to BigQuery. +"""Implement function to ensure loading data from GCS to BigQuery in order. 
""" +import datetime import os import time import traceback @@ -192,7 +193,7 @@ def start_backfill_subscriber_if_not_running( client=gcs_client) print("triggered backfill with " f"gs://{backfill_blob.bucket.name}/{backfill_blob.name} " - f"created at {backfill_blob.time_created}. exiting. ") + f"created at {backfill_blob.time_created}. exiting.") return backfill_blob except google.api_core.exceptions.PreconditionFailed: backfill_blob.reload(client=gcs_client) @@ -225,7 +226,7 @@ def subscriber_monitor(gcs_client: storage.Client, bkt: storage.Bucket, 2. a new item is added to the backlog (causing a separate function invocation) 3. In this new invocation we reach this point in the code path - and start_subscriber_if_not_running sees the old _BACKFILL + and start_backlog_subscriber_if_not_running sees the old _BACKFILL and does not create a new one. 4. The subscriber deletes the _BACKFILL blob and exits without processing the new item on the backlog from #2. @@ -240,23 +241,42 @@ def subscriber_monitor(gcs_client: storage.Client, bkt: storage.Bucket, backfill_blob = start_backfill_subscriber_if_not_running( gcs_client, bkt, utils.get_table_prefix(object_id)) - time.sleep(constants.ENSURE_SUBSCRIBER_SECONDS) - while not utils.wait_on_gcs_blob(gcs_client, backfill_blob, - constants.ENSURE_SUBSCRIBER_SECONDS): - backfill_blob = \ + # backfill blob may be none if the START_BACKFILL_FILENAME has not been + # dropped + if backfill_blob: + # Handle case where a subscriber loop was not able to repost the + # backfill file before the cloud function timeout. + if (datetime.datetime.utcnow() - backfill_blob.time_created > + datetime.timedelta( + seconds=int(os.getenv("FUNCTION_TIMEOUT_SEC", "60")))): + print( + f"backfill blob gs://{backfill_blob.bucket.name}/" + f"{backfill_blob.name} appears to be abandoned as it is older " + "than the cloud function timeout of " + f"{os.getenv('FUNCTION_TIMEOUT_SEC', '60')} seconds." 
+ "reposting this backfill blob to restart the backfill" + "subscriber for this table.") + backfill_blob.delete(client=gcs_client) start_backfill_subscriber_if_not_running( gcs_client, bkt, utils.get_table_prefix(object_id)) + return + + time.sleep(constants.ENSURE_SUBSCRIBER_SECONDS) + while not utils.wait_on_gcs_blob(gcs_client, backfill_blob, + constants.ENSURE_SUBSCRIBER_SECONDS): + backfill_blob = \ + start_backfill_subscriber_if_not_running( + gcs_client, bkt, utils.get_table_prefix(object_id)) def _get_clients_if_none( - gcs_client: Optional[storage.Client], - bq_client: Optional[bigquery.Client] + gcs_client: Optional[storage.Client], bq_client: Optional[bigquery.Client] ) -> Tuple[storage.Client, bigquery.Client]: """method to handle case where clients are None. This is a workaround to be able to run the backlog subscriber in a separate process to facilitate some of our integration tests. Though it should be - harmless. + harmless if these clients are recreated in the Cloud Function. """ print("instantiating missing clients in backlog subscriber this should only" "happen during integration tests.") diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py index c07f718cb..1c83e7deb 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -306,14 +306,6 @@ def parse_notification(notification: dict) -> Tuple[str, str]: "https://cloud.google.com/functions/docs/tutorials/storage") -# cache lookups against GCS API for 1 second as buckets / objects have update -# limit of once per second and we might do several of the same lookup during -# the functions lifetime. This should improve performance by eliminating -# unnecessary API calls. 
The lookups on bucket and objects in this function -# should not be changing during the function's lifetime as this would lead to -# non-deterministic results with or without this cache. -# https://cloud.google.com/storage/quotas -@cachetools.cached(cachetools.TTLCache(maxsize=1024, ttl=1)) def read_gcs_file(gcs_client: storage.Client, gsurl: str) -> str: """ Read a GCS object as a string @@ -338,7 +330,11 @@ def read_gcs_file_if_exists(gcs_client: storage.Client, return None -# Cache bucket lookups (see reasoning in comment above) +# cache lookups against GCS API for 1 second as buckets have update +# limit of once per second and we might do several of the same lookup during +# the functions lifetime. This should improve performance by eliminating +# unnecessary API calls. +# https://cloud.google.com/storage/quotas @cachetools.cached(cachetools.TTLCache(maxsize=1024, ttl=1)) def cached_get_bucket( gcs_client: storage.Client, diff --git a/tools/cloud_functions/gcs_event_based_ingest/ordered_backfill.py b/tools/cloud_functions/gcs_event_based_ingest/ordered_backfill.py deleted file mode 100644 index ed0b1da79..000000000 --- a/tools/cloud_functions/gcs_event_based_ingest/ordered_backfill.py +++ /dev/null @@ -1,177 +0,0 @@ -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" Command Line utility for backfilling gcs_ocn_bq_ingest cloud function when -ordering of incrementals is required -""" -import argparse -import concurrent.futures -import logging -import os -import pprint -import sys -from typing import Dict, Iterator, List - -import google.api_core.client_info -from google.cloud import storage - -import gcs_ocn_bq_ingest.main # pylint: disable=import-error - -CLIENT_INFO = google.api_core.client_info.ClientInfo( - user_agent="google-pso-tool/bq-severless-loader-cli") - -os.environ["FUNCTION_NAME"] = "backfill-cli" - - -def find_blobs_with_suffix( - gcs_client: storage.Client, - prefix: str, - suffix: str = "_SUCCESS", -) -> Iterator[storage.Blob]: - """ - Find GCS blobs with a given suffix. - - :param gcs_client: storage.Client - :param prefix: A GCS prefix to search i.e. gs://bucket/prefix/to/search - :param suffix: A suffix in blob name to match - :return: Iterable of blobs matching the suffix. - """ - prefix_blob: storage.Blob = storage.Blob.from_string(prefix) - # filter passes on scalability / laziness advantages of iterator. - return filter( - lambda blob: blob.name.endswith(suffix), - prefix_blob.bucket.list_blobs(client=gcs_client, - prefix=prefix_blob.name)) - - -def main(args: argparse.Namespace): - """main entry point for backfill CLI.""" - gcs_client: storage.Client = storage.Client(client_info=CLIENT_INFO) - pubsub_client = None - suffix = args.success_filename - if args.destination_regex: - os.environ["DESTINATION_REGEX"] = args.destination_regex - if args.mode == "NOTIFICATIONS": - if not args.pubsub_topic: - raise ValueError("when passing mode=NOTIFICATIONS" - "you must also pass pubsub_topic.") - # import is here because this utility can be used without - # google-cloud-pubsub dependency in LOCAL mode. - # pylint: disable=import-outside-toplevel - from google.cloud import pubsub - pubsub_client = pubsub.PublisherClient() - - # These are all I/O bound tasks so use Thread Pool concurrency for speed. 
- with concurrent.futures.ThreadPoolExecutor() as executor: - future_to_gsurl = {} - for blob in find_blobs_with_suffix(gcs_client, args.gcs_path, suffix): - if pubsub_client: - # kwargs are message attributes - # https://googleapis.dev/python/pubsub/latest/publisher/index.html#publish-a-message - logging.info("sending pubsub message for: %s", - f"gs://{blob.bucket.name}/{blob.name}") - future_to_gsurl[executor.submit( - pubsub_client.publish, - args.pubsub_topic, - b'', # cloud function ignores message body - bucketId=blob.bucket.name, - objectId=blob.name, - _metaInfo="this message was submitted with " - "gcs_ocn_bq_ingest backfill.py utility" - )] = f"gs://{blob.bucket.name}/{blob.name}" - else: - logging.info("running cloud function locally for: %s", - f"gs://{blob.bucket.name}/{blob.name}") - future_to_gsurl[executor.submit( - gcs_ocn_bq_ingest.main.main, - { - "attributes": { - "bucketId": blob.bucket.name, - "objectId": blob.name - } - }, - None, - )] = f"gs://{blob.bucket.name}/{blob.name}" - exceptions: Dict[str, Exception] = dict() - for future in concurrent.futures.as_completed(future_to_gsurl): - gsurl = future_to_gsurl[future] - try: - future.result() - except Exception as err: # pylint: disable=broad-except - logging.error("Error processing %s: %s", gsurl, err) - exceptions[gsurl] = err - if exceptions: - raise RuntimeError("The following errors were encountered:\n" + - pprint.pformat(exceptions)) - - -def parse_args(args: List[str]) -> argparse.Namespace: - """argument parser for backfill CLI""" - parser = argparse.ArgumentParser( - description="utility to backfill success file notifications " - "or run the cloud function locally in concurrent threads.") - - parser.add_argument( - "--gcs-path", - "-p", - help="GCS path (e.g. 
gs://bucket/prefix/to/search/)to search for " - "existing _SUCCESS files", - required=True, - ) - - parser.add_argument( - "--mode", - "-m", - help="How to perform the backfill: LOCAL run cloud function main" - " method locally (in concurrent threads) or NOTIFICATIONS just push" - " notifications to Pub/Sub for a deployed version of the cloud function" - " to pick up. Default is NOTIFICATIONS.", - required=False, - type=str.upper, - choices=["LOCAL", "NOTIFICATIONS"], - default="NOTIFICATIONS", - ) - - parser.add_argument( - "--pubsub-topic", - "--topic", - "-t", - help="Pub/Sub notifications topic to post notifications for. " - "i.e. projects/{PROJECT_ID}/topics/{TOPIC_ID} " - "Required if using NOTIFICATIONS mode.", - required=False, - default=None, - ) - - parser.add_argument( - "--success-filename", - "-f", - help="Override the default success filename '_SUCCESS'", - required=False, - default="_SUCCESS", - ) - - parser.add_argument( - "--destination-regex", - "-r", - help="Override the default destination regex for determining BigQuery" - "destination based on information encoded in the GCS path of the" - "success file", - required=False, - default=None, - ) - return parser.parse_args(args) - - -if __name__ == "__main__": - main(parse_args(sys.argv)) diff --git a/tools/cloud_functions/gcs_event_based_ingest/pytest.ini b/tools/cloud_functions/gcs_event_based_ingest/pytest.ini index 3864588b3..07bd1315d 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/pytest.ini +++ b/tools/cloud_functions/gcs_event_based_ingest/pytest.ini @@ -4,3 +4,4 @@ markers = ORDERING: marks tests that test features related to ordering CLI: marks tests of CLI utilities addopts = --workers=auto + diff --git a/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt b/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt index f250ab6ee..a9b6c99c0 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt +++ 
b/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt @@ -8,4 +8,4 @@ pylint pytest-parallel pytest-cov google-cloud-pubsub>=2.2.0 -pytest-repeat +pytest-mock diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index d8e877e0b..5b06554f3 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -77,6 +77,12 @@ def mock_env(gcs, monkeypatch): monkeypatch.setenv("FUNCTION_TIMEOUT_SEC", "120") +@pytest.fixture +def ordered_mock_env(mock_env, monkeypatch): + """environment variable mocks""" + monkeypatch.setenv("ORDER_PER_TABLE", "TRUE") + + @pytest.fixture def dest_dataset(request, bq, mock_env, monkeypatch): random_dataset = f"test_bq_ingest_gcf_{str(uuid.uuid4())[:8].replace('-','_')}" diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py index 49f76389f..6dfc57dec 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py @@ -14,9 +14,11 @@ # limitations under the License. 
"""unit tests for gcs_ocn_bq_ingest""" import re +import time from typing import Dict, Optional import pytest +from google.cloud import storage import gcs_ocn_bq_ingest.constants import gcs_ocn_bq_ingest.main @@ -236,3 +238,49 @@ def test_recursive_update(original, update, expected): ]) def test_get_table_prefix(test_input, expected): assert gcs_ocn_bq_ingest.utils.get_table_prefix(test_input) == expected + + +def test_triage_event(mock_env, mocker): + test_event_blob: storage.Blob = storage.Blob.from_string( + "gs://foo/bar/baz/00/_SUCCESS") + apply_mock = mocker.patch('gcs_ocn_bq_ingest.utils.apply') + gcs_ocn_bq_ingest.main.triage_event(None, None, test_event_blob, + time.monotonic()) + apply_mock.assert_called_once() + + +def test_triage_event_ordered(ordered_mock_env, mocker): + enforce_ordering = True + test_event_blob: storage.Blob = storage.Blob.from_string( + "gs://foo/bar/baz/00/_SUCCESS") + apply_mock = mocker.patch('gcs_ocn_bq_ingest.utils.apply') + publisher_mock = mocker.patch( + 'gcs_ocn_bq_ingest.ordering.backlog_publisher') + gcs_ocn_bq_ingest.main.triage_event(None, + None, + test_event_blob, + time.monotonic(), + enforce_ordering=enforce_ordering) + publisher_mock.assert_called_once() + + test_event_blob: storage.Blob = storage.Blob.from_string( + "gs://foo/bar/baz/_BACKFILL") + subscriber_mock = mocker.patch( + 'gcs_ocn_bq_ingest.ordering.backlog_subscriber') + gcs_ocn_bq_ingest.main.triage_event(None, + None, + test_event_blob, + time.monotonic(), + enforce_ordering=enforce_ordering) + subscriber_mock.assert_called_once() + + test_event_blob: storage.Blob = storage.Blob.from_string( + "gs://foo/bar/baz/_backlog/00/_SUCCESS") + monitor_mock = mocker.patch('gcs_ocn_bq_ingest.ordering.subscriber_monitor') + gcs_ocn_bq_ingest.main.triage_event(None, + None, + test_event_blob, + time.monotonic(), + enforce_ordering=enforce_ordering) + monitor_mock.assert_called_once() + apply_mock.assert_not_called() diff --git 
a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py index 6459a206b..e1fe45b18 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py @@ -14,7 +14,6 @@ """integration tests for gcs_ocn_bq_ingest""" import os import time -import unittest.mock import google.cloud.exceptions import pytest @@ -80,11 +79,6 @@ def test_duplicate_success_notification(bq, gcs_data, dest_dataset, dest_table, } } gcs_ocn_bq_ingest.main.main(test_event, None) - with unittest.mock.patch.object(google.cloud.error_reporting.Client, - "report_exception") as mock_method: - gcs_ocn_bq_ingest.main.main(test_event, None) - - mock_method.assert_called_once() test_data_file = os.path.join(TEST_DIR, "resources", "test-data", "nation", "part-m-00001") diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py index 197d4f92d..c231d498e 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py @@ -168,11 +168,13 @@ def test_backlog_subscriber_in_order_with_new_batch_after_exit( @pytest.mark.IT @pytest.mark.ORDERING -@pytest.mark.repeat(5) +@pytest.mark.parametrize('execution_number', range(5)) def test_backlog_subscriber_in_order_with_new_batch_while_running( - bq, gcs, gcs_bucket, dest_dataset, dest_ordered_update_table, - gcs_ordered_update_data, gcs_external_update_config: storage.Blob, - gcs_backlog, mock_env): + execution_number, + bq, gcs, gcs_bucket, dest_dataset, dest_ordered_update_table, + gcs_ordered_update_data, 
gcs_external_update_config: storage.Blob, + gcs_backlog, mock_env +): """Test functionality of backlog subscriber when new batches are added before the subscriber is done finishing the existing backlog. From a0114e17f7f920afccd54459ace62efadd20afc6 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Fri, 11 Dec 2020 17:32:16 -0800 Subject: [PATCH 34/90] improve tests --- .../gcs_event_based_ingest/cloudbuild.yaml | 1 + .../gcs_ocn_bq_ingest/main.py | 3 +- .../gcs_ocn_bq_ingest/ordering.py | 19 ++++--- .../gcs_ocn_bq_ingest/utils.py | 2 +- .../requirements-dev.txt | 1 + .../gcs_event_based_ingest/requirements.txt | 4 +- .../gcs_event_based_ingest/tests/conftest.py | 2 +- .../gcs_ocn_bq_ingest/test_ordering_it.py | 53 ++++++++++++++----- 8 files changed, 60 insertions(+), 25 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index 4a6d5b519..a41666c65 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -129,6 +129,7 @@ steps: - '-c' - 'pip install -r requirements-dev.txt && python3 -m pytest -m IT' id: 'integration-test' +timeout: '1200s' options: machineType: 'N1_HIGHCPU_32' substitutions: diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 73b7b9657..b172f430f 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -132,7 +132,8 @@ def triage_event(gcs_client: Optional[storage.Client], f"{constants.BACKFILL_FILENAME}"): raise RuntimeError( f"recieved notification for gs://{event_blob.bucket.name}/" - f"{event_blob.name}\n{constants.BACKFILL_FILENAME} files " + f"{event_blob.name}\n" + f"{constants.BACKFILL_FILENAME} files " "are expected only at the table prefix 
level.") ordering.backlog_subscriber(gcs_client, bq_client, event_blob, function_start_time) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py index b8ea9c323..74e490601 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py @@ -64,12 +64,14 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], restart_time = function_start_time + ( float(os.getenv("FUNCTION_TIMEOUT_SEC", "60")) - constants.RESTART_BUFFER_SECONDS) + backfill_blob_generation = backfill_blob.generation bkt = backfill_blob.bucket utils.handle_duplicate_notification(gcs_client, backfill_blob) table_prefix = utils.get_table_prefix(backfill_blob.name) last_job_done = False # we will poll for job completion this long in an individual iteration of - # the while loop. + # the while loop (before checking if we are too close to cloud function + # timeout and should retrigger). polling_timeout = 5 # seconds lock_blob: storage.Blob = bkt.blob(f"{table_prefix}/_bqlock") if restart_time - polling_timeout < time.monotonic(): @@ -82,6 +84,8 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], lock_contents = utils.read_gcs_file_if_exists( gcs_client, f"gs://{bkt.name}/{lock_blob.name}") if lock_contents: + # is this a lock placed by this cloud function. + # the else will handle a manual _bqlock if lock_contents.startswith( os.getenv('JOB_PREFIX', constants.DEFAULT_JOB_PREFIX)): job_id = lock_contents @@ -109,8 +113,9 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], else: print(f"sleeping for {polling_timeout} seconds because" f"found manual lock gs://{bkt.name}/{lock_blob.name} with" - f"contents:\n {lock_contents}. 
This will be an infinite" - "loop until the manual lock is released.") + "This will be an infinite loop until the manual lock is " + "released.\n" + f"manual lock contents:\n {lock_contents}. ") time.sleep(polling_timeout) continue if last_job_done: @@ -121,7 +126,7 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], next_backlog_file = utils.get_next_backlog_item(gcs_client, bkt, table_prefix) if not next_backlog_file: - backfill_blob.delete(if_generation_match=backfill_blob.generation, + backfill_blob.delete(if_generation_match=backfill_blob_generation, client=gcs_client) if (check_backlog_time + constants.ENSURE_SUBSCRIBER_SECONDS < time.monotonic()): @@ -154,9 +159,9 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], next_success_file.name) if not next_success_file.exists(client=gcs_client): raise exceptions.BacklogException( - "backlog contains" - f"gs://{next_backlog_file.bucket}/{next_backlog_file.name}" - "but the corresponding success file does not exist at:" + "backlog contains " + f"gs://{next_backlog_file.bucket}/{next_backlog_file.name} " + "but the corresponding success file does not exist at: " f"gs://{next_success_file.bucket}/{next_success_file.name}") utils.apply(gcs_client, bq_client, next_success_file, lock_blob, utils.create_job_id(table_ref, batch)) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py index 1c83e7deb..7d9c85900 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -486,7 +486,7 @@ def remove_oldest_backlog_item( table_prefix: str, ) -> bool: """ - Remove the oldes pointer in the backlog if the backlog is not empty. + Remove the oldest pointer in the backlog if the backlog is not empty. 
Args: gcs_client: storage.Client diff --git a/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt b/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt index a9b6c99c0..2fe24ea9a 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt +++ b/tools/cloud_functions/gcs_event_based_ingest/requirements-dev.txt @@ -9,3 +9,4 @@ pytest-parallel pytest-cov google-cloud-pubsub>=2.2.0 pytest-mock +pytest-repeat diff --git a/tools/cloud_functions/gcs_event_based_ingest/requirements.txt b/tools/cloud_functions/gcs_event_based_ingest/requirements.txt index 7279c2550..b715db130 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/requirements.txt +++ b/tools/cloud_functions/gcs_event_based_ingest/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery>=2.6.0 -google-cloud-storage>=1.33.0 +google-cloud-bigquery>=2.6.1 +google-cloud-storage>=1.34.0 google-cloud-error-reporting>=1.1.0 cachetools diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index 5b06554f3..1d4043de5 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -74,7 +74,7 @@ def mock_env(gcs, monkeypatch): # Infer project from ADC of gcs client. 
monkeypatch.setenv("GCP_PROJECT", gcs.project) monkeypatch.setenv("FUNCTION_NAME", "integration-test") - monkeypatch.setenv("FUNCTION_TIMEOUT_SEC", "120") + monkeypatch.setenv("FUNCTION_TIMEOUT_SEC", "540") @pytest.fixture diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py index c231d498e..50ff1c97c 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py @@ -15,7 +15,6 @@ import multiprocessing import os import queue -import random import time import pytest @@ -30,6 +29,11 @@ TEST_DIR = os.path.realpath(os.path.dirname(__file__) + "/..") LOAD_JOB_POLLING_TIMEOUT = 20 # seconds +# Testing that the subscriber does not get choked up by a common race condition +# is crucial to ensuring this solution works. +# This parameter is for running the subscriber tests many times. +NUM_TRIES_SUBSCRIBER_TESTS = 25 + @pytest.mark.IT @pytest.mark.ORDERING @@ -117,6 +121,7 @@ def test_backlog_publisher_with_existing_backfill_file(gcs, gcs_bucket, @pytest.mark.IT @pytest.mark.ORDERING +@pytest.mark.repeat(NUM_TRIES_SUBSCRIBER_TESTS) def test_backlog_subscriber_in_order_with_new_batch_after_exit( bq, gcs, gcs_bucket, dest_dataset, dest_ordered_update_table, gcs_ordered_update_data, gcs_external_update_config, gcs_backlog, @@ -129,9 +134,11 @@ def test_backlog_subscriber_in_order_with_new_batch_after_exit( we will drop a 4th batch after the subscriber has exited and assert that it gets applied as expected. 
""" - gcs_ocn_bq_ingest.ordering.backlog_subscriber(gcs, bq, - gcs_external_update_config, - time.monotonic()) + _run_subscriber( + gcs, + bq, + gcs_external_update_config + ) backlog_blobs = gcs_bucket.list_blobs( prefix=f"{gcs_ocn_bq_ingest.utils.get_table_prefix(gcs_external_update_config.name)}/_backlog/" ) @@ -152,8 +159,7 @@ def test_backlog_subscriber_in_order_with_new_batch_after_exit( # the backlog subscriber has exited. backfill_blob = _post_a_new_batch(gcs_bucket, dest_dataset, dest_ordered_update_table) - gcs_ocn_bq_ingest.ordering.backlog_subscriber(gcs, bq, backfill_blob, - time.monotonic()) + _run_subscriber(gcs, bq, backfill_blob) rows = bq.query("SELECT alpha_update FROM " f"{dest_ordered_update_table.dataset_id}" @@ -168,9 +174,8 @@ def test_backlog_subscriber_in_order_with_new_batch_after_exit( @pytest.mark.IT @pytest.mark.ORDERING -@pytest.mark.parametrize('execution_number', range(5)) +@pytest.mark.repeat(NUM_TRIES_SUBSCRIBER_TESTS) def test_backlog_subscriber_in_order_with_new_batch_while_running( - execution_number, bq, gcs, gcs_bucket, dest_dataset, dest_ordered_update_table, gcs_ordered_update_data, gcs_external_update_config: storage.Blob, gcs_backlog, mock_env @@ -194,14 +199,19 @@ def test_backlog_subscriber_in_order_with_new_batch_while_running( f"{dest_ordered_update_table.table_id}") bkt = storage.Bucket.from_string(f"gs://{gcs_bucket.name}") + basename = os.path.basename(gcs_external_update_config.name) + claim_blob: storage.Blob = gcs_external_update_config.bucket.blob( + gcs_external_update_config.name.replace( + basename, f"_claimed_{basename}_created_at_" + f"{gcs_external_update_config.time_created.timestamp()}")) # Run subscriber w/ backlog and publisher w/ new batch in parallel. 
with multiprocessing.Pool(processes=2) as pool: res_subscriber = pool.apply_async( - gcs_ocn_bq_ingest.ordering.backlog_subscriber, - (None, None, backfill_blob, time.monotonic())) - # We run this test multiple times and sleep a random amount to simulate - # the next batch landing at a random time during the backfill. - time.sleep(random.uniform(0, 2)) + _run_subscriber, + (None, None, backfill_blob)) + # wait for existence of claim blob to ensure subscriber is running. + while not claim_blob.exists(): + pass res_backlog_publisher = pool.apply_async(_post_a_new_batch, (bkt, dataset, table)) @@ -226,6 +236,23 @@ def test_backlog_subscriber_in_order_with_new_batch_while_running( assert num_rows == expected_num_rows +def _run_subscriber( + gcs_client: storage.Client, + bq_client: bigquery.Client, + backfill_blob, +): + try: + gcs_ocn_bq_ingest.ordering.backlog_subscriber( + gcs_client, + bq_client, + backfill_blob, + time.monotonic()) + except gcs_ocn_bq_ingest.exceptions.DuplicateNotificationException: + print("ignoring potential duplicate notification exception as this is" + "not a critical error and would be ignored by the main method" + "of the cloud function.") + + def _post_a_new_batch(gcs_bucket, dest_dataset, dest_ordered_update_table): # We may run this in another process and cannot pickle client objects gcs = storage.Client() From def1ddb9fe0b2936cb2262c274039a5a33455b70 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Sun, 13 Dec 2020 16:48:28 -0800 Subject: [PATCH 35/90] fix: handle long running bq jobs --- .../gcs_event_based_ingest/.gitignore | 1 + .../gcs_ocn_bq_ingest/main.py | 9 +-- .../gcs_ocn_bq_ingest/ordering.py | 58 ++++++++++++------ .../gcs_ocn_bq_ingest/utils.py | 25 +++++--- .../gcs_event_based_ingest/pytest.ini | 6 ++ .../gcs_event_based_ingest/tests/conftest.py | 14 +++-- .../gcs_ocn_bq_ingest/test_ordering_it.py | 59 +++++++++---------- 7 files changed, 103 insertions(+), 69 deletions(-) diff --git 
a/tools/cloud_functions/gcs_event_based_ingest/.gitignore b/tools/cloud_functions/gcs_event_based_ingest/.gitignore index 8ca3bf9ba..66d580175 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/.gitignore +++ b/tools/cloud_functions/gcs_event_based_ingest/.gitignore @@ -1 +1,2 @@ prof/ +test.log diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index b172f430f..5f79f5962 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -140,13 +140,8 @@ def triage_event(gcs_client: Optional[storage.Client], return else: # Default behavior submit job as soon as success file lands. if basename_object_id == constants.SUCCESS_FILENAME: - utils.apply( - gcs_client, - bq_client, - event_blob, - # None lock blob as there is no serialization required. - None, - utils.create_job_id(table_ref, batch)) + utils.apply(gcs_client, bq_client, event_blob, None, + utils.create_job_id(table_ref, batch)) def lazy_error_reporting_client() -> error_reporting.Client: diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py index 74e490601..cd998f985 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py @@ -24,6 +24,7 @@ import google.api_core import google.api_core.exceptions +import pytz # pylint in cloud build is being flaky about this import discovery. 
# pylint: disable=no-name-in-module from google.cloud import bigquery @@ -52,7 +53,7 @@ def backlog_publisher( table_prefix) -# pylint: disable=too-many-arguments,too-many-locals +# pylint: disable=too-many-arguments,too-many-locals,too-many-statements,too-many-branches def backlog_subscriber(gcs_client: Optional[storage.Client], bq_client: Optional[bigquery.Client], backfill_blob: storage.Blob, function_start_time: float): @@ -80,7 +81,8 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], "backlog subscriber to do it's job. We recommend " "setting the timeout to 540 seconds or at least " "1 minute (Cloud Functions default).") - while time.monotonic() < restart_time - polling_timeout: + while time.monotonic() < restart_time - polling_timeout - 1: + first_bq_lock_claim = False lock_contents = utils.read_gcs_file_if_exists( gcs_client, f"gs://{bkt.name}/{lock_blob.name}") if lock_contents: @@ -118,14 +120,25 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], f"manual lock contents:\n {lock_contents}. ") time.sleep(polling_timeout) continue - if last_job_done: + else: # this condition handles absence of _bqlock file + first_bq_lock_claim = True + last_job_done = True # there's no running job to poll. + + if not last_job_done: + # keep polling the running job. + continue + + # if reached here, last job is done. + if not first_bq_lock_claim: + # If the BQ lock was missing we do not want to delete a backlog + # item for a job we have not yet submitted. 
utils.remove_oldest_backlog_item(gcs_client, bkt, table_prefix) - last_job_done = False check_backlog_time = time.monotonic() next_backlog_file = utils.get_next_backlog_item(gcs_client, bkt, table_prefix) if not next_backlog_file: + print("no more files found in the backlog deleting backfill blob") backfill_blob.delete(if_generation_match=backfill_blob_generation, client=gcs_client) if (check_backlog_time + constants.ENSURE_SUBSCRIBER_SECONDS < @@ -135,7 +148,7 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], f"gs://${bkt.name}/{table_prefix}/_backlog/" f"There was more than {constants.ENSURE_SUBSCRIBER_SECONDS}" " seconds between listing items on the backlog and " - f"attempting to delete the {constants.BACKFILL_FILENAME}. " + f"deleting the {constants.BACKFILL_FILENAME}. " "This should not happen often but is meant to alleviate a " "race condition in the event that something caused the " "delete operation was delayed or had to be retried for a " @@ -143,7 +156,7 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], next_backlog_file = utils.get_next_backlog_item( gcs_client, bkt, table_prefix) if next_backlog_file: - # The backfill file may have been deleted but the backlog is + # The backfill file was deleted but the backlog is # not empty. Re-trigger the backfill subscriber loop by # dropping a new backfill file. 
start_backfill_subscriber_if_not_running( @@ -163,8 +176,11 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], f"gs://{next_backlog_file.bucket}/{next_backlog_file.name} " "but the corresponding success file does not exist at: " f"gs://{next_success_file.bucket}/{next_success_file.name}") + print("applying next batch for:" + f"gs://{next_success_file.bucket}/{next_success_file.name}") + next_job_id = utils.create_job_id(table_ref, batch) utils.apply(gcs_client, bq_client, next_success_file, lock_blob, - utils.create_job_id(table_ref, batch)) + next_job_id) # retrigger the subscriber loop by reposting the _BACKFILL file print("ran out of time, restarting backfill subscriber loop for:" f"gs://{bkt.name}/{table_prefix}") @@ -173,13 +189,15 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], def start_backfill_subscriber_if_not_running( - gcs_client: storage.Client, bkt: storage.Bucket, + gcs_client: Optional[storage.Client], bkt: storage.Bucket, table_prefix: str) -> Optional[storage.Blob]: """start the backfill subscriber if it is not already runnning for this table prefix. created a backfill file for the table prefix if not exists. """ + if not gcs_client: + gcs_client = storage.Client(client_info=constants.CLIENT_INFO) start_backfill = True # Do not start subscriber until START_BACKFILL_FILENAME has been dropped # at the table prefix. @@ -198,7 +216,7 @@ def start_backfill_subscriber_if_not_running( client=gcs_client) print("triggered backfill with " f"gs://{backfill_blob.bucket.name}/{backfill_blob.name} " - f"created at {backfill_blob.time_created}. 
exiting.") + f"created at {backfill_blob.time_created}.") return backfill_blob except google.api_core.exceptions.PreconditionFailed: backfill_blob.reload(client=gcs_client) @@ -221,8 +239,8 @@ def success_blob_to_backlog_blob(success_blob: storage.Blob) -> storage.Blob: return bkt.blob(f"{table_prefix}/_backlog/{success_file_suffix}") -def subscriber_monitor(gcs_client: storage.Client, bkt: storage.Bucket, - object_id: str): +def subscriber_monitor(gcs_client: Optional[storage.Client], + bkt: storage.Bucket, object_id: str) -> bool: """ Monitor to handle a rare race condition where: @@ -243,6 +261,8 @@ def subscriber_monitor(gcs_client: storage.Client, bkt: storage.Bucket, we always handle this race condition either in this monitor or in the subscriber itself. """ + if not gcs_client: + gcs_client = storage.Client(client_info=constants.CLIENT_INFO) backfill_blob = start_backfill_subscriber_if_not_running( gcs_client, bkt, utils.get_table_prefix(object_id)) @@ -251,9 +271,10 @@ def subscriber_monitor(gcs_client: storage.Client, bkt: storage.Bucket, if backfill_blob: # Handle case where a subscriber loop was not able to repost the # backfill file before the cloud function timeout. 
- if (datetime.datetime.utcnow() - backfill_blob.time_created > - datetime.timedelta( - seconds=int(os.getenv("FUNCTION_TIMEOUT_SEC", "60")))): + time_created_utc = backfill_blob.time_created.replace(tzinfo=pytz.UTC) + now_utc = datetime.datetime.utcnow().replace(tzinfo=pytz.UTC) + if (now_utc - time_created_utc > datetime.timedelta( + seconds=int(os.getenv("FUNCTION_TIMEOUT_SEC", "60")))): print( f"backfill blob gs://{backfill_blob.bucket.name}/" f"{backfill_blob.name} appears to be abandoned as it is older " @@ -264,14 +285,15 @@ def subscriber_monitor(gcs_client: storage.Client, bkt: storage.Bucket, backfill_blob.delete(client=gcs_client) start_backfill_subscriber_if_not_running( gcs_client, bkt, utils.get_table_prefix(object_id)) - return + return True time.sleep(constants.ENSURE_SUBSCRIBER_SECONDS) while not utils.wait_on_gcs_blob(gcs_client, backfill_blob, constants.ENSURE_SUBSCRIBER_SECONDS): - backfill_blob = \ - start_backfill_subscriber_if_not_running( - gcs_client, bkt, utils.get_table_prefix(object_id)) + start_backfill_subscriber_if_not_running( + gcs_client, bkt, utils.get_table_prefix(object_id)) + return True + return False def _get_clients_if_none( diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py index 7d9c85900..de49b8ca6 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py @@ -62,7 +62,7 @@ def external_query( # pylint: disable=too-many-arguments f"{json.dumps(constants.DEFAULT_EXTERNAL_TABLE_DEFINITION)}") external_table_def = constants.DEFAULT_EXTERNAL_TABLE_DEFINITION - # This may cause an issue if >10,000 files. however, we + # This may cause an issue if >10,000 files. 
external_table_def["sourceUris"] = flatten2dlist( get_batches_for_prefix(gcs_client, gsurl)) print(f"external table def = {json.dumps(external_table_config, indent=2)}") @@ -70,10 +70,12 @@ def external_query( # pylint: disable=too-many-arguments job_config = bigquery.QueryJobConfig( table_definitions={"temp_ext": external_config}, use_legacy_sql=False) - # Note, dest_table might include a partition decorator. + # drop partition decorator if present. + table_id = dest_table_ref.table_id.split("$")[0] + rendered_query = query.format( dest_dataset=f"`{dest_table_ref.project}`.{dest_table_ref.dataset_id}", - dest_table=dest_table_ref.table_id, + dest_table=table_id ) job: bigquery.QueryJob = bq_client.query(rendered_query, @@ -398,8 +400,10 @@ def recursive_update(original: Dict, update: Dict, in_place: bool = False): return out -def handle_duplicate_notification(gcs_client: storage.Client, - blob_to_claim: storage.Blob): +def handle_duplicate_notification( + gcs_client: storage.Client, + blob_to_claim: storage.Blob, +): """ Need to handle potential duplicate Pub/Sub notifications. To achieve this we will drop an empty "claimed" file that indicates @@ -422,8 +426,9 @@ def handle_duplicate_notification(gcs_client: storage.Client, if_generation_match=0, client=gcs_client) except google.api_core.exceptions.PreconditionFailed as err: + blob_to_claim.reload(client=gcs_client) raise exceptions.DuplicateNotificationException( - f"gs://{blob_to_claim.bucket.name}/{blob_to_claim.name} appears" + f"gs://{blob_to_claim.bucket.name}/{blob_to_claim.name} appears " "to already have been claimed for created timestamp: " f"{created_unix_timestamp}." 
"This means that another invocation of this cloud function has " @@ -540,6 +545,7 @@ def wait_on_bq_job_id(bq_client: bigquery.Client, if job.state in {"RUNNING", "PENDING"}: print(f"waiting on BigQuery Job {job.job_id}") time.sleep(polling_interval) + print(f"reached polling timeout waiting for bigquery job {job_id}") return False @@ -688,13 +694,13 @@ def apply( bq_client: bigquery.Client success_blob: storage.Blob the success file whose batch should be applied. - lock_blob: storage.Blob + lock_blob: storage.Blob _bqlock blob to acquire for this job. job_id: str """ handle_duplicate_notification(gcs_client, success_blob) - bkt = success_blob.bucket - if lock_blob is not None: + if lock_blob: handle_bq_lock(gcs_client, lock_blob, job_id) + bkt = success_blob.bucket dest_table_ref, _ = gcs_path_to_table_ref_and_batch(success_blob.name) gsurl = removesuffix(f"gs://{bkt.name}/{success_blob.name}", constants.SUCCESS_FILENAME) @@ -715,3 +721,4 @@ def apply( print("LOAD_JOB") load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id) + return diff --git a/tools/cloud_functions/gcs_event_based_ingest/pytest.ini b/tools/cloud_functions/gcs_event_based_ingest/pytest.ini index 07bd1315d..7602954dc 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/pytest.ini +++ b/tools/cloud_functions/gcs_event_based_ingest/pytest.ini @@ -1,4 +1,10 @@ [pytest] +log_format = %(asctime)s %(levelname)s %(message)s +log_date_format = %Y-%m-%d %H:%M:%S +log_file_format = %(asctime)s %(levelname)s %(message)s +log_file_date_format = %Y-%m-%d %H:%M:%S +log_file_level = INFO +log_file = test.log markers = IT: marks tests as slow integration test requiring cloud resouces (deselect with '-m "not IT"') ORDERING: marks tests that test features related to ordering diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index 1d4043de5..776f7b08b 100644 --- 
a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -85,7 +85,8 @@ def ordered_mock_env(mock_env, monkeypatch): @pytest.fixture def dest_dataset(request, bq, mock_env, monkeypatch): - random_dataset = f"test_bq_ingest_gcf_{str(uuid.uuid4())[:8].replace('-','_')}" + random_dataset = (f"test_bq_ingest_gcf_" + f"{str(uuid.uuid4())[:8].replace('-','_')}") dataset = bigquery.Dataset(f"{os.getenv('GCP_PROJECT')}" f".{random_dataset}") dataset.location = "US" @@ -109,7 +110,9 @@ def dest_table(request, bq, mock_env, dest_dataset) -> bigquery.Table: json.load(schema_file)) table = bigquery.Table( - f"{os.environ.get('GCP_PROJECT')}.{dest_dataset.dataset_id}.cf_test_nation", + f"{os.environ.get('GCP_PROJECT')}" + f".{dest_dataset.dataset_id}.cf_test_nation_" + f"{str(uuid.uuid4()).replace('-','_')}", schema=schema, ) @@ -298,7 +301,8 @@ def dest_partitioned_table(request, bq: bigquery.Client, mock_env, table: bigquery.Table = bigquery.Table( f"{os.environ.get('GCP_PROJECT')}" - f".{dest_dataset.dataset_id}.cf_test_nyc_311", + f".{dest_dataset.dataset_id}.cf_test_nyc_311_" + f"{str(uuid.uuid4()).replace('-','_')}", schema=schema, ) @@ -353,7 +357,7 @@ def dest_ordered_update_table(request, gcs, gcs_bucket, bq, mock_env, table = bigquery.Table( f"{os.environ.get('GCP_PROJECT')}.{dest_dataset.dataset_id}" - ".cf_test_ordering", + f".cf_test_ordering_{str(uuid.uuid4()).replace('-','_')}", schema=schema, ) @@ -523,7 +527,7 @@ def gcs_external_partitioned_config( "bq_transform.sql", ])) - sql = "INSERT {dest_dataset}.cf_test_nyc_311 SELECT * FROM temp_ext" + sql = "INSERT {dest_dataset}.{dest_table} SELECT * FROM temp_ext" sql_obj.upload_from_string(sql) config_obj = gcs_bucket.blob("/".join([ diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py index 
50ff1c97c..2230417d9 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py @@ -16,6 +16,7 @@ import os import queue import time +from typing import Optional import pytest from google.cloud import bigquery @@ -32,6 +33,8 @@ # Testing that the subscriber does not get choked up by a common race condition # is crucial to ensuring this solution works. # This parameter is for running the subscriber tests many times. +# During development it can be helpful to tweak this up or down as you are +# experimenting. NUM_TRIES_SUBSCRIBER_TESTS = 25 @@ -134,11 +137,7 @@ def test_backlog_subscriber_in_order_with_new_batch_after_exit( we will drop a 4th batch after the subscriber has exited and assert that it gets applied as expected. """ - _run_subscriber( - gcs, - bq, - gcs_external_update_config - ) + _run_subscriber(gcs, bq, gcs_external_update_config) backlog_blobs = gcs_bucket.list_blobs( prefix=f"{gcs_ocn_bq_ingest.utils.get_table_prefix(gcs_external_update_config.name)}/_backlog/" ) @@ -176,10 +175,9 @@ def test_backlog_subscriber_in_order_with_new_batch_after_exit( @pytest.mark.ORDERING @pytest.mark.repeat(NUM_TRIES_SUBSCRIBER_TESTS) def test_backlog_subscriber_in_order_with_new_batch_while_running( - bq, gcs, gcs_bucket, dest_dataset, dest_ordered_update_table, - gcs_ordered_update_data, gcs_external_update_config: storage.Blob, - gcs_backlog, mock_env -): + bq, gcs, gcs_bucket, dest_dataset, dest_ordered_update_table, + gcs_ordered_update_data, gcs_external_update_config: storage.Blob, + gcs_backlog, mock_env): """Test functionality of backlog subscriber when new batches are added before the subscriber is done finishing the existing backlog. 
@@ -203,25 +201,33 @@ def test_backlog_subscriber_in_order_with_new_batch_while_running( claim_blob: storage.Blob = gcs_external_update_config.bucket.blob( gcs_external_update_config.name.replace( basename, f"_claimed_{basename}_created_at_" - f"{gcs_external_update_config.time_created.timestamp()}")) + f"{gcs_external_update_config.time_created.timestamp()}")) # Run subscriber w/ backlog and publisher w/ new batch in parallel. - with multiprocessing.Pool(processes=2) as pool: - res_subscriber = pool.apply_async( - _run_subscriber, - (None, None, backfill_blob)) + with multiprocessing.Pool(processes=3) as pool: + res_subscriber = pool.apply_async(_run_subscriber, + (None, None, backfill_blob)) # wait for existence of claim blob to ensure subscriber is running. while not claim_blob.exists(): pass res_backlog_publisher = pool.apply_async(_post_a_new_batch, (bkt, dataset, table)) + res_backlog_publisher.wait() + res_monitor = pool.apply_async( + gcs_ocn_bq_ingest.ordering.subscriber_monitor, + (None, bkt, + f"{dataset.project}.{dataset.dataset_id}/{table.table_id}/" + f"_backlog/04/_SUCCESS")) + + if res_monitor.get(): + print("subscriber monitor had to retrigger subscriber loop") + backfill_blob.reload(client=gcs) + _run_subscriber(None, None, backfill_blob) - # wait on each function to complete res_subscriber.wait() - res_backlog_publisher.wait() backlog_blobs = gcs_bucket.list_blobs( - prefix=f"{gcs_ocn_bq_ingest.utils.get_table_prefix(gcs_external_update_config.name)}/_backlog/" - ) + prefix=f"{gcs_ocn_bq_ingest.utils.get_table_prefix(gcs_external_update_config.name)}/" + f"_backlog/") assert backlog_blobs.num_results == 0, "backlog is not empty" bqlock_blob: storage.Blob = gcs_bucket.blob("_bqlock") assert not bqlock_blob.exists(), "_bqlock was not cleaned up" @@ -237,20 +243,13 @@ def test_backlog_subscriber_in_order_with_new_batch_while_running( def _run_subscriber( - gcs_client: storage.Client, - bq_client: bigquery.Client, + gcs_client: 
Optional[storage.Client], + bq_client: Optional[bigquery.Client], backfill_blob, ): - try: - gcs_ocn_bq_ingest.ordering.backlog_subscriber( - gcs_client, - bq_client, - backfill_blob, - time.monotonic()) - except gcs_ocn_bq_ingest.exceptions.DuplicateNotificationException: - print("ignoring potential duplicate notification exception as this is" - "not a critical error and would be ignored by the main method" - "of the cloud function.") + gcs_ocn_bq_ingest.ordering.backlog_subscriber(gcs_client, + bq_client, backfill_blob, + time.monotonic()) def _post_a_new_batch(gcs_bucket, dest_dataset, dest_ordered_update_table): From ddaf280d4c504f084ec580ab53cccd1a717bba9c Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 14 Dec 2020 17:48:34 -0800 Subject: [PATCH 36/90] chore: add e2e test, fixup terraform --- tools/__init__.py | 0 tools/cloud_functions/__init__.py | 0 .../gcs_event_based_ingest/Dockerfile.ci | 2 +- .../gcs_event_based_ingest/README.md | 21 ++- .../gcs_event_based_ingest/cloudbuild.yaml | 51 ++++++- .../gcs_event_based_ingest/e2e/.gitignore | 35 +++++ .../{tests/cli => e2e}/__init__.py | 0 .../gcs_event_based_ingest/e2e/conftest.py | 93 +++++++++++++ .../gcs_event_based_ingest/e2e/e2e_test.py | 127 ++++++++++++++++++ .../gcs_event_based_ingest/e2e/main.tf | 45 +++++++ .../common}/__init__.py | 0 .../{ => common}/constants.py | 9 +- .../{ => common}/exceptions.py | 4 +- .../{ => common}/ordering.py | 5 +- .../gcs_ocn_bq_ingest/{ => common}/utils.py | 17 ++- .../gcs_ocn_bq_ingest/main.py | 32 +++-- .../gcs_event_based_ingest/pytest.ini | 1 + .../gcs_ocn_bq_ingest_function/README.md | 5 +- .../gcs_ocn_bq_ingest_function/main.tf | 49 +++++-- .../gcs_ocn_bq_ingest_function/outputs.tf | 4 + .../gcs_ocn_bq_ingest_function/variables.tf | 9 ++ .../gcs_ocn_bq_ingest_function/versions.tf | 2 +- .../gcs_event_based_ingest/tests/__init__.py | 20 --- .../gcs_event_based_ingest/tests/conftest.py | 21 +-- .../test_gcs_ocn_bq_ingest.py | 35 +++-- 
.../gcs_ocn_bq_ingest/test_ordering_it.py | 56 ++++---- 26 files changed, 533 insertions(+), 110 deletions(-) create mode 100644 tools/__init__.py create mode 100644 tools/cloud_functions/__init__.py create mode 100644 tools/cloud_functions/gcs_event_based_ingest/e2e/.gitignore rename tools/cloud_functions/gcs_event_based_ingest/{tests/cli => e2e}/__init__.py (100%) create mode 100644 tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py create mode 100644 tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py create mode 100644 tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf rename tools/cloud_functions/gcs_event_based_ingest/{tests/gcs_ocn_bq_ingest => gcs_ocn_bq_ingest/common}/__init__.py (100%) rename tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/{ => common}/constants.py (96%) rename tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/{ => common}/exceptions.py (98%) rename tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/{ => common}/ordering.py (98%) rename tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/{ => common}/utils.py (97%) delete mode 100644 tools/cloud_functions/gcs_event_based_ingest/tests/__init__.py diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tools/cloud_functions/__init__.py b/tools/cloud_functions/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci b/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci index 5cd40aa1e..f92277062 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci +++ b/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci @@ -1,4 +1,4 @@ FROM python:3.8-slim COPY requirements.txt requirements-dev.txt ./ RUN pip3 install --no-cache-dir -r requirements-dev.txt -ENTRYPOINT ["pytest"] +ENTRYPOINT ["python3 -m pytest"] diff --git 
a/tools/cloud_functions/gcs_event_based_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/README.md index 51b5b3a06..70029831c 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/README.md @@ -316,8 +316,10 @@ docker run --rm -it gcr.io/$PROJECT_ID/gcs_event_based_ingest_ci #### Running on your local machine Alternatively to the local cloudbuild or using the docker container to run your tests, you can `pip3 install -r requirements-dev.txt` and select certain tests -to run with [`pytest`](https://docs.pytest.org/en/stable/usage.html). This is -mostly useful if you'd like to integrate with your IDE debugger. +to run with [`python3 -m pytest`](https://docs.pytest.org/en/stable/usage.html). +Note, this is not quite the same as calling `pytest` without the `python -m` prefix +([pytest invocation docs](https://docs.pytest.org/en/stable/usage.html#calling-pytest-through-python-m-pytest)). +This is mostly useful if you'd like to integrate with your IDE debugger. Note that integration tests will spin up / tear down cloud resources that can incur a small cost. These resources will be spun up based on your Google Cloud SDK @@ -331,16 +333,25 @@ See more info on sharing pytest fixtures in the [pytest docs](https://docs.pytes #### Running All Tests ```bash -pytest +python3 -m pytest ``` #### Running Unit Tests Only ```bash -pytest -m "not IT" +python3 -m pytest -m "not IT" ``` #### Running Integration Tests Only ```bash -pytest -m IT +python3 -m pytest -m IT +``` + +#### Running System Tests Only +The system tests assume that you have deployed the cloud function. 
+```bash +export TF_VAR_short_sha=$(git rev-parse --short=7 HEAD) +export TF_VAR_project_id=${YOUR_GCP_PROJECT_ID} +(cd e2e && terraform init && terraform apply -auto-approve) +python3 -m pytest -m SYS ``` ## Deployment diff --git a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index a41666c65..2ef218e43 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -117,7 +117,7 @@ steps: args: - '-c' # pip installing again to get GCB to recognize mocker from pytest-mock - - 'pip install -r requirements-dev.txt && python3 -m pytest -m "not IT"' + - 'pip install -r requirements-dev.txt && python3 -m pytest tests -m "not IT"' id: 'unit-test' - name: 'gcr.io/$PROJECT_ID/gcs_event_based_ingest_ci' dir: '${_BUILD_DIR}' @@ -127,10 +127,55 @@ steps: entrypoint: /bin/sh args: - '-c' - - 'pip install -r requirements-dev.txt && python3 -m pytest -m IT' + - 'pip install -r requirements-dev.txt && python3 -m pytest tests -m IT' id: 'integration-test' -timeout: '1200s' +- name: 'hashicorp/terraform' + waitFor: + - 'integration-test' + dir: '${_BUILD_DIR}/e2e' + args: ['init'] + id: 'terraform-e2e-init' +- name: 'hashicorp/terraform' + waitFor: + - 'terraform-e2e-init' + dir: '${_BUILD_DIR}/e2e' + args: ['apply', '-auto-approve'] + env: + - 'TF_VAR_project_id=$PROJECT_ID' + - 'TF_VAR_region=$_REGION' + - 'TF_VAR_suffix=$SHORT_SHA' + id: 'terraform-e2e-apply' +- name: 'gcr.io/$PROJECT_ID/gcs_event_based_ingest_ci' + dir: '${_BUILD_DIR}' + waitFor: + - 'build-ci-image' + - 'terraform-e2e-apply' + entrypoint: /bin/sh + args: + - '-c' + - 'python3 -m pytest e2e --tfstate=${_BUILD_DIR}/e2e/terraform.state' + id: 'e2e-test' +- name: 'hashicorp/terraform' + waitFor: + - 'e2e-test' + dir: '${_BUILD_DIR}/e2e' + # Note if the e2e test fails the resources will not be cleaned up due to + # cloud build not allowing ignored failed 
steps. + # this will allow a maintainer to evaluate what went wrong during e2e test + # because the evidence will not be destroyed. + # Maintainers of bqutil project should destroy these resources after the + # failure cause has been diagnosed. + # We do not run this e2e test unless all unit and integration tests pass. + # https://github.com/GoogleCloudPlatform/cloud-builders/issues/253 + args: ['destroy', '-auto-approve'] + env: + - 'TF_VAR_project_id=$PROJECT_ID' + - 'TF_VAR_region=$_REGION' + - 'TF_VAR_suffix=$SHORT_SHA' + id: 'terraform-e2e-destroy' +timeout: '3600s' options: machineType: 'N1_HIGHCPU_32' substitutions: '_BUILD_DIR': 'tools/cloud_functions/gcs_event_based_ingest' + '_REGION': 'us-central1' diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/.gitignore b/tools/cloud_functions/gcs_event_based_ingest/e2e/.gitignore new file mode 100644 index 000000000..9e399369c --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/.gitignore @@ -0,0 +1,35 @@ +# Local .terraform directories +**/.terraform/* + +# .tfstate files +*.tfstate +*.tfstate.* + +# Crash log files +crash.log + +# Exclude all .tfvars files, which are likely to contain sensitive data, such as +# password, private keys, and other secrets. These should not be part of version +# control as they are data points which are potentially sensitive and subject +# to change depending on the environment. 
+# +*.tfvars + +# Ignore override files as they are usually used to override resources locally and so +# are not checked in +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Include override files you do wish to add to version control using negated pattern +# +# !example_override.tf + +# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan +# example: *tfplan* + +# Ignore CLI configuration files +.terraformrc +terraform.rc + diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/cli/__init__.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/__init__.py similarity index 100% rename from tools/cloud_functions/gcs_event_based_ingest/tests/cli/__init__.py rename to tools/cloud_functions/gcs_event_based_ingest/e2e/__init__.py diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py new file mode 100644 index 000000000..80b870617 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py @@ -0,0 +1,93 @@ +# Copyright 2020 Google LLC. +# This software is provided as-is, without warranty or representation +# for any use or purpose. +# Your use of it is subject to your agreement with Google. + +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""End-to-end tests for event based BigQuery ingest Cloud Function.""" +import json +import os +import uuid + +import pytest +from google.cloud import bigquery +from google.cloud import storage + + +def pytest_addoption(parser): + # if Terraform was used to deploy resources, pass the state details + parser.addoption("--tfstate", action="store", default=None) + + +@pytest.fixture(scope="module") +def bq() -> bigquery.Client: + """BigQuery Client""" + return bigquery.Client(location="US") + + +@pytest.fixture(scope="module") +def gcs() -> storage.Client: + """GCS Client""" + return storage.Client() + + +@pytest.fixture(scope='module') +def tf_state(pytestconfig): + + # if we used Terraform to create the GCP resources, use the output variables + if pytestconfig.getoption('tfstate') is not None: + tf_state_file = pytestconfig.getoption('tfstate') + with open(tf_state_file, 'r', encoding='utf-8') as fp: + return json.load(fp) + + +@pytest.fixture +def dest_dataset(request, bq, monkeypatch): + random_dataset = (f"test_bq_ingest_gcf_" + f"{str(uuid.uuid4())[:8].replace('-','_')}") + dataset = bigquery.Dataset(f"{os.getenv('TF_VAR_project_id', 'bqutil')}" + f".{random_dataset}") + dataset.location = "US" + bq.create_dataset(dataset) + monkeypatch.setenv("BQ_LOAD_STATE_TABLE", + f"{dataset.dataset_id}.serverless_bq_loads") + print(f"created dataset {dataset.dataset_id}") + + def teardown(): + bq.delete_dataset(dataset, delete_contents=True, not_found_ok=True) + + request.addfinalizer(teardown) + return dataset + + +@pytest.fixture(scope="function") +def dest_table(request, bq: bigquery.Client, dest_dataset) -> bigquery.Table: + public_table: bigquery.Table = bq.get_table( + bigquery.TableReference.from_string( + "bigquery-public-data.new_york_311.311_service_requests")) + schema = public_table.schema + + table: bigquery.Table = bigquery.Table( + f"{os.environ.get('TF_VAR_project_id', 'bqutil')}" + f".{dest_dataset.dataset_id}.cf_e2e_test_nyc_311_" + 
f"{os.getenv('SHORT_SHA', 'manual')}", + schema=schema, + ) + + table = bq.create_table(table) + + def teardown(): + bq.delete_table(table, not_found_ok=True) + + request.addfinalizer(teardown) + return table diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py new file mode 100644 index 000000000..7eaa9f7e8 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py @@ -0,0 +1,127 @@ +# Copyright 2020 Google LLC. +# This software is provided as-is, without warranty or representation +# for any use or purpose. +# Your use of it is subject to your agreement with Google. + +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import concurrent.futures
+import json
+import time
+from typing import Dict
+
+import pytest
+from google.cloud import bigquery
+from google.cloud import storage
+
+WAIT_FOR_ROWS_TIMEOUT = 180  # seconds
+
+
+@pytest.mark.SYS
+def test_gcs_ocn_bq_ingest_cloud_function(
+    gcs: storage.Client,
+    bq: bigquery.Client,
+    tf_state: Dict,
+    dest_table: bigquery.Table,
+):
+    """drop some test data and assert that the expected actions are taken by
+    the deployed cloud function"""
+    input_bucket_id = tf_state['outputs']['bucket']['value']
+    table_prefix = f"{dest_table.dataset_id}/" \
+                   f"{dest_table.table_id}"
+    extract_config = bigquery.ExtractJobConfig()
+    extract_config.destination_format = bigquery.DestinationFormat.AVRO
+    public_table: bigquery.Table = bq.get_table(
+        bigquery.TableReference.from_string(
+            "bigquery-public-data.new_york_311.311_service_requests"))
+
+    def _extract(batch: str):
+        extract_job: bigquery.ExtractJob = bq.extract_table(
+            public_table, f"gs://{input_bucket_id}/{table_prefix}/{batch}/"
+            f"data-*.avro",
+            job_config=extract_config)
+        return extract_job.result()
+
+    batches = [
+        "historical/00", "historical/01", "historical/02", "incremental/03"
+    ]
+    history_batch_nums = ["00", "01", "02"]
+    with concurrent.futures.ThreadPoolExecutor() as pool:
+        # export some data from public BQ table into historical partitions
+        extract_results = pool.map(_extract, batches)
+
+    for res in extract_results:
+        assert res.errors is None, f"extract job {res.job_id} failed"
+
+    bkt: storage.Bucket = gcs.lookup_bucket(input_bucket_id)
+    # configure load jobs for this table
+    load_config = bkt.blob(f"{table_prefix}/_config/load.json")
+    load_config.upload_from_string(
+        json.dumps({
+            "writeDisposition": "WRITE_APPEND",
+            "sourceFormat": "AVRO",
+            "useAvroLogicalTypes": "True",
+        }))
+    # add historical success files
+    for batch in history_batch_nums:
+        historical_success_blob: storage.Blob = bkt.blob(
+            f"{table_prefix}/historical/{batch}/_SUCCESS")
+        
historical_success_blob.upload_from_string("") + + # assert 0 bq rows (because _HISTORYDONE not dropped yet) + dest_table: bigquery.Table = bq.get_table(dest_table) + assert dest_table.num_rows == 0, \ + "history was ingested before _HISTORYDONE was uploaded" + + # add _HISTORYDONE + history_done_blob: storage.Blob = bkt.blob(f"{table_prefix}/_HISTORYDONE") + history_done_blob.upload_from_string("") + + # wait for bq rows to reach expected num rows + bq_wait_for_rows(bq, dest_table, + public_table.num_rows * len(history_batch_nums)) + + # add the incremental success file + incremental_success_blob: storage.Blob = bkt.blob( + f"{table_prefix}/{batches[-1]}/_SUCCESS") + incremental_success_blob.upload_from_string("") + + # wait on new expected bq rows + bq_wait_for_rows(bq, dest_table, public_table.num_rows * len(batches)) + + +def bq_wait_for_rows(bq_client: bigquery.Client, table: bigquery.Table, + expected_num_rows: int): + """ + polls tables.get API for number of rows until reaches expected value or + times out. + + This is mostly an optimization to speed up the test suite without making it + flaky. + """ + + start_poll = time.monotonic() + actual_num_rows = 0 + while time.monotonic() - start_poll < WAIT_FOR_ROWS_TIMEOUT: + bq_table: bigquery.Table = bq_client.get_table(table) + actual_num_rows = bq_table.num_rows + if actual_num_rows == expected_num_rows: + return + if actual_num_rows > expected_num_rows: + raise AssertionError( + f"{table.project}.{table.dataset_id}.{table.table_id} has" + f"{actual_num_rows} rows. expected {expected_num_rows} rows.") + raise AssertionError( + f"Timed out after {WAIT_FOR_ROWS_TIMEOUT} seconds waiting for " + f"{table.project}.{table.dataset_id}.{table.table_id} to " + f"reach {expected_num_rows} rows." 
+ f"last poll returned {actual_num_rows} rows.") diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf b/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf new file mode 100644 index 000000000..4c302663e --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf @@ -0,0 +1,45 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +variable "short_sha" {} +variable "project_id" { default = "bqutil" } +variable "region" { default = "us-central1" } +output "bucket" { + value = module.gcs_ocn_bq_ingest.input-bucket +} + +resource "google_storage_bucket" "cloud_functions_source" { + name = "gcf-source-archives-${var.short_sha}" + project = var.project_id + storage_class = "REGIONAL" + location = var.region + force_destroy = "true" +} + +module "gcs_ocn_bq_ingest" { + source = "../terraform_module/gcs_ocn_bq_ingest_function" + function_source_folder = "../gcs_ocn_bq_ingest" + app_id = "gcs-ocn-bq-ingest-e2e-test-${var.short_sha}" + cloudfunctions_source_bucket = google_storage_bucket.cloud_functions_source.name + data_ingester_sa = "data-ingester-sa-${var.short_sha}" + input_bucket = "gcs-ocn-bq-ingest-e2e-tests-${var.short_sha}" + project_id = var.project_id + environment_variables = { + START_BACKFILL_FILENAME = "_HISTORYDONE" + ORDER_PER_TABLE = "True" + } + # We'll use a shorter timeout for e2e stress subscriber re-triggering + timeout = 60 + force_destroy = "true" +} + diff --git 
a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/__init__.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/__init__.py similarity index 100% rename from tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/__init__.py rename to tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/__init__.py diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py similarity index 96% rename from tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py rename to tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py index daa116dfe..50faf6d12 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py @@ -47,16 +47,21 @@ DEFAULT_JOB_LABELS = { "component": "event-based-gcs-ingest", - "cloud-function-name": os.getenv("FUNCTION_NAME"), + "cloud-function-name": os.getenv("K_SERVICE"), } -BASE_LOAD_JOB_CONFIG = { +DEFAULT_LOAD_JOB_CONFIG = { "sourceFormat": "CSV", "fieldDelimiter": ",", "writeDisposition": "WRITE_APPEND", "labels": DEFAULT_JOB_LABELS, } +BASE_LOAD_JOB_CONFIG = { + "writeDisposition": "WRITE_APPEND", + "labels": DEFAULT_JOB_LABELS, +} + # https://cloud.google.com/bigquery/quotas#load_jobs # 15TB per BQ load job (soft limit). 
DEFAULT_MAX_BATCH_BYTES = str(15 * 10**12) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/exceptions.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py similarity index 98% rename from tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/exceptions.py rename to tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py index a1126c22e..8ab701e8d 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/exceptions.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py @@ -43,9 +43,9 @@ class BacklogException(Exception): function.""" -EXCEPTIONS_TO_REPORT = { +EXCEPTIONS_TO_REPORT = ( BigQueryJobFailure, UnexpectedTriggerException, DestinationRegexMatchException, BacklogException, -} +) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py similarity index 98% rename from tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py rename to tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index cd998f985..68e39542d 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -60,11 +60,14 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], """Pick up the table lock, poll BQ job id until completion and process next item in the backlog. """ + print(f"started backfill subscriber for gs://{backfill_blob.bucket.name}/" + f"{backfill_blob.name}") gcs_client, bq_client = _get_clients_if_none(gcs_client, bq_client) # We need to retrigger the backfill loop before the Cloud Functions Timeout. 
restart_time = function_start_time + ( float(os.getenv("FUNCTION_TIMEOUT_SEC", "60")) - constants.RESTART_BUFFER_SECONDS) + print(f"restart time is {restart_time}") backfill_blob_generation = backfill_blob.generation bkt = backfill_blob.bucket utils.handle_duplicate_notification(gcs_client, backfill_blob) @@ -169,7 +172,7 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], next_success_file: storage.Blob = bkt.blob( next_backlog_file.name.replace("/_backlog/", "/")) table_ref, batch = utils.gcs_path_to_table_ref_and_batch( - next_success_file.name) + next_success_file.name, bq_client.project) if not next_success_file.exists(client=gcs_client): raise exceptions.BacklogException( "backlog contains " diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py similarity index 97% rename from tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py rename to tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index de49b8ca6..496ec8dae 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -75,8 +75,7 @@ def external_query( # pylint: disable=too-many-arguments rendered_query = query.format( dest_dataset=f"`{dest_table_ref.project}`.{dest_table_ref.dataset_id}", - dest_table=table_id - ) + dest_table=table_id) job: bigquery.QueryJob = bq_client.query(rendered_query, job_config=job_config, @@ -199,12 +198,16 @@ def _get_parent_config(path): while parts: config = _get_parent_config("/".join(parts)) if config: + print(f"found config: {'/'.join(parts)}") config_q.append(json.loads(config)) parts.pop() merged_config: Dict = {} while config_q: recursive_update(merged_config, config_q.popleft(), in_place=True) + if merged_config == constants.BASE_LOAD_JOB_CONFIG: + print("falling back to default CSV load 
job config") + return constants.DEFAULT_LOAD_JOB_CONFIG print(f"merged_config: {merged_config}") return bigquery.LoadJobConfig.from_api_repr({"load": merged_config}) @@ -580,7 +583,8 @@ def wait_on_gcs_blob(gcs_client: storage.Client, def gcs_path_to_table_ref_and_batch( - object_id) -> Tuple[bigquery.TableReference, Optional[str]]: + object_id: str, default_project: Optional[str] +) -> Tuple[bigquery.TableReference, Optional[str]]: """extract bigquery table reference and batch id from gcs object id""" destination_match = constants.DESTINATION_REGEX.match(object_id) @@ -611,11 +615,11 @@ def gcs_path_to_table_ref_and_batch( dest_table_ref = bigquery.TableReference.from_string( f"{dataset}.{table}{partition}", - default_project=os.getenv("BQ_PROJECT", os.getenv("GCP_PROJECT"))) + default_project=os.getenv("BQ_PROJECT", default_project)) else: dest_table_ref = bigquery.TableReference.from_string( f"{dataset}.{table}", - default_project=os.getenv("BQ_PROJECT", os.getenv("GCP_PROJECT"))) + default_project=os.getenv("BQ_PROJECT", default_project)) return dest_table_ref, batch_id @@ -701,7 +705,8 @@ def apply( if lock_blob: handle_bq_lock(gcs_client, lock_blob, job_id) bkt = success_blob.bucket - dest_table_ref, _ = gcs_path_to_table_ref_and_batch(success_blob.name) + dest_table_ref, _ = gcs_path_to_table_ref_and_batch(success_blob.name, + bq_client.project) gsurl = removesuffix(f"gs://{bkt.name}/{success_blob.name}", constants.SUCCESS_FILENAME) print( diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 5f79f5962..cbd55cf7c 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -27,10 +27,16 @@ from google.cloud import error_reporting from google.cloud import storage -from . import constants -from . import exceptions -from . import ordering -from . 
import utils +try: + from common import constants + from common import exceptions + from common import ordering + from common import utils +except ModuleNotFoundError: + from .common import constants + from .common import exceptions + from .common import ordering + from .common import utils # Reuse GCP Clients across function invocations using globbals # https://cloud.google.com/functions/docs/bestpractices/tips#use_global_variables_to_reuse_objects_in_future_invocations @@ -83,7 +89,7 @@ def main(event: Dict, context): # pylint: disable=unused-argument print("recieved duplicate notification. this was handled gracefully." f"{traceback.format_exc()}") - except tuple(exceptions.EXCEPTIONS_TO_REPORT) as original_error: + except exceptions.EXCEPTIONS_TO_REPORT as original_error: # We do this because we know these errors do not require a cold restart # of the cloud function. try: @@ -104,7 +110,13 @@ def triage_event(gcs_client: Optional[storage.Client], blob.""" bkt = event_blob.bucket basename_object_id = os.path.basename(event_blob.name) - table_ref, batch = utils.gcs_path_to_table_ref_and_batch(event_blob.name) + if bq_client: + table_ref, batch = utils.gcs_path_to_table_ref_and_batch( + event_blob.name, bq_client.project) + else: + table_ref, batch = utils.gcs_path_to_table_ref_and_batch( + event_blob.name, None) + if enforce_ordering: # For SUCCESS files in a backlog directory, ensure that subscriber # is running. @@ -140,8 +152,12 @@ def triage_event(gcs_client: Optional[storage.Client], return else: # Default behavior submit job as soon as success file lands. if basename_object_id == constants.SUCCESS_FILENAME: - utils.apply(gcs_client, bq_client, event_blob, None, - utils.create_job_id(table_ref, batch)) + utils.apply( + gcs_client, + bq_client, + event_blob, + None, # no lock blob when ordering not enabled. 
+            utils.create_job_id(table_ref, batch))
 
 
 def lazy_error_reporting_client() -> error_reporting.Client:
diff --git a/tools/cloud_functions/gcs_event_based_ingest/pytest.ini b/tools/cloud_functions/gcs_event_based_ingest/pytest.ini
index 7602954dc..bf550fdcf 100644
--- a/tools/cloud_functions/gcs_event_based_ingest/pytest.ini
+++ b/tools/cloud_functions/gcs_event_based_ingest/pytest.ini
@@ -7,6 +7,7 @@ log_file_level = INFO
 log_file = test.log
 markers =
     IT: marks tests as slow integration test requiring cloud resouces (deselect with '-m "not IT"')
+    SYS: marks tests as slow system or e2e test requiring cloud resources (deselect with '-m "not SYS"')
     ORDERING: marks tests that test features related to ordering
     CLI: marks tests of CLI utilities
 addopts = --workers=auto
diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md
index b347aceeb..d4ea6dbd1 100644
--- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md
+++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/README.md
@@ -10,7 +10,7 @@ documented [here](../gcs_ocn_bq_ingest_function/README.md)
 | Name | Version |
 |------|---------|
-| terraform | >= 0.12 |
+| terraform | >= 0.13 |
 | archive | ~> 2.0.0 |
 | google | >= 3.38.0 |
 | template | ~> 2.2.0 |
@@ -31,11 +31,13 @@ documented [here](../gcs_ocn_bq_ingest_function/README.md)
 | cloudfunctions\_source\_bucket | GCS bucket to store Cloud Functions Source | `any` | n/a | yes |
 | data\_ingester\_sa | Service Account Email responsible for ingesting data to BigQuery | `any` | n/a | yes |
 | environment\_variables | Environment variables to set on the cloud function. | `map(string)` | `{}` | no |
+| force\_destroy | force destroy resources (e.g. 
for e2e tests) | `string` | `"false"` | no | | function\_source\_folder | Path to Cloud Function source | `string` | `"../gcs_event_based_ingest/gcs_ocn_bq_ingest/"` | no | | input\_bucket | GCS bucket to watch for new files | `any` | n/a | yes | | input\_prefix | GCS prefix to watch for new files in input\_bucket | `any` | `null` | no | | project\_id | GCP Project ID containing cloud function, and input bucket | `any` | n/a | yes | | region | GCP region in which to deploy cloud function | `string` | `"us-central1"` | no | +| timeout | Cloud Functions timeout in seconds | `number` | `540` | no | | use\_pubsub\_notifications | Setting this to true will use Pub/Sub notifications By default we will use Cloud Functions Event direct notifications. See https://cloud.google.com/storage/docs/pubsub-notifications. | `bool` | `false` | no | ## Outputs @@ -44,4 +46,5 @@ documented [here](../gcs_ocn_bq_ingest_function/README.md) |------|-------------| | cloud-function | instance of cloud function deployed by this module. 
| | data-ingester-sa | data ingester service account email created as cloud function identity | +| input-bucket | n/a | diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf index 16d7ce821..80226e344 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf @@ -23,15 +23,17 @@ resource "google_pubsub_topic" "notification_topic" { } module "bucket" { - source = "terraform-google-modules/cloud-storage/google//modules/simple_bucket" - version = "~> 1.3" + depends_on = [module.data_ingester_service_account] + source = "terraform-google-modules/cloud-storage/google//modules/simple_bucket" + version = "~> 1.3" - name = var.input_bucket - project_id = var.project_id - location = var.region + name = var.input_bucket + project_id = var.project_id + location = var.region + force_destroy = var.force_destroy iam_members = [{ role = "roles/storage.objectAdmin" - member = module.data_ingester_service_account.iam_email + member = "serviceAccount:${var.data_ingester_sa}@${var.project_id}.iam.gserviceaccount.com" }] } @@ -59,20 +61,28 @@ resource "google_storage_bucket_object" "function_zip_object" { content_type = "application/zip" } +locals { + function_name = "gcs_to_bq_${var.app_id}" +} resource "google_cloudfunctions_function" "gcs_to_bq" { + depends_on = [google_storage_bucket_object.function_zip_object] project = var.project_id - name = "gcs_to_bq_${var.app_id}" + name = local.function_name region = var.region runtime = "python38" - timeout = 9 * 60 # seconds - service_account_email = var.data_ingester_sa + timeout = var.timeout + service_account_email = module.data_ingester_service_account.email source_archive_bucket = var.cloudfunctions_source_bucket 
source_archive_object = google_storage_bucket_object.function_zip_object.name entry_point = "main" - environment_variables = var.environment_variables + environment_variables = merge(var.environment_variables, { + GCP_PROJECT = var.project_id, + FUNCTION_TIMEOUT_SEC = var.timeout + FUNCTION_NAME = local.function_name + }) event_trigger { event_type = var.use_pubsub_notifications ? "providers/cloud.pubsub/eventTypes/topic.publish" : "google.storage.object.finalize" - resource = var.use_pubsub_notifications ? google_pubsub_topic.notification_topic[0].id : module.bucket.name + resource = var.use_pubsub_notifications ? "projects/${var.project_id}/${google_pubsub_topic.notification_topic[0].id}" : module.bucket.bucket.name } } @@ -83,6 +93,7 @@ module "data_ingester_service_account" { names = [var.data_ingester_sa, ] project_roles = [ "${var.project_id}=>roles/bigquery.jobUser", + "${var.project_id}=>roles/storage.admin", ] } @@ -112,3 +123,19 @@ resource "google_pubsub_topic_iam_binding" "cf_subscriber" { members = [module.data_ingester_service_account.iam_email] } +module "project-services" { + source = "terraform-google-modules/project-factory/google//modules/project_services" + version = "4.0.0" + + project_id = var.project_id + disable_services_on_destroy = "false" + + activate_apis = [ + "compute.googleapis.com", + "iam.googleapis.com", + "bigquery.googleapis.com", + "storage.googleapis.com", + "pubsub.googleapis.com", + "clouderrorreporting.googleapis.com", + ] +} diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/outputs.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/outputs.tf index e34d2d0f4..69d8017ab 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/outputs.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/outputs.tf @@ -21,3 +21,7 @@ output "data-ingester-sa" { 
value = module.data_ingester_service_account.email } +output "input-bucket" { + value = module.bucket.bucket.name +} + diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf index ca3073a0d..1783034f5 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf @@ -65,3 +65,12 @@ variable "bigquery_project_ids" { default = [] } +variable "force_destroy" { + description = "force destroy resources (e.g. for e2e tests)" + default = "false" +} + +variable "timeout" { + description = "Cloud Functions timeout in seconds" + default = 540 +} diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/versions.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/versions.tf index 68daa41d7..e4234775c 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/versions.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/versions.tf @@ -15,7 +15,7 @@ */ terraform { - required_version = ">= 0.12" + required_version = ">= 0.13" required_providers { google = ">= 3.38.0" diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/__init__.py b/tools/cloud_functions/gcs_event_based_ingest/tests/__init__.py deleted file mode 100644 index 3deceee10..000000000 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright 2020 Google LLC. -# This software is provided as-is, without warranty or representation -# for any use or purpose. -# Your use of it is subject to your agreement with Google. 
- -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys - -sys.path.append(os.path.realpath(os.path.dirname(__file__) + "/..")) diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index 776f7b08b..cfdc4323a 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -23,8 +23,8 @@ from google.cloud import error_reporting from google.cloud import storage -import gcs_ocn_bq_ingest.ordering -import gcs_ocn_bq_ingest.utils +import gcs_ocn_bq_ingest.common.ordering +import gcs_ocn_bq_ingest.common.utils TEST_DIR = os.path.realpath(os.path.dirname(__file__)) LOAD_JOB_POLLING_TIMEOUT = 10 # seconds @@ -75,6 +75,7 @@ def mock_env(gcs, monkeypatch): monkeypatch.setenv("GCP_PROJECT", gcs.project) monkeypatch.setenv("FUNCTION_NAME", "integration-test") monkeypatch.setenv("FUNCTION_TIMEOUT_SEC", "540") + monkeypatch.setenv("BQ_PROJECT", gcs.project) @pytest.fixture @@ -106,7 +107,7 @@ def teardown(): def dest_table(request, bq, mock_env, dest_dataset) -> bigquery.Table: with open(os.path.join(TEST_DIR, "resources", "nation_schema.json")) as schema_file: - schema = gcs_ocn_bq_ingest.utils.dict_to_bq_schema( + schema = gcs_ocn_bq_ingest.common.utils.dict_to_bq_schema( json.load(schema_file)) table = bigquery.Table( @@ -352,7 +353,7 @@ def dest_ordered_update_table(request, gcs, gcs_bucket, bq, 
mock_env, dest_dataset) -> bigquery.Table: with open(os.path.join(TEST_DIR, "resources", "ordering_schema.json")) as schema_file: - schema = gcs_ocn_bq_ingest.utils.dict_to_bq_schema( + schema = gcs_ocn_bq_ingest.common.utils.dict_to_bq_schema( json.load(schema_file)) table = bigquery.Table( @@ -373,7 +374,7 @@ def dest_ordered_update_table(request, gcs, gcs_bucket, bq, mock_env, "alpha_update": "" }], table, - job_id_prefix=gcs_ocn_bq_ingest.constants.DEFAULT_JOB_PREFIX) + job_id_prefix=gcs_ocn_bq_ingest.common.constants.DEFAULT_JOB_PREFIX) # The subscriber will be responsible for cleaning up this file. bqlock_obj: storage.blob.Blob = gcs_bucket.blob("/".join([ @@ -437,9 +438,11 @@ def gcs_backlog(request, gcs, gcs_bucket, # We will deal with the last incremental in the test itself to test the # behavior of a new backlog subscriber. for success_blob in gcs_ordered_update_data: - gcs_ocn_bq_ingest.ordering.backlog_publisher(gcs, success_blob) - backlog_blob = gcs_ocn_bq_ingest.ordering.success_blob_to_backlog_blob( - success_blob) + gcs_ocn_bq_ingest.common.ordering.backlog_publisher(gcs, success_blob) + backlog_blob = \ + gcs_ocn_bq_ingest.common.ordering.success_blob_to_backlog_blob( + success_blob + ) backlog_blob.upload_from_string("") data_objs.append(backlog_blob) @@ -497,7 +500,7 @@ def gcs_external_update_config(request, gcs_bucket, dest_dataset, backfill_blob = gcs_bucket.blob("/".join([ f"{dest_dataset.project}.{dest_dataset.dataset_id}", dest_ordered_update_table.table_id, - gcs_ocn_bq_ingest.constants.BACKFILL_FILENAME + gcs_ocn_bq_ingest.common.constants.BACKFILL_FILENAME ])) backfill_blob.upload_from_string("") config_objs.append(sql_obj) diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py index 6dfc57dec..349780f32 100644 --- 
a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py @@ -16,16 +16,17 @@ import re import time from typing import Dict, Optional +from unittest.mock import Mock import pytest from google.cloud import storage -import gcs_ocn_bq_ingest.constants +import gcs_ocn_bq_ingest.common.constants +import gcs_ocn_bq_ingest.common.utils import gcs_ocn_bq_ingest.main -import gcs_ocn_bq_ingest.utils COMPILED_DEFAULT_DENTINATION_REGEX = re.compile( - gcs_ocn_bq_ingest.constants.DEFAULT_DESTINATION_REGEX) + gcs_ocn_bq_ingest.common.constants.DEFAULT_DESTINATION_REGEX) @pytest.mark.parametrize( @@ -142,7 +143,7 @@ def test_default_destination_regex(test_input: str, ([["foo"], [], ["bar", "baz"]], ["foo", "bar", "baz"]), ]) def test_flattend2dlist(test_input, expected): - assert gcs_ocn_bq_ingest.utils.flatten2dlist(test_input) == expected + assert gcs_ocn_bq_ingest.common.utils.flatten2dlist(test_input) == expected @pytest.mark.parametrize( @@ -212,8 +213,8 @@ def test_flattend2dlist(test_input, expected): # yapf: enable ]) def test_recursive_update(original, update, expected): - assert gcs_ocn_bq_ingest.utils.recursive_update(original, - update) == expected + assert gcs_ocn_bq_ingest.common.utils.recursive_update(original, + update) == expected @pytest.mark.parametrize( @@ -237,14 +238,17 @@ def test_recursive_update(original, update, expected): ("dataset/table/_backlog/_BACKFILL", "dataset/table"), ]) def test_get_table_prefix(test_input, expected): - assert gcs_ocn_bq_ingest.utils.get_table_prefix(test_input) == expected + assert gcs_ocn_bq_ingest.common.utils.get_table_prefix( + test_input) == expected def test_triage_event(mock_env, mocker): test_event_blob: storage.Blob = storage.Blob.from_string( "gs://foo/bar/baz/00/_SUCCESS") - apply_mock = mocker.patch('gcs_ocn_bq_ingest.utils.apply') - gcs_ocn_bq_ingest.main.triage_event(None, 
None, test_event_blob, + apply_mock = mocker.patch('gcs_ocn_bq_ingest.common.utils.apply') + bq_mock = Mock() + bq_mock.project = "foo" + gcs_ocn_bq_ingest.main.triage_event(None, bq_mock, test_event_blob, time.monotonic()) apply_mock.assert_called_once() @@ -253,11 +257,13 @@ def test_triage_event_ordered(ordered_mock_env, mocker): enforce_ordering = True test_event_blob: storage.Blob = storage.Blob.from_string( "gs://foo/bar/baz/00/_SUCCESS") - apply_mock = mocker.patch('gcs_ocn_bq_ingest.utils.apply') + apply_mock = mocker.patch('gcs_ocn_bq_ingest.common.utils.apply') publisher_mock = mocker.patch( - 'gcs_ocn_bq_ingest.ordering.backlog_publisher') + 'gcs_ocn_bq_ingest.common.ordering.backlog_publisher') + bq_mock = Mock() + bq_mock.project = "foo" gcs_ocn_bq_ingest.main.triage_event(None, - None, + bq_mock, test_event_blob, time.monotonic(), enforce_ordering=enforce_ordering) @@ -266,7 +272,7 @@ def test_triage_event_ordered(ordered_mock_env, mocker): test_event_blob: storage.Blob = storage.Blob.from_string( "gs://foo/bar/baz/_BACKFILL") subscriber_mock = mocker.patch( - 'gcs_ocn_bq_ingest.ordering.backlog_subscriber') + 'gcs_ocn_bq_ingest.common.ordering.backlog_subscriber') gcs_ocn_bq_ingest.main.triage_event(None, None, test_event_blob, @@ -276,7 +282,8 @@ def test_triage_event_ordered(ordered_mock_env, mocker): test_event_blob: storage.Blob = storage.Blob.from_string( "gs://foo/bar/baz/_backlog/00/_SUCCESS") - monitor_mock = mocker.patch('gcs_ocn_bq_ingest.ordering.subscriber_monitor') + monitor_mock = mocker.patch( + 'gcs_ocn_bq_ingest.common.ordering.subscriber_monitor') gcs_ocn_bq_ingest.main.triage_event(None, None, test_event_blob, diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py index 2230417d9..9ecf236bc 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py +++ 
b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py @@ -22,10 +22,10 @@ from google.cloud import bigquery from google.cloud import storage -import gcs_ocn_bq_ingest.constants +import gcs_ocn_bq_ingest.common.constants +import gcs_ocn_bq_ingest.common.ordering +import gcs_ocn_bq_ingest.common.utils import gcs_ocn_bq_ingest.main -import gcs_ocn_bq_ingest.ordering -import gcs_ocn_bq_ingest.utils TEST_DIR = os.path.realpath(os.path.dirname(__file__) + "/..") LOAD_JOB_POLLING_TIMEOUT = 20 # seconds @@ -52,19 +52,20 @@ def test_backlog_publisher(gcs, gcs_bucket, gcs_partitioned_data, mock_env): for gcs_data in gcs_partitioned_data: if not gcs_data.exists(): raise EnvironmentError("test data objects must exist") - if gcs_data.name.endswith(gcs_ocn_bq_ingest.constants.SUCCESS_FILENAME): - table_prefix = gcs_ocn_bq_ingest.utils.get_table_prefix( + if gcs_data.name.endswith( + gcs_ocn_bq_ingest.common.constants.SUCCESS_FILENAME): + table_prefix = gcs_ocn_bq_ingest.common.utils.get_table_prefix( gcs_data.name) - gcs_ocn_bq_ingest.ordering.backlog_publisher(gcs, gcs_data) + gcs_ocn_bq_ingest.common.ordering.backlog_publisher(gcs, gcs_data) expected_backlog_blobs = queue.Queue() expected_backlog_blobs.put("/".join([ table_prefix, "_backlog", "$2017041101", - gcs_ocn_bq_ingest.constants.SUCCESS_FILENAME + gcs_ocn_bq_ingest.common.constants.SUCCESS_FILENAME ])) expected_backlog_blobs.put("/".join([ table_prefix, "_backlog", "$2017041102", - gcs_ocn_bq_ingest.constants.SUCCESS_FILENAME + gcs_ocn_bq_ingest.common.constants.SUCCESS_FILENAME ])) for backlog_blob in gcs_bucket.list_blobs( @@ -72,7 +73,8 @@ def test_backlog_publisher(gcs, gcs_bucket, gcs_partitioned_data, mock_env): assert backlog_blob.name == expected_backlog_blobs.get(block=False) backfill_blob: storage.Blob = gcs_bucket.blob( - f"{table_prefix}/{gcs_ocn_bq_ingest.constants.BACKFILL_FILENAME}") + f"{table_prefix}/{gcs_ocn_bq_ingest.common.constants.BACKFILL_FILENAME}" + ) assert 
backfill_blob.exists() @@ -89,7 +91,8 @@ def test_backlog_publisher_with_existing_backfill_file(gcs, gcs_bucket, table_prefix = "/".join( [dest_dataset.dataset_id, dest_partitioned_table.table_id]) backfill_blob: storage.Blob = gcs_bucket.blob( - f"{table_prefix}/{gcs_ocn_bq_ingest.constants.BACKFILL_FILENAME}") + f"{table_prefix}/{gcs_ocn_bq_ingest.common.constants.BACKFILL_FILENAME}" + ) backfill_blob.upload_from_string("") backfill_blob.reload() original_backfill_blob_generation = backfill_blob.generation @@ -98,20 +101,21 @@ def test_backlog_publisher_with_existing_backfill_file(gcs, gcs_bucket, for gcs_data in gcs_partitioned_data: if not gcs_data.exists(): raise EnvironmentError("test data objects must exist") - if gcs_data.name.endswith(gcs_ocn_bq_ingest.constants.SUCCESS_FILENAME): - table_prefix = gcs_ocn_bq_ingest.utils.get_table_prefix( + if gcs_data.name.endswith( + gcs_ocn_bq_ingest.common.constants.SUCCESS_FILENAME): + table_prefix = gcs_ocn_bq_ingest.common.utils.get_table_prefix( gcs_data.name) - gcs_ocn_bq_ingest.ordering.backlog_publisher(gcs, gcs_data) + gcs_ocn_bq_ingest.common.ordering.backlog_publisher(gcs, gcs_data) # Use of queue to test that list responses are returned in expected order. expected_backlog_blobs = queue.Queue() expected_backlog_blobs.put("/".join([ table_prefix, "_backlog", "$2017041101", - gcs_ocn_bq_ingest.constants.SUCCESS_FILENAME + gcs_ocn_bq_ingest.common.constants.SUCCESS_FILENAME ])) expected_backlog_blobs.put("/".join([ table_prefix, "_backlog", "$2017041102", - gcs_ocn_bq_ingest.constants.SUCCESS_FILENAME + gcs_ocn_bq_ingest.common.constants.SUCCESS_FILENAME ])) for backlog_blob in gcs_bucket.list_blobs( @@ -138,9 +142,9 @@ def test_backlog_subscriber_in_order_with_new_batch_after_exit( gets applied as expected. 
""" _run_subscriber(gcs, bq, gcs_external_update_config) - backlog_blobs = gcs_bucket.list_blobs( - prefix=f"{gcs_ocn_bq_ingest.utils.get_table_prefix(gcs_external_update_config.name)}/_backlog/" - ) + table_prefix = gcs_ocn_bq_ingest.common.utils.get_table_prefix( + gcs_external_update_config.name) + backlog_blobs = gcs_bucket.list_blobs(prefix=f"{table_prefix}/_backlog/") assert backlog_blobs.num_results == 0, "backlog is not empty" bqlock_blob: storage.Blob = gcs_bucket.blob("_bqlock") assert not bqlock_blob.exists(), "_bqlock was not cleaned up" @@ -213,7 +217,7 @@ def test_backlog_subscriber_in_order_with_new_batch_while_running( (bkt, dataset, table)) res_backlog_publisher.wait() res_monitor = pool.apply_async( - gcs_ocn_bq_ingest.ordering.subscriber_monitor, + gcs_ocn_bq_ingest.common.ordering.subscriber_monitor, (None, bkt, f"{dataset.project}.{dataset.dataset_id}/{table.table_id}/" f"_backlog/04/_SUCCESS")) @@ -225,9 +229,10 @@ def test_backlog_subscriber_in_order_with_new_batch_while_running( res_subscriber.wait() - backlog_blobs = gcs_bucket.list_blobs( - prefix=f"{gcs_ocn_bq_ingest.utils.get_table_prefix(gcs_external_update_config.name)}/" - f"_backlog/") + table_prefix = gcs_ocn_bq_ingest.common.utils.get_table_prefix( + gcs_external_update_config.name) + backlog_blobs = gcs_bucket.list_blobs(prefix=f"{table_prefix}/" + f"_backlog/") assert backlog_blobs.num_results == 0, "backlog is not empty" bqlock_blob: storage.Blob = gcs_bucket.blob("_bqlock") assert not bqlock_blob.exists(), "_bqlock was not cleaned up" @@ -247,9 +252,8 @@ def _run_subscriber( bq_client: Optional[bigquery.Client], backfill_blob, ): - gcs_ocn_bq_ingest.ordering.backlog_subscriber(gcs_client, - bq_client, backfill_blob, - time.monotonic()) + gcs_ocn_bq_ingest.common.ordering.backlog_subscriber( + gcs_client, bq_client, backfill_blob, time.monotonic()) def _post_a_new_batch(gcs_bucket, dest_dataset, dest_ordered_update_table): @@ -265,4 +269,4 @@ def _post_a_new_batch(gcs_bucket, 
dest_dataset, dest_ordered_update_table): "test-data", "ordering", "04", test_file), client=gcs) - return gcs_ocn_bq_ingest.ordering.backlog_publisher(gcs, data_obj) + return gcs_ocn_bq_ingest.common.ordering.backlog_publisher(gcs, data_obj) From c18e5e9787cd4398751dcf4ee2e16c523702004e Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 14 Dec 2020 18:36:28 -0800 Subject: [PATCH 37/90] ignore pylint redherring import errors --- .../gcs_ocn_bq_ingest/common/ordering.py | 6 +++--- .../gcs_ocn_bq_ingest/common/utils.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index 68e39542d..28d7e203d 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -30,9 +30,9 @@ from google.cloud import bigquery from google.cloud import storage -from . import constants -from . import exceptions -from . import utils +from . import constants # pylint: disable=no-name-in-module,import-error +from . import exceptions # pylint: disable=no-name-in-module,import-error +from . import utils # pylint: disable=no-name-in-module,import-error def backlog_publisher( diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 496ec8dae..764aec0a0 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -36,8 +36,8 @@ from google.cloud import bigquery from google.cloud import storage -from . import constants # pylint: disable=no-name-in-module -from . import exceptions # pylint: disable=no-name-in-module +from . 
import constants # pylint: disable=no-name-in-module,import-error +from . import exceptions # pylint: disable=no-name-in-module,import-error def external_query( # pylint: disable=too-many-arguments From 2c4376a0070cc4a432e88c8f10ba2fa6fdde23f1 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 14 Dec 2020 18:59:46 -0800 Subject: [PATCH 38/90] fixup! e2e tf to support builds where short_sha is set to empty string. --- tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf b/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf index 4c302663e..53d1adc07 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf @@ -19,7 +19,7 @@ output "bucket" { } resource "google_storage_bucket" "cloud_functions_source" { - name = "gcf-source-archives-${var.short_sha}" + name = "gcf-source-archives${var.short_sha}" project = var.project_id storage_class = "REGIONAL" location = var.region @@ -29,10 +29,10 @@ resource "google_storage_bucket" "cloud_functions_source" { module "gcs_ocn_bq_ingest" { source = "../terraform_module/gcs_ocn_bq_ingest_function" function_source_folder = "../gcs_ocn_bq_ingest" - app_id = "gcs-ocn-bq-ingest-e2e-test-${var.short_sha}" + app_id = "gcs-ocn-bq-ingest-e2e-test${var.short_sha}" cloudfunctions_source_bucket = google_storage_bucket.cloud_functions_source.name - data_ingester_sa = "data-ingester-sa-${var.short_sha}" - input_bucket = "gcs-ocn-bq-ingest-e2e-tests-${var.short_sha}" + data_ingester_sa = "data-ingester-sa${var.short_sha}" + input_bucket = "gcs-ocn-bq-ingest-e2e-tests${var.short_sha}" project_id = var.project_id environment_variables = { START_BACKFILL_FILENAME = "_HISTORYDONE" From b6690afb882e251874d1c8ba99a15b8923d246e5 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 14 Dec 2020 20:32:57 -0800 Subject: [PATCH 39/90] 
fix TF_VAR env var --- tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index 2ef218e43..56b7854be 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -143,7 +143,7 @@ steps: env: - 'TF_VAR_project_id=$PROJECT_ID' - 'TF_VAR_region=$_REGION' - - 'TF_VAR_suffix=$SHORT_SHA' + - 'TF_VAR_short_sha=$SHORT_SHA' id: 'terraform-e2e-apply' - name: 'gcr.io/$PROJECT_ID/gcs_event_based_ingest_ci' dir: '${_BUILD_DIR}' @@ -171,7 +171,7 @@ steps: env: - 'TF_VAR_project_id=$PROJECT_ID' - 'TF_VAR_region=$_REGION' - - 'TF_VAR_suffix=$SHORT_SHA' + - 'TF_VAR_short_sha=$SHORT_SHA' id: 'terraform-e2e-destroy' timeout: '3600s' options: From 36be628502f8386e7186076b4cedac409dc91926 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 14 Dec 2020 20:45:23 -0800 Subject: [PATCH 40/90] enable resource manager api --- .../terraform_module/gcs_ocn_bq_ingest_function/main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf index 80226e344..e52c55775 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf @@ -137,5 +137,6 @@ module "project-services" { "storage.googleapis.com", "pubsub.googleapis.com", "clouderrorreporting.googleapis.com", + "cloudresourcemanager.googleapis.com", ] } From 610374396e0bd193990f34f9cb37c4f496fd179e Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 14 Dec 2020 21:02:31 -0800 Subject: [PATCH 41/90] enable cloud 
functions api... --- .../terraform_module/gcs_ocn_bq_ingest_function/main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf index e52c55775..6094881c3 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf @@ -138,5 +138,6 @@ module "project-services" { "pubsub.googleapis.com", "clouderrorreporting.googleapis.com", "cloudresourcemanager.googleapis.com", + "cloudfunctions.googleapis.com", ] } From edcdae553577d6f6894dd28b2b2b34fea4ad6088 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 14 Dec 2020 21:27:14 -0800 Subject: [PATCH 42/90] add unit test timeout --- tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index 56b7854be..c23d78d15 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -118,6 +118,7 @@ steps: - '-c' # pip installing again to get GCB to recognize mocker from pytest-mock - 'pip install -r requirements-dev.txt && python3 -m pytest tests -m "not IT"' + timeout: 15s id: 'unit-test' - name: 'gcr.io/$PROJECT_ID/gcs_event_based_ingest_ci' dir: '${_BUILD_DIR}' From 63f480dbf8b50aca036c95040b062517073f8086 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 14 Dec 2020 21:41:49 -0800 Subject: [PATCH 43/90] explicit local backend --- .../cloud_functions/gcs_event_based_ingest/cloudbuild.yaml | 2 +- tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git 
a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index c23d78d15..b44ba6df8 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -154,7 +154,7 @@ steps: entrypoint: /bin/sh args: - '-c' - - 'python3 -m pytest e2e --tfstate=${_BUILD_DIR}/e2e/terraform.state' + - 'python3 -m pytest e2e --tfstate=/workspace/${_BUILD_DIR}/e2e/terraform.state' id: 'e2e-test' - name: 'hashicorp/terraform' waitFor: diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf b/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf index 53d1adc07..aed37b488 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf @@ -43,3 +43,9 @@ module "gcs_ocn_bq_ingest" { force_destroy = "true" } +terraform { + backend "local" { + path = "terraform.tfstate" + } +} + From 03d9b795f36375cfee48fcf0431cba16f93505d1 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 14 Dec 2020 21:50:52 -0800 Subject: [PATCH 44/90] debug missing state file --- tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index b44ba6df8..21163fb4f 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -154,7 +154,8 @@ steps: entrypoint: /bin/sh args: - '-c' - - 'python3 -m pytest e2e --tfstate=/workspace/${_BUILD_DIR}/e2e/terraform.state' +# - 'python3 -m pytest e2e --tfstate=/workspace/${_BUILD_DIR}/e2e/terraform.state' + - 'ls -R /workspace' id: 'e2e-test' - name: 'hashicorp/terraform' waitFor: From fa82f12f7ed813fdd252f5884aa179c698e01eff Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: 
Mon, 14 Dec 2020 22:05:11 -0800 Subject: [PATCH 45/90] debug --- tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index 21163fb4f..85bda8250 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -154,8 +154,8 @@ steps: entrypoint: /bin/sh args: - '-c' -# - 'python3 -m pytest e2e --tfstate=/workspace/${_BUILD_DIR}/e2e/terraform.state' - 'ls -R /workspace' +# - 'python3 -m pytest e2e --tfstate=/workspace/${_BUILD_DIR}/e2e/terraform.state' id: 'e2e-test' - name: 'hashicorp/terraform' waitFor: From d1acf9e933254c2d2055f6c0e4c8946ea54b060c Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 14 Dec 2020 22:18:58 -0800 Subject: [PATCH 46/90] relative state path --- tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index 85bda8250..cd1573d4e 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -154,8 +154,7 @@ steps: entrypoint: /bin/sh args: - '-c' - - 'ls -R /workspace' -# - 'python3 -m pytest e2e --tfstate=/workspace/${_BUILD_DIR}/e2e/terraform.state' + - 'python3 -m pytest e2e --tfstate=e2e/terraform.state' id: 'e2e-test' - name: 'hashicorp/terraform' waitFor: From b9e741c395b6d7f66a710326735a756ad5b1ccae Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 14 Dec 2020 22:27:23 -0800 Subject: [PATCH 47/90] typo .[tf]state --- tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index cd1573d4e..32cf50f14 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -154,7 +154,7 @@ steps: entrypoint: /bin/sh args: - '-c' - - 'python3 -m pytest e2e --tfstate=e2e/terraform.state' + - 'python3 -m pytest e2e --tfstate=e2e/terraform.tfstate' id: 'e2e-test' - name: 'hashicorp/terraform' waitFor: From dadacaa95eadb4f0c7f0ab032ea4e87bb06bc543 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 14 Dec 2020 22:48:27 -0800 Subject: [PATCH 48/90] fixup docs --- tools/cloud_functions/gcs_event_based_ingest/README.md | 7 ++++--- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/README.md index 70029831c..99d47ab60 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/README.md @@ -252,6 +252,7 @@ SELECT total_slot_ms, destination_table state, + error_result, (SELECT value FROM UNNEST(labels) WHERE key = "component") as component, (SELECT value FROM UNNEST(labels) WHERE key = "cloud-function-name") as cloud_function_name, (SELECT value FROM UNNEST(labels) WHERE key = "batch-id") as batch_id, @@ -351,7 +352,7 @@ The system tests assume that you have deployed the cloud function. 
export TF_VAR_short_sha=$(git rev-parse --short=7 HEAD) export TF_VAR_project_id=${YOUR_GCP_PROJECT_ID} (cd e2e && terraform init && terraform apply -auto-approve) -python3 -m pytest -m SYS +python3 -m pytest e2e --tfstate e2e/terraform.tfstate ``` ## Deployment @@ -379,7 +380,7 @@ gcloud functions deploy test-gcs-bq-ingest \ --trigger-topic=${PUBSUB_TOPIC} \ --service-account=${SERVICE_ACCOUNT_EMAIL} \ --timeout=540 \ - --set-env-vars='DESTINATION_REGEX=^(?:[\w\-0-9]+)/(?P[\w\-_0-9]+)/(?P
[\w\-_0-9]+)/?(?:incremental|history)?/?(?P[0-9]{4})?/?(?P[0-9]{2})?/?(?P
[0-9]{2})?/?(?P[0-9]{2})?/?(?P[0-9]+)?/?' + --set-env-vars='DESTINATION_REGEX=^(?:[\w\-0-9]+)/(?P[\w\-_0-9]+)/(?P
[\w\-_0-9]+)/?(?:incremental|history)?/?(?P[0-9]{4})?/?(?P[0-9]{2})?/?(?P
[0-9]{2})?/?(?P[0-9]{2})?/?(?P[0-9]+)?/?,FUNCTION_TIMEOUT_SEC=540' ``` #### Cloud Functions Events @@ -396,7 +397,7 @@ gcloud functions deploy test-gcs-bq-ingest \ --trigger-event google.storage.object.finalize --service-account=${SERVICE_ACCOUNT_EMAIL} \ --timeout=540 \ - --set-env-vars='DESTINATION_REGEX=^(?:[\w\-0-9]+)/(?P[\w\-_0-9]+)/(?P
[\w\-_0-9]+)/?(?:incremental|history)?/?(?P[0-9]{4})?/?(?P[0-9]{2})?/?(?P
[0-9]{2})?/?(?P[0-9]{2})?/?(?P[0-9]+)?/?' + --set-env-vars='DESTINATION_REGEX=^(?:[\w\-0-9]+)/(?P[\w\-_0-9]+)/(?P
[\w\-_0-9]+)/?(?:incremental|history)?/?(?P[0-9]{4})?/?(?P[0-9]{2})?/?(?P
[0-9]{2})?/?(?P[0-9]{2})?/?(?P[0-9]+)?/?,FUNCTION_TIMEOUT_SEC=540' ``` In theory, one could set up Pub/Sub notifications from multiple GCS Buckets diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md index e93b10056..cd701cd09 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md @@ -33,6 +33,7 @@ following default behavior. | `MAX_BATCH_BYTES` | Max bytes for BigQuery Load job | `15000000000000` ([15 TB](https://cloud.google.com/bigquery/quotas#load_jobs)| | `JOB_PREFIX` | Prefix for BigQuery Job IDs | `gcf-ingest-` | | `BQ_PROJECT` | Default BQ project to use if not specified in dataset capturing group | Project where Cloud Function is deployed | +| `FUNCTION_TIMEOUT_SEC`| Number of seconds set for this deployment of Cloud Function (no longer part of python38 runtime) | 60 | | `ORDER_PER_TABLE`\* | Force jobs to be executed sequentially (rather than parallel) based on the backlog. This is the same as having an `ORDERME` file in every config directory | `False` | | `START_BACKFILL_FILENAME`\*| Block submitting BigQuery Jobs for a table until this file is present at the table prefix. By default this will not happen. 
| `None` | | `RESTART_BUFFER_SECONDS`\* | Buffer before Cloud Function timeout to leave before re-triggering the backfill subscriber | 30 | From 41f04aedd7efe7e62030e9e68c4a43dcd8f6c65a Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 11:00:12 -0800 Subject: [PATCH 49/90] chore: clean up subscriber --- .../gcs_event_based_ingest/e2e/e2e_test.py | 21 ++- .../gcs_event_based_ingest/e2e/main.tf | 1 - .../gcs_ocn_bq_ingest/common/ordering.py | 160 +++++++++++------- .../gcs_ocn_bq_ingest/common/utils.py | 1 + 4 files changed, 115 insertions(+), 68 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py index 7eaa9f7e8..8e66658f3 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py @@ -14,6 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""End-to-end test for GCS event based ingest to BigQuery Cloud Function""" import concurrent.futures import json import time @@ -27,14 +28,20 @@ @pytest.mark.SYS -def test_gcs_ocn_bq_ingest_cloud_function( +def test_gcs_ocn_bq_ingest_cloud_function_long_runnning_bq_jobs_with_orderme( gcs: storage.Client, bq: bigquery.Client, tf_state: Dict, dest_table: bigquery.Table, ): - """drop some test data and assert that the excpected actions are taken by - the deployed cloud function""" + """This test assumes the cloud function has been deployed with the + accompanying terraform module which configures a 1 min timeout. + It exports some larger data from a public BigQuery table and then reloads + them to test table to test the cloud function behavior with longer running + BigQuery jobs which are likely to require the backlog subscriber to restart + itself by reposting a _BACKFILL file. 
The ordering behavior is controlled + with the ORDERME blob. + """ input_bucket_id = tf_state['outputs']['bucket']['value'] table_prefix = f"{dest_table.dataset_id}/" \ f"{dest_table.table_id}" @@ -64,13 +71,15 @@ def _extract(batch: str): bkt: storage.Bucket = gcs.lookup_bucket(input_bucket_id) # configure load jobs for this table - load_config = bkt.blob(f"{table_prefix}/_config/load.json") - load_config.upload_from_string( + load_config_blob = bkt.blob(f"{table_prefix}/_config/load.json") + load_config_blob.upload_from_string( json.dumps({ "writeDisposition": "WRITE_APPEND", "sourceFormat": "AVRO", "useAvroLogicalTypes": "True", })) + orderme_blob = bkt.blob(f"{table_prefix}/_config/ORDERME") + orderme_blob.upload_from_string("") # add historical success files for batch in history_batch_nums: historical_success_blob: storage.Blob = bkt.blob( @@ -78,7 +87,7 @@ def _extract(batch: str): historical_success_blob.upload_from_string("") # assert 0 bq rows (because _HISTORYDONE not dropped yet) - dest_table: bigquery.Table = bq.get_table(dest_table) + dest_table = bq.get_table(dest_table) assert dest_table.num_rows == 0, \ "history was ingested before _HISTORYDONE was uploaded" diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf b/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf index aed37b488..af45d7eed 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf @@ -36,7 +36,6 @@ module "gcs_ocn_bq_ingest" { project_id = var.project_id environment_variables = { START_BACKFILL_FILENAME = "_HISTORYDONE" - ORDER_PER_TABLE = "True" } # We'll use a shorter timeout for e2e stress subscriber re-triggering timeout = 60 diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index 28d7e203d..1b9bfeddf 100644 --- 
a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -53,7 +53,6 @@ def backlog_publisher( table_prefix) -# pylint: disable=too-many-arguments,too-many-locals,too-many-statements,too-many-branches def backlog_subscriber(gcs_client: Optional[storage.Client], bq_client: Optional[bigquery.Client], backfill_blob: storage.Blob, function_start_time: float): @@ -68,7 +67,6 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], float(os.getenv("FUNCTION_TIMEOUT_SEC", "60")) - constants.RESTART_BUFFER_SECONDS) print(f"restart time is {restart_time}") - backfill_blob_generation = backfill_blob.generation bkt = backfill_blob.bucket utils.handle_duplicate_notification(gcs_client, backfill_blob) table_prefix = utils.get_table_prefix(backfill_blob.name) @@ -93,28 +91,9 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], # the else will handle a manual _bqlock if lock_contents.startswith( os.getenv('JOB_PREFIX', constants.DEFAULT_JOB_PREFIX)): - job_id = lock_contents - try: - last_job_done = utils.wait_on_bq_job_id( - bq_client, job_id, polling_timeout) - except (exceptions.BigQueryJobFailure, - google.api_core.exceptions.NotFound) as err: - raise exceptions.BigQueryJobFailure( - f"previous BigQuery job: {job_id} failed or could not " - "be found. This will kill the backfill subscriber for " - f"the table prefix: {table_prefix}." 
- "Once the issue is dealt with by a human, the lock " - "file at: " - f"gs://{lock_blob.bucket.name}/{lock_blob.name} " - "should be manually removed and a new empty " - f"{constants.BACKFILL_FILENAME} " - "file uploaded to: " - f"gs://{backfill_blob.bucket.name}/{table_prefix}" - "/_BACKFILL " - f"to resume the backfill subscriber so it can " - "continue with the next item in the backlog.\n" - "Original Exception:\n" - f"{traceback.format_exc()}") from err + last_job_done = wait_on_last_job(bq_client, lock_blob, + backfill_blob, lock_contents, + polling_timeout) else: print(f"sleeping for {polling_timeout} seconds because" f"found manual lock gs://{bkt.name}/{lock_blob.name} with" @@ -136,39 +115,75 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], # If the BQ lock was missing we do not want to delete a backlog # item for a job we have not yet submitted. utils.remove_oldest_backlog_item(gcs_client, bkt, table_prefix) - - check_backlog_time = time.monotonic() - next_backlog_file = utils.get_next_backlog_item(gcs_client, bkt, - table_prefix) - if not next_backlog_file: - print("no more files found in the backlog deleteing backfill blob") - backfill_blob.delete(if_generation_match=backfill_blob_generation, - client=gcs_client) - if (check_backlog_time + constants.ENSURE_SUBSCRIBER_SECONDS < - time.monotonic()): - print( - "checking if the backlog is still empty for " - f"gs://${bkt.name}/{table_prefix}/_backlog/" - f"There was more than {constants.ENSURE_SUBSCRIBER_SECONDS}" - " seconds between listing items on the backlog and " - f"deleting the {constants.BACKFILL_FILENAME}. " - "This should not happen often but is meant to alleviate a " - "race condition in the event that something caused the " - "delete operation was delayed or had to be retried for a " - "long time.") - next_backlog_file = utils.get_next_backlog_item( - gcs_client, bkt, table_prefix) - if next_backlog_file: - # The backfill file was deleted but the backlog is - # not empty. 
Re-trigger the backfill subscriber loop by - # dropping a new backfill file. - start_backfill_subscriber_if_not_running( - gcs_client, bkt, table_prefix) - return - utils.handle_bq_lock(gcs_client, lock_blob, None) - print(f"backlog is empty for gs://{bkt.name}/{table_prefix}. " - "backlog subscriber exiting.") + should_subscriber_exit = handle_backlog(gcs_client, bq_client, bkt, + lock_blob, backfill_blob) + if should_subscriber_exit: return + # retrigger the subscriber loop by reposting the _BACKFILL file + print("ran out of time, restarting backfill subscriber loop for:" + f"gs://{bkt.name}/{table_prefix}") + backfill_blob = bkt.blob(f"{table_prefix}/{constants.BACKFILL_FILENAME}") + backfill_blob.upload_from_string("") + + +def wait_on_last_job(bq_client: bigquery.Client, lock_blob: storage.Blob, + backfill_blob: storage.blob, job_id: str, + polling_timeout: int): + """wait on a bigquery job or raise informative exception. + + Args: + bq_client: bigquery.Client + lock_blob: storage.Blob _bqlock blob + backfill_blob: storage.blob _BACKFILL blob + job_id: str BigQuery job ID to wait on (read from _bqlock file) + polling_timeout: int seconds to poll before returning. + """ + try: + return utils.wait_on_bq_job_id(bq_client, job_id, polling_timeout) + except (exceptions.BigQueryJobFailure, + google.api_core.exceptions.NotFound) as err: + table_prefix = utils.get_table_prefix(backfill_blob.name) + raise exceptions.BigQueryJobFailure( + f"previous BigQuery job: {job_id} failed or could not " + "be found. This will kill the backfill subscriber for " + f"the table prefix: {table_prefix}." 
+ "Once the issue is dealt with by a human, the lock " + "file at: " + f"gs://{lock_blob.bucket.name}/{lock_blob.name} " + "should be manually removed and a new empty " + f"{constants.BACKFILL_FILENAME} " + "file uploaded to: " + f"gs://{backfill_blob.bucket.name}/{table_prefix}" + "/_BACKFILL " + f"to resume the backfill subscriber so it can " + "continue with the next item in the backlog.\n" + "Original Exception:\n" + f"{traceback.format_exc()}") from err + + +def handle_backlog( + gcs_client: storage.Client, + bq_client: bigquery.Client, + bkt: storage.Bucket, + lock_blob: storage.Blob, + backfill_blob: storage.Blob, +): + """submit the next item in the _backlog if it is non-empty or clean up the + _BACKFILL and _bqlock files. + Args: + gcs_client: storage.Client + bq_client: bigquery.Client + bkt: storage.Bucket + lock_blob: storage.Blob _bqlock blob + backfill_blob: storage.blob _BACKFILL blob + Returns: + bool: should this backlog subscriber exit + """ + table_prefix = utils.get_table_prefix(backfill_blob.name) + check_backlog_time = time.monotonic() + next_backlog_file = utils.get_next_backlog_item(gcs_client, bkt, + table_prefix) + if next_backlog_file: next_success_file: storage.Blob = bkt.blob( next_backlog_file.name.replace("/_backlog/", "/")) table_ref, batch = utils.gcs_path_to_table_ref_and_batch( @@ -184,11 +199,34 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], next_job_id = utils.create_job_id(table_ref, batch) utils.apply(gcs_client, bq_client, next_success_file, lock_blob, next_job_id) - # retrigger the subscriber loop by reposting the _BACKFILL file - print("ran out of time, restarting backfill subscriber loop for:" - f"gs://{bkt.name}/{table_prefix}") - backfill_blob = bkt.blob(f"{table_prefix}/{constants.BACKFILL_FILENAME}") - backfill_blob.upload_from_string("") + return False # BQ job running + print("no more files found in the backlog deleteing backfill blob") + 
backfill_blob.delete(if_generation_match=backfill_blob.generation, + client=gcs_client) + if (check_backlog_time + constants.ENSURE_SUBSCRIBER_SECONDS < + time.monotonic()): + print("checking if the backlog is still empty for " + f"gs://${bkt.name}/{table_prefix}/_backlog/" + f"There was more than {constants.ENSURE_SUBSCRIBER_SECONDS}" + " seconds between listing items on the backlog and " + f"deleting the {constants.BACKFILL_FILENAME}. " + "This should not happen often but is meant to alleviate a " + "race condition in the event that something caused the " + "delete operation was delayed or had to be retried for a " + "long time.") + next_backlog_file = utils.get_next_backlog_item(gcs_client, bkt, + table_prefix) + if next_backlog_file: + # The backfill file was deleted but the backlog is + # not empty. Re-trigger the backfill subscriber loop by + # dropping a new backfill file. + start_backfill_subscriber_if_not_running(gcs_client, bkt, + table_prefix) + return True # we are re-triggering a new backlog subscriber + utils.handle_bq_lock(gcs_client, lock_blob, None) + print(f"backlog is empty for gs://{bkt.name}/{table_prefix}. " + "backlog subscriber exiting.") + return True # the backlog is empty def start_backfill_subscriber_if_not_running( diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 764aec0a0..9c41bb0fc 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -440,6 +440,7 @@ def handle_duplicate_notification( "storage notification.") from err +@cachetools.cached(cachetools.LRUCache(maxsize=1024)) def get_table_prefix(object_id: str) -> str: """Find the table prefix for a object_id based on the destination regex. 
Args: From d8ae3cfec50d3004961f34b385193588beea57cf Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 12:03:23 -0800 Subject: [PATCH 50/90] fix: don't try to regex match _backlog/* items --- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index cbd55cf7c..228c1e509 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -110,7 +110,10 @@ def triage_event(gcs_client: Optional[storage.Client], blob.""" bkt = event_blob.bucket basename_object_id = os.path.basename(event_blob.name) - if bq_client: + # the _backlog/ directory is likely to mess up the regex matching + # in gcs_path_to_table_ref_and_batch and we won't use the variables in that + # code path anyway. 
+ if bq_client and '_backlog' not in event_blob.name: table_ref, batch = utils.gcs_path_to_table_ref_and_batch( event_blob.name, bq_client.project) else: From d9f34823850001d174b455b3799af74c3ced970b Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 12:07:21 -0800 Subject: [PATCH 51/90] don't regex match in triage if ordering enabled (this happens later) --- .../gcs_ocn_bq_ingest/main.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 228c1e509..466108042 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -110,15 +110,6 @@ def triage_event(gcs_client: Optional[storage.Client], blob.""" bkt = event_blob.bucket basename_object_id = os.path.basename(event_blob.name) - # the _backlog/ directory is likely to mess up the regex matching - # in gcs_path_to_table_ref_and_batch and we won't use the variables in that - # code path anyway. - if bq_client and '_backlog' not in event_blob.name: - table_ref, batch = utils.gcs_path_to_table_ref_and_batch( - event_blob.name, bq_client.project) - else: - table_ref, batch = utils.gcs_path_to_table_ref_and_batch( - event_blob.name, None) if enforce_ordering: # For SUCCESS files in a backlog directory, ensure that subscriber @@ -155,6 +146,12 @@ def triage_event(gcs_client: Optional[storage.Client], return else: # Default behavior submit job as soon as success file lands. 
if basename_object_id == constants.SUCCESS_FILENAME: + if bq_client: + table_ref, batch = utils.gcs_path_to_table_ref_and_batch( + event_blob.name, bq_client.project) + else: + table_ref, batch = utils.gcs_path_to_table_ref_and_batch( + event_blob.name, None) utils.apply( gcs_client, bq_client, From 7d2f28f3bea84e0f19b02b2d2f6041d13c24d0b6 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 12:28:54 -0800 Subject: [PATCH 52/90] fix: subscriber monitor get table prefix --- .../gcs_ocn_bq_ingest/common/ordering.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index 1b9bfeddf..bfc39c535 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -305,7 +305,8 @@ def subscriber_monitor(gcs_client: Optional[storage.Client], if not gcs_client: gcs_client = storage.Client(client_info=constants.CLIENT_INFO) backfill_blob = start_backfill_subscriber_if_not_running( - gcs_client, bkt, utils.get_table_prefix(object_id)) + gcs_client, bkt, utils.get_table_prefix( + object_id.replace("_backlog/", ""))) # backfill blob may be none if the START_BACKFILL_FILENAME has not been # dropped From 35fe6e3a66b27bcc6588c6be9835489d8eabf699 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 12:53:12 -0800 Subject: [PATCH 53/90] fix: get_table_prefix issues w/ backlog, backfill and historydone --- .../gcs_event_based_ingest/README.md | 5 +++-- .../gcs_ocn_bq_ingest/common/ordering.py | 19 ++++++++++++------- .../gcs_ocn_bq_ingest/common/utils.py | 11 ++++++++++- .../gcs_ocn_bq_ingest/main.py | 7 ++++--- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/README.md 
b/tools/cloud_functions/gcs_event_based_ingest/README.md index 99d47ab60..f53917936 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/README.md @@ -349,10 +349,11 @@ python3 -m pytest -m IT #### Running System Tests Only The system tests assume that you have deployed the cloud function. ```bash -export TF_VAR_short_sha=$(git rev-parse --short=7 HEAD) -export TF_VAR_project_id=${YOUR_GCP_PROJECT_ID} +export TF_VAR_short_sha=$(git rev-parse --short=10 HEAD) +export TF_VAR_project_id=jferriero-pp-dev (cd e2e && terraform init && terraform apply -auto-approve) python3 -m pytest e2e --tfstate e2e/terraform.tfstate +(cd e2e && terraform destroy -auto-approve) ``` ## Deployment diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index bfc39c535..78f820de9 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -69,7 +69,8 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], print(f"restart time is {restart_time}") bkt = backfill_blob.bucket utils.handle_duplicate_notification(gcs_client, backfill_blob) - table_prefix = utils.get_table_prefix(backfill_blob.name) + table_prefix = utils.removesuffix(backfill_blob.name, + constants.BACKFILL_FILENAME) last_job_done = False # we will poll for job completion this long in an individual iteration of # the while loop (before checking if we are too close to cloud function @@ -142,7 +143,8 @@ def wait_on_last_job(bq_client: bigquery.Client, lock_blob: storage.Blob, return utils.wait_on_bq_job_id(bq_client, job_id, polling_timeout) except (exceptions.BigQueryJobFailure, google.api_core.exceptions.NotFound) as err: - table_prefix = utils.get_table_prefix(backfill_blob.name) + table_prefix = 
utils.removesuffix(backfill_blob.name, + constants.BACKFILL_FILENAME) raise exceptions.BigQueryJobFailure( f"previous BigQuery job: {job_id} failed or could not " "be found. This will kill the backfill subscriber for " @@ -179,7 +181,8 @@ def handle_backlog( Returns: bool: should this backlog subscriber exit """ - table_prefix = utils.get_table_prefix(backfill_blob.name) + table_prefix = utils.removesuffix(backfill_blob.name, + constants.BACKFILL_FILENAME) check_backlog_time = time.monotonic() next_backlog_file = utils.get_next_backlog_item(gcs_client, bkt, table_prefix) @@ -305,8 +308,8 @@ def subscriber_monitor(gcs_client: Optional[storage.Client], if not gcs_client: gcs_client = storage.Client(client_info=constants.CLIENT_INFO) backfill_blob = start_backfill_subscriber_if_not_running( - gcs_client, bkt, utils.get_table_prefix( - object_id.replace("_backlog/", ""))) + gcs_client, bkt, + utils.get_table_prefix(object_id)) # backfill blob may be none if the START_BACKFILL_FILENAME has not been # dropped @@ -326,14 +329,16 @@ def subscriber_monitor(gcs_client: Optional[storage.Client], "subscriber for this table.") backfill_blob.delete(client=gcs_client) start_backfill_subscriber_if_not_running( - gcs_client, bkt, utils.get_table_prefix(object_id)) + gcs_client, bkt, + utils.get_table_prefix(object_id)) return True time.sleep(constants.ENSURE_SUBSCRIBER_SECONDS) while not utils.wait_on_gcs_blob(gcs_client, backfill_blob, constants.ENSURE_SUBSCRIBER_SECONDS): start_backfill_subscriber_if_not_running( - gcs_client, bkt, utils.get_table_prefix(object_id)) + gcs_client, bkt, + utils.get_table_prefix(object_id)) return True return False diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 9c41bb0fc..3016f76ac 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ 
b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -448,7 +448,16 @@ def get_table_prefix(object_id: str) -> str: Returns: str: table prefix """ - match = constants.DESTINATION_REGEX.match(object_id) + basename = os.path.basename(object_id) + if basename in { + constants.BACKFILL_FILENAME, + constants.START_BACKFILL_FILENAME + }: + # These files will not match the regex and always should appear at the + # table level. + return removesuffix(object_id, basename) + match = constants.DESTINATION_REGEX.match( + object_id.replace("_backlog/", "")) if not match: raise exceptions.DestinationRegexMatchException( f"could not determine table prefix for object id: {object_id}" diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 466108042..6425fb506 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -128,7 +128,9 @@ def triage_event(gcs_client: Optional[storage.Client], and basename_object_id == constants.START_BACKFILL_FILENAME): # This will be the first backfill file. 
ordering.start_backfill_subscriber_if_not_running( - gcs_client, bkt, utils.get_table_prefix(event_blob.name)) + gcs_client, bkt, + utils.removesuffix(event_blob.name, + constants.START_BACKFILL_FILENAME)) return if basename_object_id == constants.SUCCESS_FILENAME: ordering.backlog_publisher(gcs_client, event_blob) @@ -169,8 +171,7 @@ def lazy_error_reporting_client() -> error_reporting.Client: """ global ERROR_REPORTING_CLIENT if not ERROR_REPORTING_CLIENT: - ERROR_REPORTING_CLIENT = error_reporting.Client( - client_info=constants.CLIENT_INFO) + ERROR_REPORTING_CLIENT = error_reporting.Client() return ERROR_REPORTING_CLIENT From d93a2c9df9325b49c94c90309ed0650f228a206a Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 13:50:42 -0800 Subject: [PATCH 54/90] fix: look_for_config_in_parents should return empty string for empty file --- tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py | 2 +- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py index 8e66658f3..39be223b6 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py @@ -32,7 +32,7 @@ def test_gcs_ocn_bq_ingest_cloud_function_long_runnning_bq_jobs_with_orderme( gcs: storage.Client, bq: bigquery.Client, tf_state: Dict, - dest_table: bigquery.Table, + dest_table: bigquery.Table ): """This test assumes the cloud function has been deployed with the accompanying terraform module which configures a 1 min timeout. 
diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 3016f76ac..5c05ab3ab 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -170,7 +170,7 @@ def _get_parent_config(path): config = None while parts: - if config: + if config is not None: return config config = _get_parent_config("/".join(parts)) parts.pop() From d50fefc49d87891ee62c3d248f21ce77066cccb2 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 14:02:43 -0800 Subject: [PATCH 55/90] fix table prefix w/ trailing slash --- tools/.gitignore | 1 + .../gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py | 2 +- .../tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 tools/.gitignore diff --git a/tools/.gitignore b/tools/.gitignore new file mode 100644 index 000000000..c18dd8d83 --- /dev/null +++ b/tools/.gitignore @@ -0,0 +1 @@ +__pycache__/ diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 5c05ab3ab..097f6a3dd 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -455,7 +455,7 @@ def get_table_prefix(object_id: str) -> str: }: # These files will not match the regex and always should appear at the # table level. 
- return removesuffix(object_id, basename) + return removesuffix(object_id, f"/{basename}") match = constants.DESTINATION_REGEX.match( object_id.replace("_backlog/", "")) if not match: diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py index 349780f32..ba6d95bf2 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py @@ -235,7 +235,6 @@ def test_recursive_update(original, update, expected): ("dataset/table/2020/01/02/03/batch_id/_SUCCESS", "dataset/table"), ("project.dataset/table/2020/01/02/03/batch_id/_SUCCESS", "project.dataset/table"), - ("dataset/table/_backlog/_BACKFILL", "dataset/table"), ]) def test_get_table_prefix(test_input, expected): assert gcs_ocn_bq_ingest.common.utils.get_table_prefix( From b16a8b0cc755d4a47b8b7e97f38426bba8762747 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 14:28:10 -0800 Subject: [PATCH 56/90] use get_table_prefix instead of removesuffix --- .../gcs_event_based_ingest/e2e/e2e_test.py | 7 ++----- .../gcs_ocn_bq_ingest/common/ordering.py | 18 ++++++------------ .../gcs_ocn_bq_ingest/common/utils.py | 5 ++--- .../test_gcs_ocn_bq_ingest.py | 3 +++ 4 files changed, 13 insertions(+), 20 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py index 39be223b6..4d35fab39 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py @@ -29,11 +29,8 @@ @pytest.mark.SYS def test_gcs_ocn_bq_ingest_cloud_function_long_runnning_bq_jobs_with_orderme( - gcs: storage.Client, - bq: bigquery.Client, - tf_state: Dict, - dest_table: bigquery.Table -): + gcs: 
storage.Client, bq: bigquery.Client, tf_state: Dict, + dest_table: bigquery.Table): """This test assumes the cloud function has been deployed with the accompanying terraform module which configures a 1 min timeout. It exports some larger data from a public BigQuery table and then reloads diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index 78f820de9..1b9bfeddf 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -69,8 +69,7 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], print(f"restart time is {restart_time}") bkt = backfill_blob.bucket utils.handle_duplicate_notification(gcs_client, backfill_blob) - table_prefix = utils.removesuffix(backfill_blob.name, - constants.BACKFILL_FILENAME) + table_prefix = utils.get_table_prefix(backfill_blob.name) last_job_done = False # we will poll for job completion this long in an individual iteration of # the while loop (before checking if we are too close to cloud function @@ -143,8 +142,7 @@ def wait_on_last_job(bq_client: bigquery.Client, lock_blob: storage.Blob, return utils.wait_on_bq_job_id(bq_client, job_id, polling_timeout) except (exceptions.BigQueryJobFailure, google.api_core.exceptions.NotFound) as err: - table_prefix = utils.removesuffix(backfill_blob.name, - constants.BACKFILL_FILENAME) + table_prefix = utils.get_table_prefix(backfill_blob.name) raise exceptions.BigQueryJobFailure( f"previous BigQuery job: {job_id} failed or could not " "be found. 
This will kill the backfill subscriber for " @@ -181,8 +179,7 @@ def handle_backlog( Returns: bool: should this backlog subscriber exit """ - table_prefix = utils.removesuffix(backfill_blob.name, - constants.BACKFILL_FILENAME) + table_prefix = utils.get_table_prefix(backfill_blob.name) check_backlog_time = time.monotonic() next_backlog_file = utils.get_next_backlog_item(gcs_client, bkt, table_prefix) @@ -308,8 +305,7 @@ def subscriber_monitor(gcs_client: Optional[storage.Client], if not gcs_client: gcs_client = storage.Client(client_info=constants.CLIENT_INFO) backfill_blob = start_backfill_subscriber_if_not_running( - gcs_client, bkt, - utils.get_table_prefix(object_id)) + gcs_client, bkt, utils.get_table_prefix(object_id)) # backfill blob may be none if the START_BACKFILL_FILENAME has not been # dropped @@ -329,16 +325,14 @@ def subscriber_monitor(gcs_client: Optional[storage.Client], "subscriber for this table.") backfill_blob.delete(client=gcs_client) start_backfill_subscriber_if_not_running( - gcs_client, bkt, - utils.get_table_prefix(object_id)) + gcs_client, bkt, utils.get_table_prefix(object_id)) return True time.sleep(constants.ENSURE_SUBSCRIBER_SECONDS) while not utils.wait_on_gcs_blob(gcs_client, backfill_blob, constants.ENSURE_SUBSCRIBER_SECONDS): start_backfill_subscriber_if_not_running( - gcs_client, bkt, - utils.get_table_prefix(object_id)) + gcs_client, bkt, utils.get_table_prefix(object_id)) return True return False diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 097f6a3dd..98c583899 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -450,14 +450,13 @@ def get_table_prefix(object_id: str) -> str: """ basename = os.path.basename(object_id) if basename in { - constants.BACKFILL_FILENAME, - 
constants.START_BACKFILL_FILENAME + constants.BACKFILL_FILENAME, constants.START_BACKFILL_FILENAME }: # These files will not match the regex and always should appear at the # table level. return removesuffix(object_id, f"/{basename}") match = constants.DESTINATION_REGEX.match( - object_id.replace("_backlog/", "")) + object_id.replace("/_backlog/", "/")) if not match: raise exceptions.DestinationRegexMatchException( f"could not determine table prefix for object id: {object_id}" diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py index ba6d95bf2..877ac0104 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py @@ -235,6 +235,9 @@ def test_recursive_update(original, update, expected): ("dataset/table/2020/01/02/03/batch_id/_SUCCESS", "dataset/table"), ("project.dataset/table/2020/01/02/03/batch_id/_SUCCESS", "project.dataset/table"), + ("dataset/table/_BACKFILL", "dataset/table"), + ("dataset/table/_bqlock", "dataset/table"), + ("dataset/table/_backlog/2020/01/02/03/_SUCCESS", "dataset/table"), ]) def test_get_table_prefix(test_input, expected): assert gcs_ocn_bq_ingest.common.utils.get_table_prefix( From f685511ea08a5fa6da0e516a5077dc6de1773bda Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 16:10:10 -0800 Subject: [PATCH 57/90] chore: refactor terraform into pytest fixture to always clean up --- .../gcs_event_based_ingest/.hadolint.yaml | 2 + .../gcs_event_based_ingest/Dockerfile.ci | 13 +++++- .../gcs_event_based_ingest/README.md | 4 +- .../gcs_event_based_ingest/cloudbuild.yaml | 40 +++---------------- .../gcs_event_based_ingest/e2e/conftest.py | 33 ++++++++++++--- .../gcs_event_based_ingest/e2e/e2e_test.py | 10 +++-- 
.../gcs_ocn_bq_ingest/common/utils.py | 4 +- .../scripts/install_terraform.sh | 27 +++++++++++++ 8 files changed, 84 insertions(+), 49 deletions(-) create mode 100644 tools/cloud_functions/gcs_event_based_ingest/.hadolint.yaml create mode 100755 tools/cloud_functions/gcs_event_based_ingest/scripts/install_terraform.sh diff --git a/tools/cloud_functions/gcs_event_based_ingest/.hadolint.yaml b/tools/cloud_functions/gcs_event_based_ingest/.hadolint.yaml new file mode 100644 index 000000000..8f7e23e45 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/.hadolint.yaml @@ -0,0 +1,2 @@ +ignored: + - DL3008 diff --git a/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci b/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci index f92277062..d383e7563 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci +++ b/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci @@ -1,4 +1,15 @@ -FROM python:3.8-slim +FROM python:3.8 +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + apt-transport-https \ + ca-certificates \ + curl \ + sudo \ + unzip \ + && apt-get autoremove -yqq --purge \ + && apt-get clean && rm -rf /var/lib/apt/lists/* COPY requirements.txt requirements-dev.txt ./ +COPY scripts/install_terraform.sh ./ +RUN ./install_terraform.sh RUN pip3 install --no-cache-dir -r requirements-dev.txt ENTRYPOINT ["python3 -m pytest"] diff --git a/tools/cloud_functions/gcs_event_based_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/README.md index f53917936..87515fb1c 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/README.md @@ -351,9 +351,7 @@ The system tests assume that you have deployed the cloud function. 
```bash export TF_VAR_short_sha=$(git rev-parse --short=10 HEAD) export TF_VAR_project_id=jferriero-pp-dev -(cd e2e && terraform init && terraform apply -auto-approve) -python3 -m pytest e2e --tfstate e2e/terraform.tfstate -(cd e2e && terraform destroy -auto-approve) +python3 -m pytest -vvv e2e ``` ## Deployment diff --git a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index 32cf50f14..fb9ff10de 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -20,6 +20,8 @@ steps: dir: '${_BUILD_DIR}' entrypoint: '/bin/hadolint' args: + - '--config' + - '.hadolint.yaml' - 'Dockerfile.ci' id: 'lint-ci-docker-image' - name: 'gcr.io/kaniko-project/executor:latest' @@ -130,50 +132,20 @@ steps: - '-c' - 'pip install -r requirements-dev.txt && python3 -m pytest tests -m IT' id: 'integration-test' -- name: 'hashicorp/terraform' - waitFor: - - 'integration-test' - dir: '${_BUILD_DIR}/e2e' - args: ['init'] - id: 'terraform-e2e-init' -- name: 'hashicorp/terraform' - waitFor: - - 'terraform-e2e-init' - dir: '${_BUILD_DIR}/e2e' - args: ['apply', '-auto-approve'] +- name: 'gcr.io/$PROJECT_ID/gcs_event_based_ingest_ci' + dir: '${_BUILD_DIR}' env: - 'TF_VAR_project_id=$PROJECT_ID' - 'TF_VAR_region=$_REGION' - 'TF_VAR_short_sha=$SHORT_SHA' - id: 'terraform-e2e-apply' -- name: 'gcr.io/$PROJECT_ID/gcs_event_based_ingest_ci' - dir: '${_BUILD_DIR}' waitFor: + - 'integration-test' - 'build-ci-image' - - 'terraform-e2e-apply' entrypoint: /bin/sh args: - '-c' - - 'python3 -m pytest e2e --tfstate=e2e/terraform.tfstate' + - 'python3 -m pytest -vvv e2e' id: 'e2e-test' -- name: 'hashicorp/terraform' - waitFor: - - 'e2e-test' - dir: '${_BUILD_DIR}/e2e' - # Note if the e2e test fails the resources will not be cleaned up due to - # cloud build not allowing ignored failed steps. 
- # this will allow maintainer to evaluate what went wrong during e2e test - # because the evidence will not be destroyed. - # Maintainers of bqutil project should destroy these resources after the - # failure cause has been diagnosed. - # We do not run this e2e test unless all unit and integration tests pass. - # https://github.com/GoogleCloudPlatform/cloud-builders/issues/253 - args: ['destroy', '-auto-approve'] - env: - - 'TF_VAR_project_id=$PROJECT_ID' - - 'TF_VAR_region=$_REGION' - - 'TF_VAR_short_sha=$SHORT_SHA' - id: 'terraform-e2e-destroy' timeout: '3600s' options: machineType: 'N1_HIGHCPU_32' diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py index 80b870617..bce25d00a 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py @@ -17,6 +17,8 @@ """End-to-end tests for event based BigQuery ingest Cloud Function.""" import json import os +import shlex +import subprocess import uuid import pytest @@ -24,6 +26,9 @@ from google.cloud import storage +TEST_DIR = os.path.realpath(os.path.dirname(__file__)) + + def pytest_addoption(parser): # if Terraform was used to deploy resources, pass the state details parser.addoption("--tfstate", action="store", default=None) @@ -42,13 +47,29 @@ def gcs() -> storage.Client: @pytest.fixture(scope='module') -def tf_state(pytestconfig): +def terraform_infra(request): + def _run(cmd): + print( + subprocess.check_output( + cmd, + stderr=subprocess.STDOUT, + cwd=TEST_DIR + ) + ) + + init = shlex.split("terraform init") + apply = shlex.split("terraform apply -auto-approve") + destroy = shlex.split("terraform destroy -auto-approve") + + _run(init) + _run(apply) - # if we used Terraform to create the GCP resources, use the output variables - if pytestconfig.getoption('tfstate') is not None: - tf_state_file = pytestconfig.getoption('tfstate') - with open(tf_state_file, 
'r', encoding='utf-8') as fp: - return json.load(fp) + def teardown(): + _run(destroy) + + request.addfinalizer(teardown) + with open(os.path.join(TEST_DIR, "terraform.tfstate")) as tf_state_file: + return json.load(tf_state_file) @pytest.fixture diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py index 4d35fab39..e5057a903 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py @@ -28,9 +28,11 @@ @pytest.mark.SYS -def test_gcs_ocn_bq_ingest_cloud_function_long_runnning_bq_jobs_with_orderme( - gcs: storage.Client, bq: bigquery.Client, tf_state: Dict, - dest_table: bigquery.Table): +def test_cloud_function_long_runnning_bq_jobs_with_orderme( + gcs: storage.Client, bq: bigquery.Client, + dest_table: bigquery.Table, + terraform_infra: Dict +): """This test assumes the cloud function has been deployed with the accompanying terraform module which configures a 1 min timeout. It exports some larger data from a public BigQuery table and then reloads @@ -39,7 +41,7 @@ def test_gcs_ocn_bq_ingest_cloud_function_long_runnning_bq_jobs_with_orderme( itself by reposting a _BACKFILL file. The ordering behavior is controlled with the ORDERME blob. 
""" - input_bucket_id = tf_state['outputs']['bucket']['value'] + input_bucket_id = terraform_infra['outputs']['bucket']['value'] table_prefix = f"{dest_table.dataset_id}/" \ f"{dest_table.table_id}" extract_config = bigquery.ExtractJobConfig() diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 98c583899..cd6ef936b 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -450,7 +450,9 @@ def get_table_prefix(object_id: str) -> str: """ basename = os.path.basename(object_id) if basename in { - constants.BACKFILL_FILENAME, constants.START_BACKFILL_FILENAME + constants.BACKFILL_FILENAME, + constants.START_BACKFILL_FILENAME, + "_bqlock", }: # These files will not match the regex and always should appear at the # table level. diff --git a/tools/cloud_functions/gcs_event_based_ingest/scripts/install_terraform.sh b/tools/cloud_functions/gcs_event_based_ingest/scripts/install_terraform.sh new file mode 100755 index 000000000..70f9cb521 --- /dev/null +++ b/tools/cloud_functions/gcs_event_based_ingest/scripts/install_terraform.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# Copyright 2020 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# +# This software is provided as-is, +# without warranty or representation for any use or purpose. +# Your use of it is subject to your agreement with Google. +set -eao pipefail + +TERRAFORM_VERSION="0.14.2" +TERRAFORM_BASE_URL="https://releases.hashicorp.com/terraform" +TERRAFORM_ZIP="terraform_${TERRAFORM_VERSION}_$(uname | tr '[:upper:]' '[:lower:]')_amd64.zip" +echo "Downloading from ${TERRAFORM_BASE_URL}/${TERRAFORM_VERSION}/${TERRAFORM_ZIP}" +curl -Lo /tmp/terraform.zip "${TERRAFORM_BASE_URL}/${TERRAFORM_VERSION}/${TERRAFORM_ZIP}" +sudo unzip /tmp/terraform.zip -d /bin From 905949dcbeb80dabbad2be43aa55c5885372b323 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 17:40:51 -0800 Subject: [PATCH 58/90] fix don't removesuffix for start backfill file --- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 6425fb506..6d5dbad73 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -66,7 +66,7 @@ def main(event: Dict, context): # pylint: disable=unused-argument action_filenames = constants.ACTION_FILENAMES if constants.START_BACKFILL_FILENAME is None: action_filenames.remove(None) - print(f"No-op. This notification was not for a" + print(f"No-op. This notification was not for a " f"{action_filenames} file.") return @@ -86,7 +86,7 @@ def main(event: Dict, context): # pylint: disable=unused-argument # Unexpected exceptions will actually raise which may cause a cold restart. except exceptions.DuplicateNotificationException: - print("recieved duplicate notification. this was handled gracefully." + print("recieved duplicate notification. 
this was handled gracefully.\n " f"{traceback.format_exc()}") except exceptions.EXCEPTIONS_TO_REPORT as original_error: @@ -117,8 +117,8 @@ def triage_event(gcs_client: Optional[storage.Client], if (basename_object_id == constants.SUCCESS_FILENAME and "/_backlog/" in event_blob.name): print(f"This notification was for " - f"gs://{bkt.name}/{event_blob.name} a" - f"{constants.SUCCESS_FILENAME} in a" + f"gs://{bkt.name}/{event_blob.name} a " + f"{constants.SUCCESS_FILENAME} in a " "/_backlog/ directory. " f"Watiting {constants.ENSURE_SUBSCRIBER_SECONDS} seconds to " "ensure that subscriber is running.") @@ -129,8 +129,8 @@ def triage_event(gcs_client: Optional[storage.Client], # This will be the first backfill file. ordering.start_backfill_subscriber_if_not_running( gcs_client, bkt, - utils.removesuffix(event_blob.name, - constants.START_BACKFILL_FILENAME)) + utils.get_table_prefix(event_blob.name) + ) return if basename_object_id == constants.SUCCESS_FILENAME: ordering.backlog_publisher(gcs_client, event_blob) From 675c756d9ef581e9f99bdf1b824d8532d502779d Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 17:43:18 -0800 Subject: [PATCH 59/90] fixup isort --- tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py index bce25d00a..69aaea108 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py @@ -25,7 +25,6 @@ from google.cloud import bigquery from google.cloud import storage - TEST_DIR = os.path.realpath(os.path.dirname(__file__)) From f0ebcd04dcc08d3c754db97ed5de1d51e060e0ac Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 17:51:55 -0800 Subject: [PATCH 60/90] more logging statements fail on untriageable event --- .../gcs_ocn_bq_ingest/common/ordering.py | 4 ++++ 
.../gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index 1b9bfeddf..488292f39 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -246,6 +246,10 @@ def start_backfill_subscriber_if_not_running( start_backfill_blob = bkt.blob( f"{table_prefix}/{constants.START_BACKFILL_FILENAME}") start_backfill = start_backfill_blob.exists(client=gcs_client) + if not start_backfill: + print("note triggering backfill because" + f"gs://{start_backfill_blob.bucket.name}/" + f"{start_backfill_blob.name} was not found.") if start_backfill: # Create a _BACKFILL file for this table if not exists diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 6d5dbad73..652efb165 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -126,6 +126,9 @@ def triage_event(gcs_client: Optional[storage.Client], return if (constants.START_BACKFILL_FILENAME and basename_object_id == constants.START_BACKFILL_FILENAME): + print( + f"notification for gs://{event_blob.bucket.name}/" + f"{event_blob.name}") # This will be the first backfill file. ordering.start_backfill_subscriber_if_not_running( gcs_client, bkt, @@ -146,6 +149,9 @@ def triage_event(gcs_client: Optional[storage.Client], ordering.backlog_subscriber(gcs_client, bq_client, event_blob, function_start_time) return + raise RuntimeError( + f"gs://{event_blob.bucket.name}/" + f"{event_blob.name} could not be triaged.") else: # Default behavior submit job as soon as success file lands. 
if basename_object_id == constants.SUCCESS_FILENAME: if bq_client: From b83fee87f9db4c76b49db42b71c16bb4748bac0e Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 17:59:42 -0800 Subject: [PATCH 61/90] fix pylint --- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 652efb165..f5a4e123c 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -111,6 +111,7 @@ def triage_event(gcs_client: Optional[storage.Client], bkt = event_blob.bucket basename_object_id = os.path.basename(event_blob.name) + # pylint: disable=no-else-raise if enforce_ordering: # For SUCCESS files in a backlog directory, ensure that subscriber # is running. From eae687fad22367b687958b3f1637dcc254bb169d Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 18:48:26 -0800 Subject: [PATCH 62/90] feat: env-var t numDmlRowsAffected = 0 as a failure --- .../gcs_event_based_ingest/README.md | 27 ++++++++++++++--- .../gcs_event_based_ingest/e2e/conftest.py | 9 ++---- .../gcs_event_based_ingest/e2e/e2e_test.py | 6 ++-- .../gcs_ocn_bq_ingest/README.md | 1 + .../gcs_ocn_bq_ingest/common/constants.py | 11 +++++++ .../gcs_ocn_bq_ingest/common/utils.py | 29 +++++++++++++++---- .../gcs_ocn_bq_ingest/main.py | 14 ++++----- 7 files changed, 68 insertions(+), 29 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/README.md index 87515fb1c..8cf9a18d7 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/README.md @@ -156,14 +156,33 @@ before they can be loaded to BigQuery. 
This is handled by query on an temporary external table over the GCS objects as a proxy for load job. `gs://${INGESTION_BUCKET}/${BQ_DATASET}/${BQ_TABLE_NAME}/_config/bq_transform.sql` -Note, external queries will consume query slots from this project's reservation -or count towards your on-demand billing. They will _not_ use free tie load slots. +By default, if a query job finishes of statement type +`INSERT`,`UPDATE`,`DELETE`, or `MERGE` and `numDmlRowsAffected = 0` this will be +treated as a failure ([See Query Job Statistics API docs](https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobstatistics2)). +This is usually due to a bad query / configuration with bad DML predicate. +For example running the following query on an empty table: +```sql +UPDATE foo.bar dest ... FROM temp_ext src WHERE src.id = dest.id +``` + +By failing on this condition we keep the backlog intact when we run a query job +that unexpectedly did no affect any rows. +This can be disabled by setting the environment variable +`FAIL_ON_ZERO_DML_ROWS_AFFECTED=False`. + +A `CREATE OR REPLACE TABLE` is not DML and will not be subject to this behavior. + +##### Cost Note +External queries will consume query slots from this project's reservation +or count towards your on-demand billing. +They will _not_ use free tier load slots. + +##### External Table Name: `temp_ext` Note, that the query should select from a `temp_ext` which will be a temporary external table configured on the fly by the Cloud Function. The query must handle the logic for inserting into the destination table. -This means it should use BigQuery DML to either `INSERT` or `MERGE` into the -destination table. +This means it should use BigQuery DML to mutate the destination table. 
For example: ```sql INSERT {dest_dataset}.{dest_table} diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py index 69aaea108..58af67496 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py @@ -47,14 +47,11 @@ def gcs() -> storage.Client: @pytest.fixture(scope='module') def terraform_infra(request): + def _run(cmd): print( - subprocess.check_output( - cmd, - stderr=subprocess.STDOUT, - cwd=TEST_DIR - ) - ) + subprocess.check_output(cmd, stderr=subprocess.STDOUT, + cwd=TEST_DIR)) init = shlex.split("terraform init") apply = shlex.split("terraform apply -auto-approve") diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py index e5057a903..b8542631c 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py @@ -29,10 +29,8 @@ @pytest.mark.SYS def test_cloud_function_long_runnning_bq_jobs_with_orderme( - gcs: storage.Client, bq: bigquery.Client, - dest_table: bigquery.Table, - terraform_infra: Dict -): + gcs: storage.Client, bq: bigquery.Client, dest_table: bigquery.Table, + terraform_infra: Dict): """This test assumes the cloud function has been deployed with the accompanying terraform module which configures a 1 min timeout. It exports some larger data from a public BigQuery table and then reloads diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md index cd701cd09..5ffea5c17 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md @@ -34,6 +34,7 @@ following default behavior. 
| `JOB_PREFIX` | Prefix for BigQuery Job IDs | `gcf-ingest-` | | `BQ_PROJECT` | Default BQ project to use if not specified in dataset capturing group | Project where Cloud Function is deployed | | `FUNCTION_TIMEOUT_SEC`| Number of seconds set for this deployment of Cloud Function (no longer part of python38 runtime) | 60 | +| `FAIL_ON_ZERO_DML_ROWS_AFFECTED` | Treat External Queries that result in `numDmlAffectedRows = 0` as failures | True | | `ORDER_PER_TABLE`\* | Force jobs to be executed sequentially (rather than parallel) based on the backlog. This is the same as having an `ORDERME` file in every config directory | `False` | | `START_BACKFILL_FILENAME`\*| Block submitting BigQuery Jobs for a table until this file is present at the table prefix. By default this will not happen. | `None` | | `RESTART_BUFFER_SECONDS`\* | Buffer before Cloud Function timeout to leave before re-triggering the backfill subscriber | 30 | diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py index 50faf6d12..61931ec31 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py @@ -123,3 +123,14 @@ BQ_TRANSFORM_SQL = "*.sql" ENSURE_SUBSCRIBER_SECONDS = 5 + +FAIL_ON_ZERO_DML_ROWS_AFFECTED = bool( + distutils.util.strtobool(os.getenv("FAIL_ON_ZERO_DML_ROWS_AFFECTED", + "True"))) + +BQ_DML_STATEMENT_TYPES = { + "INSERT", + "UPDATE", + "DELETE", + "MERGE", +} diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index cd6ef936b..81d549cdb 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -23,6 
+23,7 @@ import json import os import pathlib +import pprint import time import uuid from typing import Any, Deque, Dict, List, Optional, Tuple, Union @@ -93,8 +94,15 @@ def external_query( # pylint: disable=too-many-arguments job.reload(client=bq_client) if job.errors: raise exceptions.BigQueryJobFailure( - f"query job {job.job_id} failed quickly: {job.errors}") + f"query job {job.job_id} failed quickly: {job.errors}." + f"\n{pprint.pformat(job.to_api_repr())}") if job.state == "DONE": + if (constants.FAIL_ON_ZERO_DML_ROWS_AFFECTED + and job.statement_type in constants.BQ_DML_STATEMENT_TYPES + and job.num_dml_affected_rows < 1): + raise exceptions.BigQueryJobFailure( + f"query job {job.job_id} ran successfully but did not affect" + f"any rows.\n {pprint.pformat(job.to_api_repr())}") return time.sleep(constants.JOB_POLL_INTERVAL_SECONDS) @@ -130,7 +138,8 @@ def load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id): job.reload(client=bq_client) if job.errors: raise exceptions.BigQueryJobFailure( - f"load job {job.job_id} failed quickly: {job.errors}") + f"load job {job.job_id} failed quickly: {job.errors}\n" + f"{pprint.pformat(job.to_api_repr())}") time.sleep(constants.JOB_POLL_INTERVAL_SECONDS) @@ -450,9 +459,9 @@ def get_table_prefix(object_id: str) -> str: """ basename = os.path.basename(object_id) if basename in { - constants.BACKFILL_FILENAME, - constants.START_BACKFILL_FILENAME, - "_bqlock", + constants.BACKFILL_FILENAME, + constants.START_BACKFILL_FILENAME, + "_bqlock", }: # These files will not match the regex and always should appear at the # table level. 
@@ -554,7 +563,15 @@ def wait_on_bq_job_id(bq_client: bigquery.Client, if job.errors: raise exceptions.BigQueryJobFailure( f"BigQuery Job {job.job_id} failed during backfill with the" - f"following errors: {job.errors}") + f"following errors: {job.errors}\n" + f"{pprint.pformat(job.to_api_repr())}") + if (isinstance(job, bigquery.QueryJob) + and constants.FAIL_ON_ZERO_DML_ROWS_AFFECTED + and job.statement_type in constants.BQ_DML_STATEMENT_TYPES + and job.num_dml_affected_rows < 1): + raise exceptions.BigQueryJobFailure( + f"query job {job.job_id} ran successfully but did not" + f"affect any rows.\n {pprint.pformat(job.to_api_repr())}") return True if job.state in {"RUNNING", "PENDING"}: print(f"waiting on BigQuery Job {job.job_id}") diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index f5a4e123c..10d2eb3b3 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -127,14 +127,11 @@ def triage_event(gcs_client: Optional[storage.Client], return if (constants.START_BACKFILL_FILENAME and basename_object_id == constants.START_BACKFILL_FILENAME): - print( - f"notification for gs://{event_blob.bucket.name}/" - f"{event_blob.name}") + print(f"notification for gs://{event_blob.bucket.name}/" + f"{event_blob.name}") # This will be the first backfill file. 
ordering.start_backfill_subscriber_if_not_running( - gcs_client, bkt, - utils.get_table_prefix(event_blob.name) - ) + gcs_client, bkt, utils.get_table_prefix(event_blob.name)) return if basename_object_id == constants.SUCCESS_FILENAME: ordering.backlog_publisher(gcs_client, event_blob) @@ -150,9 +147,8 @@ def triage_event(gcs_client: Optional[storage.Client], ordering.backlog_subscriber(gcs_client, bq_client, event_blob, function_start_time) return - raise RuntimeError( - f"gs://{event_blob.bucket.name}/" - f"{event_blob.name} could not be triaged.") + raise RuntimeError(f"gs://{event_blob.bucket.name}/" + f"{event_blob.name} could not be triaged.") else: # Default behavior submit job as soon as success file lands. if basename_object_id == constants.SUCCESS_FILENAME: if bq_client: From 94136b623a14b9f89be307bdc8667750a89a5334 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 19:36:21 -0800 Subject: [PATCH 63/90] [skip ci] add comment to cloudbuild.yaml --- tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index fb9ff10de..6ed07b7de 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -120,6 +120,7 @@ steps: - '-c' # pip installing again to get GCB to recognize mocker from pytest-mock - 'pip install -r requirements-dev.txt && python3 -m pytest tests -m "not IT"' + # GCB can sometimes get hung on this step for no reason but is doomed to not recover. 
timeout: 15s id: 'unit-test' - name: 'gcr.io/$PROJECT_ID/gcs_event_based_ingest_ci' From 790abb1ec1656cce0fed09417fd83bc29fb43060 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 19:38:11 -0800 Subject: [PATCH 64/90] [skip ci] update comment in cloudbuild.yaml --- tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index 6ed07b7de..32d39e742 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -120,7 +120,9 @@ steps: - '-c' # pip installing again to get GCB to recognize mocker from pytest-mock - 'pip install -r requirements-dev.txt && python3 -m pytest tests -m "not IT"' - # GCB can sometimes get hung on this step for no reason but is doomed to not recover. + # GCB sometimes get stuck on this step and is doomed to not recover. + # This is usually remedied by just re-running the build. + # adding this unit-test step level timeout so we can fail sooner and retry. 
timeout: 15s id: 'unit-test' - name: 'gcr.io/$PROJECT_ID/gcs_event_based_ingest_ci' From 94ca2f6891e55acc9cfde24525b375c8737ebce2 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 15 Dec 2020 19:44:01 -0800 Subject: [PATCH 65/90] chore: clean up unused fixture, init files --- tools/__init__.py | 0 tools/cloud_functions/__init__.py | 0 tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py | 5 ----- 3 files changed, 5 deletions(-) delete mode 100644 tools/__init__.py delete mode 100644 tools/cloud_functions/__init__.py diff --git a/tools/__init__.py b/tools/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tools/cloud_functions/__init__.py b/tools/cloud_functions/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py index 58af67496..f3cd060cd 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py @@ -28,11 +28,6 @@ TEST_DIR = os.path.realpath(os.path.dirname(__file__)) -def pytest_addoption(parser): - # if Terraform was used to deploy resources, pass the state details - parser.addoption("--tfstate", action="store", default=None) - - @pytest.fixture(scope="module") def bq() -> bigquery.Client: """BigQuery Client""" From b216d8886836ba170f0500e42578892a00d4320b Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 16 Dec 2020 11:44:14 -0800 Subject: [PATCH 66/90] chore: improve terraform printint in pytest fixture --- .../gcs_event_based_ingest/e2e/conftest.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py index f3cd060cd..bd64a2660 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py +++ 
b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py @@ -17,6 +17,7 @@ """End-to-end tests for event based BigQuery ingest Cloud Function.""" import json import os +import re import shlex import subprocess import uuid @@ -27,6 +28,8 @@ TEST_DIR = os.path.realpath(os.path.dirname(__file__)) +ANSI_ESCAPE_PATTERN = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]') + @pytest.fixture(scope="module") def bq() -> bigquery.Client: @@ -45,8 +48,15 @@ def terraform_infra(request): def _run(cmd): print( - subprocess.check_output(cmd, stderr=subprocess.STDOUT, - cwd=TEST_DIR)) + ANSI_ESCAPE_PATTERN.sub( + '', + subprocess.check_output( + cmd, + stderr=subprocess.STDOUT, + cwd=TEST_DIR + ).decode('UTF-8') + ) + ) init = shlex.split("terraform init") apply = shlex.split("terraform apply -auto-approve") From d5fe02bfc88e1cd554731f7cd0b1f76f996d146a Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 16 Dec 2020 13:47:03 -0800 Subject: [PATCH 67/90] better bq job ids --- .../gcs_event_based_ingest/README.md | 7 ++- .../gcs_ocn_bq_ingest/common/constants.py | 3 ++ .../gcs_ocn_bq_ingest/common/ordering.py | 2 +- .../gcs_ocn_bq_ingest/common/utils.py | 43 +++++++------------ .../gcs_ocn_bq_ingest/main.py | 2 +- 5 files changed, 27 insertions(+), 30 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/README.md index 8cf9a18d7..25ab421aa 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/README.md @@ -278,8 +278,13 @@ SELECT FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT WHERE - (SELECT value FROM UNNEST(labels) WHERE key = "component") = "gcf-ingest-" + (SELECT value FROM UNNEST(labels) WHERE key = "component") = "event-based-gcs-ingest" ``` +If your external queries have mutliple sql statements only the parent job will +follow the `gcf-ingest-*` naming convention. Children jobs (for each statement) +begin with prefix _script_job. 
These jobs will still be labelled with +`component` and `cloud-function-name`. +For more information see [Scripting in Standard SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/scripting) ## Triggers GCS Object Finalize triggers can communicate with Cloud Functions directly or diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py index 61931ec31..bd13fef44 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py @@ -134,3 +134,6 @@ "DELETE", "MERGE", } + +# https://cloud.google.com/bigquery/docs/running-jobs#generate-jobid +NON_BQ_JOB_ID_REGEX = re.compile('[^0-9a-zA-Z_\-]+') diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index 488292f39..5195ba248 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -196,7 +196,7 @@ def handle_backlog( f"gs://{next_success_file.bucket}/{next_success_file.name}") print("applying next batch for:" f"gs://{next_success_file.bucket}/{next_success_file.name}") - next_job_id = utils.create_job_id(table_ref, batch) + next_job_id = utils.create_job_id(next_success_file.name) utils.apply(gcs_client, bq_client, next_success_file, lock_blob, next_job_id) return False # BQ job running diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 81d549cdb..78c2e6419 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ 
b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -651,35 +651,24 @@ def gcs_path_to_table_ref_and_batch( return dest_table_ref, batch_id -def create_job_id(dest_table_ref: bigquery.TableReference, - batch_id: Optional[str]): - """Create job id prefix with a consistent naming convention. - The naming conventions is as follows: - gcf-ingest----- - Parts that are not inferrable from the GCS path with have a 'None' - placeholder. This naming convention is crucial for monitoring the system. +def create_job_id(success_file_path): + """Create job id prefix with a consistent naming convention based on the + success file path to give context of what caused this job to be submitted. + the rules for success file name -> job id are: + 1. slashes to dashes + 2. all non-alphanumeric dash or underscore will be replaced with underscore Note, gcf-ingest- can be overridden with environment variable JOB_PREFIX - - Examples: - - Non-partitioned Non batched tables: - - gs://${BUCKET}/tpch/lineitem/_SUCCESS - - gcf-ingest-tpch-lineitem-None-None- - Non-partitioned batched tables: - - gs://${BUCKET}/tpch/lineitem/batch000/_SUCCESS - - gcf-ingest-tpch-lineitem-None-batch000- - Partitioned Batched tables: - - gs://${BUCKET}/tpch/lineitem/$20201031/batch000/_SUCCESS - - gcf-ingest-tpch-lineitem-20201031-batch000- + 3. 
uuid for uniqueness """ - table_partition = dest_table_ref.table_id.split("$") - if len(table_partition) < 2: - # If there is no partition put a None placeholder - table_partition.append("None") - return f"{os.getenv('JOB_PREFIX', constants.DEFAULT_JOB_PREFIX)}" \ - f"{dest_table_ref.dataset_id}-" \ - f"{'-'.join(table_partition)}-" \ - f"{batch_id}-{uuid.uuid4()}" + clean_job_id = os.getenv('JOB_PREFIX', constants.DEFAULT_JOB_PREFIX) + clean_job_id += constants.NON_BQ_JOB_ID_REGEX.sub( + '_', + success_file_path.replace('/', '-') + ) + # add uniqueness in case we have to "re-process" a success file that is + # republished or handle multiple load jobs. + clean_job_id += str(uuid.uuid4()) + return clean_job_id[:1024] # make sure job id isn't too long def handle_bq_lock(gcs_client: storage.Client, lock_blob: storage.Blob, diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 10d2eb3b3..418963313 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -162,7 +162,7 @@ def triage_event(gcs_client: Optional[storage.Client], bq_client, event_blob, None, # no lock blob when ordering not enabled. 
- utils.create_job_id(table_ref, batch)) + utils.create_job_id(event_blob.name)) def lazy_error_reporting_client() -> error_reporting.Client: From fcb88a0b49fa56f340622e1a5ca373a9a5236a5f Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 16 Dec 2020 13:53:54 -0800 Subject: [PATCH 68/90] fixup regex escaping --- .../gcs_ocn_bq_ingest/common/constants.py | 2 +- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py index bd13fef44..1a5a1defa 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py @@ -136,4 +136,4 @@ } # https://cloud.google.com/bigquery/docs/running-jobs#generate-jobid -NON_BQ_JOB_ID_REGEX = re.compile('[^0-9a-zA-Z_\-]+') +NON_BQ_JOB_ID_REGEX = re.compile(r'[^0-9a-zA-Z_\-]+') diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 78c2e6419..31a7cb589 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -666,7 +666,8 @@ def create_job_id(success_file_path): success_file_path.replace('/', '-') ) # add uniqueness in case we have to "re-process" a success file that is - # republished or handle multiple load jobs. + # republished (e.g. to fix a bad batch of data) or handle multiple load jobs + # for a single success file. 
clean_job_id += str(uuid.uuid4()) return clean_job_id[:1024] # make sure job id isn't too long From 85cea34a25181996d88278f1adb1924d6a283938 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 16 Dec 2020 14:14:57 -0800 Subject: [PATCH 69/90] make pylint happy --- .../gcs_ocn_bq_ingest/common/ordering.py | 2 -- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py | 6 ------ 2 files changed, 8 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index 5195ba248..c6362755a 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -186,8 +186,6 @@ def handle_backlog( if next_backlog_file: next_success_file: storage.Blob = bkt.blob( next_backlog_file.name.replace("/_backlog/", "/")) - table_ref, batch = utils.gcs_path_to_table_ref_and_batch( - next_success_file.name, bq_client.project) if not next_success_file.exists(client=gcs_client): raise exceptions.BacklogException( "backlog contains " diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 418963313..1d2ef71eb 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -151,12 +151,6 @@ def triage_event(gcs_client: Optional[storage.Client], f"{event_blob.name} could not be triaged.") else: # Default behavior submit job as soon as success file lands. 
if basename_object_id == constants.SUCCESS_FILENAME: - if bq_client: - table_ref, batch = utils.gcs_path_to_table_ref_and_batch( - event_blob.name, bq_client.project) - else: - table_ref, batch = utils.gcs_path_to_table_ref_and_batch( - event_blob.name, None) utils.apply( gcs_client, bq_client, From f7af0fb0d1847229682f83763fb4975e0d8d3df9 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 16 Dec 2020 15:50:21 -0800 Subject: [PATCH 70/90] [skip ci] more docs --- .../gcs_event_based_ingest/ORDERING.md | 69 ++++++++++++++++++- .../gcs_event_based_ingest/README.md | 15 ++++ 2 files changed, 81 insertions(+), 3 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md b/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md index c85020276..4ae20dd0f 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md +++ b/tools/cloud_functions/gcs_event_based_ingest/ORDERING.md @@ -75,6 +75,12 @@ address the failed batch: "Original Exception:\n" f"{traceback.format_exc()}") ``` +Note that once the `_bqlock` is removed and `_BACKFILL` is reposted, the Cloud +Function will proceed by applying the next batch in the `_backlog`. This means, +if you have applied the batch manually you should remove this object from the +`_backlog`. However, if you have patched the data on GCS for the failed batch +and would like the cloud function to apply it, then you leave this object in the +`_backlog`. ## Ordering Mechanics Explained We've treated ordering incremental commits to table as a variation on the @@ -90,13 +96,14 @@ The Backlog Publisher has two responsibilities: 1. add incoming success files to a table's `_backlog` so they are not "forgotten" by the ingestion system. 1. if there is a non-empty backlog start the backfill subscriber (if one is not -already running). This is accomplished by dropping a table level `_BACKFILL` +already running). This is accomplished by uploading a table level `_BACKFILL` file if it does not already exist. 
### Backlog Subscriber The Backlog Subscriber is responsible for keeping track of BigQuery jobs running on a table and ensure that batches are committed in order. When the backlog is -not empty for a table the backlog subscriber should be running for that table. +not empty for a table the backlog subscriber should be running for that table +unless a job has failed. It will either be polling a `RUNNING` BigQuery job for completion, or submitting the next batch in the `_backlog`. @@ -106,7 +113,63 @@ The state of what BigQuery job is currently running on a table is kept in a In order to escape the maximum nine-minute (540s) Cloud Function Timeout, the backfill subscriber will re-trigger itself by posting a new `_BACKFILL` file until the `_backlog` for the table prefix is empty. When a new success file -arrives it is the responsibility of the publisher to restart the subscriber. +arrives it is the responsibility of the publisher to restart the subscriber if +one is not already running. + +### Example: Life of a Table +The following process explains the triggers (GCS files) and actions of the +Cloud Function for a single table prefix. + +1. Source data uploaded to GCS prefix for the destination dataset / table, etc. + - `gs://ingestion-bucket/dataset/table/historical/2020/01/02/03/foo-data-00.csv` + - `gs://ingestion-bucket/dataset/table/historical/2020/01/02/03/foo-data-01.csv` + - `gs://ingestion-bucket/dataset/table/historical/2020/01/02/04/foo-data-00.csv` + - `gs://ingestion-bucket/dataset/table/incremental/2020/01/02/05/foo-data-01.csv` +1. Success file uploaded to GCS (to indicate this atomic batch is ready to be +applied). + - `gs://ingestion-bucket/dataset/table/historical/2020/01/02/03/_SUCCESS` + - `gs://ingestion-bucket/dataset/table/historical/2020/01/02/04/_SUCCESS` + - `gs://ingestion-bucket/dataset/table/incremental/2020/01/02/05/_SUCCESS` +1. Backlog Publisher adds a pointer to each success file in the backlog for the +table. 
+ - `gs://ingestion-bucket/dataset/table/_backlog/historical/2020/01/02/03/_SUCCESS` + - `gs://ingestion-bucket/dataset/table/_backlog/historical/2020/01/02/04/_SUCCESS` + - `gs://ingestion-bucket/dataset/table/_backlog/incremental/2020/01/02/05/_SUCCESS` +1. If the `START_BACKFILL_FILENAME` is set and the file exists at the table prefix, After adding each item the backlog, the Backlog Publisher will start the +Backfill Subscriber if it is not already running (as indicated by a `_BACKFILL` +file). If the `START_BACKFILL_FILENAME` is not present the backlog subscriber +will not be started until this file is uploaded. + - `gs://ingestion-bucket/dataset/table/_BACKFILL` +1. The Backlog Subscriber will look at the backlog and apply the batches in +order (lexicographic). This process looks like this: + 1. Claim this backfill file: + - `gs://ingestion-bucket/dataset/table/_claimed__BACKFILL_created_at_...` + 1. Claim first batch in backlog (ensure no duplicate processing): + - `gs://ingestion-bucket/dataset/table/historical/2020/01/02/03/_claimed__SUCCESS_created_at_...` + 1. Submit the BigQuery Job for this batch (load job or external query based on the `_config/*` files) + - Ingest the data at the `gs://ingestion-bucket/dataset/table/historical/2020/01/02/03/*` prefix + - Store the job ID in `gs://ingestion-bucket/dataset/table/_bqlock` + 1. Wait for this Job to complete successfully and remove this item from the backlog. + - If job is `DONE` with errors: + - Raise exception (do not continue to process any more batches) + - If job is `DONE` without errors remove the pointer from the backlog: + - DELETE `gs://ingestion-bucket/dataset/table/_backlog/historical/2020/01/02/03/_SUCCESS` + 1. Repeat from Backlog Subscriber step 2 + - Where the first item in the backlog is now + - `gs://ingestion-bucket/dataset/table/_backlog/historical/2020/01/02/04/_SUCCESS` + - And on the next loop: + - `gs://ingestion-bucket/dataset/table/_backlog/incremental/2020/01/02/05/_SUCCESS` +1. 
Backlog Subscriber sees the `_backlog/` is empty for the table. In other words +The BigQuery table is caught up with the data on GCS. + - DELETE `gs://ingestion-bucket/dataset/table/_BACKFILL` and exit +1. The next day a new incremental arrives + - `gs://ingestion-bucket/dataset/table/_backlog/incremental/2020/01/02/05/_SUCCESS` +1. The Backlog Publisher adds this item to the backlog and wakes up the +Backfill Subscriber by posting a new `_BACKFILL` file. + - `gs://ingestion-bucket/dataset/table/_backlog/incremental/2020/01/02/05/_SUCCESS` + - `gs://ingestion-bucket/dataset/table/_BACKFILL` +1. Backlog Subscriber will handle the backlog of just one item +(See Backlog Subscriber step #5 and #6 above) ### Note on Handling Race Condition diff --git a/tools/cloud_functions/gcs_event_based_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/README.md index 25ab421aa..d75b826eb 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/README.md @@ -443,6 +443,21 @@ files. The utility supports either invoking the Cloud Function main method locally (in concurrent threads) or publishing notifications for the success files (for a deployed Cloud Function to pick up). +### Backfill and Ordering +If you use the ordering feature on a table (or function wide) you should use the +`NOTIFICATIONS` mode to repost notifications to a pub/sub topic that your +deployed Cloud Function is listening to. The `LOCAL` mode does not support +ordering because this feature relies on (re)posting files like `_bqlock`, +`_BACKFILL` and various claim files and getting re-triggered by object +notifications for these. +The script will publish the notifications for success files and the Cloud +Function will add these to the appropriate table's backlog. +Once the script completes you can drop the `START_BACKFILL_FILENAME` +(e.g. `_HISTORYDONE`) for each table you want to trigger the backfill for. 
+In general, it would not be safe for this utility to drop a `_HISTORYDONE` for +every table because the parallel historical loads might still be in progress. + + ### Usage ``` python3 -m backfill -h From 7971bc39a09a9a89a6d2623e16a3ef3cecf123e8 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Thu, 17 Dec 2020 10:03:09 -0800 Subject: [PATCH 71/90] fix default load config return type --- .../gcs_event_based_ingest/e2e/conftest.py | 10 +++------- .../gcs_ocn_bq_ingest/common/utils.py | 10 +++++----- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py index bd64a2660..b8f12a14c 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py @@ -50,13 +50,9 @@ def _run(cmd): print( ANSI_ESCAPE_PATTERN.sub( '', - subprocess.check_output( - cmd, - stderr=subprocess.STDOUT, - cwd=TEST_DIR - ).decode('UTF-8') - ) - ) + subprocess.check_output(cmd, + stderr=subprocess.STDOUT, + cwd=TEST_DIR).decode('UTF-8'))) init = shlex.split("terraform init") apply = shlex.split("terraform apply -auto-approve") diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 31a7cb589..5fcd41045 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -215,8 +215,10 @@ def _get_parent_config(path): while config_q: recursive_update(merged_config, config_q.popleft(), in_place=True) if merged_config == constants.BASE_LOAD_JOB_CONFIG: - print("falling back to default CSV load job config") - return constants.DEFAULT_LOAD_JOB_CONFIG + print("falling back to default CSV load job config. 
" + "Did you forget load.json?") + return bigquery.LoadJobConfig.from_api_repr( + constants.DEFAULT_LOAD_JOB_CONFIG) print(f"merged_config: {merged_config}") return bigquery.LoadJobConfig.from_api_repr({"load": merged_config}) @@ -662,9 +664,7 @@ def create_job_id(success_file_path): """ clean_job_id = os.getenv('JOB_PREFIX', constants.DEFAULT_JOB_PREFIX) clean_job_id += constants.NON_BQ_JOB_ID_REGEX.sub( - '_', - success_file_path.replace('/', '-') - ) + '_', success_file_path.replace('/', '-')) # add uniqueness in case we have to "re-process" a success file that is # republished (e.g. to fix a bad batch of data) or handle multiple load jobs # for a single success file. From de19c9878d10dc85098e68b615eaea1ead2507f5 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Thu, 7 Jan 2021 12:34:17 -0800 Subject: [PATCH 72/90] fix: fail on failure of children jobs During multi-statement BQ jobs, child jobs are submitted. If any of these fail we should consider the job a failure. --- .../gcs_ocn_bq_ingest/common/utils.py | 65 ++++++++++++------- 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 5fcd41045..db343a177 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -58,7 +58,7 @@ def external_query( # pylint: disable=too-many-arguments if external_table_config: external_table_def = json.loads(external_table_config) else: - print(f" {gsurl}_config/external.json not found in parents of {gsurl}." + print(f" {gsurl}_config/external.json not found in parents of {gsurl}. 
" "Falling back to default PARQUET external table:\n" f"{json.dumps(constants.DEFAULT_EXTERNAL_TABLE_DEFINITION)}") external_table_def = constants.DEFAULT_EXTERNAL_TABLE_DEFINITION @@ -101,8 +101,8 @@ def external_query( # pylint: disable=too-many-arguments and job.statement_type in constants.BQ_DML_STATEMENT_TYPES and job.num_dml_affected_rows < 1): raise exceptions.BigQueryJobFailure( - f"query job {job.job_id} ran successfully but did not affect" - f"any rows.\n {pprint.pformat(job.to_api_repr())}") + f"query job {job.job_id} ran successfully but did not " + f"affect any rows.\n {pprint.pformat(job.to_api_repr())}") return time.sleep(constants.JOB_POLL_INTERVAL_SECONDS) @@ -136,10 +136,7 @@ def load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id): # Check if job failed quickly for job in jobs: job.reload(client=bq_client) - if job.errors: - raise exceptions.BigQueryJobFailure( - f"load job {job.job_id} failed quickly: {job.errors}\n" - f"{pprint.pformat(job.to_api_repr())}") + check_for_bq_job_and_children_errors(bq_client, job) time.sleep(constants.JOB_POLL_INTERVAL_SECONDS) @@ -240,9 +237,8 @@ def get_batches_for_prefix( bucket_name = blob.bucket.name prefix_name = blob.name - prefix_filter = f"{prefix_name}" bucket = cached_get_bucket(gcs_client, bucket_name) - blobs = list(bucket.list_blobs(prefix=prefix_filter, delimiter="/")) + blobs = list(bucket.list_blobs(prefix=prefix_name, delimiter="/")) cumulative_bytes = 0 max_batch_size = int( @@ -309,14 +305,14 @@ def parse_notification(notification: dict) -> Tuple[str, str]: return attributes["bucketId"], attributes["objectId"] except KeyError: raise exceptions.UnexpectedTriggerException( - "Issue with Pub/Sub message, did not contain expected" + "Issue with Pub/Sub message, did not contain expected " f"attributes: 'bucketId' and 'objectId': {notification}" ) from KeyError raise exceptions.UnexpectedTriggerException( "Cloud Function received unexpected trigger:\n" f"{notification}\n" - "This 
function only supports direct Cloud Functions" - "Background Triggers or Pub/Sub storage notificaitons" + "This function only supports direct Cloud Functions " + "Background Triggers or Pub/Sub storage notificaitons " "as described in the following links:\n" "https://cloud.google.com/storage/docs/pubsub-notifications\n" "https://cloud.google.com/functions/docs/tutorials/storage") @@ -538,6 +534,38 @@ def remove_oldest_backlog_item( return False +def check_for_bq_job_and_children_errors(bq_client: bigquery.Client, + job: Union[bigquery.LoadJob, + bigquery.QueryJob]): + """checks if BigQuery job (or children jobs in case of multi-statement sql) + should be considered failed because there were errors or the query affected + no rows while FAIL_ON_ZERO_DML_ROWS_AFFECTED env var is set to True + (this is the default). + + Args: + bq_client: bigquery.Client + job: Union[bigquery.LoadJob, bigquery.QueryJob] job to check for errors. + Raises: + exceptions.BigQueryJobFailure + """ + if job.state != "DONE": + wait_on_bq_job_id(bq_client, job.job_id, 5) + if job.errors: + raise exceptions.BigQueryJobFailure( + f"BigQuery Job {job.job_id} failed during backfill with the " + f"following errors: {job.errors}\n" + f"{pprint.pformat(job.to_api_repr())}") + if isinstance(job, bigquery.QueryJob): + if (constants.FAIL_ON_ZERO_DML_ROWS_AFFECTED + and job.statement_type in constants.BQ_DML_STATEMENT_TYPES + and job.num_dml_affected_rows < 1): + raise exceptions.BigQueryJobFailure( + f"query job {job.job_id} ran successfully but did not " + f"affect any rows.\n {pprint.pformat(job.to_api_repr())}") + for child_job in bq_client.list_jobs(parent_job=job): + check_for_bq_job_and_children_errors(bq_client, child_job) + + def wait_on_bq_job_id(bq_client: bigquery.Client, job_id: str, polling_timeout: int, @@ -562,18 +590,7 @@ def wait_on_bq_job_id(bq_client: bigquery.Client, job: Union[bigquery.LoadJob, bigquery.QueryJob] = bq_client.get_job(job_id) if job.state == "DONE": - if job.errors: - 
raise exceptions.BigQueryJobFailure( - f"BigQuery Job {job.job_id} failed during backfill with the" - f"following errors: {job.errors}\n" - f"{pprint.pformat(job.to_api_repr())}") - if (isinstance(job, bigquery.QueryJob) - and constants.FAIL_ON_ZERO_DML_ROWS_AFFECTED - and job.statement_type in constants.BQ_DML_STATEMENT_TYPES - and job.num_dml_affected_rows < 1): - raise exceptions.BigQueryJobFailure( - f"query job {job.job_id} ran successfully but did not" - f"affect any rows.\n {pprint.pformat(job.to_api_repr())}") + check_for_bq_job_and_children_errors(bq_client, job) return True if job.state in {"RUNNING", "PENDING"}: print(f"waiting on BigQuery Job {job.job_id}") From 61d2c14e8a1ef0ea2d06e1bc71aa0066ee082e58 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Thu, 7 Jan 2021 18:28:28 -0800 Subject: [PATCH 73/90] chore: add test for child job failing behavior --- .../gcs_ocn_bq_ingest/README.md | 1 + .../gcs_ocn_bq_ingest/common/constants.py | 3 +- .../gcs_ocn_bq_ingest/common/utils.py | 19 ++---- .../gcs_ocn_bq_ingest/main.py | 21 ++++--- .../gcs_event_based_ingest/tests/conftest.py | 58 ++++++++++++++++++- .../test_gcs_ocn_bq_ingest_it.py | 31 +++++++++- 6 files changed, 107 insertions(+), 26 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md index 5ffea5c17..02fea45ff 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md @@ -38,6 +38,7 @@ following default behavior. | `ORDER_PER_TABLE`\* | Force jobs to be executed sequentially (rather than parallel) based on the backlog. This is the same as having an `ORDERME` file in every config directory | `False` | | `START_BACKFILL_FILENAME`\*| Block submitting BigQuery Jobs for a table until this file is present at the table prefix. By default this will not happen. 
| `None` | | `RESTART_BUFFER_SECONDS`\* | Buffer before Cloud Function timeout to leave before re-triggering the backfill subscriber | 30 | +| `USE_ERROR_REPORTING_API` | Should errors be reported using error reporting api to avoid cold restart (optimization) | True | \* only affect the behavior when ordering is enabled for a table. See [ORDERING.md](../ORDERING.md) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py index 1a5a1defa..acb6a4b24 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py @@ -30,7 +30,7 @@ # One might consider lowering this to 1-2 seconds to lower the # upper bound of expected execution time to stay within the free tier. # https://cloud.google.com/functions/pricing#free_tier -WAIT_FOR_JOB_SECONDS = int(os.getenv("WAIT_FOR_JOB_SECONDS", "1")) +WAIT_FOR_JOB_SECONDS = int(os.getenv("WAIT_FOR_JOB_SECONDS", "5")) DEFAULT_EXTERNAL_TABLE_DEFINITION = { # The default must be a self describing data format @@ -137,3 +137,4 @@ # https://cloud.google.com/bigquery/docs/running-jobs#generate-jobid NON_BQ_JOB_ID_REGEX = re.compile(r'[^0-9a-zA-Z_\-]+') + diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index db343a177..fdf078673 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -92,17 +92,8 @@ def external_query( # pylint: disable=too-many-arguments while time.monotonic( ) - start_poll_for_errors < constants.WAIT_FOR_JOB_SECONDS: job.reload(client=bq_client) - if job.errors: - raise exceptions.BigQueryJobFailure( - f"query job {job.job_id} failed 
quickly: {job.errors}." - f"\n{pprint.pformat(job.to_api_repr())}") if job.state == "DONE": - if (constants.FAIL_ON_ZERO_DML_ROWS_AFFECTED - and job.statement_type in constants.BQ_DML_STATEMENT_TYPES - and job.num_dml_affected_rows < 1): - raise exceptions.BigQueryJobFailure( - f"query job {job.job_id} ran successfully but did not " - f"affect any rows.\n {pprint.pformat(job.to_api_repr())}") + check_for_bq_job_and_children_errors(bq_client, job) return time.sleep(constants.JOB_POLL_INTERVAL_SECONDS) @@ -747,11 +738,9 @@ def apply( print( "looking for a transformation tranformation sql file in parent _config." ) - external_query_sql = read_gcs_file_if_exists(gcs_client, - f"{gsurl}_config/*.sql") - if not external_query_sql: - external_query_sql = look_for_config_in_parents(gcs_client, gsurl, - "*.sql") + external_query_sql = look_for_config_in_parents( + gcs_client, f"gs://{bkt.name}/{success_blob.name}", '*.sql') + if external_query_sql: print("EXTERNAL QUERY") print(f"found external query:\n{external_query_sql}") diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 1d2ef71eb..934c8a052 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -16,6 +16,7 @@ # limitations under the License. """Background Cloud Function for loading data from GCS to BigQuery. """ +import distutils.util import os import time import traceback @@ -92,13 +93,19 @@ def main(event: Dict, context): # pylint: disable=unused-argument except exceptions.EXCEPTIONS_TO_REPORT as original_error: # We do this because we know these errors do not require a cold restart # of the cloud function. 
- try: - lazy_error_reporting_client().report_exception() - except Exception: # pylint: disable=broad-except - # This mostly handles the case where error reporting API is not - # enabled or IAM permissions did not allow us to report errors with - # error reporting API. - raise original_error # pylint: disable=raise-missing-from + if ( + distutils.util.strtobool( + os.getenv("USE_ERROR_REPORTING_API", "True")) + ): + try: + lazy_error_reporting_client().report_exception() + except Exception: # pylint: disable=broad-except + # This mostly handles the case where error reporting API is not + # enabled or IAM permissions did not allow us to report errors + # with error reporting API. + raise original_error # pylint: disable=raise-missing-from + else: + raise original_error def triage_event(gcs_client: Optional[storage.Client], diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index cfdc4323a..239cf98ab 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -530,7 +530,7 @@ def gcs_external_partitioned_config( "bq_transform.sql", ])) - sql = "INSERT {dest_dataset}.{dest_table} SELECT * FROM temp_ext" + sql = "INSERT {dest_dataset}.{dest_table} SELECT * FROM temp_ext;" sql_obj.upload_from_string(sql) config_obj = gcs_bucket.blob("/".join([ @@ -564,3 +564,59 @@ def teardown(): request.addfinalizer(teardown) return config_objs + + +@pytest.fixture +def no_use_error_reporting(monkeypatch): + monkeypatch.setenv("USE_ERROR_REPORTING_API", "False") + + +@pytest.fixture +def gcs_external_config_bad_statement( + request, gcs_bucket, dest_dataset, dest_table, no_use_error_reporting +) -> List[storage.blob.Blob]: + config_objs = [] + sql_obj = gcs_bucket.blob("/".join([ + f"{dest_dataset.project}.{dest_dataset.dataset_id}", + dest_table.table_id, + "_config", + "bq_transform.sql", + ])) + + sql = 
("INSERT {dest_dataset}.{dest_table} SELECT * FROM temp_ext;\n" + "INSERT {dest_dataset}.{dest_table} SELECT 1/0;") + sql_obj.upload_from_string(sql) + + config_obj = gcs_bucket.blob("/".join([ + f"{dest_dataset.project}.{dest_dataset.dataset_id}", + dest_table.table_id, "_config", "external.json" + ])) + + with open(os.path.join(TEST_DIR, "resources", + "nation_schema.json")) as schema: + fields = json.load(schema) + config = { + "schema": { + "fields": fields + }, + "csvOptions": { + "allowJaggedRows": False, + "allowQuotedNewlines": False, + "encoding": "UTF-8", + "fieldDelimiter": "|", + "skipLeadingRows": 0, + }, + "sourceFormat": "CSV", + "sourceUris": ["REPLACEME"], + } + config_obj.upload_from_string(json.dumps(config)) + config_objs.append(sql_obj) + config_objs.append(config_obj) + + def teardown(): + for do in config_objs: + if do.exists(): + do.delete() + + request.addfinalizer(teardown) + return config_objs diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py index e1fe45b18..81709a5b2 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py @@ -143,8 +143,8 @@ def test_load_job_appending_batches(bq, gcs_batched_data, dest_dataset, @pytest.mark.IT -def test_external_query(bq, gcs_data, gcs_external_config, dest_dataset, - dest_table, mock_env): +def test_external_query_pure(bq, gcs_data, gcs_external_config, dest_dataset, + dest_table, mock_env): """tests the basic external query ingrestion mechanics with bq_transform.sql and external.json """ @@ -286,3 +286,30 @@ def bq_wait_for_rows(bq_client: bigquery.Client, table: bigquery.Table, f"{table.project}.{table.dataset_id}.{table.table_id} to " f"reach {expected_num_rows} rows." 
f"last poll returned {actual_num_rows} rows.") + + +@pytest.mark.IT +def test_external_query_with_bad_statement(bq, gcs_data, + gcs_external_config_bad_statement, + dest_dataset, dest_table, mock_env): + """tests the basic external query ingrestion mechanics + with bq_transform.sql and external.json + """ + if not gcs_data.exists(): + raise google.cloud.exceptions.NotFound("test data objects must exist") + if not all((blob.exists() for blob in gcs_external_config_bad_statement)): + raise google.cloud.exceptions.NotFound("config objects must exist") + + test_event = { + "attributes": { + "bucketId": gcs_data.bucket.name, + "objectId": gcs_data.name + } + } + raised = False + try: + gcs_ocn_bq_ingest.main.main(test_event, None) + except gcs_ocn_bq_ingest.common.exceptions.BigQueryJobFailure: + raised = True + + assert raised, "bad statement did not raise BigQueryJobFailure" From fb69a6a280c0c27015939ef3261d928370dd6837 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Thu, 7 Jan 2021 18:56:27 -0800 Subject: [PATCH 74/90] fixup flake8 --- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py | 6 ++---- .../gcs_event_based_ingest/tests/conftest.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 934c8a052..0141ed5b4 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -93,10 +93,8 @@ def main(event: Dict, context): # pylint: disable=unused-argument except exceptions.EXCEPTIONS_TO_REPORT as original_error: # We do this because we know these errors do not require a cold restart # of the cloud function. 
- if ( - distutils.util.strtobool( - os.getenv("USE_ERROR_REPORTING_API", "True")) - ): + if (distutils.util.strtobool( + os.getenv("USE_ERROR_REPORTING_API", "True"))): try: lazy_error_reporting_client().report_exception() except Exception: # pylint: disable=broad-except diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index 239cf98ab..5dc4c7fa1 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -573,8 +573,8 @@ def no_use_error_reporting(monkeypatch): @pytest.fixture def gcs_external_config_bad_statement( - request, gcs_bucket, dest_dataset, dest_table, no_use_error_reporting -) -> List[storage.blob.Blob]: + request, gcs_bucket, dest_dataset, dest_table, + no_use_error_reporting) -> List[storage.blob.Blob]: config_objs = [] sql_obj = gcs_bucket.blob("/".join([ f"{dest_dataset.project}.{dest_dataset.dataset_id}", From 1aec908e42716a068605c1774dc40323b9faefe4 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Thu, 7 Jan 2021 19:00:01 -0800 Subject: [PATCH 75/90] fixup flake8 --- .../gcs_ocn_bq_ingest/common/constants.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py index acb6a4b24..c9a1e8323 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py @@ -136,5 +136,4 @@ } # https://cloud.google.com/bigquery/docs/running-jobs#generate-jobid -NON_BQ_JOB_ID_REGEX = re.compile(r'[^0-9a-zA-Z_\-]+') - +NON_BQ_JOB_ID_REGEX = re.compile(r'[^0-9a-zA-Z_\-]+') \ No newline at end of file From 0490217309274aeccac07f1fe3b95b84692f92df Mon Sep 17 00:00:00 2001 From: Jacob Ferriero 
Date: Thu, 7 Jan 2021 19:27:30 -0800 Subject: [PATCH 76/90] fixup flake8 --- .../gcs_ocn_bq_ingest/common/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py index c9a1e8323..90689c1ae 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py @@ -136,4 +136,4 @@ } # https://cloud.google.com/bigquery/docs/running-jobs#generate-jobid -NON_BQ_JOB_ID_REGEX = re.compile(r'[^0-9a-zA-Z_\-]+') \ No newline at end of file +NON_BQ_JOB_ID_REGEX = re.compile(r'[^0-9a-zA-Z_\-]+') From 3c3bd3dcdd5160f5a2e9f27b5ce2f831ca4729a5 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Mon, 11 Jan 2021 13:01:34 -0800 Subject: [PATCH 77/90] feat: separate bq storage and compute project env vars --- tools/cloud_functions/gcs_event_based_ingest/README.md | 7 ++++++- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md | 3 ++- .../gcs_ocn_bq_ingest/common/ordering.py | 2 +- .../gcs_ocn_bq_ingest/common/utils.py | 9 +++------ 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/README.md index d75b826eb..372590064 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/README.md @@ -42,7 +42,7 @@ better fit your naming convention on GCS. Your regex must include for destination `dataset`, and `table`. Note, that `dataset` can optionally, explicitly specify destination project (i.e. 
`gs://${BUCKET}/project_id.dataset_id/table/....`) alternatively, -one can set the `BQ_PROJECT` environment variable to set to override the +one can set the `BQ_STORAGE_PROJECT` environment variable to set to override the default target project for datasets at the function level. The default behavior is to infer the project from Application Default Credential (the project in which the Cloud Function is running, or the ADC configured in Google Cloud SDK @@ -234,6 +234,11 @@ at any parent folders `_config` prefix. This allows you dictate "for this table any new batch should `WRITE_TRUNCATE` it's parent partition/table" or "for that table any new batch should `WRITE_APPEND` to it's parent partition/table". +## Controlling BigQuery Compute Project +By default BigQuery jobs will be submitted in the project where the Cloud Function +is deployed. To submit jobs in another BigQuery project set the `BQ_PROJECT` +environment variable. + ## Monitoring Monitoring what data has been loaded by this solution should be done with the BigQuery [`INFORMATION_SCHEMA` jobs metadata](https://cloud.google.com/bigquery/docs/information-schema-jobs) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md index 02fea45ff..20c023825 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/README.md @@ -32,7 +32,8 @@ following default behavior. 
| `DESTINATION_REGEX` | A [Python Regex with named capturing groups](https://docs.python.org/3/howto/regex.html#non-capturing-and-named-groups) for `dataset`, `table`, (optional: `partition` or `yyyy`, `mm`, `dd`, `hh`, `batch`) | (see below)| | `MAX_BATCH_BYTES` | Max bytes for BigQuery Load job | `15000000000000` ([15 TB](https://cloud.google.com/bigquery/quotas#load_jobs)| | `JOB_PREFIX` | Prefix for BigQuery Job IDs | `gcf-ingest-` | -| `BQ_PROJECT` | Default BQ project to use if not specified in dataset capturing group | Project where Cloud Function is deployed | +| `BQ_PROJECT` | Default BQ project to use to submit load / query jobs | Project where Cloud Function is deployed | +| `BQ_STORAGE_PROJECT` | Default BQ project to use for target table references if not specified in dataset capturing group | Project where Cloud Function is deployed | | `FUNCTION_TIMEOUT_SEC`| Number of seconds set for this deployment of Cloud Function (no longer part of python38 runtime) | 60 | | `FAIL_ON_ZERO_DML_ROWS_AFFECTED` | Treat External Queries that result in `numDmlAffectedRows = 0` as failures | True | | `ORDER_PER_TABLE`\* | Force jobs to be executed sequentially (rather than parallel) based on the backlog. This is the same as having an `ORDERME` file in every config directory | `False` | diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index c6362755a..a53e83d1a 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -349,7 +349,7 @@ def _get_clients_if_none( harmless if these clients are recreated in the Cloud Function. 
""" print("instantiating missing clients in backlog subscriber this should only" - "happen during integration tests.") + " happen during integration tests.") if not gcs_client: gcs_client = storage.Client(client_info=constants.CLIENT_INFO) if not bq_client: diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index fdf078673..f7e6365cf 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -80,10 +80,7 @@ def external_query( # pylint: disable=too-many-arguments job: bigquery.QueryJob = bq_client.query(rendered_query, job_config=job_config, - job_id=job_id, - project=os.getenv( - "BQ_PROJECT", - bq_client.project)) + job_id=job_id) print(f"started asynchronous query job: {job.job_id}") @@ -653,11 +650,11 @@ def gcs_path_to_table_ref_and_batch( dest_table_ref = bigquery.TableReference.from_string( f"{dataset}.{table}{partition}", - default_project=os.getenv("BQ_PROJECT", default_project)) + default_project=os.getenv("BQ_STORAGE_PROJECT", default_project)) else: dest_table_ref = bigquery.TableReference.from_string( f"{dataset}.{table}", - default_project=os.getenv("BQ_PROJECT", default_project)) + default_project=os.getenv("BQ_STORAGE_PROJECT", default_project)) return dest_table_ref, batch_id From 9e8e52f7650a682d47cec160999cf84b06582bea Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Fri, 22 Jan 2021 12:19:30 -0800 Subject: [PATCH 78/90] fix: don't require escaping braces in sql, still support {dest_dataset} {dest_table} rendering --- .../gcs_ocn_bq_ingest/common/utils.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 
f7e6365cf..5865cb5c2 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -74,9 +74,13 @@ def external_query( # pylint: disable=too-many-arguments # drop partition decorator if present. table_id = dest_table_ref.table_id.split("$")[0] - rendered_query = query.format( - dest_dataset=f"`{dest_table_ref.project}`.{dest_table_ref.dataset_id}", - dest_table=table_id) + # similar syntax to str.format but doesn't require escaping braces + # elsewhere in query (e.g. in a regex) + rendered_query = query\ + .replace( + "{dest_dataset}", + f"`{dest_table_ref.project}`.{dest_table_ref.dataset_id}")\ + .replace("{dest_table}", table_id) job: bigquery.QueryJob = bq_client.query(rendered_query, job_config=job_config, From 854aa68aeefa9969a1aad5a506142279e7413716 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Fri, 22 Jan 2021 15:10:14 -0800 Subject: [PATCH 79/90] happy newyear! 
copyright 2020 -> 2021 --- tools/cloud_functions/gcs_event_based_ingest/__init__.py | 2 +- tools/cloud_functions/gcs_event_based_ingest/backfill.py | 2 +- tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml | 2 +- tools/cloud_functions/gcs_event_based_ingest/e2e/__init__.py | 2 +- tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py | 2 +- tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py | 2 +- tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf | 2 +- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/__init__.py | 2 +- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/common/__init__.py | 2 +- .../gcs_ocn_bq_ingest/common/constants.py | 2 +- .../gcs_ocn_bq_ingest/common/exceptions.py | 2 +- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py | 2 +- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py | 2 +- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py | 2 +- .../gcs_event_based_ingest/scripts/install_terraform.sh | 2 +- .../terraform_module/gcs_ocn_bq_ingest_function/main.tf | 2 +- .../terraform_module/gcs_ocn_bq_ingest_function/outputs.tf | 2 +- .../terraform_module/gcs_ocn_bq_ingest_function/variables.tf | 2 +- .../terraform_module/gcs_ocn_bq_ingest_function/versions.tf | 2 +- .../gcs_event_based_ingest/tests/cli/test_backfill.py | 2 +- tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py | 2 +- .../tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py | 2 +- .../tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py | 2 +- .../tests/gcs_ocn_bq_ingest/test_ordering_it.py | 2 +- 24 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/__init__.py b/tools/cloud_functions/gcs_event_based_ingest/__init__.py index 7a3efb203..42ed0a407 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/__init__.py +++ b/tools/cloud_functions/gcs_event_based_ingest/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC. +# Copyright 2021 Google LLC. 
# This software is provided as-is, without warranty or representation # for any use or purpose. # Your use of it is subject to your agreement with Google. diff --git a/tools/cloud_functions/gcs_event_based_ingest/backfill.py b/tools/cloud_functions/gcs_event_based_ingest/backfill.py index f0a2ce415..105397553 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/backfill.py +++ b/tools/cloud_functions/gcs_event_based_ingest/backfill.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml index 32d39e742..0ae2de0ae 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml +++ b/tools/cloud_functions/gcs_event_based_ingest/cloudbuild.yaml @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC. +# Copyright 2021 Google LLC. # This software is provided as-is, without warranty or representation # for any use or purpose. # Your use of it is subject to your agreement with Google. diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/__init__.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/__init__.py index 7a3efb203..42ed0a407 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/__init__.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC. +# Copyright 2021 Google LLC. # This software is provided as-is, without warranty or representation # for any use or purpose. # Your use of it is subject to your agreement with Google. 
diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py index b8f12a14c..2aa9684e1 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC. +# Copyright 2021 Google LLC. # This software is provided as-is, without warranty or representation # for any use or purpose. # Your use of it is subject to your agreement with Google. diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py index b8542631c..8ffa44c2f 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/e2e_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC. +# Copyright 2021 Google LLC. # This software is provided as-is, without warranty or representation # for any use or purpose. # Your use of it is subject to your agreement with Google. diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf b/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf index af45d7eed..64e3973d3 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/main.tf @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/__init__.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/__init__.py index 7a3efb203..42ed0a407 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/__init__.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC. 
+# Copyright 2021 Google LLC. # This software is provided as-is, without warranty or representation # for any use or purpose. # Your use of it is subject to your agreement with Google. diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/__init__.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/__init__.py index 7a3efb203..42ed0a407 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/__init__.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC. +# Copyright 2021 Google LLC. # This software is provided as-is, without warranty or representation # for any use or purpose. # Your use of it is subject to your agreement with Google. diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py index 90689c1ae..27e104586 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC. +# Copyright 2021 Google LLC. # This software is provided as-is, without warranty or representation # for any use or purpose. # Your use of it is subject to your agreement with Google. diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py index 8ab701e8d..7f7b0e04b 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC. +# Copyright 2021 Google LLC. # This software is provided as-is, without warranty or representation # for any use or purpose. 
# Your use of it is subject to your agreement with Google. diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index a53e83d1a..95fb99195 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC. +# Copyright 2021 Google LLC. # This software is provided as-is, without warranty or representation # for any use or purpose. # Your use of it is subject to your agreement with Google. diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 5865cb5c2..44b8367ee 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC. +# Copyright 2021 Google LLC. # This software is provided as-is, without warranty or representation # for any use or purpose. # Your use of it is subject to your agreement with Google. diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 0141ed5b4..5b536ff25 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC. +# Copyright 2021 Google LLC. # This software is provided as-is, without warranty or representation # for any use or purpose. # Your use of it is subject to your agreement with Google. 
diff --git a/tools/cloud_functions/gcs_event_based_ingest/scripts/install_terraform.sh b/tools/cloud_functions/gcs_event_based_ingest/scripts/install_terraform.sh index 70f9cb521..4c1cd6f50 100755 --- a/tools/cloud_functions/gcs_event_based_ingest/scripts/install_terraform.sh +++ b/tools/cloud_functions/gcs_event_based_ingest/scripts/install_terraform.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2020 Google Inc. +# Copyright 2021 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf index 6094881c3..9899db2d1 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/outputs.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/outputs.tf index 69d8017ab..5ad0d2b9b 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/outputs.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/outputs.tf @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf index 1783034f5..78b1a1991 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/variables.tf @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/versions.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/versions.tf index e4234775c..3085198f2 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/versions.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/versions.tf @@ -1,5 +1,5 @@ /** - * Copyright 2020 Google LLC + * Copyright 2021 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/cli/test_backfill.py b/tools/cloud_functions/gcs_event_based_ingest/tests/cli/test_backfill.py index 5e9c20cb1..ac3419706 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/cli/test_backfill.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/cli/test_backfill.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index 5dc4c7fa1..f1400ffc4 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py index 877ac0104..be36a397e 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest.py @@ -1,5 +1,5 @@ # dataset/table/_SUCCESS -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py index 81709a5b2..02dbeb318 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_gcs_ocn_bq_ingest_it.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py index 9ecf236bc..7fe82d200 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 09daa9dd21219814acdb2a0c8202e7310322f051 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Thu, 18 Feb 2021 12:27:55 -0800 Subject: [PATCH 80/90] clean up newlines in logs / error messages --- .../gcs_ocn_bq_ingest/common/ordering.py | 8 ++++---- .../gcs_ocn_bq_ingest/common/utils.py | 16 ++++++++-------- .../gcs_ocn_bq_ingest/main.py | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index 95fb99195..83027589d 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -98,8 +98,8 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], print(f"sleeping for {polling_timeout} seconds because" f"found manual lock gs://{bkt.name}/{lock_blob.name} with" "This will be an infinite loop until the manual lock is " - "released.\n" - f"manual lock contents:\n {lock_contents}. ") + "released. " + f"manual lock contents: {lock_contents}. 
") time.sleep(polling_timeout) continue else: # this condition handles absence of _bqlock file @@ -156,8 +156,8 @@ def wait_on_last_job(bq_client: bigquery.Client, lock_blob: storage.Blob, f"gs://{backfill_blob.bucket.name}/{table_prefix}" "/_BACKFILL " f"to resume the backfill subscriber so it can " - "continue with the next item in the backlog.\n" - "Original Exception:\n" + "continue with the next item in the backlog." + "Original Exception:" f"{traceback.format_exc()}") from err diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 44b8367ee..cf8676f43 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -59,7 +59,7 @@ def external_query( # pylint: disable=too-many-arguments external_table_def = json.loads(external_table_config) else: print(f" {gsurl}_config/external.json not found in parents of {gsurl}. 
" - "Falling back to default PARQUET external table:\n" + "Falling back to default PARQUET external table: " f"{json.dumps(constants.DEFAULT_EXTERNAL_TABLE_DEFINITION)}") external_table_def = constants.DEFAULT_EXTERNAL_TABLE_DEFINITION @@ -301,12 +301,12 @@ def parse_notification(notification: dict) -> Tuple[str, str]: f"attributes: 'bucketId' and 'objectId': {notification}" ) from KeyError raise exceptions.UnexpectedTriggerException( - "Cloud Function received unexpected trigger:\n" - f"{notification}\n" + "Cloud Function received unexpected trigger: " + f"{notification} " "This function only supports direct Cloud Functions " "Background Triggers or Pub/Sub storage notificaitons " - "as described in the following links:\n" - "https://cloud.google.com/storage/docs/pubsub-notifications\n" + "as described in the following links: " + "https://cloud.google.com/storage/docs/pubsub-notifications " "https://cloud.google.com/functions/docs/tutorials/storage") @@ -545,7 +545,7 @@ def check_for_bq_job_and_children_errors(bq_client: bigquery.Client, if job.errors: raise exceptions.BigQueryJobFailure( f"BigQuery Job {job.job_id} failed during backfill with the " - f"following errors: {job.errors}\n" + f"following errors: {job.errors} " f"{pprint.pformat(job.to_api_repr())}") if isinstance(job, bigquery.QueryJob): if (constants.FAIL_ON_ZERO_DML_ROWS_AFFECTED @@ -553,7 +553,7 @@ def check_for_bq_job_and_children_errors(bq_client: bigquery.Client, and job.num_dml_affected_rows < 1): raise exceptions.BigQueryJobFailure( f"query job {job.job_id} ran successfully but did not " - f"affect any rows.\n {pprint.pformat(job.to_api_repr())}") + f"affect any rows. 
{pprint.pformat(job.to_api_repr())}") for child_job in bq_client.list_jobs(parent_job=job): check_for_bq_job_and_children_errors(bq_client, child_job) @@ -744,7 +744,7 @@ def apply( if external_query_sql: print("EXTERNAL QUERY") - print(f"found external query:\n{external_query_sql}") + print(f"found external query: {external_query_sql}") external_query(gcs_client, bq_client, gsurl, external_query_sql, dest_table_ref, job_id) return diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py index 5b536ff25..bf2ccebad 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/main.py @@ -87,7 +87,7 @@ def main(event: Dict, context): # pylint: disable=unused-argument # Unexpected exceptions will actually raise which may cause a cold restart. except exceptions.DuplicateNotificationException: - print("recieved duplicate notification. this was handled gracefully.\n " + print("recieved duplicate notification. this was handled gracefully. 
" f"{traceback.format_exc()}") except exceptions.EXCEPTIONS_TO_REPORT as original_error: @@ -146,7 +146,7 @@ def triage_event(gcs_client: Optional[storage.Client], f"{constants.BACKFILL_FILENAME}"): raise RuntimeError( f"recieved notification for gs://{event_blob.bucket.name}/" - f"{event_blob.name}\n" + f"{event_blob.name} " f"{constants.BACKFILL_FILENAME} files " "are expected only at the table prefix level.") ordering.backlog_subscriber(gcs_client, bq_client, event_blob, From 8821dc090f29878b5797540fc2a90496c67906f9 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Tue, 2 Mar 2021 10:32:33 -0800 Subject: [PATCH 81/90] improve logging in bq failures improved error logs and tests with object versioning fixup: logging spaces fixup ci dickerfile workdir REVERT ME THIS INTENTIONALLY BREAKS E2E TEST Revert "REVERT ME THIS INTENTIONALLY BREAKS E2E TEST" This reverts commit eeffc0fd47184c96d29fc0d8ef07662185076962. fixup catch client errors during apply Revert "Revert "REVERT ME THIS INTENTIONALLY BREAKS E2E TEST"" This reverts commit 5824cf372c08ff3432af6ddb1e94852dd78d0853. simpler one line message exceptions Revert "Revert "Revert "REVERT ME THIS INTENTIONALLY BREAKS E2E TEST""" This reverts commit ad3e1da9bc5436b3ac6540da8b7886d377e11b58. 
--- .../gcs_event_based_ingest/Dockerfile.ci | 4 ++- .../gcs_event_based_ingest/e2e/conftest.py | 23 +++++++++---- .../gcs_ocn_bq_ingest/common/exceptions.py | 17 +++++++--- .../gcs_ocn_bq_ingest/common/ordering.py | 2 +- .../gcs_ocn_bq_ingest/common/utils.py | 33 ++++++++++++++----- .../gcs_ocn_bq_ingest_function/main.tf | 2 +- .../gcs_event_based_ingest/tests/conftest.py | 6 ++++ 7 files changed, 65 insertions(+), 22 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci b/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci index d383e7563..2c656ef94 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci +++ b/tools/cloud_functions/gcs_event_based_ingest/Dockerfile.ci @@ -8,8 +8,10 @@ RUN apt-get update \ unzip \ && apt-get autoremove -yqq --purge \ && apt-get clean && rm -rf /var/lib/apt/lists/* +WORKDIR /ci COPY requirements.txt requirements-dev.txt ./ COPY scripts/install_terraform.sh ./ RUN ./install_terraform.sh RUN pip3 install --no-cache-dir -r requirements-dev.txt -ENTRYPOINT ["python3 -m pytest"] +ENTRYPOINT ["python3", "-m", "pytest"] + diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py index 2aa9684e1..c0f91da4a 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py @@ -46,13 +46,24 @@ def gcs() -> storage.Client: @pytest.fixture(scope='module') def terraform_infra(request): + def _escape(in_str): + if in_str is not None: + return ANSI_ESCAPE_PATTERN.sub('', in_str.decode('UTF-8')) + return None + def _run(cmd): - print( - ANSI_ESCAPE_PATTERN.sub( - '', - subprocess.check_output(cmd, - stderr=subprocess.STDOUT, - cwd=TEST_DIR).decode('UTF-8'))) + result = subprocess.run(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + cwd=TEST_DIR) + print(_escape(result.stdout)) + if result.returncode == 0: + return + raise 
subprocess.CalledProcessError( + returncode=result.returncode, + cmd=result.args, + output=_escape(result.stdout), + stderr=_escape(result.stderr)) init = shlex.split("terraform init") apply = shlex.split("terraform apply -auto-approve") diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py index 7f7b0e04b..7a921696f 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py @@ -17,28 +17,35 @@ """Custom Exceptions of GCS event based ingest to BigQuery""" -class DuplicateNotificationException(Exception): +class OneLineException(Exception): + """base class for exceptions whose messages will be displayed on a single + line for better readability in Cloud Function Logs""" + def __init__(self, msg): + super().__init__(msg.replace('\n', ' ').replace('\r', '')) + + +class DuplicateNotificationException(OneLineException): """Exception to indicate that the function was triggered twice for the same event.""" -class BigQueryJobFailure(Exception): +class BigQueryJobFailure(OneLineException): """Exception to indicate that the function was triggered twice for the same event.""" -class DestinationRegexMatchException(Exception): +class DestinationRegexMatchException(OneLineException): """Exception to indicate that a success file did not match the destination regex specified in the DESTINATION_REGEX environment variable (or the default)""" -class UnexpectedTriggerException(Exception): +class UnexpectedTriggerException(OneLineException): """Exception to indicate the cloud function was triggered with an unexpected payload.""" -class BacklogException(Exception): +class BacklogException(OneLineException): """Exception to indicate an issue with the backlog mechanics of this function.""" diff --git 
a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index 83027589d..e30684eae 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -157,7 +157,7 @@ def wait_on_last_job(bq_client: bigquery.Client, lock_blob: storage.Blob, "/_BACKFILL " f"to resume the backfill subscriber so it can " "continue with the next item in the backlog." - "Original Exception:" + "Original Exception: " f"{traceback.format_exc()}") from err diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index cf8676f43..580f933de 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -24,7 +24,9 @@ import os import pathlib import pprint +import sys import time +import traceback import uuid from typing import Any, Deque, Dict, List, Optional, Tuple, Union @@ -716,6 +718,7 @@ def apply( lock_blob: Optional[storage.Blob], job_id: str, ): + # pylint: disable=too-many-locals """ Apply an incremental batch to the target BigQuery table via an asynchronous load job or external query. 
@@ -741,14 +744,28 @@ def apply( ) external_query_sql = look_for_config_in_parents( gcs_client, f"gs://{bkt.name}/{success_blob.name}", '*.sql') + try: + + if external_query_sql: + print("EXTERNAL QUERY") + print(f"found external query: {external_query_sql}") + external_query(gcs_client, bq_client, gsurl, external_query_sql, + dest_table_ref, job_id) + return - if external_query_sql: - print("EXTERNAL QUERY") - print(f"found external query: {external_query_sql}") - external_query(gcs_client, bq_client, gsurl, external_query_sql, - dest_table_ref, job_id) + print("LOAD_JOB") + load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id) return - print("LOAD_JOB") - load_batches(gcs_client, bq_client, gsurl, dest_table_ref, job_id) - return + except (google.api_core.exceptions.GoogleAPIError, + google.api_core.exceptions.ClientError) as err: + etype, value, err_tb = sys.exc_info() + msg = (f"failed to submit job {job_id} for {gsurl}: " + f"{etype.__name__}: {value}") + blob = storage.Blob.from_string(gsurl) + table_prefix = get_table_prefix(blob.name) + bqlock = storage.Blob.from_string( + f"gs://{blob.bucket.name}/{table_prefix}/_bqlock") + # Write this error message to avoid confusion. 
+ handle_bq_lock(gcs_client, bqlock, msg) + raise exceptions.BigQueryJobFailure(msg) from err diff --git a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf index 9899db2d1..6651c645b 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf +++ b/tools/cloud_functions/gcs_event_based_ingest/terraform_module/gcs_ocn_bq_ingest_function/main.tf @@ -103,7 +103,7 @@ resource "google_project_iam_binding" "ingester_bq_admin" { for_each = toset(concat(var.bigquery_project_ids, [var.project_id])) project = each.key members = [module.data_ingester_service_account.iam_email] - role = "roles/bigquery.dataEditor" + role = "roles/bigquery.admin" } # Allow the GCS service account to publish notification for new objects to the diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index f1400ffc4..fea69c20d 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -52,6 +52,8 @@ def error() -> error_reporting.Client: def gcs_bucket(request, gcs) -> storage.bucket.Bucket: """GCS bucket for test artifacts""" bucket = gcs.create_bucket(str(uuid.uuid4())) + bucket.versioning_enabled = True + bucket.patch() # overide default field delimiter at bucket level load_config_json = { "fieldDelimiter": "|", @@ -61,6 +63,10 @@ def gcs_bucket(request, gcs) -> storage.bucket.Bucket: def teardown(): load_json_blob.delete() + bucket.versioning_enabled = False + bucket.patch() + for obj in gcs.list_blobs(bucket_or_name=bucket, versions=True): + obj.delete() bucket.delete(force=True) request.addfinalizer(teardown) From 33ae3291b5e81b79c49c296d093dc7b5eb6c3ba3 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 3 Mar 2021 
14:54:04 -0800 Subject: [PATCH 82/90] fixup flake8 --- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 580f933de..89b151b39 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -26,7 +26,6 @@ import pprint import sys import time -import traceback import uuid from typing import Any, Deque, Dict, List, Optional, Tuple, Union From 57443e0f559f251f7b8f7dd69946fff1811ee23a Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 3 Mar 2021 15:00:50 -0800 Subject: [PATCH 83/90] fixup mypy --- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 89b151b39..bbd8d88c3 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -760,7 +760,7 @@ def apply( google.api_core.exceptions.ClientError) as err: etype, value, err_tb = sys.exc_info() msg = (f"failed to submit job {job_id} for {gsurl}: " - f"{etype.__name__}: {value}") + f"{etype.__class__.__name__}: {value}") blob = storage.Blob.from_string(gsurl) table_prefix = get_table_prefix(blob.name) bqlock = storage.Blob.from_string( From 97f48a73fbb484203b797a6dec613f6e7288122f Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 3 Mar 2021 15:04:44 -0800 Subject: [PATCH 84/90] fixup pylint --- tools/cloud_functions/gcs_event_based_ingest/README.md | 4 ++++ .../gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py | 2 
+- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/README.md b/tools/cloud_functions/gcs_event_based_ingest/README.md index 372590064..4c0cac057 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/README.md +++ b/tools/cloud_functions/gcs_event_based_ingest/README.md @@ -329,6 +329,10 @@ It's better for us to make a conscious decision to adopt new features or adjust CI configs or pin older version depending on the type for failure. This CI should be run on all new PRs and nightly. +Note, all functionality of the cloud function (including ordering) is +integration tested against buckets with object versioning enabled to ensure this +solution works for buckets using this feature. + ### Just Running the Tests #### Running in Docker ```bash diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index bbd8d88c3..1c5ad6642 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -758,7 +758,7 @@ def apply( except (google.api_core.exceptions.GoogleAPIError, google.api_core.exceptions.ClientError) as err: - etype, value, err_tb = sys.exc_info() + etype, value, _ = sys.exc_info() msg = (f"failed to submit job {job_id} for {gsurl}: " f"{etype.__class__.__name__}: {value}") blob = storage.Blob.from_string(gsurl) From 7293f32c0e2444f2d51e4c8f3e59d2f8460c2900 Mon Sep 17 00:00:00 2001 From: Jacob Ferriero Date: Wed, 3 Mar 2021 15:40:36 -0800 Subject: [PATCH 85/90] fixup BigQueryJobFailure docstring --- .../gcs_event_based_ingest/e2e/conftest.py | 9 ++++----- .../gcs_ocn_bq_ingest/common/exceptions.py | 7 +++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py 
b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py index c0f91da4a..7f3c73205 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/e2e/conftest.py @@ -59,11 +59,10 @@ def _run(cmd): print(_escape(result.stdout)) if result.returncode == 0: return - raise subprocess.CalledProcessError( - returncode=result.returncode, - cmd=result.args, - output=_escape(result.stdout), - stderr=_escape(result.stderr)) + raise subprocess.CalledProcessError(returncode=result.returncode, + cmd=result.args, + output=_escape(result.stdout), + stderr=_escape(result.stderr)) init = shlex.split("terraform init") apply = shlex.split("terraform apply -auto-approve") diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py index 7a921696f..a9eb9bab5 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/exceptions.py @@ -20,6 +20,7 @@ class OneLineException(Exception): """base class for exceptions whose messages will be displayed on a single line for better readability in Cloud Function Logs""" + def __init__(self, msg): super().__init__(msg.replace('\n', ' ').replace('\r', '')) @@ -30,8 +31,10 @@ class DuplicateNotificationException(OneLineException): class BigQueryJobFailure(OneLineException): - """Exception to indicate that the function was triggered twice for the same - event.""" + """Exception to indicate that there was an issue with a BigQuery job. This + might include client errors (e.g. 
bad request which can happen if a _SUCCESS + file is dropped but there are not data files at the GCS prefix) or server + side errors like a job that fails to execute successfully.""" class DestinationRegexMatchException(OneLineException): From 91dd8af64d5e6ac6db736434dc5f45eb38f6719c Mon Sep 17 00:00:00 2001 From: Ryan den Otter Date: Tue, 23 Mar 2021 13:51:41 -0400 Subject: [PATCH 86/90] FEATURE: Snapshot the table once a chunk has successfully loaded --- .../gcs_ocn_bq_ingest/common/constants.py | 2 + .../gcs_ocn_bq_ingest/common/ordering.py | 6 +++ .../gcs_ocn_bq_ingest/common/utils.py | 40 +++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py index 27e104586..58d85ff65 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py @@ -137,3 +137,5 @@ # https://cloud.google.com/bigquery/docs/running-jobs#generate-jobid NON_BQ_JOB_ID_REGEX = re.compile(r'[^0-9a-zA-Z_\-]+') + +SNAPSHOTTING = True diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index e30684eae..e2b5287a9 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -94,6 +94,12 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], last_job_done = wait_on_last_job(bq_client, lock_blob, backfill_blob, lock_contents, polling_timeout) + # if last_job_done = True, this means that a job just completed + # We need to check if SNAPSHOTTING is enabled + if last_job_done and constants.SNAPSHOTTING: + print("Snapshotting is enabled. 
Taking a snapshot") + utils.take_table_snapshot(bq_client, job_id=lock_contents, + lock_blob_name=lock_blob.name) else: print(f"sleeping for {polling_timeout} seconds because" f"found manual lock gs://{bkt.name}/{lock_blob.name} with" diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 1c5ad6642..7ffd8a0aa 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -768,3 +768,43 @@ def apply( # Write this error message to avoid confusion. handle_bq_lock(gcs_client, bqlock, msg) raise exceptions.BigQueryJobFailure(msg) from err + + +def take_table_snapshot( + bq_client: bigquery.Client, + job_id: str, + lock_blob_name: str): + """ + Take a snapshot of the table. + We are creating the snapshot in a dataset named _snapshot + + Args: + bq_client: bigquery.Client + job_id: str + lock_blob_name: str the name of the lock_blob + """ + # Create the job + # NOTE: This feature is in ALPHA. 
We have to access the _properties + job_config = bigquery.CopyJobConfig() + job_config._properties["copy"]["operationType"] = "SNAPSHOT" # pylint: disable=W0212 + print(f"take_table_snapshot: extracting dataset and table name from {lock_blob_name}") + destination_match = constants.DESTINATION_REGEX.match(lock_blob_name) + if not destination_match: + raise RuntimeError(f"Lock Blob Name {lock_blob_name} did not match regex:" + f" {constants.DESTINATION_REGEX.pattern}") + destination_details = destination_match.groupdict() + try: + dataset = destination_details['dataset'] + table = destination_details['table'] + except KeyError: + raise exceptions.DestinationRegexMatchException( + f"Lock Blob Name {lock_blob_name} did not match dataset and table in regex:" + f" {constants.DESTINATION_REGEX.pattern}") from KeyError + + source_name = f"{dataset}.{table}" + # We need to name the snapshot based off of the job id. + snapshot_name = f"{dataset}_snapshot.{job_id}" + print(f"Creating snapshot: {snapshot_name}") + + job = bq_client.copy_table(source_name, snapshot_name, job_config=job_config) + job.result() # Wait for the job to complete. From 29b2412966794e8a1de6b6383490dafb8d7e0218 Mon Sep 17 00:00:00 2001 From: Ryan den Otter Date: Wed, 24 Mar 2021 16:09:24 -0400 Subject: [PATCH 87/90] Changing to a copy until the snapshotting feature is enabled --- .../gcs_ocn_bq_ingest/common/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 7ffd8a0aa..8b5541c00 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -785,8 +785,8 @@ def take_table_snapshot( """ # Create the job # NOTE: This feature is in ALPHA. 
We have to access the _properties - job_config = bigquery.CopyJobConfig() - job_config._properties["copy"]["operationType"] = "SNAPSHOT" # pylint: disable=W0212 + # job_config = bigquery.CopyJobConfig() + # job_config._properties["copy"]["operationType"] = "SNAPSHOT" # pylint: disable=W0212 print(f"take_table_snapshot: extracting dataset and table name from {lock_blob_name}") destination_match = constants.DESTINATION_REGEX.match(lock_blob_name) if not destination_match: @@ -806,5 +806,5 @@ def take_table_snapshot( snapshot_name = f"{dataset}_snapshot.{job_id}" print(f"Creating snapshot: {snapshot_name}") - job = bq_client.copy_table(source_name, snapshot_name, job_config=job_config) + job = bq_client.copy_table(source_name, snapshot_name) job.result() # Wait for the job to complete. From db6a98e37853c28c02a5e74eb2730b6708a4598d Mon Sep 17 00:00:00 2001 From: Ryan den Otter Date: Wed, 24 Mar 2021 17:34:54 -0400 Subject: [PATCH 88/90] Make the SNAPSHOT_DATSET and ENABLE_SNAPSHOTTING constants environment variables --- .../gcs_ocn_bq_ingest/common/constants.py | 4 +++- .../gcs_ocn_bq_ingest/common/ordering.py | 2 +- .../gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py index 58d85ff65..656173289 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py @@ -138,4 +138,6 @@ # https://cloud.google.com/bigquery/docs/running-jobs#generate-jobid NON_BQ_JOB_ID_REGEX = re.compile(r'[^0-9a-zA-Z_\-]+') -SNAPSHOTTING = True +ENABLE_SNAPSHOTTING = bool( + distutils.util.strtobool(os.getenv("ENABLE_SNAPSHOTTING", "False"))) +SNAPSHOT_DATASET = str(os.getenv("SNAPSHOT_DATASET", 'snapshots')) diff --git 
a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py index e2b5287a9..28477e201 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/ordering.py @@ -96,7 +96,7 @@ def backlog_subscriber(gcs_client: Optional[storage.Client], polling_timeout) # if last_job_done = True, this means that a job just completed # We need to check if SNAPSHOTTING is enabled - if last_job_done and constants.SNAPSHOTTING: + if last_job_done and constants.ENABLE_SNAPSHOTTING: print("Snapshotting is enabled. Taking a snapshot") utils.take_table_snapshot(bq_client, job_id=lock_contents, lock_blob_name=lock_blob.name) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py index 8b5541c00..32e40cec2 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/utils.py @@ -803,7 +803,7 @@ def take_table_snapshot( source_name = f"{dataset}.{table}" # We need to name the snapshot based off of the job id. 
- snapshot_name = f"{dataset}_snapshot.{job_id}" + snapshot_name = f"{constants.SNAPSHOT_DATASET}.{job_id}" print(f"Creating snapshot: {snapshot_name}") job = bq_client.copy_table(source_name, snapshot_name) From 145c2af39c2b74dd8ee882d961252e958bf29a1f Mon Sep 17 00:00:00 2001 From: Ryan den Otter Date: Wed, 24 Mar 2021 19:15:35 -0400 Subject: [PATCH 89/90] force enable snapshotting --- tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index fea69c20d..c7e85f213 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -88,6 +88,7 @@ def mock_env(gcs, monkeypatch): def ordered_mock_env(mock_env, monkeypatch): """environment variable mocks""" monkeypatch.setenv("ORDER_PER_TABLE", "TRUE") + monkeypatch.setenv("ENABLE_SNAPSHOTTING", "TRUE") @pytest.fixture From 5f69020ff65a1628fb32891a4d1fe0233f987a1f Mon Sep 17 00:00:00 2001 From: Ryan den Otter Date: Thu, 25 Mar 2021 15:25:12 -0400 Subject: [PATCH 90/90] setting the snapshotting as enabled and included in tests by default --- .../gcs_ocn_bq_ingest/common/constants.py | 4 +- .../gcs_event_based_ingest/tests/conftest.py | 20 ++++++++ .../gcs_ocn_bq_ingest/test_ordering_it.py | 48 +++++++++++++++++++ 3 files changed, 70 insertions(+), 2 deletions(-) diff --git a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py index 656173289..00cabfda1 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py +++ b/tools/cloud_functions/gcs_event_based_ingest/gcs_ocn_bq_ingest/common/constants.py @@ -139,5 +139,5 @@ NON_BQ_JOB_ID_REGEX = re.compile(r'[^0-9a-zA-Z_\-]+') ENABLE_SNAPSHOTTING = bool( - 
distutils.util.strtobool(os.getenv("ENABLE_SNAPSHOTTING", "False"))) -SNAPSHOT_DATASET = str(os.getenv("SNAPSHOT_DATASET", 'snapshots')) + distutils.util.strtobool(os.getenv("ENABLE_SNAPSHOTTING", "FALSE"))) +SNAPSHOT_DATASET = str(os.getenv("SNAPSHOT_DATASET", "snapshots")) diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py index c7e85f213..2bfb910e7 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/conftest.py @@ -88,6 +88,11 @@ def mock_env(gcs, monkeypatch): def ordered_mock_env(mock_env, monkeypatch): """environment variable mocks""" monkeypatch.setenv("ORDER_PER_TABLE", "TRUE") + + +@pytest.fixture +def snapshotting_env(mock_env, monkeypatch): + """environment variable mocks""" monkeypatch.setenv("ENABLE_SNAPSHOTTING", "TRUE") @@ -110,6 +115,21 @@ def teardown(): return dataset +@pytest.fixture +def snapshot_dataset(request, bq, mock_env): + dataset = bigquery.Dataset(f"{os.getenv('GCP_PROJECT')}" + f".{os.getenv('SNAPSHOT_DATASET')}") + dataset.location = "US" + bq.create_dataset(dataset) + print(f"created dataset {dataset.dataset_id}") + + def teardown(): + bq.delete_dataset(dataset, delete_contents=True, not_found_ok=True) + + request.addfinalizer(teardown) + return dataset + + @pytest.fixture def dest_table(request, bq, mock_env, dest_dataset) -> bigquery.Table: with open(os.path.join(TEST_DIR, "resources", diff --git a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py index 7fe82d200..55ff73b8a 100644 --- a/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py +++ b/tools/cloud_functions/gcs_event_based_ingest/tests/gcs_ocn_bq_ingest/test_ordering_it.py @@ -175,6 +175,54 @@ def 
test_backlog_subscriber_in_order_with_new_batch_after_exit( assert num_rows == expected_num_rows +@pytest.mark.IT +@pytest.mark.ORDERING +def test_snapshotting( + bq, gcs, gcs_bucket, dest_dataset, dest_ordered_update_table, + gcs_ordered_update_data, gcs_external_update_config, gcs_backlog, + snapshotting_env, snapshot_dataset): + """Test basic functionality of backlog subscriber. + Populate a backlog with 3 files that make updates where we can assert + that these jobs were applied in order. + + To ensure that the subscriber cleans up properly after itself before exit, + we will drop a 4th batch after the subscriber has exited and assert that it + gets applied as expected. + """ + _run_subscriber(gcs, bq, gcs_external_update_config) + table_prefix = gcs_ocn_bq_ingest.common.utils.get_table_prefix( + gcs_external_update_config.name) + backlog_blobs = gcs_bucket.list_blobs(prefix=f"{table_prefix}/_backlog/") + assert backlog_blobs.num_results == 0, "backlog is not empty" + bqlock_blob: storage.Blob = gcs_bucket.blob("_bqlock") + assert not bqlock_blob.exists(), "_bqlock was not cleaned up" + rows = bq.query("SELECT alpha_update FROM " + f"{dest_ordered_update_table.dataset_id}" + f".{dest_ordered_update_table.table_id}") + expected_num_rows = 1 + num_rows = 0 + for row in rows: + num_rows += 1 + assert row["alpha_update"] == "ABC", "backlog not applied in order" + assert num_rows == expected_num_rows + + # Now we will test what happens when the publisher posts another batch after + # the backlog subscriber has exited. 
+ backfill_blob = _post_a_new_batch(gcs_bucket, dest_dataset, + dest_ordered_update_table) + _run_subscriber(gcs, bq, backfill_blob) + + rows = bq.query("SELECT alpha_update FROM " + f"{dest_ordered_update_table.dataset_id}" + f".{dest_ordered_update_table.table_id}") + expected_num_rows = 1 + num_rows = 0 + for row in rows: + num_rows += 1 + assert row["alpha_update"] == "ABCD", "new incremental not applied" + assert num_rows == expected_num_rows + + @pytest.mark.IT @pytest.mark.ORDERING @pytest.mark.repeat(NUM_TRIES_SUBSCRIBER_TESTS)