From 160fd7adb014e2a61440207d46c14daaffec078e Mon Sep 17 00:00:00 2001 From: Noel Gomez Date: Thu, 4 Dec 2025 15:30:34 -0800 Subject: [PATCH 1/6] update dbt_api test script to use env vars --- training_and_demos/dbt-api/dbt_api_files.py | 50 ++++++++++----------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/training_and_demos/dbt-api/dbt_api_files.py b/training_and_demos/dbt-api/dbt_api_files.py index ace17e8c..005ce536 100755 --- a/training_and_demos/dbt-api/dbt_api_files.py +++ b/training_and_demos/dbt-api/dbt_api_files.py @@ -18,6 +18,7 @@ load_dotenv() base_url = os.getenv("DATACOVES__API_ENDPOINT") token = os.getenv("DATACOVES__API_TOKEN") +account_id = os.getenv("DATACOVES__ACCOUNT_ID") project_slug = os.getenv("DATACOVES__PROJECT_SLUG") environment_slug = os.getenv("DATACOVES__ENVIRONMENT_SLUG") dbt_home = os.getenv("DATACOVES__DBT_HOME") @@ -363,62 +364,57 @@ def delete_project_file(account_id: int, project_slug: str, filename: str): # health_check() - # get_account(1) + # get_account(account_id) - # get_projects(1) - # get_projects(1,"balboa-analytics-datacoves") + # get_projects(account_id) + # get_projects(account_id, project_slug) - # get_environments(1, "balboa-analytics-datacoves") - # get_environments(1, "balboa-analytics-datacoves", "zpg497") + # get_environments(account_id, project_slug) + # get_environments(account_id, project_slug, environment_slug) # Work with files cols = ["environment_slug",'filename', 'metadata', 'inserted_at'] - # files = list_project_files(1, "balboa-analytics-datacoves") - # print_table(files, cols) + # project_files = list_project_files(account_id, project_slug) + # print_table(project_files, cols) - # files = list_environment_files(1, "balboa-analytics-datacoves", "zpg497") - # print_table(files, cols) - # print(files) + # environment_files = list_environment_files(account_id, project_slug, environment_slug) + # print_table(environment_files, cols) filenames = ["graph.gpickle", "graph_summary.json", "partial_parse.msgpack"] # UPLOAD FILES # for filename in filenames: - # upload_env_file(1, "balboa-analytics-datacoves", "zpg497", filename) + # upload_env_file(account_id, project_slug, environment_slug, filename) - # upload_env_file(1, "balboa-analytics-datacoves", "zpg497", "manifest.json", is_manifest=True ) + # upload_env_file(account_id, project_slug, environment_slug, "manifest.json", is_manifest=True ) # DELETE FILES # for filename in filenames: - # delete_env_file(1, "balboa-analytics-datacoves", "zpg497", filename) + # delete_env_file(account_id, project_slug, environment_slug, filename) - # delete_env_file(1, "balboa-analytics-datacoves", "zpg497", "manifest.json") + # delete_env_file(account_id, project_slug, environment_slug, "manifest.json") # SHOW FILE DETAILS # for filename in filenames: - # show_env_file_details(1, "balboa-analytics-datacoves", "zpg497", filename) + # show_env_file_details(account_id, project_slug, environment_slug, filename) # DOWNLOAD Files # for filename in filenames: - # download_env_file(1, "balboa-analytics-datacoves", "zpg497", filename) + # download_env_file(account_id, project_slug, environment_slug, filename) - download_env_manifest(1, "balboa-analytics-datacoves", "zpg497", trimmed = True) - - # download_project_manifest(1, "balboa-analytics-datacoves", trimmed = True) + # download_env_manifest(account_id, project_slug, environment_slug, trimmed = True) # for filename in filenames: - # promote_env_file(1, "balboa-analytics-datacoves", "zpg497", filename) - # download_project_file(1, "balboa-analytics-datacoves", filename) + # promote_env_file(account_id, project_slug, environment_slug, filename) + # download_project_file(account_id, project_slug, filename) # DELETE FILES # for filename in filenames: - # delete_project_file(1, "balboa-analytics-datacoves", filename) - + # delete_project_file(account_id, project_slug, filename) - # files = list_project_files(1, "balboa-analytics-datacoves") - # print_table(files, cols) - # promote_env_file(1, "balboa-analytics-datacoves", "zpg497", "manifest.json") + # promote_env_file(account_id, project_slug, environment_slug, "manifest.json" ) + # download_project_manifest(account_id, project_slug, trimmed = True) - files = list_project_files(1, "balboa-analytics-datacoves") + files = list_project_files(account_id, project_slug) print_table(files, cols) From 833245d246e3a7f3c4c285572790e811f3f41e6e Mon Sep 17 00:00:00 2001 From: Noel Gomez Date: Thu, 4 Dec 2025 15:35:33 -0800 Subject: [PATCH 2/6] clear dbt 1.10 deprecation warnings --- transform/dbt_project.yml | 177 +++++++++--------- .../account_usage/stg_pipe_usage_history.yml | 7 +- .../L1_inlets/country_data/_country_data.yml | 5 +- .../country_data/stg_country_populations.yml | 10 +- .../L1_inlets/country_geo/_country_geo.yml | 5 +- .../_covid19_epidemiological_data.yml | 15 +- .../_google_analytics_4.yml | 22 ++- transform/models/L1_inlets/loans/_loans.yml | 25 ++- .../us_population/stg_us_population.yml | 5 +- .../_usgs__earthquake_data.yml | 5 +- .../L2_bays/covid_observations/base_cases.yml | 5 +- transform/models/groups.yml | 13 +- transform/package-lock.yml | 25 ++- transform/packages.yml | 6 +- transform/seeds/state_codes.yml | 5 +- 15 files changed, 175 insertions(+), 155 deletions(-) diff --git a/transform/dbt_project.yml b/transform/dbt_project.yml index 31a90e03..6a70a721 100644 --- a/transform/dbt_project.yml +++ b/transform/dbt_project.yml @@ -25,90 +25,88 @@ analysis-paths: ["analysis"] test-paths: ["tests"] seed-paths: ["seeds"] macro-paths: ["macros"] - -target-path: "target" # directory which will store compiled SQL files clean-targets: # directories to be removed by `dbt clean` - - "target" - - "dbt_packages" + - "target" + - "dbt_packages" # Configuring models # Full documentation: https://docs.getdbt.com/docs/configuring-models # These settings can be overridden in the individual model files seeds: - +schema: SEEDS - +docs: - node_color: "#379965" - +quote_columns: false - +persist_docs: - relation: true - columns: true - +dbt-osmosis: "{model}.yml" + +schema: SEEDS + +docs: + node_color: "#379965" + +quote_columns: false + +persist_docs: + relation: true + columns: true + +dbt-osmosis: "{model}.yml" snapshots: - +enabled: "{{ True if target.database == 'balboa' else False }}" + +enabled: "{{ True if target.database == 'balboa' else False }}" - balboa: - +target_database: raw - +target_schema: SNAPSHOTS - +docs: - node_color: "#0f703d" + balboa: + +target_database: raw + +target_schema: SNAPSHOTS + +docs: + node_color: "#0f703d" models: # elementary: # +schema: "elementary" - +dbt-osmosis: "{model}.yml" - - balboa: - +materialized: view - +schema: Z_SCHEMA_NOT_CONFIGURED - - L1_inlets: - account_usage: - +schema: L1_ACCOUNT_USAGE - country_data: - +schema: L1_COUNTRY_DATA - country_geo: - +schema: L1_COUNTRY_GEO - google_analytics_4: - +schema: L1_GOOGLE_ANALYTICS_4 - covid19_epidemiological_data: - +schema: L1_COVID19_EPIDEMIOLOGICAL_DATA - loans: - +schema: L1_LOANS - observe: - +schema: L1_OBSERVE - us_population: - +schema: L1_US_POPULATION - usgs__earthquake_data: - +schema: L1_USGS__EARTHQUAKE_DATA - - L2_bays: - +group: marketing - +docs: - node_color: "#000899" - country_demographics: - +schema: L2_COUNTRY_DEMOGRAPHICS - covid_observations: - +schema: L2_COVID_OBSERVATIONS - snowflake_usage: - +schema: L2_SNOWFLAKE_USAGE - - L3_coves: - +group: marketing - +access: public - +docs: - node_color: "#366ccf" - covid_analytics: - +schema: L3_COVID_ANALYTICS - earthquake_analytics: - +schema: L3_EARTHQUAKE_ANALYTICS - loan_analytics: - +schema: L3_LOAN_ANALYTICS + +dbt-osmosis: "{model}.yml" + + balboa: + +materialized: view + +schema: Z_SCHEMA_NOT_CONFIGURED + + L1_inlets: + account_usage: + +schema: L1_ACCOUNT_USAGE + country_data: + +schema: L1_COUNTRY_DATA + country_geo: + +schema: L1_COUNTRY_GEO + google_analytics_4: + +schema: L1_GOOGLE_ANALYTICS_4 + covid19_epidemiological_data: + +schema: L1_COVID19_EPIDEMIOLOGICAL_DATA + loans: + +schema: L1_LOANS + observe: + +schema: L1_OBSERVE + us_population: + +schema: L1_US_POPULATION + usgs__earthquake_data: + +schema: L1_USGS__EARTHQUAKE_DATA + + L2_bays: + +group: marketing + +docs: + node_color: "#000899" + country_demographics: + +schema: L2_COUNTRY_DEMOGRAPHICS + covid_observations: + +schema: L2_COVID_OBSERVATIONS + snowflake_usage: + +schema: L2_SNOWFLAKE_USAGE + + L3_coves: + +group: marketing + +access: public + +docs: + node_color: "#366ccf" + covid_analytics: + +schema: L3_COVID_ANALYTICS + earthquake_analytics: + +schema: L3_EARTHQUAKE_ANALYTICS + loan_analytics: + +schema: L3_LOAN_ANALYTICS # cannot persist docs on dynamic tables # +persist_docs: # relation: false - +materialized: table + +materialized: table # L4_shares: # +docs: @@ -125,46 +123,43 @@ models: # columns: false - +persist_docs: - relation: true - columns: true + +persist_docs: + relation: true + columns: true - post-hook: - - "{{ dbt_snow_mask.apply_masking_policy('sources') if target.name == 'prd_pii' }}" - - "{{ dbt_snow_mask.apply_masking_policy('models') if target.name == 'prd_pii' }}" + +transient: "{{ 'false' if target.name == 'prd' else 'true' }}" + +post-hook: + - "{{ dbt_snow_mask.apply_masking_policy('sources') if target.name == 'prd_pii' }}" + - "{{ dbt_snow_mask.apply_masking_policy('models') if target.name == 'prd_pii' }}" # This macro creates a non_versioned view of a versioned dbt model # Useful if you want to have a versioned mart but dont want BI tool to break if version # is updated. # - "{{ create_latest_version_view() }}" # Snowflake Defaults - +transient: "{{ 'false' if target.name == 'prd' else 'true' }}" - copy_grants: true - -# This creates a view in snowflake that will track failures. Usefull for dynamic tables dbt isnt refreshing -# Run dbt with: --vars '{"persist_tests": "true", "tests_model": "test_failures"}' + +copy_grants: true on-run-end: - "{{ create_test_failure_view(results) }}" # In development, this will be created in your user schema data_tests: - +store_failures: true - +schema: DBT_TEST__AUDIT - +store_failures_as: view + +store_failures: true + +schema: DBT_TEST__AUDIT + +store_failures_as: view vars: - 'dbt_date:time_zone': 'America/Los_Angeles' + 'dbt_date:time_zone': 'America/Los_Angeles' - create_masking_policy_schema: "false" - use_common_masking_policy_db: "true" - common_masking_policy_db: "BALBOA" - common_masking_policy_schema: "MASKING_POLICIES" - use_force_applying_masking_policy: "true" + create_masking_policy_schema: "false" + use_common_masking_policy_db: "true" + common_masking_policy_db: "BALBOA" + common_masking_policy_schema: "MASKING_POLICIES" + use_force_applying_masking_policy: "true" # These are used in CI/CD to grant access to - db_usage_role_prefix: "z_db__" - schema_usage_role_prefix: "z_schema__" - pr_db_usage_role: "z_db__balboa_tst" + db_usage_role_prefix: "z_db__" + schema_usage_role_prefix: "z_schema__" + pr_db_usage_role: "z_db__balboa_tst" @@ -172,3 +167,5 @@ vars: # flags: # require_explicit_package_overrides_for_builtin_materializations: False # source_freshness_run_project_hooks: True +flags: + require_generic_test_arguments_property: true \ No newline at end of file diff --git a/transform/models/L1_inlets/account_usage/stg_pipe_usage_history.yml b/transform/models/L1_inlets/account_usage/stg_pipe_usage_history.yml index 08d367ad..f8665d0d 100644 --- a/transform/models/L1_inlets/account_usage/stg_pipe_usage_history.yml +++ b/transform/models/L1_inlets/account_usage/stg_pipe_usage_history.yml @@ -3,9 +3,6 @@ version: 2 models: - name: stg_pipe_usage_history description: Contains raw data on the usage history of pipes. - meta: - owner: '@alice' # Owner of the model - model_maturity: in dev # Development status of the model columns: - name: pipe_id description: A unique identifier assigned to the pipe @@ -21,3 +18,7 @@ models: description: The amount of data in bytes inserted into the pipe during usage - name: files_inserted description: The number of files inserted into the pipe during usage + config: + meta: + owner: '@alice' + model_maturity: in dev \ No newline at end of file diff --git a/transform/models/L1_inlets/country_data/_country_data.yml b/transform/models/L1_inlets/country_data/_country_data.yml index 31fca7ef..02442ad4 100644 --- a/transform/models/L1_inlets/country_data/_country_data.yml +++ b/transform/models/L1_inlets/country_data/_country_data.yml @@ -3,8 +3,6 @@ version: 2 sources: - name: RAW database: RAW - tags: - - daily_run_airbyte tables: - name: COUNTRY_POPULATIONS description: Raw population information from Github Datasets repository @@ -30,3 +28,6 @@ sources: - name: COUNTRY CODE data_type: VARCHAR description: The ISO 3166-1 alpha-3 code representing the country. + config: + tags: + - daily_run_airbyte \ No newline at end of file diff --git a/transform/models/L1_inlets/country_data/stg_country_populations.yml b/transform/models/L1_inlets/country_data/stg_country_populations.yml index bc53619f..d8890034 100644 --- a/transform/models/L1_inlets/country_data/stg_country_populations.yml +++ b/transform/models/L1_inlets/country_data/stg_country_populations.yml @@ -2,13 +2,13 @@ version: 2 models: - name: stg_country_populations - access: public description: Contains population information from the Github Datasets repository. data_tests: - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - country_code - - year + arguments: + combination_of_columns: + - country_code + - year columns: - name: year @@ -37,3 +37,5 @@ models: exclude: [last_year] - name: prior_year description: The prior year for which the population value is recorded + config: + access: public \ No newline at end of file diff --git a/transform/models/L1_inlets/country_geo/_country_geo.yml b/transform/models/L1_inlets/country_geo/_country_geo.yml index da39f01d..4f9a707e 100644 --- a/transform/models/L1_inlets/country_geo/_country_geo.yml +++ b/transform/models/L1_inlets/country_geo/_country_geo.yml @@ -3,8 +3,9 @@ version: 2 sources: - name: COUNTRY_GEO database: RAW - tags: - - earthquake_analysis tables: - name: COUNTRY_POLYGONS description: 'Country geographic boundaries' + config: + tags: + - earthquake_analysis \ No newline at end of file diff --git a/transform/models/L1_inlets/covid19_epidemiological_data/_covid19_epidemiological_data.yml b/transform/models/L1_inlets/covid19_epidemiological_data/_covid19_epidemiological_data.yml index 04d2f917..bee91474 100644 --- a/transform/models/L1_inlets/covid19_epidemiological_data/_covid19_epidemiological_data.yml +++ b/transform/models/L1_inlets/covid19_epidemiological_data/_covid19_epidemiological_data.yml @@ -53,13 +53,6 @@ sources: description: Flag indicating if this is the last reported data - name: JHU_DASHBOARD_COVID_19_GLOBAL description: Starschema Global COVID data - freshness: - warn_after: - count: 1 - period: minute - error_after: - count: 36 - period: hour columns: - name: COUNTRY_REGION data_type: VARCHAR @@ -124,3 +117,11 @@ sources: - name: LAST_REPORTED_FLAG data_type: BOOLEAN description: Flag indicating the last reported data + config: + freshness: + warn_after: + count: 1 + period: minute + error_after: + count: 36 + period: hour \ No newline at end of file diff --git a/transform/models/L1_inlets/google_analytics_4/_google_analytics_4.yml b/transform/models/L1_inlets/google_analytics_4/_google_analytics_4.yml index 1c1bea01..89873e4c 100644 --- a/transform/models/L1_inlets/google_analytics_4/_google_analytics_4.yml +++ b/transform/models/L1_inlets/google_analytics_4/_google_analytics_4.yml @@ -3,19 +3,9 @@ version: 2 sources: - name: GOOGLE_ANALYTICS_4 database: RAW - tags: - - daily_run_fivetran tables: - name: ENGAGEMENT_EVENTS_REPORT description: Table containing raw data on user engagement events from Google Analytics 4 - freshness: - warn_after: - count: 1 - period: day - error_after: - count: 36 - period: day - loaded_at_field: DATE::timestamp columns: - name: DATE data_type: DATE @@ -48,3 +38,15 @@ sources: - name: _FIVETRAN_SYNCED data_type: TIMESTAMP_TZ description: Timestamp of when the data was last synced by Fivetran. + config: + freshness: + warn_after: + count: 1 + period: day + error_after: + count: 36 + period: day + loaded_at_field: DATE::timestamp + config: + tags: + - daily_run_fivetran \ No newline at end of file diff --git a/transform/models/L1_inlets/loans/_loans.yml b/transform/models/L1_inlets/loans/_loans.yml index 49fea83b..b1e10478 100644 --- a/transform/models/L1_inlets/loans/_loans.yml +++ b/transform/models/L1_inlets/loans/_loans.yml @@ -3,21 +3,26 @@ version: 2 sources: - name: LOANS database: RAW - tags: - - daily_run_dlt tables: - name: PERSONAL_LOANS description: 'Personal Loans data' columns: - name: _airbyte_raw_id - meta: - masking_policy: masking_policy_pii_string + config: + meta: + masking_policy: masking_policy_pii_string - name: _airbyte_extracted_at - meta: - masking_policy: masking_policy_pii_timestamp_tz + config: + meta: + masking_policy: masking_policy_pii_timestamp_tz - name: total_acc - meta: - masking_policy: masking_policy_pii_float + config: + meta: + masking_policy: masking_policy_pii_float - name: _airbyte_meta - meta: - masking_policy: masking_policy_pii_variant + config: + meta: + masking_policy: masking_policy_pii_variant + config: + tags: + - daily_run_dlt \ No newline at end of file diff --git a/transform/models/L1_inlets/us_population/stg_us_population.yml b/transform/models/L1_inlets/us_population/stg_us_population.yml index 7f8dfa5d..0f576c09 100644 --- a/transform/models/L1_inlets/us_population/stg_us_population.yml +++ b/transform/models/L1_inlets/us_population/stg_us_population.yml @@ -3,8 +3,6 @@ version: 2 models: - name: stg_us_population description: Model representing the population data for each state in the United States. - access: private - group: marketing columns: - name: state_name description: The name of the state. @@ -30,3 +28,6 @@ models: description: The population count for the year 2018. - name: '2019' description: The population count for the year 2019. + config: + access: private + group: marketing \ No newline at end of file diff --git a/transform/models/L1_inlets/usgs__earthquake_data/_usgs__earthquake_data.yml b/transform/models/L1_inlets/usgs__earthquake_data/_usgs__earthquake_data.yml index 1bc9e8bd..50cfdb80 100644 --- a/transform/models/L1_inlets/usgs__earthquake_data/_usgs__earthquake_data.yml +++ b/transform/models/L1_inlets/usgs__earthquake_data/_usgs__earthquake_data.yml @@ -3,8 +3,9 @@ version: 2 sources: - name: USGS__EARTHQUAKE_DATA database: RAW - tags: - - earthquake_analysis tables: - name: EARTHQUAKES description: 'Data from the USGS on earthquakes' + config: + tags: + - earthquake_analysis \ No newline at end of file diff --git a/transform/models/L2_bays/covid_observations/base_cases.yml b/transform/models/L2_bays/covid_observations/base_cases.yml index 9dd39b3c..ba97faf2 100644 --- a/transform/models/L2_bays/covid_observations/base_cases.yml +++ b/transform/models/L2_bays/covid_observations/base_cases.yml @@ -40,6 +40,5 @@ models: data_tests: - dbt_expectations.expect_column_values_to_be_between: # min_value: 0 - min_value: -10000000 - # config: - # severity: warn + arguments: + min_value: -10000000 \ No newline at end of file diff --git a/transform/models/groups.yml b/transform/models/groups.yml index 38611cae..5458cf9f 100644 --- a/transform/models/groups.yml +++ b/transform/models/groups.yml @@ -3,12 +3,15 @@ groups: owner: # 'name' or 'email' is required; additional properties allowed email: finance@example.com - slack: finance-data - github: finance-data-team - + config: + meta: + slack: finance-data + github: finance-data-team - name: marketing owner: # 'name' or 'email' is required; additional properties allowed email: markeeting@example.com - slack: markeeting-data - github: markeeting-data-team + config: + meta: + slack: markeeting-data + github: markeeting-data-team \ No newline at end of file diff --git a/transform/package-lock.yml b/transform/package-lock.yml index 98197022..979f8fd7 100644 --- a/transform/package-lock.yml +++ b/transform/package-lock.yml @@ -1,12 +1,17 @@ packages: - - package: dbt-labs/dbt_utils - version: 1.3.0 - - package: metaplane/dbt_expectations - version: 0.10.8 - - package: entechlog/dbt_snow_mask - version: 0.2.6 - - package: dbt-labs/dbt_external_tables + - name: dbt_utils + package: dbt-labs/dbt_utils + version: 1.3.2 + - name: dbt_expectations + package: metaplane/dbt_expectations + version: 0.10.10 + - name: dbt_snow_mask + package: entechlog/dbt_snow_mask + version: 0.2.7 + - name: dbt_external_tables + package: dbt-labs/dbt_external_tables version: 0.11.1 - - package: godatadriven/dbt_date - version: 0.13.0 -sha1_hash: 3fd7c6c9a29c5d4c838a37063b2b3ad273b78906 + - name: dbt_date + package: godatadriven/dbt_date + version: 0.17.0 +sha1_hash: 7aa15b7d43881cbd57e7fdfaf4088f62aefe68d6 diff --git a/transform/packages.yml b/transform/packages.yml index efad2814..9c6a7906 100644 --- a/transform/packages.yml +++ b/transform/packages.yml @@ -1,10 +1,10 @@ packages: - package: dbt-labs/dbt_utils - version: 1.3.0 + version: 1.3.2 - package: metaplane/dbt_expectations - version: 0.10.8 + version: 0.10.10 - package: entechlog/dbt_snow_mask - version: 0.2.6 + version: 0.2.7 # for the latest version tag - package: dbt-labs/dbt_external_tables version: 0.11.1 diff --git a/transform/seeds/state_codes.yml b/transform/seeds/state_codes.yml index a00c4305..315dc2ed 100644 --- a/transform/seeds/state_codes.yml +++ b/transform/seeds/state_codes.yml @@ -4,9 +4,10 @@ seeds: - name: state_codes description: Lookup table used to convert between state abbreviations and state names config: - access: public + meta: + access: public columns: - name: state_name description: Name of the state - name: state_code - description: Two letter abbreviation for the state + description: Two letter abbreviation for the state \ No newline at end of file From e2e5b53985942d6f95c82a5f99ff0e203f12af3e Mon Sep 17 00:00:00 2001 From: Noel Gomez Date: Thu, 4 Dec 2025 15:55:20 -0800 Subject: [PATCH 3/6] fix dbt fusion issues --- training_and_demos/dbt_fusion/readme.md | 6 ------ transform/dbt_project.yml | 6 +++--- .../L1_inlets/account_usage/_account_usage.yml | 18 +++++++++--------- .../L1_inlets/us_population/_us_population.yml | 2 +- .../covid_cases_expected_values.csv | 10 +++++----- 5 files changed, 18 insertions(+), 24 deletions(-) diff --git a/training_and_demos/dbt_fusion/readme.md b/training_and_demos/dbt_fusion/readme.md index fe2b00f0..36d4d9d5 100644 --- a/training_and_demos/dbt_fusion/readme.md +++ b/training_and_demos/dbt_fusion/readme.md @@ -11,12 +11,6 @@ rm package-lock.yml dbt deps ``` -# Remove unsupported Features - -As of 7/5/2025, Dynamic tables in Snowflake are not supported - - - # Fix configs Use [dbt tool](https://github.com/dbt-labs/dbt-autofix) to fix deprecations diff --git a/transform/dbt_project.yml b/transform/dbt_project.yml index 6a70a721..2a6d813b 100644 --- a/transform/dbt_project.yml +++ b/transform/dbt_project.yml @@ -41,7 +41,7 @@ seeds: +persist_docs: relation: true columns: true - +dbt-osmosis: "{model}.yml" + # +dbt-osmosis: "{model}.yml" snapshots: +enabled: "{{ True if target.database == 'balboa' else False }}" @@ -55,7 +55,7 @@ snapshots: models: # elementary: # +schema: "elementary" - +dbt-osmosis: "{model}.yml" + # +dbt-osmosis: "{model}.yml" balboa: +materialized: view @@ -168,4 +168,4 @@ vars: # require_explicit_package_overrides_for_builtin_materializations: False # source_freshness_run_project_hooks: True flags: - require_generic_test_arguments_property: true \ No newline at end of file + require_generic_test_arguments_property: true diff --git a/transform/models/L1_inlets/account_usage/_account_usage.yml b/transform/models/L1_inlets/account_usage/_account_usage.yml index e51304a9..edb058c9 100644 --- a/transform/models/L1_inlets/account_usage/_account_usage.yml +++ b/transform/models/L1_inlets/account_usage/_account_usage.yml @@ -20,7 +20,7 @@ sources: data_type: TIMESTAMP_LTZ description: The timestamp indicating when the pipe usage record ended. - name: CREDITS_USED - data_type: NUMBER + data_type: NUMBER(38,9) description: Amount of credits used by the pipe. - name: BYTES_INSERTED data_type: FLOAT @@ -279,16 +279,16 @@ sources: data_type: DATE description: The date for which storage usage data is recorded. - name: STORAGE_BYTES - data_type: NUMBER + data_type: NUMBER(38,6) description: The amount of storage used, in bytes. - name: STAGE_BYTES - data_type: NUMBER + data_type: NUMBER(38,6) description: The number of bytes used by stage storage. - name: FAILSAFE_BYTES - data_type: NUMBER + data_type: NUMBER(38,6) description: The amount of storage in bytes used for the failsafe feature. - name: HYBRID_TABLE_STORAGE_BYTES - data_type: NUMBER + data_type: NUMBER(38,6) description: Represents the storage usage in bytes for hybrid tables. - name: WAREHOUSE_METERING_HISTORY description: Warehouse metering history raw data @@ -306,14 +306,14 @@ sources: data_type: VARCHAR description: The name of the warehouse whose metering history is recorded. - name: CREDITS_USED - data_type: NUMBER + data_type: NUMBER(38,9) description: The number of credits used by the warehouse during the specified time period. - name: CREDITS_USED_COMPUTE - data_type: NUMBER + data_type: NUMBER(38,9) description: The number of compute credits used by the warehouse. - name: CREDITS_USED_CLOUD_SERVICES - data_type: NUMBER + data_type: NUMBER(38,9) description: Amount of credits used for cloud services. - name: CREDITS_ATTRIBUTED_COMPUTE_QUERIES description: Represents the number of credits attributed to compute queries for the warehouse. - data_type: NUMBER + data_type: NUMBER(38,9) diff --git a/transform/models/L1_inlets/us_population/_us_population.yml b/transform/models/L1_inlets/us_population/_us_population.yml index a8d15ce9..970b111e 100644 --- a/transform/models/L1_inlets/us_population/_us_population.yml +++ b/transform/models/L1_inlets/us_population/_us_population.yml @@ -13,7 +13,7 @@ sources: data_type: VARCHAR description: Name of the state - name: ID - data_type: NUMBER + data_type: NUMBER(19,0) description: Unique identifier for each state - name: _2010 data_type: VARCHAR diff --git a/transform/seeds/test_values/covid_cases_expected_values.csv b/transform/seeds/test_values/covid_cases_expected_values.csv index 89a5aa1c..11d73b4a 100644 --- a/transform/seeds/test_values/covid_cases_expected_values.csv +++ b/transform/seeds/test_values/covid_cases_expected_values.csv @@ -1,6 +1,6 @@ LOCATION_ID,DATE,CONFIRMED,DEATHS,ACTIVE,RECOVERED -236cc20c7e0dc9405abef872351eedb9,2020-12-18,1324,8,1316, -236cc20c7e0dc9405abef872351eedb9,2020-12-21,1332,9,1323, -236cc20c7e0dc9405abef872351eedb9,2020-12-22,1350,10,1340, -236cc20c7e0dc9405abef872351eedb9,2021-01-08,2858,10,2848, -236cc20c7e0dc9405abef872351eedb9,2021-01-15,2427,11,2416, \ No newline at end of file +236cc20c7e0dc9405abef872351eedb9,2020-12-18,1324,8,1316,0 +236cc20c7e0dc9405abef872351eedb9,2020-12-21,1332,9,1323,0 +236cc20c7e0dc9405abef872351eedb9,2020-12-22,1350,10,1340,0 +236cc20c7e0dc9405abef872351eedb9,2021-01-08,2858,10,2848,0 +236cc20c7e0dc9405abef872351eedb9,2021-01-15,2427,11,2416,0 From aeb374de9c60e085993969bdaa6044eb00df1f64 Mon Sep 17 00:00:00 2001 From: Noel Gomez Date: Thu, 4 Dec 2025 16:34:22 -0800 Subject: [PATCH 4/6] update containers in GH Actions --- .github/workflows/10_feature_airflow_checks.yml | 3 ++- .github/workflows/10_feature_dbt_checks.yml | 2 +- .github/workflows/15_drop_feature_db.yml | 2 +- .github/workflows/20_release_dbt_checks.yml | 2 +- .github/workflows/30_deploy_changes_to_production.yml | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/10_feature_airflow_checks.yml b/.github/workflows/10_feature_airflow_checks.yml index df8b9c3c..f86152e7 100644 --- a/.github/workflows/10_feature_airflow_checks.yml +++ b/.github/workflows/10_feature_airflow_checks.yml @@ -36,9 +36,10 @@ jobs: airflow: name: Pull Request Airflow Tests runs-on: ubuntu-latest - container: datacoves/ci-airflow-dbt-snowflake:4.0 needs: [validate-branch] + container: datacoves/ci-airflow-dbt-snowflake:4.1 + env: AIRFLOW__CORE__DAGS_FOLDER: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/orchestrate/dags AIRFLOW__CORE__DAGBAG_IMPORT_TIMEOUT: 300 diff --git a/.github/workflows/10_feature_dbt_checks.yml b/.github/workflows/10_feature_dbt_checks.yml index 0c36be35..02e9b759 100644 --- a/.github/workflows/10_feature_dbt_checks.yml +++ b/.github/workflows/10_feature_dbt_checks.yml @@ -46,7 +46,7 @@ jobs: # environment: PR_ENV # most people should use this one - container: datacoves/ci-basic-dbt-snowflake:4.0 + container: datacoves/ci-basic-dbt-snowflake:4.1 defaults: run: diff --git a/.github/workflows/15_drop_feature_db.yml b/.github/workflows/15_drop_feature_db.yml index 09cf91a2..5aaef504 100644 --- a/.github/workflows/15_drop_feature_db.yml +++ b/.github/workflows/15_drop_feature_db.yml @@ -23,7 +23,7 @@ jobs: # Alternatively, You can define multiple ENV for different workflows. # https://github.com///settings/environments # environment: PR_ENV - container: datacoves/ci-basic-dbt-snowflake:4.0 + container: datacoves/ci-basic-dbt-snowflake:4.1 defaults: run: diff --git a/.github/workflows/20_release_dbt_checks.yml b/.github/workflows/20_release_dbt_checks.yml index 5aa690be..fd0a65c0 100644 --- a/.github/workflows/20_release_dbt_checks.yml +++ b/.github/workflows/20_release_dbt_checks.yml @@ -125,7 +125,7 @@ jobs: # environment: PR_ENV # Most people should use this docker image - container: datacoves/ci-basic-dbt-snowflake:4.0 + container: datacoves/ci-basic-dbt-snowflake:4.1 defaults: run: diff --git a/.github/workflows/30_deploy_changes_to_production.yml b/.github/workflows/30_deploy_changes_to_production.yml index 01f086e4..49bd5585 100644 --- a/.github/workflows/30_deploy_changes_to_production.yml +++ b/.github/workflows/30_deploy_changes_to_production.yml @@ -25,7 +25,7 @@ jobs: # Alternatively, You can define multiple ENV for different workflows. # https://github.com///settings/environments # environment: PR_ENV - container: datacoves/ci-basic-dbt-snowflake:4.0 + container: datacoves/ci-basic-dbt-snowflake:4.1 defaults: run: From 8c0206942b6dad0c4f51089e049735b862cc87a9 Mon Sep 17 00:00:00 2001 From: Noel Gomez Date: Thu, 4 Dec 2025 16:50:07 -0800 Subject: [PATCH 5/6] fix yml link issues and update dbt version in pre-commit --- .pre-commit-config.yaml | 4 ++-- .../models/L1_inlets/account_usage/stg_pipe_usage_history.yml | 2 +- transform/models/L1_inlets/country_data/_country_data.yml | 2 +- .../models/L1_inlets/country_data/stg_country_populations.yml | 2 +- transform/models/L1_inlets/country_geo/_country_geo.yml | 2 +- .../_covid19_epidemiological_data.yml | 2 +- .../L1_inlets/google_analytics_4/_google_analytics_4.yml | 2 +- transform/models/L1_inlets/loans/_loans.yml | 2 +- .../models/L1_inlets/us_population/stg_us_population.yml | 2 +- transform/models/L2_bays/covid_observations/base_cases.yml | 2 +- transform/models/groups.yml | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e5297228..2a5f75b8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,8 +25,8 @@ repos: additional_dependencies: [ "sqlfluff-templater-dbt==3.1.1", - "dbt-core==1.8.8", - "dbt-snowflake==1.8.4", + "dbt-core==1.10.15", + "dbt-snowflake==1.10.3", ] args: [--config, transform/.sqlfluff] diff --git a/transform/models/L1_inlets/account_usage/stg_pipe_usage_history.yml b/transform/models/L1_inlets/account_usage/stg_pipe_usage_history.yml index f8665d0d..7d518396 100644 --- a/transform/models/L1_inlets/account_usage/stg_pipe_usage_history.yml +++ b/transform/models/L1_inlets/account_usage/stg_pipe_usage_history.yml @@ -21,4 +21,4 @@ models: config: meta: owner: '@alice' - model_maturity: in dev \ No newline at end of file + model_maturity: in dev diff --git a/transform/models/L1_inlets/country_data/_country_data.yml b/transform/models/L1_inlets/country_data/_country_data.yml index 02442ad4..0745a717 100644 --- a/transform/models/L1_inlets/country_data/_country_data.yml +++ b/transform/models/L1_inlets/country_data/_country_data.yml @@ -30,4 +30,4 @@ sources: description: The ISO 3166-1 alpha-3 code representing the country. config: tags: - - daily_run_airbyte \ No newline at end of file + - daily_run_airbyte diff --git a/transform/models/L1_inlets/country_data/stg_country_populations.yml b/transform/models/L1_inlets/country_data/stg_country_populations.yml index d8890034..997a6c79 100644 --- a/transform/models/L1_inlets/country_data/stg_country_populations.yml +++ b/transform/models/L1_inlets/country_data/stg_country_populations.yml @@ -38,4 +38,4 @@ models: - name: prior_year description: The prior year for which the population value is recorded config: - access: public \ No newline at end of file + access: public diff --git a/transform/models/L1_inlets/country_geo/_country_geo.yml b/transform/models/L1_inlets/country_geo/_country_geo.yml index 4f9a707e..2ad5a9b5 100644 --- a/transform/models/L1_inlets/country_geo/_country_geo.yml +++ b/transform/models/L1_inlets/country_geo/_country_geo.yml @@ -8,4 +8,4 @@ sources: description: 'Country geographic boundaries' config: tags: - - earthquake_analysis \ No newline at end of file + - earthquake_analysis diff --git a/transform/models/L1_inlets/covid19_epidemiological_data/_covid19_epidemiological_data.yml b/transform/models/L1_inlets/covid19_epidemiological_data/_covid19_epidemiological_data.yml index bee91474..2f67efec 100644 --- a/transform/models/L1_inlets/covid19_epidemiological_data/_covid19_epidemiological_data.yml +++ b/transform/models/L1_inlets/covid19_epidemiological_data/_covid19_epidemiological_data.yml @@ -124,4 +124,4 @@ sources: period: minute error_after: count: 36 - period: hour \ No newline at end of file + period: hour diff --git a/transform/models/L1_inlets/google_analytics_4/_google_analytics_4.yml b/transform/models/L1_inlets/google_analytics_4/_google_analytics_4.yml index 89873e4c..d182def4 100644 --- a/transform/models/L1_inlets/google_analytics_4/_google_analytics_4.yml +++ b/transform/models/L1_inlets/google_analytics_4/_google_analytics_4.yml @@ -49,4 +49,4 @@ sources: loaded_at_field: DATE::timestamp config: tags: - - daily_run_fivetran \ No newline at end of file + - daily_run_fivetran diff --git a/transform/models/L1_inlets/loans/_loans.yml b/transform/models/L1_inlets/loans/_loans.yml index b1e10478..c241004e 100644 --- a/transform/models/L1_inlets/loans/_loans.yml +++ b/transform/models/L1_inlets/loans/_loans.yml @@ -25,4 +25,4 @@ sources: masking_policy: masking_policy_pii_variant config: tags: - - daily_run_dlt \ No newline at end of file + - daily_run_dlt diff --git a/transform/models/L1_inlets/us_population/stg_us_population.yml b/transform/models/L1_inlets/us_population/stg_us_population.yml index 0f576c09..2d73812c 100644 --- a/transform/models/L1_inlets/us_population/stg_us_population.yml +++ b/transform/models/L1_inlets/us_population/stg_us_population.yml @@ -30,4 +30,4 @@ models: description: The population count for the year 2019. config: access: private - group: marketing \ No newline at end of file + group: marketing diff --git a/transform/models/L2_bays/covid_observations/base_cases.yml b/transform/models/L2_bays/covid_observations/base_cases.yml index ba97faf2..718f9887 100644 --- a/transform/models/L2_bays/covid_observations/base_cases.yml +++ b/transform/models/L2_bays/covid_observations/base_cases.yml @@ -41,4 +41,4 @@ models: - dbt_expectations.expect_column_values_to_be_between: # min_value: 0 arguments: - min_value: -10000000 \ No newline at end of file + min_value: -10000000 diff --git a/transform/models/groups.yml b/transform/models/groups.yml index 5458cf9f..86b45ff7 100644 --- a/transform/models/groups.yml +++ b/transform/models/groups.yml @@ -14,4 +14,4 @@ groups: config: meta: slack: markeeting-data - github: markeeting-data-team \ No newline at end of file + github: markeeting-data-team From 7581da29d644450cffdcf1d38d81a84884a72754 Mon Sep 17 00:00:00 2001 From: Noel Gomez Date: Thu, 4 Dec 2025 17:02:48 -0800 Subject: [PATCH 6/6] fix yaml lint issues --- .../models/L1_inlets/country_data/stg_country_populations.yml | 2 +- .../L1_inlets/usgs__earthquake_data/_usgs__earthquake_data.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/transform/models/L1_inlets/country_data/stg_country_populations.yml b/transform/models/L1_inlets/country_data/stg_country_populations.yml index 997a6c79..a261b58b 100644 --- a/transform/models/L1_inlets/country_data/stg_country_populations.yml +++ b/transform/models/L1_inlets/country_data/stg_country_populations.yml @@ -25,7 +25,7 @@ models: # Declare the versions, and highlight the diffs versions: - v: 1 - # Matches what's above -- nothing more needed + # Matches what's above -- nothing more needed - v: 2 columns: - include: all diff --git a/transform/models/L1_inlets/usgs__earthquake_data/_usgs__earthquake_data.yml b/transform/models/L1_inlets/usgs__earthquake_data/_usgs__earthquake_data.yml index 50cfdb80..52edf1a6 100644 --- a/transform/models/L1_inlets/usgs__earthquake_data/_usgs__earthquake_data.yml +++ b/transform/models/L1_inlets/usgs__earthquake_data/_usgs__earthquake_data.yml @@ -8,4 +8,4 @@ sources: description: 'Data from the USGS on earthquakes' config: tags: - - earthquake_analysis \ No newline at end of file + - earthquake_analysis