diff --git a/.github/workflows/10_feature_airflow_checks.yml b/.github/workflows/10_feature_airflow_checks.yml index bd97fb9cf..8b8a23621 100644 --- a/.github/workflows/10_feature_airflow_checks.yml +++ b/.github/workflows/10_feature_airflow_checks.yml @@ -37,6 +37,7 @@ jobs: name: Pull Request Airflow Tests runs-on: ubuntu-latest container: datacoves/ci-airflow-dbt-snowflake:3.4 + needs: [validate-branch] env: AIRFLOW__CORE__DAGS_FOLDER: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/orchestrate/dags diff --git a/.github/workflows/10_feature_dbt_checks.yml b/.github/workflows/10_feature_dbt_checks.yml index 8932535b3..fcc39ec82 100644 --- a/.github/workflows/10_feature_dbt_checks.yml +++ b/.github/workflows/10_feature_dbt_checks.yml @@ -36,6 +36,7 @@ jobs: dbt: name: Pull Request dbt Tests runs-on: ubuntu-latest + needs: [validate-branch] # Set environment variables in # https://github.com////settings/variables/actions @@ -71,6 +72,9 @@ jobs: # cannot be applied when using the Datacoves permifrost security model. 
DATACOVES__DROP_DB_ON_FAIL: ${{ vars.DATACOVES__DROP_DB_ON_FAIL }} + # '--full-refresh' when the PR carries the 'full-refresh' label, otherwise an empty string + FULL_REFRESH_FLAG: ${{ contains(github.event.pull_request.labels.*.name, 'full-refresh') && '--full-refresh' || '' }} + steps: - name: Checkout branch uses: actions/checkout@v3.5.0 @@ -88,12 +92,20 @@ jobs: run: "dbt deps" - name: Create PR database - run: "dbt --no-write-json run-operation create_database" + run: dbt --no-write-json run-operation create_database - name: Get prod manifest id: prod_manifest run: "../automate/dbt/get_artifacts.sh" + # dbt clone reads prod state from --state logs, so only run it when the previous step found a manifest; + # otherwise a missing manifest fails the job before the full-run fallback steps below + - name: Clone incremental models that are directly or indirectly affected by the change + if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' }} + run: | + dbt clone -s state:modified+,config.materialized:incremental,state:old --state logs + dbt clone -s state:modified+,config.materialized:snapshot,state:old --state logs + ##### Governance Checks # this first runs dbt but creates enpty tables, this is enough to then run the hooks and fail fast @@ -107,13 +119,13 @@ jobs: # There is an issue with --empty and dynamic tables so need to exclude them - name: Governance run of dbt with EMPTY models using slim mode - if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }} - run: "dbt build --fail-fast --defer --state logs --select state:modified+ --empty --exclude config.materialized:dynamic_table" + if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' }} + run: "dbt build --fail-fast --defer --state logs --select state:modified+ --empty --exclude config.materialized:dynamic_table ${{ env.FULL_REFRESH_FLAG }}" # There is an issue with --empty and dynamic tables so need to exclude - name: Governance run of dbt with EMPTY models using full run - if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' || contains(github.event.pull_request.labels.*.name, 'full-refresh') }} - run: "dbt build --fail-fast --empty --exclude config.materialized:dynamic_table" + if: ${{ 
steps.prod_manifest.outputs.manifest_found == 'false' }} + run: "dbt build --fail-fast --empty --exclude config.materialized:dynamic_table ${{ env.FULL_REFRESH_FLAG }}" - name: Generate Docs Combining Prod and branch catalog.json run: "dbt-coves generate docs --merge-deferred --state logs" @@ -123,12 +135,12 @@ jobs: ##### Real dbt run given that we passed governance checks - name: Run dbt build slim mode - if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }} - run: "dbt build --fail-fast --defer --state logs --select state:modified+" + if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' }} + run: "dbt build --fail-fast --defer --state logs --select state:modified+ ${{ env.FULL_REFRESH_FLAG }}" - name: Run dbt build full run - if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' || contains(github.event.pull_request.labels.*.name, 'full-refresh') }} - run: "dbt build --fail-fast" + if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' }} + run: "dbt build --fail-fast ${{ env.FULL_REFRESH_FLAG }}" - name: Grant access to PR database id: grant-access-to-database diff --git a/.github/workflows/20_release_dbt_checks.yml b/.github/workflows/20_release_dbt_checks.yml index c63a9a712..72fea0d0f 100644 --- a/.github/workflows/20_release_dbt_checks.yml +++ b/.github/workflows/20_release_dbt_checks.yml @@ -36,6 +36,7 @@ jobs: dbt: name: Pull Request dbt Tests runs-on: ubuntu-latest + needs: [validate-branch] # Set environment variables in # https://github.com////settings/variables/actions diff --git a/automate/dbt/get_artifacts.sh b/automate/dbt/get_artifacts.sh index bc88afb5b..2806c72f1 100755 --- a/automate/dbt/get_artifacts.sh +++ b/automate/dbt/get_artifacts.sh @@ -14,7 +14,7 @@ if [ $LINES_IN_MANIFEST -eq 0 ] then echo "Manifest not found in Snowflake stage, contact the Snowflake administrator to load a updated manifest to snowflake." 
# This is used by github actions - echo "::set-output name=manifest_found::false" + echo "manifest_found=false" >> "$GITHUB_OUTPUT" # This is used by Jenkins # echo "false" > temp_MANIFEST_FOUND.txt @@ -22,7 +22,7 @@ else echo "Updated manifest from production" # This is used by github actions - echo "::set-output name=manifest_found::true" + echo "manifest_found=true" >> "$GITHUB_OUTPUT" # This is used by Jenkins # echo "true" > temp_MANIFEST_FOUND.txt diff --git a/transform/models/L1_inlets/country_geo/stg_country_polygons.sql b/transform/models/L1_inlets/country_geo/stg_country_polygons.sql index adcf45281..ae6a659b0 100644 --- a/transform/models/L1_inlets/country_geo/stg_country_polygons.sql +++ b/transform/models/L1_inlets/country_geo/stg_country_polygons.sql @@ -8,12 +8,12 @@ with raw_source as ( final as ( select - features:properties:ADMIN::STRING as country_name, - features:properties:ISO_A2::STRING as country_code_2, - features:properties:ISO_A3::STRING as country_code_3, - features:type::STRING as feature_type, - features:geometry:type::STRING as geometry_type, - TRY_TO_GEOGRAPHY(features:geometry) as geography, + name as country_name, + iso3166_1_alpha_2 as country_code_2, + iso3166_1_alpha_3 as country_code_3, + type as feature_type, + geometry_type, + try_to_geography(geometry) as geography, features as raw_geojson from raw_source @@ -26,8 +26,8 @@ select feature_type, geometry_type, geography, - case when geography is not NULL then ST_AREA(geography) end as area_m2, - case when geography is not NULL then ST_PERIMETER(geography) end as perimeter_m, - case when geography is not NULL then ST_CENTROID(geography) end as centroid, + case when geography is not null then st_area(geography) end as area_m2, + case when geography is not null then st_perimeter(geography) end as perimeter_m, + case when geography is not null then st_centroid(geography) end as centroid, raw_geojson from final diff --git 
a/transform/models/L3_coves/earthquake_analytics/earthquakes_around_the_world.sql b/transform/models/L3_coves/earthquake_analytics/earthquakes_around_the_world.sql index 14a66a09a..a04194851 100644 --- a/transform/models/L3_coves/earthquake_analytics/earthquakes_around_the_world.sql +++ b/transform/models/L3_coves/earthquake_analytics/earthquakes_around_the_world.sql @@ -1,6 +1,7 @@ {{ config( materialized='incremental', - incremental_strategy='merge' + incremental_strategy='merge', + on_schema_change='fail' ) }} with country_polygons as ( @@ -33,6 +34,7 @@ earthquakes as ( final as ( select + 1 as ct, earthquakes.*, country_polygons.country_code_2 as country_code from earthquakes, country_polygons diff --git a/transform/models/L3_coves/earthquake_analytics/earthquakes_around_the_world.yml b/transform/models/L3_coves/earthquake_analytics/earthquakes_around_the_world.yml index 59c2dce65..5df9b2e85 100644 --- a/transform/models/L3_coves/earthquake_analytics/earthquakes_around_the_world.yml +++ b/transform/models/L3_coves/earthquake_analytics/earthquakes_around_the_world.yml @@ -14,3 +14,5 @@ models: description: 'The date when the earthquake occurred.' - name: country_code description: 'ISO country code representing the location where the earthquake took place.' + - name: ct + description: 'temp col to test full refresh'