# COLP: load the IPIS use-type mappings as a dbt seed and split the build into
# load_data / python / sql_build stages.
#
# Review changes folded into this patch:
#   * colp.sh: the three build stages are chained with && so a failing stage
#     stops the build instead of letting later stages run on bad inputs.
#   * colp_qaqc.sql: `difference` counts a missing side as 0, so use types that
#     exist in only one version report their full delta instead of NULL.
#   * profiles.yml: review note about BUILD_ENGINE_DB / BUILD_ENGINE_SCHEMA.
#
# NOTE(review): context-line indentation was reconstructed from a
# whitespace-collapsed copy of this patch; verify it against the target files
# (or apply with `git apply --ignore-whitespace`). The blob hashes on the
# `index` lines predate the review changes.
diff --git a/.github/workflows/colp_build.yml b/.github/workflows/colp_build.yml
index 8f196431c4..788f7183c6 100644
--- a/.github/workflows/colp_build.yml
+++ b/.github/workflows/colp_build.yml
@@ -47,6 +47,10 @@ jobs:
           AWS_SECRET_ACCESS_KEY: "op://Data Engineering/DO_keys/AWS_SECRET_ACCESS_KEY"
           AWS_ACCESS_KEY_ID: "op://Data Engineering/DO_keys/AWS_ACCESS_KEY_ID"
           BUILD_ENGINE_SERVER: "op://Data Engineering/EDM_DATA/server_url"
+          BUILD_ENGINE_HOST: "op://Data Engineering/EDM_DATA/server"
+          BUILD_ENGINE_USER: "op://Data Engineering/EDM_DATA/username"
+          BUILD_ENGINE_PASSWORD: "op://Data Engineering/EDM_DATA/password"
+          BUILD_ENGINE_PORT: "op://Data Engineering/EDM_DATA/port"
 
       - name: Finish container setup ...
         working-directory: ./
diff --git a/products/colp/bash/02a_load_data.sh b/products/colp/bash/02a_load_data.sh
new file mode 100755
index 0000000000..c1a58cca84
--- /dev/null
+++ b/products/colp/bash/02a_load_data.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+source bash/config.sh
+
+# Load seeds via dbt
+dbt seed --select usetype_mappings
+run_sql_file sql/load_modifications.sql
+run_sql_file sql/geo_inputs.sql
diff --git a/products/colp/bash/02b_python.sh b/products/colp/bash/02b_python.sh
new file mode 100755
index 0000000000..f76a9b951d
--- /dev/null
+++ b/products/colp/bash/02b_python.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+source bash/config.sh
+
+python3 -m python.geocode
+python3 -m python.geo_qaqc
diff --git a/products/colp/bash/02_build.sh b/products/colp/bash/02c_sql_build.sh
similarity index 69%
rename from products/colp/bash/02_build.sh
rename to products/colp/bash/02c_sql_build.sh
index 7c28934e7e..a2157dfc1c 100755
--- a/products/colp/bash/02_build.sh
+++ b/products/colp/bash/02c_sql_build.sh
@@ -1,13 +1,6 @@
 #!/bin/bash
 source bash/config.sh
 
-run_sql_file sql/load_modifications.sql
-
-run_sql_file sql/geo_inputs.sql
-
-python3 -m python.geocode
-python3 -m python.geo_qaqc
-
 run_sql_file sql/_procedures.sql
 run_sql_file sql/clean_parcelname.sql
 run_sql_file sql/create_colp.sql
diff --git a/products/colp/colp.sh b/products/colp/colp.sh
index c769a641f0..3d0f1a4898 100755
--- a/products/colp/colp.sh
+++ b/products/colp/colp.sh
@@ -2,10 +2,18 @@
 
 case $1 in
     dataloading ) ./bash/01_dataloading.sh ;;
-    build ) ./bash/02_build.sh ;;
+    build )
+        # Run the stages in order; && stops at the first stage that fails.
+        ./bash/02a_load_data.sh &&
+        ./bash/02b_python.sh &&
+        ./bash/02c_sql_build.sh
+        ;;
+    load_data ) ./bash/02a_load_data.sh ;;
+    python ) ./bash/02b_python.sh ;;
+    sql_build ) ./bash/02c_sql_build.sh ;;
     qaqc ) ./bash/03_qaqc.sh ;;
     export ) ./bash/04_export.sh ;;
     upload ) python3 -m dcpy.connectors.edm.publishing upload -p db-colp -a public-read ;;
     sql) sql $@ ;;
-    * ) echo "COMMAND \"$1\" is not found. (valid commands: dataloading|build|export|upload)" ;;
+    * ) echo "COMMAND \"$1\" is not found. (valid commands: dataloading|build|load_data|python|sql_build|qaqc|export|upload)" ;;
 esac
diff --git a/products/colp/dbt_project.yml b/products/colp/dbt_project.yml
new file mode 100644
index 0000000000..f0c62676d3
--- /dev/null
+++ b/products/colp/dbt_project.yml
@@ -0,0 +1,5 @@
+name: "colp"
+
+profile: "dcp-de-postgres"
+
+seed-paths: ["seeds"]
diff --git a/products/colp/profiles.yml b/products/colp/profiles.yml
new file mode 100644
index 0000000000..6712525864
--- /dev/null
+++ b/products/colp/profiles.yml
@@ -0,0 +1,14 @@
+# Postgres connection for dbt, read entirely from BUILD_ENGINE_* variables.
+# NOTE(review): BUILD_ENGINE_DB and BUILD_ENGINE_SCHEMA are not among the
+# secrets this patch adds to the workflow; confirm they are exported first.
+dcp-de-postgres:
+  target: dev
+  outputs:
+    dev:
+      type: postgres
+      host: "{{ env_var('BUILD_ENGINE_HOST') }}"
+      user: "{{ env_var('BUILD_ENGINE_USER') }}"
+      password: "{{ env_var('BUILD_ENGINE_PASSWORD') }}"
+      port: "{{ env_var('BUILD_ENGINE_PORT') | as_number }}"
+      dbname: "{{ env_var('BUILD_ENGINE_DB') }}"
+      schema: "{{ env_var('BUILD_ENGINE_SCHEMA') }}"
diff --git a/products/colp/seeds/_seeds.yml b/products/colp/seeds/_seeds.yml
new file mode 100644
index 0000000000..5d46565b33
--- /dev/null
+++ b/products/colp/seeds/_seeds.yml
@@ -0,0 +1,11 @@
+version: 2
+
+seeds:
+  - name: usetype_mappings
+    description: |
+      Mapping table for cleaning IPIS use type values.
+      Maps source use type names to standardized cleaned versions.
+    config:
+      column_types:
+        source: text
+        cleaned: text
diff --git a/products/colp/seeds/usetype_mappings.csv b/products/colp/seeds/usetype_mappings.csv
new file mode 100644
index 0000000000..5e908f7df2
--- /dev/null
+++ b/products/colp/seeds/usetype_mappings.csv
@@ -0,0 +1,83 @@
+source,cleaned
+AGCY EXECUTVE OFFICE,AGENCY EXECUTIVE OFFICE
+AGCY OFFICE,AGENCY OFFICE
+ALTERNTV HIGH SCHOOL,ALTERNATIVE HIGH SCHOOL
+AMBUL HEALTH FACIL,AMBULATORY HEALTH FACILITY
+COMBINED MAINT/STRG,COMBINED MAINTENANCE/STORAGE FACILITY
+COURT (SPORTS),TENNIS COURT BASKETBALL COURT ETC
+CRIMINAL JUSTICE FAC,CRIMINAL JUSTICE FACILITY
+EARLY CHILDHD CENTER,EARLY CHILDHOOD CENTER
+EDUC SKILLS CENTER,EDUCATIONAL SKILLS CENTER
+FACILITIES MAINT,FACILITIES MAINTENANCE
+HIGHWAY STRIP/ETC,MALL/TRIANGLE/HIGHWAY STRIP/PARK STRIP
+IN USE-RES STRUCTURE,IN USE-RESIDENTIAL STRUCTURE
+INDOOR MAINT/STORAGE,INDOOR MAINTENANCE/STORAGE/GARAGE FACILITY
+INDOOR PKNG GARAGE,INDOOR PARKING GARAGE
+INDOOR STORAGE(WHSE),INDOOR STORAGE (WAREHOUSE)
+INDOOR STORAGE-BULK,INDOOR STORAGE - BULK MATERIAL
+INDOOR STORAGE-EQUIP,INDOOR STORAGE - EQUIPMENT
+INDOOR STRG-PRINTED,INDOOR STORAGE - PRINTED MATERIAL/SUPPLIES
+INDOOR STRG-SUPPLIES,INDOOR STORAGE - SUPPLIES
+MUNICIPL PKNG-INDR,MUNICIPAL PARKING - INDOOR PUBLIC
+MUNICPL PKNG-OUTDR,MUNICIPAL PARKING - OUTDOOR PUBLIC
+NATURAL AREA/ETC,NATURAL AREA/WETLAND/WILDLIFE REFUGE
+NEIGHBORHD SIT AREA,NEIGHBORHOOD SITTING AREA
+NO USE-NON RES STRC,NO USE-NON RESIDENTIAL STRUCTURE
+NO USE-NON RESIDENTIAL STRUCTURES,NO USE-NON RESIDENTIAL STRUCTURE
+NO USE-RES STRUCTURE,NO USE-RESIDENTIAL STRUCTURE
+NO USE-WFT (ANY AGY),NO USE-WATERFRONT (ANY AGENCY)
+NON-RES SOC SERV FAC,NON-RESIDENTIAL SOCIAL SERVICE FACILITY
+O/D STRG-AGY VEHICLE,OUTDOOR STORAGE - AGENCY VEHICLES
+OFC LEASED BY EDUC,OFFICE LEASED BY EDUC
+OTH DETENTION FACIL,OTHER DETENTION FACILITY
+OTH INDOOR MAINT/STG,OTHER INDOOR MAINTENANCE/STORAGE FACILITY
+OTH OUTDOOR STORAGE,OTHER OUTDOOR STORAGE
+OTH OUTDR MAINT/STG,OTHER OUTDOOR MAINTENANCE/STORAGE FACILITY
+OTH SECURE DETENTION,OTHER SECURE DETENTION FACILITY
+OTH SOLID WASTE FAC,OTHER SOLID WASTE FACILITY
+OTH WATER SUPPLY FAC,OTHER WATER SUPPLY FACILITY
+OTH WATER TRANS FAC,OTHER WATERFRONT TRANSPORTATION FACILITY
+OTHER COLLEGE/UNIV,OTHER COLLEGE/UNIVERSITY
+OTHER CULTURAL FAC,OTHER CULTURAL FACILITY
+OTHER HEALTH FACILTY,OTHER HEALTH FACILITY
+OTHER POLL CNTRL FAC,OTHER POLLUTION CONTROL FACILITY
+OTHER PUB SAFETY FAC,OTHER PUBLIC SAFETY FACILITY
+OTHER REC FACILITY,OTHER RECREATIONAL FACILITY
+OTHER SPC REC-INDOOR,OTHER SPECIAL RECREATIONAL FACILITY - INDOOR
+OTHER TESTING FACIL,OTHER TESTING FACILITY
+OTHER TRANS FACILITY,OTHER TRANSPORTATION FACILITY
+OTHR SPC REC-OUTDOOR,OTHER SPECIAL RECREATIONAL FACILITY - OUTDOOR
+OUTDOOR MAINT/STORGE,OUTDOOR MAINTENANCE/STORAGE FACILITY
+OUTDOOR PKNG-AGENCY,OUTDOOR PARKING - AGENCY VEHICLES
+OUTDOOR PKNG-CLIENTS,OUTDOOR PARKING - CLIENTS
+OUTDOOR STORAGE-BULK,OUTDOOR STORAGE - BULK MATERIAL
+OUTDOOR STRG-EQUIP,OUTDOOR STORAGE - EQUIPMENT
+OUTDOOR TOW POUND,OUTDOOR PARKING - TOW POUND
+OUTDR PKNG-EMPLOYEES,OUTDOOR PARKING - EMPLOYEES
+PLAYGRND/SPORTS AREA,PLAYGROUND/SPORTS AREA
+POLLUTION CONTRL FAC,POLLUTION CONTROL FACILITY
+PUB SAFETY FACILITY,PUBLIC SAFETY FACILITY
+RECREATION CTR/GYM,RECREATION CENTER/GYMNASIUM
+RECREATIONL FACILITY,RECREATIONAL FACILITY
+RES FACIL - CHILDREN,RESIDENTIAL FACILITY FOR CHILDREN
+RES HEALTH CARE FAC,RESIDENTIAL HEALTH CARE FACILITY
+SECTION STATION,SANITATION SECTION STATION
+SLUDGE DE-WATERING,SLUDGE DE-WATERING FACILITY
+SOCIAL SERVICE FACIL,SOCIAL SERVICE FACILITY
+SOLID WASTE TSFR STN,SOLID WASTE TRANSFER STATION
+SPEC RECREATN-INDOOR,SPECIAL RECREATIONAL FACILITY - INDOOR
+SPECIAL EDUC SCHOOL,SPECIAL EDUCATION SCHOOL
+SPECIAL REC-OUTDOOR,SPECIAL RECREATIONAL FACILITY - OUTDOOR
+STORMWATER PUMPING,STORMWATER PUMPING STATION
+TRAIN FAC-INDOOR,TRAINING FACILITY - INDOOR
+TRAIN FACIL-OUTDOOR,TRAINING FACILITY - OUTDOOR
+TRANS/PUBLIC PARKING,TRANSPORTATION/PUBLIC PARKING FACILITY
+TRANSL HSG - ADULT,TRANSITIONAL HOUSING - ADULT
+TRANSL HSG - FAMILY,TRANSITIONAL HOUSING - FAMILY
+TRANSL HSG - MEN,TRANSITIONAL HOUSING - ADULT MEN
+TRANSL HSG - WOMEN,TRANSITIONAL HOUSING - ADULT WOMEN
+UNDEVELOP OPEN SPACE,UNDEVELOPED OPEN SPACE
+VOCATION HIGH SCHOOL,VOCATIONAL HIGH SCHOOL
+WASTEWATER PUMPING,WASTEWATER PUMPING STATION
+WATER POLL CNTRL PLT,WATER POLLUTION CONTROL PLANT
+WATER SUPPLY FACIL,WATER SUPPLY FACILITY
diff --git a/products/colp/sql/colp_qaqc.sql b/products/colp/sql/colp_qaqc.sql
index 0edf8d1ed8..18bc2bad1f 100644
--- a/products/colp/sql/colp_qaqc.sql
+++ b/products/colp/sql/colp_qaqc.sql
@@ -359,16 +359,19 @@ current AS (
     GROUP BY "USETYPE"
 )
 SELECT
-    a.usetype,
-    a.v_previous,
-    b.v_current,
-    b.num_records_current,
-    a.num_records_previous,
-    b.num_records_current - a.num_records_previous AS difference
+    p.usetype,
+    c.usetype AS usetype_curr,
+    p.v_previous,
+    c.v_current,
+    c.num_records_current,
+    p.num_records_previous,
+    -- A use type missing from one version counts as 0 records there
+    coalesce(c.num_records_current, 0)
+    - coalesce(p.num_records_previous, 0) AS difference
 INTO usetype_changes
-FROM prev AS a
-INNER JOIN current AS b
-    ON a.usetype = b.usetype;
+FROM prev AS p
+FULL OUTER JOIN current AS c
+    ON p.usetype = c.usetype;
 
 -- Create QAQC tables of count of records by agency and usetype
 DROP TABLE IF EXISTS records_by_agency;
diff --git a/products/colp/sql/create_colp.sql b/products/colp/sql/create_colp.sql
index 2c9342720f..da298c91b8 100644
--- a/products/colp/sql/create_colp.sql
+++ b/products/colp/sql/create_colp.sql
@@ -205,6 +205,39 @@ normed_name_merge AS (
         ON a._parcelname = b.old_name
 ),
 
+usetype_mapped AS (
+    SELECT
+        a.uid,
+        a.borough,
+        a.block,
+        a.lot,
+        a.bbl,
+        a.geo_bbl,
+        a.mapbbl,
+        a.hnum,
+        a._sname,
+        a._parcelname,
+        a.parcelname,
+        a.agency,
+        a._usecode,
+        -- Apply usetype mappings, keeping original if no mapping exists
+        coalesce(b.cleaned, a._usetype) AS _usetype,
+        a.ownership,
+        a.leased,
+        a.finalcom,
+        a.agreement,
+        a.xcoord,
+        a.ycoord,
+        a.latitude,
+        a.longitude,
+        a.geom,
+        a.cd,
+        a.sname
+    FROM normed_name_merge AS a
+    LEFT JOIN usetype_mappings AS b
+        ON a._usetype = b.source
+),
+
 categorized AS (
     SELECT
         a.*,
@@ -309,7 +342,7 @@ categorized AS (
                 a._usecode = '1410' OR a._usecode = '1400'
             THEN 'PROPERTY WITH RESIDENTIAL USE'
         END) AS excatdesc
-    FROM normed_name_merge AS a
+    FROM usetype_mapped AS a
 )
 
 -- Reorder columns for output