diff --git a/.github/workflows/fast-build.yml b/.github/workflows/fast-build.yml
new file mode 100644
index 0000000..020d2df
--- /dev/null
+++ b/.github/workflows/fast-build.yml
@@ -0,0 +1,105 @@
+name: Fastbuild postgis images
+
+on:
+  push:
+    branches:
+      - "5-speed-up-builds"
+  pull_request:
+    branches:
+      - "5-speed-up-builds"
+
+# One run per workflow and branch; a newer push cancels the run in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }}
+  cancel-in-progress: true
+
+env:
+  # Explicit empty string; a bare "REPOSITORY:" would be parsed as null.
+  REPOSITORY: ""
+  TIGER_DOMAIN: clad-github-builder.rit.uw.edu
+
+jobs:
+
+  # Prunes docker state on the runner and creates the tag shared by all images.
+  clear-cache:
+    runs-on: self-hosted
+    outputs:
+      build_num: ${{ steps.build_num.outputs.value }}
+
+    steps:
+      - name: Clear docker cache
+        run: docker system prune -af --volumes
+      - name: Delete docker volumes not included in system prune
+        run: docker volume prune -af
+      # run: docker buildx prune -a --verbose
+      # Workflow-level env values are literal strings ("$(date ...)" is never
+      # executed there), so the timestamp is computed in a step and published
+      # as a job output instead of a file in the runner's home directory.
+      - name: Generate BUILD_NUM
+        id: build_num
+        run: echo "value=$(date +%Y%m%d%H%M%S)" >> "$GITHUB_OUTPUT"
+
+  build-degauss-foundry:
+    runs-on: self-hosted
+    needs: clear-cache
+    env:
+      BUILD_NUM: ${{ needs.clear-cache.outputs.build_num }}
+
+    steps:
+      - uses: actions/checkout@v5
+      - name: print env
+        run: env | sort
+      - name: Build the degauss-foundry image
+        run: docker build --load --cpu-quota $(( $(nproc --all)*100000 )) degauss-foundry --platform linux/amd64 --tag genoa-container-registry.washington.palantircloud.com/degauss-foundry:${{ env.BUILD_NUM }}
+      - name: Run Trivy CVE vulnerability scanner
+        uses: aquasecurity/trivy-action@0.33.1
+        with:
+          image-ref: genoa-container-registry.washington.palantircloud.com/degauss-foundry:${{ env.BUILD_NUM }}
+          format: 'table'
+          exit-code: '1'
+          ignore-unfixed: true
+          timeout: '1h'
+          vuln-type: 'os,library'
+          severity: 'CRITICAL,HIGH,MEDIUM'
+
+  setup-postgis-states:
+    runs-on: self-hosted
+    needs: clear-cache
+    outputs:
+      matrix: ${{ steps.matrix.outputs.value }}
+
+    steps:
+      - id: matrix
+        run: |
+          echo 'value=["ak", "al", "ar", "az", "ca", "co", "ct", "dc", "de", "fl", "ga", "hi", "ia", "id", "il", "in", "ks", "ky", "la", "ma", "md", "me", "mi", "mn", "mo", "ms", "mt", "nc", "nd", "ne", "nh", "nj", "nm", "nv", "ny", "oh", "ok", "or", "pa", "ri", "sc", "sd", "tn", "tx", "ut", "va", "vt", "wa", "wi", "wv", "wy", "pr"]' >> "$GITHUB_OUTPUT"
+      # Single quotes keep the double quotes of the JSON list intact in the log.
+      - run: |
+          echo '${{ steps.matrix.outputs.value }}'
+
+  build-postgis-states:
+    runs-on: self-hosted
+    # clear-cache is listed so that its build_num output is readable here.
+    needs: [clear-cache, setup-postgis-states]
+    continue-on-error: true
+    strategy:
+      matrix:
+        value: ${{ fromJSON(needs.setup-postgis-states.outputs.matrix) }}
+    env:
+      BUILD_NUM: ${{ needs.clear-cache.outputs.build_num }}
+
+    steps:
+      - uses: actions/checkout@v5
+      - name: print env
+        run: env | sort
+      - name: Build the postgis image
+        run: docker build --shm-size 4g --load --cpu-quota $(( $(nproc --all)*100000 )) --build-arg TIGER_DOMAIN=${{ env.TIGER_DOMAIN }} --build-arg state_var=${{ matrix.value }} uwpostgis-foundry --platform linux/amd64 --tag genoa-container-registry.washington.palantircloud.com/postgis-${{ matrix.value }}:${{ env.BUILD_NUM }}
+      - name: Run Trivy CVE vulnerability scanner
+        uses: aquasecurity/trivy-action@0.33.1
+        with:
+          image-ref: genoa-container-registry.washington.palantircloud.com/postgis-${{ matrix.value }}:${{ env.BUILD_NUM }}
+          format: 'table'
+          exit-code: '1'
+          ignore-unfixed: true
+          timeout: '1h'
+          vuln-type: 'os,library'
+          severity: 'CRITICAL,HIGH,MEDIUM'
diff --git a/degauss-foundry/Dockerfile b/degauss-foundry/Dockerfile
index 19a7fb6..88bde6e 100644
--- a/degauss-foundry/Dockerfile
+++ b/degauss-foundry/Dockerfile
@@ -3,7 +3,7 @@ RUN apk upgrade --no-cache
 
 # DeGAUSS container metadata
 ENV degauss_name="geocoder"
-ENV degauss_version="3.3.0"
+ENV degauss_version="3.4.0"
 ENV degauss_description="geocodes"
 ENV degauss_argument="valid_geocode_score_threshold [default: 0.5]"
 
diff --git a/degauss-foundry/entrypointCSV.R b/degauss-foundry/entrypoint.R
old mode 100755
new mode 100644
similarity index 98%
rename from degauss-foundry/entrypointCSV.R
rename to
degauss-foundry/entrypoint.R index 41835c3..9132750 --- a/degauss-foundry/entrypointCSV.R +++ b/degauss-foundry/entrypoint.R @@ -14,9 +14,7 @@ opt <- docopt::docopt(doc) if (is.null(opt$score_threshold)) opt$score_threshold <- 0.5 d <- readr::read_csv(opt$filename, show_col_types = FALSE) - -## 8/23 - FOR FOUNDRY PROCESS -# d <- readr::read_csv('/opt/palantir/sidecars/shared-volumes/shared/infile.csv') +# d <- readr::read_csv('test/my_address_file.csv') # d <- readr::read_csv('test/my_address_file_missing.csv') ## must contain character column called address @@ -160,4 +158,3 @@ if (opt$score_threshold != "all") { ) knitr::kable(geocode_summary %>% dplyr::select(geocode_result, `n (%)`)) } - diff --git a/degauss-foundry/entrypoint.py b/degauss-foundry/entrypoint.py index 7a0e861..33be44c 100644 --- a/degauss-foundry/entrypoint.py +++ b/degauss-foundry/entrypoint.py @@ -12,13 +12,11 @@ args = parser.parse_args() the_command = args.command.split(" ") - def run_process(exe): "Define a function for running commands and capturing stdout line by line" p = subprocess.Popen(exe, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) return iter(p.stdout.readline, b"") - start_flag_fname = "/opt/palantir/sidecars/shared-volumes/shared/start_flag" done_flag_fname = "/opt/palantir/sidecars/shared-volumes/shared/done_flag" close_flag_fname = "/opt/palantir/sidecars/shared-volumes/shared/close_flag" diff --git a/degauss-foundry/lib/geocoder/us.rb b/degauss-foundry/lib/geocoder/us.rb index a39bc81..a3904ba 100644 --- a/degauss-foundry/lib/geocoder/us.rb +++ b/degauss-foundry/lib/geocoder/us.rb @@ -7,7 +7,7 @@ # General usage is as follows: # # >> require 'geocoder/us' -# >> db = Geocoder::US::Database.new("/opt/tiger/geocoder.db") +# >> db = Geocoder::US::Database.new("/opt/geocoder.db") # >> p db.geocode("1600 Pennsylvania Av, Washington DC") # # [{:pretyp=>"", :street=>"Pennsylvania", :sufdir=>"NW", :zip=>"20502", diff --git a/degauss-foundry/lib/geocoder/us/database.rb 
b/degauss-foundry/lib/geocoder/us/database.rb index d7e44ac..141ecba 100644 --- a/degauss-foundry/lib/geocoder/us/database.rb +++ b/degauss-foundry/lib/geocoder/us/database.rb @@ -153,8 +153,7 @@ def execute_statement (st, *params) result = st.execute(*params) columns = result.columns.map {|c| c.to_sym} result.each {|row| - rows << Hash[*(columns.zip(row).flatten)]} - + rows << columns.zip(row).to_h} end if @debug runtime = format("%.3f", Time.now - start) diff --git a/degauss-foundry/renv.lock b/degauss-foundry/renv.lock deleted file mode 100644 index bc69a91..0000000 --- a/degauss-foundry/renv.lock +++ /dev/null @@ -1,593 +0,0 @@ -{ - "R": { - "Version": "4.2.2", - "Repositories": [ - { - "Name": "CRAN", - "URL": "https://cran.rstudio.com" - } - ] - }, - "Packages": { - "R6": { - "Package": "R6", - "Version": "2.5.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "470851b6d5d0ac559e9d01bb352b4021", - "Requirements": [] - }, - "bit": { - "Package": "bit", - "Version": "4.5.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "d242abec29412ce988848d0294b208fd", - "Requirements": [] - }, - "bit64": { - "Package": "bit64", - "Version": "4.5.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "9fe98599ca456d6552421db0d6772d8f", - "Requirements": [ - "bit" - ] - }, - "cachem": { - "Package": "cachem", - "Version": "1.0.5", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "5346f76a33eb7417812c270b04a5581b", - "Requirements": [ - "fastmap", - "rlang" - ] - }, - "cli": { - "Package": "cli", - "Version": "3.5.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "eb9fc121ad9a1075c471107ef185be46", - "Requirements": [] - }, - "clipr": { - "Package": "clipr", - "Version": "0.8.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "3f038e5ac7f41d4ac41ce658c85e3042", - "Requirements": [] - }, - "cpp11": { - "Package": "cpp11", - "Version": "0.4.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": 
"ed588261931ee3be2c700d22e94a29ab", - "Requirements": [] - }, - "crayon": { - "Package": "crayon", - "Version": "1.5.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "e8a1e41acf02548751f45c718d55aa6a", - "Requirements": [] - }, - "dht": { - "Package": "dht", - "Version": "1.2.3", - "Source": "GitHub", - "RemoteType": "github", - "RemoteHost": "api.github.com", - "RemoteRepo": "dht", - "RemoteUsername": "degauss-org", - "RemoteRef": "HEAD", - "RemoteSha": "4d10437461416e688ea259a37c2ce3b5bae96238", - "Hash": "b16ae4484d2ef4a83738f6069c7868d3", - "Requirements": [ - "cli", - "dplyr", - "fs", - "glue", - "magrittr", - "prettyunits", - "ps", - "purrr", - "readr", - "stringr", - "tidyr", - "tidyselect", - "whisker", - "withr" - ] - }, - "digest": { - "Package": "digest", - "Version": "0.6.37", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "a0cbe758a531d054b537d16dff4d58a1", - "Requirements": [] - }, - "docopt": { - "Package": "docopt", - "Version": "0.7.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "e9eeef7931ee99ca0093f3f20b88e09b", - "Requirements": [] - }, - "dplyr": { - "Package": "dplyr", - "Version": "1.0.10", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "539412282059f7f0c07295723d23f987", - "Requirements": [ - "R6", - "generics", - "glue", - "lifecycle", - "magrittr", - "pillar", - "rlang", - "tibble", - "tidyselect", - "vctrs" - ] - }, - "ellipsis": { - "Package": "ellipsis", - "Version": "0.3.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077", - "Requirements": [ - "rlang" - ] - }, - "evaluate": { - "Package": "evaluate", - "Version": "0.14", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "ec8ca05cffcc70569eaaad8469d2a3a7", - "Requirements": [] - }, - "fansi": { - "Package": "fansi", - "Version": "1.0.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "83a8afdbe71839506baa9f90eebad7ec", - "Requirements": [] - }, - "fastmap": { 
- "Package": "fastmap", - "Version": "1.1.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "77bd60a6157420d4ffa93b27cf6a58b8", - "Requirements": [] - }, - "fs": { - "Package": "fs", - "Version": "1.5.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7c89603d81793f0d5486d91ab1fc6f1d", - "Requirements": [] - }, - "generics": { - "Package": "generics", - "Version": "0.1.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "15e9634c0fcd294799e9b2e929ed1b86", - "Requirements": [] - }, - "glue": { - "Package": "glue", - "Version": "1.6.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "4f2596dfb05dac67b9dc558e5c6fba2e", - "Requirements": [] - }, - "highr": { - "Package": "highr", - "Version": "0.8", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "4dc5bb88961e347a0f4d8aad597cbfac", - "Requirements": [] - }, - "hms": { - "Package": "hms", - "Version": "1.1.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "41100392191e1244b887878b533eea91", - "Requirements": [ - "ellipsis", - "lifecycle", - "pkgconfig", - "rlang", - "vctrs" - ] - }, - "jsonlite": { - "Package": "jsonlite", - "Version": "1.7.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "98138e0994d41508c7a6b84a0600cfcb", - "Requirements": [] - }, - "knitr": { - "Package": "knitr", - "Version": "1.31", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c3994c036d19fc22c5e2a209c8298bfb", - "Requirements": [ - "evaluate", - "highr", - "markdown", - "stringr", - "xfun", - "yaml" - ] - }, - "lifecycle": { - "Package": "lifecycle", - "Version": "1.0.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "001cecbeac1cff9301bdc3775ee46a86", - "Requirements": [ - "cli", - "glue", - "rlang" - ] - }, - "magrittr": { - "Package": "magrittr", - "Version": "2.0.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7ce2733a9826b3aeb1775d56fd305472", - "Requirements": [] - }, - "mappp": { - "Package": "mappp", 
- "Version": "1.0.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "969ed5fab0c1752ddd0a43f789c9ac4e", - "Requirements": [ - "memoise", - "parallelly", - "pbmcapply", - "progress", - "purrr", - "rlang" - ] - }, - "markdown": { - "Package": "markdown", - "Version": "1.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "61e4a10781dd00d7d81dd06ca9b94e95", - "Requirements": [ - "mime", - "xfun" - ] - }, - "memoise": { - "Package": "memoise", - "Version": "2.0.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "a0bc51650201a56d00a4798523cc91b3", - "Requirements": [ - "cachem", - "rlang" - ] - }, - "mime": { - "Package": "mime", - "Version": "0.10", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "26fa77e707223e1ce042b2b5d09993dc", - "Requirements": [] - }, - "parallelly": { - "Package": "parallelly", - "Version": "1.25.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "bc53006c11a08ba955ccbf85b586875f", - "Requirements": [] - }, - "pbmcapply": { - "Package": "pbmcapply", - "Version": "1.5.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "4332529a23c4995d743ef489be54a839", - "Requirements": [] - }, - "pillar": { - "Package": "pillar", - "Version": "1.8.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "f2316df30902c81729ae9de95ad5a608", - "Requirements": [ - "cli", - "fansi", - "glue", - "lifecycle", - "rlang", - "utf8", - "vctrs" - ] - }, - "pkgconfig": { - "Package": "pkgconfig", - "Version": "2.0.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "01f28d4278f15c76cddbea05899c5d6f", - "Requirements": [] - }, - "prettyunits": { - "Package": "prettyunits", - "Version": "1.1.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "95ef9167b75dde9d2ccc3c7528393e7e", - "Requirements": [] - }, - "progress": { - "Package": "progress", - "Version": "1.2.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061", - 
"Requirements": [ - "R6", - "crayon", - "hms", - "prettyunits" - ] - }, - "ps": { - "Package": "ps", - "Version": "1.7.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "68dd03d98a5efd1eb3012436de45ba83", - "Requirements": [] - }, - "purrr": { - "Package": "purrr", - "Version": "1.0.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "1ad491d27989ec6c26a2918ad6df116b", - "Requirements": [ - "cli", - "lifecycle", - "magrittr", - "rlang", - "vctrs" - ] - }, - "readr": { - "Package": "readr", - "Version": "2.1.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "2dfbfc673ccb3de3d8836b4b3bd23d14", - "Requirements": [ - "R6", - "cli", - "clipr", - "cpp11", - "crayon", - "hms", - "lifecycle", - "rlang", - "tibble", - "tzdb", - "vroom" - ] - }, - "renv": { - "Package": "renv", - "Version": "0.15.4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c1078316e1d4f70275fc1ea60c0bc431", - "Requirements": [] - }, - "rlang": { - "Package": "rlang", - "Version": "1.0.6", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "4ed1f8336c8d52c3e750adcdc57228a7", - "Requirements": [] - }, - "stringi": { - "Package": "stringi", - "Version": "1.8.4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "a68b980681bcbc84c7a67003fa796bfb", - "Requirements": [] - }, - "stringr": { - "Package": "stringr", - "Version": "1.5.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "671a4d384ae9d32fc47a14e98bfa3dc8", - "Requirements": [ - "cli", - "glue", - "lifecycle", - "magrittr", - "rlang", - "stringi", - "vctrs" - ] - }, - "tibble": { - "Package": "tibble", - "Version": "3.1.8", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "56b6934ef0f8c68225949a8672fe1a8f", - "Requirements": [ - "fansi", - "lifecycle", - "magrittr", - "pillar", - "pkgconfig", - "rlang", - "vctrs" - ] - }, - "tidyr": { - "Package": "tidyr", - "Version": "1.2.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": 
"cdb403db0de33ccd1b6f53b83736efa8", - "Requirements": [ - "cpp11", - "dplyr", - "ellipsis", - "glue", - "lifecycle", - "magrittr", - "purrr", - "rlang", - "tibble", - "tidyselect", - "vctrs" - ] - }, - "tidyselect": { - "Package": "tidyselect", - "Version": "1.2.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "79540e5fcd9e0435af547d885f184fd5", - "Requirements": [ - "cli", - "glue", - "lifecycle", - "rlang", - "vctrs", - "withr" - ] - }, - "tzdb": { - "Package": "tzdb", - "Version": "0.3.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "b2e1cbce7c903eaf23ec05c58e59fb5e", - "Requirements": [ - "cpp11" - ] - }, - "utf8": { - "Package": "utf8", - "Version": "1.2.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c9c462b759a5cc844ae25b5942654d13", - "Requirements": [] - }, - "vctrs": { - "Package": "vctrs", - "Version": "0.5.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "970324f6572b4fd81db507b5d4062cb0", - "Requirements": [ - "cli", - "glue", - "lifecycle", - "rlang" - ] - }, - "vroom": { - "Package": "vroom", - "Version": "1.6.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "64f81fdead6e0d250fb041e175d123ab", - "Requirements": [ - "bit64", - "cli", - "cpp11", - "crayon", - "glue", - "hms", - "lifecycle", - "progress", - "rlang", - "tibble", - "tidyselect", - "tzdb", - "vctrs", - "withr" - ] - }, - "whisker": { - "Package": "whisker", - "Version": "0.4.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c6abfa47a46d281a7d5159d0a8891e88", - "Requirements": [] - }, - "withr": { - "Package": "withr", - "Version": "2.5.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c0e49a9760983e81e55cdd9be92e7182", - "Requirements": [] - }, - "xfun": { - "Package": "xfun", - "Version": "0.23", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "791a57f43c887111490851dcd166d344", - "Requirements": [] - }, - "yaml": { - "Package": "yaml", - "Version": "2.2.1", - "Source": 
"Repository",
-      "Repository": "CRAN",
-      "Hash": "2826c5d9efb0a88f657c7a679c7106db",
-      "Requirements": []
-    }
-  }
-}
diff --git a/degauss-foundry/src/liblwgeom/vsprintf.c b/degauss-foundry/src/liblwgeom/vsprintf.c
index e4c3dfd..abad18c 100644
--- a/degauss-foundry/src/liblwgeom/vsprintf.c
+++ b/degauss-foundry/src/liblwgeom/vsprintf.c
@@ -147,9 +147,7 @@ lw_vasprintf (result, format, args)
   va_list temp;
 
   /* Use va_copy for compatibility with both 32 and 64 bit args */
-  // __va_copy(temp, args);
-
   va_copy(temp, args);
 
   return int_vasprintf (result, format, &temp);
 }
diff --git a/test/address_test.csv b/test/address_test.csv
new file mode 100644
index 0000000..57e830a
--- /dev/null
+++ b/test/address_test.csv
@@ -0,0 +1,202 @@
+id,address
+55000100280,"2854 ROSEANN LN GREEN TOWNSHIP, OH 45239"
+9800060045,"407 SOUTHVIEW AV CINCINNATI, OH 45219"
+59100090241,"909 GRETNA LN FOREST PARK, OH 45240"
+55001310120,"P.O. BOX 12345 GREEN TOWNSHIP, OH 45238"
+4100010061,"PO 12345 CINCINNATI, OH 45208"
+9000010083,"3333 BURNET AVE CINCINNATI, OH 45229"
+9000010086,"3333 BURNET AVE SYRACUSE, NY 13206"
+21600460082,"222 E CENTRAL PKWY CINCINNATI, OH 45202"
+67100240229,foreign
+2800040021,verify
+59004140168,unknown
+50002200628,
+67100040004,NA
+5100020177,"4506 CAMBERWELL RD CINCINNATI, OH 45209"
+55000100212,"5585 FAIRWOOD RD GREEN TOWNSHIP, OH 45239"
+51000810328,"6628 JULY CT COLERAIN TOWNSHIP, OH 45239"
+61201400371,"5126 BRASHER AV BLUE ASH, OH 45242"
+19200650054,"3708 TAPPAN AV CINCINNATI, OH 45223"
+60000110098,"11924 7TH AV SYCAMORE TOWNSHIP, OH 45249"
+60001000117,"7401 KENNEDY LN SYCAMORE TOWNSHIP, OH 45242"
+50002700262,"7085 JEANNIE AV ANDERSON TOWNSHIP, OH 45230"
+60002110309,"6120 ST REGIS DR SYCAMORE TOWNSHIP, OH 45236"
+22100190292,"4162 CHAMBERS ST CINCINNATI, OH 45223"
+61201110164,"17 CARPENTERS RIDGE RD BLUE ASH, OH 45241"
+52500160078,"6639 APACHE CIR MADEIRA, OH 45243"
+50003700268,"6296 CRITTENDEN DR ANDERSON TOWNSHIP, OH 45244"
+17900770440,"1029 RUTLEDGE AV CINCINNATI, OH 45205" +60002310072,"8462 WICKLOW AV SYCAMORE TOWNSHIP, OH 45236" +61200600204,"9354 FLORAL AV BLUE ASH, OH 45242" +51000710151,"8145 BLANCHETTA DR COLERAIN TOWNSHIP, OH 45239" +52700100025,"6827 MT VERNON AV MARIEMONT, OH 45227" +22500030012,"2385 MONTANA AV CINCINNATI, OH 45211" +51000700019,"3053 BANNING RD COLERAIN TOWNSHIP, OH 45239" +65100520093,"4228 FRANKLIN AV NORWOOD, OH 45212" +14900120025,"2388 MARYLAND AV CINCINNATI, OH 45204" +59500020243,"1532 SOUTHRIDGE LN NORTH COLLEGE HILL, OH 45231" +60800100075,"4104 CARRIAGELITE DR SHARONVILLE, OH 45241" +51001010270,"3657 VERNIER DR COLERAIN TOWNSHIP, OH 45251" +2200040012,"1307 DILLON AV CINCINNATI, OH 45208" +55000640056,"4370 AIRYMONT CT GREEN TOWNSHIP, OH 45211" +59600100129,"11 ALBION LN GLENDALE, OH 45246" +67100160157,"2152 BOLSER DR READING, OH 45215" +54001200027,"6513 RAPID RUN RD DELHI TOWNSHIP, OH 45233" +60000930179,"5849 BAYBERRY DR SYCAMORE TOWNSHIP, OH 45242" +02040A150084,"1628 MINION AV CINCINNATI, OH 45205" +5400040073,"3329 GRAYDON AV CINCINNATI, OH 45207" +59400100515,"955 SCHUMARD AV LINCOLN HEIGHTS, OH 45215" +60200080044,"4229 SIBLEY AV SILVERTON, OH 45236" +51001010052,"3630 POOLE RD COLERAIN TOWNSHIP, OH 45251" +9500050041,"2131 VINE ST CINCINNATI, OH 45202" +4100010010,"3611 SHAW AV CINCINNATI, OH 45208" +51000410420,"9982 DUNRAVEN DR COLERAIN TOWNSHIP, OH 45251" +24700060285,"1135 WIONNA AV CINCINNATI, OH 45224" +61201500517,"10960 E ALLENHURST BLVD BLUE ASH, OH 45241" +59002310126,"1119 GARNOA DR SPRINGFIELD TOWNSHIP, OH 45231" +3700030043,"6406 ROE ST CINCINNATI, OH 45227" +59500080299,"1947 STERLING AV NORTH COLLEGE HILL, OH 45239" +11500070009,"3989 BEECHWOOD AV CINCINNATI, OH 45229" +55001610086,"5921 WERK RD GREEN TOWNSHIP, OH 45248" +13100070229,"1922 CATALINA AV CINCINNATI, OH 45237" +5800010049,"3518 HUDSON AV CINCINNATI, OH 45207" +51000240123,"2740 GREENBROOK LN COLERAIN TOWNSHIP, OH 45251" +55002010182,"5979 GAINES RD GREEN 
TOWNSHIP, OH 45247" +54000600136,"5358 LILIBET CT DELHI TOWNSHIP, OH 45238" +17900750537,"1292 MCKEONE AV CINCINNATI, OH 45205" +1900020125,"1338 HERLIN PL CINCINNATI, OH 45208" +50002810394,"6996 QUEENSWAY LN ANDERSON TOWNSHIP, OH 45230" +52100020167,"2 MELISSA CT TERRACE PARK, OH 45174" +24500060047,"28 WOODSDALE AV CINCINNATI, OH 45216" +57000500301,"7841 SURREYWOOD DR MIAMI TOWNSHIP, OH 45052" +55000410311,"5286 SIDNEY RD GREEN TOWNSHIP, OH 45238" +52002140068,"5731 MONNING PL COLUMBIA TOWNSHIP, OH 45227" +59002820347,"12065 HAZELHURST LN SPRINGFIELD TOWNSHIP, OH 45240" +100050097,"6254 CRESTVIEW PL CINCINNATI, OH 45230" +54000330120,"5056 FRANCISVIEW DR DELHI TOWNSHIP, OH 45238" +18000800026,"4736 GLENWAY AV CINCINNATI, OH 45238" +51000320424,"2609 MERRITTVIEW LN COLERAIN TOWNSHIP, OH 45231" +59900700299,"850 CLEARFIELD LN SPRINGDALE, OH 45240" +24800010022,"3020 WEST TOWER AV CINCINNATI, OH 45238" +17900780057,"4056 VINEDALE AV CINCINNATI, OH 45205" +52001720079,"6831 WINDWARD AV COLUMBIA TOWNSHIP, OH 45227" +17900750214,"1048 SUNSET AV CINCINNATI, OH 45205" +11900010082,"1739 NORTHCUTT AV CINCINNATI, OH 45237" +55002430066,"6673 POWNER FARM DR GREEN TOWNSHIP, OH 45248" +60800050170,"3511 BEEKLEY WOODS DR SHARONVILLE, OH 45241" +7200020282,"1227 MARTIN DR CINCINNATI, OH 45202" +8800070090,"246 EARNSHAW AV CINCINNATI, OH 45219" +62902310012,"9400 CUNNINGHAM RD INDIAN HILL, OH 45243" +4000020104,"3751 HYDE PARK AV CINCINNATI, OH 45209" +54000400362,"4993 DUEBBER DR DELHI TOWNSHIP, OH 45238" +61201120040,"5 MUIRFIELD LN BLUE ASH, OH 45241" +55001810077,"4211 RACE RD GREEN TOWNSHIP, OH 45211" +50100100063,"5027 VILLAGE DR NEWTOWN, OH 45244" +60800300205,"5089 LORD ALFRED CT SHARONVILLE, OH 45241" +59100080228,"11754 HAMLET RD FOREST PARK, OH 45240" +17900750148,"1236 SLIKER AV CINCINNATI, OH 45205" +51000810258,"3257 BLUEACRES DR COLERAIN TOWNSHIP, OH 45239" +4200020020,"3549 BURCH AV CINCINNATI, OH 45208" +57000100143,"7554 BUFFALO RIDGE RD MIAMI TOWNSHIP, OH 
45002" +59100030081,"510 BRUNSWICK DR FOREST PARK, OH 45240" +62100080047,"10 HIGHRIDGE DR LOVELAND, OH 45140" +3600040321,"4514 WHETSEL AV CINCINNATI, OH 45227" +57001700095,"3139 BRUNSMAN WY MIAMI TOWNSHIP, OH 45052" +62100070171,"509 NAVAHO DR LOVELAND, OH 45140" +59003930243,"10572 FARMHILL CT SPRINGFIELD TOWNSHIP, OH 45231" +55002320114,"6268 SPRINGMYER DR GREEN TOWNSHIP, OH 45248" +57000200016,"7547 BUFFALO RIDGE RD MIAMI TOWNSHIP, OH 45002" +55000100575,"3194 BALSAMRIDGE DR GREEN TOWNSHIP, OH 45239" +59100090432,"11528 GAFFNEY PL FOREST PARK, OH 45240" +61100110179,"10261 FALLING WATERS LN EVENDALE, OH 45241" +51000530339,"2595 CORNWALL DR COLERAIN TOWNSHIP, OH 45231" +51000730015,"7181 PIPPIN RD COLERAIN TOWNSHIP, OH 45239" +61100800227,"9636 REXFORD DR EVENDALE, OH 45241" +2400030126,"4456 EASTERN AV CINCINNATI, OH 45226" +60800070174,"10945 CONESTOGA CT SHARONVILLE, OH 45241" +11200020024,"630 GREENWOOD AV CINCINNATI, OH 45229" +51000740315,"6691 ACRE DR COLERAIN TOWNSHIP, OH 45239" +20200320176,"1553 KNOX ST CINCINNATI, OH 45214" +55000410216,"5153 RALPH AV GREEN TOWNSHIP, OH 45238" +24700050244,"7921 CHERRYWOOD CT CINCINNATI, OH 45224" +55002320075,"4019 EBENEZER RD GREEN TOWNSHIP, OH 45248" +59003910105,"1882 ROOSEVELT AV SPRINGFIELD TOWNSHIP, OH 45240" +23200020096,"6207 SAVANNAH AV CINCINNATI, OH 45224" +58200070157,"111 BANK AV SAINT BERNARD, OH 45217" +11400020023,"3895 SPRING HOUSE LN CINCINNATI, OH 45217" +59003710280,"9386 MONTORO DR SPRINGFIELD TOWNSHIP, OH 45231" +60800010039,"3307 HAGEMAN AV SHARONVILLE, OH 45241" +59002410156,"904 W MC KELVEY RD SPRINGFIELD TOWNSHIP, OH 45231" +52001000061,"4342 ASHLEY OAKS DR COLUMBIA TOWNSHIP, OH 45227" +20400170164,"1842 WYOMING AV CINCINNATI, OH 45205" +59700800005,"2 BRADNOR PL GREENHILLS, OH 45218" +58200130279,"90 ANGELS WY SAINT BERNARD, OH 45217" +60000111194,"11961 1ST AV SYCAMORE TOWNSHIP, OH 45249" +9800040154,"518 FORTUNE AV CINCINNATI, OH 45219" +21200620068,"3201 STANHOPE AV CINCINNATI, OH 
45211" +65100410066,"3917 CATHERINE AV NORWOOD, OH 45212" +56000900081,"9960 CAROLINA TRACE RD HARRISON TOWNSHIP, OH 45030" +59600020095,"332 E SHARON RD GLENDALE, OH 45246" +53001000019,"10101 HAMILTON CLEVES RD CROSBY TOWNSHIP, OH 45030" +55000820460,"6076 LAGRANGE LN GREEN TOWNSHIP, OH 45239" +67100260206,"1325 FUHRMAN RD READING, OH 45215" +60300250056,"8831 WELLERSTATION DR MONTGOMERY, OH 45249" +55100130024,"4222 CHURCHVIEW LN CHEVIOT, OH 45211" +55100070083,"4054 HARDING AV CHEVIOT, OH 45211" +51000420446,"2916 WILLOW RIDGE DR COLERAIN TOWNSHIP, OH 45251" +59002500206,"10506 HADLEY RD SPRINGFIELD TOWNSHIP, OH 45218" +54001200680,"905 BRAEMORE LN DELHI TOWNSHIP, OH 45233" +54000420149,"335 GLEN OAKS DR DELHI TOWNSHIP, OH 45238" +21000730135,"3071 PENROSE PL CINCINNATI, OH 45211" +56100040040,"304 BROOKS LN HARRISON, OH 45030" +65100080039,"5114 GLOBE AV NORWOOD, OH 45212" +20200400358,"1637 PULTE ST CINCINNATI, OH 45225" +60300050094,"8001 MONTE DR MONTGOMERY, OH 45242" +50100130018,"3380 IVY HILLS BLVD NEWTOWN, OH 45244" +16700020157,"7411 GRACELY DR CINCINNATI, OH 45233" +65100400026,"3945 FOREST AV NORWOOD, OH 45212" +65100410095,"3905 AVILLA PL NORWOOD, OH 45212" +21200670036,"3324 ROBINET DR CINCINNATI, OH 45238" +57200070231,"23 TIMBERLINE CT CLEVES, OH 45002" +62002010167,"8524 WHISPERWOODS LN SYMMES TOWNSHIP, OH 45249" +54000630102,"775 IVYHILL DR DELHI TOWNSHIP, OH 45238" +65100110162,"2619 SHERIDAN DR NORWOOD, OH 45212" +52700200252,"3832 INDIANVIEW AV MARIEMONT, OH 45227" +20300300018,"1655 WAVERLY AV CINCINNATI, OH 45214" +50000420115,"1692 TONOPAH DR ANDERSON TOWNSHIP, OH 45255" +65100040079,"5621 ALVINA AV NORWOOD, OH 45212" +52500250067,"6029 CHEROKEE DR MADEIRA, OH 45243" +100040270,"2085 AUTUMNHILL CT CINCINNATI, OH 45230" +51000110447,"2537 RETFORD DR COLERAIN TOWNSHIP, OH 45231" +55001130165,"4072 SIMCA LN GREEN TOWNSHIP, OH 45211" +5500060041,"2922 HACKBERRY ST CINCINNATI, OH 45206" +55000620224,"3490 CENTURION DR GREEN TOWNSHIP, OH 45211" 
+50001440012,"8103 CABINET CIR ANDERSON TOWNSHIP, OH 45244" +59002230110,"8774 CONSTANCE LN SPRINGFIELD TOWNSHIP, OH 45231" +60002320132,"4028 LIMERICK AV SYCAMORE TOWNSHIP, OH 45236" +61201700308,"11174 JARDIN PL BLUE ASH, OH 45241" +52001710431,"6634 MURRAY AV COLUMBIA TOWNSHIP, OH 45227" +60002000130,"4133 TREBOR DR SYCAMORE TOWNSHIP, OH 45236" +60800320302,"5462 VICTORIAN WY SHARONVILLE, OH 45241" +55002520302,"6215 KINGOAK DR GREEN TOWNSHIP, OH 45248" +200030031,"6522 WALDORF PL CINCINNATI, OH 45230" +60300010229,"80 WEST ST MONTGOMERY, OH 45242" +51001110141,"9858 CAPSTAN DR COLERAIN TOWNSHIP, OH 45251" +52601100145,"5178 ROLLMAN ESTATES DR AMBERLEY VILLAGE, OH 45236" +51000440160,"10056 STURGEON LN COLERAIN TOWNSHIP, OH 45251" +62100210082,"106 PHEASANT WOODS CT LOVELAND, OH 45140" +52500240012,"5687 WHETSEL AV MADEIRA, OH 45227" +11500010197,"3876 VINE ST CINCINNATI, OH 45217" +56100070246,"143 FLINTSTONE DR HARRISON, OH 45030" +59900420081,"341 CHERRY ST SPRINGDALE, OH 45246" +59100120068,"11547 ISLANDALE DR FOREST PARK, OH 45240" +60000140255,"12001 STILLWIND DR SYCAMORE TOWNSHIP, OH 45249" +24500010135,"8423 DIXIE AV CINCINNATI, OH 45216" +9400050062,"150 MULBERRY ST CINCINNATI, OH 45202" +12100020081,"5649 LAWNDALE PL CINCINNATI, OH 45212" +55002430015,"6595 BRIDGETOWN RD GREEN TOWNSHIP, OH 45248" +59004120073,"1696 CLAYBURN CIR SPRINGFIELD TOWNSHIP, OH 45240" +21100680080,"3114 MANNING AV CINCINNATI, OH 45211" +61200800338,"9500 RAVEN LN BLUE ASH, OH 45242" +59001801015,"9620 PEPPER CIR SPRINGFIELD TOWNSHIP, OH 45231" +55000610256,"4331 BOUDINOT AV GREEN TOWNSHIP, OH 45211" \ No newline at end of file diff --git a/uwpostgis-foundry/Dockerfile b/uwpostgis-foundry/Dockerfile index dd2f212..2427e3f 100644 --- a/uwpostgis-foundry/Dockerfile +++ b/uwpostgis-foundry/Dockerfile @@ -1,39 +1,37 @@ -FROM postgis/postgis:16-3.5-alpine AS buildtime_init_builder +# https://stackoverflow.com/questions/34751814/build-postgres-docker-container-with-initial-schema +# 
Initialize the database during the build +# PART 1 +FROM postgis/postgis:17-3.5-alpine AS db_creator -ARG state_var +# copy statics +RUN mkdir -m 777 /gisdata +COPY ./src/alpinedb/download_national.sh . +COPY ./src/alpinedb/download_state.sh . +COPY ./src/alpinedb/1-load_data.sh /docker-entrypoint-initdb.d/ + +# national data first +# build vars ARG TIGER_DOMAIN ARG POSTGRES_DB=geocoder ARG POSTGRES_USER=clad_svc ARG POSTGRES_PASSWORD=not_on_gitlab ARG GEOCODER_YEAR=2020 -ENV STATES=${state_var} +# runtime vars ENV TIGER_DOMAIN=${TIGER_DOMAIN} ENV PGDATA=/pgdata SHELL ["/bin/bash", "-c"] - -RUN apk add wget unzip postgis libintl -# Make data dir -RUN mkdir -p /gisdata \ - && chmod 777 -R /gisdata - -# download national data first -COPY ./src/alpinedb/download_national.sh . -RUN ./download_national.sh - -#download state data in separate layer -COPY ./src/alpinedb/download_state.sh . -COPY ./src/alpinedb/1-load_data.sh /docker-entrypoint-initdb.d/ +RUN apk add libintl postgis unzip wget RUN echo "docker_temp_server_stop && exit 0" > /docker-entrypoint-initdb.d/900-exit_before_boot.sh +RUN ./download_national.sh -# force build to not use cache from a previous state(s) build: -ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache +# state data second +# ARG forces build to not use cache from a previous state(s) build: +ARG state_var +ENV STATES=${state_var} RUN ./download_state.sh -# https://stackoverflow.com/questions/34751814/build-postgres-docker-container-with-initial-schema -# Initialize the database during the build -# PART 1 RUN bash -x docker-entrypoint.sh postgres FROM alpine:latest AS buildtime_init @@ -78,6 +76,7 @@ RUN python3 -m pip freeze | sed 's/--.*//' | xargs python3 -m pip install --upgr SHELL ["/bin/bash", "-c"] # Foundry customizations +RUN adduser --uid 5001 --disabled-password user RUN mkdir -p /opt/palantir/sidecars/shared-volumes/shared/ RUN chown 5001 /opt/palantir/sidecars/shared-volumes/shared/ ENV 
SHARED_DIR=/opt/palantir/sidecars/shared-volumes/shared @@ -91,21 +90,10 @@ RUN /foundry_venv/bin/pip freeze | sed 's/--.*//' | xargs /foundry_venv/bin/pip ADD entrypoint.py /usr/bin/entrypoint RUN chmod +x /usr/bin/entrypoint -COPY process_csv.py ./ +# copy database from db_creator +COPY --chmod=700 --from=db_creator /pgdata /pgdata +RUN chown -R 5001:5001 /tmp /var/run/postgresql /pgdata WORKDIR /opt/palantir/sidecars/shared-volumes/shared/ - -RUN chown -R 5001:5001 /tmp /foundry_venv /var/run/postgresql - -# https://stackoverflow.com/questions/34751814/build-postgres-docker-container-with-initial-schema -# Initialize the database during the build -# PART 2 -## copy database from initialization layer -RUN mkdir -m 700 /pgdata -RUN chown 5001:5001 /pgdata -COPY --chown=5001:5001 --chmod=700 --from=buildtime_init_builder /pgdata /pgdata -ENV PGDATA=/pgdata - USER 5001 - -ENTRYPOINT entrypoint -c "/foundry_venv/bin/python3 /process_csv.py" +ENTRYPOINT entrypoint -c "/process_csv.py" diff --git a/uwpostgis-foundry/src/alpinedb/download_state.sh b/uwpostgis-foundry/src/alpinedb/download_state.sh index 6cb6077..e078a9d 100755 --- a/uwpostgis-foundry/src/alpinedb/download_state.sh +++ b/uwpostgis-foundry/src/alpinedb/download_state.sh @@ -12,6 +12,7 @@ usage(){ [[ -z ${YEAR} ]] && YEAR="2020" [[ -z ${GISDATA} ]] && GISDATA="/tmp" #"/gisdata" [[ -z ${OUTDIR} ]] && OUTDIR="${GISDATA}/gisdata/" +[[ -z ${DOWNLOADTOOL} ]] && DOWNLOADTOOL="wget --no-check-certificate --mirror --reject=html --no-verbose " [[ -z ${UNZIPTOOL} ]] && UNZIPTOOL=unzip while getopts "d:s:y:?" 
arg; do @@ -73,7 +74,7 @@ load_state_data () { for i in "${files[@]}" do - wget $BASEURL/ADDR/$i --no-verbose --mirror + ${DOWNLOADTOOL} $BASEURL/ADDR/$i done cd $GISDATA/$BASEPATH/ADDR @@ -87,7 +88,7 @@ load_state_data () { # Place ############# cd $GISDATA - wget $BASEURL/PLACE/tl_${YEAR}_${FIPS}_place.zip --mirror --reject=html --no-verbose + ${DOWNLOADTOOL} $BASEURL/PLACE/tl_${YEAR}_${FIPS}_place.zip cd $GISDATA/$BASEPATH/PLACE for z in tl_${YEAR}_${FIPS}*_place.zip ; @@ -99,7 +100,7 @@ load_state_data () { # Cousub ############# cd $GISDATA - wget $BASEURL/COUSUB/tl_${YEAR}_${FIPS}_cousub.zip --mirror --reject=html --no-verbose + ${DOWNLOADTOOL} $BASEURL/COUSUB/tl_${YEAR}_${FIPS}_cousub.zip cd $GISDATA/$BASEPATH/COUSUB for z in tl_${YEAR}_${FIPS}*_cousub.zip ; @@ -111,7 +112,7 @@ load_state_data () { # Tract ############# cd $GISDATA - wget $BASEURL/TRACT/tl_${YEAR}_${FIPS}_tract.zip --mirror --reject=html --no-verbose + ${DOWNLOADTOOL} $BASEURL/TRACT/tl_${YEAR}_${FIPS}_tract.zip cd $GISDATA/$BASEPATH/TRACT for z in tl_${YEAR}_${FIPS}*_tract.zip ; @@ -127,7 +128,7 @@ load_state_data () { for i in "${files[@]}" do - wget $BASEURL/FACES/$i --no-verbose --mirror + ${DOWNLOADTOOL} $BASEURL/FACES/$i done cd $GISDATA/$BASEPATH/FACES/ @@ -145,7 +146,7 @@ load_state_data () { for i in "${files[@]}" do - wget $BASEURL/FEATNAMES/$i --no-verbose --mirror + ${DOWNLOADTOOL} $BASEURL/FEATNAMES/$i done cd $GISDATA/$BASEPATH/FEATNAMES/ @@ -164,7 +165,7 @@ load_state_data () { for i in "${files[@]}" do - wget $BASEURL/EDGES/$i --no-verbose --mirror + ${DOWNLOADTOOL} $BASEURL/EDGES/$i done cd $GISDATA/$BASEPATH/EDGES