From 8a5c261faf04e57fc5a4e8d2b6671201a0ade3d5 Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Thu, 19 Dec 2024 14:29:34 -0800 Subject: [PATCH 1/4] fix #210 - strip dst_crs option from cli and always request EPSG:3005 - use some other tool to reproject the data --- README.md | 64 ++++++++++++--------------------------------- src/bcdata/bc2pg.py | 5 ++-- src/bcdata/cli.py | 11 -------- src/bcdata/wfs.py | 10 +++---- tests/test_wfs.py | 16 ++++++------ 5 files changed, 29 insertions(+), 77 deletions(-) diff --git a/README.md b/README.md index 37751dc..e4c5791 100644 --- a/README.md +++ b/README.md @@ -16,26 +16,18 @@ Note that this tool depends on BC Open Geospatial Web Services API at [openmaps. **Disclaimer** *It is the user's responsibility to check the licensing for any downloads, data are generally licensed as [OGL-BC](http://www2.gov.bc.ca/gov/content/governments/about-the-bc-government/databc/open-data/open-government-license-bc)* -## Dependencies and installation +## Installation and dependencies -`bcdata` requires `gdal`. If working with an OS such as linux or MacOS, with `gdal` already installed to a known location, install with `pip`: +Install with `pip`: $ pip install bcdata -Alternatively, `conda` can be used to install/manage the required dependencies as per the [GeoPandas guide](https://geopandas.org/en/stable/getting_started/install.html): - - $ conda create --name bcdataenv - $ conda activate bcdataenv - $ conda config --env --add channels conda-forge - $ conda config --env --set channel_priority strict - $ conda install python=3 geopandas - $ conda install rasterio - $ pip install bcdata +Note that `bcdata` requires `gdal` to already be installed to a known location. +If this is not the case on your system, the [GeoPandas guide](https://geopandas.org/en/stable/getting_started/install.html) is useful for setting up a stand alone environment with `conda`. 
+## Configuration -### Configuration - -#### Default PostgreSQL database +### Default PostgreSQL database The default target database connection (used by `bc2pg`) can be set via the `DATABASE_URL` environment variable (the password parameter should not be required if using a [.pgpass file](https://www.postgresql.org/docs/current/libpq-pgpass.html)) @@ -44,7 +36,7 @@ Linux/Mac: `export DATABASE_URL=postgresql://{username}:{password}@{hostname}:{p Windows: `SET DATABASE_URL=postgresql://{username}:{password}@{hostname}:{port}/{database}` -#### Layer list / layer schema cache +### Layer list / layer schema cache To reduce the volume of requests, information about data requested is cached locally. Schemas of individual layers that have previously been requested are cached with the cache file name matching the object/table name. @@ -91,7 +83,6 @@ AERODROME_STATUS AIRCRAFT_ACCESS_IND AIRPORT_NAME ``` ### CLI - Commands available via the bcdata command line interface are documented with the --help option ``` @@ -167,7 +158,6 @@ Options: CRS of provided bounds --indent INTEGER Indentation level for JSON output --compact / --not-compact Use compact separators (',', ':'). - --dst-crs, --dst_crs TEXT Destination CRS -s, --sortby TEXT Name of sort field -l, --lowercase Write column/properties names as lowercase -m, --promote-to-multi Promote features to multipart @@ -190,7 +180,6 @@ Options: --bounds TEXT Bounds: "left bottom right top" or "[left, bottom, right, top]". Coordinates are BC Albers (default) or --bounds_crs [required] - --dst-crs TEXT CRS of output file --bounds-crs TEXT CRS of provided bounds -r, --resolution INTEGER -a, --align Align provided bounds to provincial standard @@ -319,29 +308,19 @@ The JSON output can be manipulated with [jq](https://stedolan.github.io/jq/). Fo etc... 
} -Dump data to geojson ([`EPSG:4326` only](https://tools.ietf.org/html/rfc7946#section-4)): +Dump data as supplied by server (BC Albers): $ bcdata dump bc-airports > bc-airports.geojson -Get a single feature and send it to geojsonio (requires [geojson-cli](https://github.com/mapbox/geojsonio-cli)). Note the double quotes required around a CQL FILTER provided to the `--query` option. - - $ bcdata dump \ - WHSE_IMAGERY_AND_BASE_MAPS.GSR_AIRPORTS_SVW \ - --query "AIRPORT_NAME='Terrace (Northwest Regional) Airport'" \ - | geojsonio - -Save a layer to a geopackage in BC Albers: - - $ bcdata cat bc-airports --dst-crs EPSG:3005 \ - | fio collect \ - | fio load -f GPKG --dst-crs EPSG:3005 airports.gpkg +Dump data to geojson that conforms to [RFC7946](https://tools.ietf.org/html/rfc7946#section-4): -Note that this will not work if the source data has mixed geometry types. + $ bcdata dump bc-airports | \ + ogr2ogr -f GeoJSON -lco RFC7946=YES /vsistdout/ /vsistdin/ > bc-airports-4326.geojson -Load to parquet on s3: +Dump to Parquet on S3: - $ bcdata dump bc-airports \ - | ogr2ogr -f Parquet /vsis3/$BUCKET/airports.parquet -s_srs EPSG:4326 -t_srs EPSG:3005 /vsistdin/ + $ bcdata dump bc-airports | \ + ogr2ogr -f Parquet /vsis3/$BUCKET/airports.parquet /vsistdin/ Load data to postgres and run a spatial query: @@ -366,19 +345,8 @@ Load data to postgres and run a spatial query: ## Projections / CRS -**CLI** - -`bcdata dump` returns GeoJSON in WGS84 (`EPSG:4326`). - -`bcdata cat` provides the `--dst-crs` option, use any CRS the WFS server supports. - -`bcdata bc2pg` loads data to PostgreSQL in BC Albers (`EPSG:3005`). - - -**Python module** - -`bcdata.get_data()` defaults to `EPSG:4236` but any CRS can be specified (that the server will accept). - +All data are as supplied from the server by default: BC Albers / `EPSG:3005`. +Use some other tool to reproject the data as required. 
## Development and testing diff --git a/src/bcdata/bc2pg.py b/src/bcdata/bc2pg.py index 90c8a8d..e26d1a5 100644 --- a/src/bcdata/bc2pg.py +++ b/src/bcdata/bc2pg.py @@ -95,7 +95,7 @@ def bc2pg( # noqa: C901 # if geometry type is not provided, determine type by making the first request if not geometry_type: - df = WFS.request_features(url=urls[0], as_gdf=True, crs="epsg:3005", lowercase=True) + df = WFS.request_features(url=urls[0], as_gdf=True, lowercase=True) geometry_type = df.geom_type.unique()[0] # keep only the first type if numpy.any(df.has_z.unique()[0]): # geopandas does not include Z in geom_type string geometry_type = geometry_type + "Z" @@ -107,7 +107,6 @@ def bc2pg( # noqa: C901 df_temp = WFS.request_features( url=urls[-1], as_gdf=True, - crs="epsg:3005", lowercase=True, silent=True, ) @@ -158,7 +157,7 @@ def bc2pg( # noqa: C901 for n, url in enumerate(urls): # if first url not downloaded above when checking geom type, do now if df is None: - df = WFS.request_features(url=url, as_gdf=True, crs="epsg:3005", lowercase=True) + df = WFS.request_features(url=url, as_gdf=True, lowercase=True) # tidy the resulting dataframe df = df.rename_geometry("geom") # lowercasify diff --git a/src/bcdata/cli.py b/src/bcdata/cli.py index d9ff899..f0b213b 100644 --- a/src/bcdata/cli.py +++ b/src/bcdata/cli.py @@ -73,7 +73,6 @@ def bounds_handler(ctx, param, value): help='Bounds: "left bottom right top" or "[left, bottom, right, top]". 
Coordinates are BC Albers (default) or --bounds_crs', ) -dst_crs_opt = click.option("--dst-crs", "--dst_crs", default="epsg:4326", help="Destination CRS") lowercase_opt = click.option( "--lowercase", "-l", is_flag=True, help="Write column/properties names as lowercase" @@ -141,11 +140,6 @@ def info(dataset, indent, meta_member, verbose, quiet): @cli.command() @click.option("--out_file", "-o", help="Output file", default="dem25.tif") @bounds_opt_dem -@click.option( - "--dst-crs", - help="CRS of output file", - default="EPSG:3005", -) @click.option( "--bounds-crs", help="CRS of provided bounds", @@ -169,7 +163,6 @@ def dem( bounds, bounds_crs, align, - dst_crs, out_file, resolution, interpolation, @@ -184,7 +177,6 @@ def dem( out_file=out_file, align=align, src_crs=bounds_crs, - dst_crs=dst_crs, resolution=resolution, interpolation=interpolation, ) @@ -276,7 +268,6 @@ def dump( ) @indent_opt @compact_opt -@dst_crs_opt @click.option("--sortby", "-s", help="Name of sort field") @lowercase_opt @click.option( @@ -296,7 +287,6 @@ def cat( bounds_crs, indent, compact, - dst_crs, sortby, lowercase, promote_to_multi, @@ -327,7 +317,6 @@ def cat( url=url, as_gdf=False, lowercase=lowercase, - crs=dst_crs, promote_to_multi=promote_to_multi, ) for feat in featurecollection["features"]: diff --git a/src/bcdata/wfs.py b/src/bcdata/wfs.py index f78ecce..abde127 100644 --- a/src/bcdata/wfs.py +++ b/src/bcdata/wfs.py @@ -358,7 +358,7 @@ def define_requests( "request": "GetFeature", "typeName": table, "outputFormat": "json", - "SRSNAME": "EPSG:4326", + "SRSNAME": "EPSG:3005", # just in case (this should always be the default) } if sortby: request["sortby"] = sortby.upper() @@ -384,7 +384,6 @@ def request_features( self, url, as_gdf=False, - crs="EPSG:4326", lowercase=False, promote_to_multi=False, ): @@ -395,9 +394,7 @@ def request_features( # load to gdf for reprojection/minor data cleaning if len(featurecollection["features"]) > 0: gdf = 
gpd.GeoDataFrame.from_features(featurecollection) - gdf = gdf.set_crs("EPSG:4326") - if crs != "EPSG:4326": - gdf = gdf.to_crs(crs) + gdf = gdf.set_crs("EPSG:3005") if gdf.geometry.name != "geometry": gdf = gdf.rename_geometry("geometry") if lowercase: @@ -416,7 +413,6 @@ def request_features( def get_data( dataset, query=None, - crs="epsg:4326", bounds=None, bounds_crs="epsg:3005", count=None, @@ -440,7 +436,7 @@ def get_data( for url in urls: results.append( WFS.request_features( - url, crs=crs, as_gdf=True, lowercase=lowercase, promote_to_multi=promote_to_multi + url, as_gdf=True, lowercase=lowercase, promote_to_multi=promote_to_multi ) ) if len(results) > 1: diff --git a/tests/test_wfs.py b/tests/test_wfs.py index 801b5db..d3ed20d 100644 --- a/tests/test_wfs.py +++ b/tests/test_wfs.py @@ -72,7 +72,7 @@ def test_get_data_asgdf(): def test_get_data_asgdf_crs(): - gdf = bcdata.get_data(UTMZONES_KEY, query="UTM_ZONE=10", as_gdf=True, crs="EPSG:3005") + gdf = bcdata.get_data(UTMZONES_KEY, query="UTM_ZONE=10", as_gdf=True) assert gdf.crs == "EPSG:3005" @@ -86,21 +86,21 @@ def test_get_data_small(): assert data["type"] == "FeatureCollection" +def test_get_data_count(): + data = bcdata.get_data(AIRPORTS_TABLE, count=100) + assert len(data["features"]) == 100 + + def test_get_data_lowercase(): - data = bcdata.get_data(AIRPORTS_TABLE, lowercase=True) + data = bcdata.get_data(AIRPORTS_TABLE, lowercase=True, count=1) assert "airport_name" in data["features"][0]["properties"].keys() def test_get_data_crs(): - data = bcdata.get_data(AIRPORTS_TABLE, crs="EPSG:3005") + data = bcdata.get_data(AIRPORTS_TABLE, count=1) assert data["crs"]["properties"]["name"] == "urn:ogc:def:crs:EPSG::3005" -def test_get_data_count(): - data = bcdata.get_data(AIRPORTS_TABLE, count=100) - assert len(data["features"]) == 100 - - # this presumes the page size will always be less than the total number of wells def test_get_data_paged_count(): wfs = bcdata.wfs.BCWFS() From 
3166814a16aab5ec218dc1097629e9fcddac89c4 Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Thu, 19 Dec 2024 14:31:11 -0800 Subject: [PATCH 2/4] remove pasted character --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e4c5791..9d34a60 100644 --- a/README.md +++ b/README.md @@ -202,7 +202,7 @@ Usage: bcdata dump [OPTIONS] DATASET $ bcdata dump bc-airports --query "AIRPORT_NAME='Victoria Harbour (Shoal Point) Heliport'" $ bcdata dump bc-airports --bounds xmin ymin xmax ymax - It can also be combined to read bounds of a feature dataset using Fiona:  + It can also be combined to read bounds of a feature dataset using Fiona: $ bcdata dump bc-airports --bounds $(fio info aoi.shp --bounds) Options: From 89cb6883db9fd82ca5a2c3e0f97395d97d3d84b3 Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Thu, 19 Dec 2024 14:56:54 -0800 Subject: [PATCH 3/4] bump version and note removal of reprojection options --- CHANGES.txt | 4 ++++ src/bcdata/__init__.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 5fd675c..b09ba4a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,6 +1,10 @@ Changes ======= +1.0.0 () +------------------ +- use some other tool for reprojection - always request data in BC Albers and remove options for reprojection (#210) + 0.14.0 (2024-12-17) ------------------ - simplify WFS module, standardize cat/dump options diff --git a/src/bcdata/__init__.py b/src/bcdata/__init__.py index d6ec031..f5e62bd 100644 --- a/src/bcdata/__init__.py +++ b/src/bcdata/__init__.py @@ -22,4 +22,4 @@ raise Exception(f"Failed to download primary key database at {PRIMARY_KEY_DB_URL}") primary_keys = {} -__version__ = "0.14.0" +__version__ = "1.0.0dev0" From 41537caa2bf78455ff92fc918ce338c19ddfe97b Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Fri, 20 Dec 2024 13:17:42 -0800 Subject: [PATCH 4/4] call this v0.15.0 --- CHANGES.txt | 2 +- src/bcdata/__init__.py | 2 +- 2 files 
changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index b09ba4a..4696636 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,7 +1,7 @@ Changes ======= -1.0.0 () +0.15.0 (2024-12-20) ------------------ - use some other tool for reprojection - always request data in BC Albers and remove options for reprojection (#210) diff --git a/src/bcdata/__init__.py b/src/bcdata/__init__.py index f5e62bd..4b436e5 100644 --- a/src/bcdata/__init__.py +++ b/src/bcdata/__init__.py @@ -22,4 +22,4 @@ raise Exception(f"Failed to download primary key database at {PRIMARY_KEY_DB_URL}") primary_keys = {} -__version__ = "1.0.0dev0" +__version__ = "0.15.0"