diff --git a/CHANGES.txt b/CHANGES.txt index 5fd675c..4696636 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,6 +1,10 @@ Changes ======= +0.15.0 (2024-12-20) +------------------ +- use some other tool for reprojection - always request data in BC Albers and remove options for reprojection (#210) + 0.14.0 (2024-12-17) ------------------ - simplify WFS module, standardize cat/dump options diff --git a/README.md b/README.md index 37751dc..9d34a60 100644 --- a/README.md +++ b/README.md @@ -16,26 +16,18 @@ Note that this tool depends on BC Open Geospatial Web Services API at [openmaps. **Disclaimer** *It is the user's responsibility to check the licensing for any downloads, data are generally licensed as [OGL-BC](http://www2.gov.bc.ca/gov/content/governments/about-the-bc-government/databc/open-data/open-government-license-bc)* -## Dependencies and installation +## Installation and dependencies -`bcdata` requires `gdal`. If working with an OS such as linux or MacOS, with `gdal` already installed to a known location, install with `pip`: +Install with `pip`: $ pip install bcdata -Alternatively, `conda` can be used to install/manage the required dependencies as per the [GeoPandas guide](https://geopandas.org/en/stable/getting_started/install.html): - - $ conda create --name bcdataenv - $ conda activate bcdataenv - $ conda config --env --add channels conda-forge - $ conda config --env --set channel_priority strict - $ conda install python=3 geopandas - $ conda install rasterio - $ pip install bcdata +Note that `bcdata` requires `gdal` to already be installed to a known location. +If this is not the case on your system, the [GeoPandas guide](https://geopandas.org/en/stable/getting_started/install.html) is useful for setting up a stand alone environment with `conda`. +## Configuration -### Configuration - -#### Default PostgreSQL database +### Default PostgreSQL database The default target database connection (used by `bc2pg`) can be set via the `DATABASE_URL` environment variable (the password parameter should not be required if using a [.pgpass file](https://www.postgresql.org/docs/current/libpq-pgpass.html)) @@ -44,7 +36,7 @@ Linux/Mac: `export DATABASE_URL=postgresql://{username}:{password}@{hostname}:{p Windows: `SET DATABASE_URL=postgresql://{username}:{password}@{hostname}:{port}/{database}` -#### Layer list / layer schema cache +### Layer list / layer schema cache To reduce the volume of requests, information about data requested is cached locally. Schemas of individual layers that have previously been requested are cached with the cache file name matching the object/table name. @@ -91,7 +83,6 @@ AERODROME_STATUS AIRCRAFT_ACCESS_IND AIRPORT_NAME ``` ### CLI - Commands available via the bcdata command line interface are documented with the --help option ``` @@ -167,7 +158,6 @@ Options: CRS of provided bounds --indent INTEGER Indentation level for JSON output --compact / --not-compact Use compact separators (',', ':'). - --dst-crs, --dst_crs TEXT Destination CRS -s, --sortby TEXT Name of sort field -l, --lowercase Write column/properties names as lowercase -m, --promote-to-multi Promote features to multipart @@ -190,7 +180,6 @@ Options: --bounds TEXT Bounds: "left bottom right top" or "[left, bottom, right, top]". Coordinates are BC Albers (default) or --bounds_crs [required] - --dst-crs TEXT CRS of output file --bounds-crs TEXT CRS of provided bounds -r, --resolution INTEGER -a, --align Align provided bounds to provincial standard @@ -213,7 +202,7 @@ Usage: bcdata dump [OPTIONS] DATASET $ bcdata dump bc-airports --query "AIRPORT_NAME='Victoria Harbour (Shoal Point) Heliport'" $ bcdata dump bc-airports --bounds xmin ymin xmax ymax - It can also be combined to read bounds of a feature dataset using Fiona:  + It can also be combined to read bounds of a feature dataset using Fiona: $ bcdata dump bc-airports --bounds $(fio info aoi.shp --bounds) Options: @@ -319,29 +308,19 @@ The JSON output can be manipulated with [jq](https://stedolan.github.io/jq/). Fo etc... } -Dump data to geojson ([`EPSG:4326` only](https://tools.ietf.org/html/rfc7946#section-4)): +Dump data as supplied by server (BC Albers): $ bcdata dump bc-airports > bc-airports.geojson -Get a single feature and send it to geojsonio (requires [geojson-cli](https://github.com/mapbox/geojsonio-cli)). Note the double quotes required around a CQL FILTER provided to the `--query` option. - - $ bcdata dump \ - WHSE_IMAGERY_AND_BASE_MAPS.GSR_AIRPORTS_SVW \ - --query "AIRPORT_NAME='Terrace (Northwest Regional) Airport'" \ - | geojsonio - -Save a layer to a geopackage in BC Albers: - - $ bcdata cat bc-airports --dst-crs EPSG:3005 \ - | fio collect \ - | fio load -f GPKG --dst-crs EPSG:3005 airports.gpkg +Dump data to geojson that conforms to [RFC7946](https://tools.ietf.org/html/rfc7946#section-4)): -Note that this will not work if the source data has mixed geometry types. + $ bcdata dump bc-airports | \ + ogr2ogr -f GeoJSON -lco RFC7946=YES /vsistdout/ /vsistdin/ > bc-airports-4326.geojson -Load to parquet on s3: +Dump to Parquet on S3: - $ bcdata dump bc-airports \ - | ogr2ogr -f Parquet /vsis3/$BUCKET/airports.parquet -s_srs EPSG:4326 -t_srs EPSG:3005 /vsistdin/ + $ bcdata dump bc-airports | \ + ogr2ogr -f Parquet /vsis3/$BUCKET/airports.parquet /vsistdin/ Load data to postgres and run a spatial query: @@ -366,19 +345,8 @@ Load data to postgres and run a spatial query: ## Projections / CRS -**CLI** - -`bcdata dump` returns GeoJSON in WGS84 (`EPSG:4326`). - -`bcdata cat` provides the `--dst-crs` option, use any CRS the WFS server supports. - -`bcdata bc2pg` loads data to PostgreSQL in BC Albers (`EPSG:3005`). - - -**Python module** - -`bcdata.get_data()` defaults to `EPSG:4236` but any CRS can be specified (that the server will accept). - +All data are as supplied from the server by default: BC Albers / `EPSG:3005`. +Use some other tool to reproject the data as required. ## Development and testing diff --git a/src/bcdata/__init__.py b/src/bcdata/__init__.py index d6ec031..4b436e5 100644 --- a/src/bcdata/__init__.py +++ b/src/bcdata/__init__.py @@ -22,4 +22,4 @@ raise Exception(f"Failed to download primary key database at {PRIMARY_KEY_DB_URL}") primary_keys = {} -__version__ = "0.14.0" +__version__ = "0.15.0" diff --git a/src/bcdata/bc2pg.py b/src/bcdata/bc2pg.py index 90c8a8d..e26d1a5 100644 --- a/src/bcdata/bc2pg.py +++ b/src/bcdata/bc2pg.py @@ -95,7 +95,7 @@ def bc2pg( # noqa: C901 # if geometry type is not provided, determine type by making the first request if not geometry_type: - df = WFS.request_features(url=urls[0], as_gdf=True, crs="epsg:3005", lowercase=True) + df = WFS.request_features(url=urls[0], as_gdf=True, lowercase=True) geometry_type = df.geom_type.unique()[0] # keep only the first type if numpy.any(df.has_z.unique()[0]): # geopandas does not include Z in geom_type string geometry_type = geometry_type + "Z" @@ -107,7 +107,6 @@ def bc2pg( # noqa: C901 df_temp = WFS.request_features( url=urls[-1], as_gdf=True, - crs="epsg:3005", lowercase=True, silent=True, ) @@ -158,7 +157,7 @@ def bc2pg( # noqa: C901 for n, url in enumerate(urls): # if first url not downloaded above when checking geom type, do now if df is None: - df = WFS.request_features(url=url, as_gdf=True, crs="epsg:3005", lowercase=True) + df = WFS.request_features(url=url, as_gdf=True, lowercase=True) # tidy the resulting dataframe df = df.rename_geometry("geom") # lowercasify diff --git a/src/bcdata/cli.py b/src/bcdata/cli.py index d9ff899..f0b213b 100644 --- a/src/bcdata/cli.py +++ b/src/bcdata/cli.py @@ -73,7 +73,6 @@ def bounds_handler(ctx, param, value): help='Bounds: "left bottom right top" or "[left, bottom, right, top]". Coordinates are BC Albers (default) or --bounds_crs', ) -dst_crs_opt = click.option("--dst-crs", "--dst_crs", default="epsg:4326", help="Destination CRS") lowercase_opt = click.option( "--lowercase", "-l", is_flag=True, help="Write column/properties names as lowercase" @@ -141,11 +140,6 @@ def info(dataset, indent, meta_member, verbose, quiet): @cli.command() @click.option("--out_file", "-o", help="Output file", default="dem25.tif") @bounds_opt_dem -@click.option( - "--dst-crs", - help="CRS of output file", - default="EPSG:3005", -) @click.option( "--bounds-crs", help="CRS of provided bounds", @@ -169,7 +163,6 @@ def dem( bounds, bounds_crs, align, - dst_crs, out_file, resolution, interpolation, @@ -184,7 +177,6 @@ def dem( out_file=out_file, align=align, src_crs=bounds_crs, - dst_crs=dst_crs, resolution=resolution, interpolation=interpolation, ) @@ -276,7 +268,6 @@ def dump( ) @indent_opt @compact_opt -@dst_crs_opt @click.option("--sortby", "-s", help="Name of sort field") @lowercase_opt @click.option( @@ -296,7 +287,6 @@ def cat( bounds_crs, indent, compact, - dst_crs, sortby, lowercase, promote_to_multi, @@ -327,7 +317,6 @@ def cat( url=url, as_gdf=False, lowercase=lowercase, - crs=dst_crs, promote_to_multi=promote_to_multi, ) for feat in featurecollection["features"]: diff --git a/src/bcdata/wfs.py b/src/bcdata/wfs.py index f78ecce..abde127 100644 --- a/src/bcdata/wfs.py +++ b/src/bcdata/wfs.py @@ -358,7 +358,7 @@ def define_requests( "request": "GetFeature", "typeName": table, "outputFormat": "json", - "SRSNAME": "EPSG:4326", + "SRSNAME": "EPSG:3005", # just in case (this should always be the default) } if sortby: request["sortby"] = sortby.upper() @@ -384,7 +384,6 @@ def request_features( self, url, as_gdf=False, - crs="EPSG:4326", lowercase=False, promote_to_multi=False, ): @@ -395,9 +394,7 @@ def request_features( # load to gdf for reprojection/minor data cleaning if len(featurecollection["features"]) > 0: gdf = gpd.GeoDataFrame.from_features(featurecollection) - gdf = gdf.set_crs("EPSG:4326") - if crs != "EPSG:4326": - gdf = gdf.to_crs(crs) + gdf = gdf.set_crs("EPSG:3005") if gdf.geometry.name != "geometry": gdf = gdf.rename_geometry("geometry") if lowercase: @@ -416,7 +413,6 @@ def request_features( def get_data( dataset, query=None, - crs="epsg:4326", bounds=None, bounds_crs="epsg:3005", count=None, @@ -440,7 +436,7 @@ def get_data( for url in urls: results.append( WFS.request_features( - url, crs=crs, as_gdf=True, lowercase=lowercase, promote_to_multi=promote_to_multi + url, as_gdf=True, lowercase=lowercase, promote_to_multi=promote_to_multi ) ) if len(results) > 1: diff --git a/tests/test_wfs.py b/tests/test_wfs.py index 801b5db..d3ed20d 100644 --- a/tests/test_wfs.py +++ b/tests/test_wfs.py @@ -72,7 +72,7 @@ def test_get_data_asgdf(): def test_get_data_asgdf_crs(): - gdf = bcdata.get_data(UTMZONES_KEY, query="UTM_ZONE=10", as_gdf=True, crs="EPSG:3005") + gdf = bcdata.get_data(UTMZONES_KEY, query="UTM_ZONE=10", as_gdf=True) assert gdf.crs == "EPSG:3005" @@ -86,21 +86,21 @@ def test_get_data_small(): assert data["type"] == "FeatureCollection" +def test_get_data_count(): + data = bcdata.get_data(AIRPORTS_TABLE, count=100) + assert len(data["features"]) == 100 + + def test_get_data_lowercase(): - data = bcdata.get_data(AIRPORTS_TABLE, lowercase=True) + data = bcdata.get_data(AIRPORTS_TABLE, lowercase=True, count=1) assert "airport_name" in data["features"][0]["properties"].keys() def test_get_data_crs(): - data = bcdata.get_data(AIRPORTS_TABLE, crs="EPSG:3005") + data = bcdata.get_data(AIRPORTS_TABLE, count=1) assert data["crs"]["properties"]["name"] == "urn:ogc:def:crs:EPSG::3005" -def test_get_data_count(): - data = bcdata.get_data(AIRPORTS_TABLE, count=100) - assert len(data["features"]) == 100 - - # this presumes the page size will always be less than the total number of wells def test_get_data_paged_count(): wfs = bcdata.wfs.BCWFS()