From 398522da72b62fb3850df1e6634bb5d33e3a9782 Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Fri, 12 Jul 2024 17:02:56 -0700 Subject: [PATCH 01/17] update get_table_definition to work with api updates, update airports table schema in test --- bcdata/bcdc.py | 16 ++++++---------- tests/test_bcdc.py | 2 +- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/bcdata/bcdc.py b/bcdata/bcdc.py index 13f549d..0affb0f 100644 --- a/bcdata/bcdc.py +++ b/bcdata/bcdc.py @@ -9,7 +9,7 @@ log = logging.getLogger(__name__) -BCDC_API_URL = "https://catalogue.data.gov.bc.ca/api/3/action/" +BCDC_API_URL = "https://toyger.data.gov.bc.ca/api/3/action/" class ServiceException(Exception): @@ -119,15 +119,12 @@ def get_table_definition(table_name): # noqa: C901 # multiple format resource elif resource["format"] == "multiple": - # if multiple format, check for table name match in this location + # if multiple format, check for table name match in the preview info if resource["preview_info"]: # check that layer_name key is present - if "layer_name" in json.loads(resource["preview_info"]): + if "layer_name" in resource["preview_info"][0].keys(): # then check if it matches the table name - if ( - json.loads(resource["preview_info"])["layer_name"] - == table_name - ): + if resource["preview_info"][0]["layer_name"] == table_name: if "object_table_comments" in resource.keys(): table_comments = resource["object_table_comments"] else: @@ -143,13 +140,12 @@ def get_table_definition(table_name): # noqa: C901 ) log.debug(resource) - # uniquify the result if len(matches) > 0: - matched = list(set(matches))[0] + matched = matches[0] # just retain the first match return { "description": matched[0], # notes=description "comments": matched[1], - "schema": json.loads(matched[2]), + "schema": matched[2], } else: raise ValueError( diff --git a/tests/test_bcdc.py b/tests/test_bcdc.py index 38cb63f..fd26078 100644 --- a/tests/test_bcdc.py +++ b/tests/test_bcdc.py @@ -8,7 +8,7 @@ AIRPORTS_TABLE = "WHSE_IMAGERY_AND_BASE_MAPS.GSR_AIRPORTS_SVW" AIRPORTS_DESCRIPTION = "BC Airports identifies locations where aircraft may take-off and land. No guarantee is given that an identified point will be maintained to sufficient standards for landing and take-off of any/all aircraft. It includes airports, aerodromes, water aerodromes, heliports, and airstrips." AIRPORTS_COMMENTS = """GSR_AIRPORTS_SVW is a spatially enabled layer comprising AIRPORTS is a point dataset identifying locations where aircraft can take-off and land. No guarantee is given that an identified point will be maintained to sufficient standards for landing and take-off of any/all aircraft. It includes airports, aerodromes, water aerodromes, heliports, and airstrips.""" -AIRPORTS_SCHEMA = """[{"data_precision": 200, "column_comments": "CUSTODIAN_ORG_DESCRIPTION contains the name or description of the custodial organization (usually Ministry and Branch)", "short_name": "CUST_ORG", "data_type": "VARCHAR2", "column_name": "CUSTODIAN_ORG_DESCRIPTION"}, {"data_precision": 1000, "column_comments": "BUSINESS_CATEGORY_CLASS designates the category of business, i.e., airTransportation", "short_name": "BUS_CAT_CL", "data_type": "VARCHAR2", "column_name": "BUSINESS_CATEGORY_CLASS"}, {"data_precision": 1000, "column_comments": "BUSINESS_CATEGORY_DESCRIPTION describes the category of business, i.e., Air Transportation", "short_name": "BUS_CAT_DS", "data_type": "VARCHAR2", "column_name": "BUSINESS_CATEGORY_DESCRIPTION"}, {"data_precision": 500, "column_comments": "OCCUPANT_TYPE_DESCRIPTION contains the description of the occupant type, e.g. Hospital", "short_name": "OCCPNT_TYP", "data_type": "VARCHAR2", "column_name": "OCCUPANT_TYPE_DESCRIPTION"}, {"data_precision": 20, "column_comments": "SOURCE_DATA_ID is a unique occupant id either supplied by the source data system or produced by GSR, depending on the value of SUPPLIED_SOURCE_ID_IND", "short_name": "SRCDATA_ID", "data_type": "VARCHAR2", "column_name": "SOURCE_DATA_ID"}, {"data_precision": 1, "column_comments": "SUPPLIED_SOURCE_ID_IND is an indicator of whether the source data id was supplied by the supplier (Y) or DataBC (N)", "short_name": "SRC_ID_IND", "data_type": "VARCHAR2", "column_name": "SUPPLIED_SOURCE_ID_IND"}, {"data_precision": 500, "column_comments": "AIRPORT_NAME is a business name that can identify the occupant who provides the BC Government or BC Government related services to public, e.g., Burnaby General Hospital, Golden Food Bank", "short_name": "NAME", "data_type": "VARCHAR2", "column_name": "AIRPORT_NAME"}, {"data_precision": 4000, "column_comments": "DESCRIPTION describes the Occupant in more detail, e.g., aerodrome.", "short_name": "DESCRIPTN", "data_type": "VARCHAR2", "column_name": "DESCRIPTION"}, {"data_precision": 1000, "column_comments": "PHYSICAL_ADDRESS contains the civic or non-civic address as a single string, structured according to the specification of the Physical Address and Geocoding Standard, e.g., 420 GORGE RD E, VICTORIA, BC.", "short_name": "ADDRESS", "data_type": "VARCHAR2", "column_name": "PHYSICAL_ADDRESS"}, {"data_precision": 1000, "column_comments": "ALIAS_ADDRESS contains an address string, not a parsed address. It is the address that will be displayed for presentation purposes, e.g., 32900 Marshall Road, Abbotsford, BC", "short_name": "ALIAS_ADDR", "data_type": "VARCHAR2", "column_name": "ALIAS_ADDRESS"}, {"data_precision": 200, "column_comments": "STREET_ADDRESS is a free form expression of the site descriptor (e.g., unit) and the civic building number / street / street indicator portion of an address, e.g., Unit 1, 123 Main Street East.", "short_name": "ST_ADDRESS", "data_type": "VARCHAR2", "column_name": "STREET_ADDRESS"}, {"data_precision": 15, "column_comments": "POSTAL_CODE is the Canadian Postal code value associated with the physical address, e.g., V9Z 2K1", "short_name": "POSTAL_CD", "data_type": "VARCHAR2", "column_name": "POSTAL_CODE"}, {"data_precision": 100, "column_comments": "LOCALITY is the name of the municipality, community, Federal Indian Reserve (IR), subdivision, regional district, indigenous land or natural feature the occupant site is located in, e.g., Victoria, Saanich IR 1, Capital Regional District.", "short_name": "LOCALITY", "data_type": "VARCHAR2", "column_name": "LOCALITY"}, {"data_precision": 50, "column_comments": "CONTACT PHONE contains the general office phone number of the Occupant, e.g., (250) 555-1234 or 250-555-1234", "short_name": "CONT_PHONE", "data_type": "VARCHAR2", "column_name": "CONTACT_PHONE"}, {"data_precision": 100, "column_comments": "CONTACT_EMAIL contains the \\"general office\\" email address of the Occupant.", "short_name": "CONT_EMAIL", "data_type": "VARCHAR2", "column_name": "CONTACT_EMAIL"}, {"data_precision": 50, "column_comments": "CONTACT FAX contains the general office fax number of the Occupant, e.g., (250) 555-1234 or 250-555-1234", "short_name": "CONT_FAX", "data_type": "VARCHAR2", "column_name": "CONTACT_FAX"}, {"data_precision": 500, "column_comments": "WEBSITE_URL contains the link to the Home page of the Occupant\'s Website", "short_name": "WEBSITE", "data_type": "VARCHAR2", "column_name": "WEBSITE_URL"}, {"data_precision": 500, "column_comments": "IMAGE_URL contains a full URL link to a picture of the Occupant\'s Location.", "short_name": "IMAGE_URL", "data_type": "VARCHAR2", "column_name": "IMAGE_URL"}, {"data_precision": 9, "column_comments": "LATITUDE is the geographic coordinate, in decimal degrees (dd.dddddd), of the location of the feature as measured from the equator, e.g., 55.323653", "short_name": "LATITUDE", "data_type": "NUMBER", "column_name": "LATITUDE"}, {"data_precision": 10, "column_comments": "LONGITUDE is the geographic coordinate, in decimal degrees (-ddd.dddddd), of the location of the feature as measured from the prime meridian, e.g., -123.093544", "short_name": "LONGITUDE", "data_type": "NUMBER", "column_name": "LONGITUDE"}, {"data_precision": 1000, "column_comments": "KEYWORDS contains text strings supplied by the Custodian, to be used for search/query purposes. Keywords are separated by the ; delimiter.", "short_name": "KEYWORDS", "data_type": "VARCHAR2", "column_name": "KEYWORDS"}, {"data_precision": 7, "column_comments": "DATE_UPDATED contains the date that the Occupant data was updated in the Occupant structure (system-generated)", "short_name": "DT_UPDATE", "data_type": "DATE", "column_name": "DATE_UPDATED"}, {"data_precision": 1, "column_comments": "SITE_GEOCODED_IND contains a Flag/indicator (Y/N) that the Occupant Physical Address has been geo-coded by the DataBC Address Geocoder and the results provide a valid site address, e.g., Y, N", "short_name": "GEOCD_IND", "data_type": "VARCHAR2", "column_name": "SITE_GEOCODED_IND"}, {"data_precision": 100, "column_comments": "AERODROME STATUS identifies if the facility is certified or registered according to Transport Canada standards, or a derived status from other sources, i.e., Certified, Registered, Decommissioned, Null (unknown).", "short_name": "AER_STATUS", "data_type": "VARCHAR2", "column_name": "AERODROME_STATUS"}, {"data_precision": 1, "column_comments": "AIRCRAFT ACCESS IND indicates whether fixed wing aircraft, not including seaplanes, can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "AIRCR_ACS", "data_type": "VARCHAR2", "column_name": "AIRCRAFT_ACCESS_IND"}, {"data_precision": 50, "column_comments": "DATA_SOURCE is the project or resource from which the aerodrome data was derived, e.g., Canadian Flight Supplement.", "short_name": "DATA_SRCE", "data_type": "VARCHAR2", "column_name": "DATA_SOURCE"}, {"data_precision": 50, "column_comments": "DATA SOURCE YEAR is the year of the project or resource containing the listed aerodrome data, e.g., 2014.", "short_name": "DATASRC_YR", "data_type": "VARCHAR2", "column_name": "DATA_SOURCE_YEAR"}, {"data_precision": 10, "column_comments": "ELEVATION is the published elevation (in metres) of an aerodrome, or if not published, elevation taken from Google Earth (in metres), e.g., 10", "short_name": "ELEVATION", "data_type": "NUMBER", "column_name": "ELEVATION"}, {"data_precision": 1, "column_comments": "FUEL_AVAILABILITY_IND indicates whether fuel is available at this aerodrome, i.e., Y, N, Null (unknown)", "short_name": "FUEL_AVAIL", "data_type": "VARCHAR2", "column_name": "FUEL_AVAILABILITY_IND"}, {"data_precision": 1, "column_comments": "HELICOPTER_ACCESS_IND indicates whether helicopters can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "HELI_ACS", "data_type": "VARCHAR2", "column_name": "HELICOPTER_ACCESS_IND"}, {"data_precision": 4, "column_comments": "IATA_CODE is the International Air Transport Associations\'s unique identifier code, e.g., YYJ.", "short_name": "IATA", "data_type": "VARCHAR2", "column_name": "IATA_CODE"}, {"data_precision": 4, "column_comments": "ICAO_CODE is the International Civil Aviation Organizations\'s unique identifier code, e.g., CYYJ.", "short_name": "ICAO", "data_type": "VARCHAR2", "column_name": "ICAO_CODE"}, {"data_precision": 10, "column_comments": "MAX_RUNWAY_LENGTH is the length of the longest runway at an aerodrome in metres, e.g., 700", "short_name": "MX_RWAY_LN", "data_type": "NUMBER", "column_name": "MAX_RUNWAY_LENGTH"}, {"data_precision": 10, "column_comments": "NUMBER_OF_RUNWAYS is the total number of runways at an aerodrome, e.g., 5", "short_name": "NUM_RWAY", "data_type": "NUMBER", "column_name": "NUMBER_OF_RUNWAYS"}, {"data_precision": 1, "column_comments": "OIL_AVAILABILITY_IND indicates whether fuel oil is available at this aerodrome, i.e., Y, N, Null (unknown)", "short_name": "OIL_AVAIL", "data_type": "VARCHAR2", "column_name": "OIL_AVAILABILITY_IND"}, {"data_precision": 50, "column_comments": "RUNWAY_SURFACE identifies the material used in a runway or helipad\'s construction, e.g., gravel, asphalt, Null (unknown).", "short_name": "RWAY_SURF", "data_type": "VARCHAR2", "column_name": "RUNWAY_SURFACE"}, {"data_precision": 1, "column_comments": "SEAPLANE_ACCESS_IND indicates whether seaplanes can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "SEAPLN_ACC", "data_type": "VARCHAR2", "column_name": "SEAPLANE_ACCESS_IND"}, {"data_precision": 4, "column_comments": "TC_LID_CODE is the Transport Canada Location Identifier unique code, e.g., CAP5.", "short_name": "TC_LID", "data_type": "VARCHAR2", "column_name": "TC_LID_CODE"}, {"data_precision": 64, "column_comments": "SHAPE is the column used to reference the spatial coordinates defining the feature.", "short_name": "SHAPE", "data_type": "SDO_GEOMETRY", "column_name": "SHAPE"}, {"data_precision": 10, "column_comments": "SEQUENCE_ID contains a value to distinguish occupant instances. Where a single occupant can have multiple instances (representing different services, for example), this field distinguishes this occupant instance from other instances of the same or different occupants.", "short_name": "SEQ_ID", "data_type": "NUMBER", "column_name": "SEQUENCE_ID"}, {"data_precision": 4000, "column_comments": "SE_ANNO_CAD_DATA is a binary column used by spatial tools to store annotation, curve features and CAD data when using the SDO_GEOMETRY storage data type.", "short_name": null, "data_type": "BLOB", "column_name": "SE_ANNO_CAD_DATA"}]""" +AIRPORTS_SCHEMA = """[{"data_precision": "200", "column_comments": "CUSTODIAN_ORG_DESCRIPTION contains the name or description of the custodial organization (usually Ministry and Branch)", "short_name": "CUST_ORG", "data_type": "VARCHAR2", "column_name": "CUSTODIAN_ORG_DESCRIPTION"}, {"data_precision": "1000", "column_comments": "BUSINESS_CATEGORY_CLASS designates the category of business, i.e., airTransportation", "short_name": "BUS_CAT_CL", "data_type": "VARCHAR2", "column_name": "BUSINESS_CATEGORY_CLASS"}, {"data_precision": "1000", "column_comments": "BUSINESS_CATEGORY_DESCRIPTION describes the category of business, i.e., Air Transportation", "short_name": "BUS_CAT_DS", "data_type": "VARCHAR2", "column_name": "BUSINESS_CATEGORY_DESCRIPTION"}, {"data_precision": "500", "column_comments": "OCCUPANT_TYPE_DESCRIPTION contains the description of the occupant type, e.g. Hospital", "short_name": "OCCPNT_TYP", "data_type": "VARCHAR2", "column_name": "OCCUPANT_TYPE_DESCRIPTION"}, {"data_precision": "20", "column_comments": "SOURCE_DATA_ID is a unique occupant id either supplied by the source data system or produced by GSR, depending on the value of SUPPLIED_SOURCE_ID_IND", "short_name": "SRCDATA_ID", "data_type": "VARCHAR2", "column_name": "SOURCE_DATA_ID"}, {"data_precision": "1", "column_comments": "SUPPLIED_SOURCE_ID_IND is an indicator of whether the source data id was supplied by the supplier (Y) or DataBC (N)", "short_name": "SRC_ID_IND", "data_type": "VARCHAR2", "column_name": "SUPPLIED_SOURCE_ID_IND"}, {"data_precision": "500", "column_comments": "AIRPORT_NAME is a business name that can identify the occupant who provides the BC Government or BC Government related services to public, e.g., Burnaby General Hospital, Golden Food Bank", "short_name": "NAME", "data_type": "VARCHAR2", "column_name": "AIRPORT_NAME"}, {"data_precision": "4000", "column_comments": "DESCRIPTION describes the Occupant in more detail, e.g., aerodrome.", "short_name": "DESCRIPTN", "data_type": "VARCHAR2", "column_name": "DESCRIPTION"}, {"data_precision": "1000", "column_comments": "PHYSICAL_ADDRESS contains the civic or non-civic address as a single string, structured according to the specification of the Physical Address and Geocoding Standard, e.g., 420 GORGE RD E, VICTORIA, BC.", "short_name": "ADDRESS", "data_type": "VARCHAR2", "column_name": "PHYSICAL_ADDRESS"}, {"data_precision": "1000", "column_comments": "ALIAS_ADDRESS contains an address string, not a parsed address. It is the address that will be displayed for presentation purposes, e.g., 32900 Marshall Road, Abbotsford, BC", "short_name": "ALIAS_ADDR", "data_type": "VARCHAR2", "column_name": "ALIAS_ADDRESS"}, {"data_precision": "200", "column_comments": "STREET_ADDRESS is a free form expression of the site descriptor (e.g., unit) and the civic building number / street / street indicator portion of an address, e.g., Unit 1, 123 Main Street East.", "short_name": "ST_ADDRESS", "data_type": "VARCHAR2", "column_name": "STREET_ADDRESS"}, {"data_precision": "10", "column_comments": "POSTAL_CODE is the Canadian Postal code value associated with the physical address, e.g., V9Z 2K1", "short_name": "POSTAL_CD", "data_type": "VARCHAR2", "column_name": "POSTAL_CODE"}, {"data_precision": "100", "column_comments": "LOCALITY is the name of the municipality, community, Federal Indian Reserve (IR), subdivision, regional district, indigenous land or natural feature the occupant site is located in, e.g., Victoria, Saanich IR 1, Capital Regional District.", "short_name": "LOCALITY", "data_type": "VARCHAR2", "column_name": "LOCALITY"}, {"data_precision": "50", "column_comments": "CONTACT PHONE contains the general office phone number of the Occupant, e.g., (250) 555-1234 or 250-555-1234", "short_name": "CONT_PHONE", "data_type": "VARCHAR2", "column_name": "CONTACT_PHONE"}, {"data_precision": "100", "column_comments": "CONTACT_EMAIL contains the \\"general office\\" email address of the Occupant.", "short_name": "CONT_EMAIL", "data_type": "VARCHAR2", "column_name": "CONTACT_EMAIL"}, {"data_precision": "50", "column_comments": "CONTACT FAX contains the general office fax number of the Occupant, e.g., (250) 555-1234 or 250-555-1234", "short_name": "CONT_FAX", "data_type": "VARCHAR2", "column_name": "CONTACT_FAX"}, {"data_precision": "500", "column_comments": "WEBSITE_URL contains the link to the Home page of the Occupant\'s Website", "short_name": "WEBSITE", "data_type": "VARCHAR2", "column_name": "WEBSITE_URL"}, {"data_precision": "500", "column_comments": "IMAGE_URL contains a full URL link to a picture of the Occupant\'s Location.", "short_name": "IMAGE_URL", "data_type": "VARCHAR2", "column_name": "IMAGE_URL"}, {"data_precision": "9", "column_comments": "LATITUDE is the geographic coordinate, in decimal degrees (dd.dddddd), of the location of the feature as measured from the equator, e.g., 55.323653", "short_name": "LATITUDE", "data_type": "NUMBER", "column_name": "LATITUDE"}, {"data_precision": "10", "column_comments": "LONGITUDE is the geographic coordinate, in decimal degrees (-ddd.dddddd), of the location of the feature as measured from the prime meridian, e.g., -123.093544", "short_name": "LONGITUDE", "data_type": "NUMBER", "column_name": "LONGITUDE"}, {"data_precision": "1000", "column_comments": "KEYWORDS contains text strings supplied by the Custodian, to be used for search/query purposes. Keywords are separated by the ; delimiter.", "short_name": "KEYWORDS", "data_type": "VARCHAR2", "column_name": "KEYWORDS"}, {"data_precision": "7", "column_comments": "DATE_UPDATED contains the date that the Occupant data was updated in the Occupant structure (system-generated)", "short_name": "DT_UPDATE", "data_type": "DATE", "column_name": "DATE_UPDATED"}, {"data_precision": "1", "column_comments": "SITE_GEOCODED_IND contains a Flag/indicator (Y/N) that the Occupant Physical Address has been geo-coded by the DataBC Address Geocoder and the results provide a valid site address, e.g., Y, N", "short_name": "GEOCD_IND", "data_type": "VARCHAR2", "column_name": "SITE_GEOCODED_IND"}, {"data_precision": "100", "column_comments": "AERODROME STATUS identifies if the facility is certified or registered according to Transport Canada standards, or a derived status from other sources, i.e., Certified, Registered, Decommissioned, Null (unknown).", "short_name": "AER_STATUS", "data_type": "VARCHAR2", "column_name": "AERODROME_STATUS"}, {"data_precision": "1", "column_comments": "AIRCRAFT ACCESS IND indicates whether fixed wing aircraft, not including seaplanes, can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "AIRCR_ACS", "data_type": "VARCHAR2", "column_name": "AIRCRAFT_ACCESS_IND"}, {"data_precision": "50", "column_comments": "DATA_SOURCE is the project or resource from which the aerodrome data was derived, e.g., Canadian Flight Supplement.", "short_name": "DATA_SRCE", "data_type": "VARCHAR2", "column_name": "DATA_SOURCE"}, {"data_precision": "50", "column_comments": "DATA SOURCE YEAR is the year of the project or resource containing the listed aerodrome data, e.g., 2014.", "short_name": "DATASRC_YR", "data_type": "VARCHAR2", "column_name": "DATA_SOURCE_YEAR"}, {"data_precision": "10", "column_comments": "ELEVATION is the published elevation (in metres) of an aerodrome, or if not published, elevation taken from Google Earth (in metres), e.g., 10", "short_name": "ELEVATION", "data_type": "NUMBER", "column_name": "ELEVATION"}, {"data_precision": "1", "column_comments": "FUEL_AVAILABILITY_IND indicates whether fuel is available at this aerodrome, i.e., Y, N, Null (unknown)", "short_name": "FUEL_AVAIL", "data_type": "VARCHAR2", "column_name": "FUEL_AVAILABILITY_IND"}, {"data_precision": "1", "column_comments": "HELICOPTER_ACCESS_IND indicates whether helicopters can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "HELI_ACS", "data_type": "VARCHAR2", "column_name": "HELICOPTER_ACCESS_IND"}, {"data_precision": "4", "column_comments": "IATA_CODE is the International Air Transport Associations\'s unique identifier code, e.g., YYJ.", "short_name": "IATA", "data_type": "VARCHAR2", "column_name": "IATA_CODE"}, {"data_precision": "4", "column_comments": "ICAO_CODE is the International Civil Aviation Organizations\'s unique identifier code, e.g., CYYJ.", "short_name": "ICAO", "data_type": "VARCHAR2", "column_name": "ICAO_CODE"}, {"data_precision": "10", "column_comments": "MAX_RUNWAY_LENGTH is the length of the longest runway at an aerodrome in metres, e.g., 700", "short_name": "MX_RWAY_LN", "data_type": "NUMBER", "column_name": "MAX_RUNWAY_LENGTH"}, {"data_precision": "10", "column_comments": "NUMBER_OF_RUNWAYS is the total number of runways at an aerodrome, e.g., 5", "short_name": "NUM_RWAY", "data_type": "NUMBER", "column_name": "NUMBER_OF_RUNWAYS"}, {"data_precision": "1", "column_comments": "OIL_AVAILABILITY_IND indicates whether fuel oil is available at this aerodrome, i.e., Y, N, Null (unknown)", "short_name": "OIL_AVAIL", "data_type": "VARCHAR2", "column_name": "OIL_AVAILABILITY_IND"}, {"data_precision": "50", "column_comments": "RUNWAY_SURFACE identifies the material used in a runway or helipad\'s construction, e.g., gravel, asphalt, Null (unknown).", "short_name": "RWAY_SURF", "data_type": "VARCHAR2", "column_name": "RUNWAY_SURFACE"}, {"data_precision": "1", "column_comments": "SEAPLANE_ACCESS_IND indicates whether seaplanes can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "SEAPLN_ACC", "data_type": "VARCHAR2", "column_name": "SEAPLANE_ACCESS_IND"}, {"data_precision": "4", "column_comments": "TC_LID_CODE is the Transport Canada Location Identifier unique code, e.g., CAP5.", "short_name": "TC_LID", "data_type": "VARCHAR2", "column_name": "TC_LID_CODE"}, {"data_precision": "64", "column_comments": "SHAPE is the column used to reference the spatial coordinates defining the feature.", "short_name": "SHAPE", "data_type": "SDO_GEOMETRY", "column_name": "SHAPE"}, {"data_precision": "10", "column_comments": "SEQUENCE_ID contains a value to distinguish occupant instances. Where a single occupant can have multiple instances (representing different services, for example), this field distinguishes this occupant instance from other instances of the same or different occupants.", "short_name": "SEQ_ID", "data_type": "NUMBER", "column_name": "SEQUENCE_ID"}, {"data_precision": "4000", "column_comments": "SE_ANNO_CAD_DATA is a binary column used by spatial tools to store annotation, curve features and CAD data when using the SDO_GEOMETRY storage data type.", "data_type": "BLOB", "column_name": "SE_ANNO_CAD_DATA"}]""" def test_get_table_name(): From 6d4ab527095b8e07f223bb98a9d00906e6cbd04d Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Wed, 14 Aug 2024 14:52:50 -0700 Subject: [PATCH 02/17] back to integers for data_precision --- tests/test_bcdc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_bcdc.py b/tests/test_bcdc.py index fd26078..052a84c 100644 --- a/tests/test_bcdc.py +++ b/tests/test_bcdc.py @@ -8,7 +8,7 @@ AIRPORTS_TABLE = "WHSE_IMAGERY_AND_BASE_MAPS.GSR_AIRPORTS_SVW" AIRPORTS_DESCRIPTION = "BC Airports identifies locations where aircraft may take-off and land. No guarantee is given that an identified point will be maintained to sufficient standards for landing and take-off of any/all aircraft. It includes airports, aerodromes, water aerodromes, heliports, and airstrips." AIRPORTS_COMMENTS = """GSR_AIRPORTS_SVW is a spatially enabled layer comprising AIRPORTS is a point dataset identifying locations where aircraft can take-off and land. No guarantee is given that an identified point will be maintained to sufficient standards for landing and take-off of any/all aircraft. It includes airports, aerodromes, water aerodromes, heliports, and airstrips.""" -AIRPORTS_SCHEMA = """[{"data_precision": "200", "column_comments": "CUSTODIAN_ORG_DESCRIPTION contains the name or description of the custodial organization (usually Ministry and Branch)", "short_name": "CUST_ORG", "data_type": "VARCHAR2", "column_name": "CUSTODIAN_ORG_DESCRIPTION"}, {"data_precision": "1000", "column_comments": "BUSINESS_CATEGORY_CLASS designates the category of business, i.e., airTransportation", "short_name": "BUS_CAT_CL", "data_type": "VARCHAR2", "column_name": "BUSINESS_CATEGORY_CLASS"}, {"data_precision": "1000", "column_comments": "BUSINESS_CATEGORY_DESCRIPTION describes the category of business, i.e., Air Transportation", "short_name": "BUS_CAT_DS", "data_type": "VARCHAR2", "column_name": "BUSINESS_CATEGORY_DESCRIPTION"}, {"data_precision": "500", "column_comments": "OCCUPANT_TYPE_DESCRIPTION contains the description of the occupant type, e.g. Hospital", "short_name": "OCCPNT_TYP", "data_type": "VARCHAR2", "column_name": "OCCUPANT_TYPE_DESCRIPTION"}, {"data_precision": "20", "column_comments": "SOURCE_DATA_ID is a unique occupant id either supplied by the source data system or produced by GSR, depending on the value of SUPPLIED_SOURCE_ID_IND", "short_name": "SRCDATA_ID", "data_type": "VARCHAR2", "column_name": "SOURCE_DATA_ID"}, {"data_precision": "1", "column_comments": "SUPPLIED_SOURCE_ID_IND is an indicator of whether the source data id was supplied by the supplier (Y) or DataBC (N)", "short_name": "SRC_ID_IND", "data_type": "VARCHAR2", "column_name": "SUPPLIED_SOURCE_ID_IND"}, {"data_precision": "500", "column_comments": "AIRPORT_NAME is a business name that can identify the occupant who provides the BC Government or BC Government related services to public, e.g., Burnaby General Hospital, Golden Food Bank", "short_name": "NAME", "data_type": "VARCHAR2", "column_name": "AIRPORT_NAME"}, {"data_precision": "4000", "column_comments": "DESCRIPTION describes the Occupant in more detail, e.g., aerodrome.", "short_name": "DESCRIPTN", "data_type": "VARCHAR2", "column_name": "DESCRIPTION"}, {"data_precision": "1000", "column_comments": "PHYSICAL_ADDRESS contains the civic or non-civic address as a single string, structured according to the specification of the Physical Address and Geocoding Standard, e.g., 420 GORGE RD E, VICTORIA, BC.", "short_name": "ADDRESS", "data_type": "VARCHAR2", "column_name": "PHYSICAL_ADDRESS"}, {"data_precision": "1000", "column_comments": "ALIAS_ADDRESS contains an address string, not a parsed address. It is the address that will be displayed for presentation purposes, e.g., 32900 Marshall Road, Abbotsford, BC", "short_name": "ALIAS_ADDR", "data_type": "VARCHAR2", "column_name": "ALIAS_ADDRESS"}, {"data_precision": "200", "column_comments": "STREET_ADDRESS is a free form expression of the site descriptor (e.g., unit) and the civic building number / street / street indicator portion of an address, e.g., Unit 1, 123 Main Street East.", "short_name": "ST_ADDRESS", "data_type": "VARCHAR2", "column_name": "STREET_ADDRESS"}, {"data_precision": "10", "column_comments": "POSTAL_CODE is the Canadian Postal code value associated with the physical address, e.g., V9Z 2K1", "short_name": "POSTAL_CD", "data_type": "VARCHAR2", "column_name": "POSTAL_CODE"}, {"data_precision": "100", "column_comments": "LOCALITY is the name of the municipality, community, Federal Indian Reserve (IR), subdivision, regional district, indigenous land or natural feature the occupant site is located in, e.g., Victoria, Saanich IR 1, Capital Regional District.", "short_name": "LOCALITY", "data_type": "VARCHAR2", "column_name": "LOCALITY"}, {"data_precision": "50", "column_comments": "CONTACT PHONE contains the general office phone number of the Occupant, e.g., (250) 555-1234 or 250-555-1234", "short_name": "CONT_PHONE", "data_type": "VARCHAR2", "column_name": "CONTACT_PHONE"}, {"data_precision": "100", "column_comments": "CONTACT_EMAIL contains the \\"general office\\" email address of the Occupant.", "short_name": "CONT_EMAIL", "data_type": "VARCHAR2", "column_name": "CONTACT_EMAIL"}, {"data_precision": "50", "column_comments": "CONTACT FAX contains the general office fax number of the Occupant, e.g., (250) 555-1234 or 250-555-1234", "short_name": "CONT_FAX", "data_type": "VARCHAR2", "column_name": "CONTACT_FAX"}, {"data_precision": "500", "column_comments": "WEBSITE_URL contains the link to the Home page of the Occupant\'s Website", "short_name": "WEBSITE", "data_type": "VARCHAR2", "column_name": "WEBSITE_URL"}, {"data_precision": "500", "column_comments": "IMAGE_URL contains a full URL link to a picture of the Occupant\'s Location.", "short_name": "IMAGE_URL", "data_type": "VARCHAR2", "column_name": "IMAGE_URL"}, {"data_precision": "9", "column_comments": "LATITUDE is the geographic coordinate, in decimal degrees (dd.dddddd), of the location of the feature as measured from the equator, e.g., 55.323653", "short_name": "LATITUDE", "data_type": "NUMBER", "column_name": "LATITUDE"}, {"data_precision": "10", "column_comments": "LONGITUDE is the geographic coordinate, in decimal degrees (-ddd.dddddd), of the location of the feature as measured from the prime meridian, e.g., -123.093544", "short_name": "LONGITUDE", "data_type": "NUMBER", "column_name": "LONGITUDE"}, {"data_precision": "1000", "column_comments": "KEYWORDS contains text strings supplied by the Custodian, to be used for search/query purposes. Keywords are separated by the ; delimiter.", "short_name": "KEYWORDS", "data_type": "VARCHAR2", "column_name": "KEYWORDS"}, {"data_precision": "7", "column_comments": "DATE_UPDATED contains the date that the Occupant data was updated in the Occupant structure (system-generated)", "short_name": "DT_UPDATE", "data_type": "DATE", "column_name": "DATE_UPDATED"}, {"data_precision": "1", "column_comments": "SITE_GEOCODED_IND contains a Flag/indicator (Y/N) that the Occupant Physical Address has been geo-coded by the DataBC Address Geocoder and the results provide a valid site address, e.g., Y, N", "short_name": "GEOCD_IND", "data_type": "VARCHAR2", "column_name": "SITE_GEOCODED_IND"}, {"data_precision": "100", "column_comments": "AERODROME STATUS identifies if the facility is certified or registered according to Transport Canada standards, or a derived status from other sources, i.e., Certified, Registered, Decommissioned, Null (unknown).", "short_name": "AER_STATUS", "data_type": "VARCHAR2", "column_name": "AERODROME_STATUS"}, {"data_precision": "1", "column_comments": "AIRCRAFT ACCESS IND indicates whether fixed wing aircraft, not including seaplanes, can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "AIRCR_ACS", "data_type": "VARCHAR2", "column_name": "AIRCRAFT_ACCESS_IND"}, {"data_precision": "50", "column_comments": "DATA_SOURCE is the project or resource from which the aerodrome data was derived, e.g., Canadian Flight Supplement.", "short_name": "DATA_SRCE", "data_type": "VARCHAR2", "column_name": "DATA_SOURCE"}, {"data_precision": "50", "column_comments": "DATA SOURCE YEAR is the year of the project or resource containing the listed aerodrome data, e.g., 2014.", "short_name": "DATASRC_YR", "data_type": "VARCHAR2", "column_name": "DATA_SOURCE_YEAR"}, {"data_precision": "10", "column_comments": "ELEVATION is the published elevation (in metres) of an aerodrome, or if not published, elevation taken from Google Earth (in metres), e.g., 10", "short_name": "ELEVATION", "data_type": "NUMBER", "column_name": "ELEVATION"}, {"data_precision": "1", "column_comments": "FUEL_AVAILABILITY_IND indicates whether fuel is available at this aerodrome, i.e., Y, N, Null (unknown)", "short_name": "FUEL_AVAIL", "data_type": "VARCHAR2", "column_name": "FUEL_AVAILABILITY_IND"}, {"data_precision": "1", "column_comments": "HELICOPTER_ACCESS_IND indicates whether helicopters can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "HELI_ACS", "data_type": "VARCHAR2", "column_name": "HELICOPTER_ACCESS_IND"}, {"data_precision": "4", "column_comments": "IATA_CODE is the International Air Transport Associations\'s unique identifier code, e.g., YYJ.", "short_name": "IATA", "data_type": "VARCHAR2", "column_name": "IATA_CODE"}, {"data_precision": "4", "column_comments": "ICAO_CODE is the International Civil Aviation Organizations\'s unique identifier code, e.g., CYYJ.", "short_name": "ICAO", "data_type": "VARCHAR2", "column_name": "ICAO_CODE"}, {"data_precision": "10", "column_comments": "MAX_RUNWAY_LENGTH is the length of the longest runway at an aerodrome in metres, e.g., 700", "short_name": "MX_RWAY_LN", "data_type": "NUMBER", "column_name": "MAX_RUNWAY_LENGTH"}, {"data_precision": "10", "column_comments": "NUMBER_OF_RUNWAYS is the total number of runways at an aerodrome, e.g., 5", "short_name": "NUM_RWAY", "data_type": "NUMBER", "column_name": "NUMBER_OF_RUNWAYS"}, {"data_precision": "1", "column_comments": "OIL_AVAILABILITY_IND indicates whether fuel oil is available at this aerodrome, i.e., Y, N, Null (unknown)", "short_name": "OIL_AVAIL", "data_type": "VARCHAR2", "column_name": "OIL_AVAILABILITY_IND"}, {"data_precision": "50", "column_comments": "RUNWAY_SURFACE identifies the material used in a runway or helipad\'s construction, e.g., gravel, asphalt, Null (unknown).", "short_name": "RWAY_SURF", "data_type": "VARCHAR2", "column_name": "RUNWAY_SURFACE"}, {"data_precision": "1", "column_comments": "SEAPLANE_ACCESS_IND indicates whether seaplanes can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "SEAPLN_ACC", "data_type": "VARCHAR2", "column_name": "SEAPLANE_ACCESS_IND"}, {"data_precision": "4", "column_comments": "TC_LID_CODE is the Transport Canada Location Identifier unique code, e.g., CAP5.", "short_name": "TC_LID", "data_type": "VARCHAR2", "column_name": "TC_LID_CODE"}, {"data_precision": "64", "column_comments": "SHAPE is the column used to reference the spatial coordinates defining the feature.", "short_name": "SHAPE", "data_type": "SDO_GEOMETRY", "column_name": "SHAPE"}, {"data_precision": "10", "column_comments": "SEQUENCE_ID contains a value to distinguish occupant instances. Where a single occupant can have multiple instances (representing different services, for example), this field distinguishes this occupant instance from other instances of the same or different occupants.", "short_name": "SEQ_ID", "data_type": "NUMBER", "column_name": "SEQUENCE_ID"}, {"data_precision": "4000", "column_comments": "SE_ANNO_CAD_DATA is a binary column used by spatial tools to store annotation, curve features and CAD data when using the SDO_GEOMETRY storage data type.", "data_type": "BLOB", "column_name": "SE_ANNO_CAD_DATA"}]""" +AIRPORTS_SCHEMA = """[{"data_precision": 200, "column_comments": "CUSTODIAN_ORG_DESCRIPTION contains the name or description of the custodial organization (usually Ministry and Branch)", "short_name": "CUST_ORG", "data_type": "VARCHAR2", "column_name": "CUSTODIAN_ORG_DESCRIPTION"}, {"data_precision": 1000, "column_comments": "BUSINESS_CATEGORY_CLASS designates the category of business, i.e., airTransportation", "short_name": "BUS_CAT_CL", "data_type": "VARCHAR2", "column_name": "BUSINESS_CATEGORY_CLASS"}, {"data_precision": 1000, "column_comments": "BUSINESS_CATEGORY_DESCRIPTION describes the category of business, i.e., Air Transportation", "short_name": "BUS_CAT_DS", "data_type": "VARCHAR2", "column_name": "BUSINESS_CATEGORY_DESCRIPTION"}, {"data_precision": 500, "column_comments": "OCCUPANT_TYPE_DESCRIPTION contains the description of the occupant type, e.g. Hospital", "short_name": "OCCPNT_TYP", "data_type": "VARCHAR2", "column_name": "OCCUPANT_TYPE_DESCRIPTION"}, {"data_precision": 20, "column_comments": "SOURCE_DATA_ID is a unique occupant id either supplied by the source data system or produced by GSR, depending on the value of SUPPLIED_SOURCE_ID_IND", "short_name": "SRCDATA_ID", "data_type": "VARCHAR2", "column_name": "SOURCE_DATA_ID"}, {"data_precision": 1, "column_comments": "SUPPLIED_SOURCE_ID_IND is an indicator of whether the source data id was supplied by the supplier (Y) or DataBC (N)", "short_name": "SRC_ID_IND", "data_type": "VARCHAR2", "column_name": "SUPPLIED_SOURCE_ID_IND"}, {"data_precision": 500, "column_comments": "AIRPORT_NAME is a business name that can identify the occupant who provides the BC Government or BC Government related services to public, e.g., Burnaby General Hospital, Golden Food Bank", "short_name": "NAME", "data_type": "VARCHAR2", "column_name": "AIRPORT_NAME"}, {"data_precision": 4000, "column_comments": "DESCRIPTION describes the Occupant in more detail, e.g., aerodrome.", "short_name": "DESCRIPTN", "data_type": "VARCHAR2", "column_name": "DESCRIPTION"}, {"data_precision": 1000, "column_comments": "PHYSICAL_ADDRESS contains the civic or non-civic address as a single string, structured according to the specification of the Physical Address and Geocoding Standard, e.g., 420 GORGE RD E, VICTORIA, BC.", "short_name": "ADDRESS", "data_type": "VARCHAR2", "column_name": "PHYSICAL_ADDRESS"}, {"data_precision": 1000, "column_comments": "ALIAS_ADDRESS contains an address string, not a parsed address. It is the address that will be displayed for presentation purposes, e.g., 32900 Marshall Road, Abbotsford, BC", "short_name": "ALIAS_ADDR", "data_type": "VARCHAR2", "column_name": "ALIAS_ADDRESS"}, {"data_precision": 200, "column_comments": "STREET_ADDRESS is a free form expression of the site descriptor (e.g., unit) and the civic building number / street / street indicator portion of an address, e.g., Unit 1, 123 Main Street East.", "short_name": "ST_ADDRESS", "data_type": "VARCHAR2", "column_name": "STREET_ADDRESS"}, {"data_precision": 15, "column_comments": "POSTAL_CODE is the Canadian Postal code value associated with the physical address, e.g., V9Z 2K1", "short_name": "POSTAL_CD", "data_type": "VARCHAR2", "column_name": "POSTAL_CODE"}, {"data_precision": 100, "column_comments": "LOCALITY is the name of the municipality, community, Federal Indian Reserve (IR), subdivision, regional district, indigenous land or natural feature the occupant site is located in, e.g., Victoria, Saanich IR 1, Capital Regional District.", "short_name": "LOCALITY", "data_type": "VARCHAR2", "column_name": "LOCALITY"}, {"data_precision": 50, "column_comments": "CONTACT PHONE contains the general office phone number of the Occupant, e.g., (250) 555-1234 or 250-555-1234", "short_name": "CONT_PHONE", "data_type": "VARCHAR2", "column_name": "CONTACT_PHONE"}, {"data_precision": 100, "column_comments": "CONTACT_EMAIL contains the \\"general office\\" email address of the Occupant.", "short_name": "CONT_EMAIL", "data_type": "VARCHAR2", "column_name": "CONTACT_EMAIL"}, {"data_precision": 50, "column_comments": "CONTACT FAX contains the general office fax number of the Occupant, e.g., (250) 555-1234 or 250-555-1234", "short_name": "CONT_FAX", "data_type": "VARCHAR2", "column_name": "CONTACT_FAX"}, {"data_precision": 500, "column_comments": "WEBSITE_URL contains the link to the Home page of the Occupant\'s Website", "short_name": "WEBSITE", "data_type": "VARCHAR2", "column_name": "WEBSITE_URL"}, {"data_precision": 500, "column_comments": "IMAGE_URL contains a full URL link to a picture of the Occupant\'s Location.", "short_name": "IMAGE_URL", "data_type": "VARCHAR2", "column_name": "IMAGE_URL"}, {"data_precision": 9, "column_comments": "LATITUDE is the geographic coordinate, in decimal degrees (dd.dddddd), of the location of the feature as measured from the equator, e.g., 55.323653", "short_name": "LATITUDE", "data_type": "NUMBER", "column_name": "LATITUDE"}, {"data_precision": 10, "column_comments": "LONGITUDE is the geographic coordinate, in decimal degrees (-ddd.dddddd), of the location of the feature as measured from the prime meridian, e.g., -123.093544", "short_name": "LONGITUDE", "data_type": "NUMBER", "column_name": "LONGITUDE"}, {"data_precision": 1000, "column_comments": "KEYWORDS contains text strings supplied by the Custodian, to be used for search/query purposes. Keywords are separated by the ; delimiter.", "short_name": "KEYWORDS", "data_type": "VARCHAR2", "column_name": "KEYWORDS"}, {"data_precision": 7, "column_comments": "DATE_UPDATED contains the date that the Occupant data was updated in the Occupant structure (system-generated)", "short_name": "DT_UPDATE", "data_type": "DATE", "column_name": "DATE_UPDATED"}, {"data_precision": 1, "column_comments": "SITE_GEOCODED_IND contains a Flag/indicator (Y/N) that the Occupant Physical Address has been geo-coded by the DataBC Address Geocoder and the results provide a valid site address, e.g., Y, N", "short_name": "GEOCD_IND", "data_type": "VARCHAR2", "column_name": "SITE_GEOCODED_IND"}, {"data_precision": 100, "column_comments": "AERODROME STATUS identifies if the facility is certified or registered according to Transport Canada standards, or a derived status from other sources, i.e., Certified, Registered, Decommissioned, Null (unknown).", "short_name": "AER_STATUS", "data_type": "VARCHAR2", "column_name": "AERODROME_STATUS"}, {"data_precision": 1, "column_comments": "AIRCRAFT ACCESS IND indicates whether fixed wing aircraft, not including seaplanes, can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "AIRCR_ACS", "data_type": "VARCHAR2", "column_name": "AIRCRAFT_ACCESS_IND"}, {"data_precision": 50, "column_comments": "DATA_SOURCE is the project or resource from which the aerodrome data was derived, e.g., Canadian Flight Supplement.", "short_name": "DATA_SRCE", "data_type": "VARCHAR2", "column_name": "DATA_SOURCE"}, {"data_precision": 50, "column_comments": "DATA SOURCE YEAR is the year of the project or resource containing the listed aerodrome data, e.g., 2014.", "short_name": "DATASRC_YR", "data_type": "VARCHAR2", "column_name": "DATA_SOURCE_YEAR"}, {"data_precision": 10, "column_comments": "ELEVATION is the published elevation (in metres) of an aerodrome, or if not published, elevation taken from Google Earth (in metres), e.g., 10", "short_name": "ELEVATION", "data_type": "NUMBER", "column_name": "ELEVATION"}, {"data_precision": 1, "column_comments": "FUEL_AVAILABILITY_IND indicates whether fuel is available at this aerodrome, i.e., Y, N, Null (unknown)", "short_name": "FUEL_AVAIL", "data_type": "VARCHAR2", "column_name": "FUEL_AVAILABILITY_IND"}, {"data_precision": 1, "column_comments": "HELICOPTER_ACCESS_IND indicates whether helicopters can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "HELI_ACS", "data_type": "VARCHAR2", "column_name": "HELICOPTER_ACCESS_IND"}, {"data_precision": 4, "column_comments": "IATA_CODE is the International Air Transport Associations\'s unique identifier code, e.g., YYJ.", "short_name": "IATA", "data_type": "VARCHAR2", "column_name": "IATA_CODE"}, {"data_precision": 4, "column_comments": "ICAO_CODE is the International Civil Aviation Organizations\'s unique identifier code, e.g., CYYJ.", "short_name": "ICAO", "data_type": "VARCHAR2", "column_name": "ICAO_CODE"}, {"data_precision": 10, "column_comments": "MAX_RUNWAY_LENGTH is the length of the longest runway at an aerodrome in metres, e.g., 700", "short_name": "MX_RWAY_LN", "data_type": "NUMBER", "column_name": "MAX_RUNWAY_LENGTH"}, {"data_precision": 10, "column_comments": "NUMBER_OF_RUNWAYS is the total number of runways at an aerodrome, e.g., 5", "short_name": "NUM_RWAY", "data_type": "NUMBER", "column_name": "NUMBER_OF_RUNWAYS"}, {"data_precision": 1, "column_comments": "OIL_AVAILABILITY_IND indicates whether fuel oil is available at this aerodrome, i.e., Y, N, Null (unknown)", "short_name": "OIL_AVAIL", "data_type": "VARCHAR2", "column_name": "OIL_AVAILABILITY_IND"}, {"data_precision": 50, "column_comments": "RUNWAY_SURFACE identifies the material used in a runway or helipad\'s construction, e.g., gravel, asphalt, Null (unknown).", "short_name": "RWAY_SURF", "data_type": "VARCHAR2", "column_name": "RUNWAY_SURFACE"}, {"data_precision": 1, "column_comments": "SEAPLANE_ACCESS_IND indicates whether seaplanes can land at this aerodrome, i.e., Y, N, Null (unknown).", "short_name": "SEAPLN_ACC", "data_type": "VARCHAR2", "column_name": "SEAPLANE_ACCESS_IND"}, {"data_precision": 4, "column_comments": "TC_LID_CODE is the Transport Canada Location Identifier unique code, e.g., CAP5.", "short_name": "TC_LID", "data_type": "VARCHAR2", "column_name": "TC_LID_CODE"}, {"data_precision": 64, "column_comments": "SHAPE is the column used to reference the spatial coordinates defining the feature.", "short_name": "SHAPE", "data_type": "SDO_GEOMETRY", "column_name": "SHAPE"}, {"data_precision": 10, "column_comments": "SEQUENCE_ID contains a value to distinguish occupant instances. Where a single occupant can have multiple instances (representing different services, for example), this field distinguishes this occupant instance from other instances of the same or different occupants.", "short_name": "SEQ_ID", "data_type": "NUMBER", "column_name": "SEQUENCE_ID"}, {"data_precision": 4000, "column_comments": "SE_ANNO_CAD_DATA is a binary column used by spatial tools to store annotation, curve features and CAD data when using the SDO_GEOMETRY storage data type.", "data_type": "BLOB", "column_name": "SE_ANNO_CAD_DATA"}]""" def test_get_table_name(): From aad944118917722627720c884577db0aba9b6e04 Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Wed, 14 Aug 2024 16:47:19 -0700 Subject: [PATCH 03/17] simplify looking for table schema, return empty description dict when schema not present in api --- bcdata/bcdc.py | 72 +++++++++++++++----------------------------------- 1 file changed, 22 insertions(+), 50 deletions(-) diff --git a/bcdata/bcdc.py b/bcdata/bcdc.py index 0affb0f..b7d2bfb 100644 --- a/bcdata/bcdc.py +++ b/bcdata/bcdc.py @@ -36,7 +36,10 @@ def _package_show(package): @stamina.retry(on=requests.HTTPError, timeout=60) def _table_definition(table_name): - r = requests.get(BCDC_API_URL + "package_search", params={"q": table_name}) + r = requests.get( + BCDC_API_URL + "package_search", + params={"q": "res_extras_object_name:" + table_name}, + ) if r.status_code != 200: log.warning(r.headers) if r.status_code in [400, 401, 404]: @@ -66,7 +69,7 @@ def get_table_name(package): return layer_names[0] -def get_table_definition(table_name): # noqa: C901 +def get_table_definition(table_name): """ Given a table/object name, search BCDC for the first package/resource with a matching "object_name", returns dict: {"comments": <>, "notes": <>, "schema": {} } @@ -84,7 +87,11 @@ def get_table_definition(table_name): # noqa: C901 log.warning( f"BC Data Catalouge API search provides no results for: {table_name}" ) - return [] + return { + "description": None, + "comments": None, + "schema": None, + } else: matches = [] # iterate through results of search (packages) @@ -92,53 +99,18 @@ def get_table_definition(table_name): # noqa: C901 notes = result["notes"] # iterate through resources associated with each package for resource in result["resources"]: - # where to find schema details depends on format type - if resource["format"] == "wms": - if urlparse(resource["url"]).path.split("/")[3] == table_name: - if "object_table_comments" in resource.keys(): - table_comments = resource["object_table_comments"] - else: - table_comments = None - # only add to matches if schema details found - if "details" in resource.keys() and resource["details"] != "": - table_details = resource["details"] - matches.append((notes, table_comments, table_details)) - log.debug(resource) - # oracle sde format type - if resource["format"] == "oracle_sde": - if resource["object_name"] == table_name: - if "object_table_comments" in resource.keys(): - table_comments = resource["object_table_comments"] - else: - table_comments = None - # only add to matches if schema details found - if "details" in resource.keys() and resource["details"] != "": - table_details = resource["details"] - matches.append((notes, table_comments, table_details)) - log.debug(resource) - - # multiple format resource - elif resource["format"] == "multiple": - # if multiple format, check for table name match in the preview info - if resource["preview_info"]: - # check that layer_name key is present - if "layer_name" in resource["preview_info"][0].keys(): - # then check if it matches the table name - if resource["preview_info"][0]["layer_name"] == table_name: - if "object_table_comments" in resource.keys(): - table_comments = resource["object_table_comments"] - else: - table_comments = None - # only add to matches if schema details found - if ( - "details" in resource.keys() - and resource["details"] != "" - ): - table_details = resource["details"] - matches.append( - (notes, table_comments, table_details) - ) - log.debug(resource) + log.debug(resource) + if "object_table_comments" in resource.keys(): + table_comments = resource["object_table_comments"] + else: + table_comments = None + if "details" in resource.keys() and resource["details"] != "": + table_details = resource["details"] + else: + table_details = None + # require schema but not description + if table_details: + matches.append((notes, table_comments, table_details)) if len(matches) > 0: matched = matches[0] # just retain the first match From 81c3908eb96cd87736a51902e4a6ea1645ca3b66 Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Wed, 14 Aug 2024 16:52:35 -0700 Subject: [PATCH 04/17] note changes, bump version --- CHANGES.txt | 4 ++++ bcdata/__init__.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 4f9a3bf..031e80c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,6 +1,10 @@ Changes ======= +0.12.0 () +------------------ +- support Data Catalogue API changes (#188) + 0.11.0 (2024-07-29) ------------------ - upgrade dependencies diff --git a/bcdata/__init__.py b/bcdata/__init__.py index d9017d8..78ec6cf 100644 --- a/bcdata/__init__.py +++ b/bcdata/__init__.py @@ -15,4 +15,4 @@ "https://raw.githubusercontent.com/smnorris/bcdata/main/data/primary_keys.json" ) -__version__ = "0.11.1dev0" +__version__ = "0.12.0dev0" From 33ef9c66c2b2c9af0ac06739e4432f1b9eee4213 Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Thu, 15 Aug 2024 17:22:58 -0700 Subject: [PATCH 05/17] schema is now json, fix merge error --- bcdata/bcdc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bcdata/bcdc.py b/bcdata/bcdc.py index d642075..cad282c 100644 --- a/bcdata/bcdc.py +++ b/bcdata/bcdc.py @@ -108,13 +108,13 @@ def get_table_definition(table_name): # (below only retains the final schema/comments if there is more than one # package with this information) if "details" in resource.keys() and resource["details"] != "": - table_definition["schema"] = json.loads(resource["details"]) + table_definition["schema"] = resource["details"] # look for comments only if details/schema is present if "object_table_comments" in resource.keys(): table_definition["comments"] = resource["object_table_comments"] if not table_definition["schema"]: - raise log.warning( + log.warning( f"BC Data Catalouge API search provides no schema for: {table_name}" ) From ff3015548fbc80efc09f11e4d92c9092c8400c3a Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Thu, 15 Aug 2024 17:26:00 -0700 Subject: [PATCH 06/17] fix empty details check --- bcdata/bcdc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcdata/bcdc.py b/bcdata/bcdc.py index cad282c..338359d 100644 --- a/bcdata/bcdc.py +++ b/bcdata/bcdc.py @@ -107,7 +107,7 @@ def get_table_definition(table_name): # presume description and details are the same for all resources # (below only retains the final schema/comments if there is more than one # package with this information) - if "details" in resource.keys() and resource["details"] != "": + if "details" in resource.keys() and resource["details"] != []: table_definition["schema"] = resource["details"] # look for comments only if details/schema is present if "object_table_comments" in resource.keys(): From 0ba4483e1895ae762710e7aeb22e07d061b3cc9b Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Tue, 3 Dec 2024 12:47:26 -0800 Subject: [PATCH 07/17] add pyproject.toml, remove setup.py and move module folder into /src --- pyproject.toml | 55 ++++++++++++++++++++++++++++++ setup.py | 55 ------------------------------ {bcdata => src/bcdata}/__init__.py | 0 {bcdata => src/bcdata}/bc2pg.py | 0 {bcdata => src/bcdata}/bcdc.py | 0 {bcdata => src/bcdata}/cli.py | 0 {bcdata => src/bcdata}/database.py | 0 {bcdata => src/bcdata}/wcs.py | 0 {bcdata => src/bcdata}/wfs.py | 0 9 files changed, 55 insertions(+), 55 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.py rename {bcdata => src/bcdata}/__init__.py (100%) rename {bcdata => src/bcdata}/bc2pg.py (100%) rename {bcdata => src/bcdata}/bcdc.py (100%) rename {bcdata => src/bcdata}/cli.py (100%) rename {bcdata => src/bcdata}/database.py (100%) rename {bcdata => src/bcdata}/wcs.py (100%) rename {bcdata => src/bcdata}/wfs.py (100%) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..dfd7947 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,55 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "bcdata" +dynamic = ["version"] +readme = "README.md" +license = {file = "LICENSE"} +authors = [ + {name="Simon Norris", email="snorris@hillcrestgeo.ca"}, +] +description = "Download open data, monitor and report on changes" +requires-python = ">=3.9" +classifiers = [ + "Development Status :: 1 - Planning", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "License :: OSI Approved :: MIT License", + "Topic :: Scientific/Engineering :: GIS", + "Operating System :: OS Independent" +] +dependencies = [ + "geoalchemy2", + "geopandas", + "owslib", + "psycopg2", + "rasterio", + "requests", + "sqlalchemy", + "stamina" +] + +[project.optional-dependencies] +test = [ + "pytest", + "pre-commit", + "requests-mock" +] + +[project.scripts] +bcdata = "bcdata.cli:cli" + +[project.urls] +Homepage = "https://github.com/smnorris/bcdata" +Issues = "https://github.com/smnorris/bcdata" + +[tool.setuptools.dynamic] +version = {attr = "bcdata.__version__"} + +[tool.ruff] +line-length = 100 \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 4d85047..0000000 --- a/setup.py +++ /dev/null @@ -1,55 +0,0 @@ -import os - -from setuptools import find_packages, setup - - -def read(fname): - return open(os.path.join(os.path.dirname(__file__), fname)).read() - - -# Parse the version -with open("bcdata/__init__.py", "r") as f: - for line in f: - if line.find("__version__") >= 0: - version = line.split("=")[1].strip() - version = version.strip('"') - version = version.strip("'") - break - -# Get the long description from the relevant file -with open("README.md", encoding="utf-8") as f: - long_description = f.read() - -setup( - name="bcdata", - version=version, - description="Python tools for quick access to DataBC geo-data available via WFS", - long_description=long_description, - long_description_content_type="text/markdown", - classifiers=[ - "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Topic :: Utilities", - "Topic :: Scientific/Engineering :: GIS", - ], - keywords='gis geospatial data BC DataBC download "Britsh Columbia"', - author="Simon Norris", - author_email="snorris@hillcrestgeo.ca", - url="https://github.com/smnorris/bcdata", - license="MIT", - packages=find_packages(exclude=["ez_setup", "examples", "tests"]), - include_package_data=True, - zip_safe=False, - install_requires=read("requirements.txt").splitlines(), - extras_require={"test": ["pytest>=3", "pre-commit", "requests_mock"]}, - entry_points=""" - [console_scripts] - bcdata=bcdata.cli:cli - """, -) diff --git a/bcdata/__init__.py b/src/bcdata/__init__.py similarity index 100% rename from bcdata/__init__.py rename to src/bcdata/__init__.py diff --git a/bcdata/bc2pg.py b/src/bcdata/bc2pg.py similarity index 100% rename from bcdata/bc2pg.py rename to src/bcdata/bc2pg.py diff --git a/bcdata/bcdc.py b/src/bcdata/bcdc.py similarity index 100% rename from bcdata/bcdc.py rename to src/bcdata/bcdc.py diff --git a/bcdata/cli.py b/src/bcdata/cli.py similarity index 100% rename from bcdata/cli.py rename to src/bcdata/cli.py diff --git a/bcdata/database.py b/src/bcdata/database.py similarity index 100% rename from bcdata/database.py rename to src/bcdata/database.py diff --git a/bcdata/wcs.py b/src/bcdata/wcs.py similarity index 100% rename from bcdata/wcs.py rename to src/bcdata/wcs.py diff --git a/bcdata/wfs.py b/src/bcdata/wfs.py similarity index 100% rename from bcdata/wfs.py rename to src/bcdata/wfs.py From 1b90abde0e2c1725fa8e691fde5646209c084fff Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Tue, 3 Dec 2024 12:57:27 -0800 Subject: [PATCH 08/17] testing workflow - remove refs to setup.py, fix paths --- .github/workflows/tests.yml | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 40fdd18..1bf55e4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,20 +6,18 @@ on: paths: - '.github/workflows/tests.yaml' - 'requirements*.txt' - - 'setup.py' - 'MANIFEST.in' - 'pyproject.toml' - - 'bcdata/**' + - 'src/bcdata/**' - 'tests/**' pull_request: branches: [ main ] paths: - '.github/workflows/tests.yaml' - 'requirements*.txt' - - 'setup.py' - 'MANIFEST.in' - 'pyproject.toml' - - 'bcdata/**' + - 'src/bcdata/**' - 'tests/**' jobs: @@ -78,16 +76,14 @@ jobs: - name: Install dependencies run: | - python${{ matrix.python-version }} -m venv testenv - . testenv/bin/activate + python${{ matrix.python-version }} -m venv .venv + . .venv/bin/activate python -m pip install --upgrade pip - python -m pip install -r requirements-dev.txt - python setup.py clean - python -m pip install --no-deps --force-reinstall -e .[test] + python -m pip install .[test] - name: Run tests run: | - . testenv/bin/activate + . .venv/bin/activate python -m pytest -v -rxXs env: DATABASE_URL: postgresql://postgres:postgres@postgres:5432/postgres From 0c930357bc676170e5dc1577bae971b94e95864b Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Tue, 3 Dec 2024 13:02:08 -0800 Subject: [PATCH 09/17] use psycopg2 bin --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dfd7947..20575c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "geoalchemy2", "geopandas", "owslib", - "psycopg2", + "psycopg2-binary", "rasterio", "requests", "sqlalchemy", From 9d2319931382504c33af53db1511fdf5afffce98 Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Tue, 3 Dec 2024 13:46:38 -0800 Subject: [PATCH 10/17] add release workflow --- .github/workflows/release.yml | 117 ++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..0796a77 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,117 @@ +name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI + +on: push + +jobs: + build: + name: Build distribution 📦 + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install pypa/build + run: >- + python3 -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: python3 -m build + - name: Store the distribution packages + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish-to-pypi: + name: >- + Publish Python 🐍 distribution 📦 to PyPI + if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/bcdata + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + github-release: + name: >- + Sign the Python 🐍 distribution 📦 with Sigstore + and upload them to GitHub Release + needs: + - publish-to-pypi + runs-on: ubuntu-latest + + permissions: + contents: write # IMPORTANT: mandatory for making GitHub Releases + id-token: write # IMPORTANT: mandatory for sigstore + + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Sign the dists with Sigstore + uses: sigstore/gh-action-sigstore-python@v3.0.0 + with: + inputs: >- + ./dist/*.tar.gz + ./dist/*.whl + - name: Create GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + run: >- + gh release create + '${{ github.ref_name }}' + --repo '${{ github.repository }}' + --notes "" + - name: Upload artifact signatures to GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + # Upload to GitHub Release using the `gh` CLI. + # `dist/` contains the built packages, and the + # sigstore-produced signatures and certificates. + run: >- + gh release upload + '${{ github.ref_name }}' dist/** + --repo '${{ github.repository }}' + + publish-to-testpypi: + name: Publish Python 🐍 distribution 📦 to TestPyPI + needs: + - build + runs-on: ubuntu-latest + + environment: + name: testpypi + url: https://test.pypi.org/p/bcdata + + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution 📦 to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ \ No newline at end of file From e6821ff7189e13a76fcd20fd6dc79d77b2678f3d Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Tue, 3 Dec 2024 13:50:33 -0800 Subject: [PATCH 11/17] trigger workflow --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0796a77..62cff32 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,4 +1,4 @@ -name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI +name: Publish to PyPI and TestPyPI on: push From 0f78633608272d2d83213495eff53ea0055c91f3 Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Tue, 3 Dec 2024 14:38:33 -0800 Subject: [PATCH 12/17] format with ruff --- .gitignore | 4 -- .pre-commit-config.yaml | 11 +++++ data/primary_keys.json | 105 ++++++++++++++++++++-------------------- data/validate.py | 1 - src/bcdata/__init__.py | 27 +++++------ src/bcdata/bc2pg.py | 34 +++---------- src/bcdata/bcdc.py | 29 +++-------- src/bcdata/cli.py | 12 ++--- src/bcdata/database.py | 20 ++------ src/bcdata/wcs.py | 10 +--- src/bcdata/wfs.py | 43 +++++----------- tests/test_bc2pg.py | 24 +++------ tests/test_bcdc.py | 12 ++--- tests/test_wcs.py | 7 +-- tests/test_wfs.py | 24 ++------- 15 files changed, 127 insertions(+), 236 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.gitignore b/.gitignore index 539acc2..6e8737e 100644 --- a/.gitignore +++ b/.gitignore @@ -58,7 +58,3 @@ target/ # ignore generated file that gets manually copied into README cli.md - -# ignore linting and pre-commit config -.pre-commit-config.yaml -.flake8 \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..c8aca82 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.9 + hooks: + - id: ruff + name: lint with ruff + - id: ruff + name: sort imports with ruff + args: [--select, I, --fix] + - id: ruff-format + name: format with ruff \ No newline at end of file diff --git a/data/primary_keys.json b/data/primary_keys.json index cecd638..969f3e9 100644 --- a/data/primary_keys.json +++ b/data/primary_keys.json @@ -1,54 +1,53 @@ { - "whse_admin_boundaries.clab_indian_reserves": "clab_id", - "whse_admin_boundaries.clab_national_parks": "national_park_id", - "whse_admin_boundaries.fadm_designated_areas": "feature_id", - "whse_admin_boundaries.fadm_special_protection_area": "feature_id", - "whse_admin_boundaries.fadm_tfl_all_sp": "tfl_all_sysid", - "whse_basemapping.bcgs_20k_grid": "map_tile", - "whse_basemapping.dbm_mof_50k_grid": "map_tile", - "whse_basemapping.gba_local_reg_greenspaces_sp": "local_reg_greenspace_id", - "whse_basemapping.gba_local_reg_greenspaces_sp": "local_reg_greenspace_id", - "whse_basemapping.gba_railway_structure_lines_sp": "railway_structure_line_id", - "whse_basemapping.gba_railway_tracks_sp": "railway_track_id", - "whse_basemapping.gba_transmission_lines_sp": "transmission_line_id", - "whse_basemapping.gns_geographical_names_sp": "geographical_names_id", - "whse_basemapping.nts_250k_grid": "map_tile", - "whse_basemapping.trim_cultural_lines": "objectid", - "whse_basemapping.trim_cultural_points": "objectid", - "whse_basemapping.trim_ebm_airfields": "objectid", - "whse_basemapping.trim_ebm_ocean": "objectid", - "whse_basemapping.utmg_utm_zones_sp": "utm_zone", - "whse_cadastre.pmbc_parcel_fabric_poly_svw": "parcel_fabric_poly_id", - "whse_environmental_monitoring.envcan_hydrometric_stn_sp": "hydrometric_station_id", - "whse_fish.fiss_stream_sample_sites_sp": "stream_sample_site_id", - "whse_fish.pscis_assessment_svw": "stream_crossing_id", - "whse_forest_tenure.ften_managed_licence_poly_svw": "objectid", - "whse_forest_tenure.ften_range_poly_svw": "objectid", - "whse_forest_tenure.ften_recreation_poly_svw": "rmf_skey", - "whse_forest_vegetation.ogsr_priority_def_area_cur_sp": "ogsr_pdac_sysid", - "whse_forest_vegetation.rec_visual_landscape_inventory": "vli_polygon_no", - "whse_forest_vegetation.veg_comp_lyr_r1_poly": "feature_id", - "whse_forest_vegetation.veg_consolidated_cut_blocks_sp": "veg_consolidated_cut_block_id", - "whse_human_cultural_economic.hist_historic_environments_sp": "historic_environment_id", - "whse_imagery_and_base_maps.mot_road_structure_sp": "hwy_structure_class_id", - "whse_land_use_planning.rmp_landscape_rsrv_design_sp": "rmp_lrd_sysid", - "whse_land_use_planning.rmp_ogma_legal_current_svw": "legal_ogma_internal_id", - "whse_land_use_planning.rmp_ogma_non_legal_current_svw": "non_legal_ogma_internal_id", - "whse_land_use_planning.rmp_plan_legal_poly_svw": "legal_feat_id", - "whse_land_use_planning.rmp_plan_non_legal_poly_svw": "non_legal_feat_id", - "whse_land_use_planning.rmp_strgc_land_rsrce_plan_svw": "strgc_land_rsrce_plan_id", - "whse_legal_admin_boundaries.abms_municipalities_sp": "lgl_admin_area_id", - "whse_legal_admin_boundaries.wcl_conservation_areas_ngo_sp": "conservation_areas_ngo_id", - "whse_legal_admin_boundaries.wcl_conservation_lands_sp": "conservation_land_id", - "whse_mineral_tenure.mta_acquired_tenure_svw": "tenure_number_id", - "whse_mineral_tenure.og_petrlm_dev_rds_pre06_pub_sp": "og_petrlm_dev_rd_pre06_pub_id", - "whse_mineral_tenure.og_road_segment_permit_sp": "og_road_segment_permit_id", - "whse_tantalis.ta_conservancy_areas_svw": "admin_area_sid", - "whse_tantalis.ta_crown_tenures_svw": "objectid", - "whse_tantalis.ta_park_ecores_pa_svw": "admin_area_sid", - "whse_tantalis.ta_wildlife_mgmt_areas_svw": "admin_area_sid", - "whse_water_management.wls_community_ws_pub_svw": "wls_cw_sysid", - "whse_wildlife_management.wcp_fish_sensitive_ws_poly": "fish_sensitive_ws_poly_id", - "whse_wildlife_management.wcp_ungulate_winter_range_sp": "ungulate_winter_range_id", - "whse_wildlife_management.wcp_wildlife_habitat_area_poly": "habitat_area_id" -} \ No newline at end of file + "whse_admin_boundaries.clab_indian_reserves": "clab_id", + "whse_admin_boundaries.clab_national_parks": "national_park_id", + "whse_admin_boundaries.fadm_designated_areas": "feature_id", + "whse_admin_boundaries.fadm_special_protection_area": "feature_id", + "whse_admin_boundaries.fadm_tfl_all_sp": "tfl_all_sysid", + "whse_basemapping.bcgs_20k_grid": "map_tile", + "whse_basemapping.dbm_mof_50k_grid": "map_tile", + "whse_basemapping.gba_local_reg_greenspaces_sp": "local_reg_greenspace_id", + "whse_basemapping.gba_railway_structure_lines_sp": "railway_structure_line_id", + "whse_basemapping.gba_railway_tracks_sp": "railway_track_id", + "whse_basemapping.gba_transmission_lines_sp": "transmission_line_id", + "whse_basemapping.gns_geographical_names_sp": "geographical_names_id", + "whse_basemapping.nts_250k_grid": "map_tile", + "whse_basemapping.trim_cultural_lines": "objectid", + "whse_basemapping.trim_cultural_points": "objectid", + "whse_basemapping.trim_ebm_airfields": "objectid", + "whse_basemapping.trim_ebm_ocean": "objectid", + "whse_basemapping.utmg_utm_zones_sp": "utm_zone", + "whse_cadastre.pmbc_parcel_fabric_poly_svw": "parcel_fabric_poly_id", + "whse_environmental_monitoring.envcan_hydrometric_stn_sp": "hydrometric_station_id", + "whse_fish.fiss_stream_sample_sites_sp": "stream_sample_site_id", + "whse_fish.pscis_assessment_svw": "stream_crossing_id", + "whse_forest_tenure.ften_managed_licence_poly_svw": "objectid", + "whse_forest_tenure.ften_range_poly_svw": "objectid", + "whse_forest_tenure.ften_recreation_poly_svw": "rmf_skey", + "whse_forest_vegetation.ogsr_priority_def_area_cur_sp": "ogsr_pdac_sysid", + "whse_forest_vegetation.rec_visual_landscape_inventory": "vli_polygon_no", + "whse_forest_vegetation.veg_comp_lyr_r1_poly": "feature_id", + "whse_forest_vegetation.veg_consolidated_cut_blocks_sp": "veg_consolidated_cut_block_id", + "whse_human_cultural_economic.hist_historic_environments_sp": "historic_environment_id", + "whse_imagery_and_base_maps.mot_road_structure_sp": "hwy_structure_class_id", + "whse_land_use_planning.rmp_landscape_rsrv_design_sp": "rmp_lrd_sysid", + "whse_land_use_planning.rmp_ogma_legal_current_svw": "legal_ogma_internal_id", + "whse_land_use_planning.rmp_ogma_non_legal_current_svw": "non_legal_ogma_internal_id", + "whse_land_use_planning.rmp_plan_legal_poly_svw": "legal_feat_id", + "whse_land_use_planning.rmp_plan_non_legal_poly_svw": "non_legal_feat_id", + "whse_land_use_planning.rmp_strgc_land_rsrce_plan_svw": "strgc_land_rsrce_plan_id", + "whse_legal_admin_boundaries.abms_municipalities_sp": "lgl_admin_area_id", + "whse_legal_admin_boundaries.wcl_conservation_areas_ngo_sp": "conservation_areas_ngo_id", + "whse_legal_admin_boundaries.wcl_conservation_lands_sp": "conservation_land_id", + "whse_mineral_tenure.mta_acquired_tenure_svw": "tenure_number_id", + "whse_mineral_tenure.og_petrlm_dev_rds_pre06_pub_sp": "og_petrlm_dev_rd_pre06_pub_id", + "whse_mineral_tenure.og_road_segment_permit_sp": "og_road_segment_permit_id", + "whse_tantalis.ta_conservancy_areas_svw": "admin_area_sid", + "whse_tantalis.ta_crown_tenures_svw": "objectid", + "whse_tantalis.ta_park_ecores_pa_svw": "admin_area_sid", + "whse_tantalis.ta_wildlife_mgmt_areas_svw": "admin_area_sid", + "whse_water_management.wls_community_ws_pub_svw": "wls_cw_sysid", + "whse_wildlife_management.wcp_fish_sensitive_ws_poly": "fish_sensitive_ws_poly_id", + "whse_wildlife_management.wcp_ungulate_winter_range_sp": "ungulate_winter_range_id", + "whse_wildlife_management.wcp_wildlife_habitat_area_poly": "habitat_area_id" +} diff --git a/data/validate.py b/data/validate.py index 4256cb0..97c8887 100644 --- a/data/validate.py +++ b/data/validate.py @@ -4,7 +4,6 @@ import bcdata - LOG_FORMAT = "%(asctime)s:%(levelname)s:%(name)s: %(message)s" with open("primary_keys.json", "r") as file: diff --git a/src/bcdata/__init__.py b/src/bcdata/__init__.py index 51055d6..2105ecb 100644 --- a/src/bcdata/__init__.py +++ b/src/bcdata/__init__.py @@ -1,21 +1,18 @@ import requests -from .bc2pg import bc2pg -from .bcdc import get_table_definition, get_table_name -from .wcs import get_dem -from .wfs import ( - define_requests, - get_count, - get_data, - get_features, - get_sortkey, - list_tables, - validate_name, -) +from .bc2pg import bc2pg as bc2pg +from .bcdc import get_table_definition as get_table_definition +from .bcdc import get_table_name as get_table_name +from .wcs import get_dem as get_dem +from .wfs import define_requests as define_requests +from .wfs import get_count as get_count +from .wfs import get_data as get_data +from .wfs import get_features as get_features +from .wfs import get_sortkey as get_sortkey +from .wfs import list_tables as list_tables +from .wfs import validate_name as validate_name -PRIMARY_KEY_DB_URL = ( - "https://raw.githubusercontent.com/smnorris/bcdata/main/data/primary_keys.json" -) +PRIMARY_KEY_DB_URL = "https://raw.githubusercontent.com/smnorris/bcdata/main/data/primary_keys.json" # BCDC does not indicate which column in the schema is the primary key. # In this absence, bcdata maintains its own dictionary of {table: primary_key}, diff --git a/src/bcdata/bc2pg.py b/src/bcdata/bc2pg.py index 7f530f3..8af497c 100644 --- a/src/bcdata/bc2pg.py +++ b/src/bcdata/bc2pg.py @@ -1,12 +1,6 @@ -import json import logging -import os -import geopandas as gpd import numpy -import stamina -from geoalchemy2 import Geometry -import requests from shapely.geometry.linestring import LineString from shapely.geometry.multilinestring import MultiLineString from shapely.geometry.multipoint import MultiPoint @@ -103,19 +97,13 @@ def bc2pg( # noqa: C901 table_definition = bcdata.get_table_definition(dataset) if not table_definition["schema"]: - raise ValueError( - "Cannot create table, schema details not found via bcdc api" - ) + raise ValueError("Cannot create table, schema details not found via bcdc api") # if geometry type is not provided, determine type by making the first request if not geometry_type: - df = WFS.make_requests( - [urls[0]], as_gdf=True, crs="epsg:3005", lowercase=True - ) + df = WFS.make_requests([urls[0]], as_gdf=True, crs="epsg:3005", lowercase=True) geometry_type = df.geom_type.unique()[0] # keep only the first type - if numpy.any( - df.has_z.unique()[0] - ): # geopandas does not include Z in geom_type string + if numpy.any(df.has_z.unique()[0]): # geopandas does not include Z in geom_type string geometry_type = geometry_type + "Z" # if geometry type is still not populated try the last request @@ -129,9 +117,7 @@ def bc2pg( # noqa: C901 lowercase=True, silent=True, ) - geometry_type = df_temp.geom_type.unique()[ - 0 - ] # keep only the first type + geometry_type = df_temp.geom_type.unique()[0] # keep only the first type if numpy.any( df_temp.has_z.unique()[0] ): # geopandas does not include Z in geom_type string @@ -170,9 +156,7 @@ def bc2pg( # noqa: C901 # check if column provided in sortby option is present in dataset if sortby and sortby.lower() not in column_names: - raise ValueError( - f"Specified sortby column {sortby} is not present in {dataset}" - ) + raise ValueError(f"Specified sortby column {sortby} is not present in {dataset}") # load the data if not schema_only: @@ -180,9 +164,7 @@ def bc2pg( # noqa: C901 for n, url in enumerate(urls): # if first url not downloaded above when checking geom type, do now if df is None: - df = WFS.make_requests( - [url], as_gdf=True, crs="epsg:3005", lowercase=True - ) + df = WFS.make_requests([url], as_gdf=True, crs="epsg:3005", lowercase=True) # tidy the resulting dataframe df = df.rename_geometry("geom") # lowercasify @@ -205,9 +187,7 @@ def bc2pg( # noqa: C901 for feature in df["geom"] ] df["geom"] = [ - MultiLineString([feature]) - if isinstance(feature, LineString) - else feature + MultiLineString([feature]) if isinstance(feature, LineString) else feature for feature in df["geom"] ] df["geom"] = [ diff --git a/src/bcdata/bcdc.py b/src/bcdata/bcdc.py index c8561d9..6529432 100644 --- a/src/bcdata/bcdc.py +++ b/src/bcdata/bcdc.py @@ -1,4 +1,3 @@ -import json import logging from urllib.parse import urlparse @@ -77,9 +76,7 @@ def get_table_definition(table_name): # only allow searching for tables present in WFS list table_name = table_name.upper() if table_name not in bcdata.list_tables(): - raise ValueError( - f"Only tables available via WFS are supported, {table_name} not found" - ) + raise ValueError(f"Only tables available via WFS are supported, {table_name} not found") # search the api for the provided table r = _table_definition(table_name) @@ -94,9 +91,7 @@ def get_table_definition(table_name): # if there are no matching results, let the user know if r.json()["result"]["count"] == 0: - log.warning( - f"BC Data Catalogue API search provides no results for: {table_name}" - ) + log.warning(f"BC Data Catalogue API search provides no results for: {table_name}") else: # iterate through results of search (packages) for result in r.json()["result"]["results"]: @@ -105,18 +100,14 @@ def get_table_definition(table_name): # iterate through resources associated with each package for resource in result["resources"]: # only examine geographic resources with object name key - if ( - "object_name" in resource.keys() - and resource["bcdc_type"] == "geographic" - ): + if "object_name" in resource.keys() and resource["bcdc_type"] == "geographic": # confirm that object name matches table name and schema is present if ( ( table_name == resource["object_name"] # hack to handle object name / table name mismatch for NR Districts or ( - table_name - == "WHSE_ADMIN_BOUNDARIES.ADM_NR_DISTRICTS_SPG" + table_name == "WHSE_ADMIN_BOUNDARIES.ADM_NR_DISTRICTS_SPG" and resource["object_name"] == "WHSE_ADMIN_BOUNDARIES.ADM_NR_DISTRICTS_SP" ) @@ -127,19 +118,13 @@ def get_table_definition(table_name): table_definition["schema"] = resource["details"] # look for comments only if details/schema was found if "object_table_comments" in resource.keys(): - table_definition["comments"] = resource[ - "object_table_comments" - ] + table_definition["comments"] = resource["object_table_comments"] if not table_definition["schema"]: - log.warning( - f"BC Data Catalouge API search provides no schema for: {table_name}" - ) + log.warning(f"BC Data Catalouge API search provides no schema for: {table_name}") # add primary key if present in bcdata.primary_keys if table_name.lower() in bcdata.primary_keys: - table_definition["primary_key"] = bcdata.primary_keys[ - table_name.lower() - ].upper() + table_definition["primary_key"] = bcdata.primary_keys[table_name.lower()].upper() return table_definition diff --git a/src/bcdata/cli.py b/src/bcdata/cli.py index 073e66f..9775aa8 100644 --- a/src/bcdata/cli.py +++ b/src/bcdata/cli.py @@ -73,9 +73,7 @@ def bounds_handler(ctx, param, value): help='Bounds: "left bottom right top" or "[left, bottom, right, top]". Coordinates are BC Albers (default) or --bounds_crs', ) -dst_crs_opt = click.option( - "--dst-crs", "--dst_crs", default="epsg:4326", help="Destination CRS" -) +dst_crs_opt = click.option("--dst-crs", "--dst_crs", default="epsg:4326", help="Destination CRS") lowercase_opt = click.option( "--lowercase", "-l", is_flag=True, help="Write column/properties names as lowercase" @@ -393,9 +391,7 @@ def bc2pg( if refresh and append: raise ValueError("Options append and refresh are not compatible") if refresh and (schema == "bcdata"): - raise ValueError( - "Refreshing tables in bcdata schema is not supported, use another schema" - ) + raise ValueError("Refreshing tables in bcdata schema is not supported, use another schema") elif refresh and schema: schema_target = schema elif refresh and not schema: @@ -406,9 +402,7 @@ def bc2pg( if not table: table = bcdata.validate_name(dataset).lower().split(".") if schema_target + "." + table not in db.tables: - raise ValueError( - f"Cannot refresh, {schema_target}.{table} not found in database" - ) + raise ValueError(f"Cannot refresh, {schema_target}.{table} not found in database") out_table = bcdata.bc2pg( dataset, db_url, diff --git a/src/bcdata/database.py b/src/bcdata/database.py index c8cec36..e294238 100644 --- a/src/bcdata/database.py +++ b/src/bcdata/database.py @@ -81,9 +81,7 @@ def execute_many(self, sql, params): def create_schema(self, schema): if schema not in self.schemas: log.info(f"Schema {schema} does not exist, creating it") - dbq = sql.SQL("CREATE SCHEMA {schema}").format( - schema=sql.Identifier(schema) - ) + dbq = sql.SQL("CREATE SCHEMA {schema}").format(schema=sql.Identifier(schema)) self.execute(dbq) def drop_table(self, schema, table): @@ -98,18 +96,14 @@ def drop_table(self, schema, table): def refresh(self, schema, table): # move data from temp table to target table if schema + "." + table in self.tables: - log.warning( - f"Truncating table {schema}.{table} and refreshing from bcdata.{table}" - ) + log.warning(f"Truncating table {schema}.{table} and refreshing from bcdata.{table}") dbq = sql.SQL("TRUNCATE {schema}.{table}").format( schema=sql.Identifier(schema), table=sql.Identifier(table), ) self.execute(dbq) columns = list( - set(self.get_columns("bcdata", table)).intersection( - self.get_columns(schema, table) - ) + set(self.get_columns("bcdata", table)).intersection(self.get_columns(schema, table)) ) identifiers = [sql.Identifier(c) for c in columns] dbq = sql.SQL( @@ -124,9 +118,7 @@ def refresh(self, schema, table): self.execute(dbq) self.drop_table("bcdata", table) else: - raise ValueError( - f"Target table {schema}.{table} does not exist in database" - ) + raise ValueError(f"Target table {schema}.{table} does not exist in database") def define_table( self, @@ -140,9 +132,7 @@ def define_table( ): """build sqlalchemy table definition from bcdc provided json definitions""" # remove columns of unsupported types, redundant columns - table_details = [ - c for c in table_details if c["data_type"] in self.supported_types.keys() - ] + table_details = [c for c in table_details if c["data_type"] in self.supported_types.keys()] table_details = [ c for c in table_details diff --git a/src/bcdata/wcs.py b/src/bcdata/wcs.py index 1c7fa97..03a10c7 100644 --- a/src/bcdata/wcs.py +++ b/src/bcdata/wcs.py @@ -5,8 +5,6 @@ import requests import stamina -import bcdata - log = logging.getLogger(__name__) WCS_URL = "https://openmaps.gov.bc.ca/om/wcs" @@ -120,15 +118,11 @@ def get_dem( file.write(r.content) elif r.headers["Content-Type"] == "application/vnd.ogc.se_xml;charset=UTF-8": raise RuntimeError( - "WCS request {} failed with error {}".format( - r.url, str(r.content.decode("utf-8")) - ) + "WCS request {} failed with error {}".format(r.url, str(r.content.decode("utf-8"))) ) else: raise RuntimeError( - "WCS request {} failed, content type {}".format( - r.url, str(r.headers["Content-Type"]) - ) + "WCS request {} failed, content type {}".format(r.url, str(r.headers["Content-Type"])) ) if as_rasterio: return rasterio.open(out_file, "r") diff --git a/src/bcdata/wfs.py b/src/bcdata/wfs.py index 6c63662..9c873da 100644 --- a/src/bcdata/wfs.py +++ b/src/bcdata/wfs.py @@ -5,7 +5,6 @@ import sys import warnings import xml.etree.ElementTree as ET -from concurrent.futures import ThreadPoolExecutor from datetime import datetime, timedelta from pathlib import Path from urllib.parse import urlencode @@ -34,9 +33,7 @@ class BCWFS(object): def __init__(self, refresh=False): self.wfs_url = "https://openmaps.gov.bc.ca/geo/pub/wfs" - self.ows_url = ( - "http://openmaps.gov.bc.ca/geo/pub/ows?service=WFS&request=Getcapabilities" - ) + self.ows_url = "http://openmaps.gov.bc.ca/geo/pub/ows?service=WFS&request=Getcapabilities" # point to cache path if "BCDATA_CACHE" in os.environ: @@ -63,9 +60,7 @@ def __init__(self, refresh=False): ".//{http://www.opengis.net/ows/1.1}Constraint[@name='CountDefault']" )[0] self.pagesize = int( - countdefault.find( - "ows:DefaultValue", {"ows": "http://www.opengis.net/ows/1.1"} - ).text + countdefault.find("ows:DefaultValue", {"ows": "http://www.opengis.net/ows/1.1"}).text ) self.request_headers = {"User-Agent": "bcdata.py ({bcdata.__version__})"} @@ -98,17 +93,13 @@ def _request_schema(self, table): @stamina.retry(on=requests.HTTPError, timeout=60) def _request_capabilities(self): capabilities = ET.tostring( - wfs200.WebFeatureService_2_0_0( - self.ows_url, "2.0.0", None, False - )._capabilities, + wfs200.WebFeatureService_2_0_0(self.ows_url, "2.0.0", None, False)._capabilities, encoding="unicode", ) return capabilities @stamina.retry(on=requests.HTTPError, timeout=60) - def _request_count( - self, table, query=None, bounds=None, bounds_crs=None, geom_column=None - ): + def _request_count(self, table, query=None, bounds=None, bounds_crs=None, geom_column=None): payload = { "service": "WFS", "version": "2.0.0", @@ -194,9 +185,7 @@ def get_capabilities(self): with open(os.path.join(self.cache_path, "capabilities.xml"), "r") as f: return f.read() - def get_count( - self, dataset, query=None, bounds=None, bounds_crs="EPSG:3005", geom_column=None - ): + def get_count(self, dataset, query=None, bounds=None, bounds_crs="EPSG:3005", geom_column=None): """Ask DataBC WFS how many features there are in a table/query/bounds""" table = self.validate_name(dataset) geom_column = self.get_schema(table)["geometry_column"] @@ -244,9 +233,7 @@ def list_tables(self): return [ i.strip("pub:") for i in list( - WebFeatureService( - self.ows_url, version="2.0.0", xml=self.capabilities - ).contents + WebFeatureService(self.ows_url, version="2.0.0", xml=self.capabilities).contents ) ] @@ -351,9 +338,7 @@ def define_requests( urls.append(self.wfs_url + "?" + urlencode(request, doseq=True)) return urls - def make_requests( - self, urls, as_gdf=False, crs="epsg4326", lowercase=False, silent=False - ): + def make_requests(self, urls, as_gdf=False, crs="epsg4326", lowercase=False, silent=False): """turn urls into data""" # loop through urls results = [] @@ -365,17 +350,15 @@ def make_requests( # if specified, lowercasify all properties if lowercase: for feature in outjson["features"]: - feature["properties"] = { - k.lower(): v for k, v in feature["properties"].items() - } + feature["properties"] = {k.lower(): v for k, v in feature["properties"].items()} if not as_gdf: # If output crs is specified, include the crs object in the json # But as default, we prefer to default to 4326 and RFC7946 (no crs) if crs.lower() != "epsg:4326": crs_int = crs.split(":")[1] - outjson[ - "crs" - ] = f"""{{"type":"name","properties":{{"name":"urn:ogc:def:crs:EPSG::{crs_int}"}}}}""" + outjson["crs"] = ( + f"""{{"type":"name","properties":{{"name":"urn:ogc:def:crs:EPSG::{crs_int}"}}}}""" + ) return outjson else: if len(outjson["features"]) > 0: @@ -435,9 +418,7 @@ def get_features( for url in urls: for feature in self._request_features(url): if lowercase: - feature["properties"] = { - k.lower(): v for k, v in feature["properties"].items() - } + feature["properties"] = {k.lower(): v for k, v in feature["properties"].items()} yield feature diff --git a/tests/test_bc2pg.py b/tests/test_bc2pg.py index 1792aeb..ba14eef 100644 --- a/tests/test_bc2pg.py +++ b/tests/test_bc2pg.py @@ -38,9 +38,7 @@ def test_bc2pg_50kgrid(): def test_bc2pg_count(): bcdata.bc2pg(AIRPORTS_TABLE, DB_URL, count=10) assert AIRPORTS_TABLE in DB_CONNECTION.tables - r = DB_CONNECTION.query( - "select airport_name from whse_imagery_and_base_maps.gsr_airports_svw" - ) + r = DB_CONNECTION.query("select airport_name from whse_imagery_and_base_maps.gsr_airports_svw") assert len(r) == 10 DB_CONNECTION.execute("drop table " + AIRPORTS_TABLE) @@ -48,21 +46,15 @@ def test_bc2pg_count(): def test_bc2pg_bounds(): bcdata.bc2pg(AIRPORTS_TABLE, DB_URL, bounds=[1188000, 377051, 1207437, 390361]) assert AIRPORTS_TABLE in DB_CONNECTION.tables - r = DB_CONNECTION.query( - "select airport_name from whse_imagery_and_base_maps.gsr_airports_svw" - ) + r = DB_CONNECTION.query("select airport_name from whse_imagery_and_base_maps.gsr_airports_svw") assert len(r) == 8 DB_CONNECTION.execute("drop table " + AIRPORTS_TABLE) def test_bc2pg_bounds_count(): - bcdata.bc2pg( - AIRPORTS_TABLE, DB_URL, bounds=[1188000, 377051, 1207437, 390361], count=6 - ) + bcdata.bc2pg(AIRPORTS_TABLE, DB_URL, bounds=[1188000, 377051, 1207437, 390361], count=6) assert AIRPORTS_TABLE in DB_CONNECTION.tables - r = DB_CONNECTION.query( - "select airport_name from whse_imagery_and_base_maps.gsr_airports_svw" - ) + r = DB_CONNECTION.query("select airport_name from whse_imagery_and_base_maps.gsr_airports_svw") assert len(r) == 6 DB_CONNECTION.execute("drop table " + AIRPORTS_TABLE) @@ -102,9 +94,7 @@ def test_bc2pg_geometry_type_invalid(): def test_bc2pg_primary_key_invalid(): with pytest.raises(Exception): - bcdata.bc2pg( - AIRPORTS_TABLE, DB_URL, count=10, primary_key="airport_primary_key" - ) + bcdata.bc2pg(AIRPORTS_TABLE, DB_URL, count=10, primary_key="airport_primary_key") def test_bc2pg_z(): @@ -166,9 +156,7 @@ def test_bc2pg_filter(): query="AIRPORT_NAME='Terrace (Northwest Regional) Airport'", ) assert AIRPORTS_TABLE in DB_CONNECTION.tables - r = DB_CONNECTION.query( - "select airport_name from whse_imagery_and_base_maps.gsr_airports_svw" - ) + r = DB_CONNECTION.query("select airport_name from whse_imagery_and_base_maps.gsr_airports_svw") assert len(r) == 1 assert r[0][0] == "Terrace (Northwest Regional) Airport" DB_CONNECTION.execute("drop table " + AIRPORTS_TABLE) diff --git a/tests/test_bcdc.py b/tests/test_bcdc.py index b2686d1..4fb3c31 100644 --- a/tests/test_bcdc.py +++ b/tests/test_bcdc.py @@ -43,17 +43,13 @@ def test_get_table_definition_format_multi(): assert table_definition["schema"] columns = [c["column_name"] for c in table_definition["schema"]] assert ( - bcdata.primary_keys[ - "whse_forest_vegetation.ogsr_priority_def_area_cur_sp" - ].upper() + bcdata.primary_keys["whse_forest_vegetation.ogsr_priority_def_area_cur_sp"].upper() in columns ) def test_get_table_definition_format_multi_nopreview(): - table_definition = bcdc.get_table_definition( - "WHSE_BASEMAPPING.FWA_NAMED_POINT_FEATURES_SP" - ) + table_definition = bcdc.get_table_definition("WHSE_BASEMAPPING.FWA_NAMED_POINT_FEATURES_SP") assert table_definition["description"] assert table_definition["comments"] assert table_definition["schema"] @@ -78,9 +74,7 @@ def test_get_table_definition_format_oracle_sde(): def test_get_table_definition_nr_districts(): - table_definition = bcdc.get_table_definition( - "WHSE_ADMIN_BOUNDARIES.ADM_NR_DISTRICTS_SPG" - ) + table_definition = bcdc.get_table_definition("WHSE_ADMIN_BOUNDARIES.ADM_NR_DISTRICTS_SPG") assert table_definition["description"] assert table_definition["comments"] assert table_definition["schema"] diff --git a/tests/test_wcs.py b/tests/test_wcs.py index fc3ff4b..ba25509 100644 --- a/tests/test_wcs.py +++ b/tests/test_wcs.py @@ -25,9 +25,7 @@ def test_dem(tmpdir): def test_dem_align(tmpdir): bounds = [1046891, 704778, 1055345, 709629] - out_file = bcdata.get_dem( - bounds, os.path.join(tmpdir, "test_dem_align.tif"), align=True - ) + out_file = bcdata.get_dem(bounds, os.path.join(tmpdir, "test_dem_align.tif"), align=True) assert os.path.exists(out_file) with rasterio.open(out_file) as src: bounds = src.bounds @@ -39,8 +37,7 @@ def test_dem_rasterio(tmpdir): bounds = [1046891, 704778, 1055345, 709629] src = bcdata.get_dem(bounds, as_rasterio=True) stats = [ - {"min": float(b.min()), "max": float(b.max()), "mean": float(b.mean())} - for b in src.read() + {"min": float(b.min()), "max": float(b.max()), "mean": float(b.mean())} for b in src.read() ] assert stats[0]["max"] == 3982 diff --git a/tests/test_wfs.py b/tests/test_wfs.py index bb666ff..d9dd48d 100644 --- a/tests/test_wfs.py +++ b/tests/test_wfs.py @@ -1,5 +1,3 @@ -import os - import pytest import requests import requests_mock @@ -7,7 +5,6 @@ from geopandas.geodataframe import GeoDataFrame import bcdata -from bcdata.wfs import ServiceException AIRPORTS_PACKAGE = "bc-airports" AIRPORTS_TABLE = "WHSE_IMAGERY_AND_BASE_MAPS.GSR_AIRPORTS_SVW" @@ -58,9 +55,7 @@ def test_get_count_filtered(): def test_get_count_bounds(): - assert ( - bcdata.get_count(AIRPORTS_TABLE, bounds=[1188000, 377051, 1207437, 390361]) == 8 - ) + assert bcdata.get_count(AIRPORTS_TABLE, bounds=[1188000, 377051, 1207437, 390361]) == 8 def test_get_sortkey_known(): @@ -77,9 +72,7 @@ def test_get_data_asgdf(): def test_get_data_asgdf_crs(): - gdf = bcdata.get_data( - UTMZONES_KEY, query="UTM_ZONE=10", as_gdf=True, crs="EPSG:3005" - ) + gdf = bcdata.get_data(UTMZONES_KEY, query="UTM_ZONE=10", as_gdf=True, crs="EPSG:3005") assert gdf.crs == "EPSG:3005" @@ -100,10 +93,7 @@ def test_get_data_lowercase(): def test_get_data_crs(): data = bcdata.get_data(AIRPORTS_TABLE, crs="EPSG:3005") - assert ( - data["crs"] - == """{"type":"name","properties":{"name":"urn:ogc:def:crs:EPSG::3005"}}""" - ) + assert data["crs"] == """{"type":"name","properties":{"name":"urn:ogc:def:crs:EPSG::3005"}}""" def test_get_features(): @@ -136,8 +126,7 @@ def test_cql_filter(): ) assert len(data["features"]) == 1 assert ( - data["features"][0]["properties"]["AIRPORT_NAME"] - == "Terrace (Northwest Regional) Airport" + data["features"][0]["properties"]["AIRPORT_NAME"] == "Terrace (Northwest Regional) Airport" ) @@ -154,7 +143,4 @@ def test_cql_bounds_filter(): bounds_crs="EPSG:3005", ) assert len(data["features"]) == 1 - assert ( - data["features"][0]["properties"]["AIRPORT_NAME"] - == "Victoria International Airport" - ) + assert data["features"][0]["properties"]["AIRPORT_NAME"] == "Victoria International Airport" From 37555dc4c3c7fe61d6960aaef83c1dd4c8e61fd1 Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Tue, 3 Dec 2024 14:42:02 -0800 Subject: [PATCH 13/17] write test file to tmpdir --- tests/test_wcs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_wcs.py b/tests/test_wcs.py index ba25509..2a9a921 100644 --- a/tests/test_wcs.py +++ b/tests/test_wcs.py @@ -35,7 +35,7 @@ def test_dem_align(tmpdir): def test_dem_rasterio(tmpdir): bounds = [1046891, 704778, 1055345, 709629] - src = bcdata.get_dem(bounds, as_rasterio=True) + src = bcdata.get_dem(bounds, os.path.join(tmpdir, "test_dem_rasterio.tif"), as_rasterio=True) stats = [ {"min": float(b.min()), "max": float(b.max()), "mean": float(b.mean())} for b in src.read() ] From 8380f5de82971e346241878fcfa9750b1f6273dd Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Tue, 3 Dec 2024 14:53:52 -0800 Subject: [PATCH 14/17] do not upload to testpypi --- .github/workflows/release.yml | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 62cff32..3cf9feb 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,4 +1,4 @@ -name: Publish to PyPI and TestPyPI +name: Publish to PyPI and create release on GitHub on: push @@ -90,28 +90,4 @@ jobs: run: >- gh release upload '${{ github.ref_name }}' dist/** - --repo '${{ github.repository }}' - - publish-to-testpypi: - name: Publish Python 🐍 distribution 📦 to TestPyPI - needs: - - build - runs-on: ubuntu-latest - - environment: - name: testpypi - url: https://test.pypi.org/p/bcdata - - permissions: - id-token: write # IMPORTANT: mandatory for trusted publishing - - steps: - - name: Download all the dists - uses: actions/download-artifact@v4 - with: - name: python-package-distributions - path: dist/ - - name: Publish distribution 📦 to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - repository-url: https://test.pypi.org/legacy/ \ No newline at end of file + --repo '${{ github.repository }}' \ No newline at end of file From 4a196c68dd35c4cbe1ae17a45ddf6e9793b4cc4c Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Wed, 4 Dec 2024 17:17:04 -0800 Subject: [PATCH 15/17] bc2parquet proof of concept --- bcdata/cli.py | 139 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 130 insertions(+), 9 deletions(-) diff --git a/bcdata/cli.py b/bcdata/cli.py index 073e66f..cf1002f 100644 --- a/bcdata/cli.py +++ b/bcdata/cli.py @@ -6,6 +6,12 @@ import click from cligj import compact_opt, indent_opt, quiet_opt, verbose_opt +from shapely.geometry.linestring import LineString +from shapely.geometry.multilinestring import MultiLineString +from shapely.geometry.multipoint import MultiPoint +from shapely.geometry.multipolygon import MultiPolygon +from shapely.geometry.point import Point +from shapely.geometry.polygon import Polygon import bcdata from bcdata.database import Database @@ -22,6 +28,27 @@ def complete_dataset_names(ctx, param, incomplete): return [k for k in bcdata.list_tables() if k.startswith(incomplete)] +def ensure_single_geometry_type(df): + """If mix of single/multi part geometries are present, promote all geometries to multipart""" + geomtypes = sorted( + [t.upper() for t in df.geometry.geom_type.dropna(axis=0, how="all").unique()], key=len + ) + if len(geomtypes) > 1 and geomtypes[1] == "MULTI" + geomtypes[0]: + df.geometry = [ + MultiPoint([feature]) if isinstance(feature, Point) else feature + for feature in df.geometry + ] + df.geometry = [ + MultiLineString([feature]) if isinstance(feature, LineString) else feature + for feature in df.geometry + ] + df.geometry = [ + MultiPolygon([feature]) if isinstance(feature, Polygon) else feature + for feature in df.geometry + ] + return df + + # bounds handling direct from rasterio # https://github.com/mapbox/rasterio/blob/master/rasterio/rio/options.py # https://github.com/mapbox/rasterio/blob/master/rasterio/rio/clip.py @@ -73,9 +100,7 @@ def bounds_handler(ctx, param, value): help='Bounds: "left bottom right top" or "[left, bottom, right, top]". Coordinates are BC Albers (default) or --bounds_crs', ) -dst_crs_opt = click.option( - "--dst-crs", "--dst_crs", default="epsg:4326", help="Destination CRS" -) +dst_crs_opt = click.option("--dst-crs", "--dst_crs", default="epsg:4326", help="Destination CRS") lowercase_opt = click.option( "--lowercase", "-l", is_flag=True, help="Write column/properties names as lowercase" @@ -393,9 +418,7 @@ def bc2pg( if refresh and append: raise ValueError("Options append and refresh are not compatible") if refresh and (schema == "bcdata"): - raise ValueError( - "Refreshing tables in bcdata schema is not supported, use another schema" - ) + raise ValueError("Refreshing tables in bcdata schema is not supported, use another schema") elif refresh and schema: schema_target = schema elif refresh and not schema: @@ -406,9 +429,7 @@ def bc2pg( if not table: table = bcdata.validate_name(dataset).lower().split(".") if schema_target + "." + table not in db.tables: - raise ValueError( - f"Cannot refresh, {schema_target}.{table} not found in database" - ) + raise ValueError(f"Cannot refresh, {schema_target}.{table} not found in database") out_table = bcdata.bc2pg( dataset, db_url, @@ -439,3 +460,103 @@ def bc2pg( # do not notify of data load completion when no data load has occured if not schema_only: log.info("Load of {} to {} in {} complete".format(dataset, out_table, db_url)) + + +@cli.command() +@click.argument("dataset", type=click.STRING, shell_complete=complete_dataset_names) +@click.option("--out_file", "-o", help="Output file") +@click.option( + "--query", + help="A valid CQL or ECQL query", +) +@bounds_opt +@click.option( + "--bounds-crs", + "--bounds_crs", + help="CRS of provided bounds", + default="EPSG:3005", +) +@click.option( + "--count", + "-c", + default=None, + type=int, + help="Total number of features to load", +) +@click.option("--sortby", "-s", help="Name of sort field") +@verbose_opt +@quiet_opt +def bc2parquet( + dataset, + out_file, + query, + bounds, + bounds_crs, + count, + sortby, + verbose, + quiet, +): + """Dump table to parquet + *Script assumes that all data fits in memory* + For larger than memory data, call script the script multiple times (filtering data as required) + """ + # Note that the conversion can easily be done with ogr2ogr (or gpq): + # + # bcdata dump | + # ogr2ogr -f Parquet \ + # .parquet \ + # -lco SORT_BY_BBOX=YES \ + # /vsistdin?buffer_limit=-1/ + + # this command does much the same, but also cleans the data slightly + + verbosity = verbose - quiet + configure_logging(verbosity) + log = logging.getLogger(__name__) + + dataset = bcdata.validate_name(dataset) + + # default to writing to .parquet + if not out_file: + out_file = dataset.lower() + ".parquet" + + # get data + gdf = bcdata.get_data( + dataset, + query=query, + bounds=bounds, + bounds_crs=bounds_crs, + count=count, + sortby=sortby, + as_gdf=True, + crs="epsg:3005", + lowercase=True, + ) + + # make sure geometry is "geometry" + if gdf.geometry.name != "geometry": + gdf = gdf.rename_geometry("geometry") + + # deal with mixed singlepart/multipart geometry types + gdf = ensure_single_geometry_type(gdf) + + # set other data types by introspecting the data + gdf = gdf.convert_dtypes() + + # preserve column order, remove redundant/unsuported columns + table_definition = bcdata.get_table_definition(dataset) + column_names = [ + c["column_name"].lower() + for c in table_definition["schema"] + if c["column_name"] not in ["FEATURE_AREA_SQM", "FEATURE_LENGTH_M"] + and c["data_type"] in ["NUMBER", "VARCHAR2", "DATE"] + ] + gdf = gdf[column_names] + + log.info(f"Writing {dataset} to parquet: {out_file}") + + # we don't bother sorting by bbox because: + # - geopandas doesn't currently offer the option + # - for existing tools, we want all data anyway + gdf.to_parquet(out_file) From 6aaed4f2919ba0b44144acdbdf34849b539e8e76 Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Mon, 9 Dec 2024 14:30:21 -0800 Subject: [PATCH 16/17] remove 2parquet command, enhance dump command to default to always standardize spatial types and strip redundant/unsupported columns --- bcdata/bc2pg.py | 5 +- bcdata/cli.py | 137 +++++----------------------------------------- bcdata/wfs.py | 77 ++++++++++++++++++++------ tests/test_wfs.py | 19 ++++++- 4 files changed, 92 insertions(+), 146 deletions(-) diff --git a/bcdata/bc2pg.py b/bcdata/bc2pg.py index 7f530f3..3f6e93d 100644 --- a/bcdata/bc2pg.py +++ b/bcdata/bc2pg.py @@ -195,10 +195,7 @@ def bc2pg( # noqa: C901 df_nulls = df_nulls.drop(columns=["geom"]) # remove rows with null geometry from geodataframe df = df[df["geom"].notna()] - # cast to everything multipart because responses can have mixed types - # geopandas does not have a built in function: - # https://gis.stackexchange.com/questions/311320/casting-geometry-to-multi-using-geopandas - # (but only cast if geometry_type is not specified to be singlepart) + # promote to multipart if promote_to_multi: df["geom"] = [ MultiPoint([feature]) if isinstance(feature, Point) else feature diff --git a/bcdata/cli.py b/bcdata/cli.py index cf1002f..b4bd84a 100644 --- a/bcdata/cli.py +++ b/bcdata/cli.py @@ -27,28 +27,6 @@ def configure_logging(verbosity): def complete_dataset_names(ctx, param, incomplete): return [k for k in bcdata.list_tables() if k.startswith(incomplete)] - -def ensure_single_geometry_type(df): - """If mix of single/multi part geometries are present, promote all geometries to multipart""" - geomtypes = sorted( - [t.upper() for t in df.geometry.geom_type.dropna(axis=0, how="all").unique()], key=len - ) - if len(geomtypes) > 1 and geomtypes[1] == "MULTI" + geomtypes[0]: - df.geometry = [ - MultiPoint([feature]) if isinstance(feature, Point) else feature - for feature in df.geometry - ] - df.geometry = [ - MultiLineString([feature]) if isinstance(feature, LineString) else feature - for feature in df.geometry - ] - df.geometry = [ - MultiPolygon([feature]) if isinstance(feature, Polygon) else feature - for feature in df.geometry - ] - return df - - # bounds handling direct from rasterio # https://github.com/mapbox/rasterio/blob/master/rasterio/rio/options.py # https://github.com/mapbox/rasterio/blob/master/rasterio/rio/clip.py @@ -231,10 +209,17 @@ def dem( help="CRS of provided bounds", default="EPSG:3005", ) +@click.option( + "--no-clean", + "-nc", + help="Do not do any data standardization", + is_flag=True, + default=True, +) @lowercase_opt @verbose_opt @quiet_opt -def dump(dataset, query, out_file, bounds, bounds_crs, lowercase, verbose, quiet): +def dump(dataset, query, out_file, bounds, bounds_crs, no_clean, lowercase, verbose, quiet): """Write DataBC features to stdout as GeoJSON feature collection. \b @@ -250,8 +235,12 @@ def dump(dataset, query, out_file, bounds, bounds_crs, lowercase, verbose, quiet verbosity = verbose - quiet configure_logging(verbosity) table = bcdata.validate_name(dataset) + if no_clean: + clean = False + else: + clean = True data = bcdata.get_data( - table, query=query, bounds=bounds, bounds_crs=bounds_crs, lowercase=lowercase + table, query=query, bounds=bounds, bounds_crs=bounds_crs, lowercase=lowercase, clean=clean ) if out_file: with open(out_file, "w") as sink: @@ -460,103 +449,3 @@ def bc2pg( # do not notify of data load completion when no data load has occured if not schema_only: log.info("Load of {} to {} in {} complete".format(dataset, out_table, db_url)) - - -@cli.command() -@click.argument("dataset", type=click.STRING, shell_complete=complete_dataset_names) -@click.option("--out_file", "-o", help="Output file") -@click.option( - "--query", - help="A valid CQL or ECQL query", -) -@bounds_opt -@click.option( - "--bounds-crs", - "--bounds_crs", - help="CRS of provided bounds", - default="EPSG:3005", -) -@click.option( - "--count", - "-c", - default=None, - type=int, - help="Total number of features to load", -) -@click.option("--sortby", "-s", help="Name of sort field") -@verbose_opt -@quiet_opt -def bc2parquet( - dataset, - out_file, - query, - bounds, - bounds_crs, - count, - sortby, - verbose, - quiet, -): - """Dump table to parquet - *Script assumes that all data fits in memory* - For larger than memory data, call script the script multiple times (filtering data as required) - """ - # Note that the conversion can easily be done with ogr2ogr (or gpq): - # - # bcdata dump | - # ogr2ogr -f Parquet \ - # .parquet \ - # -lco SORT_BY_BBOX=YES \ - # /vsistdin?buffer_limit=-1/ - - # this command does much the same, but also cleans the data slightly - - verbosity = verbose - quiet - configure_logging(verbosity) - log = logging.getLogger(__name__) - - dataset = bcdata.validate_name(dataset) - - # default to writing to .parquet - if not out_file: - out_file = dataset.lower() + ".parquet" - - # get data - gdf = bcdata.get_data( - dataset, - query=query, - bounds=bounds, - bounds_crs=bounds_crs, - count=count, - sortby=sortby, - as_gdf=True, - crs="epsg:3005", - lowercase=True, - ) - - # make sure geometry is "geometry" - if gdf.geometry.name != "geometry": - gdf = gdf.rename_geometry("geometry") - - # deal with mixed singlepart/multipart geometry types - gdf = ensure_single_geometry_type(gdf) - - # set other data types by introspecting the data - gdf = gdf.convert_dtypes() - - # preserve column order, remove redundant/unsuported columns - table_definition = bcdata.get_table_definition(dataset) - column_names = [ - c["column_name"].lower() - for c in table_definition["schema"] - if c["column_name"] not in ["FEATURE_AREA_SQM", "FEATURE_LENGTH_M"] - and c["data_type"] in ["NUMBER", "VARCHAR2", "DATE"] - ] - gdf = gdf[column_names] - - log.info(f"Writing {dataset} to parquet: {out_file}") - - # we don't bother sorting by bbox because: - # - geopandas doesn't currently offer the option - # - for existing tools, we want all data anyway - gdf.to_parquet(out_file) diff --git a/bcdata/wfs.py b/bcdata/wfs.py index 6c63662..a9ae024 100644 --- a/bcdata/wfs.py +++ b/bcdata/wfs.py @@ -16,6 +16,12 @@ from owslib.feature import schema as wfs_schema from owslib.feature import wfs200 from owslib.wfs import WebFeatureService +from shapely.geometry.linestring import LineString +from shapely.geometry.multilinestring import MultiLineString +from shapely.geometry.multipoint import MultiPoint +from shapely.geometry.multipolygon import MultiPolygon +from shapely.geometry.point import Point +from shapely.geometry.polygon import Polygon import bcdata @@ -25,6 +31,26 @@ log = logging.getLogger(__name__) +def ensure_single_geometry_type(df): + """If mix of single/multi part geometries are present, promote all geometries to multipart""" + geomtypes = sorted( + [t.upper() for t in df.geometry.geom_type.dropna(axis=0, how="all").unique()], key=len + ) + if len(geomtypes) > 1 and geomtypes[1] == "MULTI" + geomtypes[0]: + df.geometry = [ + MultiPoint([feature]) if isinstance(feature, Point) else feature + for feature in df.geometry + ] + df.geometry = [ + MultiLineString([feature]) if isinstance(feature, LineString) else feature + for feature in df.geometry + ] + df.geometry = [ + MultiPolygon([feature]) if isinstance(feature, Polygon) else feature + for feature in df.geometry + ] + return df + class ServiceException(Exception): pass @@ -158,6 +184,7 @@ def _request_features(self, url, silent=False): log.warning(f"Response text: {r.text}") r.raise_for_status() return r.json()["features"] + def build_bounds_filter(self, query, bounds, bounds_crs, geom_column): """The bbox param shortcut is mutually exclusive with CQL_FILTER, @@ -352,7 +379,7 @@ def define_requests( return urls def make_requests( - self, urls, as_gdf=False, crs="epsg4326", lowercase=False, silent=False + self, dataset, urls, as_gdf=False, crs="epsg4326", lowercase=False, silent=False, clean=True ): """turn urls into data""" # loop through urls @@ -362,28 +389,41 @@ def make_requests( outjson = dict(type="FeatureCollection", features=[]) for result in results: outjson["features"] += result + # if specified, lowercasify all properties if lowercase: for feature in outjson["features"]: feature["properties"] = { k.lower(): v for k, v in feature["properties"].items() } - if not as_gdf: - # If output crs is specified, include the crs object in the json - # But as default, we prefer to default to 4326 and RFC7946 (no crs) - if crs.lower() != "epsg:4326": - crs_int = crs.split(":")[1] - outjson[ - "crs" - ] = f"""{{"type":"name","properties":{{"name":"urn:ogc:def:crs:EPSG::{crs_int}"}}}}""" - return outjson + + # load to geodataframe, standardize data slightly + if len(outjson["features"]) > 0: + gdf = gpd.GeoDataFrame.from_features(outjson) + gdf.crs = crs + # minor data cleaning as default + if clean: + if gdf.geometry.name != "geometry": + gdf = gdf.rename_geometry("geometry") + gdf = ensure_single_geometry_type(gdf) + table_definition = bcdata.get_table_definition(dataset) + column_names = [ + c["column_name"] + for c in table_definition["schema"] + if c["column_name"] not in ["FEATURE_AREA_SQM", "FEATURE_LENGTH_M"] + and c["data_type"] in ["NUMBER", "VARCHAR2", "DATE"] + ] + if lowercase: + column_names = [c.lower() for c in column_names] + gdf = gdf[column_names + ["geometry"]] else: - if len(outjson["features"]) > 0: - gdf = gpd.GeoDataFrame.from_features(outjson) - gdf.crs = crs - else: - gdf = gpd.GeoDataFrame() + gdf = gpd.GeoDataFrame() + + if as_gdf: return gdf + + else: + return json.loads(gdf.to_json()) def get_data( self, @@ -396,8 +436,10 @@ def get_data( sortby=None, as_gdf=False, lowercase=False, + clean=True ): """Request features from DataBC WFS and return GeoJSON featurecollection or geodataframe""" + dataset = self.validate_name(dataset) urls = self.define_requests( dataset, query=query, @@ -407,7 +449,7 @@ def get_data( count=count, sortby=sortby, ) - return self.make_requests(urls, as_gdf, crs, lowercase) + return self.make_requests(dataset, urls, as_gdf=as_gdf, crs=crs, lowercase=lowercase, clean=clean) def get_features( self, @@ -460,6 +502,7 @@ def define_requests( query=query, crs=crs, bounds=bounds, + bounds_crs=bounds_crs, count=count, sortby=sortby, check_count=check_count, @@ -489,6 +532,7 @@ def get_data( sortby=None, as_gdf=False, lowercase=False, + clean=True ): WFS = BCWFS() return WFS.get_data( @@ -501,6 +545,7 @@ def get_data( sortby=sortby, as_gdf=as_gdf, lowercase=lowercase, + clean=clean ) diff --git a/tests/test_wfs.py b/tests/test_wfs.py index bb666ff..7d20aa6 100644 --- a/tests/test_wfs.py +++ b/tests/test_wfs.py @@ -1,6 +1,7 @@ import os import pytest +import json import requests import requests_mock import stamina @@ -101,8 +102,7 @@ def test_get_data_lowercase(): def test_get_data_crs(): data = bcdata.get_data(AIRPORTS_TABLE, crs="EPSG:3005") assert ( - data["crs"] - == """{"type":"name","properties":{"name":"urn:ogc:def:crs:EPSG::3005"}}""" + data["crs"]["properties"]["name"] == 'urn:ogc:def:crs:EPSG::3005' ) @@ -158,3 +158,18 @@ def test_cql_bounds_filter(): data["features"][0]["properties"]["AIRPORT_NAME"] == "Victoria International Airport" ) + +def test_clean(): + data = bcdata.get_data( + AIRPORTS_TABLE, + query="AIRPORT_NAME='Terrace (Northwest Regional) Airport'", + ) + assert "SE_ANNO_CAD_DATA" not in data["features"][0]["properties"].keys() + +def test_no_clean(): + data = bcdata.get_data( + AIRPORTS_TABLE, + query="AIRPORT_NAME='Terrace (Northwest Regional) Airport'", + clean=False + ) + assert "SE_ANNO_CAD_DATA" in data["features"][0]["properties"].keys() \ No newline at end of file From e0b2d5882a4d63bb7a67b03dfec9633f4f1c12c8 Mon Sep 17 00:00:00 2001 From: Simon Norris Date: Wed, 11 Dec 2024 14:08:47 -0800 Subject: [PATCH 17/17] date --- CHANGES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 3d84f52..b712685 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,7 +1,7 @@ Changes ======= -0.13.0 () +0.13.0 (2024-12-11) ------------------ - support Data Catalogue API changes (#188) - bump dependencies