From 32e0892341e6d260294ef5d056b1e1fc01305853 Mon Sep 17 00:00:00 2001
From: galenseilis
Date: Mon, 27 Oct 2025 07:29:10 -0700
Subject: [PATCH] ruff check --select ALL --fix

Apply ruff autofixes across the package: drop redundant "r" modes from
open() calls, add trailing commas to multi-line calls, convert
str.format() calls to f-strings, remove explicit object base classes,
collapse else branches after return/raise, and normalize docstring
formatting. One error message in bc2pg.py also gains its missing
f-string prefix so that {primary_key} is actually interpolated.

---
 data/validate.py       |  4 +--
 src/bcdata/bc2pg.py    |  6 ++--
 src/bcdata/bcdc.py     |  7 ++--
 src/bcdata/cli.py      | 16 ++++----
 src/bcdata/database.py | 14 ++++----
 src/bcdata/wfs.py      | 77 +++++++++++++++++++-----------------------
 tests/test_bc2pg.py    | 10 +++---
 tests/test_bcdc.py     |  6 ++--
 tests/test_cli.py      |  4 +--
 9 files changed, 68 insertions(+), 76 deletions(-)

diff --git a/data/validate.py b/data/validate.py
index 97c8887..f85db19 100644
--- a/data/validate.py
+++ b/data/validate.py
@@ -6,7 +6,7 @@
 
 LOG_FORMAT = "%(asctime)s:%(levelname)s:%(name)s: %(message)s"
 
-with open("primary_keys.json", "r") as file:
+with open("primary_keys.json") as file:
     """validate pk database"""
     logging.basicConfig(stream=sys.stderr, level=20, format=LOG_FORMAT)
     log = logging.getLogger(__name__)
@@ -21,7 +21,7 @@
             if column not in [c["column_name"].lower() for c in schema]:
                 raise ValueError(f"Column {column} not found in {table}")
         log.info(
-            "Validation successful - columns listed in primary_keys.json are present in listed tables"
+            "Validation successful - columns listed in primary_keys.json are present in listed tables",
        )
     else:
         invalid_keys = list(pk_db_tables - bcdata_tables)
diff --git a/src/bcdata/bc2pg.py b/src/bcdata/bc2pg.py
index e26d1a5..d26d382 100644
--- a/src/bcdata/bc2pg.py
+++ b/src/bcdata/bc2pg.py
@@ -74,7 +74,7 @@ def bc2pg(  # noqa: C901
 
     # define requests
     urls = WFS.define_requests(
-        dataset, query=query, bounds=bounds, bounds_crs=bounds_crs, count=count, sortby=sortby
+        dataset, query=query, bounds=bounds, bounds_crs=bounds_crs, count=count, sortby=sortby,
     )
 
     df = None  # just for tracking if first download is done by geometry type check
@@ -112,7 +112,7 @@ def bc2pg(  # noqa: C901
             )
             geometry_type = df_temp.geom_type.unique()[0]  # keep only the first type
             if numpy.any(
-                df_temp.has_z.unique()[0]
+                df_temp.has_z.unique()[0],
             ):  # geopandas does not include Z in geom_type string
                 geometry_type = geometry_type + "Z"
             # drop the last request dataframe to free up memory
@@ -132,7 +132,7 @@ def bc2pg(  # noqa: C901
                 c["column_name"].upper() for c in table_definition["schema"]
             ]:
                 raise ValueError(
-                    "Column {primary_key} specified as primary_key does not exist in source"
+                    f"Column {primary_key} specified as primary_key does not exist in source",
                 )
 
     # build the table definition and create table
diff --git a/src/bcdata/bcdc.py b/src/bcdata/bcdc.py
index 9e57c4e..4fe88bd 100644
--- a/src/bcdata/bcdc.py
+++ b/src/bcdata/bcdc.py
@@ -62,15 +62,14 @@ def get_table_name(package):
     if len(layer_names) > 1:
         raise ValueError(
             "Package {} includes more than one WFS resource, specify one of the following: \n{}".format(
-                package, "\n".join(layer_names)
-            )
+                package, "\n".join(layer_names),
+            ),
         )
     return layer_names[0]
 
 
 def get_table_definition(table_name):
-    """
-    Given a table/object name, search BCDC for the first package/resource with a matching "object_name",
+    """Given a table/object name, search BCDC for the first package/resource with a matching "object_name",
     returns dict: {"comments": <>, "notes": <>, "schema": {} }
     """
     # only allow searching for tables present in WFS list
diff --git a/src/bcdata/cli.py b/src/bcdata/cli.py
index d7528c9..e502a62 100644
--- a/src/bcdata/cli.py
+++ b/src/bcdata/cli.py
@@ -29,11 +29,11 @@ def complete_dataset_names(ctx, param, incomplete):
 
 def from_like_context(ctx, param, value):
     """Return the value for an option
from the context if the option - or `--all` is given, else return None.""" + or `--all` is given, else return None. + """ if ctx.obj and ctx.obj.get("like") and (value == "like" or ctx.obj.get("all_like")): return ctx.obj["like"][param.name] - else: - return None + return None def bounds_handler(ctx, param, value): @@ -47,7 +47,7 @@ def bounds_handler(ctx, param, value): return retval except Exception: raise click.BadParameter( - "{0!r} is not a valid bounding box representation".format(value) + f"{value!r} is not a valid bounding box representation", ) else: # pragma: no cover return retval @@ -67,7 +67,7 @@ def bounds_handler(ctx, param, value): lowercase_opt = click.option( - "--lowercase", "-l", is_flag=True, help="Write column/properties names as lowercase" + "--lowercase", "-l", is_flag=True, help="Write column/properties names as lowercase", ) @@ -161,7 +161,7 @@ def info(dataset, indent, meta_member, verbose, quiet): @verbose_opt @quiet_opt def dump( - dataset, query, count, bounds, bounds_crs, sortby, lowercase, promote_to_multi, verbose, quiet + dataset, query, count, bounds, bounds_crs, sortby, lowercase, promote_to_multi, verbose, quiet, ): """Write DataBC features to stdout as GeoJSON feature collection. @@ -364,7 +364,7 @@ def bc2pg( raise ValueError("Options append and refresh are not compatible") if refresh and (schema == "bcdata"): raise ValueError("Refreshing tables in bcdata schema is not supported, use another schema") - elif refresh and schema: + if refresh and schema: schema_target = schema elif refresh and not schema: schema_target, table = bcdata.validate_name(dataset).lower().split(".") @@ -404,4 +404,4 @@ def bc2pg( # do not notify of data load completion when no data load has occured if not schema_only: - log.info("Load of {} to {} in {} complete".format(dataset, out_table, db_url)) + log.info(f"Load of {dataset} to {out_table} in {db_url} complete") diff --git a/src/bcdata/database.py b/src/bcdata/database.py index e294238..a1541b5 100644 --- a/src/bcdata/database.py +++ b/src/bcdata/database.py @@ -9,7 +9,7 @@ log = logging.getLogger(__name__) -class Database(object): +class Database: """Wrapper around sqlalchemy""" def __init__(self, url=os.environ.get("DATABASE_URL")): @@ -103,13 +103,13 @@ def refresh(self, schema, table): ) self.execute(dbq) columns = list( - set(self.get_columns("bcdata", table)).intersection(self.get_columns(schema, table)) + set(self.get_columns("bcdata", table)).intersection(self.get_columns(schema, table)), ) identifiers = [sql.Identifier(c) for c in columns] dbq = sql.SQL( """INSERT INTO {schema}.{table} ({columns}) - SELECT {columns} FROM bcdata.{table}""" + SELECT {columns} FROM bcdata.{table}""", ).format( schema=sql.Identifier(schema), table=sql.Identifier(table), @@ -130,7 +130,7 @@ def define_table( table_comments=None, primary_key=None, ): - """build sqlalchemy table definition from bcdc provided json definitions""" + """Build sqlalchemy table definition from bcdc provided json definitions""" # remove columns of unsupported types, redundant columns table_details = [c for c in table_details if c["data_type"] in self.supported_types.keys()] table_details = [ @@ -159,7 +159,7 @@ def define_table( column_type, primary_key=True, comment=column_comments, - ) + ), ) else: columns.append( @@ -167,7 +167,7 @@ def define_table( column_name, column_type, comment=column_comments, - ) + ), ) # make everything multipart @@ -212,7 +212,7 @@ def log(self, schema_name, table_name): table_name text PRIMARY KEY, latest_download timestamp WITH 
TIME ZONE );
-            """
+            """,
         )
         self.execute(
             """INSERT INTO bcdata.log (table_name, latest_download)
diff --git a/src/bcdata/wfs.py b/src/bcdata/wfs.py
index abde127..5e93f95 100644
--- a/src/bcdata/wfs.py
+++ b/src/bcdata/wfs.py
@@ -51,7 +51,7 @@ class ServiceException(Exception):
     pass
 
 
-class BCWFS(object):
+class BCWFS:
     """Wrapper around web feature service"""
 
     def __init__(self, refresh=False):
@@ -71,7 +71,7 @@ def __init__(self, refresh=False):
             # if the file is named something else, prompt user to delete it
             else:
                 raise RuntimeError(
-                    f"Cache file exists, delete before using bcdata: {self.cache_path}"
+                    f"Cache file exists, delete before using bcdata: {self.cache_path}",
                 )
         # create cache folder if it does not exist
         p.mkdir(parents=True, exist_ok=True)
@@ -80,10 +80,10 @@ def __init__(self, refresh=False):
         self.capabilities = self.get_capabilities()
         # get pagesize from xml using the xpath from https://github.com/bcgov/bcdata/
         countdefault = ET.fromstring(self.capabilities).findall(
-            ".//{http://www.opengis.net/ows/1.1}Constraint[@name='CountDefault']"
+            ".//{http://www.opengis.net/ows/1.1}Constraint[@name='CountDefault']",
         )[0]
         self.pagesize = int(
-            countdefault.find("ows:DefaultValue", {"ows": "http://www.opengis.net/ows/1.1"}).text
+            countdefault.find("ows:DefaultValue", {"ows": "http://www.opengis.net/ows/1.1"}).text,
        )
 
         self.request_headers = {"User-Agent": "bcdata.py ({bcdata.__version__})"}
@@ -93,16 +93,14 @@ def check_cached_file(self, cache_file):
         cache_file = os.path.join(self.cache_path, cache_file)
         if not os.path.exists(os.path.join(cache_file)):
             return True
-        else:
-            mod_date = datetime.fromtimestamp(os.path.getmtime(cache_file))
-            # if file older than specified days or empty, return true
-            if (
-                mod_date < (datetime.now() - timedelta(days=self.cache_refresh_days))
-                or os.stat(cache_file).st_size == 0
-            ):
-                return True
-            else:
-                return False
+        mod_date = datetime.fromtimestamp(os.path.getmtime(cache_file))
+        # if file older than specified days or empty, return true
+        if (
+            mod_date < (datetime.now() - timedelta(days=self.cache_refresh_days))
+            or os.stat(cache_file).st_size == 0
+        ):
+            return True
+        return False
 
     @stamina.retry(on=requests.HTTPError, timeout=60)
     def _request_schema(self, table):
@@ -146,7 +144,7 @@ def _request_count(self, table, query=None, bounds=None, bounds_crs=None, geom_c
             log.error(f"Response headers: {r.headers}")
             log.error(f"Response text: {r.text}")
             raise ServiceException(r.text)  # presumed request error
-        elif r.status_code in [500, 502, 503, 504]:  # presumed serivce error, retry
+        if r.status_code in [500, 502, 503, 504]:  # presumed service error, retry
             log.warning(f"HTTP error: {r.status_code}, retrying")
             log.warning(f"Response headers: {r.headers}")
             log.warning(f"Response text: {r.text}")
@@ -166,7 +164,7 @@ def _request_features(self, url, silent=False):
             log.error(f"Response headers: {r.headers}")
             log.error(f"Response text: {r.text}")
             raise ServiceException(r.text)  # presumed request error
-        elif r.status_code in [500, 502, 503, 504]:  # presumed serivce error, retry
+        if r.status_code in [500, 502, 503, 504]:  # presumed service error, retry
             log.warning(f"HTTP error: {r.status_code}")
             log.warning(f"Response headers: {r.headers}")
             log.warning(f"Response text: {r.text}")
@@ -186,7 +184,7 @@ def _request_featurecollection(self, url, silent=False):
             log.error(f"Response headers: {r.headers}")
             log.error(f"Response text: {r.text}")
             raise ServiceException(r.text)  # presumed request error
-        elif r.status_code in [500, 502, 503, 504]:  # presumed serivce error, retry
+        if r.status_code in [500, 502, 503, 504]:  # presumed service error, retry
             log.warning(f"HTTP error: {r.status_code}")
             log.warning(f"Response headers: {r.headers}")
             log.warning(f"Response text: {r.text}")
@@ -214,8 +212,7 @@ def build_bounds_filter(self, query, bounds, bounds_crs, geom_column):
         return cql_filter
 
     def get_capabilities(self):
-        """
-        Request server capabilities (layer definitions).
+        """Request server capabilities (layer definitions).
         Cache response as file daily, caching to one of:
         - $BCDATA_CACHE environment variable
         - default (~/.bcdata)
@@ -225,7 +222,7 @@
             with open(os.path.join(self.cache_path, "capabilities.xml"), "w") as f:
                 f.write(self._request_capabilities())
         # load cached xml from file
-        with open(os.path.join(self.cache_path, "capabilities.xml"), "r") as f:
+        with open(os.path.join(self.cache_path, "capabilities.xml")) as f:
             return f.read()
 
     def get_count(self, dataset, query=None, bounds=None, bounds_crs="EPSG:3005", geom_column=None):
@@ -248,7 +245,7 @@ def get_schema(self, table):
                 schema = self._request_schema(table)
                 f.write(json.dumps(schema, indent=4))
         # load cached schema
-        with open(os.path.join(self.cache_path, table), "r") as f:
+        with open(os.path.join(self.cache_path, table)) as f:
             return json.loads(f.read())
 
     def get_sortkey(self, table):
@@ -258,25 +255,24 @@ def get_sortkey(self, table):
         if table.lower() in bcdata.primary_keys:
             return bcdata.primary_keys[table.lower()].upper()
         # if pk not known, use OBJECTID as default sort key when present
-        elif "OBJECTID" in columns:
+        if "OBJECTID" in columns:
             return "OBJECTID"
         # if OBJECTID is not present (several GSR tables), use SEQUENCE_ID
-        elif "SEQUENCE_ID" in columns:
+        if "SEQUENCE_ID" in columns:
             return "SEQUENCE_ID"
         # otherwise, presume first column is best value to sort by
         # (in some cases this will be incorrect)
-        else:
-            log.warning(
-                f"Reliable sort key for {table} cannot be determined, defaulting to first column {columns[0]}"
-            )
-            return columns[0]
+        log.warning(
+            f"Reliable sort key for {table} cannot be determined, defaulting to first column {columns[0]}",
+        )
+        return columns[0]
 
     def list_tables(self):
-        """read and parse capabilities xml, which lists all tables available"""
+        """Read and parse capabilities xml, which lists all tables available"""
         return [
             i.strip("pub:")
             for i in list(
-                WebFeatureService(self.ows_url, version="2.0.0", xml=self.capabilities).contents
+                WebFeatureService(self.ows_url, version="2.0.0", xml=self.capabilities).contents,
             )
         ]
 
@@ -284,8 +280,7 @@ def validate_name(self, dataset):
         """Check wfs/cache and the bcdc api to see if dataset name is valid"""
         if dataset.upper() in self.list_tables():
             return dataset.upper()
-        else:
-            return bcdata.get_table_name(dataset.upper())
+        return bcdata.get_table_name(dataset.upper())
 
     def define_requests(
         self,
@@ -304,6 +299,7 @@
         - http://www.opengeospatial.org/standards/wfs
         - http://docs.geoserver.org/stable/en/user/services/wfs/vendor.html
         - http://docs.geoserver.org/latest/en/user/tutorials/cql/cql_tutorial.html
+
         """
         # validate the table name
         table = self.validate_name(dataset)
@@ -315,9 +311,9 @@
         # find out how many records are in the table
         if not count and check_count is False:
             raise ValueError(
-                "{count: Null, check_count=False} is invalid, either provide record count or let bcdata request it"
+                "{count: Null, check_count=False} is invalid, either provide record count or let bcdata request it",
             )
-        elif (
+        if (
             not count
             and check_count is True
): # if not provided a count, get one if not told otherwise count = self.get_count( @@ -337,8 +333,7 @@ def define_requests( bounds_crs=bounds_crs, geom_column=geom_column, ) - if count > n: - count = n + count = min(count, n) log.info(f"Total features requested: {count}") @@ -406,8 +401,7 @@ def request_features( if as_gdf: return gdf - else: - return json.loads(gdf.to_json()) + return json.loads(gdf.to_json()) def get_data( @@ -436,8 +430,8 @@ def get_data( for url in urls: results.append( WFS.request_features( - url, as_gdf=True, lowercase=lowercase, promote_to_multi=promote_to_multi - ) + url, as_gdf=True, lowercase=lowercase, promote_to_multi=promote_to_multi, + ), ) if len(results) > 1: gdf = pd.concat(results) @@ -447,8 +441,7 @@ def get_data( gdf = gpd.GeoDataFrame() if as_gdf: return gdf - else: - return json.loads(gdf.to_json()) + return json.loads(gdf.to_json()) def get_count(dataset, query=None, bounds=None, bounds_crs="EPSG:3005"): diff --git a/tests/test_bc2pg.py b/tests/test_bc2pg.py index ba14eef..a6f7086 100644 --- a/tests/test_bc2pg.py +++ b/tests/test_bc2pg.py @@ -23,7 +23,7 @@ def test_bc2pg(): r = DB_CONNECTION.query( """ SELECT ST_geometrytype(geom) from whse_imagery_and_base_maps.gsr_airports_svw limit 1 - """ + """, ) assert r[0][0] == "ST_MultiPoint" DB_CONNECTION.execute("drop table " + AIRPORTS_TABLE) @@ -76,7 +76,7 @@ def test_bc2pg_geometry_type(): r = DB_CONNECTION.query( """ SELECT ST_geometrytype(geom) from whse_imagery_and_base_maps.gsr_airports_svw limit 1 - """ + """, ) assert r[0][0] == "ST_Point" DB_CONNECTION.execute("drop table " + AIRPORTS_TABLE) @@ -103,7 +103,7 @@ def test_bc2pg_z(): r = DB_CONNECTION.query( """ SELECT ST_NDims(geom) from whse_basemapping.fwa_stream_networks_sp limit 1 - """ + """, ) assert r[0][0] == 3 DB_CONNECTION.execute("drop table " + STREAMS_TABLE) @@ -121,7 +121,7 @@ def test_bc2pg_primary_key(): WHERE relname = 'pscis_assessment_svw' AND nspname = 'whse_fish' AND indisprimary - """ + """, ) assert r[0][0] == "stream_crossing_id" DB_CONNECTION.execute("drop table " + ASSESSMENTS_TABLE) @@ -143,7 +143,7 @@ def test_bc2pg_primary_key_default(): WHERE relname = 'pscis_assessment_svw' AND nspname = 'whse_fish' AND indisprimary - """ + """, ) assert r[0][0] == "stream_crossing_id" DB_CONNECTION.execute("drop table " + ASSESSMENTS_TABLE) diff --git a/tests/test_bcdc.py b/tests/test_bcdc.py index 6a15f88..0854f88 100644 --- a/tests/test_bcdc.py +++ b/tests/test_bcdc.py @@ -36,7 +36,7 @@ def test_get_table_definition(): def test_get_table_definition_format_multi(): table_definition = bcdc.get_table_definition( - "WHSE_FOREST_VEGETATION.OGSR_PRIORITY_DEF_AREA_CUR_SP" + "WHSE_FOREST_VEGETATION.OGSR_PRIORITY_DEF_AREA_CUR_SP", ) assert table_definition["description"] assert table_definition["comments"] @@ -57,7 +57,7 @@ def test_get_table_definition_format_multi_nopreview(): def test_get_table_definition_format_multi_nolayer(): table_definition = bcdc.get_table_definition( - "WHSE_HUMAN_CULTURAL_ECONOMIC.HIST_HISTORIC_ENVIRONMNT_PA_SV" + "WHSE_HUMAN_CULTURAL_ECONOMIC.HIST_HISTORIC_ENVIRONMNT_PA_SV", ) assert table_definition["description"] # assert table_definition["comments"] there are no comments associated with this dataset @@ -66,7 +66,7 @@ def test_get_table_definition_format_multi_nolayer(): def test_get_table_definition_format_oracle_sde(): table_definition = bcdc.get_table_definition( - "WHSE_LAND_USE_PLANNING.RMP_LANDSCAPE_RSRV_DESIGN_SP" + "WHSE_LAND_USE_PLANNING.RMP_LANDSCAPE_RSRV_DESIGN_SP", ) assert 
table_definition["description"] assert table_definition["comments"] diff --git a/tests/test_cli.py b/tests/test_cli.py index 6878ded..c98abe2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -24,7 +24,7 @@ def test_info_table(): runner = CliRunner() result = runner.invoke(cli, ["info", AIRPORTS_TABLE]) assert result.exit_code == 0 - assert 'name": "{}"'.format(AIRPORTS_TABLE) in result.output + assert f'name": "{AIRPORTS_TABLE}"' in result.output assert '"count": 455' in result.output @@ -32,7 +32,7 @@ def test_info_package(): runner = CliRunner() result = runner.invoke(cli, ["info", AIRPORTS_PACKAGE]) assert result.exit_code == 0 - assert 'name": "{}"'.format(AIRPORTS_TABLE) in result.output + assert f'name": "{AIRPORTS_TABLE}"' in result.output assert '"count": 455' in result.output