From 725f25689a378d04c32b75262ace286310620494 Mon Sep 17 00:00:00 2001 From: ediakatos Date: Fri, 25 Apr 2025 14:27:11 +0100 Subject: [PATCH] Added documentation for each script and updated the main README --- README.md | 8 ++------ docs/__main__.md | 11 +++++++++++ docs/constants.md | 14 ++++++++++++++ docs/hazards.md | 19 +++++++++++++++++++ docs/s3.md | 8 ++++++++ docs/utils.md | 32 ++++++++++++++++++++++++++++++++ src/utils/utils.py | 10 ++++++---- 7 files changed, 92 insertions(+), 10 deletions(-) create mode 100644 docs/__main__.md create mode 100644 docs/constants.md create mode 100644 docs/hazards.md create mode 100644 docs/s3.md create mode 100644 docs/utils.md diff --git a/README.md b/README.md index 3511341..759e5cd 100644 --- a/README.md +++ b/README.md @@ -30,16 +30,11 @@ make hooks # Install pre-commit hooks ```bash make paths # Setup paths make aws_etl # Run the hazard processing pipeline +make local_etl # Run the hazard processing pipeline locally without S3 make test # Run unit tests make lint # Run lint checks ``` -Or directly with Poetry: - -```bash -poetry run python -m src.main.__main__ -``` - ## Configuration Create a `~/.hazard_tool_rc` file in your home directory @@ -50,6 +45,7 @@ export S3_BUCKET= export AWS_ACCESS_KEY_ID= export AWS_SECRET_ACCESS_KEY= export AWS_DEFAULT_REGION= +export USE_LOCAL=true ``` Optionally, adjust path constants (e.g., `ADMIN_VECTOR_PATH`, `POPULATION_RASTER_PATH`) diff --git a/docs/__main__.md b/docs/__main__.md new file mode 100644 index 0000000..c88e4da --- /dev/null +++ b/docs/__main__.md @@ -0,0 +1,11 @@ +# __main__.py + +This is the entrypoint for the Hazard Processing Tool CLI. 
+ +It performs the following steps: + +- Parses configuration and environment flags +- Loads administrative boundary data +- Prepares exposure datasets (population and hazard rasters) +- Invokes hazard-specific processing functions from `hazards.py` +- Exports results via `export_dataset` (to S3 or locally based on `USE_LOCAL`) diff --git a/docs/constants.md b/docs/constants.md new file mode 100644 index 0000000..20eeaeb --- /dev/null +++ b/docs/constants.md @@ -0,0 +1,14 @@ +# constants.py + +This module loads environment variables and defines I/O paths: + +- Uses `dotenv` to load variables from `~/.hazard_tool_rc`. +- USE_LOCAL: boolean flag to toggle between S3 mode and local mode. +- S3_BUCKET, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION: AWS credentials. +- PATH: I/O prefix (`/vsis3/{S3_BUCKET}/` or `./`). +- Defines relative keys for all raster/shapefile inputs and output CSVs: + - POPULATION_RASTER_PATH + - HAZARD_RASTER_PATH + - ADMIN_VECTOR_PATH + - HAZARD_INPUT_PATH + - HAZARD_OUTPUT_PATH diff --git a/docs/hazards.md b/docs/hazards.md new file mode 100644 index 0000000..9bcddc7 --- /dev/null +++ b/docs/hazards.md @@ -0,0 +1,19 @@ +# hazards.py + +This module defines processing functions for each hazard type supported by the tool: + +- process_flood: Reads flood raster, applies threshold, + computes zonal statistics. +- process_earthquake: Opens earthquake hazard data, + thresholds, and aggregates exposure. +- process_landslide: Processes landslide rasters similarly to + flood and earthquake. +- process_deforestation: Computes separate statistics for tree + cover loss and forest cover. +- process_cyclone: Loads cyclone wind return period data and + summarizes by admin unit. +- coastal_erosion: Buffers administrative boundaries, reads + shoreline erosion shapefile, and calculates mean erosion rate. + +Each function takes a GeoDataFrame of administrative boundaries +and returns a pandas DataFrame with aggregated hazard metrics. 
diff --git a/docs/s3.md b/docs/s3.md new file mode 100644 index 0000000..538df10 --- /dev/null +++ b/docs/s3.md @@ -0,0 +1,8 @@ +# s3.py + +This module sets up AWS S3 export functionality. + +- Establishes a boto3 Session and S3 client using AWS credentials + loaded from constants. +- export_dataset(df, hazard): sorts and writes a pandas DataFrame as a CSV + either to S3 or locally based on the USE_LOCAL flag. diff --git a/docs/utils.md b/docs/utils.md new file mode 100644 index 0000000..3a4e1a0 --- /dev/null +++ b/docs/utils.md @@ -0,0 +1,32 @@ +# utils.py + +This module provides geospatial utility functions supporting hazard +impact analysis. It includes raster mask generation, exposure computation, +and zonal statistics aggregation. + +## compute_hazard_mask(hazard_raster, population_raster, hazard_threshold) + +Reprojects a hazard raster to match the CRS of a population raster. +Applies a threshold to create a binary mask. + +## compute_population_exposure(hazard_mask_raster, population_raster) + +Multiplies the population raster by the binary hazard mask to compute exposed +population counts. + +## prep_data(hazard_raster_paths, population_raster_path, hazard_threshold) + +Loads all hazard and population rasters, applies mask and exposure computation +to return a data array per hazard. + +## compute_zonal_stat(raster, vector, stats) + +Uses rasterstats.zonal_stats to calculate aggregate statistics for a raster +over administrative boundaries. + +## compute_hazard_population_exposure(hazard_raster, population_raster, hazard_threshold) + +Computes population exposure, total population, and ratio of exposure per +admin unit.
+ +## compute_binary_zonal_stat(raster_path, vector_path, threshold) diff --git a/src/utils/utils.py index d80b337..2f45c61 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -68,9 +68,9 @@ def compute_zonal_stat( data_value: np.ndarray, exp_affine: Affine, admin_df: gpd.GeoDataFrame, - agg: str, # noqa: E501 + agg: str, ) -> List[float]: - """ "" + """ Compute zonal statistics for raster/pop the exposure data" """ stats = zonal_stats(admin_df, data_value, affine=exp_affine, stats=agg, nodata=-999) @@ -81,9 +81,11 @@ def compute_hazard_population_exposure( admin_df: gpd.GeoDataFrame, pop_raster: xr.DataArray, - pop_exp_raster: xr.DataArray, # noqa: E501 + pop_exp_raster: xr.DataArray, ) -> pd.DataFrame: - + """ + Compute population exposure from hazard mask and population raster + """ pop_arr = pop_raster[0].values pop_affine = pop_raster.rio.transform()