From 1c06a799d31c44765507ed50df099f2b9a504e22 Mon Sep 17 00:00:00 2001 From: jn Date: Sat, 7 Feb 2026 11:02:17 +0100 Subject: [PATCH 1/2] improves vignettes --- vignettes/articles/v2-evaluation.Rmd | 39 +++++++++++++++++----------- vignettes/articles/v2-intro.Rmd | 33 ++++++++++++++++++----- vignettes/articles/v2-parameters.Rmd | 28 +++++++++++++++++--- 3 files changed, 75 insertions(+), 25 deletions(-) diff --git a/vignettes/articles/v2-evaluation.Rmd b/vignettes/articles/v2-evaluation.Rmd index 24e262f..5d925fb 100644 --- a/vignettes/articles/v2-evaluation.Rmd +++ b/vignettes/articles/v2-evaluation.Rmd @@ -19,6 +19,15 @@ knitr::opts_chunk$set( This vignette shows how to evaluate supercells and interpret the diagnostics. The metrics quantify spatial compactness, value homogeneity, and their balance. +The metrics are derived from the same combined distance used during segmentation: + +$$ +D = \sqrt{\left(\frac{d_s}{\text{step}}\right)^2 + \left(\frac{d_v}{c}\right)^2} +$$ + +where $d_s$ is the spatial distance in grid-cell units, $d_v$ is the value-space distance defined by `dist_fun`, and $c$ is `compactness`. + + All examples use the `volcano` raster for simplicity, but the same workflow applies to multi-layer rasters. ```{r} @@ -36,14 +45,14 @@ Pixel metrics provide a map of distances from each pixel to its supercell center They are useful for diagnosing local failures, such as areas where the supercells are too large or too irregular. Pixel metrics are provided with the `sc_metrics_pixels()` function, which accepts the original raster and the supercell polygons^[This function reads supercells parameters internally from the `supercell` object, so it does not require the `step`, `compactness`, or `dist_fun` arguments], and returns a multi-layer raster with the requested metrics. 
-The pixel metrics include four layers^[Use the `metrics` argument to request a subset of metrics], each with a specific interpretation: +The pixel metrics include four layers^[Use the `metrics` argument to request a subset of metrics], each with a specific interpretation and a simple definition: -1. `spatial` is the distance from each pixel to its supercell center in grid-cell units (unless the input supercells were created with `step_unit = "map"`, in which case distances are reported in map units). +1. `spatial`: $d_s$, the distance from each pixel to its supercell center in grid-cell units (unless the input supercells were created with `step_unit = "map"`, in which case distances are reported in map units). Lower spatial values indicate cells that are closer to the center and more compact supercells, while higher values indicate cells that are farther from the center and may indicate irregular shapes or outliers. -2. `value` is the distance from each pixel to its supercell center in the value space defined by your `dist_fun`. +2. `value`: $d_v$, the distance from each pixel to its supercell center in the value space defined by your `dist_fun`. Lower value distances indicate more homogeneous supercells, while higher values indicate more heterogeneous supercells or outliers. -3. `combined` blends spatial and value terms using `step` and `compactness`, and is mainly useful for ranking pixels or supercells within a single run -- it is not directly comparable across runs with different parameters or distance functions. -4. `balance` is the signed log ratio of scaled value to spatial distance, so negative values indicate spatial dominance and positive values indicate value dominance. +3. `combined`: $D = \sqrt{\left(d_s/\text{step}\right)^2 + \left(d_v/c\right)^2}$, the combined distance used to assign pixels to centers. 
This is mainly useful for ranking pixels or supercells within a single run -- it is not directly comparable across runs with different parameters or distance functions. +4. `balance`: $\log\left(\frac{d_v/c}{d_s/\text{step}}\right)$, the signed log ratio of scaled value to scaled spatial distance. Negative values indicate spatial dominance and positive values indicate value dominance. By default, `sc_metrics_pixels()` returns `spatial`, `value`, `combined`, and `balance`, and uses `scale = TRUE`. With scaling on, the spatial and value layers are returned as `spatial_scaled` and `value_scaled`. @@ -62,13 +71,13 @@ Metrics on the supercell level summarize each polygon with a single value for ea They are useful for quality screening, outlier detection, or joining metrics to attributes. They are calculated with the `sc_metrics_supercells()` function, which accepts the original raster and the supercell polygons, and returns an `sf` object with one row per supercell and the requested metrics as columns. -The supercell metrics include four variables, each with a specific interpretation: +The supercell metrics include four variables, each with a specific interpretation and definition as per-supercell means: -1. `mean_spatial_dist` is the average spatial distance from pixels to the supercell center. Lower values indicate more compact shapes. -2. `mean_value_dist` is the average value distance from pixels to the supercell center in the value space defined by `dist_fun`. +1. `mean_spatial_dist`: $\overline{d_s}$, the average spatial distance from pixels to the supercell center. Lower values indicate more compact shapes. +2. `mean_value_dist`: $\overline{d_v}$, the average value distance from pixels to the supercell center in the value space defined by `dist_fun`. Lower values indicate more homogeneous supercells. -3. `mean_combined_dist` blends spatial and value terms using `step` and `compactness`, and is mainly useful for ranking supercells within a single run. -4. 
`balance` is the signed log ratio of scaled value to scaled spatial distance, so negative values indicate spatial dominance and positive values indicate value dominance. +3. `mean_combined_dist`: $\overline{D}$, the average combined distance within each supercell. +4. `balance`: $\log\left(\frac{\overline{d_v}/c}{\overline{d_s}/\text{step}}\right)$, the signed log ratio of the scaled mean value distance to the scaled mean spatial distance. Negative values indicate spatial dominance and positive values indicate value dominance. ```{r} supercell_metrics <- sc_metrics_supercells(vol, vol_sc) @@ -88,14 +97,14 @@ Global metrics provide a single-row summary and are best for comparing different They are calculated with `sc_metrics_global()`, which accepts the original raster and the supercell polygons and returns a `data.frame`. In these metrics, each supercell contributes equally to the global averages.^[If you want to weight by area or number of pixels, you can use the supercell-level metrics and calculate your own weighted averages. Also, let us know if you want a built-in option for weighted global metrics.] -The global metrics include the same four distance summaries as the supercell metrics: +The global metrics include the same four distance summaries as the supercell metrics, averaged across supercells (each supercell has equal weight): -1. `mean_spatial_dist` is the average spatial distance from pixels to their supercell centers. +1. `mean_spatial_dist`: the mean of per-supercell $\overline{d_s}$. Lower values indicate more compact shapes. -2. `mean_value_dist` is the average value distance from pixels to their supercell centers in the value space defined by `dist_fun`. +2. `mean_value_dist`: the mean of per-supercell $\overline{d_v}$ in the value space defined by `dist_fun`. Lower values indicate more homogeneous supercells. -3. `mean_combined_dist` blends spatial and value terms using `step` and `compactness`, and is mainly useful for ranking or comparing runs. -4. 
`balance` is the mean signed log ratio of scaled value to scaled spatial distance, so negative values indicate spatial dominance and positive values indicate value dominance. +3. `mean_combined_dist`: the mean of per-supercell $\overline{D}$, useful for ranking or comparing runs. +4. `balance`: $\overline{\log\left(\frac{\overline{d_v}/c}{\overline{d_s}/\text{step}}\right)}$, the mean of per-supercell balance values. By default, `sc_metrics_global()` returns the scaled distances. When `scale = TRUE`, the spatial and value summaries are returned as `mean_spatial_dist_scaled` and `mean_value_dist_scaled`. diff --git a/vignettes/articles/v2-intro.Rmd b/vignettes/articles/v2-intro.Rmd index d331241..dfb58f1 100644 --- a/vignettes/articles/v2-intro.Rmd +++ b/vignettes/articles/v2-intro.Rmd @@ -47,6 +47,25 @@ Alternatively, the `"auto"` option for `compactness` enables SLIC0-style adaptiv To assess quality of the resulting supercells, use `sc_metrics_pixels()` for pixel-level distances, `sc_metrics_supercells()` for per-supercell summaries, and `sc_metrics_global()` for a general overview. These metrics help compare different parameter settings or input preprocessing choices. +# Workflow summary + +Basic workflows follow the same pattern: choose scale (`step` or `k`), tune or set `compactness`, create supercells, and evaluate. + +```{r} +#| message: false +# read data +vol <- terra::rast(system.file("raster/volcano.tif", package = "supercells")) + +# choose scale and tune compactness +tune <- supercells::sc_tune_compactness(vol, step = 8, metrics = "local") + +# create supercells +vol_sc <- supercells::sc_slic(vol, step = 8, compactness = tune$compactness) + +# evaluate +metrics_global <- supercells::sc_metrics_global(vol, vol_sc) +``` + # Minimal example The goal of this example is to derive supercells from a raster and visualize them. 
@@ -148,11 +167,13 @@ They are useful for comparing parameter settings across multiple runs, such as a global_metrics ``` - +To learn more about the package and its capabilities, check out the following articles: + +- [**Choosing parameters**: a practical guide to `step`, `k`, and `compactness`.](https://jakubnowosad.com/supercells/articles/v2-parameters.html) +- [**Evaluation and diagnostics**: how to read and compare pixel, supercell, and global metrics.](https://jakubnowosad.com/supercells/articles/v2-evaluation.html) +- [**Benchmarks**: performance notes.](https://jakubnowosad.com/supercells/articles/v2-benchmarks.html) +- [**Changes since v1**: a concise summary of what’s new in v2.](https://jakubnowosad.com/supercells/articles/v2-changes-since-v1.html) + +# References diff --git a/vignettes/articles/v2-parameters.Rmd b/vignettes/articles/v2-parameters.Rmd index 982c32a..ac9d977 100644 --- a/vignettes/articles/v2-parameters.Rmd +++ b/vignettes/articles/v2-parameters.Rmd @@ -29,6 +29,22 @@ library(terra) vol <- terra::rast(system.file("raster/volcano.tif", package = "supercells")) ``` +# Supercells algorithm + +The supercells algorithm is an iterative process that starts with initial centers and assigns pixels to the nearest center based on a combined distance that incorporates both value similarity and spatial proximity. +After the initial assignment, the algorithm updates the centers by averaging the values of the assigned pixels and recalculating the combined distance for the next iteration. +This process continues until the specified number of iterations is reached. + +In this package, the combined distance follows the standard SLIC form: + +$$ +D = \sqrt{\left(\frac{d_s}{\text{step}}\right)^2 + \left(\frac{d_v}{c}\right)^2} +$$ + +where $d_s$ is the spatial distance in grid-cell units, $d_v$ is the value-space distance (from `dist_fun`), and $c$ is the `compactness` value. 
+When `step_unit = "map"`, the `step` value is converted to cells before segmentation; distances are still computed in grid-cell units. +Larger `compactness` down-weights the value term, making shapes more regular, while smaller values emphasize value similarity. + # Choosing step or k You can control the number and size of supercells using either `step` or `k`. @@ -72,8 +88,6 @@ plot(sf::st_geometry(centers_preview), add = TRUE, pch = 3, col = "red") 2. also custom centers --> -A quick visual comparison of two step values can also be helpful. - ```{r} #| fig-show: "hold" sc_step_small <- sc_slic(vol, step = 6, compactness = 5) @@ -108,8 +122,14 @@ plot(sc_compact_high[0], add = TRUE, border = "red", lwd = 0.5) The `sc_tune_compactness()` function estimates a reasonable starting value from a short run of the algorithm. It supports two summaries with `metrics = "global"` and `metrics = "local"`. -The global version looks at overall balance, while the local version uses a neighborhood-based estimate. - +The global version looks at overall balance between value and spatial distances, while the local version uses a neighborhood-based value scale. +More precisely: + +- **Global**: runs a short pilot segmentation (`iter = 1` by default), computes pixel-level `spatial` and `value` distances, then takes their medians over pixels. The compactness is estimated as `compactness = (median(value) / value_scale) * step / median(spatial)`. + This aligns the median value and spatial terms in the combined distance. +- **Local**: computes, for each center, the mean value distance within a local $2 \times \text{step}$ window, then returns the median of those per-center means (after dividing by `value_scale`). + This yields a compactness tied to local value variability, without explicitly using spatial distances. + The local estimate is often more stable for heterogeneous rasters. 
```{r} From 0f494cbb3025744f19f4f1e2dfb1e1c042ae533c Mon Sep 17 00:00:00 2001 From: jn Date: Sat, 7 Feb 2026 11:02:36 +0100 Subject: [PATCH 2/2] updates balance metric --- R/sc_metrics_global.R | 4 ++-- man/sc_metrics_global.Rd | 2 +- src/metrics_global.cpp | 17 ++++++++++++----- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/R/sc_metrics_global.R b/R/sc_metrics_global.R index 4406427..6eeae86 100644 --- a/R/sc_metrics_global.R +++ b/R/sc_metrics_global.R @@ -43,7 +43,7 @@ #' value and spatial distances using `compactness` and `step`, averaged across #' supercells. Returned as `mean_combined_dist`.} #' \item{balance}{Mean signed log ratio of scaled value distance to scaled -#' spatial distance; 0 indicates balance.} +#' spatial distance (averaged across supercells); 0 indicates balance.} #' } #' When `scale = TRUE`, `mean_spatial_dist` and `mean_value_dist` are returned as #' `mean_spatial_dist_scaled` and `mean_value_dist_scaled`. @@ -77,7 +77,7 @@ sc_metrics_global = function(x, sc, mean_value_dist = out[["mean_value_dist"]] mean_spatial_dist = out[["mean_spatial_dist"]] * prep$spatial_scale mean_combined_dist = out[["mean_combined_dist"]] - balance = log(out[["balance"]]) + balance = out[["balance"]] if (isTRUE(scale)) { if (isTRUE(prep$adaptive_compactness)) { diff --git a/man/sc_metrics_global.Rd b/man/sc_metrics_global.Rd index a3bcac7..ba737e4 100644 --- a/man/sc_metrics_global.Rd +++ b/man/sc_metrics_global.Rd @@ -61,7 +61,7 @@ their supercell centers, averaged across supercells; units are grid cells value and spatial distances using \code{compactness} and \code{step}, averaged across supercells. 
Returned as \code{mean_combined_dist}.} \item{balance}{Mean signed log ratio of scaled value distance to scaled -spatial distance; 0 indicates balance.} +spatial distance (averaged across supercells); 0 indicates balance.} } When \code{scale = TRUE}, \code{mean_spatial_dist} and \code{mean_value_dist} are returned as \code{mean_spatial_dist_scaled} and \code{mean_value_dist_scaled}. diff --git a/src/metrics_global.cpp b/src/metrics_global.cpp index bcdd091..00e1ce7 100644 --- a/src/metrics_global.cpp +++ b/src/metrics_global.cpp @@ -96,7 +96,8 @@ cpp11::list sc_metrics_global_cpp(cpp11::integers_matrix<> clusters, double mean_value_sum = 0.0; double mean_spatial_sum = 0.0; double mean_combined_sum = 0.0; - double balance_ratio_sum = 0.0; + double balance_log_sum = 0.0; + int balance_count = 0; double mean_value_scaled_sum = 0.0; int active_clusters = 0; @@ -120,7 +121,11 @@ cpp11::list sc_metrics_global_cpp(cpp11::integers_matrix<> clusters, mean_value_scaled_sum += mv / denom; } if (denom != 0.0 && step != 0 && ms > 0.0) { - balance_ratio_sum += (mv / denom) / (ms / step); + double ratio = (mv / denom) / (ms / step); + if (ratio > 0.0) { + balance_log_sum += std::log(ratio); + balance_count += 1; + } } active_clusters += 1; } @@ -128,15 +133,17 @@ cpp11::list sc_metrics_global_cpp(cpp11::integers_matrix<> clusters, double mean_value = NA_REAL; double mean_spatial = NA_REAL; double mean_combined = NA_REAL; - double balance_ratio_mean = NA_REAL; + double balance_log_mean = NA_REAL; double mean_value_scaled = NA_REAL; if (active_clusters > 0) { mean_value = mean_value_sum / active_clusters; mean_spatial = mean_spatial_sum / active_clusters; mean_combined = mean_combined_sum / active_clusters; - balance_ratio_mean = balance_ratio_sum / active_clusters; mean_value_scaled = mean_value_scaled_sum / active_clusters; } + if (balance_count > 0) { + balance_log_mean = balance_log_sum / balance_count; + } cpp11::writable::list result(6); result.names() = {"n_supercells", 
"mean_value_dist", "mean_spatial_dist", @@ -145,7 +152,7 @@ cpp11::list sc_metrics_global_cpp(cpp11::integers_matrix<> clusters, result.at(1) = cpp11::as_sexp(mean_value); result.at(2) = cpp11::as_sexp(mean_spatial); result.at(3) = cpp11::as_sexp(mean_combined); - result.at(4) = cpp11::as_sexp(balance_ratio_mean); + result.at(4) = cpp11::as_sexp(balance_log_mean); result.at(5) = cpp11::as_sexp(mean_value_scaled); return result; }