From edf20e2267066cf11d440ef88dcf67715ef6b7cd Mon Sep 17 00:00:00 2001 From: mgovorcin Date: Thu, 23 Apr 2026 10:08:08 -0700 Subject: [PATCH] Revert "Add NISAR geometry, mask, and tropo match utilities" due to precommit test failures --- src/opera_utils/nisar/__init__.py | 8 +- src/opera_utils/nisar/_download.py | 116 +------------------ src/opera_utils/nisar/_geometry.py | 175 ----------------------------- src/opera_utils/nisar/_mask.py | 138 ----------------------- src/opera_utils/tropo/__init__.py | 9 +- src/opera_utils/tropo/_match.py | 143 ----------------------- 6 files changed, 3 insertions(+), 586 deletions(-) delete mode 100644 src/opera_utils/nisar/_geometry.py delete mode 100644 src/opera_utils/nisar/_mask.py delete mode 100644 src/opera_utils/tropo/_match.py diff --git a/src/opera_utils/nisar/__init__.py b/src/opera_utils/nisar/__init__.py index c7941e25..53bd9ca5 100644 --- a/src/opera_utils/nisar/__init__.py +++ b/src/opera_utils/nisar/__init__.py @@ -2,10 +2,8 @@ from __future__ import annotations -from ._download import download_gslcs, run_download -from ._geometry import prepare_incidence_angle +from ._download import run_download from ._gunw_search import search_gunw -from ._mask import get_gslc_mask, get_gunw_mask from ._info import ( find_intersecting_frames, get_frame_latlon_bounds, @@ -30,13 +28,9 @@ "OrbitDirection", "OutOfBoundsError", "UrlType", - "download_gslcs", "find_intersecting_frames", "get_frame_latlon_bounds", - "get_gslc_mask", - "get_gunw_mask", "get_nisar_bbox", - "prepare_incidence_angle", "load_gpkg", "nisar_frame_info", "open_file", diff --git a/src/opera_utils/nisar/_download.py b/src/opera_utils/nisar/_download.py index 7c7384cb..1b18e9ee 100644 --- a/src/opera_utils/nisar/_download.py +++ b/src/opera_utils/nisar/_download.py @@ -3,21 +3,15 @@ from __future__ import annotations import logging -from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime from pathlib import Path -import netrc -import shutil - import h5py import numpy as np -import requests from shapely import from_wkt from tqdm.auto import tqdm from tqdm.contrib.concurrent import process_map -from opera_utils._types import PathOrStr from opera_utils.nisar._product import ( NISAR_GSLC_GRIDS, NISAR_GSLC_IDENTIFICATION, @@ -35,7 +29,7 @@ logger = logging.getLogger("opera_utils") -__all__ = ["download_gslcs", "process_file", "run_download"] +__all__ = ["process_file", "run_download"] def process_file( @@ -272,114 +266,6 @@ def _extract_subset_from_h5( dst.attrs["source_file"] = source_name -def download_gslcs( - output_dir: PathOrStr, - bbox: tuple[float, float, float, float] | None = None, - track_frame_number: int | None = None, - relative_orbit_number: int | None = None, - orbit_direction: str | None = None, - start_datetime: datetime | None = None, - end_datetime: datetime | None = None, - max_jobs: int = 4, - short_name: str = "NISAR_L2_GSLC_BETA_V1", -) -> list[Path]: - """Download full NISAR GSLC files matching the given search criteria. - - Parameters - ---------- - output_dir : Path | str - Directory to save downloaded files to. - bbox : tuple[float, float, float, float], optional - Bounding box as (west, south, east, north) in degrees lon/lat. - track_frame_number : int, optional - Track frame number (stays constant for repeat passes). - relative_orbit_number : int, optional - Relative orbit number to filter by. - orbit_direction : str, optional - Orbit direction: "A" for ascending, "D" for descending. - start_datetime : datetime, optional - Start datetime of the product search. - end_datetime : datetime, optional - End datetime of the product search. - max_jobs : int, optional - Number of parallel downloads, by default 4. - short_name : str, optional - CMR collection short name, by default "NISAR_L2_GSLC_BETA_V1". - - Returns - ------- - list[Path] - List of paths to the downloaded files. - - Examples - -------- - Download by bounding box: - - >>> download_gslcs( # doctest: +SKIP - ... output_dir="./gslc_data", - ... bbox=(-120.5, 35.0, -119.5, 36.0), - ... ) - - Download by track/orbit: - - >>> download_gslcs( # doctest: +SKIP - ... output_dir="./gslc_data", - ... relative_orbit_number=64, - ... orbit_direction="A", - ... ) - - """ - output_dir = Path(output_dir) - products = search( - bbox=bbox, - track_frame_number=track_frame_number, - relative_orbit_number=relative_orbit_number, - orbit_direction=orbit_direction, - start_datetime=start_datetime, - end_datetime=end_datetime, - url_type=UrlType.HTTPS, - short_name=short_name, - ) - - if not products: - logger.warning("No GSLC products found matching search criteria") - return [] - - logger.info(f"Found {len(products)} GSLC products to download") - output_dir.mkdir(exist_ok=True, parents=True) - - auth = netrc.netrc().authenticators("urs.earthdata.nasa.gov") - if auth is None: - msg = "No .netrc entry found for urs.earthdata.nasa.gov" - raise ValueError(msg) - username, _, password = auth - session = requests.Session() - session.auth = (username, password) - - def _download_one(product: GslcProduct) -> Path: - url = str(product.filename) - out_path = output_dir / Path(url).name - if out_path.exists(): - logger.info(f"Skipped (exists): {out_path.name}") - return out_path - logger.debug(f"Downloading {out_path.name}") - with session.get(url, stream=True) as r: - r.raise_for_status() - with open(out_path, "wb") as f: - shutil.copyfileobj(r.raw, f) - return out_path - - out_paths: list[Path] = [] - with ThreadPoolExecutor(max_workers=max_jobs) as pool: - futures = {pool.submit(_download_one, p): p for p in products} - for future in tqdm( - as_completed(futures), total=len(futures), desc="Downloading GSLC" - ): - out_paths.append(future.result()) - - return sorted(out_paths) - - def _get_rowcol_slice( product: GslcProduct, bbox: tuple[float, float, float, float] | None, diff --git a/src/opera_utils/nisar/_geometry.py b/src/opera_utils/nisar/_geometry.py deleted file mode 100644 index 9a2a856e..00000000 --- a/src/opera_utils/nisar/_geometry.py +++ /dev/null @@ -1,175 +0,0 @@ -"""Incidence angle and LOS unit vector computation from NISAR GSLC radarGrid.""" - -from __future__ import annotations - -import multiprocessing as mp -from pathlib import Path - -import h5py -import numpy as np -import rioxarray as rxr -import xarray as xr -from pyproj import Transformer -from rasterio.enums import Resampling -from scipy.interpolate import RegularGridInterpolator -from tqdm import tqdm - - -def _interp_chunk( - args: tuple, -) -> tuple[slice, list[np.ndarray]]: - sl, y_vals, x_vals, dem_chunk, heights, y_rg_inc, x_rg, arrays_3d, src_epsg, dem_crs = args - transformer = Transformer.from_crs(dem_crs, f"EPSG:{src_epsg}", always_xy=True) - yy_c, xx_c = np.meshgrid(y_vals, x_vals, indexing="ij") - xx_rg, yy_rg = transformer.transform(xx_c.ravel(), yy_c.ravel()) - pts = np.column_stack([dem_chunk.ravel(), yy_rg, xx_rg]) - results = [] - for arr in arrays_3d: - rgi = RegularGridInterpolator( - (heights, y_rg_inc, x_rg), - arr, - method="linear", - bounds_error=False, - fill_value=np.nan, - ) - results.append(rgi(pts).reshape(dem_chunk.shape).astype("float32")) - return sl, results - - -def prepare_incidence_angle( - gslc_path: str | Path, - dem_path: str | Path, - ref_tif: str | Path, - output_path: str | Path = "incidence_angle_surface.tif", - los_output_path: str | Path = "los_unit_vector.tif", - chunk_size: int = 200, - n_workers: int = 8, -) -> tuple[Path, Path]: - """Compute incidence angle and LOS unit vector rasters from a NISAR GSLC. - - Reads the 3-D radarGrid datacube from ``gslc_path``, interpolates it to - the DEM surface height at each pixel, then reprojects the result to match - ``ref_tif``. - - Parameters - ---------- - gslc_path : str or Path - Path to a NISAR GSLC HDF5 file containing - ``/science/LSAR/GSLC/metadata/radarGrid``. - dem_path : str or Path - Path to a DEM GeoTIFF covering the area of interest. - ref_tif : str or Path - Reference GeoTIFF whose grid the outputs are reprojected to match. - output_path : str or Path - Output path for the incidence angle raster (single band, degrees). - los_output_path : str or Path - Output path for the LOS unit vector raster - (3 bands: east, north, up). - chunk_size : int - Number of DEM rows to process per parallel chunk. - n_workers : int - Number of worker processes for parallel interpolation. - - Returns - ------- - tuple[Path, Path] - Paths to the written incidence angle and LOS unit vector rasters. - - """ - with h5py.File(gslc_path) as f: - rg = f["science/LSAR/GSLC/metadata/radarGrid"] - heights = rg["heightAboveEllipsoid"][:] - x_rg = rg["xCoordinates"][:] - y_rg = rg["yCoordinates"][:] - inc_3d = rg["incidenceAngle"][:] - los_x_3d = rg["losUnitVectorX"][:] - los_y_3d = rg["losUnitVectorY"][:] - src_epsg = int(rg["projection"].attrs["epsg_code"]) - - assert np.all(np.diff(heights) > 0), "heights must be strictly increasing" - assert np.all(np.diff(x_rg) > 0), "x_rg must be strictly increasing" - - t_to_rg = Transformer.from_crs("EPSG:4326", f"EPSG:{src_epsg}", always_xy=True) - dem_da = rxr.open_rasterio(dem_path, masked=True).squeeze() - dem_val = dem_da.values - dem_crs = dem_da.rio.crs - - dem_xs = [float(dem_da.x.min()), float(dem_da.x.max())] - dem_ys = [float(dem_da.y.min()), float(dem_da.y.max())] - t_dem_to_rg = Transformer.from_crs(dem_crs, f"EPSG:{src_epsg}", always_xy=True) - dem_xs_rg, dem_ys_rg = t_dem_to_rg.transform(dem_xs, dem_ys) - - x_overlap = max(dem_xs_rg) > x_rg.min() and min(dem_xs_rg) < x_rg.max() - y_overlap = max(dem_ys_rg) > y_rg.min() and min(dem_ys_rg) < y_rg.max() - if not (x_overlap and y_overlap): - raise ValueError( - "DEM and radarGrid do not overlap. " - "Check that gslc_path covers your area of interest." - ) - - # Flip y axis so RegularGridInterpolator gets strictly increasing coords - y_rg_flip = y_rg[::-1] - inc_3d_flip = inc_3d[:, ::-1, :] - los_x_3d_flip = los_x_3d[:, ::-1, :] - los_y_3d_flip = los_y_3d[:, ::-1, :] - - dem_crs_str = str(dem_crs) - n_rows = dem_da.shape[0] - slices = [ - slice(i, min(i + chunk_size, n_rows)) for i in range(0, n_rows, chunk_size) - ] - tasks = [ - ( - sl, - dem_da.y.values[sl], - dem_da.x.values, - dem_val[sl], - heights, - y_rg_flip, - x_rg, - [inc_3d_flip, los_x_3d_flip, los_y_3d_flip], - src_epsg, - dem_crs_str, - ) - for sl in slices - ] - - inc_surface = np.full(dem_da.shape, np.nan, dtype="float32") - los_east_surf = np.full(dem_da.shape, np.nan, dtype="float32") - los_north_surf = np.full(dem_da.shape, np.nan, dtype="float32") - - with mp.get_context("fork").Pool(processes=n_workers) as pool: - for sl, results in tqdm( - pool.imap(_interp_chunk, tasks), - total=len(tasks), - desc="incidence + LOS interp", - ): - inc_surface[sl] = results[0] - los_east_surf[sl] = results[1] - los_north_surf[sl] = results[2] - - los_up_surf = np.sqrt( - np.clip(1.0 - los_east_surf**2 - los_north_surf**2, 0, None) - ).astype("float32") - - ref = rxr.open_rasterio(ref_tif, masked=True).squeeze() - - inc_da = dem_da.copy(data=inc_surface).rio.write_crs(dem_crs) - inc_matched = inc_da.rio.reproject_match(ref, resampling=Resampling.bilinear) - inc_matched.rio.to_raster(output_path, compress="deflate", dtype="float32") - - los_stack = np.stack([los_east_surf, los_north_surf, los_up_surf], axis=0) - los_da = xr.DataArray( - los_stack, - dims=("band", "y", "x"), - coords={"band": [1, 2, 3], "y": dem_da.y.values, "x": dem_da.x.values}, - ).rio.write_crs(dem_crs) - los_matched = los_da.rio.reproject_match(ref, resampling=Resampling.bilinear) - los_matched.rio.to_raster( - los_output_path, - compress="deflate", - dtype="float32", - descriptions=["los_east", "los_north", "los_up"], - ) - - return Path(output_path), Path(los_output_path) diff --git a/src/opera_utils/nisar/_mask.py b/src/opera_utils/nisar/_mask.py deleted file mode 100644 index cb2ad5b9..00000000 --- a/src/opera_utils/nisar/_mask.py +++ /dev/null @@ -1,138 +0,0 @@ -"""Extract validity masks from NISAR GSLC and GUNW HDF5 products.""" - -from __future__ import annotations - -from pathlib import Path -from typing import Literal - -import h5py -import numpy as np -import rioxarray as rxr -import xarray as xr - - -def get_gslc_mask( - gslc_path: str | Path, - frequency: str = "frequencyA", - output_path: str | Path | None = None, -) -> xr.DataArray: - """Extract the validity mask from a NISAR GSLC product. - - The GSLC mask encodes the subswath number of valid samples. - Valid pixels have value >= 1; invalid pixels are 0; fill (outside - radar extent) is 255. This function returns a boolean mask where - ``True`` means valid (subswath value >= 1). - - Parameters - ---------- - gslc_path : str or Path - Path to a NISAR GSLC HDF5 file. - frequency : str - Frequency layer to read (``"frequencyA"`` or ``"frequencyB"``). - output_path : str or Path, optional - If provided, write the boolean mask as a GeoTIFF (uint8, 1=valid). - - Returns - ------- - xr.DataArray - Boolean DataArray (True = valid pixel) on the GSLC native grid, - with CRS and spatial coordinates attached. - - """ - gslc_path = Path(gslc_path) - grp_path = f"science/LSAR/GSLC/grids/{frequency}" - - with h5py.File(gslc_path) as f: - grp = f[grp_path] - raw = grp["mask"][:] - x_coords = grp["xCoordinates"][:] - y_coords = grp["yCoordinates"][:] - epsg = int(grp["projection"][()]) - - # valid = subswath value >= 1 (0 = invalid focus, 255 = fill/nodata) - valid = (raw >= 1) & (raw != 255) - - da = xr.DataArray( - valid.astype(np.uint8), - dims=("y", "x"), - coords={"y": y_coords, "x": x_coords}, - attrs={"long_name": "valid sample mask", "flag_values": "0,1", - "flag_meanings": "invalid valid"}, - ).rio.write_crs(f"EPSG:{epsg}") - - if output_path is not None: - da.rio.write_nodata(255, inplace=True) - da.rio.to_raster(output_path, dtype="uint8", compress="deflate") - - return da - - -GunwLayer = Literal["wrappedInterferogram", "unwrappedInterferogram", "pixelOffsets"] - - -def get_gunw_mask( - gunw_path: str | Path, - layer: GunwLayer = "unwrappedInterferogram", - frequency: str = "frequencyA", - output_path: str | Path | None = None, -) -> xr.DataArray: - """Extract a validity mask from a NISAR GUNW product. - - The GUNW mask is a 3-digit decimal bitfield (uint8): - - hundreds digit: water flag (1 = water, 0 = non-water) - - tens digit: reference RSLC subswath (0 = invalid) - - units digit: secondary RSLC subswath (0 = invalid) - - A pixel is considered valid when both subswath digits are non-zero - and the water flag is 0. Fill value is 255. - - Parameters - ---------- - gunw_path : str or Path - Path to a NISAR GUNW HDF5 file. - layer : {"unwrappedInterferogram", "wrappedInterferogram", "pixelOffsets"} - Which grid layer's mask to read. Default is ``"unwrappedInterferogram"``. - frequency : str - Frequency layer to read (currently only ``"frequencyA"`` in GUNW). - output_path : str or Path, optional - If provided, write the boolean mask as a GeoTIFF (uint8, 1=valid). - - Returns - ------- - xr.DataArray - Boolean DataArray (True = valid, non-water pixel) with CRS and - spatial coordinates attached. - - """ - gunw_path = Path(gunw_path) - grp_path = f"science/LSAR/GUNW/grids/{frequency}/{layer}" - - with h5py.File(gunw_path) as f: - grp = f[grp_path] - raw = grp["mask"][:] - x_coords = grp["xCoordinates"][:] - y_coords = grp["yCoordinates"][:] - epsg = int(grp["projection"][()]) - - # Decode 3-digit decimal encoding - water_flag = raw // 100 # hundreds digit - ref_subswath = (raw % 100) // 10 # tens digit - sec_subswath = raw % 10 # units digit - fill = raw == 255 - - valid = (water_flag == 0) & (ref_subswath != 0) & (sec_subswath != 0) & ~fill - - da = xr.DataArray( - valid.astype(np.uint8), - dims=("y", "x"), - coords={"y": y_coords, "x": x_coords}, - attrs={"long_name": "valid sample mask", "flag_values": "0,1", - "flag_meanings": "invalid valid", - "source_layer": layer}, - ).rio.write_crs(f"EPSG:{epsg}") - - if output_path is not None: - da.rio.write_nodata(255, inplace=True) - da.rio.to_raster(output_path, dtype="uint8", compress="deflate") - - return da diff --git a/src/opera_utils/tropo/__init__.py b/src/opera_utils/tropo/__init__.py index e26cef36..c094b7c5 100644 --- a/src/opera_utils/tropo/__init__.py +++ b/src/opera_utils/tropo/__init__.py @@ -2,12 +2,5 @@ from ._apply import apply_tropo from ._crop import crop_tropo -from ._match import apply_tropo_correction, match_and_apply_tropo, read_reference_point -__all__ = [ - "apply_tropo", - "apply_tropo_correction", - "crop_tropo", - "match_and_apply_tropo", - "read_reference_point", -] +__all__ = ["apply_tropo", "crop_tropo"] diff --git a/src/opera_utils/tropo/_match.py b/src/opera_utils/tropo/_match.py deleted file mode 100644 index 28f73ac9..00000000 --- a/src/opera_utils/tropo/_match.py +++ /dev/null @@ -1,143 +0,0 @@ -"""Match and apply tropospheric corrections to interferograms by secondary date.""" - -from __future__ import annotations - -import re -from pathlib import Path - -import numpy as np -import rioxarray as rxr - - -def read_reference_point(reference_point_file: str | Path) -> tuple[int, int]: - """Read ``(row, col)`` from a ``reference_point.txt`` file.""" - row_col = Path(reference_point_file).read_text().strip().split(",") - return int(row_col[0]), int(row_col[1]) - - -def apply_tropo_correction( - ifg_path: str | Path, - tropo_path: str | Path, - output_path: str | Path, - reference_point: tuple[int, int] | None = None, - scale: float = 1.0, -) -> None: - """Subtract a tropospheric delay correction from an interferogram. - - Parameters - ---------- - ifg_path : str or Path - Input interferogram GeoTIFF (radians or metres). - tropo_path : str or Path - Tropospheric correction GeoTIFF on the same or compatible grid. - output_path : str or Path - Output path for the corrected raster. - reference_point : tuple[int, int], optional - ``(row, col)`` pixel used to re-reference the correction before - subtracting, consistent with the timeseries reference point. - scale : float - Multiplicative scale applied to the correction before subtracting. - Default is 1.0 (no scaling). - - """ - ifg = rxr.open_rasterio(ifg_path, masked=True).squeeze() - tropo = rxr.open_rasterio(tropo_path, masked=True).squeeze() - - if not (ifg.rio.crs == tropo.rio.crs and ifg.shape == tropo.shape): - tropo = tropo.rio.reproject_match(ifg) - - tropo_data = tropo.values.astype(np.float32) - - if reference_point is not None: - ref_row, ref_col = reference_point - ref_val = tropo_data[ref_row, ref_col] - if np.isfinite(ref_val): - tropo_data -= ref_val - else: - import warnings - - warnings.warn( - f"Reference pixel ({ref_row}, {ref_col}) is NaN in " - f"{Path(tropo_path).name}; skipping re-referencing.", - stacklevel=2, - ) - - corrected = ifg - (tropo_data * scale) - corrected.rio.write_nodata(float("nan"), inplace=True) - corrected.rio.to_raster(output_path, dtype="float32") - - -def match_and_apply_tropo( - ifg_files: list[Path], - tropo_dir: str | Path, - output_suffix: str = ".iono_tropo_corrected.tif", - input_suffix: str = ".iono_corrected.tif", - reference_point_file: str | Path | None = None, - overwrite: bool = False, - scale: float = 1.0, -) -> list[tuple[Path, Path, Path]]: - """Match interferograms to tropospheric corrections by secondary date and apply. - - Parameters - ---------- - ifg_files : list[Path] - Interferogram GeoTIFF paths named ``{ref}_{sec}``. - tropo_dir : str or Path - Directory containing ``tropo_correction_{sec}T*.tif`` files. - output_suffix : str - Suffix for output corrected files. - input_suffix : str - Suffix to strip from input filenames when building output names. - reference_point_file : str or Path, optional - Path to ``reference_point.txt`` containing ``row,col``. If provided, - the correction is re-referenced to that pixel before subtracting. - overwrite : bool - If False (default), skip files that already exist. - scale : float - Multiplicative scale applied to each correction before subtracting. - - Returns - ------- - list[tuple[Path, Path, Path]] - ``(ifg_path, tropo_path, output_path)`` for each file processed. - - """ - tropo_dir = Path(tropo_dir) - - reference_point: tuple[int, int] | None = None - if reference_point_file is not None: - reference_point = read_reference_point(reference_point_file) - - tropo_by_date: dict[str, Path] = {} - for p in tropo_dir.glob("tropo_correction_*.tif"): - m = re.search(r"tropo_correction_(\d{8})", p.name) - if m: - tropo_by_date[m.group(1)] = p - - processed: list[tuple[Path, Path, Path]] = [] - for ifg_path in ifg_files: - m = re.match(r"(\d{8})_(\d{8})", ifg_path.name) - if not m: - continue - - sec_date = m.group(2) - if sec_date not in tropo_by_date: - continue - - tropo_path = tropo_by_date[sec_date] - stem = ifg_path.name[: -len(input_suffix)] - output_path = ifg_path.parent / f"{stem}{output_suffix}" - - if output_path.exists() and not overwrite: - continue - - apply_tropo_correction( - ifg_path, - tropo_path, - output_path, - reference_point=reference_point, - scale=scale, - ) - processed.append((ifg_path, tropo_path, output_path)) - - return processed