diff --git a/README.md b/README.md index d8ff6c2..affda23 100644 --- a/README.md +++ b/README.md @@ -489,6 +489,8 @@ With `--yac-3d`, ushow generates a dynamic fractional mask from the source field - Data types: Float32, Float64, Int64 - Supports consolidated metadata (.zmetadata) for faster loading - Supports unstructured data (ICON, FESOM, etc.) + - Supports structured grids with 1D coordinate arrays (automatic meshgrid expansion) + - Supports curvilinear grids with 2D coordinate arrays - Reads coordinates from embedded latitude/longitude arrays - Dimension metadata via `_ARRAY_DIMENSIONS` attribute (xarray convention) - Multi-file time concatenation supported via glob patterns diff --git a/src/file_zarr.c b/src/file_zarr.c index e69c4bc..c3cf43b 100644 --- a/src/file_zarr.c +++ b/src/file_zarr.c @@ -29,6 +29,8 @@ static const char *TIME_NAMES[] = {"time", "t", "Time", "TIME", NULL}; static const char *DEPTH_NAMES[] = {"depth", "z", "lev", "level", "nz", "nz1", NULL}; static const char *NODE_NAMES[] = {"values", "nod2", "nod2d", "node", "nodes", "ncells", "npoints", NULL}; +static const char *LAT_NAMES[] = {"lat", "latitude", "y", "nlat", "rlat", "j", NULL}; +static const char *LON_NAMES[] = {"lon", "longitude", "x", "nlon", "rlon", "i", NULL}; /* Internal zarr store data */ typedef struct { @@ -322,11 +324,10 @@ USVar *zarr_scan_variables(USFile *file, USMesh *mesh) { varname[len] = '\0'; /* Skip coordinate variables */ - if (strcasecmp(varname, "latitude") == 0 || - strcasecmp(varname, "longitude") == 0 || - strcasecmp(varname, "lat") == 0 || - strcasecmp(varname, "lon") == 0 || - strcasecmp(varname, "time") == 0) { + if (matches_name_list(varname, LAT_NAMES) || + matches_name_list(varname, LON_NAMES) || + matches_name_list(varname, TIME_NAMES) || + matches_name_list(varname, DEPTH_NAMES)) { continue; } @@ -343,29 +344,17 @@ USVar *zarr_scan_variables(USFile *file, USMesh *mesh) { ZarrArray *za = parse_zarray(array_path, item, zattrs); if (!za) continue; - /* Check if this is a data variable (has spatial dimension matching mesh) */ - int has_spatial = 0; - int node_dim = -1; - for (int d = 0; d < za->ndim; d++) { - if (za->shape[d] == mesh->n_points) { - has_spatial = 1; - node_dim = d; - break; - } - } - if (!has_spatial) { - free_zarray(za); - continue; - } - /* Identify dimensions from _ARRAY_DIMENSIONS attribute */ int time_dim = -1; int depth_dim = -1; + int node_dim = -1; + int lat_dim = -1; + int lon_dim = -1; if (zattrs) { cJSON *dims = cJSON_GetObjectItem(zattrs, "_ARRAY_DIMENSIONS"); if (dims && cJSON_IsArray(dims)) { - for (int d = 0; d < cJSON_GetArraySize(dims); d++) { + for (int d = 0; d < cJSON_GetArraySize(dims) && d < za->ndim; d++) { cJSON *dim_item = cJSON_GetArrayItem(dims, d); if (!dim_item || !cJSON_IsString(dim_item)) continue; const char *dim_name = dim_item->valuestring; @@ -376,11 +365,40 @@ USVar *zarr_scan_variables(USFile *file, USMesh *mesh) { depth_dim = d; } else if (matches_name_list(dim_name, NODE_NAMES)) { node_dim = d; + } else if (matches_name_list(dim_name, LAT_NAMES)) { + lat_dim = d; + } else if (matches_name_list(dim_name, LON_NAMES)) { + lon_dim = d; } } } } + /* Check if this is a data variable (has spatial dimension matching mesh) */ + int has_spatial = 0; + for (int d = 0; d < za->ndim; d++) { + if (za->shape[d] == mesh->n_points) { + has_spatial = 1; + if (node_dim < 0) node_dim = d; + break; + } + } + + /* For structured/curvilinear grids, check if lat*lon dimensions match n_points */ + if (!has_spatial && + mesh->orig_nx > 0 && mesh->orig_ny > 0 && + lat_dim >= 0 && lon_dim >= 0 && + za->shape[lat_dim] == mesh->orig_ny && + za->shape[lon_dim] == mesh->orig_nx) { + has_spatial = 1; + node_dim = (lat_dim > lon_dim) ? lat_dim : lon_dim; + } + + if (!has_spatial) { + free_zarray(za); + continue; + } + /* Create variable structure */ USVar *var = calloc(1, sizeof(USVar)); if (!var) { @@ -463,9 +481,10 @@ USVar *zarr_scan_variables(USFile *file, USMesh *mesh) { if (stat(zarray_path, &st) != 0) continue; /* Skip coordinate variables */ - if (strcasecmp(entry->d_name, "latitude") == 0 || - strcasecmp(entry->d_name, "longitude") == 0 || - strcasecmp(entry->d_name, "time") == 0) { + if (matches_name_list(entry->d_name, LAT_NAMES) || + matches_name_list(entry->d_name, LON_NAMES) || + matches_name_list(entry->d_name, TIME_NAMES) || + matches_name_list(entry->d_name, DEPTH_NAMES)) { continue; } @@ -484,16 +503,45 @@ USVar *zarr_scan_variables(USFile *file, USMesh *mesh) { continue; } + /* Identify dimensions */ + int time_dim = -1, depth_dim = -1, node_dim = -1; + int lat_dim = -1, lon_dim = -1; + if (zattrs) { + cJSON *dims = cJSON_GetObjectItem(zattrs, "_ARRAY_DIMENSIONS"); + if (dims && cJSON_IsArray(dims)) { + for (int d = 0; d < cJSON_GetArraySize(dims) && d < za->ndim; d++) { + cJSON *dim_item = cJSON_GetArrayItem(dims, d); + if (!dim_item || !cJSON_IsString(dim_item)) continue; + const char *dim_name = dim_item->valuestring; + if (matches_name_list(dim_name, TIME_NAMES)) time_dim = d; + else if (matches_name_list(dim_name, DEPTH_NAMES)) depth_dim = d; + else if (matches_name_list(dim_name, NODE_NAMES)) node_dim = d; + else if (matches_name_list(dim_name, LAT_NAMES)) lat_dim = d; + else if (matches_name_list(dim_name, LON_NAMES)) lon_dim = d; + } + } + } + /* Check spatial dimension */ int has_spatial = 0; - int node_dim = -1; for (int d = 0; d < za->ndim; d++) { if (za->shape[d] == mesh->n_points) { has_spatial = 1; - node_dim = d; + if (node_dim < 0) node_dim = d; break; } } + + /* For structured/curvilinear grids, check if lat*lon dimensions match n_points */ + if (!has_spatial && + mesh->orig_nx > 0 && mesh->orig_ny > 0 && + lat_dim >= 0 && lon_dim >= 0 && + za->shape[lat_dim] == mesh->orig_ny && + za->shape[lon_dim] == mesh->orig_nx) { + has_spatial = 1; + node_dim = (lat_dim > lon_dim) ? lat_dim : lon_dim; + } + if (!has_spatial) { free_zarray(za); cJSON_Delete(zarray); @@ -501,22 +549,6 @@ USVar *zarr_scan_variables(USFile *file, USMesh *mesh) { continue; } - /* Identify dimensions */ - int time_dim = -1, depth_dim = -1; - if (zattrs) { - cJSON *dims = cJSON_GetObjectItem(zattrs, "_ARRAY_DIMENSIONS"); - if (dims && cJSON_IsArray(dims)) { - for (int d = 0; d < cJSON_GetArraySize(dims); d++) { - cJSON *dim_item = cJSON_GetArrayItem(dims, d); - if (!dim_item || !cJSON_IsString(dim_item)) continue; - const char *dim_name = dim_item->valuestring; - if (matches_name_list(dim_name, TIME_NAMES)) time_dim = d; - else if (matches_name_list(dim_name, DEPTH_NAMES)) depth_dim = d; - else if (matches_name_list(dim_name, NODE_NAMES)) node_dim = d; - } - } - } - /* Create variable */ USVar *var = calloc(1, sizeof(USVar)); if (!var) { @@ -538,6 +570,17 @@ USVar *zarr_scan_variables(USFile *file, USMesh *mesh) { for (int d = 0; d < za->ndim && d < MAX_DIMS; d++) { var->dim_sizes[d] = za->shape[d]; + + /* Copy dimension names from _ARRAY_DIMENSIONS */ + if (zattrs) { + cJSON *dims = cJSON_GetObjectItem(zattrs, "_ARRAY_DIMENSIONS"); + if (dims && d < cJSON_GetArraySize(dims)) { + cJSON *dim_item = cJSON_GetArrayItem(dims, d); + if (dim_item && cJSON_IsString(dim_item)) { + strncpy(var->dim_names[d], dim_item->valuestring, MAX_NAME_LEN - 1); + } + } + } } if (zattrs) { @@ -556,11 +599,14 @@ USVar *zarr_scan_variables(USFile *file, USMesh *mesh) { var_tail = var; var_count++; - printf("Found zarr variable: %s [shape=", entry->d_name); + printf("Found zarr variable: %s [", entry->d_name); for (int d = 0; d < za->ndim; d++) { - printf("%s%zu", d > 0 ? "x" : "", za->shape[d]); + printf("%s%s=%zu", d > 0 ? ", " : "", var->dim_names[d], za->shape[d]); } - printf("]\n"); + printf("]"); + if (time_dim >= 0) printf(" (time=%d)", time_dim); + if (depth_dim >= 0) printf(" (depth=%d)", depth_dim); + printf("\n"); } closedir(dir); } @@ -597,127 +643,214 @@ static void *zarr_read_chunk(const char *chunk_path, ZarrArray *za, size_t expec } /* - * Read a 2D slice from zarr variable (handles multi-chunk spatial dimensions) + * Internal helper: read a spatial slice from a zarr array. + * Handles both single spatial dimension (unstructured) and two spatial + * dimensions (structured lat/lon grids) with multi-chunk support. */ -int zarr_read_slice(USVar *var, size_t time_idx, size_t depth_idx, float *data) { - if (!var || !var->zarr_data || !data) return -1; - - ZarrArray *za = (ZarrArray *)var->zarr_data; - size_t n_points = var->mesh->n_points; +static int zarr_read_spatial_slice(ZarrArray *za, int time_dim_id, int depth_dim_id, + size_t time_idx, size_t depth_idx, float *data) { + if (!za || !data) return -1; /* Determine time/depth chunk indices */ - size_t time_chunk = 0, depth_chunk = 0; - size_t local_time = time_idx, local_depth = depth_idx; + size_t time_chunk = 0, local_time = time_idx; + size_t depth_chunk = 0, local_depth = depth_idx; - if (var->time_dim_id >= 0) { - time_chunk = time_idx / za->chunks[var->time_dim_id]; - local_time = time_idx % za->chunks[var->time_dim_id]; + if (time_dim_id >= 0) { + time_chunk = time_idx / za->chunks[time_dim_id]; + local_time = time_idx % za->chunks[time_dim_id]; } - if (var->depth_dim_id >= 0) { - depth_chunk = depth_idx / za->chunks[var->depth_dim_id]; - local_depth = depth_idx % za->chunks[var->depth_dim_id]; + if (depth_dim_id >= 0) { + depth_chunk = depth_idx / za->chunks[depth_dim_id]; + local_depth = depth_idx % za->chunks[depth_dim_id]; } - /* Find spatial dimension (not time, not depth) */ - int spatial_dim = -1; + /* Identify spatial dimensions (everything except time and depth) */ + int spatial_dims[MAX_DIMS]; + int n_spatial = 0; for (int d = 0; d < za->ndim; d++) { - if (d != var->time_dim_id && d != var->depth_dim_id) { - spatial_dim = d; - break; + if (d != time_dim_id && d != depth_dim_id) { + spatial_dims[n_spatial++] = d; } } - if (spatial_dim < 0) { + if (n_spatial < 1) { fprintf(stderr, "Could not identify spatial dimension\n"); return -1; } - /* Calculate number of spatial chunks */ - size_t spatial_shape = za->shape[spatial_dim]; - size_t spatial_chunk_size = za->chunks[spatial_dim]; - size_t n_spatial_chunks = (spatial_shape + spatial_chunk_size - 1) / spatial_chunk_size; + /* Calculate chunk strides (for indexing within a chunk) */ + size_t chunk_strides[MAX_DIMS]; + chunk_strides[za->ndim - 1] = 1; + for (int d = za->ndim - 2; d >= 0; d--) { + chunk_strides[d] = chunk_strides[d + 1] * za->chunks[d + 1]; + } - /* Calculate expected uncompressed size per chunk */ + /* Expected uncompressed chunk size */ size_t chunk_elements = 1; for (int d = 0; d < za->ndim; d++) { chunk_elements *= za->chunks[d]; } size_t expected_size = chunk_elements * za->dtype_size; - /* Read all spatial chunks and combine */ - size_t output_offset = 0; - - for (size_t spatial_chunk = 0; spatial_chunk < n_spatial_chunks; spatial_chunk++) { - /* Build chunk filename */ - char chunk_path[PATH_MAX]; - char chunk_key[256] = ""; + /* Base offset within chunk for fixed dimensions (time, depth) */ + size_t base_offset = 0; + if (time_dim_id >= 0) { + base_offset += local_time * chunk_strides[time_dim_id]; + } + if (depth_dim_id >= 0) { + base_offset += local_depth * chunk_strides[depth_dim_id]; + } - for (int d = 0; d < za->ndim; d++) { - char part[32]; - size_t chunk_idx; + if (n_spatial == 1) { + /* --- Single spatial dimension (unstructured or flattened) --- */ + int sd = spatial_dims[0]; + size_t spatial_shape = za->shape[sd]; + size_t spatial_chunk_size = za->chunks[sd]; + size_t n_chunks = (spatial_shape + spatial_chunk_size - 1) / spatial_chunk_size; + + size_t output_offset = 0; + for (size_t sc = 0; sc < n_chunks; sc++) { + /* Build chunk key */ + char chunk_key[256] = ""; + for (int d = 0; d < za->ndim; d++) { + char part[32]; + size_t ci; + if (d == time_dim_id) ci = time_chunk; + else if (d == depth_dim_id) ci = depth_chunk; + else ci = sc; + if (d > 0) strcat(chunk_key, "."); + snprintf(part, sizeof(part), "%zu", ci); + strcat(chunk_key, part); + } - if (d == var->time_dim_id) { - chunk_idx = time_chunk; - } else if (d == var->depth_dim_id) { - chunk_idx = depth_chunk; + char chunk_path[PATH_MAX]; + snprintf(chunk_path, sizeof(chunk_path), "%s/%s", za->array_path, chunk_key); + + void *chunk_data = zarr_read_chunk(chunk_path, za, expected_size); + if (!chunk_data) return -1; + + size_t remaining = spatial_shape - output_offset; + size_t points_in_chunk = (remaining < spatial_chunk_size) ? remaining : spatial_chunk_size; + + /* Copy with type conversion using stride-based offset */ + size_t stride = chunk_strides[sd]; + if (za->dtype == 'f' && za->dtype_size == 4) { + float *src = (float *)chunk_data; + for (size_t i = 0; i < points_in_chunk; i++) + data[output_offset + i] = src[base_offset + i * stride]; + } else if (za->dtype == 'd') { + double *src = (double *)chunk_data; + for (size_t i = 0; i < points_in_chunk; i++) + data[output_offset + i] = (float)src[base_offset + i * stride]; + } else if (za->dtype == 'i' && za->dtype_size == 8) { + int64_t *src = (int64_t *)chunk_data; + for (size_t i = 0; i < points_in_chunk; i++) + data[output_offset + i] = (float)src[base_offset + i * stride]; } else { - chunk_idx = spatial_chunk; + fprintf(stderr, "Unsupported dtype: %c (size %d)\n", za->dtype, za->dtype_size); + free(chunk_data); + return -1; } - if (d > 0) strcat(chunk_key, "."); - snprintf(part, sizeof(part), "%zu", chunk_idx); - strcat(chunk_key, part); + free(chunk_data); + output_offset += points_in_chunk; } + } else if (n_spatial == 2) { + /* --- Two spatial dimensions (structured grid: e.g. lat, lon) --- */ + int d0 = spatial_dims[0]; /* First spatial dim (e.g. lat) */ + int d1 = spatial_dims[1]; /* Second spatial dim (e.g. lon) */ + size_t shape0 = za->shape[d0]; + size_t shape1 = za->shape[d1]; + size_t chunk0 = za->chunks[d0]; + size_t chunk1 = za->chunks[d1]; + size_t n_chunks0 = (shape0 + chunk0 - 1) / chunk0; + size_t n_chunks1 = (shape1 + chunk1 - 1) / chunk1; + + for (size_t c0 = 0; c0 < n_chunks0; c0++) { + for (size_t c1 = 0; c1 < n_chunks1; c1++) { + /* Build chunk key */ + char chunk_key[256] = ""; + for (int d = 0; d < za->ndim; d++) { + char part[32]; + size_t ci; + if (d == time_dim_id) ci = time_chunk; + else if (d == depth_dim_id) ci = depth_chunk; + else if (d == d0) ci = c0; + else ci = c1; /* d == d1 */ + if (d > 0) strcat(chunk_key, "."); + snprintf(part, sizeof(part), "%zu", ci); + strcat(chunk_key, part); + } - snprintf(chunk_path, sizeof(chunk_path), "%s/%s", za->array_path, chunk_key); + char chunk_path[PATH_MAX]; + snprintf(chunk_path, sizeof(chunk_path), "%s/%s", za->array_path, chunk_key); - /* Read chunk */ - void *chunk_data = zarr_read_chunk(chunk_path, za, expected_size); - if (!chunk_data) return -1; + void *chunk_data = zarr_read_chunk(chunk_path, za, expected_size); + if (!chunk_data) return -1; - /* Calculate how many points to copy from this chunk */ - size_t remaining = n_points - output_offset; - size_t points_in_chunk = (remaining < spatial_chunk_size) ? remaining : spatial_chunk_size; + size_t start0 = c0 * chunk0; + size_t start1 = c1 * chunk1; + size_t actual0 = ((shape0 - start0) < chunk0) ? (shape0 - start0) : chunk0; + size_t actual1 = ((shape1 - start1) < chunk1) ? (shape1 - start1) : chunk1; - /* Calculate offset within chunk for our time index */ - size_t slice_offset = 0; - if (var->time_dim_id >= 0 && var->time_dim_id < spatial_dim) { - /* Time dimension comes before spatial: offset = local_time * spatial_chunk_size */ - slice_offset = local_time * spatial_chunk_size; - } - (void)local_depth; /* Not used in current 2D slice logic */ - - /* Copy and convert to float */ - if (za->dtype == 'f' && za->dtype_size == 4) { - /* Already float32 */ - memcpy(data + output_offset, - (char *)chunk_data + slice_offset * sizeof(float), - points_in_chunk * sizeof(float)); - } else if (za->dtype == 'd') { - /* Double to float */ - double *src = (double *)((char *)chunk_data + slice_offset * sizeof(double)); - for (size_t i = 0; i < points_in_chunk; i++) { - data[output_offset + i] = (float)src[i]; - } - } else if (za->dtype == 'i' && za->dtype_size == 8) { - /* Int64 to float */ - int64_t *src = (int64_t *)((char *)chunk_data + slice_offset * sizeof(int64_t)); - for (size_t i = 0; i < points_in_chunk; i++) { - data[output_offset + i] = (float)src[i]; + size_t stride0 = chunk_strides[d0]; + size_t stride1 = chunk_strides[d1]; + + if (za->dtype == 'f' && za->dtype_size == 4) { + float *src = (float *)chunk_data; + for (size_t j = 0; j < actual0; j++) { + for (size_t i = 0; i < actual1; i++) { + size_t co = base_offset + j * stride0 + i * stride1; + size_t oi = (start0 + j) * shape1 + (start1 + i); + data[oi] = src[co]; + } + } + } else if (za->dtype == 'd') { + double *src = (double *)chunk_data; + for (size_t j = 0; j < actual0; j++) { + for (size_t i = 0; i < actual1; i++) { + size_t co = base_offset + j * stride0 + i * stride1; + size_t oi = (start0 + j) * shape1 + (start1 + i); + data[oi] = (float)src[co]; + } + } + } else if (za->dtype == 'i' && za->dtype_size == 8) { + int64_t *src = (int64_t *)chunk_data; + for (size_t j = 0; j < actual0; j++) { + for (size_t i = 0; i < actual1; i++) { + size_t co = base_offset + j * stride0 + i * stride1; + size_t oi = (start0 + j) * shape1 + (start1 + i); + data[oi] = (float)src[co]; + } + } + } else { + fprintf(stderr, "Unsupported dtype: %c (size %d)\n", za->dtype, za->dtype_size); + free(chunk_data); + return -1; + } + + free(chunk_data); } - } else { - fprintf(stderr, "Unsupported dtype: %c (size %d)\n", za->dtype, za->dtype_size); - free(chunk_data); - return -1; } - - free(chunk_data); - output_offset += points_in_chunk; + } else { + fprintf(stderr, "Unsupported number of spatial dimensions: %d\n", n_spatial); + return -1; } return 0; } +/* + * Read a 2D slice from zarr variable (handles multi-chunk spatial dimensions) + */ +int zarr_read_slice(USVar *var, size_t time_idx, size_t depth_idx, float *data) { + if (!var || !var->zarr_data || !data) return -1; + + return zarr_read_spatial_slice((ZarrArray *)var->zarr_data, + var->time_dim_id, var->depth_dim_id, + time_idx, depth_idx, data); +} + /* * Estimate data range by sampling */ @@ -1271,80 +1404,12 @@ int zarr_read_slice_fileset(USFileSet *fs, USVar *var, return -1; } - /* Now read the slice using the ZarrArray */ - size_t n_points = var->mesh->n_points; - - /* Determine chunk indices */ - size_t time_chunk = 0; - size_t local_time_in_chunk = local_time; - - if (var->time_dim_id >= 0) { - time_chunk = local_time / za->chunks[var->time_dim_id]; - local_time_in_chunk = local_time % za->chunks[var->time_dim_id]; - } - - /* Build chunk filename */ - char chunk_path[PATH_MAX]; - char chunk_key[256] = ""; + /* Read the slice using the shared helper */ + int result = zarr_read_spatial_slice(za, var->time_dim_id, var->depth_dim_id, + local_time, depth_idx, data); - for (int d = 0; d < za->ndim; d++) { - char part[32]; - size_t chunk_idx; - - if (d == var->time_dim_id) { - chunk_idx = time_chunk; - } else if (d == var->depth_dim_id) { - chunk_idx = depth_idx / za->chunks[d]; - } else { - chunk_idx = 0; - } - - if (d > 0) strcat(chunk_key, "."); - snprintf(part, sizeof(part), "%zu", chunk_idx); - strcat(chunk_key, part); - } - - snprintf(chunk_path, sizeof(chunk_path), "%s/%s", za->array_path, chunk_key); - - /* Calculate expected uncompressed size */ - size_t chunk_elements = 1; - for (int d = 0; d < za->ndim; d++) { - chunk_elements *= za->chunks[d]; - } - size_t expected_size = chunk_elements * za->dtype_size; - - /* Read chunk */ - void *chunk_data = zarr_read_chunk(chunk_path, za, expected_size); - if (!chunk_data) { - if (need_free_za) free_zarray(za); - return -1; - } - - /* Extract the slice we need */ - size_t slice_offset = 0; - if (var->time_dim_id >= 0 && var->time_dim_id == 0) { - slice_offset = local_time_in_chunk * za->chunks[1]; - } - - /* Copy and convert to float */ - if (za->dtype == 'f' && za->dtype_size == 4) { - memcpy(data, (char *)chunk_data + slice_offset * sizeof(float), n_points * sizeof(float)); - } else if (za->dtype == 'd') { - double *src = (double *)((char *)chunk_data + slice_offset * sizeof(double)); - for (size_t i = 0; i < n_points; i++) { - data[i] = (float)src[i]; - } - } else if (za->dtype == 'i' && za->dtype_size == 8) { - int64_t *src = (int64_t *)((char *)chunk_data + slice_offset * sizeof(int64_t)); - for (size_t i = 0; i < n_points; i++) { - data[i] = (float)src[i]; - } - } - - free(chunk_data); if (need_free_za) free_zarray(za); - - return 0; + return result; } USDimInfo *zarr_get_dim_info_fileset(USFileSet *fs, USVar *var, int *n_dims_out) { diff --git a/src/mesh.c b/src/mesh.c index 6a8d3fa..45a8571 100644 --- a/src/mesh.c +++ b/src/mesh.c @@ -411,9 +411,12 @@ static cJSON *mesh_read_json(const char *path) { return json; } -/* Read and decompress a zarr coordinate array (handles multi-chunk arrays) */ +/* Read and decompress a zarr coordinate array (handles N-dimensional, multi-chunk arrays). + * Returns flattened array of all values. Sets *ndim_out if non-NULL. + * For 2D coords, *dim0_out and *dim1_out give the shape. */ static double *read_zarr_coord(const char *base_path, const char *coord_name, - size_t *n_points_out) { + size_t *n_points_out, int *ndim_out, + size_t *dim0_out, size_t *dim1_out) { char coord_path[PATH_MAX]; char zarray_path[PATH_MAX]; @@ -423,7 +426,6 @@ static double *read_zarr_coord(const char *base_path, const char *coord_name, /* Read .zarray metadata */ cJSON *zarray = mesh_read_json(zarray_path); if (!zarray) { - fprintf(stderr, "Failed to read %s\n", zarray_path); return NULL; } @@ -433,19 +435,36 @@ static double *read_zarr_coord(const char *base_path, const char *coord_name, cJSON_Delete(zarray); return NULL; } - size_t n_points = (size_t)cJSON_GetArrayItem(shape, 0)->valuedouble; + + int ndim = cJSON_GetArraySize(shape); + if (ndim > 2) { + /* Only support 1D and 2D coordinate arrays */ + cJSON_Delete(zarray); + return NULL; + } + + size_t shape_dims[2] = {0, 0}; + size_t chunk_dims[2] = {0, 0}; + size_t n_points = 1; + for (int i = 0; i < ndim; i++) { + shape_dims[i] = (size_t)cJSON_GetArrayItem(shape, i)->valuedouble; + chunk_dims[i] = shape_dims[i]; /* Default: one chunk */ + n_points *= shape_dims[i]; + } + *n_points_out = n_points; + if (ndim_out) *ndim_out = ndim; + if (dim0_out) *dim0_out = shape_dims[0]; + if (dim1_out) *dim1_out = (ndim > 1) ? shape_dims[1] : 0; - /* Get chunk size */ + /* Get chunk sizes */ cJSON *chunks = cJSON_GetObjectItem(zarray, "chunks"); - size_t chunk_size = n_points; /* Default: single chunk */ - if (chunks && cJSON_IsArray(chunks) && cJSON_GetArraySize(chunks) >= 1) { - chunk_size = (size_t)cJSON_GetArrayItem(chunks, 0)->valuedouble; + if (chunks && cJSON_IsArray(chunks)) { + for (int i = 0; i < ndim && i < cJSON_GetArraySize(chunks); i++) { + chunk_dims[i] = (size_t)cJSON_GetArrayItem(chunks, i)->valuedouble; + } } - /* Calculate number of chunks */ - size_t n_chunks = (n_points + chunk_size - 1) / chunk_size; - /* Get dtype */ cJSON *dtype_obj = cJSON_GetObjectItem(zarray, "dtype"); const char *dtype_str = dtype_obj ? dtype_obj->valuestring : " this_chunk_bytes) { - void *temp = malloc(nbytes); - if (!temp) { - free(compressed); - free(raw_data); - cJSON_Delete(zarray); - return NULL; - } - int result = blosc_decompress(compressed, temp, nbytes); + chunk_data = malloc(nbytes); + if (!chunk_data) { free(compressed); - if (result < 0) { - fprintf(stderr, "Blosc decompression failed for %s chunk %zu\n", - coord_name, chunk_idx); - free(temp); - free(raw_data); - cJSON_Delete(zarray); - return NULL; - } - /* Copy only the needed portion */ - memcpy(chunk_dest, temp, this_chunk_bytes); - free(temp); - } else { - int result = blosc_decompress(compressed, chunk_dest, nbytes); - free(compressed); - if (result < 0) { - fprintf(stderr, "Blosc decompression failed for %s chunk %zu\n", - coord_name, chunk_idx); - free(raw_data); - cJSON_Delete(zarray); - return NULL; - } + free(raw_data); + cJSON_Delete(zarray); + return NULL; + } + int result = blosc_decompress(compressed, chunk_data, nbytes); + free(compressed); + if (result < 0) { + fprintf(stderr, "Blosc decompression failed for %s\n", chunk_path); + free(chunk_data); + free(raw_data); + cJSON_Delete(zarray); + return NULL; } } else { fprintf(stderr, "Unknown compressor: %s\n", compressor_id); @@ -568,7 +602,26 @@ static double *read_zarr_coord(const char *base_path, const char *coord_name, return NULL; } - offset += this_chunk_points; + /* Copy chunk data into the output buffer at the right position */ + if (ndim == 1) { + size_t dst_offset = ci[0] * chunk_dims[0]; + memcpy((char *)raw_data + dst_offset * dtype_size, + chunk_data, chunk_actual[0] * dtype_size); + } else { + /* 2D: copy row by row to handle chunking properly */ + size_t row_start = ci[0] * chunk_dims[0]; + size_t col_start = ci[1] * chunk_dims[1]; + for (size_t r = 0; r < chunk_actual[0]; r++) { + size_t dst_offset = (row_start + r) * shape_dims[1] + col_start; + size_t src_offset = r * chunk_dims[1]; + memcpy((char *)raw_data + dst_offset * dtype_size, + (char *)chunk_data + src_offset * dtype_size, + chunk_actual[1] * dtype_size); + } + } + + if (chunk_data != compressed) free(chunk_data); + else free(compressed); } cJSON_Delete(zarray); @@ -608,18 +661,34 @@ USMesh *mesh_create_from_zarr(USFile *file) { printf("Loading coordinates from zarr store: %s\n", base_path); - /* Try to read latitude and longitude */ + /* Try to read latitude and longitude coordinate arrays */ size_t lat_points = 0, lon_points = 0; + int lat_ndim = 0, lon_ndim = 0; + size_t lat_dim0 = 0, lat_dim1 = 0; + size_t lon_dim0 = 0, lon_dim1 = 0; + + /* Try common coordinate names (ordered by likelihood) */ + static const char *zarr_lat_names[] = { + "latitude", "lat", "y", "nav_lat", "glat", "clat", + "yt_ocean", "yu_ocean", "yh", "yq", + "latCell", "latVertex", NULL + }; + static const char *zarr_lon_names[] = { + "longitude", "lon", "x", "nav_lon", "glon", "clon", + "xt_ocean", "xu_ocean", "xh", "xq", + "lonCell", "lonVertex", NULL + }; - /* Try different coordinate names */ - double *lat = read_zarr_coord(base_path, "latitude", &lat_points); - if (!lat) { - lat = read_zarr_coord(base_path, "lat", &lat_points); + double *lat = NULL; + for (int i = 0; zarr_lat_names[i] != NULL && !lat; i++) { + lat = read_zarr_coord(base_path, zarr_lat_names[i], &lat_points, + &lat_ndim, &lat_dim0, &lat_dim1); } - double *lon = read_zarr_coord(base_path, "longitude", &lon_points); - if (!lon) { - lon = read_zarr_coord(base_path, "lon", &lon_points); + double *lon = NULL; + for (int i = 0; zarr_lon_names[i] != NULL && !lon; i++) { + lon = read_zarr_coord(base_path, zarr_lon_names[i], &lon_points, + &lon_ndim, &lon_dim0, &lon_dim1); } if (!lat || !lon) { @@ -629,15 +698,77 @@ USMesh *mesh_create_from_zarr(USFile *file) { return NULL; } - if (lat_points != lon_points) { - fprintf(stderr, "Coordinate array size mismatch: lat=%zu, lon=%zu\n", - lat_points, lon_points); + printf("Coordinate info: lat %dD [%zu", lat_ndim, lat_dim0); + if (lat_ndim == 2) printf("x%zu", lat_dim1); + printf("], lon %dD [%zu", lon_ndim, lon_dim0); + if (lon_ndim == 2) printf("x%zu", lon_dim1); + printf("]\n"); + + size_t n_points; + CoordType coord_type; + size_t orig_nx = 0, orig_ny = 0; + + if (lat_ndim == 2 && lon_ndim == 2) { + /* Both 2D -> curvilinear grid (already flattened by read_zarr_coord) */ + if (lat_dim0 != lon_dim0 || lat_dim1 != lon_dim1) { + fprintf(stderr, "2D coordinate arrays have different shapes\n"); + free(lat); + free(lon); + return NULL; + } + coord_type = COORD_TYPE_2D_CURVILINEAR; + orig_ny = lat_dim0; + orig_nx = lat_dim1; + n_points = lat_points; /* Already = dim0 * dim1 */ + printf("Detected: 2D curvilinear grid (%zu x %zu = %zu points)\n", + orig_ny, orig_nx, n_points); + } else if (lat_ndim == 1 && lon_ndim == 1 && lat_points == lon_points) { + /* Same size 1D arrays -> unstructured */ + coord_type = COORD_TYPE_1D_UNSTRUCTURED; + n_points = lat_points; + printf("Detected: 1D unstructured coordinates (%zu points)\n", n_points); + } else if (lat_ndim == 1 && lon_ndim == 1) { + /* Different size 1D arrays -> structured grid, create meshgrid */ + coord_type = COORD_TYPE_1D_STRUCTURED; + orig_nx = lon_points; + orig_ny = lat_points; + n_points = orig_nx * orig_ny; + printf("Detected: 1D structured grid (%zu x %zu = %zu points)\n", + orig_nx, orig_ny, n_points); + + /* Read 1D coordinate arrays and expand to meshgrid */ + double *lon_2d = malloc(n_points * sizeof(double)); + double *lat_2d = malloc(n_points * sizeof(double)); + if (!lon_2d || !lat_2d) { + free(lon_2d); + free(lat_2d); + free(lon); + free(lat); + return NULL; + } + + /* Create meshgrid (flatten in row-major order: lat varies slowest) */ + size_t idx = 0; + for (size_t j = 0; j < orig_ny; j++) { + for (size_t i = 0; i < orig_nx; i++) { + lon_2d[idx] = lon[i]; + lat_2d[idx] = lat[j]; + idx++; + } + } + + free(lon); + free(lat); + lon = lon_2d; + lat = lat_2d; + } else { + fprintf(stderr, "Unsupported coordinate combination: lat %dD, lon %dD\n", + lat_ndim, lon_ndim); free(lat); free(lon); return NULL; } - size_t n_points = lat_points; printf("Loaded %zu coordinate points from zarr store\n", n_points); /* Normalize longitude to [-180, 180] */ @@ -646,14 +777,18 @@ USMesh *mesh_create_from_zarr(USFile *file) { while (lon[i] < -180.0) lon[i] += 360.0; } - /* Create mesh - zarr data is always unstructured (1D coordinate arrays) */ - USMesh *mesh = mesh_create(lon, lat, n_points, COORD_TYPE_1D_UNSTRUCTURED); + /* Create mesh */ + USMesh *mesh = mesh_create(lon, lat, n_points, coord_type); if (!mesh) { free(lon); free(lat); return NULL; } + /* Store original grid dimensions for structured grids */ + mesh->orig_nx = orig_nx; + mesh->orig_ny = orig_ny; + return mesh; } diff --git a/tests/test_file_zarr.c b/tests/test_file_zarr.c index c369cb8..2d7ebe7 100644 --- a/tests/test_file_zarr.c +++ b/tests/test_file_zarr.c @@ -1336,6 +1336,660 @@ TEST(zarr_fileset_boundary_read) { return 1; } +/* ---- Structured grid zarr tests (1D structured + 2D curvilinear) ---- */ + +/* + * Create a zarr store with a structured grid (1D lat/lon with different sizes). + * lat has ny values, lon has nx values, data has shape [n_times, ny, nx]. + * This tests meshgrid expansion in mesh_create_from_zarr. + */ +static const char *create_structured_zarr_store(int nx, int ny, int n_times) { + static char store_path[256]; + snprintf(store_path, sizeof(store_path), "/tmp/test_ushow_zarr_struct_%d_%d.zarr", + getpid(), zarr_test_counter++); + + char cmd[512]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", store_path); + int ret = system(cmd); + (void)ret; + + if (mkdir(store_path, 0755) != 0) return NULL; + if (write_zarr_file(store_path, ".zgroup", "{\"zarr_format\":2}") != 0) return NULL; + if (write_zarr_file(store_path, ".zattrs", "{}") != 0) return NULL; + + /* Create 1D latitude array [ny] */ + char array_dir[512]; + snprintf(array_dir, sizeof(array_dir), "%s/lat", store_path); + if (mkdir(array_dir, 0755) != 0) return NULL; + + char zarray_lat[1024]; + snprintf(zarray_lat, sizeof(zarray_lat), + "{\"chunks\":[%d],\"compressor\":null,\"dtype\":\"n_points, (size_t)(nx * ny)); + ASSERT_EQ(mesh->coord_type, COORD_TYPE_1D_STRUCTURED); + ASSERT_EQ_SIZET(mesh->orig_nx, (size_t)nx); + ASSERT_EQ_SIZET(mesh->orig_ny, (size_t)ny); + + /* Check first row: lat should be -90 for all, lon should vary */ + ASSERT_NEAR(mesh->lat[0], -90.0, 1.0); + ASSERT_NEAR(mesh->lon[0], -180.0, 1.0); + ASSERT_NEAR(mesh->lat[nx - 1], -90.0, 1.0); + ASSERT_NEAR(mesh->lon[nx - 1], 180.0, 1.0); + + /* Check last row: lat should be 90 for all */ + ASSERT_NEAR(mesh->lat[(ny - 1) * nx], 90.0, 1.0); + + mesh_free(mesh); + zarr_close(file); + cleanup_test_zarr(store_path); + return 1; +} + +/* Test 2D curvilinear grid mesh creation */ +TEST(curvilinear_mesh_creation) { + const int nx = 36, ny = 18, nt = 2; + const char *store_path = create_curvilinear_zarr_store(nx, ny, nt); + ASSERT_NOT_NULL(store_path); + + USFile *file = zarr_open(store_path); + ASSERT_NOT_NULL(file); + + USMesh *mesh = mesh_create_from_zarr(file); + ASSERT_NOT_NULL(mesh); + + ASSERT_EQ_SIZET(mesh->n_points, (size_t)(nx * ny)); + ASSERT_EQ(mesh->coord_type, COORD_TYPE_2D_CURVILINEAR); + ASSERT_EQ_SIZET(mesh->orig_nx, (size_t)nx); + ASSERT_EQ_SIZET(mesh->orig_ny, (size_t)ny); + + /* Coordinates should be in valid range */ + for (size_t i = 0; i < mesh->n_points; i++) { + ASSERT_GE(mesh->lat[i], -95.0); /* Curvilinear can have small perturbation */ + ASSERT_LE(mesh->lat[i], 95.0); + ASSERT_GE(mesh->lon[i], -180.0); + ASSERT_LE(mesh->lon[i], 180.0); + } + + mesh_free(mesh); + zarr_close(file); + cleanup_test_zarr(store_path); + return 1; +} + +/* Test scanning variables for structured grid */ +TEST(structured_scan_variables) { + const int nx = 36, ny = 18, nt = 3; + const char *store_path = create_structured_zarr_store(nx, ny, nt); + ASSERT_NOT_NULL(store_path); + + USFile *file = zarr_open(store_path); + ASSERT_NOT_NULL(file); + + USMesh *mesh = mesh_create_from_zarr(file); + ASSERT_NOT_NULL(mesh); + + USVar *vars = zarr_scan_variables(file, mesh); + ASSERT_NOT_NULL(vars); + + /* Should find sst variable */ + int found_sst = 0; + USVar *v = vars; + while (v) { + if (strcmp(v->name, "sst") == 0) { + found_sst = 1; + ASSERT_STR_EQ(v->units, "K"); + } + v = v->next; + } + ASSERT_TRUE(found_sst); + + mesh_free(mesh); + zarr_close(file); + cleanup_test_zarr(store_path); + return 1; +} + +/* Test scanning variables for curvilinear grid */ +TEST(curvilinear_scan_variables) { + const int nx = 36, ny = 18, nt = 3; + const char *store_path = create_curvilinear_zarr_store(nx, ny, nt); + ASSERT_NOT_NULL(store_path); + + USFile *file = zarr_open(store_path); + ASSERT_NOT_NULL(file); + + USMesh *mesh = mesh_create_from_zarr(file); + ASSERT_NOT_NULL(mesh); + + USVar *vars = zarr_scan_variables(file, mesh); + ASSERT_NOT_NULL(vars); + + /* Should find sst variable */ + int found_sst = 0; + USVar *v = vars; + while (v) { + if (strcmp(v->name, "sst") == 0) { + found_sst = 1; + ASSERT_STR_EQ(v->units, "K"); + } + v = v->next; + } + ASSERT_TRUE(found_sst); + + mesh_free(mesh); + zarr_close(file); + cleanup_test_zarr(store_path); + return 1; +} + +/* Test reading data from structured grid */ +TEST(structured_read_slice) { + const int nx = 36, ny = 18, nt = 3; + const char *store_path = create_structured_zarr_store(nx, ny, nt); + ASSERT_NOT_NULL(store_path); + + USFile *file = zarr_open(store_path); + ASSERT_NOT_NULL(file); + + USMesh *mesh = mesh_create_from_zarr(file); + ASSERT_NOT_NULL(mesh); + + USVar *vars = zarr_scan_variables(file, mesh); + ASSERT_NOT_NULL(vars); + + /* Find sst variable */ + USVar *sst = NULL; + USVar *v = vars; + while (v) { + if (strcmp(v->name, "sst") == 0) { sst = v; break; } + v = v->next; + } + ASSERT_NOT_NULL(sst); + + float *data = malloc(mesh->n_points * sizeof(float)); + ASSERT_NOT_NULL(data); + + /* Read time step 0 */ + int status = zarr_read_slice(sst, 0, 0, data); + ASSERT_EQ(status, 0); + + /* Verify data: sst[0,j,i] = 273 + lat[j]*0.5 + lon[i]*0.1 */ + /* Corner (0,0): lat=-90, lon=-180 -> 273 + (-45) + (-18) = 210 */ + ASSERT_NEAR(data[0], 210.0f, 1.0f); + + /* Corner (ny-1, nx-1): lat=90, lon=180 -> 273 + 45 + 18 = 336 */ + ASSERT_NEAR(data[(ny - 1) * nx + (nx - 1)], 336.0f, 1.0f); + + /* Read time step 2 and verify time offset */ + status = zarr_read_slice(sst, 2, 0, data); + ASSERT_EQ(status, 0); + + /* Corner (0,0): 273 + (-45) + (-18) + 2*0.5 = 211 */ + ASSERT_NEAR(data[0], 211.0f, 1.0f); + + free(data); + mesh_free(mesh); + zarr_close(file); + cleanup_test_zarr(store_path); + return 1; +} + +/* Test reading data from curvilinear grid */ +TEST(curvilinear_read_slice) { + const int nx = 36, ny = 18, nt = 2; + const char *store_path = create_curvilinear_zarr_store(nx, ny, nt); + ASSERT_NOT_NULL(store_path); + + USFile *file = zarr_open(store_path); + ASSERT_NOT_NULL(file); + + USMesh *mesh = mesh_create_from_zarr(file); + ASSERT_NOT_NULL(mesh); + + USVar *vars = zarr_scan_variables(file, mesh); + ASSERT_NOT_NULL(vars); + + /* Find sst variable */ + USVar *sst = NULL; + USVar *v = vars; + while (v) { + if (strcmp(v->name, "sst") == 0) { sst = v; break; } + v = v->next; + } + ASSERT_NOT_NULL(sst); + + float *data = malloc(mesh->n_points * sizeof(float)); + ASSERT_NOT_NULL(data); + + int status = zarr_read_slice(sst, 0, 0, data); + ASSERT_EQ(status, 0); + + /* Verify data: sst[0,j,i] = 273 + j*0.5 + i*0.1 */ + /* Point (0,0): 273 + 0 + 0 = 273 */ + ASSERT_NEAR(data[0], 273.0f, 0.1f); + + /* Point (ny-1, nx-1): 273 + (ny-1)*0.5 + (nx-1)*0.1 */ + float expected = 273.0f + (ny - 1) * 0.5f + (nx - 1) * 0.1f; + ASSERT_NEAR(data[(ny - 1) * nx + (nx - 1)], expected, 0.1f); + + /* Read time 1: should differ by 0.5 */ + float *data1 = malloc(mesh->n_points * sizeof(float)); + ASSERT_NOT_NULL(data1); + status = zarr_read_slice(sst, 1, 0, data1); + ASSERT_EQ(status, 0); + ASSERT_NEAR(data1[0] - data[0], 0.5f, 0.01f); + + free(data); + free(data1); + mesh_free(mesh); + zarr_close(file); + cleanup_test_zarr(store_path); + return 1; +} + +/* Test structured grid with multi-chunk spatial dimensions */ +TEST(structured_multichunk_read) { + const int nx = 36, ny = 18, nt = 2; + int n_points = nx * ny; + + /* Create store with smaller spatial chunks */ + static char store_path[256]; + snprintf(store_path, sizeof(store_path), "/tmp/test_ushow_zarr_struct_mc_%d_%d.zarr", + getpid(), zarr_test_counter++); + + char cmd[512]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", store_path); + int ret = system(cmd); + (void)ret; + + if (mkdir(store_path, 0755) != 0) return 0; + if (write_zarr_file(store_path, ".zgroup", "{\"zarr_format\":2}") != 0) return 0; + if (write_zarr_file(store_path, ".zattrs", "{}") != 0) return 0; + + /* 1D lat [ny] */ + char array_dir[512]; + snprintf(array_dir, sizeof(array_dir), "%s/lat", store_path); + mkdir(array_dir, 0755); + char zarray[1024]; + snprintf(zarray, sizeof(zarray), + "{\"chunks\":[%d],\"compressor\":null,\"dtype\":\" ny) j_count = ny - j_start; + + for (int ic = 0; ic < n_chunks_x; ic++) { + int i_start = ic * cx; + int i_count = cx; + if (i_start + i_count > nx) i_count = nx - i_start; + + /* Zarr chunks are always full size (edge chunks are padded) */ + float *chunk_data = calloc(chunk_size, sizeof(float)); + for (int j = 0; j < j_count; j++) { + for (int i = 0; i < i_count; i++) { + chunk_data[j * cx + i] = (float)((j_start + j) * 100 + (i_start + i) + t * 10000); + } + } + char chunk_name[64]; + snprintf(chunk_name, sizeof(chunk_name), "%d.%d.%d", t, jc, ic); + write_zarr_chunk(store_path, "sst", chunk_name, chunk_data, chunk_size * sizeof(float)); + free(chunk_data); + } + } + } + + /* Now test reading */ + USFile *file = zarr_open(store_path); + ASSERT_NOT_NULL(file); + + USMesh *mesh = mesh_create_from_zarr(file); + ASSERT_NOT_NULL(mesh); + ASSERT_EQ_SIZET(mesh->n_points, (size_t)n_points); + + USVar *vars = zarr_scan_variables(file, mesh); + ASSERT_NOT_NULL(vars); + + USVar *sst = NULL; + USVar *v = vars; + while (v) { + if (strcmp(v->name, "sst") == 0) { sst = v; break; } + v = v->next; + } + ASSERT_NOT_NULL(sst); + + float *data = malloc(n_points * sizeof(float)); + ASSERT_NOT_NULL(data); + + int status = zarr_read_slice(sst, 0, 0, data); + ASSERT_EQ(status, 0); + + /* Verify: data[j*nx + i] should be j*100 + i */ + ASSERT_NEAR(data[0], 0.0f, 0.01f); + ASSERT_NEAR(data[1], 1.0f, 0.01f); + ASSERT_NEAR(data[nx], 100.0f, 0.01f); + ASSERT_NEAR(data[5 * nx + 7], 507.0f, 0.01f); + ASSERT_NEAR(data[(ny - 1) * nx + (nx - 1)], (float)((ny - 1) * 100 + (nx - 1)), 0.01f); + + /* Read time 1 */ + status = zarr_read_slice(sst, 1, 0, data); + ASSERT_EQ(status, 0); + ASSERT_NEAR(data[0], 10000.0f, 0.01f); + ASSERT_NEAR(data[5 * nx + 7], 10507.0f, 0.01f); + + free(data); + mesh_free(mesh); + zarr_close(file); + cleanup_test_zarr(store_path); + return 1; +} + RUN_TESTS("File Zarr") #else /* !HAVE_ZARR */