From 0b8cb5ac454c147120082f9acdd87ef2172ecc38 Mon Sep 17 00:00:00 2001 From: Callan Gray Date: Mon, 13 Oct 2025 16:35:20 +0800 Subject: [PATCH 1/5] Add support for coords validation --- tests/test_core.py | 78 +++++++++++++++++++++++++++---------- xarray_schema/components.py | 2 +- xarray_schema/dataset.py | 9 +++-- 3 files changed, 64 insertions(+), 25 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 688eca6..6268987 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -124,36 +124,72 @@ def test_attr_schema(type, value, validate, json): @pytest.mark.parametrize( - 'component, schema_args, validate, match', + 'component, schema_args, schema_kwargs, validate, match', [ - (DTypeSchema, np.integer, np.float32, r'.*float.*'), - (DimsSchema, ('foo', 'bar'), ('foo',), r'.*length.*'), - (DimsSchema, ('foo', 'bar'), ('foo', 'baz'), r'.*mismatch.*'), - (ShapeSchema, (1, 2, None), (1, 2), r'.*number of dimensions.*'), - (ShapeSchema, (1, 4, 4), (1, 3, 4), r'.*mismatch.*'), - (NameSchema, 'foo', 'bar', r'.*name bar != foo.*'), - (ArrayTypeSchema, np.ndarray, 'bar', r'.*array_type.*'), + (DTypeSchema, (np.integer,), {}, np.float32, r'.*float.*'), + (DimsSchema, (('foo', 'bar'),), {}, ('foo',), r'.*length.*'), + (DimsSchema, (('foo', 'bar'),), {}, ('foo', 'baz'), r'.*mismatch.*'), + (ShapeSchema, ((1, 2, None),), {} ,(1, 2), r'.*number of dimensions.*'), + (ShapeSchema, ((1, 4, 4),), {}, (1, 3, 4), r'.*mismatch.*'), + (NameSchema, ('foo',), {}, 'bar', r'.*name bar != foo.*'), + (ArrayTypeSchema, (np.ndarray,), {}, 'bar', r'.*array_type.*'), # schema_args for ChunksSchema include [chunks, dims, shape] - (ChunksSchema, {'x': 3}, (((2, 2),), ('x',), (4,)), r'.*(3).*'), - (ChunksSchema, {'x': (2, 1)}, (((2, 2),), ('x',), (4,)), r'.*(2, 1).*'), - (ChunksSchema, {'x': (2, 1)}, (None, ('x',), (4,)), r'.*expected array to be chunked.*'), - (ChunksSchema, True, (None, ('x',), (4,)), r'.*expected array to be chunked.*'), + (ChunksSchema, ({'x': 3},), {}, (((2, 2),), ('x',), (4,)), r'.*(3).*'), + (ChunksSchema, ({'x': (2, 1)},), {}, (((2, 2),), ('x',), (4,)), r'.*(2, 1).*'), + (ChunksSchema, ({'x': (2, 1)},), {}, (None, ('x',), (4,)), r'.*expected array to be chunked.*'), + (ChunksSchema, (True,), {}, (None, ('x',), (4,)), r'.*expected array to be chunked.*'), ( ChunksSchema, - False, + (False,), + {}, (((2, 2),), ('x',), (4,)), r'.*expected unchunked array but it is chunked*', ), - (ChunksSchema, {'x': -1}, (((1, 2, 1),), ('x',), (4,)), r'.*did not match.*'), - (ChunksSchema, {'x': 2}, (((2, 3, 2),), ('x',), (7,)), r'.*did not match.*'), - (ChunksSchema, {'x': 2}, (((2, 2, 3),), ('x',), (7,)), r'.*did not match.*'), - (ChunksSchema, {'x': 2, 'y': -1}, (((2, 2), (5, 5)), ('x', 'y'), (4, 10)), r'.*(5).*'), + (ChunksSchema, ({'x': -1},), {}, (((1, 2, 1),), ('x',), (4,)), r'.* did not match.*'), + (ChunksSchema, ({'x': 2},), {}, (((2, 3, 2),), ('x',), (7,)), r'.* did not match.*'), + (ChunksSchema, ({'x': 2},), {}, (((2, 2, 3),), ('x',), (7,)), r'.* did not match.*'), + (ChunksSchema, ({'x': 2, 'y': -1},), {}, (((2, 2), (5, 5)), ('x', 'y'), (4, 10)), r'.*(5).*'), + ( + AttrsSchema, + ({'foo': AttrSchema(type=int)},), + {}, + [{'foo': 'bar'}], + r'attrs .* is not of type.*', + ), + ( + AttrsSchema, + ({'foo': AttrSchema(value=1)},), + {}, + [{'foo': 'bar'}], + r'value .* != .*', + ), + ( + CoordsSchema, + ({'x': DataArraySchema(name='x')},), + {}, + [{'x': xr.DataArray([0, 1], name='y')}], + r'name .* != .*', + ), + ( + CoordsSchema, + ({'x': DataArraySchema(name='x', dtype=np.str_)},), + {}, + [{'x': xr.DataArray([0, 1], name='x')}], + r'dtype .* != .*', + ), + ( + CoordsSchema, + ({'x': DataArraySchema(name='x')},), + {"allow_extra_keys": False}, + [{'x': xr.DataArray([0, 1], name='x'), 'y': xr.DataArray([0, 1], name='y')}], + r'coords has extra keys.*', + ), ], ) -def test_component_raises_schema_error(component, schema_args, validate, match): - schema = component(schema_args) +def test_component_raises_schema_error(component, schema_args, schema_kwargs, validate, match): + schema = component(*schema_args, **schema_kwargs) with pytest.raises(SchemaError, match=match): - if component in [ChunksSchema]: # special case construction + if component in (ChunksSchema, AttrsSchema, CoordsSchema): # special case construction schema.validate(*validate) else: schema.validate(validate) @@ -217,7 +253,7 @@ def test_dataset_empty_constructor(): ds_schema = DatasetSchema() assert hasattr(ds_schema, 'validate') jsonschema.validate(ds_schema.json, ds_schema._json_schema) - ds_schema.json == {} + assert ds_schema.json == {} def test_dataset_example(ds): diff --git a/xarray_schema/components.py b/xarray_schema/components.py index 5aefb0e..39d79ab 100644 --- a/xarray_schema/components.py +++ b/xarray_schema/components.py @@ -349,7 +349,7 @@ def validate(self, attr: Any): if self.value is not None: if self.value is not None and self.value != attr: - raise SchemaError(f'name {attr} != {self.value}') + raise SchemaError(f'value {attr} != {self.value}') @property def json(self) -> dict: diff --git a/xarray_schema/dataset.py b/xarray_schema/dataset.py index 9be35ee..bfd2f62 100644 --- a/xarray_schema/dataset.py +++ b/xarray_schema/dataset.py @@ -79,8 +79,8 @@ def validate(self, ds: xr.Dataset) -> None: else: da_schema.validate(ds.data_vars[key]) - if self.coords is not None: # pragma: no cover - raise NotImplementedError('coords schema not implemented yet') + if self.coords is not None: + self.coords.validate(ds.coords) if self.attrs: self.attrs.validate(ds.attrs) @@ -131,10 +131,13 @@ def coords(self, value: Optional[Union[CoordsSchema, Dict[Hashable, DataArraySch @property def json(self): - obj = {'data_vars': {}, 'attrs': self.attrs.json if self.attrs is not None else {}} + obj = {} if self.data_vars: + obj['data_vars'] = {} for key, var in self.data_vars.items(): obj['data_vars'][key] = var.json if self.coords: obj['coords'] = self.coords.json + if self.attrs: + obj['attrs'] = self.attrs.json return obj From 4735350131287b994447d32d521b8c4d77c5f3ab Mon Sep 17 00:00:00 2001 From: Callan Gray Date: Mon, 13 Oct 2025 17:33:05 +0800 Subject: [PATCH 2/5] Test schema json --- tests/test_core.py | 2 +- xarray_schema/components.py | 6 +++--- xarray_schema/dataarray.py | 11 +++++++---- xarray_schema/dataset.py | 2 ++ 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 6268987..3e15e5a 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -280,7 +280,7 @@ def test_dataset_example(ds): # json roundtrip rt_schema = DatasetSchema.from_json(ds_schema.json) assert isinstance(rt_schema, DatasetSchema) - rt_schema.json == ds_schema.json + assert rt_schema.json == ds_schema.json def test_checks_ds(ds): diff --git a/xarray_schema/components.py b/xarray_schema/components.py index 39d79ab..b7153af 100644 --- a/xarray_schema/components.py +++ b/xarray_schema/components.py @@ -376,12 +376,12 @@ class AttrsSchema(BaseSchema): _json_schema = { 'type': 'object', 'properties': { - 'require_all_keys': { - 'type': 'boolean' - }, # Question: is this the same as JSON's additionalProperties? + 'require_all_keys': {'type': 'boolean'}, 'allow_extra_keys': {'type': 'boolean'}, 'attrs': {'type': 'object'}, }, + 'required': ['attrs'], + 'additionalProperties': False } def __init__( diff --git a/xarray_schema/dataarray.py b/xarray_schema/dataarray.py index 4a71e62..aba51ee 100644 --- a/xarray_schema/dataarray.py +++ b/xarray_schema/dataarray.py @@ -276,12 +276,15 @@ class CoordsSchema(BaseSchema): _json_schema = { 'type': 'object', 'properties': { - 'require_all_keys': { - 'type': 'boolean' - }, # Question: is this the same as JSON's additionalProperties? + 'require_all_keys': {'type': 'boolean'}, 'allow_extra_keys': {'type': 'boolean'}, - 'coords': {'type': 'object'}, + 'coords': { + 'type': 'object', + 'additionalProperties': DataArraySchema._json_schema + }, }, + 'required': ['coords'], + 'additionalProperties': False, } def __init__( diff --git a/xarray_schema/dataset.py b/xarray_schema/dataset.py index bfd2f62..96e2f38 100644 --- a/xarray_schema/dataset.py +++ b/xarray_schema/dataset.py @@ -25,6 +25,8 @@ class DatasetSchema(BaseSchema): 'coords': {'type': 'object'}, 'attrs': {'type': 'object'}, }, + 'required': [], + 'additionalProperties': False, } def __init__( From 4a586e016e388a8124ae9adc0452b9f605c8af09 Mon Sep 17 00:00:00 2001 From: Callan Gray Date: Mon, 13 Oct 2025 18:08:22 +0800 Subject: [PATCH 3/5] Add CoordsSchema test cases --- tests/test_core.py | 24 +++++++++++++++++++----- xarray_schema/components.py | 2 +- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 3e15e5a..5e3689d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -161,7 +161,14 @@ def test_attr_schema(type, value, validate, json): ({'foo': AttrSchema(value=1)},), {}, [{'foo': 'bar'}], - r'value .* != .*', + r'attrs .* != .*', + ), + ( + AttrsSchema, + ({'foo': AttrSchema(value=1)},), + {'allow_extra_keys': False}, + [{'foo': 'bar', 'x': 0}], + r'attrs has extra keys.*', ), ( CoordsSchema, @@ -172,16 +179,23 @@ def test_attr_schema(type, value, validate, json): ), ( CoordsSchema, - ({'x': DataArraySchema(name='x', dtype=np.str_)},), + ({'x': DataArraySchema(dtype=np.str_)},), {}, - [{'x': xr.DataArray([0, 1], name='x')}], + [{'x': xr.DataArray([0, 1])}], r'dtype .* != .*', ), ( CoordsSchema, - ({'x': DataArraySchema(name='x')},), + ({'x': DataArraySchema(dims=('x',))},), + {}, + [{'x': xr.DataArray([0, 1], name='x')}], + r'dim mismatch in axis .* != .*', + ), + ( + CoordsSchema, + ({'x': DataArraySchema()},), {"allow_extra_keys": False}, - [{'x': xr.DataArray([0, 1], name='x'), 'y': xr.DataArray([0, 1], name='y')}], + [{'x': xr.DataArray([0, 1]), 'y': xr.DataArray([0, 1])}], r'coords has extra keys.*', ), ], diff --git a/xarray_schema/components.py b/xarray_schema/components.py index b7153af..5bc67a5 100644 --- a/xarray_schema/components.py +++ b/xarray_schema/components.py @@ -349,7 +349,7 @@ def validate(self, attr: Any): if self.value is not None: if self.value is not None and self.value != attr: - raise SchemaError(f'value {attr} != {self.value}') + raise SchemaError(f'attrs {attr} != {self.value}') @property def json(self) -> dict: From f8bb6c8cdfd768e09a84768c614b65ec1a85a8e5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 Oct 2025 10:17:52 +0000 Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_core.py | 20 ++++++++++++++++---- xarray_schema/components.py | 2 +- xarray_schema/dataarray.py | 5 +---- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 5e3689d..d4c0d4c 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -129,14 +129,20 @@ def test_attr_schema(type, value, validate, json): (DTypeSchema, (np.integer,), {}, np.float32, r'.*float.*'), (DimsSchema, (('foo', 'bar'),), {}, ('foo',), r'.*length.*'), (DimsSchema, (('foo', 'bar'),), {}, ('foo', 'baz'), r'.*mismatch.*'), - (ShapeSchema, ((1, 2, None),), {} ,(1, 2), r'.*number of dimensions.*'), + (ShapeSchema, ((1, 2, None),), {}, (1, 2), r'.*number of dimensions.*'), (ShapeSchema, ((1, 4, 4),), {}, (1, 3, 4), r'.*mismatch.*'), (NameSchema, ('foo',), {}, 'bar', r'.*name bar != foo.*'), (ArrayTypeSchema, (np.ndarray,), {}, 'bar', r'.*array_type.*'), # schema_args for ChunksSchema include [chunks, dims, shape] (ChunksSchema, ({'x': 3},), {}, (((2, 2),), ('x',), (4,)), r'.*(3).*'), (ChunksSchema, ({'x': (2, 1)},), {}, (((2, 2),), ('x',), (4,)), r'.*(2, 1).*'), - (ChunksSchema, ({'x': (2, 1)},), {}, (None, ('x',), (4,)), r'.*expected array to be chunked.*'), + ( + ChunksSchema, + ({'x': (2, 1)},), + {}, + (None, ('x',), (4,)), + r'.*expected array to be chunked.*', + ), (ChunksSchema, (True,), {}, (None, ('x',), (4,)), r'.*expected array to be chunked.*'), ( ChunksSchema, @@ -148,7 +154,13 @@ def test_attr_schema(type, value, validate, json): (ChunksSchema, ({'x': -1},), {}, (((1, 2, 1),), ('x',), (4,)), r'.* did not match.*'), (ChunksSchema, ({'x': 2},), {}, (((2, 3, 2),), ('x',), (7,)), r'.* did not match.*'), (ChunksSchema, ({'x': 2},), {}, (((2, 2, 3),), ('x',), (7,)), r'.* did not match.*'), - (ChunksSchema, ({'x': 2, 'y': -1},), {}, (((2, 2), (5, 5)), ('x', 'y'), (4, 10)), r'.*(5).*'), + ( + ChunksSchema, + ({'x': 2, 'y': -1},), + {}, + (((2, 2), (5, 5)), ('x', 'y'), (4, 10)), + r'.*(5).*', + ), ( AttrsSchema, ({'foo': AttrSchema(type=int)},), @@ -194,7 +206,7 @@ def test_attr_schema(type, value, validate, json): ( CoordsSchema, ({'x': DataArraySchema()},), - {"allow_extra_keys": False}, + {'allow_extra_keys': False}, [{'x': xr.DataArray([0, 1]), 'y': xr.DataArray([0, 1])}], r'coords has extra keys.*', ), diff --git a/xarray_schema/components.py b/xarray_schema/components.py index 5bc67a5..30496a5 100644 --- a/xarray_schema/components.py +++ b/xarray_schema/components.py @@ -381,7 +381,7 @@ class AttrsSchema(BaseSchema): 'attrs': {'type': 'object'}, }, 'required': ['attrs'], - 'additionalProperties': False + 'additionalProperties': False, } def __init__( diff --git a/xarray_schema/dataarray.py b/xarray_schema/dataarray.py index aba51ee..20edfe2 100644 --- a/xarray_schema/dataarray.py +++ b/xarray_schema/dataarray.py @@ -278,10 +278,7 @@ class CoordsSchema(BaseSchema): 'properties': { 'require_all_keys': {'type': 'boolean'}, 'allow_extra_keys': {'type': 'boolean'}, - 'coords': { - 'type': 'object', - 'additionalProperties': DataArraySchema._json_schema - }, + 'coords': {'type': 'object', 'additionalProperties': DataArraySchema._json_schema}, }, 'required': ['coords'], 'additionalProperties': False, From 1b4baa03eb50a4248fde7c8c8bbd40d74b313734 Mon Sep 17 00:00:00 2001 From: Callan Gray Date: Mon, 13 Oct 2025 19:20:03 +0800 Subject: [PATCH 5/5] Simplify dataset deserialization --- tests/test_core.py | 2 +- xarray_schema/dataset.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index d4c0d4c..0e90236 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -105,7 +105,7 @@ def test_component_schema(component, schema_args, validate, json): jsonschema.validate(schema.json, schema._json_schema) # json roundtrip - component.from_json(schema.json).json == json + assert component.from_json(schema.json).json == json @pytest.mark.parametrize( diff --git a/xarray_schema/dataset.py b/xarray_schema/dataset.py index 96e2f38..25e9042 100644 --- a/xarray_schema/dataset.py +++ b/xarray_schema/dataset.py @@ -49,9 +49,9 @@ def from_json(cls, obj: dict): k: DataArraySchema.from_json(v) for k, v in obj['data_vars'].items() } if 'coords' in obj: - kwargs['coords'] = {k: CoordsSchema.from_json(v) for k, v in obj['coords'].items()} + kwargs['coords'] = CoordsSchema.from_json(obj['coords']) if 'attrs' in obj: - kwargs['attrs'] = {k: AttrsSchema.from_json(v) for k, v in obj['attrs'].items()} + kwargs['attrs'] = AttrsSchema.from_json(obj['attrs']) return cls(**kwargs)