From 923064336e5baddfdebaffa240f4bfcf3be65a80 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 30 Jul 2025 18:06:56 +0100 Subject: [PATCH 01/97] CF Validator: add new module to find all current standard names --- cfdm/cfvalidation.py | 61 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 cfdm/cfvalidation.py diff --git a/cfdm/cfvalidation.py b/cfdm/cfvalidation.py new file mode 100644 index 000000000..9c3152962 --- /dev/null +++ b/cfdm/cfvalidation.py @@ -0,0 +1,61 @@ +import os +import pprint +import re + +# Prefer using built-in urllib to extract XML from cf-convention.github.io repo +# over the 'github' module to use the GitHub API directly, because it avoids +# the need for another dependency to the CF Data Tools. +from urllib import request + +# TO parse the XML - better than using manual regex parsing! +import xml.etree.ElementTree as ET + +# I.e. the data at repo location: +# 'https://github.com/cf-convention/cf-convention.github.io/blob/main/Data/' +# 'cf-standard-names/current/src/cf-standard-name-table.xml' but use this +# form under 'https://raw.githubusercontent.com' for raw XML content only +STD_NAME_CURRENT_XML_URL = ( + "https://raw.githubusercontent.com/" + "cf-convention/cf-convention.github.io/refs/heads/main/Data/" + "cf-standard-names/current/src/cf-standard-name-table.xml" +) +SAVE_DIR = "snames_cache" + +XML_STD_NAME_TAG_PATTERN = re.compile(r"") + + +def extract_names_from_xml(snames_xml): + """TODO.""" + root = ET.fromstring(snames_xml) + # Want all elements. Note the regex this corresponds + # to, from SLB older code, is 're.compile(r"")' but + # using the ElementTree is a much more robust means to extract! 
+ all_snames = [ + entry.attrib["id"] for entry in root.findall(".//entry") + ] + + return all_snames + + +def get_all_current_standard_names(): + """TODO.""" + ###print ("Retrieving XML from:", STD_NAME_CURRENT_XML_URL) + with request.urlopen(STD_NAME_CURRENT_XML_URL) as response: + all_snames_xml = response.read() + + all_snames = extract_names_from_xml(all_snames_xml) + total = len(all_snames) + + return all_snames, total + + +def main(): + """TODO.""" + names, total = get_all_current_standard_names() + + pprint.pprint(names) + print(f"Done with total of {len(names)} names parsed") + + +if __name__ == "__main__": + main() From 22783a82af9dbf61f0f3c3fbe1cffa49e1d3c79e Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Mon, 4 Aug 2025 16:31:49 +0100 Subject: [PATCH 02/97] Tidy CF Validation logic for retrieving standard names --- cfdm/cfvalidation.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/cfdm/cfvalidation.py b/cfdm/cfvalidation.py index 9c3152962..d2274a157 100644 --- a/cfdm/cfvalidation.py +++ b/cfdm/cfvalidation.py @@ -1,3 +1,4 @@ +import logging import os import pprint import re @@ -10,6 +11,9 @@ # TO parse the XML - better than using manual regex parsing! import xml.etree.ElementTree as ET +logger = logging.getLogger(__name__) + + # I.e. 
the data at repo location: # 'https://github.com/cf-convention/cf-convention.github.io/blob/main/Data/' # 'cf-standard-names/current/src/cf-standard-name-table.xml' but use this @@ -19,9 +23,6 @@ "cf-convention/cf-convention.github.io/refs/heads/main/Data/" "cf-standard-names/current/src/cf-standard-name-table.xml" ) -SAVE_DIR = "snames_cache" - -XML_STD_NAME_TAG_PATTERN = re.compile(r"") def extract_names_from_xml(snames_xml): @@ -39,23 +40,15 @@ def extract_names_from_xml(snames_xml): def get_all_current_standard_names(): """TODO.""" - ###print ("Retrieving XML from:", STD_NAME_CURRENT_XML_URL) + logger.info( + "Retrieving XML for set of current standard names from: ", + STD_NAME_CURRENT_XML_URL + ) # pragma: no cover with request.urlopen(STD_NAME_CURRENT_XML_URL) as response: all_snames_xml = response.read() - all_snames = extract_names_from_xml(all_snames_xml) - total = len(all_snames) - - return all_snames, total - - -def main(): - """TODO.""" - names, total = get_all_current_standard_names() - - pprint.pprint(names) - print(f"Done with total of {len(names)} names parsed") - + logger.debug( + f"Successfully retrived set of {len(all_snames_xml)} standard names" + ) # pragma: no cover -if __name__ == "__main__": - main() + return extract_names_from_xml(all_snames_xml) From 0f2cd316ada4c6e59532ad203d3c304b7124355f Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Mon, 4 Aug 2025 16:32:50 +0100 Subject: [PATCH 03/97] Add new function in netcdfread to validate standard names --- cfdm/cfvalidation.py | 4 ++-- cfdm/read_write/netcdf/netcdfread.py | 30 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/cfdm/cfvalidation.py b/cfdm/cfvalidation.py index d2274a157..18643bb2f 100644 --- a/cfdm/cfvalidation.py +++ b/cfdm/cfvalidation.py @@ -28,9 +28,9 @@ def extract_names_from_xml(snames_xml): """TODO.""" root = ET.fromstring(snames_xml) - # Want all elements. Note the regex this corresponds + # Want all elements. 
Note the regex this corresponds # to, from SLB older code, is 're.compile(r"")' but - # using the ElementTree is a much more robust means to extract! + # using the ElementTree is a much more robust means to extract all_snames = [ entry.attrib["id"] for entry in root.findall(".//entry") ] diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index d277be493..724e30584 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -22,6 +22,7 @@ from s3fs import S3FileSystem from uritools import urisplit +from ...cfvalidation import get_all_current_standard_names from ...data.netcdfindexer import netcdf_indexer from ...decorators import _manage_log_level_via_verbosity from ...functions import abspath, is_log_level_debug, is_log_level_detail @@ -150,6 +151,7 @@ class NetCDFRead(IORead): "is not used by data variable": 15, "not in node_coordinates": 16, "is not locatable in the group hierarchy": 17, + "has an invalid standard name": 20, } def cf_datum_parameters(self): @@ -8270,6 +8272,34 @@ def _copy_construct(self, construct_type, parent_ncvar, ncvar): # elements. General CF compliance is not checked (e.g. whether or # not grid mapping variable has a grid_mapping_name attribute). 
# ================================================================ + def _check_standard_name( + self, + parent_ncvar, + ncvar, + construct, + ): + """TODO.""" + # TODO cache once so only need to ingest once + valid_snames = get_all_current_standard_names() + + is_ok = True + + # Check if there is a standard_name attr registered, and if so check + # validity of it + for sname in ("standard_name", "computed_standard_name"): + value = self.implementation.get_property(construct, sname, None) + if value is not None and value not in valid_snames: + is_ok = False + + if not is_ok: + self._add_message( + parent_ncvar, + ncvar, + message="has an invalid standard name", + ) + + return is_ok + def _check_bounds( self, parent_ncvar, coord_ncvar, attribute, bounds_ncvar ): From d62f6c08fe0385beeda11c2bce72491ea7f96865 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Mon, 4 Aug 2025 17:57:20 +0100 Subject: [PATCH 04/97] Cache retrieval of standard names from XML at URL --- cfdm/cfvalidation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cfdm/cfvalidation.py b/cfdm/cfvalidation.py index 18643bb2f..97855a765 100644 --- a/cfdm/cfvalidation.py +++ b/cfdm/cfvalidation.py @@ -3,6 +3,8 @@ import pprint import re +from functools import lru_cache + # Prefer using built-in urllib to extract XML from cf-convention.github.io repo # over the 'github' module to use the GitHub API directly, because it avoids # the need for another dependency to the CF Data Tools. @@ -38,6 +40,7 @@ def extract_names_from_xml(snames_xml): return all_snames +@lru_cache def get_all_current_standard_names(): """TODO.""" logger.info( From ba926130ef58f386c65a92e1853d89d2de70bc73 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Mon, 4 Aug 2025 17:59:00 +0100 Subject: [PATCH 05/97] Add detection of invalid standard names on coordinates upon read --- cfdm/read_write/netcdf/netcdfread.py | 43 ++++++++++++++++++---------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 724e30584..9d867bbed 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -4912,6 +4912,7 @@ def _create_field_or_domain( create_new = True + known_bad_snames = set() if not coordinates: # DCH ALERT # what to do about duplicate standard names? TODO @@ -4927,9 +4928,18 @@ def _create_field_or_domain( ) in self.implementation.get_coordinates( f ).items(): - if n == self.implementation.get_property( + sname_prop = self.implementation.get_property( coord, "standard_name", None - ): + ) + + # Look for invalid standard names but only + # report them as invalid if they haven't + # already been detected + bad_snames = self._check_standard_name( + field_ncvar, n, coord, known_bad_snames) + known_bad_snames.update(bad_snames) + + if n == sname_prop: coordinates.append(key) # Add the datum to already existing vertical @@ -8275,30 +8285,33 @@ def _copy_construct(self, construct_type, parent_ncvar, ncvar): def _check_standard_name( self, parent_ncvar, - ncvar, - construct, + coord_ncvar, + coord, + known_bad_snames, ): """TODO.""" # TODO cache once so only need to ingest once valid_snames = get_all_current_standard_names() - is_ok = True # Check if there is a standard_name attr registered, and if so check # validity of it - for sname in ("standard_name", "computed_standard_name"): - value = self.implementation.get_property(construct, sname, None) + bad_snames = set() + for prop in ("standard_name", "computed_standard_name"): + value = self.implementation.get_property(coord, prop, None) if value is not None and value not in valid_snames: - is_ok = False + bad_snames.add(value) - if not 
is_ok: - self._add_message( - parent_ncvar, - ncvar, - message="has an invalid standard name", - ) + for sname in bad_snames: + if sname not in known_bad_snames: + logger.warning(f"Detected invalid standard name: {sname}") + self._add_message( + parent_ncvar, + coord_ncvar, + message="has an invalid standard name", + ) - return is_ok + return bad_snames def _check_bounds( self, parent_ncvar, coord_ncvar, attribute, bounds_ncvar From ccdd8fc467d4ecbb8af6c1a20ce1057671df3232 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 5 Aug 2025 15:41:54 +0100 Subject: [PATCH 06/97] Update standard names check for distinct conformance doc steps --- cfdm/cfvalidation.py | 15 +++++---- cfdm/read_write/netcdf/netcdfread.py | 46 +++++++++++++--------------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/cfdm/cfvalidation.py b/cfdm/cfvalidation.py index 97855a765..836bf16b3 100644 --- a/cfdm/cfvalidation.py +++ b/cfdm/cfvalidation.py @@ -10,16 +10,19 @@ # the need for another dependency to the CF Data Tools. from urllib import request -# TO parse the XML - better than using manual regex parsing! +# To parse the XML - better than using manual regex parsing! import xml.etree.ElementTree as ET logger = logging.getLogger(__name__) -# I.e. the data at repo location: -# 'https://github.com/cf-convention/cf-convention.github.io/blob/main/Data/' +# This is the data at the repo location: +# 'github.com/cf-convention/cf-convention.github.io/blob/main/Data/' # 'cf-standard-names/current/src/cf-standard-name-table.xml' but use this -# form under 'https://raw.githubusercontent.com' for raw XML content only +# form under 'https://raw.githubusercontent.com/' for raw XML content only. +# Note: the raw XML s also made available at: +# 'cfconventions.org/Data/cf-standard-names/current/src/cf-standard-name-' +# 'table.xml', is that a better location to grab from (may be more stable)? 
STD_NAME_CURRENT_XML_URL = ( "https://raw.githubusercontent.com/" "cf-convention/cf-convention.github.io/refs/heads/main/Data/" @@ -33,11 +36,11 @@ def extract_names_from_xml(snames_xml): # Want all elements. Note the regex this corresponds # to, from SLB older code, is 're.compile(r"")' but # using the ElementTree is a much more robust means to extract - all_snames = [ + all_standard_names = [ entry.attrib["id"] for entry in root.findall(".//entry") ] - return all_snames + return all_standard_names @lru_cache diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 9d867bbed..8e2b66b6d 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -4928,18 +4928,9 @@ def _create_field_or_domain( ) in self.implementation.get_coordinates( f ).items(): - sname_prop = self.implementation.get_property( + if n == self.implementation.get_property( coord, "standard_name", None - ) - - # Look for invalid standard names but only - # report them as invalid if they haven't - # already been detected - bad_snames = self._check_standard_name( - field_ncvar, n, coord, known_bad_snames) - known_bad_snames.update(bad_snames) - - if n == sname_prop: + ): coordinates.append(key) # Add the datum to already existing vertical @@ -5143,6 +5134,9 @@ def _create_field_or_domain( # ------------------------------------------------------------- self._set_quantization(f, field_ncvar) + # ------------------------------------------------------------- + # Compliance reporting + # ------------------------------------------------------------- # Add the structural read report to the field/domain dataset_compliance = g["dataset_compliance"][field_ncvar] components = dataset_compliance["non-compliance"] @@ -8287,31 +8281,33 @@ def _check_standard_name( parent_ncvar, coord_ncvar, coord, - known_bad_snames, ): """TODO.""" - # TODO cache once so only need to ingest once - valid_snames = get_all_current_standard_names() + invalid_names 
= [] + for sn_attr in ("standard_name", "computed_standard_name"): + # 1. Check if there is a (computed_)standard_name property + sn_value = self.implementation.get_property(coord, sn_attr, None) + # 2. Check, if requested, if is a string + # TODO, return early if so, to avoid getting list - # Check if there is a standard_name attr registered, and if so check - # validity of it - bad_snames = set() - for prop in ("standard_name", "computed_standard_name"): - value = self.implementation.get_property(coord, prop, None) - if value is not None and value not in valid_snames: - bad_snames.add(value) + # 3. Check, if requested, if string is in the list of valid names + valid_names = get_all_current_standard_names() - for sname in bad_snames: - if sname not in known_bad_snames: - logger.warning(f"Detected invalid standard name: {sname}") + if sn_value is not None and sn_value not in valid_names: + invalid_names.append(sn_value) + logger.warning( + f"Detected invalid standard name: '{sn_attr}' of " + f"'{sn_value}' for {coord_ncvar}" + ) self._add_message( parent_ncvar, coord_ncvar, message="has an invalid standard name", + attribute=sn_attr, ) - return bad_snames + return not invalid_names def _check_bounds( self, parent_ncvar, coord_ncvar, attribute, bounds_ncvar From 634277e08bea7f1362e1360177fb0edda1d877a0 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Tue, 5 Aug 2025 17:49:54 +0100 Subject: [PATCH 07/97] Update netcdfread to validate standard names on bounds --- cfdm/cfvalidation.py | 2 +- cfdm/read_write/netcdf/netcdfread.py | 40 ++++++++++++++++++++++------ 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/cfdm/cfvalidation.py b/cfdm/cfvalidation.py index 836bf16b3..98d64d70d 100644 --- a/cfdm/cfvalidation.py +++ b/cfdm/cfvalidation.py @@ -20,7 +20,7 @@ # 'github.com/cf-convention/cf-convention.github.io/blob/main/Data/' # 'cf-standard-names/current/src/cf-standard-name-table.xml' but use this # form under 'https://raw.githubusercontent.com/' for raw XML content only. -# Note: the raw XML s also made available at: +# Note: the raw XML is also made available at: # 'cfconventions.org/Data/cf-standard-names/current/src/cf-standard-name-' # 'table.xml', is that a better location to grab from (may be more stable)? STD_NAME_CURRENT_XML_URL = ( diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 8e2b66b6d..49d1a03f0 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8277,19 +8277,31 @@ def _copy_construct(self, construct_type, parent_ncvar, ncvar): # not grid mapping variable has a grid_mapping_name attribute). # ================================================================ def _check_standard_name( - self, - parent_ncvar, - coord_ncvar, - coord, - ): + self, parent_ncvar, coord_ncvar, coord_ncvar_attrs): """TODO.""" + # TODO downgrade status to info/debug + logger.warning("Ran _check_standard_name()") + invalid_names = [] for sn_attr in ("standard_name", "computed_standard_name"): # 1. Check if there is a (computed_)standard_name property - sn_value = self.implementation.get_property(coord, sn_attr, None) + sn_value = coord_ncvar_attrs.get(sn_attr) + logger.warning(f"%%%%%% Got sn_value of {sn_value}") # 2. 
Check, if requested, if is a string - # TODO, return early if so, to avoid getting list + # TODO this is not robust check (may have numpy string type) + # but good enough for now whilts developing + if not isinstance(sn_value, str): + self._add_message( + parent_ncvar, + coord_ncvar, + message=( + f"has a {sn_attr} attribute value that is not a " + "string" + ), + attribute=sn_attr, + conformance="3.3.requirement.1", + ) # 3. Check, if requested, if string is in the list of valid names valid_names = get_all_current_standard_names() @@ -8303,8 +8315,13 @@ def _check_standard_name( self._add_message( parent_ncvar, coord_ncvar, - message="has an invalid standard name", + message=( + f"has a {sn_attr} attribute value that is not " + "a valid name contained in the current standard name " + "table" + ), attribute=sn_attr, + conformance="3.3.requirement.2", ) return not invalid_names @@ -8356,6 +8373,13 @@ def _check_bounds( g = self.read_vars + bounds_ncvar_attrs = g["variable_attributes"][bounds_ncvar] + self._check_standard_name( + parent_ncvar, + bounds_ncvar, + bounds_ncvar_attrs, + ) + if bounds_ncvar not in g["internal_variables"]: bounds_ncvar, message = self._missing_variable( bounds_ncvar, variable_type From 6552be96041138ce0c658a2506473805b1669293 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 5 Aug 2025 18:01:34 +0100 Subject: [PATCH 08/97] Update & document return status for _check_standard_name --- cfdm/read_write/netcdf/netcdfread.py | 35 ++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 49d1a03f0..a4310a90d 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8277,21 +8277,43 @@ def _copy_construct(self, construct_type, parent_ncvar, ncvar): # not grid mapping variable has a grid_mapping_name attribute). 
# ================================================================ def _check_standard_name( - self, parent_ncvar, coord_ncvar, coord_ncvar_attrs): - """TODO.""" + self, parent_ncvar, coord_ncvar, coord_ncvar_attrs, + check_is_string=True, check_is_in_table=True, + check_is_in_custom_list=False, + ): + """TODO. + + Return values signfiy status: + + 1. None means there was no (computed_)standard_name for the + given variable. + 2. True means standard name(s) are registered and all valid. + 3. False means standard name(s) are registered and at least one + is invalid. + + """ # TODO downgrade status to info/debug logger.warning("Ran _check_standard_name()") invalid_names = [] + any_sn_found = False + invalid_sn_found = False for sn_attr in ("standard_name", "computed_standard_name"): # 1. Check if there is a (computed_)standard_name property sn_value = coord_ncvar_attrs.get(sn_attr) + # TODO downgrade status to info/debug logger.warning(f"%%%%%% Got sn_value of {sn_value}") + if not sn_value: + continue + + any_sn_found = True + # 2. 
Check, if requested, if is a string # TODO this is not robust check (may have numpy string type) # but good enough for now whilts developing if not isinstance(sn_value, str): + invalid_sn_found = True self._add_message( parent_ncvar, coord_ncvar, @@ -8307,7 +8329,7 @@ def _check_standard_name( valid_names = get_all_current_standard_names() if sn_value is not None and sn_value not in valid_names: - invalid_names.append(sn_value) + invalid_sn_found = True logger.warning( f"Detected invalid standard name: '{sn_attr}' of " f"'{sn_value}' for {coord_ncvar}" @@ -8324,7 +8346,12 @@ def _check_standard_name( conformance="3.3.requirement.2", ) - return not invalid_names + if not any_sn_found: # no (computed_)standard_name found + return + elif invalid_sn_found: # found at least one invalid standard name + return False + else: # found at least one and all are valid standard names + return True def _check_bounds( self, parent_ncvar, coord_ncvar, attribute, bounds_ncvar From ba30330d93bf3b67769ef0989bdff7ccb5fbcd49 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 5 Aug 2025 18:06:50 +0100 Subject: [PATCH 09/97] Pluralise function name to _check_standard_names --- cfdm/read_write/netcdf/netcdfread.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index a4310a90d..47f704833 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8276,7 +8276,7 @@ def _copy_construct(self, construct_type, parent_ncvar, ncvar): # elements. General CF compliance is not checked (e.g. whether or # not grid mapping variable has a grid_mapping_name attribute). 
# ================================================================ - def _check_standard_name( + def _check_standard_names( self, parent_ncvar, coord_ncvar, coord_ncvar_attrs, check_is_string=True, check_is_in_table=True, check_is_in_custom_list=False, @@ -8293,7 +8293,7 @@ def _check_standard_name( """ # TODO downgrade status to info/debug - logger.warning("Ran _check_standard_name()") + logger.warning("Ran _check_standard_names()") invalid_names = [] any_sn_found = False @@ -8312,7 +8312,7 @@ def _check_standard_name( # 2. Check, if requested, if is a string # TODO this is not robust check (may have numpy string type) # but good enough for now whilts developing - if not isinstance(sn_value, str): + if check_is_string and not isinstance(sn_value, str): invalid_sn_found = True self._add_message( parent_ncvar, @@ -8326,9 +8326,10 @@ def _check_standard_name( ) # 3. Check, if requested, if string is in the list of valid names - valid_names = get_all_current_standard_names() - - if sn_value is not None and sn_value not in valid_names: + elif ( + check_is_in_table and sn_value not in + get_all_current_standard_names() + ): invalid_sn_found = True logger.warning( f"Detected invalid standard name: '{sn_attr}' of " @@ -8346,6 +8347,9 @@ def _check_standard_name( conformance="3.3.requirement.2", ) + # TODO implement check_is_in_custom_list for custom list check, + # if so ignore table check for efficiency + if not any_sn_found: # no (computed_)standard_name found return elif invalid_sn_found: # found at least one invalid standard name @@ -8401,7 +8405,7 @@ def _check_bounds( g = self.read_vars bounds_ncvar_attrs = g["variable_attributes"][bounds_ncvar] - self._check_standard_name( + self._check_standard_names( parent_ncvar, bounds_ncvar, bounds_ncvar_attrs, From ce7b9d12fe2a89928c959a4b8daf46c91e911205 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Wed, 6 Aug 2025 11:25:37 +0100 Subject: [PATCH 10/97] Prevent redundant argument specification in _check_standard_names --- cfdm/read_write/netcdf/netcdfread.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 47f704833..a8e74d011 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8283,6 +8283,11 @@ def _check_standard_names( ): """TODO. + TODO rough notes, tidy for final docstring... + + Validity depends on the check_X keywords selected which + determine which checks to run vs skip. + Return values signfiy status: 1. None means there was no (computed_)standard_name for the @@ -8293,7 +8298,15 @@ def _check_standard_names( """ # TODO downgrade status to info/debug - logger.warning("Ran _check_standard_names()") + logger.warning("Running _check_standard_names()") + + if check_is_in_custom_list and check_is_in_table: + raise ValueError( + "Can't set both 'check_is_in_custom_list' and " + "'check_is_in_table'. The former is expected " + "to check a subset of the full table hence renders the " + "latter redundant - set it to False with a custom list." + ) invalid_names = [] any_sn_found = False @@ -8325,7 +8338,12 @@ def _check_standard_names( conformance="3.3.requirement.1", ) - # 3. Check, if requested, if string is in the list of valid names + # 3. TODO implement check_is_in_custom_list for custom list check. + # noting that the custom list must contain only valid standard + # names appropriate to the context, else it defeats the point! + + + # 4. 
Check, if requested, if string is in the list of valid names elif ( check_is_in_table and sn_value not in get_all_current_standard_names() @@ -8347,9 +8365,6 @@ def _check_standard_names( conformance="3.3.requirement.2", ) - # TODO implement check_is_in_custom_list for custom list check, - # if so ignore table check for efficiency - if not any_sn_found: # no (computed_)standard_name found return elif invalid_sn_found: # found at least one invalid standard name From 7ecdd04ab28c13f4414909bd6a2ebb4f5f3d4e26 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 6 Aug 2025 15:53:46 +0100 Subject: [PATCH 11/97] Validation for standard names on geometry node coords --- cfdm/read_write/netcdf/netcdfread.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index a8e74d011..ae50b3ac2 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8341,6 +8341,21 @@ def _check_standard_names( # 3. TODO implement check_is_in_custom_list for custom list check. # noting that the custom list must contain only valid standard # names appropriate to the context, else it defeats the point! + elif ( + check_is_in_custom_list and sn_value not in + check_is_in_custom_list + ): + invalid_sn_found = True + self._add_message( + parent_ncvar, + coord_ncvar, + message=( + f"has a {sn_attr} attribute value that is not " + "a valid current standard name appropriate to " + "the context of the variable in question" + ), + attribute=sn_attr, + ) # 4. 
Check, if requested, if string is in the list of valid names @@ -8365,6 +8380,7 @@ def _check_standard_names( conformance="3.3.requirement.2", ) + # Three possible return signatures to cover existence and validity: if not any_sn_found: # no (computed_)standard_name found return elif invalid_sn_found: # found at least one invalid standard name @@ -8495,6 +8511,13 @@ def _check_geometry_node_coordinates( geometry_ncvar = g["variable_geometry"].get(field_ncvar) + geometry_ncvar_attrs = g["variable_attributes"][geometry_ncvar] + self._check_standard_names( + node_ncvar, + geometry_ncvar, + geometry_ncvar_attrs, + ) + attribute = { field_ncvar + ":" From d4a6ddee876cc061a5aa2542b849ab20b37e3ba3 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 6 Aug 2025 16:04:53 +0100 Subject: [PATCH 12/97] Validation for standard names: geometry attrs & ancil vars --- cfdm/read_write/netcdf/netcdfread.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index ae50b3ac2..20d495aed 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8624,6 +8624,13 @@ def _check_cell_measures(self, field_ncvar, string, parsed_string): ncvar = values[0] + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + field_ncvar, + ncvar, + ncvar_attrs, + ) + unknown_external = ncvar in external_variables # Check that the variable exists in the file, or if not @@ -8699,6 +8706,13 @@ def _check_geometry_attribute(self, parent_ncvar, string, parsed_string): return False for ncvar in parsed_string: + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + parent_ncvar, + ncvar, + ncvar_attrs, + ) + # Check that the geometry variable exists in the file if ncvar not in g["variables"]: ncvar, message = self._missing_variable( @@ -8767,6 +8781,13 @@ def _check_ancillary_variables(self, field_ncvar, string, 
parsed_string): ok = True for ncvar in parsed_string: + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + field_ncvar, + ncvar, + ncvar_attrs, + ) + # Check that the variable exists in the file if ncvar not in g["internal_variables"]: ncvar, message = self._missing_variable( From ae3aa48397b24da1747055b2eda77f7d16af2cbe Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 6 Aug 2025 17:33:42 +0100 Subject: [PATCH 13/97] Improve compliance check messages for _check_standard_names --- cfdm/read_write/netcdf/netcdfread.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 20d495aed..24a740c0f 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -126,6 +126,8 @@ class NetCDFRead(IORead): "instance_dimension attribute": 311, "Count dimension": 320, "count_dimension attribute": 321, + "standard_name attribute": 400, + "computed_standard_name attribute": 401, } _code1 = { @@ -151,7 +153,15 @@ class NetCDFRead(IORead): "is not used by data variable": 15, "not in node_coordinates": 16, "is not locatable in the group hierarchy": 17, - "has an invalid standard name": 20, + "has a value that is not a string": 20, + ( + "has a value that is not appropriate to " + "the context of the variable in question" + ): 21, + ( + "has a value that is not a valid name contained " + "in the current standard name table" + ): 22, } def cf_datum_parameters(self): @@ -8331,8 +8341,8 @@ def _check_standard_names( parent_ncvar, coord_ncvar, message=( - f"has a {sn_attr} attribute value that is not a " - "string" + f"{sn_attr} attribute", + f"has a value that is not a string", ), attribute=sn_attr, conformance="3.3.requirement.1", @@ -8350,9 +8360,9 @@ def _check_standard_names( parent_ncvar, coord_ncvar, message=( - f"has a {sn_attr} attribute value that is not " - "a valid current standard 
name appropriate to " - "the context of the variable in question" + f"{sn_attr} attribute", + f"has a value that is not appropriate to " + "the context of the variable in question", ), attribute=sn_attr, ) @@ -8372,9 +8382,9 @@ def _check_standard_names( parent_ncvar, coord_ncvar, message=( - f"has a {sn_attr} attribute value that is not " - "a valid name contained in the current standard name " - "table" + f"{sn_attr} attribute", + f"has a value that is not a valid name contained " + "in the current standard name table", ), attribute=sn_attr, conformance="3.3.requirement.2", From 42f83e3891685d596211d6b4ff2c3b2812ae562d Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 6 Aug 2025 17:53:44 +0100 Subject: [PATCH 14/97] Validation for standard names: aux, tie point & scalar coords --- cfdm/read_write/netcdf/netcdfread.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 24a740c0f..f0b690f81 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8851,6 +8851,13 @@ def _check_auxiliary_or_scalar_coordinate( g = self.read_vars + coord_ncvar_attrs = g["variable_attributes"][coord_ncvar] + self._check_standard_names( + parent_ncvar, + coord_ncvar, + coord_ncvar_attrs, + ) + if coord_ncvar not in g["internal_variables"]: coord_ncvar, message = self._missing_variable( coord_ncvar, "Auxiliary/scalar coordinate variable" @@ -8918,6 +8925,13 @@ def _check_tie_point_coordinates( g = self.read_vars + tie_point_ncvar_attrs = g["variable_attributes"][tie_point_ncvar] + self._check_standard_names( + parent_ncvar, + tie_point_ncvar, + tie_point_ncvar_attrs, + ) + if tie_point_ncvar not in g["internal_variables"]: ncvar, message = self._missing_variable( tie_point_ncvar, "Tie point coordinate variable" From b897fa80fe6cfdc98ea2a06ca1013646a5b6ca59 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Wed, 6 Aug 2025 18:08:07 +0100 Subject: [PATCH 15/97] Validation for standard names: node coordinates --- cfdm/read_write/netcdf/netcdfread.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index f0b690f81..1279404e3 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -9008,6 +9008,9 @@ def _check_grid_mapping( g = self.read_vars + # Note: we don't call _check_standard_names for the grid mapping + # check because in this case the standard_name is not standardised + if not parsed_grid_mapping: self._add_message( parent_ncvar, @@ -9114,6 +9117,13 @@ def _check_node_coordinates( g = self.read_vars + geometry_ncvar_attrs = g["variable_attributes"][geometry_ncvar] + self._check_standard_names( + field_ncvar, + geometry_ncvar, + geometry_ncvar_attrs, + ) + incorrectly_formatted = ( "node_coordinates attribute", "is incorrectly formatted", From dc38eea608946c3b519ab57e4b007d83908c5dcb Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 7 Aug 2025 12:04:42 +0100 Subject: [PATCH 16/97] Update comments in netcdfread RE compliance checking --- cfdm/read_write/netcdf/netcdfread.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 1279404e3..c543e8349 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8283,8 +8283,14 @@ def _copy_construct(self, construct_type, parent_ncvar, ncvar): # # These methods (whose names all start with "_check") check the # minimum required for mapping the file to CFDM structural - # elements. General CF compliance is not checked (e.g. whether or - # not grid mapping variable has a grid_mapping_name attribute). + # elements. + # + # General CF compliance is not checked (e.g. 
whether or + # not grid mapping variable has a grid_mapping_name attribute) + # except for the case of (so far): + # * whether (computed_)standard_name values are valid according + # to specified criteria under Section 3.3. of the Conformance + # document. # ================================================================ def _check_standard_names( self, parent_ncvar, coord_ncvar, coord_ncvar_attrs, From e007f1fe0b7288f5fb3bce5e8ef98bc8fc6f7de4 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 7 Aug 2025 12:19:06 +0100 Subject: [PATCH 17/97] Validation for standard names: geometry-related variables --- cfdm/read_write/netcdf/netcdfread.py | 31 ++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index c543e8349..73b8b6076 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -9123,6 +9123,9 @@ def _check_node_coordinates( g = self.read_vars + # TODO is this necessary for the geometry_ncvar too? Note could + # call this in one of many methods directly below instead, so where + # is best to place it if needed? Investigate. 
geometry_ncvar_attrs = g["variable_attributes"][geometry_ncvar] self._check_standard_names( field_ncvar, @@ -9161,6 +9164,13 @@ def _check_node_coordinates( # Check that the node coordinate variable exists in the # file if ncvar not in g["internal_variables"]: + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + field_ncvar, + ncvar, + ncvar_attrs, + ) + ncvar, message = self._missing_variable( ncvar, "Node coordinate variable" ) @@ -9199,6 +9209,13 @@ def _check_node_count( ok = True for ncvar in parsed_node_count: + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + field_ncvar, + ncvar, + ncvar_attrs, + ) + # Check that the node count variable exists in the file if ncvar not in g["internal_variables"]: ncvar, message = self._missing_variable( @@ -9243,6 +9260,13 @@ def _check_part_node_count( ok = True for ncvar in parsed_part_node_count: + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + field_ncvar, + ncvar, + ncvar_attrs, + ) + # Check that the variable exists in the file if ncvar not in g["internal_variables"]: ncvar, message = self._missing_variable( @@ -9298,6 +9322,13 @@ def _check_interior_ring( return False for ncvar in parsed_interior_ring: + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + field_ncvar, + ncvar, + ncvar_attrs, + ) + # Check that the variable exists in the file if ncvar not in g["internal_variables"]: ncvar, message = self._missing_variable( From bc8d655019b8900186b71fd7aafd239e40f5174b Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Thu, 7 Aug 2025 12:28:44 +0100 Subject: [PATCH 18/97] Generalise variable names in _check_standard_names --- cfdm/read_write/netcdf/netcdfread.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 73b8b6076..e35bb9579 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8293,7 +8293,7 @@ def _copy_construct(self, construct_type, parent_ncvar, ncvar): # document. # ================================================================ def _check_standard_names( - self, parent_ncvar, coord_ncvar, coord_ncvar_attrs, + self, parent_ncvar, ncvar, ncvar_attrs, check_is_string=True, check_is_in_table=True, check_is_in_custom_list=False, ): @@ -8314,7 +8314,7 @@ def _check_standard_names( """ # TODO downgrade status to info/debug - logger.warning("Running _check_standard_names()") + logger.warning(f"Running _check_standard_names() for: {ncvar}") if check_is_in_custom_list and check_is_in_table: raise ValueError( @@ -8329,7 +8329,7 @@ def _check_standard_names( invalid_sn_found = False for sn_attr in ("standard_name", "computed_standard_name"): # 1. 
Check if there is a (computed_)standard_name property - sn_value = coord_ncvar_attrs.get(sn_attr) + sn_value = ncvar_attrs.get(sn_attr) # TODO downgrade status to info/debug logger.warning(f"%%%%%% Got sn_value of {sn_value}") @@ -8345,7 +8345,7 @@ def _check_standard_names( invalid_sn_found = True self._add_message( parent_ncvar, - coord_ncvar, + ncvar, message=( f"{sn_attr} attribute", f"has a value that is not a string", @@ -8364,7 +8364,7 @@ def _check_standard_names( invalid_sn_found = True self._add_message( parent_ncvar, - coord_ncvar, + ncvar, message=( f"{sn_attr} attribute", f"has a value that is not appropriate to " @@ -8382,11 +8382,11 @@ def _check_standard_names( invalid_sn_found = True logger.warning( f"Detected invalid standard name: '{sn_attr}' of " - f"'{sn_value}' for {coord_ncvar}" + f"'{sn_value}' for {ncvar}" ) self._add_message( parent_ncvar, - coord_ncvar, + ncvar, message=( f"{sn_attr} attribute", f"has a value that is not a valid name contained " @@ -9442,8 +9442,16 @@ def _check_coordinate_interpolation( ok = True for interp_ncvar, coords in parsed_coordinate_interpolation.items(): - # Check that the interpolation variable exists in the file - if interp_ncvar not in g["internal_variables"]: + # Check that the interpolation variable exists in the file and + # if it does check standard names, if not register issue + if interp_ncvar in g["internal_variables"]: + interp_ncvar_attrs = g["variable_attributes"][interp_ncvar] + self._check_standard_names( + parent_ncvar, + interp_ncvar, + interp_ncvar_attrs, + ) + else: ncvar, message = self._missing_variable( interp_ncvar, "Interpolation variable" ) From fc77de5276c11fe70a2e55b83ae5b63354e4fe40 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Thu, 7 Aug 2025 19:25:36 +0100 Subject: [PATCH 19/97] Validation for standard names: for coordinate interpolation --- cfdm/read_write/netcdf/netcdfread.py | 44 ++++++++++++++++++---------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index e35bb9579..4b05f6683 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8640,12 +8640,15 @@ def _check_cell_measures(self, field_ncvar, string, parsed_string): ncvar = values[0] - ncvar_attrs = g["variable_attributes"][ncvar] - self._check_standard_names( - field_ncvar, - ncvar, - ncvar_attrs, - ) + # TODO SLB: may be None here hence get() and conditional. Do + # we need to do this for each use of _check_standard_names? + ncvar_attrs = g["variable_attributes"].get(ncvar) + if ncvar_attrs: + self._check_standard_names( + field_ncvar, + ncvar, + ncvar_attrs, + ) unknown_external = ncvar in external_variables @@ -9442,16 +9445,15 @@ def _check_coordinate_interpolation( ok = True for interp_ncvar, coords in parsed_coordinate_interpolation.items(): - # Check that the interpolation variable exists in the file and - # if it does check standard names, if not register issue - if interp_ncvar in g["internal_variables"]: - interp_ncvar_attrs = g["variable_attributes"][interp_ncvar] - self._check_standard_names( - parent_ncvar, - interp_ncvar, - interp_ncvar_attrs, - ) - else: + interp_ncvar_attrs = g["variable_attributes"][interp_ncvar] + self._check_standard_names( + parent_ncvar, + interp_ncvar, + interp_ncvar_attrs, + ) + + # Check that the interpolation variable exists in the file + if interp_ncvar not in g["internal_variables"]: ncvar, message = self._missing_variable( interp_ncvar, "Interpolation variable" ) @@ -9476,6 +9478,16 @@ def _check_coordinate_interpolation( # Check that the tie point coordinate variables exist in # the file for tie_point_ncvar in coords: + # TODO: is 
this necessary or is it covered by the interp_ncvar + # standard name check already? + tie_point_interp_ncvar_attrs = g[ + "variable_attributes"][tie_point_ncvar] + self._check_standard_names( + parent_ncvar, + tie_point_ncvar, + tie_point_interp_ncvar_attrs, + ) + if tie_point_ncvar not in g["internal_variables"]: ncvar, message = self._missing_variable( tie_point_ncvar, "Tie point coordinate variable" From 4ef5a5ea086bbab31fa0e39d288ae328a8b48511 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 8 Aug 2025 13:30:42 +0100 Subject: [PATCH 20/97] Clarify _check_standard_names dict querying for external variables --- cfdm/read_write/netcdf/netcdfread.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 4b05f6683..bd7a3c265 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8640,8 +8640,13 @@ def _check_cell_measures(self, field_ncvar, string, parsed_string): ncvar = values[0] - # TODO SLB: may be None here hence get() and conditional. Do - # we need to do this for each use of _check_standard_names? + # For external variables, the variable will not be covered + # in read_vars["variable_attributes"], so in this case we + # can't rely on the ncvar key being present, hence get(). + # Note that at present this is an outlier since only cell + # measures can be external (but consult + # https://cfconventions.org/cf-conventions/ + # cf-conventions.html#external-variables in case this changes). ncvar_attrs = g["variable_attributes"].get(ncvar) if ncvar_attrs: self._check_standard_names( From 9cfb487e7e43908722c3abdf0b9e699287b77378 Mon Sep 17 00:00:00 2001 From: "Sadie L.
Bartholomew" Date: Fri, 8 Aug 2025 14:04:26 +0100 Subject: [PATCH 21/97] Validation for standard names: for quantization container vars --- cfdm/read_write/netcdf/netcdfread.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index bd7a3c265..2c9a7f707 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -9533,6 +9533,13 @@ def _check_quantization(self, parent_ncvar, ncvar): ok = True + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + parent_ncvar, + ncvar, + ncvar_attrs, + ) + # Check that the quantization variable exists in the file if ncvar not in g["internal_variables"]: ncvar, message = self._missing_variable( From 8f84e72e5c14500dc02c05242d77abac00ee5b94 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 8 Aug 2025 15:04:26 +0100 Subject: [PATCH 22/97] Validation for standard names: for UGRID mesh topology vars --- cfdm/read_write/netcdf/netcdfread.py | 47 ++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 2c9a7f707..c57a3699d 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -10532,6 +10532,13 @@ def _ugrid_check_mesh_topology(self, mesh_ncvar): ok = True + mesh_ncvar_attrs = g["variable_attributes"][mesh_ncvar] + self._check_standard_names( + mesh_ncvar, + mesh_ncvar, + mesh_ncvar_attrs, + ) + if mesh_ncvar not in g["internal_variables"]: mesh_ncvar, message = self._missing_variable( mesh_ncvar, "Mesh topology variable" @@ -10599,6 +10606,13 @@ def _ugrid_check_mesh_topology(self, mesh_ncvar): ) ok = False else: + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + mesh_ncvar, + ncvar, + ncvar_attrs, + ) + dims = [] ncdims = self._ncdimensions(ncvar) if len(ncdims) != 1: @@ -10639,6 +10653,7 @@ def 
_ugrid_check_mesh_topology(self, mesh_ncvar): ok = False elif topology_dimension == 2: ncvar = attributes.get("face_node_connectivity") + if ncvar is None: self._add_message( mesh_ncvar, @@ -10658,8 +10673,17 @@ def _ugrid_check_mesh_topology(self, mesh_ncvar): attribute={f"{mesh_ncvar}:face_node_connectivity": ncvar}, ) ok = False + else: + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + mesh_ncvar, + ncvar, + ncvar_attrs, + ) + elif topology_dimension == 1: ncvar = attributes.get("edge_node_connectivity") + if ncvar is None: self._add_message( mesh_ncvar, @@ -10679,8 +10703,17 @@ def _ugrid_check_mesh_topology(self, mesh_ncvar): attribute={f"{mesh_ncvar}:edge_node_connectivity": ncvar}, ) ok = False + else: + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + mesh_ncvar, + ncvar, + ncvar_attrs, + ) + elif topology_dimension == 3: ncvar = attributes.get("volume_node_connectivity") + if ncvar is None: self._add_message( mesh_ncvar, @@ -10705,6 +10738,13 @@ def _ugrid_check_mesh_topology(self, mesh_ncvar): }, ) ok = False + else: + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + mesh_ncvar, + ncvar, + ncvar_attrs, + ) ncvar = attributes.get("volume_shape_type") if ncvar is None: @@ -10714,6 +10754,13 @@ def _ugrid_check_mesh_topology(self, mesh_ncvar): message=("volume_shape_type attribute", "is missing"), ) ok = False + else: + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + mesh_ncvar, + ncvar, + ncvar_attrs, + ) else: self._add_message( mesh_ncvar, From 0cf5b40aac940484cf474741e0302d72f7c9bbf9 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Fri, 8 Aug 2025 15:16:06 +0100 Subject: [PATCH 23/97] Validation for standard names: for UGRID location index set --- cfdm/read_write/netcdf/netcdfread.py | 34 ++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index c57a3699d..573374fc8 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -10808,8 +10808,14 @@ def _ugrid_check_location_index_set( location_index_set_ncvar, message=message, ) - ok = False - return ok + return False + else: + ncvar_attrs = g["variable_attributes"][location_index_set_ncvar] + self._check_standard_names( + location_index_set_ncvar, + location_index_set_ncvar, + ncvar_attrs, + ) location_index_set_attributes = g["variable_attributes"][ location_index_set_ncvar @@ -10860,6 +10866,13 @@ def _ugrid_check_location_index_set( attribute={f"{location_index_set_ncvar}:mesh": mesh_ncvar}, ) ok = False + else: + mesh_ncvar_attrs = g["variable_attributes"][mesh_ncvar] + self._check_standard_names( + location_index_set_ncvar, + mesh_ncvar, + mesh_ncvar_attrs, + ) return ok @@ -10916,8 +10929,14 @@ def _ugrid_check_field_location_index_set( f"{parent_ncvar}:location_index_set": location_index_set_ncvar }, ) - ok = False - return ok + return False + else: + ncvar_attrs = g["variable_attributes"][location_index_set_ncvar] + self._check_standard_names( + parent_ncvar, + location_index_set_ncvar, + ncvar_attrs, + ) location_index_set_attributes = g["variable_attributes"][ location_index_set_ncvar @@ -10968,6 +10987,13 @@ def _ugrid_check_field_location_index_set( attribute={f"{location_index_set_ncvar}:mesh": mesh_ncvar}, ) ok = False + else: + mesh_ncvar_attrs = g["variable_attributes"][mesh_ncvar] + self._check_standard_names( + parent_ncvar, + mesh_ncvar, + mesh_ncvar_attrs, + ) parent_ncdims = self._ncdimensions(parent_ncvar) lis_ncdims = 
self._ncdimensions(location_index_set_ncvar) From 0939181fe65a42f3046e03a001bcf37a4ce21b4b Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 8 Aug 2025 15:19:26 +0100 Subject: [PATCH 24/97] netcdfread: fix to ref appropriate var in existing message --- cfdm/read_write/netcdf/netcdfread.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 573374fc8..d05e96690 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -10982,7 +10982,7 @@ def _ugrid_check_field_location_index_set( elif mesh_ncvar not in g["mesh"]: self._add_message( parent_ncvar, - location_index_set_ncvar, + mesh_ncvar, message=("Mesh attribute", "is not a mesh topology variable"), attribute={f"{location_index_set_ncvar}:mesh": mesh_ncvar}, ) From 71e8a5430fac924a567054482ad5d9e4d6846fae Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 8 Aug 2025 15:41:40 +0100 Subject: [PATCH 25/97] Validation for standard names: for UGRID mesh & connectivity --- cfdm/read_write/netcdf/netcdfread.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index d05e96690..5da2c96ab 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -11061,6 +11061,13 @@ def _ugrid_check_field_mesh( attribute={f"{parent_ncvar}:mesh": mesh_ncvar}, ) return False + else: + mesh_ncvar_attrs = g["variable_attributes"][mesh_ncvar] + self._check_standard_names( + parent_ncvar, + mesh_ncvar, + mesh_ncvar_attrs, + ) location = parent_attributes.get("location") if location is None: @@ -11135,10 +11142,8 @@ def _ugrid_check_connectivity_variable( message=(f"{connectivity_attr} attribute", "is missing"), variable=mesh_ncvar, ) - ok = False - return ok - - if connectivity_ncvar not in g["internal_variables"]: + return False + 
elif connectivity_ncvar not in g["internal_variables"]: connectivity_ncvar, message = self._missing_variable( connectivity_ncvar, f"{connectivity_attr} variable" ) @@ -11151,8 +11156,14 @@ def _ugrid_check_connectivity_variable( }, variable=mesh_ncvar, ) - ok = False - return ok + return False + else: + ncvar_attrs = g["variable_attributes"][connectivity_ncvar] + self._check_standard_names( + parent_ncvar, + connectivity_ncvar, + ncvar_attrs, + ) parent_ncdims = self._ncdimensions(parent_ncvar) connectivity_ncdims = self._ncdimensions(connectivity_ncvar)[0] From 95791c55f63366715b077f7bf21356101daf6764 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 8 Aug 2025 17:30:47 +0100 Subject: [PATCH 26/97] Flesh out formal docstring for _check_standard_names --- cfdm/read_write/netcdf/netcdfread.py | 69 ++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 10 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 5da2c96ab..843f6dc64 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8297,20 +8297,69 @@ def _check_standard_names( check_is_string=True, check_is_in_table=True, check_is_in_custom_list=False, ): - """TODO. + """Check the `(computed_)standard_name` attribute for validity. + + Checks performed depend on the `check_*` input flags to enable + or disable given checks and can include a type check and a + check as to whether the name is contained in the current + version of the CF Conventions standard name table, else some + custom list which is expected to be a small subset of names + from the table. + + These checks are in the context of the variable and + parent variable. + + .. versionadded:: NEXTVERSION + + :Parameters: - TODO rough notes, tidy for final docstring... + parent_ncvar: `str` + The netCDF variable name of the parent variable. - Validity depends on the check_X keywords selected which - determine which checks to run vs skip. 
+ ncvar: `str` + The name of the netCDF variable to perform the + standard names check upon. + + ncvar_attrs: `str` + The variable attributes for the netCDF variable, as + stored in the 'read_vars' dictionary under the + 'variable_attributes' key. + + check_is_string: `bool` + Whether or not to check if the type of the attribute + value is a string type. By default this is checked. + + check_is_in_table: `bool` + Whether or not to check if the attribute value is + identical to one of the names contained in the + current version of the CF Conventions standard name + table (as processed from the canonical XML). By + default this is checked. + + check_is_in_custom_list: `list` + Whether or not to check if the attribute value is + identical to one of the names contained in a list + of custom values specified. Set to `False` to + disable this check, else a list of names which is + a small subset of those in the CF Conventions + standard name table is expected. + + .. note:: If a list is provided for + `check_is_in_custom_list` it becomes + redundant to check against the entire + table, therefore `check_is_in_table` + must be `False` else a `ValueError` + will be raised to reiterate this. - Return values signfiy status: + :Returns: - 1. None means there was no (computed_)standard_name for the - given variable. - 2. True means standard name(s) are registered and all valid. - 3. False means standard name(s) are registered and at least one - is invalid. + `bool` or `None` + The outcome of the check, where `True` means + standard name(s) exist and are (all) valid against + the configured checks, `False` means standard + name(s) exist but at least one is invalid + according to those checks, and `None` means no + (computed_)standard_name was found. """ # TODO downgrade status to info/debug From 2ce6ada0217e5da733ff9f46dcbc30dcc8b4738c Mon Sep 17 00:00:00 2001 From: "Sadie L.
Bartholomew" Date: Thu, 28 Aug 2025 14:56:13 +0100 Subject: [PATCH 27/97] Set up skeleton for new test for compliance checking --- cfdm/test/test_compliance_checking.py | 89 +++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 cfdm/test/test_compliance_checking.py diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py new file mode 100644 index 000000000..e23cc2ecf --- /dev/null +++ b/cfdm/test/test_compliance_checking.py @@ -0,0 +1,89 @@ +import atexit +import copy +import datetime +import faulthandler +import logging +import os +import platform +import sys +import tempfile +import unittest + +import numpy as np + +faulthandler.enable() # to debug seg faults and timeouts + +import cfdm + +n_tmpfiles = 1 +tmpfiles = [ + tempfile.mkstemp("_test_functions.nc", dir=os.getcwd())[1] + for i in range(n_tmpfiles) +] +(temp_file,) = tmpfiles + + +def _remove_tmpfiles(): + """Remove temporary files created during tests.""" + for f in tmpfiles: + try: + os.remove(f) + except OSError: + pass + + +atexit.register(_remove_tmpfiles) + + +class ComplianceCheckingTest(unittest.TestCase): + """Test compliance checking functionality.""" + + def setUp(self): + """Preparations called immediately before each test method.""" + # Disable log messages to silence expected warnings + cfdm.log_level("DISABLE") + # Note: to enable all messages for given methods, lines or + # calls (those without a 'verbose' option to do the same) + # e.g. to debug them, wrap them (for methods, start-to-end + # internally) as follows: + # + # cfdm.LOG_LEVEL('DEBUG') + # < ... test code ... > + # cfdm.log_level('DISABLE') + + # 1. Create a file with field with invalid standard names generally + # using our 'kitchen sink' field as a basis + bad_sn_f = cfdm.example_field(1) + # TODO set bad names and then write to tempfile and read back in + + # 1. 
Create a file with a UGRID field with invalid standard names + # on UGRID components, using our core 'UGRID 1' field as a basis + ugrid_file_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "ugrid_1.nc" + ) + bad_ugrid_sn_f = cfdm.read(ugrid_file_path) + # TODO set bad names and then write to tempfile and read back in + + def test_extract_names_from_xml(self): + """Test the `extract_names_from_xml` function.""" + # TODO + + def test_get_all_current_standard_names(self): + """Test the `get_all_current_standard_names` function.""" + # TODO + + def test_standard_names_validation_standard_field_read(self): + """Test TODO.""" + # TODO + + def test_standard_names_validation_ugrid_field_read(self): + """Test TODO.""" + # TODO + + def test_dataset_compliance(self): + """Test the `dataset_compliance` method across supported constructs.""" + # TODO + + def test_check_standard_names(self): + """Test the `_check_standard_names` method.""" + # TODO - move to netcdfread test From bccee9ada503699898efda219a37a35cebd366d5 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Thu, 28 Aug 2025 15:14:04 +0100 Subject: [PATCH 28/97] Update to finalise methods in test for compliance checking --- cfdm/test/test_compliance_checking.py | 46 ++++++++++++++++++++------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index e23cc2ecf..503aed618 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -36,7 +36,7 @@ def _remove_tmpfiles(): class ComplianceCheckingTest(unittest.TestCase): - """Test compliance checking functionality.""" + """Test CF Conventions compliance checking functionality.""" def setUp(self): """Preparations called immediately before each test method.""" @@ -65,25 +65,47 @@ def setUp(self): # TODO set bad names and then write to tempfile and read back in def test_extract_names_from_xml(self): - """Test the `extract_names_from_xml` function.""" + """Test the `cfvalidation.extract_names_from_xml` function.""" # TODO def test_get_all_current_standard_names(self): - """Test the `get_all_current_standard_names` function.""" + """Test the `cfvalidation.get_all_current_standard_names` function.""" # TODO - def test_standard_names_validation_standard_field_read(self): - """Test TODO.""" - # TODO + def test_field_dataset_compliance(self): + """Test the `Field.dataset_compliance` method. - def test_standard_names_validation_ugrid_field_read(self): - """Test TODO.""" + Note: keeping this test here rather than in the test_Field module + because it requires the creation of 'bad' fields e.g. with invalid + standard names, and we create those as temporary files here already. + """ # TODO - def test_dataset_compliance(self): - """Test the `dataset_compliance` method across supported constructs.""" + def test_domain_dataset_compliance(self): + """Test the `Domain.dataset_compliance` method. 
+ + Note: keeping this test here rather than in the test_Domain module + because it requires the creation of 'bad' fields e.g. with invalid + standard names, and we create those as temporary files here already. + """ # TODO def test_check_standard_names(self): - """Test the `_check_standard_names` method.""" - # TODO - move to netcdfread test + """Test the `NetCDFRead._check_standard_names` method.""" + # TODO + + def test_standard_names_validation_good_standard_field_read(self): + """Test compliance checking on a compliant standard field.""" + # TODO + + def test_standard_names_validation_bad_standard_field_read(self): + """Test compliance checking on a non-compliant standard field.""" + # TODO + + def test_standard_names_validation_good_ugrid_field_read(self): + """Test compliance checking on a compliant UGRID field.""" + # TODO + + def test_standard_names_validation_bad_ugrid_field_read(self): + """Test compliance checking on a non-compliant standard field.""" + # TODO From 7cd569e7557b6c2882af8990ebedc98c72b6b7f2 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 28 Aug 2025 15:55:13 +0100 Subject: [PATCH 29/97] Rename 'extract_names_from_xml' to mark as internal-use only --- cfdm/cfvalidation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cfdm/cfvalidation.py b/cfdm/cfvalidation.py index 98d64d70d..b27cde369 100644 --- a/cfdm/cfvalidation.py +++ b/cfdm/cfvalidation.py @@ -30,8 +30,10 @@ ) -def extract_names_from_xml(snames_xml): +def _extract_names_from_xml(snames_xml): """TODO.""" + print("XML IS", snames_xml) + exit() root = ET.fromstring(snames_xml) # Want all elements. 
Note the regex this corresponds # to, from SLB older code, is 're.compile(r"")' but @@ -57,4 +59,4 @@ def get_all_current_standard_names(): f"Successfully retrived set of {len(all_snames_xml)} standard names" ) # pragma: no cover - return extract_names_from_xml(all_snames_xml) + return _extract_names_from_xml(all_snames_xml) From b60af359a37aa2b0f27e48d7bc0da52872b5fa6b Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 28 Aug 2025 16:05:57 +0100 Subject: [PATCH 30/97] Test 'get_all_current_standard_names' & add to cfdm namespace --- cfdm/__init__.py | 4 ++- cfdm/cfvalidation.py | 2 -- cfdm/test/test_compliance_checking.py | 37 ++++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/cfdm/__init__.py b/cfdm/__init__.py index 54e43089e..2a2e5a45d 100644 --- a/cfdm/__init__.py +++ b/cfdm/__init__.py @@ -210,7 +210,7 @@ from .constants import masked -# Internal ones passed on so they can be used in cf-python (see +# Note internal ones here are passed on so they can be used in cf-python (see # comment below) from .functions import ( ATOL, @@ -249,6 +249,8 @@ _display_or_return, ) +from .cfvalidation import get_all_current_standard_names + from .constructs import Constructs from .data import ( diff --git a/cfdm/cfvalidation.py b/cfdm/cfvalidation.py index b27cde369..9a50d2f2d 100644 --- a/cfdm/cfvalidation.py +++ b/cfdm/cfvalidation.py @@ -32,8 +32,6 @@ def _extract_names_from_xml(snames_xml): """TODO.""" - print("XML IS", snames_xml) - exit() root = ET.fromstring(snames_xml) # Want all elements. 
Note the regex this corresponds # to, from SLB older code, is 're.compile(r"")' but diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 503aed618..9fd6e1725 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -65,12 +65,40 @@ def setUp(self): # TODO set bad names and then write to tempfile and read back in def test_extract_names_from_xml(self): - """Test the `cfvalidation.extract_names_from_xml` function.""" + """Test the `cfvalidation._extract_names_from_xml` function.""" # TODO def test_get_all_current_standard_names(self): """Test the `cfvalidation.get_all_current_standard_names` function.""" # TODO + output = cfdm.cfvalidation.get_all_current_standard_names() + self.assertIsInstance(output, list) + + # The function gets the current table so we can't know exactly how + # many names there will be there going forward, but given there are + # over 4500 names (~4900 at time of writing, Aug 2025) and there is + # a general upward trend with names rarely removed, we can safely + # assume the list is at least 4500 names long and test on this in + # lieu of changing exact size. + self.assertTrue(len(output) > 4500) + + # Check some known names which won't ever be removed are in there + self.assertIn("longitude", output) + self.assertIn("latitude", output) + self.assertIn("time", output) + + # Check a long name with plenty of underscores is in there too + self.assertIn( + "integral_wrt_time_of_radioactivity_concentration_of_113Cd_in_air", + output + ) + + # SLB TODO!: spotted issue with approach in that aliases are valid + # standard names but often historically valid only so not in the + # current table! Maybe we need to parse the 'alias' items too. + # Check known/noted alias is in there. 
+ self.assertIn("atmosphere_moles_of_cfc113", output) + # CURRENT FAIL self.assertIn("moles_of_cfc113_in_atmosphere", output) def test_field_dataset_compliance(self): """Test the `Field.dataset_compliance` method. @@ -109,3 +137,10 @@ def test_standard_names_validation_good_ugrid_field_read(self): def test_standard_names_validation_bad_ugrid_field_read(self): """Test compliance checking on a non-compliant standard field.""" # TODO + + +if __name__ == "__main__": + print("Run date:", datetime.datetime.now()) + cfdm.environment() + print("") + unittest.main(verbosity=2) From ce556f047794408deedd848fbea419e6a993da2f Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 28 Aug 2025 16:39:00 +0100 Subject: [PATCH 31/97] Test '_extract_names_from_xml' & add to cfdm namespace --- cfdm/__init__.py | 5 +- cfdm/test/test_compliance_checking.py | 72 +++++++++++++++++++++++++-- 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/cfdm/__init__.py b/cfdm/__init__.py index 2a2e5a45d..b8f280291 100644 --- a/cfdm/__init__.py +++ b/cfdm/__init__.py @@ -249,7 +249,10 @@ _display_or_return, ) -from .cfvalidation import get_all_current_standard_names +from .cfvalidation import ( + get_all_current_standard_names, + _extract_names_from_xml +) from .constructs import Constructs diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 9fd6e1725..fef20abfe 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -15,12 +15,16 @@ import cfdm -n_tmpfiles = 1 + +n_tmpfiles = 2 tmpfiles = [ tempfile.mkstemp("_test_functions.nc", dir=os.getcwd())[1] for i in range(n_tmpfiles) ] -(temp_file,) = tmpfiles +( + tmpfile0, + tmpfile1, +) = tmpfiles def _remove_tmpfiles(): @@ -66,7 +70,69 @@ def setUp(self): def test_extract_names_from_xml(self): """Test the `cfvalidation._extract_names_from_xml` function.""" - # TODO + # Check with a small 'dummy' XML table which is the current table + # but with 
only the first two names included, w/ or w/o aliases + two_name_table_start = """ + + 92 + CF-StandardNameTable-92 + 2025-07-24T14:20:46Z + 2025-07-24T14:20:46Z + Centre for Environmental Data Analysis + support@ceda.ac.uk + + + + 1 + Acoustic area backscattering strength is 10 times the log10 of the ratio of the area backscattering coefficient to the reference value, 1 (m2 m-2). Area backscattering coefficient is the integral of the volume backscattering coefficient over a defined distance. Volume backscattering coefficient is the linear form of acoustic_volume_backscattering_strength_in_sea_water. For further details see MacLennan et. al (2002) doi:10.1006/jmsc.2001.1158. + + + + m + Acoustic centre of mass is the average of all sampled depths weighted by their volume backscattering coefficient. Volume backscattering coefficient is the linear form of acoustic_volume_backscattering_strength_in_sea_water. For further details see Urmy et. al (2012) doi:10.1093/icesjms/fsr205. + + """ + include_two_aliases = """ + mass_concentration_of_chlorophyll_in_sea_water + + + + + mass_concentration_of_chlorophyll_in_sea_water + + + """ + table_end = "" + + two_name_output = cfdm.cfvalidation._extract_names_from_xml( + two_name_table_start + table_end) + self.assertIsInstance(two_name_output, list) + self.assertEqual(len(two_name_output), 2) + self.assertIn( + "acoustic_area_backscattering_strength_in_sea_water", + two_name_output + ) + self.assertIn( + "acoustic_centre_of_mass_in_sea_water", two_name_output) + + two_name_output_w_aliases = cfdm.cfvalidation._extract_names_from_xml( + two_name_table_start + include_two_aliases + table_end) + self.assertIsInstance(two_name_output_w_aliases, list) + self.assertEqual(len(two_name_output_w_aliases), 4) + self.assertIn( + "acoustic_area_backscattering_strength_in_sea_water", + two_name_output_w_aliases + ) + self.assertIn( + "acoustic_centre_of_mass_in_sea_water", two_name_output_w_aliases) + self.assertIn( + 
"chlorophyll_concentration_in_sea_water", + two_name_output_w_aliases + ) + self.assertIn( + "concentration_of_chlorophyll_in_sea_water", + two_name_output_w_aliases + ) def test_get_all_current_standard_names(self): """Test the `cfvalidation.get_all_current_standard_names` function.""" From df94d93a772cf71e12702c9b7b38358b6734aafc Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 28 Aug 2025 17:20:27 +0100 Subject: [PATCH 32/97] Add alias name inclusion flag to 'get_all_current_standard_names' --- cfdm/cfvalidation.py | 11 +++-- cfdm/test/test_compliance_checking.py | 70 +++++++++++++++++++++------ 2 files changed, 62 insertions(+), 19 deletions(-) diff --git a/cfdm/cfvalidation.py b/cfdm/cfvalidation.py index 9a50d2f2d..397080095 100644 --- a/cfdm/cfvalidation.py +++ b/cfdm/cfvalidation.py @@ -30,7 +30,7 @@ ) -def _extract_names_from_xml(snames_xml): +def _extract_names_from_xml(snames_xml, include_aliases): """TODO.""" root = ET.fromstring(snames_xml) # Want all elements. 
Note the regex this corresponds @@ -39,12 +39,16 @@ def _extract_names_from_xml(snames_xml): all_standard_names = [ entry.attrib["id"] for entry in root.findall(".//entry") ] + if include_aliases: + all_standard_names += [ + entry.attrib["id"] for entry in root.findall(".//alias") + ] return all_standard_names @lru_cache -def get_all_current_standard_names(): +def get_all_current_standard_names(include_aliases=False): """TODO.""" logger.info( "Retrieving XML for set of current standard names from: ", @@ -57,4 +61,5 @@ def get_all_current_standard_names(): f"Successfully retrived set of {len(all_snames_xml)} standard names" ) # pragma: no cover - return _extract_names_from_xml(all_snames_xml) + return _extract_names_from_xml( + all_snames_xml, include_aliases=include_aliases) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index fef20abfe..3f468652a 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -71,7 +71,9 @@ def setUp(self): def test_extract_names_from_xml(self): """Test the `cfvalidation._extract_names_from_xml` function.""" # Check with a small 'dummy' XML table which is the current table - # but with only the first two names included, w/ or w/o aliases + # but with only the first two names included, w/ or w/o a few aliases + # (note the aliases don't match up to the two included names but + # this is irrelevant to the testing so OK) two_name_table_start = """ 92 @@ -105,7 +107,7 @@ def test_extract_names_from_xml(self): table_end = "" two_name_output = cfdm.cfvalidation._extract_names_from_xml( - two_name_table_start + table_end) + two_name_table_start + table_end, include_aliases=False) self.assertIsInstance(two_name_output, list) self.assertEqual(len(two_name_output), 2) self.assertIn( @@ -115,28 +117,47 @@ def test_extract_names_from_xml(self): self.assertIn( "acoustic_centre_of_mass_in_sea_water", two_name_output) - two_name_output_w_aliases = 
cfdm.cfvalidation._extract_names_from_xml( - two_name_table_start + include_two_aliases + table_end) - self.assertIsInstance(two_name_output_w_aliases, list) - self.assertEqual(len(two_name_output_w_aliases), 4) + # No aliases in this table therefore expect same output as before + # when setting 'include_aliases=True' + self.assertEqual( + cfdm.cfvalidation._extract_names_from_xml( + two_name_table_start + table_end, include_aliases=True), + two_name_output + ) + + aliases_inc_output = cfdm.cfvalidation._extract_names_from_xml( + two_name_table_start + include_two_aliases + table_end, + include_aliases=True + ) + self.assertIsInstance(aliases_inc_output, list) + self.assertEqual(len(aliases_inc_output), 4) self.assertIn( "acoustic_area_backscattering_strength_in_sea_water", - two_name_output_w_aliases + aliases_inc_output ) self.assertIn( - "acoustic_centre_of_mass_in_sea_water", two_name_output_w_aliases) + "acoustic_centre_of_mass_in_sea_water", aliases_inc_output) self.assertIn( "chlorophyll_concentration_in_sea_water", - two_name_output_w_aliases + aliases_inc_output ) self.assertIn( "concentration_of_chlorophyll_in_sea_water", - two_name_output_w_aliases + aliases_inc_output + ) + + # When setting 'include_aliases=True' should ignore the two aliases + # in table so expect same as two_name_output + self.assertEqual( + cfdm.cfvalidation._extract_names_from_xml( + two_name_table_start + include_two_aliases + table_end, + include_aliases=False + ), + two_name_output ) def test_get_all_current_standard_names(self): """Test the `cfvalidation.get_all_current_standard_names` function.""" - # TODO output = cfdm.cfvalidation.get_all_current_standard_names() self.assertIsInstance(output, list) @@ -159,12 +180,29 @@ def test_get_all_current_standard_names(self): output ) - # SLB TODO!: spotted issue with approach in that aliases are valid - # standard names but often historically valid only so not in the - # current table! Maybe we need to parse the 'alias' items too. 
- # Check known/noted alias is in there. + # Check a standard name with known alias self.assertIn("atmosphere_moles_of_cfc113", output) - # CURRENT FAIL self.assertIn("moles_of_cfc113_in_atmosphere", output) + # Since the default behaviour is to not include aliases, this alias + # of the above should not be in the list + self.assertNotIn("moles_of_cfc113_in_atmosphere", output) + + aliases_inc_output = cfdm.cfvalidation.get_all_current_standard_names( + include_aliases=True + ) + self.assertIsInstance(aliases_inc_output, list) + + # As with above length check, can't be sure of eact amount as it + # changes but we can safely put a lower limit on it. At time of + # writing, Aug 2025) there are over 5500 names including aliases + # (where there are ~500 aliases as opposed to non-alias names) so + # set 5000 as a good limit (> 4500 check w/ include_aliases=False) + self.assertTrue(len(aliases_inc_output) > 5000) + + # Check all non-aliases are there, as above + self.assertTrue(set(output).issubset(aliases_inc_output)) + + # This time the alias should be included + self.assertIn("moles_of_cfc113_in_atmosphere", aliases_inc_output) def test_field_dataset_compliance(self): """Test the `Field.dataset_compliance` method. From 3c2d74fac8e86fd7f378b59da53375ea96e7f5bb Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Thu, 28 Aug 2025 17:50:22 +0100 Subject: [PATCH 33/97] Update testing for 'get_all_current_standard_names' w/ URL access check --- cfdm/__init__.py | 3 ++- cfdm/cfvalidation.py | 6 +++--- cfdm/test/test_compliance_checking.py | 24 ++++++++++++++++++------ 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/cfdm/__init__.py b/cfdm/__init__.py index b8f280291..b965fa88c 100644 --- a/cfdm/__init__.py +++ b/cfdm/__init__.py @@ -251,7 +251,8 @@ from .cfvalidation import ( get_all_current_standard_names, - _extract_names_from_xml + _extract_names_from_xml, + _STD_NAME_CURRENT_XML_URL ) from .constructs import Constructs diff --git a/cfdm/cfvalidation.py b/cfdm/cfvalidation.py index 397080095..a19f0a037 100644 --- a/cfdm/cfvalidation.py +++ b/cfdm/cfvalidation.py @@ -23,7 +23,7 @@ # Note: the raw XML is also made available at: # 'cfconventions.org/Data/cf-standard-names/current/src/cf-standard-name-' # 'table.xml', is that a better location to grab from (may be more stable)? 
-STD_NAME_CURRENT_XML_URL = ( +_STD_NAME_CURRENT_XML_URL = ( "https://raw.githubusercontent.com/" "cf-convention/cf-convention.github.io/refs/heads/main/Data/" "cf-standard-names/current/src/cf-standard-name-table.xml" @@ -52,9 +52,9 @@ def get_all_current_standard_names(include_aliases=False): """TODO.""" logger.info( "Retrieving XML for set of current standard names from: ", - STD_NAME_CURRENT_XML_URL + _STD_NAME_CURRENT_XML_URL ) # pragma: no cover - with request.urlopen(STD_NAME_CURRENT_XML_URL) as response: + with request.urlopen(_STD_NAME_CURRENT_XML_URL) as response: all_snames_xml = response.read() logger.debug( diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 3f468652a..42f426239 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -9,6 +9,8 @@ import tempfile import unittest +from urllib import request + import numpy as np faulthandler.enable() # to debug seg faults and timeouts @@ -131,12 +133,9 @@ def test_extract_names_from_xml(self): ) self.assertIsInstance(aliases_inc_output, list) self.assertEqual(len(aliases_inc_output), 4) - self.assertIn( - "acoustic_area_backscattering_strength_in_sea_water", - aliases_inc_output - ) - self.assertIn( - "acoustic_centre_of_mass_in_sea_water", aliases_inc_output) + # Check all non-aliases are there, as per above output + self.assertTrue(set(two_name_output).issubset(aliases_inc_output)) + # Also should have the aliases this time self.assertIn( "chlorophyll_concentration_in_sea_water", aliases_inc_output @@ -158,6 +157,19 @@ def test_extract_names_from_xml(self): def test_get_all_current_standard_names(self): """Test the `cfvalidation.get_all_current_standard_names` function.""" + # First check the URL used is actually available in case of issues + # arising in case GitHub endpoints go down + sn_xml_url = cfdm.cfvalidation._STD_NAME_CURRENT_XML_URL + with request.urlopen(sn_xml_url) as response: + self.assertEqual( + 
response.status, 200, + "Standard name XML inaccesible: unexpected status code " + f"{response.status} for reference URL of: {sn_xml_url}" + ) # 200 == OK + # SLB-DH discuss TODO: what behaviour do we want for the (v. rare) + # case that the URL isn't accessible? Ideally we can skip standard + # name validation with a warning, in these cases. + output = cfdm.cfvalidation.get_all_current_standard_names() self.assertIsInstance(output, list) From 6ac0274bdf2969864d4662e3aef23c0604368055 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 28 Aug 2025 18:28:26 +0100 Subject: [PATCH 34/97] Set up helper func. for creating bad fields in test_compliance_checking --- cfdm/test/test_compliance_checking.py | 51 ++++++++++++++++++--------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 42f426239..42c443edf 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -41,9 +41,35 @@ def _remove_tmpfiles(): atexit.register(_remove_tmpfiles) +def _create_noncompliant_standard_field(compliant_field): + """TODO.""" + pass # TODO + + +def _create_noncompliant_ugrid_field(compliant_field): + """TODO.""" + pass # TODO + + class ComplianceCheckingTest(unittest.TestCase): """Test CF Conventions compliance checking functionality.""" + # 1. Create a file with field with invalid standard names generally + # using our 'kitchen sink' field as a basis + good_standard_sn_f = cfdm.example_field(1) + # TODO set bad names and then write to tempfile and read back in + bad_standard_sn_f = _create_noncompliant_standard_field( + good_standard_sn_f) + + # 1. 
Create a file with a UGRID field with invalid standard names + # on UGRID components, using our core 'UGRID 1' field as a basis + ugrid_file_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "ugrid_1.nc" + ) + good_ugrid_sn_f = cfdm.read(ugrid_file_path) + # TODO set bad names and then write to tempfile and read back in + bad_ugrid_sn_f = _create_noncompliant_ugrid_field(good_ugrid_sn_f) + def setUp(self): """Preparations called immediately before each test method.""" # Disable log messages to silence expected warnings @@ -57,19 +83,6 @@ def setUp(self): # < ... test code ... > # cfdm.log_level('DISABLE') - # 1. Create a file with field with invalid standard names generally - # using our 'kitchen sink' field as a basis - bad_sn_f = cfdm.example_field(1) - # TODO set bad names and then write to tempfile and read back in - - # 1. Create a file with a UGRID field with invalid standard names - # on UGRID components, using our core 'UGRID 1' field as a basis - ugrid_file_path = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "ugrid_1.nc" - ) - bad_ugrid_sn_f = cfdm.read(ugrid_file_path) - # TODO set bad names and then write to tempfile and read back in - def test_extract_names_from_xml(self): """Test the `cfvalidation._extract_names_from_xml` function.""" # Check with a small 'dummy' XML table which is the current table @@ -240,19 +253,23 @@ def test_check_standard_names(self): def test_standard_names_validation_good_standard_field_read(self): """Test compliance checking on a compliant standard field.""" - # TODO + f = self.good_standard_sn_f + print(f.dataset_compliance()) def test_standard_names_validation_bad_standard_field_read(self): """Test compliance checking on a non-compliant standard field.""" - # TODO + f = self.bad_standard_sn_f + pass # TODO def test_standard_names_validation_good_ugrid_field_read(self): """Test compliance checking on a compliant UGRID field.""" - # TODO + f = self.good_ugrid_sn_f + pass # TODO def 
test_standard_names_validation_bad_ugrid_field_read(self): """Test compliance checking on a non-compliant standard field.""" - # TODO + f = self.bad_ugrid_sn_f + pass # TODO if __name__ == "__main__": From 82764cf1d1fc03e6a7bbd9f2cd72c957d1de5a5b Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 28 Aug 2025 18:33:44 +0100 Subject: [PATCH 35/97] Write tests for testing compliance of good/compliant fields --- cfdm/test/test_compliance_checking.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 42c443edf..90d8e1895 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -251,23 +251,29 @@ def test_check_standard_names(self): """Test the `NetCDFRead._check_standard_names` method.""" # TODO - def test_standard_names_validation_good_standard_field_read(self): - """Test compliance checking on a compliant standard field.""" + def test_standard_names_validation_compliant_field(self): + """Test compliance checking on a compliant non-UGRID field.""" f = self.good_standard_sn_f - print(f.dataset_compliance()) + dc_output = f.dataset_compliance() + self.assertEqual(dc_output, dict()) - def test_standard_names_validation_bad_standard_field_read(self): - """Test compliance checking on a non-compliant standard field.""" + # TODO what else to test on in 'good' case? 
+ + def test_standard_names_validation_noncompliant_field(self): + """Test compliance checking on a non-compliant non-UGRID field.""" f = self.bad_standard_sn_f pass # TODO - def test_standard_names_validation_good_ugrid_field_read(self): + def test_standard_names_validation_compliant_ugrid_field(self): """Test compliance checking on a compliant UGRID field.""" - f = self.good_ugrid_sn_f - pass # TODO + f = self.good_ugrid_sn_f[0] + dc_output = f.dataset_compliance() + self.assertEqual(dc_output, dict()) + + # TODO what else to test on in 'good' case? - def test_standard_names_validation_bad_ugrid_field_read(self): - """Test compliance checking on a non-compliant standard field.""" + def test_standard_names_validation_noncompliant_ugrid_field(self): + """Test compliance checking on a non-compliant UGRID field.""" f = self.bad_ugrid_sn_f pass # TODO From 7568e30d8dad0914e76858e5581e43161d462bd1 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 29 Aug 2025 18:19:00 +0100 Subject: [PATCH 36/97] Populate docstring of both functions in cfvalidation --- cfdm/cfvalidation.py | 56 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/cfdm/cfvalidation.py b/cfdm/cfvalidation.py index a19f0a037..140deaca3 100644 --- a/cfdm/cfvalidation.py +++ b/cfdm/cfvalidation.py @@ -31,7 +31,34 @@ def _extract_names_from_xml(snames_xml, include_aliases): - """TODO.""" + """Extract standard names from a valid Standard Name Table XML document. + + Whether or not to include registered aliases is dependent on the value + of the `include_aliases` flag. + + .. versionadded:: NEXTVERSION + + :Parameters: + + snames_xml: `bytes` + Bytes representing an XML file of any + valid Standard Name Table XML document, or mocked-up + equivalent form. 'entry id' items are extracted, along + with 'alias id' items if requested. 
+ + include_aliases: `bool` + If `True`, include standard names that are aliases + rather than strict entries of the input table. By + default this is `False` so that aliases are excluded. + + :Returns: + + `list` + A list of all CF Conventions standard names in the + given version of the table, including aliases if + requested. + + """ root = ET.fromstring(snames_xml) # Want all elements. Note the regex this corresponds # to, from SLB older code, is 're.compile(r"")' but @@ -49,16 +76,39 @@ def _extract_names_from_xml(snames_xml, include_aliases): @lru_cache def get_all_current_standard_names(include_aliases=False): - """TODO.""" + """Get a list of all CF Standard Names from the current version table. + + Entries are always returned from the current table. By default aliases + are not included in the output but can also be included by setting the + `include_aliases` flag to `True`. + + .. versionadded:: NEXTVERSION + + :Parameters: + + include_aliases: `bool`, optional + If `True`, include standard names that are aliases + rather than strict entries of the current table. By + default this is `False` so that aliases are excluded. + + :Returns: + + `list` + A list of all CF Conventions standard names in the + current version of the table, including aliases if + requested. + + """ logger.info( "Retrieving XML for set of current standard names from: ", _STD_NAME_CURRENT_XML_URL ) # pragma: no cover with request.urlopen(_STD_NAME_CURRENT_XML_URL) as response: all_snames_xml = response.read() + print("TYPE OF ALL_SNAMES_XML IS:", type(all_snames_xml)) logger.debug( - f"Successfully retrived set of {len(all_snames_xml)} standard names" + f"Successfully retrieved list of {len(all_snames_xml)} standard names" ) # pragma: no cover return _extract_names_from_xml( From 4647d3140cd6809dbde0543b8258790658a07ca7 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Mon, 1 Sep 2025 16:49:23 +0100 Subject: [PATCH 37/97] Test compliance checking: add function to create file w/ bad names --- cfdm/test/test_compliance_checking.py | 35 ++++++++++++++++++++------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 90d8e1895..a02efb6f4 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -9,6 +9,7 @@ import tempfile import unittest +from netCDF4 import Dataset from urllib import request import numpy as np @@ -20,7 +21,7 @@ n_tmpfiles = 2 tmpfiles = [ - tempfile.mkstemp("_test_functions.nc", dir=os.getcwd())[1] + tempfile.mkstemp("_test_compliance_check.nc", dir=os.getcwd())[1] for i in range(n_tmpfiles) ] ( @@ -41,14 +42,25 @@ def _remove_tmpfiles(): atexit.register(_remove_tmpfiles) -def _create_noncompliant_standard_field(compliant_field): +def _create_noncompliant_names_field(compliant_field, temp_file): """TODO.""" - pass # TODO + cfdm.write(compliant_field, temp_file) + with Dataset(temp_file, "r+") as nc: + field_all_varnames = list(nc.variables.keys()) + # Store a bad name which is the variable name prepended with 'badname_' + # - this makes it an invalid name and one we can identify as being + # tied to the original variable, for testing purposes. 
+ bad_name_mapping = { + varname: "badname_"+ varname for varname in field_all_varnames + } + print("BAD NAME MAPPING IS", bad_name_mapping) -def _create_noncompliant_ugrid_field(compliant_field): - """TODO.""" - pass # TODO + for var_name, bad_std_name in bad_name_mapping.items(): + var = nc.variables[var_name] + var.standard_name = bad_std_name + + return cfdm.read(temp_file)[0] class ComplianceCheckingTest(unittest.TestCase): @@ -58,8 +70,10 @@ class ComplianceCheckingTest(unittest.TestCase): # using our 'kitchen sink' field as a basis good_standard_sn_f = cfdm.example_field(1) # TODO set bad names and then write to tempfile and read back in - bad_standard_sn_f = _create_noncompliant_standard_field( - good_standard_sn_f) + bad_standard_sn_f = _create_noncompliant_names_field( + good_standard_sn_f, tmpfile0) + print("Bad STANDARD is", bad_standard_sn_f) + bad_standard_sn_f.dump() # 1. Create a file with a UGRID field with invalid standard names # on UGRID components, using our core 'UGRID 1' field as a basis @@ -68,7 +82,10 @@ class ComplianceCheckingTest(unittest.TestCase): ) good_ugrid_sn_f = cfdm.read(ugrid_file_path) # TODO set bad names and then write to tempfile and read back in - bad_ugrid_sn_f = _create_noncompliant_ugrid_field(good_ugrid_sn_f) + #bad_ugrid_sn_f = _create_noncompliant_names_field( + # good_ugrid_sn_f, tmpfile1) + # TODO SLB we can't write UGRID files using cf at the moment, so need + # to find another way to create UGRID dataset with bad names to test on def setUp(self): """Preparations called immediately before each test method.""" From be342fccab5bf3d87e60b58aafd60183cfb1df53 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Mon, 1 Sep 2025 20:44:57 +0100 Subject: [PATCH 38/97] Test compliance checking: test non-compliant non-UGRID field --- cfdm/test/test_compliance_checking.py | 70 +++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 5 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index a02efb6f4..53c495818 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -49,12 +49,11 @@ def _create_noncompliant_names_field(compliant_field, temp_file): with Dataset(temp_file, "r+") as nc: field_all_varnames = list(nc.variables.keys()) # Store a bad name which is the variable name prepended with 'badname_' - # - this makes it an invalid name and one we can identify as being - # tied to the original variable, for testing purposes. + # - this makes it a certain invalid name and one we can identify as + # being tied to the original variable, for testing purposes. bad_name_mapping = { varname: "badname_"+ varname for varname in field_all_varnames } - print("BAD NAME MAPPING IS", bad_name_mapping) for var_name, bad_std_name in bad_name_mapping.items(): var = nc.variables[var_name] @@ -278,8 +277,69 @@ def test_standard_names_validation_compliant_field(self): def test_standard_names_validation_noncompliant_field(self): """Test compliance checking on a non-compliant non-UGRID field.""" + # TODO remove reference to sn attribute in reason string since this + # is noted separately in the dict value! 
+ expected_reason = ( + "standard_name attribute " + "has a value that is not a valid name contained " + "in the current standard name table" + ) + expected_code = 400022 + expected_noncompl_dict = { + "attribute": "standard_name", + "code": expected_code, + "reason": expected_reason, + } + f = self.bad_standard_sn_f - pass # TODO + dc_output = f.dataset_compliance() + + #from pprint import pprint + # pprint(dc_output) + + # 'ta' is the field variable we test on + self.assertIn("non-compliance", dc_output["ta"]) + noncompliance = dc_output["ta"]["non-compliance"] + + expected_keys = [ + "atmosphere_hybrid_height_coordinate", + "atmosphere_hybrid_height_coordinate_bounds", + "latitude_1", + "longitude_1", + "time", + "x", + "x_bnds" + "y", + "y_bnds", + "b", + "b_bounds", + "surface_altitude", + "rotated_latitude_longitude", + "auxiliary", + "cell_measure", + "air_temperature_standard_error", + ] + for varname in expected_keys: + noncompl_dict = noncompliance.get(varname) + self.assertIsNotNone(noncompl_dict) + self.assertIsInstance(noncompl_dict, list) + self.assertEqual(len(noncompl_dict), 1) + + # Safe to unpack after test above + noncompl_dict = noncompl_dict[0] + self.assertIn("attribute", noncompl_dict) + self.assertEqual(noncompl_dict["attribute"], "standard_name") + self.assertIn("code", noncompl_dict) + self.assertEqual(noncompl_dict["code"], expected_code) + self.assertIn("reason", noncompl_dict) + self.assertEqual(noncompl_dict["reason"], expected_reason) + + # Final check to ensure there isn't anything else in there. + # If keys are missing will be reported to fail more spefically + # on per-key-value checks above + self.assertEqual(noncompl_dict, expected_noncompl_dict) + + # TODO what else to check here? 
def test_standard_names_validation_compliant_ugrid_field(self): """Test compliance checking on a compliant UGRID field.""" @@ -291,7 +351,7 @@ def test_standard_names_validation_compliant_ugrid_field(self): def test_standard_names_validation_noncompliant_ugrid_field(self): """Test compliance checking on a non-compliant UGRID field.""" - f = self.bad_ugrid_sn_f + # f = self.bad_ugrid_sn_f pass # TODO From 79dfcbb8eb19671eeddbb469053fd486b1c2838c Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Mon, 1 Sep 2025 22:35:00 +0100 Subject: [PATCH 39/97] Add message in test assertion to clarify failure case --- cfdm/test/test_compliance_checking.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 53c495818..8bb577f4a 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -302,15 +302,16 @@ def test_standard_names_validation_noncompliant_field(self): noncompliance = dc_output["ta"]["non-compliance"] expected_keys = [ - "atmosphere_hybrid_height_coordinate", + # itself? "ta", + # fails "atmosphere_hybrid_height_coordinate", "atmosphere_hybrid_height_coordinate_bounds", "latitude_1", "longitude_1", "time", - "x", - "x_bnds" - "y", - "y_bnds", + # fails "x", + # fails "x_bnds" + # fails "y", + # fails "y_bnds", "b", "b_bounds", "surface_altitude", @@ -321,7 +322,8 @@ def test_standard_names_validation_noncompliant_field(self): ] for varname in expected_keys: noncompl_dict = noncompliance.get(varname) - self.assertIsNotNone(noncompl_dict) + self.assertIsNotNone( + noncompl_dict, msg=f"Empty non-compliance for '{varname}'") self.assertIsInstance(noncompl_dict, list) self.assertEqual(len(noncompl_dict), 1) From 9c952eb3de4409b7d06b43412da2f8f54fe3a34a Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Mon, 1 Sep 2025 22:57:10 +0100 Subject: [PATCH 40/97] Mark standard name compliance tests which are currently failing --- cfdm/cfvalidation.py | 1 - cfdm/read_write/netcdf/netcdfread.py | 2 +- cfdm/test/test_compliance_checking.py | 10 ++++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cfdm/cfvalidation.py b/cfdm/cfvalidation.py index 140deaca3..586180c3e 100644 --- a/cfdm/cfvalidation.py +++ b/cfdm/cfvalidation.py @@ -105,7 +105,6 @@ def get_all_current_standard_names(include_aliases=False): ) # pragma: no cover with request.urlopen(_STD_NAME_CURRENT_XML_URL) as response: all_snames_xml = response.read() - print("TYPE OF ALL_SNAMES_XML IS:", type(all_snames_xml)) logger.debug( f"Successfully retrieved list of {len(all_snames_xml)} standard names" diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 843f6dc64..a7871e53a 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8438,7 +8438,7 @@ def _check_standard_names( ncvar, message=( f"{sn_attr} attribute", - f"has a value that is not a valid name contained " + "has a value that is not a valid name contained " "in the current standard name table", ), attribute=sn_attr, diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 8bb577f4a..ac7b49b35 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -312,10 +312,10 @@ def test_standard_names_validation_noncompliant_field(self): # fails "x_bnds" # fails "y", # fails "y_bnds", - "b", + # fails "b", "b_bounds", - "surface_altitude", - "rotated_latitude_longitude", + # fails "surface_altitude", + # fails "rotated_latitude_longitude", "auxiliary", "cell_measure", "air_temperature_standard_error", @@ -323,7 +323,9 @@ def test_standard_names_validation_noncompliant_field(self): for varname in expected_keys: noncompl_dict = noncompliance.get(varname) 
self.assertIsNotNone( - noncompl_dict, msg=f"Empty non-compliance for '{varname}'") + noncompl_dict, + msg=f"Empty non-compliance for variable '{varname}'" + ) self.assertIsInstance(noncompl_dict, list) self.assertEqual(len(noncompl_dict), 1) From 686d1fbb6a5bd2806b5f20b32c6d5ce00c538ea2 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 2 Sep 2025 16:49:42 +0100 Subject: [PATCH 41/97] Update create_test_files to write UGRID file w/ invalid names --- cfdm/test/create_test_files.py | 54 ++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/cfdm/test/create_test_files.py b/cfdm/test/create_test_files.py index 6f1e0db66..e9c5b953f 100644 --- a/cfdm/test/create_test_files.py +++ b/cfdm/test/create_test_files.py @@ -1964,8 +1964,13 @@ def _make_subsampled_2(filename): return filename -def _make_ugrid_1(filename): - """Create a UGRID file with a 2-d mesh topology.""" +def _make_ugrid_1(filename, standard_names): + """Create a UGRID file with a 2-d mesh topology. + + Standard names to set are input as a dicionary parameter to facilitate + testing on CF compliance checking for a UGRID dataset in the + test_compliance_checking module. + """ n = netCDF4.Dataset(filename, "w") n.Conventions = f"CF-{VN} UGRID-1.0" @@ -2028,39 +2033,39 @@ def _make_ugrid_1(filename): # Mesh node coordinates Mesh2_node_x = n.createVariable("Mesh2_node_x", "f4", ("nMesh2_node",)) - Mesh2_node_x.standard_name = "longitude" + Mesh2_node_x.standard_name = standard_names[0] Mesh2_node_x.units = "degrees_east" Mesh2_node_x[...] = [-45, -43, -45, -43, -45, -43, -40] Mesh2_node_y = n.createVariable("Mesh2_node_y", "f4", ("nMesh2_node",)) - Mesh2_node_y.standard_name = "latitude" + Mesh2_node_y.standard_name = standard_names[1] Mesh2_node_y.units = "degrees_north" Mesh2_node_y[...] 
= [35, 35, 33, 33, 31, 31, 34] # Optional mesh face and edge coordinate variables Mesh2_face_x = n.createVariable("Mesh2_face_x", "f4", ("nMesh2_face",)) - Mesh2_face_x.standard_name = "longitude" + Mesh2_face_x.standard_name = standard_names[0] Mesh2_face_x.units = "degrees_east" Mesh2_face_x[...] = [-44, -44, -42] Mesh2_face_y = n.createVariable("Mesh2_face_y", "f4", ("nMesh2_face",)) - Mesh2_face_y.standard_name = "latitude" + Mesh2_face_y.standard_name = standard_names[1] Mesh2_face_y.units = "degrees_north" Mesh2_face_y[...] = [34, 32, 34] Mesh2_edge_x = n.createVariable("Mesh2_edge_x", "f4", ("nMesh2_edge",)) - Mesh2_edge_x.standard_name = "longitude" + Mesh2_edge_x.standard_name = standard_names[0] Mesh2_edge_x.units = "degrees_east" Mesh2_edge_x[...] = [-41.5, -41.5, -43, -44, -45, -44, -45, -44, -43] Mesh2_edge_y = n.createVariable("Mesh2_edge_y", "f4", ("nMesh2_edge",)) - Mesh2_edge_y.standard_name = "latitude" + Mesh2_edge_y.standard_name = standard_names[1] Mesh2_edge_y.units = "degrees_north" Mesh2_edge_y[...] = [34.5, 33.5, 34, 35, 34, 33, 32, 31, 32] # Non-mesh coordinates t = n.createVariable("time", "f8", ("time",)) - t.standard_name = "time" + t.standard_name = standard_names[2] t.units = "seconds since 2016-01-01 00:00:00" t.bounds = "time_bounds" t[...] = [43200, 129600] @@ -2070,7 +2075,7 @@ def _make_ugrid_1(filename): # Data variables ta = n.createVariable("ta", "f4", ("time", "nMesh2_face")) - ta.standard_name = "air_temperature" + ta.standard_name = standard_names[3] ta.units = "K" ta.mesh = "Mesh2" ta.location = "face" @@ -2078,7 +2083,7 @@ def _make_ugrid_1(filename): ta[...] 
= [[282.96, 282.69, 283.21], [281.53, 280.99, 281.23]] v = n.createVariable("v", "f4", ("time", "nMesh2_edge")) - v.standard_name = "northward_wind" + v.standard_name = standard_names[4] v.units = "ms-1" v.mesh = "Mesh2" v.location = "edge" @@ -2089,7 +2094,7 @@ def _make_ugrid_1(filename): ] pa = n.createVariable("pa", "f4", ("time", "nMesh2_node")) - pa.standard_name = "air_pressure" + pa.standard_name = standard_names[5] pa.units = "hPa" pa.mesh = "Mesh2" pa.location = "node" @@ -2340,7 +2345,30 @@ def _make_aggregation_value(filename): subsampled_file_1 = _make_subsampled_1("subsampled_1.nc") subsampled_file_1 = _make_subsampled_2("subsampled_2.nc") -ugrid_1 = _make_ugrid_1("ugrid_1.nc") +# To facilitate testing UGRID file for test_compliance_checking module, +# we need a UGRID dataset with invalid standard names but can't create one +# from a temp_file edit to 'ugrid_1.nc' because we can't yet write UGRID. So +# have a standard names input dictionary here, for now, to create +# a pair of files, one with good and one with bad names. +ugrid_1_valid_standard_names = [ + "longitude", + "latitude", + "time", + "air_temperature", + "northward_wind", + "air_pressure", +] +ugrid_1_bad_standard_names = [ + "badname_" + name for name in ugrid_1_valid_standard_names +] +ugrid_1 = _make_ugrid_1( + "ugrid_1.nc", + ugrid_1_valid_standard_names +) +ugrid_1_bad_names = _make_ugrid_1( + "ugrid_1_bad_names.nc", + ugrid_1_bad_standard_names, +) ugrid_2 = _make_ugrid_2("ugrid_2.nc") aggregation_value = _make_aggregation_value("aggregation_value.nc") From 0fd56ad4fa8691361d94c2e4a335ea8e7deb10f2 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Tue, 2 Sep 2025 16:52:07 +0100 Subject: [PATCH 42/97] Add notes to testing from SLB-DCH catchup --- cfdm/test/test_compliance_checking.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index ac7b49b35..c80b98fd4 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -294,30 +294,30 @@ def test_standard_names_validation_noncompliant_field(self): f = self.bad_standard_sn_f dc_output = f.dataset_compliance() - #from pprint import pprint - # pprint(dc_output) + from pprint import pprint + pprint(dc_output) # 'ta' is the field variable we test on self.assertIn("non-compliance", dc_output["ta"]) noncompliance = dc_output["ta"]["non-compliance"] expected_keys = [ - # itself? "ta", + # POSSIBLY SOLVED, ATTRIBUTE FIX itself? "ta", # fails "atmosphere_hybrid_height_coordinate", "atmosphere_hybrid_height_coordinate_bounds", "latitude_1", "longitude_1", "time", - # fails "x", - # fails "x_bnds" - # fails "y", - # fails "y_bnds", + # SOLVED, DIM COORDS fails "x", + # POSSIBLY SOLVED, DIM COORDS fails "x_bnds" + # SOLVED, DIM COORDS fails "y", + # POSSIBLY SOLVED, DIM COORDS fails "y_bnds", # fails "b", "b_bounds", # fails "surface_altitude", # fails "rotated_latitude_longitude", "auxiliary", - "cell_measure", + "cell_measure", # ATTRIBUTES FIX SHOULDN'T APPEAR "air_temperature_standard_error", ] for varname in expected_keys: From c31b4d40e870d35cbde679ce549c15f07be9c317 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Tue, 2 Sep 2025 18:02:50 +0100 Subject: [PATCH 43/97] Update attribute key in dataset_compliance for bad standard names --- cfdm/read_write/netcdf/netcdfread.py | 11 +++--- cfdm/test/test_compliance_checking.py | 49 ++++++++++++++++++++------- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index a7871e53a..ebaa47b18 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8379,6 +8379,9 @@ def _check_standard_names( for sn_attr in ("standard_name", "computed_standard_name"): # 1. Check if there is a (computed_)standard_name property sn_value = ncvar_attrs.get(sn_attr) + attribute_value = { + f"{ncvar}:{sn_attr}": sn_value + } # TODO downgrade status to info/debug logger.warning(f"%%%%%% Got sn_value of {sn_value}") @@ -8388,18 +8391,18 @@ def _check_standard_names( any_sn_found = True # 2. Check, if requested, if is a string - # TODO this is not robust check (may have numpy string type) + # TODO this is not a robust check (may have numpy string type) # but good enough for now whilts developing if check_is_string and not isinstance(sn_value, str): invalid_sn_found = True self._add_message( parent_ncvar, ncvar, + attribute=attribute_value, message=( f"{sn_attr} attribute", f"has a value that is not a string", ), - attribute=sn_attr, conformance="3.3.requirement.1", ) @@ -8414,12 +8417,12 @@ def _check_standard_names( self._add_message( parent_ncvar, ncvar, + attribute=attribute_value, message=( f"{sn_attr} attribute", f"has a value that is not appropriate to " "the context of the variable in question", ), - attribute=sn_attr, ) @@ -8441,7 +8444,7 @@ def _check_standard_names( "has a value that is not a valid name contained " "in the current standard name table", ), - attribute=sn_attr, + attribute=attribute_value, conformance="3.3.requirement.2", ) diff --git a/cfdm/test/test_compliance_checking.py 
b/cfdm/test/test_compliance_checking.py index c80b98fd4..eb624b8fe 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -43,7 +43,7 @@ def _remove_tmpfiles(): def _create_noncompliant_names_field(compliant_field, temp_file): - """TODO.""" + """Create a copy of a field with bad standard names on all variables.""" cfdm.write(compliant_field, temp_file) with Dataset(temp_file, "r+") as nc: @@ -71,8 +71,7 @@ class ComplianceCheckingTest(unittest.TestCase): # TODO set bad names and then write to tempfile and read back in bad_standard_sn_f = _create_noncompliant_names_field( good_standard_sn_f, tmpfile0) - print("Bad STANDARD is", bad_standard_sn_f) - bad_standard_sn_f.dump() + ### bad_standard_sn_f.dump() # SB DEBUG # 1. Create a file with a UGRID field with invalid standard names # on UGRID components, using our core 'UGRID 1' field as a basis @@ -80,11 +79,15 @@ class ComplianceCheckingTest(unittest.TestCase): os.path.dirname(os.path.abspath(__file__)), "ugrid_1.nc" ) good_ugrid_sn_f = cfdm.read(ugrid_file_path) - # TODO set bad names and then write to tempfile and read back in - #bad_ugrid_sn_f = _create_noncompliant_names_field( - # good_ugrid_sn_f, tmpfile1) - # TODO SLB we can't write UGRID files using cf at the moment, so need - # to find another way to create UGRID dataset with bad names to test on + # Note we can't write UGRID files using cf at the moment, so needed + # another way to create UGRID dataset with bad names to test on + # and the simplest is to write extra 'bad names' file alongside + # 'ugrid_1.nc' in create_test_files module. 
+ bad_names_ugrid_file_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "ugrid_1_bad_names.nc" + ) + bad_ugrid_sn_f = cfdm.read(bad_names_ugrid_file_path) + ### bad_standard_sn_f.dump() # SB DEBUG def setUp(self): """Preparations called immediately before each test method.""" @@ -285,8 +288,9 @@ def test_standard_names_validation_noncompliant_field(self): "in the current standard name table" ) expected_code = 400022 + # Excludes attribute which we expect in there but depends on varname + # so add that expected key in during the iteration over varnames expected_noncompl_dict = { - "attribute": "standard_name", "code": expected_code, "reason": expected_reason, } @@ -294,6 +298,7 @@ def test_standard_names_validation_noncompliant_field(self): f = self.bad_standard_sn_f dc_output = f.dataset_compliance() + # SLB DEV from pprint import pprint pprint(dc_output) @@ -331,13 +336,21 @@ def test_standard_names_validation_noncompliant_field(self): # Safe to unpack after test above noncompl_dict = noncompl_dict[0] - self.assertIn("attribute", noncompl_dict) - self.assertEqual(noncompl_dict["attribute"], "standard_name") + self.assertIn("code", noncompl_dict) self.assertEqual(noncompl_dict["code"], expected_code) self.assertIn("reason", noncompl_dict) self.assertEqual(noncompl_dict["reason"], expected_reason) + # Form expected attribute which needs the varname and bad name + expected_attribute = { + f"{varname}:standard_name": f"badname_{varname}" + } + expected_noncompl_dict["attribute"] = expected_attribute + + self.assertIn("attribute", noncompl_dict) + self.assertEqual(noncompl_dict["attribute"], expected_attribute) + # Final check to ensure there isn't anything else in there. 
# If keys are missing will be reported to fail more spefically # on per-key-value checks above @@ -355,8 +368,18 @@ def test_standard_names_validation_compliant_ugrid_field(self): def test_standard_names_validation_noncompliant_ugrid_field(self): """Test compliance checking on a non-compliant UGRID field.""" - # f = self.bad_ugrid_sn_f - pass # TODO + f = self.bad_ugrid_sn_f + + # TODO add error to run to say need to run 'create_test_files' + + # TODO see from below that not all bad names gte set - but want + # that, so should update create_test_files method to set on all + # for bad case. + with Dataset("ugrid_1_bad_names.nc", "r+") as nc: + field_all_varnames = list(nc.variables.keys()) + print("VERIFY") + for varname, var in nc.variables.items(): + print(varname, getattr(var, "standard_name", "No standard_name")) if __name__ == "__main__": From 5711f897d977b1cb4f73f0fb362650bdf560bd4a Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 3 Sep 2025 14:53:10 +0100 Subject: [PATCH 44/97] Create test for compliance checking on UGRID field --- cfdm/test/test_compliance_checking.py | 106 +++++++++++++++++++++----- 1 file changed, 85 insertions(+), 21 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index eb624b8fe..2bbbdcdff 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -19,14 +19,13 @@ import cfdm -n_tmpfiles = 2 +n_tmpfiles = 1 tmpfiles = [ tempfile.mkstemp("_test_compliance_check.nc", dir=os.getcwd())[1] for i in range(n_tmpfiles) ] ( tmpfile0, - tmpfile1, ) = tmpfiles @@ -67,18 +66,18 @@ class ComplianceCheckingTest(unittest.TestCase): # 1. 
Create a file with field with invalid standard names generally # using our 'kitchen sink' field as a basis - good_standard_sn_f = cfdm.example_field(1) + good_general_sn_f = cfdm.example_field(1) # TODO set bad names and then write to tempfile and read back in - bad_standard_sn_f = _create_noncompliant_names_field( - good_standard_sn_f, tmpfile0) - ### bad_standard_sn_f.dump() # SB DEBUG + bad_general_sn_f = _create_noncompliant_names_field( + good_general_sn_f, tmpfile0) + ### bad_general_sn_f.dump() # SB DEBUG # 1. Create a file with a UGRID field with invalid standard names # on UGRID components, using our core 'UGRID 1' field as a basis ugrid_file_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), "ugrid_1.nc" ) - good_ugrid_sn_f = cfdm.read(ugrid_file_path) + good_ugrid_sn_f = cfdm.read(ugrid_file_path)[0] # Note we can't write UGRID files using cf at the moment, so needed # another way to create UGRID dataset with bad names to test on # and the simplest is to write extra 'bad names' file alongside @@ -86,8 +85,8 @@ class ComplianceCheckingTest(unittest.TestCase): bad_names_ugrid_file_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), "ugrid_1_bad_names.nc" ) - bad_ugrid_sn_f = cfdm.read(bad_names_ugrid_file_path) - ### bad_standard_sn_f.dump() # SB DEBUG + bad_ugrid_sn_f = cfdm.read(bad_names_ugrid_file_path)[0] + bad_ugrid_sn_f.dump() # SB DEBUG def setUp(self): """Preparations called immediately before each test method.""" @@ -272,7 +271,7 @@ def test_check_standard_names(self): def test_standard_names_validation_compliant_field(self): """Test compliance checking on a compliant non-UGRID field.""" - f = self.good_standard_sn_f + f = self.good_general_sn_f dc_output = f.dataset_compliance() self.assertEqual(dc_output, dict()) @@ -280,8 +279,6 @@ def test_standard_names_validation_compliant_field(self): def test_standard_names_validation_noncompliant_field(self): """Test compliance checking on a non-compliant non-UGRID field.""" - 
# TODO remove reference to sn attribute in reason string since this - # is noted separately in the dict value! expected_reason = ( "standard_name attribute " "has a value that is not a valid name contained " @@ -295,12 +292,12 @@ def test_standard_names_validation_noncompliant_field(self): "reason": expected_reason, } - f = self.bad_standard_sn_f + f = self.bad_general_sn_f dc_output = f.dataset_compliance() # SLB DEV - from pprint import pprint - pprint(dc_output) + # from pprint import pprint + # pprint(dc_output) # 'ta' is the field variable we test on self.assertIn("non-compliance", dc_output["ta"]) @@ -360,7 +357,7 @@ def test_standard_names_validation_noncompliant_field(self): def test_standard_names_validation_compliant_ugrid_field(self): """Test compliance checking on a compliant UGRID field.""" - f = self.good_ugrid_sn_f[0] + f = self.good_ugrid_sn_f dc_output = f.dataset_compliance() self.assertEqual(dc_output, dict()) @@ -368,18 +365,85 @@ def test_standard_names_validation_compliant_ugrid_field(self): def test_standard_names_validation_noncompliant_ugrid_field(self): """Test compliance checking on a non-compliant UGRID field.""" + expected_reason = ( + "standard_name attribute " + "has a value that is not a valid name contained " + "in the current standard name table" + ) + expected_code = 400022 + # Excludes attribute which we expect in there but depends on varname + # so add that expected key in during the iteration over varnames + expected_noncompl_dict = { + "code": expected_code, + "reason": expected_reason, + } + f = self.bad_ugrid_sn_f + dc_output = f.dataset_compliance() + # SLB DEV # TODO add error to run to say need to run 'create_test_files' # TODO see from below that not all bad names gte set - but want # that, so should update create_test_files method to set on all # for bad case. 
- with Dataset("ugrid_1_bad_names.nc", "r+") as nc: - field_all_varnames = list(nc.variables.keys()) - print("VERIFY") - for varname, var in nc.variables.items(): - print(varname, getattr(var, "standard_name", "No standard_name")) + # with Dataset("ugrid_1_bad_names.nc", "r+") as nc: + # field_all_varnames = list(nc.variables.keys()) + # print("VERIFY") + # for varname, var in nc.variables.items(): + # print(varname, getattr(var, "standard_name", "No standard_name")) + + from pprint import pprint + pprint(dc_output) + + # 'ta' is the field variable we test on + self.assertIn("non-compliance", dc_output["ta"]) + noncompliance = dc_output["ta"]["non-compliance"] + + expected_keys = [ + # POSSIBLY SOLVED, ATTRIBUTE FIX itself? "ta", + "Mesh2_node_x", + "Mesh2_node_y", + "Mesh2_face_x", + "Mesh2_face_y", + "Mesh2_edge_x", + "Mesh2_edge_y", + "time", + "v", + "pa", + ] + for varname in expected_keys: + noncompl_dict = noncompliance.get(varname) + self.assertIsNotNone( + noncompl_dict, + msg=f"Empty non-compliance for variable '{varname}'" + ) + self.assertIsInstance(noncompl_dict, list) + self.assertEqual(len(noncompl_dict), 1) + + # Safe to unpack after test above + noncompl_dict = noncompl_dict[0] + + self.assertIn("code", noncompl_dict) + self.assertEqual(noncompl_dict["code"], expected_code) + self.assertIn("reason", noncompl_dict) + self.assertEqual(noncompl_dict["reason"], expected_reason) + + # Form expected attribute which needs the varname and bad name + expected_attribute = { + f"{varname}:standard_name": f"badname_{varname}" + } + expected_noncompl_dict["attribute"] = expected_attribute + + self.assertIn("attribute", noncompl_dict) + self.assertEqual(noncompl_dict["attribute"], expected_attribute) + + # Final check to ensure there isn't anything else in there. + # If keys are missing will be reported to fail more spefically + # on per-key-value checks above + self.assertEqual(noncompl_dict, expected_noncompl_dict) + + # TODO what else to check here? 
if __name__ == "__main__": From eaf0e1122202b68aa567b5889f2e067f9887e9b2 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 4 Sep 2025 17:26:07 +0100 Subject: [PATCH 45/97] Update compliance checking test to use gen'd UGRID bad name fields --- cfdm/test/create_test_files.py | 51 +++++++- cfdm/test/test_compliance_checking.py | 165 ++++++++++++++++++++++---- 2 files changed, 189 insertions(+), 27 deletions(-) diff --git a/cfdm/test/create_test_files.py b/cfdm/test/create_test_files.py index e9c5b953f..5966ffb5c 100644 --- a/cfdm/test/create_test_files.py +++ b/cfdm/test/create_test_files.py @@ -1969,8 +1969,24 @@ def _make_ugrid_1(filename, standard_names): Standard names to set are input as a dicionary parameter to facilitate testing on CF compliance checking for a UGRID dataset in the - test_compliance_checking module. + test_compliance_checking module. This should either be 6 names long + or TODO names long where in the latter case extra standard names will + be set on all other variables which have no standard name in + ugrid_1, namely on: + * Mesh2 + * Mesh2_face_nodes + * Mesh2_edge_nodes + * Mesh2_face_edges + * Mesh2_face_links + * Mesh2_edge_face_links + * time_bounds + """ + + extra_sn_setting = False + if len(standard_names) == 13: + extra_sn_setting = True + n = netCDF4.Dataset(filename, "w") n.Conventions = f"CF-{VN} UGRID-1.0" @@ -1994,12 +2010,16 @@ def _make_ugrid_1(filename, standard_names): Mesh2.face_edge_connectivity = "Mesh2_face_edges" Mesh2.face_face_connectivity = "Mesh2_face_links" Mesh2.edge_face_connectivity = "Mesh2_edge_face_links" + if extra_sn_setting: + Mesh2.standard_name = standard_names[6] Mesh2_face_nodes = n.createVariable( "Mesh2_face_nodes", "i4", ("nMesh2_face", "Four"), fill_value=-99 ) Mesh2_face_nodes.long_name = "Maps every face to its corner nodes" Mesh2_face_nodes[...] 
= [[2, 3, 1, 0], [4, 5, 3, 2], [1, 3, 6, -99]] + if extra_sn_setting: + Mesh2_face_nodes.standard_name = standard_names[7] Mesh2_edge_nodes = n.createVariable( "Mesh2_edge_nodes", "i4", ("Two", "nMesh2_edge") @@ -2009,12 +2029,16 @@ def _make_ugrid_1(filename, standard_names): [1, 3, 3, 0, 2, 2, 2, 5, 3], [6, 6, 1, 1, 0, 3, 4, 4, 5], ] + if extra_sn_setting: + Mesh2_edge_nodes.standard_name = standard_names[8] # Optional mesh topology variables Mesh2_face_edges = n.createVariable( "Mesh2_face_edges", "i4", ("nMesh2_face", "Four"), fill_value=-99 ) Mesh2_face_edges.long_name = "Maps every face to its edges." + if extra_sn_setting: + Mesh2_face_edges.standard_name = standard_names[9] Mesh2_face_links = n.createVariable( "Mesh2_face_links", "i4", ("nMesh2_face", "Four"), fill_value=-99 @@ -2025,11 +2049,15 @@ def _make_ugrid_1(filename, standard_names): [0, -99, -99, -99], [0, -99, -99, -99], ] + if extra_sn_setting: + Mesh2_face_links.standard_name = standard_names[10] Mesh2_edge_face_links = n.createVariable( "Mesh2_edge_face_links", "i4", ("nMesh2_edge", "Two"), fill_value=-99 ) Mesh2_edge_face_links.long_name = "neighbour faces for edges" + if extra_sn_setting: + Mesh2_edge_face_links.standard_name = standard_names[11] # Mesh node coordinates Mesh2_node_x = n.createVariable("Mesh2_node_x", "f4", ("nMesh2_node",)) @@ -2072,6 +2100,8 @@ def _make_ugrid_1(filename, standard_names): t_bounds = n.createVariable("time_bounds", "f8", ("time", "Two")) t_bounds[...] 
= [[0, 86400], [86400, 172800]] + if extra_sn_setting: + t_bounds.standard_name = standard_names[12] # Data variables ta = n.createVariable("ta", "f4", ("time", "nMesh2_face")) @@ -2358,16 +2388,25 @@ def _make_aggregation_value(filename): "northward_wind", "air_pressure", ] -ugrid_1_bad_standard_names = [ - "badname_" + name for name in ugrid_1_valid_standard_names -] ugrid_1 = _make_ugrid_1( "ugrid_1.nc", ugrid_1_valid_standard_names ) +ugrid_1_bad_standard_names = [ + "badname_" + name for name in ugrid_1_valid_standard_names +] +ugrid_1_bad_standard_names += [ + "badname_Mesh2", # index 6 + "badname_Mesh2_face_nodes", + "badname_Mesh2_edge_nodes", + "badname_Mesh2_face_edges", + "badname_Mesh2_face_links", + "badname_Mesh2_edge_face_links", + "badname_time_bounds", # index 12 +] + ugrid_1_bad_names = _make_ugrid_1( - "ugrid_1_bad_names.nc", - ugrid_1_bad_standard_names, + "ugrid_1_bad_names.nc", ugrid_1_bad_standard_names, ) ugrid_2 = _make_ugrid_2("ugrid_2.nc") diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 2bbbdcdff..a0eebe4f6 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -70,7 +70,7 @@ class ComplianceCheckingTest(unittest.TestCase): # TODO set bad names and then write to tempfile and read back in bad_general_sn_f = _create_noncompliant_names_field( good_general_sn_f, tmpfile0) - ### bad_general_sn_f.dump() # SB DEBUG + ### bad_general_sn_f.dump() # SB DEV # 1. 
Create a file with a UGRID field with invalid standard names # on UGRID components, using our core 'UGRID 1' field as a basis @@ -85,8 +85,7 @@ class ComplianceCheckingTest(unittest.TestCase): bad_names_ugrid_file_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), "ugrid_1_bad_names.nc" ) - bad_ugrid_sn_f = cfdm.read(bad_names_ugrid_file_path)[0] - bad_ugrid_sn_f.dump() # SB DEBUG + bad_ugrid_sn_fields = cfdm.read(bad_names_ugrid_file_path) def setUp(self): """Preparations called immediately before each test method.""" @@ -363,8 +362,8 @@ def test_standard_names_validation_compliant_ugrid_field(self): # TODO what else to test on in 'good' case? - def test_standard_names_validation_noncompliant_ugrid_field(self): - """Test compliance checking on a non-compliant UGRID field.""" + def test_standard_names_validation_noncompliant_ugrid_fields(self): + """Test compliance checking on non-compliant UGRID fields.""" expected_reason = ( "standard_name attribute " "has a value that is not a valid name contained " @@ -378,8 +377,11 @@ def test_standard_names_validation_noncompliant_ugrid_field(self): "reason": expected_reason, } - f = self.bad_ugrid_sn_f - dc_output = f.dataset_compliance() + # Fields for testing on: those in ugrid_1 with bad names pre-set + f1, f2, f3 = self.bad_ugrid_sn_fields # unpack to shorter names + dc_output_1 = f1.dataset_compliance() + dc_output_2 = f2.dataset_compliance() + dc_output_3 = f2.dataset_compliance() # SLB DEV # TODO add error to run to say need to run 'create_test_files' @@ -387,30 +389,151 @@ def test_standard_names_validation_noncompliant_ugrid_field(self): # TODO see from below that not all bad names gte set - but want # that, so should update create_test_files method to set on all # for bad case. 
- # with Dataset("ugrid_1_bad_names.nc", "r+") as nc: - # field_all_varnames = list(nc.variables.keys()) - # print("VERIFY") - # for varname, var in nc.variables.items(): - # print(varname, getattr(var, "standard_name", "No standard_name")) + with Dataset("ugrid_1_bad_names.nc", "r+") as nc: + field_all_varnames = list(nc.variables.keys()) + print("VERIFY") + for varname, var in nc.variables.items(): + print(varname, getattr(var, "standard_name", "No standard_name")) + + from pprint import pprint + pprint(dc_output_1) + + # 'pa' is the field variable we test on + self.assertIn("non-compliance", dc_output_1["pa"]) + noncompliance = dc_output_1["pa"]["non-compliance"] + + expected_keys = [ + # itself? "pa", + # not for this field "v", + # not for this field "ta", + "time", + "time_bounds", + "Mesh2", + "Mesh2_node_x", # aka longitude? + "Mesh2_node_y", # aka latitude? + "Mesh2_face_x", # ... etc. + "Mesh2_face_y", + "Mesh2_edge_x", + "Mesh2_edge_y", + "Mesh2_face_nodes", + "Mesh2_edge_nodes", + "Mesh2_face_edges", + "Mesh2_face_links", + "Mesh2_edge_face_links", + ] + for varname in expected_keys: + noncompl_dict = noncompliance.get(varname) + self.assertIsNotNone( + noncompl_dict, + msg=f"Empty non-compliance for variable '{varname}'" + ) + self.assertIsInstance(noncompl_dict, list) + self.assertEqual(len(noncompl_dict), 1) + + # Safe to unpack after test above + noncompl_dict = noncompl_dict[0] + + self.assertIn("code", noncompl_dict) + self.assertEqual(noncompl_dict["code"], expected_code) + self.assertIn("reason", noncompl_dict) + self.assertEqual(noncompl_dict["reason"], expected_reason) + + # Form expected attribute which needs the varname and bad name + expected_attribute = { + f"{varname}:standard_name": f"badname_{varname}" + } + expected_noncompl_dict["attribute"] = expected_attribute + + self.assertIn("attribute", noncompl_dict) + self.assertEqual(noncompl_dict["attribute"], expected_attribute) + + # Final check to ensure there isn't anything else in 
there. + # If keys are missing will be reported to fail more spefically + # on per-key-value checks above + self.assertEqual(noncompl_dict, expected_noncompl_dict) from pprint import pprint - pprint(dc_output) + pprint(dc_output_2) # 'ta' is the field variable we test on - self.assertIn("non-compliance", dc_output["ta"]) - noncompliance = dc_output["ta"]["non-compliance"] + self.assertIn("non-compliance", dc_output_2["ta"]) + noncompliance = dc_output_2["ta"]["non-compliance"] expected_keys = [ - # POSSIBLY SOLVED, ATTRIBUTE FIX itself? "ta", - "Mesh2_node_x", - "Mesh2_node_y", - "Mesh2_face_x", + # itself? "ta", + # not for this field "pa", + # not for this field "v", + "time", + "time_bounds", + "Mesh2", + "Mesh2_node_x", # aka longitude? + "Mesh2_node_y", # aka latitude? + "Mesh2_face_x", # ... etc. "Mesh2_face_y", "Mesh2_edge_x", "Mesh2_edge_y", + "Mesh2_face_nodes", + "Mesh2_edge_nodes", + "Mesh2_face_edges", + "Mesh2_face_links", + "Mesh2_edge_face_links", + ] + for varname in expected_keys: + noncompl_dict = noncompliance.get(varname) + self.assertIsNotNone( + noncompl_dict, + msg=f"Empty non-compliance for variable '{varname}'" + ) + self.assertIsInstance(noncompl_dict, list) + self.assertEqual(len(noncompl_dict), 1) + + # Safe to unpack after test above + noncompl_dict = noncompl_dict[0] + + self.assertIn("code", noncompl_dict) + self.assertEqual(noncompl_dict["code"], expected_code) + self.assertIn("reason", noncompl_dict) + self.assertEqual(noncompl_dict["reason"], expected_reason) + + # Form expected attribute which needs the varname and bad name + expected_attribute = { + f"{varname}:standard_name": f"badname_{varname}" + } + expected_noncompl_dict["attribute"] = expected_attribute + + self.assertIn("attribute", noncompl_dict) + self.assertEqual(noncompl_dict["attribute"], expected_attribute) + + # Final check to ensure there isn't anything else in there. 
+ # If keys are missing will be reported to fail more spefically + # on per-key-value checks above + self.assertEqual(noncompl_dict, expected_noncompl_dict) + + from pprint import pprint + pprint(dc_output_3) + + # 'v' is the field variable we test on + self.assertIn("non-compliance", dc_output_3["v"]) + noncompliance = dc_output_3["v"]["non-compliance"] + + expected_keys = [ + # itself? "v", + # not for this field "ta", + # not for this field "pa", "time", - "v", - "pa", + "time_bounds", + "Mesh2", + "Mesh2_node_x", # aka longitude? + "Mesh2_node_y", # aka latitude? + "Mesh2_face_x", # ... etc. + "Mesh2_face_y", + "Mesh2_edge_x", + "Mesh2_edge_y", + "Mesh2_face_nodes", + "Mesh2_edge_nodes", + "Mesh2_face_edges", + "Mesh2_face_links", + "Mesh2_edge_face_links", ] for varname in expected_keys: noncompl_dict = noncompliance.get(varname) From 330846e723ce4157e018267ed2e489af7225e8d2 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 4 Sep 2025 17:31:53 +0100 Subject: [PATCH 46/97] Make var names consistent in test_compliance_checking on UGRID --- cfdm/test/test_compliance_checking.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index a0eebe4f6..7d8a11188 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -66,11 +66,11 @@ class ComplianceCheckingTest(unittest.TestCase): # 1. 
Create a file with field with invalid standard names generally # using our 'kitchen sink' field as a basis - good_general_sn_f = cfdm.example_field(1) + good_snames_general_field = cfdm.example_field(1) # TODO set bad names and then write to tempfile and read back in - bad_general_sn_f = _create_noncompliant_names_field( - good_general_sn_f, tmpfile0) - ### bad_general_sn_f.dump() # SB DEV + bad_snames_general_field = _create_noncompliant_names_field( + good_snames_general_field, tmpfile0) + ### bad_snames_general_field.dump() # SB DEV # 1. Create a file with a UGRID field with invalid standard names # on UGRID components, using our core 'UGRID 1' field as a basis @@ -270,7 +270,7 @@ def test_check_standard_names(self): def test_standard_names_validation_compliant_field(self): """Test compliance checking on a compliant non-UGRID field.""" - f = self.good_general_sn_f + f = self.good_snames_general_field dc_output = f.dataset_compliance() self.assertEqual(dc_output, dict()) @@ -291,7 +291,7 @@ def test_standard_names_validation_noncompliant_field(self): "reason": expected_reason, } - f = self.bad_general_sn_f + f = self.bad_snames_general_field dc_output = f.dataset_compliance() # SLB DEV @@ -389,11 +389,11 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): # TODO see from below that not all bad names gte set - but want # that, so should update create_test_files method to set on all # for bad case. 
- with Dataset("ugrid_1_bad_names.nc", "r+") as nc: - field_all_varnames = list(nc.variables.keys()) - print("VERIFY") - for varname, var in nc.variables.items(): - print(varname, getattr(var, "standard_name", "No standard_name")) + # with Dataset("ugrid_1_bad_names.nc", "r+") as nc: + # field_all_varnames = list(nc.variables.keys()) + # print("VERIFY") + # for varname, var in nc.variables.items(): + # print(varname, getattr(var, "standard_name", "No standard_name")) from pprint import pprint pprint(dc_output_1) From d9f00bd9134af08ad1436272ac42775d91c1cfca Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 5 Sep 2025 18:04:53 +0100 Subject: [PATCH 47/97] Update compliance checking test to mark UGRID present failures --- cfdm/read_write/netcdf/netcdfread.py | 30 ++++++++++ cfdm/test/test_compliance_checking.py | 80 +++++++++++++-------------- 2 files changed, 70 insertions(+), 40 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index ebaa47b18..b42f3efde 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -10053,6 +10053,13 @@ def _ugrid_parse_location_index_set(self, parent_attributes): # The location index set has already been parsed return + ncvar_attrs = g["variable_attributes"][location_index_set_ncvar] + self._check_standard_names( + location_index_set_ncvar, + location_index_set_ncvar, + ncvar_attrs, + ) + if not self._ugrid_check_location_index_set(location_index_set_ncvar): return @@ -10123,6 +10130,14 @@ def _ugrid_create_auxiliary_coordinates( # coordinates. E.g. ("Mesh2_node_x", "Mesh2_node_y") nodes_ncvar = mesh.coordinates_ncvar["node"] + # g = self.read_vars + # ncvar_attrs = g["variable_attributes"][nodes_ncvar] + # self._check_standard_names( + # parent_ncvar, + # nodes_ncvar, + # ncvar_attrs, + # ) + # Get the netCDF variable names of the cell # coordinates. E.g. ("Mesh1_face_x", "Mesh1_face_y"), or None # if there aren't any. 
@@ -10176,6 +10191,13 @@ def _ugrid_create_auxiliary_coordinates( mesh, location, ) + g = self.read_vars + ncvar_attrs = g["variable_attributes"][node_ncvar] + self._check_standard_names( + parent_ncvar, + node_ncvar, + ncvar_attrs, + ) self.implementation.nc_set_node_coordinate_variable( aux, node_ncvar @@ -10350,6 +10372,14 @@ def _ugrid_create_domain_topology(self, parent_ncvar, f, mesh, location): # appropriate connectivity attribute return + g = self.read_vars + ncvar_attrs = g["variable_attributes"][connectivity_ncvar] + self._check_standard_names( + parent_ncvar, + connectivity_ncvar, + ncvar_attrs, + ) + if not self._ugrid_check_connectivity_variable( parent_ncvar, mesh.mesh_ncvar, diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 7d8a11188..fde4fb486 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -303,7 +303,7 @@ def test_standard_names_validation_noncompliant_field(self): noncompliance = dc_output["ta"]["non-compliance"] expected_keys = [ - # POSSIBLY SOLVED, ATTRIBUTE FIX itself? "ta", + # itself? "ta", # fails "atmosphere_hybrid_height_coordinate", "atmosphere_hybrid_height_coordinate_bounds", "latitude_1", @@ -406,20 +406,20 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): # itself? "pa", # not for this field "v", # not for this field "ta", - "time", - "time_bounds", + # fails "time", + # fails "time_bounds", "Mesh2", - "Mesh2_node_x", # aka longitude? - "Mesh2_node_y", # aka latitude? - "Mesh2_face_x", # ... etc. - "Mesh2_face_y", - "Mesh2_edge_x", - "Mesh2_edge_y", - "Mesh2_face_nodes", - "Mesh2_edge_nodes", - "Mesh2_face_edges", - "Mesh2_face_links", - "Mesh2_edge_face_links", + # fails "Mesh2_node_x", # aka longitude? + # fails "Mesh2_node_y", # aka latitude? + # fails "Mesh2_face_x", # ... etc. 
+ # fails "Mesh2_face_y", + # fails "Mesh2_edge_x", + # fails "Mesh2_edge_y", + # fails "Mesh2_face_nodes", + # fails "Mesh2_edge_nodes", + # fails "Mesh2_face_edges", + # fails "Mesh2_face_links", + # fails "Mesh2_edge_face_links", ] for varname in expected_keys: noncompl_dict = noncompliance.get(varname) @@ -463,20 +463,20 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): # itself? "ta", # not for this field "pa", # not for this field "v", - "time", - "time_bounds", + # fails "time", + # fails "time_bounds", "Mesh2", - "Mesh2_node_x", # aka longitude? - "Mesh2_node_y", # aka latitude? - "Mesh2_face_x", # ... etc. - "Mesh2_face_y", - "Mesh2_edge_x", - "Mesh2_edge_y", - "Mesh2_face_nodes", - "Mesh2_edge_nodes", - "Mesh2_face_edges", - "Mesh2_face_links", - "Mesh2_edge_face_links", + # fails "Mesh2_node_x", # aka longitude? + # fails "Mesh2_node_y", # aka latitude? + # fails "Mesh2_face_x", # ... etc. + # fails "Mesh2_face_y", + # fails "Mesh2_edge_x", + # fails "Mesh2_edge_y", + # fails "Mesh2_face_nodes", + # fails "Mesh2_edge_nodes", + # fails "Mesh2_face_edges", + # fails "Mesh2_face_links", + # fails "Mesh2_edge_face_links", ] for varname in expected_keys: noncompl_dict = noncompliance.get(varname) @@ -520,20 +520,20 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): # itself? "v", # not for this field "ta", # not for this field "pa", - "time", - "time_bounds", + # fails "time", + # fails "time_bounds", "Mesh2", - "Mesh2_node_x", # aka longitude? - "Mesh2_node_y", # aka latitude? - "Mesh2_face_x", # ... etc. - "Mesh2_face_y", - "Mesh2_edge_x", - "Mesh2_edge_y", - "Mesh2_face_nodes", - "Mesh2_edge_nodes", - "Mesh2_face_edges", - "Mesh2_face_links", - "Mesh2_edge_face_links", + # fails "Mesh2_node_x", # aka longitude? + # fails "Mesh2_node_y", # aka latitude? + # fails "Mesh2_face_x", # ... etc. 
+ # fails "Mesh2_face_y", + # fails "Mesh2_edge_x", + # fails "Mesh2_edge_y", + # fails "Mesh2_face_nodes", + # fails "Mesh2_edge_nodes", + # fails "Mesh2_face_edges", + # fails "Mesh2_face_links", + # fails "Mesh2_edge_face_links", ] for varname in expected_keys: noncompl_dict = noncompliance.get(varname) From 74a01cabe96fb28cd43d2acde2759b1bd62b4f19 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 3 Oct 2025 23:59:10 +0100 Subject: [PATCH 48/97] Tidy including removing deprecated TODOs --- cfdm/read_write/netcdf/netcdfread.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index b42f3efde..80746b876 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8382,8 +8382,9 @@ def _check_standard_names( attribute_value = { f"{ncvar}:{sn_attr}": sn_value } - # TODO downgrade status to info/debug - logger.warning(f"%%%%%% Got sn_value of {sn_value}") + logger.debug( + f"Found a {sn_attr} of '{sn_value}' on {ncvar}" + ) if not sn_value: continue @@ -8392,7 +8393,7 @@ def _check_standard_names( # 2. Check, if requested, if is a string # TODO this is not a robust check (may have numpy string type) - # but good enough for now whilts developing + # but good enough for now whilst developing if check_is_string and not isinstance(sn_value, str): invalid_sn_found = True self._add_message( @@ -8406,9 +8407,7 @@ def _check_standard_names( conformance="3.3.requirement.1", ) - # 3. TODO implement check_is_in_custom_list for custom list check. - # noting that the custom list must contain only valid standard - # names appropriate to the context, else it defeats the point! + # 3. Check, if requested, that the SN is in the custom list given elif ( check_is_in_custom_list and sn_value not in check_is_in_custom_list @@ -8425,7 +8424,6 @@ def _check_standard_names( ), ) - # 4. 
Check, if requested, if string is in the list of valid names elif ( check_is_in_table and sn_value not in @@ -10795,7 +10793,6 @@ def _ugrid_check_mesh_topology(self, mesh_ncvar): elif topology_dimension == 3: ncvar = attributes.get("volume_node_connectivity") - if ncvar is None: self._add_message( mesh_ncvar, From 44af20c36e47938357ea47d177a86d7f5296b32a Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Mon, 6 Oct 2025 16:36:59 +0100 Subject: [PATCH 49/97] Make string-type checking include NumPy string types --- cfdm/read_write/netcdf/netcdfread.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 80746b876..49a4738ad 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8362,8 +8362,7 @@ def _check_standard_names( (computed_)standard_name was found. """ - # TODO downgrade status to info/debug - logger.warning(f"Running _check_standard_names() for: {ncvar}") + logger.debug(f"Running _check_standard_names() for: {ncvar}") if check_is_in_custom_list and check_is_in_table: raise ValueError( @@ -8391,10 +8390,10 @@ def _check_standard_names( any_sn_found = True - # 2. Check, if requested, if is a string - # TODO this is not a robust check (may have numpy string type) - # but good enough for now whilst developing - if check_is_string and not isinstance(sn_value, str): + # 2. Check, if requested, if name is a native or numpy string type + if check_is_string and not ( + isinstance(sn_value, (str, np.str_, np.bytes_)) + ): invalid_sn_found = True self._add_message( parent_ncvar, From fb6c66ca3ae39833d93baeb8efc477057879af14 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Mon, 6 Oct 2025 18:01:09 +0100 Subject: [PATCH 50/97] Prevent duplicate dict in dataset_compliance for UGRID fields --- cfdm/read_write/netcdf/netcdfread.py | 43 ++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 49a4738ad..c5ba0d79c 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -42,6 +42,9 @@ ) from .zarr import ZarrDimension +from pprint import pformat, pprint # DEBUG + + logger = logging.getLogger(__name__) _cached_temporary_files = {} @@ -4266,8 +4269,6 @@ def _create_field_or_domain( f"for {field_ncvar!r}." ) if is_log_level_debug(logger): - from pprint import pformat - logger.debug( f"Mesh dictionary is: {pformat(g['mesh'])}" ) @@ -5539,9 +5540,15 @@ def _add_message( "non-compliance": {}, }, ) - g["dataset_compliance"][parent_ncvar]["non-compliance"].setdefault( + + noncomp = g["dataset_compliance"][parent_ncvar][ + "non-compliance"].setdefault( ncvar, [] - ).append(d) + ) + # d may already be registered in the list, so don't add twice + # TODO but that could be a bug in the _add_message use - check. + if d not in noncomp: + noncomp.append(d) e = g["component_report"].setdefault(variable, {}) e.setdefault(ncvar, []).append(d) @@ -8915,6 +8922,7 @@ def _check_auxiliary_or_scalar_coordinate( g = self.read_vars coord_ncvar_attrs = g["variable_attributes"][coord_ncvar] + pprint(coord_ncvar_attrs) self._check_standard_names( parent_ncvar, coord_ncvar, @@ -9877,6 +9885,7 @@ def _netCDF4_group(self, nc, name): return group, path[-1] + # N - is umbrella one, so maybe not? def _ugrid_parse_mesh_topology(self, mesh_ncvar, attributes): """Parse a UGRID mesh topology or location index set variable. 
@@ -9951,6 +9960,13 @@ def _ugrid_parse_mesh_topology(self, mesh_ncvar, attributes): else: ncvar = attributes.get(f"{location}_node_connectivity") + ncvar_attrs = g["variable_attributes"][ncvar] + self._check_standard_names( + mesh_ncvar, + ncvar, + ncvar_attrs, + ) + ncdim = self.read_vars["variable_dimensions"].get(ncvar) if ncdim is None: continue @@ -10023,6 +10039,7 @@ def _ugrid_parse_mesh_topology(self, mesh_ncvar, attributes): g["mesh"][mesh_ncvar] = mesh + # Y def _ugrid_parse_location_index_set(self, parent_attributes): """Parse a UGRID location index set variable. @@ -10086,6 +10103,7 @@ def _ugrid_parse_location_index_set(self, parent_attributes): mesh_id=uuid4().hex, ) + # Y def _ugrid_create_auxiliary_coordinates( self, parent_ncvar, @@ -10127,14 +10145,6 @@ def _ugrid_create_auxiliary_coordinates( # coordinates. E.g. ("Mesh2_node_x", "Mesh2_node_y") nodes_ncvar = mesh.coordinates_ncvar["node"] - # g = self.read_vars - # ncvar_attrs = g["variable_attributes"][nodes_ncvar] - # self._check_standard_names( - # parent_ncvar, - # nodes_ncvar, - # ncvar_attrs, - # ) - # Get the netCDF variable names of the cell # coordinates. E.g. ("Mesh1_face_x", "Mesh1_face_y"), or None # if there aren't any. @@ -10209,6 +10219,7 @@ def _ugrid_create_auxiliary_coordinates( mesh.auxiliary_coordinates[location] = auxs return auxs + # N def _ugrid_create_bounds_from_nodes( self, parent_ncvar, @@ -10321,6 +10332,7 @@ def _ugrid_create_bounds_from_nodes( return aux + # Y def _ugrid_create_domain_topology(self, parent_ncvar, f, mesh, location): """Create a domain topology construct. 
@@ -10451,6 +10463,7 @@ def _ugrid_create_domain_topology(self, parent_ncvar, f, mesh, location): return domain_topology + # N def _ugrid_create_cell_connectivities( self, parent_ncvar, f, mesh, location ): @@ -10551,6 +10564,7 @@ def _ugrid_create_cell_connectivities( return [connectivity] + # N def _ugrid_cell_dimension(self, location, connectivity_ncvar, mesh): """The connectivity variable dimension that indexes the cells. @@ -10588,6 +10602,7 @@ def _ugrid_cell_dimension(self, location, connectivity_ncvar, mesh): return cell_dim + # Y def _ugrid_check_mesh_topology(self, mesh_ncvar): """Check a UGRID mesh topology variable. @@ -10850,6 +10865,7 @@ def _ugrid_check_mesh_topology(self, mesh_ncvar): return ok + # Y def _ugrid_check_location_index_set( self, location_index_set_ncvar, @@ -10954,6 +10970,7 @@ def _ugrid_check_location_index_set( return ok + # Y def _ugrid_check_field_location_index_set( self, parent_ncvar, @@ -11091,6 +11108,7 @@ def _ugrid_check_field_location_index_set( self._include_component_report(parent_ncvar, location_index_set_ncvar) return ok + # Y def _ugrid_check_field_mesh( self, parent_ncvar, @@ -11178,6 +11196,7 @@ def _ugrid_check_field_mesh( self._include_component_report(parent_ncvar, mesh_ncvar) return ok + # Y def _ugrid_check_connectivity_variable( self, parent_ncvar, mesh_ncvar, connectivity_ncvar, connectivity_attr ): From 4b7d61a7b7ad0a0c5a77d83b69d62558c0795747 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Tue, 7 Oct 2025 02:32:46 +0100 Subject: [PATCH 51/97] Fix bug in netcfread._include_component_report causing bad entry --- cfdm/read_write/netcdf/netcdfread.py | 9 ++++++++- cfdm/test/test_compliance_checking.py | 25 ++++++++----------------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index c5ba0d79c..b573095bf 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5549,6 +5549,8 @@ def _add_message( # TODO but that could be a bug in the _add_message use - check. if d not in noncomp: noncomp.append(d) + print("SLB DEBUG") + pprint(d) e = g["component_report"].setdefault(variable, {}) e.setdefault(ncvar, []).append(d) @@ -5590,9 +5592,14 @@ def _include_component_report(self, parent_ncvar, ncvar): g = self.read_vars component_report = g["component_report"].get(ncvar) if component_report: + # TODO SLB suspected bug fix from original code, i.e: + # x = g["dataset_compliance"][parent_ncvar]["non-compliance"] + # if ncvar not in x: + # x[ncvar] = [] + # x[ncvar].extend(component_report[ncvar]) g["dataset_compliance"][parent_ncvar]["non-compliance"].setdefault( ncvar, [] - ).extend(component_report) + ).extend(component_report[ncvar]) def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): """Find a domain axis identifier for the variable's dimensions. diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index fde4fb486..dc17e7af2 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -70,7 +70,6 @@ class ComplianceCheckingTest(unittest.TestCase): # TODO set bad names and then write to tempfile and read back in bad_snames_general_field = _create_noncompliant_names_field( good_snames_general_field, tmpfile0) - ### bad_snames_general_field.dump() # SB DEV # 1. 
Create a file with a UGRID field with invalid standard names # on UGRID components, using our core 'UGRID 1' field as a basis @@ -274,8 +273,6 @@ def test_standard_names_validation_compliant_field(self): dc_output = f.dataset_compliance() self.assertEqual(dc_output, dict()) - # TODO what else to test on in 'good' case? - def test_standard_names_validation_noncompliant_field(self): """Test compliance checking on a non-compliant non-UGRID field.""" expected_reason = ( @@ -295,8 +292,8 @@ def test_standard_names_validation_noncompliant_field(self): dc_output = f.dataset_compliance() # SLB DEV - # from pprint import pprint - # pprint(dc_output) + from pprint import pprint + pprint(dc_output) # 'ta' is the field variable we test on self.assertIn("non-compliance", dc_output["ta"]) @@ -352,16 +349,12 @@ def test_standard_names_validation_noncompliant_field(self): # on per-key-value checks above self.assertEqual(noncompl_dict, expected_noncompl_dict) - # TODO what else to check here? - def test_standard_names_validation_compliant_ugrid_field(self): """Test compliance checking on a compliant UGRID field.""" f = self.good_ugrid_sn_f dc_output = f.dataset_compliance() self.assertEqual(dc_output, dict()) - # TODO what else to test on in 'good' case? - def test_standard_names_validation_noncompliant_ugrid_fields(self): """Test compliance checking on non-compliant UGRID fields.""" expected_reason = ( @@ -386,14 +379,14 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): # SLB DEV # TODO add error to run to say need to run 'create_test_files' - # TODO see from below that not all bad names gte set - but want + # TODO see from below that not all bad names get set - but want # that, so should update create_test_files method to set on all # for bad case. 
- # with Dataset("ugrid_1_bad_names.nc", "r+") as nc: - # field_all_varnames = list(nc.variables.keys()) - # print("VERIFY") - # for varname, var in nc.variables.items(): - # print(varname, getattr(var, "standard_name", "No standard_name")) + with Dataset("ugrid_1_bad_names.nc", "r+") as nc: + field_all_varnames = list(nc.variables.keys()) + print("VERIFY") + for varname, var in nc.variables.items(): + print(varname, getattr(var, "standard_name", "No standard_name")) from pprint import pprint pprint(dc_output_1) @@ -566,8 +559,6 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): # on per-key-value checks above self.assertEqual(noncompl_dict, expected_noncompl_dict) - # TODO what else to check here? - if __name__ == "__main__": print("Run date:", datetime.datetime.now()) From 11de20778976fb50d7e0cf547519da6ca61c82ce Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 8 Oct 2025 02:11:25 +0100 Subject: [PATCH 52/97] Improve naming in _add_message to clarify netCDF variable parentage --- cfdm/read_write/netcdf/netcdfread.py | 76 +++++++++++++++++----------- 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index b573095bf..226a8d7ca 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5469,12 +5469,12 @@ def _get_geometry(self, field_ncvar, return_ncvar=False): def _add_message( self, - parent_ncvar, + top_ancestor_ncvar, ncvar, + direct_parent_ncvar=None, message=None, attribute=None, dimensions=None, - variable=None, conformance=None, ): """Stores and logs a message about an issue with a field. @@ -5483,19 +5483,44 @@ def _add_message( :Parameters: - parent_ncvar: `str` - The netCDF variable name of the parent variable. + top_ancestor_ncvar: `str` + The netCDF variable name of the ancestor variable + under which to register the component problem at the + top level. 
+ + This is usually the parent variable of the variable + `ncvar` which has the component problem, but may be + a higher parent e.g. grandparent variable, when there is + an intermediate parent variable in which the problem + should also be registered using `direct_parent_ncvar`, + or `ncvar` itself, where no parent variable exists or + is relevant. *Parameter example:* ``'tas'`` ncvar: `str` - The netCDF variable name of the parent component that + The netCDF variable name with the component that has the problem. *Parameter example:* ``'rotated_latitude_longitude'`` + direct_parent_ncvar: `str` or `None`, optional + The netCDF variable name of the variable which is the + direct parent of the variable `ncvar` which has the + component problem, only to be provided if a higher + parent such as a grandparent variable is set as + the `top_ancestor_ncvar` where it is also important + to register the problem on the direct parent. + + If `None`, the default, then the problem is not + not registered for any further (parent) variable + than `top_ancestor_ncvar`. + + *Parameter example:* + ``'time'`` + message: (`str`, `str`), optional attribute: `dict`, optional @@ -5510,8 +5535,6 @@ def _add_message( *Parameter example:* ``dimensions=('lat', 'lon')`` - variable: `str`, optional - """ g = self.read_vars @@ -5530,30 +5553,28 @@ def _add_message( if dimensions is not None: d["dimensions"] = dimensions - if variable is None: - variable = ncvar + if direct_parent_ncvar is None: + direct_parent_ncvar = ncvar g["dataset_compliance"].setdefault( - parent_ncvar, + top_ancestor_ncvar, { "CF version": self.implementation.get_cf_version(), "non-compliance": {}, }, ) - noncomp = g["dataset_compliance"][parent_ncvar][ - "non-compliance"].setdefault( - ncvar, [] - ) - # d may already be registered in the list, so don't add twice - # TODO but that could be a bug in the _add_message use - check. 
- if d not in noncomp: - noncomp.append(d) - print("SLB DEBUG") - pprint(d) + g["dataset_compliance"][top_ancestor_ncvar]["non-compliance"].setdefault( + ncvar, [] + ).append(d) - e = g["component_report"].setdefault(variable, {}) - e.setdefault(ncvar, []).append(d) + # Only add a component report if there is need i.e. if the direct + # parent ncvar is not the same as the top_ancestor_ncvar + if direct_parent_ncvar != top_ancestor_ncvar: + e = g["component_report"].setdefault(direct_parent_ncvar, {}) + # print("cr is before:", g["component_report"]) + e.setdefault(ncvar, []).append(d) + # print("cr is after:", g["component_report"]) if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover @@ -5581,8 +5602,8 @@ def _include_component_report(self, parent_ncvar, ncvar): ``'tas'`` ncvar: `str` - The netCDF variable name of the parent component that - has the problem. + The netCDF variable name of the variable with + the component that has the problem. :Returns: @@ -5592,14 +5613,9 @@ def _include_component_report(self, parent_ncvar, ncvar): g = self.read_vars component_report = g["component_report"].get(ncvar) if component_report: - # TODO SLB suspected bug fix from original code, i.e: - # x = g["dataset_compliance"][parent_ncvar]["non-compliance"] - # if ncvar not in x: - # x[ncvar] = [] - # x[ncvar].extend(component_report[ncvar]) g["dataset_compliance"][parent_ncvar]["non-compliance"].setdefault( ncvar, [] - ).extend(component_report[ncvar]) + ).append(component_report) def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): """Find a domain axis identifier for the variable's dimensions. From dbbd929c41e68ef1798ac92a944433a7b707748f Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 10 Oct 2025 16:00:03 +0100 Subject: [PATCH 53/97] Update arg. 
naming in _check_standard_names to mirror _add_message --- cfdm/read_write/netcdf/netcdfread.py | 93 ++++++++++++++++++---------- 1 file changed, 59 insertions(+), 34 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 226a8d7ca..1e890382a 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -3635,7 +3635,7 @@ def _check_formula_terms( "is incorrectly formatted", ), attribute=attribute, - variable=coord_ncvar, + direct_parent_ncvar=coord_ncvar, ) for x in parsed_bounds_formula_terms: @@ -3652,7 +3652,7 @@ def _check_formula_terms( "is incorrectly formatted", ), attribute=bounds_attribute, - variable=coord_ncvar, + direct_parent_ncvar=coord_ncvar, ) continue @@ -3668,7 +3668,7 @@ def _check_formula_terms( ncvar, message=message, attribute=bounds_attribute, - variable=coord_ncvar, + direct_parent_ncvar=coord_ncvar, ) continue @@ -3681,7 +3681,7 @@ def _check_formula_terms( "has incompatible terms", ), attribute=bounds_attribute, - variable=coord_ncvar, + direct_parent_ncvar=coord_ncvar, ) continue @@ -3705,7 +3705,7 @@ def _check_formula_terms( "coordinate variable", ), attribute=bounds_attribute, - variable=coord_ncvar, + direct_parent_ncvar=coord_ncvar, ) continue @@ -3719,7 +3719,7 @@ def _check_formula_terms( ), attribute=bounds_attribute, dimensions=dimensions, - variable=coord_ncvar, + direct_parent_ncvar=coord_ncvar, ) continue # WRONG - need to account for char arrays: @@ -3733,7 +3733,7 @@ def _check_formula_terms( ), attribute=bounds_attribute, dimensions=dimensions, - variable=coord_ncvar, + direct_parent_ncvar=coord_ncvar, ) continue @@ -3751,7 +3751,7 @@ def _check_formula_terms( "has incompatible terms", ), attribute=bounds_attribute, - variable=coord_ncvar, + direct_parent_ncvar=coord_ncvar, ) else: @@ -3804,7 +3804,7 @@ def _check_formula_terms( "has no bounds", ), attribute=attribute, - variable=coord_ncvar, + direct_parent_ncvar=coord_ncvar, ) def 
_missing_variable(self, ncvar, message0): @@ -5509,7 +5509,7 @@ def _add_message( direct_parent_ncvar: `str` or `None`, optional The netCDF variable name of the variable which is the direct parent of the variable `ncvar` which has the - component problem, only to be provided if a higher + component problem, *only to be provided* if a higher parent such as a grandparent variable is set as the `top_ancestor_ncvar` where it is also important to register the problem on the direct parent. @@ -5553,9 +5553,6 @@ def _add_message( if dimensions is not None: d["dimensions"] = dimensions - if direct_parent_ncvar is None: - direct_parent_ncvar = ncvar - g["dataset_compliance"].setdefault( top_ancestor_ncvar, { @@ -5569,8 +5566,8 @@ def _add_message( ).append(d) # Only add a component report if there is need i.e. if the direct - # parent ncvar is not the same as the top_ancestor_ncvar - if direct_parent_ncvar != top_ancestor_ncvar: + # parent ncvar is defined so not the same as the top ancestor ncvar + if direct_parent_ncvar: e = g["component_report"].setdefault(direct_parent_ncvar, {}) # print("cr is before:", g["component_report"]) e.setdefault(ncvar, []).append(d) @@ -8323,7 +8320,8 @@ def _copy_construct(self, construct_type, parent_ncvar, ncvar): # document. # ================================================================ def _check_standard_names( - self, parent_ncvar, ncvar, ncvar_attrs, + self, top_ancestor_ncvar, + ncvar, ncvar_attrs, direct_parent_ncvar=None, check_is_string=True, check_is_in_table=True, check_is_in_custom_list=False, ): @@ -8336,25 +8334,48 @@ def _check_standard_names( custom list which is expected to be a small subset of names from the table. - These checks are in the context of the variable and - parent variable. + These checks are in the context of the variable and at least + one parent variable (though the parent can be set at the variable + itself should no parent exist or be relevant). .. 
versionadded:: NEXTVERSION :Parameters: - parent_ncvar: `str` - The netCDF variable name of the parent variable. + top_ancestor_ncvar: `str` + The netCDF variable name of the ancestor variable + under which to register the bad standard name at + the top level. + + This is usually the parent variable of the variable + `ncvar` which has the component problem, but may be + a higher parent e.g. grandparent variable, when there is + an intermediate parent variable in which the problem + should also be registered using `direct_parent_ncvar`, + or `ncvar` itself, where no parent variable exists or + is relevant. ncvar: `str` - The name of the netCDF variable to perform the - standard names check upon. + The netCDF variable name with the component that + has the bad standard name. ncvar_attrs: `str` The variable attributes for the netCDF variable, as stored in the 'read_vars' dictionary under the 'variable_attributes' key. + direct_parent_ncvar: `str` or `None`, optional + The netCDF variable name of the variable which is the + direct parent of the variable `ncvar` which has the + bad standard name, *only to be provided* if a higher + parent such as a grandparent variable is set as + the `top_ancestor_ncvar` where it is also important + to register the problem on the direct parent. + + If `None`, the default, then the bad standard name + is not not registered for any further (parent) + variable than `top_ancestor_ncvar`. + check_is_string: `bool` Whether or not to check if the type of the attribute value is a string type. By default this is checked. @@ -8426,7 +8447,7 @@ def _check_standard_names( ): invalid_sn_found = True self._add_message( - parent_ncvar, + top_ancestor_ncvar, ncvar, attribute=attribute_value, message=( @@ -8434,6 +8455,7 @@ def _check_standard_names( f"has a value that is not a string", ), conformance="3.3.requirement.1", + direct_parent_ncvar=direct_parent_ncvar, ) # 3. 
Check, if requested, that the SN is in the custom list given @@ -8443,7 +8465,7 @@ def _check_standard_names( ): invalid_sn_found = True self._add_message( - parent_ncvar, + top_ancestor_ncvar, ncvar, attribute=attribute_value, message=( @@ -8451,6 +8473,7 @@ def _check_standard_names( f"has a value that is not appropriate to " "the context of the variable in question", ), + direct_parent_ncvar=direct_parent_ncvar, ) # 4. Check, if requested, if string is in the list of valid names @@ -8464,7 +8487,7 @@ def _check_standard_names( f"'{sn_value}' for {ncvar}" ) self._add_message( - parent_ncvar, + top_ancestor_ncvar, ncvar, message=( f"{sn_attr} attribute", @@ -8473,6 +8496,7 @@ def _check_standard_names( ), attribute=attribute_value, conformance="3.3.requirement.2", + direct_parent_ncvar=direct_parent_ncvar, ) # Three possible return signatures to cover existence and validity: @@ -8546,7 +8570,7 @@ def _check_bounds( bounds_ncvar, message=message, attribute=attribute, - variable=coord_ncvar, + direct_parent_ncvar=coord_ncvar, ) return False @@ -8563,7 +8587,7 @@ def _check_bounds( message=incorrect_dimensions, attribute=attribute, dimensions=g["variable_dimensions"][bounds_ncvar], - variable=coord_ncvar, + direct_parent_ncvar=coord_ncvar, ) ok = False @@ -8574,7 +8598,7 @@ def _check_bounds( message=incorrect_dimensions, attribute=attribute, dimensions=g["variable_dimensions"][bounds_ncvar], - variable=coord_ncvar, + direct_parent_ncvar=coord_ncvar, ) ok = False @@ -8628,7 +8652,7 @@ def _check_geometry_node_coordinates( node_ncvar, message=message, attribute=attribute, - variable=field_ncvar, + direct_parent_ncvar=field_ncvar, ) return False @@ -8643,7 +8667,7 @@ def _check_geometry_node_coordinates( "not in node_coordinates", ), attribute=attribute, - variable=field_ncvar, + direct_parent_ncvar=field_ncvar, ) ok = False @@ -10972,7 +10996,7 @@ def _ugrid_check_location_index_set( mesh_ncvar, message=message, attribute={f"{location_index_set_ncvar}:mesh": 
mesh_ncvar}, - variable=location_index_set_ncvar, + direct_parent_ncvar=location_index_set_ncvar, ) ok = False elif mesh_ncvar not in g["mesh"]: @@ -11094,7 +11118,7 @@ def _ugrid_check_field_location_index_set( mesh_ncvar, message=message, attribute={f"{location_index_set_ncvar}:mesh": mesh_ncvar}, - variable=location_index_set_ncvar, + direct_parent_ncvar=location_index_set_ncvar, ) ok = False elif mesh_ncvar not in g["mesh"]: @@ -11260,7 +11284,7 @@ def _ugrid_check_connectivity_variable( parent_ncvar, connectivity_ncvar, message=(f"{connectivity_attr} attribute", "is missing"), - variable=mesh_ncvar, + direct_parent_ncvar=mesh_ncvar, ) return False elif connectivity_ncvar not in g["internal_variables"]: @@ -11274,7 +11298,7 @@ def _ugrid_check_connectivity_variable( attribute={ f"{mesh_ncvar}:{connectivity_attr}": connectivity_ncvar }, - variable=mesh_ncvar, + direct_parent_ncvar=mesh_ncvar, ) return False else: @@ -11283,6 +11307,7 @@ def _ugrid_check_connectivity_variable( parent_ncvar, connectivity_ncvar, ncvar_attrs, + direct_parent_ncvar=mesh_ncvar, ) parent_ncdims = self._ncdimensions(parent_ncvar) From cc04b7da4a362a7a5fb92c416ce87d6eea3aa6d7 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 30 Oct 2025 15:51:56 +0000 Subject: [PATCH 54/97] Update structure of dataset_compliance to include attrs as keys --- cfdm/read_write/netcdf/netcdfread.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 1e890382a..12e84dbc5 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5562,16 +5562,22 @@ def _add_message( ) g["dataset_compliance"][top_ancestor_ncvar]["non-compliance"].setdefault( - ncvar, [] + ncvar, [] ).append(d) # Only add a component report if there is need i.e. 
if the direct # parent ncvar is defined so not the same as the top ancestor ncvar if direct_parent_ncvar: + # Dicts are optimised for key-value lookup, but this requires + # value-key lookup - is there a better way? + varattrs = g["variable_attributes"][top_ancestor_ncvar] + reverse_varattrs = {v: k for k, v in varattrs.items()} + store_attr = reverse_varattrs[ncvar] + e = g["component_report"].setdefault(direct_parent_ncvar, {}) - # print("cr is before:", g["component_report"]) - e.setdefault(ncvar, []).append(d) - # print("cr is after:", g["component_report"]) + # Intermediate key in dict is the attr of relevance + e2 = e.setdefault(store_attr, {}) + e2.setdefault(ncvar, []).append(d) if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover From 3d1a2fc6ea037c111976fa6c43b175ddfaeda90d Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 31 Oct 2025 18:08:05 +0000 Subject: [PATCH 55/97] Update further data compliance structure to add mesh level --- cfdm/read_write/netcdf/netcdfread.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 12e84dbc5..9eb0994b1 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5614,9 +5614,15 @@ def _include_component_report(self, parent_ncvar, ncvar): """ g = self.read_vars + component_report = g["component_report"].get(ncvar) if component_report: - g["dataset_compliance"][parent_ncvar]["non-compliance"].setdefault( + set_on = g["dataset_compliance"][parent_ncvar]["non-compliance"] + if g["mesh"]: + set_on = set_on.setdefault( + "mesh", {} + ) + set_on.setdefault( ncvar, [] ).append(component_report) From 3afa2a89f608238f21c289cee17a846440792563 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Mon, 3 Nov 2025 15:49:25 +0000 Subject: [PATCH 56/97] Change data compliance structure to have attributes as keys --- cfdm/read_write/netcdf/netcdfread.py | 39 ++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 9eb0994b1..b7fdbda95 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5548,22 +5548,29 @@ def _add_message( else: code = None - d = {"code": code, "attribute": attribute, "reason": message} + # SLB DEV + # Temporary processing whilst get dataset_compliance data structure + # updated - after this we will change attribute argument inputs + # so arguments are made separately + attribute_key = next(iter(attribute)) + var_name, attribute_name = attribute_key.split(":") + attribute_value = attribute[attribute_key] + d = {"code": code, "value": attribute_value, "reason": message} if dimensions is not None: d["dimensions"] = dimensions - g["dataset_compliance"].setdefault( - top_ancestor_ncvar, - { + noncompliance_dict = { "CF version": self.implementation.get_cf_version(), "non-compliance": {}, - }, + } + g["dataset_compliance"].setdefault( + top_ancestor_ncvar, noncompliance_dict ) g["dataset_compliance"][top_ancestor_ncvar]["non-compliance"].setdefault( - ncvar, [] - ).append(d) + attribute_name, [] ### ncvar, [] + ).append(d) ###{attribute_name: d}) # Only add a component report if there is need i.e. 
if the direct # parent ncvar is defined so not the same as the top ancestor ncvar @@ -5574,10 +5581,26 @@ def _add_message( reverse_varattrs = {v: k for k, v in varattrs.items()} store_attr = reverse_varattrs[ncvar] + # SADIE + print( + "ARGS ARE", + top_ancestor_ncvar, + ncvar, + direct_parent_ncvar, + message, + attribute, + dimensions, + conformance, + ) + parent_ncdims = self._ncdimensions(top_ancestor_ncvar) + print("NDIMS ARE", parent_ncdims) + #u = _ugrid_check_connectivity_variable() + #print("CONN VAR IS", u) + e = g["component_report"].setdefault(direct_parent_ncvar, {}) # Intermediate key in dict is the attr of relevance e2 = e.setdefault(store_attr, {}) - e2.setdefault(ncvar, []).append(d) + e2.setdefault(attribute_name, []).append(d) # var_name:d here dupes if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover From 218235a406e12fb2c546e6bfb9ebfcddcaa59baf Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Mon, 3 Nov 2025 16:15:57 +0000 Subject: [PATCH 57/97] Change data compliance structure to store nested code & values --- cfdm/read_write/netcdf/netcdfread.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index b7fdbda95..f68d66219 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5639,15 +5639,27 @@ def _include_component_report(self, parent_ncvar, ncvar): g = self.read_vars component_report = g["component_report"].get(ncvar) + + # SLB need these to be passed through somehow! 
+ code = 0 + attribute_value = None + # SLB rename this 'd' from _add_message to something better + mesh_d = d = { + "code": code, "value": attribute_value, "reason": None + } + ### print("++++++++++++++COMP REPORT", component_report) if component_report: set_on = g["dataset_compliance"][parent_ncvar]["non-compliance"] if g["mesh"]: set_on = set_on.setdefault( - "mesh", {} + "mesh", mesh_d ) - set_on.setdefault( - ncvar, [] - ).append(component_report) + ###print("SET ON IS", set_on) + set_on["reason"] = {ncvar: component_report} + else: + set_on.setdefault( + ncvar, [] + ).append(component_report) def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): """Find a domain axis identifier for the variable's dimensions. From 7e3e70a14c9873bf08e860fdc9fffcbc44ebb17a Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Mon, 3 Nov 2025 20:41:29 +0000 Subject: [PATCH 58/97] Update further data compliance structure to nest UGRI mesh info --- cfdm/read_write/netcdf/netcdfread.py | 29 +++++++++++++--------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index f68d66219..dea79f8f4 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5593,14 +5593,10 @@ def _add_message( conformance, ) parent_ncdims = self._ncdimensions(top_ancestor_ncvar) - print("NDIMS ARE", parent_ncdims) - #u = _ugrid_check_connectivity_variable() - #print("CONN VAR IS", u) e = g["component_report"].setdefault(direct_parent_ncvar, {}) - # Intermediate key in dict is the attr of relevance - e2 = e.setdefault(store_attr, {}) - e2.setdefault(attribute_name, []).append(d) # var_name:d here dupes + e2 = e.setdefault(store_attr, noncompliance_dict) + e2["non-compliance"] = {attribute_name: d} if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover @@ -5640,22 +5636,23 @@ def _include_component_report(self, parent_ncvar, 
ncvar): component_report = g["component_report"].get(ncvar) - # SLB need these to be passed through somehow! + # SLB need these to be passed through somehow! Code irrelevant, though code = 0 - attribute_value = None + attribute_value = "DUMMY" # SLB rename this 'd' from _add_message to something better - mesh_d = d = { - "code": code, "value": attribute_value, "reason": None + value_d = { + "code": code, "value": attribute_value, "reason": {} + } + noncompliance_dict = { + "CF version": self.implementation.get_cf_version(), + "non-compliance": {}, } - ### print("++++++++++++++COMP REPORT", component_report) if component_report: set_on = g["dataset_compliance"][parent_ncvar]["non-compliance"] if g["mesh"]: - set_on = set_on.setdefault( - "mesh", mesh_d - ) - ###print("SET ON IS", set_on) - set_on["reason"] = {ncvar: component_report} + s1 = set_on.setdefault("mesh", value_d) + s2 = s1["reason"].setdefault(ncvar, noncompliance_dict) + s2["non-compliance"] = component_report else: set_on.setdefault( ncvar, [] From 60f908402ee9623ed4524a2d618483af07ce6eaf Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Mon, 3 Nov 2025 21:03:30 +0000 Subject: [PATCH 59/97] Allow passing of attrs & dims into _include_component_report --- cfdm/read_write/netcdf/netcdfread.py | 43 +++++++++++++++++++++------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index dea79f8f4..df89592b2 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5610,7 +5610,8 @@ def _add_message( return d - def _include_component_report(self, parent_ncvar, ncvar): + def _include_component_report( + self, parent_ncvar, ncvar, attribute, dimensions=None): """Include a component in the dataset compliance report. .. 
versionadded:: (cfdm) 1.11.0.0 @@ -5627,6 +5628,18 @@ def _include_component_report(self, parent_ncvar, ncvar): The netCDF variable name of the variable with the component that has the problem. + attribute: `dict` + The name and value of the netCDF attribute that has a problem. + + *Parameter example:* + ``attribute={'tas:cell_measures': 'area: areacella'}`` + + dimensions: sequence of `str`, optional + The netCDF dimensions of the variable that has a problem. + + *Parameter example:* + ``dimensions=('lat', 'lon')`` + :Returns: `None` @@ -5636,21 +5649,22 @@ def _include_component_report(self, parent_ncvar, ncvar): component_report = g["component_report"].get(ncvar) - # SLB need these to be passed through somehow! Code irrelevant, though - code = 0 - attribute_value = "DUMMY" # SLB rename this 'd' from _add_message to something better - value_d = { - "code": code, "value": attribute_value, "reason": {} - } + # SLB Note: have dropped 'code' because it doesn't make sense to + # register a code except at the lowest level... 
+ d = {"value": attribute, "reason": {}} + if dimensions is not None: + d["dimensions"] = dimensions + noncompliance_dict = { "CF version": self.implementation.get_cf_version(), "non-compliance": {}, } + if component_report: set_on = g["dataset_compliance"][parent_ncvar]["non-compliance"] if g["mesh"]: - s1 = set_on.setdefault("mesh", value_d) + s1 = set_on.setdefault("mesh", d) s2 = s1["reason"].setdefault(ncvar, noncompliance_dict) s2["non-compliance"] = component_report else: @@ -11196,7 +11210,12 @@ def _ugrid_check_field_location_index_set( ) ok = False - self._include_component_report(parent_ncvar, location_index_set_ncvar) + # SLB check attribute in question is always "location_index_set" here + # an verify whether dimensions should be registered here + self._include_component_report( + parent_ncvar, location_index_set_ncvar, "location_index_set", + dimensions=g["variable_dimensions"][location_index_set_ncvar] + ) return ok # Y @@ -11284,7 +11303,11 @@ def _ugrid_check_field_mesh( ) ok = False - self._include_component_report(parent_ncvar, mesh_ncvar) + # SLB check attribute in question is always "mesh" here -> + # looks like it could be "location" in some cases? No dims it seems? + self._include_component_report( + parent_ncvar, mesh_ncvar, "mesh" + ) return ok # Y From a4ebf667c8515742b2b7274d451639c783687d74 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Mon, 3 Nov 2025 21:20:32 +0000 Subject: [PATCH 60/97] Get UGRID mesh checking non-compliance output as desired --- cfdm/read_write/netcdf/netcdfread.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index df89592b2..7f1c931e3 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5594,9 +5594,10 @@ def _add_message( ) parent_ncdims = self._ncdimensions(top_ancestor_ncvar) - e = g["component_report"].setdefault(direct_parent_ncvar, {}) - e2 = e.setdefault(store_attr, noncompliance_dict) - e2["non-compliance"] = {attribute_name: d} + e = g["component_report"].setdefault( + direct_parent_ncvar, noncompliance_dict) + e2 = e.setdefault(store_attr, d) + e2["reason"] = {ncvar: d.copy()} if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover From 7fdfd80469990744c789dfe15fee79ae2846d7b6 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Tue, 4 Nov 2025 01:31:44 +0000 Subject: [PATCH 61/97] Update further data compliance structure to store multiple reasons --- cfdm/read_write/netcdf/netcdfread.py | 50 ++++++++++++++++++---------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 7f1c931e3..4cf75e2bd 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5555,6 +5555,7 @@ def _add_message( attribute_key = next(iter(attribute)) var_name, attribute_name = attribute_key.split(":") attribute_value = attribute[attribute_key] + pre_d = {"code": code, "value": attribute_value, "reason": {}} d = {"code": code, "value": attribute_value, "reason": message} if dimensions is not None: @@ -5567,37 +5568,48 @@ def _add_message( g["dataset_compliance"].setdefault( top_ancestor_ncvar, noncompliance_dict ) - - g["dataset_compliance"][top_ancestor_ncvar]["non-compliance"].setdefault( - attribute_name, [] ### ncvar, [] - ).append(d) ###{attribute_name: d}) - + ###print("READ VARS ARE", g) # Only add a component report if there is need i.e. if the direct # parent ncvar is defined so not the same as the top ancestor ncvar if direct_parent_ncvar: + g["dataset_compliance"][top_ancestor_ncvar]["non-compliance"].setdefault( + attribute_name, [] + ).append(d) + # Dicts are optimised for key-value lookup, but this requires # value-key lookup - is there a better way? 
varattrs = g["variable_attributes"][top_ancestor_ncvar] reverse_varattrs = {v: k for k, v in varattrs.items()} store_attr = reverse_varattrs[ncvar] - # SADIE - print( - "ARGS ARE", - top_ancestor_ncvar, - ncvar, - direct_parent_ncvar, - message, - attribute, - dimensions, - conformance, - ) + # SLB DEV ARGS + # print( + # "ARGS ARE", + # top_ancestor_ncvar, + # ncvar, + # direct_parent_ncvar, + # message, + # attribute, + # dimensions, + # conformance, + # ) parent_ncdims = self._ncdimensions(top_ancestor_ncvar) e = g["component_report"].setdefault( direct_parent_ncvar, noncompliance_dict) - e2 = e.setdefault(store_attr, d) - e2["reason"] = {ncvar: d.copy()} + e2 = e.setdefault(store_attr, pre_d) + e2["reason"][ncvar] = d + else: + ### print("NON DIRECT PARENT CASE:", ncvar, top_ancestor_ncvar, d) + g1 = g["dataset_compliance"][top_ancestor_ncvar]["non-compliance"].setdefault( + ncvar, pre_d + ) + g1["reason"].setdefault(attribute_name, []).append(d) + # SLB NEW: this shows there are missing parts from the dict! + # Some things aren't being added to the component report when + # should be... + # e = g["component_report"].setdefault( + # top_ancestor_ncvar, noncompliance_dict) if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover @@ -5653,6 +5665,7 @@ def _include_component_report( # SLB rename this 'd' from _add_message to something better # SLB Note: have dropped 'code' because it doesn't make sense to # register a code except at the lowest level... + # SLB NOTE reason is a dict, not a list! d = {"value": attribute, "reason": {}} if dimensions is not None: d["dimensions"] = dimensions @@ -5669,6 +5682,7 @@ def _include_component_report( s2 = s1["reason"].setdefault(ncvar, noncompliance_dict) s2["non-compliance"] = component_report else: + # Never used? Chage up method to be mesh specific, then? set_on.setdefault( ncvar, [] ).append(component_report) From 03559fcab471203c5fb55d086f6d8091a4b2076f Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Tue, 4 Nov 2025 01:35:58 +0000 Subject: [PATCH 62/97] Change key name in dataset_compliance to 'attributes' for clarity --- cfdm/read_write/netcdf/netcdfread.py | 20 ++++++++++---------- cfdm/test/test_compliance_checking.py | 16 ++++++++-------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 4cf75e2bd..f558ef892 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1328,7 +1328,7 @@ def read( "verbose": verbose, # Warnings? "warnings": warnings, - "dataset_compliance": {None: {"non-compliance": {}}}, + "dataset_compliance": {None: {"attributes": {}}}, "component_report": {}, "auxiliary_coordinate": {}, "cell_measure": {}, @@ -3895,7 +3895,7 @@ def _create_field_or_domain( "CF version" ] = self.implementation.get_cf_version() g["dataset_compliance"][field_ncvar]["dimensions"] = dimensions - g["dataset_compliance"][field_ncvar].setdefault("non-compliance", {}) + g["dataset_compliance"][field_ncvar].setdefault("attributes", {}) logger.info( " Converting netCDF variable " @@ -5150,7 +5150,7 @@ def _create_field_or_domain( # ------------------------------------------------------------- # Add the structural read report to the field/domain dataset_compliance = g["dataset_compliance"][field_ncvar] - components = dataset_compliance["non-compliance"] + components = dataset_compliance["attributes"] if components: dataset_compliance = {field_ncvar: dataset_compliance} else: @@ -5563,7 +5563,7 @@ def _add_message( noncompliance_dict = { "CF version": self.implementation.get_cf_version(), - "non-compliance": {}, + "attributes": {}, } g["dataset_compliance"].setdefault( top_ancestor_ncvar, noncompliance_dict @@ -5572,7 +5572,7 @@ def _add_message( # Only add a component report if there is need i.e. 
if the direct # parent ncvar is defined so not the same as the top ancestor ncvar if direct_parent_ncvar: - g["dataset_compliance"][top_ancestor_ncvar]["non-compliance"].setdefault( + g["dataset_compliance"][top_ancestor_ncvar]["attributes"].setdefault( attribute_name, [] ).append(d) @@ -5601,7 +5601,7 @@ def _add_message( e2["reason"][ncvar] = d else: ### print("NON DIRECT PARENT CASE:", ncvar, top_ancestor_ncvar, d) - g1 = g["dataset_compliance"][top_ancestor_ncvar]["non-compliance"].setdefault( + g1 = g["dataset_compliance"][top_ancestor_ncvar]["attributes"].setdefault( ncvar, pre_d ) g1["reason"].setdefault(attribute_name, []).append(d) @@ -5672,15 +5672,15 @@ def _include_component_report( noncompliance_dict = { "CF version": self.implementation.get_cf_version(), - "non-compliance": {}, + "attributes": {}, } if component_report: - set_on = g["dataset_compliance"][parent_ncvar]["non-compliance"] + set_on = g["dataset_compliance"][parent_ncvar]["attributes"] if g["mesh"]: s1 = set_on.setdefault("mesh", d) s2 = s1["reason"].setdefault(ncvar, noncompliance_dict) - s2["non-compliance"] = component_report + s2["attributes"] = component_report else: # Never used? Chage up method to be mesh specific, then? 
set_on.setdefault( @@ -8373,7 +8373,7 @@ def _copy_construct(self, construct_type, parent_ncvar, ncvar): if component_report is not None: for var, report in component_report.items(): g["dataset_compliance"][parent_ncvar][ - "non-compliance" + "attributes" ].setdefault(var, []).extend(report) return self.implementation.copy_construct(g[construct_type][ncvar]) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index dc17e7af2..58664178b 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -296,8 +296,8 @@ def test_standard_names_validation_noncompliant_field(self): pprint(dc_output) # 'ta' is the field variable we test on - self.assertIn("non-compliance", dc_output["ta"]) - noncompliance = dc_output["ta"]["non-compliance"] + self.assertIn("attributes", dc_output["ta"]) + noncompliance = dc_output["ta"]["attributes"] expected_keys = [ # itself? "ta", @@ -392,8 +392,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): pprint(dc_output_1) # 'pa' is the field variable we test on - self.assertIn("non-compliance", dc_output_1["pa"]) - noncompliance = dc_output_1["pa"]["non-compliance"] + self.assertIn("attributes", dc_output_1["pa"]) + noncompliance = dc_output_1["pa"]["attributes"] expected_keys = [ # itself? "pa", @@ -449,8 +449,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): pprint(dc_output_2) # 'ta' is the field variable we test on - self.assertIn("non-compliance", dc_output_2["ta"]) - noncompliance = dc_output_2["ta"]["non-compliance"] + self.assertIn("attributes", dc_output_2["ta"]) + noncompliance = dc_output_2["ta"]["attributes"] expected_keys = [ # itself? 
"ta", @@ -506,8 +506,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): pprint(dc_output_3) # 'v' is the field variable we test on - self.assertIn("non-compliance", dc_output_3["v"]) - noncompliance = dc_output_3["v"]["non-compliance"] + self.assertIn("attributes", dc_output_3["v"]) + noncompliance = dc_output_3["v"]["attributes"] expected_keys = [ # itself? "v", From 5ca8a8b7bcf0d0859d469f0eeb02764640f7859e Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Mon, 10 Nov 2025 23:52:53 +0000 Subject: [PATCH 63/97] Updates to get attributes as list in dataset_compliance output --- cfdm/read_write/netcdf/netcdfread.py | 77 +++++++++++++++++----------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index f558ef892..98b194a7b 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1328,7 +1328,7 @@ def read( "verbose": verbose, # Warnings? "warnings": warnings, - "dataset_compliance": {None: {"attributes": {}}}, + "dataset_compliance": {None: {"attributes": []}}, "component_report": {}, "auxiliary_coordinate": {}, "cell_measure": {}, @@ -3895,7 +3895,7 @@ def _create_field_or_domain( "CF version" ] = self.implementation.get_cf_version() g["dataset_compliance"][field_ncvar]["dimensions"] = dimensions - g["dataset_compliance"][field_ncvar].setdefault("attributes", {}) + g["dataset_compliance"][field_ncvar].setdefault("attributes", []) logger.info( " Converting netCDF variable " @@ -5563,7 +5563,7 @@ def _add_message( noncompliance_dict = { "CF version": self.implementation.get_cf_version(), - "attributes": {}, + "attributes": [], } g["dataset_compliance"].setdefault( top_ancestor_ncvar, noncompliance_dict @@ -5572,9 +5572,9 @@ def _add_message( # Only add a component report if there is need i.e. 
if the direct # parent ncvar is defined so not the same as the top ancestor ncvar if direct_parent_ncvar: - g["dataset_compliance"][top_ancestor_ncvar]["attributes"].setdefault( - attribute_name, [] - ).append(d) + g_next = g["dataset_compliance"][top_ancestor_ncvar][ + "attributes"] + g_next.append(d) # Dicts are optimised for key-value lookup, but this requires # value-key lookup - is there a better way? @@ -5582,17 +5582,6 @@ def _add_message( reverse_varattrs = {v: k for k, v in varattrs.items()} store_attr = reverse_varattrs[ncvar] - # SLB DEV ARGS - # print( - # "ARGS ARE", - # top_ancestor_ncvar, - # ncvar, - # direct_parent_ncvar, - # message, - # attribute, - # dimensions, - # conformance, - # ) parent_ncdims = self._ncdimensions(top_ancestor_ncvar) e = g["component_report"].setdefault( @@ -5601,10 +5590,24 @@ def _add_message( e2["reason"][ncvar] = d else: ### print("NON DIRECT PARENT CASE:", ncvar, top_ancestor_ncvar, d) - g1 = g["dataset_compliance"][top_ancestor_ncvar]["attributes"].setdefault( - ncvar, pre_d - ) - g1["reason"].setdefault(attribute_name, []).append(d) + g1 = g["dataset_compliance"][top_ancestor_ncvar]["attributes"] + # SLB TODO inefficient for querying though? Simple example: + # a = [{1:2}, {3:4}, {5:6}, {7:8}] + # [d[1] for d in a if 1 in d] + # -> to find value for key 1! Need to do something like: + # next((d for d in a if 1 in d), None)[1] = + ###print("Have now g1 of:", g1) + g_next = next((d for d in g1 if ncvar in d), None) + if g_next: + ###print("This time:", g_next[ncvar]) + g_next[ncvar]["reason"].setdefault( + attribute_name, []).append(d) + else: + g1.append({ncvar: pre_d}) + index_next = len(g1) - 1 + g1[index_next][ncvar]["reason"].setdefault( + attribute_name, []).append(d) # correct - comp to above? + # SLB NEW: this shows there are missing parts from the dict! # Some things aren't being added to the component report when # should be... 
@@ -5672,20 +5675,32 @@ def _include_component_report( noncompliance_dict = { "CF version": self.implementation.get_cf_version(), - "attributes": {}, + "attributes": [], } if component_report: - set_on = g["dataset_compliance"][parent_ncvar]["attributes"] - if g["mesh"]: - s1 = set_on.setdefault("mesh", d) - s2 = s1["reason"].setdefault(ncvar, noncompliance_dict) - s2["attributes"] = component_report + g1 = g["dataset_compliance"][parent_ncvar]["attributes"] + g_next = next((d for d in g1 if ncvar in d), None) + if g_next: + if g["mesh"]: + s1 = g_next.setdefault("mesh", d) + s2 = s1["reason"].setdefault(ncvar, noncompliance_dict) + s2["attributes"].append(component_report) + else: + print("Should we get here?") # SLB DEV + # Never used? Chage up method to be mesh specific, then? + #g1.setdefault( + # ncvar, [] + #).append(component_report) else: - # Never used? Chage up method to be mesh specific, then? - set_on.setdefault( - ncvar, [] - ).append(component_report) + if g["mesh"]: + g1.append({"mesh": d}) + index_next = len(g1) - 1 + s2 = g1[index_next][ncvar]["reason"].setdefault( + ncvar, noncompliance_dict) + s2["attributes"].append(component_report) + else: + print("Should we get here?") # SLB DEV, see same above def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): """Find a domain axis identifier for the variable's dimensions. From f23734a693e92a14461d72ec21a5b8ceb47e509d Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Wed, 26 Nov 2025 23:30:25 +0000 Subject: [PATCH 64/97] Add basis of new-form dataset_compliance test structures --- cfdm/test/test_compliance_checking.py | 83 ++++++++++++++++++--------- 1 file changed, 56 insertions(+), 27 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 58664178b..6d16b1862 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -24,9 +24,7 @@ tempfile.mkstemp("_test_compliance_check.nc", dir=os.getcwd())[1] for i in range(n_tmpfiles) ] -( - tmpfile0, -) = tmpfiles +(tmpfile0,) = tmpfiles def _remove_tmpfiles(): @@ -51,7 +49,7 @@ def _create_noncompliant_names_field(compliant_field, temp_file): # - this makes it a certain invalid name and one we can identify as # being tied to the original variable, for testing purposes. bad_name_mapping = { - varname: "badname_"+ varname for varname in field_all_varnames + varname: "badname_" + varname for varname in field_all_varnames } for var_name, bad_std_name in bad_name_mapping.items(): @@ -69,7 +67,8 @@ class ComplianceCheckingTest(unittest.TestCase): good_snames_general_field = cfdm.example_field(1) # TODO set bad names and then write to tempfile and read back in bad_snames_general_field = _create_noncompliant_names_field( - good_snames_general_field, tmpfile0) + good_snames_general_field, tmpfile0 + ) # 1. Create a file with a UGRID field with invalid standard names # on UGRID components, using our core 'UGRID 1' field as a basis @@ -99,6 +98,30 @@ def setUp(self): # < ... test code ... 
> # cfdm.log_level('DISABLE') + # Structures to form the desired outputs + # *Variable dict* + var_dict = { + "attributes": [], + "dimensions": [], + } + + # *Attribute list* + attr_list = [ + { + "variables": {}, + "dimensions": [], + # add value, reason and code + }, + ] + + # *Dimension dict* + dim_list = [ + { + "variables": {}, + # add size, reason and code + } + ] + def test_extract_names_from_xml(self): """Test the `cfvalidation._extract_names_from_xml` function.""" # Check with a small 'dummy' XML table which is the current table @@ -138,27 +161,28 @@ def test_extract_names_from_xml(self): table_end = "" two_name_output = cfdm.cfvalidation._extract_names_from_xml( - two_name_table_start + table_end, include_aliases=False) + two_name_table_start + table_end, include_aliases=False + ) self.assertIsInstance(two_name_output, list) self.assertEqual(len(two_name_output), 2) self.assertIn( "acoustic_area_backscattering_strength_in_sea_water", - two_name_output + two_name_output, ) - self.assertIn( - "acoustic_centre_of_mass_in_sea_water", two_name_output) + self.assertIn("acoustic_centre_of_mass_in_sea_water", two_name_output) # No aliases in this table therefore expect same output as before # when setting 'include_aliases=True' self.assertEqual( cfdm.cfvalidation._extract_names_from_xml( - two_name_table_start + table_end, include_aliases=True), - two_name_output + two_name_table_start + table_end, include_aliases=True + ), + two_name_output, ) aliases_inc_output = cfdm.cfvalidation._extract_names_from_xml( two_name_table_start + include_two_aliases + table_end, - include_aliases=True + include_aliases=True, ) self.assertIsInstance(aliases_inc_output, list) self.assertEqual(len(aliases_inc_output), 4) @@ -166,12 +190,10 @@ def test_extract_names_from_xml(self): self.assertTrue(set(two_name_output).issubset(aliases_inc_output)) # Also should have the aliases this time self.assertIn( - "chlorophyll_concentration_in_sea_water", - aliases_inc_output + 
"chlorophyll_concentration_in_sea_water", aliases_inc_output ) self.assertIn( - "concentration_of_chlorophyll_in_sea_water", - aliases_inc_output + "concentration_of_chlorophyll_in_sea_water", aliases_inc_output ) # When setting 'include_aliases=True' should ignore the two aliases @@ -179,9 +201,9 @@ def test_extract_names_from_xml(self): self.assertEqual( cfdm.cfvalidation._extract_names_from_xml( two_name_table_start + include_two_aliases + table_end, - include_aliases=False + include_aliases=False, ), - two_name_output + two_name_output, ) def test_get_all_current_standard_names(self): @@ -191,9 +213,10 @@ def test_get_all_current_standard_names(self): sn_xml_url = cfdm.cfvalidation._STD_NAME_CURRENT_XML_URL with request.urlopen(sn_xml_url) as response: self.assertEqual( - response.status, 200, + response.status, + 200, "Standard name XML inaccesible: unexpected status code " - f"{response.status} for reference URL of: {sn_xml_url}" + f"{response.status} for reference URL of: {sn_xml_url}", ) # 200 == OK # SLB-DH discuss TODO: what behaviour do we want for the (v. rare) # case that the URL isn't accessible? 
Ideally we can skip standard @@ -218,7 +241,7 @@ def test_get_all_current_standard_names(self): # Check a long name with plenty of underscores is in there too self.assertIn( "integral_wrt_time_of_radioactivity_concentration_of_113Cd_in_air", - output + output, ) # Check a standard name with known alias @@ -241,7 +264,7 @@ def test_get_all_current_standard_names(self): # Check all non-aliases are there, as above self.assertTrue(set(output).issubset(aliases_inc_output)) - + # This time the alias should be included self.assertIn("moles_of_cfc113_in_atmosphere", aliases_inc_output) @@ -293,6 +316,7 @@ def test_standard_names_validation_noncompliant_field(self): # SLB DEV from pprint import pprint + pprint(dc_output) # 'ta' is the field variable we test on @@ -322,7 +346,7 @@ def test_standard_names_validation_noncompliant_field(self): noncompl_dict = noncompliance.get(varname) self.assertIsNotNone( noncompl_dict, - msg=f"Empty non-compliance for variable '{varname}'" + msg=f"Empty non-compliance for variable '{varname}'", ) self.assertIsInstance(noncompl_dict, list) self.assertEqual(len(noncompl_dict), 1) @@ -386,9 +410,12 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): field_all_varnames = list(nc.variables.keys()) print("VERIFY") for varname, var in nc.variables.items(): - print(varname, getattr(var, "standard_name", "No standard_name")) + print( + varname, getattr(var, "standard_name", "No standard_name") + ) from pprint import pprint + pprint(dc_output_1) # 'pa' is the field variable we test on @@ -418,7 +445,7 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): noncompl_dict = noncompliance.get(varname) self.assertIsNotNone( noncompl_dict, - msg=f"Empty non-compliance for variable '{varname}'" + msg=f"Empty non-compliance for variable '{varname}'", ) self.assertIsInstance(noncompl_dict, list) self.assertEqual(len(noncompl_dict), 1) @@ -446,6 +473,7 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): 
self.assertEqual(noncompl_dict, expected_noncompl_dict) from pprint import pprint + pprint(dc_output_2) # 'ta' is the field variable we test on @@ -475,7 +503,7 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): noncompl_dict = noncompliance.get(varname) self.assertIsNotNone( noncompl_dict, - msg=f"Empty non-compliance for variable '{varname}'" + msg=f"Empty non-compliance for variable '{varname}'", ) self.assertIsInstance(noncompl_dict, list) self.assertEqual(len(noncompl_dict), 1) @@ -503,6 +531,7 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): self.assertEqual(noncompl_dict, expected_noncompl_dict) from pprint import pprint + pprint(dc_output_3) # 'v' is the field variable we test on @@ -532,7 +561,7 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): noncompl_dict = noncompliance.get(varname) self.assertIsNotNone( noncompl_dict, - msg=f"Empty non-compliance for variable '{varname}'" + msg=f"Empty non-compliance for variable '{varname}'", ) self.assertIsInstance(noncompl_dict, list) self.assertEqual(len(noncompl_dict), 1) From dd68b587ae09baa62707619e7d3bc472a8af47c8 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Wed, 17 Dec 2025 13:17:36 +0000 Subject: [PATCH 65/97] Simplify by removing some investigate/dev lines for old structure output --- cfdm/read_write/netcdf/netcdfread.py | 108 ++++++-------------------- cfdm/test/test_compliance_checking.py | 95 +++++++++++----------- 2 files changed, 72 insertions(+), 131 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 98b194a7b..f9840b1ff 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -2198,6 +2198,11 @@ def read( if field_or_domain is not None: all_fields_or_domains[ncvar] = field_or_domain + # SLB add cf version - inject at end, not appearing from init setting + print("STOP HERE 0--------------------") + pprint(g["dataset_compliance"]) + print("STOP HERE 1--------------------") + # ------------------------------------------------------------ # Create domain constructs from UGRID mesh topology variables # ------------------------------------------------------------ @@ -3890,10 +3895,13 @@ def _create_field_or_domain( g["domain_ancillary_key"] = {} dimensions = g["variable_dimensions"][field_ncvar] + + # Register the CF Conventions version at top-level only g["dataset_compliance"].setdefault(field_ncvar, {}) - g["dataset_compliance"][field_ncvar][ - "CF version" - ] = self.implementation.get_cf_version() + + g["dataset_compliance"][ + "CF version"] = self.implementation.get_cf_version() + g["dataset_compliance"][field_ncvar]["dimensions"] = dimensions g["dataset_compliance"][field_ncvar].setdefault("attributes", []) @@ -5548,71 +5556,24 @@ def _add_message( else: code = None - # SLB DEV - # Temporary processing whilst get dataset_compliance data structure - # updated - after this we will change attribute argument inputs - # so arguments are made separately - attribute_key = next(iter(attribute)) - var_name, attribute_name = attribute_key.split(":") - attribute_value = attribute[attribute_key] - pre_d = 
{"code": code, "value": attribute_value, "reason": {}} - d = {"code": code, "value": attribute_value, "reason": message} + # DEV MAIN + d = {"code": code, "attribute": attribute, "reason": message} if dimensions is not None: d["dimensions"] = dimensions noncompliance_dict = { - "CF version": self.implementation.get_cf_version(), - "attributes": [], + "attributes": [], } g["dataset_compliance"].setdefault( - top_ancestor_ncvar, noncompliance_dict + top_ancestor_ncvar, noncompliance_dict, ) - ###print("READ VARS ARE", g) - # Only add a component report if there is need i.e. if the direct - # parent ncvar is defined so not the same as the top ancestor ncvar - if direct_parent_ncvar: - g_next = g["dataset_compliance"][top_ancestor_ncvar][ - "attributes"] - g_next.append(d) - - # Dicts are optimised for key-value lookup, but this requires - # value-key lookup - is there a better way? - varattrs = g["variable_attributes"][top_ancestor_ncvar] - reverse_varattrs = {v: k for k, v in varattrs.items()} - store_attr = reverse_varattrs[ncvar] - - parent_ncdims = self._ncdimensions(top_ancestor_ncvar) - - e = g["component_report"].setdefault( - direct_parent_ncvar, noncompliance_dict) - e2 = e.setdefault(store_attr, pre_d) - e2["reason"][ncvar] = d - else: - ### print("NON DIRECT PARENT CASE:", ncvar, top_ancestor_ncvar, d) - g1 = g["dataset_compliance"][top_ancestor_ncvar]["attributes"] - # SLB TODO inefficient for querying though? Simple example: - # a = [{1:2}, {3:4}, {5:6}, {7:8}] - # [d[1] for d in a if 1 in d] - # -> to find value for key 1! 
Need to do something like: - # next((d for d in a if 1 in d), None)[1] = - ###print("Have now g1 of:", g1) - g_next = next((d for d in g1 if ncvar in d), None) - if g_next: - ###print("This time:", g_next[ncvar]) - g_next[ncvar]["reason"].setdefault( - attribute_name, []).append(d) - else: - g1.append({ncvar: pre_d}) - index_next = len(g1) - 1 - g1[index_next][ncvar]["reason"].setdefault( - attribute_name, []).append(d) # correct - comp to above? - # SLB NEW: this shows there are missing parts from the dict! - # Some things aren't being added to the component report when - # should be... - # e = g["component_report"].setdefault( - # top_ancestor_ncvar, noncompliance_dict) + g["dataset_compliance"][top_ancestor_ncvar].setdefault( + ncvar, []).append(d) + if direct_parent_ncvar: + e = g["component_report"].setdefault(direct_parent_ncvar, {}) + e.setdefault(ncvar, []).append(d) if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover @@ -5668,39 +5629,20 @@ def _include_component_report( # SLB rename this 'd' from _add_message to something better # SLB Note: have dropped 'code' because it doesn't make sense to # register a code except at the lowest level... - # SLB NOTE reason is a dict, not a list! d = {"value": attribute, "reason": {}} if dimensions is not None: d["dimensions"] = dimensions noncompliance_dict = { - "CF version": self.implementation.get_cf_version(), + ### "CF version": self.implementation.get_cf_version(), "attributes": [], } + # DEV MAIN if component_report: - g1 = g["dataset_compliance"][parent_ncvar]["attributes"] - g_next = next((d for d in g1 if ncvar in d), None) - if g_next: - if g["mesh"]: - s1 = g_next.setdefault("mesh", d) - s2 = s1["reason"].setdefault(ncvar, noncompliance_dict) - s2["attributes"].append(component_report) - else: - print("Should we get here?") # SLB DEV - # Never used? Chage up method to be mesh specific, then? 
- #g1.setdefault( - # ncvar, [] - #).append(component_report) - else: - if g["mesh"]: - g1.append({"mesh": d}) - index_next = len(g1) - 1 - s2 = g1[index_next][ncvar]["reason"].setdefault( - ncvar, noncompliance_dict) - s2["attributes"].append(component_report) - else: - print("Should we get here?") # SLB DEV, see same above + g["dataset_compliance"][parent_ncvar].setdefault( + ncvar, [] + ).extend(component_report) def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): """Find a domain axis identifier for the variable's dimensions. diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 6d16b1862..9a11af9fb 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -24,7 +24,9 @@ tempfile.mkstemp("_test_compliance_check.nc", dir=os.getcwd())[1] for i in range(n_tmpfiles) ] -(tmpfile0,) = tmpfiles +( + tmpfile0, +) = tmpfiles def _remove_tmpfiles(): @@ -49,7 +51,7 @@ def _create_noncompliant_names_field(compliant_field, temp_file): # - this makes it a certain invalid name and one we can identify as # being tied to the original variable, for testing purposes. bad_name_mapping = { - varname: "badname_" + varname for varname in field_all_varnames + varname: "badname_"+ varname for varname in field_all_varnames } for var_name, bad_std_name in bad_name_mapping.items(): @@ -67,8 +69,7 @@ class ComplianceCheckingTest(unittest.TestCase): good_snames_general_field = cfdm.example_field(1) # TODO set bad names and then write to tempfile and read back in bad_snames_general_field = _create_noncompliant_names_field( - good_snames_general_field, tmpfile0 - ) + good_snames_general_field, tmpfile0) # 1. 
Create a file with a UGRID field with invalid standard names # on UGRID components, using our core 'UGRID 1' field as a basis @@ -161,28 +162,27 @@ def test_extract_names_from_xml(self): table_end = "" two_name_output = cfdm.cfvalidation._extract_names_from_xml( - two_name_table_start + table_end, include_aliases=False - ) + two_name_table_start + table_end, include_aliases=False) self.assertIsInstance(two_name_output, list) self.assertEqual(len(two_name_output), 2) self.assertIn( "acoustic_area_backscattering_strength_in_sea_water", - two_name_output, + two_name_output ) - self.assertIn("acoustic_centre_of_mass_in_sea_water", two_name_output) + self.assertIn( + "acoustic_centre_of_mass_in_sea_water", two_name_output) # No aliases in this table therefore expect same output as before # when setting 'include_aliases=True' self.assertEqual( cfdm.cfvalidation._extract_names_from_xml( - two_name_table_start + table_end, include_aliases=True - ), - two_name_output, + two_name_table_start + table_end, include_aliases=True), + two_name_output ) aliases_inc_output = cfdm.cfvalidation._extract_names_from_xml( two_name_table_start + include_two_aliases + table_end, - include_aliases=True, + include_aliases=True ) self.assertIsInstance(aliases_inc_output, list) self.assertEqual(len(aliases_inc_output), 4) @@ -190,10 +190,12 @@ def test_extract_names_from_xml(self): self.assertTrue(set(two_name_output).issubset(aliases_inc_output)) # Also should have the aliases this time self.assertIn( - "chlorophyll_concentration_in_sea_water", aliases_inc_output + "chlorophyll_concentration_in_sea_water", + aliases_inc_output ) self.assertIn( - "concentration_of_chlorophyll_in_sea_water", aliases_inc_output + "concentration_of_chlorophyll_in_sea_water", + aliases_inc_output ) # When setting 'include_aliases=True' should ignore the two aliases @@ -201,9 +203,9 @@ def test_extract_names_from_xml(self): self.assertEqual( cfdm.cfvalidation._extract_names_from_xml( two_name_table_start + 
include_two_aliases + table_end, - include_aliases=False, + include_aliases=False ), - two_name_output, + two_name_output ) def test_get_all_current_standard_names(self): @@ -213,10 +215,9 @@ def test_get_all_current_standard_names(self): sn_xml_url = cfdm.cfvalidation._STD_NAME_CURRENT_XML_URL with request.urlopen(sn_xml_url) as response: self.assertEqual( - response.status, - 200, + response.status, 200, "Standard name XML inaccesible: unexpected status code " - f"{response.status} for reference URL of: {sn_xml_url}", + f"{response.status} for reference URL of: {sn_xml_url}" ) # 200 == OK # SLB-DH discuss TODO: what behaviour do we want for the (v. rare) # case that the URL isn't accessible? Ideally we can skip standard @@ -241,7 +242,7 @@ def test_get_all_current_standard_names(self): # Check a long name with plenty of underscores is in there too self.assertIn( "integral_wrt_time_of_radioactivity_concentration_of_113Cd_in_air", - output, + output ) # Check a standard name with known alias @@ -315,13 +316,12 @@ def test_standard_names_validation_noncompliant_field(self): dc_output = f.dataset_compliance() # SLB DEV - from pprint import pprint - - pprint(dc_output) + # from pprint import pprint + # pprint(dc_output) # 'ta' is the field variable we test on - self.assertIn("attributes", dc_output["ta"]) - noncompliance = dc_output["ta"]["attributes"] + self.assertIn("non-compliance", dc_output["ta"]) + noncompliance = dc_output["ta"]["non-compliance"] expected_keys = [ # itself? 
"ta", @@ -346,7 +346,7 @@ def test_standard_names_validation_noncompliant_field(self): noncompl_dict = noncompliance.get(varname) self.assertIsNotNone( noncompl_dict, - msg=f"Empty non-compliance for variable '{varname}'", + msg=f"Empty non-compliance for variable '{varname}'" ) self.assertIsInstance(noncompl_dict, list) self.assertEqual(len(noncompl_dict), 1) @@ -410,17 +410,17 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): field_all_varnames = list(nc.variables.keys()) print("VERIFY") for varname, var in nc.variables.items(): - print( - varname, getattr(var, "standard_name", "No standard_name") - ) - - from pprint import pprint + print(varname, getattr(var, "standard_name", "No standard_name")) - pprint(dc_output_1) + # from pprint import pprint + # print("DC OUTPUT 1") + # pprint(dc_output_1) - # 'pa' is the field variable we test on - self.assertIn("attributes", dc_output_1["pa"]) - noncompliance = dc_output_1["pa"]["attributes"] + # # 'pa' is the field variable we test on + # self.assertIn("non-compliance", dc_output_1["pa"]) + # noncompliance = dc_output_1 ###["pa"]["non-compliance"] + # print("^^^^^^^^^^^^^" * 20, "HERE HAVE NONCOMP DICT OF:") + # pprint(noncompliance) expected_keys = [ # itself? 
"pa", @@ -443,9 +443,10 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): ] for varname in expected_keys: noncompl_dict = noncompliance.get(varname) + self.assertIsNotNone( noncompl_dict, - msg=f"Empty non-compliance for variable '{varname}'", + msg=f"Empty non-compliance for variable '{varname}'" ) self.assertIsInstance(noncompl_dict, list) self.assertEqual(len(noncompl_dict), 1) @@ -472,13 +473,12 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): # on per-key-value checks above self.assertEqual(noncompl_dict, expected_noncompl_dict) - from pprint import pprint - - pprint(dc_output_2) + # from pprint import pprint + # pprint(dc_output_2) # 'ta' is the field variable we test on - self.assertIn("attributes", dc_output_2["ta"]) - noncompliance = dc_output_2["ta"]["attributes"] + self.assertIn("non-compliance", dc_output_2["ta"]) + noncompliance = dc_output_2["ta"]["non-compliance"] expected_keys = [ # itself? "ta", @@ -503,7 +503,7 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): noncompl_dict = noncompliance.get(varname) self.assertIsNotNone( noncompl_dict, - msg=f"Empty non-compliance for variable '{varname}'", + msg=f"Empty non-compliance for variable '{varname}'" ) self.assertIsInstance(noncompl_dict, list) self.assertEqual(len(noncompl_dict), 1) @@ -530,13 +530,12 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): # on per-key-value checks above self.assertEqual(noncompl_dict, expected_noncompl_dict) - from pprint import pprint - - pprint(dc_output_3) + # from pprint import pprint + # pprint(dc_output_3) # 'v' is the field variable we test on - self.assertIn("attributes", dc_output_3["v"]) - noncompliance = dc_output_3["v"]["attributes"] + self.assertIn("non-compliance", dc_output_3["v"]) + noncompliance = dc_output_3["v"]["non-compliance"] expected_keys = [ # itself? 
"v", @@ -561,7 +560,7 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): noncompl_dict = noncompliance.get(varname) self.assertIsNotNone( noncompl_dict, - msg=f"Empty non-compliance for variable '{varname}'", + msg=f"Empty non-compliance for variable '{varname}'" ) self.assertIsInstance(noncompl_dict, list) self.assertEqual(len(noncompl_dict), 1) From 43afa11598cc35e5854dc82e81b8fcd19e4d158f Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 17 Dec 2025 13:37:35 +0000 Subject: [PATCH 66/97] Update dicts for forming expected outputs in compliance-checking test --- cfdm/test/test_compliance_checking.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 9a11af9fb..60c954508 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -101,27 +101,23 @@ def setUp(self): # Structures to form the desired outputs # *Variable dict* - var_dict = { - "attributes": [], - "dimensions": [], + per_var_dict = { + "attributes": {}, + "dimensions": {}, } # *Attribute list* - attr_list = [ - { + per_attr_dict = { "variables": {}, - "dimensions": [], - # add value, reason and code - }, - ] + "dimensions": {}, + # add value (string), and optionally reason and code + } # *Dimension dict* - dim_list = [ - { + per_dim_dict = { "variables": {}, - # add size, reason and code - } - ] + # add size (int or None), and optionally reason and code + } def test_extract_names_from_xml(self): """Test the `cfvalidation._extract_names_from_xml` function.""" From 48bf0d1e6982e27de138fa284b7012af59b10901 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Wed, 17 Dec 2025 13:46:00 +0000 Subject: [PATCH 67/97] Prevent rogue 'None' key from emerging in dataset_compliance output --- cfdm/read_write/netcdf/netcdfread.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index f9840b1ff..af79cd663 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1328,7 +1328,7 @@ def read( "verbose": verbose, # Warnings? "warnings": warnings, - "dataset_compliance": {None: {"attributes": []}}, + "dataset_compliance": {}, "component_report": {}, "auxiliary_coordinate": {}, "cell_measure": {}, @@ -3903,7 +3903,7 @@ def _create_field_or_domain( "CF version"] = self.implementation.get_cf_version() g["dataset_compliance"][field_ncvar]["dimensions"] = dimensions - g["dataset_compliance"][field_ncvar].setdefault("attributes", []) + ###g["dataset_compliance"][field_ncvar].setdefault("attributes", []) logger.info( " Converting netCDF variable " @@ -5158,7 +5158,7 @@ def _create_field_or_domain( # ------------------------------------------------------------- # Add the structural read report to the field/domain dataset_compliance = g["dataset_compliance"][field_ncvar] - components = dataset_compliance["attributes"] + components = dataset_compliance # SLB edited if components: dataset_compliance = {field_ncvar: dataset_compliance} else: @@ -5557,14 +5557,16 @@ def _add_message( code = None # DEV MAIN - d = {"code": code, "attribute": attribute, "reason": message} + attribute_key = next(iter(attribute)) + var_name, attribute_name = attribute_key.split(":") + attribute_value = attribute[attribute_key] + d = {"code": code, "attribute": attribute_value, "reason": message} + print("VAR NAME IS", var_name) if dimensions is not None: d["dimensions"] = dimensions - noncompliance_dict = { - "attributes": [], - } + noncompliance_dict = {} g["dataset_compliance"].setdefault( 
top_ancestor_ncvar, noncompliance_dict, ) @@ -5635,7 +5637,6 @@ def _include_component_report( noncompliance_dict = { ### "CF version": self.implementation.get_cf_version(), - "attributes": [], } # DEV MAIN @@ -8329,9 +8330,9 @@ def _copy_construct(self, construct_type, parent_ncvar, ncvar): if component_report is not None: for var, report in component_report.items(): - g["dataset_compliance"][parent_ncvar][ - "attributes" - ].setdefault(var, []).extend(report) + # SLB edited + g["dataset_compliance"][parent_ncvar].setdefault( + var, []).extend(report) return self.implementation.copy_construct(g[construct_type][ncvar]) From b4985d56e149e333a814a21c8fd5abbd2d71c2fb Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 17 Dec 2025 16:02:19 +0000 Subject: [PATCH 68/97] Register attibute names in dataset_compliance output --- cfdm/read_write/netcdf/netcdfread.py | 30 ++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index af79cd663..bac6332da 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1328,6 +1328,7 @@ def read( "verbose": verbose, # Warnings? "warnings": warnings, + # SLB maybe inject the CF version here, straight away? "dataset_compliance": {}, "component_report": {}, "auxiliary_coordinate": {}, @@ -5557,11 +5558,36 @@ def _add_message( code = None # DEV MAIN + per_var_dict = { + "attributes": {}, + "dimensions": {}, + } + + # *Attribute list* + per_attr_dict = { + "variables": {}, + "dimensions": {}, + # add value (string), and optionally reason and code + } + + # *Dimension dict* + per_dim_dict = { + "variables": {}, + # add size (int or None), and optionally reason and code + } + attribute_key = next(iter(attribute)) var_name, attribute_name = attribute_key.split(":") + # TODO need better way to access this - inefficient, should be able to + # use an in-built function! 
attribute_value = attribute[attribute_key] - d = {"code": code, "attribute": attribute_value, "reason": message} - print("VAR NAME IS", var_name) + + d = per_var_dict + if code: + per_var_dict["code"] = code + if message: + per_var_dict["reason"] = message + per_var_dict["attributes"][attribute_name] = attribute_value if dimensions is not None: d["dimensions"] = dimensions From 2efde4e2bb4d0e063cee8a132f93af57e74502f9 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 17 Dec 2025 17:02:41 +0000 Subject: [PATCH 69/97] Update dataset_compliance output for nested netCDF component form --- cfdm/read_write/netcdf/netcdfread.py | 46 ++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index bac6332da..c71c3b10f 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5597,8 +5597,12 @@ def _add_message( top_ancestor_ncvar, noncompliance_dict, ) - g["dataset_compliance"][top_ancestor_ncvar].setdefault( - ncvar, []).append(d) + g_top = g["dataset_compliance"][top_ancestor_ncvar] # e.g. pa + g_down = g_top.setdefault("attributes", {}) # e.g. mesh + g_top["attributes"][attribute_name] = per_attr_dict + g_top["attributes"][attribute_name]["variables"][var_name] = d # e.g. Mesh2 + + # IGNORE FOR NOW! 
if direct_parent_ncvar: e = g["component_report"].setdefault(direct_parent_ncvar, {}) e.setdefault(ncvar, []).append(d) @@ -5650,26 +5654,42 @@ def _include_component_report( `None` """ - g = self.read_vars + per_var_dict = { + "attributes": {}, + "dimensions": {}, + } + + # *Attribute list* + per_attr_dict = { + "variables": {}, + "dimensions": {}, + # add value (string), and optionally reason and code + } + + # *Dimension dict* + per_dim_dict = { + "variables": {}, + # add size (int or None), and optionally reason and code + } + + g = self.read_vars component_report = g["component_report"].get(ncvar) - # SLB rename this 'd' from _add_message to something better - # SLB Note: have dropped 'code' because it doesn't make sense to - # register a code except at the lowest level... - d = {"value": attribute, "reason": {}} + d = per_var_dict if dimensions is not None: d["dimensions"] = dimensions - noncompliance_dict = { - ### "CF version": self.implementation.get_cf_version(), - } + # Unlike for 'attribute' input to _add_message, this 'attribute' is the + # the attribute_name only and not "var_name:attribute_name" to split # DEV MAIN if component_report: - g["dataset_compliance"][parent_ncvar].setdefault( - ncvar, [] - ).extend(component_report) + g_parent = g["dataset_compliance"][parent_ncvar]["attributes"] + g_parent.setdefault(attribute, per_attr_dict) + g_parent[attribute]["variables"].setdefault(ncvar, {}) + g_parent[attribute]["variables"][ncvar].update( + component_report) def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): """Find a domain axis identifier for the variable's dimensions. From b2ce8244628fd7c1a91ceec5ee0484c94fd1e11d Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Wed, 17 Dec 2025 17:56:00 +0000 Subject: [PATCH 70/97] Update dataset_compliance output to have dict of dims --- cfdm/read_write/netcdf/netcdfread.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index c71c3b10f..2cd5b652a 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -3903,8 +3903,13 @@ def _create_field_or_domain( g["dataset_compliance"][ "CF version"] = self.implementation.get_cf_version() - g["dataset_compliance"][field_ncvar]["dimensions"] = dimensions - ###g["dataset_compliance"][field_ncvar].setdefault("attributes", []) + # DEV MAIN + # Create dimensions dict and populate with sizes + g_dims = g["dataset_compliance"][field_ncvar] + g_dims.update({"dimensions": {}}) + print("G DIMS IS", g_dims) + for dim in dimensions: + g_dims["dimensions"][dim] = {} logger.info( " Converting netCDF variable " @@ -5590,7 +5595,10 @@ def _add_message( per_var_dict["attributes"][attribute_name] = attribute_value if dimensions is not None: - d["dimensions"] = dimensions + d["dimensions"] + for dim in dimensions: + d["dimensions"].update({dim: per_dim_dict}) + print("NOW DIM DICT IS:", d["dimensions"]) noncompliance_dict = {} g["dataset_compliance"].setdefault( @@ -5672,18 +5680,13 @@ def _include_component_report( "variables": {}, # add size (int or None), and optionally reason and code } + # DEV MAIN g = self.read_vars component_report = g["component_report"].get(ncvar) - d = per_var_dict - if dimensions is not None: - d["dimensions"] = dimensions - # Unlike for 'attribute' input to _add_message, this 'attribute' is the # the attribute_name only and not "var_name:attribute_name" to split - - # DEV MAIN if component_report: g_parent = g["dataset_compliance"][parent_ncvar]["attributes"] g_parent.setdefault(attribute, per_attr_dict) From bc05df8dcd9278cfb2ef4b19ef0f7baedf89a415 Mon Sep 17 
00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 17 Dec 2025 22:46:59 +0000 Subject: [PATCH 71/97] Update dataset_compliance output to register dimension sizes --- cfdm/read_write/netcdf/netcdfread.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 2cd5b652a..1b1396ecd 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -3903,13 +3903,11 @@ def _create_field_or_domain( g["dataset_compliance"][ "CF version"] = self.implementation.get_cf_version() - # DEV MAIN # Create dimensions dict and populate with sizes - g_dims = g["dataset_compliance"][field_ncvar] - g_dims.update({"dimensions": {}}) - print("G DIMS IS", g_dims) - for dim in dimensions: - g_dims["dimensions"][dim] = {} + g["dataset_compliance"][field_ncvar]["dimensions"] = { + dim: {"size": g["internal_dimension_sizes"][dim]} for + dim in dimensions + } logger.info( " Converting netCDF variable " @@ -5594,11 +5592,12 @@ def _add_message( per_var_dict["reason"] = message per_var_dict["attributes"][attribute_name] = attribute_value + # Create dimensions dict and populate with sizes if dimensions is not None: - d["dimensions"] - for dim in dimensions: - d["dimensions"].update({dim: per_dim_dict}) - print("NOW DIM DICT IS:", d["dimensions"]) + d["dimensions"] = { + dim: {"size": g["internal_dimension_sizes"][dim]} for + dim in dimensions + } noncompliance_dict = {} g["dataset_compliance"].setdefault( From ffccd71bbb874effa602c168d3b9d596e0422610 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Thu, 18 Dec 2025 00:50:49 +0000 Subject: [PATCH 72/97] Update dataset_compliance to add child attributes to output structure --- cfdm/read_write/netcdf/netcdfread.py | 36 ++++++++++++++++++---------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 1b1396ecd..93c271daa 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5580,17 +5580,17 @@ def _add_message( } attribute_key = next(iter(attribute)) - var_name, attribute_name = attribute_key.split(":") + attribute_name = attribute_key.split(":")[1] # TODO need better way to access this - inefficient, should be able to # use an in-built function! attribute_value = attribute[attribute_key] d = per_var_dict if code: - per_var_dict["code"] = code + d["code"] = code if message: - per_var_dict["reason"] = message - per_var_dict["attributes"][attribute_name] = attribute_value + d["reason"] = message + d["attributes"][attribute_name] = attribute_value # Create dimensions dict and populate with sizes if dimensions is not None: @@ -5601,18 +5601,30 @@ def _add_message( noncompliance_dict = {} g["dataset_compliance"].setdefault( - top_ancestor_ncvar, noncompliance_dict, - ) + top_ancestor_ncvar, noncompliance_dict) g_top = g["dataset_compliance"][top_ancestor_ncvar] # e.g. pa - g_down = g_top.setdefault("attributes", {}) # e.g. mesh - g_top["attributes"][attribute_name] = per_attr_dict - g_top["attributes"][attribute_name]["variables"][var_name] = d # e.g. Mesh2 + g_top.setdefault("attributes", {}) + g_top["attributes"][attribute_name] = per_attr_dict # e.g. mesh key + # TODO should use update after setdefault also for variables child + # evel below (see approach below in 'if direct_parent_ncvar' block) + g_top["attributes"][attribute_name]["variables"][ncvar] = d # e.g. Mesh2 - # IGNORE FOR NOW! 
+ # DEV MAIN 2 if direct_parent_ncvar: - e = g["component_report"].setdefault(direct_parent_ncvar, {}) - e.setdefault(ncvar, []).append(d) + # Dicts are optimised for key-value lookup, but this requires + # value-key lookup - find a better way to get relevant attr using + # functionlity in this module + varattrs = g["variable_attributes"][top_ancestor_ncvar] + reverse_varattrs = {v: k for k, v in varattrs.items()} + store_attr = reverse_varattrs[ncvar] + + e = g["component_report"].setdefault( + direct_parent_ncvar, {}) # e.g. Mesh2 + e.setdefault("attributes", {}) + # E.g. edge_node_connectivity key: + e["attributes"][store_attr] = per_attr_dict + e["attributes"][store_attr]["variables"][ncvar].update(d) if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover From 66f8ca3879eafd0164f55a5803cd02aa1ebcef53 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 18 Dec 2025 14:22:37 +0000 Subject: [PATCH 73/97] Update dataset_compliance to end with list of code, reason & value --- cfdm/read_write/netcdf/netcdfread.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 93c271daa..98e429191 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5585,12 +5585,20 @@ def _add_message( # use an in-built function! 
attribute_value = attribute[attribute_key] - d = per_var_dict + # Form a single issue to register (message, code and attr value) + one_issue_dict = {"value": attribute_value} if code: - d["code"] = code + one_issue_dict["code"] = code if message: - d["reason"] = message - d["attributes"][attribute_name] = attribute_value + one_issue_dict["reason"] = message + + # Form lowest-level dict which reports an ultimate issue via a 'reason' + # message, code and attribute value against the attribute name key + d = per_var_dict + # Add message to list of reasons: there may be more than one + # issue/reason listed per attribute! + d["attributes"].setdefault(attribute_name, []) + d["attributes"][attribute_name].append(one_issue_dict) # Create dimensions dict and populate with sizes if dimensions is not None: @@ -5608,6 +5616,11 @@ def _add_message( g_top["attributes"][attribute_name] = per_attr_dict # e.g. mesh key # TODO should use update after setdefault also for variables child # evel below (see approach below in 'if direct_parent_ncvar' block) + + # SLB: is this not repeating nest of attr as per above in d? + print("////////////////////// D IS", d) + print("////////////////////// G_TOP IS", g_top) + g_top["attributes"][attribute_name]["variables"][ncvar] = d # e.g. Mesh2 # DEV MAIN 2 @@ -5633,7 +5646,7 @@ def _add_message( logger.info( " Error processing netCDF variable " - f"{ncvar}{dimensions}: {d['reason']}" + f"{ncvar}{dimensions}: {message}" ) # pragma: no cover return d From 5fc0f976fa0e46414847e55dc7577abdde90e482 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Thu, 18 Dec 2025 14:54:17 +0000 Subject: [PATCH 74/97] Update dataset_compliance to improve & tidy parent compliance processing --- cfdm/read_write/netcdf/netcdfread.py | 48 +++++++++++----------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 98e429191..a66a1a261 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5593,10 +5593,10 @@ def _add_message( one_issue_dict["reason"] = message # Form lowest-level dict which reports an ultimate issue via a 'reason' - # message, code and attribute value against the attribute name key + # message, code and attribute value against the attribute name key. + # These go into a *list*, since there may be more than one issue hence + # reason message and corresponding code listed per attribute. d = per_var_dict - # Add message to list of reasons: there may be more than one - # issue/reason listed per attribute! d["attributes"].setdefault(attribute_name, []) d["attributes"][attribute_name].append(one_issue_dict) @@ -5607,23 +5607,13 @@ def _add_message( dim in dimensions } - noncompliance_dict = {} - g["dataset_compliance"].setdefault( - top_ancestor_ncvar, noncompliance_dict) + # Store the issue on the immediate variable. The issue will be + # processed to be stored on ancestor variables via the + # logic in _include_component_report and in the 'direct_parent_ncvar' + # block below if a 'direct_parent_ncvar' is provided. + g["dataset_compliance"].setdefault(ncvar, {}) + g["dataset_compliance"][ncvar].update(d) - g_top = g["dataset_compliance"][top_ancestor_ncvar] # e.g. pa - g_top.setdefault("attributes", {}) - g_top["attributes"][attribute_name] = per_attr_dict # e.g. 
mesh key - # TODO should use update after setdefault also for variables child - # evel below (see approach below in 'if direct_parent_ncvar' block) - - # SLB: is this not repeating nest of attr as per above in d? - print("////////////////////// D IS", d) - print("////////////////////// G_TOP IS", g_top) - - g_top["attributes"][attribute_name]["variables"][ncvar] = d # e.g. Mesh2 - - # DEV MAIN 2 if direct_parent_ncvar: # Dicts are optimised for key-value lookup, but this requires # value-key lookup - find a better way to get relevant attr using @@ -5632,12 +5622,11 @@ def _add_message( reverse_varattrs = {v: k for k, v in varattrs.items()} store_attr = reverse_varattrs[ncvar] - e = g["component_report"].setdefault( - direct_parent_ncvar, {}) # e.g. Mesh2 - e.setdefault("attributes", {}) - # E.g. edge_node_connectivity key: - e["attributes"][store_attr] = per_attr_dict - e["attributes"][store_attr]["variables"][ncvar].update(d) + g_parent = g["component_report"].setdefault(direct_parent_ncvar, {}) + g_parent.setdefault("attributes", {}) + g_parent["attributes"].setdefault(store_attr, per_attr_dict) + g_parent["attributes"][store_attr]["variables"].setdefault(ncvar, {}) + g_parent["attributes"][store_attr]["variables"][ncvar].update(d) if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover @@ -5712,10 +5701,11 @@ def _include_component_report( # Unlike for 'attribute' input to _add_message, this 'attribute' is the # the attribute_name only and not "var_name:attribute_name" to split if component_report: - g_parent = g["dataset_compliance"][parent_ncvar]["attributes"] - g_parent.setdefault(attribute, per_attr_dict) - g_parent[attribute]["variables"].setdefault(ncvar, {}) - g_parent[attribute]["variables"][ncvar].update( + g_parent = g["dataset_compliance"][parent_ncvar] + g_parent.setdefault("attributes", {}) + g_parent["attributes"][attribute] = per_attr_dict # e.g. 
mesh key + g_parent["attributes"][attribute]["variables"].setdefault(ncvar, {}) + g_parent["attributes"][attribute]["variables"][ncvar].update( component_report) def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): From 6baaa38af144716febe84dbea4d20e10ec7a9d3c Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 18 Dec 2025 15:27:17 +0000 Subject: [PATCH 75/97] Investigate/dev logic towards final new structure output --- cfdm/read_write/netcdf/netcdfread.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index a66a1a261..37e6c1e55 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5611,8 +5611,19 @@ def _add_message( # processed to be stored on ancestor variables via the # logic in _include_component_report and in the 'direct_parent_ncvar' # block below if a 'direct_parent_ncvar' is provided. - g["dataset_compliance"].setdefault(ncvar, {}) - g["dataset_compliance"][ncvar].update(d) + ###g["dataset_compliance"].setdefault(ncvar, {}) + ###g["dataset_compliance"][ncvar].update(d) + + ###g_top["dataset_compliance"].setdefault(top_ancestor_ncvar, {}) + g["dataset_compliance"].setdefault(top_ancestor_ncvar, {}) + g_top = g["dataset_compliance"][top_ancestor_ncvar] + g_top.setdefault("attributes", {}) + g_top["attributes"][attribute_name] = per_attr_dict # e.g. mesh key + # TODO should use update after setdefault also for variables child + # evel below (see approach below in 'if direct_parent_ncvar' block) + g_top["attributes"][attribute_name]["variables"].setdefault(ncvar, {}) + g_top["attributes"][attribute_name]["variables"][ncvar].update(d) # e.g. 
Mesh2 + # END NEW if direct_parent_ncvar: # Dicts are optimised for key-value lookup, but this requires @@ -5621,6 +5632,7 @@ def _add_message( varattrs = g["variable_attributes"][top_ancestor_ncvar] reverse_varattrs = {v: k for k, v in varattrs.items()} store_attr = reverse_varattrs[ncvar] + print("\nFOR CASE:", top_ancestor_ncvar, ncvar, direct_parent_ncvar) g_parent = g["component_report"].setdefault(direct_parent_ncvar, {}) g_parent.setdefault("attributes", {}) @@ -5703,7 +5715,7 @@ def _include_component_report( if component_report: g_parent = g["dataset_compliance"][parent_ncvar] g_parent.setdefault("attributes", {}) - g_parent["attributes"][attribute] = per_attr_dict # e.g. mesh key + g_parent["attributes"].setdefault(attribute, per_attr_dict) g_parent["attributes"][attribute]["variables"].setdefault(ncvar, {}) g_parent["attributes"][attribute]["variables"][ncvar].update( component_report) From c2a35db8262fc057720b1e28256ead33452642d5 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 18 Dec 2025 17:23:05 +0000 Subject: [PATCH 76/97] Update dataset_compliance to cater for parent-less ncvar case --- cfdm/read_write/netcdf/netcdfread.py | 59 +++++++++++++++------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 37e6c1e55..ae49bdb72 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5592,13 +5592,18 @@ def _add_message( if message: one_issue_dict["reason"] = message - # Form lowest-level dict which reports an ultimate issue via a 'reason' - # message, code and attribute value against the attribute name key. - # These go into a *list*, since there may be more than one issue hence - # reason message and corresponding code listed per attribute. 
- d = per_var_dict - d["attributes"].setdefault(attribute_name, []) - d["attributes"][attribute_name].append(one_issue_dict) + # If the top_ancestor_ncvar and ncvar are the same, don't need to + # process under attributes ??? SLB + if top_ancestor_ncvar == ncvar: + d = one_issue_dict.copy() + else: + # Form lowest-level dict which reports an ultimate issue via a 'reason' + # message, code and attribute value against the attribute name key. + # These go into a *list*, since there may be more than one issue hence + # reason message and corresponding code listed per attribute. + d = per_var_dict + d["attributes"].setdefault(attribute_name, []) + d["attributes"][attribute_name].append(one_issue_dict) # Create dimensions dict and populate with sizes if dimensions is not None: @@ -5607,32 +5612,32 @@ def _add_message( dim in dimensions } - # Store the issue on the immediate variable. The issue will be - # processed to be stored on ancestor variables via the - # logic in _include_component_report and in the 'direct_parent_ncvar' - # block below if a 'direct_parent_ncvar' is provided. - ###g["dataset_compliance"].setdefault(ncvar, {}) - ###g["dataset_compliance"][ncvar].update(d) - - ###g_top["dataset_compliance"].setdefault(top_ancestor_ncvar, {}) - g["dataset_compliance"].setdefault(top_ancestor_ncvar, {}) - g_top = g["dataset_compliance"][top_ancestor_ncvar] - g_top.setdefault("attributes", {}) - g_top["attributes"][attribute_name] = per_attr_dict # e.g. mesh key - # TODO should use update after setdefault also for variables child - # evel below (see approach below in 'if direct_parent_ncvar' block) - g_top["attributes"][attribute_name]["variables"].setdefault(ncvar, {}) - g_top["attributes"][attribute_name]["variables"][ncvar].update(d) # e.g. 
Mesh2 - # END NEW + # Process issues emerging on or via attributes + for g_dict in (g["dataset_compliance"],): ### g["component_report"],): + g_dict.setdefault(top_ancestor_ncvar, {}) + g_top = g_dict[top_ancestor_ncvar] + + # If the top_ancestor_ncvar and ncvar are the same, there is a + # problem with an ncvar with no parents - so store directly on ncvar + # TODO should probably make the top_ancestor_ncvar optional + # so that we don't need to do this check! + if top_ancestor_ncvar == ncvar: + g["dataset_compliance"][top_ancestor_ncvar].update(d) + return d + + g_top.setdefault("attributes", {}) + g_top["attributes"][attribute_name] = per_attr_dict # e.g. mesh key + g_top["attributes"][attribute_name]["variables"].setdefault(ncvar, {}) + g_top["attributes"][attribute_name]["variables"][ncvar].update(d) # e.g. Mesh2 if direct_parent_ncvar: + ### print("\nFOR CASE:", top_ancestor_ncvar, ncvar, direct_parent_ncvar) # Dicts are optimised for key-value lookup, but this requires # value-key lookup - find a better way to get relevant attr using # functionlity in this module varattrs = g["variable_attributes"][top_ancestor_ncvar] reverse_varattrs = {v: k for k, v in varattrs.items()} store_attr = reverse_varattrs[ncvar] - print("\nFOR CASE:", top_ancestor_ncvar, ncvar, direct_parent_ncvar) g_parent = g["component_report"].setdefault(direct_parent_ncvar, {}) g_parent.setdefault("attributes", {}) @@ -5705,14 +5710,14 @@ def _include_component_report( "variables": {}, # add size (int or None), and optionally reason and code } - # DEV MAIN g = self.read_vars component_report = g["component_report"].get(ncvar) # Unlike for 'attribute' input to _add_message, this 'attribute' is the # the attribute_name only and not "var_name:attribute_name" to split - if component_report: + if component_report: ###: + ### print(f"CR| HAVE NCVAR {ncvar} AND ATTR {attribute}") g_parent = g["dataset_compliance"][parent_ncvar] g_parent.setdefault("attributes", {}) 
g_parent["attributes"].setdefault(attribute, per_attr_dict) From 794d810d134bd05ea61a7effbbb83c646fb145a2 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Thu, 18 Dec 2025 17:42:13 +0000 Subject: [PATCH 77/97] Fix top-level attribute issue emerging in dataset_compliance output --- cfdm/read_write/netcdf/netcdfread.py | 31 ++++++++++------------------ 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index ae49bdb72..5cb2500bf 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5594,7 +5594,7 @@ def _add_message( # If the top_ancestor_ncvar and ncvar are the same, don't need to # process under attributes ??? SLB - if top_ancestor_ncvar == ncvar: + if top_ancestor_ncvar == ncvar or top_ancestor_ncvar is None: d = one_issue_dict.copy() else: # Form lowest-level dict which reports an ultimate issue via a 'reason' @@ -5613,29 +5613,21 @@ def _add_message( } # Process issues emerging on or via attributes - for g_dict in (g["dataset_compliance"],): ### g["component_report"],): - g_dict.setdefault(top_ancestor_ncvar, {}) - g_top = g_dict[top_ancestor_ncvar] - - # If the top_ancestor_ncvar and ncvar are the same, there is a - # problem with an ncvar with no parents - so store directly on ncvar - # TODO should probably make the top_ancestor_ncvar optional - # so that we don't need to do this check! - if top_ancestor_ncvar == ncvar: - g["dataset_compliance"][top_ancestor_ncvar].update(d) - return d - - g_top.setdefault("attributes", {}) - g_top["attributes"][attribute_name] = per_attr_dict # e.g. mesh key - g_top["attributes"][attribute_name]["variables"].setdefault(ncvar, {}) - g_top["attributes"][attribute_name]["variables"][ncvar].update(d) # e.g. 
Mesh2 + g["dataset_compliance"].setdefault(top_ancestor_ncvar, {}) + g_top = g["dataset_compliance"][top_ancestor_ncvar] + + # If the top_ancestor_ncvar and ncvar are the same, there is a + # problem with an ncvar with no parents - so store directly on ncvar + # TODO should probably make the top_ancestor_ncvar optional + # so that we don't need to do this check! + g["dataset_compliance"][top_ancestor_ncvar].update(d) if direct_parent_ncvar: ### print("\nFOR CASE:", top_ancestor_ncvar, ncvar, direct_parent_ncvar) # Dicts are optimised for key-value lookup, but this requires # value-key lookup - find a better way to get relevant attr using # functionlity in this module - varattrs = g["variable_attributes"][top_ancestor_ncvar] + varattrs = g["variable_attributes"][direct_parent_ncvar] ###top_ancestor_ncvar] reverse_varattrs = {v: k for k, v in varattrs.items()} store_attr = reverse_varattrs[ncvar] @@ -5716,8 +5708,7 @@ def _include_component_report( # Unlike for 'attribute' input to _add_message, this 'attribute' is the # the attribute_name only and not "var_name:attribute_name" to split - if component_report: ###: - ### print(f"CR| HAVE NCVAR {ncvar} AND ATTR {attribute}") + if component_report: g_parent = g["dataset_compliance"][parent_ncvar] g_parent.setdefault("attributes", {}) g_parent["attributes"].setdefault(attribute, per_attr_dict) From 36845c449f8b1fa4dfad17c250f6eeb16ac2639a Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Fri, 19 Dec 2025 00:14:55 +0000 Subject: [PATCH 78/97] Tidying of netcdfread after dataset_compliance update work --- cfdm/read_write/netcdf/netcdfread.py | 47 ++++++++++----------------- cfdm/test/test_compliance_checking.py | 6 ++-- 2 files changed, 20 insertions(+), 33 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 5cb2500bf..563314f1a 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5560,23 +5560,14 @@ def _add_message( else: code = None - # DEV MAIN per_var_dict = { "attributes": {}, "dimensions": {}, } - - # *Attribute list* per_attr_dict = { "variables": {}, "dimensions": {}, - # add value (string), and optionally reason and code - } - - # *Dimension dict* - per_dim_dict = { - "variables": {}, - # add size (int or None), and optionally reason and code + # The value (string), and optionally reason and code, will be added } attribute_key = next(iter(attribute)) @@ -5607,13 +5598,13 @@ def _add_message( # Create dimensions dict and populate with sizes if dimensions is not None: - d["dimensions"] = { + d["dimensions"].update({ dim: {"size": g["internal_dimension_sizes"][dim]} for dim in dimensions - } + }) # Process issues emerging on or via attributes - g["dataset_compliance"].setdefault(top_ancestor_ncvar, {}) + g["dataset_compliance"].setdefault(top_ancestor_ncvar, per_attr_dict) g_top = g["dataset_compliance"][top_ancestor_ncvar] # If the top_ancestor_ncvar and ncvar are the same, there is a @@ -5632,11 +5623,13 @@ def _add_message( store_attr = reverse_varattrs[ncvar] g_parent = g["component_report"].setdefault(direct_parent_ncvar, {}) - g_parent.setdefault("attributes", {}) + g_parent.setdefault("attributes", per_attr_dict) g_parent["attributes"].setdefault(store_attr, per_attr_dict) g_parent["attributes"][store_attr]["variables"].setdefault(ncvar, {}) g_parent["attributes"][store_attr]["variables"][ncvar].update(d) + # TODO 
process dimensions on intermediate netCDF objects + if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover else: # pragma: no cover @@ -5684,27 +5677,19 @@ def _include_component_report( `None` """ + g = self.read_vars + component_report = g["component_report"].get(ncvar) - per_var_dict = { - "attributes": {}, - "dimensions": {}, - } - - # *Attribute list* per_attr_dict = { "variables": {}, "dimensions": {}, - # add value (string), and optionally reason and code - } - - # *Dimension dict* - per_dim_dict = { - "variables": {}, - # add size (int or None), and optionally reason and code + # The value (string), and optionally reason and code, will be added } - - g = self.read_vars - component_report = g["component_report"].get(ncvar) + if dimensions is not None: + per_attr_dict["dimensions"].update({ + dim: {"size": g["internal_dimension_sizes"][dim]} for + dim in dimensions + }) # Unlike for 'attribute' input to _add_message, this 'attribute' is the # the attribute_name only and not "var_name:attribute_name" to split @@ -5716,6 +5701,8 @@ def _include_component_report( g_parent["attributes"][attribute]["variables"][ncvar].update( component_report) + # TODO process dimensions on intermediate netCDF objects + def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): """Find a domain axis identifier for the variable's dimensions. diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 60c954508..4d76074f5 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -108,9 +108,9 @@ def setUp(self): # *Attribute list* per_attr_dict = { - "variables": {}, - "dimensions": {}, - # add value (string), and optionally reason and code + "variables": {}, + "dimensions": {}, + # add value (string), and optionally reason and code } # *Dimension dict* From 52ad5051526a96f3a3335ccc2bd4934ab7f16fb8 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Fri, 19 Dec 2025 00:55:27 +0000 Subject: [PATCH 79/97] Formatting & tidying of netcdfread module --- cfdm/read_write/netcdf/netcdfread.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 563314f1a..0e989c45b 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -3899,7 +3899,6 @@ def _create_field_or_domain( # Register the CF Conventions version at top-level only g["dataset_compliance"].setdefault(field_ncvar, {}) - g["dataset_compliance"][ "CF version"] = self.implementation.get_cf_version() @@ -5602,7 +5601,6 @@ def _add_message( dim: {"size": g["internal_dimension_sizes"][dim]} for dim in dimensions }) - # Process issues emerging on or via attributes g["dataset_compliance"].setdefault(top_ancestor_ncvar, per_attr_dict) g_top = g["dataset_compliance"][top_ancestor_ncvar] @@ -5611,6 +5609,7 @@ def _add_message( # problem with an ncvar with no parents - so store directly on ncvar # TODO should probably make the top_ancestor_ncvar optional # so that we don't need to do this check! + g["dataset_compliance"][top_ancestor_ncvar].update(d) if direct_parent_ncvar: @@ -5623,9 +5622,10 @@ def _add_message( store_attr = reverse_varattrs[ncvar] g_parent = g["component_report"].setdefault(direct_parent_ncvar, {}) - g_parent.setdefault("attributes", per_attr_dict) + g_parent.setdefault("attributes", {}) g_parent["attributes"].setdefault(store_attr, per_attr_dict) g_parent["attributes"][store_attr]["variables"].setdefault(ncvar, {}) + g_parent["attributes"][store_attr]["variables"][ncvar].update(d) # TODO process dimensions on intermediate netCDF objects From f7cda1d52f7b7ccb1128a9a24bff6eaefa34cbd2 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Fri, 19 Dec 2025 15:29:53 +0000 Subject: [PATCH 80/97] Fix for dataset_compliance output recording of dimension size --- cfdm/read_write/netcdf/netcdfread.py | 59 ++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 12 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 0e989c45b..aeb348d37 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5583,7 +5583,7 @@ def _add_message( one_issue_dict["reason"] = message # If the top_ancestor_ncvar and ncvar are the same, don't need to - # process under attributes ??? SLB + # process under attributes if top_ancestor_ncvar == ncvar or top_ancestor_ncvar is None: d = one_issue_dict.copy() else: @@ -5595,12 +5595,13 @@ def _add_message( d["attributes"].setdefault(attribute_name, []) d["attributes"][attribute_name].append(one_issue_dict) - # Create dimensions dict and populate with sizes - if dimensions is not None: - d["dimensions"].update({ - dim: {"size": g["internal_dimension_sizes"][dim]} for - dim in dimensions - }) + # Create dimensions dict and populate with sizes + if dimensions is not None: + d["dimensions"].update({ + dim: {"size": g["internal_dimension_sizes"][dim]} for + dim in dimensions + }) + # Process issues emerging on or via attributes g["dataset_compliance"].setdefault(top_ancestor_ncvar, per_attr_dict) g_top = g["dataset_compliance"][top_ancestor_ncvar] @@ -5609,26 +5610,45 @@ def _add_message( # problem with an ncvar with no parents - so store directly on ncvar # TODO should probably make the top_ancestor_ncvar optional # so that we don't need to do this check! 
- g["dataset_compliance"][top_ancestor_ncvar].update(d) if direct_parent_ncvar: - ### print("\nFOR CASE:", top_ancestor_ncvar, ncvar, direct_parent_ncvar) # Dicts are optimised for key-value lookup, but this requires # value-key lookup - find a better way to get relevant attr using # functionlity in this module - varattrs = g["variable_attributes"][direct_parent_ncvar] ###top_ancestor_ncvar] + varattrs = g["variable_attributes"][direct_parent_ncvar] reverse_varattrs = {v: k for k, v in varattrs.items()} store_attr = reverse_varattrs[ncvar] g_parent = g["component_report"].setdefault(direct_parent_ncvar, {}) g_parent.setdefault("attributes", {}) - g_parent["attributes"].setdefault(store_attr, per_attr_dict) + g_parent["attributes"].setdefault(store_attr, {"variables": {}}) g_parent["attributes"][store_attr]["variables"].setdefault(ncvar, {}) + # Get ncvar dimensions: + var_dim = g["variable_dimensions"][direct_parent_ncvar] + dim_sizes = { + dim: {"size": g["internal_dimension_sizes"][dim]} + for dim in var_dim + } + g_parent["dimensions"] = dim_sizes + g_parent["attributes"][store_attr]["dimensions"] = dim_sizes + + # Get ncvar dimensions: + var_dim = g["variable_dimensions"][ncvar] + dim_sizes = { + dim: {"size": g["internal_dimension_sizes"][dim]} + for dim in var_dim + } + + # Set these dims on the variable *and* the attribute + # TODO technically derives from the variable only, not its + # attribute too, so is this robust? 
+ if dim_sizes: + d["dimensions"] = dim_sizes + g_parent["attributes"][store_attr]["variables"][ncvar].update(d) - # TODO process dimensions on intermediate netCDF objects if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover @@ -5698,10 +5718,25 @@ def _include_component_report( g_parent.setdefault("attributes", {}) g_parent["attributes"].setdefault(attribute, per_attr_dict) g_parent["attributes"][attribute]["variables"].setdefault(ncvar, {}) + g_parent["attributes"][attribute]["variables"][ncvar].update( component_report) # TODO process dimensions on intermediate netCDF objects + # Process dimensions on intermediate netCDF objects: + # ... on parent ncvar + var_dim = g["variable_dimensions"][parent_ncvar] + dim_sizes = { + dim: {"size": g["internal_dimension_sizes"][dim]} + for dim in var_dim + } + # Set these dims on the variable *and* the attribute + # TODO technically derives from the variable only, not its + # attribute too, so is this robust? + if dim_sizes: + g_parent["dimensions"] = dim_sizes # on var, and on... + g_parent["attributes"][attribute]["dimensions"] = dim_sizes # attr + def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): """Find a domain axis identifier for the variable's dimensions. From ce1b88afb5451ac77f97228eb263c2bfac6be7db Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 19 Dec 2025 15:34:15 +0000 Subject: [PATCH 81/97] Tidying of PR before logic consolidation --- cfdm/read_write/netcdf/netcdfread.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index aeb348d37..fb03c3517 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -4933,8 +4933,6 @@ def _create_field_or_domain( ) create_new = True - - known_bad_snames = set() if not coordinates: # DCH ALERT # what to do about duplicate standard names? 
TODO @@ -5604,7 +5602,7 @@ def _add_message( # Process issues emerging on or via attributes g["dataset_compliance"].setdefault(top_ancestor_ncvar, per_attr_dict) - g_top = g["dataset_compliance"][top_ancestor_ncvar] + g["dataset_compliance"][top_ancestor_ncvar] # If the top_ancestor_ncvar and ncvar are the same, there is a # problem with an ncvar with no parents - so store directly on ncvar @@ -5649,7 +5647,6 @@ def _add_message( g_parent["attributes"][store_attr]["variables"][ncvar].update(d) - if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover else: # pragma: no cover @@ -5737,7 +5734,6 @@ def _include_component_report( g_parent["dimensions"] = dim_sizes # on var, and on... g_parent["attributes"][attribute]["dimensions"] = dim_sizes # attr - def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): """Find a domain axis identifier for the variable's dimensions. @@ -8547,7 +8543,6 @@ def _check_standard_names( "latter redundant - set it to False with a custom list." ) - invalid_names = [] any_sn_found = False invalid_sn_found = False for sn_attr in ("standard_name", "computed_standard_name"): From f3ebf46772218840cf43262476674660b6327378 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Fri, 19 Dec 2025 16:11:39 +0000 Subject: [PATCH 82/97] Begin consolidating _add_message & _include_component_report --- cfdm/read_write/netcdf/netcdfread.py | 125 +++++++++++++-------------- 1 file changed, 62 insertions(+), 63 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index fb03c3517..b352fef98 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5476,6 +5476,39 @@ def _get_geometry(self, field_ncvar, return_ncvar=False): return g["geometries"].get(geometry_ncvar) + def _update_noncompliance_dict( + self, noncompliance_dict, ncvar, parent_ncvar, attribute, + update_dict, + ): + """TODO.""" + var_compliance = noncompliance_dict[parent_ncvar] + var_compliance.setdefault("attributes", {}) + var_compliance["attributes"].setdefault( + attribute, + { + "variables": {}, + "dimensions": {}, + # The value (string), and optionally reason and code, will + # be added later + } + ) + var_compliance["attributes"][attribute]["variables"].setdefault( + ncvar, {}) + var_compliance["attributes"][attribute]["variables"][ncvar].update( + update_dict) + + return var_compliance + + def _process_dimension_sizes(self, ncvar): + """TODO.""" + g = self.read_vars + + var_dims = g["variable_dimensions"][ncvar] + return { + dim: {"size": g["internal_dimension_sizes"][dim]} + for dim in var_dims + } + def _add_message( self, top_ancestor_ncvar, @@ -5557,16 +5590,6 @@ def _add_message( else: code = None - per_var_dict = { - "attributes": {}, - "dimensions": {}, - } - per_attr_dict = { - "variables": {}, - "dimensions": {}, - # The value (string), and optionally reason and code, will be added - } - attribute_key = next(iter(attribute)) attribute_name = attribute_key.split(":")[1] # TODO need better way to access this - inefficient, should be able to @@ -5589,7 +5612,10 @@ def _add_message( # message, code and attribute value against the attribute name key. 
# These go into a *list*, since there may be more than one issue hence # reason message and corresponding code listed per attribute. - d = per_var_dict + d = { + "attributes": {}, + "dimensions": {}, + } d["attributes"].setdefault(attribute_name, []) d["attributes"][attribute_name].append(one_issue_dict) @@ -5601,13 +5627,7 @@ def _add_message( }) # Process issues emerging on or via attributes - g["dataset_compliance"].setdefault(top_ancestor_ncvar, per_attr_dict) - g["dataset_compliance"][top_ancestor_ncvar] - - # If the top_ancestor_ncvar and ncvar are the same, there is a - # problem with an ncvar with no parents - so store directly on ncvar - # TODO should probably make the top_ancestor_ncvar optional - # so that we don't need to do this check! + g["dataset_compliance"].setdefault(top_ancestor_ncvar, {}) g["dataset_compliance"][top_ancestor_ncvar].update(d) if direct_parent_ncvar: @@ -5618,30 +5638,28 @@ def _add_message( reverse_varattrs = {v: k for k, v in varattrs.items()} store_attr = reverse_varattrs[ncvar] + # NEXT + #g_parent = self._update_noncompliance_dict( + # g["component_report"], ncvar, direct_parent_ncvar, store_attr, + # d + #) + g_parent = g["component_report"].setdefault(direct_parent_ncvar, {}) g_parent.setdefault("attributes", {}) g_parent["attributes"].setdefault(store_attr, {"variables": {}}) g_parent["attributes"][store_attr]["variables"].setdefault(ncvar, {}) - # Get ncvar dimensions: - var_dim = g["variable_dimensions"][direct_parent_ncvar] - dim_sizes = { - dim: {"size": g["internal_dimension_sizes"][dim]} - for dim in var_dim - } - g_parent["dimensions"] = dim_sizes - g_parent["attributes"][store_attr]["dimensions"] = dim_sizes - - # Get ncvar dimensions: - var_dim = g["variable_dimensions"][ncvar] - dim_sizes = { - dim: {"size": g["internal_dimension_sizes"][dim]} - for dim in var_dim - } - + # Get dimensions for all variables # Set these dims on the variable *and* the attribute # TODO technically derives from the variable only, not 
its # attribute too, so is this robust? + direct_parent_dim_sizes = self._process_dimension_sizes( + direct_parent_ncvar) + g_parent["dimensions"] = direct_parent_dim_sizes + g_parent["attributes"][store_attr][ + "dimensions"] = direct_parent_dim_sizes + + dim_sizes = self._process_dimension_sizes(ncvar) if dim_sizes: d["dimensions"] = dim_sizes @@ -5697,42 +5715,23 @@ def _include_component_report( g = self.read_vars component_report = g["component_report"].get(ncvar) - per_attr_dict = { - "variables": {}, - "dimensions": {}, - # The value (string), and optionally reason and code, will be added - } - if dimensions is not None: - per_attr_dict["dimensions"].update({ - dim: {"size": g["internal_dimension_sizes"][dim]} for - dim in dimensions - }) - # Unlike for 'attribute' input to _add_message, this 'attribute' is the # the attribute_name only and not "var_name:attribute_name" to split if component_report: - g_parent = g["dataset_compliance"][parent_ncvar] - g_parent.setdefault("attributes", {}) - g_parent["attributes"].setdefault(attribute, per_attr_dict) - g_parent["attributes"][attribute]["variables"].setdefault(ncvar, {}) - - g_parent["attributes"][attribute]["variables"][ncvar].update( - component_report) - - # TODO process dimensions on intermediate netCDF objects - # Process dimensions on intermediate netCDF objects: - # ... on parent ncvar - var_dim = g["variable_dimensions"][parent_ncvar] - dim_sizes = { - dim: {"size": g["internal_dimension_sizes"][dim]} - for dim in var_dim - } + g_parent = self._update_noncompliance_dict( + g["dataset_compliance"], ncvar, parent_ncvar, attribute, + component_report + ) + + # Process dimensions on parent ncvar + dim_sizes = self._process_dimension_sizes(parent_ncvar) # Set these dims on the variable *and* the attribute # TODO technically derives from the variable only, not its # attribute too, so is this robust? if dim_sizes: - g_parent["dimensions"] = dim_sizes # on var, and on... 
- g_parent["attributes"][attribute]["dimensions"] = dim_sizes # attr + # On both var and attr + g_parent["dimensions"] = dim_sizes + g_parent["attributes"][attribute]["dimensions"] = dim_sizes def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): """Find a domain axis identifier for the variable's dimensions. From c741b2b6549b74f29deffce4cfb53539aa9b9cc5 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 19 Dec 2025 16:22:37 +0000 Subject: [PATCH 83/97] Further consolidation of _add_message & _include_component_report --- cfdm/read_write/netcdf/netcdfread.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index b352fef98..3175c862f 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5481,7 +5481,7 @@ def _update_noncompliance_dict( update_dict, ): """TODO.""" - var_compliance = noncompliance_dict[parent_ncvar] + var_compliance = noncompliance_dict.setdefault(parent_ncvar, {}) var_compliance.setdefault("attributes", {}) var_compliance["attributes"].setdefault( attribute, @@ -5638,17 +5638,14 @@ def _add_message( reverse_varattrs = {v: k for k, v in varattrs.items()} store_attr = reverse_varattrs[ncvar] - # NEXT - #g_parent = self._update_noncompliance_dict( - # g["component_report"], ncvar, direct_parent_ncvar, store_attr, - # d - #) - - g_parent = g["component_report"].setdefault(direct_parent_ncvar, {}) - g_parent.setdefault("attributes", {}) - g_parent["attributes"].setdefault(store_attr, {"variables": {}}) - g_parent["attributes"][store_attr]["variables"].setdefault(ncvar, {}) + dim_sizes = self._process_dimension_sizes(ncvar) + if dim_sizes: + d["dimensions"] = dim_sizes + g_parent = self._update_noncompliance_dict( + g["component_report"], ncvar, direct_parent_ncvar, store_attr, + d + ) # Get dimensions for all variables # Set these dims on the variable *and* 
the attribute # TODO technically derives from the variable only, not its @@ -5659,12 +5656,6 @@ def _add_message( g_parent["attributes"][store_attr][ "dimensions"] = direct_parent_dim_sizes - dim_sizes = self._process_dimension_sizes(ncvar) - if dim_sizes: - d["dimensions"] = dim_sizes - - g_parent["attributes"][store_attr]["variables"][ncvar].update(d) - if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover else: # pragma: no cover From e8dba0a8fc46bdc8ad2ac621e41f65db33bd1478 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 19 Dec 2025 16:47:58 +0000 Subject: [PATCH 84/97] Consolidation: include dims processing in _update_noncompliance_dict --- cfdm/read_write/netcdf/netcdfread.py | 47 +++++++++++----------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 3175c862f..e82b2f0e8 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5478,12 +5478,15 @@ def _get_geometry(self, field_ncvar, return_ncvar=False): def _update_noncompliance_dict( self, noncompliance_dict, ncvar, parent_ncvar, attribute, - update_dict, + update_dict, include_dimension_sizes=True, ): """TODO.""" var_compliance = noncompliance_dict.setdefault(parent_ncvar, {}) + + # Process attributes var_compliance.setdefault("attributes", {}) - var_compliance["attributes"].setdefault( + attrs_dict = var_compliance["attributes"] + attrs_dict.setdefault( attribute, { "variables": {}, @@ -5492,10 +5495,17 @@ def _update_noncompliance_dict( # be added later } ) - var_compliance["attributes"][attribute]["variables"].setdefault( - ncvar, {}) - var_compliance["attributes"][attribute]["variables"][ncvar].update( - update_dict) + attrs_dict[attribute]["variables"].setdefault(ncvar, {}) + attrs_dict[attribute]["variables"][ncvar].update(update_dict) + + # Optionally process in dimensions + if include_dimension_sizes: + dim_sizes = 
self._process_dimension_sizes(parent_ncvar) + # Set these dims on the variable *and* the attribute + # TODO technically derives from the variable only, not its + # attribute too, so is this robust? + var_compliance["dimensions"] = dim_sizes + var_compliance["attributes"][attribute]["dimensions"] = dim_sizes return var_compliance @@ -5642,19 +5652,10 @@ def _add_message( if dim_sizes: d["dimensions"] = dim_sizes - g_parent = self._update_noncompliance_dict( + self._update_noncompliance_dict( g["component_report"], ncvar, direct_parent_ncvar, store_attr, - d + d, ) - # Get dimensions for all variables - # Set these dims on the variable *and* the attribute - # TODO technically derives from the variable only, not its - # attribute too, so is this robust? - direct_parent_dim_sizes = self._process_dimension_sizes( - direct_parent_ncvar) - g_parent["dimensions"] = direct_parent_dim_sizes - g_parent["attributes"][store_attr][ - "dimensions"] = direct_parent_dim_sizes if dimensions is None: # pragma: no cover dimensions = "" # pragma: no cover @@ -5709,21 +5710,11 @@ def _include_component_report( # Unlike for 'attribute' input to _add_message, this 'attribute' is the # the attribute_name only and not "var_name:attribute_name" to split if component_report: - g_parent = self._update_noncompliance_dict( + self._update_noncompliance_dict( g["dataset_compliance"], ncvar, parent_ncvar, attribute, component_report ) - # Process dimensions on parent ncvar - dim_sizes = self._process_dimension_sizes(parent_ncvar) - # Set these dims on the variable *and* the attribute - # TODO technically derives from the variable only, not its - # attribute too, so is this robust? - if dim_sizes: - # On both var and attr - g_parent["dimensions"] = dim_sizes - g_parent["attributes"][attribute]["dimensions"] = dim_sizes - def _get_domain_axes(self, ncvar, allow_external=False, parent_ncvar=None): """Find a domain axis identifier for the variable's dimensions. 
From df9b9712087723c670023e839f272e6ba6cb41fd Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 19 Dec 2025 17:12:04 +0000 Subject: [PATCH 85/97] Remove now-unnecessary conditional with top_ancestor_ncvar --- cfdm/read_write/netcdf/netcdfread.py | 60 ++++++++++++++-------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index e82b2f0e8..2d7a2d750 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5476,6 +5476,17 @@ def _get_geometry(self, field_ncvar, return_ncvar=False): return g["geometries"].get(geometry_ncvar) + def _process_dimension_sizes(self, ncvar): + """TODO.""" + g = self.read_vars + + var_dims = g["variable_dimensions"][ncvar] + return { + dim: {"size": g["internal_dimension_sizes"][dim]} + # Here the 'or []' additoin ensures var_dims of None -> {} output + for dim in (var_dims or []) + } + def _update_noncompliance_dict( self, noncompliance_dict, ncvar, parent_ncvar, attribute, update_dict, include_dimension_sizes=True, @@ -5509,16 +5520,6 @@ def _update_noncompliance_dict( return var_compliance - def _process_dimension_sizes(self, ncvar): - """TODO.""" - g = self.read_vars - - var_dims = g["variable_dimensions"][ncvar] - return { - dim: {"size": g["internal_dimension_sizes"][dim]} - for dim in var_dims - } - def _add_message( self, top_ancestor_ncvar, @@ -5613,28 +5614,23 @@ def _add_message( if message: one_issue_dict["reason"] = message - # If the top_ancestor_ncvar and ncvar are the same, don't need to - # process under attributes - if top_ancestor_ncvar == ncvar or top_ancestor_ncvar is None: - d = one_issue_dict.copy() - else: - # Form lowest-level dict which reports an ultimate issue via a 'reason' - # message, code and attribute value against the attribute name key. 
- # These go into a *list*, since there may be more than one issue hence - # reason message and corresponding code listed per attribute. - d = { - "attributes": {}, - "dimensions": {}, - } - d["attributes"].setdefault(attribute_name, []) - d["attributes"][attribute_name].append(one_issue_dict) + # Form lowest-level dict which reports an ultimate issue via a 'reason' + # message, code and attribute value against the attribute name key. + # These go into a *list*, since there may be more than one issue hence + # reason message and corresponding code listed per attribute. + d = { + "attributes": {}, + "dimensions": {}, + } + d["attributes"].setdefault(attribute_name, []) + d["attributes"][attribute_name].append(one_issue_dict) - # Create dimensions dict and populate with sizes - if dimensions is not None: - d["dimensions"].update({ - dim: {"size": g["internal_dimension_sizes"][dim]} for - dim in dimensions - }) + # Create dimensions dict and populate with sizes + if dimensions is not None: + d["dimensions"].update({ + dim: {"size": g["internal_dimension_sizes"][dim]} for + dim in dimensions + }) # Process issues emerging on or via attributes g["dataset_compliance"].setdefault(top_ancestor_ncvar, {}) @@ -5648,6 +5644,8 @@ def _add_message( reverse_varattrs = {v: k for k, v in varattrs.items()} store_attr = reverse_varattrs[ncvar] + # Update the dimensions to those of the ncvar now, otherwise same + # dict is applicable to store on the direct_parent_ncvar dim_sizes = self._process_dimension_sizes(ncvar) if dim_sizes: d["dimensions"] = dim_sizes From 34d724cea21021f65c9ba60d4b96c4bc3d2b07b4 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Fri, 19 Dec 2025 17:26:34 +0000 Subject: [PATCH 86/97] Final tidy of netcdfread module, prepare test_compliance_checking --- cfdm/read_write/netcdf/netcdfread.py | 44 ++++++++++++--------------- cfdm/test/test_compliance_checking.py | 29 ++++++------------ 2 files changed, 28 insertions(+), 45 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 2d7a2d750..d4f71c130 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -11,6 +11,7 @@ from math import log, nan, prod from numbers import Integral from os.path import isdir, isfile, join +from pprint import pformat from typing import Any from uuid import uuid4 @@ -42,8 +43,6 @@ ) from .zarr import ZarrDimension -from pprint import pformat, pprint # DEBUG - logger = logging.getLogger(__name__) @@ -1328,7 +1327,6 @@ def read( "verbose": verbose, # Warnings? "warnings": warnings, - # SLB maybe inject the CF version here, straight away? 
"dataset_compliance": {}, "component_report": {}, "auxiliary_coordinate": {}, @@ -2199,11 +2197,6 @@ def read( if field_or_domain is not None: all_fields_or_domains[ncvar] = field_or_domain - # SLB add cf version - inject at end, not appearing from init setting - print("STOP HERE 0--------------------") - pprint(g["dataset_compliance"]) - print("STOP HERE 1--------------------") - # ------------------------------------------------------------ # Create domain constructs from UGRID mesh topology variables # ------------------------------------------------------------ @@ -5159,7 +5152,7 @@ def _create_field_or_domain( # ------------------------------------------------------------- # Add the structural read report to the field/domain dataset_compliance = g["dataset_compliance"][field_ncvar] - components = dataset_compliance # SLB edited + components = dataset_compliance if components: dataset_compliance = {field_ncvar: dataset_compliance} else: @@ -5483,7 +5476,7 @@ def _process_dimension_sizes(self, ncvar): var_dims = g["variable_dimensions"][ncvar] return { dim: {"size": g["internal_dimension_sizes"][dim]} - # Here the 'or []' additoin ensures var_dims of None -> {} output + # Here the 'or []' addition ensures var_dims of None -> {} output for dim in (var_dims or []) } @@ -5502,7 +5495,7 @@ def _update_noncompliance_dict( { "variables": {}, "dimensions": {}, - # The value (string), and optionally reason and code, will + # The value (string), and optionally reason and code, may # be added later } ) @@ -5608,33 +5601,35 @@ def _add_message( attribute_value = attribute[attribute_key] # Form a single issue to register (message, code and attr value) - one_issue_dict = {"value": attribute_value} + one_issue_info = {"value": attribute_value} if code: - one_issue_dict["code"] = code + one_issue_info["code"] = code if message: - one_issue_dict["reason"] = message + one_issue_info["reason"] = message # Form lowest-level dict which reports an ultimate issue via a 'reason' # 
message, code and attribute value against the attribute name key. # These go into a *list*, since there may be more than one issue hence # reason message and corresponding code listed per attribute. - d = { + var_noncompliance_info = { "attributes": {}, "dimensions": {}, } - d["attributes"].setdefault(attribute_name, []) - d["attributes"][attribute_name].append(one_issue_dict) + var_noncompliance_info["attributes"].setdefault(attribute_name, []) + var_noncompliance_info["attributes"][attribute_name].append( + one_issue_info) # Create dimensions dict and populate with sizes if dimensions is not None: - d["dimensions"].update({ + var_noncompliance_info["dimensions"].update({ dim: {"size": g["internal_dimension_sizes"][dim]} for dim in dimensions }) # Process issues emerging on or via attributes g["dataset_compliance"].setdefault(top_ancestor_ncvar, {}) - g["dataset_compliance"][top_ancestor_ncvar].update(d) + g["dataset_compliance"][top_ancestor_ncvar].update( + var_noncompliance_info) if direct_parent_ncvar: # Dicts are optimised for key-value lookup, but this requires @@ -5645,14 +5640,15 @@ def _add_message( store_attr = reverse_varattrs[ncvar] # Update the dimensions to those of the ncvar now, otherwise same - # dict is applicable to store on the direct_parent_ncvar + # dict, var_noncompliance_info, is applicable to store on the + # direct_parent_ncvar dim_sizes = self._process_dimension_sizes(ncvar) if dim_sizes: - d["dimensions"] = dim_sizes + var_noncompliance_info["dimensions"] = dim_sizes self._update_noncompliance_dict( g["component_report"], ncvar, direct_parent_ncvar, store_attr, - d, + var_noncompliance_info, ) if dimensions is None: # pragma: no cover @@ -5665,7 +5661,7 @@ def _add_message( f"{ncvar}{dimensions}: {message}" ) # pragma: no cover - return d + return var_noncompliance_info def _include_component_report( self, parent_ncvar, ncvar, attribute, dimensions=None): @@ -8398,7 +8394,6 @@ def _copy_construct(self, construct_type, parent_ncvar, 
ncvar): if component_report is not None: for var, report in component_report.items(): - # SLB edited g["dataset_compliance"][parent_ncvar].setdefault( var, []).extend(report) @@ -9067,7 +9062,6 @@ def _check_auxiliary_or_scalar_coordinate( g = self.read_vars coord_ncvar_attrs = g["variable_attributes"][coord_ncvar] - pprint(coord_ncvar_attrs) self._check_standard_names( parent_ncvar, coord_ncvar, diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 4d76074f5..f9418d452 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -5,6 +5,7 @@ import logging import os import platform +from pprint import pprint import sys import tempfile import unittest @@ -99,26 +100,6 @@ def setUp(self): # < ... test code ... > # cfdm.log_level('DISABLE') - # Structures to form the desired outputs - # *Variable dict* - per_var_dict = { - "attributes": {}, - "dimensions": {}, - } - - # *Attribute list* - per_attr_dict = { - "variables": {}, - "dimensions": {}, - # add value (string), and optionally reason and code - } - - # *Dimension dict* - per_dim_dict = { - "variables": {}, - # add size (int or None), and optionally reason and code - } - def test_extract_names_from_xml(self): """Test the `cfvalidation._extract_names_from_xml` function.""" # Check with a small 'dummy' XML table which is the current table @@ -311,6 +292,9 @@ def test_standard_names_validation_noncompliant_field(self): f = self.bad_snames_general_field dc_output = f.dataset_compliance() + print("----------------- TEST 1 NON UGRID ---------------------") + pprint(dc_output) + # SLB DEV # from pprint import pprint # pprint(dc_output) @@ -396,6 +380,11 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): dc_output_2 = f2.dataset_compliance() dc_output_3 = f2.dataset_compliance() + print("----------------- TEST 2 UGRID ---------------------") + pprint(dc_output_1) + pprint(dc_output_2) + pprint(dc_output_3) + # SLB DEV # 
TODO add error to run to say need to run 'create_test_files' From 22b8ac51ad9bd5d7cf144288cf59a376cbc58ea8 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 19 Dec 2025 18:21:29 +0000 Subject: [PATCH 87/97] Document new keyword noncompliance_report for cfdm.read --- cfdm/read_write/netcdf/netcdfread.py | 29 ++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index d4f71c130..270c3782f 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -955,6 +955,7 @@ def read( dataset_type=None, cdl_string=False, ignore_unknown_type=False, + warn_on_noncompliance=False, ): """Reads a netCDF dataset from file or OPenDAP URL. @@ -1077,6 +1078,34 @@ def read( .. versionadded:: (cfdm) 1.11.2.0 + noncompliance_report: `bool`, optional + If True then return a warning when any data read in are + not fully compliant by the CF Conventions, with a dictionary + which registers any detected issues in a structured way to + indicate the issue against any netCDF objects (variables, + dimensions and/or attributes) which they affect. Note this is + in an (early) developmental stage, therefore the default is + False to not produce this warning. + + The dictionaries printed in the warning are available + post-read through the dataset_compliance() method + available on a field or domain. + + .. warning:: Compliance checking in cfdm is not yet mature + and therefore only certain issues of + non-compliance will be detected and reported in + the warning dictionary, so this is not intended, + at present, to be a comprehensive check for + compliance according to the latest version of + the CF Conventions. As-is it may be useful as + a guide to issues. + + In future a human-friendly report will be made + available from the warning dictionary output, + but for now it is in a nested structure. + + .. 
versionadded:: (cfdm) NEXTVERSION + _file_systems: `dict`, optional Provide any already-open S3 file systems. From f24e7bc2beb472a96bf48a8d346eedce8f940d6b Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 19 Dec 2025 18:23:47 +0000 Subject: [PATCH 88/97] Update docs summary of new keyword noncompliance_report for cfdm.read --- cfdm/read_write/netcdf/netcdfread.py | 5 +++-- cfdm/read_write/read.py | 29 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 270c3782f..a8a5504ec 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -1098,11 +1098,12 @@ def read( at present, to be a comprehensive check for compliance according to the latest version of the CF Conventions. As-is it may be useful as - a guide to issues. + a guide to possible issues. In future a human-friendly report will be made available from the warning dictionary output, - but for now it is in a nested structure. + but for now it is only available pretty-printed + from the nested machine-parsable structure. .. versionadded:: (cfdm) NEXTVERSION diff --git a/cfdm/read_write/read.py b/cfdm/read_write/read.py index de08c30aa..0e0bbcac2 100644 --- a/cfdm/read_write/read.py +++ b/cfdm/read_write/read.py @@ -190,6 +190,35 @@ class read(ReadWrite): .. versionadded:: (cfdm) 1.12.0.0 + noncompliance_report: `bool`, optional + If True then return a warning when any data read in are + not fully compliant by the CF Conventions, with a dictionary + which registers any detected issues in a structured way to + indicate the issue against any netCDF objects (variables, + dimensions and/or attributes) which they affect. Note this is + in an (early) developmental stage, therefore the default is + False to not produce this warning. 
+ + The dictionaries printed in the warning are available + post-read through the dataset_compliance() method + available on a field or domain. + + .. warning:: Compliance checking in cfdm is not yet mature + and therefore only certain issues of + non-compliance will be detected and reported in + the warning dictionary, so this is not intended, + at present, to be a comprehensive check for + compliance according to the latest version of + the CF Conventions. As-is it may be useful as + a guide to possible issues. + + In future a human-friendly report will be made + available from the warning dictionary output, + but for now it is only available pretty-printed + from the nested machine-parsable structure. + + .. versionadded:: (cfdm) NEXTVERSION + ignore_unknown_type: Deprecated at version 1.12.2.0 Use *dataset_type* instead. From 07db6bf44891ce66ac426c1e3a20b7694b17b5b8 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 19 Dec 2025 20:38:31 +0000 Subject: [PATCH 89/97] Implement new keyword noncompliance_report for cfdm.read --- cfdm/read_write/netcdf/netcdfread.py | 19 ++++++++++++------- cfdm/read_write/read.py | 2 ++ cfdm/test/test_compliance_checking.py | 1 - 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index a8a5504ec..510fff482 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -11,7 +11,7 @@ from math import log, nan, prod from numbers import Integral from os.path import isdir, isfile, join -from pprint import pformat +from pprint import pformat, pprint from typing import Any from uuid import uuid4 @@ -955,7 +955,7 @@ def read( dataset_type=None, cdl_string=False, ignore_unknown_type=False, - warn_on_noncompliance=False, + noncompliance_report=False, ): """Reads a netCDF dataset from file or OPenDAP URL. 
@@ -2352,14 +2352,19 @@ def read( out = [x[1] for x in sorted(items)] - if warnings: + # ------------------------------------------------------------ + # Provide requested warnings e.g. about non-compliance + # ------------------------------------------------------------ + if warnings or noncompliance_report: for x in out: - qq = x.dataset_compliance() - if qq: + noncompliance_dict = x.dataset_compliance() + if noncompliance_dict: logger.warning( - f"WARNING: {x.__class__.__name__} incomplete due to " - f"non-CF-compliant dataset. Report:\n{qq}" + f"\nWARNING: {x.__class__.__name__} incomplete or " + "non-stnadard due to non-CF-compliant dataset. " + "Report:\n" ) # pragma: no cover + pprint(noncompliance_dict) if warn_valid and not g["domain"]: # -------------------------------------------------------- diff --git a/cfdm/read_write/read.py b/cfdm/read_write/read.py index 0e0bbcac2..149cddeaa 100644 --- a/cfdm/read_write/read.py +++ b/cfdm/read_write/read.py @@ -277,6 +277,7 @@ def __new__( followlinks=False, cdl_string=False, extra_read_vars=None, + noncompliance_report=False, **kwargs, ): """Read field or domain constructs from datasets. @@ -601,6 +602,7 @@ def _read(self, dataset): "dataset_type", "cdl_string", "extra_read_vars", + "noncompliance_report", ) } diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index f9418d452..bd7e1fad1 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -393,7 +393,6 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): # for bad case. with Dataset("ugrid_1_bad_names.nc", "r+") as nc: field_all_varnames = list(nc.variables.keys()) - print("VERIFY") for varname, var in nc.variables.items(): print(varname, getattr(var, "standard_name", "No standard_name")) From 59cbbf48a4c8aa0b6cea4e4103d05a57a1a50e0e Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Fri, 19 Dec 2025 21:09:37 +0000 Subject: [PATCH 90/97] Compliance checking: update UGRID unit test for new output structure --- cfdm/read_write/netcdf/netcdfread.py | 2 +- cfdm/test/test_compliance_checking.py | 357 +++++++++++++------------- 2 files changed, 183 insertions(+), 176 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 510fff482..a9f56eb98 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -2361,7 +2361,7 @@ def read( if noncompliance_dict: logger.warning( f"\nWARNING: {x.__class__.__name__} incomplete or " - "non-stnadard due to non-CF-compliant dataset. " + "non-standard due to non-CF-compliant dataset. " "Report:\n" ) # pragma: no cover pprint(noncompliance_dict) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index bd7e1fad1..77d279cb8 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -362,17 +362,13 @@ def test_standard_names_validation_compliant_ugrid_field(self): def test_standard_names_validation_noncompliant_ugrid_fields(self): """Test compliance checking on non-compliant UGRID fields.""" expected_reason = ( - "standard_name attribute " - "has a value that is not a valid name contained " - "in the current standard name table" + "standard_name attribute has a value that is not a " + "valid name contained in the current standard name table" ) expected_code = 400022 - # Excludes attribute which we expect in there but depends on varname - # so add that expected key in during the iteration over varnames - expected_noncompl_dict = { - "code": expected_code, - "reason": expected_reason, - } + + # SLB DEV + # TODO add error to run to say need to run 'create_test_files' # Fields for testing on: those in ugrid_1 with bad names pre-set f1, f2, f3 = self.bad_ugrid_sn_fields # unpack to shorter names @@ -385,9 +381,6 @@ def 
test_standard_names_validation_noncompliant_ugrid_fields(self): pprint(dc_output_2) pprint(dc_output_3) - # SLB DEV - # TODO add error to run to say need to run 'create_test_files' - # TODO see from below that not all bad names get set - but want # that, so should update create_test_files method to set on all # for bad case. @@ -396,180 +389,194 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): for varname, var in nc.variables.items(): print(varname, getattr(var, "standard_name", "No standard_name")) - # from pprint import pprint - # print("DC OUTPUT 1") - # pprint(dc_output_1) - - # # 'pa' is the field variable we test on - # self.assertIn("non-compliance", dc_output_1["pa"]) - # noncompliance = dc_output_1 ###["pa"]["non-compliance"] - # print("^^^^^^^^^^^^^" * 20, "HERE HAVE NONCOMP DICT OF:") - # pprint(noncompliance) + # ======================================================= + # Field 1/3: top-level dict (1/4) + # ======================================================= + self.assertIsInstance(dc_output_1, dict) + self.assertCountEqual(dc_output_1.keys(), ["pa"]) + + pa = dc_output_1["pa"] + self.assertIsInstance(pa, dict) + self.assertCountEqual(pa.keys(), ["attributes", "dimensions"]) + + # pa.dimensions + pa_dimensions = pa["dimensions"] + self.assertIsInstance(pa_dimensions, dict) + self.assertCountEqual(pa_dimensions.keys(), ["nMesh2_node", "time"]) + self.assertEqual(pa_dimensions["nMesh2_node"], {"size": 7}) + self.assertEqual(pa_dimensions["time"], {"size": 2}) + + # pa.attributes + pa_attributes = pa["attributes"] + self.assertIsInstance(pa_attributes, dict) + self.assertCountEqual(pa_attributes.keys(), ["mesh", "standard_name"]) + + # pa.attributes.standard_name (1/4) + pa_standard_name = pa_attributes["standard_name"] + self.assertIsInstance(pa_standard_name, list) + self.assertEqual(len(pa_standard_name), 1) - expected_keys = [ - # itself? 
"pa", - # not for this field "v", - # not for this field "ta", - # fails "time", - # fails "time_bounds", - "Mesh2", - # fails "Mesh2_node_x", # aka longitude? - # fails "Mesh2_node_y", # aka latitude? - # fails "Mesh2_face_x", # ... etc. - # fails "Mesh2_face_y", - # fails "Mesh2_edge_x", - # fails "Mesh2_edge_y", - # fails "Mesh2_face_nodes", - # fails "Mesh2_edge_nodes", - # fails "Mesh2_face_edges", - # fails "Mesh2_face_links", - # fails "Mesh2_edge_face_links", - ] - for varname in expected_keys: - noncompl_dict = noncompliance.get(varname) - - self.assertIsNotNone( - noncompl_dict, - msg=f"Empty non-compliance for variable '{varname}'" - ) - self.assertIsInstance(noncompl_dict, list) - self.assertEqual(len(noncompl_dict), 1) - - # Safe to unpack after test above - noncompl_dict = noncompl_dict[0] - - self.assertIn("code", noncompl_dict) - self.assertEqual(noncompl_dict["code"], expected_code) - self.assertIn("reason", noncompl_dict) - self.assertEqual(noncompl_dict["reason"], expected_reason) - - # Form expected attribute which needs the varname and bad name - expected_attribute = { - f"{varname}:standard_name": f"badname_{varname}" - } - expected_noncompl_dict["attribute"] = expected_attribute - - self.assertIn("attribute", noncompl_dict) - self.assertEqual(noncompl_dict["attribute"], expected_attribute) - - # Final check to ensure there isn't anything else in there. - # If keys are missing will be reported to fail more spefically - # on per-key-value checks above - self.assertEqual(noncompl_dict, expected_noncompl_dict) - - # from pprint import pprint - # pprint(dc_output_2) - - # 'ta' is the field variable we test on - self.assertIn("non-compliance", dc_output_2["ta"]) - noncompliance = dc_output_2["ta"]["non-compliance"] - - expected_keys = [ - # itself? "ta", - # not for this field "pa", - # not for this field "v", - # fails "time", - # fails "time_bounds", - "Mesh2", - # fails "Mesh2_node_x", # aka longitude? - # fails "Mesh2_node_y", # aka latitude? 
- # fails "Mesh2_face_x", # ... etc. - # fails "Mesh2_face_y", - # fails "Mesh2_edge_x", - # fails "Mesh2_edge_y", - # fails "Mesh2_face_nodes", - # fails "Mesh2_edge_nodes", - # fails "Mesh2_face_edges", - # fails "Mesh2_face_links", - # fails "Mesh2_edge_face_links", - ] - for varname in expected_keys: - noncompl_dict = noncompliance.get(varname) - self.assertIsNotNone( - noncompl_dict, - msg=f"Empty non-compliance for variable '{varname}'" - ) - self.assertIsInstance(noncompl_dict, list) - self.assertEqual(len(noncompl_dict), 1) - - # Safe to unpack after test above - noncompl_dict = noncompl_dict[0] - - self.assertIn("code", noncompl_dict) - self.assertEqual(noncompl_dict["code"], expected_code) - self.assertIn("reason", noncompl_dict) - self.assertEqual(noncompl_dict["reason"], expected_reason) - - # Form expected attribute which needs the varname and bad name - expected_attribute = { - f"{varname}:standard_name": f"badname_{varname}" - } - expected_noncompl_dict["attribute"] = expected_attribute + self.assertEqual( + pa_standard_name[0], + { + "code": expected_code, + "reason": expected_reason, + "value": "badname_Mesh2", + }, + ) - self.assertIn("attribute", noncompl_dict) - self.assertEqual(noncompl_dict["attribute"], expected_attribute) + # pa.attributes.mesh + mesh = pa_attributes["mesh"] + self.assertIsInstance(mesh, dict) + self.assertCountEqual(mesh.keys(), ["dimensions", "variables"]) + + # mesh.dimensions + mesh_dimensions = mesh["dimensions"] + self.assertIsInstance(mesh_dimensions, dict) + self.assertCountEqual(mesh_dimensions.keys(), ["nMesh2_node", "time"]) + self.assertEqual(mesh_dimensions["nMesh2_node"], {"size": 7}) + self.assertEqual(mesh_dimensions["time"], {"size": 2}) + + # mesh.variables + mesh_variables = mesh["variables"] + self.assertIsInstance(mesh_variables, dict) + self.assertCountEqual(mesh_variables.keys(), ["Mesh2"]) + + mesh2 = mesh_variables["Mesh2"] + self.assertIsInstance(mesh2, dict) + self.assertCountEqual(mesh2.keys(), 
["attributes", "dimensions"]) + + # Mesh2.dimensions + self.assertEqual(mesh2["dimensions"], {}) + + # Mesh2.attributes + mesh2_attributes = mesh2["attributes"] + self.assertIsInstance(mesh2_attributes, dict) + self.assertCountEqual( + mesh2_attributes.keys(), + [ + "edge_node_connectivity", + "face_face_connectivity", + "face_node_connectivity", + ], + ) - # Final check to ensure there isn't anything else in there. - # If keys are missing will be reported to fail more spefically - # on per-key-value checks above - self.assertEqual(noncompl_dict, expected_noncompl_dict) + # ======================================================= + # Field 1/3: edge_node_connectivity (2/4) + # ======================================================= + edge_node = mesh2_attributes["edge_node_connectivity"] + self.assertIsInstance(edge_node, dict) + self.assertCountEqual(edge_node.keys(), ["dimensions", "variables"]) + self.assertEqual(edge_node["dimensions"], {}) + + edge_node_vars = edge_node["variables"] + self.assertIsInstance(edge_node_vars, dict) + self.assertCountEqual(edge_node_vars.keys(), ["Mesh2_edge_nodes"]) + + edge_nodes = edge_node_vars["Mesh2_edge_nodes"] + self.assertIsInstance(edge_nodes, dict) + self.assertCountEqual(edge_nodes.keys(), ["attributes", "dimensions"]) + self.assertEqual( + edge_nodes["dimensions"], + { + "Two": {"size": 2}, + "nMesh2_edge": {"size": 9}, + }, + ) - # from pprint import pprint - # pprint(dc_output_3) + edge_nodes_attrs = edge_nodes["attributes"] + self.assertIsInstance(edge_nodes_attrs, dict) + self.assertCountEqual(edge_nodes_attrs.keys(), ["standard_name"]) - # 'v' is the field variable we test on - self.assertIn("non-compliance", dc_output_3["v"]) - noncompliance = dc_output_3["v"]["non-compliance"] + edge_sn = edge_nodes_attrs["standard_name"] + self.assertIsInstance(edge_sn, list) + self.assertEqual(len(edge_sn), 1) + self.assertEqual( + edge_sn[0], + { + "code": expected_code, + "reason": expected_reason, + "value": 
"badname_Mesh2_edge_nodes", + }, + ) - expected_keys = [ - # itself? "v", - # not for this field "ta", - # not for this field "pa", - # fails "time", - # fails "time_bounds", - "Mesh2", - # fails "Mesh2_node_x", # aka longitude? - # fails "Mesh2_node_y", # aka latitude? - # fails "Mesh2_face_x", # ... etc. - # fails "Mesh2_face_y", - # fails "Mesh2_edge_x", - # fails "Mesh2_edge_y", - # fails "Mesh2_face_nodes", - # fails "Mesh2_edge_nodes", - # fails "Mesh2_face_edges", - # fails "Mesh2_face_links", - # fails "Mesh2_edge_face_links", - ] - for varname in expected_keys: - noncompl_dict = noncompliance.get(varname) - self.assertIsNotNone( - noncompl_dict, - msg=f"Empty non-compliance for variable '{varname}'" - ) - self.assertIsInstance(noncompl_dict, list) - self.assertEqual(len(noncompl_dict), 1) + # ======================================================= + # Field 1/3: face_face_connectivity (3/4) + # ======================================================= + face_face = mesh2_attributes["face_face_connectivity"] + self.assertIsInstance(face_face, dict) + self.assertCountEqual(face_face.keys(), ["dimensions", "variables"]) + self.assertEqual(face_face["dimensions"], {}) + + face_face_vars = face_face["variables"] + self.assertIsInstance(face_face_vars, dict) + self.assertCountEqual(face_face_vars.keys(), ["Mesh2_face_links"]) + + face_links = face_face_vars["Mesh2_face_links"] + self.assertIsInstance(face_links, dict) + self.assertCountEqual(face_links.keys(), ["attributes", "dimensions"]) + self.assertEqual( + face_links["dimensions"], + { + "Four": {"size": 4}, + "nMesh2_face": {"size": 3}, + }, + ) - # Safe to unpack after test above - noncompl_dict = noncompl_dict[0] + face_links_attrs = face_links["attributes"] + self.assertIsInstance(face_links_attrs, dict) + self.assertCountEqual(face_links_attrs.keys(), ["standard_name"]) - self.assertIn("code", noncompl_dict) - self.assertEqual(noncompl_dict["code"], expected_code) - self.assertIn("reason", noncompl_dict) 
- self.assertEqual(noncompl_dict["reason"], expected_reason) + face_links_sn = face_links_attrs["standard_name"] + self.assertIsInstance(face_links_sn, list) + self.assertEqual(len(face_links_sn), 1) + self.assertEqual( + face_links_sn[0], + { + "code": expected_code, + "reason": expected_reason, + "value": "badname_Mesh2_face_links", + }, + ) - # Form expected attribute which needs the varname and bad name - expected_attribute = { - f"{varname}:standard_name": f"badname_{varname}" - } - expected_noncompl_dict["attribute"] = expected_attribute + # ======================================================= + # Field 1/3: face_node_connectivity (4/4) + # ======================================================= + face_node = mesh2_attributes["face_node_connectivity"] + self.assertIsInstance(face_node, dict) + self.assertCountEqual(face_node.keys(), ["dimensions", "variables"]) + self.assertEqual(face_node["dimensions"], {}) + + face_node_vars = face_node["variables"] + self.assertIsInstance(face_node_vars, dict) + self.assertCountEqual(face_node_vars.keys(), ["Mesh2_face_nodes"]) + + face_nodes = face_node_vars["Mesh2_face_nodes"] + self.assertIsInstance(face_nodes, dict) + self.assertCountEqual(face_nodes.keys(), ["attributes", "dimensions"]) + self.assertEqual( + face_nodes["dimensions"], + { + "Four": {"size": 4}, + "nMesh2_face": {"size": 3}, + }, + ) - self.assertIn("attribute", noncompl_dict) - self.assertEqual(noncompl_dict["attribute"], expected_attribute) + face_nodes_attrs = face_nodes["attributes"] + self.assertIsInstance(face_nodes_attrs, dict) + self.assertCountEqual(face_nodes_attrs.keys(), ["standard_name"]) - # Final check to ensure there isn't anything else in there. 
- # If keys are missing will be reported to fail more spefically - # on per-key-value checks above - self.assertEqual(noncompl_dict, expected_noncompl_dict) + face_nodes_sn = face_nodes_attrs["standard_name"] + self.assertIsInstance(face_nodes_sn, list) + self.assertEqual(len(face_nodes_sn), 1) + self.assertEqual( + face_nodes_sn[0], + { + "code": expected_code, + "reason": expected_reason, + "value": "badname_Mesh2_face_nodes", + }, + ) if __name__ == "__main__": From 88b46e9f8f926f4257a7dd442d2e49aa54b40a08 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 19 Dec 2025 21:26:05 +0000 Subject: [PATCH 91/97] Compliance checking: cover second field in UGRID unit test --- cfdm/test/test_compliance_checking.py | 200 +++++++++++++++++++++++++- 1 file changed, 198 insertions(+), 2 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 77d279cb8..28a2ca6ad 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -377,9 +377,9 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): dc_output_3 = f2.dataset_compliance() print("----------------- TEST 2 UGRID ---------------------") - pprint(dc_output_1) + ###pprint(dc_output_1) pprint(dc_output_2) - pprint(dc_output_3) + ###pprint(dc_output_3) # TODO see from below that not all bad names get set - but want # that, so should update create_test_files method to set on all @@ -578,6 +578,202 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): }, ) + # ======================================================= + # Field 2/3: top-level dict (1/4) + # ======================================================= + # Same structure to field 1 but has some differences, notably: + # * pa -> ta + # * nMesh2_node -> nMesh2_face + # * {'nMesh2_node': {'size': 7} -> {'nMesh2_face': {'size': 3}. + # So similar testing but some different values. 
+ # TODO when we use pytest we can parameterise these assertions + # to prevent duplicating the lines. + self.assertIsInstance(dc_output_2, dict) + self.assertCountEqual(dc_output_2.keys(), ["ta"]) + + ta = dc_output_2["ta"] + self.assertIsInstance(ta, dict) + self.assertCountEqual(ta.keys(), ["attributes", "dimensions"]) + + # pa.dimensions + ta_dimensions = ta["dimensions"] + self.assertIsInstance(ta_dimensions, dict) + self.assertCountEqual(ta_dimensions.keys(), ["nMesh2_face", "time"]) + self.assertEqual(ta_dimensions["nMesh2_face"], {"size": 3}) + self.assertEqual(ta_dimensions["time"], {"size": 2}) + + # ta.attributes + ta_attributes = ta["attributes"] + self.assertIsInstance(ta_attributes, dict) + self.assertCountEqual(ta_attributes.keys(), ["mesh", "standard_name"]) + + # ta.attributes.standard_name (1/4) + ta_standard_name = ta_attributes["standard_name"] + self.assertIsInstance(ta_standard_name, list) + self.assertEqual(len(ta_standard_name), 1) + + self.assertEqual( + ta_standard_name[0], + { + "code": expected_code, + "reason": expected_reason, + "value": "badname_Mesh2", + }, + ) + + # ta.attributes.mesh + mesh = ta_attributes["mesh"] + self.assertIsInstance(mesh, dict) + self.assertCountEqual(mesh.keys(), ["dimensions", "variables"]) + + # mesh.dimensions + mesh_dimensions = mesh["dimensions"] + self.assertIsInstance(mesh_dimensions, dict) + self.assertCountEqual(mesh_dimensions.keys(), ["nMesh2_face", "time"]) + self.assertEqual(mesh_dimensions["nMesh2_face"], {"size": 3}) + self.assertEqual(mesh_dimensions["time"], {"size": 2}) + + # mesh.variables + mesh_variables = mesh["variables"] + self.assertIsInstance(mesh_variables, dict) + self.assertCountEqual(mesh_variables.keys(), ["Mesh2"]) + + mesh2 = mesh_variables["Mesh2"] + self.assertIsInstance(mesh2, dict) + self.assertCountEqual(mesh2.keys(), ["attributes", "dimensions"]) + + # Mesh2.dimensions + self.assertEqual(mesh2["dimensions"], {}) + + # Mesh2.attributes + mesh2_attributes = 
mesh2["attributes"] + self.assertIsInstance(mesh2_attributes, dict) + self.assertCountEqual( + mesh2_attributes.keys(), + [ + "edge_node_connectivity", + "face_face_connectivity", + "face_node_connectivity", + ], + ) + + # ======================================================= + # Field 1/3: edge_node_connectivity (2/4) + # ======================================================= + edge_node = mesh2_attributes["edge_node_connectivity"] + self.assertIsInstance(edge_node, dict) + self.assertCountEqual(edge_node.keys(), ["dimensions", "variables"]) + self.assertEqual(edge_node["dimensions"], {}) + + edge_node_vars = edge_node["variables"] + self.assertIsInstance(edge_node_vars, dict) + self.assertCountEqual(edge_node_vars.keys(), ["Mesh2_edge_nodes"]) + + edge_nodes = edge_node_vars["Mesh2_edge_nodes"] + self.assertIsInstance(edge_nodes, dict) + self.assertCountEqual(edge_nodes.keys(), ["attributes", "dimensions"]) + self.assertEqual( + edge_nodes["dimensions"], + { + "Two": {"size": 2}, + "nMesh2_edge": {"size": 9}, + }, + ) + + edge_nodes_attrs = edge_nodes["attributes"] + self.assertIsInstance(edge_nodes_attrs, dict) + self.assertCountEqual(edge_nodes_attrs.keys(), ["standard_name"]) + + edge_sn = edge_nodes_attrs["standard_name"] + self.assertIsInstance(edge_sn, list) + self.assertEqual(len(edge_sn), 1) + self.assertEqual( + edge_sn[0], + { + "code": expected_code, + "reason": expected_reason, + "value": "badname_Mesh2_edge_nodes", + }, + ) + + # ======================================================= + # Field 1/3: face_face_connectivity (3/4) + # ======================================================= + face_face = mesh2_attributes["face_face_connectivity"] + self.assertIsInstance(face_face, dict) + self.assertCountEqual(face_face.keys(), ["dimensions", "variables"]) + self.assertEqual(face_face["dimensions"], {}) + + face_face_vars = face_face["variables"] + self.assertIsInstance(face_face_vars, dict) + self.assertCountEqual(face_face_vars.keys(), 
["Mesh2_face_links"]) + + face_links = face_face_vars["Mesh2_face_links"] + self.assertIsInstance(face_links, dict) + self.assertCountEqual(face_links.keys(), ["attributes", "dimensions"]) + self.assertEqual( + face_links["dimensions"], + { + "Four": {"size": 4}, + "nMesh2_face": {"size": 3}, + }, + ) + + face_links_attrs = face_links["attributes"] + self.assertIsInstance(face_links_attrs, dict) + self.assertCountEqual(face_links_attrs.keys(), ["standard_name"]) + + face_links_sn = face_links_attrs["standard_name"] + self.assertIsInstance(face_links_sn, list) + self.assertEqual(len(face_links_sn), 1) + self.assertEqual( + face_links_sn[0], + { + "code": expected_code, + "reason": expected_reason, + "value": "badname_Mesh2_face_links", + }, + ) + + # ======================================================= + # Field 1/3: face_node_connectivity (4/4) + # ======================================================= + face_node = mesh2_attributes["face_node_connectivity"] + self.assertIsInstance(face_node, dict) + self.assertCountEqual(face_node.keys(), ["dimensions", "variables"]) + self.assertEqual(face_node["dimensions"], {}) + + face_node_vars = face_node["variables"] + self.assertIsInstance(face_node_vars, dict) + self.assertCountEqual(face_node_vars.keys(), ["Mesh2_face_nodes"]) + + face_nodes = face_node_vars["Mesh2_face_nodes"] + self.assertIsInstance(face_nodes, dict) + self.assertCountEqual(face_nodes.keys(), ["attributes", "dimensions"]) + self.assertEqual( + face_nodes["dimensions"], + { + "Four": {"size": 4}, + "nMesh2_face": {"size": 3}, + }, + ) + + face_nodes_attrs = face_nodes["attributes"] + self.assertIsInstance(face_nodes_attrs, dict) + self.assertCountEqual(face_nodes_attrs.keys(), ["standard_name"]) + + face_nodes_sn = face_nodes_attrs["standard_name"] + self.assertIsInstance(face_nodes_sn, list) + self.assertEqual(len(face_nodes_sn), 1) + self.assertEqual( + face_nodes_sn[0], + { + "code": expected_code, + "reason": expected_reason, + "value": 
"badname_Mesh2_face_nodes", + }, + ) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) From d55e70e40c63663acdf651fb79eaed32a6224c94 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 19 Dec 2025 21:43:31 +0000 Subject: [PATCH 92/97] Compliance checking: cover third & final field in UGRID unit test --- cfdm/test/test_compliance_checking.py | 210 ++++++++++++++++++++++++-- 1 file changed, 201 insertions(+), 9 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 28a2ca6ad..80306c0cd 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -374,12 +374,7 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): f1, f2, f3 = self.bad_ugrid_sn_fields # unpack to shorter names dc_output_1 = f1.dataset_compliance() dc_output_2 = f2.dataset_compliance() - dc_output_3 = f2.dataset_compliance() - - print("----------------- TEST 2 UGRID ---------------------") - ###pprint(dc_output_1) - pprint(dc_output_2) - ###pprint(dc_output_3) + dc_output_3 = f3.dataset_compliance() # TODO see from below that not all bad names get set - but want # that, so should update create_test_files method to set on all @@ -658,7 +653,7 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): ) # ======================================================= - # Field 1/3: edge_node_connectivity (2/4) + # Field 2/3: edge_node_connectivity (2/4) # ======================================================= edge_node = mesh2_attributes["edge_node_connectivity"] self.assertIsInstance(edge_node, dict) @@ -697,7 +692,7 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): ) # ======================================================= - # Field 1/3: face_face_connectivity (3/4) + # Field 2/3: face_face_connectivity (3/4) # ======================================================= face_face = mesh2_attributes["face_face_connectivity"] 
self.assertIsInstance(face_face, dict) @@ -736,7 +731,204 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): ) # ======================================================= - # Field 1/3: face_node_connectivity (4/4) + # Field 2/3: face_node_connectivity (4/4) + # ======================================================= + face_node = mesh2_attributes["face_node_connectivity"] + self.assertIsInstance(face_node, dict) + self.assertCountEqual(face_node.keys(), ["dimensions", "variables"]) + self.assertEqual(face_node["dimensions"], {}) + + face_node_vars = face_node["variables"] + self.assertIsInstance(face_node_vars, dict) + self.assertCountEqual(face_node_vars.keys(), ["Mesh2_face_nodes"]) + + face_nodes = face_node_vars["Mesh2_face_nodes"] + self.assertIsInstance(face_nodes, dict) + self.assertCountEqual(face_nodes.keys(), ["attributes", "dimensions"]) + self.assertEqual( + face_nodes["dimensions"], + { + "Four": {"size": 4}, + "nMesh2_face": {"size": 3}, + }, + ) + + face_nodes_attrs = face_nodes["attributes"] + self.assertIsInstance(face_nodes_attrs, dict) + self.assertCountEqual(face_nodes_attrs.keys(), ["standard_name"]) + + face_nodes_sn = face_nodes_attrs["standard_name"] + self.assertIsInstance(face_nodes_sn, list) + self.assertEqual(len(face_nodes_sn), 1) + self.assertEqual( + face_nodes_sn[0], + { + "code": expected_code, + "reason": expected_reason, + "value": "badname_Mesh2_face_nodes", + }, + ) + + # ======================================================= + # Field 3/3: top-level dict (1/4) + # ======================================================= + # Same structure to field 1 (and therefore 2) but has some + # differences, notably: + # * pa/ta -> v + # * nMesh2_node/nMesh2_face -> nMesh2_edge + # * {'nMesh2_node': {'size': 7} (etc.) -> {'nMesh2_edge': {'size': 9}. + # So similar testing but some different values. + # TODO when we use pytest we can parameterise these assertions + # to prevent duplicating the lines. 
+ self.assertIsInstance(dc_output_3, dict) + self.assertCountEqual(dc_output_3.keys(), ["v"]) + + v = dc_output_3["v"] + self.assertIsInstance(v, dict) + self.assertCountEqual(v.keys(), ["attributes", "dimensions"]) + + # pa.dimensions + v_dimensions = v["dimensions"] + self.assertIsInstance(v_dimensions, dict) + self.assertCountEqual(v_dimensions.keys(), ["nMesh2_edge", "time"]) + self.assertEqual(v_dimensions["nMesh2_edge"], {"size": 9}) + self.assertEqual(v_dimensions["time"], {"size": 2}) + + # v.attributes + v_attributes = v["attributes"] + self.assertIsInstance(v_attributes, dict) + self.assertCountEqual(v_attributes.keys(), ["mesh", "standard_name"]) + + # v.attributes.standard_name (1/4) + v_standard_name = v_attributes["standard_name"] + self.assertIsInstance(v_standard_name, list) + self.assertEqual(len(v_standard_name), 1) + + self.assertEqual( + v_standard_name[0], + { + "code": expected_code, + "reason": expected_reason, + "value": "badname_Mesh2", + }, + ) + + # v.attributes.mesh + mesh = v_attributes["mesh"] + self.assertIsInstance(mesh, dict) + self.assertCountEqual(mesh.keys(), ["dimensions", "variables"]) + + # mesh.dimensions + mesh_dimensions = mesh["dimensions"] + self.assertIsInstance(mesh_dimensions, dict) + self.assertCountEqual(mesh_dimensions.keys(), ["nMesh2_edge", "time"]) + self.assertEqual(mesh_dimensions["nMesh2_edge"], {"size": 9}) + self.assertEqual(mesh_dimensions["time"], {"size": 2}) + + # mesh.variables + mesh_variables = mesh["variables"] + self.assertIsInstance(mesh_variables, dict) + self.assertCountEqual(mesh_variables.keys(), ["Mesh2"]) + + mesh2 = mesh_variables["Mesh2"] + self.assertIsInstance(mesh2, dict) + self.assertCountEqual(mesh2.keys(), ["attributes", "dimensions"]) + + # Mesh2.dimensions + self.assertEqual(mesh2["dimensions"], {}) + + # Mesh2.attributes + mesh2_attributes = mesh2["attributes"] + self.assertIsInstance(mesh2_attributes, dict) + self.assertCountEqual( + mesh2_attributes.keys(), + [ + 
"edge_node_connectivity", + "face_face_connectivity", + "face_node_connectivity", + ], + ) + + # ======================================================= + # Field 3/3: edge_node_connectivity (2/4) + # ======================================================= + edge_node = mesh2_attributes["edge_node_connectivity"] + self.assertIsInstance(edge_node, dict) + self.assertCountEqual(edge_node.keys(), ["dimensions", "variables"]) + self.assertEqual(edge_node["dimensions"], {}) + + edge_node_vars = edge_node["variables"] + self.assertIsInstance(edge_node_vars, dict) + self.assertCountEqual(edge_node_vars.keys(), ["Mesh2_edge_nodes"]) + + edge_nodes = edge_node_vars["Mesh2_edge_nodes"] + self.assertIsInstance(edge_nodes, dict) + self.assertCountEqual(edge_nodes.keys(), ["attributes", "dimensions"]) + self.assertEqual( + edge_nodes["dimensions"], + { + "Two": {"size": 2}, + "nMesh2_edge": {"size": 9}, + }, + ) + + edge_nodes_attrs = edge_nodes["attributes"] + self.assertIsInstance(edge_nodes_attrs, dict) + self.assertCountEqual(edge_nodes_attrs.keys(), ["standard_name"]) + + edge_sn = edge_nodes_attrs["standard_name"] + self.assertIsInstance(edge_sn, list) + self.assertEqual(len(edge_sn), 1) + self.assertEqual( + edge_sn[0], + { + "code": expected_code, + "reason": expected_reason, + "value": "badname_Mesh2_edge_nodes", + }, + ) + + # ======================================================= + # Field 3/3: face_face_connectivity (3/4) + # ======================================================= + face_face = mesh2_attributes["face_face_connectivity"] + self.assertIsInstance(face_face, dict) + self.assertCountEqual(face_face.keys(), ["dimensions", "variables"]) + self.assertEqual(face_face["dimensions"], {}) + + face_face_vars = face_face["variables"] + self.assertIsInstance(face_face_vars, dict) + self.assertCountEqual(face_face_vars.keys(), ["Mesh2_face_links"]) + + face_links = face_face_vars["Mesh2_face_links"] + self.assertIsInstance(face_links, dict) + 
self.assertCountEqual(face_links.keys(), ["attributes", "dimensions"]) + self.assertEqual( + face_links["dimensions"], + { + "Four": {"size": 4}, + "nMesh2_face": {"size": 3}, + }, + ) + + face_links_attrs = face_links["attributes"] + self.assertIsInstance(face_links_attrs, dict) + self.assertCountEqual(face_links_attrs.keys(), ["standard_name"]) + + face_links_sn = face_links_attrs["standard_name"] + self.assertIsInstance(face_links_sn, list) + self.assertEqual(len(face_links_sn), 1) + self.assertEqual( + face_links_sn[0], + { + "code": expected_code, + "reason": expected_reason, + "value": "badname_Mesh2_face_links", + }, + ) + + # ======================================================= + # Field 3/3: face_node_connectivity (4/4) # ======================================================= face_node = mesh2_attributes["face_node_connectivity"] self.assertIsInstance(face_node, dict) From 8e4ae98f2dfd5a0b3ede24385e6198687486daa1 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Fri, 19 Dec 2025 21:51:41 +0000 Subject: [PATCH 93/97] Remove now-redundant tests in test_compliance_checking --- cfdm/test/test_compliance_checking.py | 86 ++------------------------- 1 file changed, 4 insertions(+), 82 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 80306c0cd..8e6ebf774 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -246,28 +246,6 @@ def test_get_all_current_standard_names(self): # This time the alias should be included self.assertIn("moles_of_cfc113_in_atmosphere", aliases_inc_output) - def test_field_dataset_compliance(self): - """Test the `Field.dataset_compliance` method. - - Note: keeping this test here rather than in the test_Field module - because it requires the creation of 'bad' fields e.g. with invalid - standard names, and we create those as temporary files here already. 
- """ - # TODO - - def test_domain_dataset_compliance(self): - """Test the `Domain.dataset_compliance` method. - - Note: keeping this test here rather than in the test_Domain module - because it requires the creation of 'bad' fields e.g. with invalid - standard names, and we create those as temporary files here already. - """ - # TODO - - def test_check_standard_names(self): - """Test the `NetCDFRead._check_standard_names` method.""" - # TODO - def test_standard_names_validation_compliant_field(self): """Test compliance checking on a compliant non-UGRID field.""" f = self.good_snames_general_field @@ -277,11 +255,11 @@ def test_standard_names_validation_compliant_field(self): def test_standard_names_validation_noncompliant_field(self): """Test compliance checking on a non-compliant non-UGRID field.""" expected_reason = ( - "standard_name attribute " - "has a value that is not a valid name contained " - "in the current standard name table" + "standard_name attribute has a value that is not a " + "valid name contained in the current standard name table" ) expected_code = 400022 + # Excludes attribute which we expect in there but depends on varname # so add that expected key in during the iteration over varnames expected_noncompl_dict = { @@ -295,63 +273,7 @@ def test_standard_names_validation_noncompliant_field(self): print("----------------- TEST 1 NON UGRID ---------------------") pprint(dc_output) - # SLB DEV - # from pprint import pprint - # pprint(dc_output) - - # 'ta' is the field variable we test on - self.assertIn("non-compliance", dc_output["ta"]) - noncompliance = dc_output["ta"]["non-compliance"] - - expected_keys = [ - # itself? 
"ta", - # fails "atmosphere_hybrid_height_coordinate", - "atmosphere_hybrid_height_coordinate_bounds", - "latitude_1", - "longitude_1", - "time", - # SOLVED, DIM COORDS fails "x", - # POSSIBLY SOLVED, DIM COORDS fails "x_bnds" - # SOLVED, DIM COORDS fails "y", - # POSSIBLY SOLVED, DIM COORDS fails "y_bnds", - # fails "b", - "b_bounds", - # fails "surface_altitude", - # fails "rotated_latitude_longitude", - "auxiliary", - "cell_measure", # ATTRIBUTES FIX SHOULDN'T APPEAR - "air_temperature_standard_error", - ] - for varname in expected_keys: - noncompl_dict = noncompliance.get(varname) - self.assertIsNotNone( - noncompl_dict, - msg=f"Empty non-compliance for variable '{varname}'" - ) - self.assertIsInstance(noncompl_dict, list) - self.assertEqual(len(noncompl_dict), 1) - - # Safe to unpack after test above - noncompl_dict = noncompl_dict[0] - - self.assertIn("code", noncompl_dict) - self.assertEqual(noncompl_dict["code"], expected_code) - self.assertIn("reason", noncompl_dict) - self.assertEqual(noncompl_dict["reason"], expected_reason) - - # Form expected attribute which needs the varname and bad name - expected_attribute = { - f"{varname}:standard_name": f"badname_{varname}" - } - expected_noncompl_dict["attribute"] = expected_attribute - - self.assertIn("attribute", noncompl_dict) - self.assertEqual(noncompl_dict["attribute"], expected_attribute) - - # Final check to ensure there isn't anything else in there. - # If keys are missing will be reported to fail more spefically - # on per-key-value checks above - self.assertEqual(noncompl_dict, expected_noncompl_dict) + # TODO def test_standard_names_validation_compliant_ugrid_field(self): """Test compliance checking on a compliant UGRID field.""" From 37eb6655004fa69ef8aa7cfa41f5dc4208f6a961 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Fri, 19 Dec 2025 22:14:23 +0000 Subject: [PATCH 94/97] Compliance checking: update tests by defining new class constants --- cfdm/test/test_compliance_checking.py | 74 +++++++++++---------------- 1 file changed, 31 insertions(+), 43 deletions(-) diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index 8e6ebf774..dd4652662 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -87,6 +87,12 @@ class ComplianceCheckingTest(unittest.TestCase): ) bad_ugrid_sn_fields = cfdm.read(bad_names_ugrid_file_path) + bad_sn_expected_reason = ( + "standard_name attribute has a value that is not a " + "valid name contained in the current standard name table" + ) + bad_sn_expected_code = 400022 + def setUp(self): """Preparations called immediately before each test method.""" # Disable log messages to silence expected warnings @@ -254,20 +260,8 @@ def test_standard_names_validation_compliant_field(self): def test_standard_names_validation_noncompliant_field(self): """Test compliance checking on a non-compliant non-UGRID field.""" - expected_reason = ( - "standard_name attribute has a value that is not a " - "valid name contained in the current standard name table" - ) - expected_code = 400022 - - # Excludes attribute which we expect in there but depends on varname - # so add that expected key in during the iteration over varnames - expected_noncompl_dict = { - "code": expected_code, - "reason": expected_reason, - } - f = self.bad_snames_general_field + cfdm.write(f, "kitchen-sink-field.-bad-names.nc") dc_output = f.dataset_compliance() print("----------------- TEST 1 NON UGRID ---------------------") @@ -283,12 +277,6 @@ def test_standard_names_validation_compliant_ugrid_field(self): def test_standard_names_validation_noncompliant_ugrid_fields(self): """Test compliance checking on non-compliant UGRID fields.""" - expected_reason = ( - "standard_name attribute has a value that is not a 
" - "valid name contained in the current standard name table" - ) - expected_code = 400022 - # SLB DEV # TODO add error to run to say need to run 'create_test_files' @@ -336,8 +324,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): self.assertEqual( pa_standard_name[0], { - "code": expected_code, - "reason": expected_reason, + "code": self.bad_sn_expected_code, + "reason": self.bad_sn_expected_reason, "value": "badname_Mesh2", }, ) @@ -411,8 +399,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): self.assertEqual( edge_sn[0], { - "code": expected_code, - "reason": expected_reason, + "code": self.bad_sn_expected_code, + "reason": self.bad_sn_expected_reason, "value": "badname_Mesh2_edge_nodes", }, ) @@ -450,8 +438,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): self.assertEqual( face_links_sn[0], { - "code": expected_code, - "reason": expected_reason, + "code": self.bad_sn_expected_code, + "reason": self.bad_sn_expected_reason, "value": "badname_Mesh2_face_links", }, ) @@ -489,8 +477,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): self.assertEqual( face_nodes_sn[0], { - "code": expected_code, - "reason": expected_reason, + "code": self.bad_sn_expected_code, + "reason": self.bad_sn_expected_reason, "value": "badname_Mesh2_face_nodes", }, ) @@ -532,8 +520,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): self.assertEqual( ta_standard_name[0], { - "code": expected_code, - "reason": expected_reason, + "code": self.bad_sn_expected_code, + "reason": self.bad_sn_expected_reason, "value": "badname_Mesh2", }, ) @@ -607,8 +595,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): self.assertEqual( edge_sn[0], { - "code": expected_code, - "reason": expected_reason, + "code": self.bad_sn_expected_code, + "reason": self.bad_sn_expected_reason, "value": "badname_Mesh2_edge_nodes", }, ) @@ -646,8 +634,8 @@ def 
test_standard_names_validation_noncompliant_ugrid_fields(self): self.assertEqual( face_links_sn[0], { - "code": expected_code, - "reason": expected_reason, + "code": self.bad_sn_expected_code, + "reason": self.bad_sn_expected_reason, "value": "badname_Mesh2_face_links", }, ) @@ -685,8 +673,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): self.assertEqual( face_nodes_sn[0], { - "code": expected_code, - "reason": expected_reason, + "code": self.bad_sn_expected_code, + "reason": self.bad_sn_expected_reason, "value": "badname_Mesh2_face_nodes", }, ) @@ -729,8 +717,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): self.assertEqual( v_standard_name[0], { - "code": expected_code, - "reason": expected_reason, + "code": self.bad_sn_expected_code, + "reason": self.bad_sn_expected_reason, "value": "badname_Mesh2", }, ) @@ -804,8 +792,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): self.assertEqual( edge_sn[0], { - "code": expected_code, - "reason": expected_reason, + "code": self.bad_sn_expected_code, + "reason": self.bad_sn_expected_reason, "value": "badname_Mesh2_edge_nodes", }, ) @@ -843,8 +831,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): self.assertEqual( face_links_sn[0], { - "code": expected_code, - "reason": expected_reason, + "code": self.bad_sn_expected_code, + "reason": self.bad_sn_expected_reason, "value": "badname_Mesh2_face_links", }, ) @@ -882,8 +870,8 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): self.assertEqual( face_nodes_sn[0], { - "code": expected_code, - "reason": expected_reason, + "code": self.bad_sn_expected_code, + "reason": self.bad_sn_expected_reason, "value": "badname_Mesh2_face_nodes", }, ) From 73e8d2c80eb05b354a9345572f204c82d0e88a9f Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Fri, 19 Dec 2025 23:54:05 +0000 Subject: [PATCH 95/97] Include missing component inclusion & tidy placeholder/dev comments --- cfdm/read_write/netcdf/netcdfread.py | 39 ++++++++++++++-------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index a9f56eb98..cf51fb74c 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5187,13 +5187,12 @@ def _create_field_or_domain( # ------------------------------------------------------------- # Add the structural read report to the field/domain dataset_compliance = g["dataset_compliance"][field_ncvar] - components = dataset_compliance - if components: - dataset_compliance = {field_ncvar: dataset_compliance} + if dataset_compliance: + field_compliance = {field_ncvar: dataset_compliance} else: - dataset_compliance = {} + field_compliance = {} - self.implementation.set_dataset_compliance(f, dataset_compliance) + self.implementation.set_dataset_compliance(f, field_compliance) # Return the finished field/domain return f @@ -5662,14 +5661,22 @@ def _add_message( }) # Process issues emerging on or via attributes - g["dataset_compliance"].setdefault(top_ancestor_ncvar, {}) - g["dataset_compliance"][top_ancestor_ncvar].update( - var_noncompliance_info) + #g["dataset_compliance"].setdefault(ncvar, {}) + #g["dataset_compliance"][ncvar].update( + # var_noncompliance_info) + #self._update_noncompliance_dict( + # g["component_report"], ncvar, top_ancestor_ncvar, attribute_name, + # var_noncompliance_info, + #) + self._update_noncompliance_dict( + g["dataset_compliance"], ncvar, top_ancestor_ncvar, attribute_name, + var_noncompliance_info, + ) if direct_parent_ncvar: # Dicts are optimised for key-value lookup, but this requires # value-key lookup - find a better way to get relevant attr using - # functionlity in this module + # functionality in this module varattrs = 
g["variable_attributes"][direct_parent_ncvar] reverse_varattrs = {v: k for k, v in varattrs.items()} store_attr = reverse_varattrs[ncvar] @@ -8429,8 +8436,10 @@ def _copy_construct(self, construct_type, parent_ncvar, ncvar): if component_report is not None: for var, report in component_report.items(): - g["dataset_compliance"][parent_ncvar].setdefault( - var, []).extend(report) + self._update_noncompliance_dict( + g["dataset_compliance"], ncvar, parent_ncvar, "PLACEHOLDER", + component_report + ) return self.implementation.copy_construct(g[construct_type][ncvar]) @@ -10213,7 +10222,6 @@ def _ugrid_parse_mesh_topology(self, mesh_ncvar, attributes): g["mesh"][mesh_ncvar] = mesh - # Y def _ugrid_parse_location_index_set(self, parent_attributes): """Parse a UGRID location index set variable. @@ -10277,7 +10285,6 @@ def _ugrid_parse_location_index_set(self, parent_attributes): mesh_id=uuid4().hex, ) - # Y def _ugrid_create_auxiliary_coordinates( self, parent_ncvar, @@ -10506,7 +10513,6 @@ def _ugrid_create_bounds_from_nodes( return aux - # Y def _ugrid_create_domain_topology(self, parent_ncvar, f, mesh, location): """Create a domain topology construct. @@ -10776,7 +10782,6 @@ def _ugrid_cell_dimension(self, location, connectivity_ncvar, mesh): return cell_dim - # Y def _ugrid_check_mesh_topology(self, mesh_ncvar): """Check a UGRID mesh topology variable. 
@@ -11039,7 +11044,6 @@ def _ugrid_check_mesh_topology(self, mesh_ncvar): return ok - # Y def _ugrid_check_location_index_set( self, location_index_set_ncvar, @@ -11144,7 +11148,6 @@ def _ugrid_check_location_index_set( return ok - # Y def _ugrid_check_field_location_index_set( self, parent_ncvar, @@ -11287,7 +11290,6 @@ def _ugrid_check_field_location_index_set( ) return ok - # Y def _ugrid_check_field_mesh( self, parent_ncvar, @@ -11379,7 +11381,6 @@ def _ugrid_check_field_mesh( ) return ok - # Y def _ugrid_check_connectivity_variable( self, parent_ncvar, mesh_ncvar, connectivity_ncvar, connectivity_attr ): From 1467e6c26a7c81ebc21c1b4d9b951dc94b9d1864 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Mon, 5 Jan 2026 23:46:08 +0000 Subject: [PATCH 96/97] Include missing component report to fix non-UGRID output + tidy --- cfdm/read_write/netcdf/netcdfread.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index cf51fb74c..05bf0d0ac 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -5661,17 +5661,11 @@ def _add_message( }) # Process issues emerging on or via attributes - #g["dataset_compliance"].setdefault(ncvar, {}) - #g["dataset_compliance"][ncvar].update( - # var_noncompliance_info) - #self._update_noncompliance_dict( - # g["component_report"], ncvar, top_ancestor_ncvar, attribute_name, - # var_noncompliance_info, - #) self._update_noncompliance_dict( g["dataset_compliance"], ncvar, top_ancestor_ncvar, attribute_name, var_noncompliance_info, ) + self._include_component_report(ncvar, top_ancestor_ncvar, attribute_name) if direct_parent_ncvar: # Dicts are optimised for key-value lookup, but this requires From 100846da59e175a58d18ab78e11f16c727f68cf8 Mon Sep 17 00:00:00 2001 From: "Sadie L. 
Bartholomew" Date: Mon, 5 Jan 2026 23:48:43 +0000 Subject: [PATCH 97/97] Add TODO note to netcdfread module for investigation --- cfdm/read_write/netcdf/netcdfread.py | 2 ++ cfdm/test/test_compliance_checking.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/cfdm/read_write/netcdf/netcdfread.py b/cfdm/read_write/netcdf/netcdfread.py index 05bf0d0ac..f9013d2cf 100644 --- a/cfdm/read_write/netcdf/netcdfread.py +++ b/cfdm/read_write/netcdf/netcdfread.py @@ -8430,6 +8430,8 @@ def _copy_construct(self, construct_type, parent_ncvar, ncvar): if component_report is not None: for var, report in component_report.items(): + # SLB TODO what should the placeholder be replaced by - should + # we include attribute as input too? self._update_noncompliance_dict( g["dataset_compliance"], ncvar, parent_ncvar, "PLACEHOLDER", component_report diff --git a/cfdm/test/test_compliance_checking.py b/cfdm/test/test_compliance_checking.py index dd4652662..adf2a66e8 100644 --- a/cfdm/test/test_compliance_checking.py +++ b/cfdm/test/test_compliance_checking.py @@ -286,6 +286,9 @@ def test_standard_names_validation_noncompliant_ugrid_fields(self): dc_output_2 = f2.dataset_compliance() dc_output_3 = f3.dataset_compliance() + print("----------------- TEST 2 UGRID ---------------------") + pprint(dc_output_1) + # TODO see from below that not all bad names get set - but want # that, so should update create_test_files method to set on all # for bad case.