diff --git a/python/lib/core/dmod/core/_version.py b/python/lib/core/dmod/core/_version.py index 7320e64e1..ccf9e6286 100644 --- a/python/lib/core/dmod/core/_version.py +++ b/python/lib/core/dmod/core/_version.py @@ -1 +1 @@ -__version__ = '0.7.1' \ No newline at end of file +__version__ = '0.8.0' \ No newline at end of file diff --git a/python/lib/core/dmod/core/meta_data.py b/python/lib/core/dmod/core/meta_data.py index dbf6c494d..9290ac0bf 100644 --- a/python/lib/core/dmod/core/meta_data.py +++ b/python/lib/core/dmod/core/meta_data.py @@ -142,6 +142,52 @@ class DataFormat(PydanticEnum): # TODO: consider whether something to indicate the time step size is necessary # TODO: need format specifically for Nextgen model output (i.e., for evaluations) + @classmethod + def can_format_fulfill(cls, needed: 'DataFormat', alternate: 'DataFormat') -> bool: + """ + Test whether data in an alternate format is capable of satisfying requirements of some other format. + + This function indicates whether data in one format (the alternate format) is compatible with requirements + specified using a different format (the needed format). It is an indication of whether data is **potentially** + capable of satisfying a requirement - even if the data formats of the two are not the same - due to the two + formats being sufficiently similar. + + For example, the NextGen framework can support forcings in either CSV or NetCDF formats, represented as + ``AORC_CSV`` and ``NETCDF_FORCING_CANONICAL`` respectively. A job to execute NextGen would include a forcing + :class:`DataRequirement` associated (albeit indirectly) with a particular format, with that being one of the + aforementioned values. However, even if the ``AORC_CSV`` data format was in the requirement, data in the + ``NETCDF_FORCING_CANONICAL`` format would be perfectly satisfactory (assuming it otherwise provided what the + job needed).
+ + Note that the following **is not guaranteed** for all values of ``f_1`` and ``f_2`` (though it will often be the + case): + + ``can_format_fulfill(needed=f_1, alternate=f_2) == can_format_fulfill(needed=f_2, alternate=f_1)`` + + It is guaranteed that ``can_format_fulfill(needed=f_1, alternate=f_1)`` is ``True``. + + Parameters + ---------- + needed : DataFormat + The format defined by some requirement. + alternate : DataFormat + An alternate format for data. + + Returns + ------- + bool + Whether the alternate format is compatible with the needed format. + """ + # Always return True for when the params are the same format + if needed == alternate: + return True + # For these forcing formats, they will all be compatible with each other + compatible_forcing_formats = {cls.AORC_CSV, cls.NETCDF_FORCING_CANONICAL, cls.NETCDF_AORC_DEFAULT} + if needed in compatible_forcing_formats and alternate in compatible_forcing_formats: + return True + # Anything else is not compatible + return False + @classmethod def get_for_name(cls, name_str: str) -> Optional['DataFormat']: cleaned_up_str = name_str.strip().upper() @@ -642,7 +688,7 @@ def contains(self, other: Union[ContinuousRestriction, DiscreteRestriction, 'Dat return self._extends_continuous_restriction(other) elif isinstance(other, DiscreteRestriction): return self._extends_discrete_restriction(other) - elif self.data_format != other.data_format: + elif not DataFormat.can_format_fulfill(needed=other.data_format, alternate=self.data_format): return False else: for index in other.continuous_restrictions: diff --git a/python/services/dataservice/dmod/dataservice/service.py b/python/services/dataservice/dmod/dataservice/service.py index bd69aca02..84c4ceea6 100644 --- a/python/services/dataservice/dmod/dataservice/service.py +++ b/python/services/dataservice/dmod/dataservice/service.py @@ -989,28 +989,46 @@ def find_dataset_for_requirement(self, requirement: DataRequirement) -> Optional # Keep track of a few things for
logging purposes datasets_count_match_category = 0 datasets_count_match_format = 0 + # Keep those of the right category but wrong format, in case one is needed and satisfactory + potentially_compatible_alternates: List[Dataset] = [] for name, dataset in self.get_known_datasets().items(): # Skip anything with the wrong category if dataset.category != requirement.category: continue - else: - datasets_count_match_category += 1 - # ... or a different format + # Keep track of how many of the right category there were for error purposes + datasets_count_match_category += 1 + + # Skip (for now at least) anything with a different format (though set aside if potentially compatible) if dataset.data_format != requirement.domain.data_format: + # Check if this format could fulfill + if DataFormat.can_format_fulfill(needed=requirement.domain.data_format, alternate=dataset.data_format): + # We will return to examine these if no dataset qualifies that has the exact format in requirement + potentially_compatible_alternates.append(dataset) continue - else: - datasets_count_match_format += 1 + # When a dataset matches, keep track for error counts, and then test to see if it qualifies + datasets_count_match_format += 1 + # TODO: need additional test of some kind for cases when the requirement specifies "any" (e.g., "any" + # catchment (from hydrofabric) in realization config, for finding a forcing dataset) if dataset.data_domain.contains(requirement.domain): return dataset + # At this point, no datasets qualify against the exact domain (including format) of the requirement + # However, before failing, check if any have different, but compatible format, and otherwise qualify + for dataset in potentially_compatible_alternates: + if dataset.data_domain.contains(requirement.domain): + return dataset + + # Before failing, treat the count of alternates as being of the same format, for error messaging purposes + datasets_count_match_format += len(potentially_compatible_alternates) + if 
datasets_count_match_category == 0: msg = "Could not fill requirement for '{}': no datasets for this category" logging.error(msg.format(requirement.category.name)) elif datasets_count_match_format == 0: - msg = "Could not fill requirement with '{}' format domain: no datasets found this format" + msg = "Could not fill requirement with '{}' format domain: no datasets found this (or compatible) format" logging.error(msg.format(requirement.domain.data_format.name)) else: msg = "Could not find dataset meeting all restrictions of requirement: {}" diff --git a/python/services/dataservice/setup.py b/python/services/dataservice/setup.py index d065bfedc..4d7a54bd3 100644 --- a/python/services/dataservice/setup.py +++ b/python/services/dataservice/setup.py @@ -17,8 +17,8 @@ author_email='', url='', license='', - install_requires=["dmod-core>=0.5.0", "dmod-communication>=0.13.0", "dmod-scheduler>=0.10.0", - "dmod-modeldata>=0.9.0", 'redis', "pydantic", "fastapi", "uvicorn[standard]", + install_requires=['dmod-core>=0.8.0', 'dmod-communication>=0.13.0', 'dmod-scheduler>=0.10.0', + 'dmod-modeldata>=0.9.0', 'redis', "pydantic", "fastapi", "uvicorn[standard]", "ngen-config>=0.1.1"], packages=find_namespace_packages(exclude=['dmod.test', 'deprecated', 'conf', 'schemas', 'ssl', 'src']) )