Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/lib/core/dmod/core/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.7.1'
__version__ = '0.8.0'
48 changes: 47 additions & 1 deletion python/lib/core/dmod/core/meta_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,52 @@ class DataFormat(PydanticEnum):
# TODO: consider whether something to indicate the time step size is necessary
# TODO: need format specifically for Nextgen model output (i.e., for evaluations)

@classmethod
def can_format_fulfill(cls, needed: 'DataFormat', alternate: 'DataFormat') -> bool:
    """
    Test whether data in an alternate format is capable of satisfying requirements of some other format.

    This function indicates whether data in one format (the alternate format) is compatible with requirements
    specified using a different format (the needed format). It is an indication of whether data is **potentially**
    capable of satisfying a requirement - even if the data formats of the two are not the same - due to the two
    formats being sufficiently similar.

    For example, the NextGen framework can support forcings in either CSV or NetCDF formats, represented as
    ``AORC_CSV`` and ``NETCDF_FORCING_CANONICAL`` respectively. A job to execute NextGen would include a forcing
    :class:`DataRequirement` associated (albeit indirectly) with a particular format, with that being one of the
    aforementioned values. However, even if the ``AORC_CSV`` data format was in the requirement, data in the
    ``NETCDF_FORCING_CANONICAL`` format would be perfectly satisfactory (assuming it otherwise provided what the
    job needed).

    Note that the following **is not guaranteed** for all values of ``f_1`` and ``f_2`` (though it will often be the
    case):

    ``can_format_fulfill(needed=f_1, alternate=f_2) == can_format_fulfill(needed=f_2, alternate=f_1)``

    It is guaranteed that ``can_format_fulfill(needed=f_1, alternate=f_1)`` is ``True``.

    Parameters
    ----------
    needed : DataFormat
        The format defined by some requirement.
    alternate : DataFormat
        An alternate format for data.

    Returns
    -------
    bool
        Whether the alternate format is compatible with the needed format.
    """
    # Identical formats trivially fulfill each other (this is the guaranteed reflexive case above)
    if needed == alternate:
        return True
    # These forcing formats are known to be mutually compatible: any one can stand in for any other
    compatible_forcing_formats = {cls.AORC_CSV, cls.NETCDF_FORCING_CANONICAL, cls.NETCDF_AORC_DEFAULT}
    # Any other combination of differing formats is NOT considered compatible
    return needed in compatible_forcing_formats and alternate in compatible_forcing_formats

@classmethod
def get_for_name(cls, name_str: str) -> Optional['DataFormat']:
cleaned_up_str = name_str.strip().upper()
Expand Down Expand Up @@ -642,7 +688,7 @@ def contains(self, other: Union[ContinuousRestriction, DiscreteRestriction, 'Dat
return self._extends_continuous_restriction(other)
elif isinstance(other, DiscreteRestriction):
return self._extends_discrete_restriction(other)
elif self.data_format != other.data_format:
elif not DataFormat.can_format_fulfill(needed=other.data_format, alternate=self.data_format):
return False
else:
for index in other.continuous_restrictions:
Expand Down
30 changes: 24 additions & 6 deletions python/services/dataservice/dmod/dataservice/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -989,28 +989,46 @@ def find_dataset_for_requirement(self, requirement: DataRequirement) -> Optional
# Keep track of a few things for logging purposes
datasets_count_match_category = 0
datasets_count_match_format = 0
# Keep those of the right category but wrong format, in case one is needed and satisfactory
potentially_compatible_alternates: List[Dataset] = []

for name, dataset in self.get_known_datasets().items():
# Skip anything with the wrong category
if dataset.category != requirement.category:
continue
else:
datasets_count_match_category += 1

# ... or a different format
# Keep track of how many of the right category there were for error purposes
datasets_count_match_category += 1

# Skip (for now at least) anything with a different format (though set aside if potentially compatible)
if dataset.data_format != requirement.domain.data_format:
# Check if this format could fulfill
if DataFormat.can_format_fulfill(needed=requirement.domain.data_format, alternate=dataset.data_format):
# We will return to examine these if no dataset qualifies that has the exact format in requirement
potentially_compatible_alternates.append(dataset)
continue
else:
datasets_count_match_format += 1

# When a dataset matches, keep track for error counts, and then test to see if it qualifies
datasets_count_match_format += 1
# TODO: need additional test of some kind for cases when the requirement specifies "any" (e.g., "any"
# catchment (from hydrofabric) in realization config, for finding a forcing dataset)
if dataset.data_domain.contains(requirement.domain):
return dataset

# At this point, no datasets qualify against the exact domain (including format) of the requirement
# However, before failing, check if any have different, but compatible format, and otherwise qualify
for dataset in potentially_compatible_alternates:
if dataset.data_domain.contains(requirement.domain):
return dataset

# Before failing, treat the count of alternates as being of the same format, for error messaging purposes
datasets_count_match_format += len(potentially_compatible_alternates)

if datasets_count_match_category == 0:
msg = "Could not fill requirement for '{}': no datasets for this category"
logging.error(msg.format(requirement.category.name))
elif datasets_count_match_format == 0:
msg = "Could not fill requirement with '{}' format domain: no datasets found this format"
msg = "Could not fill requirement with '{}' format domain: no datasets found this (or compatible) format"
logging.error(msg.format(requirement.domain.data_format.name))
else:
msg = "Could not find dataset meeting all restrictions of requirement: {}"
Expand Down
4 changes: 2 additions & 2 deletions python/services/dataservice/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
author_email='',
url='',
license='',
install_requires=["dmod-core>=0.5.0", "dmod-communication>=0.13.0", "dmod-scheduler>=0.10.0",
"dmod-modeldata>=0.9.0", 'redis', "pydantic", "fastapi", "uvicorn[standard]",
install_requires=['dmod-core>=0.8.0', 'dmod-communication>=0.13.0', 'dmod-scheduler>=0.10.0',
'dmod-modeldata>=0.9.0', 'redis', "pydantic", "fastapi", "uvicorn[standard]",
"ngen-config>=0.1.1"],
packages=find_namespace_packages(exclude=['dmod.test', 'deprecated', 'conf', 'schemas', 'ssl', 'src'])
)