Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/lib/core/dmod/core/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.7.1'
__version__ = '0.8.0'
48 changes: 47 additions & 1 deletion python/lib/core/dmod/core/meta_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,52 @@ class DataFormat(PydanticEnum):
# TODO: consider whether something to indicate the time step size is necessary
# TODO: need format specifically for Nextgen model output (i.e., for evaluations)

@classmethod
def can_format_fulfill(cls, needed: 'DataFormat', alternate: 'DataFormat') -> bool:
    """
    Test whether data in an alternate format is capable of satisfying requirements of some other format.

    This function indicates whether data in one format (the alternate format) is compatible with requirements
    specified using a different format (the needed format). It is an indication of whether data is **potentially**
    capable of satisfying a requirement - even if the data formats of the two are not the same - due to the two
    formats being sufficiently similar.

    For example, the NextGen framework can support forcings in either CSV or NetCDF formats, represented as
    ``AORC_CSV`` and ``NETCDF_FORCING_CANONICAL`` respectively. A job to execute NextGen would include a forcing
    :class:`DataRequirement` associated (albeit indirectly) with a particular format, with that being one of the
    aforementioned values. However, even if the ``AORC_CSV`` data format was in the requirement, data in the
    ``NETCDF_FORCING_CANONICAL`` format would be perfectly satisfactory (assuming it otherwise provided what the
    job needed).

    Note that the following **is not guaranteed** for all values of ``f_1`` and ``f_2`` (though it will often be the
    case):

    ``can_format_fulfill(needed=f_1, alternate=f_2) == can_format_fulfill(needed=f_2, alternate=f_1)``

    It is guaranteed that ``can_format_fulfill(needed=f_1, alternate=f_1)`` is ``True``.

    Parameters
    ----------
    needed : DataFormat
        The format defined by some requirement.
    alternate : DataFormat
        An alternate format for data.

    Returns
    -------
    bool
        Whether the alternate format is compatible with the needed format.
    """
    # Identical formats trivially fulfill each other (this is the guaranteed reflexive case above)
    if needed == alternate:
        return True
    # These forcing formats are known to be mutually compatible: any one can stand in for any other
    compatible_forcing_formats = {cls.AORC_CSV, cls.NETCDF_FORCING_CANONICAL, cls.NETCDF_AORC_DEFAULT}
    # Any other combination of differing formats is NOT considered compatible
    return needed in compatible_forcing_formats and alternate in compatible_forcing_formats

@classmethod
def get_for_name(cls, name_str: str) -> Optional['DataFormat']:
cleaned_up_str = name_str.strip().upper()
Expand Down Expand Up @@ -642,7 +688,7 @@ def contains(self, other: Union[ContinuousRestriction, DiscreteRestriction, 'Dat
return self._extends_continuous_restriction(other)
elif isinstance(other, DiscreteRestriction):
return self._extends_discrete_restriction(other)
elif self.data_format != other.data_format:
elif not DataFormat.can_format_fulfill(needed=other.data_format, alternate=self.data_format):
return False
else:
for index in other.continuous_restrictions:
Expand Down
30 changes: 24 additions & 6 deletions python/services/dataservice/dmod/dataservice/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -989,28 +989,46 @@ def find_dataset_for_requirement(self, requirement: DataRequirement) -> Optional
# Keep track of a few things for logging purposes
datasets_count_match_category = 0
datasets_count_match_format = 0
# Keep those of the right category but wrong format, in case one is needed and satisfactory
potentially_compatible_alternates: List[Dataset] = []

for name, dataset in self.get_known_datasets().items():
# Skip anything with the wrong category
if dataset.category != requirement.category:
continue
else:
datasets_count_match_category += 1

# ... or a different format
# Keep track of how many of the right category there were for error purposes
datasets_count_match_category += 1

# Skip (for now at least) anything with a different format (though set aside if potentially compatible)
if dataset.data_format != requirement.domain.data_format:
# Check if this format could fulfill
if DataFormat.can_format_fulfill(needed=requirement.domain.data_format, alternate=dataset.data_format):
# We will return to examine these if no dataset qualifies that has the exact format in requirement
potentially_compatible_alternates.append(dataset)
continue
else:
datasets_count_match_format += 1

# When a dataset matches, keep track for error counts, and then test to see if it qualifies
datasets_count_match_format += 1
# TODO: need additional test of some kind for cases when the requirement specifies "any" (e.g., "any"
# catchment (from hydrofabric) in realization config, for finding a forcing dataset)
if dataset.data_domain.contains(requirement.domain):
return dataset

# At this point, no datasets qualify against the exact domain (including format) of the requirement
# However, before failing, check if any have different, but compatible format, and otherwise qualify
for dataset in potentially_compatible_alternates:
if dataset.data_domain.contains(requirement.domain):
return dataset

# Before failing, treat the count of alternates as being of the same format, for error messaging purposes
datasets_count_match_format += len(potentially_compatible_alternates)

if datasets_count_match_category == 0:
msg = "Could not fill requirement for '{}': no datasets for this category"
logging.error(msg.format(requirement.category.name))
elif datasets_count_match_format == 0:
msg = "Could not fill requirement with '{}' format domain: no datasets found this format"
msg = "Could not fill requirement with '{}' format domain: no datasets found this (or compatible) format"
logging.error(msg.format(requirement.domain.data_format.name))
else:
msg = "Could not find dataset meeting all restrictions of requirement: {}"
Expand Down
4 changes: 2 additions & 2 deletions python/services/dataservice/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
author_email='',
url='',
license='',
install_requires=["dmod-core>=0.5.0", "dmod-communication>=0.13.0", "dmod-scheduler>=0.10.0",
"dmod-modeldata>=0.9.0", 'redis', "pydantic", "fastapi", "uvicorn[standard]",
install_requires=['dmod-core>=0.8.0', 'dmod-communication>=0.13.0', 'dmod-scheduler>=0.10.0',
'dmod-modeldata>=0.9.0', 'redis', "pydantic", "fastapi", "uvicorn[standard]",
"ngen-config>=0.1.1"],
packages=find_namespace_packages(exclude=['dmod.test', 'deprecated', 'conf', 'schemas', 'ssl', 'src'])
)