NOAA-OWP · aaraney · Apr 4, 2024 · Mar 28, 2024 · Mar 28, 2024 · Apr 2, 2024
diff --git a/data/example_hydrofabric_2/hydrofabric.gpkg b/data/example_hydrofabric_2/hydrofabric.gpkg
diff --git a/python/lib/core/dmod/core/_version.py b/python/lib/core/dmod/core/_version.py
@@ -1 +1 @@
-__version__ = '0.13.1'
+__version__ = '0.14.0'
diff --git a/python/lib/core/dmod/core/common/reader.py b/python/lib/core/dmod/core/common/reader.py
@@ -1,7 +1,21 @@
 from typing_extensions import Protocol, runtime_checkable
+from os import SEEK_SET
 
 
 @runtime_checkable
 class Reader(Protocol):
     def read(self, size: int = -1, /) -> bytes:
         """EOF if empty b''."""
+
+
+@runtime_checkable
+class Seeker(Protocol):
+    def seek(self, offset: int, whence: int = SEEK_SET) -> int:
+        """ Change the position to the given offset, returning the absolute position. """
+
+
+@runtime_checkable
+class ReadSeeker(Reader, Seeker, Protocol):
+    """
+    A :class:`Reader` capable of changing the position from which it is reading.
+    """
diff --git a/python/lib/core/dmod/core/dataset.py b/python/lib/core/dmod/core/dataset.py
@@ -198,6 +198,19 @@ def cond_eq(a, b):
             and cond_eq(self.uuid, other.uuid)
         )
 
+    def __hash__(self):
+        members = [
+            self.__class__.__name__,
+            self.name,
+            self.category.name,
+            str(hash(self.data_domain)),
+            self.access_location,
+            str(self.is_read_only),
+            str(hash(self.created_on)),
+        ]
+        description = ",".join(members)
+        return hash(description)
+
     @property
     def manager(self) -> Optional[DatasetManager]:
         """
@@ -242,19 +255,6 @@ def set_manager(self, value: Union[DatasetManager, None]):
         self._manager = value
         self.manager_uuid = value.uuid
 
-    def __hash__(self):
-        members = [
-            self.__class__.__name__,
-            self.name,
-            self.category.name,
-            str(hash(self.data_domain)),
-            self.access_location,
-            str(self.is_read_only),
-            str(hash(self.created_on)),
-        ]
-        description = ",".join(members)
-        return hash(description)
-
     def _set_expires(self, new_expires: datetime):
         """
         "Private" function to set the ::attribute:`expires` property.
@@ -529,8 +529,6 @@ def __init__(self, uuid: Optional[UUID] = None, datasets: Optional[Dict[str, Dat
         self._errors = []
         """ A property attribute to hold errors encountered during operations. """
 
-    # TODO: implement functions and routines for scrubbing temporary datasets as needed
-
     @abstractmethod
     def add_data(self, dataset_name: str, dest: str, data: Optional[Union[bytes, Reader]] = None, source: Optional[str] = None,
                  is_temp: bool = False, **kwargs) -> bool:
@@ -848,7 +846,7 @@ def is_managed_dataset(self, dataset: Dataset) -> bool:
         if dataset.manager is None and self.uuid == dataset.manager_uuid:
             dataset.set_manager(self)
 
-        return
+        return dataset.manager_uuid == self.uuid
 
     def link_user(self, user: DatasetUser, dataset: Dataset) -> bool:
         """

diff --git a/python/lib/core/dmod/core/meta_data.py b/python/lib/core/dmod/core/meta_data.py
@@ -42,6 +42,10 @@ class StandardDatasetIndex(str, PydanticEnum):
     """ Index for the name of a data file within a dataset. """
     COMPOSITE_SOURCE_ID = (9, str, "COMPOSITE_SOURCE_ID")
     """ Index for DATA_ID values of source dataset(s) when dataset is composite format and derives from others. """
+    HYDROFABRIC_VERSION = (10, str, "HYDROFABRIC_VERSION")
+    """ Version string for version of the hydrofabric to use (e.g., 2.0.1). """
+    HYDROFABRIC_REGION = (11, str, "HYDROFABRIC_REGION")
+    """ Region string (e.g., conus, vpu01) for the applicable region of the hydrofabric. """
 
     def __new__(cls, index: int, ty: type, name: str):
         o = str.__new__(cls, name)
@@ -90,8 +94,8 @@ class DataFormat(PydanticEnum):
     index that can be used to distinguish the collections, so that the right data can be identified.
     """
     AORC_CSV = (0,
-                {StandardDatasetIndex.CATCHMENT_ID: None, StandardDatasetIndex.TIME: ""},
-                {"": datetime, "APCP_surface": float, "DLWRF_surface": float, "DSWRF_surface": float,
+                {StandardDatasetIndex.CATCHMENT_ID: None, StandardDatasetIndex.TIME: "Time"},
+                {"Time": datetime, "APCP_surface": float, "DLWRF_surface": float, "DSWRF_surface": float,
                  "PRES_surface": float, "SPFH_2maboveground": float, "TMP_2maboveground": float,
                  "UGRD_10maboveground": float, "VGRD_10maboveground": float, "precip_rate": float},
                 True
@@ -184,6 +188,38 @@ class DataFormat(PydanticEnum):
     T_ROUTE_CONFIG = (13, {StandardDatasetIndex.DATA_ID: None, StandardDatasetIndex.HYDROFABRIC_ID: None}, None, False)
     """ Format for t-route application configuration. """
 
+    NGEN_GEOPACKAGE_HYDROFABRIC_V2 = (14,
+                                      {StandardDatasetIndex.CATCHMENT_ID: "divide_id",
+                                       StandardDatasetIndex.HYDROFABRIC_ID: None,
+                                       StandardDatasetIndex.HYDROFABRIC_REGION: None,
+                                       StandardDatasetIndex.HYDROFABRIC_VERSION: None},
+                                      {"fid": int, "divide_id": str, "geom": Any, "toid": str, "type": str,
+                                       "ds_id": float, "areasqkm": float, "id": str, "lengthkm": float,
+                                       "tot_drainage_areasqkm": float, "has_flowline": bool},
+                                      )
+    """ GeoPackage hydrofabric format v2 used by NextGen (divide_id is catchment id). """
+
+    EMPTY = (15, {}, None, False)
+    """
+    "Format" for an empty dataset that, having no data (yet), doesn't have (or need) an applicable defined structure.
+
+    The intent of this is for simplicity when creating a dataset.  This format represents a type of dataset that doesn't,
+    and importantly, **cannot** yet truly have a more specific format that matches its contents.  A key implication is
+    an expectation that the domain of the dataset (including the format) **must** be changed as soon as any data is
+    added to the dataset.
+    """
+
+    GENERIC = (16, {}, None, False)
+    """ 
+    Format without any indications or restrictions on the defined structure of contained data. 
+
+    This value is very much like ``EMPTY`` except that it is applicable to non-empty datasets.  It represents absolutely
+    nothing about the structure of any contents, and thus that absolutely anything can be contained or added.  In
+    practice, the main intended difference from ``EMPTY`` is that datasets in this format will not be required to update
+    their data domain at the time new data is added (while not applicable to ``EMPTY``, the same is true when any data
+    is removed).
+    """
+
     @classmethod
     def can_format_fulfill(cls, needed: 'DataFormat', alternate: 'DataFormat') -> bool:
         """
@@ -325,7 +361,9 @@ class ContinuousRestriction(Serializable):
 
     variable: StandardDatasetIndex
     begin: datetime
+    """ An inclusive beginning value. """
     end: datetime
+    """ An exclusive end value. """
     datetime_pattern: Optional[str]
     subclass: str = None
     """
@@ -439,9 +477,6 @@ def convert_truncated_serial_form(cls, truncated_json_obj: dict, datetime_format
 
         return json_copy
 
-    def __hash__(self) -> int:
-        return hash((self.variable.name, self.begin, self.end))
-
     def contains(self, other: 'ContinuousRestriction') -> bool:
         """
         Whether this object contains all the values of the given object and the two are of the same index.
@@ -492,6 +527,11 @@ def __init__(
         if allow_reorder:
             self.values.sort()
 
+    def __eq__(self, other):
+        if not isinstance(other, DiscreteRestriction):
+            return False
+        return self.variable == other.variable and sorted(self.values) == sorted(other.values)
+
     def __hash__(self) -> int:
         return hash((self.variable.name, *self.values))
 
@@ -551,7 +591,22 @@ def is_all_possible_values(self) -> bool:
 
 class DataDomain(Serializable):
     """
-    A domain for a dataset, with domain-defining values contained by one or more discrete and/or continuous components.
+    A domain for some collection of data, with defining values contained by discrete and/or continuous components.
+
+    A definition for the domain of some kind of collection of data.  The collection may be something more concrete, like
+    a ::class:`Dataset` instance, or more abstract, like forcing data sufficient to run a requested model execution.
+
+    The definition consists of details on the structure and content of the data within the collection.  Structure is
+    represented by a ::class:`DataFormat` attribute, and contents are represented by collections of
+    ::class:`ContinuousRestriction` and ::class:`DiscreteRestriction` objects.
+
+    While a domain may have any number of continuous or discrete restrictions individually, combined it must have at
+    least one, or validation will fail.
+
+    There is a notion of whether a domain "contains" certain described data.  This described data can be a simple
+    description of some data index and its values, fundamentally the definition of ::class:`ContinuousRestriction` and
+    ::class:`DiscreteRestriction` objects.  The described data can also be more complex, like another fully defined
+    domain.  A function is provided by the type for performing such tests.
     """
     data_format: DataFormat = Field(
     description="The format for the data in this domain, which contains details like the indices and other data fields."
@@ -622,12 +677,17 @@ def handle_type_map(t):
 
     @root_validator()
     def validate_sufficient_restrictions(cls, values):
+        data_format = values.get("data_format")
+        if data_format == DataFormat.EMPTY or data_format == DataFormat.GENERIC:
+            return values
         continuous_restrictions = values.get("continuous_restrictions", {})
         discrete_restrictions = values.get("discrete_restrictions", {})
-        if len(continuous_restrictions) + len(discrete_restrictions) == 0:
-            msg = "Cannot create {} without at least one finite continuous or discrete restriction"
-            raise RuntimeError(msg.format(cls.__name__))
-        return values
+        if len(continuous_restrictions) + len(discrete_restrictions) > 0:
+            return values
+        raise RuntimeError(f"Cannot create {cls.__name__} without at least one finite continuous or discrete "
+                           f"restriction, except when data format is {DataFormat.GENERIC.name} or "
+                           f"{DataFormat.EMPTY.name} (provided value was: "
+                           f"{'None' if data_format is None else data_format.name})")
 
     @classmethod
     def factory_init_from_restriction_collections(cls, data_format: DataFormat, **kwargs) -> 'DataDomain':
@@ -836,6 +896,7 @@ def dict(
         return serial
 
 
+
 class DataCategory(PydanticEnum):
     """
     The general category values for different data.

diff --git a/python/lib/core/dmod/core/serializable.py b/python/lib/core/dmod/core/serializable.py
@@ -366,6 +366,17 @@ class ResultIndicator(Serializable, ABC):
     reason: str = Field(description="A very short, high-level summary of the result.")
     message: str = Field("", description="An optional, more detailed explanation of the result, which by default is an empty string.")
 
+    def __bool__(self) -> bool:
+        """
+        Implementation of truth value testing for instances, which directly depends on the value of ``success``.
+
+        Returns
+        -------
+        bool
+            The current value of the instance's ::attribute:`success` attribute.
+        """
+        return self.success
+
 
 class BasicResultIndicator(ResultIndicator):
     """

diff --git a/python/lib/modeldata/dmod/modeldata/_version.py b/python/lib/modeldata/dmod/modeldata/_version.py
@@ -1 +1 @@
-__version__ = '0.9.5'
+__version__ = '0.10.0'
diff --git a/python/lib/modeldata/dmod/modeldata/hydrofabric/geopackage_hydrofabric.py b/python/lib/modeldata/dmod/modeldata/hydrofabric/geopackage_hydrofabric.py
@@ -1,4 +1,4 @@
-import fiona
+import pyogrio
 import geopandas as gpd
 import hashlib
 from pandas.util import hash_pandas_object
@@ -316,22 +316,25 @@ class GeoPackageHydrofabric(Hydrofabric):
     #_FLOWPATHS_TO_NEX_COL = 'toid'
 
     _DIVIDES_LAYER_NAME = 'divides'
-    _DIVIDES_CAT_ID_COL = 'id'
+    _DIVIDES_CAT_ID_COL = 'divide_id'
     _DIVIDES_TO_NEX_COL = 'toid'
 
     _NEXUS_LAYER_NAME = 'nexus'
     _NEXUS_NEX_ID_COL = 'id'
     _NEXUS_TO_CAT_COL = 'toid'
 
     @classmethod
-    def from_file(cls, geopackage_file: Union[str, Path], vpu: Optional[int] = None, is_conus: bool = False) -> 'GeoPackageHydrofabric':
+    def from_file(cls, geopackage_file: Union[str, Path, bytes], vpu: Optional[int] = None, is_conus: bool = False) -> 'GeoPackageHydrofabric':
         """
-        Initialize a new instance from a GeoPackage file.
+        Initialize a new instance from a GeoPackage file or contents of such a file (as ``bytes``).
+
+        Note that while a warning may appear because of implementation details in ``pyogrio``, this should work
+        perfectly well if passed raw bytes from a file.
 
         Parameters
         ----------
-        geopackage_file: Union[str, Path]
-            The source file for data from which to instantiate.
+        geopackage_file: Union[str, Path, bytes]
+            The source file for data, or raw data from such a file, from which to instantiate.
         vpu: Optional[int]
             The VPU of the hydrofabric to create, if it is known (defaults to ``None``).
         is_conus: bool
@@ -342,7 +345,9 @@ def from_file(cls, geopackage_file: Union[str, Path], vpu: Optional[int] = None,
         GeoPackageHydrofabric
             A new instance of this type.
         """
-        layer_names = fiona.listlayers(geopackage_file)
+        # pyogrio's function returns an ndarray of ndarrays, with inner layer info array containing layer name and type
+        # We only need a list of layer names, though
+        layer_names = [layer_info[0] for layer_info in pyogrio.list_layers(geopackage_file)]
         return cls(layer_names=layer_names,
                    layer_dataframes={ln: gpd.read_file(geopackage_file, layer=ln, engine="pyogrio") for ln in layer_names},
                    vpu=vpu,
@@ -442,8 +447,8 @@ def get_subset_hydrofabric(self, subset: SubsetDefinition) -> 'GeoPackageHydrofa
             # Value[1]: callable no arg function, returning collection of ids for records/rows to include in subset
         subset_query_setups: Dict[str, Tuple[str, Callable[[], Iterable[str]]]] = {
             'flowpaths': ('realized_catchment', lambda: subset.catchment_ids),
-            'divides': ('id', lambda: subset.catchment_ids),
-            'nexus': ('id', lambda: subset.nexus_ids),
+            'divides': (self._DIVIDES_CAT_ID_COL, lambda: subset.catchment_ids),
+            'nexus': (self._NEXUS_NEX_ID_COL, lambda: subset.nexus_ids),
             'flowpath_attributes': ('id', lambda: new_dfs['flowpaths']['id']),
             'flowpath_edge_list': ('id', lambda: new_dfs['flowpaths']['id']),
             'crosswalk': ('id', lambda: new_dfs['flowpaths']['id']),

diff --git a/python/lib/modeldata/setup.py b/python/lib/modeldata/setup.py
@@ -23,7 +23,6 @@
     install_requires=[
         "numpy>=1.20.1",
         "pandas",
-        "fiona",
         "geopandas",
         "dmod-communication>=0.4.2",
         "dmod-core>=0.13.1",

diff --git a/requirements.txt b/requirements.txt
@@ -14,7 +14,6 @@ Deprecated
 cryptography
 flask
 pandas
-fiona
 geopandas
 gitpython
 python-dotenv
@@ -42,7 +41,6 @@ attrs
 Pillow
 Jinja2
 click
-Fiona
 cligj
 munch
 six
-Original file line number
+Diff line change
@@ Expand Up / @@ -14,7 +14,6 @@ Deprecated @@
     cryptography
     flask
     pandas
-    fiona
     geopandas
     gitpython
     python-dotenv
@@ Expand Down Expand Up / @@ -42,7 +41,6 @@ attrs @@
     Pillow
     Jinja2
     click
-    Fiona
     cligj
     munch
     six
@@ Expand Down @@