databrickslabs
diff --git a/‎dbldatagen/spec/__init__.py‎
Lines changed: 39 additions & 0 deletions b/‎dbldatagen/spec/__init__.py‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎dbldatagen/spec/compat.py‎
Lines changed: 0 additions & 3 deletions b/‎dbldatagen/spec/compat.py‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎dbldatagen/spec/generator_spec.py‎
Lines changed: 1 addition & 99 deletions b/‎dbldatagen/spec/generator_spec.py‎
Lines changed: 1 addition & 99 deletions
diff --git a/‎dbldatagen/spec/output_targets.py‎
Lines changed: 101 additions & 0 deletions b/‎dbldatagen/spec/output_targets.py‎
Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,39 @@
+"""Pydantic-based specification API for dbldatagen.
+
+This module provides Pydantic models and specifications for defining data generation
+in a type-safe, declarative way.
+"""
+
+# Import only the compat layer by default to avoid triggering Spark/heavy dependencies
+from .compat import BaseModel, Field, constr, root_validator, validator
+
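+# Heavy modules are not imported here. The names below are resolved on first access by the
+# module-level __getattr__ defined in this file, or can be imported explicitly when needed:
+# from .column_spec import ColumnSpec
+# from .generator_spec import GeneratorSpec
+# from .generator_spec_impl import GeneratorSpecImpl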
+
+# Public API of the package. The first five names are imported eagerly from the compat layer;
+# "ColumnSpec", "GeneratorSpec" and "GeneratorSpecImpl" are not bound at import time and are
+# resolved by the module-level __getattr__ in this file on first access.
+# NOTE: a star import looks up every name listed here, so `from dbldatagen.spec import *`
+# will trigger the lazy imports as well.
+__all__ = [
+    "BaseModel",
+    "Field",
+    "constr",
+    "root_validator",
+    "validator",
+    "ColumnSpec",
+    "GeneratorSpec",
+    "GeneratorSpecImpl",
+]
+
+
+# Public name -> submodule (relative to this package) that defines it.
+_LAZY_ATTRIBUTES = {
+    "ColumnSpec": ".column_spec",
+    "GeneratorSpec": ".generator_spec",
+    "GeneratorSpecImpl": ".generator_spec_impl",
+}
+
+
+def __getattr__(name):
+    """Lazy import heavy modules to avoid triggering Spark initialization.
+
+    Called by Python only when normal attribute lookup on this module fails (PEP 562).
+    Names listed in ``_LAZY_ATTRIBUTES`` are imported from their submodule on first access
+    and then stored in the module namespace, so subsequent lookups do not reach this hook.
+
+    :param name: Attribute name being looked up on the module
+    :returns: The object called ``name`` defined in the mapped submodule
+    :raises AttributeError: If ``name`` is not one of the lazily provided attributes
+    """
+    submodule = _LAZY_ATTRIBUTES.get(name)
+    if submodule is None:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+    # Imported locally so that this block does not depend on a file-level import.
+    from importlib import import_module
+
+    # In a package __init__, __name__ is the package name, which anchors the relative import.
+    value = getattr(import_module(submodule, __name__), name)
+    # Cache the resolved object as a regular module attribute for later lookups.
+    globals()[name] = value
+    return value
+
@@ -32,9 +32,6 @@
 
 Benefits:
     - **No Installation Required**: Works with whatever Pydantic version is available
-    - **Single Codebase**: One set of code works across both Pydantic versions
-    - **Environment Agnostic**: Application code doesn't need to know which version is installed
-    - **Future-Ready**: Easy migration path to Pydantic V2 API when ready
     - **Databricks Compatible**: Avoids conflicts with pre-installed libraries
 
 Future Migration:
 
@@ -9,105 +9,11 @@
 from dbldatagen.spec.column_spec import ColumnDefinition
 
 from .compat import BaseModel, validator
-
+from .output_targets import UCSchemaTarget, FilePathTarget
 
 logger = logging.getLogger(__name__)
 
 
-class UCSchemaTarget(BaseModel):
-    """Defines a Unity Catalog schema as the output destination for generated data.
-
-    This class represents a Unity Catalog location (catalog.schema) where generated tables
-    will be written. Unity Catalog is Databricks' unified governance solution for data and AI.
-
-    :param catalog: Unity Catalog catalog name where tables will be written
-    :param schema_: Unity Catalog schema (database) name within the catalog
-    :param output_format: Data format for table storage. Defaults to "delta" which is the
-                         recommended format for Unity Catalog tables
-
-    .. note::
-        The schema parameter is named `schema_` (with underscore) to avoid conflict with
-        Python's built-in schema keyword and Pydantic functionality
-
-    .. note::
-        Tables will be written to the location: `{catalog}.{schema_}.{table_name}`
-    """
-    catalog: str
-    schema_: str
-    output_format: str = "delta"  # Default to delta for UC Schema
-
-    @validator("catalog", "schema_")
-    def validate_identifiers(cls, v: str) -> str:
-        """Validates that catalog and schema names are valid identifiers.
-
-        Ensures the identifier is non-empty and follows Python identifier conventions.
-        Issues a warning if the identifier is not a basic Python identifier, as this may
-        cause issues with Unity Catalog.
-
-        :param v: The identifier string to validate (catalog or schema name)
-        :returns: The validated and stripped identifier string
-        :raises ValueError: If the identifier is empty or contains only whitespace
-
-        .. note::
-            This is a Pydantic field validator that runs automatically during model instantiation
-        """
-        if not v.strip():
-            raise ValueError("Identifier must be non-empty.")
-        if not v.isidentifier():
-            logger.warning(
-                f"'{v}' is not a basic Python identifier. Ensure validity for Unity Catalog.")
-        return v.strip()
-
-    def __str__(self) -> str:
-        """Returns a human-readable string representation of the Unity Catalog target.
-
-        :returns: Formatted string showing catalog, schema, format and type
-        """
-        return f"{self.catalog}.{self.schema_} (Format: {self.output_format}, Type: UC Table)"
-
-
-class FilePathTarget(BaseModel):
-    """Defines a file system path as the output destination for generated data.
-
-    This class represents a file system location where generated tables will be written
-    as files. Each table will be written to a subdirectory within the base path.
-
-    :param base_path: Base file system path where table data files will be written.
-                     Each table will be written to {base_path}/{table_name}/
-    :param output_format: File format for data storage. Must be either "csv" or "parquet".
-                         No default value - must be explicitly specified
-
-    .. note::
-        Unlike UCSchemaTarget, this requires an explicit output_format with no default
-
-    .. note::
-        The base_path can be a local file system path, DBFS path, or cloud storage path
-        (e.g., s3://, gs://, abfs://) depending on your environment
-    """
-    base_path: str
-    output_format: Literal["csv", "parquet"]  # No default, must be specified
-
-    @validator("base_path")
-    def validate_base_path(cls, v: str) -> str:
-        """Validates that the base path is non-empty.
-
-        :param v: The base path string to validate
-        :returns: The validated and stripped base path string
-        :raises ValueError: If the base path is empty or contains only whitespace
-
-        .. note::
-            This is a Pydantic field validator that runs automatically during model instantiation
-        """
-        if not v.strip():
-            raise ValueError("base_path must be non-empty.")
-        return v.strip()
-
-    def __str__(self) -> str:
-        """Returns a human-readable string representation of the file path target.
-
-        :returns: Formatted string showing base path, format and type
-        """
-        return f"{self.base_path} (Format: {self.output_format}, Type: File Path)"
 
 
 class TableDefinition(BaseModel):
@@ -342,7 +248,6 @@ def validate(self, strict: bool = True) -> ValidationResult:  # type: ignore[ove
                 )
 
             # Check partitions if specified
-            #TODO: though this can be a model field check, we are checking here so that one can correct
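             # NOTE: this could be a model field check; it is done here so that the caller can
             # correct it from the validation result. Can we find a way to use the default way?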
             if table_def.partitions is not None and table_def.partitions <= 0:
                 result.add_error(
@@ -351,7 +256,6 @@ def validate(self, strict: bool = True) -> ValidationResult:  # type: ignore[ove
                 )
 
             # Check for duplicate column names
-            # TODO: Not something possible if we right model, recheck
             column_names = [col.name for col in table_def.columns]
             duplicates = [name for name in set(column_names) if column_names.count(name) > 1]
             if duplicates:
@@ -361,8 +265,6 @@ def validate(self, strict: bool = True) -> ValidationResult:  # type: ignore[ove
 
             # Build column map for reference checking
             column_map = {col.name: col for col in table_def.columns}
-
-            # TODO: Check baseColumn references, this is tricky? check the dbldefaults
             for col in table_def.columns:
                 if col.baseColumn and col.baseColumn != "id":
                     if col.baseColumn not in column_map:
 
@@ -0,0 +1,101 @@
+from .compat import BaseModel, validator
+from typing import Literal
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class UCSchemaTarget(BaseModel):
+    """Defines a Unity Catalog schema as the output destination for generated data.
+
+    This class represents a Unity Catalog location (catalog.schema) where generated tables
+    will be written. Unity Catalog is Databricks' unified governance solution for data and AI.
+
+    :param catalog: Unity Catalog catalog name where tables will be written
+    :param schema_: Unity Catalog schema (database) name within the catalog
+    :param output_format: Data format for table storage. Defaults to "delta" which is the
+                         recommended format for Unity Catalog tables
+
+    .. note::
+        The schema parameter is named `schema_` (with underscore) to avoid a clash with
+        Pydantic's own `schema` attribute on models. `schema` is not a reserved word in Python.
+
+    .. note::
+        Tables are intended to be written to the location: `{catalog}.{schema_}.{table_name}`.
+        The write itself is not performed by this class.
+    """
+    catalog: str
+    schema_: str
+    output_format: str = "delta"  # Default to delta for UC Schema
+
+    @validator("catalog", "schema_")
+    def validate_identifiers(cls, v: str) -> str:
+        """Validates and normalizes a catalog or schema name.
+
+        Surrounding whitespace is removed first. An empty result is rejected. A result that
+        is not a basic Python identifier is accepted, but a warning is logged because such
+        a name may cause issues with Unity Catalog.
+
+        :param v: The identifier string to validate (catalog or schema name)
+        :returns: The identifier with surrounding whitespace removed
+        :raises ValueError: If the identifier is empty or contains only whitespace
+
+        .. note::
+            This is a Pydantic field validator that runs automatically during model instantiation
+        """
+        identifier = v.strip()
+        if not identifier:
+            raise ValueError("Identifier must be non-empty.")
+        # Check the stripped value: the padding is discarded on return, so padding alone
+        # must not produce a warning, and the message should show the value actually kept.
+        if not identifier.isidentifier():
+            logger.warning(
+                "'%s' is not a basic Python identifier. Ensure validity for Unity Catalog.",
+                identifier)
+        return identifier
+
+    def __str__(self) -> str:
+        """Returns a human-readable string representation of the Unity Catalog target.
+
+        :returns: Formatted string showing catalog, schema, format and type
+        """
+        return f"{self.catalog}.{self.schema_} (Format: {self.output_format}, Type: UC Table)"
+
+
+class FilePathTarget(BaseModel):
+    """Output destination described by a file system path and a file format.
+
+    The model only stores and validates the two settings below; writing the data is
+    done elsewhere.
+
+    :param base_path: Base file system path under which table data files are meant to be
+                     written, one subdirectory per table ({base_path}/{table_name}/)
+    :param output_format: File format for data storage, either "csv" or "parquet".
+                         There is no default, so it must always be supplied
+
+    .. note::
+        UCSchemaTarget defaults its output_format; this class deliberately does not
+
+    .. note::
+        The base_path is an ordinary string, so it may hold a local path, a DBFS path or a
+        cloud storage URI (e.g., s3://, gs://, abfs://) depending on your environment
+    """
+    base_path: str
+    output_format: Literal["csv", "parquet"]  # No default, must be specified
+
+    @validator("base_path")
+    def validate_base_path(cls, v: str) -> str:
+        """Rejects a blank base path and returns it without surrounding whitespace.
+
+        :param v: The base path string to validate
+        :returns: The base path with leading and trailing whitespace removed
+        :raises ValueError: If the base path is empty or contains only whitespace
+
+        .. note::
+            Pydantic invokes this field validator automatically when the model is created
+        """
+        trimmed = v.strip()
+        if trimmed:
+            return trimmed
+        raise ValueError("base_path must be non-empty.")
+
+    def __str__(self) -> str:
+        """Builds the human-readable description of this file path target.
+
+        :returns: Formatted string showing base path, format and type
+        """
+        description = f"{self.base_path} (Format: {self.output_format}, Type: File Path)"
+        return description