99from dbldatagen .spec .column_spec import ColumnDefinition
1010
1111from .compat import BaseModel , validator
12-
12+ from . output_targets import UCSchemaTarget , FilePathTarget
1313
1414logger = logging .getLogger (__name__ )
1515
1616
class UCSchemaTarget(BaseModel):
    """Defines a Unity Catalog schema as the output destination for generated data.

    This class represents a Unity Catalog location (catalog.schema) where generated tables
    will be written. Unity Catalog is Databricks' unified governance solution for data and AI.

    :param catalog: Unity Catalog catalog name where tables will be written
    :param schema_: Unity Catalog schema (database) name within the catalog
    :param output_format: Data format for table storage. Defaults to "delta" which is the
                          recommended format for Unity Catalog tables

    .. note::
        The schema field is named `schema_` (with a trailing underscore) to avoid clashing
        with Pydantic's own `schema` member on model classes

    .. note::
        Tables will be written to the location: `{catalog}.{schema_}.{table_name}`
    """
    catalog: str
    schema_: str
    output_format: str = "delta"  # Default to delta for UC Schema

    @validator("catalog", "schema_")
    def validate_identifiers(cls, v: str) -> str:
        """Validates and normalizes catalog and schema names.

        Surrounding whitespace is stripped first. An empty result is rejected. A result
        that is not a basic Python identifier is accepted, but a warning is logged because
        such names may cause issues with Unity Catalog.

        :param v: The identifier string to validate (catalog or schema name)
        :returns: The stripped identifier string
        :raises ValueError: If the identifier is empty or contains only whitespace

        .. note::
            This is a Pydantic field validator that runs automatically during model instantiation
        """
        # Strip before any check so that padding alone never triggers the warning and the
        # logged name is exactly the value that gets stored on the model.
        identifier = v.strip()
        if not identifier:
            raise ValueError("Identifier must be non-empty.")
        if not identifier.isidentifier():
            logger.warning(
                "'%s' is not a basic Python identifier. Ensure validity for Unity Catalog.",
                identifier,
            )
        return identifier

    def __str__(self) -> str:
        """Returns a human-readable string representation of the Unity Catalog target.

        :returns: Formatted string showing catalog, schema, format and type
        """
        return f"{self.catalog}.{self.schema_} (Format: {self.output_format}, Type: UC Table)"
67-
68-
class FilePathTarget(BaseModel):
    """Defines a file system path as the output destination for generated data.

    This class represents a file system location where generated tables will be written
    as files. Each table will be written to a subdirectory within the base path.

    :param base_path: Base file system path where table data files will be written.
                      Each table will be written to {base_path}/{table_name}/
    :param output_format: File format for data storage. Must be either "csv" or "parquet".
                          No default value - must be explicitly specified

    .. note::
        Unlike UCSchemaTarget, this requires an explicit output_format with no default

    .. note::
        The base_path can be a local file system path, DBFS path, or cloud storage path
        (e.g., s3://, gs://, abfs://) depending on your environment
    """
    base_path: str
    output_format: Literal["csv", "parquet"]  # No default, must be specified

    @validator("base_path")
    def validate_base_path(cls, v: str) -> str:
        """Validates that the base path is non-empty and strips surrounding whitespace.

        :param v: The base path string to validate
        :returns: The stripped base path string
        :raises ValueError: If the base path is empty or contains only whitespace

        .. note::
            This is a Pydantic field validator that runs automatically during model instantiation
        """
        # Strip once and reuse the result for both the emptiness check and the return value.
        path = v.strip()
        if not path:
            raise ValueError("base_path must be non-empty.")
        return path

    def __str__(self) -> str:
        """Returns a human-readable string representation of the file path target.

        :returns: Formatted string showing base path, format and type
        """
        return f"{self.base_path} (Format: {self.output_format}, Type: File Path)"
11117
11218
11319class TableDefinition (BaseModel ):
@@ -342,7 +248,6 @@ def validate(self, strict: bool = True) -> ValidationResult: # type: ignore[ove
342248 )
343249
344250 # Check partitions if specified
345- #TODO: though this can be a model field check, we are checking here so that one can correct
346251 # Can we find a way to use the default way?
347252 if table_def .partitions is not None and table_def .partitions <= 0 :
348253 result .add_error (
@@ -351,7 +256,6 @@ def validate(self, strict: bool = True) -> ValidationResult: # type: ignore[ove
351256 )
352257
353258 # Check for duplicate column names
354- # TODO: Not something possible if we right model, recheck
355259 column_names = [col .name for col in table_def .columns ]
356260 duplicates = [name for name in set (column_names ) if column_names .count (name ) > 1 ]
357261 if duplicates :
@@ -361,8 +265,6 @@ def validate(self, strict: bool = True) -> ValidationResult: # type: ignore[ove
361265
362266 # Build column map for reference checking
363267 column_map = {col .name : col for col in table_def .columns }
364-
365- # TODO: Check baseColumn references, this is tricky? check the dbldefaults
366268 for col in table_def .columns :
367269 if col .baseColumn and col .baseColumn != "id" :
368270 if col .baseColumn not in column_map :
0 commit comments