1+ from datetime import datetime , time , timezone
2+ from itertools import product
3+ from typing import Any , Union , Optional
4+
15import sqlalchemy
6+ from sqlalchemy .engine .interfaces import Dialect
27from sqlalchemy .ext .compiler import compiles
38
4- from typing import Union
9+ from databricks . sql . utils import ParamEscaper
510
6- from datetime import datetime , time
711
def process_literal_param_hack(value: Any) -> Any:
    """Return ``value`` unchanged so it can be used as a ``process_literal_param`` hook.

    A ``process_literal_param`` implementation is supposed to accept a Python
    object and return a string representation of it. Due to some weirdness in
    the way SQLAlchemy's literal rendering works, we have to return the value
    itself: by the time it reaches our custom type code it has already been
    converted into a string.

    This dynamic only seems to affect the literal rendering of datetime and
    time objects. The following SQLAlchemy test suites all fail without this
    hack in place (the root cause is not understood, but it works):

        TimeTest
        DateTimeTest
        DateTimeTZTest

    Args:
        value: The value SQLAlchemy hands to the literal processor.

    Returns:
        The very same object, untouched.
    """
    return value
1027
1128
1229@compiles (sqlalchemy .types .Enum , "databricks" )
@@ -64,7 +81,7 @@ def compile_numeric_databricks(type_, compiler, **kw):
@compiles(sqlalchemy.types.DateTime, "databricks")
def compile_datetime_databricks(type_, compiler, **kw):
    """Render sqlalchemy.types.DateTime as "TIMESTAMP_NTZ" for the databricks dialect.

    We need to override the default DateTime compilation rendering because
    Databricks uses "TIMESTAMP_NTZ" instead of "DATETIME".

    Args:
        type_: The DateTime type instance being compiled (unused).
        compiler: The active type compiler (unused).
        **kw: Extra compiler keyword arguments (unused).

    Returns:
        The literal DDL type name "TIMESTAMP_NTZ".
    """
    return "TIMESTAMP_NTZ"
7087
@@ -87,13 +104,15 @@ def compile_array_databricks(type_, compiler, **kw):
87104 return f"ARRAY<{ inner } >"
88105
89106
90- class DatabricksDateTimeNoTimezoneType (sqlalchemy .types .TypeDecorator ):
91- """The decimal that pysql creates when it receives the contents of a TIMESTAMP_NTZ
92- includes a timezone of 'Etc/UTC'. But since SQLAlchemy's test suite assumes that
93- the sqlalchemy.types.DateTime type will return a datetime.datetime _without_ any
94- timezone set, we need to strip the timezone off the value received from pysql.
107+ class TIMESTAMP_NTZ (sqlalchemy .types .TypeDecorator ):
108+ """Represents values comprising values of fields year, month, day, hour, minute, and second.
109+ All operations are performed without taking any time zone into account.
110+
111+ Our dialect maps sqlalchemy.types.DateTime() to this type, which means that all DateTime()
112+ objects are stored without tzinfo. To read and write timezone-aware datetimes use
113+ databricks.sql.TIMESTAMP instead.
95114
96- It's not clear if DBR sends a timezone to pysql or if pysql is adding it. This could be a bug.
115+ https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html
97116 """
98117
99118 impl = sqlalchemy .types .DateTime
@@ -106,36 +125,115 @@ def process_result_value(self, value: Union[None, datetime], dialect):
106125 return value .replace (tzinfo = None )
107126
108127
class TIMESTAMP(sqlalchemy.types.TypeDecorator):
    """Represents values comprising values of fields year, month, day, hour, minute, and second,
    with the session local time-zone.

    Our dialect maps sqlalchemy.types.DateTime() to TIMESTAMP_NTZ, which means that all DateTime()
    objects are stored without tzinfo. To read and write timezone-aware datetimes use
    this type instead.

    ```python
    # This won't work
    Column(sqlalchemy.DateTime(timezone=True))

    # But this does
    Column(TIMESTAMP)
    ```

    https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-type.html
    """

    # The decorated SQLAlchemy type.
    impl = sqlalchemy.types.DateTime

    # Opt in to SQLAlchemy's statement caching for this type.
    cache_ok = True

    def process_result_value(
        self, value: Union[None, datetime], dialect
    ) -> Optional[datetime]:
        """Ensure datetimes read from the database carry a timezone.

        Args:
            value: The datetime received from the driver, or None for NULL.
            dialect: The dialect in use (unused).

        Returns:
            None for NULL; the value tagged with UTC when it arrives without
            tzinfo; otherwise the value unchanged.
        """
        if value is None:
            return None

        if value.tzinfo is None:
            # Naive values are labelled as UTC; the clock fields are not shifted.
            return value.replace(tzinfo=timezone.utc)
        return value

    def process_bind_param(
        self, value: Union[datetime, None], dialect
    ) -> Optional[datetime]:
        """pysql can pass datetime.datetime() objects directly to DBR"""
        return value

    def process_literal_param(
        self, value: Union[datetime, None], dialect: Dialect
    ) -> str:
        """Hand the value back untouched for literal rendering.

        Delegates to process_literal_param_hack(); see that function for why
        the value is returned as-is instead of being converted to a string here.
        """
        return process_literal_param_hack(value)
171+
@compiles(TIMESTAMP, "databricks")
def compile_timestamp_databricks(type_, compiler, **kw):
    """Render the timezone-aware TIMESTAMP type as "TIMESTAMP" for the databricks dialect.

    TIMESTAMP is implemented on top of sqlalchemy.types.DateTime, which this
    dialect renders as "TIMESTAMP_NTZ". NOTE(review): presumably, without this
    override, TIMESTAMP columns would inherit that DateTime rendering -- confirm.

    Args:
        type_: The TIMESTAMP type instance being compiled (unused).
        compiler: The active type compiler (unused).
        **kw: Extra compiler keyword arguments (unused).

    Returns:
        The literal DDL type name "TIMESTAMP".
    """
    return "TIMESTAMP"
178+
179+
109180class DatabricksTimeType (sqlalchemy .types .TypeDecorator ):
110181 """Databricks has no native TIME type. So we store it as a string."""
111182
112183 impl = sqlalchemy .types .Time
113184 cache_ok = True
114185
115- TIME_WITH_MICROSECONDS_FMT = "%H:%M:%S.%f"
116- TIME_NO_MICROSECONDS_FMT = "%H:%M:%S"
186+ BASE_FMT = "%H:%M:%S"
187+ MICROSEC_PART = ".%f"
188+ TIMEZONE_PART = "%z"
189+
def _generate_fmt_string(self, ms: bool, tz: bool) -> str:
    """Return a strptime()/strftime() format string for a time value.

    Args:
        ms: Include the microseconds part (MICROSEC_PART) when True.
        tz: Include the timezone part (TIMEZONE_PART) when True.

    Returns:
        BASE_FMT followed by the requested optional parts, concatenated
        with no separating whitespace.
    """
    microsec_part = self.MICROSEC_PART if ms else ""
    timezone_part = self.TIMEZONE_PART if tz else ""
    return f"{self.BASE_FMT}{microsec_part}{timezone_part}"
194+
@property
def allowed_fmt_strings(self) -> list:
    """Format strings accepted when reading a time string back.

    Time strings can be read with or without microseconds and with or
    without a timezone, so every combination of the two switches is
    generated. The list is built on first access and stored on the
    instance as ``_allowed_fmt_strings`` for reuse.
    """
    cached = getattr(self, "_allowed_fmt_strings", None)
    if cached is None:
        switches = (True, False)
        cached = [
            self._generate_fmt_string(ms, tz)
            for ms, tz in product(switches, switches)
        ]
        self._allowed_fmt_strings = cached

    return cached
207+
def _parse_result_string(self, value: str) -> time:
    """Parse a string into a time object. Try all allowed formats until one works.

    Args:
        value: The time string to parse.

    Returns:
        The parsed datetime.time, including tzinfo when the string had an offset.

    Raises:
        ValueError: If none of the allowed format strings matches ``value``.
    """
    for fmt in self.allowed_fmt_strings:
        try:
            parsed = datetime.strptime(value, fmt)
        except ValueError:
            # This format does not match; fall through to the next candidate.
            continue
        # We use timetz() here because we want to preserve the timezone information.
        # Calling .time() will strip the timezone information.
        return parsed.timetz()

    raise ValueError(f"Could not parse time string {value}")
219+
def _determine_fmt_string(self, value: time) -> str:
    """Determine which format string to use to render a time object as a string.

    Args:
        value: The time about to be rendered.

    Returns:
        A format string that includes microseconds only when ``value`` has a
        non-zero microsecond field, and a timezone part only when ``value``
        carries tzinfo.
    """
    has_microseconds = value.microsecond > 0
    has_timezone = value.tzinfo is not None
    return self._generate_fmt_string(has_microseconds, has_timezone)
117225
def process_bind_param(self, value: Union[time, None], dialect) -> Union[None, str]:
    """Convert a time value to the string that is sent to the database.

    The base layout is %H:%M:%S; microseconds and a timezone offset are
    appended only when the value carries them (see _determine_fmt_string).

    Args:
        value: The time to send, or None for NULL.
        dialect: The dialect in use (unused).

    Returns:
        None when ``value`` is None, otherwise the formatted time string.
    """
    if value is None:
        return None
    fmt_string = self._determine_fmt_string(value)
    return value.strftime(fmt_string)
123232
124233 # mypy doesn't like this workaround because TypeEngine wants process_literal_param to return a string
def process_literal_param(self, value, dialect) -> time:  # type: ignore
    """Hand the value back untouched for literal rendering.

    Delegates to process_literal_param_hack(); see that function for why the
    value is returned as-is instead of being converted to a string here.

    Args:
        value: The value SQLAlchemy passes in for literal rendering.
        dialect: The dialect in use (unused).

    Returns:
        ``value`` unchanged.
    """
    return process_literal_param_hack(value)
139237
140238 def process_result_value (
141239 self , value : Union [None , str ], dialect
@@ -144,13 +242,7 @@ def process_result_value(
144242 if value is None :
145243 return None
146244
147- try :
148- _parsed = datetime .strptime (value , self .TIME_WITH_MICROSECONDS_FMT )
149- except ValueError :
150- # If the string doesn't have microseconds, try parsing it without them
151- _parsed = datetime .strptime (value , self .TIME_NO_MICROSECONDS_FMT )
152-
153- return _parsed .time ()
245+ return self ._parse_result_string (value )
154246
155247
156248class DatabricksStringType (sqlalchemy .types .TypeDecorator ):
0 commit comments