From 98737c4009f82b6dce75d2dcd8fac5714db53e3f Mon Sep 17 00:00:00 2001 From: Pat Buxton Date: Thu, 6 Feb 2025 19:12:18 +0000 Subject: [PATCH 01/26] Fix not fetching cursor on insert/update --- databend_sqlalchemy/databend_dialect.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/databend_sqlalchemy/databend_dialect.py b/databend_sqlalchemy/databend_dialect.py index cc7d91e..4483375 100644 --- a/databend_sqlalchemy/databend_dialect.py +++ b/databend_sqlalchemy/databend_dialect.py @@ -1102,6 +1102,11 @@ def create_server_side_cursor(self): def create_default_cursor(self): return self._dbapi_connection.cursor() + def post_exec(self): + if self.isinsert or self.isupdate or self.isdelete: + r = self.cursor.fetchall() + self._rowcount = r[0][0] + class DatabendTypeCompiler(compiler.GenericTypeCompiler): def visit_ARRAY(self, type_, **kw): From dcb77c401214745e47dea6ab9c3f985c6217c0b3 Mon Sep 17 00:00:00 2001 From: Pat Buxton Date: Fri, 7 Feb 2025 14:56:25 +0000 Subject: [PATCH 02/26] Fetch the cursor on insert/update/delete/copy into --- databend_sqlalchemy/databend_dialect.py | 54 +++++++++++++++++- tests/test_copy_into.py | 75 ++++++++++++++++++++++++- 2 files changed, 125 insertions(+), 4 deletions(-) diff --git a/databend_sqlalchemy/databend_dialect.py b/databend_sqlalchemy/databend_dialect.py index 4483375..8c40693 100644 --- a/databend_sqlalchemy/databend_dialect.py +++ b/databend_sqlalchemy/databend_dialect.py @@ -800,6 +800,9 @@ class DatabendIdentifierPreparer(PGIdentifierPreparer): class DatabendCompiler(PGCompiler): + iscopyintotable: bool = False + iscopyintolocation: bool = False + def get_select_precolumns(self, select, **kw): # call the base implementation because Databend doesn't support DISTINCT ON return super(PGCompiler, self).get_select_precolumns(select, **kw) @@ -971,6 +974,11 @@ def visit_when_merge_unmatched(self, merge_unmatched, **kw): ) def visit_copy_into(self, copy_into, **kw): + if isinstance(copy_into.target, (TableClause,)): + 
self.iscopyintotable = True + else: + self.iscopyintolocation = True + target = ( self.preparer.format_table(copy_into.target) if isinstance(copy_into.target, (TableClause,)) @@ -1090,8 +1098,21 @@ def visit_google_cloud_storage(self, gcs: GoogleCloudStorage, **kw): f")" ) + def visit_stage(self, stage, **kw): + if stage.path: + return f"@{stage.name}/{stage.path}" + return f"@{stage.name}" + class DatabendExecutionContext(default.DefaultExecutionContext): + iscopyintotable = False + iscopyintolocation = False + + _copy_input_bytes: Optional[int] = None + _copy_output_bytes: Optional[int] = None + _copy_into_table_results: Optional[list[dict]] = None + _copy_into_location_results: dict = None + @sa_util.memoized_property def should_autocommit(self): return False # No DML supported, never autocommit @@ -1103,9 +1124,36 @@ def create_default_cursor(self): return self._dbapi_connection.cursor() def post_exec(self): - if self.isinsert or self.isupdate or self.isdelete: - r = self.cursor.fetchall() - self._rowcount = r[0][0] + self.iscopyintotable = getattr(self.compiled, 'iscopyintotable', False) + self.iscopyintolocation = getattr(self.compiled, 'iscopyintolocation', False) + if (self.isinsert or self.isupdate or self.isdelete or + self.iscopyintolocation or self.iscopyintotable): + result = self.cursor.fetchall() + if self.iscopyintotable: + self._copy_into_table_results = [ + { + 'file': row[0], + 'rows_loaded': row[1], + 'errors_seen': row[2], + 'first_error': row[3], + 'first_error_line': row[4], + } for row in result + ] + self._rowcount = sum(c['rows_loaded'] for c in self._copy_into_table_results) + else: + self._rowcount = result[0][0] + if self.iscopyintolocation: + self._copy_into_location_results = { + 'rows_unloaded': result[0][0], + 'input_bytes': result[0][1], + 'output_bytes': result[0][2], + } + + def copy_into_table_results(self) -> list[dict]: + return self._copy_into_table_results + + def copy_into_location_results(self) -> dict: + return 
self._copy_into_location_results class DatabendTypeCompiler(compiler.GenericTypeCompiler): diff --git a/tests/test_copy_into.py b/tests/test_copy_into.py index 09b938b..0fd049d 100644 --- a/tests/test_copy_into.py +++ b/tests/test_copy_into.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from sqlalchemy.testing import config, fixture, fixtures, util +from sqlalchemy.testing import config, fixture, fixtures, eq_ from sqlalchemy.testing.assertions import AssertsCompiledSQL from sqlalchemy import ( Table, @@ -12,17 +12,20 @@ schema, cast, literal_column, + text, ) from databend_sqlalchemy import ( CopyIntoTable, CopyIntoLocation, CopyIntoTableOptions, + CopyIntoLocationOptions, CSVFormat, ParquetFormat, GoogleCloudStorage, Compression, FileColumnClause, + StageClause, ) @@ -158,3 +161,73 @@ def test_copy_into_table_sub_select_column_clauses(self): ), checkparams={"1_1": "xyz", "IF_1": "NULL", "IF_2": "NOTNULL"}, ) + + +class CopyIntoResultTest(fixtures.TablesTest): + run_create_tables = "each" + __backend__ = True + + @classmethod + def define_tables(cls, metadata): + Table( + "random_data", + metadata, + Column("id", Integer), + Column("data", String(50)), + databend_engine='Random', + ) + Table( + "loaded", + metadata, + Column("id", Integer), + Column("data", String(50)), + ) + + def test_copy_into_stage_and_table(self, connection): + # create stage + connection.execute(text('CREATE OR REPLACE STAGE mystage')) + # copy into stage from random table limiting 1000 + table = self.tables.random_data + query = table.select().limit(1000) + + copy_into = CopyIntoLocation( + target=StageClause( + name='mystage' + ), + from_=query, + file_format=ParquetFormat(), + options=CopyIntoLocationOptions() + ) + r = connection.execute( + copy_into + ) + eq_(r.rowcount, 1000) + copy_into_results = r.context.copy_into_location_results() + eq_(copy_into_results['rows_unloaded'], 1000) + eq_(copy_into_results['input_bytes'], 16250) + # eq_(copy_into_results['output_bytes'], 4701) # output 
bytes differs + + # now copy into table + + copy_into_table = CopyIntoTable( + target=self.tables.loaded, + from_=StageClause( + name='mystage' + ), + file_format=ParquetFormat(), + options=CopyIntoTableOptions() + ) + r = connection.execute( + copy_into_table + ) + eq_(r.rowcount, 1000) + copy_into_table_results = r.context.copy_into_table_results() + assert len(copy_into_table_results) == 1 + result = copy_into_table_results[0] + assert result['file'].endswith('.parquet') + eq_(result['rows_loaded'], 1000) + eq_(result['errors_seen'], 0) + eq_(result['first_error'], None) + eq_(result['first_error_line'], None) + + From db05b94bc09b64e7b9f938b7180846052f1d8f74 Mon Sep 17 00:00:00 2001 From: Pat Buxton Date: Fri, 7 Feb 2025 14:56:39 +0000 Subject: [PATCH 03/26] Enable CTE tests --- databend_sqlalchemy/requirements.py | 24 ++++++++++++++++++++++++ tests/test_sqlalchemy.py | 21 +++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/databend_sqlalchemy/requirements.py b/databend_sqlalchemy/requirements.py index 099fa9f..a89230b 100644 --- a/databend_sqlalchemy/requirements.py +++ b/databend_sqlalchemy/requirements.py @@ -204,3 +204,27 @@ def json_type(self): def reflect_table_options(self): """Target database must support reflecting table_options.""" return exclusions.open() + + @property + def ctes(self): + """Target database supports CTEs""" + return exclusions.open() + + @property + def ctes_with_update_delete(self): + """target database supports CTES that ride on top of a normal UPDATE + or DELETE statement which refers to the CTE in a correlated subquery. 
+ + """ + return exclusions.open() + + @property + def update_from(self): + """Target must support UPDATE..FROM syntax""" + return exclusions.closed() + + + @property + def delete_from(self): + """Target must support DELETE FROM..FROM or DELETE..USING syntax""" + return exclusions.closed() diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 6c2b5ab..c5d7663 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -17,6 +17,7 @@ from sqlalchemy.testing.suite import BizarroCharacterFKResolutionTest as _BizarroCharacterFKResolutionTest from sqlalchemy.testing.suite import ServerSideCursorsTest as _ServerSideCursorsTest from sqlalchemy.testing.suite import EnumTest as _EnumTest +from sqlalchemy.testing.suite import CTETest as _CTETest from sqlalchemy import types as sql_types from sqlalchemy import testing, select from sqlalchemy.testing import config, eq_ @@ -286,3 +287,23 @@ class EnumTest(_EnumTest): @testing.skip("databend") # Skipped because no supporting enums yet def test_round_trip_executemany(self, connection): pass + +class CTETest(_CTETest): + @classmethod + def define_tables(cls, metadata): + Table( + "some_table", + metadata, + Column("id", Integer, primary_key=True), + Column("data", String(50)), + Column("parent_id", Integer), + ) + + Table( + "some_other_table", + metadata, + Column("id", Integer, primary_key=True), + Column("data", String(50)), + Column("parent_id", Integer), + ) + From 754c7c6206da154dc5816af8139e9782fac9285c Mon Sep 17 00:00:00 2001 From: Pat Buxton Date: Mon, 10 Feb 2025 12:34:55 +0000 Subject: [PATCH 04/26] Enable further tests --- databend_sqlalchemy/connector.py | 9 +++- databend_sqlalchemy/databend_dialect.py | 8 ++-- databend_sqlalchemy/provision.py | 8 +++- databend_sqlalchemy/requirements.py | 58 ++++++++++++++++++++----- tests/test_sqlalchemy.py | 20 +++++++++ 5 files changed, 83 insertions(+), 20 deletions(-) diff --git a/databend_sqlalchemy/connector.py b/databend_sqlalchemy/connector.py 
index 57504b3..e303099 100644 --- a/databend_sqlalchemy/connector.py +++ b/databend_sqlalchemy/connector.py @@ -49,8 +49,13 @@ def escape_item(self, item): return self.escape_number(item) elif isinstance(item, timedelta): return self.escape_string(f"{item.total_seconds()} seconds") + "::interval" - elif isinstance(item, (datetime, date, time, timedelta)): - return self.escape_string(item.strftime("%Y-%m-%d %H:%M:%S")) + elif isinstance(item, time): + # N.B. Date here must match date in DatabendTime.literal_processor - 1970-01-01 + return self.escape_string(item.strftime("1970-01-01 %H:%M:%S.%f")) + "::timestamp" + elif isinstance(item, datetime): + return self.escape_string(item.strftime("%Y-%m-%d %H:%M:%S.%f")) + "::timestamp" + elif isinstance(item, date): + return self.escape_string(item.strftime("%Y-%m-%d")) + "::date" else: return self.escape_string(item) diff --git a/databend_sqlalchemy/databend_dialect.py b/databend_sqlalchemy/databend_dialect.py index 8c40693..c7d1c0a 100644 --- a/databend_sqlalchemy/databend_dialect.py +++ b/databend_sqlalchemy/databend_dialect.py @@ -670,7 +670,7 @@ def process(value): class DatabendDateTime(sqltypes.DATETIME): __visit_name__ = "DATETIME" - _reg = re.compile(r"(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)") + _reg = re.compile(r"(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)\.(\d+)") def result_processor(self, dialect, coltype): def process(value): @@ -698,7 +698,7 @@ def process(value): class DatabendTime(sqltypes.TIME): __visit_name__ = "TIME" - _reg = re.compile(r"(?:\d+)-(?:\d+)-(?:\d+) (\d+):(\d+):(\d+)") + _reg = re.compile(r"(?:\d+)-(?:\d+)-(?:\d+) (\d+):(\d+):(\d+)\.(\d+)") def result_processor(self, dialect, coltype): def process(value): @@ -720,7 +720,7 @@ def literal_processor(self, dialect): def process(value): if value is not None: from_min_value = datetime.datetime.combine( - datetime.date(1000, 1, 1), value + datetime.date(1970, 1, 1), value ) time_str = from_min_value.isoformat(timespec="microseconds") return f"'{time_str}'" 
@@ -1257,7 +1257,7 @@ class DatabendDialect(default.DefaultDialect): supports_alter = True supports_comments = False supports_empty_insert = False - supports_is_distinct_from = False + supports_is_distinct_from = True supports_multivalues_insert = True supports_statement_cache = False diff --git a/databend_sqlalchemy/provision.py b/databend_sqlalchemy/provision.py index d03b1fc..f2a9fda 100644 --- a/databend_sqlalchemy/provision.py +++ b/databend_sqlalchemy/provision.py @@ -1,7 +1,7 @@ from sqlalchemy.testing.provision import create_db from sqlalchemy.testing.provision import drop_db -from sqlalchemy.testing.provision import configure_follower, update_db_opts +from sqlalchemy.testing.provision import configure_follower, update_db_opts, temp_table_keyword_args @create_db.for_db("databend") @@ -31,6 +31,10 @@ def _databend_drop_db(cfg, eng, ident): conn.exec_driver_sql("DROP DATABASE IF EXISTS %s_test_schema_2" % ident) conn.exec_driver_sql("DROP DATABASE IF EXISTS %s" % ident) +@temp_table_keyword_args.for_db("databend") +def _databend_temp_table_keyword_args(cfg, eng): + return {"prefixes": ["TEMPORARY"]} + @configure_follower.for_db("databend") def _databend_configure_follower(config, ident): @@ -39,5 +43,5 @@ def _databend_configure_follower(config, ident): # Uncomment to debug SQL Statements in tests # @update_db_opts.for_db("databend") -# def _mssql_update_db_opts(db_url, db_opts): +# def _databend_update_db_opts(db_url, db_opts): # db_opts["echo"] = True diff --git a/databend_sqlalchemy/requirements.py b/databend_sqlalchemy/requirements.py index a89230b..dea445e 100644 --- a/databend_sqlalchemy/requirements.py +++ b/databend_sqlalchemy/requirements.py @@ -37,7 +37,7 @@ def binary_literals(self): @property def temporary_tables(self): """target database supports temporary tables""" - return exclusions.closed() # Databend does not currently support temporary tables + return exclusions.open() @property def temp_table_reflection(self): @@ -103,13 +103,6 @@ def 
datetime_timezone(self): return exclusions.closed() - @property - def time_timezone(self): - """target dialect supports representation of Python - datetime.time() with tzinfo with Time(timezone=True).""" - - return exclusions.closed() - # @property # def datetime_implicit_bound(self): # """target dialect when given a datetime object will bind it such @@ -124,14 +117,14 @@ def datetime_microseconds(self): """target dialect supports representation of Python datetime.datetime() with microsecond objects.""" - return exclusions.closed() + return exclusions.open() @property def timestamp_microseconds(self): """target dialect supports representation of Python datetime.datetime() with microsecond objects but only if TIMESTAMP is used.""" - return exclusions.closed() + return exclusions.open() @property def time(self): @@ -145,6 +138,13 @@ def time_microseconds(self): """target dialect supports representation of Python datetime.time() with microsecond objects.""" + return exclusions.open() + + @property + def time_timezone(self): + """target dialect supports representation of Python + datetime.time() with tzinfo with Time(timezone=True).""" + return exclusions.closed() @property @@ -174,7 +174,14 @@ def unicode_data(self): as well as in result rows. """ - return exclusions.closed() + return exclusions.open() + + @property + def unicode_ddl(self): + """Target driver must support some degree of non-ascii symbol + names. 
+ """ + return exclusions.open() @property def precision_generic_float_type(self): @@ -184,6 +191,14 @@ def precision_generic_float_type(self): """ return exclusions.closed() #ToDo - I couldn't get the test for this one working, not sure where the issue is - AssertionError: {Decimal('15.7563829')} != {Decimal('15.7563827')} + @property + def precision_numerics_many_significant_digits(self): + """target backend supports values with many digits on both sides, + such as 319438950232418390.273596, 87673.594069654243 + + """ + return exclusions.closed() + @property def array_type(self): return exclusions.closed() @@ -198,7 +213,7 @@ def float_is_numeric(self): def json_type(self): """target platform implements a native JSON type.""" - return exclusions.closed() #ToDo - This could be enabled if primary keys were supported + return exclusions.closed() # ToDo - not quite ready to turn on yet, null values are not handled correctly https://github.com/databendlabs/databend/issues/17433 @property def reflect_table_options(self): @@ -228,3 +243,22 @@ def update_from(self): def delete_from(self): """Target must support DELETE FROM..FROM or DELETE..USING syntax""" return exclusions.closed() + + @property + def table_value_constructor(self): + """Database / dialect supports a query like: + + .. sourcecode:: sql + + SELECT * FROM VALUES ( (c1, c2), (c1, c2), ...) + AS some_table(col1, col2) + + SQLAlchemy generates this with the :func:`_sql.values` function. 
+ + """ + return exclusions.open() + + @property + def window_functions(self): + """Target database must support window functions.""" + return exclusions.open() diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index c5d7663..a671210 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -18,6 +18,7 @@ from sqlalchemy.testing.suite import ServerSideCursorsTest as _ServerSideCursorsTest from sqlalchemy.testing.suite import EnumTest as _EnumTest from sqlalchemy.testing.suite import CTETest as _CTETest +from sqlalchemy.testing.suite import JSONTest as _JSONTest from sqlalchemy import types as sql_types from sqlalchemy import testing, select from sqlalchemy.testing import config, eq_ @@ -307,3 +308,22 @@ def define_tables(cls, metadata): Column("parent_id", Integer), ) + @testing.skip("databend") # Skipped because of bug in Databend https://github.com/databendlabs/databend/issues/17432 + def test_select_recursive_round_trip(self, connection): + pass + +class JSONTest(_JSONTest): + @classmethod + def define_tables(cls, metadata): + Table( + "data_table", + metadata, + Column("id", Integer), #, primary_key=True), # removed use of primary key to get test to work + Column("name", String(30), nullable=False), + Column("data", cls.datatype, nullable=False), + Column("nulldata", cls.datatype(none_as_null=True)), + ) + + # ToDo - this does not yet work + def test_path_typed_comparison(self, datatype, value): + pass From 909f53ee581ccc6b25f971e46839384403702df8 Mon Sep 17 00:00:00 2001 From: Pat Buxton Date: Tue, 18 Mar 2025 13:21:46 +0000 Subject: [PATCH 05/26] Support for table and column comments --- databend_sqlalchemy/databend_dialect.py | 137 +++++++++++++++++++++++- databend_sqlalchemy/requirements.py | 12 +++ 2 files changed, 147 insertions(+), 2 deletions(-) diff --git a/databend_sqlalchemy/databend_dialect.py b/databend_sqlalchemy/databend_dialect.py index c7d1c0a..0a71680 100644 --- a/databend_sqlalchemy/databend_dialect.py +++ 
b/databend_sqlalchemy/databend_dialect.py @@ -30,10 +30,11 @@ import datetime from types import NoneType +import sqlalchemy.engine.reflection import sqlalchemy.types as sqltypes from typing import Any, Dict, Optional, Union from sqlalchemy import util as sa_util -from sqlalchemy.engine import reflection +from sqlalchemy.engine import reflection, ObjectScope, ObjectKind from sqlalchemy.sql import ( compiler, text, @@ -44,6 +45,7 @@ Subquery, ) from sqlalchemy.dialects.postgresql.base import PGCompiler, PGIdentifierPreparer +from sqlalchemy import Table, MetaData, Column from sqlalchemy.types import ( BIGINT, INTEGER, @@ -1224,6 +1226,12 @@ def post_create_table(self, table): if engine is not None: table_opts.append(f" ENGINE={engine}") + if table.comment is not None: + comment = self.sql_compiler.render_literal_value( + table.comment, sqltypes.String() + ) + table_opts.append(f" COMMENT={comment}") + cluster_keys = db_opts.get("cluster_by") if cluster_keys is not None: if isinstance(cluster_keys, str): @@ -1245,6 +1253,37 @@ def post_create_table(self, table): return " ".join(table_opts) + def get_column_specification(self, column, **kwargs): + colspec = super().get_column_specification(column, **kwargs) + comment = column.comment + if comment is not None: + literal = self.sql_compiler.render_literal_value( + comment, sqltypes.String() + ) + colspec += " COMMENT " + literal + + return colspec + + def visit_set_table_comment(self, create, **kw): + return "ALTER TABLE %s COMMENT = %s" % ( + self.preparer.format_table(create.element), + self.sql_compiler.render_literal_value( + create.element.comment, sqltypes.String() + ), + ) + + def visit_drop_table_comment(self, create, **kw): + return "ALTER TABLE %s COMMENT = ''" % ( + self.preparer.format_table(create.element) + ) + + def visit_set_column_comment(self, create, **kw): + return "ALTER TABLE %s MODIFY %s %s" % ( + self.preparer.format_table(create.element.table), + self.preparer.format_column(create.element), + 
self.get_column_specification(create.element), + ) + class DatabendDialect(default.DefaultDialect): name = "databend" @@ -1369,7 +1408,7 @@ def has_table(self, connection, table_name, schema=None, **kw): def get_columns(self, connection, table_name, schema=None, **kw): query = text( """ - select column_name, column_type, is_nullable + select column_name, column_type, is_nullable, nullif(column_comment, '') from information_schema.columns where table_name = :table_name and table_schema = :schema_name @@ -1390,6 +1429,7 @@ def get_columns(self, connection, table_name, schema=None, **kw): "type": self._get_column_type(row[1]), "nullable": get_is_nullable(row[2]), "default": None, + "comment": row[3], } for row in result ] @@ -1469,6 +1509,23 @@ def get_table_names(self, connection, schema=None, **kw): result = connection.execute(query, dict(schema_name=schema)) return [row[0] for row in result] + @reflection.cache + def get_temp_table_names(self, connection, schema=None, **kw): + table_name_query = """ + select name + from system.temporary_tables + where database = :schema_name + """ + query = text(table_name_query).bindparams( + bindparam("schema_name", type_=sqltypes.Unicode) + ) + if schema is None: + schema = self.default_schema_name + + result = connection.execute(query, dict(schema_name=schema)) + return [row[0] for row in result] + + @reflection.cache def get_view_names(self, connection, schema=None, **kw): view_name_query = """ @@ -1563,6 +1620,82 @@ def get_table_options(self, connection, table_name, schema=None, **kw): return options + @reflection.cache + def get_table_comment(self, connection, table_name, schema, **kw): + query_text = """ + SELECT comment + FROM system.tables + WHERE database = :schema_name + and name = :table_name + """ + query = text(query_text).bindparams( + bindparam("table_name", type_=sqltypes.Unicode), + bindparam("schema_name", type_=sqltypes.Unicode), + ) + if schema is None: + schema = self.default_schema_name + + result = 
connection.execute( + query, dict(table_name=table_name, schema_name=schema) + ).one_or_none() + if not result: + raise NoSuchTableError( + f"{self.identifier_preparer.quote_identifier(schema)}." + f"{self.identifier_preparer.quote_identifier(table_name)}" + ) + return {'text': result[0]} if result[0] else reflection.ReflectionDefaults.table_comment() + + def _prepare_filter_names(self, filter_names): + if filter_names: + fn = [name for name in filter_names] + return True, {"filter_names": fn} + else: + return False, {} + + def get_multi_table_comment( + self, connection, schema, filter_names, scope, kind, **kw + ): + meta = MetaData() + all_tab_comments=Table( + "tables", + meta, + Column("database", VARCHAR, nullable=False), + Column("name", VARCHAR, nullable=False), + Column("comment", VARCHAR), + Column("table_type", VARCHAR), + schema='system', + ).alias("a_tab_comments") + + + has_filter_names, params = self._prepare_filter_names(filter_names) + owner = schema or self.default_schema_name + + table_types = set() + if ObjectKind.TABLE in kind: + table_types.add('BASE TABLE') + if ObjectKind.VIEW in kind: + table_types.add('VIEW') + + query = select( + all_tab_comments.c.name, all_tab_comments.c.comment + ).where( + all_tab_comments.c.database == owner, + all_tab_comments.c.table_type.in_(table_types), + sqlalchemy.true() if ObjectScope.DEFAULT in scope else sqlalchemy.false(), + ) + if has_filter_names: + query = query.where(all_tab_comments.c.name.in_(bindparam("filter_names"))) + + result = connection.execute(query, params) + default_comment = reflection.ReflectionDefaults.table_comment + return ( + ( + (schema, table), + {"text": comment} if comment else default_comment(), + ) + for table, comment in result + ) + def do_rollback(self, dbapi_connection): # No transactions pass diff --git a/databend_sqlalchemy/requirements.py b/databend_sqlalchemy/requirements.py index dea445e..acee519 100644 --- a/databend_sqlalchemy/requirements.py +++ 
b/databend_sqlalchemy/requirements.py @@ -34,6 +34,18 @@ def binary_literals(self): """ return exclusions.closed() # Currently no binary type in Databend + @property + def comment_reflection(self): + """Indicates if the database support table comment reflection""" + return exclusions.open() + + @property + def comment_reflection_full_unicode(self): + """Indicates if the database support table comment reflection in the + full unicode range, including emoji etc. + """ + return exclusions.open() + @property def temporary_tables(self): """target database supports temporary tables""" From 1bb2887162bfc79cb73ae05f2f607003d253b60b Mon Sep 17 00:00:00 2001 From: Pat Buxton Date: Tue, 18 Mar 2025 13:22:28 +0000 Subject: [PATCH 06/26] Include CTE test now bug is fixed --- tests/test_sqlalchemy.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index a671210..6b9625e 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -3,7 +3,6 @@ from sqlalchemy.testing.suite import * from sqlalchemy.testing.suite import ComponentReflectionTestExtra as _ComponentReflectionTestExtra -from sqlalchemy.testing.suite import ComponentReflectionTest as _ComponentReflectionTest from sqlalchemy.testing.suite import DeprecatedCompoundSelectTest as _DeprecatedCompoundSelectTest from sqlalchemy.testing.suite import BooleanTest as _BooleanTest from sqlalchemy.testing.suite import BinaryTest as _BinaryTest @@ -24,12 +23,6 @@ from sqlalchemy.testing import config, eq_ -class ComponentReflectionTest(_ComponentReflectionTest): - @testing.requires.index_reflection - def test_get_indexes(self): - pass - - class ComponentReflectionTestExtra(_ComponentReflectionTestExtra): @testing.requires.table_reflection @@ -289,6 +282,7 @@ class EnumTest(_EnumTest): def test_round_trip_executemany(self, connection): pass + class CTETest(_CTETest): @classmethod def define_tables(cls, metadata): @@ -297,7 +291,7 @@ def 
define_tables(cls, metadata): metadata, Column("id", Integer, primary_key=True), Column("data", String(50)), - Column("parent_id", Integer), + Column("parent_id", Integer), # removed use of foreign key to get test to work ) Table( @@ -308,9 +302,6 @@ def define_tables(cls, metadata): Column("parent_id", Integer), ) - @testing.skip("databend") # Skipped because of bug in Databend https://github.com/databendlabs/databend/issues/17432 - def test_select_recursive_round_trip(self, connection): - pass class JSONTest(_JSONTest): @classmethod From d93a78c578004d89b842d5ab7b854bc9e51511ac Mon Sep 17 00:00:00 2001 From: Pat Buxton Date: Tue, 18 Mar 2025 14:42:53 +0000 Subject: [PATCH 07/26] Run against nightly --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dbbea42..2edc747 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest services: databend: - image: datafuselabs/databend + image: datafuselabs/databend:nightly env: QUERY_DEFAULT_USER: databend QUERY_DEFAULT_PASSWORD: databend From 5e6f8bfb7d2413978ee118e2a1d654ddd74af40b Mon Sep 17 00:00:00 2001 From: Pat Buxton Date: Tue, 18 Mar 2025 15:02:27 +0000 Subject: [PATCH 08/26] Update pipenv --- Pipfile.lock | 190 +++++++++++++++++++++++++-------------------------- setup.cfg | 4 +- 2 files changed, 97 insertions(+), 97 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index 6032a9a..f408bc7 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -18,14 +18,14 @@ "default": { "databend-driver": { "hashes": [ - "sha256:25afa9778f0e261a819c536a72d28864a4ed2ad9b7d60db8e1c374d06c3fb6a7", - "sha256:8725e813ae144890d05080f8ba9312f71d9ecbec6050894464524f374400960a", - "sha256:8969388449c7a2caa2ab3ab6f321dd1a7c98d701b54123a10638c784c1eb46c0", - "sha256:b2d0d22d18b02f40869526d1797bc1b34292b07bad6c50f189d1cecfbb6ca169", - 
"sha256:c64214ce0af7fcc7b4d4c4dd9ac27a7d0e4d2edd6e57cd9c4debb06cf21ab9a6" + "sha256:1072e8e23440571e1ed8251ab5dc951e6e5740a5803b4fe4dc5489dc267a30c0", + "sha256:1522220653a43581fef8446605a3028029fa44869da6a0ee04d4ee3bbbc6ed0f", + "sha256:65f57989a9a5da821fe1f092d641701ddec4f97c588e20f8e03bfe0b2483f362", + "sha256:663b968a2c3146d5b4ac2e6b3b9ef670d159ec7849e353977029f9f7ad479dfb", + "sha256:bbf06205aee8fc4ae35e0d1a5c0e673cb1b14aa2613ded572dc1320404b9aa2d" ], - "markers": "python_version < '3.14' and python_version >= '3.7'", - "version": "==0.25.0" + "markers": "python_version < '3.14' and python_version >= '3.8'", + "version": "==0.26.2" }, "databend-sqlalchemy": { "editable": true, @@ -108,71 +108,71 @@ "sha256:f406b22b7c9a9b4f8aa9d2ab13d6ae0ac3e85c9a809bd590ad53fed2bf70dc79", "sha256:f6ff3b14f2df4c41660a7dec01045a045653998784bf8cfcb5a525bdffffbc8f" ], - "markers": "python_version < '3.13' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))", + "markers": "python_version < '3.14' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))", "version": "==3.1.1" }, "sqlalchemy": { "hashes": [ - "sha256:03e08af7a5f9386a43919eda9de33ffda16b44eb11f3b313e6822243770e9763", - "sha256:0572f4bd6f94752167adfd7c1bed84f4b240ee6203a95e05d1e208d488d0d436", - "sha256:07b441f7d03b9a66299ce7ccf3ef2900abc81c0db434f42a5694a37bd73870f2", - "sha256:1bc330d9d29c7f06f003ab10e1eaced295e87940405afe1b110f2eb93a233588", - "sha256:1e0d612a17581b6616ff03c8e3d5eff7452f34655c901f75d62bd86449d9750e", - "sha256:23623166bfefe1487d81b698c423f8678e80df8b54614c2bf4b4cfcd7c711959", - 
"sha256:2519f3a5d0517fc159afab1015e54bb81b4406c278749779be57a569d8d1bb0d", - "sha256:28120ef39c92c2dd60f2721af9328479516844c6b550b077ca450c7d7dc68575", - "sha256:37350015056a553e442ff672c2d20e6f4b6d0b2495691fa239d8aa18bb3bc908", - "sha256:39769a115f730d683b0eb7b694db9789267bcd027326cccc3125e862eb03bfd8", - "sha256:3c01117dd36800f2ecaa238c65365b7b16497adc1522bf84906e5710ee9ba0e8", - "sha256:3d6718667da04294d7df1670d70eeddd414f313738d20a6f1d1f379e3139a545", - "sha256:3dbb986bad3ed5ceaf090200eba750b5245150bd97d3e67343a3cfed06feecf7", - "sha256:4557e1f11c5f653ebfdd924f3f9d5ebfc718283b0b9beebaa5dd6b77ec290971", - "sha256:46331b00096a6db1fdc052d55b101dbbfc99155a548e20a0e4a8e5e4d1362855", - "sha256:4a121d62ebe7d26fec9155f83f8be5189ef1405f5973ea4874a26fab9f1e262c", - "sha256:4f5e9cd989b45b73bd359f693b935364f7e1f79486e29015813c338450aa5a71", - "sha256:50aae840ebbd6cdd41af1c14590e5741665e5272d2fee999306673a1bb1fdb4d", - "sha256:59b1ee96617135f6e1d6f275bbe988f419c5178016f3d41d3c0abb0c819f75bb", - "sha256:59b8f3adb3971929a3e660337f5dacc5942c2cdb760afcabb2614ffbda9f9f72", - "sha256:66bffbad8d6271bb1cc2f9a4ea4f86f80fe5e2e3e501a5ae2a3dc6a76e604e6f", - "sha256:69f93723edbca7342624d09f6704e7126b152eaed3cdbb634cb657a54332a3c5", - "sha256:6a440293d802d3011028e14e4226da1434b373cbaf4a4bbb63f845761a708346", - "sha256:72c28b84b174ce8af8504ca28ae9347d317f9dba3999e5981a3cd441f3712e24", - "sha256:79d2e78abc26d871875b419e1fd3c0bca31a1cb0043277d0d850014599626c2e", - "sha256:7f2767680b6d2398aea7082e45a774b2b0767b5c8d8ffb9c8b683088ea9b29c5", - "sha256:8318f4776c85abc3f40ab185e388bee7a6ea99e7fa3a30686580b209eaa35c08", - "sha256:8958b10490125124463095bbdadda5aa22ec799f91958e410438ad6c97a7b793", - "sha256:8c78ac40bde930c60e0f78b3cd184c580f89456dd87fc08f9e3ee3ce8765ce88", - "sha256:90812a8933df713fdf748b355527e3af257a11e415b613dd794512461eb8a686", - "sha256:9bc633f4ee4b4c46e7adcb3a9b5ec083bf1d9a97c1d3854b92749d935de40b9b", - "sha256:9e46ed38affdfc95d2c958de328d037d87801cfcbea6d421000859e9789e61c2", 
- "sha256:9fe53b404f24789b5ea9003fc25b9a3988feddebd7e7b369c8fac27ad6f52f28", - "sha256:a4e46a888b54be23d03a89be510f24a7652fe6ff660787b96cd0e57a4ebcb46d", - "sha256:a86bfab2ef46d63300c0f06936bd6e6c0105faa11d509083ba8f2f9d237fb5b5", - "sha256:ac9dfa18ff2a67b09b372d5db8743c27966abf0e5344c555d86cc7199f7ad83a", - "sha256:af148a33ff0349f53512a049c6406923e4e02bf2f26c5fb285f143faf4f0e46a", - "sha256:b11d0cfdd2b095e7b0686cf5fabeb9c67fae5b06d265d8180715b8cfa86522e3", - "sha256:b2985c0b06e989c043f1dc09d4fe89e1616aadd35392aea2844f0458a989eacf", - "sha256:b544ad1935a8541d177cb402948b94e871067656b3a0b9e91dbec136b06a2ff5", - "sha256:b5cc79df7f4bc3d11e4b542596c03826063092611e481fcf1c9dfee3c94355ef", - "sha256:b817d41d692bf286abc181f8af476c4fbef3fd05e798777492618378448ee689", - "sha256:b81ee3d84803fd42d0b154cb6892ae57ea6b7c55d8359a02379965706c7efe6c", - "sha256:be9812b766cad94a25bc63bec11f88c4ad3629a0cec1cd5d4ba48dc23860486b", - "sha256:c245b1fbade9c35e5bd3b64270ab49ce990369018289ecfde3f9c318411aaa07", - "sha256:c3f3631693003d8e585d4200730616b78fafd5a01ef8b698f6967da5c605b3fa", - "sha256:c4ae3005ed83f5967f961fd091f2f8c5329161f69ce8480aa8168b2d7fe37f06", - "sha256:c54a1e53a0c308a8e8a7dffb59097bff7facda27c70c286f005327f21b2bd6b1", - "sha256:d0ddd9db6e59c44875211bc4c7953a9f6638b937b0a88ae6d09eb46cced54eff", - "sha256:dc022184d3e5cacc9579e41805a681187650e170eb2fd70e28b86192a479dcaa", - "sha256:e32092c47011d113dc01ab3e1d3ce9f006a47223b18422c5c0d150af13a00687", - "sha256:f7b64e6ec3f02c35647be6b4851008b26cff592a95ecb13b6788a54ef80bbdd4", - "sha256:f942a799516184c855e1a32fbc7b29d7e571b52612647866d4ec1c3242578fcb", - "sha256:f9511d8dd4a6e9271d07d150fb2f81874a3c8c95e11ff9af3a2dfc35fe42ee44", - "sha256:fd3a55deef00f689ce931d4d1b23fa9f04c880a48ee97af488fd215cf24e2a6c", - "sha256:fddbe92b4760c6f5d48162aef14824add991aeda8ddadb3c31d56eb15ca69f8e", - "sha256:fdf3386a801ea5aba17c6410dd1dc8d39cf454ca2565541b5ac42a84e1e28f53" + 
"sha256:018ee97c558b499b58935c5a152aeabf6d36b3d55d91656abeb6d93d663c0c4c", + "sha256:01da15490c9df352fbc29859d3c7ba9cd1377791faeeb47c100832004c99472c", + "sha256:04545042969833cb92e13b0a3019549d284fd2423f318b6ba10e7aa687690a3c", + "sha256:06205eb98cb3dd52133ca6818bf5542397f1dd1b69f7ea28aa84413897380b06", + "sha256:08cf721bbd4391a0e765fe0fe8816e81d9f43cece54fdb5ac465c56efafecb3d", + "sha256:0d7e3866eb52d914aea50c9be74184a0feb86f9af8aaaa4daefe52b69378db0b", + "sha256:125a7763b263218a80759ad9ae2f3610aaf2c2fbbd78fff088d584edf81f3782", + "sha256:23c5aa33c01bd898f879db158537d7e7568b503b15aad60ea0c8da8109adf3e7", + "sha256:2600a50d590c22d99c424c394236899ba72f849a02b10e65b4c70149606408b5", + "sha256:2d7332868ce891eda48896131991f7f2be572d65b41a4050957242f8e935d5d7", + "sha256:2ed107331d188a286611cea9022de0afc437dd2d3c168e368169f27aa0f61338", + "sha256:3395e7ed89c6d264d38bea3bfb22ffe868f906a7985d03546ec7dc30221ea980", + "sha256:344cd1ec2b3c6bdd5dfde7ba7e3b879e0f8dd44181f16b895940be9b842fd2b6", + "sha256:34d5c49f18778a3665d707e6286545a30339ad545950773d43977e504815fa70", + "sha256:35e72518615aa5384ef4fae828e3af1b43102458b74a8c481f69af8abf7e802a", + "sha256:3eb14ba1a9d07c88669b7faf8f589be67871d6409305e73e036321d89f1d904e", + "sha256:412c6c126369ddae171c13987b38df5122cb92015cba6f9ee1193b867f3f1530", + "sha256:4600c7a659d381146e1160235918826c50c80994e07c5b26946a3e7ec6c99249", + "sha256:463ecfb907b256e94bfe7bcb31a6d8c7bc96eca7cbe39803e448a58bb9fcad02", + "sha256:4a06e6c8e31c98ddc770734c63903e39f1947c9e3e5e4bef515c5491b7737dde", + "sha256:4b2de1523d46e7016afc7e42db239bd41f2163316935de7c84d0e19af7e69538", + "sha256:4dabd775fd66cf17f31f8625fc0e4cfc5765f7982f94dc09b9e5868182cb71c0", + "sha256:4eff9c270afd23e2746e921e80182872058a7a592017b2713f33f96cc5f82e32", + "sha256:52607d0ebea43cf214e2ee84a6a76bc774176f97c5a774ce33277514875a718e", + "sha256:533e0f66c32093a987a30df3ad6ed21170db9d581d0b38e71396c49718fbb1ca", + "sha256:5493a8120d6fc185f60e7254fc056a6742f1db68c0f849cfc9ab46163c21df47", 
+ "sha256:5d2d1fe548def3267b4c70a8568f108d1fed7cbbeccb9cc166e05af2abc25c22", + "sha256:5dfbc543578058c340360f851ddcecd7a1e26b0d9b5b69259b526da9edfa8875", + "sha256:66a40003bc244e4ad86b72abb9965d304726d05a939e8c09ce844d27af9e6d37", + "sha256:67de057fbcb04a066171bd9ee6bcb58738d89378ee3cabff0bffbf343ae1c787", + "sha256:6827f8c1b2f13f1420545bd6d5b3f9e0b85fe750388425be53d23c760dcf176b", + "sha256:6b35e07f1d57b79b86a7de8ecdcefb78485dab9851b9638c2c793c50203b2ae8", + "sha256:7399d45b62d755e9ebba94eb89437f80512c08edde8c63716552a3aade61eb42", + "sha256:788b6ff6728072b313802be13e88113c33696a9a1f2f6d634a97c20f7ef5ccce", + "sha256:78f1b79132a69fe8bd6b5d91ef433c8eb40688ba782b26f8c9f3d2d9ca23626f", + "sha256:79f4f502125a41b1b3b34449e747a6abfd52a709d539ea7769101696bdca6716", + "sha256:7a8517b6d4005facdbd7eb4e8cf54797dbca100a7df459fdaff4c5123265c1cd", + "sha256:7bd5c5ee1448b6408734eaa29c0d820d061ae18cb17232ce37848376dcfa3e92", + "sha256:7f5243357e6da9a90c56282f64b50d29cba2ee1f745381174caacc50d501b109", + "sha256:805cb481474e111ee3687c9047c5f3286e62496f09c0e82e8853338aaaa348f8", + "sha256:871f55e478b5a648c08dd24af44345406d0e636ffe021d64c9b57a4a11518304", + "sha256:87a1ce1f5e5dc4b6f4e0aac34e7bb535cb23bd4f5d9c799ed1633b65c2bcad8c", + "sha256:8a10ca7f8a1ea0fd5630f02feb055b0f5cdfcd07bb3715fc1b6f8cb72bf114e4", + "sha256:995c2bacdddcb640c2ca558e6760383dcdd68830160af92b5c6e6928ffd259b4", + "sha256:9f03143f8f851dd8de6b0c10784363712058f38209e926723c80654c1b40327a", + "sha256:a1c6b0a5e3e326a466d809b651c63f278b1256146a377a528b6938a279da334f", + "sha256:a28f9c238f1e143ff42ab3ba27990dfb964e5d413c0eb001b88794c5c4a528a9", + "sha256:b2cf5b5ddb69142511d5559c427ff00ec8c0919a1e6c09486e9c32636ea2b9dd", + "sha256:b761a6847f96fdc2d002e29e9e9ac2439c13b919adfd64e8ef49e75f6355c548", + "sha256:bf555f3e25ac3a70c67807b2949bfe15f377a40df84b71ab2c58d8593a1e036e", + "sha256:c08a972cbac2a14810463aec3a47ff218bb00c1a607e6689b531a7c589c50723", + 
"sha256:c457a38351fb6234781d054260c60e531047e4d07beca1889b558ff73dc2014b", + "sha256:c4c433f78c2908ae352848f56589c02b982d0e741b7905228fad628999799de4", + "sha256:d9f119e7736967c0ea03aff91ac7d04555ee038caf89bb855d93bbd04ae85b41", + "sha256:e6b0a1c7ed54a5361aaebb910c1fa864bae34273662bb4ff788a527eafd6e14d", + "sha256:f2bcb085faffcacf9319b1b1445a7e1cfdc6fb46c03f2dce7bc2d9a4b3c1cdc5", + "sha256:fe193d3ae297c423e0e567e240b4324d6b6c280a048e64c77a3ea6886cc2aa87" ], "markers": "python_version >= '3.7'", - "version": "==2.0.36" + "version": "==2.0.39" }, "typing-extensions": { "hashes": [ @@ -186,32 +186,32 @@ "develop": { "black": { "hashes": [ - "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f", - "sha256:17374989640fbca88b6a448129cd1745c5eb8d9547b464f281b251dd00155ccd", - "sha256:1c536fcf674217e87b8cc3657b81809d3c085d7bf3ef262ead700da345bfa6ea", - "sha256:1cbacacb19e922a1d75ef2b6ccaefcd6e93a2c05ede32f06a21386a04cedb981", - "sha256:1f93102e0c5bb3907451063e08b9876dbeac810e7da5a8bfb7aeb5a9ef89066b", - "sha256:2cd9c95431d94adc56600710f8813ee27eea544dd118d45896bb734e9d7a0dc7", - "sha256:30d2c30dc5139211dda799758559d1b049f7f14c580c409d6ad925b74a4208a8", - "sha256:394d4ddc64782e51153eadcaaca95144ac4c35e27ef9b0a42e121ae7e57a9175", - "sha256:3bb2b7a1f7b685f85b11fed1ef10f8a9148bceb49853e47a294a3dd963c1dd7d", - "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392", - "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad", - "sha256:63f626344343083322233f175aaf372d326de8436f5928c042639a4afbbf1d3f", - "sha256:649fff99a20bd06c6f727d2a27f401331dc0cc861fb69cde910fe95b01b5928f", - "sha256:680359d932801c76d2e9c9068d05c6b107f2584b2a5b88831c83962eb9984c1b", - "sha256:846ea64c97afe3bc677b761787993be4991810ecc7a4a937816dd6bddedc4875", - "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3", - "sha256:ccfa1d0cb6200857f1923b602f978386a3a2758a65b52e0950299ea014be6800", - 
"sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65", - "sha256:ddacb691cdcdf77b96f549cf9591701d8db36b2f19519373d60d31746068dbf2", - "sha256:e6668650ea4b685440857138e5fe40cde4d652633b1bdffc62933d0db4ed9812", - "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50", - "sha256:fe4d6476887de70546212c99ac9bd803d90b42fc4767f058a0baa895013fbb3e" + "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171", + "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7", + "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da", + "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2", + "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc", + "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666", + "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f", + "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b", + "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32", + "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f", + "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", + "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299", + "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0", + "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18", + "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0", + "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3", + "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355", + "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096", + "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e", + "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9", + "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba", 
+ "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f" ], "index": "pypi", "markers": "python_version >= '3.9'", - "version": "==24.10.0" + "version": "==25.1.0" }, "click": { "hashes": [ @@ -231,12 +231,12 @@ }, "flake8": { "hashes": [ - "sha256:049d058491e228e03e67b390f311bbf88fce2dbaa8fa673e7aea87b7198b8d38", - "sha256:597477df7860daa5aa0fdd84bf5208a043ab96b8e96ab708770ae0364dd03213" + "sha256:1cbc62e65536f65e6d754dfe6f1bada7f5cf392d6f5db3c2b85892466c3e7c1a", + "sha256:c586ffd0b41540951ae41af572e6790dbd49fc12b3aa2541685d253d9bd504bd" ], "index": "pypi", "markers": "python_full_version >= '3.8.1'", - "version": "==7.1.1" + "version": "==7.1.2" }, "iniconfig": { "hashes": [ @@ -312,12 +312,12 @@ }, "pytest": { "hashes": [ - "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", - "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761" + "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", + "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==8.3.4" + "version": "==8.3.5" }, "pytest-xdist": { "hashes": [ diff --git a/setup.cfg b/setup.cfg index 496e9dd..613cd7e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,9 +27,9 @@ project_urls = [options] packages = find: install_requires = - databend-driver>=0.25.4 + databend-driver>=0.26.2 sqlalchemy>=1.4 -python_requires = >=3.7 +python_requires = >=3.8 package_dir = = . 
From 1320d831e22bf14f9146c4a772c132b14f1d05da Mon Sep 17 00:00:00 2001
From: Pat Buxton 
Date: Tue, 18 Mar 2025 15:19:28 +0000
Subject: [PATCH 09/26] Work in SQLAlchemy 1.4

---
 databend_sqlalchemy/databend_dialect.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/databend_sqlalchemy/databend_dialect.py b/databend_sqlalchemy/databend_dialect.py
index 0a71680..08e3ae5 100644
--- a/databend_sqlalchemy/databend_dialect.py
+++ b/databend_sqlalchemy/databend_dialect.py
@@ -34,7 +34,7 @@ import sqlalchemy.types as sqltypes
 from typing import Any, Dict, Optional, Union
 from sqlalchemy import util as sa_util
-from sqlalchemy.engine import reflection, ObjectScope, ObjectKind
+from sqlalchemy.engine import reflection
 from sqlalchemy.sql import (
     compiler,
     text,
@@ -1643,7 +1643,7 @@ def get_table_comment(self, connection, table_name, schema, **kw):
             f"{self.identifier_preparer.quote_identifier(schema)}."
             f"{self.identifier_preparer.quote_identifier(table_name)}"
         )
-        return {'text': result[0]} if result[0] else reflection.ReflectionDefaults.table_comment()
+        return {'text': result[0]} if result[0] else reflection.ReflectionDefaults.table_comment() if hasattr(reflection, 'ReflectionDefaults') else {'text': None}

     def _prepare_filter_names(self, filter_names):
         if filter_names:
@@ -1671,9 +1671,9 @@ def get_multi_table_comment(
         owner = schema or self.default_schema_name

         table_types = set()
-        if ObjectKind.TABLE in kind:
+        if reflection.ObjectKind.TABLE in kind:
             table_types.add('BASE TABLE')
-        if ObjectKind.VIEW in kind:
+        if reflection.ObjectKind.VIEW in kind:
             table_types.add('VIEW')

         query = select(
@@ -1681,7 +1681,7 @@ def get_multi_table_comment(
         ).where(
             all_tab_comments.c.database == owner,
             all_tab_comments.c.table_type.in_(table_types),
-            sqlalchemy.true() if ObjectScope.DEFAULT in scope else sqlalchemy.false(),
+            sqlalchemy.true() if reflection.ObjectScope.DEFAULT in scope else sqlalchemy.false(),
         )
         if has_filter_names:
            query = 
query.where(all_tab_comments.c.name.in_(bindparam("filter_names"))) From dbca246be75b4a5b0ed67f976b234c03e9773c0e Mon Sep 17 00:00:00 2001 From: Pat Buxton Date: Mon, 31 Mar 2025 11:30:07 +0100 Subject: [PATCH 10/26] Handle JSON params --- databend_sqlalchemy/connector.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/databend_sqlalchemy/connector.py b/databend_sqlalchemy/connector.py index e303099..e9de5fb 100644 --- a/databend_sqlalchemy/connector.py +++ b/databend_sqlalchemy/connector.py @@ -5,6 +5,7 @@ # Many docstrings in this file are based on the PEP, which is in the public domain. import decimal import re +import json from datetime import datetime, date, time, timedelta from databend_sqlalchemy.errors import Error, NotSupportedError @@ -56,6 +57,8 @@ def escape_item(self, item): return self.escape_string(item.strftime("%Y-%m-%d %H:%M:%S.%f")) + "::timestamp" elif isinstance(item, date): return self.escape_string(item.strftime("%Y-%m-%d")) + "::date" + elif isinstance(item, dict): + return self.escape_string(f'parse_json({json.dumps(item)})') else: return self.escape_string(item) From df75e37a907cc498dd9211c565b86c34ad7c6efa Mon Sep 17 00:00:00 2001 From: Pat Buxton Date: Mon, 31 Mar 2025 15:25:50 +0100 Subject: [PATCH 11/26] Update Reserved Words --- databend_sqlalchemy/databend_dialect.py | 44 ++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/databend_sqlalchemy/databend_dialect.py b/databend_sqlalchemy/databend_dialect.py index 08e3ae5..ecbc078 100644 --- a/databend_sqlalchemy/databend_dialect.py +++ b/databend_sqlalchemy/databend_dialect.py @@ -80,6 +80,7 @@ "Comment", "CommentBlock", "Ident", + "IdentVariable", "ColumnPosition", "LiteralString", "LiteralCodeString", @@ -119,6 +120,7 @@ "Caret", "LBrace", "RBrace", + "Dollar", "RArrow", "LongRArrow", "FatRArrow", @@ -187,8 +189,10 @@ "BY", "BROTLI", "BZ2", + "BLOCK", "CALL", "CASE", + "CASE_SENSITIVE", "CAST", "CATALOG", "CATALOGS", @@ -205,6 +209,7 @@ 
"CONTINUE", "CHAR", "COLUMN", + "COLUMN_MATCH_MODE", "COLUMNS", "CHARACTER", "CONFLICT", @@ -223,6 +228,7 @@ "DATA", "DATE", "DATE_ADD", + "DATE_DIFF", "DATE_PART", "DATE_SUB", "DATE_TRUNC", @@ -266,6 +272,7 @@ "ENGINE", "ENGINES", "EPOCH", + "MICROSECOND", "ERROR_ON_COLUMN_COUNT_MISMATCH", "ESCAPE", "EXCEPTION_BACKTRACE", @@ -295,6 +302,7 @@ "FORMAT_NAME", "FORMATS", "FRAGMENTS", + "FRIDAY", "FROM", "FULL", "FUNCTION", @@ -305,12 +313,14 @@ "GET", "GENERATED", "GEOMETRY", + "GEOGRAPHY", "GLOBAL", "GRAPH", "GROUP", "GZIP", "HAVING", "HIGH", + "HILBERT", "HISTORY", "HIVE", "HOUR", @@ -321,6 +331,7 @@ "IDENTIFIER", "IF", "IN", + "INCLUDE_QUERY_ID", "INCREMENTAL", "INDEX", "INFORMATION", @@ -336,6 +347,9 @@ "INTERVAL", "INTO", "INVERTED", + "PREVIOUS_DAY", + "PROCEDURE", + "PROCEDURES", "IMMEDIATE", "IS", "ISODOW", @@ -346,7 +360,9 @@ "JWT", "KEY", "KILL", + "LAST_DAY", "LATERAL", + "LINEAR", "LOCATION_PREFIX", "LOCKS", "LOGICAL", @@ -378,6 +394,7 @@ "MODIFY", "MATERIALIZED", "MUST_CHANGE_PASSWORD", + "NEXT_DAY", "NON_DISPLAY", "NATURAL", "NETWORK", @@ -430,6 +447,7 @@ "PRIORITY", "PURGE", "PUT", + "PARTIAL", "QUARTER", "QUERY", "QUOTE", @@ -445,6 +463,7 @@ "REPLACE", "RETURN_FAILED_ONLY", "REVERSE", + "SAMPLE", "MERGE", "MATCHED", "MISSING_FIELD_AS", @@ -475,6 +494,8 @@ "RLIKE", "RAW", "OPTIMIZED", + "DECORRELATED", + "SATURDAY", "SCHEMA", "SCHEMAS", "SECOND", @@ -487,6 +508,7 @@ "UNSET", "SESSION", "SETTINGS", + "VARIABLES", "STAGES", "STATISTIC", "SUMMARY", @@ -497,6 +519,7 @@ "SINGLE", "SIZE_LIMIT", "MAX_FILES", + "MONDAY", "SKIP_HEADER", "SMALLINT", "SNAPPY", @@ -505,6 +528,7 @@ "STAGE", "SYNTAX", "USAGE", + "USE_RAW_PATH", "UPDATE", "UPLOAD", "SEQUENCE", @@ -534,6 +558,7 @@ "TENANTS", "TENANT", "THEN", + "THURSDAY", "TIMESTAMP", "TIMEZONE_HOUR", "TIMEZONE_MINUTE", @@ -548,6 +573,7 @@ "TRUNCATE", "TRY_CAST", "TSV", + "TUESDAY", "TUPLE", "TYPE", "UNBOUNDED", @@ -567,11 +593,12 @@ "USING", "VACUUM", "VALUES", - "VALIDATION_MODE", "VARBINARY", "VARCHAR", "VARIANT", 
+ "VARIABLE", "VERBOSE", + "GRAPHICAL", "VIEW", "VIEWS", "VIRTUAL", @@ -605,6 +632,7 @@ "UDF", "HANDLER", "LANGUAGE", + "STATE", "TASK", "TASKS", "TOP", @@ -620,6 +648,7 @@ "INTEGRATION", "ENABLED", "WEBHOOK", + "WEDNESDAY", "ERROR_INTEGRATION", "AUTO_INGEST", "PIPE_EXECUTION_PAUSED", @@ -632,8 +661,21 @@ "ABORT", "ROLLBACK", "TEMPORARY", + "TEMP", "SECONDS", "DAYS", + "DICTIONARY", + "DICTIONARIES", + "PRIMARY", + "SOURCE", + "SQL", + "SUNDAY", + "WAREHOUSES", + "INSPECT", + "ASSIGN", + "NODES", + "UNASSIGN", + "ONLINE", } From 032081f74b5a1fc41be214569798d32ada1dfd8e Mon Sep 17 00:00:00 2001 From: Pat Buxton Date: Wed, 14 May 2025 12:04:26 +0100 Subject: [PATCH 12/26] Changes to File Formats --- databend_sqlalchemy/__init__.py | 1 + databend_sqlalchemy/dml.py | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/databend_sqlalchemy/__init__.py b/databend_sqlalchemy/__init__.py index f081fb8..b6c2a03 100644 --- a/databend_sqlalchemy/__init__.py +++ b/databend_sqlalchemy/__init__.py @@ -19,6 +19,7 @@ NDJSONFormat, ParquetFormat, ORCFormat, + AVROFormat, AmazonS3, AzureBlobStorage, GoogleCloudStorage, diff --git a/databend_sqlalchemy/dml.py b/databend_sqlalchemy/dml.py index 18f1827..6e5aae4 100644 --- a/databend_sqlalchemy/dml.py +++ b/databend_sqlalchemy/dml.py @@ -250,6 +250,7 @@ class Compression(Enum): DEFLATE = "DEFLATE" RAW_DEFLATE = "RAW_DEFLATE" XZ = "XZ" + SNAPPY = "SNAPPY" class CopyFormat(ClauseElement): @@ -401,6 +402,30 @@ def __init__( class ParquetFormat(CopyFormat): format_type = "PARQUET" + def __init__( + self, + *, + missing_field_as: str = None, + compression: Compression = None, + ): + super().__init__() + if missing_field_as: + if missing_field_as not in ["ERROR", "FIELD_DEFAULT"]: + raise TypeError( + 'Missing Field As should be "ERROR" or "FIELD_DEFAULT".' 
+ ) + self.options["MISSING_FIELD_AS"] = f"{missing_field_as}" + if compression: + if compression not in [Compression.ZSTD, Compression.SNAPPY]: + raise TypeError( + 'Compression should be None, ZStd, or Snappy.' + ) + self.options["COMPRESSION"] = compression.value + + +class AVROFormat(CopyFormat): + format_type = "AVRO" + def __init__( self, *, @@ -418,6 +443,19 @@ def __init__( class ORCFormat(CopyFormat): format_type = "ORC" + def __init__( + self, + *, + missing_field_as: str = None, + ): + super().__init__() + if missing_field_as: + if missing_field_as not in ["ERROR", "FIELD_DEFAULT"]: + raise TypeError( + 'Missing Field As should be "ERROR" or "FIELD_DEFAULT".' + ) + self.options["MISSING_FIELD_AS"] = f"{missing_field_as}" + class StageClause(ClauseElement, FromClauseRole): """Stage Clause""" From 6c92f44137b7735b5ebc0271c0bf7fe27d187846 Mon Sep 17 00:00:00 2001 From: Pat Buxton Date: Wed, 4 Jun 2025 13:02:52 +0100 Subject: [PATCH 13/26] Fix copyInto files clause --- databend_sqlalchemy/databend_dialect.py | 3 ++- tests/test_copy_into.py | 32 +++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/databend_sqlalchemy/databend_dialect.py b/databend_sqlalchemy/databend_dialect.py index ecbc078..516c1a3 100644 --- a/databend_sqlalchemy/databend_dialect.py +++ b/databend_sqlalchemy/databend_dialect.py @@ -1040,7 +1040,8 @@ def visit_copy_into(self, copy_into, **kw): result = f"COPY INTO {target}" f" FROM {source}" if hasattr(copy_into, "files") and isinstance(copy_into.files, list): - result += f"FILES = {', '.join([f for f in copy_into.files])}" + quoted_files = [f"'{f}'" for f in copy_into.files] + result += f" FILES = ({', '.join(quoted_files)})" if hasattr(copy_into, "pattern") and copy_into.pattern: result += f" PATTERN = '{copy_into.pattern}'" if not isinstance(copy_into.file_format, NoneType): diff --git a/tests/test_copy_into.py b/tests/test_copy_into.py index 0fd049d..471a787 100644 --- a/tests/test_copy_into.py +++ 
b/tests/test_copy_into.py @@ -162,6 +162,38 @@ def test_copy_into_table_sub_select_column_clauses(self): checkparams={"1_1": "xyz", "IF_1": "NULL", "IF_2": "NOTNULL"}, ) + def test_copy_into_table_files(self): + m = MetaData() + tbl = Table( + "atable", + m, + Column("id", Integer), + schema="test_schema", + ) + + copy_into = CopyIntoTable( + target=tbl, + from_=GoogleCloudStorage( + uri="gcs://some-bucket/a/path/to/files", + credentials="XYZ", + ), + files=['one','two','three'], + file_format=CSVFormat(), + ) + + self.assert_compile( + copy_into, + ( + "COPY INTO test_schema.atable" + " FROM 'gcs://some-bucket/a/path/to/files' " + "CONNECTION = (" + " ENDPOINT_URL = 'https://storage.googleapis.com' " + " CREDENTIAL = 'XYZ' " + ") FILES = ('one', 'two', 'three')" + " FILE_FORMAT = (TYPE = CSV)" + ), + ) + class CopyIntoResultTest(fixtures.TablesTest): run_create_tables = "each" From c463b77bee2b5a57bc871d2c109061d7fcf22454 Mon Sep 17 00:00:00 2001 From: Pat Buxton Date: Thu, 5 Jun 2025 11:17:23 +0100 Subject: [PATCH 14/26] Fix test - input bytes will differ, table data is random --- tests/test_copy_into.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_copy_into.py b/tests/test_copy_into.py index 471a787..3d193cf 100644 --- a/tests/test_copy_into.py +++ b/tests/test_copy_into.py @@ -236,7 +236,7 @@ def test_copy_into_stage_and_table(self, connection): eq_(r.rowcount, 1000) copy_into_results = r.context.copy_into_location_results() eq_(copy_into_results['rows_unloaded'], 1000) - eq_(copy_into_results['input_bytes'], 16250) + # eq_(copy_into_results['input_bytes'], 16250) # input bytes will differ, the table is random # eq_(copy_into_results['output_bytes'], 4701) # output bytes differs # now copy into table From 1a2fe3ec5f20004ea5a8ec06c72cf261551a4372 Mon Sep 17 00:00:00 2001 From: simozzy Date: Thu, 19 Jun 2025 18:46:04 +0100 Subject: [PATCH 15/26] remove code for Geometry, Geography and structured types. 
--- databend_sqlalchemy/databend_dialect.py | 41 ++++++++++++++++-- databend_sqlalchemy/types.py | 56 +++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/databend_sqlalchemy/databend_dialect.py b/databend_sqlalchemy/databend_dialect.py index 516c1a3..1d4cbdf 100644 --- a/databend_sqlalchemy/databend_dialect.py +++ b/databend_sqlalchemy/databend_dialect.py @@ -23,7 +23,6 @@ Table("some_table", metadata, ..., databend_transient=True|False) """ - import decimal import re import operator @@ -60,6 +59,17 @@ CHAR, TIMESTAMP, ) + +import sqlalchemy +from sqlalchemy import types as sqltypes +from sqlalchemy.sql.base import Executable + +# Check SQLAlchemy version +if sqlalchemy.__version__.startswith('2.'): + from sqlalchemy.types import DOUBLE +else: + from .types import DOUBLE + from sqlalchemy.engine import ExecutionContext, default from sqlalchemy.exc import DBAPIError, NoSuchTableError @@ -71,7 +81,7 @@ AzureBlobStorage, AmazonS3, ) -from .types import INTERVAL +from .types import INTERVAL, TINYINT, BITMAP RESERVED_WORDS = { "Error", @@ -693,6 +703,7 @@ def __init__(self, key_type, value_type): super(MAP, self).__init__() + class DatabendDate(sqltypes.DATE): __visit_name__ = "DATE" @@ -793,12 +804,19 @@ class DatabendInterval(INTERVAL): render_bind_cast = True +class DatabendBitmap(BITMAP): + render_bind_cast = True + + +class DatabendTinyInt(TINYINT): + render_bind_cast = True + # Type converters ischema_names = { "bigint": BIGINT, "int": INTEGER, "smallint": SMALLINT, - "tinyint": SMALLINT, + "tinyint": DatabendTinyInt, "int64": BIGINT, "int32": INTEGER, "int16": SMALLINT, @@ -813,7 +831,7 @@ class DatabendInterval(INTERVAL): "datetime": DatabendDateTime, "timestamp": DatabendDateTime, "float": FLOAT, - "double": FLOAT, + "double": DOUBLE, "float64": FLOAT, "float32": FLOAT, "string": VARCHAR, @@ -826,8 +844,11 @@ class DatabendInterval(INTERVAL): "binary": BINARY, "time": DatabendTime, "interval": DatabendInterval, + "bitmap": 
DatabendBitmap } + + # Column spec colspecs = { sqltypes.Interval: DatabendInterval, @@ -1227,6 +1248,18 @@ def visit_TIME(self, type_, **kw): def visit_INTERVAL(self, type, **kw): return "INTERVAL" + def visit_DOUBLE(self, type_, **kw): + return "DOUBLE" + + def visit_TINYINT(self, type_, **kw): + return "TINYINT" + + def visit_FLOAT(self, type_, **kw): + return "FLOAT" + + def visit_BITMAP(self, type_, **kw): + return "BITMAP" + class DatabendDDLCompiler(compiler.DDLCompiler): def visit_primary_key_constraint(self, constraint, **kw): diff --git a/databend_sqlalchemy/types.py b/databend_sqlalchemy/types.py index e4f637d..6c27ba4 100644 --- a/databend_sqlalchemy/types.py +++ b/databend_sqlalchemy/types.py @@ -3,6 +3,7 @@ import datetime as dt from typing import Optional, Type, Any +from sqlalchemy import func from sqlalchemy.engine.interfaces import Dialect from sqlalchemy.sql import sqltypes from sqlalchemy.sql import type_api @@ -73,3 +74,58 @@ def process(value: dt.timedelta) -> str: return f"to_interval('{value.total_seconds()} seconds')" return process + + +class TINYINT(sqltypes.Integer): + __visit_name__ = "TINYINT" + native = True + + +class DOUBLE(sqltypes.Double): + __visit_name__ = "DOUBLE" + native = True + + +class FLOAT(sqltypes.Float): + __visit_name__ = "FLOAT" + native = True + + +# The “CamelCase” types are to the greatest degree possible database agnostic + +# For these datatypes, specific SQLAlchemy dialects provide backend-specific “UPPERCASE” datatypes, for a SQL type that has no analogue on other backends + + +class BITMAP(sqltypes.TypeEngine): + __visit_name__ = "BITMAP" + render_bind_cast = True + + def __init__(self, **kwargs): + super(BITMAP, self).__init__() + + def process_result_value(self, value, dialect): + if value is None: + return None + # Assuming the database returns a string representation of the bitmap + return str(value) + + def bind_expression(self, bindvalue): + return func.to_bitmap(bindvalue, type_=self) + + def 
column_expression(self, col): + return func.bitmap_to_string(col, type_=sqltypes.String) + + def bind_processor(self, dialect): + def process(value): + if value is None: + return None + return str(value) + return process + + def result_processor(self, dialect, coltype): + def process(value): + if value is None: + return None + return str(value) + return process + From d400c57e8bc5e50f861c481452742ae72f22509e Mon Sep 17 00:00:00 2001 From: simozzy Date: Thu, 19 Jun 2025 18:53:10 +0100 Subject: [PATCH 16/26] Update test.yml --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2edc747..dbbea42 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest services: databend: - image: datafuselabs/databend:nightly + image: datafuselabs/databend env: QUERY_DEFAULT_USER: databend QUERY_DEFAULT_PASSWORD: databend From 41817a480e35df6174de9b82e77fb7123bb48ff1 Mon Sep 17 00:00:00 2001 From: simozzy Date: Tue, 24 Jun 2025 13:30:34 +0100 Subject: [PATCH 17/26] reverted change to test file --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dbbea42..2edc747 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest services: databend: - image: datafuselabs/databend + image: datafuselabs/databend:nightly env: QUERY_DEFAULT_USER: databend QUERY_DEFAULT_PASSWORD: databend From 2c5bce3cff7a5219534b666ca79dc0dbf96f136b Mon Sep 17 00:00:00 2001 From: simozzy Date: Wed, 25 Jun 2025 13:24:41 +0100 Subject: [PATCH 18/26] Added tests for TINYINT and BITMAP --- databend_sqlalchemy/types.py | 16 +++-- tests/test_sqlalchemy.py | 121 ++++++++++++++++++++++++++++++++++- 2 files changed, 127 insertions(+), 10 deletions(-) diff --git a/databend_sqlalchemy/types.py 
b/databend_sqlalchemy/types.py index 6c27ba4..c230b06 100644 --- a/databend_sqlalchemy/types.py +++ b/databend_sqlalchemy/types.py @@ -81,7 +81,7 @@ class TINYINT(sqltypes.Integer): native = True -class DOUBLE(sqltypes.Double): +class DOUBLE(sqltypes.Float): __visit_name__ = "DOUBLE" native = True @@ -106,19 +106,22 @@ def __init__(self, **kwargs): def process_result_value(self, value, dialect): if value is None: return None - # Assuming the database returns a string representation of the bitmap - return str(value) + # Databend returns bitmaps as strings of comma-separated integers + return set(int(x) for x in value.split(',') if x) def bind_expression(self, bindvalue): return func.to_bitmap(bindvalue, type_=self) def column_expression(self, col): - return func.bitmap_to_string(col, type_=sqltypes.String) + # Convert bitmap to string using a custom function + return func.to_string(col, type_=sqltypes.String) def bind_processor(self, dialect): def process(value): if value is None: return None + if isinstance(value, set): + return ','.join(str(x) for x in sorted(value)) return str(value) return process @@ -126,6 +129,5 @@ def result_processor(self, dialect, coltype): def process(value): if value is None: return None - return str(value) - return process - + return set(int(x) for x in value.split(',') if x) + return process \ No newline at end of file diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 6b9625e..597df43 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -18,9 +18,13 @@ from sqlalchemy.testing.suite import EnumTest as _EnumTest from sqlalchemy.testing.suite import CTETest as _CTETest from sqlalchemy.testing.suite import JSONTest as _JSONTest +from sqlalchemy.testing.suite import IntegerTest as _IntegerTest from sqlalchemy import types as sql_types -from sqlalchemy import testing, select -from sqlalchemy.testing import config, eq_ +from sqlalchemy.testing import config +from sqlalchemy import testing, Table, Column, 
Integer +from sqlalchemy.testing import eq_, fixtures, assertions + +from databend_sqlalchemy.types import TINYINT, BITMAP class ComponentReflectionTestExtra(_ComponentReflectionTestExtra): @@ -282,7 +286,6 @@ class EnumTest(_EnumTest): def test_round_trip_executemany(self, connection): pass - class CTETest(_CTETest): @classmethod def define_tables(cls, metadata): @@ -318,3 +321,115 @@ def define_tables(cls, metadata): # ToDo - this does not yet work def test_path_typed_comparison(self, datatype, value): pass + + + + +class IntegerTest(_IntegerTest, fixtures.TablesTest): + + @classmethod + def define_tables(cls, metadata): + Table( + "tiny_int_table", + metadata, + Column("id", TINYINT) + ) + + def test_tinyint_write_and_read(self, connection): + tiny_int_table = self.tables.tiny_int_table + + # Insert a value + connection.execute( + tiny_int_table.insert(), + [{"id": 127}] # 127 is typically the maximum value for a signed TINYINT + ) + + # Read the value back + result = connection.execute(select(tiny_int_table.c.id)).scalar() + + # Verify the value + eq_(result, 127) + + # Test with minimum value + connection.execute( + tiny_int_table.insert(), + [{"id": -128}] # -128 is typically the minimum value for a signed TINYINT + ) + + result = connection.execute(select(tiny_int_table.c.id).order_by(tiny_int_table.c.id)).first()[0] + eq_(result, -128) + + def test_tinyint_overflow(self, connection): + tiny_int_table = self.tables.tiny_int_table + + # This should raise an exception as it's outside the TINYINT range + with assertions.expect_raises(Exception): # Replace with specific exception if known + connection.execute( + tiny_int_table.insert(), + [{"id": 128}] # 128 is typically outside the range of a signed TINYINT + ) + + with assertions.expect_raises(Exception): # Replace with specific exception if known + connection.execute( + tiny_int_table.insert(), + [{"id": -129}] # -129 is typically outside the range of a signed TINYINT + ) + +class 
BitmapTest(fixtures.TablesTest): + + @classmethod + def define_tables(cls, metadata): + Table( + "bitmap_table", + metadata, + Column("id", Integer), + Column("bitmap_data", BITMAP) + ) + + """ + Perform a simple test using Databend's bitmap data type to check + that the bitmap data is correctly inserted and retrieved.' + """ + def test_bitmap_write_and_read(self, connection): + bitmap_table = self.tables.bitmap_table + + # Insert a value + connection.execute( + bitmap_table.insert(), + [{"id": 1, "bitmap_data": '1,2,3'}] + ) + + # Read the value back + result = connection.execute( + select(bitmap_table.c.bitmap_data).where(bitmap_table.c.id == 1) + ).scalar() + + # Verify the value + eq_(result, ('1,2,3')) + + """ + Perform a simple test using one of Databend's bitmap operations to check + that the Bitmap data is correctly manipulated.' + """ + def test_bitmap_operations(self, connection): + bitmap_table = self.tables.bitmap_table + + # Insert two values + connection.execute( + bitmap_table.insert(), + [ + {"id": 1, "bitmap_data": "1,4,5"}, + {"id": 2, "bitmap_data": "4,5"} + ] + ) + + # Perform a bitmap AND operation and convert the result to a string + result = connection.execute( + select(func.to_string(func.bitmap_and( + bitmap_table.c.bitmap_data, + func.to_bitmap("3,4,5") + ))).where(bitmap_table.c.id == 1) + ).scalar() + + # Verify the result + eq_(result, "4,5") From e1160cff445c75964fbf3d63bb16f05fab0f0f9a Mon Sep 17 00:00:00 2001 From: simozzy Date: Wed, 25 Jun 2025 13:52:36 +0100 Subject: [PATCH 19/26] Added tests for DOUBLE --- tests/test_sqlalchemy.py | 63 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 597df43..c09b71f 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -24,7 +24,7 @@ from sqlalchemy import testing, Table, Column, Integer from sqlalchemy.testing import eq_, fixtures, assertions -from 
databend_sqlalchemy.types import TINYINT, BITMAP +from databend_sqlalchemy.types import TINYINT, BITMAP, DOUBLE class ComponentReflectionTestExtra(_ComponentReflectionTestExtra): @@ -375,6 +375,7 @@ def test_tinyint_overflow(self, connection): [{"id": -129}] # -129 is typically outside the range of a signed TINYINT ) + class BitmapTest(fixtures.TablesTest): @classmethod @@ -433,3 +434,63 @@ def test_bitmap_operations(self, connection): # Verify the result eq_(result, "4,5") + + +class DoubleTest(fixtures.TablesTest): + + @classmethod + def define_tables(cls, metadata): + Table( + "double_table", + metadata, + Column("id", Integer), + Column("double_data", DOUBLE) + ) + + def test_double_write_and_read(self, connection): + double_table = self.tables.double_table + + # Insert a value + connection.execute( + double_table.insert(), + [{"id": 1, "double_data": -1.7976931348623157E+308}] + ) + + connection.execute( + double_table.insert(), + [{"id": 2, "double_data": 1.7976931348623157E+308}] + ) + + # Read the value back + result = connection.execute( + select(double_table.c.double_data).where(double_table.c.id == 1) + ).scalar() + + # Verify the value + eq_(result, -1.7976931348623157E+308) + + # Read the value back + result = connection.execute( + select(double_table.c.double_data).where(double_table.c.id == 2) + ).scalar() + + # Verify the value + eq_(result, 1.7976931348623157E+308) + + + def test_double_overflow(self, connection): + double_table = self.tables.double_table + + # This should raise an exception as it's outside the TINYINT range + with assertions.expect_raises(Exception): # Replace with specific exception if known + connection.execute( + double_table.insert(), + [{"id": 3, "double_data": float('inf')}] + ) + + with assertions.expect_raises(Exception): # Replace with specific exception if known + connection.execute( + double_table.insert(), + [{"id": 3, "double_data": float('-inf')}] + ) + From 9d8bd00838de3b132cc3d095a5d34811586e5bc3 Mon Sep 17 
00:00:00 2001 From: simozzy Date: Wed, 25 Jun 2025 13:54:15 +0100 Subject: [PATCH 20/26] Update test.yml --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2edc747..dbbea42 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest services: databend: - image: datafuselabs/databend:nightly + image: datafuselabs/databend env: QUERY_DEFAULT_USER: databend QUERY_DEFAULT_PASSWORD: databend From 1115a37d1899326ec6db3023d79f4954ea368d77 Mon Sep 17 00:00:00 2001 From: simozzy Date: Wed, 25 Jun 2025 13:59:09 +0100 Subject: [PATCH 21/26] Update test_sqlalchemy.py --- tests/test_sqlalchemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 4faad7d..8e01c24 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -480,7 +480,7 @@ def test_double_write_and_read(self, connection): def test_double_overflow(self, connection): double_table = self.tables.double_table - # This should raise an exception as it's outside the TINYINT range + # This should raise an exception as it's outside the DOUBLE range with assertions.expect_raises(Exception): # Replace with specific exception if known connection.execute( double_table.insert(), From 08328cbe571ff32c613767f0b5c4cee685d1e172 Mon Sep 17 00:00:00 2001 From: simozzy Date: Wed, 25 Jun 2025 16:07:36 +0100 Subject: [PATCH 22/26] Ensure tests work on sqlalchemy versions 1.4.54 and 2.0 + --- tests/conftest.py | 19 ++++++--- tests/test_copy_into.py | 89 +++++++++++++++++++++------------------- tests/test_sqlalchemy.py | 36 +++++++++++----- 3 files changed, 84 insertions(+), 60 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2b6d9a5..1012246 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,4 @@ from sqlalchemy.dialects import registry -from sqlalchemy 
import event, Engine, text import pytest registry.register("databend.databend", "databend_sqlalchemy.databend_dialect", "DatabendDialect") @@ -9,9 +8,17 @@ from sqlalchemy.testing.plugin.pytestplugin import * +from packaging import version +import sqlalchemy +if version.parse(sqlalchemy.__version__) >= version.parse('2.0.0'): + from sqlalchemy import event, text + from sqlalchemy import Engine + + + @event.listens_for(Engine, "connect") + def receive_engine_connect(conn, r): + cur = conn.cursor() + cur.execute('SET global format_null_as_str = 0') + cur.close() + -@event.listens_for(Engine, "connect") -def receive_engine_connect(conn, r): - cur = conn.cursor() - cur.execute('SET global format_null_as_str = 0') - cur.close() diff --git a/tests/test_copy_into.py b/tests/test_copy_into.py index 3d193cf..c679510 100644 --- a/tests/test_copy_into.py +++ b/tests/test_copy_into.py @@ -27,6 +27,8 @@ FileColumnClause, StageClause, ) +import sqlalchemy +from packaging import version class CompileDatabendCopyIntoTableTest(fixtures.TestBase, AssertsCompiledSQL): @@ -215,51 +217,52 @@ def define_tables(cls, metadata): Column("data", String(50)), ) - def test_copy_into_stage_and_table(self, connection): - # create stage - connection.execute(text('CREATE OR REPLACE STAGE mystage')) - # copy into stage from random table limiting 1000 - table = self.tables.random_data - query = table.select().limit(1000) + if version.parse(sqlalchemy.__version__) >= version.parse('2.0.0'): + def test_copy_into_stage_and_table(self, connection): + # create stage + connection.execute(text('CREATE OR REPLACE STAGE mystage')) + # copy into stage from random table limiting 1000 + table = self.tables.random_data + query = table.select().limit(1000) - copy_into = CopyIntoLocation( - target=StageClause( - name='mystage' - ), - from_=query, - file_format=ParquetFormat(), - options=CopyIntoLocationOptions() - ) - r = connection.execute( - copy_into - ) - eq_(r.rowcount, 1000) - copy_into_results = 
r.context.copy_into_location_results() - eq_(copy_into_results['rows_unloaded'], 1000) - # eq_(copy_into_results['input_bytes'], 16250) # input bytes will differ, the table is random - # eq_(copy_into_results['output_bytes'], 4701) # output bytes differs + copy_into = CopyIntoLocation( + target=StageClause( + name='mystage' + ), + from_=query, + file_format=ParquetFormat(), + options=CopyIntoLocationOptions() + ) + r = connection.execute( + copy_into + ) + eq_(r.rowcount, 1000) + copy_into_results = r.context.copy_into_location_results() + eq_(copy_into_results['rows_unloaded'], 1000) + # eq_(copy_into_results['input_bytes'], 16250) # input bytes will differ, the table is random + # eq_(copy_into_results['output_bytes'], 4701) # output bytes differs - # now copy into table + # now copy into table - copy_into_table = CopyIntoTable( - target=self.tables.loaded, - from_=StageClause( - name='mystage' - ), - file_format=ParquetFormat(), - options=CopyIntoTableOptions() - ) - r = connection.execute( - copy_into_table - ) - eq_(r.rowcount, 1000) - copy_into_table_results = r.context.copy_into_table_results() - assert len(copy_into_table_results) == 1 - result = copy_into_table_results[0] - assert result['file'].endswith('.parquet') - eq_(result['rows_loaded'], 1000) - eq_(result['errors_seen'], 0) - eq_(result['first_error'], None) - eq_(result['first_error_line'], None) + copy_into_table = CopyIntoTable( + target=self.tables.loaded, + from_=StageClause( + name='mystage' + ), + file_format=ParquetFormat(), + options=CopyIntoTableOptions() + ) + r = connection.execute( + copy_into_table + ) + eq_(r.rowcount, 1000) + copy_into_table_results = r.context.copy_into_table_results() + assert len(copy_into_table_results) == 1 + result = copy_into_table_results[0] + assert result['file'].endswith('.parquet') + eq_(result['rows_loaded'], 1000) + eq_(result['errors_seen'], 0) + eq_(result['first_error'], None) + eq_(result['first_error_line'], None) diff --git 
a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 8e01c24..ba84cf2 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -13,12 +13,13 @@ from sqlalchemy.testing.suite import LongNameBlowoutTest as _LongNameBlowoutTest from sqlalchemy.testing.suite import QuotedNameArgumentTest as _QuotedNameArgumentTest from sqlalchemy.testing.suite import JoinTest as _JoinTest -from sqlalchemy.testing.suite import BizarroCharacterFKResolutionTest as _BizarroCharacterFKResolutionTest + from sqlalchemy.testing.suite import ServerSideCursorsTest as _ServerSideCursorsTest -from sqlalchemy.testing.suite import EnumTest as _EnumTest + from sqlalchemy.testing.suite import CTETest as _CTETest from sqlalchemy.testing.suite import JSONTest as _JSONTest from sqlalchemy.testing.suite import IntegerTest as _IntegerTest + from sqlalchemy import types as sql_types from sqlalchemy.testing import config from sqlalchemy import testing, Table, Column, Integer @@ -26,6 +27,19 @@ from databend_sqlalchemy.types import TINYINT, BITMAP, DOUBLE +from packaging import version +import sqlalchemy +if version.parse(sqlalchemy.__version__) >= version.parse('2.0.0'): + from sqlalchemy.testing.suite import BizarroCharacterFKResolutionTest as _BizarroCharacterFKResolutionTest + from sqlalchemy.testing.suite import EnumTest as _EnumTest +else: + from sqlalchemy.testing.suite import ComponentReflectionTest as _ComponentReflectionTest + + class ComponentReflectionTest(_ComponentReflectionTest): + + @testing.skip("databend") + def test_get_indexes(self): + pass class ComponentReflectionTestExtra(_ComponentReflectionTestExtra): @@ -190,9 +204,9 @@ def test_get_indexes(self, name): class JoinTest(_JoinTest): __requires__ = ("foreign_keys",) - -class BizarroCharacterFKResolutionTest(_BizarroCharacterFKResolutionTest): - __requires__ = ("foreign_keys",) +if version.parse(sqlalchemy.__version__) >= version.parse('2.0.0'): + class 
BizarroCharacterFKResolutionTest(_BizarroCharacterFKResolutionTest): + __requires__ = ("foreign_keys",) class BinaryTest(_BinaryTest): @@ -278,13 +292,13 @@ def test_roundtrip_fetchall(self): def test_roundtrip_fetchmany(self): pass +if version.parse(sqlalchemy.__version__) >= version.parse('2.0.0'): + class EnumTest(_EnumTest): + __backend__ = True -class EnumTest(_EnumTest): - __backend__ = True - - @testing.skip("databend") # Skipped because no supporting enums yet - def test_round_trip_executemany(self, connection): - pass + @testing.skip("databend") # Skipped because no supporting enums yet + def test_round_trip_executemany(self, connection): + pass class CTETest(_CTETest): From 840c68d1751117f6b2a6fd7f5c61574b5b6f19c2 Mon Sep 17 00:00:00 2001 From: simozzy Date: Wed, 25 Jun 2025 17:26:03 +0100 Subject: [PATCH 23/26] Added types for GEOMETRY and GEOGRAPHY. --- databend_sqlalchemy/databend_dialect.py | 24 +++- databend_sqlalchemy/types.py | 19 ++- tests/test_sqlalchemy.py | 165 +++++++++++++++++++++++- 3 files changed, 202 insertions(+), 6 deletions(-) diff --git a/databend_sqlalchemy/databend_dialect.py b/databend_sqlalchemy/databend_dialect.py index 1d4cbdf..ca3bbac 100644 --- a/databend_sqlalchemy/databend_dialect.py +++ b/databend_sqlalchemy/databend_dialect.py @@ -81,7 +81,7 @@ AzureBlobStorage, AmazonS3, ) -from .types import INTERVAL, TINYINT, BITMAP +from .types import INTERVAL, TINYINT, BITMAP, GEOMETRY, GEOGRAPHY RESERVED_WORDS = { "Error", @@ -811,6 +811,13 @@ class DatabendBitmap(BITMAP): class DatabendTinyInt(TINYINT): render_bind_cast = True + +class DatabendGeometry(GEOMETRY): + render_bind_cast = True + +class DatabendGeography(GEOGRAPHY): + render_bind_cast = True + # Type converters ischema_names = { "bigint": BIGINT, @@ -844,7 +851,9 @@ class DatabendTinyInt(TINYINT): "binary": BINARY, "time": DatabendTime, "interval": DatabendInterval, - "bitmap": DatabendBitmap + "bitmap": DatabendBitmap, + "geometry": DatabendGeometry, + "geography": 
DatabendGeography } @@ -1260,6 +1269,17 @@ def visit_FLOAT(self, type_, **kw): def visit_BITMAP(self, type_, **kw): return "BITMAP" + def visit_GEOMETRY(self, type_, **kw): + if type_.srid is not None: + return f"GEOMETRY(SRID {type_.srid})" + return "GEOMETRY" + + def visit_GEOGRAPHY(self, type_, **kw): + if type_.srid is not None: + return f"GEOGRAPHY(SRID {type_.srid})" + return "GEOGRAPHY" + + class DatabendDDLCompiler(compiler.DDLCompiler): def visit_primary_key_constraint(self, constraint, **kw): diff --git a/databend_sqlalchemy/types.py b/databend_sqlalchemy/types.py index c230b06..ec16403 100644 --- a/databend_sqlalchemy/types.py +++ b/databend_sqlalchemy/types.py @@ -130,4 +130,21 @@ def process(value): if value is None: return None return set(int(x) for x in value.split(',') if x) - return process \ No newline at end of file + return process + + +class GEOMETRY(sqltypes.TypeEngine): + __visit_name__ = "GEOMETRY" + + def __init__(self, srid=None): + super(GEOMETRY, self).__init__() + self.srid = srid + + +class GEOGRAPHY(sqltypes.TypeEngine): + __visit_name__ = "GEOGRAPHY" + + def __init__(self, srid=None): + super(GEOGRAPHY, self).__init__() + self.srid = srid + diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index ba84cf2..161af38 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -25,7 +25,7 @@ from sqlalchemy import testing, Table, Column, Integer from sqlalchemy.testing import eq_, fixtures, assertions -from databend_sqlalchemy.types import TINYINT, BITMAP, DOUBLE +from databend_sqlalchemy.types import TINYINT, BITMAP, DOUBLE, GEOMETRY, GEOGRAPHY from packaging import version import sqlalchemy @@ -495,14 +495,173 @@ def test_double_overflow(self, connection): double_table = self.tables.double_table # This should raise an exception as it's outside the DOUBLE range - with assertions.expect_raises(Exception): # Replace with specific exception if known + with assertions.expect_raises(Exception): connection.execute( 
double_table.insert(), [{"id": 3, "double_data": float('inf')}] ) - with assertions.expect_raises(Exception): # Replace with specific exception if known + with assertions.expect_raises(Exception): connection.execute( double_table.insert(), [{"id": 3, "double_data": float('-inf')}] ) + + +class GeometryTest(fixtures.TablesTest): + + @classmethod + def define_tables(cls, metadata): + Table( + "geometry_table", + metadata, + Column("id", Integer), + Column("geometry_data", GEOMETRY) + ) + + """ + Perform a simple test using Databend's bitmap data type to check + that the bitmap data is correctly inserted and retrieved.' + """ + def test_geometry_write_and_read(self, connection): + geometry_table = self.tables.geometry_table + + # Insert a value + connection.execute( + geometry_table.insert(), + [{"id": 1, "geometry_data": 'POINT(10 20)'}] + ) + connection.execute( + geometry_table.insert(), + [{"id": 2, "geometry_data": 'LINESTRING(10 20, 30 40, 50 60)'}] + ) + connection.execute( + geometry_table.insert(), + [{"id": 3, "geometry_data": 'POLYGON((10 20, 30 40, 50 60, 10 20))'}] + ) + connection.execute( + geometry_table.insert(), + [{"id": 4, "geometry_data": 'MULTIPOINT((10 20), (30 40), (50 60))'}] + ) + connection.execute( + geometry_table.insert(), + [{"id": 5, "geometry_data": 'MULTILINESTRING((10 20, 30 40), (50 60, 70 80))'}] + ) + connection.execute( + geometry_table.insert(), + [{"id": 6, "geometry_data": 'MULTIPOLYGON(((10 20, 30 40, 50 60, 10 20)), ((15 25, 25 35, 35 45, 15 25)))'}] + ) + connection.execute( + geometry_table.insert(), + [{"id": 7, "geometry_data": 'GEOMETRYCOLLECTION(POINT(10 20), LINESTRING(10 20, 30 40), POLYGON((10 20, 30 40, 50 60, 10 20)))'}] + ) + + result = connection.execute( + select(geometry_table.c.geometry_data).where(geometry_table.c.id == 1) + ).scalar() + eq_(result, ('{"type": "Point", "coordinates": [10,20]}')) + result = connection.execute( + select(geometry_table.c.geometry_data).where(geometry_table.c.id == 2) + 
).scalar() + eq_(result, ('{"type": "LineString", "coordinates": [[10,20],[30,40],[50,60]]}')) + result = connection.execute( + select(geometry_table.c.geometry_data).where(geometry_table.c.id == 3) + ).scalar() + eq_(result, ('{"type": "Polygon", "coordinates": [[[10,20],[30,40],[50,60],[10,20]]]}')) + result = connection.execute( + select(geometry_table.c.geometry_data).where(geometry_table.c.id == 4) + ).scalar() + eq_(result, ('{"type": "MultiPoint", "coordinates": [[10,20],[30,40],[50,60]]}')) + result = connection.execute( + select(geometry_table.c.geometry_data).where(geometry_table.c.id == 5) + ).scalar() + eq_(result, ('{"type": "MultiLineString", "coordinates": [[[10,20],[30,40]],[[50,60],[70,80]]]}')) + result = connection.execute( + select(geometry_table.c.geometry_data).where(geometry_table.c.id == 6) + ).scalar() + eq_(result, ('{"type": "MultiPolygon", "coordinates": [[[[10,20],[30,40],[50,60],[10,20]]],[[[15,25],[25,35],[35,45],[15,25]]]]}')) + result = connection.execute( + select(geometry_table.c.geometry_data).where(geometry_table.c.id == 7) + ).scalar() + eq_(result, ('{"type": "GeometryCollection", "geometries": [{"type": "Point", "coordinates": [10,20]},{"type": "LineString", "coordinates": [[10,20],[30,40]]},{"type": "Polygon", "coordinates": [[[10,20],[30,40],[50,60],[10,20]]]}]}')) + + + + + +class GeographyTest(fixtures.TablesTest): + + @classmethod + def define_tables(cls, metadata): + Table( + "geography_table", + metadata, + Column("id", Integer), + Column("geography_data", GEOGRAPHY) + ) + + """ + Perform a simple test using Databend's bitmap data type to check + that the bitmap data is correctly inserted and retrieved.' 
+ """ + def test_geography_write_and_read(self, connection): + geography_table = self.tables.geography_table + + # Insert a value + connection.execute( + geography_table.insert(), + [{"id": 1, "geography_data": 'POINT(10 20)'}] + ) + connection.execute( + geography_table.insert(), + [{"id": 2, "geography_data": 'LINESTRING(10 20, 30 40, 50 60)'}] + ) + connection.execute( + geography_table.insert(), + [{"id": 3, "geography_data": 'POLYGON((10 20, 30 40, 50 60, 10 20))'}] + ) + connection.execute( + geography_table.insert(), + [{"id": 4, "geography_data": 'MULTIPOINT((10 20), (30 40), (50 60))'}] + ) + connection.execute( + geography_table.insert(), + [{"id": 5, "geography_data": 'MULTILINESTRING((10 20, 30 40), (50 60, 70 80))'}] + ) + connection.execute( + geography_table.insert(), + [{"id": 6, "geography_data": 'MULTIPOLYGON(((10 20, 30 40, 50 60, 10 20)), ((15 25, 25 35, 35 45, 15 25)))'}] + ) + connection.execute( + geography_table.insert(), + [{"id": 7, "geography_data": 'GEOMETRYCOLLECTION(POINT(10 20), LINESTRING(10 20, 30 40), POLYGON((10 20, 30 40, 50 60, 10 20)))'}] + ) + + result = connection.execute( + select(geography_table.c.geography_data).where(geography_table.c.id == 1) + ).scalar() + eq_(result, ('{"type": "Point", "coordinates": [10,20]}')) + result = connection.execute( + select(geography_table.c.geography_data).where(geography_table.c.id == 2) + ).scalar() + eq_(result, ('{"type": "LineString", "coordinates": [[10,20],[30,40],[50,60]]}')) + result = connection.execute( + select(geography_table.c.geography_data).where(geography_table.c.id == 3) + ).scalar() + eq_(result, ('{"type": "Polygon", "coordinates": [[[10,20],[30,40],[50,60],[10,20]]]}')) + result = connection.execute( + select(geography_table.c.geography_data).where(geography_table.c.id == 4) + ).scalar() + eq_(result, ('{"type": "MultiPoint", "coordinates": [[10,20],[30,40],[50,60]]}')) + result = connection.execute( + select(geography_table.c.geography_data).where(geography_table.c.id 
== 5) + ).scalar() + eq_(result, ('{"type": "MultiLineString", "coordinates": [[[10,20],[30,40]],[[50,60],[70,80]]]}')) + result = connection.execute( + select(geography_table.c.geography_data).where(geography_table.c.id == 6) + ).scalar() + eq_(result, ('{"type": "MultiPolygon", "coordinates": [[[[10,20],[30,40],[50,60],[10,20]]],[[[15,25],[25,35],[35,45],[15,25]]]]}')) + result = connection.execute( + select(geography_table.c.geography_data).where(geography_table.c.id == 7) + ).scalar() + eq_(result, ('{"type": "GeometryCollection", "geometries": [{"type": "Point", "coordinates": [10,20]},{"type": "LineString", "coordinates": [[10,20],[30,40]]},{"type": "Polygon", "coordinates": [[[10,20],[30,40],[50,60],[10,20]]]}]}')) \ No newline at end of file From 2dd9c683d1e24903a70b790a2368ef27b1652938 Mon Sep 17 00:00:00 2001 From: simozzy Date: Mon, 30 Jun 2025 13:24:09 +0100 Subject: [PATCH 24/26] Added Zip compression type --- databend_sqlalchemy/dml.py | 1 + databend_sqlalchemy/types.py | 37 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/databend_sqlalchemy/dml.py b/databend_sqlalchemy/dml.py index 6e5aae4..cb2ad2b 100644 --- a/databend_sqlalchemy/dml.py +++ b/databend_sqlalchemy/dml.py @@ -251,6 +251,7 @@ class Compression(Enum): RAW_DEFLATE = "RAW_DEFLATE" XZ = "XZ" SNAPPY = "SNAPPY" + ZIP = "ZIP" class CopyFormat(ClauseElement): diff --git a/databend_sqlalchemy/types.py b/databend_sqlalchemy/types.py index ec16403..2d53d1f 100644 --- a/databend_sqlalchemy/types.py +++ b/databend_sqlalchemy/types.py @@ -140,11 +140,48 @@ def __init__(self, srid=None): super(GEOMETRY, self).__init__() self.srid = srid + # def bind_expression(self, bindvalue): + # return func.st_geomfromwkt(bindvalue, type_=self) + # + # def column_expression(self, col): + # # Convert bitmap to string using a custom function + # return func.st_aswkt(col, type_=sqltypes.String) + + class GEOGRAPHY(sqltypes.TypeEngine): __visit_name__ = "GEOGRAPHY" + native = True 
def __init__(self, srid=None): super(GEOGRAPHY, self).__init__() self.srid = srid + def literal_processor(self, dialect): + def process(value): + if value is None: + return 'NULL' + if isinstance(value, str): + # Assume it's already in WKT format + return f"ST_GeogFromText('{value}')" + # If it's some other geographic object, you might need to convert it to WKT first + # This is just an example and might need to be adjusted based on your specific needs + return f"ST_GeogFromText('{value.wkt}')" + + return process + + def bind_expression(self, bindvalue): + return func.ST_GeogFromText(bindvalue, type_=self) + + def column_expression(self, col): + return func.ST_AsText(col, type_=sqltypes.String) + + def result_processor(self, dialect, coltype): + def process(value): + if value is None: + return None + # Assuming the database returns WKT + return value # Or parse into a geographic object if needed + + return process + From 81cd3b5a0a60830db5064a0a2868de74ab2cf742 Mon Sep 17 00:00:00 2001 From: simozzy Date: Tue, 1 Jul 2025 13:10:13 +0100 Subject: [PATCH 25/26] Added code to enable geo tables in tests. Removed redundant code. 
--- databend_sqlalchemy/types.py | 34 ---------------------------------- tests/test_sqlalchemy.py | 16 ++++++++++++---- 2 files changed, 12 insertions(+), 38 deletions(-) diff --git a/databend_sqlalchemy/types.py b/databend_sqlalchemy/types.py index 2d53d1f..e5a737f 100644 --- a/databend_sqlalchemy/types.py +++ b/databend_sqlalchemy/types.py @@ -140,13 +140,6 @@ def __init__(self, srid=None): super(GEOMETRY, self).__init__() self.srid = srid - # def bind_expression(self, bindvalue): - # return func.st_geomfromwkt(bindvalue, type_=self) - # - # def column_expression(self, col): - # # Convert bitmap to string using a custom function - # return func.st_aswkt(col, type_=sqltypes.String) - class GEOGRAPHY(sqltypes.TypeEngine): @@ -157,31 +150,4 @@ def __init__(self, srid=None): super(GEOGRAPHY, self).__init__() self.srid = srid - def literal_processor(self, dialect): - def process(value): - if value is None: - return 'NULL' - if isinstance(value, str): - # Assume it's already in WKT format - return f"ST_GeogFromText('{value}')" - # If it's some other geographic object, you might need to convert it to WKT first - # This is just an example and might need to be adjusted based on your specific needs - return f"ST_GeogFromText('{value.wkt}')" - - return process - - def bind_expression(self, bindvalue): - return func.ST_GeogFromText(bindvalue, type_=self) - - def column_expression(self, col): - return func.ST_AsText(col, type_=sqltypes.String) - - def result_processor(self, dialect, coltype): - def process(value): - if value is None: - return None - # Assuming the database returns WKT - return value # Or parse into a geographic object if needed - - return process diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 161af38..899a06a 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -512,6 +512,10 @@ class GeometryTest(fixtures.TablesTest): @classmethod def define_tables(cls, metadata): + # Enable the creation of tables with GEOMETRY data 
type + with cls.bind.begin() as conn: + conn.execute(text("set enable_geo_create_table=1")) + Table( "geometry_table", metadata, @@ -520,8 +524,8 @@ def define_tables(cls, metadata): ) """ - Perform a simple test using Databend's bitmap data type to check - that the bitmap data is correctly inserted and retrieved.' + Perform a simple test using Databend's Geometry data type to check + that the data is correctly inserted and retrieved.' """ def test_geometry_write_and_read(self, connection): geometry_table = self.tables.geometry_table @@ -593,6 +597,10 @@ class GeographyTest(fixtures.TablesTest): @classmethod def define_tables(cls, metadata): + # Enable the creation of tables with GEOGRAPHY data type + with cls.bind.begin() as conn: + conn.execute(text("set enable_geo_create_table=1")) + Table( "geography_table", metadata, @@ -601,8 +609,8 @@ def define_tables(cls, metadata): ) """ - Perform a simple test using Databend's bitmap data type to check - that the bitmap data is correctly inserted and retrieved.' + Perform a simple test using Databend's Geography data type to check + that the data is correctly inserted and retrieved.' 
""" def test_geography_write_and_read(self, connection): geography_table = self.tables.geography_table From 381f85b58d0de2b7c3638c6d803feb2b42beae29 Mon Sep 17 00:00:00 2001 From: simozzy Date: Fri, 4 Jul 2025 14:52:09 +0100 Subject: [PATCH 26/26] move initialization of enable_geo_create_table --- tests/conftest.py | 1 + tests/test_sqlalchemy.py | 8 -------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 1012246..ab4b9aa 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,6 +19,7 @@ def receive_engine_connect(conn, r): cur = conn.cursor() cur.execute('SET global format_null_as_str = 0') + cur.execute('SET global enable_geo_create_table = 1') cur.close() diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 899a06a..64a04c2 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -512,10 +512,6 @@ class GeometryTest(fixtures.TablesTest): @classmethod def define_tables(cls, metadata): - # Enable the creation of tables with GEOMETRY data type - with cls.bind.begin() as conn: - conn.execute(text("set enable_geo_create_table=1")) - Table( "geometry_table", metadata, @@ -597,10 +593,6 @@ class GeographyTest(fixtures.TablesTest): @classmethod def define_tables(cls, metadata): - # Enable the creation of tables with GEOGRAPHY data type - with cls.bind.begin() as conn: - conn.execute(text("set enable_geo_create_table=1")) - Table( "geography_table", metadata,